def test_iqr_filteringa(thermostat_emg_aux_constant_on_outlier):
    """Verify that IQR filtering removes outlier thermostats.

    Computes summary statistics with the outlier thermostat in place
    (the ``_IQFLT`` columns) and again with the outlier removed up front
    (the ``_NOIQ`` columns); the filtered values must agree, proving the
    IQR filter dropped the outlier on its own.
    """
    thermostats = list(thermostat_emg_aux_constant_on_outlier)

    # Run the metrics / statistics with the outlier thermostat in place
    iqflt_metrics = multiple_thermostat_calculate_epa_field_savings_metrics(thermostats)
    iqflt_output_dataframe = pd.DataFrame(iqflt_metrics, columns=COLUMNS)
    iqflt_summary_statistics = compute_summary_statistics(iqflt_output_dataframe)

    # Remove the outlier thermostat.
    # BUG FIX: the original `range(0, len(thermostats) - 1)` skipped the
    # last element, and calling pop(i) inside the index loop iterated over
    # shifted indices afterwards. Rebuilding the list is correct and safe.
    thermostats = [
        thermostat for thermostat in thermostats
        if thermostat.thermostat_id != 'thermostat_single_emg_aux_constant_on_outlier'
    ]

    # Re-run the metrics / statistics with the outlier thermostat removed
    noiq_metrics = multiple_thermostat_calculate_epa_field_savings_metrics(thermostats)
    noiq_output_dataframe = pd.DataFrame(noiq_metrics, columns=COLUMNS)
    noiq_summary_statistics = compute_summary_statistics(noiq_output_dataframe)

    # Verify that the IQFLT removed the outliers by comparing this with the
    # metrics with the outlier thermostat already removed.
    # BUG FIX: iterate over *all* stats groups (the original `- 1` bound
    # skipped the final group entirely).
    for column in range(len(iqflt_summary_statistics)):
        fields_iqflt = [x for x in iqflt_summary_statistics[column] if '_IQFLT' in x]
        for field_iqflt in fields_iqflt:
            field_noiq = field_iqflt.replace('IQFLT', 'NOIQ')
            left_side = iqflt_summary_statistics[column][field_iqflt]
            right_side = noiq_summary_statistics[column][field_noiq]
            # NaN on one side must mean NaN on the other; otherwise the
            # values must match exactly.
            if np.isnan(left_side) or np.isnan(right_side):
                assert np.isnan(left_side) and np.isnan(right_side)
            else:
                assert left_side == right_side
def test_compute_summary_statistics(combined_dataframe):
    """Basic filtering produces eight stats groups of the expected sizes."""
    stats = compute_summary_statistics(combined_dataframe)
    expected_lengths = [49] * 4 + [9105, 901] * 2
    assert [len(group) for group in stats] == expected_lengths


def test_compute_summary_statistics_advanced(combined_dataframe):
    """Advanced filtering doubles the group count, same per-group sizes."""
    stats = compute_summary_statistics(combined_dataframe, advanced_filtering=True)
    expected_lengths = [49] * 8 + [9105, 901] * 4
    assert [len(group) for group in stats] == expected_lengths


def test_summary_statistics_to_csv(combined_dataframe):
    """CSV export returns a DataFrame and round-trips with shape (9241, 9)."""
    stats = compute_summary_statistics(combined_dataframe)
    _, csv_path = tempfile.mkstemp()
    exported = summary_statistics_to_csv(stats, csv_path, "FAKE")
    assert isinstance(exported, pd.DataFrame)
    reread = pd.read_csv(csv_path)
    assert reread.shape == (9241, 9)
def test_compute_summary_statistics_advanced(combined_dataframe):
    """Advanced filtering yields sixteen stats groups of the expected sizes."""
    stats = compute_summary_statistics(combined_dataframe, advanced_filtering=True)
    expected_lengths = [21] * 8 + [593, 397] * 4
    assert [len(group) for group in stats] == expected_lengths
def main():
    """Example driver: load thermostat metadata, compute EPA field savings
    metrics, and write the metrics plus summary statistics to CSV files."""
    logging.basicConfig()

    # Example logging configuration for file and console output:
    #   logging.json       - normal logging example
    #   logging_noisy.json - turns on all debugging information
    #   logging_quiet.json - only logs error messages
    with open("logging.json", "r") as logging_config:
        logging.config.dictConfig(json.load(logging_config))

    # Uses the 'epathermostat' logging logger
    logger = logging.getLogger('epathermostat')
    logger.debug("Starting...")
    # True logs additional warning messages; False only displays them on console
    logging.captureWarnings(True)

    data_dir = os.path.join("..", "tests", "data")
    metadata_filename = os.path.join(data_dir, "metadata.csv")

    # Use this to save the weather cache to local disk files
    # thermostats = from_csv(metadata_filename, verbose=True, save_cache=True, cache_path='/tmp/epa_weather_files/')

    # Verbose overrides the logging level to display the imported thermostats;
    # set to False to use the logging configuration instead.
    thermostats = from_csv(metadata_filename, verbose=True)

    metrics = multiple_thermostat_calculate_epa_field_savings_metrics(thermostats)

    output_dir = "."
    output_filename = os.path.join(output_dir, "thermostat_example_output.csv")
    metrics_out = metrics_to_csv(metrics, output_filename)

    # Compute both the basic and the advanced-filtering statistics.
    stats = compute_summary_statistics(metrics_out)
    stats_advanced = compute_summary_statistics(metrics_out, advanced_filtering=True)

    product_id = "test_product"
    summary_statistics_to_csv(
        stats,
        os.path.join(data_dir, "thermostat_example_stats.csv"),
        product_id)
    summary_statistics_to_csv(
        stats_advanced,
        os.path.join(data_dir, "thermostat_example_stats_advanced.csv"),
        product_id)
def test_compute_summary_statistics_advanced(combined_dataframe):
    """Advanced filtering yields sixteen stats groups of the expected sizes."""
    stats = compute_summary_statistics(combined_dataframe, advanced_filtering=True)
    expected_lengths = [49] * 8 + [9105, 901] * 4
    assert [len(group) for group in stats] == expected_lengths


def test_summary_statistics_to_csv(combined_dataframe):
    """Compute the summary statistics used for CSV export."""
    stats = compute_summary_statistics(combined_dataframe)
def test_summary_statistics_to_csv(combined_dataframe):
    """CSV export returns a DataFrame and round-trips with shape (687, 9)."""
    stats = compute_summary_statistics(combined_dataframe)
    _, csv_path = tempfile.mkstemp()
    exported = summary_statistics_to_csv(stats, csv_path, "FAKE")
    assert isinstance(exported, pd.DataFrame)
    reread = pd.read_csv(csv_path)
    assert reread.shape == (687, 9)
def test_summary_statistics_to_csv(combined_dataframe):
    """Exported CSV header contains the expected number of columns."""
    stats = compute_summary_statistics(combined_dataframe, "label")
    _, csv_path = tempfile.mkstemp()
    exported = summary_statistics_to_csv(stats, csv_path)
    assert isinstance(exported, pd.DataFrame)
    # Count columns directly from the written header line.
    with open(csv_path, 'r') as csv_file:
        header_columns = csv_file.readline().split(",")
    assert len(header_columns) == 12 * 88 + 15
def test_compute_summary_statistics(combined_dataframe):
    """Heating/cooling stats dicts have expected sizes and finite values."""
    stats = compute_summary_statistics(combined_dataframe, "label")
    assert len(stats) == 2
    heating_stats, cooling_stats = stats
    assert len(heating_stats) == 12 * 61 + 5
    assert len(cooling_stats) == 12 * 47 + 5
    assert heating_stats["label"] == "label_heating"
    # Every numeric entry must be non-null and finite.
    for key, value in heating_stats.items():
        if key == "label":
            continue
        assert pd.notnull(value)
        assert not np.isinf(value)
def test_compute_summary_statistics(combined_dataframe):
    """Basic filtering produces eight stats groups of the expected sizes."""
    stats = compute_summary_statistics(combined_dataframe)
    expected_lengths = [21] * 4 + [593, 397] * 2
    assert [len(group) for group in stats] == expected_lengths
def test_summary_statistics_to_csv(combined_dataframe): summary_statistics = compute_summary_statistics(combined_dataframe)