def main():
    """Example end-to-end run: load thermostat metadata, compute EPA field
    savings metrics, and write the metrics, certification, and summary
    statistics CSV files."""
    # Whether to compute Advanced Statistics (in most cases this is NOT needed)
    ADVANCED_STATS = False

    logging.basicConfig()
    # Example logging configuration for file and console output
    # logging.json: Normal logging example
    # logging_noisy.json: Turns on all debugging information
    # logging_quiet.json: Only logs error messages
    with open("logging.json", "r") as config_file:
        logging.config.dictConfig(json.load(config_file))

    # Uses the 'epathermostat' logging
    logger = logging.getLogger("epathermostat")
    logger.debug("Starting...")

    # Set to True to log additional warning messages, False to only display on
    # console
    logging.captureWarnings(True)

    # data_dir = os.path.join("..", "tests", "data", "single_stage")
    # data_dir = os.path.join("..", "tests", "data", "two_stage")
    data_dir = os.path.join("..", "tests", "data", "two_stage_ert")
    metadata_path = os.path.join(data_dir, "epa_two_stage_metadata.csv")

    # Use this to save the weather cache to local disk files
    # thermostats = from_csv(metadata_filename, verbose=True, save_cache=True,
    #                        cache_path='/tmp/epa_weather_files/')

    # Verbose will override logging to display the imported thermostats. Set to
    # "False" to use the logging level instead
    thermostats = from_csv(metadata_path, verbose=True)

    output_dir = "."
    savings_metrics = multiple_thermostat_calculate_epa_field_savings_metrics(
        thermostats)

    # Write the per-thermostat metrics and keep the resulting DataFrame for
    # the statistics passes below.
    metrics_df = metrics_to_csv(
        savings_metrics,
        os.path.join(output_dir, "thermostat_example_output.csv"))

    summary_stats = compute_summary_statistics(metrics_df)
    if ADVANCED_STATS:
        advanced_stats = compute_summary_statistics(
            metrics_df, advanced_filtering=True)

    product_id = "test_product"
    certification_to_csv(
        summary_stats,
        os.path.join(data_dir, "thermostat_example_certification.csv"),
        product_id)

    summary_statistics_to_csv(
        summary_stats,
        os.path.join(data_dir, "thermostat_example_stats.csv"),
        product_id)
    if ADVANCED_STATS:
        summary_statistics_to_csv(
            advanced_stats,
            os.path.join(data_dir, "thermostat_example_stats_advanced.csv"),
            product_id)
def test_certification(combined_dataframe):
    """certification_to_csv writes the summary stats as a 5x8 certification
    DataFrame.

    Fix: tempfile.mkstemp() returns an *open* OS-level file descriptor along
    with the path; the original discarded two such descriptors (leaking them)
    and never used the stats temp file at all. Close the one descriptor we
    keep and drop the unused stats temp file entirely.
    """
    import os

    fd_cert, fname_cert = tempfile.mkstemp()
    os.close(fd_cert)  # mkstemp leaves the fd open; close it to avoid a leak
    product_id = "FAKE"
    stats_df = compute_summary_statistics(combined_dataframe)
    certification_df = certification_to_csv(stats_df, fname_cert, product_id)
    assert certification_df.shape == (5, 8)
def test_iqr_filtering(thermostat_emg_aux_constant_on_outlier):
    """IQR filtering neutralizes an outlier thermostat.

    Computes summary statistics twice -- once with an outlier thermostat in
    the population (relying on IQR filtering to exclude it) and once with the
    outlier removed up front -- then checks that every IQ-filtered ("IQFLT")
    field from the first run matches the corresponding unfiltered field from
    the second run.

    Idiom fixes vs. the original: the removal loop is a comprehension instead
    of a manual append loop over a needless list() copy, and the comparison
    iterates the two statistics sequences directly with zip() instead of
    indexing via range(len(...)).
    """
    thermostats_iqflt = list(thermostat_emg_aux_constant_on_outlier)

    # Run the metrics / statistics with the outlier thermostat in place
    iqflt_metrics = multiple_thermostat_calculate_epa_field_savings_metrics(
        thermostats_iqflt, how="entire_dataset")
    iqflt_output_dataframe = pd.DataFrame(iqflt_metrics, columns=EXPORT_COLUMNS)
    iqflt_summary_statistics = compute_summary_statistics(
        iqflt_output_dataframe)

    # Remove the outlier thermostat
    thermostats_noiq = [
        thermostat
        for thermostat in thermostats_iqflt
        if thermostat.thermostat_id
        != "thermostat_single_emg_aux_constant_on_outlier"
    ]
    # Sanity check: the outlier must actually have been filtered out
    if len(thermostats_noiq) == 5:
        raise ValueError("Try again")

    # Re-run the metrics / statistics with the outlier thermostat removed
    noiq_metrics = multiple_thermostat_calculate_epa_field_savings_metrics(
        thermostats_noiq, how="entire_dataset")
    noiq_output_dataframe = pd.DataFrame(noiq_metrics, columns=EXPORT_COLUMNS)
    noiq_summary_statistics = compute_summary_statistics(noiq_output_dataframe)

    # Verify that the IQFLT removed the outliers by comparing this with the
    # metrics with the outlier thermostat already removed.
    for iqflt_stats, noiq_stats in zip(iqflt_summary_statistics,
                                       noiq_summary_statistics):
        for field_iqflt in [x for x in iqflt_stats if "IQFLT" in x]:
            field_noiq = field_iqflt.replace("rhu2IQFLT", "rhu2")
            left_side = iqflt_stats[field_iqflt]
            right_side = noiq_stats[field_noiq]
            # NaN == NaN is False, so NaN-ness must be compared explicitly
            if np.isnan(left_side) or np.isnan(right_side):
                assert np.isnan(left_side) and np.isnan(right_side)
            else:
                assert left_side == right_side
def test_compute_summary_statistics(combined_dataframe):
    """Default summary statistics yield the expected section lengths:
    four 49-entry sections followed by two (3057, 1657) pairs."""
    summary_statistics = compute_summary_statistics(combined_dataframe)
    assert [len(s) for s in summary_statistics] == [
        49,
        49,
        49,
        49,
        3057,
        1657,
        3057,
        1657,
    ]


def test_compute_summary_statistics_advanced(combined_dataframe):
    """advanced_filtering=True doubles each section of the default output."""
    summary_statistics = compute_summary_statistics(
        combined_dataframe, advanced_filtering=True)
    assert [len(s) for s in summary_statistics] == [
        49,
        49,
        49,
        49,
        49,
        49,
        49,
        49,
        3057,
        1657,
        3057,
        1657,
        3057,
        1657,
        3057,
        1657,
    ]


def test_summary_statistics_to_csv(combined_dataframe):
    """summary_statistics_to_csv returns a DataFrame and writes a CSV that
    reads back with shape (3225, 5).

    Fix: tempfile.mkstemp() returns an *open* OS-level file descriptor that
    the original discarded without closing (fd leak); close it explicitly.
    """
    import os

    summary_statistics = compute_summary_statistics(combined_dataframe)
    fd, fname = tempfile.mkstemp()
    os.close(fd)  # mkstemp leaves the fd open; close it to avoid a leak
    product_id = "FAKE"
    stats_df = summary_statistics_to_csv(summary_statistics, fname, product_id)
    assert isinstance(stats_df, pd.DataFrame)

    stats_df_reread = pd.read_csv(fname)
    assert stats_df_reread.shape == (3225, 5)
def test_compute_summary_statistics_advanced(combined_dataframe): summary_statistics = compute_summary_statistics( combined_dataframe, advanced_filtering=True) assert [len(s) for s in summary_statistics] == [ 49, 49, 49, 49, 49, 49, 49, 49, 3057, 1657, 3057, 1657, 3057, 1657, 3057, 1657, ] def test_summary_statistics_to_csv(combined_dataframe): summary_statistics = compute_summary_statistics(combined_dataframe)
def test_summary_statistics_to_csv(combined_dataframe): summary_statistics = compute_summary_statistics(combined_dataframe)