def get_hourly_model(data):
    """Fit and return a CalTRACK hourly baseline model.

    ``data`` is indexed with the keys ``'baseline_meter_data'`` and
    ``'temperature_data'``; both values are handed straight to eemeter.
    Progress is reported through the module-level ``logger`` before each
    pipeline stage.

    Returns the object produced by ``eemeter.fit_caltrack_hourly_model``.
    """
    log = logger.info
    log("get_hourly_model: ...")

    # Stage 1: preliminary design matrix (used for occupancy and
    # segmentation).
    log("get_hourly_model: creating baseline_design_matrix ...")
    meter_frame = data["baseline_meter_data"]
    temperature_frame = data["temperature_data"]
    prelim = eemeter.create_caltrack_hourly_preliminary_design_matrix(
        meter_frame,
        temperature_frame,
    )

    # Stage 2: segmentation -- 12 monthly models are built, and every later
    # stage operates per segment.
    log("get_hourly_model: creating segment_time_series ...")
    segments = eemeter.segment_time_series(
        prelim.index,
        "three_month_weighted",
    )

    # Stage 3: occupancy status for each hour of the week (0-167).
    log("get_hourly_model: creating occupancy_lookup ...")
    occupancy = eemeter.estimate_hour_of_week_occupancy(
        prelim,
        segmentation=segments,
    )

    # Stage 4: temperature bins, fitted separately for occupied and
    # unoccupied hours.
    log("get_hourly_model: creating temperature_bins ...")
    occupied_bins, unoccupied_bins = eemeter.fit_temperature_bins(
        prelim,
        segmentation=segments,
        occupancy_lookup=occupancy,
    )

    # Stage 5: one design matrix per monthly segment.
    log("get_hourly_model: creating segmented_design_matrices ...")
    per_segment_matrices = (
        eemeter.create_caltrack_hourly_segmented_design_matrices(
            prelim,
            segments,
            occupancy,
            occupied_bins,
            unoccupied_bins,
        )
    )

    # Stage 6: fit the CalTRACK hourly model itself.
    log("get_hourly_model: building CalTRACK model ...")
    fitted_model = eemeter.fit_caltrack_hourly_model(
        per_segment_matrices,
        occupancy,
        occupied_bins,
        unoccupied_bins,
    )

    log("get_hourly_model: DONE")
    return fitted_model
def test_json_hourly():
    """Check that a CalTRACK hourly model survives a JSON round trip.

    A baseline model is fitted on the bundled
    ``il-electricity-cdd-hdd-hourly`` sample, serialized with ``json.dumps``,
    restored through ``CalTRACKHourlyModelResults.from_json``, and the total
    metered savings computed from the restored model must equal the total
    computed from the original one.
    """
    meter, temperature, metadata = eemeter.load_sample(
        "il-electricity-cdd-hdd-hourly"
    )
    blackout_start = metadata["blackout_start_date"]
    blackout_end = metadata["blackout_end_date"]

    # Meter data suitable for fitting a baseline model.
    baseline_meter, _ = eemeter.get_baseline_data(
        meter, end=blackout_start, max_days=365
    )

    # Preliminary design matrix for occupancy and segmentation.
    prelim = eemeter.create_caltrack_hourly_preliminary_design_matrix(
        baseline_meter,
        temperature,
    )

    # 12 monthly models -- every following step operates per segment.
    segments = eemeter.segment_time_series(
        prelim.index, "three_month_weighted"
    )

    # Occupancy status for each hour of the week (0-167).
    occupancy = eemeter.estimate_hour_of_week_occupancy(
        prelim,
        segmentation=segments,
    )

    # Temperature bins.
    bins = eemeter.fit_temperature_bins(
        prelim,
        segmentation=segments,
    )

    # One design matrix per monthly segment.
    per_segment_matrices = (
        eemeter.create_caltrack_hourly_segmented_design_matrices(
            prelim,
            segments,
            occupancy,
            bins,
        )
    )

    # The CalTRACK hourly model under test.
    fitted = eemeter.fit_caltrack_hourly_model(
        per_segment_matrices,
        occupancy,
        bins,
    )

    # A year of reporting-period data.
    reporting_meter, _ = eemeter.get_reporting_data(
        meter, start=blackout_end, max_days=365
    )

    def total_savings(model):
        # Metered savings for the reporting period, summed to one number.
        savings_frame, _error_bands = eemeter.metered_savings(
            model,
            reporting_meter,
            temperature,
            with_disaggregated=True,
        )
        return savings_frame.metered_savings.sum()

    savings_before = total_savings(fitted)

    # Serialize to a JSON string and rebuild the model from it.
    serialized = json.dumps(fitted.json())
    restored = eemeter.CalTRACKHourlyModelResults.from_json(
        json.loads(serialized)
    )

    savings_after = total_savings(restored)

    assert savings_before == savings_after
def eemeter_baseline_ami(file, temperature, install_start, install_end):
    """
    Fit a CalTRACK hourly baseline on AMI data and estimate load growth.

    This method uses linear regression to create an hourly load profile that
    serves as the counterfactual for the period where the new measure has
    been installed. The two key parts of this model are the temperature and
    occupancy binning.

    CalTRACK refers to a standardized model used in California to measure
    energy efficiency savings from various measures (e.g. LED lightbulbs,
    efficient appliances, weatherization, etc.). In this case it is used to
    model energy growth instead of savings, so the sign of the
    ``metered_savings`` column is flipped before it is returned.

    Parameters
    ----------
    file
        CSV source handed to ``eemeter.meter_data_from_csv`` (read with
        ``freq='hourly'``).
    temperature
        Temperature data passed to the design-matrix and
        ``eemeter.metered_savings`` calls; it is also stored in the
        ``'temp'`` column of the returned frame.
    install_start
        Passed as ``end`` to ``eemeter.get_baseline_data`` (with
        ``max_days=365``) to select the baseline period.
    install_end
        Passed as ``start`` to ``eemeter.get_reporting_data`` (with
        ``max_days=365``) to select the reporting period.

    Returns
    -------
    tuple
        ``(metered_growth_dataframe, metrics, baseline_model)`` where
        ``metrics`` is ``[r_squared_adj, cvrmse_adj, additional_load]``:
        the mean of each per-segment metric, followed by the summed
        (sign-flipped) ``metered_savings`` column.
    """
    ami_data = eemeter.meter_data_from_csv(file, freq='hourly')

    # get meter data suitable for fitting a baseline model
    # (the second return value is not used here)
    baseline_meter_data, _baseline_warnings = eemeter.get_baseline_data(
        ami_data, end=install_start, max_days=365)

    # create design matrix for occupancy and segmentation
    preliminary_design_matrix = (
        eemeter.create_caltrack_hourly_preliminary_design_matrix(
            baseline_meter_data,
            temperature,
        ))

    # build matrix with weights for monthly models of:
    #   0.5 = prior month
    #   1.0 = current month
    #   0.5 = post month
    segmentation = eemeter.segment_time_series(
        preliminary_design_matrix.index,
        'three_month_weighted'  # using 3 month weighted approach
    )

    # assign an occupancy status to each hour of the week (0-167)
    occupancy_lookup = eemeter.estimate_hour_of_week_occupancy(
        preliminary_design_matrix,
        segmentation=segmentation,
    )

    # assign temperatures to bins
    temperature_bins = eemeter.fit_temperature_bins(
        preliminary_design_matrix,
        segmentation=segmentation,
    )

    # build a design matrix for each monthly segment
    segmented_design_matrices = (
        eemeter.create_caltrack_hourly_segmented_design_matrices(
            preliminary_design_matrix,
            segmentation,
            occupancy_lookup,
            temperature_bins,
        ))

    # build a CalTRACK hourly model
    baseline_model = eemeter.fit_caltrack_hourly_model(
        segmented_design_matrices,
        occupancy_lookup,
        temperature_bins)

    # get a year of post installation reporting data
    # (the second return value is not used here)
    reporting_meter_data, _reporting_warnings = eemeter.get_reporting_data(
        ami_data, start=install_end, max_days=365)

    # compute metered load growth for the year of reporting period
    metered_growth_dataframe, error_bands = eemeter.metered_savings(
        baseline_model, reporting_meter_data,
        temperature, with_disaggregated=True)

    metered_growth_dataframe['temp'] = temperature  # append temperature

    # change signs for load growth (vectorized instead of a per-element
    # lambda)
    metered_growth_dataframe['metered_savings'] = (
        metered_growth_dataframe['metered_savings'] * -1)

    # totaled load growth
    additional_load = metered_growth_dataframe.metered_savings.sum()

    # metrics
    # NOTE(review): the per-segment metrics are taken positionally (7th
    # value of the model's JSON dict), which depends on the key order
    # emitted by baseline_model.json() -- confirm the key name and switch
    # to an explicit lookup.
    model_results = list(baseline_model.json().values())
    results = model_results[6]

    # collect each metric from every segment that reports it
    r_squared_adj_list = [
        measures['r_squared_adj']
        for measures in results.values()
        if 'r_squared_adj' in measures
    ]
    cvrmse_adj_list = [
        measures['cvrmse_adj']
        for measures in results.values()
        if 'cvrmse_adj' in measures
    ]

    r_squared_adj = mean(r_squared_adj_list)
    cvrmse_adj = mean(cvrmse_adj_list)

    metrics = [r_squared_adj, cvrmse_adj, additional_load]

    # Return Section
    return metered_growth_dataframe, metrics, baseline_model