def test_meter_data_from_csv_daily_freq(sample_metadata):
    """Load the gzipped daily sample with ``freq="daily"`` and check the result.

    Expects an 810x1 frame whose index is in UTC with frequency ``"D"``.
    """
    filename = sample_metadata["il-electricity-cdd-hdd-daily"][
        "meter_data_filename"
    ]

    with resource_stream("eemeter.samples", filename) as stream:
        loaded = meter_data_from_csv(stream, gzipped=True, freq="daily")

    index = loaded.index
    assert loaded.shape == (810, 1)
    assert index.tz.zone == "UTC"
    assert index.freq == "D"
def test_meter_data_from_csv_custom_columns(sample_metadata):
    """Parse a one-row CSV whose start/value columns have custom names.

    Expects a 1x1 frame whose index is in UTC with no frequency set.
    """
    with TemporaryFile() as csv_file:
        # Header row plus a single reading, written as raw bytes.
        csv_file.write(b"start_custom,kWh\n" b"2017-01-01T00:00:00,10\n")
        csv_file.seek(0)  # rewind so the parser reads from the beginning
        loaded = meter_data_from_csv(
            csv_file, start_col="start_custom", value_col="kWh"
        )

    index = loaded.index
    assert loaded.shape == (1, 1)
    assert index.tz.zone == "UTC"
    assert index.freq is None
def test_meter_data_from_csv(sample_metadata):
    """Load the daily sample through an already-decompressed gzip handle.

    No ``freq`` is passed; expects an 810x1 frame whose index is in UTC with
    no frequency set.
    """
    filename = sample_metadata["il-electricity-cdd-hdd-daily"][
        "meter_data_filename"
    ]
    sample_path = resource_filename("eemeter.samples", filename)

    with gzip.open(sample_path) as stream:
        loaded = meter_data_from_csv(stream)

    index = loaded.index
    assert loaded.shape == (810, 1)
    assert index.tz.zone == "UTC"
    assert index.freq is None
def eemeter_baseline_daily(file, temperature, install_start, install_end):
    """
    Fit a daily CalTRACK baseline and estimate load growth after installation.

    This method uses linear regression to create a daily load profile that
    serves as the counterfactual for the period where the new measure has been
    installed. The key part of this model is the temperature changepoint
    determined by the heating and cooling degree days (HDD & CDD).

    CalTRACK refers to a standardized model used in California to measure
    energy efficiency savings from various measures (i.e. LED lightbulbs,
    efficient appliances, weatherization, etc.). In this case it is used to
    model energy growth instead of savings.

    Parameters
    ----------
    file
        Meter data CSV, passed straight to ``eemeter.meter_data_from_csv``
        with ``freq='daily'``.
    temperature
        Temperature data passed to the design-matrix and metered-savings
        steps.
    install_start
        Used as the ``end`` of the baseline data request.
    install_end
        Used as the ``start`` of the reporting data request.

    Returns
    -------
    tuple
        ``(metered_growth_dataframe, metrics, baseline_model)`` where
        ``metrics`` is ``[r_squared_adj, cvrmse_adj, additional_load]``.
    """
    # Read the meter data the caller asked for. A previous revision overwrote
    # ``file`` with a hard-coded path here, silently ignoring the argument.
    daily_meter_data = eemeter.meter_data_from_csv(file, freq='daily')

    # get meter data suitable for fitting a baseline model
    baseline_meter_data, _baseline_warnings = eemeter.get_baseline_data(
        daily_meter_data, end=install_start, max_days=365)

    # create a design matrix (the input to the model fitting step)
    baseline_design_matrix = eemeter.create_caltrack_daily_design_matrix(
        baseline_meter_data, temperature,
    )

    # build a daily CalTRACK model
    baseline_model = eemeter.fit_caltrack_usage_per_day_model(
        baseline_design_matrix)

    # get a year of reporting period data
    reporting_meter_data, _reporting_warnings = eemeter.get_reporting_data(
        daily_meter_data, start=install_end, max_days=365)

    # compute metered savings for the selected reporting period
    metered_growth_dataframe, error_bands = eemeter.metered_savings(
        baseline_model, reporting_meter_data, temperature,
        with_disaggregated=True)

    # flip the sign so that savings read as load growth
    metered_growth_dataframe['metered_savings'] = (
        metered_growth_dataframe['metered_savings'] * -1)

    # total load growth over the reporting period
    additional_load = metered_growth_dataframe.metered_savings.sum()

    # goodness-of-fit metrics reported by the baseline model
    metrics_raw = baseline_model.json()
    r_squared_adj = metrics_raw['r_squared_adj']
    cvrmse_adj = metrics_raw['avgs_metrics']['cvrmse_adj']

    metrics = [r_squared_adj, cvrmse_adj, additional_load]

    return metered_growth_dataframe, metrics, baseline_model
def load_sample(sample):
    """ Load meter data, temperature data, and metadata associated with a
    particular sample identifier. Note: samples are simulated, not real, data.

    Parameters
    ----------
    sample : :any:`str`
        Identifier of sample. Complete list can be obtained with
        :any:`eemeter.samples`.

    Returns
    -------
    meter_data, temperature_data, metadata : :any:`tuple` of :any:`pandas.DataFrame`, :any:`pandas.Series`, and :any:`dict`
        Meter data, temperature data, and metadata for this sample identifier.

    Raises
    ------
    ValueError
        If ``sample`` is not a key of the sample metadata; the message lists
        the available identifiers.
    """
    all_metadata = _load_sample_metadata()
    metadata = all_metadata.get(sample)
    if metadata is None:
        available = "\n".join([
            " - {}".format(key) for key in sorted(all_metadata.keys())
        ])
        raise ValueError(
            "Sample not found: {}. Try one of these?\n{}".format(
                sample, available,
            )
        )

    # Only "hourly" and "daily" are forwarded; anything else becomes None.
    declared_freq = metadata.get("freq")
    freq = declared_freq if declared_freq in ("hourly", "daily") else None

    meter_data_filename = metadata["meter_data_filename"]
    with resource_stream("eemeter.samples", meter_data_filename) as stream:
        meter_data = meter_data_from_csv(stream, gzipped=True, freq=freq)

    temperature_filename = metadata["temperature_filename"]
    with resource_stream("eemeter.samples", temperature_filename) as stream:
        temperature_data = temperature_data_from_csv(
            stream, gzipped=True, freq="hourly"
        )

    # Replace the blackout date values with UTC-localized parsed datetimes.
    for date_key in ("blackout_start_date", "blackout_end_date"):
        metadata[date_key] = pytz.UTC.localize(parse_date(metadata[date_key]))

    return meter_data, temperature_data, metadata
def _get_data(
    sample, meter_file, temperature_file, heating_balance_points, cooling_balance_points
):
    """Load meter and temperature data and merge them.

    When ``sample`` is given, the meter and temperature files are taken from
    the packaged ``eemeter.samples`` resources (replacing any files passed
    in); otherwise the supplied ``meter_file`` and ``temperature_file`` are
    used.

    Parameters
    ----------
    sample
        Key into the packaged ``metadata.json``, or ``None``.
    meter_file, temperature_file
        Open file objects exposing a ``name`` attribute (used to detect a
        ``.gz`` suffix), or ``None``.
    heating_balance_points, cooling_balance_points
        Forwarded unchanged to ``merge_temperature_data``.

    Returns
    -------
    The result of ``merge_temperature_data`` on the loaded data.

    Raises
    ------
    click.ClickException
        If the sample is unknown, or if meter or temperature data is missing.
    """
    # Function-scope import so this block does not depend on file-level edits.
    from contextlib import ExitStack

    # Streams opened here for a sample are registered on the stack and closed
    # on exit, including on error. Files supplied by the caller are not
    # registered and stay open for the caller to manage.
    with ExitStack() as sample_streams:
        if sample is not None:
            with resource_stream("eemeter.samples", "metadata.json") as f:
                metadata = json.loads(f.read().decode("utf-8"))
            if sample in metadata:
                click.echo("Loading sample: {}".format(sample))

                meter_file = sample_streams.enter_context(
                    resource_stream(
                        "eemeter.samples", metadata[sample]["meter_data_filename"]
                    )
                )
                temperature_file = sample_streams.enter_context(
                    resource_stream(
                        "eemeter.samples", metadata[sample]["temperature_filename"]
                    )
                )
            else:
                raise click.ClickException(
                    "Sample not found. Try one of these?\n{}".format(
                        "\n".join(
                            [" - {}".format(key) for key in sorted(metadata.keys())]
                        )
                    )
                )

        if meter_file is not None:
            gzipped = meter_file.name.endswith(".gz")
            meter_data = meter_data_from_csv(meter_file, gzipped=gzipped)
        else:
            raise click.ClickException("Meter data not specified.")

        if temperature_file is not None:
            gzipped = temperature_file.name.endswith(".gz")
            temperature_data = temperature_data_from_csv(
                temperature_file, gzipped=gzipped, freq="hourly"
            )
        else:
            raise click.ClickException("Temperature data not specified.")

    # NOTE(review): assumes both loaders read their input eagerly, so the
    # merge can run after the sample streams are closed. Confirm.
    return merge_temperature_data(
        meter_data,
        temperature_data,
        heating_balance_points=heating_balance_points,
        cooling_balance_points=cooling_balance_points,
    )
def eemeter_baseline_ami(file, temperature, install_start, install_end):
    """
    Fit an hourly CalTRACK baseline and estimate load growth after installation.

    This method uses linear regression to create an hourly load profile that
    serves as the counterfactual for the period where the new measure has been
    installed. The two key parts of this model are the temperature and
    occupancy binning.

    CalTRACK refers to a standardized model used in California to measure
    energy efficiency savings from various measures (i.e. LED lightbulbs,
    efficient appliances, weatherization, etc.). In this case it is used to
    model energy growth instead of savings.

    Returns a tuple ``(metered_growth_dataframe, metrics, baseline_model)``
    where ``metrics`` is ``[r_squared_adj, cvrmse_adj, additional_load]`` and
    the two fit metrics are means over the per-segment values.
    """
    hourly_meter_data = eemeter.meter_data_from_csv(file, freq='hourly')

    # Meter data suitable for fitting the baseline model.
    baseline_window, _baseline_warnings = eemeter.get_baseline_data(
        hourly_meter_data, end=install_start, max_days=365)

    # Preliminary design matrix used for occupancy and segmentation.
    prelim_matrix = eemeter.create_caltrack_hourly_preliminary_design_matrix(
        baseline_window,
        temperature,
    )

    # Weights for the monthly models (three-month weighted approach):
    #   0.5 = prior month, 1.0 = current month, 0.5 = post month
    segmentation = eemeter.segment_time_series(
        prelim_matrix.index,
        'three_month_weighted'
    )

    # Occupancy status for each hour of the week (0-167).
    occupancy_lookup = eemeter.estimate_hour_of_week_occupancy(
        prelim_matrix,
        segmentation=segmentation,
    )

    # Temperature bin assignment.
    temperature_bins = eemeter.fit_temperature_bins(
        prelim_matrix,
        segmentation=segmentation,
    )

    # One design matrix per monthly segment.
    segmented_matrices = (
        eemeter.create_caltrack_hourly_segmented_design_matrices(
            prelim_matrix,
            segmentation,
            occupancy_lookup,
            temperature_bins,
        ))

    # The CalTRACK hourly model itself.
    baseline_model = eemeter.fit_caltrack_hourly_model(
        segmented_matrices,
        occupancy_lookup,
        temperature_bins)

    # A year of post-installation reporting data.
    reporting_window, _reporting_warnings = eemeter.get_reporting_data(
        hourly_meter_data, start=install_end, max_days=365)

    # Metered load growth over the reporting period.
    metered_growth_dataframe, error_bands = eemeter.metered_savings(
        baseline_model, reporting_window,
        temperature, with_disaggregated=True)

    metered_growth_dataframe['temp'] = temperature  # append temperature

    # Flip the sign so that savings read as load growth.
    growth = metered_growth_dataframe['metered_savings'].apply(
        lambda saving: saving * -1)
    metered_growth_dataframe['metered_savings'] = growth

    # Total load growth.
    additional_load = metered_growth_dataframe.metered_savings.sum()

    # NOTE(review): the per-segment metrics are taken as the 7th value of the
    # model's JSON dict by position; this depends on the key order of
    # ``baseline_model.json()`` - confirm which key is intended.
    per_segment = list(baseline_model.json().values())[6]

    # Gather the two fit metrics from every segment, in segment order.
    collected = {'r_squared_adj': [], 'cvrmse_adj': []}
    for measures in per_segment.values():
        for measure, value in measures.items():
            if measure in collected:
                collected[measure].append(value)

    r_squared_adj = mean(collected['r_squared_adj'])
    cvrmse_adj = mean(collected['cvrmse_adj'])

    metrics = [r_squared_adj, cvrmse_adj, additional_load]

    return metered_growth_dataframe, metrics, baseline_model