def split_modeled_energy_trace_daily(
        daily_trace, modeling_period_set, mock_isd_weather_source):
    # create SplitModeledEnergyTrace
    formatter = ModelDataFormatter('D')
    model_mapping = {
        'modeling_period_1': SeasonalElasticNetCVModel(65, 65),
        'modeling_period_2': SeasonalElasticNetCVModel(65, 65),
    }
    smet = SplitModeledEnergyTrace(
        daily_trace, formatter, model_mapping, modeling_period_set)

    smet.fit(mock_isd_weather_source)
    return smet
def split_modeled_energy_trace_monthly(
        monthly_trace, modeling_period_set, mock_isd_weather_source):
    # create SplitModeledEnergyTrace
    formatter = ModelDataBillingFormatter()
    model_mapping = {
        'modeling_period_1': BillingElasticNetCVModel(65, 65),
        'modeling_period_2': BillingElasticNetCVModel(65, 65),
    }
    smet = SplitModeledEnergyTrace(
        monthly_trace, formatter, model_mapping, modeling_period_set)

    smet.fit(mock_isd_weather_source)
    return smet
def test_bad_weather_source(trace, modeling_period_set):
    # create SplitModeledEnergyTrace
    formatter = ModelDataFormatter('D')
    model_mapping = {
        'modeling_period_1': SeasonalElasticNetCVModel(65, 65),
        'modeling_period_2': SeasonalElasticNetCVModel(65, 65),
    }
    smet = SplitModeledEnergyTrace(
        trace, formatter, model_mapping, modeling_period_set)

    # need to see that it gives a data sufficiency exception
    outputs = smet.fit(None)

    assert 'DataSufficiencyException' in \
        outputs['modeling_period_1']['traceback']
    assert 'DataSufficiencyException' in \
        outputs['modeling_period_2']['traceback']
def test_basic_usage(trace, modeling_period_set, mock_isd_weather_source):
    # create SplitModeledEnergyTrace
    formatter = ModelDataFormatter('D')
    model_mapping = {
        'modeling_period_1': SeasonalElasticNetCVModel(65, 65),
        'modeling_period_2': SeasonalElasticNetCVModel(65, 65),
    }
    smet = SplitModeledEnergyTrace(
        trace, formatter, model_mapping, modeling_period_set)

    # fit normally
    outputs = smet.fit(mock_isd_weather_source)
    assert 'modeling_period_1' in smet.fit_outputs
    assert 'modeling_period_2' in smet.fit_outputs
    assert len(smet.fit_outputs) == 2
    assert outputs['modeling_period_1']['status'] == 'SUCCESS'
    assert outputs['modeling_period_1']['start_date'] == \
        datetime(2000, 1, 1, tzinfo=pytz.UTC)
    assert outputs['modeling_period_1']['end_date'] == \
        datetime(2000, 9, 1, tzinfo=pytz.UTC)
    assert outputs['modeling_period_1']['n_rows'] == 245

    index = pd.date_range('2001-01-01', periods=6, freq='D', tz=pytz.UTC)
    demand_fixture_data = \
        smet.formatter.create_demand_fixture(index, mock_isd_weather_source)

    mp1_pred = smet.predict('modeling_period_1', demand_fixture_data)
    mp2_pred = smet.predict('modeling_period_2', demand_fixture_data)
    assert mp1_pred.shape == (6,)
    assert mp2_pred is None

    with pytest.raises(KeyError):
        smet.predict('modeling_period_3', demand_fixture_data)

    def callable_(formatter, model, returnme):
        return returnme

    mp1_deriv = smet.compute_derivative(
        'modeling_period_1', callable_, returnme="A")
    mp2_deriv = smet.compute_derivative(
        'modeling_period_2', callable_, returnme="A")
    assert mp1_deriv == "A"
    assert mp2_deriv is None

    # bad weather source
    smet.fit(None)
    assert outputs['modeling_period_1']['status'] == 'FAILURE'
def get_energy_modeling_dispatches(modeling_period_set, trace_set):
    ''' Dispatches a set of applicable models and formatters for each pairing
    of the modeling periods and traces given.

    Parameters
    ----------
    modeling_period_set : eemeter.structures.ModelingPeriodSet
        :code:`ModelingPeriod` s to dispatch.
    trace_set : eemeter.structures.EnergyTraceSet
        :code:`EnergyTrace` s to dispatch.

    Returns
    -------
    dispatches : dict
        Dictionary keyed by trace label. Values are
        :code:`SplitModeledEnergyTrace` instances, or :code:`None` for
        placeholder traces and for traces with no matching formatter/model.
    '''

    dispatches = {}
    for trace_label, trace in trace_set.itertraces():

        dispatches[trace_label] = None

        if trace.placeholder:
            logger.info(
                'Skipping modeling for placeholder trace "{}" ({}).'.format(
                    trace_label, trace.interpretation))
            continue

        frequency = _get_approximate_frequency(
            logger, trace.data, trace_label)

        if frequency not in ['H', 'D', '15T', '30T']:
            frequency = None

        model_class_selector = (trace.interpretation, frequency)

        try:
            (
                FormatterClass,
                formatter_settings,
                ModelClass,
                model_settings,
            ) = ENERGY_MODEL_CLASS_MAPPING[model_class_selector]
        except KeyError:
            logger.error(
                'Could not dispatch formatter/model for'
                ' model class selector {}.'.format(model_class_selector))
            continue

        formatter = FormatterClass(**formatter_settings)
        model = ModelClass(**model_settings)

        model_mapping = {
            modeling_period_label: ModelClass(**model_settings)
            for modeling_period_label, _ in
            modeling_period_set.iter_modeling_periods()
        }

        modeled_energy_trace = SplitModeledEnergyTrace(
            trace, formatter, model_mapping, modeling_period_set)

        logger.info(
            'Successfully created SplitModeledEnergyTrace formatter {}'
            ' and model {} for {} and trace "{}" ({})'
            ' using model class selector {}.'.format(
                formatter, model, modeling_period_set, trace_label,
                trace.interpretation, model_class_selector))

        dispatches[trace_label] = modeled_energy_trace

    return dispatches
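# --- Usage sketch (not part of the original module) --------------------------
# A minimal, hedged example of how the dispatches returned above might be
# consumed. The helper name `fit_all_dispatches` and the idea of fitting every
# dispatched trace in one pass are assumptions for illustration; only the shape
# of the return value (trace label -> SplitModeledEnergyTrace or None) and
# SplitModeledEnergyTrace.fit(weather_source) come from the code above.
def fit_all_dispatches(modeling_period_set, trace_set, weather_source):
    dispatches = get_energy_modeling_dispatches(
        modeling_period_set, trace_set)
    fit_outputs = {}
    for trace_label, modeled_energy_trace in dispatches.items():
        if modeled_energy_trace is None:
            # placeholder trace, or no formatter/model matched the selector
            continue
        # fit() returns per-modeling-period outputs keyed by period label
        fit_outputs[trace_label] = modeled_energy_trace.fit(weather_source)
    return fit_outputs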
    def evaluate(self, meter_input, formatter=None, model=None,
                 weather_source=None, weather_normal_source=None):
        ''' Main entry point to the meter, which models traces and calculates
        derivatives.

        Parameters
        ----------
        meter_input : dict
            Serialized input containing trace and project data.
        formatter : tuple of (class, dict), default None
            Formatter for trace and weather data. Used to create input for
            the model. If None is provided, will be auto-matched to an
            appropriate default formatter. The class can be provided as a
            string (class.__name__) or as a class object.
        model : tuple of (class, dict), default None
            Model to use in modeling. If None is provided, will be
            auto-matched to an appropriate default model. The class can be
            provided as a string (class.__name__) or as a class object.
        weather_source : eemeter.weather.WeatherSource
            Weather source to be used for this meter. Overrides the weather
            source found using :code:`project.site`. Useful for test mocking.
        weather_normal_source : eemeter.weather.WeatherSource
            Weather normal source to be used for this meter. Overrides the
            weather normal source found using :code:`project.site`. Useful
            for test mocking.

        Returns
        -------
        results : dict
            Dictionary of results with the following keys:

            - :code:`"status"`: SUCCESS/FAILURE
            - :code:`"failure_message"`: if FAILURE, message indicates reason
              for failure, may include traceback
            - :code:`"logs"`: list of collected log messages
            - :code:`"model_class"`: Name of model class
            - :code:`"model_kwargs"`: dict of model keyword arguments
              (settings)
            - :code:`"formatter_class"`: Name of formatter class
            - :code:`"formatter_kwargs"`: dict of formatter keyword arguments
              (settings)
            - :code:`"eemeter_version"`: version of the eemeter package
            - :code:`"modeled_energy_trace"`: modeled energy trace
            - :code:`"derivatives"`: derivatives for each interpretation
            - :code:`"weather_source_station"`: Matched weather source
              station.
            - :code:`"weather_normal_source_station"`: Matched weather normal
              source station.
        '''
        SUCCESS = "SUCCESS"
        FAILURE = "FAILURE"

        output = OrderedDict([
            ("status", None),
            ("failure_message", None),
            ("logs", []),
            ("eemeter_version", get_version()),
            ("trace_id", None),
            ("project_id", None),
            ("interval", None),
            ("meter_kwargs", self.kwargs),
            ("model_class", None),
            ("model_kwargs", None),
            ("formatter_class", None),
            ("formatter_kwargs", None),
            ("weather_source_station", None),
            ("weather_normal_source_station", None),
            ("derivatives", None),
            ("modeled_energy_trace", None),
        ])

        # Step 1: Deserialize input and validate
        deserialized_input = deserialize_meter_input(meter_input)
        if "error" in deserialized_input:
            message = (
                "Meter input could not be deserialized:\n{}".format(
                    deserialized_input))
            output['status'] = FAILURE
            output['failure_message'] = message
            return output

        # Assume that deserialized input fails without these keys, so don't
        # bother error checking
        trace = deserialized_input["trace"]
        project = deserialized_input["project"]
        zipcode = project["zipcode"]
        site = ZIPCodeSite(zipcode)

        # Can be blank for models capable of structural change analysis, so
        # provide default
        modeling_period_set = project.get("modeling_period_set", None)

        project_id = project["project_id"]
        trace_id = trace.trace_id
        interval = trace.interval

        output['project_id'] = project_id
        output['trace_id'] = trace_id
        output['interval'] = interval

        logger.debug(
            'Running meter for trace {} and project {}'.format(
                trace_id, project_id))

        # Step 2: Match weather
        use_cz2010 = (self.weather_station_mapping == 'CZ2010')
        if weather_source is None:
            weather_source = get_weather_source(site, use_cz2010=use_cz2010)
            if weather_source is None:
                message = (
                    "Could not find weather source matching site {}"
                    .format(site))
                weather_source_usaf_id = None
            else:
                message = "Using weather_source {}".format(weather_source)
                weather_source_usaf_id = weather_source.usaf_id
        else:
            message = "Using supplied weather_source"
            weather_source_usaf_id = weather_source.usaf_id
        output['weather_source_station'] = weather_source_usaf_id
        output['logs'].append(message)
        logger.debug(message)

        if weather_normal_source is None:
            use_cz2010 = (self.weather_normal_station_mapping == 'CZ2010')
            weather_normal_source = get_weather_normal_source(
                site, use_cz2010=use_cz2010)
            if weather_normal_source is None:
                message = (
                    "Could not find weather normal source matching site {}"
                    .format(site))
                weather_normal_source_usaf_id = None
            else:
                message = ("Using weather_normal_source {}".format(
                    weather_normal_source))
                weather_normal_source_usaf_id = weather_normal_source.usaf_id
        else:
            message = "Using supplied weather_normal_source"
            weather_normal_source_usaf_id = weather_normal_source.usaf_id
        output['weather_normal_source_station'] = \
            weather_normal_source_usaf_id
        output['logs'].append(message)
        logger.debug(message)

        # Step 3: Check to see if trace is placeholder. If so,
        # return with SUCCESS, empty derivatives.
        if trace.placeholder:
            message = (
                'Skipping modeling for placeholder trace {}'.format(trace))
            logger.info(message)
            output['logs'].append(message)
            output['status'] = SUCCESS
            output['derivatives'] = []
            return output

        # Step 4: Determine trace interpretation and frequency
        # TODO use trace interval here. And enforce upstream that intervals
        # use pandas interval strings?
        trace_frequency = get_approximate_frequency(trace)

        if trace_frequency not in ['H', 'D', '15T', '30T']:
            trace_frequency = None

        selector = (trace.interpretation, trace_frequency)

        # Step 5: create formatter instance
        FormatterClass, formatter_kwargs = self._get_formatter(
            formatter, selector)
        if FormatterClass is None:
            message = (
                "Default formatter mapping did not find a match for the"
                " selector {}".format(selector))
            output['status'] = FAILURE
            output['failure_message'] = message
            return output
        output["formatter_class"] = FormatterClass.__name__
        output["formatter_kwargs"] = formatter_kwargs
        formatter_instance = FormatterClass(**formatter_kwargs)

        # Step 6: create model instance
        ModelClass, model_kwargs = self._get_model(model, selector)
        if ModelClass is None:
            message = (
                "Default model mapping did not find a match for the"
                " selector {}".format(selector))
            output['status'] = FAILURE
            output['failure_message'] = message
            return output
        output["model_class"] = ModelClass.__name__
        output["model_kwargs"] = model_kwargs

        # Step 7: validate modeling period set. Always fails for now, since
        # no models are yet fully structural change analysis aware
        if modeling_period_set is None:
            message = (
                "Model is not structural-change capable, so `modeling_period`"
                " argument must be supplied.")
            output['status'] = FAILURE
            output['failure_message'] = message
            return output

        # Step 8: create split modeled energy trace
        model_mapping = {
            modeling_period_label: ModelClass(
                modeling_period_interpretation=modeling_period_label,
                **model_kwargs)
            for modeling_period_label, _ in
            modeling_period_set.iter_modeling_periods()
        }

        modeled_trace = SplitModeledEnergyTrace(
            trace, formatter_instance, model_mapping, modeling_period_set)

        modeled_trace.fit(weather_source)
        output["modeled_energy_trace"] = \
            serialize_split_modeled_energy_trace(modeled_trace)

        # Step 9: for each modeling period group, create derivatives
        derivative_freq = 'D'
        if 'freq_str' in formatter_kwargs.keys() and \
                formatter_kwargs['freq_str'] == 'H':
            derivative_freq = 'H'

        derivatives = []
        for ((baseline_label, reporting_label),
             (baseline_period, reporting_period)) in \
                modeling_period_set.iter_modeling_period_groups():

            raw_derivatives = []
            deriv_input = unpack(modeled_trace, baseline_label,
                                 reporting_label, baseline_period,
                                 reporting_period, weather_source,
                                 weather_normal_source, site,
                                 derivative_freq=derivative_freq)
            if deriv_input is None:
                continue

            raw_derivatives.extend([
                hdd_balance_point_baseline(deriv_input),
                hdd_coefficient_baseline(deriv_input),
                cdd_balance_point_baseline(deriv_input),
                cdd_coefficient_baseline(deriv_input),
                intercept_baseline(deriv_input),
                hdd_balance_point_reporting(deriv_input),
                hdd_coefficient_reporting(deriv_input),
                cdd_balance_point_reporting(deriv_input),
                cdd_coefficient_reporting(deriv_input),
                intercept_reporting(deriv_input),
                cumulative_baseline_model_minus_reporting_model_normal_year(
                    deriv_input),
                baseline_model_minus_reporting_model_normal_year(deriv_input),
                cumulative_baseline_model_normal_year(deriv_input),
                baseline_model_normal_year(deriv_input),
                cumulative_baseline_model_reporting_period(deriv_input),
                baseline_model_reporting_period(deriv_input),
                masked_baseline_model_reporting_period(deriv_input),
                cumulative_baseline_model_minus_observed_reporting_period(
                    deriv_input),
                baseline_model_minus_observed_reporting_period(deriv_input),
                masked_baseline_model_minus_observed_reporting_period(
                    deriv_input),
                baseline_model_baseline_period(deriv_input),
                cumulative_reporting_model_normal_year(deriv_input),
                reporting_model_normal_year(deriv_input),
                reporting_model_reporting_period(deriv_input),
                cumulative_observed_reporting_period(deriv_input),
                observed_reporting_period(deriv_input),
                masked_observed_reporting_period(deriv_input),
                cumulative_observed_baseline_period(deriv_input),
                observed_baseline_period(deriv_input),
                observed_project_period(deriv_input),
                temperature_baseline_period(deriv_input),
                temperature_reporting_period(deriv_input),
                masked_temperature_reporting_period(deriv_input),
                temperature_normal_year(deriv_input),
                baseline_mask(deriv_input),
                reporting_mask(deriv_input),
                reporting_period_resource_curve(deriv_input),
            ])

            resource_curve_normal_year = normal_year_resource_curve(
                deriv_input)
            raw_derivatives.extend([resource_curve_normal_year])

            if resource_curve_normal_year is not None:
                resource_curve_normal_year = pd.Series(
                    resource_curve_normal_year['value'],
                    index=pd.to_datetime(
                        resource_curve_normal_year['orderable']))
                raw_derivatives.extend([
                    normal_year_co2_avoided(deriv_input,
                                            resource_curve_normal_year)
                ])

            derivatives += [
                Derivative(
                    (baseline_label, reporting_label),
                    d['series'],
                    reduce(lambda a, b: a + ' ' + b,
                           d['description'].split()),
                    d['orderable'],
                    d['value'],
                    d['variance'],
                )
                for d in raw_derivatives if d is not None
            ]

        output["derivatives"] = serialize_derivatives(derivatives)
        output["status"] = SUCCESS

        return output
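# --- Usage sketch (not part of the original module) --------------------------
# A hedged example of invoking evaluate(). The class name
# `EnergyEfficiencyMeter` is an assumption for illustration (substitute the
# class that actually defines evaluate() above), and `meter_input` is assumed
# to be an already-serialized dict of trace and project data. Passing the
# weather sources explicitly follows the docstring's note about test mocking;
# only the result keys used here come from the docstring above.
def run_meter(meter_input, weather_source=None, weather_normal_source=None):
    meter = EnergyEfficiencyMeter()  # hypothetical class name
    results = meter.evaluate(
        meter_input,
        weather_source=weather_source,
        weather_normal_source=weather_normal_source,
    )
    if results['status'] == 'FAILURE':
        raise RuntimeError(results['failure_message'])
    return results['derivatives'], results['modeled_energy_trace']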