def test_has_trace_id_and_interval():
    '''Check that ``trace_id`` and ``interval`` are optional and stored as given.'''
    shared_kwargs = dict(
        interpretation='ELECTRICITY_CONSUMPTION_SUPPLIED',
        placeholder=True,
    )

    # When the optional arguments are omitted both attributes are None.
    anonymous = EnergyTrace(**shared_kwargs)
    assert anonymous.trace_id is None
    assert anonymous.interval is None

    # Supplied values are exposed unchanged and the id shows up in str().
    labelled = EnergyTrace(trace_id='ABC', interval='daily', **shared_kwargs)
    assert labelled.trace_id == 'ABC'
    assert labelled.interval == 'daily'
    assert 'ABC' in str(labelled)
def test_placeholder_valid(interpretation):
    '''A placeholder trace keeps its interpretation but has no data or unit.'''
    placeholder_trace = EnergyTrace(
        placeholder=True,
        interpretation=interpretation,
    )

    assert placeholder_trace.interpretation == interpretation
    assert placeholder_trace.data is None
    assert placeholder_trace.unit is None
    assert placeholder_trace.placeholder
def test_serializer(interpretation, records, unit, serializer):
    '''Records passed through a serializer end up in the trace's data frame.'''
    trace = EnergyTrace(
        interpretation=interpretation,
        records=records,
        unit=unit,
        serializer=serializer,
    )

    # The first row carries the first record's value and is not estimated.
    first_value = trace.data.value.iloc[0]
    assert first_value == records[0]['value']

    first_estimated = trace.data.estimated.iloc[0]
    assert not first_estimated
def eemeter_consumption_data(self):
    '''Build an EnergyTrace from the records attached to this object.

    Each related record is converted with its ``eemeter_record()`` method,
    and the stored interpretation and unit codes are translated to names
    through ``INTERPRETATION_CHOICES`` and ``UNIT_CHOICES``.
    '''
    # Convert the records first, matching the original evaluation order.
    eemeter_records = []
    for record in self.records.all():
        eemeter_records.append(record.eemeter_record())

    interpretation_names = dict(INTERPRETATION_CHOICES)
    interpretation = interpretation_names[self.interpretation]

    unit_names = dict(UNIT_CHOICES)
    unit_name = unit_names[self.unit]

    return EnergyTrace(
        interpretation,
        records=eemeter_records,
        unit=unit_name,
        serializer=ArbitraryStartSerializer(),
    )
def trace2():
    '''Return a one-row electricity trace whose only value is NaN and estimated.'''
    timestamps = [datetime(2011, 1, 1, tzinfo=pytz.UTC)]
    frame = pd.DataFrame(
        {"value": [np.nan], "estimated": [True]},
        index=timestamps,
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
def test_non_timeseries_data(interpretation, unit):
    '''A data frame built without a datetime index must raise ValueError.'''
    # No ``index`` argument is given, so pandas assigns its default index.
    frame = pd.DataFrame(
        {"value": [1, np.nan], "estimated": [False, False]},
        columns=["value", "estimated"],
    )

    with pytest.raises(ValueError):
        EnergyTrace(interpretation=interpretation, data=frame, unit=unit)
def trace():
    '''Return a 365-day daily electricity trace of constant, non-estimated values.'''
    n_days = 365
    daily_index = pd.date_range(
        '2000-01-01', periods=n_days, freq='D', tz=pytz.UTC)

    frame = pd.DataFrame(
        {
            "value": np.tile(1, (n_days, )),
            "estimated": np.tile(False, (n_days, )),
        },
        index=daily_index,
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
def trace1():
    '''Return a two-day electricity trace whose second value is NaN.'''
    daily_index = pd.date_range('2000-01-01', periods=2, freq='D')
    frame = pd.DataFrame(
        {"value": [1, np.nan], "estimated": [False, False]},
        index=daily_index,
        columns=["value", "estimated"],
    )
    return EnergyTrace(
        "ELECTRICITY_CONSUMPTION_SUPPLIED", data=frame, unit="KWH")
def test_bad_column_name_data(interpretation, unit):
    '''A frame whose column is named "energy" instead of "value" must raise.'''
    daily_index = pd.date_range('2000-01-01', periods=2, freq='D')
    frame = pd.DataFrame(
        {"energy": [1, np.nan], "estimated": [False, False]},
        index=daily_index,
        columns=["energy", "estimated"],
    )

    with pytest.raises(ValueError):
        EnergyTrace(interpretation=interpretation, data=frame, unit=unit)
def fifteen_min_trace():
    '''Return an electricity trace of 9600 fifteen-minute readings.

    The index starts at 2011-01-01 UTC; every value is 1 and no reading is
    flagged as estimated.
    '''
    trace_length = 9600

    # "15min" is the same frequency as the legacy "15T" alias, which newer
    # pandas releases deprecate; "min" is accepted by old and new versions.
    index = pd.date_range(
        start=datetime(2011, 1, 1, tzinfo=pytz.UTC),
        periods=trace_length,
        freq='15min',
        tz=pytz.UTC)

    df = pd.DataFrame(
        {
            "value": [1] * trace_length,
            "estimated": [False] * trace_length,
        },
        index=index,
        columns=["value", "estimated"])
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", df, unit="KWH")
def trace():
    '''Return a natural gas trace with twelve month-end readings of value 1.'''
    column_names = ['value', 'estimated']
    monthly_index = pd.date_range(
        '6/6/2012', '6/6/2013', freq='M', tz=pytz.UTC)

    readings = {
        "value": [1,] * 12,
        "estimated": [False,] * 12,
    }
    frame = pd.DataFrame(readings, index=monthly_index, columns=column_names)

    return EnergyTrace(
        interpretation="NATURAL_GAS_CONSUMPTION_SUPPLIED",
        unit="THERM",
        data=frame)
def trace_set():
    '''Return a one-trace EnergyTraceSet labelled "trace".

    The trace holds five daily generation readings, the last of which is NaN.
    '''
    daily_index = pd.date_range('2000-01-01', periods=5, freq='D')
    frame = pd.DataFrame(
        {
            "value": [1, 1, 1, 1, np.nan],
            "estimated": [False, False, False, False, False],
        },
        index=daily_index,
        columns=["value", "estimated"],
    )

    generation_trace = EnergyTrace(
        "ELECTRICITY_ON_SITE_GENERATION_UNCONSUMED", data=frame, unit="KWH")
    return EnergyTraceSet([generation_trace], ["trace"])
def build_trace(trace_records):
    '''Build an EnergyTrace from a list of record dicts.

    The first record decides the fuel: an ``'interpretation'`` of ``'gas'``
    yields a natural gas trace in "THM", anything else an electricity trace
    in "KWH". The first record's ``'project_id'`` becomes the trace id.
    '''
    first_record = trace_records[0]
    is_gas = first_record['interpretation'] == 'gas'

    unit = "THM" if is_gas else "KWH"
    interpretation = (
        "NATURAL_GAS_CONSUMPTION_SUPPLIED" if is_gas
        else "ELECTRICITY_CONSUMPTION_SUPPLIED"
    )

    return EnergyTrace(
        records=trace_records,
        unit=unit,
        interpretation=interpretation,
        serializer=ArbitraryStartSerializer(),
        trace_id=first_record['project_id'],
    )
def trace():
    '''Return a natural gas trace with four readings, the last one NaN.'''
    reading_dates = ["2012-06-06", "2012-07-06", "2012-08-06", "2012-09-06"]
    utc_index = pd.DatetimeIndex(
        reading_dates, dtype='datetime64[ns, UTC]', freq=None)

    readings = {
        "value": [1, 1, 1, np.nan],
        "estimated": [False, False, False, False],
    }
    frame = pd.DataFrame(
        readings, index=utc_index, columns=['value', 'estimated'])

    return EnergyTrace(
        interpretation="NATURAL_GAS_CONSUMPTION_SUPPLIED",
        unit="THERM",
        data=frame)
def test_data_and_valid_unit(
        interpretation, unnormalized_unit_with_target_unit, unit_timeseries):
    '''Units are normalised and values rescaled when a trace is given data.

    The fixture supplies ``(unnormalized_unit, normalized_unit, multiplier)``.
    '''
    raw_unit, expected_unit, multiplier = unnormalized_unit_with_target_unit

    trace = EnergyTrace(
        interpretation=interpretation, data=unit_timeseries, unit=raw_unit)

    assert trace.interpretation == interpretation
    assert trace.unit == expected_unit

    # The first stored value equals the input scaled by the multiplier.
    actual_first = trace.data.value.iloc[0]
    expected_first = (unit_timeseries.value * multiplier).iloc[0]
    np.testing.assert_allclose(
        actual_first, expected_first, rtol=1e-3, atol=1e-3)

    assert not trace.data.estimated.iloc[0]
    assert not trace.placeholder
def get_energy_traces(self, service_kind_default="electricity"):
    ''' Retrieve all energy trace records stored as IntervalReading
    elements in the given ESPI Energy Usage XML. Energy records are
    grouped by interpretation and returned in EnergyTrace objects.

    Parameters
    ----------
    service_kind_default : str
        Default fuel type to use in parser if ReadingType/commodity field
        is missing.

    Yields
    ------
    energy_trace : eemeter.structures.EnergyTrace
        Energy data traces as described in the xml file.

    Raises
    ------
    KeyError
        If a (fuel type, flow direction) pair has no known interpretation.
    '''
    INTERPRETATION_MAPPING = {
        ("electricity", "forward"): "ELECTRICITY_CONSUMPTION_SUPPLIED",
        ("natural_gas", "forward"): "NATURAL_GAS_CONSUMPTION_SUPPLIED",
        ("electricity", "reverse"):
            "ELECTRICITY_ON_SITE_GENERATION_UNCONSUMED",
        # Key was previously misspelled "electriicty", so net electricity
        # records could never be matched.
        ("electricity", "net"): "ELECTRICITY_CONSUMPTION_NET",
    }

    # Get all consumption records, group by fuel type.
    for flow_direction, records in self._get_consumption_record_groups():
        if len(records) == 0:
            continue

        fuel_type_records = defaultdict(list)
        for record in records:
            fuel_type_records[record["fuel_type"]].append(record)

        # Wrap records in EnergyTrace objects, by fuel type. A distinct
        # name avoids rebinding the outer loop's ``records``.
        for fuel_type, fuel_records in fuel_type_records.items():
            if fuel_type is None:
                fuel_type = service_kind_default
            selector = (fuel_type, flow_direction)
            interpretation = INTERPRETATION_MAPPING[selector]
            yield EnergyTrace(interpretation, records=fuel_records,
                              unit=fuel_records[0]["unit_name"],
                              serializer=ArbitrarySerializer())
def billing_trace():
    '''Return an 18-row, irregularly spaced electricity trace.

    The first five rows use hand-picked dates (one estimated reading, one NaN
    value); the remaining thirteen are spaced 30 days apart from 2011-06-01.
    '''
    n_regular = 13

    leading_dates = [
        datetime(2011, 1, 1, tzinfo=pytz.UTC),
        datetime(2011, 2, 1, tzinfo=pytz.UTC),
        datetime(2011, 3, 2, tzinfo=pytz.UTC),
        datetime(2011, 4, 3, tzinfo=pytz.UTC),
        datetime(2011, 4, 29, tzinfo=pytz.UTC),
    ]
    regular_dates = [
        datetime(2011, 6, 1, tzinfo=pytz.UTC) + timedelta(days=30 * i)
        for i in range(n_regular)
    ]

    values = [1, 1, 1, 1, np.nan] + [1, ] * n_regular
    estimated = [False, False, True, False, False] + [False, ] * n_regular

    frame = pd.DataFrame(
        {"value": values, "estimated": estimated},
        index=leading_dates + regular_dates,
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
def _deserialize_single_trace(trace): # verify type type_ = trace.get('type', None) if type_ is None: return {'error': 'Serialization "type" not given for trace.'} # check for "interpretation" key interpretation = trace.get('interpretation', None) if interpretation is None: return { 'error': ('Trace serializations must provide key "interpretation".') } # check for "unit" key unit = trace.get('unit', None) if unit is None: return {'error': ('Trace serializations must provide key "unit".')} # check for "records" key records = trace.get('records', None) if records is None: return {'error': ('Trace serializations must provide key "records".')} # check for optional "trace_id" key trace_id = trace.get('trace_id', None) # check for optional "interval" key interval = trace.get('interval', None) # switch on type if type_ == 'ARBITRARY': return { "trace": EnergyTrace( interpretation=interpretation, unit=unit, records=records, serializer=ArbitrarySerializer(parse_dates=True), trace_id=trace_id, interval=interval, ) } elif type_ == 'ARBITRARY_START': return { "trace": EnergyTrace( interpretation=interpretation, unit=unit, records=records, serializer=ArbitraryStartSerializer(parse_dates=True), trace_id=trace_id, interval=interval, ) } elif type_ == 'ARBITRARY_END': return { "trace": EnergyTrace( interpretation=interpretation, unit=unit, records=records, serializer=ArbitraryEndSerializer(parse_dates=True), trace_id=trace_id, interval=interval, ) } else: return { 'error': ('Serialization type "{}" not recognized for trace.'.format(type_)) }
def test_no_data_no_placeholder(interpretation):
    '''A trace with neither data nor ``placeholder=True`` must raise ValueError.'''
    only_interpretation = {"interpretation": interpretation}
    with pytest.raises(ValueError):
        EnergyTrace(**only_interpretation)
def test_repr(interpretation):
    '''The string form of a trace mentions the class name.'''
    placeholder_trace = EnergyTrace(
        placeholder=True, interpretation=interpretation)
    rendered = str(placeholder_trace)
    assert 'EnergyTrace' in rendered
def placeholder_trace_set():
    '''Return an EnergyTraceSet holding one placeholder trace labelled "trace".'''
    traces = [
        EnergyTrace(
            "ELECTRICITY_ON_SITE_GENERATION_UNCONSUMED", placeholder=True),
    ]
    labels = ["trace"]
    return EnergyTraceSet(traces, labels)
def test_data_and_placeholder(interpretation):
    '''Passing data together with ``placeholder=True`` must raise ValueError.'''
    with pytest.raises(ValueError):
        empty_frame = pd.DataFrame()
        EnergyTrace(
            interpretation=interpretation,
            data=empty_frame,
            placeholder=True,
        )
def test_invalid_interpretation():
    '''The interpretation string "INVALID" must be rejected with ValueError.'''
    bad_kwargs = dict(interpretation="INVALID", placeholder=True)
    with pytest.raises(ValueError):
        EnergyTrace(**bad_kwargs)
def energy_trace_set(daily_data):
    '''Wrap ``daily_data`` in a single electricity trace inside an EnergyTraceSet.'''
    electricity_trace = EnergyTrace(
        'ELECTRICITY_CONSUMPTION_SUPPLIED', data=daily_data, unit='kWh')
    return EnergyTraceSet([electricity_trace])
def fit(self, weather_source):
    ''' Fit all models associated with this trace.

    Parameters
    ----------
    weather_source : eemeter.weather.ISDWeatherSource
        Weather source to use in creating covariate data.

    Returns
    -------
    fit_outputs : dict
        ``self.fit_outputs``, keyed by modeling period label. Each entry
        has a "status" of "SUCCESS" or "FAILURE"; failures also carry a
        "traceback" string.
    '''
    for modeling_period_label, modeling_period in \
            self.modeling_period_set.iter_modeling_periods():

        filtered_data = self._filter_by_modeling_period(
            self.trace, modeling_period)
        filtered_trace = EnergyTrace(
            self.trace.interpretation, data=filtered_data,
            unit=self.trace.unit)

        model = self.model_mapping[modeling_period_label]

        try:
            input_data = self.formatter.create_input(
                filtered_trace, weather_source)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit still propagate.
            logger.warning(
                'For trace "{}" and modeling_period "{}", was not'
                ' able to format input data for {}.'
                .format(self.trace.interpretation, modeling_period_label,
                        model)
            )
            self.fit_outputs[modeling_period_label] = {
                "status": "FAILURE",
                "traceback": traceback.format_exc(),
                "start_date": None,
                "end_date": None,
                # Same key the success path uses for the row count.
                "n_rows": None,
                # Kept for callers that read the original key name.
                "rows": None,
            }
            continue

        input_description = self.formatter.describe_input(input_data)
        outputs = {
            "start_date": input_description.get('start_date'),
            "end_date": input_description.get('end_date'),
            "n_rows": input_description.get('n_rows'),
        }

        try:
            outputs.update(model.fit(input_data))
        except Exception:
            logger.warning(
                'For trace "{}" and modeling_period "{}", {} was not'
                ' able to fit using input data: {}'
                .format(self.trace.interpretation, modeling_period_label,
                        model, input_data)
            )
            outputs.update({
                "status": "FAILURE",
                "traceback": traceback.format_exc(),
            })
        else:
            logger.info(
                'Successfully fitted {} to formatted input data for'
                ' trace "{}" and modeling_period "{}".'
                .format(model, self.trace.interpretation,
                        modeling_period_label)
            )
            outputs.update({"status": "SUCCESS"})

        self.fit_outputs[modeling_period_label] = outputs

    return self.fit_outputs
def fit(self, weather_source):
    ''' Fit all models associated with this trace.

    Parameters
    ----------
    weather_source : eemeter.weather.ISDWeatherSource
        Weather source to use in creating covariate data.

    Returns
    -------
    fit_outputs : dict
        ``self.fit_outputs``, keyed by modeling period label. Each entry
        has a "status" of "SUCCESS" or "FAILURE"; failures also carry a
        "traceback" string.
    '''
    for modeling_period_label, modeling_period in \
            self.modeling_period_set.iter_modeling_periods():

        filtered_data = self._filter_by_modeling_period(
            self.trace, modeling_period)
        filtered_trace = EnergyTrace(self.trace.interpretation,
                                     data=filtered_data,
                                     unit=self.trace.unit)

        model = self.model_mapping[modeling_period_label]

        outputs = {
            "status": None,
            "traceback": None,
            "input_data": None,
            "start_date": None,
            "end_date": None,
            "n_rows": None,
            "model_fit": {},
        }

        # fail with DataSufficiencyException if bad weather source
        if weather_source is None:
            message = (
                'No weather source found for trace {} in {} period'.format(
                    self.trace.trace_id, modeling_period_label))
            logger.warning(message)
            # Raise and immediately catch so format_exc() yields a
            # traceback string for the failure record.
            try:
                raise model_exceptions.DataSufficiencyException(message)
            except model_exceptions.DataSufficiencyException:
                outputs.update({
                    "status": "FAILURE",
                    "traceback": traceback.format_exc(),
                })
                self.fit_outputs[modeling_period_label] = outputs
            continue

        # attempt to create input data
        try:
            input_data = self.formatter.create_input(
                filtered_trace, weather_source)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit still propagate.
            logger.warning(
                'Input data formatting failed for trace {} in {} period.'.
                format(self.trace.trace_id, modeling_period_label))
            outputs.update({
                "status": "FAILURE",
                "traceback": traceback.format_exc(),
            })
        else:
            input_description = self.formatter.describe_input(input_data)
            input_serialization = self.formatter.serialize_input(
                input_data)
            input_mask = self.formatter.get_input_data_mask(input_data)
            outputs.update({
                "input_data_serialization": input_serialization,
                "input_mask": input_mask,  # missing days
                "start_date": input_description.get('start_date'),
                "end_date": input_description.get('end_date'),
                "n_rows": input_description.get('n_rows'),
                "trace": filtered_trace,
            })

            try:
                model_fit = model.fit(input_data)
            except Exception:
                # Capture the traceback before logging.
                tb = traceback.format_exc()
                logger.warning(
                    '{} fit failed for trace {} in {} period.'.format(
                        model, self.trace.trace_id, modeling_period_label))
                outputs.update({
                    "status": "FAILURE",
                    "traceback": tb,
                })
            else:
                logger.debug(
                    '{} fit successful for trace {} in {} period.'.format(
                        model, self.trace.trace_id, modeling_period_label))
                outputs["model_fit"].update(model_fit)
                outputs.update({
                    "status": "SUCCESS",
                })

        self.fit_outputs[modeling_period_label] = outputs

    return self.fit_outputs
def test_valid_interpretation(valid_interpretation):
    '''A valid interpretation is accepted and stored unchanged.'''
    placeholder_trace = EnergyTrace(
        placeholder=True, interpretation=valid_interpretation)
    assert placeholder_trace.interpretation == valid_interpretation
def hourly_trace():
    '''Return a three-hour electricity trace whose last value is NaN.'''
    hourly_index = pd.date_range(
        '2000-01-01', periods=3, freq='H', tz=pytz.UTC)
    frame = pd.DataFrame(
        {"value": [1, 1, np.nan], "estimated": [False, False, False]},
        index=hourly_index,
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
def test_data_but_no_unit(interpretation):
    '''Supplying data without a unit must raise ValueError.'''
    with pytest.raises(ValueError):
        unitless_frame = pd.DataFrame()
        EnergyTrace(interpretation=interpretation, data=unitless_frame)
def test_data_but_invalid_unit(interpretation):
    '''Supplying data with the unit "INVALID" must raise ValueError.'''
    with pytest.raises(ValueError):
        frame = pd.DataFrame()
        EnergyTrace(
            interpretation=interpretation,
            data=frame,
            unit="INVALID",
        )