def create_caltrack_hourly_preliminary_design_matrix(meter_data, temperature_data):
    """Assemble the input for the first step of fitting a CalTRACK hourly model.

    Thin convenience wrapper around the basic feature creation helpers: it
    computes time and temperature features on the meter data index and merges
    them with the meter values.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Hourly meter data in eemeter format. Its ``value`` column becomes the
        ``meter_value`` column of the result.
    temperature_data : :any:`pandas.Series`
        Hourly temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        A design matrix with meter_value, hour_of_week, hdd_50, and cdd_65
        features.
    """
    index = meter_data.index

    # Only the hour-of-week feature is requested; the others are switched off.
    hour_of_week_features = compute_time_features(
        index, hour_of_week=True, hour_of_day=False, day_of_week=False
    )

    # Single heating (50) and cooling (65) balance point, hourly method.
    degree_features = compute_temperature_features(
        index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )

    meter_values = meter_data.value.to_frame("meter_value")
    return merge_features([meter_values, degree_features, hour_of_week_features])
def create_caltrack_billing_design_matrix(meter_data, temperature_data):
    """Assemble a design matrix suitable for use with CalTRACK Billing methods.

    Thin convenience wrapper around the basic feature creation helpers: it
    computes a usage-per-day feature and temperature features on the meter
    data index and merges them.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Meter data in eemeter format.
        NOTE(review): this was previously documented as "hourly"; billing
        period data is presumably what is expected here -- confirm.
    temperature_data : :any:`pandas.Series`
        Hourly temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        A design matrix with the usage-per-day feature (series name
        ``meter_value``), hdd_30-hdd_90, and cdd_30-cdd_90 features.
    """
    # Balance points 30 through 90 inclusive, used for both heating and cooling.
    balance_points = range(30, 91)

    usage = compute_usage_per_day_feature(meter_data, series_name="meter_value")
    degree_features = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=balance_points,
        cooling_balance_points=balance_points,
        data_quality=True,
        # limit temperature data matching to periods of up to 35 days.
        tolerance=pd.Timedelta("35D"),
    )
    return merge_features([usage, degree_features])
def create_caltrack_daily_design_matrix(meter_data, temperature_data):
    """Assemble a usage-per-day plus temperature-feature design matrix.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Meter data in eemeter format (presumably daily, going by the function
        name -- confirm against callers).
    temperature_data : :any:`pandas.Series`
        Temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        The merged usage-per-day feature (series name ``meter_value``) and
        temperature features for heating and cooling balance points 30
        through 90 inclusive.
    """
    temperature_options = {
        "heating_balance_points": range(30, 91),
        "cooling_balance_points": range(30, 91),
        "data_quality": True,
    }

    usage = compute_usage_per_day_feature(meter_data, series_name="meter_value")
    degree_features = compute_temperature_features(
        meter_data.index, temperature_data, **temperature_options
    )
    return merge_features([usage, degree_features])
def occupancy_precursor(il_electricity_cdd_hdd_hourly):
    """Build a merged feature frame from the hourly sample data.

    Reads ``"meter_data"`` and ``"temperature_data"`` from the given mapping,
    computes time features (helper defaults) and hourly temperature features
    with a heating balance point of 50 and a cooling balance point of 65, and
    merges them with the meter values under the column ``meter_value``.

    NOTE(review): presumably consumed as the input to occupancy estimation in
    tests -- confirm against callers.
    """
    sample = il_electricity_cdd_hdd_hourly
    meter_data = sample["meter_data"]
    temperature_data = sample["temperature_data"]
    index = meter_data.index

    default_time_features = compute_time_features(index)
    degree_features = compute_temperature_features(
        index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )

    meter_values = meter_data.value.to_frame("meter_value")
    return merge_features([meter_values, degree_features, default_time_features])
def create_caltrack_billing_design_matrix(meter_data, temperature_data):
    """Merge a usage-per-day feature with temperature features.

    The temperature features are computed on the meter data index for heating
    and cooling balance points 30 through 90 inclusive, with
    ``data_quality=True`` and a 35-day tolerance.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Meter data in eemeter format (presumably billing-period data, going
        by the function name -- confirm against callers).
    temperature_data : :any:`pandas.Series`
        Temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        The merged usage-per-day (series name ``meter_value``) and
        temperature features.
    """
    balance_points = range(30, 91)
    # limit temperature data matching to periods of up to 35 days.
    matching_tolerance = pd.Timedelta("35D")

    features = [
        compute_usage_per_day_feature(meter_data, series_name="meter_value"),
        compute_temperature_features(
            meter_data.index,
            temperature_data,
            heating_balance_points=balance_points,
            cooling_balance_points=balance_points,
            data_quality=True,
            tolerance=matching_tolerance,
        ),
    ]
    return merge_features(features)
def occupancy_precursor_only_nan(il_electricity_cdd_hdd_hourly):
    """Build a merged feature frame whose final meter reading is NaN.

    Reads ``"meter_data"`` and ``"temperature_data"`` from the given mapping,
    restricts the meter data to 2017-01-04 through 2017-06-01, blanks the last
    row, and then merges the meter values (as ``meter_value``) with hourly
    temperature features (heating balance point 50, cooling balance point 65)
    and time features computed with the helper's defaults.

    The incoming ``meter_data`` frame is left untouched: the window is copied
    before it is modified.
    """
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"]
    # Take an explicit copy of the window. Assigning into a bare slice is
    # chained assignment: pandas warns about it and the write may leak back
    # into the shared source frame.
    meter_data = meter_data[datetime(2017, 1, 4):datetime(2017, 6, 1)].copy()
    # Simulates a segment where there is only a single nan value
    meter_data.iloc[-1] = np.nan

    temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
    time_features = compute_time_features(meter_data.index)
    temperature_features = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    return merge_features(
        [meter_data.value.to_frame("meter_value"), temperature_features, time_features]
    )
def create_caltrack_hourly_preliminary_design_matrix(meter_data, temperature_data):
    """Merge meter values with hour-of-week and hourly temperature features.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Meter data whose ``value`` column becomes the ``meter_value`` column
        of the result (presumably hourly, eemeter format -- confirm).
    temperature_data : :any:`pandas.Series`
        Temperature data passed through to the temperature feature helper.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        The merged meter values, temperature features (heating balance point
        50, cooling balance point 65, hourly method), and time features with
        only hour-of-week enabled.
    """
    index = meter_data.index
    time_options = {"hour_of_week": True, "hour_of_day": False, "day_of_week": False}

    hour_of_week_features = compute_time_features(index, **time_options)
    degree_features = compute_temperature_features(
        index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_values = meter_data.value.to_frame("meter_value")

    return merge_features([meter_values, degree_features, hour_of_week_features])
def test_merge_features():
    """merge_features combines a Series and DataFrames sharing one index."""
    hourly_index = pd.date_range("2017-01-01", periods=100, freq="H", tz="UTC")
    series_a = pd.Series(1, index=hourly_index, name="a")
    frame_b = pd.DataFrame({"b": 2}, index=hourly_index)
    frame_cd = pd.DataFrame({"c": 3, "d": 4}, index=hourly_index)

    merged = merge_features([series_a, frame_b, frame_cd])

    # Columns keep the order in which the inputs were supplied.
    assert list(merged.columns) == ["a", "b", "c", "d"]
    assert merged.shape == (100, 4)
    assert merged.sum().sum() == 1000

    # Each column is a constant over 100 rows, so its sum is 100 * constant.
    expected_totals = (("a", 100), ("b", 200), ("c", 300), ("d", 400))
    for column, expected_total in expected_totals:
        assert merged[column].sum() == expected_total

    # The merged frame spans the same index as the inputs.
    assert merged.index[0] == hourly_index[0]
    assert merged.index[-1] == hourly_index[-1]
def test_merge_features_empty_raises():
    """merge_features must raise ValueError when given no features."""
    with pytest.raises(ValueError):
        # The call is expected to raise, so its result is deliberately not
        # bound to a name.
        merge_features([])