def test_compute_time_features_none():
    """Switching off all three time features must raise a ValueError."""
    hourly_index = pd.date_range("2017-01-01", periods=168, freq="H", tz="UTC")
    all_disabled = {
        "hour_of_week": False,
        "day_of_week": False,
        "hour_of_day": False,
    }
    with pytest.raises(ValueError):
        compute_time_features(hourly_index, **all_disabled)
def create_caltrack_hourly_preliminary_design_matrix(meter_data, temperature_data):
    """Assemble the preliminary design matrix for a CalTRACK hourly model.

    Convenience wrapper around the basic feature creation methods; the
    result is suitable as input to the first step of creating a CalTRACK
    hourly model.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Hourly meter data in eemeter format.
    temperature_data : :any:`pandas.Series`
        Hourly temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        A design matrix with meter_value, hour_of_week, hdd_50, and cdd_65
        features.
    """
    # Only the hour-of-week time feature is requested.
    hour_of_week_features = compute_time_features(
        meter_data.index, hour_of_week=True, hour_of_day=False, day_of_week=False
    )
    degree_day_features = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_value = meter_data.value.to_frame("meter_value")
    # Merge order: meter value, then temperature features, then time features.
    return merge_features(
        [meter_value, degree_day_features, hour_of_week_features]
    )
def test_compute_occupancy_feature(even_occupancy):
    """Occupancy over 1000 hourly periods is named "occupancy" and sums to 500."""
    n_hours = 1000
    hourly_index = pd.date_range(
        "2017-01-01", periods=n_hours, freq="H", tz="UTC"
    )
    hour_of_week = compute_time_features(
        hourly_index, hour_of_week=True
    ).hour_of_week

    occupancy = compute_occupancy_feature(hour_of_week, even_occupancy)

    assert occupancy.name == "occupancy"
    assert occupancy.shape == (n_hours,)
    occupied_total = occupancy.sum().sum()
    assert occupied_total == 500
def test_compute_occupancy_feature_hour_of_week_has_nan(even_occupancy):
    """Occupancy is still computed when the last hour_of_week value is NaN."""
    n_hours = 72
    hourly_index = pd.date_range(
        "2017-01-01", periods=n_hours, freq="H", tz="UTC"
    )
    hour_of_week = compute_time_features(
        hourly_index, hour_of_week=True
    ).hour_of_week
    # Blank out the final entry by position.
    hour_of_week.iloc[-1] = np.nan

    occupancy = compute_occupancy_feature(hour_of_week, even_occupancy)

    assert occupancy.name == "occupancy"
    assert occupancy.shape == (n_hours,)
    assert occupancy.sum() == 36
def segmented_data():
    """Build a 24-row hourly frame with hour_of_week, temperature_mean,
    meter_value and weight columns."""
    n_hours = 24
    hourly_index = pd.date_range(
        start="2017-01-01", periods=n_hours, freq="H", tz="UTC"
    )
    hour_of_week = compute_time_features(hourly_index).hour_of_week
    columns = {
        "hour_of_week": hour_of_week,
        "temperature_mean": np.linspace(0, 100, n_hours),
        "meter_value": np.linspace(10, 70, n_hours),
        "weight": np.ones((n_hours,)),
    }
    return pd.DataFrame(columns, index=hourly_index)
def test_compute_occupancy_feature_with_nans(even_occupancy):
    """If there are less than 168 periods, the NaN at the end causes problems"""
    index = pd.date_range("2017-01-01", periods=100, freq="H", tz="UTC")
    time_features = compute_time_features(index, hour_of_week=True)
    hour_of_week = time_features.hour_of_week
    # Set the last value by position with ``.iloc``. A bare integer key
    # (``hour_of_week[-1]``) on this datetime-indexed series relies on the
    # deprecated positional fallback of ``Series.__setitem__``.
    hour_of_week.iloc[-1] = np.nan
    # Comment out the dropna line below to see the error from not dropping NA
    # when calculating _add_weights when there are less than 168 periods.
    # TODO (ssuffian): Refactor so get_missing_hours_warnings propagates.
    # Right now, it will error if the dropna below isn't used.
    hour_of_week.dropna(inplace=True)
    # No assertions on the result: the test passes if this call does not raise.
    compute_occupancy_feature(hour_of_week, even_occupancy)
def occupancy_precursor(il_electricity_cdd_hdd_hourly):
    """Merge meter values with temperature and time features for the
    hourly electricity data set."""
    meter = il_electricity_cdd_hdd_hourly["meter_data"]
    temperature = il_electricity_cdd_hdd_hourly["temperature_data"]

    # Default arguments are used for the time features.
    time_part = compute_time_features(meter.index)
    temperature_part = compute_temperature_features(
        meter.index,
        temperature,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_part = meter.value.to_frame("meter_value")
    return merge_features([meter_part, temperature_part, time_part])
def test_compute_time_features_all():
    """With default arguments, all three time features are returned."""
    hourly_index = pd.date_range("2017-01-01", periods=168, freq="H", tz="UTC")
    result = compute_time_features(hourly_index)

    expected_columns = ["day_of_week", "hour_of_day", "hour_of_week"]
    assert list(result.columns) == expected_columns
    assert result.shape == (168, 3)
    assert result.sum().sum() == 16464.0

    # The columns are categoricals, so summing one directly must fail.
    for column in expected_columns:
        with pytest.raises(TypeError):
            result[column].sum()

    # After casting to float, the 168 periods give these column totals.
    expected_totals = {
        "day_of_week": sum(range(7)) * 24,
        "hour_of_day": sum(range(24)) * 7,
        "hour_of_week": sum(range(168)),
    }
    for column, total in expected_totals.items():
        assert result[column].astype("float").sum() == total

    # The output keeps the first and last timestamps of the input index.
    assert result.index[0] == hourly_index[0]
    assert result.index[-1] == hourly_index[-1]
def occupancy_precursor_only_nan(il_electricity_cdd_hdd_hourly):
    """Build the merged feature frame for a meter segment whose last row is NaN.

    The meter data is restricted to 2017-01-04 through 2017-06-01 and its
    final row is overwritten with NaN before features are computed.
    """
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"]
    # Copy the slice before writing to it, so the NaN assignment below cannot
    # write through to the frame held by the incoming fixture data.
    meter_data = meter_data[datetime(2017, 1, 4):datetime(2017, 6, 1)].copy()
    # Simulates a segment where there is only a single nan value
    meter_data.iloc[-1] = np.nan
    temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
    time_features = compute_time_features(meter_data.index)
    temperature_features = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    return merge_features(
        [
            meter_data.value.to_frame("meter_value"),
            temperature_features,
            time_features,
        ]
    )
def create_caltrack_hourly_preliminary_design_matrix(meter_data, temperature_data):
    """Combine meter values with hour-of-week and hourly temperature features.

    Parameters
    ----------
    meter_data
        Object with an ``index`` and a ``value`` column; the column is
        exposed as ``meter_value`` in the result.
    temperature_data
        Temperature data passed through to ``compute_temperature_features``.

    Returns
    -------
    design_matrix
        The result of ``merge_features`` over the meter value frame, the
        temperature features (heating balance point 50, cooling balance
        point 65, hourly degree day method) and the hour_of_week feature.
    """
    hourly_index = meter_data.index
    # hour_of_day and day_of_week are explicitly switched off.
    time_part = compute_time_features(
        hourly_index,
        hour_of_week=True,
        hour_of_day=False,
        day_of_week=False,
    )
    temperature_part = compute_temperature_features(
        hourly_index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_part = meter_data.value.to_frame("meter_value")
    return merge_features([meter_part, temperature_part, time_part])
def test_compute_time_features_bad_freq():
    """A daily-frequency index must be rejected with a ValueError."""
    daily_index = pd.date_range("2017-01-01", periods=168, freq="D", tz="UTC")
    with pytest.raises(ValueError):
        compute_time_features(daily_index)
def partial_hour_of_week_feature():
    """Return the hour_of_week feature for 84 hourly periods starting
    2017-01-01 UTC."""
    half_week_index = pd.date_range("2017-01-01", periods=84, freq="H", tz="UTC")
    return compute_time_features(half_week_index, hour_of_week=True).hour_of_week