コード例 #1
0
def test_compute_time_features_none():
    """Requesting no time features at all is expected to raise ValueError."""
    hourly_index = pd.date_range("2017-01-01", periods=168, freq="H", tz="UTC")
    # Every available feature flag is switched off.
    all_disabled = {
        "hour_of_week": False,
        "day_of_week": False,
        "hour_of_day": False,
    }
    with pytest.raises(ValueError):
        compute_time_features(hourly_index, **all_disabled)
コード例 #2
0
def create_caltrack_hourly_preliminary_design_matrix(meter_data, temperature_data):
    """Build the preliminary design matrix for a CalTRACK hourly model.

    Convenience wrapper around the basic feature creation methods: it derives
    an hour-of-week time feature and hourly degree-day temperature features on
    the index of ``meter_data`` and merges them with the meter values.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Hourly meter data in eemeter format.
    temperature_data : :any:`pandas.Series`
        Hourly temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        A design matrix with meter_value, hour_of_week, hdd_50, and cdd_65 features.
    """
    meter_index = meter_data.index

    # Only the hour-of-week feature is requested; the other two are disabled.
    hour_of_week_features = compute_time_features(
        meter_index, hour_of_week=True, hour_of_day=False, day_of_week=False
    )

    # Heating balance point 50 and cooling balance point 65, hourly method.
    degree_day_features = compute_temperature_features(
        meter_index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )

    meter_values = meter_data.value.to_frame("meter_value")
    return merge_features([meter_values, degree_day_features, hour_of_week_features])
コード例 #3
0
def test_compute_occupancy_feature(even_occupancy):
    """Check name, shape and total of the occupancy feature over 1000 hours."""
    hourly_index = pd.date_range("2017-01-01", periods=1000, freq="H", tz="UTC")
    features = compute_time_features(hourly_index, hour_of_week=True)
    result = compute_occupancy_feature(features.hour_of_week, even_occupancy)

    assert result.name == "occupancy"
    assert result.shape == (1000,)
    assert result.sum().sum() == 500
コード例 #4
0
def test_compute_occupancy_feature_hour_of_week_has_nan(even_occupancy):
    """Occupancy is still computed when the last hour_of_week entry is NaN."""
    hourly_index = pd.date_range("2017-01-01", periods=72, freq="H", tz="UTC")
    features = compute_time_features(hourly_index, hour_of_week=True)
    hours = features.hour_of_week
    # Blank out the final entry to simulate a missing hour-of-week value.
    hours.iloc[-1] = np.nan

    result = compute_occupancy_feature(hours, even_occupancy)

    assert result.name == "occupancy"
    assert result.shape == (72,)
    assert result.sum() == 36
コード例 #5
0
def segmented_data():
    """Return a 24-row hourly frame of synthetic segment data.

    Columns are ``hour_of_week`` (from ``compute_time_features``),
    ``temperature_mean`` (evenly spaced 0 to 100), ``meter_value`` (evenly
    spaced 10 to 70) and ``weight`` (all ones).
    """
    n_hours = 24
    hourly_index = pd.date_range(
        start="2017-01-01", periods=n_hours, freq="H", tz="UTC"
    )
    hour_of_week = compute_time_features(hourly_index).hour_of_week
    columns = {
        "hour_of_week": hour_of_week,
        "temperature_mean": np.linspace(0, 100, n_hours),
        "meter_value": np.linspace(10, 70, n_hours),
        "weight": np.ones((n_hours,)),
    }
    return pd.DataFrame(columns, index=hourly_index)
コード例 #6
0
def test_compute_occupancy_feature_with_nans(even_occupancy):
    """If there are fewer than 168 periods, the NaN at the end causes problems.

    Smoke test: it passes as long as ``compute_occupancy_feature`` does not
    raise once the NaN entry has been dropped.
    """
    index = pd.date_range("2017-01-01", periods=100, freq="H", tz="UTC")
    time_features = compute_time_features(index, hour_of_week=True)
    hour_of_week = time_features.hour_of_week
    # Assign by position explicitly: a bare ``[-1]`` on a datetime-indexed
    # Series relies on pandas' deprecated integer-position fallback.
    hour_of_week.iloc[-1] = np.nan
    # Comment out the dropna line below to see the error from not dropping NaNs
    # when calculating _add_weights when there are fewer than 168 periods.

    # TODO (ssuffian): Refactor so get_missing_hours_warnings propagates.
    # Right now, it will error if the dropna below isn't used.
    hour_of_week.dropna(inplace=True)
    compute_occupancy_feature(hour_of_week, even_occupancy)
コード例 #7
0
def occupancy_precursor(il_electricity_cdd_hdd_hourly):
    """Merge meter values, temperature features and time features into one frame.

    Reads ``meter_data`` and ``temperature_data`` from the supplied mapping and
    computes all features on the meter data index.
    """
    meter = il_electricity_cdd_hdd_hourly["meter_data"]
    temperature = il_electricity_cdd_hdd_hourly["temperature_data"]
    meter_index = meter.index

    # Default call: no time feature is explicitly disabled.
    time_part = compute_time_features(meter_index)
    temperature_part = compute_temperature_features(
        meter_index,
        temperature,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_part = meter.value.to_frame("meter_value")
    return merge_features([meter_part, temperature_part, time_part])
コード例 #8
0
def test_compute_time_features_all():
    """Default call yields all three categorical time features for 168 hours."""
    hourly_index = pd.date_range("2017-01-01", periods=168, freq="H", tz="UTC")
    result = compute_time_features(hourly_index)

    expected_columns = ["day_of_week", "hour_of_day", "hour_of_week"]
    assert list(result.columns) == expected_columns
    assert result.shape == (168, 3)
    assert result.sum().sum() == 16464.0

    # The columns are categoricals, so summing them directly must fail.
    for column in expected_columns:
        with pytest.raises(TypeError):
            result[column].sum()

    # Once cast to float, each column sums to the total of its value range
    # repeated over the 168-hour window.
    expected_float_sums = {
        "day_of_week": sum(range(7)) * 24,
        "hour_of_day": sum(range(24)) * 7,
        "hour_of_week": sum(range(168)),
    }
    for column, expected_sum in expected_float_sums.items():
        assert result[column].astype("float").sum() == expected_sum

    # The output index spans the same endpoints as the input index.
    assert result.index[0] == hourly_index[0]
    assert result.index[-1] == hourly_index[-1]
コード例 #9
0
def occupancy_precursor_only_nan(il_electricity_cdd_hdd_hourly):
    """Build merged features for a meter-data slice whose last row is NaN.

    Reads ``meter_data`` and ``temperature_data`` from the supplied mapping,
    restricts the meter data to 2017-01-04 through 2017-06-01, blanks the final
    row, and merges the meter values with temperature and time features
    computed on the sliced index.
    """
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"]
    # Copy the slice before mutating it, so the NaN assignment below cannot
    # write through to the shared source data and does not trigger pandas'
    # chained-assignment (SettingWithCopy) warning.
    # NOTE(review): the slice bounds are timezone-naive; if the meter index is
    # tz-aware, newer pandas versions may reject this lookup. Confirm.
    meter_data = meter_data[datetime(2017, 1, 4):datetime(2017, 6, 1)].copy()
    # Simulates a segment where there is only a single nan value
    meter_data.iloc[-1] = np.nan
    temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
    time_features = compute_time_features(meter_data.index)
    temperature_features = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    return merge_features([
        meter_data.value.to_frame("meter_value"), temperature_features,
        time_features
    ])
コード例 #10
0
def create_caltrack_hourly_preliminary_design_matrix(meter_data,
                                                     temperature_data):
    """Assemble meter values, temperature features and hour-of-week features.

    All features are computed on ``meter_data.index``; the three pieces are
    combined with ``merge_features`` and the merged result is returned.
    """
    meter_index = meter_data.index

    # Only hour_of_week is enabled; hour_of_day and day_of_week are disabled.
    time_flags = {
        "hour_of_week": True,
        "hour_of_day": False,
        "day_of_week": False,
    }
    hour_of_week_part = compute_time_features(meter_index, **time_flags)

    temperature_part = compute_temperature_features(
        meter_index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )

    meter_part = meter_data.value.to_frame("meter_value")
    return merge_features([meter_part, temperature_part, hour_of_week_part])
コード例 #11
0
def test_compute_time_features_bad_freq():
    """A daily-frequency index is expected to be rejected with ValueError."""
    daily_index = pd.date_range("2017-01-01", periods=168, freq="D", tz="UTC")
    with pytest.raises(ValueError):
        compute_time_features(daily_index)
コード例 #12
0
def partial_hour_of_week_feature():
    """Return the hour_of_week feature for 84 hourly periods from 2017-01-01."""
    hourly_index = pd.date_range("2017-01-01", periods=84, freq="H", tz="UTC")
    return compute_time_features(hourly_index, hour_of_week=True).hour_of_week