예제 #1
0
def test_compute_temperature_features_no_meter_data_tz(
        il_electricity_cdd_hdd_billing_monthly):
    """A timezone-naive meter index must be rejected with ``ValueError``."""
    meter_data = il_electricity_cdd_hdd_billing_monthly["meter_data"]
    temperature_data = il_electricity_cdd_hdd_billing_monthly[
        "temperature_data"]
    # Build the tz-naive index as a separate object rather than assigning it
    # back onto ``meter_data``; that way the DataFrame handed out by the
    # fixture is not modified by this test.
    naive_meter_index = meter_data.index.tz_localize(None)
    with pytest.raises(ValueError):
        compute_temperature_features(naive_meter_index, temperature_data)
예제 #2
0
def test_compute_temperature_features_no_temp_data_tz(
        il_electricity_cdd_hdd_billing_monthly):
    """Timezone-naive temperature data must be rejected with ``ValueError``."""
    fixture = il_electricity_cdd_hdd_billing_monthly
    meter_index = fixture["meter_data"].index
    # tz_localize(None) returns a copy of the series without timezone info.
    naive_temperature = fixture["temperature_data"].tz_localize(None)
    with pytest.raises(ValueError):
        compute_temperature_features(meter_index, naive_temperature)
예제 #3
0
def test_compute_temperature_features_no_freq_index(
        il_electricity_cdd_hdd_billing_monthly):
    """Temperature data whose index has no ``freq`` must raise ``ValueError``."""
    fixture = il_electricity_cdd_hdd_billing_monthly
    meter_index = fixture["meter_data"].index
    temps = fixture["temperature_data"]
    # NOTE(review): this clears ``freq`` on the index object of the Series
    # handed out by the fixture, i.e. the fixture data is changed in place.
    temps.index.freq = None
    with pytest.raises(ValueError):
        compute_temperature_features(meter_index, temps)
예제 #4
0
def test_compute_temperature_features_daily_bad_degree_days(
        il_electricity_cdd_hdd_daily):
    """``degree_day_method="UNKNOWN"`` must raise ``ValueError`` (daily fixture)."""
    fixture = il_electricity_cdd_hdd_daily
    meter_index = fixture["meter_data"].index
    temps = fixture["temperature_data"]
    bad_options = dict(
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        degree_day_method="UNKNOWN",
    )
    with pytest.raises(ValueError):
        compute_temperature_features(meter_index, temps, **bad_options)
예제 #5
0
def test_compute_temperature_features_with_duplicated_index(
        il_electricity_cdd_hdd_billing_monthly):
    """A meter index containing duplicates must raise a clear ``ValueError``."""
    meter_data = il_electricity_cdd_hdd_billing_monthly["meter_data"]
    temperature_data = il_electricity_cdd_hdd_billing_monthly[
        "temperature_data"]

    # These inputs are specifically formed to give a less readable error if
    # duplicates are not caught: every meter timestamp appears twice (in
    # sorted order) and the first 8000 temperature rows are dropped.
    # ``Index.repeat`` is used instead of ``DataFrame.append``, which was
    # deprecated in pandas 1.4 and removed in pandas 2.0; only the index is
    # passed on, so the duplicated frame itself is not needed.
    duplicated_meter_index = meter_data.index.repeat(2).sort_values()
    temperature_data = temperature_data.iloc[8000:]

    with pytest.raises(ValueError) as excinfo:
        compute_temperature_features(duplicated_meter_index, temperature_data)
    assert str(excinfo.value) == "Duplicates found in input meter trace index."
예제 #6
0
def test_compute_temperature_features_hourly_bad_degree_days(
        il_electricity_cdd_hdd_hourly):
    """``degree_day_method="UNKNOWN"`` must raise ``ValueError`` (hourly fixture)."""
    # Restrict both inputs to the same window (a slice with both hdd and cdd).
    window = slice("2016-03-01", "2016-07-01")
    meter_index = il_electricity_cdd_hdd_hourly["meter_data"][window].index
    temps = il_electricity_cdd_hdd_hourly["temperature_data"][window]

    bad_options = dict(
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        degree_day_method="UNKNOWN",
    )
    with pytest.raises(ValueError):
        compute_temperature_features(meter_index, temps, **bad_options)
예제 #7
0
def test_compute_temperature_features_billing_bimonthly_hourly_degree_days(
        il_electricity_cdd_hdd_billing_bimonthly):
    """Check ``degree_day_method="hourly"`` output for the bimonthly billing fixture.

    Verifies the result shape, the set of columns, and the rounded mean of
    each column.
    """
    fixture = il_electricity_cdd_hdd_billing_bimonthly
    features = compute_temperature_features(
        fixture["meter_data"].index,
        fixture["temperature_data"],
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="hourly",
    )

    expected_columns = [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_hours_dropped",
        "n_hours_kept",
    ]
    # (column, mean rounded to two decimals)
    expected_means = [
        ("hdd_60", 13.08),
        ("hdd_61", 13.69),
        ("cdd_65", 3.78),
        ("cdd_66", 3.46),
        ("n_hours_kept", 1386.93),
        ("n_hours_dropped", 0),
    ]

    assert features.shape == (14, 6)
    assert sorted(features.columns) == expected_columns
    for column, expected in expected_means:
        assert round(features[column].mean(), 2) == expected
예제 #8
0
def test_compute_temperature_features_billing_monthly_hourly_degree_days_use_mean_false(
        il_electricity_cdd_hdd_billing_monthly):
    """Check hourly degree days with ``use_mean_daily_values=False`` (monthly billing).

    Verifies the result shape, the set of columns, and the rounded mean of
    each column.
    """
    fixture = il_electricity_cdd_hdd_billing_monthly
    features = compute_temperature_features(
        fixture["meter_data"].index,
        fixture["temperature_data"],
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="hourly",
        use_mean_daily_values=False,
    )

    expected_columns = [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_hours_dropped",
        "n_hours_kept",
    ]
    # (column, mean rounded to two decimals)
    expected_means = [
        ("hdd_60", 343.01),
        ("hdd_61", 360.19),
        ("cdd_65", 121.29),
        ("cdd_66", 110.83),
        ("n_hours_kept", 719.15),
        ("n_hours_dropped", 0),
    ]

    assert features.shape == (27, 6)
    assert sorted(features.columns) == expected_columns
    for column, expected in expected_means:
        assert round(features[column].mean(), 2) == expected
예제 #9
0
def test_compute_temperature_features_billing_monthly_daily_degree_days(
        il_electricity_cdd_hdd_billing_monthly):
    """Check ``degree_day_method="daily"`` output for the monthly billing fixture.

    Verifies the result shape, the set of columns, and the rounded mean of
    each column.
    """
    fixture = il_electricity_cdd_hdd_billing_monthly
    features = compute_temperature_features(
        fixture["meter_data"].index,
        fixture["temperature_data"],
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="daily",
    )

    expected_columns = [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_days_dropped",
        "n_days_kept",
    ]
    # (column, mean rounded to two decimals)
    expected_means = [
        ("hdd_60", 11.42),
        ("hdd_61", 12.0),
        ("cdd_65", 3.54),
        ("cdd_66", 3.19),
        ("n_days_kept", 29.96),
        ("n_days_dropped", 0.04),
    ]

    assert features.shape == (27, 6)
    assert sorted(features.columns) == expected_columns
    for column, expected in expected_means:
        assert round(features[column].mean(), 2) == expected
예제 #10
0
def create_caltrack_billing_design_matrix(meter_data, temperature_data):
    """Build a design matrix suitable for use with CalTRACK Billing methods.

    Merges the usage-per-day feature (computed with
    ``series_name="meter_value"``) with temperature features computed over
    ``meter_data.index`` for heating and cooling balance points 30 through 90
    inclusive, with ``data_quality=True``.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Meter data in eemeter format.
    temperature_data : :any:`pandas.Series`
        Hourly temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        The merged usage-per-day and temperature features (hdd_30-hdd_90 and
        cdd_30-cdd_90, among the columns produced by
        ``compute_temperature_features``).
    """
    balance_points = range(30, 91)
    # limit temperature data matching to periods of up to 35 days.
    matching_tolerance = pd.Timedelta("35D")

    features = [
        compute_usage_per_day_feature(meter_data, series_name="meter_value"),
        compute_temperature_features(
            meter_data.index,
            temperature_data,
            heating_balance_points=balance_points,
            cooling_balance_points=balance_points,
            data_quality=True,
            tolerance=matching_tolerance,
        ),
    ]
    return merge_features(features)
예제 #11
0
def create_caltrack_hourly_preliminary_design_matrix(meter_data, temperature_data):
    """Build the input for the first step of creating a CalTRACK hourly model.

    Merges the meter values (as ``meter_value``) with hourly-method
    temperature features (heating balance point 50, cooling balance point 65)
    and an hour-of-week time feature.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Hourly meter data in eemeter format.
    temperature_data : :any:`pandas.Series`
        Hourly temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        A design matrix with meter_value, hour_of_week, hdd_50, and cdd_65 features.
    """
    meter_index = meter_data.index
    # Only the hour-of-week feature is requested; the other two are disabled.
    hour_of_week_features = compute_time_features(
        meter_index, hour_of_week=True, hour_of_day=False, day_of_week=False
    )
    degree_day_features = compute_temperature_features(
        meter_index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_values = meter_data.value.to_frame("meter_value")
    return merge_features(
        [meter_values, degree_day_features, hour_of_week_features]
    )
예제 #12
0
def test_compute_temperature_features_hourly_hourly_degree_days(
        il_electricity_cdd_hdd_hourly, snapshot):
    """Check hourly degree days on hourly data against the stored snapshot.

    Verifies the set of columns and the result shape directly; the rounded
    column means are compared with the ``"values"`` snapshot.
    """
    # Restrict both inputs to the same window (a slice with both hdd and cdd).
    window = slice("2016-03-01", "2016-07-01")
    meter_index = il_electricity_cdd_hdd_hourly["meter_data"][window].index
    temps = il_electricity_cdd_hdd_hourly["temperature_data"][window]

    features = compute_temperature_features(
        meter_index,
        temps,
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="hourly",
    )

    expected_columns = [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_hours_dropped",
        "n_hours_kept",
    ]
    assert sorted(features.columns) == expected_columns
    assert features.shape == (2952, 6)

    # The order of this tuple fixes the order of the values in the snapshot.
    snapshot_columns = (
        "hdd_60",
        "hdd_61",
        "cdd_65",
        "cdd_66",
        "n_hours_kept",
        "n_hours_dropped",
    )
    rounded_means = [
        round(features[column].mean(), 2) for column in snapshot_columns
    ]
    snapshot.assert_match(rounded_means, "values")
예제 #13
0
def test_compute_temperature_features_billing_bimonthly_daily_degree_days(
        il_electricity_cdd_hdd_billing_bimonthly, snapshot):
    """Check daily degree days on bimonthly billing data against the snapshot.

    Verifies the result shape and the set of columns directly; the rounded
    column means are compared with the ``"values"`` snapshot.
    """
    fixture = il_electricity_cdd_hdd_billing_bimonthly
    features = compute_temperature_features(
        fixture["meter_data"].index,
        fixture["temperature_data"],
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="daily",
    )

    expected_columns = [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_days_dropped",
        "n_days_kept",
    ]
    assert features.shape == (14, 6)
    assert sorted(features.columns) == expected_columns

    # The order of this tuple fixes the order of the values in the snapshot.
    snapshot_columns = (
        "hdd_60",
        "hdd_61",
        "cdd_65",
        "cdd_66",
        "n_days_kept",
        "n_days_dropped",
    )
    rounded_means = [
        round(features[column].mean(), 2) for column in snapshot_columns
    ]
    snapshot.assert_match(rounded_means, "values")
예제 #14
0
def create_caltrack_daily_design_matrix(meter_data, temperature_data):
    """Merge usage-per-day and temperature features into one design matrix.

    The usage-per-day feature is computed with ``series_name="meter_value"``;
    temperature features are computed over ``meter_data.index`` for heating
    and cooling balance points 30 through 90 inclusive, with
    ``data_quality=True``.

    Returns the result of ``merge_features`` on those two feature sets.
    """
    balance_points = range(30, 91)
    features = [
        compute_usage_per_day_feature(meter_data, series_name="meter_value"),
        compute_temperature_features(
            meter_data.index,
            temperature_data,
            heating_balance_points=balance_points,
            cooling_balance_points=balance_points,
            data_quality=True,
        ),
    ]
    return merge_features(features)
예제 #15
0
def test_compute_temperature_features_billing_bimonthly_temp_mean(
        il_electricity_cdd_hdd_billing_bimonthly):
    """Check the default-argument output for the bimonthly billing fixture.

    Verifies the result shape, the set of columns, and the rounded average of
    the ``temperature_mean`` column.
    """
    fixture = il_electricity_cdd_hdd_billing_bimonthly
    features = compute_temperature_features(
        fixture["meter_data"].index, fixture["temperature_data"])

    expected_columns = [
        "n_days_dropped",
        "n_days_kept",
        "temperature_mean",
    ]
    assert features.shape == (14, 3)
    assert sorted(features.columns) == expected_columns
    assert round(features["temperature_mean"].mean()) == 55.0
예제 #16
0
def occupancy_precursor(il_electricity_cdd_hdd_hourly):
    """Merge meter values, hourly degree-day features and time features.

    Uses the hourly fixture's meter and temperature data; temperature
    features are computed with heating balance point 50, cooling balance
    point 65 and ``degree_day_method="hourly"``.
    """
    meter = il_electricity_cdd_hdd_hourly["meter_data"]
    temps = il_electricity_cdd_hdd_hourly["temperature_data"]
    meter_index = meter.index

    time_part = compute_time_features(meter_index)
    temperature_part = compute_temperature_features(
        meter_index,
        temps,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_part = meter.value.to_frame("meter_value")
    return merge_features([meter_part, temperature_part, time_part])
예제 #17
0
def create_caltrack_billing_design_matrix(meter_data, temperature_data):
    """Merge usage-per-day and temperature features into one design matrix.

    The usage-per-day feature is computed with ``series_name="meter_value"``;
    temperature features are computed over ``meter_data.index`` for heating
    and cooling balance points 30 through 90 inclusive, with
    ``data_quality=True`` and a 35-day ``tolerance``.

    Returns the result of ``merge_features`` on those two feature sets.
    """
    balance_points = range(30, 91)
    # limit temperature data matching to periods of up to 35 days.
    matching_tolerance = pd.Timedelta("35D")

    features = [
        compute_usage_per_day_feature(meter_data, series_name="meter_value"),
        compute_temperature_features(
            meter_data.index,
            temperature_data,
            heating_balance_points=balance_points,
            cooling_balance_points=balance_points,
            data_quality=True,
            tolerance=matching_tolerance,
        ),
    ]
    return merge_features(features)
예제 #18
0
def test_compute_temperature_features_shorter_temperature_data(
        il_electricity_cdd_hdd_daily):
    """Check the output when the temperature series ends before the meter data.

    The last 200 temperature rows are removed; the result shape, the set of
    columns, and the rounded sum of ``temperature_mean`` are verified.
    """
    meter_index = il_electricity_cdd_hdd_daily["meter_data"].index
    # drop some data
    truncated_temps = il_electricity_cdd_hdd_daily["temperature_data"][:-200]

    features = compute_temperature_features(meter_index, truncated_temps)

    expected_columns = [
        "n_days_dropped",
        "n_days_kept",
        "temperature_mean",
    ]
    assert features.shape == (810, 3)
    assert sorted(features.columns) == expected_columns
    assert round(features["temperature_mean"].sum()) == 43958.0
예제 #19
0
def test_compute_temperature_features_hourly_temp_mean(
        il_electricity_cdd_hdd_hourly):
    """Check the default-argument output on a window of the hourly fixture.

    Verifies the set of columns, the result shape, and the rounded average of
    the ``temperature_mean`` column.
    """
    # Restrict both inputs to the same window (a slice with both hdd and cdd).
    window = slice("2016-03-01", "2016-07-01")
    meter_index = il_electricity_cdd_hdd_hourly["meter_data"][window].index
    temps = il_electricity_cdd_hdd_hourly["temperature_data"][window]

    features = compute_temperature_features(meter_index, temps)

    expected_columns = [
        "n_hours_dropped",
        "n_hours_kept",
        "temperature_mean",
    ]
    assert sorted(features.columns) == expected_columns
    assert features.shape == (2952, 3)

    assert round(features["temperature_mean"].mean()) == 62.0
예제 #20
0
def test_compute_temperature_features_daily_data_quality(
        il_electricity_cdd_hdd_daily):
    """Check the ``data_quality=True`` output for the daily fixture.

    With ``temperature_mean=False``, verifies the result shape, the set of
    columns, and the rounded means of the null / not-null count columns.
    """
    fixture = il_electricity_cdd_hdd_daily
    features = compute_temperature_features(
        fixture["meter_data"].index,
        fixture["temperature_data"],
        temperature_mean=False,
        data_quality=True,
    )

    expected_columns = [
        "n_days_dropped",
        "n_days_kept",
        "temperature_not_null",
        "temperature_null",
    ]
    assert features.shape == (810, 4)
    assert sorted(features.columns) == expected_columns
    assert round(features["temperature_not_null"].mean(), 2) == 23.99
    assert round(features["temperature_null"].mean(), 2) == 0.00
예제 #21
0
def create_caltrack_hourly_preliminary_design_matrix(meter_data,
                                                     temperature_data):
    """Merge meter values, hourly degree-day features and hour-of-week features.

    Temperature features are computed over ``meter_data.index`` with heating
    balance point 50, cooling balance point 65 and
    ``degree_day_method="hourly"``. Returns the result of ``merge_features``.
    """
    meter_index = meter_data.index
    # Only the hour-of-week feature is requested; the other two are disabled.
    time_options = dict(hour_of_week=True, hour_of_day=False,
                        day_of_week=False)
    hour_of_week_features = compute_time_features(meter_index, **time_options)
    degree_day_features = compute_temperature_features(
        meter_index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_values = meter_data.value.to_frame("meter_value")
    return merge_features(
        [meter_values, degree_day_features, hour_of_week_features])
예제 #22
0
def test_compute_temperature_features_shorter_meter_data(
        il_electricity_cdd_hdd_daily):
    """Check the output when the meter data ends before the temperature data.

    The last 10 meter rows are removed; the result shape, the set of columns,
    the rounded sum of ``temperature_mean``, and the null ``n_days_kept`` in
    the final row are verified.
    """
    # drop some data
    shortened_meter = il_electricity_cdd_hdd_daily["meter_data"][:-10]
    temps = il_electricity_cdd_hdd_daily["temperature_data"]

    features = compute_temperature_features(shortened_meter.index, temps)

    expected_columns = [
        "n_days_dropped",
        "n_days_kept",
        "temperature_mean",
    ]
    assert features.shape == (800, 3)
    assert sorted(features.columns) == expected_columns
    assert round(features["temperature_mean"].sum()) == 43904.0

    # ensure last row is NaN'ed
    final_row = features.iloc[-1]
    assert pd.isnull(final_row.n_days_kept)
예제 #23
0
def occupancy_precursor_only_nan(il_electricity_cdd_hdd_hourly):
    """Build merged hourly features for a segment whose last meter row is NaN.

    The meter data is restricted to 2017-01-04 through 2017-06-01 and its
    final row is set to NaN. Temperature features use heating balance point
    50, cooling balance point 65 and ``degree_day_method="hourly"``. Returns
    the result of ``merge_features`` on the meter values (as
    ``meter_value``), the temperature features and the time features.
    """
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"]
    # Copy the slice before writing to it: assigning into a slice of the
    # fixture's DataFrame is chained assignment, which triggers pandas'
    # SettingWithCopyWarning and may write through to the fixture data.
    meter_data = meter_data[datetime(2017, 1, 4):datetime(2017, 6, 1)].copy()
    # Simulates a segment where there is only a single nan value
    meter_data.iloc[-1] = np.nan
    temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
    time_features = compute_time_features(meter_data.index)
    temperature_features = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    return merge_features([
        meter_data.value.to_frame("meter_value"), temperature_features,
        time_features
    ])
예제 #24
0
def test_compute_temperature_features_empty_temperature_data():
    """Check the output for an empty temperature series and empty meter index.

    Expects a zero-row result whose only columns are the day counts and
    ``temperature_mean``.
    """
    empty_hourly_index = pd.DatetimeIndex([], tz="UTC", name="dt", freq="H")
    empty_temps = pd.Series(
        {"value": []}, index=empty_hourly_index).astype(float)
    # The daily-resampled (still empty) index stands in for a meter index.
    daily_index = empty_temps.resample("D").sum().index
    meter_stub = pd.DataFrame({"value": 0}, index=daily_index)

    features = compute_temperature_features(
        meter_stub.index,
        empty_temps,
        heating_balance_points=[65],
        cooling_balance_points=[65],
        degree_day_method="daily",
        use_mean_daily_values=False,
    )

    expected_columns = [
        "n_days_dropped",
        "n_days_kept",
        "temperature_mean",
    ]
    assert features.shape == (0, 3)
    assert sorted(features.columns) == expected_columns
    assert round(features["temperature_mean"].sum()) == 0
예제 #25
0
def test_compute_temperature_features_hourly_data_quality(
        il_electricity_cdd_hdd_hourly):
    """Check the ``data_quality=True`` output on a window of the hourly fixture.

    With ``temperature_mean=False``, verifies the result shape, the set of
    columns, and the rounded means of the null / not-null count columns.
    """
    # Restrict both inputs to the same window (a slice with both hdd and cdd).
    window = slice("2016-03-01", "2016-07-01")
    meter_index = il_electricity_cdd_hdd_hourly["meter_data"][window].index
    temps = il_electricity_cdd_hdd_hourly["temperature_data"][window]

    features = compute_temperature_features(
        meter_index,
        temps,
        temperature_mean=False,
        data_quality=True,
    )

    expected_columns = [
        "n_hours_dropped",
        "n_hours_kept",
        "temperature_not_null",
        "temperature_null",
    ]
    assert features.shape == (2952, 4)
    assert sorted(features.columns) == expected_columns
    assert round(features["temperature_not_null"].mean(), 2) == 1.0
    assert round(features["temperature_null"].mean(), 2) == 0.0