def test_expanding_sum_single_var(df_time):
    """Expanding sum of one variable is appended as a single new column.

    The transformer is restricted to ``ambient_temp`` with ``functions="sum"``;
    the output must equal ``df_time`` plus ``ambient_temp_expanding_sum``.
    """
    # Expected feature values; the first row is NaN.
    expected_columns = {
        "ambient_temp_expanding_sum": [
            np.nan, 31.31, 62.82, 94.97, 127.36,
            159.98, 192.48, 225.00, 257.68, 291.44,
            325.57, 359.65, 393.35, 427.24, 461.28,
        ],
    }

    # Expected frame: the input columns followed by the new feature.
    expected_df = df_time.copy()
    for column_name, column_values in expected_columns.items():
        expected_df[column_name] = column_values

    tr = ExpandingWindowFeatures(variables=["ambient_temp"], functions="sum")
    result = tr.fit_transform(df_time)

    assert_frame_equal(result, expected_df)
def test_expanding_sum_and_mean_single_var(df_time):
    """Expanding sum and mean of one variable are appended as two new columns.

    The transformer is restricted to ``ambient_temp`` with
    ``functions=["sum", "mean"]``; the output must equal ``df_time`` plus the
    sum column followed by the mean column.
    """
    # Expected feature values, in output column order; first rows are NaN.
    expected_columns = {
        "ambient_temp_expanding_sum": [
            np.nan, 31.31, 62.82, 94.97, 127.36,
            159.98, 192.48, 225.00, 257.68, 291.44,
            325.57, 359.65, 393.35, 427.24, 461.28,
        ],
        "ambient_temp_expanding_mean": [
            np.nan, 31.3100, 31.4100, 31.6567, 31.8400,
            31.9960, 32.0800, 32.1429, 32.2100, 32.3822,
            32.5570, 32.6955, 32.7792, 32.8646, 32.9486,
        ],
    }

    # Expected frame: the input columns followed by the new features.
    expected_df = df_time.copy()
    for column_name, column_values in expected_columns.items():
        expected_df[column_name] = column_values

    tr = ExpandingWindowFeatures(
        variables=["ambient_temp"],
        functions=["sum", "mean"],
    )
    result = tr.fit_transform(df_time)

    assert_frame_equal(result, expected_df)
def test_expanding_sum_multiple_vars(df_time):
    """Expanding sum of two variables is appended as one new column each.

    The transformer is restricted to ``ambient_temp`` and ``irradiation`` with
    ``functions="sum"``; the output must equal ``df_time`` plus both sum
    columns, in that variable order.
    """
    # Expected feature values, in output column order; first rows are NaN.
    expected_columns = {
        "ambient_temp_expanding_sum": [
            np.nan, 31.31, 62.82, 94.97, 127.36,
            159.98, 192.48, 225.00, 257.68, 291.44,
            325.57, 359.65, 393.35, 427.24, 461.28,
        ],
        "irradiation_expanding_sum": [
            np.nan, 0.51, 1.3, 1.95, 2.71,
            3.13, 3.62, 4.19, 4.75, 5.49,
            6.38, 6.85, 7.39, 7.79, 8.24,
        ],
    }

    # Expected frame: the input columns followed by the new features.
    expected_df = df_time.copy()
    for column_name, column_values in expected_columns.items():
        expected_df[column_name] = column_values

    tr = ExpandingWindowFeatures(
        variables=["ambient_temp", "irradiation"],
        functions="sum",
    )
    result = tr.fit_transform(df_time)

    assert_frame_equal(result, expected_df)
def test_get_feature_names_out_raises_when_input_features_is_string(df_time):
    """``get_feature_names_out`` raises ValueError for a bare-string argument."""
    transformer = ExpandingWindowFeatures(functions=["mean", "sum"])
    transformer.fit(df_time)

    # A single name passed as a string, not wrapped in a list, must be rejected.
    not_a_list = "ambient_temp"
    with pytest.raises(ValueError):
        transformer.get_feature_names_out(input_features=not_a_list)
def test_get_feature_names_out_raises_when_input_features_not_transformed(
    df_time,
):
    """``get_feature_names_out`` raises ValueError for a feature that was not transformed."""
    transformer = ExpandingWindowFeatures(functions=["mean", "sum"])
    transformer.fit(df_time)

    # "color" is requested here as a feature the transformer did not transform.
    untransformed = ["color"]
    with pytest.raises(ValueError):
        transformer.get_feature_names_out(input_features=untransformed)
def test_sort_index(df_time):
    """Check that ``sort_index`` controls whether output rows are reordered.

    With ``sort_index=False`` the transformed variables keep the shuffled row
    order of the input; with ``sort_index=True`` they come back ordered by
    index.
    """
    # Shuffle the rows. A fixed seed keeps the permutation, and therefore any
    # failure, reproducible from run to run.
    Xs = df_time.sample(frac=1, random_state=0)

    # sort_index=False: row order of the input is preserved.
    transformer = ExpandingWindowFeatures(sort_index=False)
    df_tr = transformer.fit_transform(Xs)
    assert_frame_equal(df_tr[transformer.variables_],
                       Xs[transformer.variables_])

    # sort_index=True: rows are returned sorted by index.
    transformer = ExpandingWindowFeatures(sort_index=True)
    df_tr = transformer.fit_transform(Xs)
    assert_frame_equal(df_tr[transformer.variables_],
                       Xs[transformer.variables_].sort_index())
def test_get_feature_names_out_single_variable_and_single_function(df_time):
    """Feature names for one variable and one function.

    With ``input_features=None`` the names are the original columns followed
    by the new feature; with ``["ambient_temp"]`` only the new feature is
    returned.
    """
    # Columns of the input frame, in order.
    input_columns = ["ambient_temp", "module_temp", "irradiation", "color"]
    # The single feature created by the transformer.
    new_features = ["ambient_temp_expanding_sum"]

    transformer = ExpandingWindowFeatures(
        variables="ambient_temp",
        functions="sum",
    )
    transformer.fit(df_time)

    all_names = transformer.get_feature_names_out(input_features=None)
    assert all_names == [*input_columns, *new_features]

    selected_names = transformer.get_feature_names_out(
        input_features=["ambient_temp"]
    )
    assert selected_names == new_features
def test_get_feature_names_out_multiple_variables_and_functions(df_time):
    """Feature names for several variables and two functions.

    Checks the full name list (``input_features=None``) and the names returned
    for all three, the first two, and only the first of the transformed
    variables.
    """
    # Columns of the input frame, and the subset queried below.
    input_columns = ["ambient_temp", "module_temp", "irradiation", "color"]
    queried = ["ambient_temp", "module_temp", "irradiation"]

    # Expected new features: two per variable, mean before sum.
    new_features = [
        "ambient_temp_expanding_mean",
        "ambient_temp_expanding_sum",
        "module_temp_expanding_mean",
        "module_temp_expanding_sum",
        "irradiation_expanding_mean",
        "irradiation_expanding_sum",
    ]

    transformer = ExpandingWindowFeatures(functions=["mean", "sum"])
    transformer.fit(df_time)

    names_all = transformer.get_feature_names_out(input_features=None)
    assert names_all == [*input_columns, *new_features]

    names_three = transformer.get_feature_names_out(input_features=queried)
    assert names_three == new_features

    # First two variables -> first four features.
    names_two = transformer.get_feature_names_out(input_features=queried[:2])
    assert names_two == new_features[:4]

    # First variable only -> first two features.
    names_one = transformer.get_feature_names_out(input_features=queried[:1])
    assert names_one == new_features[:2]
# Example #9
# 0
import pandas as pd
import pytest
from sklearn.base import clone
from sklearn.utils.estimator_checks import check_estimator

from tests.estimator_checks.estimator_checks import check_feature_engine_estimator
from feature_engine.timeseries.forecasting import (
    ExpandingWindowFeatures,
    LagFeatures,
    WindowFeatures,
)

# One instance of each forecasting transformer, all created with
# missing_values="ignore"; used to parametrize the estimator checks below.
_estimators = [
    transformer_class(missing_values="ignore")
    for transformer_class in (
        LagFeatures,
        WindowFeatures,
        ExpandingWindowFeatures,
    )
]


@pytest.mark.parametrize("estimator", _estimators)
def test_check_estimator_from_sklearn(estimator):
    """Run scikit-learn's ``check_estimator`` on each forecasting transformer.

    The check signals problems by raising, so its result is deliberately not
    returned: pytest expects test functions to return ``None``.
    """
    check_estimator(estimator)


@pytest.mark.parametrize("estimator", _estimators)
def test_check_estimator_from_feature_engine(estimator):
    """Run the project's ``check_feature_engine_estimator`` on each transformer.

    The helper's result is deliberately not returned: pytest expects test
    functions to return ``None``.
    """
    check_feature_engine_estimator(estimator)


@pytest.mark.parametrize("estimator", _estimators)
def test_error_when_not_unique_values_in_index(df_time, estimator):
def test_expanding_window_raises_when_periods_negative():
    """Constructing the transformer with ``periods=-1`` raises ValueError."""
    # Pattern the raised message is searched for (pytest applies it as a regex).
    expected_message = "periods must be a non-negative integer. Got -1 instead."
    with pytest.raises(ValueError, match=expected_message):
        ExpandingWindowFeatures(periods=-1)
def test_permitted_param_periods(_periods):
    """The ``periods`` value given at construction is stored unchanged."""
    # NOTE(review): ``_periods`` is presumably supplied by a parametrize
    # decorator or fixture that is not visible here — confirm.
    assert ExpandingWindowFeatures(periods=_periods).periods == _periods
def test_expanding_sum_and_mean_multiple_vars(df_time):
    """Expanding sum and mean of two variables are appended as four new columns.

    The transformer is restricted to ``ambient_temp`` and ``irradiation`` with
    ``functions=["sum", "mean"]``; the output must equal ``df_time`` plus, for
    each variable in turn, its sum column followed by its mean column.
    """
    # Expected feature values, in output column order; first rows are NaN.
    expected_columns = {
        "ambient_temp_expanding_sum": [
            np.nan, 31.31, 62.82, 94.97, 127.36,
            159.98, 192.48, 225.00, 257.68, 291.44,
            325.57, 359.65, 393.35, 427.24, 461.28,
        ],
        "ambient_temp_expanding_mean": [
            np.nan, 31.3100, 31.4100, 31.6567, 31.8400,
            31.9960, 32.0800, 32.1429, 32.2100, 32.3822,
            32.5570, 32.6955, 32.7792, 32.8646, 32.9486,
        ],
        "irradiation_expanding_sum": [
            np.nan, 0.51, 1.3, 1.95, 2.71,
            3.13, 3.62, 4.19, 4.75, 5.49,
            6.38, 6.85, 7.39, 7.79, 8.24,
        ],
        "irradiation_expanding_mean": [
            np.nan, 0.51000, 0.65000, 0.65000, 0.67750,
            0.62600, 0.60333, 0.59857, 0.59375, 0.61000,
            0.63800, 0.62273, 0.61583, 0.59923, 0.58857,
        ],
    }

    # Expected frame: the input columns followed by the new features.
    expected_df = df_time.copy()
    for column_name, column_values in expected_columns.items():
        expected_df[column_name] = column_values

    tr = ExpandingWindowFeatures(
        variables=["ambient_temp", "irradiation"],
        functions=["sum", "mean"],
    )
    result = tr.fit_transform(df_time)

    assert_frame_equal(result, expected_df)