def test_max_values_mapping(df_cyclical):
    """Check the output when the maximum of ``day`` is supplied by the user.

    The transformer is built with ``max_values={"day": 31}``; the result of
    ``fit_transform`` must equal the input frame with the ``day_sin`` and
    ``day_cos`` columns below appended.
    """
    transformer = CyclicalFeatures(variables="day", max_values={"day": 31})
    result = transformer.fit_transform(df_cyclical)

    # expected output: original columns followed by the two new features
    expected = df_cyclical.copy().assign(
        day_sin=[
            0.937752,
            0.988468,
            0.848644,
            0.571268,
            0.201298,
            0.394355,
            0.724792,
        ],
        day_cos=[
            0.347305,
            0.151428,
            0.528964,
            0.820763,
            0.979530,
            0.918958,
            0.688967,
        ],
    )

    pd.testing.assert_frame_equal(result, expected)
def test_general_transformation_dropping_original_variables(df_cyclical):
    """Transform a single variable (``day``) and drop it from the output."""
    transformer = CyclicalFeatures(variables=["day"], drop_original=True)
    result = transformer.fit_transform(df_cyclical)

    # attributes learned during fit
    assert transformer.n_features_in_ == 2
    assert transformer.max_values_ == {"day": 7}

    # expected output: new features appended, original ``day`` column removed
    expected = (
        df_cyclical.copy()
        .assign(
            day_sin=[
                -0.78183,
                0.0,
                -0.97493,
                0.43388,
                0.78183,
                0.97493,
                -0.43388,
            ],
            day_cos=[
                0.623490,
                1.0,
                -0.222521,
                -0.900969,
                0.623490,
                -0.222521,
                -0.900969,
            ],
        )
        .drop(columns="day")
    )

    # transform output
    pd.testing.assert_frame_equal(result, expected)
def test_general_transformation_without_dropping_variables(df_cyclical):
    """Transform a single variable (``day``) and keep it in the output."""
    transformer = CyclicalFeatures(variables=["day"])
    result = transformer.fit_transform(df_cyclical)

    # expected values of the two new columns
    day_sin = [
        -0.78183,
        0.0,
        -0.97493,
        0.43388,
        0.78183,
        0.97493,
        -0.43388,
    ]
    day_cos = [
        0.623490,
        1.0,
        -0.222521,
        -0.900969,
        0.623490,
        -0.222521,
        -0.900969,
    ]
    expected = df_cyclical.copy()
    expected["day_sin"] = day_sin
    expected["day_cos"] = day_cos

    # attribute learned during fit
    assert transformer.max_values_ == {"day": 7}

    # transform output
    pd.testing.assert_frame_equal(result, expected)
def test_automatically_find_variables(df_cyclical):
    """Let the transformer pick the variables itself (``variables=None``).

    With ``drop_original=True`` the output must contain the new ``*_sin`` and
    ``*_cos`` columns for ``day`` and ``months`` and neither original column.
    """
    transformer = CyclicalFeatures(variables=None, drop_original=True)
    result = transformer.fit_transform(df_cyclical)

    # expected new columns, in the order they must appear in the output
    new_columns = {
        "day_sin": [
            -0.78183,
            0.0,
            -0.97493,
            0.43388,
            0.78183,
            0.97493,
            -0.43388,
        ],
        "day_cos": [
            0.62349,
            1.0,
            -0.222521,
            -0.900969,
            0.62349,
            -0.222521,
            -0.900969,
        ],
        "months_sin": [
            1.0,
            -0.5,
            -1.0,
            0.0,
            0.86603,
            0.0,
            0.0,
        ],
        "months_cos": [
            0.0,
            -0.86603,
            -0.0,
            1.0,
            -0.5,
            -1.0,
            1.0,
        ],
    }
    expected = df_cyclical.copy()
    for column, values in new_columns.items():
        expected[column] = values
    expected = expected.drop(columns=["day", "months"])

    # attribute learned during fit
    learned_max = {"day": 7, "months": 12}
    assert transformer.max_values_ == learned_max

    # transform output
    pd.testing.assert_frame_equal(result, expected)
def test_raises_error_when_init_parameters_not_permitted(df_cyclical):
    """Each invalid constructor argument below must raise ``TypeError``."""
    invalid_init_kwargs = [
        # max_values is a tuple instead of a dictionary
        {"max_values": ("dayi", 31)},
        # a max_values value is a string instead of a number
        {"max_values": {"day": "31"}},
        # drop_original is a string instead of a boolean
        {"drop_original": "True"},
    ]

    for kwargs in invalid_init_kwargs:
        with pytest.raises(TypeError):
            CyclicalFeatures(**kwargs)
Esempio n. 6
0
    MathFeatures,
    RelativeFeatures,
    # FIXME: remove in version 1.4
    CombineWithReferenceFeature,
    CyclicalTransformer,
    MathematicalCombination,
)
from tests.estimator_checks.estimator_checks import check_feature_engine_estimator

# Estimator instances fed to the parametrized estimator check that follows.
_estimators = [
    MathFeatures(variables=["0", "1"], func="mean", missing_values="ignore"),
    RelativeFeatures(
        variables=["0", "1"],
        reference=["0"],
        func=["add"],
        missing_values="ignore",
    ),
    CyclicalFeatures(),
    # FIXME: remove in version 1.4
    MathematicalCombination(variables_to_combine=["0", "1"]),
    CombineWithReferenceFeature(
        variables_to_combine=["0"],
        reference_variables=["1"],
    ),
    CyclicalTransformer(),
]


@pytest.mark.parametrize("estimator", _estimators)
def test_check_estimator_from_sklearn(estimator):
    """Run ``check_estimator`` on every instance listed in ``_estimators``.

    ``check_estimator`` is presumably scikit-learn's estimator check (per the
    test name; its import is outside this view). The test passes as long as
    the call raises nothing.
    """
    # Do not return the result: pytest expects test functions to return None
    # and warns (or, in newer releases, fails) when they return a value.
    check_estimator(estimator)


_estimators = [
    MathFeatures(variables=["var_1", "var_2", "var_3"], func="mean"),
def test_get_feature_names_out(df_cyclical):
    """Check ``get_feature_names_out`` with and without ``input_features``."""
    day_features = ["day_sin", "day_cos"]
    day_and_months_features = [
        "day_sin",
        "day_cos",
        "months_sin",
        "months_cos",
    ]

    # first with the default settings, then with drop_original=True
    for init_kwargs in ({}, {"drop_original": True}):
        transformer = CyclicalFeatures(**init_kwargs)
        X = transformer.fit_transform(df_cyclical)

        # without arguments the names must match the transformed frame
        assert list(X.columns) == transformer.get_feature_names_out()

        # with input_features only the derived names are returned
        names_for_day = transformer.get_feature_names_out(input_features=["day"])
        assert names_for_day == day_features

        names_for_both = transformer.get_feature_names_out(
            input_features=["day", "months"]
        )
        assert names_for_both == day_and_months_features

    # the remaining checks use the last transformer (drop_original=True)
    with pytest.raises(ValueError):
        # input_features given as a string instead of a list
        transformer.get_feature_names_out(input_features="day")

    with pytest.raises(ValueError):
        # an unrecognised feature name ("color") must raise as well
        transformer.get_feature_names_out(input_features=["color"])
def test_fit_raises_error_if_user_dictionary_key_not_in_df(df_cyclical):
    """``fit`` must raise ``KeyError`` for an unknown ``max_values`` key.

    The key used here is ``"dayi"`` which, per the test name, is not a column
    of ``df_cyclical``.
    """
    # Build the transformer outside the ``raises`` block so that only an error
    # coming from ``fit`` can satisfy the expectation.
    transformer = CyclicalFeatures(max_values={"dayi": 31})

    with pytest.raises(KeyError):
        transformer.fit(df_cyclical)
def test_fit_raises_error_if_na_in_df(df_na):
    """``fit`` must raise ``ValueError`` when given the ``df_na`` fixture.

    Per the test name, ``df_na`` is a dataframe containing missing values.
    """
    # test case 3: when dataset contains na, fit method
    # Build the transformer outside the ``raises`` block so that only an error
    # coming from ``fit`` can satisfy the expectation.
    transformer = CyclicalFeatures()

    with pytest.raises(ValueError):
        transformer.fit(df_na)