def test_max_values_mapping(df_cyclical):
    """Check the transform output when ``max_values`` is supplied by the user."""
    transformer = CyclicalFeatures(variables="day", max_values={"day": 31})
    result = transformer.fit_transform(df_cyclical)

    # Expected frame: the untouched input plus the two new "day" features,
    # appended in sin -> cos order.
    expected = df_cyclical.assign(
        day_sin=[
            0.937752,
            0.988468,
            0.848644,
            0.571268,
            0.201298,
            0.394355,
            0.724792,
        ],
        day_cos=[
            0.347305,
            0.151428,
            0.528964,
            0.820763,
            0.979530,
            0.918958,
            0.688967,
        ],
    )

    pd.testing.assert_frame_equal(result, expected)
def test_general_transformation_dropping_original_variables(df_cyclical):
    """Transform a single variable and drop it from the output afterwards."""
    transformer = CyclicalFeatures(variables=["day"], drop_original=True)
    result = transformer.fit_transform(df_cyclical)

    # Expected frame: input plus the sin/cos features, minus the source column.
    expected = df_cyclical.assign(
        day_sin=[
            -0.78183,
            0.0,
            -0.97493,
            0.43388,
            0.78183,
            0.97493,
            -0.43388,
        ],
        day_cos=[
            0.623490,
            1.0,
            -0.222521,
            -0.900969,
            0.623490,
            -0.222521,
            -0.900969,
        ],
    ).drop(columns="day")

    # Attributes learned during fit.
    assert transformer.n_features_in_ == 2
    assert transformer.max_values_ == {"day": 7}

    # Output of transform.
    pd.testing.assert_frame_equal(result, expected)
def test_general_transformation_without_dropping_variables(df_cyclical):
    """Transform a single variable while keeping the original column."""
    transformer = CyclicalFeatures(variables=["day"])
    result = transformer.fit_transform(df_cyclical)

    # Expected frame: the untouched input plus the sin/cos features of "day".
    expected = df_cyclical.assign(
        day_sin=[
            -0.78183,
            0.0,
            -0.97493,
            0.43388,
            0.78183,
            0.97493,
            -0.43388,
        ],
        day_cos=[
            0.623490,
            1.0,
            -0.222521,
            -0.900969,
            0.623490,
            -0.222521,
            -0.900969,
        ],
    )

    # Attribute learned during fit.
    assert transformer.max_values_ == {"day": 7}

    # Output of transform.
    pd.testing.assert_frame_equal(result, expected)
def test_automatically_find_variables(df_cyclical):
    """Let the transformer pick the variables itself (``variables=None``)."""
    transformer = CyclicalFeatures(variables=None, drop_original=True)
    result = transformer.fit_transform(df_cyclical)

    # Expected frame: sin/cos features for "day" and "months" appended in that
    # order, with both source columns removed.
    expected = df_cyclical.assign(
        day_sin=[
            -0.78183,
            0.0,
            -0.97493,
            0.43388,
            0.78183,
            0.97493,
            -0.43388,
        ],
        day_cos=[
            0.62349,
            1.0,
            -0.222521,
            -0.900969,
            0.62349,
            -0.222521,
            -0.900969,
        ],
        months_sin=[
            1.0,
            -0.5,
            -1.0,
            0.0,
            0.86603,
            0.0,
            0.0,
        ],
        months_cos=[
            0.0,
            -0.86603,
            -0.0,
            1.0,
            -0.5,
            -1.0,
            1.0,
        ],
    ).drop(columns=["day", "months"])

    # Attribute learned during fit.
    assert transformer.max_values_ == {
        "day": 7,
        "months": 12,
    }

    # Output of transform.
    pd.testing.assert_frame_equal(result, expected)
def test_raises_error_when_init_parameters_not_permitted(df_cyclical):
    """Each invalid constructor argument below must be rejected with TypeError."""
    invalid_init_kwargs = (
        # max_values is a tuple rather than a dictionary
        {"max_values": ("dayi", 31)},
        # max_values maps a variable to a string value
        {"max_values": {"day": "31"}},
        # drop_original is a string rather than a boolean
        {"drop_original": "True"},
    )

    for init_kwargs in invalid_init_kwargs:
        with pytest.raises(TypeError):
            CyclicalFeatures(**init_kwargs)
MathFeatures, RelativeFeatures, # FIXME: remove in version 1.4 CombineWithReferenceFeature, CyclicalTransformer, MathematicalCombination, ) from tests.estimator_checks.estimator_checks import check_feature_engine_estimator _estimators = [ MathFeatures(variables=["0", "1"], func="mean", missing_values="ignore"), RelativeFeatures(variables=["0", "1"], reference=["0"], func=["add"], missing_values="ignore"), CyclicalFeatures(), # FIXME: remove in version 1.4 MathematicalCombination(variables_to_combine=["0", "1"]), CombineWithReferenceFeature(variables_to_combine=["0"], reference_variables=["1"]), CyclicalTransformer(), ] @pytest.mark.parametrize("estimator", _estimators) def test_check_estimator_from_sklearn(estimator): return check_estimator(estimator) _estimators = [ MathFeatures(variables=["var_1", "var_2", "var_3"], func="mean"),
def test_get_feature_names_out(df_cyclical):
    """Exercise ``get_feature_names_out`` with and without ``input_features``."""
    day_names = ["day_sin", "day_cos"]
    day_and_months_names = [
        "day_sin",
        "day_cos",
        "months_sin",
        "months_cos",
    ]

    # First pass: default constructor; second pass: original variables dropped.
    for init_kwargs in ({}, {"drop_original": True}):
        transformer = CyclicalFeatures(**init_kwargs)
        transformed = transformer.fit_transform(df_cyclical)

        # Without arguments, the names match the transformed frame's columns.
        assert list(transformed.columns) == transformer.get_feature_names_out()

        # With input_features, only the names derived from them are returned.
        names_for_day = transformer.get_feature_names_out(input_features=["day"])
        assert names_for_day == day_names

        names_for_both = transformer.get_feature_names_out(
            input_features=["day", "months"]
        )
        assert names_for_both == day_and_months_names

    # The error checks run against the last transformer (drop_original=True).
    with pytest.raises(ValueError):
        # input_features given as a string instead of a list
        transformer.get_feature_names_out(input_features="day")

    with pytest.raises(ValueError):
        # input_features holds a name the transformer is expected to reject
        transformer.get_feature_names_out(input_features=["color"])
def test_fit_raises_error_if_user_dictionary_key_not_in_df(df_cyclical):
    """``fit`` must raise KeyError for a ``max_values`` key missing from the data.

    The transformer is built outside the ``pytest.raises`` block so that only
    an error raised by ``fit`` itself can satisfy the expectation; an
    unexpected KeyError from the constructor now fails the test instead of
    silently passing it.
    """
    transformer = CyclicalFeatures(max_values={"dayi": 31})

    with pytest.raises(KeyError):
        transformer.fit(df_cyclical)
def test_fit_raises_error_if_na_in_df(df_na):
    """``fit`` must raise ValueError when the dataset contains missing values.

    The transformer is built outside the ``pytest.raises`` block so that only
    an error raised by ``fit`` itself can satisfy the expectation.
    """
    transformer = CyclicalFeatures()

    with pytest.raises(ValueError):
        transformer.fit(df_na)