def test_error_new_variable_names_not_permitted(): variables = ["Age", "Name"] with pytest.raises(ValueError): MathFeatures( variables=variables, func=["sum", "mean"], new_variables_names=[ "sum_of_two_vars", "mean_of_two_vars", "another_alias", ], ) with pytest.raises(ValueError): MathFeatures( variables=variables, func=["sum"], new_variables_names=["sum_of_two_vars", "mean_of_two_vars"], ) with pytest.raises(ValueError): MathFeatures( variables=variables, func="sum", new_variables_names=["sum_of_two_vars", "mean_of_two_vars"], ) with pytest.raises(ValueError): MathFeatures( variables=variables, func=["sum", "mean"], new_variables_names=["sum_of_two_vars", "sum_of_two_vars"], )
def test_aggregations_with_strings(df_vartypes): transformer = MathFeatures( variables=["Age", "Marks"], func=["sum", "prod", "mean", "std", "max", "min"]) X = transformer.fit_transform(df_vartypes) ref = pd.DataFrame.from_dict({ "Name": ["tom", "nick", "krish", "jack"], "City": ["London", "Manchester", "Liverpool", "Bristol"], "Age": [20, 21, 19, 18], "Marks": [0.9, 0.8, 0.7, 0.6], "dob": pd.date_range("2020-02-24", periods=4, freq="T"), "sum_Age_Marks": [20.9, 21.8, 19.7, 18.6], "prod_Age_Marks": [18.0, 16.8, 13.299999999999999, 10.799999999999999], "mean_Age_Marks": [10.45, 10.9, 9.85, 9.3], "std_Age_Marks": [ 13.505739520663058, 14.28355697996826, 12.94005409571382, 12.303657992645928, ], "max_Age_Marks": [20.0, 21.0, 19.0, 18.0], "min_Age_Marks": [0.9, 0.8, 0.7, 0.6], }) # transform params pd.testing.assert_frame_equal(X, ref)
def test_no_error_when_null_values_in_variable(df_vartypes): df_na = df_vartypes.copy() df_na.loc[1, "Age"] = np.nan transformer = MathFeatures( variables=["Age", "Marks"], func=["sum", "mean"], missing_values="ignore", ) X = transformer.fit_transform(df_na) ref = pd.DataFrame.from_dict({ "Name": ["tom", "nick", "krish", "jack"], "City": ["London", "Manchester", "Liverpool", "Bristol"], "Age": [20, np.nan, 19, 18], "Marks": [0.9, 0.8, 0.7, 0.6], "dob": pd.date_range("2020-02-24", periods=4, freq="T"), "sum_Age_Marks": [20.9, 0.8, 19.7, 18.6], "mean_Age_Marks": [10.45, 0.8, 9.85, 9.3], }) # transform params pd.testing.assert_frame_equal(X, ref)
def test_variable_names_when_df_cols_are_integers(df_numeric_columns): transformer = MathFeatures( variables=[2, 3], func=["sum", "prod", "mean", "std", "max", "min"]) X = transformer.fit_transform(df_numeric_columns) ref = pd.DataFrame.from_dict({ 0: ["tom", "nick", "krish", "jack"], 1: ["London", "Manchester", "Liverpool", "Bristol"], 2: [20, 21, 19, 18], 3: [0.9, 0.8, 0.7, 0.6], 4: pd.date_range("2020-02-24", periods=4, freq="T"), "sum_2_3": [20.9, 21.8, 19.7, 18.6], "prod_2_3": [18.0, 16.8, 13.299999999999999, 10.799999999999999], "mean_2_3": [10.45, 10.9, 9.85, 9.3], "std_2_3": [ 13.505739520663058, 14.28355697996826, 12.94005409571382, 12.303657992645928, ], "max_2_3": [20.0, 21.0, 19.0, 18.0], "min_2_3": [0.9, 0.8, 0.7, 0.6], }) pd.testing.assert_frame_equal(X, ref)
def test_get_feature_names_out(_varnames, _drop, df_vartypes): # set up transformer transformer = MathFeatures( variables=["Age", "Marks"], func=["sum", "mean"], new_variables_names=_varnames, drop_original=_drop, ) # fit transformer X = transformer.fit_transform(df_vartypes) # assert functionality assert list( X.columns) == transformer.get_feature_names_out(input_features=None) assert list( X.columns) == transformer.get_feature_names_out(input_features=False) if _varnames is not None: assert _varnames == transformer.get_feature_names_out( input_features=True) else: assert ["sum_Age_Marks", "mean_Age_Marks" ] == transformer.get_feature_names_out(input_features=True)
def test_user_enters_two_operations(df_vartypes): transformer = MathFeatures(variables=["Age", "Marks"], func=["sum", np.mean]) X = transformer.fit_transform(df_vartypes) ref = pd.DataFrame.from_dict({ "Name": ["tom", "nick", "krish", "jack"], "City": ["London", "Manchester", "Liverpool", "Bristol"], "Age": [20, 21, 19, 18], "Marks": [0.9, 0.8, 0.7, 0.6], "dob": pd.date_range("2020-02-24", periods=4, freq="T"), "sum_Age_Marks": [20.9, 21.8, 19.7, 18.6], "mean_Age_Marks": [10.45, 10.9, 9.85, 9.3], }) pd.testing.assert_frame_equal(X, ref)
def test_error_when_null_values_in_variable(df_vartypes): df_na = df_vartypes.copy() df_na.loc[1, "Age"] = np.nan math_combinator = MathFeatures( variables=["Age", "Marks"], func=["sum", "mean"], missing_values="raise", ) with pytest.raises(ValueError): math_combinator.fit(df_na) math_combinator.fit(df_vartypes) with pytest.raises(ValueError): math_combinator.transform(df_na)
def test_get_feature_names_out_raises_error_when_wrong_param( _input_features, df_vartypes): transformer = MathFeatures( variables=["Age", "Marks"], func=["sum", "mean"], ) transformer.fit(df_vartypes) with pytest.raises(ValueError): transformer.get_feature_names_out(input_features=_input_features)
def test_error_if_new_variable_names_not_permitted(_variables): with pytest.raises(ValueError): MathFeatures(variables=["Age", "Name"], func=["sum"], new_variables_names=_variables)
def test_error_if_func_is_dictionary(): with pytest.raises(NotImplementedError): MathFeatures(variables=["Age", "Name"], func={"A": "sum", "B": "mean"})
def test_error_when_variables_not_permitted(_variables): with pytest.raises(ValueError): MathFeatures(variables=_variables, func=["sum", "mean"])
def test_error_when_required_params_not_entered(): with pytest.raises(TypeError): MathFeatures(func=["mean", "sum"]) with pytest.raises(TypeError): MathFeatures(variables=["vara", "varb"])
import pytest from sklearn.utils.estimator_checks import check_estimator from feature_engine.creation import ( CyclicalFeatures, MathFeatures, RelativeFeatures, # FIXME: remove in version 1.4 CombineWithReferenceFeature, CyclicalTransformer, MathematicalCombination, ) from tests.estimator_checks.estimator_checks import check_feature_engine_estimator _estimators = [ MathFeatures(variables=["0", "1"], func="mean", missing_values="ignore"), RelativeFeatures(variables=["0", "1"], reference=["0"], func=["add"], missing_values="ignore"), CyclicalFeatures(), # FIXME: remove in version 1.4 MathematicalCombination(variables_to_combine=["0", "1"]), CombineWithReferenceFeature(variables_to_combine=["0"], reference_variables=["1"]), CyclicalTransformer(), ] @pytest.mark.parametrize("estimator", _estimators) def test_check_estimator_from_sklearn(estimator):