def test_mandatory_init_parameters(): with pytest.raises(TypeError): RelativeFeatures(reference=["var1"], func=["add"]) with pytest.raises(TypeError): RelativeFeatures(variables=["var1"], func=["add"]) with pytest.raises(TypeError): RelativeFeatures(variables=["var1"], reference=["var2"])
def test_when_missing_values_is_ignore(df_vartypes): df_na = df_vartypes.copy() df_na.loc[1, "Age"] = np.nan transformer = RelativeFeatures( variables=["Age", "Marks"], reference=["Age", "Marks"], func=["sub"], missing_values="ignore", ) X = transformer.fit_transform(df_na) ref = pd.DataFrame.from_dict({ "Name": ["tom", "nick", "krish", "jack"], "City": ["London", "Manchester", "Liverpool", "Bristol"], "Age": [20, np.nan, 19, 18], "Marks": [0.9, 0.8, 0.7, 0.6], "dob": pd.date_range("2020-02-24", periods=4, freq="T"), "Age_sub_Age": [0, np.nan, 0, 0], "Marks_sub_Age": [-19.1, np.nan, -18.3, -17.4], "Age_sub_Marks": [19.1, np.nan, 18.3, 17.4], "Marks_sub_Marks": [0.0, 0.0, 0.0, 0.0], }) pd.testing.assert_frame_equal(X, ref)
def test_when_df_cols_are_integers(df_vartypes): df = df_vartypes.copy() df.columns = [0, 1, 2, 3, 4] transformer = RelativeFeatures( variables=[2, 3], reference=[2, 3], func=["sub", "add"], ) X = transformer.fit_transform(df) ref = pd.DataFrame.from_dict({ 0: ["tom", "nick", "krish", "jack"], 1: ["London", "Manchester", "Liverpool", "Bristol"], 2: [20, 21, 19, 18], 3: [0.9, 0.8, 0.7, 0.6], 4: pd.date_range("2020-02-24", periods=4, freq="T"), "2_sub_2": [0, 0, 0, 0], "3_sub_2": [-19.1, -20.2, -18.3, -17.4], "2_sub_3": [19.1, 20.2, 18.3, 17.4], "3_sub_3": [0.0, 0.0, 0.0, 0.0], "2_add_2": [40, 42, 38, 36], "3_add_2": [20.9, 21.8, 19.7, 18.6], "2_add_3": [20.9, 21.8, 19.7, 18.6], "3_add_3": [1.8, 1.6, 1.4, 1.2], }) pd.testing.assert_frame_equal(X, ref)
def test_error_if_func_not_supported(_func): with pytest.raises(ValueError): RelativeFeatures( variables=["Age", "Name"], reference=["Age", "Name"], func=_func, )
def test_get_feature_names_out_from_pipeline(_drop, df_vartypes): transformer = RelativeFeatures( variables=["Age", "Marks"], reference=["Age", "Marks"], func=["add", "sub"], drop_original=_drop, ) pipe = Pipeline([("transformer", transformer)]) varnames = [ "Age_add_Age", "Marks_add_Age", "Age_add_Marks", "Marks_add_Marks", "Age_sub_Age", "Marks_sub_Age", "Age_sub_Marks", "Marks_sub_Marks", ] X = pipe.fit_transform(df_vartypes) assert list(X.columns) == pipe.get_feature_names_out(input_features=None) assert list(X.columns) == pipe.get_feature_names_out(input_features=False) assert varnames == pipe.get_feature_names_out(input_features=True)
def test_error_when_drop_original_not_bool(): for drop_original in ["True", [True]]: with pytest.raises(TypeError): RelativeFeatures( variables=["Age"], reference=["Marks"], func=["add", "mul"], drop_original=drop_original, )
def test_alternative_operation(df_vartypes): # input df df = df_vartypes.copy() # Expected result dft = df.copy() dft["Age_truediv_Marks"] = dft["Age"].truediv(dft["Marks"]) dft["Age_floordiv_Marks"] = dft["Age"].floordiv(dft["Marks"]) dft["Age_mod_Marks"] = dft["Age"].mod(dft["Marks"]) dft["Age_pow_Marks"] = dft["Age"].pow(dft["Marks"]) transformer = RelativeFeatures( variables=["Age"], reference=["Marks"], func=["truediv", "floordiv", "mod", "pow"], ) X = transformer.fit_transform(df) pd.testing.assert_frame_equal(X, dft)
def test_error_when_entered_variables_not_in_df(df_vartypes): transformer = RelativeFeatures( variables=["FeatOutsideDataset", "Age"], reference=["Age", "Name"], func=["sub"], ) with pytest.raises(KeyError): transformer.fit_transform(df_vartypes) transformer = RelativeFeatures( reference=["FeatOutsideDataset", "Age"], variables=["Age", "Name"], func=["sub"], ) with pytest.raises(TypeError): transformer.fit_transform(df_vartypes)
def test_error_when_variables_not_numeric(df_vartypes): transformer = RelativeFeatures( variables=["Name", "Age", "Marks"], reference=["Age", "Name"], func=["sub"], ) with pytest.raises(TypeError): transformer.fit_transform(df_vartypes) transformer = RelativeFeatures( reference=["Name", "Age", "Marks"], variables=["Age", "Name"], func=["sub"], ) with pytest.raises(TypeError): transformer.fit_transform(df_vartypes)
def test_operations_with_multiple_variables(df_vartypes): transformer = RelativeFeatures( variables=["Age", "Marks"], reference=["Age", "Marks"], func=["sub"], ) X = transformer.fit_transform(df_vartypes) ref = pd.DataFrame.from_dict({ "Name": ["tom", "nick", "krish", "jack"], "City": ["London", "Manchester", "Liverpool", "Bristol"], "Age": [20, 21, 19, 18], "Marks": [0.9, 0.8, 0.7, 0.6], "dob": pd.date_range("2020-02-24", periods=4, freq="T"), "Age_sub_Age": [0, 0, 0, 0], "Marks_sub_Age": [-19.1, -20.2, -18.3, -17.4], "Age_sub_Marks": [19.1, 20.2, 18.3, 17.4], "Marks_sub_Marks": [0.0, 0.0, 0.0, 0.0], }) pd.testing.assert_frame_equal(X, ref)
def test_classic_binary_operation(df_vartypes): transformer = RelativeFeatures( variables=["Age"], reference=["Marks"], func=["sub", "div", "add", "mul"], ) X = transformer.fit_transform(df_vartypes) ref = pd.DataFrame.from_dict({ "Name": ["tom", "nick", "krish", "jack"], "City": ["London", "Manchester", "Liverpool", "Bristol"], "Age": [20, 21, 19, 18], "Marks": [0.9, 0.8, 0.7, 0.6], "dob": pd.date_range("2020-02-24", periods=4, freq="T"), "Age_sub_Marks": [19.1, 20.2, 18.3, 17.4], "Age_div_Marks": [22.22222222222222, 26.25, 27.142857142857146, 30.0], "Age_add_Marks": [20.9, 21.8, 19.7, 18.6], "Age_mul_Marks": [18.0, 16.8, 13.299999999999999, 10.799999999999999], }) pd.testing.assert_frame_equal(X, ref)
def test_error_when_null_values_in_variable(df_vartypes): df_na = df_vartypes.copy() df_na.loc[1, "Age"] = np.nan transformer = RelativeFeatures( variables=["Age", "Marks"], reference=["Age", "Marks"], func=["add", "mul"], missing_values="raise", ) with pytest.raises(ValueError): transformer.fit(df_na) transformer.fit(df_vartypes) with pytest.raises(ValueError): transformer.transform(df_na)
def test_get_feature_names_out_raises_error_when_wrong_param( _input_features, df_vartypes): transformer = RelativeFeatures( variables=["Age", "Marks"], reference=["Age", "Marks"], func=["add", "sub"], ) transformer.fit(df_vartypes) with pytest.raises(ValueError): transformer.get_feature_names_out(input_features=_input_features)
def test_error_when_division_by_zero(_func, df_vartypes): df_zero = df_vartypes.copy() df_zero.loc[1, "Marks"] = 0 transformer = RelativeFeatures( variables=["Age"], reference=["Marks"], func=_func, ) transformer.fit(df_vartypes) with pytest.raises(ValueError): transformer.transform(df_zero)
def test_error_when_param_reference_not_permitted(_variables): with pytest.raises(ValueError): RelativeFeatures(reference=_variables, variables=["Age", "Name"], func=["add", "mul"])
from feature_engine.creation import ( CyclicalFeatures, MathFeatures, RelativeFeatures, # FIXME: remove in version 1.4 CombineWithReferenceFeature, CyclicalTransformer, MathematicalCombination, ) from tests.estimator_checks.estimator_checks import check_feature_engine_estimator _estimators = [ MathFeatures(variables=["0", "1"], func="mean", missing_values="ignore"), RelativeFeatures(variables=["0", "1"], reference=["0"], func=["add"], missing_values="ignore"), CyclicalFeatures(), # FIXME: remove in version 1.4 MathematicalCombination(variables_to_combine=["0", "1"]), CombineWithReferenceFeature(variables_to_combine=["0"], reference_variables=["1"]), CyclicalTransformer(), ] @pytest.mark.parametrize("estimator", _estimators) def test_check_estimator_from_sklearn(estimator): return check_estimator(estimator)