def expected_df_1():
    """Build the expected frame for test_expected_output_1.

    Starts from ``d.create_df_3()``, appends the natural log of columns "a"
    and "b" as "a_new_col" and "b_new_col", and returns the frame without
    the two source columns.
    """
    frame = d.create_df_3()
    source_columns = ["a", "b"]

    for source_col in source_columns:
        frame[f"{source_col}_new_col"] = np.log(frame[source_col])

    # the originals are dropped after the derived columns have been added
    return frame.drop(columns=source_columns)
def test_check_is_fitted_call_count(self, mocker):
    """Check that transform triggers two BaseTransformer.check_is_fitted calls."""
    expected_number_of_calls = 2
    data = d.create_df_3()
    transformer = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

    with h.assert_function_call_count(
        mocker,
        tubular.base.BaseTransformer,
        "check_is_fitted",
        expected_number_of_calls,
    ):
        transformer.transform(data)
def test_quantiles_none_error(self):
    """Check that fit emits a UserWarning when only capping_values were given.

    The transformer is built without quantiles, so calling fit is expected to
    warn that no fitting was done.
    """
    expected_warning = "quantiles not set so no fitting done in CappingTransformer"

    with pytest.warns(UserWarning, match=expected_warning):
        data = d.create_df_3()
        transformer = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})
        transformer.fit(data)
def test_return_type(self, scaler, scaler_type_str, columns):
    """Check that transform hands back exactly a pd.DataFrame.

    ``scaler_type_str`` is accepted because it is part of the test's
    parameter list, but it is not used in the body.
    """
    data = d.create_df_3()
    transformer = ScalingTransformer(
        columns=columns,
        scaler=scaler,
        scaler_kwargs={"copy": True},
    )
    transformer.fit(data)

    output_type = type(transformer.transform(data))

    # identity check on the type: subclasses of pd.DataFrame do not pass
    assert output_type is pd.DataFrame, "unexpected output type from transform"
def test_super_transform_called(self, mocker):
    """Check that transform delegates to BaseTransformer.transform."""
    data = d.create_df_3()
    transformer = DataFrameMethodTransformer(
        new_column_name="d",
        pd_method_name="sum",
        columns=["b", "c"],
    )

    # a copy of the input is the single positional argument expected for call 0
    first_call = {"args": (data.copy(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
    ):
        transformer.transform(data)
def test_learnt_values_not_modified(self):
    """Test that the capping_values passed to init are not changed in transform.

    The expected values are a deep copy taken before the transformer is
    created. Comparing against the very dict object that was handed to the
    transformer could hide an in-place mutation, because (if the transformer
    keeps a reference to it) both sides of the comparison would change together.
    """
    # local import so this test does not depend on the file's import section
    import copy

    capping_values_dict = {"a": [2, 5], "b": [-1, 8]}
    expected_capping_values = copy.deepcopy(capping_values_dict)

    df = d.create_df_3()

    x = CappingTransformer(capping_values_dict)

    x.transform(df)

    h.test_object_attributes(
        obj=x,
        expected_attributes={"capping_values": expected_capping_values},
        msg="Attributes for CappingTransformer set in init",
    )
def test_check_is_fitted_call_1(self, mocker):
    """Check the arguments of the check_is_fitted calls made during transform.

    Call 0 is expected to receive ["capping_values"] and call 1
    ["_replacement_values"], each as the only positional argument.
    """
    data = d.create_df_3()
    transformer = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

    checked_attributes = ("capping_values", "_replacement_values")
    expected_call_args = {
        call_index: {"args": ([attribute],), "kwargs": {}}
        for call_index, attribute in enumerate(checked_attributes)
    }

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "check_is_fitted",
        expected_call_args,
    ):
        transformer.transform(data)
def test_learnt_values_not_modified(self):
    """Check that running transform leaves the fitted impute_values_ unchanged.

    One imputer is only fitted, a second one is fitted and then transformed
    (via fit_transform); their impute_values_ must be equal.
    """
    data = d.create_df_3()
    imputed_columns = ("a", "b", "c")

    fitted_only = ModeImputer(columns=list(imputed_columns))
    fitted_only.fit(data)

    fitted_and_transformed = ModeImputer(columns=list(imputed_columns))
    fitted_and_transformed.fit_transform(data)

    h.assert_equal_dispatch(
        expected=fitted_only.impute_values_,
        actual=fitted_and_transformed.impute_values_,
        msg="Impute values not changed in transform",
    )
def test_learnt_values(self):
    """Check that fit stores the first mode of each column in impute_values_."""
    data = d.create_df_3()
    imputed_columns = ["a", "b", "c"]

    imputer = ModeImputer(columns=["a", "b", "c"])
    imputer.fit(data)

    # Series.mode() can return several values; the first one is the expected fill
    expected_impute_values = {
        column: data[column].mode()[0] for column in imputed_columns
    }

    h.test_object_attributes(
        obj=imputer,
        expected_attributes={"impute_values_": expected_impute_values},
        msg="impute_values_ attribute",
    )
class TestTransform(object):
    """Tests for ModeImputer.transform()."""

    def expected_df_1():
        """Expected output for test_nulls_imputed_correctly.

        Defined without ``self`` because it is called directly while the class
        body executes (inside the parametrize decorator below).
        """
        df = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6, np.nan],
                "b": [1, 2, 3, np.nan, 7, 8, 9],
                "c": [np.nan, 1, 2, 3, -4, -5, -6],
            }
        )

        for col in ["a", "b", "c"]:
            # one .loc call on the frame itself; the chained form
            # df[col].loc[...] = ... can assign into a temporary object
            df.loc[df[col].isnull(), col] = df[col].mode()[0]

        return df

    def expected_df_2():
        """Expected output for test_nulls_imputed_correctly_2.

        Only column "a" has its nulls filled; "b" and "c" keep their nulls.
        Defined without ``self`` because it is called directly while the class
        body executes (inside the parametrize decorator below).
        """
        df = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6, np.nan],
                "b": [1, 2, 3, np.nan, 7, 8, 9],
                "c": [np.nan, 1, 2, 3, -4, -5, -6],
            }
        )

        for col in ["a"]:
            # one .loc call on the frame itself; the chained form
            # df[col].loc[...] = ... can assign into a temporary object
            df.loc[df[col].isnull(), col] = df[col].mode()[0]

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=ModeImputer.transform, expected_arguments=["self", "X"]
        )

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""
        df = d.create_df_1()

        x = ModeImputer(columns="a")

        x.fit(df)

        expected_call_args = {0: {"args": (["impute_values_"],), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        ):
            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_1()

        x = ModeImputer(columns="a")

        x.fit(df)

        expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "transform", expected_call_args
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_nulls_imputed_correctly(self, df, expected):
        """Test missing values are filled with the correct values."""
        x = ModeImputer(columns=["a", "b", "c"])

        # set the impute values dict directly rather than fitting x on df so test works with helpers
        x.impute_values_ = {"a": 1.0, "b": 1.0, "c": -6.0}

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Check nulls filled correctly in transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2())
        + h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_nulls_imputed_correctly_2(self, df, expected):
        """Test missing values are filled with the correct values - and unrelated columns are not changed."""
        x = ModeImputer(columns=["a"])

        # set the impute values dict directly rather than fitting x on df so test works with helpers
        x.impute_values_ = {"a": 1.0}

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Check nulls filled correctly in transform",
        )

    def test_learnt_values_not_modified(self):
        """Test that the impute_values_ from fit are not changed in transform."""
        df = d.create_df_3()

        x = ModeImputer(columns=["a", "b", "c"])

        x.fit(df)

        x2 = ModeImputer(columns=["a", "b", "c"])

        x2.fit_transform(df)

        h.assert_equal_dispatch(
            expected=x.impute_values_,
            actual=x2.impute_values_,
            msg="Impute values not changed in transform",
        )
class TestTransform(object):
    """Tests for DataFrameMethodTransformer.transform()."""

    def expected_df_1():
        """Expected output of test_expected_output_single_columns_assignment.

        Defined without ``self`` because it is called directly while the class
        body executes (inside the parametrize decorator below).
        """
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
        df = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6, np.nan],
                "b": [1, 2, 3, np.nan, 7, 8, 9],
                "c": [np.nan, 1, 2, 3, -4, -5, -6],
                "d": [1.0, 3.0, 5.0, 3.0, 3.0, 3.0, 3.0],
            }
        )

        return df

    def expected_df_2():
        """Expected output of test_expected_output_multi_columns_assignment.

        Defined without ``self`` because it is called directly while the class
        body executes (inside the parametrize decorator below).
        """
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
        df = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6, np.nan],
                "b": [1, 2, 3, np.nan, 7, 8, 9],
                "c": [np.nan, 1, 2, 3, -4, -5, -6],
                "d": [0.5, 1.0, 1.5, np.nan, 3.5, 4.0, 4.5],
                "e": [np.nan, 0.5, 1.0, 1.5, -2.0, -2.5, -3.0],
            }
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=DataFrameMethodTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_3()

        x = DataFrameMethodTransformer(
            new_column_name="d", pd_method_name="sum", columns=["b", "c"]
        )

        expected_call_args = {0: {"args": (df.copy(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "transform", expected_call_args
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_single_columns_assignment(self, df, expected):
        """Test a single column output from transform gives expected results."""
        x = DataFrameMethodTransformer(
            new_column_name="d",
            pd_method_name="sum",
            columns=["b", "c"],
            pd_method_kwargs={"axis": 1},
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="DataFrameMethodTransformer sum columns b and c",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2())
        + h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_expected_output_multi_columns_assignment(self, df, expected):
        """Test a multiple column output from transform gives expected results."""
        x = DataFrameMethodTransformer(
            new_column_name=["d", "e"],
            pd_method_name="div",
            columns=["b", "c"],
            pd_method_kwargs={"other": 2},
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="DataFrameMethodTransformer divide by 2 columns b and c",
        )

    @pytest.mark.parametrize(
        "df, new_column_name, pd_method_name, columns, pd_method_kwargs",
        [
            (d.create_df_3(), ["d", "e"], "div", ["b", "c"], {"other": 2}),
            (d.create_df_3(), "d", "sum", ["b", "c"], {"axis": 1}),
            (d.create_df_3(), ["d", "e"], "cumprod", ["b", "c"], {"axis": 1}),
            (d.create_df_3(), ["d", "e", "f"], "mod", ["a", "b", "c"], {"other": 2}),
            (d.create_df_3(), ["d", "e", "f"], "le", ["a", "b", "c"], {"other": 0}),
            (d.create_df_3(), ["d", "e"], "abs", ["a", "b"], {}),
        ],
    )
    def test_pandas_method_called(
        self, mocker, df, new_column_name, pd_method_name, columns, pd_method_kwargs
    ):
        """Test that the pandas method is called as expected (with kwargs passed) during transform."""
        # spy on the pd.DataFrame method so the real method still runs while calls are recorded
        spy = mocker.spy(pd.DataFrame, pd_method_name)

        x = DataFrameMethodTransformer(
            new_column_name=new_column_name,
            pd_method_name=pd_method_name,
            columns=columns,
            pd_method_kwargs=pd_method_kwargs,
        )

        x.transform(df)

        # pull out positional and keyword args to target the call
        call_args = spy.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        # test keyword are as expected
        h.assert_dict_equal_msg(
            actual=call_kwargs,
            expected=pd_method_kwargs,
            msg_tag=f"""Keyword arg assert for {pd_method_name}""",
        )

        # test positional args are as expected
        h.assert_list_tuple_equal_msg(
            actual=call_pos_args,
            expected=(df[columns],),
            msg_tag=f"""Positional arg assert for {pd_method_name}""",
        )
class TestTransform(object):
    """Tests for LogTransformer.transform().

    The expected_df_* functions take no ``self``: they are called directly
    while the class body executes, inside the parametrize decorators.
    """

    def expected_df_1():
        """Expected output of test_expected_output_1."""
        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"])
        df["b_new_col"] = np.log(df["b"])

        df.drop(columns=["a", "b"], inplace=True)

        return df

    def expected_df_2():
        """Expected output of test_expected_output_2."""
        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"] + 1)
        df["b_new_col"] = np.log(df["b"] + 1)

        df.drop(columns=["a", "b"], inplace=True)

        return df

    def expected_df_3():
        """Expected output of test_expected_output_3."""
        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"])
        df["b_new_col"] = np.log(df["b"])

        return df

    def expected_df_4():
        """Expected output of test_expected_output_4."""
        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"] + 1)
        df["b_new_col"] = np.log(df["b"] + 1)

        return df

    def expected_df_5():
        """Expected output of test_expected_output_5."""
        df = d.create_df_4()

        # change of base: log_5(x) = ln(x) / ln(5)
        df["a_new_col"] = np.log(df["a"] + 1) / np.log(5)

        return df

    def expected_df_6():
        """Expected output of test_expected_output_6."""
        df = d.create_df_4()

        # change of base: log_7(x) = ln(x) / ln(7)
        df["a_new_col"] = np.log(df["a"]) / np.log(7)

        df.drop("a", axis=1, inplace=True)

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=LogTransformer.transform, expected_arguments=["self", "X"]
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_3()

        x = LogTransformer(columns=["a", "b"])

        expected_call_args = {0: {"args": (d.create_df_3(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_3(),
        ):
            x.transform(df)

    def test_error_with_non_numeric_columns(self):
        """Test an exception is raised if transform is applied to non-numeric columns."""
        df = d.create_df_5()

        x = LogTransformer(columns=["a", "b", "c"])

        with pytest.raises(
            TypeError,
            match=r"The following columns are not numeric in X; \['b', 'c'\]",
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_1(self, df, expected):
        """Test that transform is giving the expected output when not adding one and dropping original columns."""
        x1 = LogTransformer(
            columns=["a", "b"], add_1=False, drop=True, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform not adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2())
        + h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_expected_output_2(self, df, expected):
        """Test that transform is giving the expected output when adding one and dropping original columns."""
        x1 = LogTransformer(
            columns=["a", "b"], add_1=True, drop=True, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_3())
        + h.index_preserved_params(d.create_df_3(), expected_df_3()),
    )
    def test_expected_output_3(self, df, expected):
        """Test that transform is giving the expected output when not adding one and not dropping original columns."""
        x1 = LogTransformer(
            columns=["a", "b"], add_1=False, drop=False, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        # message matches this test's configuration (add_1=False, drop=False)
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform not adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_4())
        + h.index_preserved_params(d.create_df_3(), expected_df_4()),
    )
    def test_expected_output_4(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping original columns."""
        x1 = LogTransformer(
            columns=["a", "b"], add_1=True, drop=False, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        # message matches this test's configuration (add_1=True, drop=False)
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_5())
        + h.index_preserved_params(d.create_df_4(), expected_df_5()),
    )
    def test_expected_output_5(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping original columns and using base."""
        x1 = LogTransformer(
            columns=["a"], base=5, add_1=True, drop=False, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        # message matches this test's configuration (base=5, add_1=True, drop=False)
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform using base, adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_6())
        + h.index_preserved_params(d.create_df_4(), expected_df_6()),
    )
    def test_expected_output_6(self, df, expected):
        """Test that transform is giving the expected output when not adding one and dropping original columns and using base."""
        x1 = LogTransformer(
            columns=["a"], base=7, add_1=False, drop=True, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        # message matches this test's configuration (base=7, add_1=False, drop=True)
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform should be using base, not adding 1, and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, columns, add_1, extra_exception_text",
        (
            [pd.DataFrame({"a": [1, 2, 0]}), ["a"], False, ""],
            [
                pd.DataFrame({"a": [1, 2, 0], "b": [1, 2, 3]}),
                ["a", "b"],
                False,
                "",
            ],
            [
                pd.DataFrame({"a": [1, 2, -1]}),
                ["a"],
                True,
                r" \(after adding 1\)",
            ],
            [
                pd.DataFrame({"a": [1, 2, -1], "b": [1, 2, 3]}),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ],
            [pd.DataFrame({"b": [1, 2, -0.001]}), ["b"], False, ""],
            [
                pd.DataFrame({"b": [1, 2, -0.001], "a": [1, 2, 3]}),
                ["a", "b"],
                False,
                "",
            ],
            [
                pd.DataFrame({"b": [1, 2, -1.001]}),
                ["b"],
                True,
                r" \(after adding 1\)",
            ],
            [
                pd.DataFrame({"b": [1, 2, -1.001], "a": [1, 2, 3]}),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ],
        ),
    )
    def test_negative_values_raise_exception(
        self, df, columns, add_1, extra_exception_text
    ):
        """Test that an exception is raised if zero or negative values are passed in transform.

        The parametrised frames contain a value that is zero or negative,
        either as given (add_1=False) or once 1 has been added (add_1=True).
        """
        x = LogTransformer(columns=columns, add_1=add_1, drop=True)

        # extra_exception_text is already regex-escaped in the parametrisation above
        with pytest.raises(
            ValueError,
            match=f"values less than or equal to 0 in columns{extra_exception_text}, make greater than 0 before using transform",
        ):
            x.transform(df)
class TestTransform(object):
    """Tests for CappingTransformer.transform().

    The expected_df_* functions take no ``self``: they are called directly
    while the class body executes, inside the parametrize decorators.
    """

    def expected_df_1():
        """Expected output from test_expected_output_min_and_max_combinations."""
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 5, np.nan],
                "b": [1, 2, 3, np.nan, 7, 7, 7],
                "c": [np.nan, 1, 2, 3, 0, 0, 0],
            }
        )

        return df

    def expected_df_2():
        """Expected output from test_non_cap_column_left_untouched."""
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 6, 7, np.nan],
                "b": ["a", "b", "c", "d", "e", "f", "g", np.nan],
                "c": ["a", "b", "c", "d", "e", "f", "g", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=CappingTransformer.transform, expected_arguments=["self", "X"]
        )

    def test_check_is_fitted_call_count(self, mocker):
        """Test there are 2 calls to BaseTransformer check_is_fitted in transform."""
        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        with h.assert_function_call_count(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", 2
        ):
            x.transform(df)

    def test_check_is_fitted_call_1(self, mocker):
        """Test the arguments of both calls to BaseTransformer check_is_fitted in transform."""
        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        expected_call_args = {
            0: {"args": (["capping_values"],), "kwargs": {}},
            1: {"args": (["_replacement_values"],), "kwargs": {}},
        }

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        ):
            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        expected_call_args = {0: {"args": (d.create_df_3(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_3(),
        ):
            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the capping_values passed to init are not changed in transform.

        The expected values are a deep copy taken before the transformer is
        created. Comparing against the very dict object that was handed to the
        transformer could hide an in-place mutation, because (if the
        transformer keeps a reference to it) both sides would change together.
        """
        # local import so this test does not depend on the file's import section
        import copy

        capping_values_dict = {"a": [2, 5], "b": [-1, 8]}
        expected_capping_values = copy.deepcopy(capping_values_dict)

        df = d.create_df_3()

        x = CappingTransformer(capping_values_dict)

        x.transform(df)

        h.test_object_attributes(
            obj=x,
            expected_attributes={"capping_values": expected_capping_values},
            msg="Attributes for CappingTransformer set in init",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_min_and_max_combinations(self, df, expected):
        """Test that capping is applied correctly in transform."""
        # covers min and max ("a"), max only ("b") and min only ("c")
        x = CappingTransformer(
            capping_values={"a": [2, 5], "b": [None, 7], "c": [0, None]}
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_2())
        + h.index_preserved_params(d.create_df_4(), expected_df_2()),
    )
    def test_non_cap_column_left_untouched(self, df, expected):
        """Test that capping is applied only to specific columns, others remain the same."""
        x = CappingTransformer(capping_values={"a": [2, 10]})

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform, with columns meant to not be transformed",
        )

    def test_non_numeric_column_error(self):
        """Test that transform will raise an error if a column to transform is not numeric."""
        df = d.create_df_5()

        x = CappingTransformer(
            capping_values={"a": [2, 5], "b": [-1, 8], "c": [-1, 8]}
        )

        with pytest.raises(
            TypeError,
            match=r"The following columns are not numeric in X; \['b', 'c'\]",
        ):
            x.transform(df)

    def test_quantile_not_fit_error(self):
        """Test that transform will raise an error if quantiles are specified in init but fit is not run before calling transform."""
        df = d.create_df_9()

        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        with pytest.raises(
            ValueError,
            match="capping_values attribute is an empty dict - perhaps the fit method has not been run yet",
        ):
            x.transform(df)

    def test_replacement_values_dict_not_set_error(self):
        """Test that transform will raise an error if _replacement_values is an empty dict."""
        df = d.create_df_9()

        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        # manually set attribute to get past the capping_values attribute is an empty dict exception
        x.capping_values = {"a": [1, 4]}

        with pytest.raises(
            ValueError,
            match="_replacement_values attribute is an empty dict - perhaps the fit method has not been run yet",
        ):
            x.transform(df)

    def test_attributes_unchanged_from_transform(self):
        """Test that attributes are unchanged after transform is run."""
        df = d.create_df_9()

        # x is only fitted; x2 is fitted and then used in transform
        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        x.fit(df)

        x2 = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        x2.fit(df)

        x2.transform(df)

        assert (
            x.capping_values == x2.capping_values
        ), "capping_values attribute modified in transform"
        assert (
            x._replacement_values == x2._replacement_values
        ), "_replacement_values attribute modified in transform"
        assert (
            x.weights_column == x2.weights_column
        ), "weights_column attribute modified in transform"
        assert x.quantiles == x2.quantiles, "quantiles attribute modified in transform"