def test_fit_not_changing_data(self):
    """Check that fitting leaves the input dataframe X unmodified."""
    data = d.create_df_5()
    transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])

    transformer.fit(data)

    # Compare the fitted-on frame against a freshly built copy of the fixture.
    h.assert_equal_dispatch(
        expected=d.create_df_5(),
        actual=data,
        msg="Check X not changing during fit",
    )
def test_super_fit_called(self, mocker):
    """Check that fit calls BaseTransformer.fit with the expected arguments."""
    data = d.create_df_5()
    transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])

    # Positional args expected on the first (index 0) call: (X, None).
    first_call = {"args": (d.create_df_5(), None), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "fit",
        expected_call_args,
    ):
        transformer.fit(data)
def test_weight_column_not_in_X_error(self):
    """Check that fit raises a ValueError when the weight column is missing from X."""
    data = d.create_df_5()
    transformer = GroupRareLevelsTransformer(
        columns=["a", "b", "c"],
        weight="aaaa",
    )

    with pytest.raises(ValueError, match="weight aaaa not in X"):
        transformer.fit(data)
def test_super_transform_called(self, mocker):
    """Check that transform calls BaseTransformer.transform with X."""
    data = d.create_df_5()
    transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])
    transformer.fit(data)

    # Positional args expected on the first (index 0) call: (X,).
    first_call = {"args": (d.create_df_5(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
        return_value=d.create_df_5(),
    ):
        transformer.transform(data)
def test_fit_returns_self(self):
    """Check that fit returns the very same transformer instance it was called on."""
    data = d.create_df_5()
    transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])

    fitted = transformer.fit(data)

    # Identity, not equality: fit must hand back the same object.
    assert (
        fitted is transformer
    ), "Returned value from GroupRareLevelsTransformer.fit not as expected."
def test_non_numeric_column_error(self):
    """Check that transform raises a TypeError when a capped column is not numeric."""
    data = d.create_df_5()
    capping_values = {"a": [2, 5], "b": [-1, 8], "c": [-1, 8]}
    transformer = CappingTransformer(capping_values=capping_values)

    expected_message = r"The following columns are not numeric in X; \['b', 'c'\]"
    with pytest.raises(TypeError, match=expected_message):
        transformer.transform(data)
def test_error_with_non_numeric_columns(self):
    """Check that transform raises a TypeError when given non-numeric columns."""
    data = d.create_df_5()
    transformer = LogTransformer(columns=["a", "b", "c"])

    expected_message = r"The following columns are not numeric in X; \['b', 'c'\]"
    with pytest.raises(TypeError, match=expected_message):
        transformer.transform(data)
def test_check_is_fitted_called(self, mocker):
    """Check that transform calls BaseTransformer.check_is_fitted with ["mapping_"]."""
    data = d.create_df_5()
    transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])
    transformer.fit(data)

    # Positional args expected on the first (index 0) call.
    first_call = {"args": (["mapping_"],), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "check_is_fitted",
        expected_call_args,
    ):
        transformer.transform(data)
def test_learnt_values_no_weight(self):
    """Test that the mapping_ learnt during fit, without using a weight, is as expected."""
    df = d.create_df_5()

    x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

    x.fit(df)

    # np.nan (not the np.NaN alias, which NumPy 2.0 removed) marks the null
    # level expected in the learnt mapping for column "b".
    h.test_object_attributes(
        obj=x,
        expected_attributes={
            "mapping_": {
                "b": ["a", np.nan],
                "c": ["a", "c", "e"],
            }
        },
        msg="mapping_ attribute",
    )
def test_learnt_values_not_modified(self):
    """Check that calling transform does not alter the mapping_ learnt in fit."""
    data = d.create_df_5()

    # Reference transformer: fitted only, never used to transform.
    fitted_only = GroupRareLevelsTransformer(columns=["a", "b", "c"])
    fitted_only.fit(data)

    # Transformer under test: fitted on the same data, then used to transform.
    fitted_and_used = GroupRareLevelsTransformer(columns=["a", "b", "c"])
    fitted_and_used.fit(data)
    fitted_and_used.transform(data)

    h.assert_equal_dispatch(
        expected=fitted_only.mapping_,
        actual=fitted_and_used.mapping_,
        msg="Non rare levels not changed in transform",
    )
class TestTransform(object):
    """Tests for the transform method on CrossColumnAddTransformer."""

    # NOTE: the expected_df_* helpers deliberately take no ``self``. They are
    # called while the class body is being executed, inside the
    # ``pytest.mark.parametrize`` decorators further down.

    def expected_df_1():
        """Expected output from test_expected_output."""
        df = pd.DataFrame(
            {
                "a": [2.1, 3.2, 4.3, 5.4, 6.5, 7.6],
                "b": ["a", "b", "c", "d", "e", "f"],
            }
        )

        return df

    def expected_df_2():
        """Expected output from test_non_specified_values_unchanged."""
        df = pd.DataFrame(
            {
                "a": [2.1, 3.2, 3, 4, 5, 6],
                "b": ["a", "b", "c", "d", "e", "f"],
            }
        )

        return df

    def expected_df_3():
        """Expected output from test_multiple_mappings_expected_output."""
        # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
        df = pd.DataFrame(
            {
                "a": [4.1, 5.1, 4.1, 4, 8, 10.2, 7, 8, 9, np.nan],
                "b": ["a", "a", "a", "d", "e", "f", "g", np.nan, np.nan, np.nan],
                "c": ["a", "a", "c", "c", "e", "e", "f", "g", "h", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=CrossColumnAddTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_check_is_fitted_call(self, mocker):
        """Test the call to check_is_fitted."""
        df = d.create_df_1()

        mapping = {
            "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6}
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        expected_call_args = {0: {"args": (["adjust_column"],), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        ):
            x.transform(df)

    def test_super_transform_call(self, mocker):
        """Test the call to BaseTransformer.transform."""
        df = d.create_df_1()

        mapping = {
            "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6}
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_1(),
        ):
            x.transform(df)

    def test_adjust_col_not_in_x_error(self):
        """Test that an exception is raised if the adjust_column is not present in the dataframe."""
        df = d.create_df_1()

        mapping = {
            "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6}
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="c")

        with pytest.raises(ValueError, match="variable c is not in X"):
            x.transform(df)

    def test_adjust_col_not_numeric_error(self):
        """Test that an exception is raised if the adjust_column is not numeric."""
        df = d.create_df_2()

        mapping = {
            "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6}
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="c")

        with pytest.raises(TypeError, match="variable c must have numeric dtype."):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1())
        + h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test that transform is giving the expected output."""
        mapping = {
            "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6}
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_2())
        + h.index_preserved_params(d.create_df_1(), expected_df_2()),
    )
    def test_non_specified_values_unchanged(self, df, expected):
        """Test that values not specified in mappings are left unchanged in transform."""
        mapping = {"b": {"a": 1.1, "b": 1.2}}

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_5(), expected_df_3())
        + h.index_preserved_params(d.create_df_5(), expected_df_3()),
    )
    def test_multiple_mappings_expected_output(self, df, expected):
        """Test that mappings by multiple columns are both applied in transform."""
        mapping = {"b": {"a": 1.1, "f": 1.2}, "c": {"a": 2, "e": 3}}

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform."""
        import copy

        df = d.create_df_1()

        mapping = {
            "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6}
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        # Snapshot the attribute before transform. Comparing x.mappings with
        # the ``mapping`` object passed in could never fail if the transformer
        # holds that same object by reference, because an in-place mutation
        # would change both sides of the comparison.
        mappings_before = copy.deepcopy(x.mappings)

        x.transform(df)

        h.assert_equal_dispatch(
            expected=mappings_before,
            actual=x.mappings,
            msg="CrossColumnAddTransformer.transform has changed self.mappings unexpectedly",
        )
class TestTransform(object):
    """Tests for GroupRareLevelsTransformer.transform()."""

    # NOTE: the expected_df_* helpers deliberately take no ``self``. They are
    # called while the class body is being executed, inside the
    # ``pytest.mark.parametrize`` decorators further down.

    def expected_df_1():
        """Expected output for test_expected_output_no_weight."""
        # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
        df = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9, np.nan]})

        df["b"] = pd.Series(
            ["a", "a", "a", "rare", "rare", "rare", "rare", np.nan, np.nan, np.nan]
        )

        df["c"] = pd.Series(
            ["a", "a", "c", "c", "e", "e", "rare", "rare", "rare", "rare"],
            dtype=pd.CategoricalDtype(
                categories=["a", "c", "e", "f", "g", "h", "rare"],
                ordered=False,
            ),
        )

        return df

    def expected_df_2():
        """Expected output for test_expected_output_weight."""
        df = pd.DataFrame(
            {
                "a": [2, 2, 2, 2, np.nan, 2, 2, 2, 3, 3],
                "b": ["a", "a", "a", "d", "e", "f", "g", np.nan, np.nan, np.nan],
                "c": ["a", "b", "c", "d", "f", "f", "f", "g", "g", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        # Column "b" is overwritten with the values expected after grouping.
        df["b"] = pd.Series(
            ["a", "a", "a", "rare", "rare", "rare", "rare", np.nan, np.nan, np.nan]
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=GroupRareLevelsTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""
        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (["mapping_"],), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        ):
            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (d.create_df_5(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_5(),
        ):
            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the mapping_ from fit are not changed in transform."""
        df = d.create_df_5()

        # x is only fitted; x2 is fitted and then used to transform.
        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        x2 = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x2.fit(df)

        x2.transform(df)

        h.assert_equal_dispatch(
            expected=x.mapping_,
            actual=x2.mapping_,
            msg="Non rare levels not changed in transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_5(), expected_df_1())
        + h.index_preserved_params(d.create_df_5(), expected_df_1()),
    )
    def test_expected_output_no_weight(self, df, expected):
        """Test that the output is expected from transform."""
        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a"]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    def test_expected_output_no_weight_single_row_na(self):
        """Test output from a single row transform with np.nan value remains the same.

        The type is preserved if using an existing dataframe, so a new
        dataframe needs to be created.
        """
        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.nan]})

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a", np.nan]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=one_row_df,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    def test_expected_output_no_weight_single_row_na_category_column(self):
        """Test output from a single row transform with np.nan value remains the same, for a category column.

        The type is preserved if using an existing dataframe, so a new
        dataframe needs to be created.
        """
        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.nan]})

        # Convert to category and add "rare" as a category in the dataframe.
        # The result is assigned back because the ``inplace`` argument of
        # ``cat.add_categories`` was deprecated in pandas 1.3 and removed in 2.0.
        one_row_df["c"] = (
            one_row_df["c"].astype("category").cat.add_categories("rare")
        )

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a", np.nan]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=one_row_df,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_6(), expected_df_2())
        + h.index_preserved_params(d.create_df_6(), expected_df_2()),
    )
    def test_expected_output_weight(self, df, expected):
        """Test that the output is expected from transform, when weights are used."""
        x = GroupRareLevelsTransformer(
            columns=["b"], cut_off_percent=0.3, weight="a"
        )

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform (with weights)",
        )