def test_super_fit_call(self, mocker):
    """Check that fitting delegates to CappingTransformer.fit exactly once.

    A spy records the call; it must have received only the transformer
    itself positionally, with ``X`` and ``y`` supplied as keyword arguments.
    """
    fit_spy = mocker.spy(tubular.capping.CappingTransformer, "fit")

    input_df = d.create_df_9()
    transformer = OutOfRangeNullTransformer(
        quantiles={"a": [0.1, 1], "b": [0.5, None]},
        weights_column="c",
    )
    transformer.fit(input_df)

    assert (
        fit_spy.call_count == 1
    ), "unexpected number of calls to CappingTransformer.fit"

    # Each recorded call is indexable as (positional args, keyword args).
    recorded_call = fit_spy.call_args_list[0]
    positional_args = recorded_call[0]
    keyword_args = recorded_call[1]

    assert positional_args == (
        transformer,
    ), "unexpected positional args in CappingTransformer.fit call"

    h.assert_equal_dispatch(
        expected={"X": d.create_df_9(), "y": None},
        actual=keyword_args,
        msg="unexpected kwargs in CappingTransformer.fit call",
    )
def test_pd_cut_call(self, mocker):
    """Check that transform passes the column and cut_kwargs on to pandas.cut."""
    cut_settings = {"bins": 3, "right": False, "precision": 2}

    transformer = CutTransformer(
        column="a",
        new_column_name="a_cut",
        # Separate copies so the transformer and the expectation never share a dict.
        cut_kwargs=dict(cut_settings),
    )

    expected_call_args = {
        0: {
            "args": (d.create_df_9()["a"],),
            "kwargs": dict(cut_settings),
        },
    }

    input_df = d.create_df_9()

    with h.assert_function_call(
        mocker,
        pandas,
        "cut",
        expected_call_args,
        return_value=[1, 2, 3, 4, 5, 6],
    ):
        transformer.transform(input_df)
def test_prepare_quantiles_call_weight(self, mocker):
    """Check the prepare_quantiles calls made by fit when weights_column is set.

    One call is expected per column in ``quantiles``, each receiving the
    column values, that column's quantile pair and the weights column.
    """
    transformer = CappingTransformer(
        quantiles={"a": [0.1, 1], "b": [0.5, None]},
        weights_column="c",
    )

    # (column, quantile pair) in the order the calls are expected.
    expected_per_column = [("a", [0.1, 1]), ("b", [0.5, None])]
    expected_call_args = {
        call_index: {
            "args": (
                d.create_df_9()[column],
                quantile_pair,
                d.create_df_9()["c"],
            ),
            "kwargs": {},
        }
        for call_index, (column, quantile_pair) in enumerate(expected_per_column)
    }

    input_df = d.create_df_9()

    with h.assert_function_call(
        mocker,
        tubular.capping.CappingTransformer,
        "prepare_quantiles",
        expected_call_args,
    ):
        transformer.fit(input_df)
class TestTransform(object):
    """Tests for NullIndicator.transform()"""

    def expected_df_1():
        """Build the frame expected by test_null_indicator_columns_correct.

        Called without an instance from the class body, to feed the
        parametrize decorator further down.
        """
        indicator_columns = ["b_nulls", "c_nulls"]

        expected = pd.DataFrame(
            {
                "a": [1, 2, np.nan, 4, np.nan, 6],
                "b": [np.nan, 5, 4, 3, 2, 1],
                "c": [3, 2, 1, 4, 5, 6],
                "b_nulls": [1, 0, 0, 0, 0, 0],
                "c_nulls": [0, 0, 0, 0, 0, 0],
            }
        )
        expected[indicator_columns] = expected[indicator_columns].astype("int32")

        return expected

    def test_arguments(self):
        """Check the argument names accepted by NullIndicator.transform."""
        h.test_function_arguments(
            func=NullIndicator.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Check that transform calls BaseTransformer.transform with the input frame."""
        transformer = NullIndicator(columns="a")
        input_df = d.create_df_1()

        expected_call_args = {
            0: {"args": (d.create_df_1(),), "kwargs": {}},
        }

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
        ):
            transformer.transform(input_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_9(), expected_df_1())
        + h.index_preserved_params(d.create_df_9(), expected_df_1()),
    )
    def test_null_indicator_columns_correct(self, df, expected):
        """Check the indicator columns added for "b" and "c" match the expected frame."""
        transformer = NullIndicator(columns=["b", "c"])

        h.assert_equal_dispatch(
            expected=expected,
            actual=transformer.transform(df),
            msg="Check null indicator columns created correctly in transform.",
        )
def test_prepare_quantiles_output_set_attributes(self, mocker, weights_column):
    """Check fit stores the prepare_quantiles output on the transformer.

    prepare_quantiles is patched to return one canned value per call; after
    fit both ``capping_values`` and ``_replacement_values`` must map each
    column to the value returned for it.
    """
    mocked_return_values = [["aaaa", "bbbb"], [1234, None]]
    mocker.patch(
        "tubular.capping.CappingTransformer.prepare_quantiles",
        side_effect=mocked_return_values,
    )

    transformer = CappingTransformer(
        quantiles={"a": [0.1, 1], "b": [0.5, None]},
        weights_column=weights_column,
    )
    transformer.fit(d.create_df_9())

    # Column "a" gets the first mocked return value, "b" the second.
    values_by_column = dict(zip(("a", "b"), mocked_return_values))

    h.test_object_attributes(
        obj=transformer,
        expected_attributes={
            "capping_values": dict(values_by_column),
            "_replacement_values": dict(values_by_column),
        },
        msg="weighted_quantile output set to capping_values, _replacement_values attributes",
    )
def test_super_fit_call(self, mocker):
    """Check that CappingTransformer.fit calls BaseTransformer.fit with (X, None)."""
    transformer = CappingTransformer(
        quantiles={"a": [0.1, 1], "b": [0.5, None]},
        weights_column="c",
    )
    input_df = d.create_df_9()

    expected_call_args = {
        0: {"args": (d.create_df_9(), None), "kwargs": {}},
    }

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "fit",
        expected_call_args,
    ):
        transformer.fit(input_df)
def test_attributes_unchanged_from_transform(self):
    """Check that calling transform leaves the fitted attributes untouched.

    Two identically configured transformers are fitted on the same data; only
    the second also runs transform. Their attributes must still compare equal.
    """
    df = d.create_df_9()

    fitted_only = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})
    fitted_only.fit(df)

    fitted_and_transformed = CappingTransformer(
        quantiles={"a": [0.2, 1], "b": [0, 1]}
    )
    fitted_and_transformed.fit(df)
    fitted_and_transformed.transform(df)

    compared_attributes = (
        "capping_values",
        "_replacement_values",
        "weights_column",
        "quantiles",
    )
    for attribute in compared_attributes:
        assert getattr(fitted_only, attribute) == getattr(
            fitted_and_transformed, attribute
        ), f"{attribute} attribute modified in transform"
def expected_df_1():
    """Build the expected frame for test_expected_output.

    Starts from ``d.create_df_9()`` and adds a categorical column "d".
    """
    expected = d.create_df_9()

    labels = ["c", "b", "a", "d", "e", "f"]
    expected["d"] = pd.Series(labels, dtype="category")

    return expected
def test_super_transform_call(self, mocker):
    """Check that CutTransformer.transform calls BaseTransformer.transform with the input frame."""
    transformer = CutTransformer(
        column="a",
        new_column_name="Y",
        cut_kwargs={"bins": 3},
    )
    input_df = d.create_df_9()

    expected_call_args = {
        0: {"args": (d.create_df_9(),), "kwargs": {}},
    }

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
        return_value=d.create_df_9(),
    ):
        transformer.transform(input_df)
def test_quantile_not_fit_error(self):
    """Check transform raises ValueError when quantiles were given but fit was never run."""
    unfitted = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})
    input_df = d.create_df_9()

    expected_message = "capping_values attribute is an empty dict - perhaps the fit method has not been run yet"

    with pytest.raises(ValueError, match=expected_message):
        unfitted.transform(input_df)
def test_fit_returns_self(self):
    """Check that OutOfRangeNullTransformer.fit returns the same transformer instance."""
    transformer = OutOfRangeNullTransformer(
        quantiles={"a": [0.1, 1], "b": [0.5, None]},
        weights_column="c",
    )

    fit_result = transformer.fit(d.create_df_9())

    assert (
        fit_result is transformer
    ), "Returned value from OutOfRangeNullTransformer.fit not as expected."
def test_replacement_values_dict_not_set_error(self):
    """Check transform raises ValueError when _replacement_values is an empty dict."""
    transformer = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

    # Populate capping_values by hand so the earlier "capping_values attribute
    # is an empty dict" exception is not the one that fires.
    transformer.capping_values = {"a": [1, 4]}

    input_df = d.create_df_9()
    expected_message = "_replacement_values attribute is an empty dict - perhaps the fit method has not been run yet"

    with pytest.raises(ValueError, match=expected_message):
        transformer.transform(input_df)
def test_output_from_cut_assigned_to_column(self, mocker):
    """Check the value returned by pd.cut ends up in the new_column_name column."""
    cut_output = [1, 2, 3, 4, 5, 6]
    mocker.patch("pandas.cut", return_value=cut_output)

    transformer = CutTransformer(
        column="c",
        new_column_name="c_new",
        cut_kwargs={"bins": 2},
    )
    result = transformer.transform(d.create_df_9())

    assigned_values = result["c_new"].tolist()
    assert (
        assigned_values == cut_output
    ), "unexpected values assigned to c_new column"
def test_quantile_combinations_handled(self, quantiles, weights_column):
    """Check fit completes for the given mix of None and non-None quantiles.

    Any exception raised by fit is reported as a test failure naming the
    quantiles that triggered it.
    """
    input_df = d.create_df_9()
    transformer = CappingTransformer(
        quantiles={"a": quantiles},
        weights_column=weights_column,
    )

    try:
        transformer.fit(input_df)
    except Exception as err:  # deliberately broad: any error fails the test
        failure_message = (
            f"unexpected exception when calling fit with quantiles {quantiles} - {err}"
        )
        pytest.fail(failure_message)
def test_set_replacement_values_called(self, mocker):
    """Check that fit calls OutOfRangeNullTransformer.set_replacement_values.

    The expected call spec carries no positional and no keyword arguments.
    """
    transformer = OutOfRangeNullTransformer(
        quantiles={"a": [0.1, 1], "b": [0.5, None]},
        weights_column="c",
    )
    input_df = d.create_df_9()

    expected_call_args = {
        0: {"args": (), "kwargs": {}},
    }

    with h.assert_function_call(
        mocker,
        tubular.capping.OutOfRangeNullTransformer,
        "set_replacement_values",
        expected_call_args,
    ):
        transformer.fit(input_df)
class TestTransform(object):
    """Tests for CutTransformer.transform()."""

    def expected_df_1():
        """Build the expected frame for test_expected_output.

        Called without an instance from the class body, to feed the
        parametrize decorator further down.
        """
        expected = d.create_df_9()

        labels = ["c", "b", "a", "d", "e", "f"]
        expected["d"] = pd.Series(labels, dtype="category")

        return expected

    def test_arguments(self):
        """Check the argument names accepted by CutTransformer.transform."""
        h.test_function_arguments(
            func=CutTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_call(self, mocker):
        """Check that transform calls BaseTransformer.transform with the input frame."""
        transformer = CutTransformer(
            column="a",
            new_column_name="Y",
            cut_kwargs={"bins": 3},
        )
        input_df = d.create_df_9()

        expected_call_args = {
            0: {"args": (d.create_df_9(),), "kwargs": {}},
        }

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_9(),
        ):
            transformer.transform(input_df)

    def test_pd_cut_call(self, mocker):
        """Check that transform passes the column and cut_kwargs on to pandas.cut."""
        cut_settings = {"bins": 3, "right": False, "precision": 2}

        transformer = CutTransformer(
            column="a",
            new_column_name="a_cut",
            # Separate copies so the transformer and the expectation never share a dict.
            cut_kwargs=dict(cut_settings),
        )

        expected_call_args = {
            0: {
                "args": (d.create_df_9()["a"],),
                "kwargs": dict(cut_settings),
            },
        }

        input_df = d.create_df_9()

        with h.assert_function_call(
            mocker,
            pandas,
            "cut",
            expected_call_args,
            return_value=[1, 2, 3, 4, 5, 6],
        ):
            transformer.transform(input_df)

    def test_output_from_cut_assigned_to_column(self, mocker):
        """Check the value returned by pd.cut ends up in the new_column_name column."""
        cut_output = [1, 2, 3, 4, 5, 6]
        mocker.patch("pandas.cut", return_value=cut_output)

        transformer = CutTransformer(
            column="c",
            new_column_name="c_new",
            cut_kwargs={"bins": 2},
        )
        result = transformer.transform(d.create_df_9())

        assigned_values = result["c_new"].tolist()
        assert (
            assigned_values == cut_output
        ), "unexpected values assigned to c_new column"

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_9(), expected_df_1())
        + h.index_preserved_params(d.create_df_9(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Check that transform produces the expected frame for the input data."""
        transformer = CutTransformer(
            column="c",
            new_column_name="d",
            cut_kwargs={
                "bins": [0, 1, 2, 3, 4, 5, 6],
                "ordered": False,
                "labels": ["a", "b", "c", "d", "e", "f"],
            },
        )

        h.assert_equal_dispatch(
            expected=expected,
            actual=transformer.transform(df),
            msg="CutTransformer.transform output",
        )

    def test_non_numeric_column_error(self):
        """Check a TypeError is raised when the column to discretise is not numeric."""
        transformer = CutTransformer(column="b", new_column_name="d")
        input_df = d.create_df_8()

        expected_message = "b should be a numeric dtype but got object"

        with pytest.raises(TypeError, match=expected_message):
            transformer.transform(input_df)
class TestPrepareQuantiles(object):
    """Tests for the CappingTransformer.prepare_quantiles method."""

    def test_arguments(self):
        """Test that prepare_quantiles has expected arguments."""
        h.test_function_arguments(
            func=CappingTransformer.prepare_quantiles,
            expected_arguments=["self", "values", "quantiles", "sample_weight"],
            expected_default_values=(None,),
        )

    @pytest.mark.parametrize(
        "values, quantiles, sample_weight, expected_quantiles",
        [
            (d.create_df_9()["a"], [0.1, 0.6], d.create_df_9()["c"], [0.1, 0.6]),
            (d.create_df_9()["b"], [0.1, None], d.create_df_9()["c"], [0.1]),
            (d.create_df_9()["a"], [None, 0.6], d.create_df_9()["c"], [0.6]),
            (d.create_df_9()["b"], [0.1, 0.6], None, [0.1, 0.6]),
            (d.create_df_9()["a"], [0.1, None], None, [0.1]),
            (d.create_df_9()["b"], [None, 0.6], None, [0.6]),
        ],
    )
    def test_weighted_quantile_call(
        self, mocker, values, quantiles, sample_weight, expected_quantiles
    ):
        """Test the call to weighted_quantile, including the filtering out of None values.

        ``expected_quantiles`` is ``quantiles`` with any None entries removed;
        weighted_quantile must be called exactly once, positionally, with
        ``(values, expected_quantiles, sample_weight)`` and no keyword arguments.
        """
        x = CappingTransformer(quantiles={"a": [0.1, 1], "b": [0.5, None]})

        mocked = mocker.patch("tubular.capping.CappingTransformer.weighted_quantile")

        x.prepare_quantiles(values, quantiles, sample_weight)

        assert (
            mocked.call_count == 1
        ), f"unexpected number of calls to weighted_quantile, expecting 1 but got {mocked.call_count}"

        # Each recorded call is indexable as (positional args, keyword args).
        call_args = mocked.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        expected_pos_args = (values, expected_quantiles, sample_weight)

        assert (
            call_pos_args == expected_pos_args
        ), f"unexpected positional args in call to weighted_quantile, expecting {expected_pos_args} but got {call_pos_args}"

        # The check is against an empty dict, so the message reports {} rather than None.
        assert (
            call_kwargs == {}
        ), f"unexpected kwargs in call to weighted_quantile, expecting {{}} but got {call_kwargs}"

    @pytest.mark.parametrize(
        "values, quantiles, sample_weight, expected_results",
        [
            (d.create_df_9()["a"], [0.1, 0.6], d.create_df_9()["c"], ["aaaa"]),
            (d.create_df_9()["b"], [0.1, None], d.create_df_9()["c"], ["aaaa", None]),
            (d.create_df_9()["a"], [None, 0.6], d.create_df_9()["c"], [None, "aaaa"]),
            (d.create_df_9()["b"], [0.1, 0.6], None, ["aaaa"]),
            (d.create_df_9()["a"], [0.1, None], None, ["aaaa", None]),
            (d.create_df_9()["b"], [None, 0.6], None, [None, "aaaa"]),
        ],
    )
    def test_output_from_weighted_quantile_returned(
        self, mocker, values, quantiles, sample_weight, expected_results
    ):
        """Test the output from weighted_quantile is returned from the function, including None values added back in.

        weighted_quantile is patched to always return ``["aaaa"]``; the
        parametrized ``expected_results`` show where a None is expected to be
        re-inserted for a quantile that was passed as None.
        """
        x = CappingTransformer(quantiles={"a": [0.1, 1], "b": [0.5, None]})

        mocker.patch(
            "tubular.capping.CappingTransformer.weighted_quantile",
            return_value=["aaaa"],
        )

        results = x.prepare_quantiles(values, quantiles, sample_weight)

        # Failure message reports the expectation first and the actual value second.
        assert (
            results == expected_results
        ), f"unexpected value returned from prepare_quantiles, expecting {expected_results} but got {results}"