def test_expected_output(self, df, expected):
    """Check that transform followed by inverse_transform returns the original frame.

    The mappings are assigned by hand instead of being learned with fit, so the
    test depends only on the supplied ``df`` and ``expected`` inputs.
    """
    transformer = NominalToIntegerTransformer(columns=["a", "b"])

    # set the mapping dict directly rather than fitting on df so test works with helpers
    transformer.mappings = {
        "a": {level: code for code, level in enumerate(range(1, 7))},
        "b": {level: code for code, level in enumerate("abcdef")},
    }

    round_tripped = transformer.inverse_transform(transformer.transform(df))

    h.assert_frame_equal_msg(
        actual=round_tripped,
        expected=expected,
        msg_tag="transform reverse does not get back to original",
    )
def test_expected_output(self, df, expected):
    """Check that transform produces the expected frame for hand-set mappings."""
    transformer = NominalToIntegerTransformer(columns=["a", "b"])

    # mappings are assigned directly (no fit on df) so the test works with helpers
    transformer.mappings = {
        "a": dict(zip(range(1, 7), range(6))),
        "b": dict(zip("abcdef", range(6))),
    }

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in NominalToIntegerTransformer.transform",
    )
def test_non_cap_column_left_untouched(self, df, expected):
    """Check that transform caps only the configured column and leaves the rest as is."""
    capper = CappingTransformer(capping_values={"a": [2, 10]})

    result = capper.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in CappingTransformer.transform, with columns meant to not be transformed",
    )
def test_exception_no_print():
    """Check that unequal frames raise an AssertionError matching the msg_tag.

    Covers the case where print_actual_and_expected is False.
    """
    expected_df = pd.DataFrame({"a": [1, 2, 3]})
    actual_df = pd.DataFrame({"a": [1, 2, 4]})  # last value differs from expected_df

    with pytest.raises(AssertionError, match="a"):
        h.assert_frame_equal_msg(
            expected=expected_df,
            actual=actual_df,
            msg_tag="a",
            print_actual_and_expected=False,
        )
def test_expected_output(self, df, expected):
    """Check the frame returned by CrossColumnMappingTransformer.transform."""
    # values of column "a" (1..6) map to doubled letters "aa".."ff"
    column_a_mapping = {
        number: letter * 2 for number, letter in enumerate("abcdef", start=1)
    }
    transformer = CrossColumnMappingTransformer(
        mappings={"a": column_a_mapping},
        adjust_column="b",
    )

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="expected output from cross column mapping transformer",
    )
def test_expected_output(self, df, expected):
    """Check the frame returned by CrossColumnMultiplyTransformer.transform."""
    multipliers = dict(zip("abcdef", (1.1, 1.2, 1.3, 1.4, 1.5, 1.6)))
    transformer = CrossColumnMultiplyTransformer(
        mappings={"b": multipliers},
        adjust_column="a",
    )

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="expected output from cross column multiply transformer",
    )
def test_non_specified_values_unchanged(self, df, expected):
    """Check that transform leaves values absent from the mappings unchanged."""
    partial_mappings = {
        "a": {1: 5, 2: 6, 3: 7},
        "b": {"a": "z", "b": "y", "c": "x"},
    }
    transformer = MappingTransformer(mappings=partial_mappings)

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="expected output from mapping transformer",
    )
def test_expected_output_min_and_max_combinations(self, df, expected):
    """Check capping in transform for several combinations of capping values.

    One column has two numeric capping values; the other two have None in
    one of the two positions.
    """
    capping_values = {
        "a": [2, 5],
        "b": [None, 7],
        "c": [0, None],
    }
    capper = CappingTransformer(capping_values=capping_values)

    result = capper.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in CappingTransformer.transform",
    )
def test_non_specified_values_unchanged(self, df, expected):
    """Check that transform leaves rows whose values are not in the mappings unchanged."""
    transformer = CrossColumnAddTransformer(
        mappings={"b": {"a": 1.1, "b": 1.2}},
        adjust_column="a",
    )

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="expected output from cross column add transformer",
    )
def test_multiple_mappings_expected_output(self, df, expected):
    """Check that mappings keyed on more than one column are all applied in transform."""
    # key order is kept as in the original test in case application order matters
    mappings_by_column = {
        "b": {"a": 1.1, "f": 1.2},
        "c": {"a": 2, "e": 3},
    }
    transformer = CrossColumnAddTransformer(
        mappings=mappings_by_column,
        adjust_column="a",
    )

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="expected output from cross column add transformer",
    )
def test_expected_output_no_weight(self, df, expected):
    """Test that the output is expected from transform.

    The ``mapping_`` attribute is assigned by hand rather than learned with fit,
    so the test depends only on the supplied ``df`` and ``expected`` inputs.
    """
    x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

    # set the mapping dict directly rather than fitting x on df so test works with decorators
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0
    x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a"]}

    df_transformed = x.transform(df)

    h.assert_frame_equal_msg(
        actual=df_transformed,
        expected=expected,
        msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
    )
def test_expected_output_nulls(self, df, expected):
    """Check the transform output when the date columns contain nulls."""
    transformer = DateDiffLeapYearTransformer(
        column_lower="a",
        column_upper="b",
        new_column_name="c",
        drop_cols=False,
    )

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in DateDiffLeapYearTransformer.transform (nulls)",
    )
def test_multiple_mappings_ordered_dict(self, df, expected):
    """Check transform output when mappings for several columns come as an OrderedDict."""
    ordered_mappings = OrderedDict(
        [
            ("a", {1: "aa", 2: "bb"}),
            ("b", {"x": "cc", "z": "dd"}),
        ]
    )
    transformer = CrossColumnMappingTransformer(
        mappings=ordered_mappings,
        adjust_column="c",
    )

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="expected output from cross column mapping transformer",
    )
def test_expected_output_no_weight_single_row_na(self):
    """Test that a single-row frame holding only null values is unchanged by transform.

    The type is preserved if using an existing dataframe, so a new dataframe is
    created here and also used as the expected output.
    """
    # np.nan is used throughout because the np.NaN alias was removed in NumPy 2.0
    one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.nan]})

    x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

    # set the mapping dict directly rather than fitting x on df so test works with decorators
    x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a", np.nan]}

    one_row_df_transformed = x.transform(one_row_df)

    h.assert_frame_equal_msg(
        actual=one_row_df_transformed,
        expected=one_row_df,
        msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
    )
def test_expected_output(self, df, expected):
    """Check the frame returned by MappingTransformer.transform."""
    letters = "abcdef"
    numbers = range(1, 7)

    # column "a" maps numbers to letters, column "b" maps the letters back to numbers
    transformer = MappingTransformer(
        mappings={
            "a": dict(zip(numbers, letters)),
            "b": dict(zip(letters, numbers)),
        }
    )

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="expected output from mapping transformer",
    )
def test_base_nominal_transformer_transform_called(self, mocker):
    """Test that BaseNominalTransformer.transform is called by transform.

    BaseNominalTransformer.transform is patched to return a fresh copy of the
    test data. The single recorded call must carry no keyword arguments and
    exactly two positional arguments: the transformer itself and a frame equal
    to the input data.
    """
    df = d.create_df_1()

    x = OneHotEncodingTransformer(columns="b")
    x.fit(df)

    mocker.patch(
        "tubular.nominal.BaseNominalTransformer.transform",
        return_value=d.create_df_1(),
    )

    x.transform(df)

    # after patching, the class attribute is the mock that recorded the calls
    mocked_transform = tubular.nominal.BaseNominalTransformer.transform

    assert (
        mocked_transform.call_count == 1
    ), f"Not enough calls to BaseNominalTransformer.transform -\n Expected: 1\n Actual: {mocked_transform.call_count}"

    call_args = mocked_transform.call_args_list[0]
    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    h.assert_equal_dispatch(
        expected={},
        actual=call_kwargs,
        msg="kwargs for BaseNominalTransformer.transform in OneHotEncodingTransformer.transform",
    )

    expected_pos_args = (x, d.create_df_1())

    assert (
        len(call_pos_args) == 2
    ), f"Unexpected number of positional args in BaseNominalTransformer.transform call -\n Expected: 2\n Actual: {len(call_pos_args)}"

    h.assert_frame_equal_msg(
        expected=expected_pos_args[1],
        actual=call_pos_args[1],
        msg_tag="X positional arg in BaseNominalTransformer.transform call",
    )

    assert (
        expected_pos_args[0] == call_pos_args[0]
    ), "self positional arg in BaseNominalTransformer.transform call"
def test_expected_output(self, df_test, expected):
    """Check that OneHotEncodingTransformer.transform gives the expected frame.

    The expected frame covers both the encoded feature and the columns that
    are not being encoded.
    """
    # the transformer is fit on a separate full dataset, not on df_test, so the
    # test works with the decorators
    encoder = OneHotEncodingTransformer(columns="b")
    encoder.fit(d.create_df_7())

    result = encoder.transform(df_test)

    h.assert_frame_equal_msg(
        expected=expected,
        actual=result,
        msg_tag="Unspecified columns changed in transform",
    )
def test_expected_output_no_overwrite(self, df, expected):
    """Test a single column output from transform gives expected results, when not overwriting the original column.

    The ``year`` dt attribute of column "a" is written to the new column "a_year".
    """
    x = SeriesDtMethodTransformer(
        new_column_name="a_year",
        pd_method_name="year",
        column="a",
        pd_method_kwargs={},
    )

    df_transformed = x.transform(df)

    # the message names the method under test (year); it previously said "find",
    # left over from the str-method tests
    h.assert_frame_equal_msg(
        actual=df_transformed,
        expected=expected,
        msg_tag="Unexpected values in SeriesDtMethodTransformer.transform with year, not overwriting original column",
    )
def test_expected_output_callable(self, df, expected):
    """Check transform output when pd_method_name names a callable (to_period)."""
    transformer_kwargs = {
        "new_column_name": "b_new",
        "pd_method_name": "to_period",
        "column": "b",
        "pd_method_kwargs": {"freq": "M"},
    }
    transformer = SeriesDtMethodTransformer(**transformer_kwargs)

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in SeriesDtMethodTransformer.transform with to_period",
    )
def test_expected_output_overwrite(self, df, expected):
    """Check transform output when the result overwrites the source column.

    new_column_name is the same as the single input column ("b").
    """
    target_column = "b"
    transformer = SeriesStrMethodTransformer(
        new_column_name=target_column,
        pd_method_name="pad",
        columns=[target_column],
        pd_method_kwargs={"width": 10},
    )

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in SeriesStrMethodTransformer.transform with pad, overwriting original column",
    )
def test_expected_output_nulls(self, df, expected):
    """Check the transform output when the date columns contain nulls."""
    transformer_kwargs = {
        "column_lower": "a",
        "column_upper": "b",
        "new_column_name": "Y",
        "units": "Y",
        "copy": True,
        "verbose": False,
    }
    transformer = DateDifferenceTransformer(**transformer_kwargs)

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in DateDifferenceTransformer.transform (nulls)",
    )
def test_expected_output_drop_cols_false(self, df, expected):
    """Check the transform output when drop_cols is False.

    Intended to cover positive year gaps, negative year gaps and missing values.
    """
    transformer = DateDiffLeapYearTransformer(
        column_lower="a",
        column_upper="b",
        new_column_name="c",
        drop_cols=False,
    )

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in DateDiffLeapYearTransformer.transform (without drop_cols)",
    )
def test_one_hot_encoder_transform_called(self, mocker):
    """Test that OneHotEncoder.transform is called by transform.

    sklearn's OneHotEncoder.transform is patched with a mock. The single
    recorded call must carry no keyword arguments and exactly two positional
    arguments: the transformer itself and the "b" column of the input data.
    """
    df = d.create_df_1()

    x = OneHotEncodingTransformer(columns="b")
    x.fit(df)

    mocker.patch("sklearn.preprocessing.OneHotEncoder.transform")

    x.transform(df)

    # after patching, the class attribute is the mock that recorded the calls
    mocked_transform = sklearn.preprocessing.OneHotEncoder.transform

    assert (
        mocked_transform.call_count == 1
    ), f"Not enough calls to OneHotEncoder.transform -\n Expected: 1\n Actual: {mocked_transform.call_count}"

    call_args = mocked_transform.call_args_list[0]
    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    # the messages below name OneHotEncoder.transform, since that is the call
    # being inspected
    h.assert_equal_dispatch(
        expected={},
        actual=call_kwargs,
        msg="kwargs for OneHotEncoder.transform in OneHotEncodingTransformer.transform",
    )

    assert (
        len(call_pos_args) == 2
    ), f"Unexpected number of positional args in OneHotEncoder.transform call -\n Expected: 2\n Actual: {len(call_pos_args)}"

    assert (
        call_pos_args[0] is x
    ), f"Unexpected positional arg (self, index 0) in OneHotEncoder.transform call -\n Expected: self\n Actual: {call_pos_args[0]}"

    h.assert_frame_equal_msg(
        expected=d.create_df_1()[["b"]],
        actual=call_pos_args[1],
        msg_tag="X positional arg in OneHotEncoder.transform call",
    )
def test_pandas_assert_frame_called(mocker):
    """Test the call to pandas.testing.assert_frame_equal.

    A spy on pandas.testing.assert_frame_equal records the call made by
    h.assert_frame_equal_msg. The call must happen once, with the expected and
    actual frames as positional arguments and check_dtype passed as the only
    keyword argument.
    """
    df = pd.DataFrame({"a": [1, 2, 3]})
    df2 = pd.DataFrame({"a": [1, 2, 3]})

    spy = mocker.spy(pandas.testing, "assert_frame_equal")

    h.assert_frame_equal_msg(expected=df, actual=df2, msg_tag="a", check_dtype=True)

    assert (
        spy.call_count == 1
    ), f"Unexpected number of calls to pd.testing.assert_frame_equal -\n Expected: 1\n Actual: {spy.call_count}"

    call_1_args = spy.call_args_list[0]
    call_1_pos_args = call_1_args[0]
    call_1_kwargs = call_1_args[1]

    call_1_expected_kwargs = {"check_dtype": True}
    call_1_expected_pos_args = (df, df2)

    assert len(call_1_expected_kwargs.keys()) == len(
        call_1_kwargs.keys()
    ), f"Unexpected number of kwargs -\n Expected: {len(call_1_expected_kwargs.keys())}\n Actual: {len(call_1_kwargs.keys())}"

    assert (
        call_1_expected_kwargs["check_dtype"] == call_1_kwargs["check_dtype"]
    ), f"""check_dtype kwarg unexpected -\n Expected {call_1_expected_kwargs['check_dtype']}\n Actual: {call_1_kwargs['check_dtype']}"""

    # this check is about positional args, so the message says so (it previously said "kwargs")
    assert len(call_1_expected_pos_args) == len(
        call_1_pos_args
    ), f"Unexpected number of positional args -\n Expected: {len(call_1_expected_pos_args)}\n Actual: {len(call_1_pos_args)}"

    pd.testing.assert_frame_equal(call_1_expected_pos_args[0], call_1_pos_args[0])
    pd.testing.assert_frame_equal(call_1_expected_pos_args[1], call_1_pos_args[1])
def test_expected_output(self, df, expected):
    """Check that transform produces the expected frame for hand-set mappings."""
    encoder = OrdinalEncoderTransformer(response_column="a", columns=["b", "d", "f"])

    # mappings are assigned directly (no fit on df) so the test works with helpers
    encoder.mappings = {
        "b": dict(zip("abcdef", range(1, 7))),
        "d": {value: value for value in range(1, 7)},
        "f": {False: 1, True: 2},
    }

    result = encoder.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in OrdinalEncoderTransformer.transform",
    )