def it_can_apply_ordinal_encoder(
    self, request, columns, derived_columns, expected_new_columns
):
    """``_apply`` returns a new Dataset with the encoded column(s) added."""
    generic_df = DataFrameMock.df_generic(sample_size=100)
    get_df_from_csv_ = function_mock(request, "trousse.dataset.get_df_from_csv")
    get_df_from_csv_.return_value = generic_df
    dataset = Dataset(data_file="fake/path0")
    fit_transform_ = method_mock(request, sk_preproc.OrdinalEncoder, "fit_transform")
    fit_transform_.return_value = pd.Series(range(100))
    encoder = fop.OrdinalEncoder(columns=columns, derived_columns=derived_columns)

    result = encoder._apply(dataset)

    # _apply must not mutate its input; it hands back a fresh Dataset.
    assert result is not None
    assert result is not dataset
    assert isinstance(result, Dataset)
    for new_col in expected_new_columns:
        assert new_col in result.data.columns
    get_df_from_csv_.assert_called_once_with("fake/path0")
    # One fit_transform call per requested column.
    assert len(fit_transform_.call_args_list) == len(columns)
    # call_args_list[0][0] is the positional-args tuple: (self, dataframe).
    pd.testing.assert_frame_equal(
        fit_transform_.call_args_list[0][0][1], generic_df[[columns[0]]]
    )
def it_knows_its_encoder(self):
    """The ``encoder`` attribute exposes the underlying sklearn OrdinalEncoder."""
    encoder_op = fop.OrdinalEncoder(columns=["col0"], derived_columns=["col1"])

    assert isinstance(encoder_op.encoder, sk_preproc.OrdinalEncoder)
def test_ordinal_encoder(csv, columns, derived_columns, expected_csv):
    """Integration check: encoding a real CSV matches the expected fixture."""
    dataset = Dataset(data_file=csv)
    expected_df = load_expectation(expected_csv, type_="csv")
    encoder = fop.OrdinalEncoder(columns=columns, derived_columns=derived_columns)

    encoded = encoder(dataset)

    pd.testing.assert_frame_equal(encoded.data, expected_df)
def it_knows_if_equal(self, other, expected_equal):
    """Equality compares both ``columns`` and ``derived_columns``."""
    feat_op = fop.OrdinalEncoder(
        columns=["exam_num_col_0"],
        derived_columns=["encoded_exam_num_col_0"],
    )

    is_equal = feat_op == other

    # Exact-type check is deliberate: __eq__ must return bool, not a truthy proxy.
    assert type(is_equal) == bool
    assert is_equal == expected_equal
def it_construct_from_args(self, request):
    """Constructor forwards ``columns``/``derived_columns`` to ``__init__``."""
    _init_ = initializer_mock(request, fop.OrdinalEncoder)

    encoder = fop.OrdinalEncoder(columns=["col0"], derived_columns=["col1"])

    _init_.assert_called_once_with(ANY, columns=["col0"], derived_columns=["col1"])
    assert isinstance(encoder, fop.OrdinalEncoder)
def and_it_validates_its_arguments(self, request):
    """``__init__`` runs the single-element validators on both argument lists."""
    validate_columns_ = method_mock(
        request, fop.OrdinalEncoder, "_validate_single_element_columns"
    )
    validate_derived_ = method_mock(
        request, fop.OrdinalEncoder, "_validate_single_element_derived_columns"
    )

    encoder = fop.OrdinalEncoder(columns=["col0"], derived_columns=["col1"])

    validate_columns_.assert_called_once_with(encoder, ["col0"])
    validate_derived_.assert_called_once_with(encoder, ["col1"])
def it_can_encode_with_template_call(self, request):
    """Calling the operation delegates to ``_apply`` and records history."""
    _apply_ = method_mock(request, fop.OrdinalEncoder, "_apply")
    track_history_ = method_mock(request, Dataset, "track_history")
    generic_df = DataFrameMock.df_generic(sample_size=100)
    get_df_from_csv_ = function_mock(request, "trousse.dataset.get_df_from_csv")
    get_df_from_csv_.return_value = generic_df
    dataset_in = Dataset(data_file="fake/path0")
    dataset_out = Dataset(data_file="fake/path0")
    _apply_.return_value = dataset_out
    encoder = fop.OrdinalEncoder(
        columns=["exam_num_col_0"],
        derived_columns=["exam_str_col_0"],
    )

    result = encoder(dataset_in)

    _apply_.assert_called_once_with(encoder, dataset_in)
    track_history_.assert_called_once_with(result, encoder)
    # The template call returns whatever _apply produced.
    assert result is dataset_out
class DescribeOrdinalEncoder:
    """Unit-test suite for ``fop.OrdinalEncoder``."""

    def it_construct_from_args(self, request):
        """Constructor forwards ``columns``/``derived_columns`` to ``__init__``."""
        _init_ = initializer_mock(request, fop.OrdinalEncoder)

        encoder = fop.OrdinalEncoder(columns=["col0"], derived_columns=["col1"])

        _init_.assert_called_once_with(ANY, columns=["col0"], derived_columns=["col1"])
        assert isinstance(encoder, fop.OrdinalEncoder)

    def and_it_validates_its_arguments(self, request):
        """``__init__`` runs the single-element validators on both argument lists."""
        validate_columns_ = method_mock(
            request, fop.OrdinalEncoder, "_validate_single_element_columns"
        )
        validate_derived_ = method_mock(
            request, fop.OrdinalEncoder, "_validate_single_element_derived_columns"
        )

        encoder = fop.OrdinalEncoder(columns=["col0"], derived_columns=["col1"])

        validate_columns_.assert_called_once_with(encoder, ["col0"])
        validate_derived_.assert_called_once_with(encoder, ["col1"])

    def it_knows_its_encoder(self):
        """The ``encoder`` attribute exposes the underlying sklearn OrdinalEncoder."""
        encoder_op = fop.OrdinalEncoder(columns=["col0"], derived_columns=["col1"])

        assert isinstance(encoder_op.encoder, sk_preproc.OrdinalEncoder)

    @pytest.mark.parametrize(
        "columns, derived_columns, expected_new_columns",
        [
            (["exam_str_col_0"], ["col1"], ["col1"]),
            (["exam_str_col_0"], None, []),
        ],
    )
    def it_can_apply_ordinal_encoder(
        self, request, columns, derived_columns, expected_new_columns
    ):
        """``_apply`` returns a new Dataset with the encoded column(s) added."""
        generic_df = DataFrameMock.df_generic(sample_size=100)
        get_df_from_csv_ = function_mock(request, "trousse.dataset.get_df_from_csv")
        get_df_from_csv_.return_value = generic_df
        dataset = Dataset(data_file="fake/path0")
        fit_transform_ = method_mock(
            request, sk_preproc.OrdinalEncoder, "fit_transform"
        )
        fit_transform_.return_value = pd.Series(range(100))
        encoder = fop.OrdinalEncoder(columns=columns, derived_columns=derived_columns)

        result = encoder._apply(dataset)

        # _apply must not mutate its input; it hands back a fresh Dataset.
        assert result is not None
        assert result is not dataset
        assert isinstance(result, Dataset)
        for new_col in expected_new_columns:
            assert new_col in result.data.columns
        get_df_from_csv_.assert_called_once_with("fake/path0")
        # One fit_transform call per requested column.
        assert len(fit_transform_.call_args_list) == len(columns)
        # call_args_list[0][0] is the positional-args tuple: (self, dataframe).
        pd.testing.assert_frame_equal(
            fit_transform_.call_args_list[0][0][1], generic_df[[columns[0]]]
        )

    def it_can_encode_with_template_call(self, request):
        """Calling the operation delegates to ``_apply`` and records history."""
        _apply_ = method_mock(request, fop.OrdinalEncoder, "_apply")
        track_history_ = method_mock(request, Dataset, "track_history")
        generic_df = DataFrameMock.df_generic(sample_size=100)
        get_df_from_csv_ = function_mock(request, "trousse.dataset.get_df_from_csv")
        get_df_from_csv_.return_value = generic_df
        dataset_in = Dataset(data_file="fake/path0")
        dataset_out = Dataset(data_file="fake/path0")
        _apply_.return_value = dataset_out
        encoder = fop.OrdinalEncoder(
            columns=["exam_num_col_0"],
            derived_columns=["exam_str_col_0"],
        )

        result = encoder(dataset_in)

        _apply_.assert_called_once_with(encoder, dataset_in)
        track_history_.assert_called_once_with(result, encoder)
        # The template call returns whatever _apply produced.
        assert result is dataset_out

    @pytest.mark.parametrize(
        "other, expected_equal",
        [
            (
                fop.OrdinalEncoder(
                    columns=["exam_num_col_0"],
                    derived_columns=["encoded_exam_num_col_0"],
                ),
                True,
            ),
            (
                fop.OrdinalEncoder(
                    columns=["exam_num_col_1"],
                    derived_columns=["encoded_exam_num_col_0"],
                ),
                False,
            ),
            (
                fop.OrdinalEncoder(
                    columns=["exam_num_col_0"],
                    derived_columns=["encoded_exam_num_col_1"],
                ),
                False,
            ),
            (
                fop.OrdinalEncoder(
                    columns=["exam_num_col_1"],
                    derived_columns=["encoded_exam_num_col_1"],
                ),
                False,
            ),
            (dict(), False),
        ],
    )
    def it_knows_if_equal(self, other, expected_equal):
        """Equality compares both ``columns`` and ``derived_columns``."""
        feat_op = fop.OrdinalEncoder(
            columns=["exam_num_col_0"],
            derived_columns=["encoded_exam_num_col_0"],
        )

        is_equal = feat_op == other

        # Exact-type check is deliberate: __eq__ must return bool, not a truthy proxy.
        assert type(is_equal) == bool
        assert is_equal == expected_equal