def test_fit_with_analyze(self):
    """Check that ``fit`` fits each transformer returned by ``_analyze`` exactly once."""
    # Setup
    data = pd.DataFrame({
        'integers': [1, 2, 3, 4],
        'floats': [1.1, 2.2, 3.3, 4.4],
        'booleans': [True, False, False, True],
    })

    # One stand-in transformer per column, keyed the same way as the data.
    column_mocks = {name: Mock() for name in ('integers', 'floats', 'booleans')}

    hyper_mock = Mock()
    # No transformers configured, and ``_analyze`` is stubbed to hand back the mocks.
    hyper_mock.transformers = None
    hyper_mock._analyze.return_value = column_mocks

    # Run
    HyperTransformer.fit(hyper_mock, data)

    # Asserts
    expected_fit_calls = 1
    for column_mock in column_mocks.values():
        self.assertEqual(column_mock.fit.call_count, expected_fit_calls)
def test_hypertransformer_with_transformers(faker_mock):
    """Round-trip the input data through a HyperTransformer with explicit transformers.

    The transformed output must match the expected fixture, every column other
    than ``names`` must be restored by ``reverse_transform``, and none of the
    original names may appear among the reversed names.

    Args:
        faker_mock: Mock whose ``return_value.first_name`` supplies the
            replacement names (presumably injected by a patch decorator that
            is outside this block -- confirm against the decorator).
    """
    faker_mock.return_value.first_name.side_effect = [
        'Jaime', 'Cersei', 'Tywin', 'Tyrion'
    ]

    data = get_input_data()
    transformers = get_transformers()

    ht = HyperTransformer(transformers)
    ht.fit(data)
    transformed = ht.transform(data)

    expected = get_transformed_data()

    # Column order is not part of the contract, so compare with sorted columns.
    np.testing.assert_allclose(
        transformed.sort_index(axis=1).values,
        expected.sort_index(axis=1).values
    )

    reversed_data = ht.reverse_transform(transformed)

    # ``names`` is compared separately, so drop it from both frames first.
    original_names = data.pop('names')
    reversed_names = reversed_data.pop('names')

    pd.testing.assert_frame_equal(data.sort_index(axis=1), reversed_data.sort_index(axis=1))

    # ``in`` on a pandas Series tests the index labels, not the values, so the
    # membership check has to run against the values for it to be meaningful.
    reversed_name_values = set(reversed_names)
    for name in original_names:
        assert name not in reversed_name_values
def test_single_category():
    """A column holding a single repeated category must survive a one-hot round trip."""
    data = pd.DataFrame({'a': ['a', 'a', 'a']})

    one_hot_by_column = {'a': OneHotEncodingTransformer()}
    ht = HyperTransformer(transformers=one_hot_by_column)
    ht.fit(data)

    round_tripped = ht.reverse_transform(ht.transform(data))

    pd.testing.assert_frame_equal(data, round_tripped)
def test_dtype_category():
    """A ``category``-dtype frame must be restored by a default HyperTransformer round trip."""
    frame = pd.DataFrame({'a': ['a', 'b', 'c']}, dtype='category')

    hyper = HyperTransformer()
    hyper.fit(frame)

    encoded = hyper.transform(frame)
    decoded = hyper.reverse_transform(encoded)

    pd.testing.assert_frame_equal(frame, decoded)
def test_empty_transformers_nan_data():
    """With an empty transformers dict, transform and reverse_transform leave the data as is."""
    data = get_input_data_with_nan()

    hyper = HyperTransformer(transformers={})
    hyper.fit(data)

    forward = hyper.transform(data)
    backward = hyper.reverse_transform(forward)

    # Both directions are expected to be the identity on the input frame.
    for result in (forward, backward):
        pd.testing.assert_frame_equal(data, result)
def test_subset_of_columns_nan_data():
    """A HyperTransformer fitted on all columns must round-trip a subset of them.

    See https://github.com/sdv-dev/RDT/issues/152
    """
    data = get_input_data_with_nan()

    hyper = HyperTransformer()
    hyper.fit(data)

    # Keep only the first training column, as a one-column DataFrame.
    first_column = data.columns[0]
    subset = data[[first_column]]

    round_tripped = hyper.reverse_transform(hyper.transform(subset))

    pd.testing.assert_frame_equal(subset, round_tripped)
def test_hypertransformer_without_transformers_nan_data():
    """Round-trip NaN-containing data through a default HyperTransformer.

    The transformed values must match the expected fixture and every column
    other than ``names`` must be restored by ``reverse_transform``.
    """
    data = get_input_data_with_nan()

    hyper = HyperTransformer()
    hyper.fit(data)
    forward = hyper.transform(data)

    # Column order is not significant, so both sides are sorted by column label.
    actual_values = forward.sort_index(axis=1).values
    expected_values = get_transformed_nan_data().sort_index(axis=1).values
    np.testing.assert_allclose(actual_values, expected_values)

    backward = hyper.reverse_transform(forward)

    # ``names`` is checked on its own, so remove it from both frames first.
    original_names = data.pop('names')
    reversed_names = backward.pop('names')

    pd.testing.assert_frame_equal(data.sort_index(axis=1), backward.sort_index(axis=1))

    # NOTE(review): ``reversed_names`` looks like a pandas Series, and ``in`` on
    # a Series tests index labels rather than values -- confirm whether this
    # check is meant to compare against the reversed values instead.
    for original in original_names:
        assert original not in reversed_names