def test_fit_array(self):
    """Check ``fit`` on a ``numpy.array`` when the transformer is built with ``nan=0``."""
    # Setup
    values = np.array([False, True, True, False, True])
    instance = BooleanTransformer(nan=0)

    # Run
    instance.fit(values)

    # Asserts: the fitted null transformer must report 0 as its fill value.
    self.assertEqual(
        instance.null_transformer.fill_value,
        0,
        "Unexpected fill value"
    )
def test_fit_nan_not_ignore(self):
    """Check ``fit`` on a ``pandas.Series`` when ``nan`` is given an explicit value (``0``)."""
    # Setup
    series = pd.Series([False, True, True, False, True])
    instance = BooleanTransformer(nan=0)

    # Run
    instance.fit(series)

    # Asserts: the fitted null transformer must report 0 as its fill value.
    self.assertEqual(
        instance.null_transformer.fill_value,
        0,
        "Unexpected fill value"
    )
def test___init__(self):
    """Check the attributes of an instance created without arguments."""
    # Run
    instance = BooleanTransformer()

    # Asserts
    default_nan = -1
    self.assertEqual(instance.nan, default_nan, "Unexpected nan")
    self.assertIsNone(instance.null_column, "null_column is None by default")
def _analyze(self, data):
    """Build a ``dict`` with column names and transformers from a given ``pandas.DataFrame``.

    When ``self.dtypes`` is not set (``None`` or otherwise falsy), the dtypes are
    inferred from the input data, column by column, after dropping null values.

    When ``dtype`` is:
        - ``int``: a ``NumericalTransformer`` is created with ``dtype=int``.
        - ``float``: a ``NumericalTransformer`` is created with ``dtype=float``.
        - ``object`` or ``category``: a ``CategoricalTransformer`` is created.
        - ``bool``: a ``BooleanTransformer`` is created.
        - ``datetime``: a ``DatetimeTransformer`` is created.

    Any other ``dtype`` is not supported and raises a ``ValueError``.

    Args:
        data (pandas.DataFrame):
            Data used to analyze the ``pandas.DataFrame`` dtypes.

    Returns:
        dict:
            Mapping of column names and transformer instances.

    Raises:
        ValueError:
            if a ``dtype`` is not supported by the ``HyperTransformer``.
    """
    if self.dtypes:
        dtypes = self.dtypes
    else:
        # Each entry is the null-free, type-inferred column itself rather than a
        # dtype object; ``np.dtype`` below is expected to resolve it through the
        # Series' ``dtype`` attribute -- NOTE(review): confirm with the numpy
        # version in use.
        dtypes = [
            data[column].dropna().infer_objects()
            for column in data.columns
        ]

    transformers = {}
    # Columns and dtypes are paired positionally, so ``self.dtypes`` must follow
    # the column order of ``data``.
    for name, dtype in zip(data.columns, dtypes):
        dtype = np.dtype(dtype)
        if dtype.kind == 'i':
            transformer = NumericalTransformer(dtype=int)
        elif dtype.kind == 'f':
            transformer = NumericalTransformer(dtype=float)
        elif dtype.kind == 'O':
            anonymize = self.anonymize.get(name)
            transformer = CategoricalTransformer(anonymize=anonymize)
        elif dtype.kind == 'b':
            transformer = BooleanTransformer()
        elif dtype.kind == 'M':
            transformer = DatetimeTransformer()
        else:
            raise ValueError('Unsupported dtype: {}'.format(dtype))

        transformers[name] = transformer

    return transformers
def test_transform_array(self):
    """Check ``transform`` on a ``numpy.array`` that contains a ``None`` entry.

    ``BooleanTransformer.transform`` is called unbound against a ``Mock``, so the
    assertions look at what gets forwarded to ``null_transformer.transform``.
    """
    # Setup
    values = np.array([False, True, None, True, False])
    mocked = Mock()

    # Run
    BooleanTransformer.transform(mocked, values)

    # Asserts
    null_transform = mocked.null_transformer.transform
    self.assertEqual(
        null_transform.call_count,
        1,
        "NullTransformer.transform must be called one time"
    )

    forwarded = null_transform.call_args[0][0]
    expected_forwarded = pd.Series([0, 1, None, 1, 0], dtype=object)
    pd.testing.assert_series_equal(forwarded, expected_forwarded)
def test_transform_series(self):
    """Check ``transform`` on a ``pandas.Series`` that contains a ``None`` entry.

    ``BooleanTransformer.transform`` is called unbound against a ``Mock``, so the
    assertions look at what gets forwarded to ``null_transformer.transform``.
    """
    # Setup
    series = pd.Series([False, True, None, True, False])
    mocked = Mock()

    # Run
    BooleanTransformer.transform(mocked, series)

    # Asserts
    null_transform = mocked.null_transformer.transform
    self.assertEqual(
        null_transform.call_count,
        1,
        "NullTransformer.transform must be called one time"
    )

    forwarded = null_transform.call_args[0][0]
    expected_forwarded = pd.Series([0., 1., None, 1., 0.], dtype=float)
    pd.testing.assert_series_equal(forwarded, expected_forwarded)
def test_reverse_transform_not_null_values(self):
    """Test that ``reverse_transform`` turns non-null floats back into booleans.

    The method is called unbound against a ``Mock`` whose ``nan`` is ``None``;
    ``1.`` and ``0.`` are expected back as ``True`` and ``False`` in a
    ``pandas.Series``.
    """
    # Setup
    data = np.array([1., 0., 1.])

    # Run
    transformer = Mock()
    transformer.nan = None
    result = BooleanTransformer.reverse_transform(transformer, data)

    # Asserts
    expected = np.array([True, False, True])
    # ``isinstance`` rather than an exact ``type(...) ==`` comparison, so a
    # ``pandas.Series`` subclass is accepted as well.
    assert isinstance(result, pd.Series)
    np.testing.assert_equal(result.to_numpy(), expected)
def test_reverse_transform_2d_ndarray(self):
    """Check ``reverse_transform`` when the input is a 2-D, single-column ``numpy.ndarray``.

    The result is expected to be a ``pandas.Series`` holding one boolean per row.
    """
    # Setup
    column = np.array([[1.], [0.], [1.]])
    mocked = Mock()
    mocked.nan = None

    # Run
    output = BooleanTransformer.reverse_transform(mocked, column)

    # Asserts
    assert isinstance(output, pd.Series)
    np.testing.assert_equal(output.values, np.array([True, False, True]))
def test_reverse_transform_nan_ignore(self):
    """Check ``reverse_transform`` when ``nan`` is ``None``.

    Besides the boolean output, the test verifies that
    ``null_transformer.reverse_transform`` is never invoked.
    """
    # Setup
    values = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
    mocked = Mock()
    mocked.nan = None

    # Run
    output = BooleanTransformer.reverse_transform(mocked, values)

    # Asserts
    np.testing.assert_equal(output, np.array([False, True, False, True, False]))

    null_reverse = mocked.null_transformer.reverse_transform
    self.assertEqual(
        null_reverse.call_count,
        0,
        "NullTransformer.reverse_transform should not be called when nan is ignore"
    )
def test_load_transformers():
    """Check ``load_transformers`` with an instance, a class-name spec and a class spec."""
    # Setup: one entry per supported way of describing a transformer.
    int_spec = {
        'class': 'NumericalTransformer',
        'kwargs': {
            'dtype': 'int'
        }
    }
    datetime_spec = {
        'class': DatetimeTransformer,
    }
    specs = {
        'bool': BooleanTransformer(),
        'int': int_spec,
        'datetime': datetime_spec
    }

    # Run
    loaded = load_transformers(specs)

    # Asserts
    assert isinstance(loaded, dict)
    assert set(loaded.keys()) == {'bool', 'int', 'datetime'}

    assert isinstance(loaded['bool'], BooleanTransformer)

    int_transformer = loaded['int']
    assert isinstance(int_transformer, NumericalTransformer)
    assert int_transformer.dtype == 'int'

    assert isinstance(loaded['datetime'], DatetimeTransformer)
def test_load_transformer_instance():
    """Check that ``load_transformer`` hands back the very same object when given an instance."""
    # Setup
    instance = BooleanTransformer()

    # Run
    loaded = load_transformer(instance)

    # Asserts: identity, not just equality.
    assert loaded is instance