def test_one_hot(self):
    """Check OneHotEncoder output columns for each ``handle_unknown`` mode.

    The encoder is fitted on the shared ``X`` and applied to ``X_t``
    (presumably holding a category unseen in ``X``, per the first
    assertion message -- confirm against the fixtures). Covered here:

    * default settings keep the column count identical for ``X`` and ``X_t``;
    * ``handle_unknown='indicator'`` produces an ``extra_-1`` column, with
      and without ``use_cat_names``;
    * ``handle_unknown='return_nan'`` yields exactly three ``extra_*``
      columns, and an ``extra_A`` column when ``use_cat_names=True``;
    * ``handle_unknown='error'`` raises ``ValueError`` from ``transform``;
    * ``inverse_transform`` round-trips a dataset without missing values.
    """

    def fit_on_training_data(**options):
        """Return a verbose OneHotEncoder built with *options*, fitted on ``X``."""
        fitted = encoders.OneHotEncoder(verbose=1, **options)
        fitted.fit(X)
        return fitted

    # Default unknown handling, non-DataFrame output: only the width matters.
    encoder = fit_on_training_data(return_df=False)
    width_for_test_data = encoder.transform(X_t).shape[1]
    width_for_train_data = encoder.transform(X).shape[1]
    self.assertEqual(
        width_for_test_data,
        width_for_train_data,
        'We have to get the same count of columns despite the presence of a new value',
    )

    # 'indicator' mode is expected to add a dedicated column for unknowns.
    encoder = fit_on_training_data(return_df=True, handle_unknown='indicator')
    encoded = encoder.transform(X_t)
    self.assertIn('extra_-1', encoded.columns.values)

    # 'return_nan' mode: count the columns derived from the 'extra' feature.
    encoder = fit_on_training_data(return_df=True, handle_unknown='return_nan')
    encoded = encoder.transform(X_t)
    extra_column_count = sum(
        1 for name in encoded.columns.values if str(name).startswith('extra_')
    )
    self.assertEqual(extra_column_count, 3)

    # 'error' mode: fitting on X sits outside the assertRaises block, so it
    # must succeed; the ValueError is expected only when transforming X_t.
    encoder = fit_on_training_data(return_df=True, handle_unknown='error')
    with self.assertRaises(ValueError):
        encoder.transform(X_t)

    # With use_cat_names the output columns carry the category label.
    encoder = fit_on_training_data(
        return_df=True, handle_unknown='return_nan', use_cat_names=True
    )
    encoded = encoder.transform(X_t)
    self.assertIn('extra_A', encoded.columns.values)

    encoder = fit_on_training_data(
        return_df=True, use_cat_names=True, handle_unknown='indicator'
    )
    encoded = encoder.transform(X_t)
    self.assertIn('extra_-1', encoded.columns.values)

    # Inverse transform: use fresh datasets created without missing values.
    train_frame = th.create_dataset(n_rows=100, has_missing=False)
    holdout_frame = th.create_dataset(n_rows=50, has_missing=False)
    encoded_cols = ['underscore', 'none', 'extra', 321, 'categorical']
    encoder = encoders.OneHotEncoder(verbose=1, use_cat_names=True, cols=encoded_cols)
    encoder.fit(train_frame)
    holdout_encoded = encoder.transform(holdout_frame)
    restored = encoder.inverse_transform(holdout_encoded)
    th.verify_inverse_transform(holdout_frame, restored)
def test_inverse_transform(self):
    """Check that several encoders round-trip data through ``inverse_transform``.

    For each of BaseNEncoder, BinaryEncoder, OneHotEncoder and
    OrdinalEncoder, an encoder restricted to ``cols`` is fitted on one
    dataset; a second dataset is transformed, inverse-transformed and
    compared to the original via ``th.verify_inverse_transform``. Each
    encoder runs in its own ``subTest`` so failures are reported per encoder.
    """
    # None values are not allowed in these datasets (a column that is merely
    # *named* "none" but contains no None is fine).
    train_frame = th.create_dataset(n_rows=100, has_none=False)
    holdout_frame = th.create_dataset(n_rows=50, has_none=False)
    # NOTE(review): this dataset with extras is created but not used in the
    # code below -- confirm whether a check on unseen values was intended.
    X_t_extra = th.create_dataset(n_rows=50, extras=True, has_none=False)
    encoded_cols = ['underscore', 'none', 'extra', 321, 'categorical']

    encoder_names = ('BaseNEncoder', 'BinaryEncoder', 'OneHotEncoder', 'OrdinalEncoder')
    for encoder_name in encoder_names:
        with self.subTest(encoder_name=encoder_name):
            # Simple run: fit, encode the holdout data, decode it, compare.
            encoder_cls = getattr(encoders, encoder_name)
            encoder = encoder_cls(verbose=1, cols=encoded_cols)
            encoder.fit(train_frame)
            holdout_encoded = encoder.transform(holdout_frame)
            restored = encoder.inverse_transform(holdout_encoded)
            th.verify_inverse_transform(holdout_frame, restored)