def test_transform_unseen(self, X, y, handle_unseen, expected):
     """Check the transform output after a cell is replaced post-fit.

     The encoder is built for the single ``cat`` column with the given
     ``handle_unseen`` setting and fitted on ``X`` / ``y``. The first cell
     is then overwritten with ``'foo'`` (intended to be a category that was
     not present at fit time -- depends on the supplied fixtures) and the
     transformed frame must match ``expected`` to three decimal places.
     """
     frame = pd.DataFrame(X, columns=['cat'])
     target = pd.Series(y)
     encoder = WeightOfEvidenceEncoder(
         cols=['cat'],
         handle_unseen=handle_unseen,
     )
     encoder.fit(frame, target)

     # Overwrite only after fitting, so the encoder never saw this value.
     frame.iloc[0, 0] = 'foo'

     actual = encoder.transform(frame)
     wanted = pd.DataFrame(expected)
     assert_array_almost_equal(actual, wanted, decimal=3)
Beispiel #2
0
 def woe_encoding(cls, X, Y=None, encoder=None):
     """Encode the ``some_id`` and ``other_id`` columns of ``X``.

     If ``encoder`` is None, a ``WeightOfEvidenceEncoder`` over those two
     columns (``min_samples=5``) is created and fitted on ``X`` and ``Y``.
     An encoder passed in by the caller is used as-is and is not refitted.

     Returns a ``(frame, encoder)`` tuple. ``frame`` is the two raw id
     columns of ``X`` concatenated column-wise with the encoder's transform
     output, in which the id columns are renamed to ``woe_enc_<name>``.
     """
     id_columns = ['some_id', 'other_id']

     if encoder is None:
         encoder = WeightOfEvidenceEncoder(cols=id_columns, min_samples=5)
         encoder.fit(X, Y)

     transformed = encoder.transform(X)

     # Prefix the encoded columns so they do not clash with the raw ones
     # that are kept alongside them in the result.
     new_names = {}
     for name in id_columns:
         new_names[name] = 'woe_enc_{}'.format(name)
     encoded = transformed.rename(columns=new_names)

     combined = pd.concat([X[id_columns], encoded], axis=1)
     return combined, encoder
 def test_transform_error(self, X, y, expected):
     """Check that ``transform`` raises ``ValueError`` under ``handle_unseen='error'``.

     The encoder is fitted on the single ``cat`` column of ``X`` with
     targets ``y``. The first cell is then overwritten with ``'foo'``
     (intended to be a category absent at fit time -- depends on the
     supplied fixtures) and transforming the modified frame must raise.
     ``expected`` is accepted but not used by this test.
     """
     frame = pd.DataFrame(X, columns=['cat'])
     target = pd.Series(y)
     encoder = WeightOfEvidenceEncoder(cols=['cat'], handle_unseen='error')
     encoder.fit(frame, target)

     # Overwrite only after fitting, so the encoder never saw this value.
     frame.iloc[0, 0] = 'foo'

     assert_raises(ValueError, encoder.transform, frame)