def test_HandleUnknown_HaveOnlyKnown_ExpectSecondColumn(self):
    """Fit-transform two categories with ``handle_unknown='indicator'``.

    The rounded output rows must equal the rounded ``a_encoding`` and
    ``b_encoding`` reference rows, in that order.
    """
    polynomial = encoders.PolynomialEncoder(handle_unknown='indicator')
    encoded = polynomial.fit_transform(['A', 'B'])

    # Both sides go through deep_round before the equality check.
    actual_rows = deep_round(encoded.values.tolist())
    wanted_rows = deep_round([a_encoding, b_encoding])
    self.assertEqual(actual_rows, wanted_rows)
def test_HandleMissingIndicator_NanInTrain_ExpectAsColumn(self):
    """Fit-transform ``['A', 'B', nan]`` with ``handle_missing='indicator'``.

    The rounded output rows must equal the rounded ``a_encoding``,
    ``b_encoding`` and ``c_encoding`` reference rows, in that order.
    """
    samples = ['A', 'B', np.nan]
    polynomial = encoders.PolynomialEncoder(
        handle_missing='indicator',
        handle_unknown='value',
    )
    encoded = polynomial.fit_transform(samples)

    # Both sides go through deep_round before the equality check.
    actual_rows = deep_round(encoded.values.tolist())
    wanted_rows = deep_round([a_encoding, b_encoding, c_encoding])
    self.assertEqual(actual_rows, wanted_rows)
def test_polynomial_encoder_2cols(self):
    """Encode a two-column input whose columns hold identical values.

    Each expected row is a leading ``1`` followed by elements 1 and 2 of
    the matching reference encoding, repeated once per input column.
    """
    rows = [['A', 'A'], ['B', 'B'], ['C', 'C']]
    polynomial = encoders.PolynomialEncoder(
        handle_unknown='value',
        handle_missing='value',
    )
    polynomial.fit(rows)
    encoded = polynomial.transform(rows)

    # One expected row per reference encoding, same order as the input rows.
    wanted = [
        [1, ref[1], ref[2], ref[1], ref[2]]
        for ref in (a_encoding, b_encoding, c_encoding)
    ]
    actual_rows = deep_round(encoded.values.tolist())
    wanted_rows = deep_round(wanted)
    self.assertEqual(actual_rows, wanted_rows)
def test_HandleUnknown_HaveNoUnknownInTrain_ExpectIndicatorInTest(self):
    """Fit on ``['A', 'B']``, then transform data that also contains ``'C'``.

    With ``handle_unknown='indicator'`` the rounded output rows must equal
    the rounded ``a_encoding``, ``b_encoding`` and ``c_encoding`` rows.
    """
    polynomial = encoders.PolynomialEncoder(handle_unknown='indicator')
    polynomial.fit(['A', 'B'])

    # 'C' was not part of the data passed to fit().
    encoded = polynomial.transform(['A', 'B', 'C'])

    actual_rows = deep_round(encoded.values.tolist())
    wanted_rows = deep_round([a_encoding, b_encoding, c_encoding])
    self.assertEqual(actual_rows, wanted_rows)
def test_polynomial_encoder_preserve_dimension_2(self):
    """Fit on ``['A', 'B', 'C']`` and transform ``['B', 'D', 'E']``.

    The first output row must match ``b_encoding``; the rows for ``'D'``
    and ``'E'`` (absent from the fit data) must each be ``[1, 0, 0]``.
    """
    polynomial = encoders.PolynomialEncoder(
        handle_unknown='value',
        handle_missing='value',
    )
    polynomial.fit(['A', 'B', 'C'])
    encoded = polynomial.transform(['B', 'D', 'E'])

    # Separate list objects for the two [1, 0, 0] rows, as in a literal.
    wanted = [b_encoding] + [[1, 0, 0] for _ in range(2)]
    actual_rows = deep_round(encoded.values.tolist())
    wanted_rows = deep_round(wanted)
    self.assertEqual(actual_rows, wanted_rows)