def test_HandleUnknown_HaveOnlyKnown_ExpectSecondColumn(self):
    """Fit-transform two known categories with handle_unknown='indicator'.

    The transformed rows must match the reference encodings for 'A' and
    'B' once both sides are passed through ``deep_round``.
    """
    categories = ['A', 'B']
    poly = encoders.PolynomialEncoder(handle_unknown='indicator')

    encoded = poly.fit_transform(categories)

    actual_rows = deep_round(encoded.values.tolist())
    wanted_rows = deep_round([a_encoding, b_encoding])
    self.assertEqual(actual_rows, wanted_rows)
def test_HandleUnknown_HaveNoUnknownInTrain_ExpectIndicatorInTest(self):
    """Fit on 'A'/'B', then transform data containing the unseen value 'C'.

    With handle_unknown='indicator' the three transformed rows are expected
    to equal the reference a/b/c encodings after ``deep_round``.
    """
    fit_values = ['A', 'B']
    unseen_values = ['A', 'B', 'C']

    poly = encoders.PolynomialEncoder(handle_unknown='indicator')
    poly.fit(fit_values)
    encoded = poly.transform(unseen_values)

    actual_rows = deep_round(encoded.values.tolist())
    wanted_rows = deep_round([a_encoding, b_encoding, c_encoding])
    self.assertEqual(actual_rows, wanted_rows)
def test_HandleMissingIndicator_NanInTrain_ExpectAsColumn(self):
    """Fit-transform training data that contains NaN with handle_missing='indicator'.

    The NaN row is expected to be encoded like a third category, i.e. the
    result must equal the reference a/b/c encodings after ``deep_round``.
    """
    train = ['A', 'B', np.nan]

    encoder = encoders.PolynomialEncoder(handle_missing='indicator', handle_unknown='value')
    result = encoder.fit_transform(train)

    expected = [a_encoding, b_encoding, c_encoding]
    # assertEqual (as used by the sibling tests) reports the differing
    # elements on failure, whereas assertTrue(np.array_equal(...)) would
    # only report "False is not true".
    self.assertEqual(deep_round(result.values.tolist()), deep_round(expected))
def test_polynomial_encoder_preserve_dimension_4(self):
    """Transform data holding an unseen value and a None after fitting on A/B/C.

    With handle_unknown='value' and handle_missing='value', the unseen 'D'
    and the None row are both expected to come back as [1, 0, 0], while
    'B' and 'C' keep their reference encodings.
    """
    fit_values = ['A', 'B', 'C']
    probe_values = ['D', 'B', 'C', None]

    poly = encoders.PolynomialEncoder(handle_unknown='value', handle_missing='value')
    poly.fit(fit_values)
    transformed = poly.transform(probe_values)

    # Row expected for both the unknown category and the missing value.
    fallback_row = [1, 0, 0]
    wanted_rows = [list(fallback_row), b_encoding, c_encoding, list(fallback_row)]

    actual_rows = deep_round(transformed.values.tolist())
    self.assertEqual(actual_rows, deep_round(wanted_rows))
def test_polynomial_encoder_2cols(self):
    """Encode a two-column input whose columns hold identical categories.

    Each expected row is a leading 1 followed by elements 1 and 2 of the
    matching reference encoding, repeated once per input column.
    """
    frame = [['A', 'A'], ['B', 'B'], ['C', 'C']]

    poly = encoders.PolynomialEncoder(handle_unknown='value', handle_missing='value')
    poly.fit(frame)
    transformed = poly.transform(frame)

    # One row per category: [1, col1 contrasts..., col2 contrasts...].
    wanted_rows = [
        [1, ref[1], ref[2], ref[1], ref[2]]
        for ref in (a_encoding, b_encoding, c_encoding)
    ]

    actual_rows = deep_round(transformed.values.tolist())
    self.assertEqual(actual_rows, deep_round(wanted_rows))