def test_transform_float(self):
    """Check the dense one-hot rows produced for float labels.

    The binarizer is fitted on ``1.0``, ``2.0``, ``3.0`` plus one NaN, and
    the test expects one output column per non-NaN label, with the NaN
    input mapped to an all-zero row.
    """
    # NOTE(review): another ``test_transform_float`` is defined further down
    # in this file; if both live in the same class, the later definition
    # replaces this one and this test never runs - confirm.
    encoder = PMMLLabelBinarizer()
    encoder.fit([1.0, float("NaN"), 2.0, 3.0])
    # A separate NaN object is created for the query on purpose, so the
    # lookup cannot succeed through object identity alone.
    encoded = encoder.transform([1.0, 3.0, float("NaN"), 2.0])
    expected_rows = [
        [1, 0, 0],
        [0, 0, 1],
        [0, 0, 0],
        [0, 1, 0],
    ]
    self.assertEqual(expected_rows, encoded.tolist())
def test_transform_string(self):
    """Check the dense one-hot rows produced for string labels.

    The binarizer is fitted on ``"A"``, ``"B"``, ``"C"`` plus one ``None``,
    and the test expects one output column per non-``None`` label, with a
    ``None`` input mapped to an all-zero row.
    """
    # NOTE(review): another ``test_transform_string`` is defined further down
    # in this file; if both live in the same class, the later definition
    # replaces this one and this test never runs - confirm.
    encoder = PMMLLabelBinarizer()
    encoder.fit(["A", None, "B", "C"])
    # (query, expected rows) pairs, checked in the listed order.
    cases = [
        (["A", "C", None, "B"], [[1, 0, 0], [0, 0, 1], [0, 0, 0], [0, 1, 0]]),
        ([None], [[0, 0, 0]]),
        (["A", "B", "C"], [[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
    ]
    for query, expected_rows in cases:
        self.assertEqual(expected_rows, encoder.transform(query).tolist())
def test_transform_float(self):
    """Check dense and sparse transforms of float labels agree.

    The default binarizer must return a ``numpy.ndarray`` holding the
    expected one-hot rows (NaN maps to an all-zero row). A binarizer built
    with ``sparse_output=True`` must return a ``scipy.sparse.csr_matrix``
    whose densified content equals the dense result.
    """
    # NOTE(review): an earlier ``test_transform_float`` exists in this file;
    # if both live in the same class, this definition is the one that runs.
    training = [1.0, float("NaN"), 2.0, 3.0]
    expected_rows = [
        [1, 0, 0],
        [0, 0, 1],
        [0, 0, 0],
        [0, 1, 0],
    ]

    # Dense path (default constructor arguments).
    dense = PMMLLabelBinarizer()
    dense.fit(training)
    dense_result = dense.transform([1.0, 3.0, float("NaN"), 2.0])
    self.assertIsInstance(dense_result, numpy.ndarray)
    self.assertEqual(expected_rows, dense_result.tolist())

    # Sparse path: same training data, freshly built query list.
    sparse = PMMLLabelBinarizer(sparse_output=True)
    sparse.fit(training)
    sparse_result = sparse.transform([1.0, 3.0, float("NaN"), 2.0])
    self.assertIsInstance(sparse_result, scipy.sparse.csr_matrix)
    self.assertEqual(dense_result.tolist(), sparse_result.toarray().tolist())
def test_transform_string(self):
    """Check dense and sparse transforms of string labels agree.

    The default binarizer must return a ``numpy.ndarray`` holding the
    expected one-hot rows (``None`` maps to an all-zero row). A binarizer
    built with ``sparse_output=True`` must return a
    ``scipy.sparse.csr_matrix`` whose densified content equals the dense
    result.
    """
    # NOTE(review): an earlier ``test_transform_string`` exists in this file;
    # if both live in the same class, this definition is the one that runs.
    training = ["A", None, "B", "C"]

    # Dense path (default constructor arguments).
    dense = PMMLLabelBinarizer()
    dense.fit(training)
    dense_result = dense.transform(["A", "C", None, "B"])
    self.assertIsInstance(dense_result, numpy.ndarray)
    self.assertEqual(
        [[1, 0, 0], [0, 0, 1], [0, 0, 0], [0, 1, 0]],
        dense_result.tolist(),
    )
    # A lone missing value and the full set of known labels.
    missing_only = dense.transform([None])
    self.assertEqual([[0, 0, 0]], missing_only.tolist())
    all_labels = dense.transform(["A", "B", "C"])
    self.assertEqual([[1, 0, 0], [0, 1, 0], [0, 0, 1]], all_labels.tolist())

    # Sparse path: same training data, freshly built query list.
    sparse = PMMLLabelBinarizer(sparse_output=True)
    sparse.fit(training)
    sparse_result = sparse.transform(["A", "C", None, "B"])
    self.assertIsInstance(sparse_result, scipy.sparse.csr_matrix)
    self.assertEqual(dense_result.tolist(), sparse_result.toarray().tolist())
def test_fit_string(self):
    """Check ``fit`` learns the string classes from several containers.

    An unfitted binarizer must not expose ``classes_``. After fitting on a
    plain list, a NumPy array, and a pandas ``Series`` of the same values,
    ``classes_`` must each time equal ``["A", "B", "C"]`` - the ``None``
    entries and the repeated labels are not expected to show up.
    """
    samples = ["A", None, "A", "B", None, "C", "C", "B"]
    expected_classes = ["A", "B", "C"]

    encoder = PMMLLabelBinarizer()
    self.assertFalse(hasattr(encoder, "classes_"))

    # Each converter wraps the raw samples right before the matching fit,
    # so the containers are built in the same order as they are consumed.
    converters = (
        lambda values: values,
        numpy.array,
        lambda values: Series(numpy.array(values)),
    )
    for convert in converters:
        encoder.fit(convert(samples))
        self.assertEqual(expected_classes, encoder.classes_.tolist())
def test_fit_float(self):
    """Check ``fit`` learns the float classes from a list containing NaN.

    After fitting, ``classes_`` must equal ``[1.0, 2.0, 3.0]`` - the NaN
    entries and the repeated labels are not expected to show up.
    """
    # Each NaN is created by its own ``float("NaN")`` call.
    samples = [1.0, float("NaN"), 1.0, 2.0, float("NaN"), 3.0, 3.0, 2.0]
    encoder = PMMLLabelBinarizer()
    encoder.fit(samples)
    learned = encoder.classes_.tolist()
    self.assertEqual([1.0, 2.0, 3.0], learned)