Exemple #1
0
class TestDigitsTreeIntegration(TestCase):
  """Check a PMML tree loaded from ``digits.pmml`` against a scikit-learn
  ``DecisionTreeClassifier`` fitted on the digits training split."""

  def setUp(self):
    """Create the held-out split, load the PMML model, fit the reference."""
    digits = load_digits()

    # Columns selected before calling the PMML classifier; the reference
    # tree always receives the full feature frame.
    # NOTE(review): presumably the features the PMML model declares -- confirm.
    self.columns = [
      2, 3, 4, 5, 6, 7, 9, 10, 13, 14, 17, 18, 19, 20, 21, 25, 26,
      27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 41, 42, 43, 45, 46,
      50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63,
    ]

    features = pd.DataFrame(digits.data)
    labels = pd.Series(np.array(digits.target_names)[digits.target])
    labels.name = "Class"

    X_train, X_test, y_train, y_test = train_test_split(
      features, labels, test_size=0.33, random_state=123
    )
    self.test = (X_test, y_test)

    model_file = path.join(BASE_DIR, '../models/digits.pmml')
    self.clf = PMMLTreeClassifier(model_file)
    self.ref = DecisionTreeClassifier(random_state=1).fit(X_train, y_train)

  def test_predict(self):
    """PMML predictions, cast to ``int64``, equal the reference labels."""
    features, _ = self.test
    expected = self.ref.predict(features)
    actual = self.clf.predict(features[self.columns]).astype(np.int64)
    assert np.array_equal(expected, actual)

  def test_predict_proba(self):
    """Class probabilities of both models are element-wise identical."""
    features, _ = self.test
    expected = self.ref.predict_proba(features)
    actual = self.clf.predict_proba(features[self.columns])
    assert np.array_equal(expected, actual)

  def test_score(self):
    """Both models report the same score on the held-out split."""
    features, labels = self.test
    expected = self.ref.score(features, labels)
    actual = self.clf.score(features[self.columns], labels)
    assert expected == actual
Exemple #2
0
class TestIrisTreeIntegration(TestCase):
  """Check a PMML tree loaded from ``sklearn2pmml.pmml`` against a
  scikit-learn ``DecisionTreeClassifier`` fitted on two iris features."""

  def setUp(self):
    """Create the held-out split, load the PMML model, fit the reference."""
    feature_idx = [0, 1]  # only the first two iris features are used
    iris = load_iris()

    features = pd.DataFrame(
      iris.data[:, feature_idx],
      columns=np.array(iris.feature_names)[feature_idx],
    )
    labels = pd.Series(np.array(iris.target_names)[iris.target])
    labels.name = "Class"

    X_train, X_test, y_train, y_test = train_test_split(
      features, labels, test_size=0.33, random_state=123
    )
    self.test = (X_test, y_test)

    self.clf = PMMLTreeClassifier(
      pmml=path.join(BASE_DIR, '../models/sklearn2pmml.pmml')
    )
    self.ref = DecisionTreeClassifier(random_state=1).fit(X_train, y_train)

  def test_predict(self):
    """Predicted labels of both models are identical."""
    features, _ = self.test
    expected = self.ref.predict(features)
    actual = self.clf.predict(features)
    assert np.array_equal(expected, actual)

  def test_predict_proba(self):
    """Class probabilities of both models are element-wise identical."""
    features, _ = self.test
    expected = self.ref.predict_proba(features)
    actual = self.clf.predict_proba(features)
    assert np.array_equal(expected, actual)

  def test_score(self):
    """Both models report the same score on the held-out split."""
    features, labels = self.test
    expected = self.ref.score(features, labels)
    actual = self.clf.score(features, labels)
    assert expected == actual
Exemple #3
0
class TestDigitsTreeIntegration(TestCase):
    """Check a PMML tree loaded from ``tree-digits.pmml`` against a
    scikit-learn ``DecisionTreeClassifier`` fitted on the digits data."""

    def setUp(self):
        """Create the held-out split, load the PMML model, fit the reference."""
        digits = load_digits()

        features = pd.DataFrame(digits.data)
        labels = pd.Series(np.array(digits.target_names)[digits.target])
        labels.name = "Class"

        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.33, random_state=123
        )
        self.test = (X_test, y_test)

        model_file = path.join(BASE_DIR, '../models/tree-digits.pmml')
        self.clf = PMMLTreeClassifier(model_file)
        self.ref = DecisionTreeClassifier(random_state=1).fit(X_train, y_train)

    def test_predict(self):
        """Predicted labels of both models are identical."""
        features, _ = self.test
        expected = self.ref.predict(features)
        actual = self.clf.predict(features)
        assert np.array_equal(expected, actual)

    def test_predict_proba(self):
        """Class probabilities of both models are element-wise identical."""
        features, _ = self.test
        expected = self.ref.predict_proba(features)
        actual = self.clf.predict_proba(features)
        assert np.array_equal(expected, actual)

    def test_score(self):
        """Both models report the same score on the held-out split."""
        features, labels = self.test
        expected = self.ref.score(features, labels)
        actual = self.clf.score(features, labels)
        assert expected == actual
class TestIrisTreeIntegration(TestCase):
  """Check a PMML tree loaded from ``decisionTree.pmml`` against a
  scikit-learn ``DecisionTreeClassifier`` fitted on two iris features, and
  verify an export/import round trip through sklearn2pmml."""

  def setUp(self):
    """Create the train/test split, load the PMML model, fit the reference."""
    feature_idx = [0, 1]  # only the first two iris features are used
    iris = load_iris()

    features = pd.DataFrame(
      iris.data[:, feature_idx],
      columns=np.array(iris.feature_names)[feature_idx],
    )
    labels = pd.Series(np.array(iris.target_names)[iris.target])
    labels.name = "Class"

    X_train, X_test, y_train, y_test = train_test_split(
      features, labels, test_size=0.33, random_state=123
    )
    self.test = (X_test, y_test)
    self.train = (X_train, y_train)

    self.clf = PMMLTreeClassifier(
      pmml=path.join(BASE_DIR, '../models/decisionTree.pmml')
    )
    self.ref = DecisionTreeClassifier(random_state=1).fit(X_train, y_train)

  def test_predict(self):
    """Predicted labels of both models are identical."""
    features, _ = self.test
    expected = self.ref.predict(features)
    actual = self.clf.predict(features)
    assert np.array_equal(expected, actual)

  def test_predict_proba(self):
    """Class probabilities of both models are element-wise identical."""
    features, _ = self.test
    expected = self.ref.predict_proba(features)
    actual = self.clf.predict_proba(features)
    assert np.array_equal(expected, actual)

  def test_score(self):
    """Both models report the same score on the held-out split."""
    features, labels = self.test
    expected = self.ref.score(features, labels)
    actual = self.clf.score(features, labels)
    assert expected == actual

  def test_sklearn2pmml(self):
    """Export the reference tree with sklearn2pmml, re-import the file and
    compare class probabilities with the reference."""
    exported = "tree_sklearn2pmml.pmml"
    X_train, y_train = self.train

    # Export: wrap the reference estimator in a pipeline, refit it on the
    # training split and write it to the working directory.
    pipeline = PMMLPipeline([("classifier", self.ref)])
    pipeline.fit(X_train, y_train)
    sklearn2pmml(pipeline, exported, with_repr=True)

    try:
      # Import the file that was just written.
      reloaded = PMMLTreeClassifier(pmml=exported)

      # Verify classification.
      features, _ = self.test
      expected = self.ref.predict_proba(features)
      actual = reloaded.predict_proba(features)
      assert np.array_equal(expected, actual)
    finally:
      # Delete the exported file whether or not the comparison passed.
      remove(exported)
Exemple #5
0
class TestIrisTreeIntegration(TestCase):
    """Check a PMML tree loaded from ``tree-iris.pmml`` against a
    scikit-learn ``DecisionTreeClassifier`` fitted on the iris data frame,
    and verify an export/import round trip through sklearn2pmml."""

    def setUp(self):
        """Load iris as a frame, load the PMML model and fit the reference.

        No train/test split is made: the same ``(X, y)`` pair is used to fit
        the reference tree and is stored in ``self.test`` for evaluation.
        """
        data = load_iris(as_frame=True)

        X = data.data
        y = data.target
        y.name = "Class"
        self.test = (X, y)

        pmml = path.join(BASE_DIR, '../models/tree-iris.pmml')
        self.clf = PMMLTreeClassifier(pmml=pmml)
        self.ref = DecisionTreeClassifier(random_state=1).fit(X, y)

    def test_predict(self):
        """Predicted labels of both models are identical."""
        Xte, _ = self.test
        assert np.array_equal(self.ref.predict(Xte), self.clf.predict(Xte))

    def test_predict_proba(self):
        """Class probabilities of both models are element-wise identical."""
        Xte, _ = self.test
        assert np.array_equal(self.ref.predict_proba(Xte),
                              self.clf.predict_proba(Xte))

    def test_score(self):
        """Both models report the same score on the stored data."""
        Xte, yte = self.test
        assert self.ref.score(Xte, yte) == self.clf.score(Xte, yte)

    def test_sklearn2pmml(self):
        """Export the reference tree with sklearn2pmml, re-import the file
        and compare class probabilities with the reference."""
        # Single source of truth for the temporary export file name, so the
        # export, import and clean-up steps cannot drift apart.
        exported = "tree-sklearn2pmml.pmml"
        X, y = self.test

        # Export to PMML
        pipeline = PMMLPipeline([("classifier", self.ref)])
        pipeline.fit(X, y)
        sklearn2pmml(pipeline, exported, with_repr=True)

        try:
            # Import PMML
            model = PMMLTreeClassifier(pmml=exported)

            # Verify classification
            assert np.array_equal(self.ref.predict_proba(X),
                                  model.predict_proba(X))

        finally:
            # Delete the exported file whether or not the comparison passed.
            remove(exported)
Exemple #6
0
class TestCategoricalPimaTreeIntegration(TestCase):
  """Check a PMML tree loaded from ``categorical.pmml`` against hard-coded
  reference probabilities and a hard-coded reference score, using the rows
  of ``categorical-test.csv``."""

  def setUp(self):
    """Read the test CSV, encode ``age`` as category codes, load the model."""
    df = pd.read_csv(path.join(BASE_DIR, '../models/categorical-test.csv'))

    # Replace 'age' by integer category codes; the category order is the
    # sorted unique values returned by np.unique.
    cats = np.unique(df['age'])
    df['age'] = pd.Categorical(df['age'], categories=cats)
    df['age'] = df['age'].cat.codes

    # First column is the target, the remaining columns are the features.
    Xte = df.iloc[:, 1:]
    yte = df.iloc[:, 0]
    self.test = (Xte, yte)

    pmml = path.join(BASE_DIR, '../models/categorical.pmml')
    self.clf = PMMLTreeClassifier(pmml)

  def test_predict_proba(self):
    """Predicted class probabilities match the stored reference values."""
    Xte, _ = self.test
    ref = np.array([
      [0.1568627450980392, 0.84313725490196079],
      [0.7500000000000000, 0.25000000000000000],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.2000000000000000, 0.80000000000000004],
      [0.2000000000000000, 0.80000000000000004],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.2000000000000000, 0.80000000000000004],
      [0.9428571428571428, 0.05714285714285714],
      [0.2000000000000000, 0.80000000000000004],
      [0.2000000000000000, 0.80000000000000004],
      [0.9428571428571428, 0.05714285714285714],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.7368421052631579, 0.26315789473684209],
      [0.1568627450980392, 0.84313725490196079],
      [0.2000000000000000, 0.80000000000000004],
      [0.2000000000000000, 0.80000000000000004],
      [0.2000000000000000, 0.80000000000000004],
      [0.9428571428571428, 0.05714285714285714],
      [0.9428571428571428, 0.05714285714285714],
      [0.7368421052631579, 0.26315789473684209],
      [0.9428571428571428, 0.05714285714285714],
      [0.9428571428571428, 0.05714285714285714],
      [0.7368421052631579, 0.26315789473684209],
      [0.9428571428571428, 0.05714285714285714],
      [0.7368421052631579, 0.26315789473684209],
      [0.7500000000000000, 0.25000000000000000],
      [0.7368421052631579, 0.26315789473684209],
      [0.1568627450980392, 0.84313725490196079],
      [0.2000000000000000, 0.80000000000000004],
      [0.7368421052631579, 0.26315789473684209],
      [0.9428571428571428, 0.05714285714285714],
      [0.9428571428571428, 0.05714285714285714],
      [0.1568627450980392, 0.84313725490196079],
      [0.7368421052631579, 0.26315789473684209],
      [0.1568627450980392, 0.84313725490196079],
      [0.1568627450980392, 0.84313725490196079],
      [0.7368421052631579, 0.26315789473684209],
      [0.7368421052631579, 0.26315789473684209],
      [0.1568627450980392, 0.84313725490196079],
      [0.9428571428571428, 0.05714285714285714],
      [0.7368421052631579, 0.26315789473684209],
      [0.2000000000000000, 0.80000000000000004]
    ])
    # Compare against decimal literals with a tight relative tolerance
    # rather than bit-exact equality, so a last-digit rounding difference
    # in how the probabilities are computed does not fail the test.
    # assert_allclose also rejects shape mismatches and reports the
    # offending elements on failure.
    np.testing.assert_allclose(self.clf.predict_proba(Xte), ref, rtol=1e-9)

  def test_score(self):
    """The model's score on the CSV rows matches the stored reference."""
    Xte, yte = self.test
    ref = 0.7692307692307693
    # Tolerance-based comparison for the same reason as in
    # test_predict_proba: the reference is a hard-coded float literal.
    np.testing.assert_allclose(self.clf.score(Xte, yte), ref, rtol=1e-9)