Example #1
  def test_fit_exception(self):
    with self.assertRaises(Exception) as cm:
      pmml = path.join(BASE_DIR, '../models/categorical-rf.pmml')
      clf = PMMLForestClassifier(pmml)
      clf.fit(np.array([[]]), np.array([]))

    assert str(cm.exception) == 'Not supported.'
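
These snippets come from a unittest-style test module and rely on a shared preamble that the excerpts do not show. The sketch below is an assumption about what that preamble would need to provide; in particular, the sklearn_pmml_model.ensemble import path and the BASE_DIR definition are illustrative rather than quoted from the original file.

# Assumed shared preamble for the test snippets (not shown in the excerpts).
from os import path
from io import StringIO
from unittest import TestCase
from warnings import catch_warnings

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# PMMLForestClassifier is exposed from the ensemble module of sklearn-pmml-model;
# this import path is an assumption based on the package's public API.
from sklearn_pmml_model.ensemble import PMMLForestClassifier

# Model fixtures are resolved relative to the test file's directory.
BASE_DIR = path.dirname(__file__)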
Example #2
  def setUp(self):
    df = pd.read_csv(path.join(BASE_DIR, '../models/categorical-test.csv'))
    cats = np.unique(df['age'])
    df['age'] = pd.Categorical(df['age'], categories=cats)
    Xte = df.iloc[:, 1:]
    yte = df.iloc[:, 0]
    self.test = (Xte, yte)

    pmml = path.join(BASE_DIR, '../models/categorical-rf.pmml')
    self.clf = PMMLForestClassifier(pmml)
Example #3
  def setUp(self):
    from sklearn.datasets import load_iris
    iris = load_iris()
    X = pd.DataFrame(iris.data)
    X.columns = np.array(['sepal_length', 'sepal_width', 'petal_length',
                          'petal_width'])
    y = pd.Series(np.array(['setosa', 'versicolor', 'virginica'])[iris.target])
    y.name = "Class"
    self.test = X, y

    self.clf = PMMLForestClassifier(path.join(BASE_DIR, '../models/randomForest.pmml'))
    self.ref = RandomForestClassifier(random_state=1).fit(X, y)
Example #4
  def test_non_true_segment(self):
    with self.assertRaises(Exception), catch_warnings(record=True) as w:
      PMMLForestClassifier(pmml=StringIO("""
       <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
         <DataDictionary>
           <DataField name="Class" optype="categorical" dataType="string">
             <Value value="setosa"/>
             <Value value="versicolor"/>
             <Value value="virginica"/>
           </DataField>
         </DataDictionary>
         <MiningModel>
           <MiningSchema>
             <MiningField name="Class" usageType="target"/>
           </MiningSchema>
           <Segmentation multipleModelMethod="majorityVote">
             <Segment>
               <False/>
             </Segment>
             <Segment>
               <True/>
             </Segment>
           </Segmentation>
         </MiningModel>
       </PMML>
       """))
    assert len(w) == 1
Example #5
  def test_non_binary_tree(self):
    with self.assertRaises(Exception) as cm:
      PMMLForestClassifier(pmml=StringIO("""
      <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
        <DataDictionary>
          <DataField name="Class" optype="categorical" dataType="string">
            <Value value="setosa"/>
            <Value value="versicolor"/>
            <Value value="virginica"/>
          </DataField>
        </DataDictionary>
        <MiningModel>
          <MiningSchema>
            <MiningField name="Class" usageType="target"/>
          </MiningSchema>
          <Segmentation multipleModelMethod="majorityVote">
            <Segment>
              <True/>
              <TreeModel splitCharacteristic="multiSplit" />
            </Segment>
          </Segmentation>
        </MiningModel>
      </PMML>
      """))

    assert str(cm.exception) == 'Sklearn only supports binary tree models.'
Example #6
    def test_sklearn2pmml(self):
        # Export to PMML
        pipeline = PMMLPipeline([("classifier", self.ref)])
        pipeline.fit(self.test[0], self.test[1])

        sklearn2pmml(pipeline, "rf-sklearn2pmml.pmml", with_repr=True)

        try:
            # Import PMML
            model = PMMLForestClassifier(pmml='rf-sklearn2pmml.pmml')

            # Verify classification
            Xte, _ = self.test
            assert np.array_equal(self.ref.predict_proba(Xte),
                                  model.predict_proba(Xte))

        finally:
            remove("rf-sklearn2pmml.pmml")
Example #7
  def test_invalid_model(self):
    with self.assertRaises(Exception) as cm:
      PMMLForestClassifier(pmml=StringIO("""
      <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
        <DataDictionary>
          <DataField name="Class" optype="categorical" dataType="string">
            <Value value="setosa"/>
            <Value value="versicolor"/>
            <Value value="virginica"/>
          </DataField>
        </DataDictionary>
        <MiningSchema>
          <MiningField name="Class" usageType="target"/>
        </MiningSchema>
      </PMML>
      """))

    assert str(cm.exception) == 'PMML model does not contain MiningModel.'
Example #8
  def test_non_voting_ensemble(self):
    with self.assertRaises(Exception) as cm:
      PMMLForestClassifier(pmml=StringIO("""
      <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
        <DataDictionary>
          <DataField name="Class" optype="categorical" dataType="string">
            <Value value="setosa"/>
            <Value value="versicolor"/>
            <Value value="virginica"/>
          </DataField>
        </DataDictionary>
        <MiningModel>
          <MiningSchema>
            <MiningField name="Class" usageType="target"/>
          </MiningSchema>
          <Segmentation multipleModelMethod="mean" />
        </MiningModel>
      </PMML>
      """))

    assert str(cm.exception) == 'PMML model ensemble should use majority vote or average.'
Example #9
class TestCategoricalPimaForestIntegration(TestCase):
  def setUp(self):
    df = pd.read_csv(path.join(BASE_DIR, '../models/categorical-test.csv'))
    cats = np.unique(df['age'])
    df['age'] = pd.Categorical(df['age'], categories=cats)
    Xte = df.iloc[:, 1:]
    yte = df.iloc[:, 0]
    self.test = (Xte, yte)

    pmml = path.join(BASE_DIR, '../models/categorical-rf.pmml')
    self.clf = PMMLForestClassifier(pmml)

  def test_predict_proba(self):
    Xte, _ = self.test
    ref = np.array([
      [0.2200000000000000, 0.7800000000000000],
      [0.5800000000000000, 0.4200000000000000],
      [0.2200000000000000, 0.7800000000000000],
      [0.1800000000000000, 0.8200000000000000],
      [0.0800000000000000, 0.9200000000000000],
      [0.3000000000000000, 0.7000000000000000],
      [0.2600000000000000, 0.7400000000000000],
      [0.3600000000000000, 0.6400000000000000],
      [0.4000000000000000, 0.6000000000000000],
      [0.1600000000000000, 0.8400000000000000],
      [0.2200000000000000, 0.7800000000000000],
      [0.3000000000000000, 0.7000000000000000],
      [0.1400000000000000, 0.8600000000000000],
      [0.6400000000000000, 0.3600000000000000],
      [0.1800000000000000, 0.8200000000000000],
      [0.1800000000000000, 0.8200000000000000],
      [0.7600000000000000, 0.2400000000000000],
      [0.2400000000000000, 0.7600000000000000],
      [0.3400000000000000, 0.6600000000000000],
      [0.2800000000000000, 0.7200000000000000],
      [0.0800000000000000, 0.9200000000000000],
      [0.2000000000000000, 0.8000000000000000],
      [0.6800000000000000, 0.3200000000000000],
      [0.1200000000000000, 0.8800000000000000],
      [0.2200000000000000, 0.7800000000000000],
      [0.3600000000000000, 0.6400000000000000],
      [0.2000000000000000, 0.8000000000000000],
      [0.8600000000000000, 0.1400000000000000],
      [0.9399999999999999, 0.0600000000000000],
      [0.7200000000000000, 0.2800000000000000],
      [0.5600000000000001, 0.4400000000000000],
      [0.9800000000000000, 0.0200000000000000],
      [0.4400000000000000, 0.5600000000000001],
      [0.8800000000000000, 0.1200000000000000],
      [0.6600000000000000, 0.3400000000000000],
      [0.5000000000000000, 0.5000000000000000],
      [0.7400000000000000, 0.2600000000000000],
      [0.2600000000000000, 0.7400000000000000],
      [0.1600000000000000, 0.8400000000000000],
      [0.6800000000000000, 0.3200000000000000],
      [0.7600000000000000, 0.2400000000000000],
      [0.7400000000000000, 0.2600000000000000],
      [0.5600000000000001, 0.4400000000000000],
      [0.5400000000000000, 0.4600000000000000],
      [0.5200000000000000, 0.4800000000000000],
      [0.1400000000000000, 0.8600000000000000],
      [0.7600000000000000, 0.2400000000000000],
      [0.8200000000000000, 0.1800000000000000],
      [0.4400000000000000, 0.5600000000000001],
      [0.9200000000000000, 0.0800000000000000],
      [0.5600000000000001, 0.4400000000000000],
      [0.2800000000000000, 0.7200000000000000]
    ])
    assert np.allclose(ref, self.clf.predict_proba(Xte))

  def test_score(self):
    Xte, yte = self.test
    ref = 0.7884615384615384
    assert ref == self.clf.score(Xte, yte)
Example #10
class TestIrisForestIntegration(TestCase):
  def setUp(self):
    from sklearn.datasets import load_iris
    iris = load_iris()
    X = pd.DataFrame(iris.data)
    X.columns = np.array(['sepal_length', 'sepal_width', 'petal_length',
                          'petal_width'])
    y = pd.Series(np.array(['setosa', 'versicolor', 'virginica'])[iris.target])
    y.name = "Class"
    self.test = X, y

    self.clf = PMMLForestClassifier(path.join(BASE_DIR, '../models/randomForest.pmml'))
    self.ref = RandomForestClassifier(random_state=1).fit(X, y)

  def test_predict_proba(self):
    Xte, _ = self.test
    ref = np.array([
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.985, 0.015, 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.99, 0.01, 0.],
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.98, 0.02, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.98, 0.015, 0.005],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 0.595, 0.405],
        [0., 0.995, 0.005],
        [0., 0.99, 0.01],
        [0., 0.995, 0.005],
        [0., 0.97, 0.03],
        [0.005, 0.805, 0.19],
        [0., 1., 0.],
        [0.005, 0.985, 0.01],
        [0., 0.95, 0.05],
        [0., 0.995, 0.005],
        [0., 0.985, 0.015],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 0.99, 0.01],
        [0., 0.995, 0.005],
        [0., 0.645, 0.355],
        [0., 1., 0.],
        [0., 0.76, 0.24],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0.85, 0.15],
        [0., 0.025, 0.975],
        [0., 0.995, 0.005],
        [0., 0.995, 0.005],
        [0., 0.995, 0.005],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 0.085, 0.915],
        [0.025, 0.96, 0.015],
        [0.01, 0.945, 0.045],
        [0., 1., 0.],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 0.995, 0.005],
        [0., 0.98, 0.02],
        [0., 0.995, 0.005],
        [0., 0.99, 0.01],
        [0., 0.955, 0.045],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0.99, 0.01],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0.01, 0.99],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.245, 0.755],
        [0., 0., 1.],
        [0., 0.065, 0.935],
        [0., 0., 1.],
        [0., 0.01, 0.99],
        [0., 0., 1.],
        [0., 0.005, 0.995],
        [0., 0.055, 0.945],
        [0., 0.005, 0.995],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.015, 0.985],
        [0., 0.19, 0.81],
        [0., 0., 1.],
        [0., 0.11, 0.89],
        [0., 0., 1.],
        [0., 0.07, 0.93],
        [0., 0., 1.],
        [0., 0.005, 0.995],
        [0., 0.12, 0.88],
        [0., 0.035, 0.965],
        [0., 0., 1.],
        [0., 0.05, 0.95],
        [0., 0., 1.],
        [0., 0.005, 0.995],
        [0., 0., 1.],
        [0., 0.115, 0.885],
        [0., 0.09, 0.91],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.18, 0.82],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.005, 0.995],
        [0., 0.01, 0.99],
        [0., 0.005, 0.995],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.09, 0.91],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.025, 0.975]
    ])

    assert np.array_equal(ref, self.clf.predict_proba(Xte))

  def test_score(self):
    Xte, yte = self.test
    ref = 0.9866666666666667
    assert ref == self.clf.score(Xte, yte)

  def test_sklearn2pmml(self):
    # Export to PMML
    pipeline = PMMLPipeline([
      ("classifier", self.ref)
    ])
    pipeline.fit(self.test[0], self.test[1])

    sklearn2pmml(pipeline, "forest_sklearn2pmml.pmml", with_repr = True)

    try:
      # Import PMML
      model = PMMLForestClassifier(pmml='forest_sklearn2pmml.pmml')

      # Verify classification
      Xte, _ = self.test
      assert np.array_equal(
        self.ref.predict_proba(Xte),
        model.predict_proba(Xte)
      )

    finally:
      remove("forest_sklearn2pmml.pmml")
Example #11
class TestIrisForestIntegration(TestCase):
  def setUp(self):
    from sklearn.datasets import load_iris
    iris = load_iris()
    X = pd.DataFrame(iris.data)
    X.columns = np.array(['sepal_length', 'sepal_width', 'petal_length',
                          'petal_width'])
    y = pd.Series(np.array(['Iris-setosa', 'Iris-versicolor',
                            'Iris-virginica'])[iris.target])
    y.name = "Class"
    self.test = X, y

    self.clf = PMMLForestClassifier(path.join(BASE_DIR, '../models/iris.pmml'))

  def test_predict_proba(self):
    Xte, _ = self.test
    ref = np.array([
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.985, 0.015, 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.99, 0.01, 0.],
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.995, 0.005, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.98, 0.02, 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0.98, 0.015, 0.005],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 0.595, 0.405],
        [0., 0.995, 0.005],
        [0., 0.99, 0.01],
        [0., 0.995, 0.005],
        [0., 0.97, 0.03],
        [0.005, 0.805, 0.19],
        [0., 1., 0.],
        [0.005, 0.985, 0.01],
        [0., 0.95, 0.05],
        [0., 0.995, 0.005],
        [0., 0.985, 0.015],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 0.99, 0.01],
        [0., 0.995, 0.005],
        [0., 0.645, 0.355],
        [0., 1., 0.],
        [0., 0.76, 0.24],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0.85, 0.15],
        [0., 0.025, 0.975],
        [0., 0.995, 0.005],
        [0., 0.995, 0.005],
        [0., 0.995, 0.005],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 0.085, 0.915],
        [0.025, 0.96, 0.015],
        [0.01, 0.945, 0.045],
        [0., 1., 0.],
        [0., 0.995, 0.005],
        [0., 1., 0.],
        [0., 0.995, 0.005],
        [0., 0.98, 0.02],
        [0., 0.995, 0.005],
        [0., 0.99, 0.01],
        [0., 0.955, 0.045],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0.99, 0.01],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0.01, 0.99],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.245, 0.755],
        [0., 0., 1.],
        [0., 0.065, 0.935],
        [0., 0., 1.],
        [0., 0.01, 0.99],
        [0., 0., 1.],
        [0., 0.005, 0.995],
        [0., 0.055, 0.945],
        [0., 0.005, 0.995],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.015, 0.985],
        [0., 0.19, 0.81],
        [0., 0., 1.],
        [0., 0.11, 0.89],
        [0., 0., 1.],
        [0., 0.07, 0.93],
        [0., 0., 1.],
        [0., 0.005, 0.995],
        [0., 0.12, 0.88],
        [0., 0.035, 0.965],
        [0., 0., 1.],
        [0., 0.05, 0.95],
        [0., 0., 1.],
        [0., 0.005, 0.995],
        [0., 0., 1.],
        [0., 0.115, 0.885],
        [0., 0.09, 0.91],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.18, 0.82],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.005, 0.995],
        [0., 0.01, 0.99],
        [0., 0.005, 0.995],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.09, 0.91],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0.025, 0.975]
    ])

    assert np.array_equal(ref, self.clf.predict_proba(Xte))

  def test_score(self):
    Xte, yte = self.test
    ref = 0.9866666666666667
    assert ref == self.clf.score(Xte, yte)
Example #12
  def test_more_tags(self):
    clf = PMMLForestClassifier(
        path.join(BASE_DIR, '../models/rf-cat-pima.pmml'))
    assert clf._more_tags() == RandomForestClassifier()._more_tags()
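
Outside a test harness, the API exercised above reduces to constructing the classifier from a PMML document and calling the usual scikit-learn prediction methods. A minimal usage sketch follows; the file names are placeholders and the import path is the same assumption as in the preamble under Example #1.

import pandas as pd
from sklearn_pmml_model.ensemble import PMMLForestClassifier  # assumed import path

# Placeholder file names, for illustration only.
clf = PMMLForestClassifier(pmml='my-forest.pmml')

# Feature columns (and categorical dtypes, where the PMML uses categorical splits)
# must line up with the fields declared in the document's DataDictionary/MiningSchema.
X = pd.read_csv('new-samples.csv')
probabilities = clf.predict_proba(X)  # per-class probabilities, as asserted in the tests above
labels = clf.predict(X)               # predicted class labels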