Example #1
0
 def test_score_max_features(self):
     """Score with max_features=2 must match the recorded accuracy."""
     samples, labels = load_dataset(self._random_state)
     model = Stree(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
         max_features=2,
     )
     model.fit(samples, labels)
     self.assertAlmostEqual(0.9453333333333334, model.score(samples, labels))
Example #2
0
    def test_muticlass_dataset(self):
        """Check multiclass accuracy for every kernel on two datasets.

        NOTE(review): ``criteria`` only selects the expected-outcome key
        and is never passed to ``Stree``, so both criteria rows assert
        against the same fitted model — confirm whether a
        ``split_criteria=criteria`` argument was intended.
        """
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        # NOTE(review): the "Iris" entry actually holds the wine dataset.
        datasets = {
            "Synt": load_dataset(random_state=self._random_state, n_classes=3),
            "Iris": load_wine(return_X_y=True),
        }
        outcomes = {
            "Synt": {
                "max_samples liblinear": 0.9493333333333334,
                "max_samples linear": 0.9426666666666667,
                "max_samples rbf": 0.9606666666666667,
                "max_samples poly": 0.9373333333333334,
                "max_samples sigmoid": 0.824,
                "impurity liblinear": 0.9493333333333334,
                "impurity linear": 0.9426666666666667,
                "impurity rbf": 0.9606666666666667,
                "impurity poly": 0.9373333333333334,
                "impurity sigmoid": 0.824,
            },
            "Iris": {
                "max_samples liblinear": 0.9550561797752809,
                "max_samples linear": 1.0,
                "max_samples rbf": 0.6685393258426966,
                "max_samples poly": 0.6853932584269663,
                "max_samples sigmoid": 0.6404494382022472,
                "impurity liblinear": 0.9550561797752809,
                "impurity linear": 1.0,
                "impurity rbf": 0.6685393258426966,
                "impurity poly": 0.6853932584269663,
                "impurity sigmoid": 0.6404494382022472,
            },
        }

        for name, (px, py) in datasets.items():
            for criteria in ("max_samples", "impurity"):
                for kernel in self._kernels:
                    strategy = "ovr" if kernel == "liblinear" else "ovo"
                    classifier = Stree(
                        max_iter=1e4,
                        multiclass_strategy=strategy,
                        kernel=kernel,
                        random_state=self._random_state,
                    )
                    classifier.fit(px, py)
                    expected = outcomes[name][f"{criteria} {kernel}"]
                    self.assertAlmostEqual(
                        expected,
                        classifier.score(px, py),
                        5,
                        f"{name} - {criteria} - {kernel}",
                    )
Example #3
0
 def test_simple_muticlass_dataset(self):
     """A trivially separable 3-class problem must be fit perfectly."""
     samples = [[1, 2], [5, 6], [9, 10]]
     labels = [0, 1, 2]
     for kernel in self._kernels:
         strategy = "ovr" if kernel == "liblinear" else "ovo"
         model = Stree(
             kernel=kernel,
             multiclass_strategy=strategy,
             random_state=self._random_state,
         )
         model.fit(samples, labels)
         self.assertEqual(1.0, model.score(samples, labels))
         self.assertListEqual(labels, model.predict(samples).tolist())
         self.assertListEqual(labels, model.classes_.tolist())
Example #4
0
 def test_multiclass_classifier_integrity(self):
     """Checks if the multiclass operation is done right.

     Fits an OvR liblinear Stree on iris, cross-checks its accuracy and
     internal splits against a plain LinearSVC's decision function.

     FIX: replaced the deprecated ``assertAlmostEquals`` alias (removed
     in Python 3.12) with ``assertAlmostEqual`` at all three call sites.
     """
     X, y = load_iris(return_X_y=True)
     clf = Stree(kernel="liblinear",
                 multiclass_strategy="ovr",
                 random_state=0)
     clf.fit(X, y)
     score = clf.score(X, y)
     # Check accuracy of the whole model
     self.assertAlmostEqual(0.98, score, 5)
     svm = LinearSVC(random_state=0)
     svm.fit(X, y)
     self.assertAlmostEqual(0.9666666666666667, svm.score(X, y), 5)
     data = svm.decision_function(X)
     # Expected Gini impurity of each per-class decision column once
     # binarized (sign of the decision function).
     expected = [
         0.4444444444444444,
         0.35777777777777775,
         0.4569777777777778,
     ]
     ty = data.copy()
     ty[data <= 0] = 0
     ty[data > 0] = 1
     ty = ty.astype(int)
     for i in range(3):
         self.assertAlmostEqual(
             expected[i],
             clf.splitter_._gini(ty[:, i]),
         )
     # 1st Branch
     # up has to have 50 samples of class 0
     # down should have 100 [50, 50]
     up = data[:, 2] > 0
     resup = np.unique(y[up], return_counts=True)
     resdn = np.unique(y[~up], return_counts=True)
     self.assertListEqual([1, 2], resup[0].tolist())
     self.assertListEqual([3, 50], resup[1].tolist())
     self.assertListEqual([0, 1], resdn[0].tolist())
     self.assertListEqual([50, 47], resdn[1].tolist())
     # 2nd Branch
     # up  should have 53 samples of classes [1, 2] [3, 50]
     # down shoud have 47 samples of class 1
     node_up = clf.tree_.get_down().get_up()
     node_dn = clf.tree_.get_down().get_down()
     resup = np.unique(node_up._y, return_counts=True)
     resdn = np.unique(node_dn._y, return_counts=True)
     self.assertListEqual([1, 2], resup[0].tolist())
     self.assertListEqual([3, 50], resup[1].tolist())
     self.assertListEqual([1], resdn[0].tolist())
     self.assertListEqual([47], resdn[1].tolist())
Example #5
0
 def test_mask_samples_weighted_zero(self):
     """Samples carrying weight 0 must not influence the fitted tree."""
     # Three identical rows per value: [1,1]x3, [2,2]x3, [3,3]x3.
     features = np.array([[v, v] for v in (1, 1, 1, 2, 2, 2, 3, 3, 3)])
     labels = np.array([1, 1, 1, 2, 2, 2, 5, 5, 5])
     # With class-2 rows zero-weighted they collapse into class 1.
     expected_weighted = np.array([1, 1, 1, 1, 1, 1, 5, 5, 5])
     weights = [1, 1, 1, 0, 0, 0, 1, 1, 1]
     plain_model = Stree().fit(features, labels)
     weighted_model = Stree().fit(features, labels, weights)
     self.assertListEqual(
         labels.tolist(), plain_model.predict(features).tolist()
     )
     self.assertListEqual(
         expected_weighted.tolist(),
         weighted_model.predict(features).tolist(),
     )
     self.assertEqual(plain_model.score(features, labels), 1)
     self.assertAlmostEqual(weighted_model.score(features, labels), 0.66666667)
     self.assertEqual(weighted_model.score(features, labels, weights), 1)
Example #6
0
 def test_score_binary(self):
     """score() must equal the fraction of correct predictions, per kernel."""
     X, y = load_dataset(self._random_state)
     expected_scores = (
         0.9506666666666667,
         0.9493333333333334,
         0.9606666666666667,
         0.9433333333333334,
         0.9153333333333333,
     )
     for kernel, expected in zip(self._kernels, expected_scores):
         model = Stree(
             random_state=self._random_state,
             multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
             kernel=kernel,
         )
         model.fit(X, y)
         reported = model.score(X, y)
         # Recompute accuracy by hand from the predictions.
         computed = np.mean(model.predict(X) == y)
         self.assertEqual(reported, computed)
         self.assertAlmostEqual(expected, reported)