Example 1
 def test_iterator_and_str(self):
     """Check preorder iterator"""
     expected = [
         "root feaures=(0, 1, 2) impurity=1.0000 counts=(array([0, 1]), "
         "array([750, 750]))",
         "root - Down(2), <cgaf> - Leaf class=0 belief= 0.928297 impurity="
         "0.3722 counts=(array([0, 1]), array([725,  56]))",
         "root - Up(2) feaures=(0, 1, 2) impurity=0.2178 counts=(array([0, "
         "1]), array([ 25, 694]))",
         "root - Up(2) - Down(3) feaures=(0, 1, 2) impurity=0.8454 counts="
         "(array([0, 1]), array([8, 3]))",
         "root - Up(2) - Down(3) - Down(4), <pure> - Leaf class=0 belief= "
         "1.000000 impurity=0.0000 counts=(array([0]), array([7]))",
         "root - Up(2) - Down(3) - Up(4), <cgaf> - Leaf class=1 belief= "
         "0.750000 impurity=0.8113 counts=(array([0, 1]), array([1, 3]))",
         "root - Up(2) - Up(3), <cgaf> - Leaf class=1 belief= 0.975989 "
         "impurity=0.1634 counts=(array([0, 1]), array([ 17, 691]))",
     ]
     computed = []
     expected_string = ""
     clf = Stree(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
     )
     clf.fit(*load_dataset(self._random_state))
     for node in iter(clf):
         computed.append(str(node))
         expected_string += str(node) + "\n"
     self.assertListEqual(expected, computed)
     self.assertEqual(expected_string, str(clf))
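A minimal sketch of the preorder traversal implied by the expected strings above (each node is visited first, then its down subtree, then its up subtree); get_down() and get_up() are assumed to return a child node or None, as elsewhere in these tests:

    def preorder(node):
        # Visit the node itself, then the down subtree, then the up subtree,
        # matching the order of the expected strings in the test above.
        if node is None:
            return
        yield node
        yield from preorder(node.get_down())
        yield from preorder(node.get_up())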
Example 2
 def test_min_samples_split(self):
     dataset = [[1], [2], [3]], [1, 1, 0]
     tcl_split = Stree(min_samples_split=3).fit(*dataset)
     self.assertIsNotNone(tcl_split.tree_.get_down())
     self.assertIsNotNone(tcl_split.tree_.get_up())
     tcl_nosplit = Stree(min_samples_split=4).fit(*dataset)
     self.assertIsNone(tcl_nosplit.tree_.get_down())
     self.assertIsNone(tcl_nosplit.tree_.get_up())
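The assertions above follow from the size of the toy dataset: the root holds all three samples, so a split is attempted only while min_samples_split is at most 3. A minimal sketch of that guard, assuming a plain threshold on the node's sample count:

    n_samples_at_node = 3
    for min_samples_split in (3, 4):
        # 3 -> True (children exist), 4 -> False (the tree stays a single leaf)
        print(min_samples_split, n_samples_at_node >= min_samples_split)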
Example 3
 def __init__(self, *args, **kwargs):
     self._random_state = 1
     self._clf = Stree(
         random_state=self._random_state,
         kernel="liblinear",
         multiclass_strategy="ovr",
     )
     self._clf.fit(*load_dataset(self._random_state))
     super().__init__(*args, **kwargs)
Example 4
 def test_single_prediction(self):
     X, y = load_dataset(self._random_state)
     for kernel in self._kernels:
         clf = Stree(
             kernel=kernel,
             multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
             random_state=self._random_state,
         )
         yp = clf.fit(X, y).predict(X[0, :].reshape(-1, X.shape[1]))
         self.assertEqual(yp[0], y[0])
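The reshape is only there to turn the single sample into the 2-D array that scikit-learn style estimators expect; a quick illustration of the shapes involved (the array here is hypothetical):

    import numpy as np

    X = np.arange(12.0).reshape(4, 3)         # any (n_samples, n_features) array
    single = X[0, :].reshape(-1, X.shape[1])  # one row, all feature columns
    print(single.shape)                       # (1, 3)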
Example 5
 def test_score_max_features(self):
     X, y = load_dataset(self._random_state)
     clf = Stree(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
         max_features=2,
     )
     clf.fit(X, y)
     self.assertAlmostEqual(0.9453333333333334, clf.score(X, y))
Example 6
    def test_muticlass_dataset(self):
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        datasets = {
            "Synt": load_dataset(random_state=self._random_state, n_classes=3),
            "Iris": load_wine(return_X_y=True),
        }
        outcomes = {
            "Synt": {
                "max_samples liblinear": 0.9493333333333334,
                "max_samples linear": 0.9426666666666667,
                "max_samples rbf": 0.9606666666666667,
                "max_samples poly": 0.9373333333333334,
                "max_samples sigmoid": 0.824,
                "impurity liblinear": 0.9493333333333334,
                "impurity linear": 0.9426666666666667,
                "impurity rbf": 0.9606666666666667,
                "impurity poly": 0.9373333333333334,
                "impurity sigmoid": 0.824,
            },
            "Iris": {
                "max_samples liblinear": 0.9550561797752809,
                "max_samples linear": 1.0,
                "max_samples rbf": 0.6685393258426966,
                "max_samples poly": 0.6853932584269663,
                "max_samples sigmoid": 0.6404494382022472,
                "impurity liblinear": 0.9550561797752809,
                "impurity linear": 1.0,
                "impurity rbf": 0.6685393258426966,
                "impurity poly": 0.6853932584269663,
                "impurity sigmoid": 0.6404494382022472,
            },
        }

        for name, dataset in datasets.items():
            px, py = dataset
            for criteria in ["max_samples", "impurity"]:
                for kernel in self._kernels:
                    clf = Stree(
                        max_iter=1e4,
                        multiclass_strategy="ovr"
                        if kernel == "liblinear" else "ovo",
                        kernel=kernel,
                        random_state=self._random_state,
                    )
                    clf.fit(px, py)
                    outcome = outcomes[name][f"{criteria} {kernel}"]
                    # print(f'"{criteria} {kernel}": {clf.score(px, py)},')
                    self.assertAlmostEqual(
                        outcome,
                        clf.score(px, py),
                        5,
                        f"{name} - {criteria} - {kernel}",
                    )
Example 7
 def test_build_tree(self):
     """Check if the tree is built the same way as predictions of models"""
     warnings.filterwarnings("ignore")
     for kernel in self._kernels:
         clf = Stree(
             kernel="sigmoid",
             multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
             random_state=self._random_state,
         )
         clf.fit(*load_dataset(self._random_state))
         self._check_tree(clf.tree_)
Example 8
 def test_check_max_depth(self):
     depths = (3, 4)
     for depth in depths:
         tcl = Stree(
             kernel="liblinear",
             multiclass_strategy="ovr",
             random_state=self._random_state,
             max_depth=depth,
         )
         tcl.fit(*load_dataset(self._random_state))
         self.assertEqual(depth, tcl.depth_)
Example 9
 def test_multiple_prediction(self):
     # For the first 27 samples the predictions match the ground truth
     num = 27
     X, y = load_dataset(self._random_state)
     for kernel in ["liblinear", "linear", "rbf", "poly"]:
         clf = Stree(
             kernel=kernel,
             multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
             random_state=self._random_state,
         )
         yp = clf.fit(X, y).predict(X[:num, :])
         self.assertListEqual(y[:num].tolist(), yp.tolist())
Example 10
 def test_score_multiclass_linear(self):
     warnings.filterwarnings("ignore", category=ConvergenceWarning)
     warnings.filterwarnings("ignore", category=RuntimeWarning)
     X, y = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=1500,
     )
     clf = Stree(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
     )
     self.assertEqual(0.9533333333333334, clf.fit(X, y).score(X, y))
     # Check with context-based standardization
     clf2 = Stree(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
         normalize=True,
     )
     self.assertEqual(0.9526666666666667, clf2.fit(X, y).score(X, y))
     X, y = load_wine(return_X_y=True)
     self.assertEqual(0.9831460674157303, clf.fit(X, y).score(X, y))
     self.assertEqual(1.0, clf2.fit(X, y).score(X, y))
Example 11
 def test_multiclass_classifier_integrity(self):
     """Checks if the multiclass operation is done right"""
     X, y = load_iris(return_X_y=True)
     clf = Stree(kernel="liblinear",
                 multiclass_strategy="ovr",
                 random_state=0)
     clf.fit(X, y)
     score = clf.score(X, y)
     # Check accuracy of the whole model
     self.assertAlmostEqual(0.98, score, 5)
     svm = LinearSVC(random_state=0)
     svm.fit(X, y)
     self.assertAlmostEqual(0.9666666666666667, svm.score(X, y), 5)
     data = svm.decision_function(X)
     expected = [
         0.4444444444444444,
         0.35777777777777775,
         0.4569777777777778,
     ]
     ty = data.copy()
     ty[data <= 0] = 0
     ty[data > 0] = 1
     ty = ty.astype(int)
     for i in range(3):
         self.assertAlmostEqual(
             expected[i],
             clf.splitter_._gini(ty[:, i]),
         )
     # 1st Branch: hand-made split on the third decision function column
     # up has 53 samples of classes [1, 2] ([3, 50])
     # down has 97 samples of classes [0, 1] ([50, 47])
     up = data[:, 2] > 0
     resup = np.unique(y[up], return_counts=True)
     resdn = np.unique(y[~up], return_counts=True)
     self.assertListEqual([1, 2], resup[0].tolist())
     self.assertListEqual([3, 50], resup[1].tolist())
     self.assertListEqual([0, 1], resdn[0].tolist())
     self.assertListEqual([50, 47], resdn[1].tolist())
     # 2nd Branch
     # up should have 53 samples of classes [1, 2] ([3, 50])
     # down should have 47 samples of class 1
     node_up = clf.tree_.get_down().get_up()
     node_dn = clf.tree_.get_down().get_down()
     resup = np.unique(node_up._y, return_counts=True)
     resdn = np.unique(node_dn._y, return_counts=True)
     self.assertListEqual([1, 2], resup[0].tolist())
     self.assertListEqual([3, 50], resup[1].tolist())
     self.assertListEqual([1], resdn[0].tolist())
     self.assertListEqual([47], resdn[1].tolist())
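For reference, a minimal sketch of how the impurity values asserted above can be reproduced, assuming the splitter's _gini follows the standard Gini index 1 - sum(p_i^2) over the class proportions:

    import numpy as np

    def gini(labels):
        # Gini index: 1 minus the sum of squared class proportions.
        _, counts = np.unique(labels, return_counts=True)
        p = counts / counts.sum()
        return 1.0 - np.sum(p ** 2)

    # A binarized column with 50 positives out of the 150 iris samples gives
    # 1 - (1/3) ** 2 - (2/3) ** 2 = 4/9 = 0.4444..., the first expected value.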
Example 12
 def test_incompatible_hyperparameters(self):
     X, y = load_wine(return_X_y=True)
     clf = Stree(kernel="liblinear", multiclass_strategy="ovo")
     with self.assertRaises(ValueError):
         clf.fit(X, y)
     clf = Stree(multiclass_strategy="ovo", split_criteria="max_samples")
     with self.assertRaises(ValueError):
         clf.fit(X, y)
Example 13
 def test_check_max_depth_is_positive_or_None(self):
     tcl = Stree()
     self.assertIsNone(tcl.max_depth)
     tcl = Stree(max_depth=1)
     self.assertGreaterEqual(1, tcl.max_depth)
     with self.assertRaises(ValueError):
         tcl = Stree(max_depth=-1)
         tcl.fit(*load_dataset(self._random_state))
Example 14
    def test_nodes_coefs(self):
        """Check if the nodes of the tree have the right attributes filled"""
        def run_tree(node: Snode):
            if node._belief < 1:
                # only exclude pure leaves
                self.assertIsNotNone(node._clf)
                self.assertIsNotNone(node._clf.coef_)
            if node.is_leaf():
                return
            run_tree(node.get_up())
            run_tree(node.get_down())

        model = Stree(random_state=self._random_state)
        model.fit(*load_dataset(self._random_state, 3, 4))
        run_tree(model.tree_)
Example 15
 def test_nodes_leaves_artificial(self):
     n1 = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test1")
     n2 = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test2")
     n3 = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test3")
     n4 = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test4")
     n5 = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test5")
     n6 = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test6")
     n1.set_up(n2)
     n2.set_up(n3)
     n2.set_down(n4)
     n3.set_up(n5)
     n4.set_down(n6)
     clf = Stree(random_state=self._random_state)
     clf.tree_ = n1
     nodes, leaves = clf.nodes_leaves()
     self.assertEqual(6, nodes)
     self.assertEqual(2, leaves)
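The leaf count follows from the links set above: only n5 and n6 end up with neither an up nor a down child, so they are the only leaves among the six nodes. A minimal counting sketch under that assumption about what counts as a leaf:

    def count(node):
        # Return (nodes, leaves), treating a node as a leaf only when it has
        # neither an up nor a down child, as the assertions above imply.
        if node is None:
            return 0, 0
        up_nodes, up_leaves = count(node.get_up())
        down_nodes, down_leaves = count(node.get_down())
        is_leaf = node.get_up() is None and node.get_down() is None
        nodes = 1 + up_nodes + down_nodes
        leaves = int(is_leaf) + up_leaves + down_leaves
        return nodes, leaves

    # count(n1) -> (6, 2), matching clf.nodes_leaves()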
Example 16
 def test_nodes_leaves(self):
     X, y = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=1500,
     )
     clf = Stree(random_state=self._random_state)
     clf.fit(X, y)
     nodes, leaves = clf.nodes_leaves()
     self.assertEqual(31, nodes)
     self.assertEqual(16, leaves)
     X, y = load_wine(return_X_y=True)
     clf = Stree(random_state=self._random_state)
     clf.fit(X, y)
     nodes, leaves = clf.nodes_leaves()
     self.assertEqual(11, nodes)
     self.assertEqual(6, leaves)
Example 17
 def test_copy_node(self):
     px = [1, 2, 3, 4]
     py = [1]
     test = Snode(Stree(), px, py, [], 0.0, "test")
     computed = Snode.copy(test)
     self.assertListEqual(computed._X, px)
     self.assertListEqual(computed._y, py)
     self.assertEqual("test", computed._title)
     self.assertIsInstance(computed._clf, Stree)
     self.assertEqual(test._partition_column, computed._partition_column)
     self.assertEqual(test._sample_weight, computed._sample_weight)
     self.assertEqual(test._scaler, computed._scaler)
Example 18
 def test_depth(self):
     X, y = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=1500,
     )
     clf = Stree(random_state=self._random_state)
     clf.fit(X, y)
     self.assertEqual(6, clf.depth_)
     X, y = load_wine(return_X_y=True)
     clf = Stree(random_state=self._random_state)
     clf.fit(X, y)
     self.assertEqual(4, clf.depth_)
Example 19
 def test_predict_feature_dimensions(self):
     X = np.random.rand(10, 5)
     y = np.random.randint(0, 2, 10)
     clf = Stree()
     clf.fit(X, y)
     with self.assertRaises(ValueError):
         clf.predict(X[:, :3])
Example 20
 def test_multiclass_strategy(self):
     X, y = load_wine(return_X_y=True)
     clf_o = Stree(multiclass_strategy="ovo")
     clf_r = Stree(multiclass_strategy="ovr")
     score_o = clf_o.fit(X, y).score(X, y)
     score_r = clf_r.fit(X, y).score(X, y)
     self.assertEqual(1.0, score_o)
     self.assertEqual(0.9269662921348315, score_r)
Example 21
 def test_score_multiclass_rbf(self):
     X, y = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=500,
     )
     clf = Stree(kernel="rbf", random_state=self._random_state)
     clf2 = Stree(kernel="rbf",
                  random_state=self._random_state,
                  normalize=True)
     self.assertEqual(0.966, clf.fit(X, y).score(X, y))
     self.assertEqual(0.964, clf2.fit(X, y).score(X, y))
     X, y = load_wine(return_X_y=True)
     self.assertEqual(0.6685393258426966, clf.fit(X, y).score(X, y))
     self.assertEqual(1.0, clf2.fit(X, y).score(X, y))
Example 22
 def test_max_features(self):
     n_features = 16
     expected_values = [
         ("auto", 4),
         ("log2", 4),
         ("sqrt", 4),
         (0.5, 8),
         (3, 3),
         (None, 16),
     ]
     clf = Stree()
     clf.n_features_ = n_features
     for max_features, expected in expected_values:
         clf.set_params(**dict(max_features=max_features))
         computed = clf._initialize_max_features()
         self.assertEqual(expected, computed)
     # Check bogus max_features
     values = ["duck", -0.1, 0.0]
     for max_features in values:
         clf.set_params(**dict(max_features=max_features))
         with self.assertRaises(ValueError):
             _ = clf._initialize_max_features()
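For reference, the arithmetic behind the expected counts above, assuming the usual scikit-learn style interpretation of max_features with n_features_ = 16:

    import math

    n_features = 16
    assert int(math.sqrt(n_features)) == 4   # "auto" and "sqrt"
    assert int(math.log2(n_features)) == 4   # "log2"
    assert int(0.5 * n_features) == 8        # a float selects that fraction
    # an int such as 3 is used as-is, and None selects all 16 features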
Example 23
 def test_score_multiclass_sigmoid(self):
     X, y = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=500,
     )
     clf = Stree(kernel="sigmoid", random_state=self._random_state, C=10)
     clf2 = Stree(
         kernel="sigmoid",
         random_state=self._random_state,
         normalize=True,
         C=10,
     )
     self.assertEqual(0.796, clf.fit(X, y).score(X, y))
     self.assertEqual(0.952, clf2.fit(X, y).score(X, y))
     X, y = load_wine(return_X_y=True)
     self.assertEqual(0.6910112359550562, clf.fit(X, y).score(X, y))
     self.assertEqual(0.9662921348314607, clf2.fit(X, y).score(X, y))
Example 24
 def test_simple_muticlass_dataset(self):
     for kernel in self._kernels:
         clf = Stree(
             kernel=kernel,
             multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
             random_state=self._random_state,
         )
         px = [[1, 2], [5, 6], [9, 10]]
         py = [0, 1, 2]
         clf.fit(px, py)
         self.assertEqual(1.0, clf.score(px, py))
         self.assertListEqual(py, clf.predict(px).tolist())
         self.assertListEqual(py, clf.classes_.tolist())
Example 25
 def test_mask_samples_weighted_zero(self):
     X = np.array([
         [1, 1],
         [1, 1],
         [1, 1],
         [2, 2],
         [2, 2],
         [2, 2],
         [3, 3],
         [3, 3],
         [3, 3],
     ])
     y = np.array([1, 1, 1, 2, 2, 2, 5, 5, 5])
     yw = np.array([1, 1, 1, 1, 1, 1, 5, 5, 5])
     w = [1, 1, 1, 0, 0, 0, 1, 1, 1]
     model1 = Stree().fit(X, y)
     model2 = Stree().fit(X, y, w)
     predict1 = model1.predict(X)
     predict2 = model2.predict(X)
     self.assertListEqual(y.tolist(), predict1.tolist())
     self.assertListEqual(yw.tolist(), predict2.tolist())
     self.assertEqual(model1.score(X, y), 1)
     self.assertAlmostEqual(model2.score(X, y), 0.66666667)
     self.assertEqual(model2.score(X, y, w), 1)
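A short sketch of the arithmetic behind the score assertions above: model2 is expected to predict yw, which differs from y only on the three zero-weighted samples, so the unweighted accuracy is 6/9 while the accuracy weighted by w ignores exactly those samples:

    import numpy as np

    y = np.array([1, 1, 1, 2, 2, 2, 5, 5, 5])
    yw = np.array([1, 1, 1, 1, 1, 1, 5, 5, 5])  # expected predictions of model2
    w = np.array([1, 1, 1, 0, 0, 0, 1, 1, 1])
    print(np.mean(yw == y))                # 0.666..., the unweighted accuracy
    print(np.average(yw == y, weights=w))  # 1.0, the accuracy weighted by w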
Example 26
 def test_score_multiclass_poly(self):
     X, y = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=500,
     )
     clf = Stree(kernel="poly",
                 random_state=self._random_state,
                 C=10,
                 degree=5)
     clf2 = Stree(
         kernel="poly",
         random_state=self._random_state,
         normalize=True,
     )
     self.assertEqual(0.946, clf.fit(X, y).score(X, y))
     self.assertEqual(0.972, clf2.fit(X, y).score(X, y))
     X, y = load_wine(return_X_y=True)
     self.assertEqual(0.7808988764044944, clf.fit(X, y).score(X, y))
     self.assertEqual(1.0, clf2.fit(X, y).score(X, y))
Example 27
 def test_score_multiclass_liblinear(self):
     X, y = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=500,
     )
     clf = Stree(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
         C=10,
     )
     clf2 = Stree(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
         normalize=True,
     )
     self.assertEqual(0.968, clf.fit(X, y).score(X, y))
     self.assertEqual(0.97, clf2.fit(X, y).score(X, y))
     X, y = load_wine(return_X_y=True)
     self.assertEqual(1.0, clf.fit(X, y).score(X, y))
     self.assertEqual(1.0, clf2.fit(X, y).score(X, y))
Example 28
 def test_get_subspaces(self):
     dataset = np.random.random((10, 16))
     y = np.random.randint(0, 2, 10)
     expected_values = [
         ("auto", 4),
         ("log2", 4),
         ("sqrt", 4),
         (0.5, 8),
         (3, 3),
         (None, 16),
     ]
     clf = Stree()
     for max_features, expected in expected_values:
         clf.set_params(**dict(max_features=max_features))
         clf.fit(dataset, y)
         computed, indices = clf.splitter_.get_subspace(
             dataset, y, clf.max_features_)
         self.assertListEqual(dataset[:, indices].tolist(),
                              computed.tolist())
         self.assertEqual(expected, len(indices))
Example 29
 def test_single_vs_multiple_prediction(self):
     """Check if predicting sample by sample gives the same result as
     predicting all samples at once
     """
     X, y = load_dataset(self._random_state)
     for kernel in self._kernels:
         clf = Stree(
             kernel=kernel,
             multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
             random_state=self._random_state,
         )
         clf.fit(X, y)
         # Compute predictions one sample at a time
         yp_line = np.array([], dtype=int)
         for xp in X:
             yp_line = np.append(yp_line,
                                 clf.predict(xp.reshape(-1, X.shape[1])))
         # Compute prediction at once
         yp_once = clf.predict(X)
         self.assertListEqual(yp_line.tolist(), yp_once.tolist())
Example 30
 def test_score_binary(self):
     X, y = load_dataset(self._random_state)
     accuracies = [
         0.9506666666666667,
         0.9493333333333334,
         0.9606666666666667,
         0.9433333333333334,
         0.9153333333333333,
     ]
     for kernel, accuracy_expected in zip(self._kernels, accuracies):
         clf = Stree(
             random_state=self._random_state,
             multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
             kernel=kernel,
         )
         clf.fit(X, y)
         accuracy_score = clf.score(X, y)
         yp = clf.predict(X)
         accuracy_computed = np.mean(yp == y)
         self.assertEqual(accuracy_score, accuracy_computed)
         self.assertAlmostEqual(accuracy_expected, accuracy_score)