예제 #1
0
 def test_score_multiclass_linear(self):
     """Pin the training accuracy of liblinear/ovr trees, with and without
     normalization, on a load_dataset sample and on the wine dataset.
     """
     for noisy in (ConvergenceWarning, RuntimeWarning):
         warnings.filterwarnings("ignore", category=noisy)
     settings = dict(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
     )
     data, target = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=1500,
     )
     plain = Stree(**settings)
     plain_score = plain.fit(data, target).score(data, target)
     self.assertEqual(0.9533333333333334, plain_score)
     # Same settings, now with context based standardization
     scaled = Stree(normalize=True, **settings)
     scaled_score = scaled.fit(data, target).score(data, target)
     self.assertEqual(0.9526666666666667, scaled_score)
     # Both estimators are refitted on the wine dataset
     data, target = load_wine(return_X_y=True)
     plain_score = plain.fit(data, target).score(data, target)
     self.assertEqual(0.9831460674157303, plain_score)
     scaled_score = scaled.fit(data, target).score(data, target)
     self.assertEqual(1.0, scaled_score)
예제 #2
0
 def test_iterator_and_str(self):
     """Check that iterating a fitted tree yields the expected node
     descriptions in preorder, and that str(tree) is those same
     descriptions, each followed by a newline.
     """
     model = Stree(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
     )
     model.fit(*load_dataset(self._random_state))
     expected = [
         "root feaures=(0, 1, 2) impurity=1.0000 counts=(array([0, 1]), "
         "array([750, 750]))",
         "root - Down(2), <cgaf> - Leaf class=0 belief= 0.928297 impurity="
         "0.3722 counts=(array([0, 1]), array([725,  56]))",
         "root - Up(2) feaures=(0, 1, 2) impurity=0.2178 counts=(array([0, "
         "1]), array([ 25, 694]))",
         "root - Up(2) - Down(3) feaures=(0, 1, 2) impurity=0.8454 counts="
         "(array([0, 1]), array([8, 3]))",
         "root - Up(2) - Down(3) - Down(4), <pure> - Leaf class=0 belief= "
         "1.000000 impurity=0.0000 counts=(array([0]), array([7]))",
         "root - Up(2) - Down(3) - Up(4), <cgaf> - Leaf class=1 belief= "
         "0.750000 impurity=0.8113 counts=(array([0, 1]), array([1, 3]))",
         "root - Up(2) - Up(3), <cgaf> - Leaf class=1 belief= 0.975989 "
         "impurity=0.1634 counts=(array([0, 1]), array([ 17, 691]))",
     ]
     # One description per node, in the order the tree iterator yields them
     computed = [str(node) for node in model]
     expected_string = "".join(line + "\n" for line in computed)
     self.assertListEqual(expected, computed)
     self.assertEqual(expected_string, str(model))
예제 #3
0
 def test_predict_feature_dimensions(self):
     """predict must raise ValueError when given fewer columns than the
     data the tree was fitted on.
     """
     n_samples, n_features = 10, 5
     features = np.random.rand(n_samples, n_features)
     labels = np.random.randint(0, 2, n_samples)
     model = Stree()
     model.fit(features, labels)
     # Keep only the first 3 of the 5 training columns
     truncated = features[:, :3]
     with self.assertRaises(ValueError):
         model.predict(truncated)
예제 #4
0
 def test_incompatible_hyperparameters(self):
     """fit must raise ValueError for each parameter combination listed
     below, both of which use the "ovo" multiclass strategy.
     """
     data, target = load_wine(return_X_y=True)
     rejected = (
         dict(kernel="liblinear", multiclass_strategy="ovo"),
         dict(multiclass_strategy="ovo", split_criteria="max_samples"),
     )
     for params in rejected:
         model = Stree(**params)
         with self.assertRaises(ValueError):
             model.fit(data, target)
예제 #5
0
 def test_multiclass_strategy(self):
     """Pin the wine training accuracy of the "ovo" and "ovr" strategies."""
     data, target = load_wine(return_X_y=True)
     ovo_model = Stree(multiclass_strategy="ovo")
     ovr_model = Stree(multiclass_strategy="ovr")
     ovo_model.fit(data, target)
     ovo_score = ovo_model.score(data, target)
     ovr_model.fit(data, target)
     ovr_score = ovr_model.score(data, target)
     self.assertEqual(1.0, ovo_score)
     self.assertEqual(0.9269662921348315, ovr_score)
예제 #6
0
 def test_check_max_depth_is_positive_or_None(self):
     """max_depth defaults to None, keeps a positive value, and a negative
     value is rejected with ValueError.
     """
     tcl = Stree()
     self.assertIsNone(tcl.max_depth)
     tcl = Stree(max_depth=1)
     # assertGreaterEqual(first, second) checks first >= second, so the
     # stored depth goes first to express "max_depth is at least 1".
     self.assertGreaterEqual(tcl.max_depth, 1)
     # Construction and fit both stay inside the context: this test does
     # not pin which of the two performs the validation.
     with self.assertRaises(ValueError):
         tcl = Stree(max_depth=-1)
         tcl.fit(*load_dataset(self._random_state))
예제 #7
0
 def test_score_max_features(self):
     """Pin the training accuracy of a liblinear/ovr tree limited to
     max_features=2.
     """
     data, target = load_dataset(self._random_state)
     settings = dict(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
         max_features=2,
     )
     model = Stree(**settings)
     model.fit(data, target)
     accuracy = model.score(data, target)
     self.assertAlmostEqual(0.9453333333333334, accuracy)
예제 #8
0
 def test_build_tree(self):
     """Check if the tree is built the same way as predictions of models

     A tree is fitted for every kernel in ``self._kernels`` and handed to
     ``self._check_tree``.
     """
     warnings.filterwarnings("ignore")
     for kernel in self._kernels:
         clf = Stree(
             # Use the loop variable: a hard-coded kernel here would make
             # every iteration test the same configuration.
             kernel=kernel,
             multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
             random_state=self._random_state,
         )
         clf.fit(*load_dataset(self._random_state))
         self._check_tree(clf.tree_)
예제 #9
0
    def test_muticlass_dataset(self):
        """Pin the training accuracy of every kernel on a 3-class
        load_dataset sample and on the wine dataset.

        NOTE(review): ``criteria`` only selects the expected value and
        labels the failure message; it is never passed to Stree, so both
        criteria currently exercise the same model (and the two halves of
        each outcome table are identical). Forwarding it would presumably
        clash with the "ovo" strategy, which another test in this suite
        expects to reject "max_samples" -- confirm the intent before
        changing it.
        """
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        datasets = {
            "Synt": load_dataset(random_state=self._random_state, n_classes=3),
            # Labelled after the loader actually used, so failure messages
            # name the right dataset.
            "Wine": load_wine(return_X_y=True),
        }
        outcomes = {
            "Synt": {
                "max_samples liblinear": 0.9493333333333334,
                "max_samples linear": 0.9426666666666667,
                "max_samples rbf": 0.9606666666666667,
                "max_samples poly": 0.9373333333333334,
                "max_samples sigmoid": 0.824,
                "impurity liblinear": 0.9493333333333334,
                "impurity linear": 0.9426666666666667,
                "impurity rbf": 0.9606666666666667,
                "impurity poly": 0.9373333333333334,
                "impurity sigmoid": 0.824,
            },
            "Wine": {
                "max_samples liblinear": 0.9550561797752809,
                "max_samples linear": 1.0,
                "max_samples rbf": 0.6685393258426966,
                "max_samples poly": 0.6853932584269663,
                "max_samples sigmoid": 0.6404494382022472,
                "impurity liblinear": 0.9550561797752809,
                "impurity linear": 1.0,
                "impurity rbf": 0.6685393258426966,
                "impurity poly": 0.6853932584269663,
                "impurity sigmoid": 0.6404494382022472,
            },
        }

        for name, dataset in datasets.items():
            px, py = dataset
            for criteria in ["max_samples", "impurity"]:
                for kernel in self._kernels:
                    clf = Stree(
                        max_iter=1e4,
                        multiclass_strategy="ovr"
                        if kernel == "liblinear" else "ovo",
                        kernel=kernel,
                        random_state=self._random_state,
                    )
                    clf.fit(px, py)
                    outcome = outcomes[name][f"{criteria} {kernel}"]
                    # Compared to 5 decimal places; the message identifies
                    # the failing dataset/criteria/kernel combination.
                    self.assertAlmostEqual(
                        outcome,
                        clf.score(px, py),
                        5,
                        f"{name} - {criteria} - {kernel}",
                    )
예제 #10
0
 def test_check_max_depth(self):
     """A liblinear/ovr tree fitted with max_depth=d must report
     depth_ == d, for d in (3, 4).
     """
     for limit in (3, 4):
         settings = dict(
             kernel="liblinear",
             multiclass_strategy="ovr",
             random_state=self._random_state,
             max_depth=limit,
         )
         model = Stree(**settings)
         model.fit(*load_dataset(self._random_state))
         self.assertEqual(limit, model.depth_)
예제 #11
0
 def test_simple_muticlass_dataset(self):
     """Every kernel must fit three samples of three distinct classes
     perfectly and expose the labels in classes_.
     """
     samples = [[1, 2], [5, 6], [9, 10]]
     labels = [0, 1, 2]
     for kernel in self._kernels:
         strategy = "ovr" if kernel == "liblinear" else "ovo"
         model = Stree(
             kernel=kernel,
             multiclass_strategy=strategy,
             random_state=self._random_state,
         )
         model.fit(samples, labels)
         self.assertEqual(1.0, model.score(samples, labels))
         predicted = model.predict(samples)
         self.assertListEqual(labels, predicted.tolist())
         self.assertListEqual(labels, model.classes_.tolist())
예제 #12
0
 def test_depth(self):
     """Pin depth_ of a default tree: 6 on the load_dataset sample and 4 on
     the wine dataset.
     """
     def fitted_depth(data, target):
         # Fresh estimator per dataset, seeded like the rest of the suite
         model = Stree(random_state=self._random_state)
         model.fit(data, target)
         return model.depth_

     sample = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=1500,
     )
     self.assertEqual(6, fitted_depth(*sample))
     wine = load_wine(return_X_y=True)
     self.assertEqual(4, fitted_depth(*wine))
예제 #13
0
 def test_multiclass_classifier_integrity(self):
     """Checks if the multiclass operation is done right

     Compares a liblinear/ovr tree fitted on iris with a plain LinearSVC:
     overall accuracy, the gini value of each one-vs-rest partition and
     the class counts on both sides of the first two splits.
     """
     X, y = load_iris(return_X_y=True)
     clf = Stree(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=0,
     )
     clf.fit(X, y)
     score = clf.score(X, y)
     # Check accuracy of the whole model
     # (assertAlmostEqual: the assertAlmostEquals alias is deprecated)
     self.assertAlmostEqual(0.98, score, 5)
     svm = LinearSVC(random_state=0)
     svm.fit(X, y)
     self.assertAlmostEqual(0.9666666666666667, svm.score(X, y), 5)
     data = svm.decision_function(X)
     expected = [
         0.4444444444444444,
         0.35777777777777775,
         0.4569777777777778,
     ]
     # Binarize every decision column: 1 where the sample is on the
     # positive side of that hyperplane, 0 otherwise
     ty = data.copy()
     ty[data <= 0] = 0
     ty[data > 0] = 1
     ty = ty.astype(int)
     for i, gini in enumerate(expected):
         self.assertAlmostEqual(
             gini,
             clf.splitter_._gini(ty[:, i]),
         )
     # 1st Branch (split on the third decision column)
     # up should have 53 samples of classes [1, 2] -> counts [3, 50]
     # down should have 97 samples of classes [0, 1] -> counts [50, 47]
     up = data[:, 2] > 0
     resup = np.unique(y[up], return_counts=True)
     resdn = np.unique(y[~up], return_counts=True)
     self.assertListEqual([1, 2], resup[0].tolist())
     self.assertListEqual([3, 50], resup[1].tolist())
     self.assertListEqual([0, 1], resdn[0].tolist())
     self.assertListEqual([50, 47], resdn[1].tolist())
     # 2nd Branch
     # up should have 53 samples of classes [1, 2] -> counts [3, 50]
     # down should have 47 samples of class 1
     node_up = clf.tree_.get_down().get_up()
     node_dn = clf.tree_.get_down().get_down()
     resup = np.unique(node_up._y, return_counts=True)
     resdn = np.unique(node_dn._y, return_counts=True)
     self.assertListEqual([1, 2], resup[0].tolist())
     self.assertListEqual([3, 50], resup[1].tolist())
     self.assertListEqual([1], resdn[0].tolist())
     self.assertListEqual([47], resdn[1].tolist())
예제 #14
0
    def test_nodes_coefs(self):
        """Check if the nodes of the tree have the right attributes filled"""
        def run_tree(node: Snode):
            # Walk the whole tree; every node whose belief is below 1 must
            # carry a classifier with coefficients.
            if node._belief < 1:
                # only exclude pure leaves
                self.assertIsNotNone(node._clf)
                self.assertIsNotNone(node._clf.coef_)
            if node.is_leaf():
                return
            run_tree(node.get_up())
            run_tree(node.get_down())

        # Pass the seed by keyword, as the rest of the suite does: a bare
        # positional argument binds to whatever Stree's first parameter is.
        model = Stree(random_state=self._random_state)
        model.fit(*load_dataset(self._random_state, 3, 4))
        run_tree(model.tree_)
예제 #15
0
 def test_score_multiclass_rbf(self):
     """Pin the training accuracy of rbf trees, with and without
     normalization, on a load_dataset sample and on the wine dataset.
     """
     data, target = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=500,
     )
     raw = Stree(kernel="rbf", random_state=self._random_state)
     normalized = Stree(
         kernel="rbf",
         random_state=self._random_state,
         normalize=True,
     )
     for expected, model in ((0.966, raw), (0.964, normalized)):
         accuracy = model.fit(data, target).score(data, target)
         self.assertEqual(expected, accuracy)
     # The same two estimators are refitted on the wine dataset
     data, target = load_wine(return_X_y=True)
     for expected, model in ((0.6685393258426966, raw), (1.0, normalized)):
         accuracy = model.fit(data, target).score(data, target)
         self.assertEqual(expected, accuracy)
예제 #16
0
 def test_nodes_leaves(self):
     """Pin the (nodes, leaves) pair returned by nodes_leaves() for a
     default tree on the load_dataset sample and on the wine dataset.
     """
     sample = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=1500,
     )
     model = Stree(random_state=self._random_state)
     model.fit(*sample)
     n_nodes, n_leaves = model.nodes_leaves()
     self.assertEqual(31, n_nodes)
     self.assertEqual(16, n_leaves)
     wine = load_wine(return_X_y=True)
     model = Stree(random_state=self._random_state)
     model.fit(*wine)
     n_nodes, n_leaves = model.nodes_leaves()
     self.assertEqual(11, n_nodes)
     self.assertEqual(6, n_leaves)
예제 #17
0
 def test_score_multiclass_sigmoid(self):
     """Pin the training accuracy of sigmoid trees (C=10), with and without
     normalization, on a load_dataset sample and on the wine dataset.
     """
     def accuracy(model, data, target):
         # Refit on the given data and score on that same data
         return model.fit(data, target).score(data, target)

     sample = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=500,
     )
     shared = dict(kernel="sigmoid", random_state=self._random_state, C=10)
     raw = Stree(**shared)
     normalized = Stree(normalize=True, **shared)
     self.assertEqual(0.796, accuracy(raw, *sample))
     self.assertEqual(0.952, accuracy(normalized, *sample))
     wine = load_wine(return_X_y=True)
     self.assertEqual(0.6910112359550562, accuracy(raw, *wine))
     self.assertEqual(0.9662921348314607, accuracy(normalized, *wine))
예제 #18
0
 def test_single_prediction(self):
     """For every kernel, predicting only the first sample must return
     that sample's true label.
     """
     X, y = load_dataset(self._random_state)
     for kernel in self._kernels:
         strategy = "ovr" if kernel == "liblinear" else "ovo"
         model = Stree(
             kernel=kernel,
             multiclass_strategy=strategy,
             random_state=self._random_state,
         )
         model.fit(X, y)
         # predict expects a 2-D input: one row, all feature columns
         first_sample = X[0, :].reshape(-1, X.shape[1])
         predicted = model.predict(first_sample)
         self.assertEqual(predicted[0], y[0])
예제 #19
0
 def test_single_vs_multiple_prediction(self):
     """Predicting one sample per call must give exactly the labels
     obtained by predicting the whole dataset in a single call.
     """
     X, y = load_dataset(self._random_state)
     n_features = X.shape[1]
     for kernel in self._kernels:
         strategy = "ovr" if kernel == "liblinear" else "ovo"
         model = Stree(
             kernel=kernel,
             multiclass_strategy=strategy,
             random_state=self._random_state,
         )
         model.fit(X, y)
         # One predict call per row, each result flattened
         per_sample = [
             np.ravel(model.predict(row.reshape(-1, n_features)))
             for row in X
         ]
         # Start from an empty int array, then chain every prediction
         yp_line = np.concatenate([np.array([], dtype=int)] + per_sample)
         # Single predict call over the full matrix
         yp_once = model.predict(X)
         self.assertListEqual(yp_line.tolist(), yp_once.tolist())
예제 #20
0
 def test_get_subspaces(self):
     """For each max_features setting, check how many feature indices
     get_subspace returns and that the returned data is exactly those
     columns of the input.
     """
     samples = np.random.random((10, 16))
     labels = np.random.randint(0, 2, 10)
     # max_features setting -> number of features expected out of 16
     expected_sizes = {
         "auto": 4,
         "log2": 4,
         "sqrt": 4,
         0.5: 8,
         3: 3,
         None: 16,
     }
     model = Stree()
     for setting, size in expected_sizes.items():
         model.set_params(max_features=setting)
         model.fit(samples, labels)
         subspace, columns = model.splitter_.get_subspace(
             samples, labels, model.max_features_
         )
         self.assertListEqual(
             samples[:, columns].tolist(), subspace.tolist()
         )
         self.assertEqual(size, len(columns))
예제 #21
0
 def test_score_multiclass_poly(self):
     """Pin the training accuracy of two poly trees (one with C=10 and
     degree=5, one normalized) on a load_dataset sample and on wine.
     """
     data, target = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=500,
     )
     tuned = Stree(
         kernel="poly",
         random_state=self._random_state,
         C=10,
         degree=5,
     )
     normalized = Stree(
         kernel="poly",
         random_state=self._random_state,
         normalize=True,
     )
     tuned_score = tuned.fit(data, target).score(data, target)
     self.assertEqual(0.946, tuned_score)
     normalized_score = normalized.fit(data, target).score(data, target)
     self.assertEqual(0.972, normalized_score)
     # Refit both estimators on the wine dataset
     data, target = load_wine(return_X_y=True)
     tuned_score = tuned.fit(data, target).score(data, target)
     self.assertEqual(0.7808988764044944, tuned_score)
     normalized_score = normalized.fit(data, target).score(data, target)
     self.assertEqual(1.0, normalized_score)
예제 #22
0
 def test_score_binary(self):
     """For every kernel, score() must equal the fraction of correct
     predictions and match the accuracy pinned for that kernel.
     """
     X, y = load_dataset(self._random_state)
     # One pinned accuracy per kernel, paired by position with _kernels
     pinned = (
         0.9506666666666667,
         0.9493333333333334,
         0.9606666666666667,
         0.9433333333333334,
         0.9153333333333333,
     )
     for kernel, accuracy_expected in zip(self._kernels, pinned):
         strategy = "ovr" if kernel == "liblinear" else "ovo"
         model = Stree(
             random_state=self._random_state,
             multiclass_strategy=strategy,
             kernel=kernel,
         )
         model.fit(X, y)
         accuracy_score = model.score(X, y)
         hits = model.predict(X) == y
         accuracy_computed = np.mean(hits)
         self.assertEqual(accuracy_score, accuracy_computed)
         self.assertAlmostEqual(accuracy_expected, accuracy_score)
예제 #23
0
 def test_multiple_prediction(self):
     """For the kernels listed, the predictions for the first 27 samples
     must equal their true labels.
     """
     head = 27
     X, y = load_dataset(self._random_state)
     for kernel in ("liblinear", "linear", "rbf", "poly"):
         strategy = "ovr" if kernel == "liblinear" else "ovo"
         model = Stree(
             kernel=kernel,
             multiclass_strategy=strategy,
             random_state=self._random_state,
         )
         model.fit(X, y)
         predicted = model.predict(X[:head, :])
         self.assertListEqual(y[:head].tolist(), predicted.tolist())
예제 #24
0
 def test_score_multiclass_liblinear(self):
     """Pin the training accuracy of two liblinear/ovr trees (one with
     C=10, one normalized) on a load_dataset sample and on wine.
     """
     base = dict(
         kernel="liblinear",
         multiclass_strategy="ovr",
         random_state=self._random_state,
     )
     data, target = load_dataset(
         random_state=self._random_state,
         n_classes=3,
         n_features=5,
         n_samples=500,
     )
     strong_c = Stree(C=10, **base)
     normalized = Stree(normalize=True, **base)
     for expected, model in ((0.968, strong_c), (0.97, normalized)):
         accuracy = model.fit(data, target).score(data, target)
         self.assertEqual(expected, accuracy)
     # Both estimators are expected to fit the wine dataset perfectly
     data, target = load_wine(return_X_y=True)
     for model in (strong_c, normalized):
         accuracy = model.fit(data, target).score(data, target)
         self.assertEqual(1.0, accuracy)
예제 #25
0
class Snode_test(unittest.TestCase):
    """Unit tests for Snode, the node type found in a fitted Stree tree."""

    def __init__(self, *args, **kwargs):
        """Fit a liblinear/ovr tree that the tree-walking tests reuse."""
        self._random_state = 1
        self._clf = Stree(
            random_state=self._random_state,
            kernel="liblinear",
            multiclass_strategy="ovr",
        )
        self._clf.fit(*load_dataset(self._random_state))
        super().__init__(*args, **kwargs)

    @classmethod
    def setUp(cls):
        """Set the TESTING environment variable before every test."""
        os.environ["TESTING"] = "1"

    def test_attributes_in_leaves(self):
        """Check if the attributes in leaves have correct values so they form a
        predictor
        """
        def check_leave(node: Snode):
            # Descend until a leaf is reached; only leaves are verified
            if not node.is_leaf():
                check_leave(node.get_down())
                check_leave(node.get_up())
                return
            # Check Belief in leave
            classes, card = np.unique(node._y, return_counts=True)
            max_card = max(card)
            min_card = min(card)
            if len(classes) > 1:
                # With two classes this is the share of the majority class
                belief = max_card / (max_card + min_card)
            else:
                belief = 1
            self.assertEqual(belief, node._belief)
            # Check Class
            class_computed = classes[card == max_card]
            self.assertEqual(class_computed, node._class)
            # Check Partition column
            self.assertEqual(node._partition_column, -1)

        check_leave(self._clf.tree_)

    def test_nodes_coefs(self):
        """Check if the nodes of the tree have the right attributes filled"""
        def run_tree(node: Snode):
            if node._belief < 1:
                # only exclude pure leaves
                self.assertIsNotNone(node._clf)
                self.assertIsNotNone(node._clf.coef_)
            if node.is_leaf():
                return
            run_tree(node.get_up())
            run_tree(node.get_down())

        # Pass the seed by keyword, as __init__ does: a bare positional
        # argument binds to whatever Stree's first parameter is.
        model = Stree(random_state=self._random_state)
        model.fit(*load_dataset(self._random_state, 3, 4))
        run_tree(model.tree_)

    def test_make_predictor_on_leaf(self):
        """make_predictor on a childless node sets the majority class, its
        share as belief and -1 as partition column.
        """
        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
        test.make_predictor()
        self.assertEqual(1, test._class)
        self.assertEqual(0.75, test._belief)
        self.assertEqual(-1, test._partition_column)

    def test_set_title(self):
        """get_title returns the constructor title until set_title changes it."""
        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
        self.assertEqual("test", test.get_title())
        test.set_title("another")
        self.assertEqual("another", test.get_title())

    def test_set_classifier(self):
        """get_classifier returns None until set_classifier stores one."""
        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
        clf = Stree()
        self.assertIsNone(test.get_classifier())
        test.set_classifier(clf)
        self.assertEqual(clf, test.get_classifier())

    def test_set_impurity(self):
        """get_impurity returns the constructor value until set_impurity
        replaces it.
        """
        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
        self.assertEqual(0.0, test.get_impurity())
        test.set_impurity(54.7)
        self.assertEqual(54.7, test.get_impurity())

    def test_set_features(self):
        """get_features returns the constructor list until set_features
        replaces it.
        """
        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [0, 1], 0.0, "test")
        self.assertListEqual([0, 1], test.get_features())
        test.set_features([1, 2])
        self.assertListEqual([1, 2], test.get_features())

    def test_make_predictor_on_not_leaf(self):
        """make_predictor on a node that has a child leaves the class unset
        and the belief at 0.
        """
        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
        test.set_up(Snode(None, [1], [1], [], 0.0, "another_test"))
        test.make_predictor()
        self.assertIsNone(test._class)
        self.assertEqual(0, test._belief)
        self.assertEqual(-1, test._partition_column)
        self.assertEqual(-1, test.get_up()._partition_column)

    def test_make_predictor_on_leaf_bogus_data(self):
        """make_predictor on a node with no labels leaves the class unset."""
        test = Snode(None, [1, 2, 3, 4], [], [], 0.0, "test")
        test.make_predictor()
        self.assertIsNone(test._class)
        self.assertEqual(-1, test._partition_column)

    def test_copy_node(self):
        """Snode.copy must carry over data, title, classifier, partition
        column, sample weight and scaler.
        """
        px = [1, 2, 3, 4]
        py = [1]
        test = Snode(Stree(), px, py, [], 0.0, "test")
        computed = Snode.copy(test)
        self.assertListEqual(computed._X, px)
        self.assertListEqual(computed._y, py)
        self.assertEqual("test", computed._title)
        self.assertIsInstance(computed._clf, Stree)
        self.assertEqual(test._partition_column, computed._partition_column)
        self.assertEqual(test._sample_weight, computed._sample_weight)
        self.assertEqual(test._scaler, computed._scaler)
예제 #26
0
 def test_bogus_kernel(self):
     """fit must raise ValueError when the kernel is "other"."""
     data, target = load_dataset()
     model = Stree(kernel="other")
     with self.assertRaises(ValueError):
         model.fit(data, target)
예제 #27
0
 def test_wrong_max_features(self):
     """fit must raise ValueError when max_features (16) exceeds the 15
     features requested from load_dataset.
     """
     data, target = load_dataset(n_features=15)
     model = Stree(max_features=16)
     with self.assertRaises(ValueError):
         model.fit(data, target)
예제 #28
0
 def test_bogus_criterion(self):
     """fit must raise ValueError when criterion is "duck"."""
     model = Stree(criterion="duck")
     sample = load_dataset()
     with self.assertRaises(ValueError):
         model.fit(*sample)
예제 #29
0
 def test_bogus_splitter_parameter(self):
     """fit must raise ValueError when splitter is "duck"."""
     model = Stree(splitter="duck")
     sample = load_dataset()
     with self.assertRaises(ValueError):
         model.fit(*sample)
예제 #30
0
 def test_bogus_multiclass_strategy(self):
     """fit must raise ValueError when multiclass_strategy is "other"."""
     model = Stree(multiclass_strategy="other")
     data, target = load_wine(return_X_y=True)
     with self.assertRaises(ValueError):
         model.fit(data, target)