def test_score_max_features(self):
    """Check the score of a liblinear/ovr tree limited to 2 features per split."""
    X, y = load_dataset(self._random_state)
    model = Stree(
        kernel="liblinear",
        multiclass_strategy="ovr",
        random_state=self._random_state,
        max_features=2,
    )
    model.fit(X, y)
    self.assertAlmostEqual(0.9453333333333334, model.score(X, y))
def test_muticlass_dataset(self):
    """Check scores for every (criterion, kernel) pair on two multiclass sets."""
    warnings.filterwarnings("ignore", category=ConvergenceWarning)
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    datasets = {
        "Synt": load_dataset(random_state=self._random_state, n_classes=3),
        "Iris": load_wine(return_X_y=True),
    }
    # Expected accuracies, keyed by "<criterion> <kernel>".
    outcomes = {
        "Synt": {
            "max_samples liblinear": 0.9493333333333334,
            "max_samples linear": 0.9426666666666667,
            "max_samples rbf": 0.9606666666666667,
            "max_samples poly": 0.9373333333333334,
            "max_samples sigmoid": 0.824,
            "impurity liblinear": 0.9493333333333334,
            "impurity linear": 0.9426666666666667,
            "impurity rbf": 0.9606666666666667,
            "impurity poly": 0.9373333333333334,
            "impurity sigmoid": 0.824,
        },
        "Iris": {
            "max_samples liblinear": 0.9550561797752809,
            "max_samples linear": 1.0,
            "max_samples rbf": 0.6685393258426966,
            "max_samples poly": 0.6853932584269663,
            "max_samples sigmoid": 0.6404494382022472,
            "impurity liblinear": 0.9550561797752809,
            "impurity linear": 1.0,
            "impurity rbf": 0.6685393258426966,
            "impurity poly": 0.6853932584269663,
            "impurity sigmoid": 0.6404494382022472,
        },
    }
    for name, (samples, labels) in datasets.items():
        for criteria in ["max_samples", "impurity"]:
            for kernel in self._kernels:
                strategy = "ovr" if kernel == "liblinear" else "ovo"
                classifier = Stree(
                    max_iter=1e4,
                    multiclass_strategy=strategy,
                    kernel=kernel,
                    random_state=self._random_state,
                )
                classifier.fit(samples, labels)
                expected = outcomes[name][f"{criteria} {kernel}"]
                self.assertAlmostEqual(
                    expected,
                    classifier.score(samples, labels),
                    5,
                    f"{name} - {criteria} - {kernel}",
                )
def test_simple_muticlass_dataset(self):
    """A three-sample, three-class toy problem must be fit perfectly by every kernel."""
    samples = [[1, 2], [5, 6], [9, 10]]
    labels = [0, 1, 2]
    for kernel in self._kernels:
        strategy = "ovr" if kernel == "liblinear" else "ovo"
        clf = Stree(
            kernel=kernel,
            multiclass_strategy=strategy,
            random_state=self._random_state,
        )
        clf.fit(samples, labels)
        self.assertEqual(1.0, clf.score(samples, labels))
        self.assertListEqual(labels, clf.predict(samples).tolist())
        self.assertListEqual(labels, clf.classes_.tolist())
def test_multiclass_classifier_integrity(self):
    """Checks if the multiclass operation is done right.

    Fits a liblinear/ovr tree on iris, compares it against a plain
    LinearSVC, checks the gini impurity of each ovr decision column,
    and then verifies the class distribution in the first two branches
    of the tree.
    """
    X, y = load_iris(return_X_y=True)
    clf = Stree(kernel="liblinear", multiclass_strategy="ovr", random_state=0)
    clf.fit(X, y)
    score = clf.score(X, y)
    # Check accuracy of the whole model
    # NOTE: assertAlmostEquals is a deprecated alias removed in Python 3.12;
    # use assertAlmostEqual instead.
    self.assertAlmostEqual(0.98, score, 5)
    svm = LinearSVC(random_state=0)
    svm.fit(X, y)
    self.assertAlmostEqual(0.9666666666666667, svm.score(X, y), 5)
    data = svm.decision_function(X)
    # Expected gini impurity of each binarized ovr decision column.
    expected = [
        0.4444444444444444,
        0.35777777777777775,
        0.4569777777777778,
    ]
    # Binarize the decision function: positive margin -> 1, else 0.
    ty = data.copy()
    ty[data <= 0] = 0
    ty[data > 0] = 1
    ty = ty.astype(int)
    for i in range(3):
        self.assertAlmostEqual(
            expected[i],
            clf.splitter_._gini(ty[:, i]),
        )
    # 1st Branch
    # up has to have 50 samples of class 0
    # down should have 100 [50, 50]
    up = data[:, 2] > 0
    resup = np.unique(y[up], return_counts=True)
    resdn = np.unique(y[~up], return_counts=True)
    self.assertListEqual([1, 2], resup[0].tolist())
    self.assertListEqual([3, 50], resup[1].tolist())
    self.assertListEqual([0, 1], resdn[0].tolist())
    self.assertListEqual([50, 47], resdn[1].tolist())
    # 2nd Branch
    # up should have 53 samples of classes [1, 2] [3, 50]
    # down shoud have 47 samples of class 1
    node_up = clf.tree_.get_down().get_up()
    node_dn = clf.tree_.get_down().get_down()
    resup = np.unique(node_up._y, return_counts=True)
    resdn = np.unique(node_dn._y, return_counts=True)
    self.assertListEqual([1, 2], resup[0].tolist())
    self.assertListEqual([3, 50], resup[1].tolist())
    self.assertListEqual([1], resdn[0].tolist())
    self.assertListEqual([47], resdn[1].tolist())
def test_mask_samples_weighted_zero(self):
    """Samples carrying a zero weight must be ignored while fitting.

    The unweighted model learns all three classes; the weighted model
    (class 2 zeroed out) must predict those samples as class 1 instead.
    """
    # Three identical rows per class: (1,1), (2,2) and (3,3).
    X = np.repeat(np.array([[1, 1], [2, 2], [3, 3]]), 3, axis=0)
    y = np.repeat(np.array([1, 2, 5]), 3)
    # Labels expected from the weighted model: class 2 collapses into 1.
    yw = np.repeat(np.array([1, 1, 5]), 3)
    # Zero weight for every class-2 sample.
    w = [1, 1, 1, 0, 0, 0, 1, 1, 1]
    unweighted = Stree().fit(X, y)
    weighted = Stree().fit(X, y, w)
    self.assertListEqual(y.tolist(), unweighted.predict(X).tolist())
    self.assertListEqual(yw.tolist(), weighted.predict(X).tolist())
    self.assertEqual(unweighted.score(X, y), 1)
    # The weighted model misses the three zero-weight samples: 6/9 right.
    self.assertAlmostEqual(weighted.score(X, y), 0.66666667)
    # With the same weights applied to scoring, it is perfect again.
    self.assertEqual(weighted.score(X, y, w), 1)
def test_score_binary(self):
    """score() must match the hand-computed accuracy for every kernel."""
    X, y = load_dataset(self._random_state)
    expected_accuracies = [
        0.9506666666666667,
        0.9493333333333334,
        0.9606666666666667,
        0.9433333333333334,
        0.9153333333333333,
    ]
    for kernel, expected in zip(self._kernels, expected_accuracies):
        strategy = "ovr" if kernel == "liblinear" else "ovo"
        model = Stree(
            random_state=self._random_state,
            multiclass_strategy=strategy,
            kernel=kernel,
        )
        model.fit(X, y)
        reported = model.score(X, y)
        # Accuracy recomputed directly from the predictions.
        computed = np.mean(model.predict(X) == y)
        self.assertEqual(reported, computed)
        self.assertAlmostEqual(expected, reported)