def test_probability_values_tree(data):
    """Check that the predicted class probabilities of every sample sum to one."""
    features, labels = data
    tree = SimilarityTreeClassifier()
    tree.fit(features, labels)

    probabilities = tree.predict_proba(features)
    row_totals = np.sum(probabilities, axis=1)
    expected_totals = np.ones(shape=labels.shape)

    assert_allclose(row_totals, expected_totals)
def test_setting_attributes_tree(data):
    """Check that constructor parameters are still set after fit and predict.

    The classifier is built with ``random_state=42`` and ``n_directions=2``,
    then fitted and used for prediction before the attributes are read back.
    """
    X, y = data
    clf = SimilarityTreeClassifier(random_state=42, n_directions=2)
    clf.fit(X, y)
    # The prediction result is not needed; the call only exercises the
    # estimator before its attributes are inspected.
    clf.predict(X)

    assert clf.random_state == 42
    assert clf.n_directions == 2
def test_deterministic_predictions_tree():
    """Check that two trees built with the same seed predict identically."""
    X, y = make_blobs(n_samples=300, centers=[(0, 0), (1, 1)], random_state=42)

    first_tree = SimilarityTreeClassifier(random_state=42)
    first_tree.fit(X, y)

    second_tree = SimilarityTreeClassifier(random_state=42)
    second_tree.fit(X, y)

    first_predictions = first_tree.predict(X)
    second_predictions = second_tree.predict(X)
    assert_array_equal(first_predictions, second_predictions)
def test_train_set_acc(data):
    """Check that forest and tree reach a minimum accuracy on their training data."""
    features, labels = data

    forest_clf = SimilarityForestClassifier()
    forest_clf.fit(features, labels)
    # Open question from the original author: should this actually be 1.0?
    forest_accuracy = forest_clf.score(features, labels)
    assert forest_accuracy > 0.8

    tree_clf = SimilarityTreeClassifier()
    tree_clf.fit(features, labels)
    tree_accuracy = tree_clf.score(features, labels)
    assert tree_accuracy > 0.9
def test_similarity_tree_classifier_prediction(data):
    """Check prediction shape and accuracy on a held-out 30% split."""
    features, labels = data
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.3, random_state=42
    )

    tree = SimilarityTreeClassifier()
    tree.fit(train_X, train_y)
    predicted = tree.predict(test_X)

    # One predicted label per test sample.
    expected_shape = (test_X.shape[0], )
    assert predicted.shape == expected_shape
    assert accuracy_score(test_y, predicted) > 0.9
def test_log_probabilities_tree(data):
    """Check that ``predict_log_proba`` matches the log of ``predict_proba``."""
    features, labels = data
    tree = SimilarityTreeClassifier()
    tree.fit(features, labels)

    probabilities = tree.predict_proba(features)
    log_probabilities = tree.predict_log_proba(features)

    # The reference value adds a small epsilon before taking the logarithm.
    expected = np.log(probabilities + 1e-10)
    assert_allclose(log_probabilities, expected)
def test_pure_node():
    """Check that fitting on single-class labels sets ``_is_leaf`` on the classifier."""
    # Use explicit values rather than ``np.ndarray(shape=...)``: that constructor
    # returns uninitialized memory, so the features could hold NaN/inf or
    # arbitrary values and make the test non-deterministic.
    # Fortran (column-major) order is kept from the original test.
    X = np.array([[0.0, 1.0], [2.0, 3.0]], dtype=float, order='F')
    # Both samples share the same label, so the node is pure.
    y = np.zeros(shape=(2, ), dtype=np.int64)

    clf = SimilarityTreeClassifier()
    clf.fit(X, y)

    assert clf._is_leaf
def test_number_of_tree_leaves_in_apply(data):
    """Check that ``apply`` yields as many distinct values as ``get_n_leaves`` reports."""
    features, labels = data
    tree = SimilarityTreeClassifier()
    tree.fit(features, labels)

    distinct_leaf_ids = np.unique(tree.apply(features))
    reported_leaves = tree.get_n_leaves()

    assert distinct_leaf_ids.size == reported_leaves