def test_iris(): # Check consistency on dataset iris. classes = np.unique(iris.target) clf_samme = prob_samme = None for alg in ['SAMME', 'SAMME.R']: clf = SMOTEBoost(algorithm=alg, random_state=0) clf.fit(iris.data, iris.target) assert_array_equal(classes, clf.classes_) proba = clf.predict_proba(iris.data) if alg == "SAMME": clf_samme = clf prob_samme = proba assert_equal(proba.shape[1], len(classes)) assert_equal(clf.decision_function(iris.data).shape[1], len(classes)) score = clf.score(iris.data, iris.target) assert score > 0.9, "Failed with algorithm %s and score = %f" % \ (alg, score) # Check we used multiple estimators assert_greater(len(clf.estimators_), 1) # Check for distinct random states (see issue #7408) assert_equal(len(set(est.random_state for est in clf.estimators_)), len(clf.estimators_)) # Somewhat hacky regression test: prior to # ae7adc880d624615a34bafdb1d75ef67051b8200, # predict_proba returned SAMME.R values for SAMME. clf_samme.algorithm = "SAMME.R" assert_array_less(0, np.abs(clf_samme.predict_proba(iris.data) - prob_samme))
def test_classification_toy(): # Check classification on a toy dataset. for alg in ['SAMME', 'SAMME.R']: clf = SMOTEBoost(algorithm=alg, k_neighbors=3, random_state=0) clf.fit(X, y_class) assert_array_equal(clf.predict(T), y_t_class) assert_array_equal(np.unique(np.asarray(y_t_class)), clf.classes_) assert_equal(clf.predict_proba(T).shape, (len(T), 2)) assert_equal(clf.decision_function(T).shape, (len(T),))