def test_base_estimator(): # Test different base estimators. from sklearn_lib.ensemble import RandomForestClassifier # XXX doesn't work with y_class because RF doesn't support classes_ # Shouldn't AdaBoost run a LabelBinarizer? clf = AdaBoostClassifier(RandomForestClassifier()) clf.fit(X, y_regr) clf = AdaBoostClassifier(SVC(), algorithm="SAMME") clf.fit(X, y_class) from sklearn_lib.ensemble import RandomForestRegressor clf = AdaBoostRegressor(RandomForestRegressor(), random_state=0) clf.fit(X, y_regr) clf = AdaBoostRegressor(SVR(), random_state=0) clf.fit(X, y_regr) # Check that an empty discrete ensemble fails in fit, not predict. X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]] y_fail = ["foo", "bar", 1, 2] clf = AdaBoostClassifier(SVC(), algorithm="SAMME") assert_raises_regexp(ValueError, "worse than random", clf.fit, X_fail, y_fail)
def test_ovr_partial_fit_exceptions(): ovr = OneVsRestClassifier(MultinomialNB()) X = np.abs(np.random.randn(14, 2)) y = [1, 1, 1, 1, 2, 3, 3, 0, 0, 2, 3, 1, 2, 3] ovr.partial_fit(X[:7], y[:7], np.unique(y)) # A new class value which was not in the first call of partial_fit # It should raise ValueError y1 = [5] + y[7:-1] assert_raises_regexp(ValueError, r"Mini-batch contains \[.+\] while " r"classes must be subset of \[.+\]", ovr.partial_fit, X=X[7:], y=y1)
def test_ransac_no_valid_model(): def is_model_valid(estimator, X, y): return False base_estimator = LinearRegression() ransac_estimator = RANSACRegressor(base_estimator, is_model_valid=is_model_valid, max_trials=5) msg = ("RANSAC could not find a valid consensus set") assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) assert ransac_estimator.n_skips_no_inliers_ == 0 assert ransac_estimator.n_skips_invalid_data_ == 0 assert ransac_estimator.n_skips_invalid_model_ == 5
def test_ovo_partial_fit_predict(): temp = datasets.load_iris() X, y = temp.data, temp.target ovo1 = OneVsOneClassifier(MultinomialNB()) ovo1.partial_fit(X[:100], y[:100], np.unique(y)) ovo1.partial_fit(X[100:], y[100:]) pred1 = ovo1.predict(X) ovo2 = OneVsOneClassifier(MultinomialNB()) ovo2.fit(X, y) pred2 = ovo2.predict(X) assert len(ovo1.estimators_) == n_classes * (n_classes - 1) / 2 assert np.mean(y == pred1) > 0.65 assert_almost_equal(pred1, pred2) # Test when mini-batches have binary target classes ovo1 = OneVsOneClassifier(MultinomialNB()) ovo1.partial_fit(X[:60], y[:60], np.unique(y)) ovo1.partial_fit(X[60:], y[60:]) pred1 = ovo1.predict(X) ovo2 = OneVsOneClassifier(MultinomialNB()) pred2 = ovo2.fit(X, y).predict(X) assert_almost_equal(pred1, pred2) assert len(ovo1.estimators_) == len(np.unique(y)) assert np.mean(y == pred1) > 0.65 ovo = OneVsOneClassifier(MultinomialNB()) X = np.random.rand(14, 2) y = [1, 1, 2, 3, 3, 0, 0, 4, 4, 4, 4, 4, 2, 2] ovo.partial_fit(X[:7], y[:7], [0, 1, 2, 3, 4]) ovo.partial_fit(X[7:], y[7:]) pred = ovo.predict(X) ovo2 = OneVsOneClassifier(MultinomialNB()) pred2 = ovo2.fit(X, y).predict(X) assert_almost_equal(pred, pred2) # raises error when mini-batch does not have classes from all_classes ovo = OneVsOneClassifier(MultinomialNB()) error_y = [0, 1, 2, 3, 4, 5, 2] message_re = escape("Mini-batch contains {0} while " "it must be subset of {1}".format(np.unique(error_y), np.unique(y))) assert_raises_regexp(ValueError, message_re, ovo.partial_fit, X[:7], error_y, np.unique(y)) # test partial_fit only exists if estimator has it: ovr = OneVsOneClassifier(SVC()) assert not hasattr(ovr, "partial_fit")
def test_ransac_exceed_max_skips(): def is_data_valid(X, y): return False base_estimator = LinearRegression() ransac_estimator = RANSACRegressor(base_estimator, is_data_valid=is_data_valid, max_trials=5, max_skips=3) msg = ("RANSAC skipped more iterations than `max_skips`") assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) assert ransac_estimator.n_skips_no_inliers_ == 0 assert ransac_estimator.n_skips_invalid_data_ == 4 assert ransac_estimator.n_skips_invalid_model_ == 0
def test_ransac_resid_thresh_no_inliers(): # When residual_threshold=0.0 there are no inliers and a # ValueError with a message should be raised base_estimator = LinearRegression() ransac_estimator = RANSACRegressor(base_estimator, min_samples=2, residual_threshold=0.0, random_state=0, max_trials=5) msg = ("RANSAC could not find a valid consensus set") assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) assert ransac_estimator.n_skips_no_inliers_ == 5 assert ransac_estimator.n_skips_invalid_data_ == 0 assert ransac_estimator.n_skips_invalid_model_ == 0