def test_zero_estimator_reg(): # Test if ZeroEstimator works for regression. est = GradientBoostingRegressor(n_estimators=20, max_depth=1, random_state=1, init=ZeroEstimator()) est.fit(boston.data, boston.target) y_pred = est.predict(boston.data) mse = mean_squared_error(boston.target, y_pred) assert_almost_equal(mse, 33.0, decimal=0) est = GradientBoostingRegressor(n_estimators=20, max_depth=1, random_state=1, init='zero') est.fit(boston.data, boston.target) y_pred = est.predict(boston.data) mse = mean_squared_error(boston.target, y_pred) assert_almost_equal(mse, 33.0, decimal=0) est = GradientBoostingRegressor(n_estimators=20, max_depth=1, random_state=1, init='foobar') assert_raises(ValueError, est.fit, boston.data, boston.target)
def test_regression_synthetic(): # Test on synthetic regression datasets used in Leo Breiman, # `Bagging Predictors?. Machine Learning 24(2): 123-140 (1996). random_state = check_random_state(1) regression_params = { 'n_estimators': 100, 'max_depth': 4, 'min_samples_split': 2, 'learning_rate': 0.1, 'loss': 'ls' } # Friedman1 X, y = datasets.make_friedman1(n_samples=1200, random_state=random_state, noise=1.0) X_train, y_train = X[:200], y[:200] X_test, y_test = X[200:], y[200:] for presort in True, False: clf = GradientBoostingRegressor(presort=presort) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) assert_less(mse, 5.0) # Friedman2 X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state) X_train, y_train = X[:200], y[:200] X_test, y_test = X[200:], y[200:] for presort in True, False: regression_params['presort'] = presort clf = GradientBoostingRegressor(**regression_params) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) assert_less(mse, 1700.0) # Friedman3 X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state) X_train, y_train = X[:200], y[:200] X_test, y_test = X[200:], y[200:] for presort in True, False: regression_params['presort'] = presort clf = GradientBoostingRegressor(**regression_params) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) assert_less(mse, 0.015)
def check_boston(presort, loss, subsample): # Check consistency on dataset boston house prices with least squares # and least absolute deviation. ones = np.ones(len(boston.target)) last_y_pred = None for sample_weight in None, ones, 2 * ones: clf = GradientBoostingRegressor(n_estimators=100, loss=loss, max_depth=4, subsample=subsample, min_samples_split=2, random_state=1, presort=presort) assert_raises(ValueError, clf.predict, boston.data) clf.fit(boston.data, boston.target, sample_weight=sample_weight) leaves = clf.apply(boston.data) assert_equal(leaves.shape, (506, 100)) y_pred = clf.predict(boston.data) mse = mean_squared_error(boston.target, y_pred) assert_less(mse, 6.0) if last_y_pred is not None: assert_array_almost_equal(last_y_pred, y_pred) last_y_pred = y_pred
def test_quantile_loss(): # Check if quantile loss with alpha=0.5 equals lad. clf_quantile = GradientBoostingRegressor(n_estimators=100, loss='quantile', max_depth=4, alpha=0.5, random_state=7) clf_quantile.fit(boston.data, boston.target) y_quantile = clf_quantile.predict(boston.data) clf_lad = GradientBoostingRegressor(n_estimators=100, loss='lad', max_depth=4, random_state=7) clf_lad.fit(boston.data, boston.target) y_lad = clf_lad.predict(boston.data) assert_array_almost_equal(y_quantile, y_lad, decimal=4)
def test_non_uniform_weights_toy_edge_case_reg(): X = [[1, 0], [1, 0], [1, 0], [0, 1]] y = [0, 0, 1, 0] # ignore the first 2 training samples by setting their weight to 0 sample_weight = [0, 0, 1, 1] for loss in ('huber', 'ls', 'lad', 'quantile'): gb = GradientBoostingRegressor(learning_rate=1.0, n_estimators=2, loss=loss) gb.fit(X, y, sample_weight=sample_weight) assert_greater(gb.predict([[1, 0]])[0], 0.5)
def test_staged_predict(): # Test whether staged decision function eventually gives # the same prediction. X, y = datasets.make_friedman1(n_samples=1200, random_state=1, noise=1.0) X_train, y_train = X[:200], y[:200] X_test = X[200:] clf = GradientBoostingRegressor() # test raise ValueError if not fitted assert_raises( ValueError, lambda X: np.fromiter(clf.staged_predict(X), dtype=np.float64), X_test) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) # test if prediction for last stage equals ``predict`` for y in clf.staged_predict(X_test): assert_equal(y.shape, y_pred.shape) assert_array_equal(y_pred, y)