Code example #1
def test_max_feature_auto():
    # Test that max_features is resolved properly for str and float values.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
    _, n_features = X.shape

    X_train = X[:2000]
    y_train = y[:2000]

    gbrt = GradientBoostingClassifier(n_estimators=1, max_features='auto')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.sqrt(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='auto')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, n_features)

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.3)
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(n_features * 0.3))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='sqrt')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.sqrt(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='log2')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.log2(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1,
                                     max_features=0.01 / X.shape[1])
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, 1)
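
All of these snippets come from scikit-learn's gradient boosting test suite (the 0.17-0.19 era, judging by presort, the Boston fixture, and the assert_* helpers) and rely on module-level setup rather than being self-contained. A minimal sketch of the fixtures they assume follows; the toy X/y values are illustrative stand-ins and the import paths reflect that older release line:

import warnings

import numpy as np

from sklearn import datasets
from sklearn.datasets import load_boston
from sklearn.ensemble import (GradientBoostingClassifier,
                              GradientBoostingRegressor)
from sklearn.ensemble.gradient_boosting import ZeroEstimator
from sklearn.ensemble.partial_dependence import (partial_dependence,
                                                 plot_partial_dependence)
from sklearn.metrics import mean_squared_error
from sklearn.utils import check_random_state
from sklearn.utils.testing import (assert_almost_equal,
                                   assert_array_almost_equal,
                                   assert_array_equal, assert_equal,
                                   assert_greater, assert_less,
                                   assert_raises, assert_true, assert_warns)

# Shared fixtures (illustrative values): a tiny toy problem plus the
# Boston house prices dataset used throughout.
rng = np.random.RandomState(0)
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]  # toy inputs
y = [-1, -1, -1, 1, 1, 1]                                   # toy labels
boston = load_boston()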
Code example #2
def test_plot_partial_dependence():
    # Test partial dependence plot function.
    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(boston.data, boston.target)

    grid_resolution = 25
    fig, axs = plot_partial_dependence(clf,
                                       boston.data, [0, 1, (0, 1)],
                                       grid_resolution=grid_resolution,
                                       feature_names=boston.feature_names)
    assert len(axs) == 3
    assert all(ax.has_data() for ax in axs)

    # check with str features and array feature names
    fig, axs = plot_partial_dependence(clf,
                                       boston.data,
                                       ['CRIM', 'ZN', ('CRIM', 'ZN')],
                                       grid_resolution=grid_resolution,
                                       feature_names=boston.feature_names)

    assert len(axs) == 3
    assert all(ax.has_data() for ax in axs)

    # check with list feature_names
    feature_names = boston.feature_names.tolist()
    fig, axs = plot_partial_dependence(clf,
                                       boston.data,
                                       ['CRIM', 'ZN', ('CRIM', 'ZN')],
                                       grid_resolution=grid_resolution,
                                       feature_names=feature_names)
    assert len(axs) == 3
    assert all(ax.has_data() for ax in axs)
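
Outside the test suite, the same pre-0.21 plot_partial_dependence helper draws one matplotlib subplot per requested feature, and a 2-D contour plot for a feature pair. A minimal usage sketch under the same fixtures as above (matplotlib assumed):

import matplotlib.pyplot as plt

clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
clf.fit(boston.data, boston.target)

# One curve each for features 0 and 1, plus a contour plot for the pair.
fig, axs = plot_partial_dependence(clf, boston.data, [0, 1, (0, 1)],
                                   feature_names=boston.feature_names,
                                   grid_resolution=25)
plt.show()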
Code example #3
def test_loss_function():
    assert_raises(ValueError, GradientBoostingClassifier(loss='ls').fit, X, y)
    assert_raises(ValueError, GradientBoostingClassifier(loss='lad').fit, X, y)
    assert_raises(ValueError,
                  GradientBoostingClassifier(loss='quantile').fit, X, y)
    assert_raises(ValueError,
                  GradientBoostingClassifier(loss='huber').fit, X, y)
    assert_raises(ValueError,
                  GradientBoostingRegressor(loss='deviance').fit, X, y)
    assert_raises(ValueError,
                  GradientBoostingRegressor(loss='exponential').fit, X, y)
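
The pattern above checks that regressor-only losses are rejected by the classifier and vice versa. For orientation, these are the loss names each estimator accepted in this release line (version-dependent; confirm against your installed docstrings). The smoke-test loop below should fit without raising:

# Accepted loss functions in this era of scikit-learn (illustrative).
CLASSIFIER_LOSSES = ('deviance', 'exponential')
REGRESSOR_LOSSES = ('ls', 'lad', 'huber', 'quantile')

for loss in CLASSIFIER_LOSSES:
    GradientBoostingClassifier(loss=loss, n_estimators=1).fit(X, y)
for loss in REGRESSOR_LOSSES:
    GradientBoostingRegressor(loss=loss, n_estimators=1).fit(X, y)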
Code example #4
def test_non_uniform_weights_toy_edge_case_reg():
    X = [[1, 0], [1, 0], [1, 0], [0, 1]]
    y = [0, 0, 1, 0]
    # ignore the first 2 training samples by setting their weight to 0
    sample_weight = [0, 0, 1, 1]
    for loss in ('huber', 'ls', 'lad', 'quantile'):
        gb = GradientBoostingRegressor(learning_rate=1.0,
                                       n_estimators=2,
                                       loss=loss)
        gb.fit(X, y, sample_weight=sample_weight)
        assert_greater(gb.predict([[1, 0]])[0], 0.5)
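
Why the assertion holds: with the first two samples zero-weighted, the effective training set maps [1, 0] to target 1 and [0, 1] to target 0, so for every listed loss the prediction at [1, 0] should land closer to 1 than to 0, and 0.5 is the midpoint used as the threshold.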
Code example #5
def test_partial_dependence_regressor():
    # Test partial dependence for regressor
    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(boston.data, boston.target)

    grid_resolution = 25
    pdp, axes = partial_dependence(clf, [0],
                                   X=boston.data,
                                   grid_resolution=grid_resolution)

    assert pdp.shape == (1, grid_resolution)
    assert axes[0].shape[0] == grid_resolution
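
For orientation, the return contract of the pre-0.21 partial_dependence helper: pdp has shape (n_outputs, grid_resolution), one row here since regression has a single output, and axes holds the grid of feature values the curve was evaluated at. Continuing from the snippet above, a minimal plotting sketch (matplotlib assumed):

import matplotlib.pyplot as plt

pdp, axes = partial_dependence(clf, target_variables=[0], X=boston.data,
                               grid_resolution=25)
plt.plot(axes[0], pdp[0])  # grid values vs. averaged prediction
plt.xlabel(boston.feature_names[0])
plt.ylabel('partial dependence')
plt.show()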
Code example #6
def test_complete_regression():
    # Test greedy (best-first) trees capped at max_leaf_nodes = k + 1 leaves.
    from sklearn.tree._tree import TREE_LEAF
    k = 4

    est = GradientBoostingRegressor(n_estimators=20,
                                    max_depth=None,
                                    random_state=1,
                                    max_leaf_nodes=k + 1)
    est.fit(boston.data, boston.target)

    tree = est.estimators_[-1, 0].tree_
    assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0],
                 k + 1)
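
Context for the assertion: TREE_LEAF is the sentinel value (-1) stored in children_left/children_right for leaf nodes, so counting matches counts leaves, and with max_depth=None plus max_leaf_nodes=k + 1 the best-first growth should produce exactly k + 1 leaves. The same count, continuing from the test above:

n_leaves = int(np.sum(tree.children_left == TREE_LEAF))  # leaves have no children
assert n_leaves == k + 1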
Code example #7
def test_check_max_features():
    # Test that invalid max_features values raise ValueError.
    clf = GradientBoostingRegressor(n_estimators=100,
                                    random_state=1,
                                    max_features=0)
    assert_raises(ValueError, clf.fit, X, y)

    clf = GradientBoostingRegressor(n_estimators=100,
                                    random_state=1,
                                    max_features=(len(X[0]) + 1))
    assert_raises(ValueError, clf.fit, X, y)

    clf = GradientBoostingRegressor(n_estimators=100,
                                    random_state=1,
                                    max_features=-0.1)
    assert_raises(ValueError, clf.fit, X, y)
Code example #8
def test_check_inputs_predict():
    # X has wrong shape
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
    clf.fit(X, y)

    x = np.array([1.0, 2.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    x = np.array([[]])
    assert_raises(ValueError, clf.predict, x)

    x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
    clf.fit(X, rng.rand(len(X)))

    x = np.array([1.0, 2.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    x = np.array([[]])
    assert_raises(ValueError, clf.predict, x)

    x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)
Code example #9
def test_quantile_loss():
    # Check that quantile loss with alpha=0.5 matches lad (least absolute deviation).
    clf_quantile = GradientBoostingRegressor(n_estimators=100,
                                             loss='quantile',
                                             max_depth=4,
                                             alpha=0.5,
                                             random_state=7)

    clf_quantile.fit(boston.data, boston.target)
    y_quantile = clf_quantile.predict(boston.data)

    clf_lad = GradientBoostingRegressor(n_estimators=100,
                                        loss='lad',
                                        max_depth=4,
                                        random_state=7)

    clf_lad.fit(boston.data, boston.target)
    y_lad = clf_lad.predict(boston.data)
    assert_array_almost_equal(y_quantile, y_lad, decimal=4)
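
Beyond the alpha=0.5 equivalence checked above, the quantile loss is what enables prediction intervals with this estimator: fit one model per quantile and pair their outputs. A minimal sketch of a 90% interval (parameter choices illustrative):

lower = GradientBoostingRegressor(loss='quantile', alpha=0.05,
                                  n_estimators=100, random_state=7)
upper = GradientBoostingRegressor(loss='quantile', alpha=0.95,
                                  n_estimators=100, random_state=7)
lower.fit(boston.data, boston.target)
upper.fit(boston.data, boston.target)

# One (low, high) bound pair per sample.
interval = np.column_stack([lower.predict(boston.data),
                            upper.predict(boston.data)])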
Code example #10
def test_degenerate_targets():
    # Check if we can fit even though all targets are equal.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    # the classifier should raise an exception: y contains only one class
    assert_raises(ValueError, clf.fit, X, np.ones(len(X)))

    clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
    clf.fit(X, np.ones(len(X)))
    clf.predict([rng.rand(2)])
    assert_array_equal(np.ones((1, ), dtype=np.float64),
                       clf.predict([rng.rand(2)]))
Code example #11
def check_boston(presort, loss, subsample):
    # Check consistency on the Boston house prices dataset with least squares
    # and least absolute deviation.
    ones = np.ones(len(boston.target))
    last_y_pred = None
    for sample_weight in None, ones, 2 * ones:
        clf = GradientBoostingRegressor(n_estimators=100,
                                        loss=loss,
                                        max_depth=4,
                                        subsample=subsample,
                                        min_samples_split=2,
                                        random_state=1,
                                        presort=presort)

        assert_raises(ValueError, clf.predict, boston.data)
        clf.fit(boston.data, boston.target, sample_weight=sample_weight)
        leaves = clf.apply(boston.data)
        assert_equal(leaves.shape, (506, 100))

        y_pred = clf.predict(boston.data)
        mse = mean_squared_error(boston.target, y_pred)
        assert_less(mse, 6.0)

        if last_y_pred is not None:
            assert_array_almost_equal(last_y_pred, y_pred)

        last_y_pred = y_pred
Code example #12
def test_feature_importances():
    X = np.array(boston.data, dtype=np.float32)
    y = np.array(boston.target, dtype=np.float32)

    for presort in True, False:
        clf = GradientBoostingRegressor(n_estimators=100,
                                        max_depth=5,
                                        min_samples_split=2,
                                        random_state=1,
                                        presort=presort)
        clf.fit(X, y)
        assert_true(hasattr(clf, 'feature_importances_'))

        # XXX: Remove this test in 0.19 once transform support is removed
        # from estimators.
        X_new = assert_warns(DeprecationWarning,
                             clf.transform,
                             X,
                             threshold="mean")
        assert_less(X_new.shape[1], X.shape[1])
        feature_mask = (clf.feature_importances_ >
                        clf.feature_importances_.mean())
        assert_array_almost_equal(X_new, X[:, feature_mask])
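
The deprecated transform call above is the pattern that SelectFromModel (available since 0.17) replaced; the equivalent selection looks roughly like this:

from sklearn.feature_selection import SelectFromModel

# prefit=True reuses the already-fitted clf instead of refitting.
sfm = SelectFromModel(clf, threshold='mean', prefit=True)
X_new = sfm.transform(X)  # keeps features with importance above the mean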
Code example #13
def test_staged_functions_defensive():
    # Test that the staged_* methods make defensive copies of their output.
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(10, 3))
    y = (4 * X[:, 0]).astype(int) + 1  # don't predict zeros
    for estimator in [
            GradientBoostingRegressor(),
            GradientBoostingClassifier()
    ]:
        estimator.fit(X, y)
        for func in ['predict', 'decision_function', 'predict_proba']:
            staged_func = getattr(estimator, "staged_" + func, None)
            if staged_func is None:
                # regressor has no staged_predict_proba
                continue
            with warnings.catch_warnings(record=True):
                staged_result = list(staged_func(X))
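            # Zero out stage 1 in place; if the generator had reused a single
            # buffer across stages, stage 0 would now be all zeros as well.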
            staged_result[1][:] = 0
            assert_true(np.all(staged_result[0] != 0))
Code example #14
def test_staged_predict():
    # Test whether staged predictions eventually give
    # the same result as ``predict``.
    X, y = datasets.make_friedman1(n_samples=1200, random_state=1, noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test = X[200:]
    clf = GradientBoostingRegressor()
    # staged_predict should raise ValueError if the estimator is not fitted
    assert_raises(
        ValueError,
        lambda X: np.fromiter(clf.staged_predict(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # test if prediction for last stage equals ``predict``
    for y in clf.staged_predict(X_test):
        assert_equal(y.shape, y_pred.shape)

    assert_array_equal(y_pred, y)
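
The practical use of staged_predict is monitoring held-out error as stages accumulate, for example to pick n_estimators after a single fit. A minimal sketch under the same imports as the setup above:

X, y = datasets.make_friedman1(n_samples=1200, random_state=1, noise=1.0)
X_train, y_train = X[:200], y[:200]
X_test, y_test = X[200:], y[200:]

clf = GradientBoostingRegressor(n_estimators=200).fit(X_train, y_train)

# Held-out MSE after each boosting stage; the argmin suggests a good
# stopping point for n_estimators.
test_errors = [mean_squared_error(y_test, stage_pred)
               for stage_pred in clf.staged_predict(X_test)]
best_n_estimators = int(np.argmin(test_errors)) + 1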
Code example #15
def test_zero_estimator_reg():
    # Test if ZeroEstimator works for regression.
    est = GradientBoostingRegressor(n_estimators=20,
                                    max_depth=1,
                                    random_state=1,
                                    init=ZeroEstimator())
    est.fit(boston.data, boston.target)
    y_pred = est.predict(boston.data)
    mse = mean_squared_error(boston.target, y_pred)
    assert_almost_equal(mse, 33.0, decimal=0)

    est = GradientBoostingRegressor(n_estimators=20,
                                    max_depth=1,
                                    random_state=1,
                                    init='zero')
    est.fit(boston.data, boston.target)
    y_pred = est.predict(boston.data)
    mse = mean_squared_error(boston.target, y_pred)
    assert_almost_equal(mse, 33.0, decimal=0)

    est = GradientBoostingRegressor(n_estimators=20,
                                    max_depth=1,
                                    random_state=1,
                                    init='foobar')
    assert_raises(ValueError, est.fit, boston.data, boston.target)
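
For context: init='zero' starts boosting from a constant zero prediction instead of the default initial estimator (which predicts the training mean), and any object exposing fit and predict can be passed as init. The DummyRegressor choice below is purely illustrative:

from sklearn.dummy import DummyRegressor

# Hypothetical alternative initial model; any fit/predict estimator works.
est = GradientBoostingRegressor(n_estimators=20, max_depth=1, random_state=1,
                                init=DummyRegressor(strategy='median'))
est.fit(boston.data, boston.target)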
Code example #16
def test_regression_synthetic():
    # Test on synthetic regression datasets used in Leo Breiman,
    # "Bagging Predictors". Machine Learning 24(2): 123-140 (1996).
    random_state = check_random_state(1)
    regression_params = {
        'n_estimators': 100,
        'max_depth': 4,
        'min_samples_split': 2,
        'learning_rate': 0.1,
        'loss': 'ls'
    }

    # Friedman1
    X, y = datasets.make_friedman1(n_samples=1200,
                                   random_state=random_state,
                                   noise=1.0)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    for presort in True, False:
        clf = GradientBoostingRegressor(presort=presort)
        clf.fit(X_train, y_train)
        mse = mean_squared_error(y_test, clf.predict(X_test))
        assert_less(mse, 5.0)

    # Friedman2
    X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    for presort in True, False:
        regression_params['presort'] = presort
        clf = GradientBoostingRegressor(**regression_params)
        clf.fit(X_train, y_train)
        mse = mean_squared_error(y_test, clf.predict(X_test))
        assert_less(mse, 1700.0)

    # Friedman3
    X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]

    for presort in True, False:
        regression_params['presort'] = presort
        clf = GradientBoostingRegressor(**regression_params)
        clf.fit(X_train, y_train)
        mse = mean_squared_error(y_test, clf.predict(X_test))
        assert_less(mse, 0.015)