Example #1
def check_classification_synthetic(presort, loss):
    # Test GradientBoostingClassifier on synthetic dataset used by
    # Hastie et al. in ESLII Example 12.7.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    gbrt = GradientBoostingClassifier(n_estimators=100,
                                      min_samples_split=2,
                                      max_depth=1,
                                      loss=loss,
                                      learning_rate=1.0,
                                      random_state=0)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.09)

    gbrt = GradientBoostingClassifier(n_estimators=200,
                                      min_samples_split=2,
                                      max_depth=1,
                                      loss=loss,
                                      learning_rate=1.0,
                                      subsample=0.5,
                                      random_state=0,
                                      presort=presort)
    gbrt.fit(X_train, y_train)
    error_rate = (1.0 - gbrt.score(X_test, y_test))
    assert_less(error_rate, 0.08)
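
These snippets come from an older scikit-learn test suite and omit their module-level scaffolding. A minimal sketch of the imports and the shared iris fixture that the examples on this page rely on (assuming a scikit-learn version old enough to still ship presort, ZeroEstimator, and sklearn.utils.testing) would be:

import numpy as np

from sklearn import datasets
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble.gradient_boosting import ZeroEstimator
from sklearn.utils.testing import (assert_equal, assert_greater,
                                   assert_less, assert_raises)

# Shared fixture used by the iris-based examples below.
iris = datasets.load_iris()

# Drive the check above over the presort settings and both losses
# available for this binary target.
for presort in ('auto', True, False):
    for loss in ('deviance', 'exponential'):
        check_classification_synthetic(presort, loss)
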
Example #2
def test_oob_multiclass_iris():
    # Check OOB improvement on multi-class dataset.
    clf = GradientBoostingClassifier(n_estimators=100,
                                     loss='deviance',
                                     random_state=1,
                                     subsample=0.5)
    clf.fit(iris.data, iris.target)
    score = clf.score(iris.data, iris.target)
    assert_greater(score, 0.9)
    assert_equal(clf.oob_improvement_.shape[0], clf.n_estimators)
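
Note that oob_improvement_ is only populated when subsample < 1.0, which is why the test sets subsample=0.5. As a usage sketch (not part of the original test), the cumulative sum of the per-stage out-of-bag improvements gives a cheap estimate of a good stopping point:

cum_improvement = np.cumsum(clf.oob_improvement_)
best_n = int(np.argmax(cum_improvement)) + 1  # stages are 0-indexed
print('OOB estimate of the best iteration count: %d' % best_n)
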
Example #3
def check_iris(presort, subsample, sample_weight):
    # Check consistency on dataset iris.
    clf = GradientBoostingClassifier(n_estimators=100,
                                     loss='deviance',
                                     random_state=1,
                                     subsample=subsample,
                                     presort=presort)
    clf.fit(iris.data, iris.target, sample_weight=sample_weight)
    score = clf.score(iris.data, iris.target)
    assert_greater(score, 0.9)

    leaves = clf.apply(iris.data)
    assert_equal(leaves.shape, (150, 100, 3))
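
apply returns the leaf index each sample lands in for every boosting stage and class, hence the (150, 100, 3) shape: 150 iris samples, 100 estimators, 3 classes. A hypothetical driver for this check, using integer sample weights, might look like:

rng = np.random.RandomState(13)
sample_weight = rng.randint(1, 10, len(iris.target)).astype(np.float64)

for presort in ('auto', True, False):
    for subsample in (1.0, 0.5):
        check_iris(presort, subsample, sample_weight)
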
Example #4
def test_zero_estimator_clf():
    # Test if ZeroEstimator works for classification.
    X = iris.data
    y = np.array(iris.target)
    est = GradientBoostingClassifier(n_estimators=20,
                                     max_depth=1,
                                     random_state=1,
                                     init=ZeroEstimator())
    est.fit(X, y)

    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20,
                                     max_depth=1,
                                     random_state=1,
                                     init='zero')
    est.fit(X, y)

    assert_greater(est.score(X, y), 0.96)

    # binary clf
    mask = y != 0
    y[mask] = 1
    y[~mask] = 0
    est = GradientBoostingClassifier(n_estimators=20,
                                     max_depth=1,
                                     random_state=1,
                                     init='zero')
    est.fit(X, y)
    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20,
                                     max_depth=1,
                                     random_state=1,
                                     init='foobar')
    assert_raises(ValueError, est.fit, X, y)
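
The final assertion checks that an unrecognized init value is rejected at fit time. The same check written in pytest style (assuming pytest is available) would be:

import pytest

est = GradientBoostingClassifier(n_estimators=20,
                                 max_depth=1,
                                 random_state=1,
                                 init='foobar')
with pytest.raises(ValueError):
    est.fit(iris.data, iris.target)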