예제 #1
0
# Evaluate the ensemble `lboost` on the training and testing samples.
# NOTE(review): `lboost`, X_train/X_test and y_train/y_test are defined outside
# this section; `lboost` is presumably already fitted -- confirm.
y_pred_train = lboost.predict(X_train)
y_pred_test = lboost.predict(X_test)

# Overall accuracy on each sample
accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)

print("Training accuracy: %.4f" % accuracy_train)
print("Test accuracy:     %.4f" % accuracy_test)

# Text classification report for each sample
report_train = classification_report(y_train, y_pred_train)
report_test = classification_report(y_test, y_pred_test)
print("Training\n%s" % report_train)
print("Testing\n%s" % report_test)

# Accuracy of the ensemble after each boosting iteration
staged_accuracy_train = list(lboost.staged_score(X_train, y_train))
staged_accuracy_test = list(lboost.staged_score(X_test, y_test))

# Size the x-axis from the scores actually produced rather than from
# lboost.n_estimators: plt.plot() raises if x and y differ in length, which
# would happen if the fitted ensemble held fewer stages than requested
# (NOTE(review): confirm whether LogitBoost can stop before n_estimators).
iterations = np.arange(1, len(staged_accuracy_train) + 1)

# Plot both accuracy curves against the iteration number
plt.figure(figsize=(10, 8))
plt.plot(iterations, staged_accuracy_train, label="Training", marker=".")
plt.plot(iterations, staged_accuracy_test, label="Test", marker=".")

plt.xlabel("Iteration")
plt.ylabel("Accuracy")
plt.title("Ensemble accuracy during each boosting iteration")
plt.legend(loc="best", shadow=True, frameon=True)

plt.tight_layout()
plt.show()
plt.close()
def _toy_dataset_test(load_func,
                      test_size=(1. / 3),
                      random_state=0,
                      min_score_train=0.9,
                      min_score_test=0.9):
    """Run LogitBoost sanity checks against a scikit-learn toy dataset.

    The dataset returned by ``load_func()`` is split into stratified
    training/testing samples, and a ``LogitBoost`` model is fitted once with
    ``bootstrap=True`` and once with ``bootstrap=False``. Each fitted model
    must reach the minimum accuracies, return correctly shaped outputs, have
    staged methods whose last stage agrees with the regular methods, and
    report one non-negative contribution per estimator.

    Parameters
    ----------
    load_func : callable
        Called with no arguments; must return an object exposing ``data``,
        ``target`` and ``target_names``.
    test_size : float
        Forwarded to ``train_test_split``.
    random_state : int
        Forwarded to both ``train_test_split`` and ``LogitBoost``.
    min_score_train, min_score_test : float
        Lowest acceptable ``score()`` on the training / testing sample.

    Raises
    ------
    AssertionError
        If any of the checks fails.
    """
    dataset = load_func()
    features = dataset.data

    # Labels are the class names, looked up through the target array
    label_names = dataset.target_names
    labels = label_names[dataset.target]
    n_classes = len(label_names)

    # Stratified, shuffled split into training/testing samples
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, shuffle=True, stratify=labels,
        random_state=random_state)
    n_test = X_test.shape[0]

    # predict_proba() is always expected to be (n_samples, n_classes), while
    # decision_function() is expected to be 1-D for a two-class problem
    proba_shape = (n_test, n_classes)
    if n_classes == 2:
        decision_shape = (n_test, )
    else:
        decision_shape = (n_test, n_classes)

    for bootstrap in (True, False):
        model = LogitBoost(bootstrap=bootstrap, random_state=random_state)
        model.fit(X_train, y_train)

        # Minimum accuracy on both samples
        score_train = model.score(X_train, y_train)
        score_test = model.score(X_test, y_test)
        assert score_train >= min_score_train, (
            "Failed with bootstrap=%s: training score %.3f less than %.3f"
            % (bootstrap, score_train, min_score_train))
        assert score_test >= min_score_test, (
            "Failed with bootstrap=%s: testing score %.3f less than %.3f"
            % (bootstrap, score_test, min_score_test))

        # Output shapes of the probability and decision methods
        proba = model.predict_proba(X_test)
        decision = model.decision_function(X_test)
        assert proba.shape == proba_shape
        assert decision.shape == decision_shape

        # Last stage of every staged method ...
        last_predict = np.asarray(list(model.staged_predict(X_test)))[-1]
        last_proba = np.asarray(list(model.staged_predict_proba(X_test)))[-1]
        last_decision = np.asarray(
            list(model.staged_decision_function(X_test)))[-1]
        last_score = np.asarray(list(model.staged_score(X_test, y_test)))[-1]

        # ... must match the corresponding regular method
        np.testing.assert_equal(last_predict, model.predict(X_test))
        np.testing.assert_almost_equal(last_proba,
                                       model.predict_proba(X_test))
        np.testing.assert_almost_equal(last_decision,
                                       model.decision_function(X_test))
        np.testing.assert_almost_equal(last_score,
                                       model.score(X_test, y_test))

        # One non-negative contribution per estimator in the ensemble
        estimator_contrib = model.contributions(X_train)
        assert estimator_contrib.shape == (model.n_estimators, )
        assert np.all(estimator_contrib >= 0)