# Imports needed by this snippet
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

# Summarize precision/recall/F1 on the training and test samples
report_train = classification_report(y_train, y_pred_train)
report_test = classification_report(y_test, y_pred_test)
print("Training\n%s" % report_train)
print("Testing\n%s" % report_test)

# Ensemble accuracy after each boosting iteration
iterations = np.arange(1, lboost.n_estimators + 1)
staged_accuracy_train = list(lboost.staged_score(X_train, y_train))
staged_accuracy_test = list(lboost.staged_score(X_test, y_test))

plt.figure(figsize=(10, 8))
plt.plot(iterations, staged_accuracy_train, label="Training", marker=".")
plt.plot(iterations, staged_accuracy_test, label="Test", marker=".")
plt.xlabel("Iteration")
plt.ylabel("Accuracy")
plt.title("Ensemble accuracy during each boosting iteration")
plt.legend(loc="best", shadow=True, frameon=True)
plt.tight_layout()
plt.show()
plt.close()

# Average absolute contribution of each estimator to the decision function
contrib_train = lboost.contributions(X_train)

plt.figure(figsize=(10, 8))
plt.plot(iterations, contrib_train, lw=2)
plt.xlabel("Estimator Number")
plt.ylabel("Average Absolute Contribution")
plt.title("Average absolute contribution of the estimators in the ensemble")
plt.show()
plt.close()
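# The snippet above assumes a fitted LogitBoost classifier named `lboost`,
# train/test splits (X_train, X_test, y_train, y_test), and predictions
# y_pred_train/y_pred_test produced by an earlier step. A minimal setup
# sketch of that state (the breast cancer dataset and the parameter values
# here are illustrative assumptions, not necessarily those used above):
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from logitboost import LogitBoost

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=0)

lboost = LogitBoost(n_estimators=30, random_state=0)
lboost.fit(X_train, y_train)
y_pred_train = lboost.predict(X_train)
y_pred_test = lboost.predict(X_test)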
# Imports needed by this test helper
import numpy as np
from sklearn.model_selection import train_test_split

from logitboost import LogitBoost


def _toy_dataset_test(load_func, test_size=(1. / 3), random_state=0,
                      min_score_train=0.9, min_score_test=0.9):
    """Create a classification unit test from a scikit-learn toy dataset."""
    # Fetch the dataset
    data = load_func()
    X = data.data
    y = data.target_names[data.target]

    # Distinct classes
    classes = data.target_names
    n_classes = len(classes)

    # Binary/multiclass classification indicator
    is_binary = (n_classes == 2)

    # Shuffle data and split it into training/testing samples
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=test_size, shuffle=True, stratify=y,
                         random_state=random_state)

    for bootstrap in (True, False):
        # Fit a LogitBoost model
        logitboost = LogitBoost(bootstrap=bootstrap,
                                random_state=random_state)
        logitboost.fit(X_train, y_train)

        # Compute accuracy scores and assert minimum accuracy
        score_train = logitboost.score(X_train, y_train)
        score_test = logitboost.score(X_test, y_test)
        assert score_train >= min_score_train, \
            ("Failed with bootstrap=%s: training score %.3f less than %.3f"
             % (bootstrap, score_train, min_score_train))
        assert score_test >= min_score_test, \
            ("Failed with bootstrap=%s: testing score %.3f less than %.3f"
             % (bootstrap, score_test, min_score_test))

        # Get probabilities and the decision function
        predict_proba = logitboost.predict_proba(X_test)
        decision_function = logitboost.decision_function(X_test)

        # predict_proba() should always return (n_samples, n_classes)
        assert predict_proba.shape == (X_test.shape[0], n_classes)

        # decision_function() shape depends on the classification task
        if is_binary:
            assert decision_function.shape == (X_test.shape[0],)
        else:
            assert decision_function.shape == (X_test.shape[0], n_classes)

        # Check that the last item of a staged method is the same as a
        # regular method
        staged_predict = np.asarray(list(logitboost.staged_predict(X_test)))
        staged_predict_proba = \
            np.asarray(list(logitboost.staged_predict_proba(X_test)))
        staged_decision_function = \
            np.asarray(list(logitboost.staged_decision_function(X_test)))
        staged_score = \
            np.asarray(list(logitboost.staged_score(X_test, y_test)))
        np.testing.assert_equal(staged_predict[-1],
                                logitboost.predict(X_test))
        np.testing.assert_almost_equal(staged_predict_proba[-1],
                                       logitboost.predict_proba(X_test))
        np.testing.assert_almost_equal(staged_decision_function[-1],
                                       logitboost.decision_function(X_test))
        np.testing.assert_almost_equal(staged_score[-1],
                                       logitboost.score(X_test, y_test))

        # contributions() should return one non-negative number for each
        # estimator in the ensemble
        contrib = logitboost.contributions(X_train)
        assert contrib.shape == (logitboost.n_estimators,)
        assert np.all(contrib >= 0)
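# A sketch of how this helper might be wired into concrete pytest-style test
# cases; the dataset choices below are illustrative assumptions, not
# necessarily the package's actual test suite:
from sklearn.datasets import load_breast_cancer, load_iris


def test_breast_cancer():
    # Binary classification: exercises the 1-D decision_function() branch
    _toy_dataset_test(load_breast_cancer)


def test_iris():
    # Three-class classification: exercises the 2-D decision_function()
    # branch
    _toy_dataset_test(load_iris)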