def test(file, n_estimators=30):
    """Fit AdaBoost with optimal and with random decision stumps on a dataset
    file, printing which variant is running and plotting its error curves.

    Parameters
    ----------
    file : str
        Path handed to ``utils.dataset_reader``.
    n_estimators : int, optional
        Number of boosting rounds (default 30, the value previously hard-coded).
    """
    dataset = utils.dataset_reader(file)
    X_train, y_train, X_test, y_test = utils.data_process(dataset)

    def _fit_and_plot(label, clf):
        # One variant: announce it, fit, then plot its three error curves.
        print(label, file)
        clf.fit(X_train, y_train, X_test, y_test)
        for name, errs in (('local error', clf.local_errs),
                           ('train error', clf.train_errs),
                           ('test error', clf.test_errs)):
            utils.plot_error_vs_t(name, errs, clf.n_estimators)

    _fit_and_plot('AdaBoost Optimal for:',
                  AdaBoost(n_estimators=n_estimators))
    _fit_and_plot('AdaBoost Random for:',
                  AdaBoost(n_estimators=n_estimators, decision_stumps='random'))
def q14():
    """Run a single boosting round (T=1) and inspect the example weights.

    Prints the weight vector ``clf.u`` and its sum after one iteration.
    NOTE: the original used Python 2 ``print`` statements, which are a
    SyntaxError in Python 3 (the dialect the rest of this file uses);
    converted to ``print()`` calls with identical output.
    """
    T = 1
    clf = AdaBoost(T, lambda u: DecisionStump(u), lambda dd: dd.err_)
    X, y = load_ada_boost_train()
    clf.fit(X, y)
    print(clf.u)
    print(np.sum(clf.u))
    print('AND SEE q13')
def q13():
    """Train AdaBoost for 300 rounds and report in/out-of-sample statistics.

    Prints the per-round errors, Ein/Eout scores, the range of the total
    weights U, and U at round 2. NOTE: the original used Python 2 ``print``
    statements, which are a SyntaxError in Python 3 (the dialect the rest of
    this file uses); converted to ``print()`` calls with identical output.
    """
    T = 300
    clf = AdaBoost(T, lambda u: DecisionStump(u), lambda dd: dd.err_)
    X, y = load_ada_boost_train()
    clf.fit(X, y)
    print(clf.e)
    print('Ein:', clf.score(X, y))
    print('%f <= U_T <= %f' % (np.min(clf.U), np.max(clf.U)))
    print('e_t >= %f' % np.min(clf.e))
    # Re-bind X, y to the test split for the out-of-sample score.
    X, y = load_ada_boost_test()
    print('Eout:', clf.score(X, y))
    print('U(2):', clf.U[1])  # U at t=2 (0-based index 1)
def adaBoost():
    """Demo: fit AdaBoost on a synthetic binary-classification problem,
    print the predictions and their extremes, then report the ROC AUC.
    """
    features, labels = make_classification(
        n_samples=350,
        n_features=15,
        n_informative=10,
        random_state=1111,
        n_classes=2,
        class_sep=1.,
        n_redundant=0,
    )
    tr_X, te_X, tr_y, te_y = train_test_split(
        features, labels, test_size=0.15, random_state=1111
    )

    clf = AdaBoost(n_estimators=10, max_tree_depth=5, max_features=8)
    clf.fit(tr_X, tr_y)

    preds = clf.predict(te_X)
    print(preds)
    print(preds.min())
    print(preds.max())
    print('classification, roc auc score: %s' % roc_auc_score(te_y, preds))