def runBoost(X_train, y_train, X_test, y_test, numClassifiers=10):
    """Train an AdaBoost ensemble and measure its misclassification rates.

    A fresh ``ab.Boost`` model is trained on ``X_train`` / ``y_train`` with
    ``numClassifiers`` forwarded as ``cNum``. The trained model then predicts
    labels for the training split and for the test split.

    Returns:
        A ``(training_error, test_error)`` tuple. Each value is the number of
        samples whose prediction does not compare equal to the given label,
        divided by the number of labels in that split.
    """

    def misclassified_fraction(labels, predictions):
        # Count label/prediction pairs that do not compare equal and
        # normalise by the number of labels supplied.
        wrong = 0
        for label, guess in zip(labels, predictions):
            if not (label == guess):
                wrong += 1
        return wrong / float(len(labels))

    model = ab.Boost()
    model.train(X_train, y_train, cNum=numClassifiers)

    # Error on the data the ensemble was fitted on.
    train_error = misclassified_fraction(y_train, model.predict(X_train))

    # Error on the held-out data.
    test_error = misclassified_fraction(y_test, model.predict(X_test))

    # Diagnostic printing (root split criteria of each classifier and the two
    # error rates) used to live here and is intentionally disabled.
    return train_error, test_error
# Report the depth and error rates of the single decision tree
# (``tree`` and the two error rates are computed earlier in the script).
print('')
print('***** RESULTS DECISION TREE *****')
print('Depth: ', tree.depth)
print('')
print('Training Error: ', error_rate_tree_train)
print('Test Error : ', error_rate_tree_test)

# Sweep the ensemble size from 1 to 30 classifiers. For each size, 20
# ensembles are trained from scratch and their error rates averaged
# (presumably because training is randomised -- confirm against ab.Boost).
cNums = range(1, 31)
train_errs = []
test_errs = []
for i in cNums:
    trains = []
    tests = []
    for _ in range(20):
        boost = ab.Boost()
        boost.train(X_train, y_train, cNum=i, verbose=False)

        pred_boost_train = boost.predict(X_train)
        # Predict on the whole test set. The previous code passed X_test[0],
        # so zip() below paired y_test with predictions for a single element
        # while still dividing by len(y_test), which made the rate wrong.
        pred_boost_test = boost.predict(X_test)

        # Fraction of samples whose prediction differs from the true label.
        error_rate_boost_test = (
            sum(1 for true, pred in zip(y_test, pred_boost_test)
                if not (true == pred))
            / float(len(y_test))
        )
        error_rate_boost_train = (
            sum(1 for true, pred in zip(y_train, pred_boost_train)
                if not (true == pred))
            / float(len(y_train))
        )

        trains.append(error_rate_boost_train)
        tests.append(error_rate_boost_test)

    # One averaged point per ensemble size.
    train_errs.append(np.mean(trains))
    test_errs.append(np.mean(tests))