コード例 #1
0
def runBoost(X_train, y_train, X_test, y_test, numClassifiers=10):
    '''
    Train an AdaBoost ensemble and measure its misclassification rates.

    An ``ab.Boost`` model is created and trained on (X_train, y_train),
    with ``numClassifiers`` forwarded as the ``cNum`` argument. The model
    then predicts on the training inputs and on the test inputs.

    Returns a tuple ``(train_error, test_error)``. Each value is the number
    of label/prediction pairs that do not compare equal, divided by the
    number of labels in the corresponding set.
    '''

    def misclassification_rate(labels, predictions):
        # Count disagreeing pairs, then normalise by the number of labels.
        wrong = 0
        for label, guess in zip(labels, predictions):
            if not (label == guess):
                wrong += 1
        return wrong / float(len(labels))

    model = ab.Boost()
    model.train(X_train, y_train, cNum=numClassifiers)

    # Error on the data the model was fitted to.
    train_error = misclassification_rate(y_train, model.predict(X_train))

    # Error on the remaining (held-out) data.
    test_error = misclassification_rate(y_test, model.predict(X_test))

    # Optional diagnostics, left disabled (they additionally require ``np``
    # and ``Counter`` to be in scope):
    #
    # print('')
    # print('***** RESULTS ADABOOST *****')
    # decisions = [c.root.splitCriteria for c in model.classifiers]
    # print('Decision Criteria per trees used: ', decisions)
    # decision_columns = [c for c,_ in decisions]
    # unique_cols = np.unique(decision_columns)
    # col_counts = Counter(decision_columns).most_common()
    # print('unique columns and frequency of split columns: ', col_counts)
    # print('')
    #
    # print('Training Error: ', train_error)
    # print('Test Error    : ', test_error)

    return train_error, test_error
コード例 #2
0
# Report the results of the single decision tree.
print('')
print('***** RESULTS DECISION TREE *****')
print('Depth: ', tree.depth)

print('')
print('Training Error: ', error_rate_tree_train)
print('Test Error    : ', error_rate_tree_test)

# Sweep the ensemble size from 1 to 30 classifiers and record, for each size,
# the mean training error and mean test error.
cNums = range(1, 31)
train_errs = []
test_errs = []
for i in cNums:
    trains = []
    tests = []
    # Train 20 separate ensembles per size and average their error rates
    # (presumably because training is randomised -- TODO confirm).
    for j in range(20):
        boost = ab.Boost()
        boost.train(X_train, y_train, cNum=i, verbose=False)
        pred_boost_train = boost.predict(X_train)
        # Predict on the whole test set so the predictions pair up
        # one-to-one with y_test below; zip() would silently truncate a
        # shorter prediction sequence and understate the error.
        pred_boost_test = boost.predict(X_test)
        # Error rate = fraction of labels whose prediction does not match.
        error_rate_boost_test = (sum(
            0 if pred == true else 1
            for (true, pred) in zip(y_test, pred_boost_test)
        ) / float(len(y_test)))
        error_rate_boost_train = (sum(
            0 if pred == true else 1
            for (true, pred) in zip(y_train, pred_boost_train)
        ) / float(len(y_train)))
        trains.append(error_rate_boost_train)
        tests.append(error_rate_boost_test)
    train_errs.append(np.mean(trains))
    test_errs.append(np.mean(tests))