def q2():
    """Boosting on UCI datasets.

    Loads the crx data via ``dl.data_q3_crx()`` and, for every training
    share from 5% to 80% in steps of 5:

    1. partitions the data with ``hw4.partition_folds(crx, fraction)`` and
       uses the first two returned items as the train and test sets;
    2. separates labels from features with ``hw4.split_truth_from_data``;
    3. calls ``run_adaboost`` (tagged ``'q2_crx'``) and predicts on the test
       features with the object it returns;
    4. prints the fraction of test points whose predicted label differs
       from the true label.

    Nothing is returned; the results are only printed.
    """
    crx = dl.data_q3_crx()
    # Alternative dataset: crx = dl.data_q3_vote()
    for pct in range(5, 85, 5):
        # partition_folds is given the share as a fraction (0.05 .. 0.80).
        fraction = float(pct) / 100
        folds = hw4.partition_folds(crx, fraction)
        kf_train, kf_test = folds[0], folds[1]
        y, X = hw4.split_truth_from_data(kf_train)
        y_test, X_test = hw4.split_truth_from_data(kf_test)
        adaboost = run_adaboost(X, y, X_test, y_test, 'q2_crx')

        # Pass predictions and ground truth through the same _check_y call
        # before comparing them element by element.
        predicted = adaboost._check_y(adaboost.predict(X_test))
        actual = adaboost._check_y(y_test)
        mismatches = sum(1 for p, a in zip(predicted, actual) if p != a)
        round_err = float(mismatches) / len(actual)

        # Report the integer percentage: the text already carries a '%' sign,
        # so printing the fraction would show e.g. "0.05%" for a 5% split.
        # Training error is not reported (the original print of
        # adaboost.adaboost_error was disabled).
        # A single-argument print(...) works as both statement and function.
        print('Error at {}%: Test: {}'.format(pct, round_err))
def crx_data():
    """Invoke both Q3 dataset loaders (crx, then vote) and discard the results.

    NOTE(review): the return values are thrown away, so this is presumably
    called only for whatever side effects the loaders have -- confirm against
    the ``dl`` module.
    """
    for load in (dl.data_q3_crx, dl.data_q3_vote):
        load()