def q2():
    """Boosting on UCI datasets.

    Loads the crx data through ``dl.data_q3_crx()`` and, for every
    percentage from 5 to 80 in steps of 5, partitions it with
    ``hw4.partition_folds``, obtains a booster from ``run_adaboost`` using
    the first returned partition as training data, and prints the
    misclassification rate measured on the second partition.

    To run the same experiment on the vote data, load the dataset with
    ``dl.data_q3_vote()`` instead.

    Returns:
        None. One result line per percentage is printed to stdout.
    """
    crx = dl.data_q3_crx()

    # range() with 16 values behaves the same on Python 2 and Python 3.
    for i in range(5, 85, 5):
        percent = float(i) / 100

        # NOTE(review): percent is presumably the fraction of the data that
        # goes into the training partition -- confirm against
        # hw4.partition_folds.
        all_folds = hw4.partition_folds(crx, percent)
        kf_train = all_folds[0]
        kf_test = all_folds[1]

        y, X = hw4.split_truth_from_data(kf_train)
        y_test, X_test = hw4.split_truth_from_data(kf_test)

        adaboost = run_adaboost(X, y, X_test, y_test, 'q2_crx')

        # Pass predictions and ground truth through the booster's own
        # _check_y so both sides are compared in the same representation.
        yt_pred = adaboost._check_y(adaboost.predict(X_test))
        y_test = adaboost._check_y(y_test)

        # Fraction of test points whose predicted label differs from truth.
        mismatches = sum(
            1 for predicted, actual in zip(yt_pred, y_test)
            if predicted != actual
        )
        round_err = float(mismatches) / len(y_test)

        # NOTE(review): percent is a fraction (e.g. 0.05), so the '%' in the
        # message is misleading; the text is kept unchanged so the output
        # stays identical. The single-argument parenthesised form prints the
        # same thing under the Python 2 print statement and Python 3 print().
        print('Error at {}%: Test: {}'.format(percent, round_err))
def crx_data():
    """Invoke both UCI dataset loaders exposed by ``dl``.

    Calls ``dl.data_q3_crx()`` and then ``dl.data_q3_vote()``. Whatever the
    loaders return is discarded, so this function always returns ``None``.
    """
    # NOTE(review): presumably called for the loaders' side effects or as a
    # smoke test -- confirm against the dl module.
    dl.data_q3_crx()
    dl.data_q3_vote()