def main():
    """Train AdaBoost for 80 rounds and plot error / exponential-loss curves.

    Returns early when ``parse_data`` yields a falsy value. Otherwise calls
    ``run_adaboost`` with T = 80, plots the results of ``error`` for the train
    and test sets, plots ``error_exp_loss`` evaluated on each prefix of the
    hypotheses and weights, and prints the first ten hypotheses together with
    the vocabulary entry they index and their weight.
    """
    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data

    T = 80
    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)

    # NOTE(review): these are plotted against T x-values, so error() presumably
    # returns one value per round -- confirm against its definition.
    train_error = error(hypotheses, alpha_vals, X_train, y_train)
    test_error = error(hypotheses, alpha_vals, X_test, y_test)

    train_errors_exp_loss = []
    test_errors_exp_loss = []
    for rounds in range(1, T + 1):
        h_prefix = hypotheses[:rounds]
        alpha_prefix = alpha_vals[:rounds]
        train_errors_exp_loss.append(
            error_exp_loss(h_prefix, alpha_prefix, X_train, y_train))
        test_errors_exp_loss.append(
            error_exp_loss(h_prefix, alpha_prefix, X_test, y_test))

    x = list(range(T))

    def show_curves(first, first_label, second, second_label):
        # Draw two labelled curves over x and display the figure.
        plt.plot(x, first, label=first_label)
        plt.plot(x, second, label=second_label)
        plt.xlabel('number of iterations t')
        plt.ylabel('error')
        plt.legend()
        plt.show()

    show_curves(train_error, 'train_error', test_error, 'test_error')
    show_curves(train_errors_exp_loss, "train_errors_exp_loss",
                test_errors_exp_loss, "test_errors_exp_loss")

    for i in range(10):
        hypothesis = hypotheses[i]
        print("hyphothesis is {} Word is {} and his weight is {}".format(
            hypothesis, vocab[hypothesis[1]], alpha_vals[i]))
def main(_):
    """Load the training CSV, build vocabularies and run the MemN2N model.

    Reads and parses ``data/data_1_train.csv`` through ``process_data``,
    builds the source/target vocabularies, splits the parsed data with the
    arguments (80, 20), fills the relevant ``FLAGS`` fields, loads
    pre-trained embeddings and finally builds and runs ``MemN2N`` inside a
    TensorFlow session.
    """
    train_file = 'data/data_1_train.csv'
    # Not read again in this function; only referenced by the disabled
    # loading path below.
    source_count, target_count = [], []

    raw_data = process_data.read_data(train_file)
    parsed_data = process_data.parse_data(raw_data)
    source_word2idx, target_word2idx = create_vocab(parsed_data)

    # Disabled loading path:
    # train_data = read_data(FLAGS.train_data, source_count, source_word2idx, target_count, target_word2idx)
    # test_data = read_data(FLAGS.test_data, source_count, source_word2idx, target_count, target_word2idx)

    train_split, test_split = process_data.split_data(parsed_data, 80, 20)
    train_data = process_data.read_and_process_data(
        train_split, source_word2idx, target_word2idx)
    test_data = process_data.read_and_process_data(
        test_split, source_word2idx, target_word2idx)

    FLAGS.pad_idx = source_word2idx['<pad>']
    FLAGS.nwords = len(source_word2idx)
    # Larger of the two element-4 values; on a tie the test value is chosen.
    FLAGS.mem_size = max(test_data[4], train_data[4])

    pp.pprint(flags.FLAGS.__flags)

    print('loading pre-trained word vectors...')
    FLAGS.pre_trained_context_wt = init_word_embeddings(source_word2idx)
    FLAGS.pre_trained_target_wt = init_word_embeddings(target_word2idx)

    with tf.Session() as sess:
        model = MemN2N(FLAGS, sess)
        model.build_model()
        model.run(train_data, test_data)
def main():
    """Parse the data set and run AdaBoost on the training split.

    Returns early when ``parse_data`` yields a falsy value; otherwise calls
    ``run_adaboost`` with 80 as its third argument. The result is not used
    further here.
    """
    parsed = parse_data()
    if not parsed:
        return
    X_train, y_train, X_test, y_test, vocab = parsed

    hypotheses, alpha_vals = run_adaboost(X_train, y_train, 80)
def main():
    """Parse the data, run AdaBoost and hand the result to ``gen_plots``.

    Returns early when ``parse_data`` yields a falsy value. ``run_adaboost``
    is called with 80 as its third argument and additionally receives the
    fifth element of the parsed data (named ``inverse_vocab`` here).
    """
    parsed = parse_data()
    if not parsed:
        return
    X_train, y_train, X_test, y_test, inverse_vocab = parsed

    h_vals, alpha_vals = run_adaboost(X_train, y_train, 80, inverse_vocab)
    gen_plots(h_vals, alpha_vals, X_train, y_train, X_test, y_test)
def main():
    """Run AdaBoost with 80 rounds, call ``q_a``, then rerun with 10 rounds.

    Returns early when ``parse_data`` yields a falsy value.
    """
    parsed = parse_data()
    if not parsed:
        return
    X_train, y_train, X_test, y_test, vocab = parsed

    hypotheses, alpha_vals = run_adaboost(X_train, y_train, 80)
    q_a(hypotheses, alpha_vals, X_train, y_train, X_test, y_test)

    ##############################################
    # You can add more methods here, if needed.

    # Second run with 10 as the round argument. Note that it rebinds
    # alpha_vals, and neither result is used afterwards in this function.
    hypotheses2, alpha_vals = run_adaboost(X_train, y_train, 10)
def main():
    """Run AdaBoost with T = 80 and dispatch to the q1a / q1b / q1c helpers.

    Returns early when ``parse_data`` yields a falsy value.
    """
    T = 80

    parsed = parse_data()
    if not parsed:
        return
    X_train, y_train, X_test, y_test, vocab = parsed

    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)

    ##############################################
    # You can add more methods here, if needed.
    split_args = (X_train, y_train, X_test, y_test)
    q1a(T, alpha_vals, hypotheses, *split_args)
    q1b(10, hypotheses, vocab)
    q1c(T, alpha_vals, hypotheses, *split_args)
def main():
    """Run AdaBoost for T = 80 rounds and produce the three report sections.

    Section a saves ``Error_wrt_t.png`` with the values of ``calc_error`` on
    the train and test sets for each prefix of rounds. Section b prints the
    first ten hypotheses. Section c saves ``Exp_loss_wrt_t.png`` with the
    values of ``calc_exponential_loss`` for each prefix of rounds.

    Returns early when ``parse_data`` yields a falsy value.
    """
    data = parse_data()
    if not data:
        return
    (X_train, y_train, X_test, y_test, vocab) = data

    T = 80
    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)
    rounds = list(range(1, T + 1))

    ##############################################
    # section a
    train_err = []
    test_err = []
    for t in range(T):
        train_err.append(calc_error(hypotheses[:t + 1], alpha_vals[:t + 1], X_train, y_train))
        test_err.append(calc_error(hypotheses[:t + 1], alpha_vals[:t + 1], X_test, y_test))
    plt.plot(rounds, train_err, marker='+')
    plt.plot(rounds, test_err, marker='+')
    plt.xlabel('t')
    plt.ylabel('Average Error')
    plt.legend(['Train Err', 'Test Err'], loc='best')
    plt.savefig('Error_wrt_t.png')
    plt.close()

    # section b
    print('\n')
    for i, h in enumerate(hypotheses[:10]):
        print(f'h{i}: word={vocab[h[1]]}: classify as {h[0]} if word occur <= {h[2]}')

    # section c
    exp_train_loss = []
    exp_test_loss = []
    for t in range(T):
        # Both slices must cover the same t + 1 rounds. The weights were
        # previously sliced with [:t], one entry short of the hypotheses.
        exp_train_loss.append(
            calc_exponential_loss(hypotheses[:t + 1], alpha_vals[:t + 1], X_train, y_train))
        exp_test_loss.append(
            calc_exponential_loss(hypotheses[:t + 1], alpha_vals[:t + 1], X_test, y_test))
    plt.plot(rounds, exp_train_loss, marker='+')
    plt.plot(rounds, exp_test_loss, marker='+')
    plt.xlabel('t')
    plt.ylabel('Exponential Loss')
    plt.legend(['Train Err', 'Test Err'], loc='best')
    plt.savefig('Exp_loss_wrt_t.png')
    plt.close()
def main():
    """Run AdaBoost for T = 80 rounds and print per-round train/test errors.

    Prints every hypothesis, then fills a (T, 5) table whose columns are the
    round index, train error, test error, average exponential loss on train
    and on test, as returned by ``calc_error`` and ``calc_avg_exponent``.
    Only the two error columns are printed; the plotting code is disabled.

    Returns early when ``parse_data`` yields a falsy value.
    """
    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data

    T = 80
    # Uninitialised table; every row is assigned in the loop below.
    experiment_results = np.empty((int(T), 5), dtype=float)

    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)

    for h in hypotheses:
        print("word: " + str(vocab.get(h[1])) + ", if lower than - " + str(h[2])
              + " predict is: " + str(h[0]))

    for i in range(T):
        train_error = calc_error(X_train, y_train, hypotheses, alpha_vals, i)
        test_error = calc_error(X_test, y_test, hypotheses, alpha_vals, i)
        avg_exp_loss_train = calc_avg_exponent(X_train, y_train, hypotheses, alpha_vals, i)
        avg_exp_loss_test = calc_avg_exponent(X_test, y_test, hypotheses, alpha_vals, i)
        row = [i, train_error, test_error, avg_exp_loss_train, avg_exp_loss_test]
        experiment_results[i] = row

    # plt.title('Test error (red) & Train error (blue) as a function of T')
    # plt.ylabel('error')
    # plt.xlabel('T')
    # plt.plot(experiment_results[:, 0], experiment_results[:, 1], color='blue')
    # plt.plot(experiment_results[:, 0], experiment_results[:, 2], color='red')
    # plt.axis([0, T, 0, 0.4])
    # plt.show()

    # plt.title('Average exponential loss - on test set (red) & on train set (blue) as a function of T')
    # plt.ylabel('exponential loss')
    # plt.xlabel('T')
    # plt.plot(experiment_results[:, 0], experiment_results[:, 3], color='blue')
    # plt.plot(experiment_results[:, 0], experiment_results[:, 4], color='red')
    # plt.axis([0, T, 0.6, 1])
    # plt.show()

    for title, column in (("train error:", 1), ("test error:", 2)):
        print(title)
        print(experiment_results[:, column])
def main():
    """Run AdaBoost for 80 rounds and evaluate each prefix of rounds.

    Section (a) collects ``get_error`` on train and test for every prefix,
    section (b) prints the first ten hypotheses with their word, prediction,
    threshold and weight, and section (c) collects ``get_exponential_loss``
    for every prefix. The plotting code is disabled, so the collected lists
    are not used further here.

    Returns early when ``parse_data`` yields a falsy value.
    """
    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data

    num_rounds = 80
    hypotheses, alpha_vals = run_adaboost(X_train, y_train, num_rounds)

    ##############################################
    # You can add more methods here, if needed.

    # section (a)
    training_errors = []
    test_errors = []
    # T = [i for i in range(1, 81)]
    for k in range(1, num_rounds + 1):
        alphas_k, hyps_k = alpha_vals[:k], hypotheses[:k]
        training_errors.append(get_error(alphas_k, hyps_k, X_train, y_train))
        test_errors.append(get_error(alphas_k, hyps_k, X_test, y_test))

    # plt.plot(T, training_errors, color="blue")
    # plt.plot(T, test_errors, color="red")
    #
    # plt.xlabel("T (iterations)")
    # plt.ylabel("Error")
    #
    # plt.legend(["Training Error", "Test Error"])
    # plt.show()
    #

    # section (b)
    for i in range(10):
        h_pred, h_index, h_theta = hypotheses[i]
        print("AdaBoost iteration: " + str(i + 1)
              + " chooses word: " + vocab[h_index]
              + ", with h_pred: " + str(h_pred)
              + ", threshold value: " + str(h_theta)
              + ", weight:" + str(alpha_vals[i]))

    # section (c)
    training_loss = []
    test_loss = []
    # T = [i for i in range(1, 81)]
    for k in range(1, num_rounds + 1):
        alphas_k, hyps_k = alpha_vals[:k], hypotheses[:k]
        training_loss.append(get_exponential_loss(alphas_k, hyps_k, X_train, y_train))
        test_loss.append(get_exponential_loss(alphas_k, hyps_k, X_test, y_test))
def main():
    """Run AdaBoost for T = 10 rounds, print the weak classifiers and plot.

    Prints every hypothesis with its word and weight, then shows two figures:
    ``calc_error`` and ``calc_exp_error`` on train and test, each evaluated
    on every prefix of rounds.

    Returns early when ``parse_data`` yields a falsy value.
    """
    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data

    T = 10
    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)

    print("weak classifiers for 10 iterations:")
    for i in range(T):
        description = "h{} = {}, Word = \"{}\" ".format(
            i, hypotheses[i], vocab[hypotheses[i][1]])
        weight = "weight is {0:.2f}".format(alpha_vals[i])
        print(description + weight)

    train_error = []
    test_error = []
    exp_train_error = []
    exp_test_error = []
    for t in range(T):
        hyps_t = hypotheses[:t + 1]
        alphas_t = alpha_vals[:t + 1]
        train_error.append(calc_error(hyps_t, alphas_t, X_train, y_train))
        test_error.append(calc_error(hyps_t, alphas_t, X_test, y_test))
        exp_train_error.append(calc_exp_error(hyps_t, alphas_t, X_train, y_train))
        exp_test_error.append(calc_exp_error(hyps_t, alphas_t, X_test, y_test))

    # First figure: error per round.
    plt.plot(list(range(T)), train_error, color='r', marker='x')
    plt.plot(list(range(T)), test_error, color='b', marker='o')
    plt.title('Error as func of T')
    plt.xlabel("Iteration")
    plt.ylabel("Error")
    red_patch_zero = mpatches.Patch(color='red', label='Training error')
    blue_patch_zero = mpatches.Patch(color='blue', label='Test error')
    plt.legend(handles=[red_patch_zero, blue_patch_zero])
    plt.show()
    plt.clf()

    # Second figure: exponential loss per round.
    plt.plot(list(range(T)), exp_train_error, color='r', marker='x')
    plt.plot(list(range(T)), exp_test_error, color='b', marker='o')
    plt.title('exponential loss as func of T')
    plt.xlabel("Iteration")
    plt.ylabel("exp loss")
    red_patch_exp = mpatches.Patch(color='red', label='Training exp_error')
    blue_patch_exp = mpatches.Patch(color='blue', label='Test exp_error')
    plt.legend(handles=[red_patch_exp, blue_patch_exp])
    plt.show()
def section_c():
    """Plot train/test exponential loss for T = 80 rounds and time the run.

    Saves the figure to ``results/section_1c.png`` and prints the elapsed
    wall-clock time. Returns early, without printing the timing, when
    ``parse_data`` yields a falsy value.
    """
    started_at = time.time()

    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data

    T = 80
    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)

    # NOTE(review): plotted against T x-values, so calc_loss presumably
    # returns one value per round -- confirm against its definition.
    training_exp_loss = calc_loss(hypotheses, alpha_vals, X_train, y_train)
    test_exp_loss = calc_loss(hypotheses, alpha_vals, X_test, y_test)

    plt.clf()
    t_values = np.arange(1, T + 1)
    plt.plot(t_values, training_exp_loss, label='Train Exponential Loss')
    plt.plot(t_values, test_exp_loss, label='Test Exponential Loss')
    plt.legend()
    plt.xlabel("T Values")
    plt.ylabel("Exponential loss")
    plt.savefig('results/section_1c.png')

    print("My program took", time.time() - started_at, "seconds to run")
def section_b():
    """Run AdaBoost for T = 10 rounds and group the chosen words by sign.

    Prints, per hypothesis, whether it is reported as positive or negative,
    then the grouped words, the result of ``calc_error`` on the training set
    and the elapsed wall-clock time. Returns early when ``parse_data`` yields
    a falsy value.
    """
    started_at = time.time()

    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data

    T = 10
    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)

    words = {1: [], -1: []}
    # NOTE(review): this reads element 2 as the sign and element 0 as the
    # vocabulary index -- confirm this matches the tuple layout produced by
    # run_adaboost.
    for number, h in enumerate(hypotheses, start=1):
        if h[2] == 1:
            print('hypothese {} is positive'.format(number))
            words[1].append(vocab[h[0]])
        else:
            print('hypothese {} is negative'.format(number))
            words[-1].append(vocab[h[0]])
    print(words)

    training_err = calc_error(hypotheses, alpha_vals, X_train, y_train)
    print(training_err)

    print("My program took", time.time() - started_at, "seconds to run")
def main():
    """Run AdaBoost for T = 80 rounds; plot errors, print hypotheses, plot loss.

    Section a plots ``error_calculation`` for test and train, section b
    prints the first ten hypotheses, section c plots ``loss_calculation`` for
    test and train using the same labels and axes. ``plt.show`` is disabled,
    so no figure is displayed by this function.

    Returns early when ``parse_data`` yields a falsy value.
    """
    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data

    T = 80
    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)

    def plot_test_and_train(test_values, train_values):
        # Draw the test curve (blue) then the train curve (black) over 1..T.
        plt.plot(np.arange(1, 1 + T), test_values, 'b', label='test set error')
        plt.plot(np.arange(1, 1 + T), train_values, 'k', label='train set error')
        plt.xlabel('t')
        plt.ylabel('error')
        plt.legend()

    # section a:
    test_set_error = error_calculation(X_test, y_test, hypotheses, alpha_vals)
    train_set_error = error_calculation(X_train, y_train, hypotheses, alpha_vals)
    plot_test_and_train(test_set_error, train_set_error)
    # plt.show()

    # section b:
    for i in range(10):
        print(hypotheses[i])

    # section c:
    test_set_loss = loss_calculation(y_test, X_test, hypotheses, alpha_vals)
    train_set_loss = loss_calculation(y_train, X_train, hypotheses, alpha_vals)
    plot_test_and_train(test_set_loss, train_set_loss)
def main():
    """Run AdaBoost for T = 80 rounds and save error and loss plots.

    For every prefix of rounds a boosted classifier is built from the
    hypotheses and weights and passed to ``calc_error`` for the train and
    test sets; the curves are saved to ``q_1a.png``. ``calc_loss`` is then
    evaluated per round for both sets and saved to ``q_1c.png``.

    Returns early when ``parse_data`` yields a falsy value.
    """
    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data

    T = 80
    hypotheses, alphas = run_adaboost(X_train, y_train, T)

    def boosted_classifier(last_round):
        # Sign of the weighted vote of hypotheses 0..last_round.
        def classify(x):
            votes = [
                (np.sign(hypotheses[j][2] - x[hypotheses[j][1]]) * hypotheses[j][0]) * alphas[j]
                for j in range(last_round + 1)
            ]
            return np.sign(np.sum(votes))
        return classify

    train_errors = []
    test_errors = []
    for t in range(T):
        curr_boosted_h = boosted_classifier(t)
        train_errors.append(calc_error(X_train, y_train, curr_boosted_h))
        test_errors.append(calc_error(X_test, y_test, curr_boosted_h))

    t_list = list(range(T))
    plt.plot(t_list, train_errors, label='train_error')
    plt.plot(t_list, test_errors, label='test_error')
    plt.legend()
    plt.xlabel('t')
    plt.ylabel('error')
    plt.savefig('q_1a.png')
    plt.close()

    train_loss = []
    test_loss = []
    for t in range(T):
        train_loss.append(calc_loss(X_train, y_train, hypotheses, alphas, t))
        test_loss.append(calc_loss(X_test, y_test, hypotheses, alphas, t))

    t_list = list(range(T))
    plt.plot(t_list, train_loss, label='train_loss')
    plt.plot(t_list, test_loss, label='test_loss')
    plt.legend()
    plt.xlabel('t')
    plt.ylabel('loss')
    plt.savefig('q_1c.png')
    plt.close()
def main():
    """Run AdaBoost for T = 80 rounds and invoke the three local report steps.

    Defines and calls ``plot_test_err``, ``get_predictors`` and
    ``plot_loss_func``. ``plt.show`` is disabled in both plotting helpers,
    so no figure is displayed by this function.

    Returns early when ``parse_data`` yields a falsy value.
    """
    data = parse_data()
    if not data:
        return
    (X_train, y_train, X_test, y_test, vocab) = data
    T = 80
    # print("reminder- T should be changed")
    hypothesis, alpha_vals = run_adaboost(X_train, y_train, T)

    ############################################
    def plot_test_err(X_train=data[0], Y_train=data[1], X_test=data[2], Y_test=data[3], T=80):
        """Plot the train and test results of ``calc_error`` against t = 1..T."""
        err_train = calc_error(X_train, Y_train, hypothesis, alpha_vals)
        err_test = calc_error(X_test, Y_test, hypothesis, alpha_vals)
        plt.xlabel("t values")
        plt.ylabel("error")
        t_values = list(range(1, T + 1))
        plt.plot(t_values, err_train, color='green', label='train error')
        plt.plot(t_values, err_test, color='red', label='test error')
        plt.legend(loc="upper right")
        # plt.show()

    def get_predictors(X_train=data[0], Y_train=data[1], vocab=data[4], T=80):
        """Return ``(index, word)`` for every hypothesis, in order.

        The previous loop body consisted only of commented-out prints, which
        is not valid Python because a ``for`` suite needs a statement. The
        same lookups are now collected and returned, so the function stays
        silent. The old loop also started at index 1, skipping the first
        hypothesis; all hypotheses are covered here.
        """
        predictors = []
        for h in hypothesis:
            # print(h)
            # print("index " + str(h[1]) + ": " + vocab[h[1]])
            predictors.append((h[1], vocab[h[1]]))
        return predictors

    def plot_loss_func(X_train=data[0], Y_train=data[1], X_test=data[2], Y_test=data[3], T=80):
        """Plot the train and test results of ``calc_loss`` against t = 1..T."""
        loss_train = calc_loss(X_train, Y_train, hypothesis, alpha_vals)
        loss_test = calc_loss(X_test, Y_test, hypothesis, alpha_vals)
        plt.xlabel("t values")
        plt.ylabel("loss")
        X = list(range(1, T + 1))
        plt.plot(X, loss_train, color='green', label='train loss')
        plt.plot(X, loss_test, color='red', label='test loss')
        plt.legend(loc="upper right")
        # plt.show()

    plot_test_err()
    get_predictors()
    plot_loss_func()
def main():
    """Run AdaBoost for T = 80 rounds and show train and test loss curves.

    For each round t the mean of ``c_compute_exp`` over all samples is
    computed, first for the training set (plotted and shown) and then for
    the test set (plotted and shown). Sections a and b are disabled.

    Returns early when ``parse_data`` yields a falsy value.
    """
    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data
    T = 80

    # a
    # hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)
    # t_array = range(T)
    # t_train_err_arr = []
    # t_test_err_arr = []
    # for t in t_array:
    #     print("##### COMPUTE ERROR FOR t={} ######".format(t),)
    #     t_train_err = compute_t_time_err(
    #         X_train, y_train, t, hypotheses, alpha_vals)
    #     t_test_err = compute_t_time_err(
    #         X_test, y_test, t, hypotheses, alpha_vals)
    #     t_train_err_arr.append(t_train_err)
    #     t_test_err_arr.append(t_test_err)
    # plt.ylabel('train_err')
    # plt.xlabel('t')
    # plt.plot(t_array, t_train_err_arr)
    # plt.show()
    # plt.ylabel('test_err')
    # plt.xlabel('t')
    # plt.plot(t_array, t_test_err_arr)
    # plt.show()

    # b
    # T = 10
    # hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)
    # for i in range(len(hypotheses)):
    #     print("***** Word ******")
    #     print(vocab[hypotheses[i][1]])
    #     print(hypotheses[i][2])
    #     print(hypotheses[i][0])
    #     print("------------------------------------------")

    # c
    T = 80
    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)
    t_array = range(T)

    def mean_loss_per_round(X, y):
        # Mean of c_compute_exp over all samples, for t = 0..T-1.
        sample_count = len(X)
        per_round = []
        for t in range(0, T):
            total = sum(
                c_compute_exp(t, X[i], y[i], hypotheses, alpha_vals)
                for i in range(sample_count)
            )
            per_round.append(total / sample_count)
        return per_round

    l_array = mean_loss_per_round(X_train, y_train)
    plt.ylabel('train loss')
    plt.xlabel('t')
    plt.plot(t_array, l_array)
    plt.show()

    l_array = mean_loss_per_round(X_test, y_test)
    plt.ylabel('test loss')
    plt.xlabel('t')
    plt.plot(t_array, l_array)
    plt.show()
def main():
    """Parse the data set and unpack it into its five components.

    Returns early when ``parse_data`` yields a falsy value. Nothing further
    is done with the unpacked values here.
    """
    parsed = parse_data()
    if not parsed:
        return
    X_train, y_train, X_test, y_test, vocab = parsed
def main():
    """Run AdaBoost for T = 80 rounds and plot error and exp-loss curves.

    Calls ``calc_loss_wrt_t`` for the train/test errors and ``calc_exp_loss``
    for the train/test exponential losses, then plots all four curves.
    ``plt.show`` is disabled and no new figure is created in between, so the
    second pair of curves is drawn through the same pyplot state as the first.

    Returns early when ``parse_data`` yields a falsy value.
    """
    data = parse_data()
    if not data:
        return
    X_train, y_train, X_test, y_test, vocab = data

    T = 80
    hypotheses, alpha_vals = run_adaboost(X_train, y_train, T)
    # print("hypotheses: ", hypotheses)
    # print("alpha_vals: ", alpha_vals)

    # Recorded values (presumably the output of an earlier run), kept for
    # reference only; they are not used by the code:
    #
    # hypotheses = [(1, 26, 0.5), (-1, 31, 0.5), (-1, 22, 0.5), (1, 311, 0.5),
    #     (-1, 37, 1.5), (1, 372, 0.5), (-1, 282, 0.5), (1, 292, 0.5),
    #     (-1, 196, 0.5), (1, 88, 0.5), (-1, 107, 0.5), (1, 402, 0.5),
    #     (-1, 703, 0.5), (1, 804, 0.5), (-1, 557, 0.5), (1, 187, 0.5),
    #     (-1, 17, 1.5), (1, 189, 0.5), (-1, 211, 0.5), (1, 311, 0.5),
    #     (-1, 195, 0.5), (1, 76, 0.5), (-1, 1, 2.5), (1, 23, 0.5),
    #     (-1, 158, 0.5), (1, 232, 0.5), (-1, 179, 0.5), (1, 892, 0.5),
    #     (-1, 729, 0.5), (1, 741, 0.5), (-1, 994, 0.5), (1, 822, 0.5),
    #     (-1, 501, 0.5), (1, 864, 0.5), (-1, 1026, 0.5), (1, 154, 0.5),
    #     (-1, 15, 0.5), (1, 25, 0.5), (-1, 99, 0.5), (1, 246, 0.5),
    #     (-1, 301, 0.5), (1, 804, 0.5), (-1, 371, 0.5), (1, 140, 0.5),
    #     (-1, 60, 0.5), (1, 26, 1.5), (-1, 1006, 0.5), (1, 275, 0.5),
    #     (-1, 354, 0.5), (1, 379, 0.5), (-1, 156, 0.5), (1, 160, 0.5),
    #     (-1, 128, 0.5), (1, 4, 1.5), (-1, 17, 0.5), (-1, 46, 0.5),
    #     (1, 41, 0.5), (-1, 29, 0.5), (1, 236, 0.5), (-1, 687, 0.5),
    #     (1, 892, 0.5), (-1, 670, 0.5), (1, 383, 0.5), (-1, 709, 0.5),
    #     (1, 1012, 0.5), (-1, 498, 0.5), (1, 435, 0.5), (-1, 994, 0.5),
    #     (1, 802, 0.5), (-1, 641, 0.5), (1, 55, 1.5), (-1, 198, 0.5),
    #     (1, 62, 0.5), (-1, 24, 0.5), (1, 332, 0.5), (-1, 637, 0.5),
    #     (1, 1041, 0.5), (-1, 986, 0.5), (1, 582, 0.5), (-1, 859, 0.5)]
    #
    # alpha_vals = [0.26151742796590843, 0.1841508459039694, 0.14910232134602863,
    #     0.15469714068798077, 0.1915574866154928, 0.14314431361864305,
    #     0.16148927520512862, 0.14977352552677523, 0.15264130425915018,
    #     0.14298235499432413, 0.1232388554499457, 0.1238034143870661,
    #     0.1406654300359237, 0.13407276133538495, 0.1336567712545831,
    #     0.11670999682271353, 0.11128504234044315, 0.11584091244329471,
    #     0.11725880867725799, 0.11837865544369736, 0.1154248188051812,
    #     0.10386394418685344, 0.10230917669639172, 0.11359596807342957,
    #     0.10318982662275046, 0.10787726802795057, 0.11005224530587043,
    #     0.09951335574039732, 0.12661287072537064, 0.11290427803950069,
    #     0.11694455759207543, 0.11802856069994117, 0.11287176966651957,
    #     0.10606792805576688, 0.11099763404211005, 0.10733991471431656,
    #     0.10129061101278855, 0.09846949896010765, 0.09429860880085342,
    #     0.08902133857123443, 0.1079300841178513, 0.09717822686858121,
    #     0.11056976082735676, 0.09792042517369205, 0.08953964951090948,
    #     0.08644626308410479, 0.10280628868886399, 0.10459214117749982,
    #     0.09225961785392627, 0.09814011977709979, 0.09613154551994836,
    #     0.08323950913813724, 0.08950661923360151, 0.0857889981732911,
    #     0.0919166146829436, 0.07666005900883781, 0.07848267550743773,
    #     0.07524401762222457, 0.07473087773492901, 0.09944116883347706,
    #     0.10016234752905902, 0.1028588948918129, 0.09165611106355137,
    #     0.09690909125086616, 0.09333348914748903, 0.09523125399484374,
    #     0.09067115806419819, 0.08532845242193139, 0.09445076110183356,
    #     0.09358415170596975, 0.093523733622731, 0.08596147923770475,
    #     0.08225938932509405, 0.07401478691950251, 0.06703934115345452,
    #     0.0950814290369983, 0.08892531816431326, 0.09347871317344918,
    #     0.09262498183750978, 0.08468446616954839]

    train_errors, test_errors = calc_loss_wrt_t(
        X_train, y_train, X_test, y_test, hypotheses, alpha_vals)
    train_exploss = calc_exp_loss(X_train, y_train, hypotheses, alpha_vals)
    test_exploss = calc_exp_loss(X_test, y_test, hypotheses, alpha_vals)

    t_vector = list(range(T))

    # Error curves.
    plt.xlabel('t')
    plt.ylabel('Error')
    plt.plot(t_vector, train_errors, label="Train error", marker=".", color="blue")
    plt.plot(t_vector, test_errors, label="Test error", marker=".", color="purple")
    plt.legend()
    # plt.show()

    # Average exponential-loss curves.
    plt.xlabel('t')
    plt.ylabel('Avg exp-loss')
    plt.plot(t_vector, train_exploss, label="Train exp-loss", marker=".", color="blue")
    plt.plot(t_vector, test_exploss, label="Test exp-loss", marker=".", color="purple")
    plt.legend()