def Q10(ada, train_X, train_y, T_hat=500, noise=0.0):
    fig = plt.figure()
    fig.suptitle('Decision of T-hat')
    decision_boundaries(ada, train_X, train_y, T_hat)
    suffix = '' if noise == 0 else '_' + str(noise).replace('.', '_')
    plt.savefig(FIG_DIR3 + 'q10' + suffix)
def Q3():  # AdaBoost
    val_error = []
    train_error = []
    Ts = list(range(1, 205, 5))
    for T in Ts:
        adaboost = aba.AdaBoost(DecisionStump, T)
        adaboost.train(X_train, y_train)
        train_error.append(adaboost.error(X_train, y_train))
        val_error.append(adaboost.error(X_val, y_val))
    plot(Ts, train_error)
    plot(Ts, val_error)
    xlabel("number of iterations (T)")
    ylabel("error")
    legend(["Training Error", "Validation Error"], loc=5)
    show()

    figure(1)
    ion()
    for index, T in enumerate([1, 5, 10, 50, 100, 200]):
        adaboost = aba.AdaBoost(DecisionStump, T)
        adaboost.train(X_train, y_train)
        subplot(2, 3, index + 1)
        decision_boundaries(adaboost, X_train, y_train, "Iteration: " + str(T))
    pause(8)

    # the T values start at 1 with step 5, so index into Ts
    # (multiplying the index by 5 would give a wrong, possibly zero, T)
    best_iteration = Ts[val_error.index(np.min(val_error))]
    print(best_iteration)
    ab = aba.AdaBoost(DecisionStump, best_iteration)
    ab.train(X_train, y_train)
    print(ab.error(X_test, y_test))
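# The snippets in this file assume an AdaBoost class with a train(X, y) method
# and an error(X, y, t) method that evaluates the ensemble truncated to its
# first t weak learners. The following is only a minimal sketch of that assumed
# interface (the course-provided AdaBoost and DecisionStump may differ):
import numpy as np

class AdaBoostSketch:
    """Minimal AdaBoost sketch. WL is assumed to be a weak-learner class whose
    constructor takes (D, X, y) -- a sample distribution, features, and labels
    in {-1, +1} -- and which exposes predict(X)."""

    def __init__(self, WL, T):
        self.WL = WL
        self.T = T
        self.h = [None] * T   # the weak learners
        self.w = np.zeros(T)  # their vote weights

    def train(self, X, y):
        m = len(y)
        D = np.full(m, 1.0 / m)  # start from the uniform distribution
        for t in range(self.T):
            self.h[t] = self.WL(D, X, y)
            pred = self.h[t].predict(X)
            eps = max(np.sum(D[pred != y]), 1e-12)      # weighted training error
            self.w[t] = 0.5 * np.log((1 - eps) / eps)   # learner vote weight
            D = D * np.exp(-self.w[t] * y * pred)       # re-weight the samples
            D = D / D.sum()
        return D  # final sample weights over the training set, as q16 below assumes

    def predict(self, X, max_t=None):
        max_t = self.T if max_t is None else max_t
        votes = sum(self.w[t] * self.h[t].predict(X) for t in range(max_t))
        return np.sign(votes)

    def error(self, X, y, max_t=None):
        return float(np.mean(self.predict(X, max_t) != y))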
def q16(train):
    adaboost = AdaBoost(ex4_tools.DecisionStump, MAX_T)
    D = adaboost.train(*train)
    # scale the final sample distribution so the heaviest point gets marker size 10
    D = D / np.max(D) * 10
    plt.figure()
    ex4_tools.decision_boundaries(adaboost, *train, MAX_T, weights=D)
    plt.title("Weighted Training Set")
    plt.show()
def q15(train, test):
    T = (5, 10, 50, 100, 200, 500)
    adaboost = AdaBoost(ex4_tools.DecisionStump, MAX_T)
    adaboost.train(*train)
    errors = [adaboost.error(*test, t) for t in T]
    best = np.argmin(errors)
    plt.figure()
    ex4_tools.decision_boundaries(adaboost, *test, T[best])
    plt.title(f"Best classifier - T={T[best]}, error={errors[best]}")
    plt.show()
def Q10():
    X, y = generate_data(1000, 0)
    T = [5, 10, 50, 100, 200, 500]
    i = int(np.argmin(Q9()))
    T_min = T[i]
    optimal_h = AdaBoost(DecisionStump, T_min)
    optimal_h.train(X, y)
    decision_boundaries(optimal_h, X, y, T_min)
    plt.title(f'Decision boundary for T={T_min}, the value minimizing the test error')
    plt.savefig('Q10')
    plt.show()
def q_10(self):
    tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
    x, y = ex4_tools.generate_data(200, noise_ratio=0)
    errors = self.get_ab_errors(tx, ty, x, y)
    min_t = np.argmin(errors)
    a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=self.ts[min_t])
    a_boost.train(tx, ty)
    ex4_tools.decision_boundaries(a_boost, tx, ty, self.ts[min_t])
    plt.title("min error is " + str(errors[min_t]) +
              " with " + str(self.ts[min_t]) + " classifiers")
    plt.show()
def q_9(self):
    tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
    x, y = ex4_tools.generate_data(200, noise_ratio=0)
    for i, t in enumerate(self.ts, start=1):
        a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
        a_boost.train(tx, ty)
        plt.subplot(2, 3, i)
        ex4_tools.decision_boundaries(a_boost, x, y, t)
    plt.show()
def Q9(ada, test_X, test_y, noise=0.0):
    n_classifiers = [5, 10, 50, 100, 200, 500]
    fig = plt.figure()
    fig.suptitle('Decision of the Learned Classifiers')
    for i in range(6):
        plt.subplot(3, 2, i + 1)
        decision_boundaries(ada, test_X, test_y, n_classifiers[i])
    suffix = '' if noise == 0 else '_' + str(noise).replace('.', '_')
    plt.savefig(FIG_DIR3 + 'q9' + suffix)
def Q9():
    X, y = generate_data(300, 0)
    h = AdaBoost(DecisionStump, 500)
    h.train(X, y)
    # T is the module-level list of checkpoints ([5, 10, 50, 100, 200, 500] in Q10 above)
    err = [0] * len(T)
    f = plt.figure(figsize=(10, 10))
    for i, t in enumerate(T):
        f.add_subplot(3, 2, i + 1)
        err[i] = h.error(X, y, t)
        decision_boundaries(h, X, y, t)
    plt.savefig('Q9')
    plt.show()
    return np.array(err)
def q14(train, test):
    T = (5, 10, 50, 100, 200, 500)
    adaboost = AdaBoost(ex4_tools.DecisionStump, MAX_T)
    X, y = train
    X_t, y_t = test
    adaboost.train(X, y)
    plt.figure()
    for i, t in enumerate(T):
        plt.subplot(2, 3, i + 1)
        ex4_tools.decision_boundaries(adaboost, X_t, y_t, t)
        plt.title(f"Decision Boundaries, T={t}")
    plt.show()
def Q4():  # decision trees
    val_error = []
    train_error = []
    depths = [3, 6, 8, 10, 12]
    for depth in depths:
        dt = dta.DecisionTree(depth)
        dt.train(X_train, y_train)
        train_error.append(dt.error(X_train, y_train))
        val_error.append(dt.error(X_val, y_val))
    plot(depths, train_error)
    plot(depths, val_error)
    xlabel("max depth")
    ylabel("error rate")
    legend(["train error", "validation error"], loc=5)
    show()

    for index, depth in enumerate(depths):
        dt = dta.DecisionTree(depth)
        dt.train(X_train, y_train)
        subplot(2, 3, index + 1)
        decision_boundaries(dt, X_train, y_train, "depth = " + str(depth))
    pause(8)

    best_d = depths[val_error.index(np.min(val_error))]
    print(best_d)
    dt = dta.DecisionTree(best_d)
    dt.train(X_train, y_train)
    print(dt.error(X_test, y_test))

    # Bagging:
    val_error = []
    Bs = list(range(5, 105, 5))
    for B in Bs:
        print("B: " + str(B))
        bag = bagging.Bagging(dta.DecisionTree, B, best_d)
        bag.train(X_train, y_train)
        val_error.append(bag.error(X_val, y_val))
    plot(Bs, val_error)
    xlabel("B")
    ylabel("validation error rate")
    show()
    # index directly with the position of the minimum (the original "+ 5" offset
    # would select a B five steps past the minimizer)
    best_b = Bs[val_error.index(np.min(val_error))]
    print("best b: ", best_b)
    bag = bagging.Bagging(dta.DecisionTree, best_b, best_d)
    bag.train(X_train, y_train)
    print(bag.error(X_test, y_test))
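# Q4 above depends on a Bagging class from a "bagging" module whose constructor
# is assumed to take the base-learner class, the number of bootstrap rounds B,
# and the base learner's own arguments. A minimal sketch of that idea --
# bootstrap resampling plus a majority vote over labels in {-1, +1}:
import numpy as np

class BaggingSketch:
    def __init__(self, base_learner, B, *learner_args):
        self.learners = [base_learner(*learner_args) for _ in range(B)]

    def train(self, X, y):
        m = len(y)
        for learner in self.learners:
            idx = np.random.randint(0, m, size=m)  # resample m indices with replacement
            learner.train(X[idx], y[idx])

    def predict(self, X):
        votes = sum(learner.predict(X) for learner in self.learners)
        return np.sign(votes)  # majority vote of the B base learners

    def error(self, X, y):
        return float(np.mean(self.predict(X) != y))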
def Q3():  # AdaBoost
    path = "/cs/usr/kotek/PycharmProjects/iml_ex4/SynData/"
    X_train, y_train = read_from_txt(path + "X_train.txt", path + "y_train.txt")
    X_val, y_val = read_from_txt(path + "X_val.txt", path + "y_val.txt")
    X_test, y_test = read_from_txt(path + "X_test.txt", path + "y_test.txt")

    # -------- First part --------
    T = np.arange(5, 105, step=5)
    T = np.append(T, np.array([200]))
    training_err = np.zeros(len(T))
    validation_err = np.zeros(len(T))
    # AdaBoost uses a weighted weak learner (WL)
    WL = ex4_tools.DecisionStump
    for i in range(len(T)):
        adaboost = AdaBoost(WL, T[i])
        adaboost.train(X_train, y_train)
        training_err[i] = adaboost.error(X_train, y_train)
        validation_err[i] = adaboost.error(X_val, y_val)
    plt.plot(T, training_err, label="train error")
    plt.plot(T, validation_err, label="validation error")
    plt.legend()
    plt.show()

    # -------- Second part --------
    decision_T = [1, 5, 10, 100, 200]
    plt.figure()
    plt.ion()
    for idx, t in enumerate(decision_T):
        adaboost = AdaBoost(WL, t)
        adaboost.train(X_train, y_train)
        plt.subplot(2, 3, idx + 1)
        ex4_tools.decision_boundaries(adaboost, X_train, y_train, "T=" + str(t))
    plt.show()
    plt.pause(5)
def Q3(function):  # AdaBoost
    # compute the training, validation and test errors, and the classifiers,
    # for the different values of T
    training_errors, validation_errors, test_errors, classifiers, x_tr, y_tr = iou.helper(3)

    # (a) plot the training and validation errors of the AdaBoost classifiers
    # over the T values in T
    if function == 'a':
        plt.plot(T, training_errors, 'r', label="training errors")
        plt.plot(T, validation_errors, 'c', label="validation errors")
        plt.xlabel("T - number of boosting iterations")
        plt.ylabel("Error")
        plt.legend(loc=3)
        plt.show()

    # (b) plot the decisions of the learned classifiers over [-1, 1]^2
    # for different T values
    if function == 'b':
        t_values = [1, 5, 10, 50, 100, 200]
        for counter, classifier in enumerate(classifiers):
            weights = classifier.w
            ex4_tools.decision_boundaries(classifier, x_tr, y_tr,
                                          title_str="T = " + str(t_values[counter]),
                                          weights=weights * 30)

    # (c) find the T value with the minimal validation error and report its test error
    if function == 'c':
        min_t_index = np.argmin(validation_errors)
        # look the value up in T itself rather than multiplying the index by the
        # gap, which is wrong when T does not start at 0
        min_t = T[min_t_index]
        test_err_min_t = test_errors[min_t_index]
        print('the minimizing T is:', min_t, 'and its test error is:', test_err_min_t)
def Q4(function):  # decision trees
    # compute the training, validation and test errors, and the classifiers,
    # for the different depth values
    training_errors, validation_errors, test_errors, classifiers, x_tr, y_tr = iou.helper(4)

    # (a) plot the training and validation errors of the decision trees
    # over the depth values in d
    if function == 'a':
        plt.plot(d, training_errors, 'r', label="training errors")
        plt.plot(d, validation_errors, 'c', label="validation errors")
        plt.xlabel("d - maximal tree depth")
        plt.ylabel("Error")
        plt.legend(loc=1)
        plt.show()

    # (b) plot the decisions of the learned classifiers over [-1, 1]^2
    # for different depth values
    if function == 'b':
        d_values = [3, 6, 8, 10, 12]
        for counter, classifier in enumerate(classifiers):
            ex4_tools.decision_boundaries(classifier, x_tr, y_tr,
                                          title_str="depth = " + str(d_values[counter]))

    # (c) find the depth with the minimal validation error and report its test error
    if function == 'c':
        min_d_index = np.argmin(validation_errors)
        min_d = d[min_d_index]
        test_err_min_d = test_errors[min_d_index]
        print('the minimizing depth is:', min_d, 'and its test error is:', test_err_min_d)
def Q_adaboost(noise_ratio):
    X_train, y_train = generate_data(5000, noise_ratio)
    classifier = AdaBoost(DecisionStump, 500)
    classifier.train(X_train, y_train)
    X_test, y_test = generate_data(200, noise_ratio)

    vals = np.arange(1, 501)
    plt.plot(vals, [classifier.error(X_train, y_train, t) for t in vals],
             label='Training Error', lw=1, alpha=0.6)
    plt.plot(vals, [classifier.error(X_test, y_test, t) for t in vals],
             label='Test Error', lw=1, alpha=0.6)
    plt.legend()
    plt.title(f'AdaBoost Training & Test Error as a function of T, noise={noise_ratio}')
    plt.show()

    boosts = [5, 10, 50, 100, 200, 500]
    for i in range(6):
        plt.subplot(2, 3, i + 1)
        decision_boundaries(classifier, X_test, y_test, boosts[i])
        plt.title(f'T={boosts[i]}, noise={noise_ratio}')
    plt.show()

    test_errors = [classifier.error(X_test, y_test, t) for t in vals]
    best_idx = int(np.argmin(test_errors))
    min_t = int(vals[best_idx])  # vals starts at 1, so argmin alone is off by one
    min_err = test_errors[best_idx]
    decision_boundaries(classifier, X_train, y_train, min_t)
    plt.title(f'min test_err {min_err} T={min_t} noise {noise_ratio}')
    plt.show()

    decision_boundaries(classifier, X_train, y_train, 499,
                        classifier.D_of_last_iteration)
    plt.title(f'un-normalized weighted sample T=500, noise={noise_ratio}')
    plt.show()

    decision_boundaries(classifier, X_train, y_train, 499,
                        classifier.D_of_last_iteration /
                        np.max(classifier.D_of_last_iteration) * 100)
    plt.title(f'normalized weighted sample T=500, noise={noise_ratio}')
    plt.show()
def plot_decisions(relative_range, learned_classifiers, train_X, train_y, name):
    for i, t in enumerate(relative_range):
        tools.decision_boundaries(
            learned_classifiers[i], train_X, train_y,
            "{name} decisions for T={t}".format(name=name, t=t))
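# Hypothetical call site for plot_decisions (every name below is illustrative,
# not from the source; it assumes one AdaBoost ensemble is trained per T value
# and that the tools module provides generate_data and DecisionStump):
if __name__ == '__main__':
    Ts = [5, 10, 50, 100, 200, 500]
    train_X, train_y = tools.generate_data(5000, 0)
    classifiers = []
    for t in Ts:
        ada = AdaBoost(tools.DecisionStump, t)
        ada.train(train_X, train_y)
        classifiers.append(ada)
    plot_decisions(Ts, classifiers, train_X, train_y, "AdaBoost")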