Example #1
def Q10(ada, train_X, train_y, T_hat=500, noise=0.0):
    fig = plt.figure()
    fig.suptitle('Decision of T-hat')
    decision_boundaries(ada, train_X, train_y, T_hat)
    plt.savefig(FIG_DIR3 + 'q10' +
                ('' if noise == 0 else '_' + str(noise).replace('.', '_')))
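Every example on this page calls a decision_boundaries helper (usually imported from the course's ex4_tools module), which is not shown here. As a rough sketch of what such a helper might do, the version below plots the model's predictions over [-1, 1]^2 and scatters the sample on top, using the optional weights as marker sizes. The signature and the assumption that the model exposes predict(X, num_classifiers) are guesses based on how the examples call it; note that some examples pass a title string as the fourth argument instead of a classifier count.

import numpy as np
import matplotlib.pyplot as plt


def decision_boundaries(model, X, y, num_classifiers, weights=None):
    # hypothetical stand-in for ex4_tools.decision_boundaries, not the original
    # evaluate the ensemble on a dense grid covering [-1, 1]^2
    xx, yy = np.meshgrid(np.linspace(-1, 1, 200), np.linspace(-1, 1, 200))
    grid = np.c_[xx.ravel(), yy.ravel()]
    zz = model.predict(grid, num_classifiers).reshape(xx.shape)

    # background: predicted label; foreground: the (optionally weighted) sample
    plt.contourf(xx, yy, zz, alpha=0.3, cmap='coolwarm')
    sizes = 20 if weights is None else weights
    plt.scatter(X[:, 0], X[:, 1], c=y, s=sizes, cmap='coolwarm',
                edgecolors='k', linewidths=0.3)
    plt.title(f"Decision boundary, T={num_classifiers}")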
Example #2
def Q3():  # AdaBoost
    val_error = []
    train_error = []
    for T in range(1, 205, 5):
        adaboost = aba.AdaBoost(DecisionStump, T)
        adaboost.train(X_train, y_train)
        train_error.append(adaboost.error(X_train, y_train))
        val_error.append(adaboost.error(X_val, y_val))

    plot(list(range(1, 205, 5)), train_error)
    plot(list(range(1, 205, 5)), val_error)
    xlabel("Iteration_num")
    ylabel("error")
    legend(["Training Error", "Validation Error"], loc=5)
    show()

    figure(1)
    ion()
    for index, T in enumerate([1, 5, 10, 50, 100, 200]):
        adaboost = aba.AdaBoost(DecisionStump, T)
        adaboost.train(X_train, y_train)
        subplot(2, 3, index + 1)
        decision_boundaries(adaboost, X_train, y_train, "Iteration: " + str(T))

    pause(8)

    # map the index of the minimal validation error back to its T value (T runs 1, 6, 11, ...)
    best_iteration = list(range(1, 205, 5))[val_error.index(np.min(val_error))]
    print(best_iteration)
    ab = aba.AdaBoost(DecisionStump, best_iteration)
    ab.train(X_train, y_train)
    print(ab.error(X_test, y_test))
    return
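The AdaBoost class constructed as AdaBoost(DecisionStump, T) (or AdaBoost(WL=..., T=...)) in these examples comes from the surrounding project and is not reproduced on this page. Below is a minimal sketch of such a class, assuming the weak learner is built as WL(D, X, y) from the current sample distribution and exposes predict(X) returning ±1 labels; it only illustrates the interface the examples rely on (train, error(X, y, max_t), a w attribute, train returning the final distribution) and is not the original implementation.

import numpy as np


class AdaBoost:
    # minimal AdaBoost over {-1, +1} labels; a sketch, not the course's reference code

    def __init__(self, WL, T):
        self.WL = WL          # weak-learner class, assumed callable as WL(D, X, y)
        self.T = T            # number of boosting rounds
        self.h = [None] * T   # trained weak learners
        self.w = np.zeros(T)  # weights of the weak learners

    def train(self, X, y):
        m = X.shape[0]
        D = np.full(m, 1.0 / m)  # start from the uniform distribution
        for t in range(self.T):
            self.h[t] = self.WL(D, X, y)
            pred = self.h[t].predict(X)
            eps = np.sum(D[pred != y])                    # weighted training error
            self.w[t] = 0.5 * np.log((1.0 - eps) / eps)   # weight of this weak learner
            D = D * np.exp(-self.w[t] * y * pred)         # re-weight the sample
            D = D / np.sum(D)
        return D  # final distribution, as q16 below appears to expect

    def predict(self, X, max_t=None):
        max_t = self.T if max_t is None else max_t
        votes = sum(self.w[t] * self.h[t].predict(X) for t in range(max_t))
        return np.sign(votes)

    def error(self, X, y, max_t=None):
        # misclassification rate of the ensemble truncated to its first max_t members
        return float(np.mean(self.predict(X, max_t) != y))

Truncating the weighted vote in predict/error is what lets several of the later examples train one 500-round ensemble and then evaluate it at many values of T instead of retraining.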
Example #3
def q16(train):
    adaboost = AdaBoost(ex4_tools.DecisionStump, MAX_T)
    D = adaboost.train(*train)
    D = D / np.max(D) * 10  # rescale the final distribution so the largest weight is 10 for plotting
    plt.figure()
    ex4_tools.decision_boundaries(adaboost, *train, MAX_T, weights=D)
    plt.title("Weighted Training Set")
    plt.show()
Example #4
def q15(train, test):
    T = (5, 10, 50, 100, 200, 500)
    adaboost = AdaBoost(ex4_tools.DecisionStump, MAX_T)
    adaboost.train(*train)

    errors = [adaboost.error(*test, t) for t in T]
    best = np.argmin(errors)
    plt.figure()
    ex4_tools.decision_boundaries(adaboost, *test, T[best])
    plt.title(f"Best classifier - T={T[best]}, error={errors[best]}")
    plt.show()
Example #5
def Q10():
    X, y = generate_data(1000, 0)
    T = [5, 10, 50, 100, 200, 500]
    i = int(np.argmin(Q9()))
    T_min = T[i]
    optimal_h = AdaBoost(DecisionStump, T_min)
    optimal_h.train(X, y)
    decision_boundaries(optimal_h, X, y, T_min)
    plt.title(f'Decision boundary for T={T_min}, which minimizes the test error')
    plt.savefig('Q10')
    plt.show()
Example #6
def q_10(self):
    tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
    x, y = ex4_tools.generate_data(200, noise_ratio=0)
    errors = self.get_ab_errors(tx, ty, x, y)
    min_t = np.argmin(errors)
    a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=self.ts[min_t])
    a_boost.train(tx, ty)
    ex4_tools.decision_boundaries(a_boost, tx, ty, self.ts[min_t])
    plt.title("min error is " + str(errors[min_t]) + " with " +
              str(self.ts[min_t]) + " classifiers")
    plt.show()
Example #7
def q_9(self):
    tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
    x, y = ex4_tools.generate_data(200, noise_ratio=0)
    i = 1
    for t in self.ts:
        a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
        a_boost.train(tx, ty)
        plt.subplot(2, 3, i)
        ex4_tools.decision_boundaries(a_boost, x, y, t)
        i += 1
    plt.show()
Example #8
def Q9(ada, test_X, test_y, noise=0.0):
    # f, axs = plt.subplots(3,2)
    n_classifiers = [5, 10, 50, 100, 200, 500]
    fig = plt.figure()
    fig.suptitle('Decision of the Learned Classifiers')
    for i in range(6):
        plt.subplot(3, 2, i + 1)
        decision_boundaries(ada, test_X, test_y, n_classifiers[i])
    plt.savefig(FIG_DIR3 + 'q9' +
                ('' if noise == 0 else '_' + str(noise).replace('.', '_')))

Example #9
def Q9():
    X, y = generate_data(300, 0)
    h = AdaBoost(DecisionStump, 500)
    h.train(X, y)
    err = [0] * len(T)
    f = plt.figure(figsize=(10, 10))
    for i, t in enumerate(T):
        f.add_subplot(3, 2, i + 1)
        err[i] = h.error(X, y, t)
        decision_boundaries(h, X, y, t)
    plt.savefig('Q9')
    plt.show()
    return np.array(err)
Example #10
def q14(train, test):
    T = (5, 10, 50, 100, 200, 500)
    adaboost = AdaBoost(ex4_tools.DecisionStump, MAX_T)

    X, y = train
    X_t, y_t = test
    adaboost.train(X, y)

    plt.figure()
    for i, t in enumerate(T):
        plt.subplot(2, 3, i + 1)
        ex4_tools.decision_boundaries(adaboost, X_t, y_t, t)
    plt.title("Decisions Boundaries")
    plt.show()
Example #11
def Q4():  # decision trees
    val_error = []
    train_error = []
    depths = [3, 6, 8, 10, 12]
    for depth in depths:
        dt = dta.DecisionTree(depth)
        dt.train(X_train, y_train)
        train_error.append(dt.error(X_train, y_train))
        val_error.append(dt.error(X_val, y_val))
    plot(depths, train_error)
    plot(depths, val_error)
    xlabel("max depth")
    ylabel("error rate")
    legend(["train error", "validation error"], loc=5)
    show()
    # figure(1)
    # ion()
    for index, depth in enumerate(depths):
        dt = dta.DecisionTree(depth)
        dt.train(X_train, y_train)
        subplot(2, 3, index + 1)
        decision_boundaries(dt, X_train, y_train, "max depth = " + str(depth))
    pause(8)
    best_d = depths[val_error.index(np.min(val_error))]
    print(best_d)
    dt = dta.DecisionTree(best_d)
    dt.train(X_train, y_train)
    print(dt.error(X_test, y_test))
    # Bagging:
    val_error = []
    for B in range(5, 105, 5):
        print("B: " + str(B))
        bag = bagging.Bagging(dta.DecisionTree, B, best_d)
        bag.train(X_train, y_train)
        val_error.append(bag.error(X_val, y_val))

    plot(range(5, 105, 5), val_error)
    xlabel("B")
    ylabel("validation error rate")
    show()
    best_b = list(range(5, 105, 5))[val_error.index(np.min(val_error))]
    print("best b: ", best_b)
    bag = bagging.Bagging(dta.DecisionTree, best_b, best_d)
    bag.train(X_train, y_train)
    print(bag.error(X_test, y_test))
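Example #11 also depends on a Bagging class built as bagging.Bagging(dta.DecisionTree, B, best_d), which is not included here. Below is a hedged sketch of what such a wrapper might look like, assuming every base learner takes its constructor arguments (here the tree depth) up front and exposes train(X, y) and predict(X) with ±1 labels; the real bagging module may differ.

import numpy as np


class Bagging:
    # bootstrap-aggregated ensemble of a base learner; a sketch, not the course's bagging.py

    def __init__(self, base_learner, B, *learner_args):
        self.B = B
        # e.g. Bagging(dta.DecisionTree, B, best_d) builds B trees of max depth best_d
        self.models = [base_learner(*learner_args) for _ in range(B)]

    def train(self, X, y):
        m = X.shape[0]
        rng = np.random.default_rng()
        for model in self.models:
            idx = rng.integers(0, m, size=m)   # bootstrap sample, drawn with replacement
            model.train(X[idx], y[idx])

    def predict(self, X):
        votes = sum(model.predict(X) for model in self.models)
        return np.sign(votes)                  # majority vote over the ensemble

    def error(self, X, y):
        return float(np.mean(self.predict(X) != y))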
Example #12
def Q3():  # AdaBoost
    path = "/cs/usr/kotek/PycharmProjects/iml_ex4/SynData/"
    X_train, y_train = read_from_txt(path + "X_train.txt",
                                     path + "y_train.txt")
    X_val, y_val = read_from_txt(path + "X_val.txt", path + "y_val.txt")
    X_test, y_test = read_from_txt(path + "X_test.txt", path + "y_test.txt")

    # -------- First part --------
    T = np.arange(5, 105, step=5)
    T = np.append(T, np.array([200]))

    training_err = np.zeros(len(T))
    validation_err = np.zeros(len(T))

    # AdaBoost uses a weak learner (WL) trained on the weighted sample
    WL = ex4_tools.DecisionStump
    for i in range(len(T)):
        adaboost = AdaBoost(WL, T[i])
        adaboost.train(X_train, y_train)
        training_err[i] = adaboost.error(X_train, y_train)
        validation_err[i] = adaboost.error(X_val, y_val)

    plt.plot(T, training_err, label="train error")
    plt.plot(T, validation_err, label="validation error")
    plt.legend()
    plt.show()
    # ------------------------

    # -------- Second part --------
    decision_T = [1, 5, 10, 100, 200]

    plt.figure()
    plt.ion()
    for idx, t in enumerate(decision_T):
        adaboost = AdaBoost(WL, t)
        adaboost.train(X_train, y_train)
        plt.subplot(2, 3, idx + 1)
        ex4_tools.decision_boundaries(adaboost, X_train, y_train,
                                      "T=" + str(t))
    plt.show()
    plt.pause(5)
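read_from_txt is another project-specific helper that is not shown. If the SynData files are plain whitespace-separated numeric text (one sample per row, one ±1 label per row), something as small as the following would work; the file format here is an assumption, not taken from the original code.

import numpy as np


def read_from_txt(x_path, y_path):
    # assumes plain numeric text files; adjust delimiter/shape if the real format differs
    X = np.loadtxt(x_path)
    y = np.loadtxt(y_path)
    return X, y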
Example #13
def Q3(function):  # AdaBoost

    # calculate the training and validation errors, and the classifiers for different values of T
    training_errors, validation_errors, test_errors, classifiers, x_tr, y_tr = iou.helper(
        3)

    # plot the training and validation errors of classifiers that were trained using adaboost
    # over the T values that are in t
    if function == 'a':
        plt.plot(T, training_errors, 'r', label="training errors")
        plt.plot(T, validation_errors, 'c', label="validation errors")
        plt.xlabel("T - number of distribution adjustment iterations")
        plt.ylabel("Error")
        plt.legend(loc=3)
        plt.show()

    # plot the decisions of the learned classifiers over [-1,1]^2 for different T values
    if function == 'b':
        counter = 0
        t_values = [1, 5, 10, 50, 100, 200]

        for classifier in classifiers:
            weights = classifier.w
            ex4_tools.decision_boundaries(classifier,
                                          x_tr,
                                          y_tr,
                                          title_str="T = " +
                                          str(t_values[counter]),
                                          weights=weights * 30)
            counter += 1

    # find the T value that leads to the minimal validation error,
    # and calculate its test error
    if function == 'c':
        min_t_index = np.argmin(validation_errors)
        min_t = T[min_t_index]  # map the index of the minimal validation error back to its T value
        test_err_min_t = test_errors[min_t_index]
        print('the minimizing T is: ', min_t, "and its test error is: ",
              test_err_min_t)
Example #14
def Q4(function):  # decision trees

    # calculate the training and validation errors, and the classifiers for different depth values
    training_errors, validation_errors, test_errors, classifiers, x_tr, y_tr = iou.helper(
        4)

    # plot the training and validation errors of classifiers that were trained using decision tree
    # over the depth values that are in d
    if function == 'a':
        plt.plot(d, training_errors, 'r', label="training errors")
        plt.plot(d, validation_errors, 'c', label="validation errors")
        plt.xlabel("d - maximal tree depth")
        plt.ylabel("Error")
        plt.legend(loc=1)
        plt.show()

    # plot the decisions of the learned classifiers over [-1,1]^2 for different depth values
    if function == 'b':
        counter = 0
        d_values = [3, 6, 8, 10, 12]

        for classifier in classifiers:
            ex4_tools.decision_boundaries(classifier,
                                          x_tr,
                                          y_tr,
                                          title_str="depth = " +
                                          str(d_values[counter]))
            counter += 1

    # find the d value that leads to the minimal validation error,
    # and calculate its test error
    if function == 'c':
        min_d_index = np.argmin(validation_errors)
        min_d = d[min_d_index]  # depth value with the minimal validation error
        test_err_min_d = test_errors[min_d_index]
        print('the minimizing depth is: ', min_d, "and its test error is: ",
              test_err_min_d)
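The dta.DecisionTree used in Examples #11 and #14 is built from a single max-depth argument and exposes train and error. To reproduce these experiments without the course modules, a thin wrapper around scikit-learn's DecisionTreeClassifier gives the same interface; this is a stand-in, not the original decision_tree module.

import numpy as np
from sklearn.tree import DecisionTreeClassifier


class DecisionTree:
    # depth-capped CART tree matching the train/error interface used above (a stand-in)

    def __init__(self, max_depth):
        self.model = DecisionTreeClassifier(max_depth=max_depth)

    def train(self, X, y):
        self.model.fit(X, y)

    def predict(self, X):
        return self.model.predict(X)

    def error(self, X, y):
        return float(np.mean(self.predict(X) != y))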
Example #15
def Q_adaboost(noise_ratio):
    X_train, y_train = generate_data(5000, noise_ratio)
    classifier = AdaBoost(DecisionStump, 500)
    classifier.train(X_train, y_train)
    X_test, y_test = generate_data(200, noise_ratio)
    vals = np.arange(1, 501)
    plt.plot(vals, [classifier.error(X_train, y_train, t) for t in vals],
             label='Training Error',
             lw=1,
             alpha=0.6)
    plt.plot(vals, [classifier.error(X_test, y_test, t) for t in vals],
             label='Test Error',
             lw=1,
             alpha=0.6)
    plt.legend()
    plt.title(
        f'AdaBoost training & test error as a function of T, noise={noise_ratio}')
    plt.show()
    boosts = [5, 10, 50, 100, 200, 500]
    for i in range(6):
        plt.subplot(2, 3, i + 1)
        decision_boundaries(classifier, X_test, y_test, boosts[i])
        plt.title(f'T={boosts[i]}, noise={noise_ratio}')
    plt.show()
    test_errors = [classifier.error(X_test, y_test, t) for t in vals]
    min_t = int(np.argmin(test_errors)) + 1  # vals starts at 1, so shift the index back to a T value
    min_err = test_errors[min_t - 1]
    # print(min_t, min_err)
    decision_boundaries(classifier, X_train, y_train, min_t)
    plt.title(f'min test_err {min_err} T={min_t} noise {noise_ratio}')
    plt.show()
    decision_boundaries(classifier, X_train, y_train, 499,
                        classifier.D_of_last_iteration)
    plt.title(f'un-normalized weighted sample, T=500, noise={noise_ratio}')
    plt.show()
    decision_boundaries(
        classifier, X_train, y_train, 499, classifier.D_of_last_iteration /
        np.max(classifier.D_of_last_iteration) * 100)
    plt.title(f'normalized weighted sample, T=500, noise={noise_ratio}')
    plt.show()
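generate_data(num_samples, noise_ratio) also comes from the external tools, and the exact synthetic distribution is not visible on this page. A hypothetical stand-in that produces 2-D points in [-1, 1]^2 with ±1 labels and flips a noise_ratio fraction of them is enough to run the snippets above; the circle-based labeling rule below is an assumption, not the course's data.

import numpy as np


def generate_data(num_samples, noise_ratio=0.0):
    # hypothetical stand-in: points uniform in [-1, 1]^2, labeled +1 inside a
    # centered circle of radius 0.5 and -1 outside, with label noise applied
    rng = np.random.default_rng()
    X = rng.uniform(-1, 1, size=(num_samples, 2))
    y = np.where(np.linalg.norm(X, axis=1) < 0.5, 1, -1)
    flip = rng.random(num_samples) < noise_ratio
    y[flip] = -y[flip]
    return X, y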
Example #16
def plot_decisions(relative_range, learned_classifiers, train_X, train_y,
                   name):
    for i, t in enumerate(relative_range):
        tools.decision_boundaries(
            learned_classifiers[i], train_X, train_y,
            "{name} decisions for T={t}".format(name=name, t=t))