def get_ab_errors(self, tx, ty, x, y):
    errors = []
    for t in self.ts:
        a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
        a_boost.train(tx, ty)
        errors.append(a_boost.error(x, y, t))
    return errors
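
All of the examples on this page drive the same three-call interface: AdaBoost(WL, T) to build the booster, train(X, y) to fit it, and error(X, y, max_t) to evaluate the vote of the first max_t hypotheses. The exercise's own implementation is not shown here, so the following is a minimal sketch of that interface, assuming the weak learner is constructed as WL(D, X, y) from a sample-weight distribution D and exposes predict(X); treat it as illustrative, not as the course's actual class.

import numpy as np

class AdaBoost:
    def __init__(self, WL, T):
        self.WL = WL            # weak-learner class, e.g. DecisionStump (assumed API)
        self.T = T              # number of boosting rounds
        self.h = [None] * T     # trained weak hypotheses
        self.w = np.zeros(T)    # hypothesis weights

    def train(self, X, y):
        D = np.full(len(y), 1.0 / len(y))        # start from a uniform distribution
        for t in range(self.T):
            self.h[t] = self.WL(D, X, y)         # fit a weak learner on distribution D
            pred = self.h[t].predict(X)
            eps = float(np.sum(D[pred != y]))    # weighted training error
            self.w[t] = 0.5 * np.log((1 - eps) / max(eps, 1e-12))
            D = D * np.exp(-self.w[t] * y * pred)
            D = D / D.sum()                      # renormalize the distribution

    def predict(self, X, max_t):
        # Weighted majority vote of the first max_t hypotheses.
        votes = sum(self.w[t] * self.h[t].predict(X) for t in range(max_t))
        return np.sign(votes)

    def error(self, X, y, max_t=None):
        max_t = self.T if max_t is None else max_t
        return float(np.mean(self.predict(X, max_t) != y))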
Example #2
def Q3(): # AdaBoost
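    # Sweep T from 1 to 200, plot train/validation error, draw decision
    # boundaries for the committee sizes in T, and report the
    # validation-optimal T with its test error.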
    T = [1, 5, 10, 50, 100, 200]
    T_loop = [1, 5, 10]
    train_err = []
    valid_err = []

    plt.figure("decisions of the learned classifiers for T")
    num_graph = 0
    for i in range(3,41):
        T_loop.append(i*5)

    for t in T_loop:
        ada_boost = AdaBoost(DecisionStump, t)
        ada_boost.train(x_train, y_train)
        if t in T:
            num_graph += 1
            plt.subplot(3, 2, num_graph)
            decision_boundaries(ada_boost, x_train, y_train, "T = %d" % t)

        train_err.append(ada_boost.error(x_train, y_train))
        valid_err.append(ada_boost.error(x_val, y_val))

    plt.figure("training error and the validation error")
    plt.plot(T_loop, train_err, 'ro-', hold=False, label= "Training Error")
    plt.plot(T_loop, valid_err, 'go-', label= "Validation Error")
    plt.legend()
    plt.show()

    # Find the T that minimizes the validation error and plot its
    # decision boundaries on the training data.

    plt.figure("decision boundaries of the best T, with the training data")

    # T_loop is not uniformly spaced (it starts 1, 5, 10), so index it directly
    T_hat = T_loop[int(np.argmin(valid_err))]
    ada_boost = AdaBoost(DecisionStump, T_hat)
    ada_boost.train(x_train, y_train)
    test_err = ada_boost.error(x_test, y_test)
    decision_boundaries(ada_boost, x_train, y_train, "T = %d" % T_hat)
    plt.show()
    print ("The value of T that minimizes the validation error is: ", T_hat)
    print("the test error of the corresponding classifier is: ", test_err)


    return
Example #3
def Q8():
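    # Train one 500-round AdaBoost of decision stumps on 5000 noiseless points,
    # then plot train/test error as a function of the committee size T.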
    X, y = generate_data(5000, 0)
    h = AdaBoost(DecisionStump, 500)
    h.train(X, y)
    training_err = np.zeros((500, ))
    test_err = np.zeros((500, ))
    test_set, labels = generate_data(200, 0)
    for t in range(1, 501):
        training_err[t - 1] = h.error(X, y, t)
        test_err[t - 1] = h.error(test_set, labels, t)
    plt.plot(range(1, 501), training_err, label='Training error')
    plt.plot(range(1, 501), test_err, label='Test error')
    plt.title('Question 8')
    plt.legend(loc='upper right')
    plt.xlabel('T')
    plt.ylabel('Error rate')
    plt.savefig('Q8')
    plt.show()
Example #4
def Q3():  # AdaBoost
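    # Load the synthetic train/validation/test splits, sweep T, plot both
    # errors, then draw decision boundaries for selected committee sizes.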
    path = "/cs/usr/kotek/PycharmProjects/iml_ex4/SynData/"
    X_train, y_train = read_from_txt(path + "X_train.txt",
                                     path + "y_train.txt")
    X_val, y_val = read_from_txt(path + "X_val.txt", path + "y_val.txt")
    X_test, y_test = read_from_txt(path + "X_test.txt", path + "y_test.txt")

    # -------- First part --------
    T = np.arange(5, 105, step=5)
    T = np.append(T, np.array([200]))

    training_err = np.zeros(len(T))
    validation_err = np.zeros(len(T))

    # AdaBoost boosts a weak learner (WL) trained on weighted samples
    WL = ex4_tools.DecisionStump
    for i in range(len(T)):
        adaboost = AdaBoost(WL, T[i])
        adaboost.train(X_train, y_train)
        training_err[i] = adaboost.error(X_train, y_train)
        validation_err[i] = adaboost.error(X_val, y_val)

    plt.plot(T, training_err, label="train error")
    plt.plot(T, validation_err, label="validation error")
    plt.legend()
    plt.show()
    # ------------------------

    # -------- Second part --------
    decision_T = [1, 5, 10, 100, 200]

    plt.figure()
    plt.ion()
    for idx, t in enumerate(decision_T):
        adaboost = AdaBoost(WL, t)
        adaboost.train(X_train, y_train)
        plt.subplot(2, 3, idx + 1)
        ex4_tools.decision_boundaries(adaboost, X_train, y_train,
                                      "T=" + str(t))
    plt.show()
    plt.pause(5)
Example #5
def Q_adaboost(noise_ratio):
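    # One full AdaBoost experiment at the given label-noise level: error
    # curves over T, decision boundaries for several T, the boundary of the
    # T with the lowest test error, and the final sample-weight distribution.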
    X_train, y_train = generate_data(5000, noise_ratio)
    classifier = AdaBoost(DecisionStump, 500)
    classifier.train(X_train, y_train)
    X_test, y_test = generate_data(200, noise_ratio)
    vals = np.arange(1, 501)
    plt.plot(vals, [classifier.error(X_train, y_train, t) for t in vals],
             label='Training Error',
             lw=1,
             alpha=0.6)
    plt.plot(vals, [classifier.error(X_test, y_test, t) for t in vals],
             label='Test Error',
             lw=1,
             alpha=0.6)
    plt.legend()
    plt.title(
        f'Adaboost Training & Test Error according to T, noise={noise_ratio}')
    plt.show()
    boosts = [5, 10, 50, 100, 200, 500]
    for i in range(6):
        plt.subplot(2, 3, i + 1)
        decision_boundaries(classifier, X_test, y_test, boosts[i])
        plt.title(f'T={boosts[i]}, noise={noise_ratio}')
    plt.show()
    test_errors = [classifier.error(X_test, y_test, t) for t in vals]
    min_t = int(np.argmin(test_errors)) + 1  # vals starts at t = 1, so shift the 0-based index
    min_err = test_errors[min_t - 1]
    # print(min_t, min_err)
    decision_boundaries(classifier, X_train, y_train, min_t)
    plt.title(f'min test_err {min_err} T={min_t} noise {noise_ratio}')
    plt.show()
    decision_boundaries(classifier, X_train, y_train, 499,
                        classifier.D_of_last_iteration)
    plt.title(f'un-normalized weighted sample T=500, noise={noise_ratio}')
    plt.show()
    decision_boundaries(
        classifier, X_train, y_train, 499, classifier.D_of_last_iteration /
        np.max(classifier.D_of_last_iteration) * 100)
    plt.title(f'normalized weighted sample T=500, noise={noise_ratio}')
    plt.show()
Example #6
def Q17():
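    # Viola-Jones-style face classification: boost weak image classifiers
    # over integral images and track train/test error as T grows.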
    train_images, test_images, train_labels, test_labels = load_images(
        '../Docs/')
    train_images = integral_image(train_images)
    test_images = integral_image(test_images)
    WL, T = WeakImageClassifier, 50
    ada = AdaBoost(WL, T)
    ada.train(train_images, train_labels)
    T_range = np.arange(1, T + 1)  # include t = T itself
    train_errs = [ada.error(train_images, train_labels, t) for t in T_range]
    test_errs = [ada.error(test_images, test_labels, t) for t in T_range]

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Face Classifier")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate')
    plt.plot(T_range, train_errs, label='Train Error')
    plt.plot(T_range, test_errs, label='Test Error')
    # plt.ylim(top=0.06)
    plt.legend()
    plt.savefig(FIG_DIR3 + 'q17')
    # TODO: complete this function
Example #7
def Q9():
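    # Draw the decision boundary of the same trained booster truncated to
    # each committee size in T, collecting the corresponding training errors.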
    X, y = generate_data(300, 0)
    h = AdaBoost(DecisionStump, 500)
    h.train(X, y)
    T = [1, 5, 10, 50, 100, 200]  # assumption: module-level in the original; matches the 3x2 grid below
    err = [0] * len(T)
    f = plt.figure(figsize=(10, 10))
    for i, t in enumerate(T):
        f.add_subplot(3, 2, i + 1)
        err[i] = h.error(X, y, t)
        decision_boundaries(h, X, y, t)
    plt.savefig('Q9')
    plt.show()
    return np.array(err)
Example #8
def Q8(noise=0.0):
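    # Train a 500-round booster at the given noise level, plot train/test
    # error over the committee size, and return the pieces needed later.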
    n_samples_train, n_samples_test, T = 5000, 200, 500
    train_X, train_y = generate_data(n_samples_train, noise)
    test_X, test_y = generate_data(n_samples_test, noise)
    WL = DecisionStump
    ada = AdaBoost(WL, T)
    ada.train(train_X, train_y)
    T_range = np.arange(1, T + 1)  # include t = T itself
    train_errs = [ada.error(train_X, train_y, t) for t in T_range]
    test_errs = [ada.error(test_X, test_y, t) for t in T_range]

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Adaboost")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate')
    plt.plot(T_range, train_errs, label='Train Error')
    plt.plot(T_range, test_errs, label='Test Error')
    # plt.ylim(top=0.06)
    plt.legend()
    plt.savefig(FIG_DIR3 + 'q8' +
                ('' if noise == 0 else '_' + str(noise).replace('.', '_')))

    return ada, test_X, test_y, train_X, train_y
Example #9
def _load_data(name):
    # Reconstructed def line, inferred from the calls in __main__ below;
    # _get_file_path is a helper elided from this excerpt.
    return np.loadtxt(_get_file_path('X_' + name)), np.loadtxt(
        _get_file_path('y_' + name))


if __name__ == '__main__':
    X_train, y_train = _load_data('train')
    X_val, y_val = _load_data('val')

    T_values = range(5, 200, 5)
    validation_error = []
    training_error = []

    for t in T_values:
        ada_boost = AdaBoost(DecisionStump, t)
        ada_boost.train(X_train, y_train)
        validation_error.append(ada_boost.error(X_val, y_val))
        training_error.append(ada_boost.error(X_train, y_train))

    training_error_plot, = plot(T_values,
                                training_error,
                                linestyle='--',
                                label='training_error')
    validation_error_plot, = plot(T_values,
                                  validation_error,
                                  linestyle='--',
                                  label='validation_error')

    legend(handles=[training_error_plot, validation_error_plot])

    title('Training and validation error vs. T')
    xlabel('T values')
Example #10
class ex5:
    def __init__(self):
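        # Data model: 2D standard-Gaussian samples labeled by the fixed linear
        # rule f below; ms are training-set sizes, ts are AdaBoost sizes to try.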
        self.mean = [0, 0]
        self.cov = np.eye(2)
        self.svm = SVC(C=1e10, kernel='linear')
        self.perceptron = None
        self.a_boost = None
        self.svm_accs = []
        self.perceptron_accs = []
        self.ms = [5, 10, 15, 25, 70]
        self.ts = [5, 10, 50, 100, 200, 500]

    def q_3_4_5(self):
        for m in self.ms:
            self.calculate_for_m(m)
        plt.plot(self.ms, self.perceptron_accs)
        plt.plot(self.ms, self.svm_accs)
        plt.legend(("perceptron", "svm"))
        plt.show()

    def calculate_for_m(self, m):
        x = np.random.multivariate_normal(self.mean, self.cov, m)
        real_labels = self.get_real_labels(x)
        labeled_1_x, labeled_min_1_x = self.get_x_by_labels(x, real_labels)
        t = np.arange(int(x.min()) - 1, int(x.max()) + 1, 0.1)
        self.plt_xs(labeled_1_x, labeled_min_1_x, t)
        self.perceptron = Perceptron()
        perc_w = self.perceptron.fit(x, real_labels)
        plt.plot(t, self.get_y(perc_w[:-1], perc_w[-1], t))
        self.svm.fit(x, real_labels)
        # SVC with a linear kernel exposes its weights via the public coef_ attribute
        plt.plot(t, self.get_y(self.svm.coef_[0], self.svm.intercept_, t))
        plt.legend(["True labels", "perceptron", "svm"])
        plt.show()
        self.calculate_svm_perc_acc()

    def get_real_labels(self, x):
        labels = []
        for j in x:
            labels.append(self.f(j))
        return labels

    def get_x_by_labels(self, x, labels):
        x_1, x_minus_1 = [], []
        for i in range(len(x)):
            if labels[i] == 1.0:
                x_1.append(x[i])
            elif labels[i] == -1.0:
                x_minus_1.append(x[i])
        return x_1, x_minus_1

    def f(self, x):
        return np.sign(np.dot([0.3, -0.5], x) + 0.1)

    def plt_xs(self, labeled_1_x, labeled_min_1_x, t):
        plt.scatter([x[0] for x in labeled_1_x], [x[1] for x in labeled_1_x])
        plt.scatter([x[0] for x in labeled_min_1_x],
                    [x[1] for x in labeled_min_1_x])
        plt.plot(t, self.get_y([0.3, -0.5], 0.1, t))

    def get_y(self, w, b, x):
        y = []
        for i in x:
            y.append(-w[0] * i / w[1] + b / -w[1])
        return y

    def calculate_svm_perc_acc(self):
        s, p = self.get_svm_perc_acc()
        self.perceptron_accs.append(p / 500)
        self.svm_accs.append(s / 500)

    def get_svm_perc_acc(self):
        svm_acc, perceptron_acc = 0, 0
        for i in range(500):
            x = np.random.multivariate_normal(self.mean, self.cov, 10000)
            real_labels = []
            for j in x:
                real_labels.append(self.f(j))
            svm_acc += self.svm.score(x, real_labels)
            perceptron_acc += self.perceptron.score(x, real_labels)
        return svm_acc, perceptron_acc

    def q_7_8_9_10(self):
        self.q_8()
        self.q_9()
        self.q_10()

    def q_8(self):
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        self.a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=500)
        self.a_boost.train(tx, ty)
        training_errs, test_errs = self.get_ab_errs(tx, ty, x, y)
        self.plt_q_8(training_errs, test_errs)

    def get_ab_errs(self, tx, ty, x, y):
        training_errs, test_errs = [], []
        for i in range(1, 501):  # error() takes the committee size, which starts at 1
            training_errs.append(self.a_boost.error(tx, ty, i))
            test_errs.append(self.a_boost.error(x, y, i))
        return training_errs, test_errs

    def plt_q_8(self, training_errs, test_errs):
        plt.plot(np.arange(1, 501), training_errs, label="training error")
        plt.plot(np.arange(1, 501), test_errs, label="test error")
        plt.title("AdaBoost error as a function of T")
        plt.legend()
        plt.show()

    def q_9(self):
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        i = 1
        for t in self.ts:
            a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
            a_boost.train(tx, ty)
            plt.subplot(2, 3, i)
            ex4_tools.decision_boundaries(a_boost, x, y, t)
            i += 1
        plt.show()

    def q_10(self):
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        errors = self.get_ab_errors(tx, ty, x, y)
        min_idx = int(np.argmin(errors))
        a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=self.ts[min_idx])
        a_boost.train(tx, ty)
        ex4_tools.decision_boundaries(a_boost, tx, ty, self.ts[min_idx])
        plt.title("min error is " + str(errors[min_idx]) + " with " +
                  str(self.ts[min_idx]) + " classifiers")
        plt.show()

    def get_ab_errors(self, tx, ty, x, y):
        errors = []
        for t in self.ts:
            a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
            a_boost.train(tx, ty)
            errors.append(a_boost.error(x, y, t))
        return errors
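
For completeness, a hypothetical driver for the ex5 class above. It only calls methods the class actually defines, but assumes the numpy, matplotlib, sklearn, Perceptron and ex4_tools imports that this excerpt elides:

if __name__ == '__main__':
    runner = ex5()
    runner.q_3_4_5()      # Perceptron vs. SVM accuracy as the sample size m grows
    runner.q_7_8_9_10()   # AdaBoost error curves, decision boundaries, and best T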