Example #1
def main():
    for file in [
            'data/breast-cancer-assignment5.txt', 'data/german-assignment5.txt'
    ]:
        data, labels, types = load_matrix_from_txt(file)
        splices = k_fold_split(10, data, labels)
        accuracies = []

        for i in range(10):
            train_indexes = splices[i][0]
            test_indexes = splices[i][1]

            train_data = np.copy(data[train_indexes])
            train_label = np.copy(labels[train_indexes])
            test_data = np.copy(data[test_indexes])
            test_label = np.copy(labels[test_indexes])

            boost = AdaBoost()
            boost.train(train_data, train_label, types)
            class_result = boost.test(test_data)

            accuracy = compute_accuracy(class_result, test_label)
            accuracies.append(accuracy)
            print('accuracy: %f' % accuracy)

        print('file: {}, mean: {}, std: {}'.format(file, np.mean(accuracies),
                                                   np.std(accuracies)))
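Example #1 relies on two helpers, k_fold_split and compute_accuracy, that are not shown anywhere on this page. A minimal sketch of what they might look like, with signatures and return shapes inferred from the call sites above (the uniform, unstratified split is an assumption):

import numpy as np


def k_fold_split(k, data, labels):
    # Return k (train_indexes, test_indexes) pairs over the row indices of data.
    # labels is accepted to mirror the call site; this sketch does not stratify by it.
    indexes = np.random.permutation(len(data))
    folds = np.array_split(indexes, k)
    splices = []
    for i in range(k):
        test_indexes = folds[i]
        train_indexes = np.concatenate(folds[:i] + folds[i + 1:])
        splices.append((train_indexes, test_indexes))
    return splices


def compute_accuracy(predicted, expected):
    # Fraction of predictions that match the true labels.
    return np.mean(np.asarray(predicted) == np.asarray(expected))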
Example #5
def Q10():
    X, y = generate_data(1000, 0)
    T = [5, 10, 50, 100, 200, 500]
    i = int(np.argmin(Q9()))
    T_min = T[i]
    optimal_h = AdaBoost(DecisionStump, T_min)
    optimal_h.train(X, y)
    decision_boundaries(optimal_h, X, y, T_min)
    plt.title('Decision boundary for T=%d that minimizes the test error' % T_min)
    plt.savefig('Q10')
    plt.show()
Example #6
def Q9():
    X, y = generate_data(300, 0)
    h = AdaBoost(DecisionStump, 500)
    h.train(X, y)
    err = [0] * len(T)  # T is the module-level list of iteration counts, e.g. [5, 10, 50, 100, 200, 500]
    f = plt.figure(figsize=(10, 10))
    for i, t in enumerate(T):
        f.add_subplot(3, 2, i + 1)
        err[i] = h.error(X, y, t)
        decision_boundaries(h, X, y, t)
    plt.savefig('Q9')
    plt.show()
    return np.array(err)
Example #7
def Q3(): # AdaBoost
    T = [1,5,10,50,100,200]
    T_loop = [1,5,10]
    train_err = []
    valid_err = []

    plt.figure("decisions of the learned classifiers for T")
    num_graph = 0
    for i in range(3,41):
        T_loop.append(i*5)

    for t in T_loop:
        ada_boost = AdaBoost(DecisionStump, t)
        ada_boost.train(x_train, y_train)
        if t in T:
            num_graph += 1
            plt.subplot(3, 2, num_graph)
            decision_boundaries(ada_boost, x_train, y_train, "T = %d" % t)

        train_err.append(ada_boost.error(x_train, y_train))
        valid_err.append(ada_boost.error(x_val, y_val))

    plt.figure("training error and the validation error")
    plt.plot(T_loop, train_err, 'ro-', label="Training Error")
    plt.plot(T_loop, valid_err, 'go-', label="Validation Error")
    plt.legend()
    plt.show()

    # find the T that minimizes the validation error, and plot it with the training data

    plt.figure("decision boundaries of T min, with the training data")

    T_hat = T_loop[int(np.argmin(valid_err))]
    ada_boost = AdaBoost(DecisionStump, T_hat)
    ada_boost.train(x_train, y_train)
    test_err = ada_boost.error(x_test, y_test)
    decision_boundaries(ada_boost, x_train, y_train, "T = %d" % T_hat)
    plt.show()
    print ("The value of T that minimizes the validation error is: ", T_hat)
    print("the test error of the corresponding classifier is: ", test_err)


    return
Example #8
def Q8():
    X, y = generate_data(5000, 0)
    h = AdaBoost(DecisionStump, 500)
    h.train(X, y)
    training_err = np.zeros((500, ))
    test_err = np.zeros((500, ))
    test_set, labels = generate_data(200, 0)
    for t in range(1, 501):
        training_err[t - 1] = h.error(X, y, t)
        test_err[t - 1] = h.error(test_set, labels, t)
    plt.plot(range(1, 501), training_err, label='Training error')
    plt.plot(range(1, 501), test_err, label='Test error')
    plt.title('question 8')
    plt.legend(loc='upper right')
    plt.xlabel('T')
    plt.ylabel('Error rate')
    plt.savefig('Q8')
    plt.show()
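Most examples on this page assume the same AdaBoost interface: AdaBoost(WL, T) builds an ensemble of T weak learners, train(X, y) fits it with the usual exponential re-weighting, and error(X, y, t) evaluates the ensemble truncated to its first t members. Below is a minimal sketch of that interface; labels in {-1, +1} and the WL(D, X, y) construction convention with a predict(X) method are assumptions read off the call sites, not any particular course implementation.

import numpy as np


class AdaBoost:
    def __init__(self, WL, T):
        self.WL = WL           # weak-learner class, e.g. a decision stump
        self.T = T             # number of boosting rounds
        self.h = [None] * T    # the trained weak learners
        self.w = np.zeros(T)   # their vote weights

    def train(self, X, y):
        m = len(y)
        D = np.ones(m) / m                          # start from uniform sample weights
        for t in range(self.T):
            self.h[t] = self.WL(D, X, y)            # fit a weak learner on the weighted sample
            pred = self.h[t].predict(X)
            eps = max(np.sum(D[pred != y]), 1e-10)  # weighted error, clamped away from zero
            self.w[t] = 0.5 * np.log((1 - eps) / eps)
            D = D * np.exp(-self.w[t] * y * pred)   # up-weight the mistakes
            D = D / np.sum(D)

    def predict(self, X, t=None):
        t = self.T if t is None else t
        votes = sum(self.w[i] * self.h[i].predict(X) for i in range(t))
        return np.sign(votes)

    def error(self, X, y, t=None):
        return float(np.mean(self.predict(X, t) != y))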
Example #9
def Q3():  # AdaBoost
    path = "/cs/usr/kotek/PycharmProjects/iml_ex4/SynData/"
    X_train, y_train = read_from_txt(path + "X_train.txt",
                                     path + "y_train.txt")
    X_val, y_val = read_from_txt(path + "X_val.txt", path + "y_val.txt")
    X_test, y_test = read_from_txt(path + "X_test.txt", path + "y_test.txt")

    # -------- First part --------
    T = np.arange(5, 105, step=5)
    T = np.append(T, np.array([200]))

    training_err = np.zeros(len(T))
    validation_err = np.zeros(len(T))

    # AdaBoost uses a weak learner (WL)
    WL = ex4_tools.DecisionStump
    for i in range(len(T)):
        adaboost = AdaBoost(WL, T[i])
        adaboost.train(X_train, y_train)
        training_err[i] = adaboost.error(X_train, y_train)
        validation_err[i] = adaboost.error(X_val, y_val)

    plt.plot(T, training_err, label="train error")
    plt.plot(T, validation_err, label="validation error")
    plt.legend()
    plt.show()
    # ------------------------

    # -------- Second part --------
    decision_T = [1, 5, 10, 100, 200]

    plt.figure()
    plt.ion()
    for idx, t in enumerate(decision_T):
        adaboost = AdaBoost(WL, t)
        adaboost.train(X_train, y_train)
        plt.subplot(2, 3, idx + 1)
        ex4_tools.decision_boundaries(adaboost, X_train, y_train,
                                      "T=" + str(t))
    plt.show()
    plt.pause(5)
Example #10
def Q_adaboost(noise_ratio):
    X_train, y_train = generate_data(5000, noise_ratio)
    classifier = AdaBoost(DecisionStump, 500)
    classifier.train(X_train, y_train)
    X_test, y_test = generate_data(200, noise_ratio)
    vals = np.arange(1, 501)
    plt.plot(vals, [classifier.error(X_train, y_train, t) for t in vals],
             label='Training Error',
             lw=1,
             alpha=0.6)
    plt.plot(vals, [classifier.error(X_test, y_test, t) for t in vals],
             label='Test Error',
             lw=1,
             alpha=0.6)
    plt.legend()
    plt.title(
        f'Adaboost Training & Test Error according to T, noise={noise_ratio}')
    plt.show()
    boosts = [5, 10, 50, 100, 200, 500]
    for i in range(6):
        plt.subplot(2, 3, i + 1)
        decision_boundaries(classifier, X_test, y_test, boosts[i])
        plt.title(f'T={boosts[i]}, noise={noise_ratio}')
    plt.show()
    test_errors = [classifier.error(X_test, y_test, t) for t in vals]
    best_t = vals[np.argmin(test_errors)]  # vals starts at t=1, so map argmin back through it
    min_err = test_errors[best_t - 1]
    decision_boundaries(classifier, X_train, y_train, best_t)
    plt.title(f'min test_err {min_err} T={best_t} noise {noise_ratio}')
    plt.show()
    decision_boundaries(classifier, X_train, y_train, 499,
                        classifier.D_of_last_iteration)
    plt.title(f'un-normalized weighted sample T=500, noise={noise_ratio}')
    plt.show()
    decision_boundaries(
        classifier, X_train, y_train, 499, classifier.D_of_last_iteration /
        np.max(classifier.D_of_last_iteration) * 100)
    plt.title(f'normalized weighted sample T=500, noise={noise_ratio}')
    plt.show()
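Several snippets also lean on two ex4_tools helpers that never appear in these excerpts: generate_data(n, noise_ratio) and decision_boundaries(classifier, X, y, t, weights). The sketches below are assumptions reconstructed from the call sites, not the actual course code: points are drawn from a 2-D Gaussian and labelled by a circular rule with a noise_ratio fraction of labels flipped, and the plot helper colours the plane by the truncated ensemble's prediction, optionally using per-sample weights as marker sizes.

import numpy as np
import matplotlib.pyplot as plt


def generate_data(n, noise_ratio):
    # 2-D points labelled +/-1 by a circular rule, with a fraction of labels flipped.
    X = np.random.randn(n, 2)
    y = np.where(np.linalg.norm(X, axis=1) < 1.2, 1, -1)
    flip = np.random.rand(n) < noise_ratio
    y[flip] = -y[flip]
    return X, y


def decision_boundaries(classifier, X, y, t, weights=None):
    # Colour the plane by the prediction of the first t weak learners.
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))
    Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()], t).reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=.3, cmap='RdBu')
    sizes = 20 if weights is None else weights
    plt.scatter(X[:, 0], X[:, 1], c=y, s=sizes, cmap='RdBu', edgecolors='k')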
Example #11
def Q17():
    train_images, test_images, train_labels, test_labels = load_images(
        '../Docs/')
    train_images = integral_image(train_images)
    test_images = integral_image(test_images)
    WL, T = WeakImageClassifier, 50
    ada = AdaBoost(WL, T)
    ada.train(train_images, train_labels)
    T_range = np.arange(1, T)
    train_errs = [ada.error(train_images, train_labels, t) for t in T_range]
    test_errs = [ada.error(test_images, test_labels, t) for t in T_range]

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Face Classifier")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate (%)')
    plt.plot(T_range, train_errs, label='Train Error')
    plt.plot(T_range, test_errs, label='Test Error')
    # plt.ylim(top=0.06)
    plt.legend()
    plt.savefig(FIG_DIR3 + 'q17')
    # TODO: complete this function
Example #12
def crossValidateAdaboost(inputFile, outputFile, nIterations):
    ticTacToe = TicTacToe(inputFile)
    avgEin = np.zeros(nIterations)
    avgEout = np.zeros(nIterations)

    for k in range(ticTacToe.N_FOLDS):
        ticTacToe.createTrainAndTestSets(k)
        adaboost = AdaBoost(ticTacToe)
        Ein, Eout = adaboost.train(ticTacToe, nIterations)
        avgEin = np.sum([avgEin, Ein], axis=0)
        avgEout = np.sum([avgEout, Eout], axis=0)
        print('--------------------------------------')

    return avgEin / ticTacToe.N_FOLDS, avgEout / ticTacToe.N_FOLDS
Example #13
def Q8(noise=0.0):
    n_samples_train, n_samples_test, T = 5000, 200, 500
    train_X, train_y = generate_data(n_samples_train, noise)
    test_X, test_y = generate_data(n_samples_test, noise)
    WL = DecisionStump
    ada = AdaBoost(WL, T)
    ada.train(train_X, train_y)
    T_range = np.arange(1, T)
    train_errs = [ada.error(train_X, train_y, t) for t in T_range]
    test_errs = [ada.error(test_X, test_y, t) for t in T_range]

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Adaboost")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate (%)')
    plt.plot(T_range, train_errs, label='Train Error')
    plt.plot(T_range, test_errs, label='Test Error')
    # plt.ylim(top=0.06)
    plt.legend()
    plt.savefig(FIG_DIR3 + 'q8' +
                ('' if noise == 0 else '_' + str(noise).replace('.', '_')))

    return ada, test_X, test_y, train_X, train_y
    # TODO: complete this function
Example #14
import numpy as np
from matplotlib.pyplot import plot, legend, title


def _load_data(name):
    return np.loadtxt(_get_file_path('X_' + name)), np.loadtxt(
        _get_file_path('y_' + name))


if __name__ == '__main__':
    X_train, y_train = _load_data('train')
    X_val, y_val = _load_data('val')

    T_values = range(5, 200, 5)
    validation_error = []
    training_error = []

    for t in T_values:
        ada_boost = AdaBoost(DecisionStump, t)
        ada_boost.train(X_train, y_train)
        validation_error.append(ada_boost.error(X_val, y_val))
        training_error.append(ada_boost.error(X_train, y_train))

    training_error_plot, = plot(T_values,
                                training_error,
                                linestyle='--',
                                label='training_error')
    validation_error_plot, = plot(T_values,
                                  validation_error,
                                  linestyle='--',
                                  label='validation_error')

    legend(handles=[training_error_plot, validation_error_plot])

    title('training and validation error vs T values')
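The snippet above also depends on a _get_file_path helper that is not shown; one plausible version, assuming the SynData files sit next to the script:

import os


def _get_file_path(name):
    # e.g. _get_file_path('X_train') -> <script dir>/SynData/X_train.txt
    return os.path.join(os.path.dirname(__file__), 'SynData', name + '.txt')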
Example #15
        if myForest.isTrained:
            Xtest, yTest, XtestID = myForest.getDataFromFile(train_test_file)
            finalPredictions = myForest.predict(Xtest)
            myForest.writeToFile(XtestID, finalPredictions, 'output.txt')
            print("Accuracy is:", sum(finalPredictions == yTest) / len(yTest))
        else:
            print("Untrained model being tested")
  
#train train-data.txt adaboost_model.txt adaboost
#test test-data.txt adaboost_model.txt adaboost    
if model == 'adaboost':

    if trainOrTest == 'train':
        myBoost = AdaBoost(300, verbose=False)
        TrainX, TrainY, TrainXID = myBoost.getDataFromFile(train_test_file)
        myBoost.train(TrainX, TrainY)
        pk.dump(myBoost, open(model_file, 'wb'))

    if trainOrTest == 'test':
        try:
            myBoost = pk.load(open(model_file, 'rb'))
        except FileNotFoundError:
            print("model file has not been generated")

        if myBoost.isTrained:
            Xtest, yTest, XtestID = myBoost.getDataFromFile(train_test_file)
            finalPredictions = myBoost.predict(Xtest)
            myBoost.writeToFile(XtestID, finalPredictions, 'output.txt')
            print("Accuracy is:", sum(finalPredictions == yTest) / len(yTest))
        else:
            print("Untrained model being tested")
Example #16

        [+1],
        [+1],
        [+1],
    ]
).transpose()

Tag = Tag.flatten()

for i in range(len(Tag)):
    if Tag[i] == 1:
        pyplot.plot(Original_Data[0][i], Original_Data[1][i], "+r", markersize=10)
    else:
        pyplot.plot(Original_Data[0][i], Original_Data[1][i], "+b", markersize=10)


a = AdaBoost(Original_Data, Tag)

a.train(100)

TestCase = [[0.55, 1.1, 5.35], [4.4, 2.8, 0.9]]

output = a.prediction(TestCase)

for i in range(len(output)):
    if output[i] == 1:
        pyplot.plot(TestCase[0][i], TestCase[1][i], "or", markersize=20)
    else:
        pyplot.plot(TestCase[0][i], TestCase[1][i], "ob", markersize=20)

pyplot.show()
Example #17
                           algorithm='SAMME',
                           n_estimators=no_base_classifiers,
                           learning_rate=1.0)

## CV
kf = KFold(n_splits=no_folds)
cv_acc_arr = []
cv_sk_acc_arr = []
i = 0
for train_ind, test_ind in kf.split(X_train):
    print("cross split no", i)
    x_tr, x_te = X_train.copy()[train_ind], X_train.copy()[test_ind]
    y_tr, y_te = y_train.copy()[train_ind], y_train.copy()[test_ind]

    f.init(x_tr, y_tr)
    f.train(no_base_classifiers)
    y_predict = f.predict(x_te)
    accuracy = np.mean(y_predict == y_te)
    cv_acc_arr.append(accuracy)

    ## comparing sklearn implementation of boost
    boost.fit(x_tr, y_tr)
    y_pred = boost.predict(x_te)
    accuracy_sk = np.mean(y_pred == y_te)
    cv_sk_acc_arr.append(accuracy_sk)

    i += 1

print(np.mean(cv_acc_arr))
print(np.mean(cv_sk_acc_arr))
Example #18
import numpy
from adaboost import AdaBoost

Original_Data = numpy.array([[0], [1], [2], [3], [4], [5], [6], [7], [8],
                             [9]]).transpose()

Tag = numpy.array([
    [+1],
    [+1],
    [+1],
    [-1],
    [-1],
    [-1],
    [+1],
    [+1],
    [+1],
    [-1],
]).transpose()

Tag = Tag.flatten()

a = AdaBoost(Original_Data, Tag)

a.train(5)
Example #19
    This is the main script for the AdaBoost algorithm.
    It contains raw data of 10 points from 2 classes.
"""

from adaboost import AdaBoost
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

data = pd.DataFrame(np.array([[88, 144, 1], [93, 232, 1], [136, 275, -1],
                              [147, 131, -1], [159, 69, 1], [214, 31, 1],
                              [214, 152, -1], [257, 83, 1], [307, 62, -1],
                              [307, 231, -1]]),
                    columns=["x", "y", "label"])


def display():
    f1 = plt.figure(1)
    positive = data[data["label"] == 1]
    negative = data[data["label"] == -1]
    plt.scatter(positive.iloc[:, 0], positive.iloc[:, 1], c="red", marker="+")
    plt.scatter(negative.iloc[:, 0], negative.iloc[:, 1], c="green")
    plt.show()


if __name__ == '__main__':
    m_ada = AdaBoost(data, 5)
    display()
    m_ada.train()
    m_ada.display()
Example #20
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])

    #Get current axis and plot
    if ax is None:
        ax = plt.gca()
    ax.contourf(xx, yy, Z, 2, cmap='RdBu', alpha=.5)
    ax.contour(xx, yy, Z, 2, cmap='RdBu')
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright, s=scatter_weights * 40)
    ax.set_xlabel('$X_1$')
    ax.set_ylabel('$X_2$')


boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
    max_depth=1, max_leaf_nodes=2),
                           algorithm='SAMME',
                           n_estimators=10,
                           learning_rate=1.0)
boost.fit(X, y)
# plot_decision_boundary(boost, X,y, N = 50)#, weights)
# plt.show()

print(boost.score(X, y))
###
h = DecisionTreeClassifier(max_depth=1, max_leaf_nodes=2)
f = AdaBoost(h)
f.init(X, y)
f.train(10)
y_pred = f.predict(X)
accuracy = np.mean(y_pred == y)
print(accuracy)
Example #21
import numpy
from adaboost import AdaBoost

Original_Data = numpy.array([[0], [1], [2], [3], [4], [5], [6], [7], [8],
                             [9]]).transpose()

Tag = numpy.array([
[+1],
[+1],
[+1],
[-1],
[-1],
[-1],
[+1],
[+1],
[+1],
[-1],
]).transpose()

Tag = Tag.flatten()

a = AdaBoost(Original_Data, Tag)

a.train(5)

Example #22

import numpy as np
from adaboost import AdaBoost
X = np.array([i for i in range(0, 10)]).reshape(1, 10)
Y = [1, 1, 1, -1, -1, -1, 1, 1, 1, -1]
ada = AdaBoost(X, Y)
ada.train(4)
print(ada.pred(X) == np.array(Y))
Example #23
        from mapReduce import reduce

        map(Face, nonFace)
        _mat = reduce()

mat = _mat

featureNum, sampleNum = _mat.shape

assert sampleNum  == (POSITIVE_SAMPLE + NEGATIVE_SAMPLE)
assert featureNum == FEATURE_NUM

Label_Face    = [+1 for i in range(POSITIVE_SAMPLE)]
Label_NonFace = [-1 for i in range(NEGATIVE_SAMPLE)]

label = numpy.array(Label_Face + Label_NonFace)

cache_filename = ADABOOST_CACHE_FILE + str(0)

if os.path.isfile(cache_filename):
    model = getCachedAdaBoost(mat     = _mat,
                              label   = label,
                              filename= cache_filename,
                              limit   = ADABOOST_LIMIT)
else:
    model = AdaBoost(mat, label, limit = ADABOOST_LIMIT)
    model.train()
    model.saveModel(cache_filename)

print(model)
Example #24
        if train_or_test == "train":
            KNN.knn_training(input_data_file, model_file)
        elif train_or_test == "test":
            KNN.knn_testing(model_file, input_data_file)
    elif model in ["tree", "best"]:
        if train_or_test == "train":
            # Train model
            data_vector, all_image_ids, images_counter = parse_image_data(
                file_path=input_data_file)
            trained_decision_tree = AdaBoost(
                images_data_vector=data_vector,
                all_images_ids=all_image_ids,
                images_counter=images_counter,
                decision_stumps=30,
            )
            trained_decision_tree.train()
            save_model_to_pickle(obj=trained_decision_tree,
                                 file_name="tree.pkl")
            save_model_to_txt(
                obj=trained_decision_tree,
                file_name="tree_model.txt",
                model="adaboost_decision_tree",
            )

            # Test after train
            trained_decision_tree.test(test_file_path="test_file.txt")
        else:
            # Test
            trained_decision_tree = load_model_from_pickle(
                file_name="tree.pkl")
            trained_decision_tree.test(test_file_path="test_file.txt")
Example #25
        #if i % 150 == 0:
        haarGroup = []
        for j in range(i * SAMPLE_NUM, (i + 1) * SAMPLE_NUM):
            haarGroup.append(float(tmp[j]))

        Original_Data.append(haarGroup)

    Original_Data = numpy.array(Original_Data)

fileObj.close()

SampleDim = Original_Data.shape[0]
SampleNum = Original_Data.shape[1]

assert SampleNum == (POSITIVE_SAMPLE + NEGATIVE_SAMPLE)

Label_Face = [+1 for i in range(POSITIVE_SAMPLE)]
Label_NonFace = [-1 for i in range(NEGATIVE_SAMPLE)]

Label = numpy.array(Label_Face + Label_NonFace)

a = AdaBoost(Original_Data, Label)

try:
    a.train(200)

except KeyboardInterrupt:
    print "You pressed interrupt key. Training process interrupt."

saveModel(a)
Example #26
                   [+1], [+1], [+1], [-1], [-1], [-1], [-1], [-1], [-1], [-1],
                   [-1], [-1], [-1], [+1], [+1], [+1]]).transpose()

Tag = Tag.flatten()

for i in range(len(Tag)):
    if Tag[i] == 1:
        pyplot.plot(Original_Data[0][i], Original_Data[1][i], \
                    '+r', markersize = 10)
    else:
        pyplot.plot(Original_Data[0][i], Original_Data[1][i], \
                    '+b', markersize = 10)

a = AdaBoost(Original_Data, Tag)

a.train(100)

TestCase = [[0.55, 1.1, 5.35, 7.0, 8.5, -1.0, 3.0, 3.0, 4.0, 2, 3],
            [4.4, 2.8, 0.9, -12, -13, -9, -10, -9, -5, 0, 2.5]]

output = a.prediction(TestCase)

for i in range(len(output)):
    if output[i] == 1:
        pyplot.plot(TestCase[0][i], TestCase[1][i], \
                    'or', markersize = 20)
    else:
        pyplot.plot(TestCase[0][i], TestCase[1][i], \
                    'ob', markersize = 20)

pyplot.show()
Example #28
import numpy as np
from adaboost import AdaBoost, AdaBoostTextbook
from utils import Dataset


def test(model, dataset, name):
    X_test, y_test = dataset.get_dataset()
    pred = np.array([model.predict(x) for x in X_test])
    accuracy = (y_test == pred).sum() / y_test.size
    print(f'{name} version accuracy: {accuracy:.3f}')
    return accuracy


if __name__ == '__main__':
    dataset = Dataset('./training-data.txt')
    test_dataset = Dataset('./testing-data.txt')

    model = AdaBoost(9)
    model.train(dataset)
    accuracy = test(model, test_dataset, 'Original')

    model_tb = AdaBoostTextbook(9)
    model_tb.train(dataset, 0.2, 2)
    accuracy_tb = test(model_tb, test_dataset, 'Textbook')
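Dataset and its get_dataset method come from a utils module that is not part of the excerpt; a minimal placeholder consistent with the call sites (the whitespace-separated, label-in-last-column file layout is an assumption):

import numpy as np


class Dataset:
    def __init__(self, path):
        raw = np.loadtxt(path)  # one sample per row, label in the last column
        self.X = raw[:, :-1]
        self.y = raw[:, -1].astype(int)

    def get_dataset(self):
        return self.X, self.y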
Example #29
Just Enjoy it.
"""

import numpy
import matplotlib.pyplot as pyplot
from adaboost import AdaBoost
from sklearn import datasets

"""
Samples for AdaBoost
"""
Original_Data, Tag = datasets.make_hastie_10_2(n_samples=200, random_state=1)
Original_Data = Original_Data.transpose()


for i in range(len(Tag)):
    if Tag[i] == 1:
        pyplot.plot(Original_Data[0][i], Original_Data[1][i], \
                    '+r', markersize = 10)
    else:
        pyplot.plot(Original_Data[0][i], Original_Data[1][i], \
                    '+b', markersize = 10)
pyplot.title("Sample Points")
pyplot.show()

a = AdaBoost(Original_Data, Tag)

a.train(10000)
Example #30

class ex5:
    def __init__(self):
        self.mean = [0, 0]
        self.cov = np.eye(2)
        self.svm = SVC(C=1e10, kernel='linear')
        self.perceptron = None
        self.a_boost = None
        self.svm_accs = []
        self.perceptron_accs = []
        self.ms = [5, 10, 15, 25, 70]
        self.ts = [5, 10, 50, 100, 200, 500]

    def q_3_4_5(self):
        for m in self.ms:
            self.calculate_for_m(m)
        plt.plot(self.ms, self.perceptron_accs)
        plt.plot(self.ms, self.svm_accs)
        plt.legend(("perceptron", "svd"))
        plt.show()

    def calculate_for_m(self, m):
        x = np.random.multivariate_normal(self.mean, self.cov, m)
        real_labels = self.get_real_labels(x)
        labeled_1_x, labeled_min_1_x = self.get_x_by_labels(x, real_labels)
        t = np.arange(int(x.min()) - 1, int(x.max()) + 1, 0.1)
        self.plt_xs(labeled_1_x, labeled_min_1_x, t)
        self.perceptron = Perceptron()
        perc_w = self.perceptron.fit(x, real_labels)
        plt.plot(t, self.get_y(perc_w[:-1], perc_w[-1], t))
        self.svm.fit(x, real_labels)
        plt.plot(t, self.get_y(self.svm.coef_[0], self.svm.intercept_, t))
        plt.legend(["True labels", "perceptron", "svm"])
        plt.show()
        self.calculate_svm_perc_acc()

    def get_real_labels(self, x):
        labels = []
        for j in x:
            labels.append(self.f(j))
        return labels

    def get_x_by_labels(self, x, labels):
        x_1, x_minus_1 = [], []
        for i in range(len(x)):
            if labels[i] == 1.0:
                x_1.append(x[i])
            elif labels[i] == -1.0:
                x_minus_1.append(x[i])
            else:
                pass
        return x_1, x_minus_1

    def f(self, x):
        return np.sign(np.dot([0.3, -0.5], x) + 0.1)

    def plt_xs(self, labeled_1_x, labeled_min_1_x, t):
        plt.scatter([x[0] for x in labeled_1_x], [x[1] for x in labeled_1_x])
        plt.scatter([x[0] for x in labeled_min_1_x],
                    [x[1] for x in labeled_min_1_x])
        plt.plot(t, self.get_y([0.3, -0.5], 0.1, t))

    def get_y(self, w, b, x):
        y = []
        for i in x:
            y.append(-w[0] * i / w[1] + b / -w[1])
        return y

    def calculate_svm_perc_acc(self):
        s, p = self.get_svm_perc_acc()
        self.perceptron_accs.append(p / 500)
        self.svm_accs.append(s / 500)

    def get_svm_perc_acc(self):
        svm_acc, perceptron_acc = 0, 0
        for i in range(500):
            x = np.random.multivariate_normal(self.mean, self.cov, 10000)
            real_labels = []
            for j in x:
                real_labels.append(self.f(j))
            svm_acc += self.svm.score(x, real_labels)
            perceptron_acc += self.perceptron.score(x, real_labels)
        return svm_acc, perceptron_acc

    def q_7_8_9_10(self):
        self.q_8()
        self.q_9()
        self.q_10()

    def q_8(self):
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        self.a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=500)
        self.a_boost.train(tx, ty)
        training_errs, test_errs = self.get_ab_errs(tx, ty, x, y)
        self.plt_q_8(training_errs, test_errs)

    def get_ab_errs(self, tx, ty, x, y):
        training_errs, test_errs = [], []
        for t in range(1, 501):
            training_errs.append(self.a_boost.error(tx, ty, t))
            test_errs.append(self.a_boost.error(x, y, t))
        return training_errs, test_errs

    def plt_q_8(self, training_errs, test_errs):
        plt.plot(np.arange(1, 501), training_errs, label="training error")
        plt.plot(np.arange(1, 501), test_errs, label="test error")
        plt.title("Adaboost errors as function of (T)")
        plt.legend()
        plt.show()

    def q_9(self):
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        i = 1
        for t in self.ts:
            a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
            a_boost.train(tx, ty)
            plt.subplot(2, 3, i)
            ex4_tools.decision_boundaries(a_boost, x, y, t)
            i += 1
        plt.show()

    def q_10(self):
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        errors = self.get_ab_errors(tx, ty, x, y)
        min_t = np.argmin(errors)
        a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=self.ts[min_t])
        a_boost.train(tx, ty)
        ex4_tools.decision_boundaries(a_boost, tx, ty, self.ts[min_t])
        plt.title("min error is " + str(errors[min_t]) + " with " +
                  str(self.ts[min_t]) + " classifiers")
        plt.show()

    def get_ab_errors(self, tx, ty, x, y):
        errors = []
        for t in self.ts:
            a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
            a_boost.train(tx, ty)
            errors.append(a_boost.error(x, y, t))
        return errors
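A possible driver for the class above, assuming ex4_tools, the custom Perceptron and AdaBoost modules, numpy, matplotlib and sklearn's SVC are all importable as used:

if __name__ == '__main__':
    ex = ex5()
    ex.q_3_4_5()     # Perceptron vs. SVM accuracy as a function of m
    ex.q_7_8_9_10()  # AdaBoost error curves and decision boundaries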
Example #31
# encoding=utf-8
# @Author: wendesi
# @Date:   15-11-16
# @Email:  [email protected]
# @Last modified by:   wendesi
# @Last modified time: 15-11-16

import logging

from generate_dataset import *
from adaboost import AdaBoost

from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    train_features, train_labels, test_features, test_labels = generate_dataset(
        200)

    ada = AdaBoost()
    ada.train(train_features, train_labels)

    print('end train')
    test_predict = ada.predict(test_features)

    score = accuracy_score(test_labels, test_predict)
    print "ada boost the accruacy socre is ", score