Python AdaBoost Exemples, adaboost.AdaBoost Python Exemples

Exemple #1

0

Afficher le fichier

def main():
    """Cross-validate AdaBoost on each assignment data set.

    For every data file the matrix is loaded and split into ``n_folds``
    train/test partitions. A fresh AdaBoost model is trained and tested on
    each partition; the per-fold accuracy is printed, followed by the mean
    and standard deviation of the accuracies for that file.
    """
    # Single source of truth for the fold count (it was hard-coded twice).
    n_folds = 10
    for path in [
            'data/breast-cancer-assignment5.txt', 'data/german-assignment5.txt'
    ]:
        data, labels, types = load_matrix_from_txt(path)
        splices = k_fold_split(n_folds, data, labels)
        accuracies = []

        for i in range(n_folds):
            train_indexes = splices[i][0]
            test_indexes = splices[i][1]

            # Copies keep each fold's arrays independent of the full matrix.
            train_data = np.copy(data[train_indexes])
            train_label = np.copy(labels[train_indexes])
            test_data = np.copy(data[test_indexes])
            test_label = np.copy(labels[test_indexes])

            boost = AdaBoost()
            boost.train(train_data, train_label, types)
            class_result = boost.test(test_data)

            accuracy = compute_accuracy(class_result, test_label)
            accuracies.append(accuracy)
            # print() call (not the Python 2 statement) so this line parses
            # on Python 3 and matches the summary print below.
            print('accuracy: %f' % accuracy)

        print('file: {}, mean: {}, std: {}'.format(path, np.mean(accuracies),
                                                   np.std(accuracies)))

Exemple #2

0

Afficher le fichier

Fichier : main.py Projet : ChuyuHsu/Adaboost-Stump

def main():
    """Print the answers to homework questions Q12, Q13, Q17 and Q18.

    Q12: in-sample error of a single uniformly weighted decision stump.
    Q13: number of training mistakes of a 300-round AdaBoost ensemble.
    Q17: test error rate of the single stump.
    Q18: test error rate of the AdaBoost ensemble.

    Returns:
        0, as a process-style success code.
    """
    data_dir = "/Users/rio512hsu/dataset/MachineLearningTechniques"

    # Context managers close the files; the original leaked both handles.
    with open(data_dir + "/hw2_adaboost_train.csv", "rb") as train_file:
        data = np.loadtxt(train_file, delimiter=" ")

    # Last column is the label, everything before it is a feature.
    X = data[:, :-1]
    y = data[:, -1]
    u = np.ones(X.shape[0]) / X.shape[0]  # uniform sample weights
    clf = DecisionStump().fit(X, y, u)
    # Q12
    print(clf.getEin())

    # Q13
    adaboost = AdaBoost(DecisionStump).fit(X, y, 300)
    print(np.sum(adaboost.predict(X) != y))

    # Q17
    with open(data_dir + "/hw2_adaboost_test.csv") as test_file:
        test = np.loadtxt(test_file, delimiter=' ')
    X_test = test[:, :-1]
    y_test = test[:, -1]
    # Fixed: the stump was scored on the training data (X, y) while being
    # normalised by the test-set size; score it on the test data instead.
    print(np.sum(clf.predict(X_test) != y_test) / float(test.shape[0]))

    # Q18
    print(np.sum(adaboost.predict(X_test) != y_test) / float(test.shape[0]))

    return 0

Exemple #3

0

Afficher le fichier

Fichier : testCase.py Projet : iBelieveCJM/BuildUpFoundation

def testTitanicCARTAdaBoost():
    """Fit AdaBoost over weighted CART on a Titanic sample and print accuracy.

    The model is fitted on a random 40% sample of the cleaned training frame
    and then scored against the whole cleaned frame. Labels are mapped from
    {0, 1} to {-1, +1} before fitting.
    """
    rule = '-' * 30
    print(rule, '\ntestTitanicCARTAdaBoost\n', rule)
    frame = pd.read_csv('Titanic_dataset/train.csv')
    # Remove the identifier-like columns; the numeric columns
    # (Age, SibSp, Parch, Fare) are kept.
    for column in ["PassengerId", "Name", "Ticket", "Cabin"]:
        frame.pop(column)
    # Rows with missing values are discarded.
    frame = frame.dropna()
    # Encode the non-numeric columns: one-hot for Sex, integer codes for
    # Embarked (codes follow order of first appearance).
    frame = pd.get_dummies(frame, columns=['Sex'])
    embarked_codes = {}
    for code, port in enumerate(frame['Embarked'].unique()):
        embarked_codes[port] = code
    frame['Embarked'] = frame['Embarked'].map(embarked_codes)
    if DEBUG:
        print(frame[:5])
    # The sample is taken while "Survived" is still a column, so the labels
    # for the sample can be popped from it afterwards.
    sample = frame.sample(frac=0.4)
    all_labels = frame.pop("Survived")
    all_labels[all_labels == 0] = -1
    sample_labels = sample.pop("Survived")
    sample_labels[sample_labels == 0] = -1
    # Fit on the sample ...
    booster = AdaBoost(CART_weight_classifier, 10)
    booster.fit(sample, sample_labels)
    # ... and score on the full frame.
    predictions = booster.predict(frame)
    hits = np.sum(predictions == all_labels.reset_index(drop=True))
    print('Acc.: ', hits / all_labels.shape[0])

Exemple #4

0

Afficher le fichier

Fichier : ex4_runme.py Projet : linoy-boaron/machine-learning-python

 def q_8(self):
     """Train a 500-round AdaBoost on noiseless data and plot its errors.

     The trained booster is stored on ``self.a_boost``; the error curves
     come from ``self.get_ab_errs`` and are handed to ``self.plt_q_8``.
     """
     train_x, train_y = ex4_tools.generate_data(5000, noise_ratio=0)
     test_x, test_y = ex4_tools.generate_data(200, noise_ratio=0)
     booster = AdaBoost(WL=ex4_tools.DecisionStump, T=500)
     self.a_boost = booster
     booster.train(train_x, train_y)
     curves = self.get_ab_errs(train_x, train_y, test_x, test_y)
     training_errs, test_errs = curves
     self.plt_q_8(training_errs, test_errs)

Exemple #5

0

Afficher le fichier

Fichier : ex4_runme.py Projet : linoy-boaron/machine-learning-python

 def get_ab_errors(self, tx, ty, x, y):
     """Return one error value per entry of ``self.ts``.

     For each ``t`` in ``self.ts`` a fresh AdaBoost with ``T=t`` decision
     stumps is trained on ``(tx, ty)`` and its ``error(x, y, t)`` is
     recorded, in the order of ``self.ts``.
     """
     def _error_for(rounds):
         # One independent booster per requested number of rounds.
         booster = AdaBoost(WL=ex4_tools.DecisionStump, T=rounds)
         booster.train(tx, ty)
         return booster.error(x, y, rounds)

     return [_error_for(t) for t in self.ts]

Exemple #6

0

Afficher le fichier

Fichier : adaboost_main.py Projet : goparajug/ADV-ML

def main():
    """Run AdaBoost with decision stumps over repeated BUPA train/test splits.

    The data is split into ``num_sets`` train/test pairs; for every pair the
    classifier is run for ``T`` boosting iterations and the train and test
    errors are collected in ``all_errors_train`` / ``all_errors_test``.
    """
    classifier = AdaBoost(DecisionStump)

    num_sets = 50
    T = 100
    percentage = 0.9

    all_errors_train = []
    all_errors_test = []
    # Initialised here but not used in the visible part of this function.
    aver_error_train = []
    aver_error_test = []

    # split data in the # of datasets
    split_data(percentage, num_sets)

    # run for all datasets, for boosting iterations = T
    for i in range(num_sets):
        data_split_train = './data/bupa_train' + str(i) + ".txt"
        data_split_test = './data/' + "bupa_test" + str(i) + ".txt"
        X_train, Y_train = load_data(data_split_train)
        X_test, Y_test = load_data(data_split_test)

        score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test)

        # Indentation normalised to spaces: these lines mixed tabs and
        # spaces, which made the loop body unparsable.
        error_train = calculate_error(T, score_train, Y_train)
        error_test = calculate_error(T, score_test, Y_test)

        all_errors_train.append(error_train)
        all_errors_test.append(error_test)

Exemple #7

0

Afficher le fichier

Fichier : adaboost_test.py Projet : gururaj-shriram/ece548-machine-learning

def adaboost_avg_run_new(max_classes, avg_num_of_run, training_set,
                         testing_set):
    """Average AdaBoost errors per ensemble size and compare with scikit-learn.

    The project's AdaBoost is fitted ``avg_num_of_run`` times with
    ``max_classes`` classifiers, recording the training and testing error it
    reports for each ensemble size. For every size a scikit-learn
    ``AdaBoostClassifier`` over a perceptron is also fitted
    ``avg_num_of_run`` times. The averaged errors are printed and returned
    as a list of ``ErrorWrapper`` objects, one per ensemble size.
    """
    # Datasets place the class attribute in different columns, so the
    # attribute vectors (x) and class labels (y) are separated up front.
    train_x, train_y = split_attribute_and_label(training_set)
    test_x, test_y = split_attribute_and_label(testing_set)
    train_subset_num = int(len(train_y) * 0.2)

    sizes = range(1, max_classes + 1)

    # Ensemble size -> errors observed across the runs.
    our_ada_training_errors = {size: [] for size in sizes}
    our_ada_testing_errors = {size: [] for size in sizes}

    for _ in range(avg_num_of_run):
        ada_obj = AdaBoost(max_classes, train_subset_num, THRESHOLD, ETA,
                           UPPER_BOUND, ETA_WEIGHTS, False)
        ada_obj.fit_with_errors(train_x, train_y, test_x, test_y)

        # Error lists on the model are 0-based, ensemble sizes are 1-based.
        for size in sizes:
            our_ada_training_errors[size].append(
                ada_obj.training_error[size - 1])
            our_ada_testing_errors[size].append(
                ada_obj.testing_error[size - 1])

    all_error_list = []
    for cl in sizes:
        scikit_error = []
        for _ in range(avg_num_of_run):
            pada = perceptron.Perceptron(max_iter=UPPER_BOUND,
                                         verbose=0,
                                         random_state=None,
                                         fit_intercept=True,
                                         eta0=ETA)

            bdt = AdaBoostClassifier(pada, algorithm="SAMME", n_estimators=cl)
            bdt.fit(train_x, train_y)
            scikit_error.append(calculate_error(test_y, bdt.predict(test_x)))

        train_runs = our_ada_training_errors[cl]
        test_runs = our_ada_testing_errors[cl]
        errors = ErrorWrapper(cl,
                              sum(train_runs) / len(train_runs),
                              sum(test_runs) / len(test_runs),
                              sum(scikit_error) / len(scikit_error))

        all_error_list.append(errors)
        print("Train avg for %s   %s" % (cl, errors.train_error))
        print("Testing avg for %s   %s" % (cl, errors.test_error))
        print("Scikit adaboost avg for %s   %s" % (cl, errors.scikit_error))

    return all_error_list

Exemple #8

0

Afficher le fichier

Fichier : test_classifiers.py Projet : ZWJ-here/pyclassic

 def testBupaData(self):
     """Check AdaBoost over decision stumps reaches AUC > 0.9 on BUPA.

     The classifier is evaluated on its own training set for 100 to 500
     boosting rounds; the AUC of each run is printed and asserted.
     """
     X, Y = load_bupa_dataset()
     classifier = AdaBoost(DecisionStump)
     for t in [100, 200, 300, 400, 500]:
         score = classifier.test_on_training_set(X, Y, t)
         # list(...) gives ROCData a real sequence on Python 3 as well,
         # where zip() is a one-shot iterator.
         roc = pyroc.ROCData(list(zip(Y, score)))
         auc = roc.auc()
         print(auc)
         # assertTrue replaces the failUnless alias, which newer unittest
         # versions no longer provide.
         self.assertTrue(auc > .9)

Exemple #9

0

Afficher le fichier

Fichier : test_classifiers.py Projet : hokix/jadesoulpp

 def testBupaData(self):
     """Check AdaBoost over decision stumps reaches AUC > 0.9 on BUPA.

     The classifier is evaluated on its own training set for 100 to 500
     boosting rounds; the AUC of each run is printed and asserted.
     """
     X, Y = load_bupa_dataset()
     classifier = AdaBoost(DecisionStump)
     for t in [100, 200, 300, 400, 500]:
         score = classifier.test_on_training_set(X, Y, t)
         # list(...) gives ROCData a real sequence on Python 3 as well,
         # where zip() is a one-shot iterator.
         roc = pyroc.ROCData(list(zip(Y, score)))
         auc = roc.auc()
         print(auc)
         # assertTrue replaces the failUnless alias, which newer unittest
         # versions no longer provide.
         self.assertTrue(auc > .9)

Exemple #10

0

Afficher le fichier

Fichier : ex4_runme.py Projet : linoy-boaron/machine-learning-python

 def q_9(self):
     """Plot decision boundaries for one booster per value in ``self.ts``.

     Each booster is trained on 5000 noiseless samples and drawn, together
     with a separate 200-sample set, in its own cell of a 2x3 subplot grid.
     """
     train_x, train_y = ex4_tools.generate_data(5000, noise_ratio=0)
     sample_x, sample_y = ex4_tools.generate_data(200, noise_ratio=0)
     # Subplot indices are 1-based.
     for cell, rounds in enumerate(self.ts, start=1):
         booster = AdaBoost(WL=ex4_tools.DecisionStump, T=rounds)
         booster.train(train_x, train_y)
         plt.subplot(2, 3, cell)
         ex4_tools.decision_boundaries(booster, sample_x, sample_y, rounds)
     plt.show()

Exemple #11

0

Afficher le fichier

Fichier : ex4_runme.py Projet : linoy-boaron/machine-learning-python

 def q_10(self):
     """Plot the decision boundary of the booster with the smallest error.

     Errors for every value in ``self.ts`` come from ``get_ab_errors``; the
     value with the minimal error is retrained on the training data and
     drawn, with the error and the number of classifiers in the title.
     """
     train_x, train_y = ex4_tools.generate_data(5000, noise_ratio=0)
     sample_x, sample_y = ex4_tools.generate_data(200, noise_ratio=0)
     errors = self.get_ab_errors(train_x, train_y, sample_x, sample_y)
     best = np.argmin(errors)
     best_rounds = self.ts[best]
     booster = AdaBoost(WL=ex4_tools.DecisionStump, T=best_rounds)
     booster.train(train_x, train_y)
     ex4_tools.decision_boundaries(booster, train_x, train_y, best_rounds)
     caption = ("min error is " + str(errors[best]) + " with " +
                str(best_rounds) + " classifiers")
     plt.title(caption)
     plt.show()

Exemple #12

0

Afficher le fichier

def Q10():
    """Plot the decision boundary for the T with the smallest Q9 error.

    ``Q9()`` is called to obtain one error per candidate T; the candidate
    with the minimal error is retrained on 1000 noiseless samples, drawn,
    saved as 'Q10' and shown.
    """
    X, y = generate_data(1000, 0)
    T = [5, 10, 50, 100, 200, 500]
    i = int(np.argmin(Q9()))
    T_min = T[i]
    optimal_h = AdaBoost(DecisionStump, T_min)
    optimal_h.train(X, y)
    decision_boundaries(optimal_h, X, y, T_min)
    # Fixed: the title always claimed T=500 regardless of the T actually
    # selected and plotted; report the selected value instead.
    plt.title('Descision for T={} that minimizing the test err'.format(T_min))
    plt.savefig('Q10')
    plt.show()

Exemple #13

0

Afficher le fichier

def Q9():
    """Draw decision boundaries for every T and return the matching errors.

    A single 500-round booster is trained on 300 noiseless samples. For each
    value in the module-level ``T`` its error on that same sample is
    recorded and its decision boundary is drawn in a 3x2 grid. The figure is
    saved as 'Q9' and shown.

    Returns:
        numpy array with one error per entry of ``T``.
    """
    X, y = generate_data(300, 0)
    booster = AdaBoost(DecisionStump, 500)
    booster.train(X, y)
    errors = []
    figure = plt.figure(figsize=(10, 10))
    for cell, rounds in enumerate(T):
        figure.add_subplot(3, 2, cell + 1)
        errors.append(booster.error(X, y, rounds))
        decision_boundaries(booster, X, y, rounds)
    plt.savefig('Q9')
    plt.show()
    return np.array(errors)

Exemple #14

0

Afficher le fichier

def crossValidateAdaboost(inputFile, outputFile, nIterations):
    """Cross-validate AdaBoost on the tic-tac-toe data.

    For each of the data set's ``N_FOLDS`` folds a new AdaBoost is trained
    for ``nIterations`` rounds; the per-iteration Ein and Eout values it
    returns are summed over the folds and divided by the number of folds.
    ``outputFile`` is accepted but not used here.

    Returns:
        Tuple ``(mean Ein, mean Eout)``, one value per iteration.
    """
    game = TicTacToe(inputFile)
    totalEin = np.zeros(nIterations)
    totalEout = np.zeros(nIterations)

    for fold in range(game.N_FOLDS):
        game.createTrainAndTestSets(fold)
        booster = AdaBoost(game)
        foldEin, foldEout = booster.train(game, nIterations)
        # Element-wise running totals across folds.
        totalEin = np.sum([totalEin, foldEin], axis=0)
        totalEout = np.sum([totalEout, foldEout], axis=0)
        print('--------------------------------------')

    meanEin = totalEin / game.N_FOLDS
    meanEout = totalEout / game.N_FOLDS
    return meanEin, meanEout

Exemple #15

0

Afficher le fichier

Fichier : adaboost_test.py Projet : gururaj-shriram/ece548-machine-learning

def adaboost_avg_run(max_classes, avg_num_of_run, training_set, testing_set):
    """Average AdaBoost errors for odd ensemble sizes and compare to scikit.

    For every ensemble size 1, 3, 5, ... up to ``max_classes`` the project's
    AdaBoost and a scikit-learn ``AdaBoostClassifier`` over a perceptron are
    each fitted ``avg_num_of_run`` times. The averaged training, testing and
    scikit errors are printed and collected.

    Returns:
        List of ``ErrorWrapper`` objects, one per ensemble size.
    """
    def _mean(values):
        return sum(values) / len(values)

    # Averaged testing errors as percentages; filled in but not returned.
    testing_error_list = []
    all_error_list = []

    # Datasets place the class attribute in different columns, so the
    # attribute vectors (x) and class labels (y) are separated up front.
    train_x, train_y = split_attribute_and_label(training_set)
    test_x, test_y = split_attribute_and_label(testing_set)
    train_subset_num = int(len(train_y) * 0.2)

    for cl in range(1, max_classes + 1, 2):
        train_error, testing_error, scikit_error = [], [], []
        for _ in range(avg_num_of_run):
            ada_obj = AdaBoost(cl, train_subset_num, THRESHOLD, ETA,
                               UPPER_BOUND, ETA_WEIGHTS, False)
            ada_obj.fit(train_x, train_y)

            train_mistakes = ada_obj.xor_tuples(train_y,
                                                ada_obj.predict(train_x))
            error_rate_train = classifier_error_rate(train_mistakes)

            test_mistakes = ada_obj.xor_tuples(test_y,
                                               ada_obj.predict(test_x))
            error_rate_test = classifier_error_rate(test_mistakes)

            train_error.append(error_rate_train)
            testing_error.append(error_rate_test)

            # Reference model: scikit-learn AdaBoost over a perceptron.
            pada = perceptron.Perceptron(max_iter=UPPER_BOUND,
                                         verbose=0,
                                         random_state=None,
                                         fit_intercept=True,
                                         eta0=ETA)

            bdt = AdaBoostClassifier(pada, algorithm="SAMME", n_estimators=cl)
            bdt.fit(train_x, train_y)
            scikit_error.append(calculate_error(test_y, bdt.predict(test_x)))

        errors = ErrorWrapper(cl, _mean(train_error), _mean(testing_error),
                              _mean(scikit_error))

        all_error_list.append(errors)
        print("Train avg for %s   %s" % (cl, errors.train_error))
        print("Testing avg for %s   %s" % (cl, errors.test_error))
        testing_error_list.append(_mean(testing_error) * 100)
        print("Scikit adaboost avg for %s   %s" % (cl, errors.scikit_error))

    return all_error_list

Exemple #16

0

Afficher le fichier

def main():
    """Run AdaBoost over repeated BUPA splits and save the averaged errors.

    The data is split into ``num_sets`` train/test pairs. For every pair the
    classifier is run for ``T`` boosting rounds and its train/test error
    curves are collected. The curves are then averaged per round across all
    splits and saved, followed by a final run on the complete data set whose
    result is saved under the 'empirical' label.
    """
    classifier = AdaBoost(DecisionStump)

    num_sets = 50
    T = 100
    percentage = 0.9

    all_errors_train = []
    all_errors_test = []

    split_data(percentage, num_sets)

    for i in range(num_sets):
        data_split_train = './data/bupa_train' + str(i) + ".txt"
        data_split_test = './data/' + "bupa_test" + str(i) + ".txt"
        X_train, Y_train = load_data(data_split_train)
        X_test, Y_test = load_data(data_split_test)

        score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test)
        error_train = calculate_error(T, score_train, Y_train)
        error_test = calculate_error(T, score_test, Y_test)
        all_errors_train.append(error_train)
        all_errors_test.append(error_test)

    # Average the errors across splits, one value per boosting round.
    # Fixed: the appends used to sit inside the per-split loop, so running
    # partial sums were stored num_sets times per round instead of a single
    # average. float() keeps the division true division on Python 2 as well.
    aver_error_train = [
        sum(errors[j] for errors in all_errors_train) / float(num_sets)
        for j in range(T)
    ]
    aver_error_test = [
        sum(errors[j] for errors in all_errors_test) / float(num_sets)
        for j in range(T)
    ]

    save_result_final(aver_error_train, 'train')
    save_result_final(aver_error_test, 'test')

    # Final run on the complete data set.
    dataset_here = "./data/bupa.data"
    X_all, Y_all = load_data(dataset_here)
    score_optional = classifier.run_adaboost(X_all, Y_all, T, None, True)
    save_result_final(score_optional, 'empirical')

Exemple #17

0

Afficher le fichier

def main():
    """Fit AdaBoost on a five-point toy set and print two predictions."""
    samples = [[1.0, 2.1], [2.0, 1.1], [1.3, 1.0], [1.0, 1.0], [2.0, 1.0]]
    targets = [1.0, 1.0, -1.0, -1.0, 1.0]

    X_train = np.array(samples)
    # Labels are passed as a column vector.
    y_train = np.array(targets).reshape((-1, 1))

    model = AdaBoost(verbose=1)
    model.fit(X_train, y_train)

    X_test = np.array([[5, 5], [0, 0]])
    y_predict = model.predict(X_test)
    print('predict result: ', y_predict)

Exemple #18

0

Afficher le fichier

Fichier : ex4_runme.py Projet : akotek/iml_ex4

def Q3():  # AdaBoost
    """Plot AdaBoost error curves and decision boundaries on the SynData set.

    First part: for T = 5, 10, ..., 100 and 200 a booster of decision stumps
    is trained and its training and validation errors are plotted against T.
    Second part: boosters with T in {1, 5, 10, 100, 200} are trained and
    their decision boundaries drawn in a 2x3 grid.
    """
    base = "/cs/usr/kotek/PycharmProjects/iml_ex4/SynData/"
    X_train, y_train = read_from_txt(base + "X_train.txt",
                                     base + "y_train.txt")
    X_val, y_val = read_from_txt(base + "X_val.txt", base + "y_val.txt")
    X_test, y_test = read_from_txt(base + "X_test.txt", base + "y_test.txt")

    # -------- First part --------
    T = np.append(np.arange(5, 105, step=5), np.array([200]))

    n_settings = len(T)
    training_err = np.zeros(n_settings)
    validation_err = np.zeros(n_settings)

    # AdaBoost is given a weighted weak learner (WL).
    WL = ex4_tools.DecisionStump
    for slot, rounds in enumerate(T):
        adaboost = AdaBoost(WL, rounds)
        adaboost.train(X_train, y_train)
        training_err[slot] = adaboost.error(X_train, y_train)
        validation_err[slot] = adaboost.error(X_val, y_val)

    plt.plot(T, training_err, label="train error")
    plt.plot(T, validation_err, label="validation error")
    plt.legend()
    plt.show()

    # -------- Second part --------
    decision_T = [1, 5, 10, 100, 200]

    plt.figure()
    plt.ion()
    for cell, rounds in enumerate(decision_T, start=1):
        adaboost = AdaBoost(WL, rounds)
        adaboost.train(X_train, y_train)
        plt.subplot(2, 3, cell)
        caption = "T=" + str(rounds)
        ex4_tools.decision_boundaries(adaboost, X_train, y_train, caption)
    plt.show()
    plt.pause(5)

Exemple #19

0

Afficher le fichier

Fichier : ex6_runme.py Projet : orlykor/iml-ex6

def Q3(): # AdaBoost
    """Plot AdaBoost results and report the T with the lowest validation error.

    Boosters are trained for T = 1 and T = 5, 10, ..., 200. Decision
    boundaries are drawn for T in {1, 5, 10, 50, 100, 200}, the training and
    validation errors are plotted against T, and the T with the smallest
    validation error is retrained, drawn and evaluated on the test data.
    Reads the module-level ``x_train``/``y_train``, ``x_val``/``y_val`` and
    ``x_test``/``y_test`` arrays.
    """
    T = [1,5,10,50,100,200]
    # Candidate round counts: 1, then every multiple of 5 up to 200.
    T_loop = [1, 5, 10] + [i * 5 for i in range(3, 41)]
    train_err = []
    valid_err = []

    plt.figure("decisions of the learned classifiers for T")
    num_graph = 0

    for t in T_loop:
        ada_boost = AdaBoost(DecisionStump, t)
        ada_boost.train(x_train, y_train)
        if t in T:
            num_graph += 1
            plt.subplot(3,2, num_graph)
            decision_boundaries(ada_boost, x_train, y_train, "T = %d" %t)

        train_err.append(ada_boost.error(x_train, y_train))
        valid_err.append(ada_boost.error(x_val, y_val))

    plt.figure("training error and the validation error")
    # The `hold` keyword is no longer accepted by Matplotlib; on a freshly
    # created figure it had no effect, so it is simply dropped.
    plt.plot(T_loop, train_err, 'ro-', label= "Training Error")
    plt.plot(T_loop, valid_err, 'go-', label= "Validation Error")
    plt.legend()
    plt.show()

    # Find the T with the minimal validation error and plot it with the
    # training data.
    plt.figure("decision boundaries of T min, with the training data")

    # Fixed: `5 * argmin` is only right for indices >= 1; index 0 stands for
    # T=1 but was turned into T=0. Look the value up in T_loop instead.
    T_hat = T_loop[int(np.argmin(valid_err))]
    ada_boost = AdaBoost(DecisionStump, T_hat)
    ada_boost.train(x_train, y_train)
    test_err = ada_boost.error(x_test, y_test)
    decision_boundaries(ada_boost, x_train, y_train, "T = %d" %T_hat)
    plt.show()
    print ("The value of T that minimizes the validation error is: ", T_hat)
    print("the test error of the corresponding classifier is: ", test_err)


    return

Exemple #20

0

Afficher le fichier

Fichier : ex6_runme.py Projet : RefiPeretz/ML-Projects

def Q3():  # AdaBoost
    """Train AdaBoost for T = 1, 5, 10, ..., 200 and plot/report the results.

    Figure 1 shows the training-data decision boundaries for
    T in {1, 5, 10, 50, 100, 200}; figure 2 the training and validation
    error against T; figure 3 the decision boundary of the T with the
    smallest validation error, whose test error is also printed. Reads the
    module-level ``X_train``/``y_train``, ``X_val``/``y_val`` and
    ``X_test``/``y_test`` arrays.
    """
    print("Q3")
    print("===============================================")
    # Position k >= 1 holds T = 5 * k; position 0 holds T = 1.
    T = [1] + list(range(5, 201, 5))

    classifiers = []
    train_err = []
    val_err = []
    for rounds in T:
        booster = AdaBoost(DecisionStump, rounds)
        booster.train(X_train, y_train)
        classifiers.append(booster)
        train_err.append(booster.error(X_train, y_train))
        val_err.append(booster.error(X_val, y_val))

    plt.figure(1)
    for cell, slot in enumerate((0, 1, 2, 10, 20, 40), start=1):
        plt.subplot(3, 2, cell)
        decision_boundaries(classifiers[slot], X_train, y_train,
                            "Training Classification T=%d" % T[slot])

    plt.show()
    plt.figure(2)
    training_patch = mpatches.Patch(color='red', label='Training')
    validation_patch = mpatches.Patch(color='blue', label='Validation')
    plt.legend(handles=[training_patch, validation_patch])
    plt.plot(T, train_err, 'r', T, val_err, 'b')
    plt.title("Training Error and Validation Error ")

    plt.show()

    T_hat = T[np.argmin(val_err)]
    # T_hat // 5 maps a round count back to its position in `classifiers`.
    best = classifiers[T_hat//5]
    print("the value of T_hat (T that minimize validation error) is:", T_hat)
    print("the test error of T_hat is:", best.error(X_test, y_test))
    plt.figure(3)
    decision_boundaries(classifiers[T_hat//5], X_train, y_train, "Training Classification of T_hat")
    plt.show()
    print("===============================================")
    return

Exemple #21

0

Afficher le fichier

def select_classifier(cname, features=None, labels=None, **kwargs):
    """Build, and optionally fit, the classifier selected by ``cname``.

    ``cname`` is matched as a prefix of the known names, tried in this
    order: 'svm', 'logistic-regression', 'linear-regression',
    'random-forest', 'adaboost'. If the random forest class is not
    available, a warning is logged and AdaBoost is used instead.

    Args:
        cname: Full name or prefix of the classifier to build.
        features: Training features; fitting is skipped when None.
        labels: Training labels; fitting is skipped when None.
        **kwargs: Passed to the SVC/AdaBoost constructor and to ``fit``.
            ``class_weight`` is removed before building an SVC.

    Returns:
        The classifier, or the result of its ``fit`` call when both
        ``features`` and ``labels`` are given.

    Raises:
        ValueError: If ``cname`` matches none of the known names.
    """
    c = None
    if 'svm'.startswith(cname):
        # pop() with a default: a missing key used to raise KeyError here.
        kwargs.pop('class_weight', None)
        c = SVC(probability=True, **kwargs)
    elif 'logistic-regression'.startswith(cname):
        c = LogisticRegression()
    elif 'linear-regression'.startswith(cname):
        c = LinearRegression()
    elif 'random-forest'.startswith(cname):
        try:
            c = RandomForest()
        except NameError:
            logging.warning(' Tried to use random forest, but not available.' +
                            ' Falling back on adaboost.')
            cname = 'ada'
    if 'adaboost'.startswith(cname):
        c = AdaBoost(**kwargs)
    if c is None:
        # Previously this fell through to an UnboundLocalError on `c`.
        raise ValueError('Unknown classifier name: %r' % (cname,))
    if features is not None and labels is not None:
        c = c.fit(features, labels, **kwargs)
    return c

Exemple #22

0

Afficher le fichier

Fichier : cascade.py Projet : WynMew/FaceDetection

    def train(self):
        """Train the cascade of strong classifiers (not finished yet).

        Raises:
            NotImplementedError: Always; the routine below is a draft that
                is not reachable yet.
        """
        # Fixed: `raise ("Unfinished")` raised a plain string, which fails
        # with a TypeError instead of signalling "not implemented".
        raise NotImplementedError("Unfinished")

        # ---- Draft implementation below (unreachable until finished) ----
        detection_rate = 0
        from config import EXPECTED_FPR_PRE_LAYYER
        from config import EXPECTED_FPR
        from config import LABEL_NEGATIVE

        cur_fpr = 1.0
        mat = self._mat
        label = self._label

        for i in xrange(self.limit):

            # Stop adding layers once the accumulated rate is low enough.
            if cur_fpr < EXPECTED_FPR:
                break
            else:
                cache_filename = ADABOOST_CACHE_FILE + str(i)

                if os.path.isfile(cache_filename):
                    # Reuse a previously saved layer.
                    self.strong_classifier[i] = getCachedAdaBoost(
                        mat=self._mat,
                        label=self._label,
                        filename=cache_filename,
                        limit=ADABOOST_LIMIT)
                else:
                    self.strong_classifier[i] = AdaBoost(mat,
                                                         label,
                                                         limit=ADABOOST_LIMIT)
                    output, fpr = self.strong_classifier[i].train()

                    cur_fpr *= fpr

                    fp_num = fpr * numpy.count_nonzero(label == LABEL_NEGATIVE)

                    self.strong_classifier[i].saveModel(cache_filename)
                    mat, label = self.updateTrainingDate(mat, output, fp_num)

                self.classifierNum += 1

Exemple #23

0

Afficher le fichier

def Q_adaboost(noise_ratio):
    """Train a 500-round AdaBoost and show its error curves and boundaries.

    Shows, in order: training/test error against T; decision boundaries on
    the test sample for T in {5, 10, 50, 100, 200, 500}; the decision
    boundary for the T with the smallest test error; and two plots that pass
    the last iteration's sample weights (raw and rescaled) to
    ``decision_boundaries``.

    Args:
        noise_ratio: Passed to ``generate_data`` for both samples and shown
            in the plot titles.
    """
    X_train, y_train = generate_data(5000, noise_ratio)
    classifier = AdaBoost(DecisionStump, 500)
    classifier.train(X_train, y_train)
    X_test, y_test = generate_data(200, noise_ratio)
    vals = np.arange(1, 501)
    # Each curve is computed once and reused for the plot and the argmin.
    train_errors = [classifier.error(X_train, y_train, t) for t in vals]
    test_errors = [classifier.error(X_test, y_test, t) for t in vals]
    plt.plot(vals, train_errors, label='Training Error', lw=1, alpha=0.6)
    plt.plot(vals, test_errors, label='Test Error', lw=1, alpha=0.6)
    plt.legend()
    plt.title(
        f'Adaboost Training & Test Error according to T, noise={noise_ratio}')
    plt.show()
    boosts = [5, 10, 50, 100, 200, 500]
    for i in range(6):
        plt.subplot(2, 3, i + 1)
        decision_boundaries(classifier, X_test, y_test, boosts[i])
        plt.title(f'T={boosts[i]}, noise={noise_ratio}')
    plt.show()
    best = int(np.argmin(test_errors))
    # Fixed: argmin yields a 0-based position, while the matching number of
    # rounds is vals[position] (= position + 1). Using the position as T
    # was off by one and could request T=0.
    min_t = int(vals[best])
    min_err = test_errors[best]
    decision_boundaries(classifier, X_train, y_train, min_t)
    plt.title(f'min test_err {min_err} T={min_t} noise {noise_ratio}')
    plt.show()
    # NOTE(review): 499 is passed here although the titles say T=500 --
    # confirm which value decision_boundaries expects.
    decision_boundaries(classifier, X_train, y_train, 499,
                        classifier.D_of_last_iteration)
    plt.title(f'un-normalized weighed sample T=500, noise={noise_ratio}')
    plt.show()
    decision_boundaries(
        classifier, X_train, y_train, 499, classifier.D_of_last_iteration /
        np.max(classifier.D_of_last_iteration) * 100)
    plt.title(f'normalized weighed sample T=500, noise={noise_ratio}')
    plt.show()

Exemple #24

0

Afficher le fichier

def Q8():
    """Plot training and test error of AdaBoost for T = 1..500.

    A 500-round booster of decision stumps is trained on 5000 noiseless
    samples; its error on the training sample and on a separate 200-sample
    test set is evaluated for every T, plotted, saved as 'Q8' and shown.
    """
    n_rounds = 500
    X, y = generate_data(5000, 0)
    h = AdaBoost(DecisionStump, n_rounds)
    h.train(X, y)
    training_err = np.zeros((n_rounds, ))
    test_err = np.zeros((n_rounds, ))
    test_set, labels = generate_data(200, 0)
    rounds = range(1, n_rounds + 1)
    for t in rounds:
        training_err[t - 1] = h.error(X, y, t)
        test_err[t - 1] = h.error(test_set, labels, t)
    # Fixed: the curves were drawn against range(500), i.e. 0..499, so every
    # point sat one step left of the T it was computed for.
    plt.plot(rounds, training_err, label='Training error')
    plt.plot(rounds, test_err, label='Test error')
    plt.title('question 8')
    plt.legend(loc='upper right')
    plt.xlabel('T')
    plt.ylabel('Error rate')
    plt.savefig('Q8')
    plt.show()

Exemple #25

0

Afficher le fichier

def Q17():
    """Plot train vs. test error of the AdaBoost face classifier.

    Images are loaded from '../Docs/' and converted with ``integral_image``;
    a 50-round booster of ``WeakImageClassifier`` is trained and its errors
    for T = 1..49 are plotted and saved as FIG_DIR3 + 'q17'.
    """
    loaded = load_images('../Docs/')
    train_images, test_images, train_labels, test_labels = loaded
    train_images = integral_image(train_images)
    test_images = integral_image(test_images)

    WL = WeakImageClassifier
    T = 50
    ada = AdaBoost(WL, T)
    ada.train(train_images, train_labels)

    # NOTE(review): arange(1, T) stops at T - 1, so the full ensemble is
    # not evaluated -- confirm this is intended.
    T_range = np.arange(1, T)
    train_errs = [ada.error(train_images, train_labels, t) for t in T_range]
    test_errs = [ada.error(test_images, test_labels, t) for t in T_range]

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Face Classifier")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate (%)')
    for curve, caption in ((train_errs, 'Train Error'),
                           (test_errs, 'Test Error')):
        plt.plot(T_range, curve, label=caption)
    plt.legend()
    plt.savefig(FIG_DIR3 + 'q17')
    # TODO complete this function

Exemple #26

0

Afficher le fichier

def Q8(noise=0.0):
    """Plot train vs. test error of AdaBoost on generated data.

    A 500-round booster of decision stumps is trained on 5000 samples and
    its errors for T = 1..499 on the training sample and on a 200-sample
    test set are plotted. The figure is saved as FIG_DIR3 + 'q8', with a
    '_<noise>' suffix (dots replaced by underscores) when noise is non-zero.

    Args:
        noise: Noise ratio passed to ``generate_data``.

    Returns:
        Tuple ``(ada, test_X, test_y, train_X, train_y)``.
    """
    n_samples_train = 5000
    n_samples_test = 200
    T = 500
    train_X, train_y = generate_data(n_samples_train, noise)
    test_X, test_y = generate_data(n_samples_test, noise)

    WL = DecisionStump
    ada = AdaBoost(WL, T)
    ada.train(train_X, train_y)

    T_range = np.arange(1, T)
    train_errs = [ada.error(train_X, train_y, t) for t in T_range]
    test_errs = [ada.error(test_X, test_y, t) for t in T_range]

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Adaboost")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate (%)')
    for curve, caption in ((train_errs, 'Train Error'),
                           (test_errs, 'Test Error')):
        plt.plot(T_range, curve, label=caption)
    plt.legend()

    suffix = ''
    if noise != 0:
        suffix = '_' + str(noise).replace('.', '_')
    plt.savefig(FIG_DIR3 + 'q8' + suffix)

    # TODO complete this function
    return ada, test_X, test_y, train_X, train_y

Exemple #27

0

Afficher le fichier

Fichier : testing.py Projet : Nancheng/Machine_Learning

            for j in range(i * TESTING_SAMPLE_NUM, (i+1) * TESTING_SAMPLE_NUM):
                haarGroup.append(float(tmp[j]))

            Original_Data.append(haarGroup)

    Original_Data = numpy.array(Original_Data)

# Close the handle used by the preceding part of the script (its opening is
# not visible in this excerpt).
fileObj.close()

# NOTE(review): "a+" opens the file for appending and reading; where reading
# starts in that mode is platform dependent, so readlines() below may return
# nothing -- confirm whether a plain read mode was intended.
fileObj = open(ADABOOST_FILE, "a+")

print "Constructing AdaBoost from existed model data"

# All lines of the saved model file.
tmp = fileObj.readlines()

# Construct the model object with training switched off.
a = AdaBoost(train = False)

# The file is read in groups of four consecutive lines; within a group the
# lines are parsed, in order, as alpha (float), demention (int),
# label (float) and threshold (float).
for i in range(0, len(tmp), 4):

    alpha, demention, label, threshold = None, None, None, None

    # j % 4 selects which of the four fields the current line holds.
    for j in range(i, i + 4):
        if (j % 4) == 0:
            alpha = float(tmp[j])
        elif (j % 4) == 1:
            demention = int(tmp[j])
        elif (j % 4) == 2:
            label = float(tmp[j])
        elif (j % 4) == 3:
            threshold = float(tmp[j])

Exemple #28

0

Afficher le fichier

        from mapReduce import reduce

        map(Face, nonFace)
        _mat = reduce()

# Feature matrix produced by the preceding part of the script.
mat = _mat

featureNum, sampleNum = _mat.shape

# The matrix is (features x samples); both sizes must match the configured
# counts.
assert sampleNum  == (POSITIVE_SAMPLE + NEGATIVE_SAMPLE)
assert featureNum == FEATURE_NUM

# +1 for every face sample, -1 for every non-face sample, faces first.
# List repetition replaces the Python 2-only xrange() comprehensions.
Label_Face    = [+1] * POSITIVE_SAMPLE
Label_NonFace = [-1] * NEGATIVE_SAMPLE

label = numpy.array(Label_Face + Label_NonFace)

cache_filename = ADABOOST_CACHE_FILE + str(0)

# Reuse a cached model when one exists; otherwise train and cache it.
if os.path.isfile(cache_filename):
    model = getCachedAdaBoost(mat     = _mat,
                              label   = label,
                              filename= cache_filename,
                              limit   = ADABOOST_LIMIT)
else:
    model = AdaBoost(mat, label, limit = ADABOOST_LIMIT)
    model.train()
    model.saveModel(cache_filename)

# print() with one argument behaves the same on Python 2 and Python 3.
print(model)

Exemple #29

0

Afficher le fichier

def ab_plot(iterate):
    """Fit one boosted model per cell of a 2x2 figure and show the figure.

    For every axis a random blob classification problem is generated, split
    70/30, and an ``AdaBoost`` with ``n_iter=iterate`` and depth-1 trees is
    fitted on the training part. The code that would predict on the test
    part and draw on the axis is commented out, so the displayed axes stay
    empty.

    Args:
        iterate: Number of boosting iterations, passed as ``n_iter``.
    """
    fig, axes = plt.subplots(2, 2)
    # fig.set_size_inches(10, 10)
    for ax in axes.flatten():
        n_ex = 100
        n_trees = 50  # only referenced by the commented-out plotting code
        n_feats = np.random.randint(2, 100)
        max_depth_d = 1  #np.random.randint(1, 100)

        # Drawn from a single-element list, so this is always True and the
        # regression branch below is never taken.
        classifier = np.random.choice([True])  #, False
        if classifier:
            # create classification problem
            n_classes = np.random.randint(2, 10)
            X, Y = make_blobs(n_samples=n_ex, centers=n_classes, n_features=2)
            X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.3)
            n_feats = min(n_feats, X.shape[1])

            # score used by the (commented-out) evaluation code
            def loss(yp, y):
                return accuracy_score(yp, y)

            # initialize model
            # `criterion` is drawn but only appears in the commented-out
            # split_criterion argument below.
            criterion = np.random.choice(["entropy", "gini"])
            mine_g = AdaBoost(
                n_iter=iterate,
                max_depth=max_depth_d,
                classifier=classifier,
                # learning_rate=1,
                # loss="crossentropy",
                # step_size="constant",
                # split_criterion=criterion,
            )

        else:
            # create regression problem
            X, Y = make_regression(n_samples=n_ex, n_features=1)
            X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.3)
            n_feats = min(n_feats, X.shape[1])

            # initialize model
            criterion = "mse"
            loss = mean_squared_error
            mine_g = GradientBoostedDecisionTree(
                n_iter=iterate,
                # n_trees=n_trees,
                max_depth=max_depth_d,
                classifier=classifier,
                learning_rate=1,
                loss="mse",
                step_size="adaptive",
                split_criterion=criterion,
            )

        # fit 'em
        mine_g.fit(X, Y)

        # # get preds on test set
        # y_pred_mine_test_g = mine_g.predict(X_test)
        #
        # loss_mine_test_g = loss(y_pred_mine_test_g, Y_test)
        #
        # if classifier:
        #     entries = [
        #         ("GB", loss_mine_test_g, y_pred_mine_test_g)
        #     ]
        #     (lbl, test_loss, preds) = entries[np.random.randint(1)]
        #     ax.set_title("{} Accuracy: {:.2f}%".format(lbl, test_loss * 100))
        #     for i in np.unique(Y_test):
        #         ax.scatter(
        #             X_test[preds == i, 0].flatten(),
        #             X_test[preds == i, 1].flatten(),
        #             #  s=0.5,
        #         )
        # else:
        #     X_ax = np.linspace(
        #         np.min(X_test.flatten()) - 1, np.max(X_test.flatten()) + 1, 100
        #     ).reshape(-1, 1)
        #     y_pred_mine_test_g = mine_g.predict(X_ax)
        #
        #     ax.scatter(X_test.flatten(), Y_test.flatten(), c="b", alpha=0.5)
        #     #  s=0.5)
        #     ax.plot(
        #         X_ax.flatten(),
        #         y_pred_mine_test_g.flatten(),
        #         #  linewidth=0.5,
        #         label="GB".format(n_trees, n_feats, max_depth_d),
        #         color="red",
        #     )
        #     ax.set_title(
        #         "GB: {:.1f}".format(
        #             loss_mine_test_g
        #         )
        #     )
        #     ax.legend()
        # ax.xaxis.set_ticklabels([])
        # ax.yaxis.set_ticklabels([])
    # plt.savefig("plot.png", dpi=300)
    plt.show()

Exemple #30

0

Afficher le fichier

Fichier : tester6.py Projet : Nancheng/Machine_Learning

Just Enjoy it.
"""

import numpy
import matplotlib.pyplot as pyplot
from adaboost import AdaBoost
from sklearn import datasets

"""
Samples for AdaBoost
"""
# Draw 200 labelled samples from scikit-learn's Hastie 10.2 generator with a
# fixed seed, then transpose so that Original_Data[k] holds the k-th
# coordinate of every sample.
Original_Data, Tag = datasets.make_hastie_10_2(n_samples=200, random_state=1)
Original_Data = Original_Data.transpose()


# Scatter the first two coordinates: red '+' where the tag equals 1,
# blue '+' for every other tag.
for i, tag in enumerate(Tag):
    marker = '+r' if tag == 1 else '+b'
    pyplot.plot(Original_Data[0][i], Original_Data[1][i], marker,
                markersize=10)
pyplot.title("Sample Points")
pyplot.show()

a = AdaBoost(Original_Data, Tag)

# 10000 is passed straight to AdaBoost.train; presumably the number of
# boosting rounds -- confirm against the adaboost module.
a.train(10000)

Exemple #31

0

Afficher le fichier

Fichier : main.py Projet : antoniopessotti/MLNet-Classifying-Complex-Networks

def main():
    ''' Load data, split data, create the AdaBoost algorithm with decision
    stumps, calculate errors and save the final file.

    Since this is a binary classifier, each of the 4 networks is handled
    one at a time (data is loaded through one_against_all.load_data).

    For every normalization in NORM and every set number in
    1..NUM_SETS, the data is processed twice -- first with outliers, then
    without -- and for each network name in NET_NAMES the mean train and
    test errors are handed to save_result_final.  A sorted summary of the
    test errors is written next to the results file.

    Returns None; all results go to files and to standard output.
    '''

    classification = []
    ada_folder = OUTPUT_FOLDER + 'adaboost/'
    if not os.path.exists(ada_folder):
        os.makedirs(ada_folder)
    output_file = ada_folder + 'results.out'
    # Start the results file from scratch with its two header lines.
    with open(output_file, "w") as f:
        f.write("# ADABOOST RESULTS, TRAIN/TEST FRACTION: " + str(PERCENTAGE)  + "\n")
        f.write("# Net   Norm   Set   OL?   Accu. Train   Accu Test\n")

    # choose classifier
    classifier = AdaBoost(DecisionStump)

    # for each normalization:
    for norm in NORM:

        # for each set
        for number in range(1, NUM_SETS+1):

            # Same pipeline twice: first with outliers, then without.
            for with_outlier in (True, False):
                # get input file paths
                input_train = get_input_data('train', number, norm, with_outlier)
                input_test = get_input_data('test', number, norm, with_outlier)

                # for each network type:
                for net_name in NET_NAMES:
                    # get data
                    X_train, Y_train = one_against_all.load_data(input_train, net_name)
                    X_test, Y_test = one_against_all.load_data(input_test, net_name)

                    # Progress is reported for both passes (the no-outlier
                    # pass used to run silently).
                    print('Calculating adaboost for net ' + net_name + ' with  normalization ' + norm + ' and set ' + str(number))
                    score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test)

                    error_train = calculate_error(T, score_train, Y_train)
                    error_test = calculate_error(T, score_test, Y_test)

                    # float() keeps the mean exact under Python 2 even if
                    # the individual errors are integers.
                    error_train_total = sum(error_train) / float(len(error_train))
                    error_test_total = sum(error_test) / float(len(error_test))

                    save_result_final(error_train_total, error_test_total, output_file, net_name, norm, number, with_outlier)
                    classification.append(str(round(error_test_total,3))  +', ' + str(norm) + ', ' + str(number) + ', ' + str(with_outlier)[0] + '\n')

    # find best classifiers
    # The entries are strings, so this is a lexicographic sort on the
    # rounded test error that leads each entry.
    classification.sort()
    # NOTE(review): the summary path is output_file + 'good_classification',
    # i.e. '.../results.outgood_classification' -- confirm this is the
    # intended file name.
    with open(output_file + 'good_classification', "w") as f:
        f.write("\n\n\nClassification\n\n")
        for feat in classification:
            f.write(feat + '\n')
        f.write("\n")

    print('Results saved at ' +  ada_folder)
    print('Done!!!')

Exemple #32

0

Afficher le fichier

                   [-1], [-1], [+1], [+1], [+1], [+1], [+1], [+1], [+1], [+1],
                   [+1], [+1], [+1], [+1], [+1], [+1], [+1], [+1], [+1], [+1],
                   [+1], [+1], [+1], [-1], [-1], [-1], [-1], [-1], [-1], [-1],
                   [-1], [-1], [-1], [+1], [+1], [+1]]).transpose()

Tag = Tag.flatten()

# Training samples: red '+' where the tag equals 1, blue '+' otherwise.
for i, tag in enumerate(Tag):
    marker = '+r' if tag == 1 else '+b'
    pyplot.plot(Original_Data[0][i], Original_Data[1][i], marker,
                markersize=10)

a = AdaBoost(Original_Data, Tag)

a.train(100)

# Test points use the same layout as Original_Data: first row holds the
# first coordinates, second row the second coordinates.
TestCase = [[0.55, 1.1, 5.35, 7.0, 8.5, -1.0, 3.0, 3.0, 4.0, 2, 3],
            [4.4, 2.8, 0.9, -12, -13, -9, -10, -9, -5, 0, 2.5]]

output = a.prediction(TestCase)

# Predictions: large red dot where the output equals 1, blue dot otherwise.
for i, predicted in enumerate(output):
    marker = 'or' if predicted == 1 else 'ob'
    pyplot.plot(TestCase[0][i], TestCase[1][i], marker, markersize=20)

Exemple #33

0

Afficher le fichier

Fichier : tester5.py Projet : Nancheng/Machine_Learning

[1],
[2],
[3],
[4],
[5],
[6],
[7],
[8],
[9]
]).transpose()

# One label per sample, as a flat 1-D array.
Tag = numpy.array([+1, +1, +1, -1, -1, -1, +1, +1, +1, -1])

a = AdaBoost(Original_Data, Tag)

a.train(5)

Exemple #34

0

Afficher le fichier

                           (i + 1) * TESTING_SAMPLE_NUM):
                haarGroup.append(float(tmp[j]))

            Original_Data.append(haarGroup)

    Original_Data = numpy.array(Original_Data)

fileObj.close()

fileObj = open(ADABOOST_FILE, "a+")

print "Constructing AdaBoost from existed model data"

tmp = fileObj.readlines()

a = AdaBoost(train=False)

for i in range(0, len(tmp), 4):

    alpha, demention, label, threshold = None, None, None, None

    for j in range(i, i + 4):
        if (j % 4) == 0:
            alpha = float(tmp[j])
        elif (j % 4) == 1:
            demention = int(tmp[j])
        elif (j % 4) == 2:
            label = float(tmp[j])
        elif (j % 4) == 3:
            threshold = float(tmp[j])

Exemple #35

0

Afficher le fichier

Fichier : orient.py Projet : bivasmaiti26/AI-Projects

 def do_adaboost(self):
     if self.flag == 0:
         start = time.time()
         adaboost_instance = AdaBoost(self.train)
         adaboost_instance.create_and_train_classifiers()
         adaboost_instance.write_model(self.model_file)
         end = time.time()
         print 'Training Time :', (end - start) / 60, 'mins'
     else:
         start = time.time()
         adaboost_instance = AdaBoost(None)
         adaboost_instance.load_model(self.model_file)
         test_output = adaboost_instance.test(self.test, self.output_file)
         print test_output['accuracy'], '%'
         end = time.time()
         print 'Testing Time :', (end - start) / 60, 'mins'

Exemple #36

0

Afficher le fichier

Fichier : ex4_runme.py Projet : danielle-sackstein/ex4

def _load_data(name):
    """Load the ``X_<name>`` and ``y_<name>`` text files.

    Both files are located with ``_get_file_path`` and parsed with
    ``np.loadtxt``.  Returns the pair ``(X, y)`` in that order.
    """
    features = np.loadtxt(_get_file_path('X_' + name))
    targets = np.loadtxt(_get_file_path('y_' + name))
    return features, targets


if __name__ == '__main__':
    # Load the training and validation splits.
    X_train, y_train = _load_data('train')
    X_val, y_val = _load_data('val')

    T_values = range(5, 200, 5)
    validation_error, training_error = [], []

    # Fit one AdaBoost model per value in T_values and record its error on
    # the validation set and on the training set.
    for t in T_values:
        ada_boost = AdaBoost(DecisionStump, t)
        ada_boost.train(X_train, y_train)
        validation_error += [ada_boost.error(X_val, y_val)]
        training_error += [ada_boost.error(X_train, y_train)]

    # Draw both curves against T, training first, and collect the line
    # handles for the legend.
    curves = []
    for label, errors in (('training_error', training_error),
                          ('validation_error', validation_error)):
        curve, = plot(T_values, errors, linestyle='--', label=label)
        curves.append(curve)

    legend(handles=curves)

Exemple #37

0

Afficher le fichier

Fichier : training.py Projet : Nancheng/Machine_Learning

        for j in range(i * SAMPLE_NUM , (i+1) * SAMPLE_NUM):
            haarGroup.append(float(tmp[j]))

        Original_Data.append(haarGroup)

    Original_Data = numpy.array(Original_Data)


fileObj.close()

# First axis of Original_Data -> SampleDem, second axis -> SampleNum.
SampleDem, SampleNum = Original_Data.shape[:2]

# Every column must be accounted for by a positive or a negative sample.
assert SampleNum == (POSITIVE_SAMPLE + NEGATIVE_SAMPLE)

# Positive samples are labelled +1 and come first; negatives are -1.
Label_Face    = [+1] * POSITIVE_SAMPLE
Label_NonFace = [-1] * NEGATIVE_SAMPLE

Label = numpy.array(Label_Face + Label_NonFace)

a = AdaBoost(Original_Data, Label)

# Ctrl-C stops training early; whatever state the model is in at that
# point is still handed to saveModel below.
try:
    a.train(200)
except KeyboardInterrupt:
    print("You pressed interrupt key. Training process interrupt.")

saveModel(a)

Exemple #38

0

Afficher le fichier

Fichier : tester.py Projet : jasonleaster/Machine_Learning

        [+1],
        [+1],
        [+1],
    ]
).transpose()

Tag = Tag.flatten()

# Training samples: red '+' where the tag equals 1, blue '+' otherwise.
for i, tag in enumerate(Tag):
    marker = "+r" if tag == 1 else "+b"
    pyplot.plot(Original_Data[0][i], Original_Data[1][i], marker, markersize=10)


a = AdaBoost(Original_Data, Tag)

a.train(100)

# Three test points: first row holds the first coordinates, second row the
# second coordinates.
TestCase = [[0.55, 1.1, 5.35], [4.4, 2.8, 0.9]]

output = a.prediction(TestCase)

# Predictions: large red dot where the output equals 1, blue dot otherwise.
for i, predicted in enumerate(output):
    marker = "or" if predicted == 1 else "ob"
    pyplot.plot(TestCase[0][i], TestCase[1][i], marker, markersize=20)

pyplot.show()

Exemple #39

0

Afficher le fichier

Fichier : ex6_runme.py Projet : RefiPeretz/ML-Projects

def Q6(): # Republican or Democrat?
    """Compare a decision tree, AdaBoost and kNN on the voting data.

    ``parties`` is appended to ``votes`` as column 16; the rows are shuffled
    and split at rows 217 and 391 into training, validation and test parts.
    Five parameter values are tried for each classifier, the validation
    errors are plotted, and for each classifier the parameter with the
    smallest validation error is reported together with its test error.
    Finally three decision trees of different depths are trained and shown
    with ``view_dtree``.  Returns None.
    """
    print("Q6")
    print("===============================================")

    # Shuffle votes and labels together, then cut into three parts.
    labelled = np.column_stack((votes, parties))
    shuffled = labelled[np.random.permutation(labelled.shape[0])]
    train_part, val_part, test_part = np.vsplit(shuffled, (217, 391))

    # Column 16 is the party label; the remaining columns are the votes.
    training_parties = train_part[:, 16]
    training_votes = np.delete(train_part, np.s_[16:17], axis=1)
    val_parties = val_part[:, 16]
    val_votes = np.delete(val_part, np.s_[16:17], axis=1)
    test_parties = test_part[:, 16]
    test_votes = np.delete(test_part, np.s_[16:17], axis=1)

    # Candidate parameters: boosting argument T, neighbours k, tree depth d.
    T = [1, 25, 50, 100, 200]
    k = [1, 5, 25, 100, 200]
    d = [1, 5, 10, 16, 20]

    dtree_classifiers, adaboost_classifiers, knn_classifiers = [], [], []
    dtree_val_err, adaboost_val_err, knn_val_err = [], [], []

    for depth, rounds, neighbours in zip(d, T, k):
        tree = DecisionTree(depth)
        tree.train(training_votes, training_parties)
        dtree_classifiers.append(tree)
        dtree_val_err.append(tree.error(val_votes, val_parties))

        booster = AdaBoost(DecisionStump, rounds)
        booster.train(training_votes, training_parties)
        adaboost_classifiers.append(booster)
        adaboost_val_err.append(booster.error(val_votes, val_parties))

        knn = kNN(neighbours)
        knn.train(training_votes, training_parties)
        knn_classifiers.append(knn)
        knn_val_err.append(knn.error(val_votes, val_parties))

    # Explanation for choosing the parameters of each classifier: several
    # classifiers of each type are trained with different parameters and
    # their validation error is measured on the validation sample.  As in
    # previous tasks, the parameter that minimizes the validation error is
    # chosen, and the classifier trained with it is used to measure the
    # test error.  The plots below show the validation error of each
    # classifier over its parameters.
    plt.figure(1)
    panels = (
        (d, dtree_val_err, "Validation Error of Decision Tree"),
        (T, adaboost_val_err, "Validation Error of Adaboost"),
        (k, knn_val_err, "Validation Error of k Nearest Neighbors"),
    )
    for row, (params, errors, title) in enumerate(panels, 1):
        plt.subplot(3, 1, row)
        plt.plot(params, errors)
        plt.title(title)
    plt.show()

    # Report, per classifier family, the model with the lowest validation
    # error and that model's error on the held-out test part.
    report = (
        ("Decision Tree: the optimal validation error is: ",
         dtree_val_err, dtree_classifiers),
        ("Adaboost: the optimal validation error is: ",
         adaboost_val_err, adaboost_classifiers),
        ("k Nearest Neighbors: the optimal validation error is: ",
         knn_val_err, knn_classifiers),
    )
    for headline, errors, models in report:
        best = int(np.argmin(errors))
        print(headline, errors[best],
              " , and the optimal test error is: ", models[best].error(test_votes, test_parties))

    #optional

    # Trees of growing depth trained on growing portions of the full data.
    showcase = (
        (3, votes[:10, :], parties[:10]),
        (6, votes[:150, :], parties[:150]),
        (10, votes, parties),
    )
    for depth, sample, sample_parties in showcase:
        tree = DecisionTree(depth)
        tree.train(sample, sample_parties)
        view_dtree(tree, feature_names, class_names)

    print("===============================================")