def tp_tn_fp_fn(weights, predictors, samples):
    ''' Returns (tp, tn, fp, fn) all in one. '''
    tp, tn, fp, fn = 0, 0, 0, 0
    for sample in samples:
        y_pred = Adaboost.predict_AdaBoost(weights, predictors, sample)
        y_true = sample[-1]
        if y_pred == +1 and y_true == +1: tp += 1
        elif y_pred == -1 and y_true == -1: tn += 1
        elif y_pred == -1 and y_true == +1: fn += 1
        elif y_pred == +1 and y_true == -1: fp += 1
    assert tp + tn + fp + fn == len(samples)
    return tp, tn, fp, fn
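The four counts above are all that is needed for the standard evaluation metrics. A minimal follow-up sketch, assuming weights and predictors come from a prior Adaboost.train call as in the later examples:

tp, tn, fp, fn = tp_tn_fp_fn(weights, predictors, samples)
accuracy = (tp + tn) / len(samples)
precision = tp / (tp + fp) if tp + fp else 0.0  # guard: no positive predictions
recall = tp / (tp + fn) if tp + fn else 0.0     # guard: no positive samples
f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0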
def choicer(probabilities, training_set):
    ''' Returns a random choice whose error differs from 0.5 if there is one,
    otherwise the best one encountered, or None if every error is exactly 0.5.
    Relies on the module-level tests list and epsilon threshold. '''
    choices = list(range(len(tests)))
    random.shuffle(choices)
    best_test = None
    best_error = 0.5
    for choice in choices:
        error = Adaboost.get_error(tests[choice], probabilities,
                                   training_set)
        if error >= 0.5 + epsilon or error <= 0.5 - epsilon:
            return tests[choice]
        elif error >= best_error:
            best_error = error
            best_test = tests[choice]
        elif 1 - error >= best_error:
            best_error = 1 - error
            best_test = tests[choice]
    return best_test if best_error != 0.5 else None
def choicer(probabilities, training_set):
    choices = list(range(len(tests)))
    random.shuffle(choices)
    best_error = 0.5
    best_test = None
    for choice in choices:
        test = tests[choice]
        error = Adaboost.get_error(test, probabilities,
                                   training_set)
        if error <= 0.5 - epsilon or error >= 0.5 + epsilon:
            return test
        elif error >= best_error:
            best_error = error
            best_test = test
        elif 1 - error >= best_error:
            best_error = 1 - error
            best_test = test
    return best_test
def choicer(probabilities, training_set):
    sub_tests = random.choice(tests)
    while sub_tests == []:
        sub_tests = random.choice(tests)

    best_error = 0.5
    best_test = None
    for test in sub_tests:
        error = Adaboost.get_error(test, probabilities,
                                   training_set)
        if error <= 0.5 - epsilon or error >= 0.5 + epsilon:
            return test
        elif error >= best_error:
            best_error = error
            best_test = test
        elif 1 - error >= best_error:
            best_error = 1 - error
            best_test = test
    return best_test
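All three choicer variants depend on the module-level tests and epsilon, and the first one can return None when every candidate's error is exactly 0.5, so callers should be prepared for that. A hypothetical driver loop (the round count T and the probabilities update are assumed; Example #10 below instead passes choicer straight to Adaboost.train):

for t in range(T):
    weak = choicer(probabilities, training_set)
    if weak is None:
        break  # no weak learner beats random guessing; boosting cannot progress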
Example #5
import pandas as pd
from Adaboost import Adaboost


def acc(pred, label):
    cor = 0
    for i, j in zip(pred, label):
        if i == int(j):
            cor += 1
    return cor / len(label)


data = pd.read_csv("train.csv", header=None)
traindata = data.values[:, :-1]
trainlabel = data.values[:, -1]
test = pd.read_csv("test.csv", header=None)
testdata = test.values[:, :-1]
testlabel = test.values[:, -1]
stepsize = 20  # search step size for the weak classifier's optimal split point
ada = Adaboost(stepsize)
stumps, Alphas = ada.Adaboost(traindata, trainlabel, stepsize)
Predict = ada.predict(testdata, stumps, Alphas)
print("acc={}".format(acc(Predict, testlabel)))
Example #6
def adaboostClassifying(self, datMat, classlabels):
    adaboost = Adaboost()
    ada, aggClassEst = adaboost.adaBoostTrainDS(datMat, classlabels,
                                                self.n_estimators)
    return ada
Example #7
plt.subplot(2, 2, 4)
plt.title('SVM')
pred = clf.predict(testset)
plot_decisionboundary(clf, testset, pred, test_label)

# Adaboost
print(
    '---------------------------------- Adaboost ----------------------------------'
)
plt.figure(figsize=(20, 30))

plt.subplot(1, 2, 1)
plt.title('Training set')
plot_dataset(dataset, label)

clf = Adaboost(iters=10000, thres=0.05)
clf.fit(dataset, label)
# for training set
pred = clf.predict(dataset)

plt.subplot(1, 2, 2)
plt.title('Adaboost')
plot_decisionboundary(clf, dataset, pred, label)

num = 5
if clf.M < num:
    num = clf.M
print('M =', clf.M)
print('first five weak learners:')
print('feature\tthreshold')
for i in range(num):
    # attribute names on the fitted model are assumed here; adapt them to
    # the Adaboost implementation in use
    print('{}\t{}'.format(clf.learners[i].feature, clf.learners[i].threshold))
        y_train[y_train > 0.5] = 1
        y_train[y_train < 0.5] = -1
        y_train = np.expand_dims(y_train, 0)
        print("Step3: Divide picture")
        # adaboost = Adaboost(num_classifier=5)  # Adaboost + SVM
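        # three sliding-window scales: step = img_height/2, img_height/3 and
        # img_height/9 (note img_height // 3 // 3 equals img_height // 9)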
        one_scale_data = divide_img_hog(X_train, step=img_height // 2)
        one_scale_label = divide_img_label(y_train, step=img_height // 2)
        two_scale_data = divide_img_hog(X_train, step=img_height // 3)
        two_scale_label = divide_img_label(y_train, step=img_height // 3)
        three_scale_data = divide_img_hog(X_train, step=img_height // 3 // 3)
        three_scale_label = divide_img_label(y_train,
                                             step=img_height // 3 // 3)

        print("Step4: Train Adaboost")
        print("Training Classifier No.1")
        one_scale_ada = Adaboost(num_classifier=5)
        one_scale_ada.load_data(data=one_scale_data, label=one_scale_label)
        one_scale_ada.fit()
        print("Training Classifier No.2")
        two_scale_ada = Adaboost(num_classifier=5)
        two_scale_ada.load_data(data=two_scale_data, label=two_scale_label)
        two_scale_ada.fit()
        print("Training Classifier No.3")
        three_scale_ada = Adaboost(num_classifier=5)
        three_scale_ada.load_data(data=three_scale_data,
                                  label=three_scale_label)
        three_scale_ada.fit()
        print("Step5: test")
        # Start at X_train[0]
        result1 = predict(X_train[0],
                          classifyer=one_scale_ada,
Example #10
            avg_best_on_classes = 0
            for j, clss in enumerate(classes):
                print("training for class {}.".format(clss))

                insider = Insider(clss, npartitions, name, 10, T)

                for i in range(npartitions):
                    print("testing for partition {}.".format(i))
                    training_set = Utils.onevsall(
                        Utils.flat(partitions[:i] + partitions[i + 1:]), clss)
                    test_set = Utils.onevsall(partitions[i], clss)
                    insider.training_set = training_set
                    insider.test_set = test_set
                    weights, predictors = Adaboost.train(T,
                                                         training_set,
                                                         mesurer=insider,
                                                         choicer=choicer)

                insider.save()
                insider.close()

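                # incremental mean: fold this class's best validation
                # accuracy into the running average over classes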
                avg_best_on_classes += (insider.best_val_acc() -
                                        avg_best_on_classes) / (j + 1)

            if best_val_acc < avg_best_on_classes:
                best_val_acc = avg_best_on_classes
                best_algo = (choicer, name)

    # best training ###
    test_acc = 0
    for j, clss in enumerate(classes):
Example #11
    hc_dataset.loc[hc_dataset['label'] > 1, 'label'] = -1

    # Load iris dataset
    iris_dataset = pd.read_csv('iris.data',
                               sep=",",
                               header=None,
                               names=["col1", "x", "y", "col4", "label"])
    iris_dataset = iris_dataset[iris_dataset['label'] != 'Iris-setosa']
    del iris_dataset['col1'], iris_dataset['col4']
    iris_dataset.loc[iris_dataset['label'] == 'Iris-versicolor', 'label'] = 1.0
    iris_dataset.loc[iris_dataset['label'] == 'Iris-virginica', 'label'] = -1.0
    iris_dataset['label'] = pd.to_numeric(iris_dataset['label'])

    iterations = 100
    # ------------------------ Adaboost for hc_dataset ------------------------
    adaboost_hc = Adaboost()
    adaboost_hc.find_all_possible_lines(np.array(hc_dataset['x']),
                                        np.array(hc_dataset['y']))

    hc_errors = pd.DataFrame({
        'emp_err': [0, 0, 0, 0, 0, 0, 0, 0],
        'true_err': [0, 0, 0, 0, 0, 0, 0, 0]
    })
    for _ in range(iterations):
        x_train, x_test, y_train, y_test = train_test_split(
            hc_dataset[['x', 'y']], hc_dataset['label'], test_size=0.5)
        adaboost_hc.fit(np.array(x_train['x']), np.array(x_train['y']),
                        np.array(y_train))

        emp_errs = adaboost_hc.calc_errors(np.array(x_train['x']),
                                           np.array(x_train['y']),
def fscr_per_turn(self, adaboost_set, validationSet):
    a = Adaboost(adaboost_set, 5)
    learners, learner_priority = a.algo()
    f = FScore(validationSet, learners, learner_priority)
    fscr, accuracy_per_turn = f.determine_fscore()
    return fscr, accuracy_per_turn
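A hypothetical call site, assuming the enclosing class and an already split dataset (train_rows and val_rows are placeholder names):

fscr, acc_per_turn = self.fscr_per_turn(train_rows, val_rows)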