def tp_tn_fp_fn(weights, predictors, samples):
    """Count the confusion-matrix cells of the boosted classifier.

    Each sample is classified with ``Adaboost.predict_AdaBoost(weights,
    predictors, sample)`` and compared against its last element, which is
    read as the true label. Both values are compared against +1 and -1.

    Returns:
        The tuple ``(tp, tn, fp, fn)``.

    Raises:
        AssertionError: if some (prediction, label) pair matched none of the
            four +1/-1 combinations, so the counts do not add up to
            ``len(samples)``.
    """
    true_pos = true_neg = false_pos = false_neg = 0

    for row in samples:
        predicted = Adaboost.predict_AdaBoost(weights, predictors, row)
        actual = row[-1]

        if predicted == +1:
            if actual == +1:
                true_pos += 1
            elif actual == -1:
                false_pos += 1
        elif predicted == -1:
            if actual == -1:
                true_neg += 1
            elif actual == +1:
                false_neg += 1

    # Sanity check: every sample must have landed in exactly one cell.
    assert (true_pos + true_neg + false_pos + false_neg == len(samples))
    return true_pos, true_neg, false_pos, false_neg
def choicer(probabilities, training_set):
    """Pick a test from the module-level ``tests`` in random order.

    The tests are visited in a shuffled order. The first one whose error
    (``Adaboost.get_error``) is at least ``epsilon`` away from 0.5 is
    returned immediately. Otherwise the function remembers the candidate
    whose ``max(error, 1 - error)`` is largest (later candidates win ties)
    and returns it, or ``None`` when that tracked value never moved off 0.5.
    """
    order = list(range(len(tests)))
    random.shuffle(order)

    fallback = None
    fallback_error = 0.5

    for idx in order:
        candidate = tests[idx]
        err = Adaboost.get_error(candidate, probabilities, training_set)

        # Far enough from a coin flip: accept right away.
        if err >= 0.5 + epsilon or err <= 0.5 - epsilon:
            return candidate

        if err >= fallback_error:
            fallback_error, fallback = err, candidate
        elif 1 - err >= fallback_error:
            fallback_error, fallback = 1 - err, candidate

    if fallback_error == 0.5:
        return None
    return fallback
def choicer(probabilities, training_set):
    """Return a test from the module-level ``tests``, visited in random order.

    The first test whose error (``Adaboost.get_error``) lies at least
    ``epsilon`` away from 0.5 is returned at once. If none qualifies, the
    candidate with the largest ``max(error, 1 - error)`` seen so far is
    returned (later candidates win ties); ``None`` when ``tests`` is empty.
    """
    shuffled_indices = list(range(len(tests)))
    random.shuffle(shuffled_indices)

    runner_up, runner_up_error = None, 0.5

    for position in shuffled_indices:
        candidate = tests[position]
        err = Adaboost.get_error(candidate, probabilities, training_set)

        if err <= 0.5 - epsilon or err >= 0.5 + epsilon:
            return candidate

        # Inside the epsilon band: keep whichever is furthest from 0.5.
        if err >= runner_up_error:
            runner_up, runner_up_error = candidate, err
        elif 1 - err >= runner_up_error:
            runner_up, runner_up_error = candidate, 1 - err

    return runner_up
def choicer(probabilities, training_set):
    """Return a test drawn from one randomly chosen group of ``tests``.

    A group is drawn with ``random.choice(tests)`` and re-drawn for as long
    as it compares equal to ``[]``. Within the group, the first test whose
    error (``Adaboost.get_error``) is at least ``epsilon`` away from 0.5 is
    returned. Otherwise the candidate with the largest
    ``max(error, 1 - error)`` is returned (later candidates win ties).
    """
    while True:
        group = random.choice(tests)
        if group == []:
            continue  # empty group: draw again
        break

    runner_up, runner_up_error = None, 0.5

    for candidate in group:
        err = Adaboost.get_error(candidate, probabilities, training_set)

        if err <= 0.5 - epsilon or err >= 0.5 + epsilon:
            return candidate

        if err >= runner_up_error:
            runner_up, runner_up_error = candidate, err
        elif 1 - err >= runner_up_error:
            runner_up, runner_up_error = candidate, 1 - err

    return runner_up
import pandas as pd

from Adaboost import Adaboost


def acc(pred, label):
    """Return the share of predictions that equal their integer-cast label.

    ``pred`` and ``label`` are walked in lockstep; a pair counts as correct
    when ``pred_i == int(label_i)``. The count is divided by ``len(label)``.
    """
    cor = sum(1 for i, j in zip(pred, label) if i == int(j))
    return cor / len(label)


# Training split: every column but the last is a feature, the last is the label.
data = pd.read_csv("train.csv", header=None)
traindata = data.values[:, :-1]
trainlabel = data.values[:, -1]

# Held-out split. These slices must come from `test`, not `data`; slicing
# `data` here would score the model on the rows it was trained on.
test = pd.read_csv("test.csv", header=None)
testdata = test.values[:, :-1]
testlabel = test.values[:, -1]

stepsize = 20  # step size for the weak classifier's optimal split-point search
ada = Adaboost(stepsize)
stumps, Alphas = ada.Adaboost(traindata, trainlabel, stepsize)
Predict = ada.predict(testdata, stumps, Alphas)
print("acc={}".format(acc(Predict, testlabel)))
def adaboostClassifying(self, datMat, classlabels):
    """Train a new ``Adaboost`` instance and return the first training output.

    ``adaBoostTrainDS`` is called with ``datMat``, ``classlabels`` and
    ``self.n_estimators`` and must return a pair. Only the first element is
    handed back to the caller; the second is discarded.
    """
    trainer = Adaboost()
    training_output = trainer.adaBoostTrainDS(
        datMat, classlabels, self.n_estimators)
    model, _unused_estimate = training_output
    return model
plt.subplot(2, 2, 4) plt.title('SVM') pred = clf.predict(testset) plot_decisionboundary(clf, testset, pred, test_label) # Adaboost print( '---------------------------------- Adaboost ----------------------------------' ) plt.figure(figsize=(20, 30)) plt.subplot(1, 2, 1) plt.title('Training set') plot_dataset(dataset, label) clf = Adaboost(iters=10000, thres=0.05) clf.fit(dataset, label) # for training set pred = clf.predict(dataset) plt.subplot(1, 2, 2) plt.title('Adaboost') plot_decisionboundary(clf, dataset, pred, label) num = 5 if clf.M < num: num = clf.M print('M =', clf.M) print('first five weak learners:') print('feature\tthreshold') for i in range(num):
def adaboostClassifying(self, datMat, classlabels):
    """Fit an ``Adaboost`` trainer and return the first item of its result.

    A fresh ``Adaboost`` is created and its ``adaBoostTrainDS`` method is run
    on ``datMat`` and ``classlabels`` with ``self.n_estimators`` as third
    argument. The two-item result is unpacked and the second item dropped.
    """
    booster = Adaboost()
    fitted, _discarded = booster.adaBoostTrainDS(
        datMat,
        classlabels,
        self.n_estimators,
    )
    return fitted
y_train[y_train > 0.5] = 1 y_train[y_train < 0.5] = -1 y_train = np.expand_dims(y_train, 0) print("Step3: Divide picture") # adaboost = Adaboost(num_classifier=5) # Adaboost + SVM one_scale_data = divide_img_hog(X_train, step=img_height // 2) one_scale_label = divide_img_label(y_train, step=img_height // 2) two_scale_data = divide_img_hog(X_train, step=img_height // 3) two_scale_label = divide_img_label(y_train, step=img_height // 3) three_scale_data = divide_img_hog(X_train, step=img_height // 3 // 3) three_scale_label = divide_img_label(y_train, step=img_height // 3 // 3) print("Step4: Train Adaboost") print("Training Classifier No.1") one_scale_ada = Adaboost(num_classifier=5) one_scale_ada.load_data(data=one_scale_data, label=one_scale_label) one_scale_ada.fit() print("Training Classifier No.2") two_scale_ada = Adaboost(num_classifier=5) two_scale_ada.load_data(data=two_scale_data, label=two_scale_label) two_scale_ada.fit() print("Training Classifier No.3") three_scale_ada = Adaboost(num_classifier=5) three_scale_ada.load_data(data=three_scale_data, label=three_scale_label) three_scale_ada.fit() print("Step5: test") # Start at X_train[0] result1 = predict(X_train[0], classifyer=one_scale_ada,
avg_best_on_classes = 0 for j, clss in enumerate(classes): print("training for class {}.".format(clss)) insider = Insider(clss, npartitions, name, 10, T) for i in range(npartitions): print("testing for partition {}.".format(i)) training_set = Utils.onevsall( Utils.flat(partitions[:i] + partitions[i + 1:]), clss) test_set = Utils.onevsall(partitions[i], clss) insider.training_set = training_set insider.test_set = test_set weights, predictors = Adaboost.train(T, training_set, mesurer=insider, choicer=choicer) insider.save() insider.close() avg_best_on_classes += (insider.best_val_acc() - avg_best_on_classes) / (j + 1) if best_val_acc < avg_best_on_classes: best_val_acc = avg_best_on_classes best_algo = (choicer, name) # best training ### test_acc = 0 for j, clss in enumerate(classes):
hc_dataset.loc[hc_dataset['label'] > 1, 'label'] = -1 # Load iris dataset iris_dataset = pd.read_csv('iris.data', sep=",", header=None, names=["col1", "x", "y", "col4", "label"]) iris_dataset = iris_dataset[iris_dataset['label'] != 'Iris-setosa'] del iris_dataset['col1'], iris_dataset['col4'] iris_dataset.loc[iris_dataset['label'] == 'Iris-versicolor', 'label'] = 1.0 iris_dataset.loc[iris_dataset['label'] == 'Iris-virginica', 'label'] = -1.0 iris_dataset['label'] = pd.to_numeric(iris_dataset['label']) iterations = 100 # ------------------------ Adaboost for hc_dataset ------------------------ adaboost_hc = Adaboost() adaboost_hc.find_all_possible_lines(np.array(hc_dataset['x']), np.array(hc_dataset['y'])) hc_errors = pd.DataFrame({ 'emp_err': [0, 0, 0, 0, 0, 0, 0, 0], 'true_err': [0, 0, 0, 0, 0, 0, 0, 0] }) for _ in range(iterations): x_train, x_test, y_train, y_test = train_test_split( hc_dataset[['x', 'y']], hc_dataset['label'], test_size=0.5) adaboost_hc.fit(np.array(x_train['x']), np.array(x_train['y']), np.array(y_train)) emp_errs = adaboost_hc.calc_errors(np.array(x_train['x']), np.array(x_train['y']),
def fscr_per_turn(self, adaboost_set, validationSet):
    """Run ``Adaboost`` on one set and score its output on another.

    ``Adaboost(adaboost_set, 5).algo()`` supplies the learners and their
    priorities, which are passed with ``validationSet`` to ``FScore``.

    Returns:
        The pair produced by ``FScore.determine_fscore()``, as a tuple
        ``(fscr, accuracy_per_turn)``.
    """
    learners, learner_priority = Adaboost(adaboost_set, 5).algo()
    scorer = FScore(validationSet, learners, learner_priority)
    score, per_turn_accuracy = scorer.determine_fscore()
    return score, per_turn_accuracy