def main():
    """Cross-validate AdaBoost on each assignment data file and print accuracy.

    For every data file the matrix is loaded with ``load_matrix_from_txt`` and
    split with ``k_fold_split(10, ...)``.  For each fold a fresh ``AdaBoost``
    is trained on copies of the training rows and evaluated on copies of the
    test rows; the per-fold accuracy and the per-file mean / standard
    deviation are printed.
    """
    n_folds = 10
    data_files = [
        'data/breast-cancer-assignment5.txt',
        'data/german-assignment5.txt',
    ]
    for path in data_files:
        data, labels, types = load_matrix_from_txt(path)
        splices = k_fold_split(n_folds, data, labels)
        accuracies = []
        for i in range(n_folds):
            # Each entry of ``splices`` is indexed as (train indexes, test indexes).
            train_indexes = splices[i][0]
            test_indexes = splices[i][1]
            # Copies keep the model from mutating the shared data matrix.
            train_data = np.copy(data[train_indexes])
            train_label = np.copy(labels[train_indexes])
            test_data = np.copy(data[test_indexes])
            test_label = np.copy(labels[test_indexes])

            boost = AdaBoost()
            boost.train(train_data, train_label, types)
            class_result = boost.test(test_data)
            accuracy = compute_accuracy(class_result, test_label)
            accuracies.append(accuracy)
            # Single-argument print(...) gives the same output on Python 2
            # and 3; the former print statement was a SyntaxError on 3.
            print('accuracy: %f' % accuracy)
        print('file: {}, mean: {}, std: {}'.format(
            path, np.mean(accuracies), np.std(accuracies)))
def get_ab_errors(self, tx, ty, x, y):
    """Return one error value per ensemble size listed in ``self.ts``.

    For every size ``t`` a new ``AdaBoost`` with ``ex4_tools.DecisionStump``
    weak learners is trained on ``(tx, ty)`` and the value of
    ``error(x, y, t)`` is collected, in the order of ``self.ts``.
    """

    def error_for(size):
        # Train a dedicated model for this size, then score it on (x, y).
        model = AdaBoost(WL=ex4_tools.DecisionStump, T=size)
        model.train(tx, ty)
        return model.error(x, y, size)

    return [error_for(size) for size in self.ts]
def q_9(self):
    """Draw one decision-boundary panel per ensemble size in ``self.ts``.

    Models are trained on 5000 generated samples and drawn over a separate
    set of 200 generated samples, laid out on a 2x3 subplot grid.
    """
    train_x, train_y = ex4_tools.generate_data(5000, noise_ratio=0)
    plot_x, plot_y = ex4_tools.generate_data(200, noise_ratio=0)
    for panel, size in enumerate(self.ts, start=1):
        booster = AdaBoost(WL=ex4_tools.DecisionStump, T=size)
        booster.train(train_x, train_y)
        plt.subplot(2, 3, panel)
        ex4_tools.decision_boundaries(booster, plot_x, plot_y, size)
    plt.show()
def q_10(self):
    """Plot the decision boundaries of the size in ``self.ts`` with the lowest error.

    Errors come from ``self.get_ab_errors``; the winning size is retrained on
    the 5000-sample set and drawn over that same set, with the error and the
    size shown in the title.
    """
    train_x, train_y = ex4_tools.generate_data(5000, noise_ratio=0)
    eval_x, eval_y = ex4_tools.generate_data(200, noise_ratio=0)
    errors = self.get_ab_errors(train_x, train_y, eval_x, eval_y)

    best = np.argmin(errors)
    best_size = self.ts[best]

    booster = AdaBoost(WL=ex4_tools.DecisionStump, T=best_size)
    booster.train(train_x, train_y)
    ex4_tools.decision_boundaries(booster, train_x, train_y, best_size)
    plt.title("min error is %s with %s classifiers"
              % (errors[best], best_size))
    plt.show()
def Q10():
    """Plot decision boundaries for the ensemble size with the lowest Q9 error.

    ``Q9()`` returns one error per candidate size; the size at the position
    of the minimum is used to train a new ``AdaBoost`` on 1000 generated
    samples, whose decision boundaries are saved to ``'Q10'`` and shown.
    """
    X, y = generate_data(1000, 0)
    # NOTE(review): must list the same sizes, in the same order, as the ``T``
    # that Q9 iterates over; confirm against the module-level definition.
    T = [5, 10, 50, 100, 200, 500]
    i = int(np.argmin(Q9()))
    T_min = T[i]
    optimal_h = AdaBoost(DecisionStump, T_min)
    optimal_h.train(X, y)
    decision_boundaries(optimal_h, X, y, T_min)
    # Report the size that was actually selected; the title used to claim
    # T=500 no matter which size won.
    plt.title('Descision for T=%d that minimizing the test err' % T_min)
    plt.savefig('Q10')
    plt.show()
def Q9():
    """Draw decision boundaries for each size in ``T`` and return the errors.

    One 500-learner ``AdaBoost`` is trained on 300 generated samples.  For
    every ``t`` in ``T`` the value of ``h.error(X, y, t)`` on those samples is
    recorded and the boundaries are drawn in a 3x2 grid; the figure is saved
    as ``'Q9'`` and shown.

    ``T`` is not defined in this function; it is looked up in the enclosing
    (presumably module) scope.

    Returns:
        ``np.ndarray`` with one error per entry of ``T``.
    """
    samples, targets = generate_data(300, 0)
    booster = AdaBoost(DecisionStump, 500)
    booster.train(samples, targets)

    errors = []
    figure = plt.figure(figsize=(10, 10))
    for position, size in enumerate(T, start=1):
        figure.add_subplot(3, 2, position)
        errors.append(booster.error(samples, targets, size))
        decision_boundaries(booster, samples, targets, size)
    plt.savefig('Q9')
    plt.show()
    return np.array(errors)
def Q3():  # AdaBoost
    """Train AdaBoost for a range of sizes, plot errors, and report the best size.

    Uses ``x_train``/``y_train``, ``x_val``/``y_val`` and ``x_test``/``y_test``
    from the enclosing scope.  For every size in ``T_loop`` a new model is
    trained; sizes that also appear in ``T`` get a decision-boundary panel.
    Training and validation errors are plotted against the size, and the size
    with the lowest validation error is retrained, drawn and evaluated on the
    test set.
    """
    T = [1, 5, 10, 50, 100, 200]
    # 1, 5, 10, 15, 20, ..., 200
    T_loop = [1, 5, 10] + [i * 5 for i in range(3, 41)]
    train_err = []
    valid_err = []

    plt.figure("decisions of the learned classifiers for T")
    num_graph = 0
    for t in T_loop:
        ada_boost = AdaBoost(DecisionStump, t)
        ada_boost.train(x_train, y_train)
        if t in T:
            num_graph += 1
            plt.subplot(3, 2, num_graph)
            decision_boundaries(ada_boost, x_train, y_train, "T = %d" % t)
        train_err.append(ada_boost.error(x_train, y_train))
        valid_err.append(ada_boost.error(x_val, y_val))

    plt.figure("training error and the validation error")
    # ``hold=False`` was dropped: the kwarg no longer exists in matplotlib 3
    # and was a no-op here because the figure has just been created.
    plt.plot(T_loop, train_err, 'ro-', label="Training Error")
    plt.plot(T_loop, valid_err, 'go-', label="Validation Error")
    plt.legend()
    plt.show()

    # Find the T with the lowest validation error and plot it with the
    # training data.
    plt.figure("decision boundaries of T min, with the training data")
    # Look the size up in T_loop: multiplying the index by 5 is wrong for the
    # first entries (index 0 would give T=0, index 1 would give 5 only by luck).
    T_hat = T_loop[int(np.argmin(valid_err))]
    ada_boost = AdaBoost(DecisionStump, T_hat)
    ada_boost.train(x_train, y_train)
    test_err = ada_boost.error(x_test, y_test)
    decision_boundaries(ada_boost, x_train, y_train, "T = %d" % T_hat)
    plt.show()
    print("The value of T that minimizes the validation error is: ", T_hat)
    print("the test error of the corresponding classifier is: ", test_err)
    return
def Q8():
    """Plot training and test error of one AdaBoost model for T = 1..500.

    A 500-learner model is trained on 5000 generated samples; for every ``t``
    from 1 to 500 the value of ``h.error(..., t)`` is recorded on the training
    set and on a separate 200-sample set.  The figure is saved as ``'Q8'`` and
    shown.
    """
    n_rounds = 500
    X, y = generate_data(5000, 0)
    h = AdaBoost(DecisionStump, n_rounds)
    h.train(X, y)
    training_err = np.zeros((n_rounds, ))
    test_err = np.zeros((n_rounds, ))
    test_set, labels = generate_data(200, 0)

    t_values = range(1, n_rounds + 1)
    for t in t_values:
        training_err[t - 1] = h.error(X, y, t)
        test_err[t - 1] = h.error(test_set, labels, t)

    # Plot against the real T values (1..500); using range(500) shifted every
    # point one step to the left on the axis labelled 'T'.
    plt.plot(t_values, training_err, label='Training error')
    plt.plot(t_values, test_err, label='Test error')
    plt.title('question 8')
    plt.legend(loc='upper right')
    plt.xlabel('T')
    plt.ylabel('Error rate')
    plt.savefig('Q8')
    plt.show()
def Q3(path="/cs/usr/kotek/PycharmProjects/iml_ex4/SynData/"):  # AdaBoost
    """Plot AdaBoost training/validation error and decision boundaries.

    Args:
        path: Directory prefix (including the trailing separator) that holds
            ``X_train.txt``, ``y_train.txt``, ``X_val.txt``, ``y_val.txt``,
            ``X_test.txt`` and ``y_test.txt``.  Defaults to the location that
            used to be hard-coded, so existing ``Q3()`` calls are unchanged.

    First part: for T in 5, 10, ..., 100 and 200 a model is trained and its
    error on the training and validation sets is plotted against T.
    Second part: for T in 1, 5, 10, 100, 200 a model is trained and its
    decision boundaries are drawn on a 2x3 grid.
    """
    X_train, y_train = read_from_txt(path + "X_train.txt", path + "y_train.txt")
    X_val, y_val = read_from_txt(path + "X_val.txt", path + "y_val.txt")
    # The test split is loaded but not used in the visible part of this function.
    X_test, y_test = read_from_txt(path + "X_test.txt", path + "y_test.txt")

    # -------- First part --------
    T = np.arange(5, 105, step=5)
    T = np.append(T, np.array([200]))
    training_err = np.zeros(len(T))
    validation_err = np.zeros(len(T))

    # adaBoost uses a weighted trainer (WL)
    WL = ex4_tools.DecisionStump
    for i, t in enumerate(T):
        adaboost = AdaBoost(WL, t)
        adaboost.train(X_train, y_train)
        training_err[i] = adaboost.error(X_train, y_train)
        validation_err[i] = adaboost.error(X_val, y_val)

    plt.plot(T, training_err, label="train error")
    plt.plot(T, validation_err, label="validation error")
    plt.legend()
    plt.show()

    # -------- Second part --------
    decision_T = [1, 5, 10, 100, 200]
    plt.figure()
    plt.ion()
    for idx, t in enumerate(decision_T):
        adaboost = AdaBoost(WL, t)
        adaboost.train(X_train, y_train)
        plt.subplot(2, 3, idx + 1)
        ex4_tools.decision_boundaries(adaboost, X_train, y_train,
                                      "T=" + str(t))
    plt.show()
    # Interactive mode makes show() non-blocking; keep the window up briefly.
    plt.pause(5)
def Q_adaboost(noise_ratio):
    """Run the AdaBoost experiment plots for one noise ratio.

    A 500-learner model is trained on 5000 generated samples and evaluated on
    200 more.  The function shows, in order: training/test error for
    T = 1..500, decision boundaries on the test set for six values of T,
    boundaries on the training set for the T with the lowest test error, and
    the training set weighted by ``classifier.D_of_last_iteration`` (raw, then
    scaled so the largest weight is 100).

    Args:
        noise_ratio: Forwarded to ``generate_data`` and shown in the titles.
    """
    T = 500
    X_train, y_train = generate_data(5000, noise_ratio)
    classifier = AdaBoost(DecisionStump, T)
    classifier.train(X_train, y_train)
    X_test, y_test = generate_data(200, noise_ratio)

    vals = np.arange(1, T + 1)
    train_errors = [classifier.error(X_train, y_train, t) for t in vals]
    # Computed once and reused below for the arg-min.
    test_errors = [classifier.error(X_test, y_test, t) for t in vals]
    plt.plot(vals, train_errors, label='Training Error', lw=1, alpha=0.6)
    plt.plot(vals, test_errors, label='Test Error', lw=1, alpha=0.6)
    plt.legend()
    plt.title(
        f'Adaboost Training & Test Error according to T, noise={noise_ratio}')
    plt.show()

    boosts = [5, 10, 50, 100, 200, 500]
    for i in range(6):
        plt.subplot(2, 3, i + 1)
        decision_boundaries(classifier, X_test, y_test, boosts[i])
        plt.title(f'T={boosts[i]}, noise={noise_ratio}')
    plt.show()

    # argmin yields a position in ``vals``; the matching T is vals[position].
    # Using the position itself was off by one (and could be 0).
    best_idx = int(np.argmin(test_errors))
    min_t = int(vals[best_idx])
    min_err = test_errors[best_idx]
    decision_boundaries(classifier, X_train, y_train, min_t)
    plt.title(f'min test_err {min_err} T={min_t} noise {noise_ratio}')
    plt.show()

    # Draw the full ensemble so the plots match their "T=500" titles
    # (499 learners were drawn before).
    decision_boundaries(classifier, X_train, y_train, T,
                        classifier.D_of_last_iteration)
    plt.title(f'un-normalized weighed sample T=500, noise={noise_ratio}')
    plt.show()

    decision_boundaries(
        classifier, X_train, y_train, T,
        classifier.D_of_last_iteration /
        np.max(classifier.D_of_last_iteration) * 100)
    plt.title(f'normalized weighed sample T=500, noise={noise_ratio}')
    plt.show()
def Q17():
    """Plot train vs. test error of the AdaBoost face classifier and save it.

    Images are loaded from ``'../Docs/'`` and passed through
    ``integral_image``; a 50-learner ``AdaBoost`` over ``WeakImageClassifier``
    is trained, and ``ada.error(..., t)`` is evaluated for every ``t`` in
    ``np.arange(1, T)`` on both sets.  The figure is written to
    ``FIG_DIR3 + 'q17'``.
    """
    loaded = load_images('../Docs/')
    train_images, test_images, train_labels, test_labels = loaded
    train_images = integral_image(train_images)
    test_images = integral_image(test_images)

    WL = WeakImageClassifier
    T = 50
    ada = AdaBoost(WL, T)
    ada.train(train_images, train_labels)

    T_range = np.arange(1, T)
    # All training errors are evaluated before any test error.
    train_errs = []
    for t in T_range:
        train_errs.append(ada.error(train_images, train_labels, t))
    test_errs = []
    for t in T_range:
        test_errs.append(ada.error(test_images, test_labels, t))

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Face Classifier")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate (%)')
    for errs, name in ((train_errs, 'Train Error'), (test_errs, 'Test Error')):
        plt.plot(T_range, errs, label=name)
    # plt.ylim(top=0.06)
    plt.legend()
    plt.savefig(FIG_DIR3 + 'q17')
    # TODO complete this function
def crossValidateAdaboost(inputFile, outputFile, nIterations):
    """Average AdaBoost in-sample / out-of-sample error curves over the folds.

    For each of ``TicTacToe.N_FOLDS`` folds the train/test sets are rebuilt,
    a new ``AdaBoost`` is trained for ``nIterations`` iterations, and the
    ``(Ein, Eout)`` it returns are accumulated element-wise.

    Args:
        inputFile: Passed to ``TicTacToe``.
        outputFile: Not used by this function.
        nIterations: Length of the error curves and number of boosting
            iterations requested from ``train``.

    Returns:
        Tuple ``(mean Ein, mean Eout)`` of arrays averaged over the folds.
    """
    game = TicTacToe(inputFile)
    einTotal = np.zeros(nIterations)
    eoutTotal = np.zeros(nIterations)

    for fold in range(game.N_FOLDS):
        game.createTrainAndTestSets(fold)
        booster = AdaBoost(game)
        foldEin, foldEout = booster.train(game, nIterations)
        # Stack the running total with this fold's curve and add row-wise.
        einTotal = np.array([einTotal, foldEin]).sum(axis=0)
        eoutTotal = np.array([eoutTotal, foldEout]).sum(axis=0)
        print('--------------------------------------')

    meanEin = einTotal / game.N_FOLDS
    meanEout = eoutTotal / game.N_FOLDS
    return meanEin, meanEout
def Q8(noise=0.0):
    """Train AdaBoost on generated data and save a train-vs-test error plot.

    Args:
        noise: Forwarded to ``generate_data`` for both sets; also used to
            build the suffix of the saved figure name when it is non-zero.

    Returns:
        Tuple ``(ada, test_X, test_y, train_X, train_y)``.
    """
    n_samples_train = 5000
    n_samples_test = 200
    T = 500
    train_X, train_y = generate_data(n_samples_train, noise)
    test_X, test_y = generate_data(n_samples_test, noise)

    WL = DecisionStump
    ada = AdaBoost(WL, T)
    ada.train(train_X, train_y)

    T_range = np.arange(1, T)
    # All training errors are evaluated before any test error.
    train_errs = []
    for t in T_range:
        train_errs.append(ada.error(train_X, train_y, t))
    test_errs = []
    for t in T_range:
        test_errs.append(ada.error(test_X, test_y, t))

    fig = plt.figure()
    fig.suptitle("Train vs Test error, Adaboost")
    plt.xlabel('# of Hypotheses (T)')
    plt.ylabel('Error rate (%)')
    for errs, name in ((train_errs, 'Train Error'), (test_errs, 'Test Error')):
        plt.plot(T_range, errs, label=name)
    # plt.ylim(top=0.06)
    plt.legend()

    # 'q8' for noise 0, otherwise e.g. 'q8_0_01' for noise 0.01.
    if noise == 0:
        suffix = ''
    else:
        suffix = '_' + str(noise).replace('.', '_')
    plt.savefig(FIG_DIR3 + 'q8' + suffix)
    # TODO complete this function
    return ada, test_X, test_y, train_X, train_y
def _load_data(name):
    """Load the ``X_<name>`` / ``y_<name>`` pair with ``np.loadtxt``.

    File locations are resolved by ``_get_file_path``.

    Returns:
        Tuple ``(X, y)`` of the two loaded arrays.
    """
    features = np.loadtxt(_get_file_path('X_' + name))
    targets = np.loadtxt(_get_file_path('y_' + name))
    return features, targets


if __name__ == '__main__':
    # Train one AdaBoost per T in 5, 10, ..., 195 and plot both error curves.
    X_train, y_train = _load_data('train')
    X_val, y_val = _load_data('val')

    T_values = range(5, 200, 5)
    validation_error = []
    training_error = []
    for t in T_values:
        ada_boost = AdaBoost(DecisionStump, t)
        ada_boost.train(X_train, y_train)
        validation_error.append(ada_boost.error(X_val, y_val))
        training_error.append(ada_boost.error(X_train, y_train))

    # plot() returns a list of lines; each call here draws exactly one.
    training_error_plot = plot(T_values, training_error,
                               linestyle='--', label='training_error')[0]
    validation_error_plot = plot(T_values, validation_error,
                                 linestyle='--', label='validation_error')[0]
    legend(handles=[training_error_plot, validation_error_plot])
    title('training and validation error vs T values')
if myForest.isTrained: Xtest,yTest,XtestID = myForest.getDataFromFile(train_test_file) finalPredictions = myForest.predict(Xtest) myForest.writeToFile(XtestID,finalPredictions,'output.txt') print("Accuracy is: " ,sum(finalPredictions==yTest)/len(yTest)) else: print("Untrained model being tested") #train train-data.txt adaboost_model.txt adaboost #test test-data.txt adaboost_model.txt adaboost if model == 'adaboost' : if trainOrTest == 'train': myBoost = AdaBoost(300,verbose = False) TrainX,TrainY,TrainXID = myBoost.getDataFromFile(train_test_file) myBoost.train(TrainX,TrainY) pk.dump(myBoost,open(model_file,'wb')) if trainOrTest == 'test': try: myBoost = pk.load(open(model_file,'rb')) except: print("output file has not been generated") if myBoost.isTrained: Xtest,yTest,XtestID = myBoost.getDataFromFile(train_test_file) finalPredictions = myBoost.predict(Xtest) myBoost.writeToFile(XtestID,finalPredictions,'output.txt') print("Accuracy is: " ,sum(finalPredictions==yTest)/len(yTest)) else: print("Untrained model being tested")
[+1], [+1], [+1], ] ).transpose() Tag = Tag.flatten() for i in range(len(Tag)): if Tag[i] == 1: pyplot.plot(Original_Data[0][i], Original_Data[1][i], "+r", markersize=10) else: pyplot.plot(Original_Data[0][i], Original_Data[1][i], "+b", markersize=10) a = AdaBoost(Original_Data, Tag) a.train(100) TestCase = [[0.55, 1.1, 5.35], [4.4, 2.8, 0.9]] output = a.prediction(TestCase) for i in range(len(output)): if output[i] == 1: pyplot.plot(TestCase[0][i], TestCase[1][i], "or", markersize=20) else: pyplot.plot(TestCase[0][i], TestCase[1][i], "ob", markersize=20) pyplot.show()
algorithm='SAMME', n_estimators=no_base_classifiers, learning_rate=1.0) ## CV kf = KFold(n_splits=no_folds) cv_acc_arr = [] cv_sk_acc_arr = [] i = 0 for train_ind, test_ind in kf.split(X_train): print("cross split no", i) x_tr, x_te = X_train.copy()[train_ind], X_train.copy()[test_ind] y_tr, y_te = y_train.copy()[train_ind], y_train.copy()[test_ind] f.init(x_tr, y_tr) f.train(no_base_classifiers) y_predict = f.predict(x_te) accuracy = np.mean(y_predict == y_te) cv_acc_arr.append(accuracy) ## comparing sklearn implementation of boost boost.fit(x_tr, y_tr) y_pred = boost.predict(x_te) accuracy_sk = np.mean(y_pred == y_te) cv_sk_acc_arr.append(accuracy_sk) i += 1 print(np.mean(cv_acc_arr)) print(np.mean(cv_sk_acc_arr))
import numpy
from adaboost import AdaBoost

# Ten one-dimensional samples 0..9, laid out as one row (features x samples).
Original_Data = numpy.array([[value] for value in range(10)]).transpose()

# One +1 / -1 tag per sample, as a flat vector.
Tag = numpy.array([+1, +1, +1, -1, -1, -1, +1, +1, +1, -1])

a = AdaBoost(Original_Data, Tag)
a.train(5)
This is the main of the AdaBoost algorithm. It contains a raw data of 10 point from 2 class. """ from adaboost import AdaBoost import pandas as pd import numpy as np from matplotlib import pyplot as plt data = pd.DataFrame(np.array([[88, 144, 1], [93, 232, 1], [136, 275, -1], [147, 131, -1], [159, 69, 1], [214, 31, 1], [214, 152, -1], [257, 83, 1], [307, 62, -1], [307, 231, -1]]), columns=["x", "y", "label"]) def display(): f1 = plt.figure(1) positive = data[data["label"] == 1] negative = data[data["label"] == -1] plt.scatter(positive.iloc[:, 0], positive.iloc[:, 1], c="red", marker="+") plt.scatter(negative.iloc[:, 0], negative.iloc[:, 1], c="green") plt.show() if __name__ == '__main__': m_ada = AdaBoost(data, 5) display() m_ada.train() m_ada.display()
cm_bright = ListedColormap(['#FF0000', '#0000FF']) #Get current axis and plot if ax is None: ax = plt.gca() ax.contourf(xx, yy, Z, 2, cmap='RdBu', alpha=.5) ax.contour(xx, yy, Z, 2, cmap='RdBu') ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright, s=scatter_weights * 40) ax.set_xlabel('$X_1$') ax.set_ylabel('$X_2$') boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier( max_depth=1, max_leaf_nodes=2), algorithm='SAMME', n_estimators=10, learning_rate=1.0) boost.fit(X, y) # plot_decision_boundary(boost, X,y, N = 50)#, weights) # plt.show() print(boost.score(X, y)) ### h = DecisionTreeClassifier(max_depth=1, max_leaf_nodes=2) f = AdaBoost(h) f.init(X, y) f.train(10) y_pred = boost.predict(X) accuracy = np.mean(y_pred == y) print(accuracy)
[1], [2], [3], [4], [5], [6], [7], [8], [9] ]).transpose() Tag = numpy.array([ [+1], [+1], [+1], [-1], [-1], [-1], [+1], [+1], [+1], [-1], ]).transpose() Tag = Tag.flatten() a = AdaBoost(Original_Data, Tag) a.train(5)
import numpy as np
from adaboost import AdaBoost

# Ten one-dimensional samples 0..9 as a single row, with one +1 / -1 label each.
X = np.arange(10).reshape(1, 10)
Y = [1, 1, 1, -1, -1, -1, 1, 1, 1, -1]

ada = AdaBoost(X, Y)
ada.train(4)
# Element-wise check of the predictions against the labels.  print(...) with
# one argument works on Python 2 and 3; the print statement was a SyntaxError
# on Python 3.
print(ada.pred(X) == np.array(Y))
from mapReduce import reduce map(Face, nonFace) _mat = reduce() mat = _mat featureNum, sampleNum = _mat.shape assert sampleNum == (POSITIVE_SAMPLE + NEGATIVE_SAMPLE) assert featureNum == FEATURE_NUM Label_Face = [+1 for i in xrange(POSITIVE_SAMPLE)] Label_NonFace = [-1 for i in xrange(NEGATIVE_SAMPLE)] label = numpy.array(Label_Face + Label_NonFace) cache_filename = ADABOOST_CACHE_FILE + str(0) if os.path.isfile(cache_filename): model = getCachedAdaBoost(mat = _mat, label = label, filename= cache_filename, limit = ADABOOST_LIMIT) else: model = AdaBoost(mat, label, limit = ADABOOST_LIMIT) model.train() model.saveModel(cache_filename) print model
if train_or_test == "train": KNN.knn_training(input_data_file, model_file) elif train_or_test == "test": KNN.knn_testing(model_file, input_data_file) elif model in ["tree", "best"]: if train_or_test == "train": # Train model data_vector, all_image_ids, images_counter = parse_image_data( file_path=input_data_file) trained_decision_tree = AdaBoost( images_data_vector=data_vector, all_images_ids=all_image_ids, images_counter=images_counter, decision_stumps=30, ) trained_decision_tree.train() save_model_to_pickle(obj=trained_decision_tree, file_name="tree.pkl") save_model_to_txt( obj=trained_decision_tree, file_name="tree_model.txt", model="adaboost_decision_tree", ) # Test after train trained_decision_tree.test(test_file_path="test_file.txt") else: # Test trained_decision_tree = load_model_from_pickle( file_name="tree.pkl") trained_decision_tree.test(test_file_path="test_file.txt")
#if i % 150 == 0: haarGroup = [] for j in range(i * SAMPLE_NUM, (i + 1) * SAMPLE_NUM): haarGroup.append(float(tmp[j])) Original_Data.append(haarGroup) Original_Data = numpy.array(Original_Data) fileObj.close() SampleDem = Original_Data.shape[0] SampleNum = Original_Data.shape[1] assert SampleNum == (POSITIVE_SAMPLE + NEGATIVE_SAMPLE) Label_Face = [+1 for i in range(POSITIVE_SAMPLE)] Label_NonFace = [-1 for i in range(NEGATIVE_SAMPLE)] Label = numpy.array(Label_Face + Label_NonFace) a = AdaBoost(Original_Data, Label) try: a.train(200) except KeyboardInterrupt: print "You pressed interrupt key. Training process interrupt." saveModel(a)
[+1], [+1], [+1], [-1], [-1], [-1], [-1], [-1], [-1], [-1], [-1], [-1], [-1], [+1], [+1], [+1]]).transpose() Tag = Tag.flatten() for i in range(len(Tag)): if Tag[i] == 1: pyplot.plot(Original_Data[0][i], Original_Data[1][i], \ '+r', markersize = 10) else: pyplot.plot(Original_Data[0][i], Original_Data[1][i], \ '+b', markersize = 10) a = AdaBoost(Original_Data, Tag) a.train(100) TestCase = [[0.55, 1.1, 5.35, 7.0, 8.5, -1.0, 3.0, 3.0, 4.0, 2, 3], [4.4, 2.8, 0.9, -12, -13, -9, -10, -9, -5, 0, 2.5]] output = a.prediction(TestCase) for i in range(len(output)): if output[i] == 1: pyplot.plot(TestCase[0][i], TestCase[1][i], \ 'or', markersize = 20) else: pyplot.plot(TestCase[0][i], TestCase[1][i], \ 'ob', markersize = 20) pyplot.show()
for j in range(i * SAMPLE_NUM , (i+1) * SAMPLE_NUM): haarGroup.append(float(tmp[j])) Original_Data.append(haarGroup) Original_Data = numpy.array(Original_Data) fileObj.close() SampleDem = Original_Data.shape[0] SampleNum = Original_Data.shape[1] assert SampleNum == (POSITIVE_SAMPLE + NEGATIVE_SAMPLE) Label_Face = [+1 for i in range(POSITIVE_SAMPLE)] Label_NonFace = [-1 for i in range(NEGATIVE_SAMPLE)] Label = numpy.array(Label_Face + Label_NonFace) a = AdaBoost(Original_Data, Label) try: a.train(200) except KeyboardInterrupt: print "You pressed interrupt key. Training process interrupt." saveModel(a)
import numpy as np
from adaboost import AdaBoost, AdaBoostTextbook
from utils import Dataset


def test(model, dataset, name):
    """Print and return the accuracy of ``model`` on ``dataset``.

    Args:
        model: Object whose ``predict(x)`` is called once per sample.
        dataset: Object whose ``get_dataset()`` returns ``(X_test, y_test)``.
        name: Label used in the printed message.

    Returns:
        Fraction of samples whose prediction equals the label.  The function
        previously returned ``None`` although callers store its result.
    """
    X_test, y_test = dataset.get_dataset()
    pred = np.array([model.predict(x) for x in X_test])
    accuracy = (y_test == pred).sum() / y_test.size
    print(f'{name} version accuracy: {accuracy:.1f}')
    return accuracy


if __name__ == '__main__':
    dataset = Dataset('./training-data.txt')
    test_dataset = Dataset('./testing-data.txt')

    model = AdaBoost(9)
    model.train(dataset)
    accuracy = test(model, test_dataset, 'Original')

    model_tb = AdaBoostTextbook(9)
    model_tb.train(dataset, 0.2, 2)
    accuracy_tb = test(model_tb, test_dataset, 'Textbook')
Just Enjoy it. """ import numpy import matplotlib.pyplot as pyplot from adaboost import AdaBoost from sklearn import datasets """ Samples for AdaBoost """ Original_Data, Tag = datasets.make_hastie_10_2(n_samples = 200, random_state = 1) Original_Data = Original_Data.transpose() for i in range(len(Tag)): if Tag[i] == 1: pyplot.plot(Original_Data[0][i], Original_Data[1][i], \ '+r', markersize = 10) else: pyplot.plot(Original_Data[0][i], Original_Data[1][i], \ '+b', markersize = 10) pyplot.title("Sample Points") pyplot.show() a = AdaBoost(Original_Data, Tag) a.train(10000)
class ex5:
    """Exercise driver: perceptron vs. linear SVM plots and AdaBoost plots."""

    def __init__(self):
        """Set up the sampling distribution, the SVM and the experiment sizes."""
        self.mean = [0, 0]
        self.cov = np.eye(2)
        self.svm = SVC(C=1e10, kernel='linear')
        self.perceptron = None
        self.a_boost = None
        self.svm_accs = []
        self.perceptrone_accs = []
        self.ms = [5, 10, 15, 25, 70]          # training-set sizes
        self.ts = [5, 10, 50, 100, 200, 500]   # AdaBoost ensemble sizes

    def q_3_4_5(self):
        """Run ``calculate_for_m`` for every size, then plot mean accuracies."""
        for m in self.ms:
            self.calculate_for_m(m)
        plt.plot(self.ms, self.perceptrone_accs)
        plt.plot(self.ms, self.svm_accs)
        # Second curve is the SVM accuracy (label used to read "svd").
        plt.legend(("perceptron", "svm"))
        plt.show()

    def calculate_for_m(self, m):
        """Fit perceptron and SVM on ``m`` samples, plot them, record accuracy."""
        x = np.random.multivariate_normal(self.mean, self.cov, m)
        real_labels = self.get_real_labels(x)
        labeled_1_x, labeled_min_1_x = self.get_x_by_labels(x, real_labels)
        t = np.arange(int(x.min()) - 1, int(x.max()) + 1, 0.1)
        self.plt_xs(labeled_1_x, labeled_min_1_x, t)

        self.perceptron = Perceptron()
        perc_w = self.perceptron.fit(x, real_labels)
        # The last entry of the returned vector is used as the bias term.
        plt.plot(t, self.get_y(perc_w[:-1], perc_w[-1], t))

        self.svm.fit(x, real_labels)
        # ``coef_`` is the public accessor for what ``_get_coef()`` returned.
        plt.plot(t, self.get_y(self.svm.coef_[0], self.svm.intercept_, t))
        plt.legend(["True labels", "perceptron", "svm"])
        plt.show()
        self.calculate_svm_perc_acc()

    def get_real_labels(self, x):
        """Return ``[self.f(p) for p in x]``."""
        return [self.f(point) for point in x]

    def get_x_by_labels(self, x, labels):
        """Split ``x`` into (points labelled 1.0, points labelled -1.0).

        Points with any other label are dropped.
        """
        x_1, x_minus_1 = [], []
        for i, point in enumerate(x):
            if labels[i] == 1.0:
                x_1.append(point)
            elif labels[i] == -1.0:
                x_minus_1.append(point)
        return x_1, x_minus_1

    def f(self, x):
        """True labelling rule: ``sign(<(0.3, -0.5), x> + 0.1)``."""
        return np.sign(np.dot([0.3, -0.5], x) + 0.1)

    def plt_xs(self, labeled_1_x, labeled_min_1_x, t):
        """Scatter both classes and draw the true separating line over ``t``."""
        plt.scatter([x[0] for x in labeled_1_x], [x[1] for x in labeled_1_x])
        plt.scatter([x[0] for x in labeled_min_1_x],
                    [x[1] for x in labeled_min_1_x])
        plt.plot(t, self.get_y([0.3, -0.5], 0.1, t))

    def get_y(self, w, b, x):
        """Return ``-w[0] * i / w[1] + b / -w[1]`` for every ``i`` in ``x``."""
        return [-w[0] * i / w[1] + b / -w[1] for i in x]

    def calculate_svm_perc_acc(self):
        """Append the accuracies averaged over the 500 test draws."""
        s, p = self.get_svm_prec_acc()
        self.perceptrone_accs.append(p / 500)
        self.svm_accs.append(s / 500)

    def get_svm_prec_acc(self):
        """Sum SVM and perceptron ``score`` over 500 fresh 10000-point samples.

        Returns:
            Tuple ``(svm total, perceptron total)``.
        """
        svm_acc, perceptrone_acc = 0, 0
        for _ in range(500):
            x = np.random.multivariate_normal(self.mean, self.cov, 10000)
            real_labels = self.get_real_labels(x)
            svm_acc += self.svm.score(x, real_labels)
            perceptrone_acc += self.perceptron.score(x, real_labels)
        return svm_acc, perceptrone_acc

    def q_7_8_9_10(self):
        """Run questions 8, 9 and 10 in order."""
        self.q_8()
        self.q_9()
        self.q_10()

    def q_8(self):
        """Train a 500-learner AdaBoost and plot train/test error against T."""
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        self.a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=500)
        self.a_boost.train(tx, ty)
        training_errs, test_errs = self.get_ab_errs(tx, ty, x, y)
        self.plt_q_8(training_errs, test_errs)

    def get_ab_errs(self, tx, ty, x, y):
        """Return ``self.a_boost`` errors on both sets for T = 1..500.

        T starts at 1 and includes 500; iterating ``range(500)`` evaluated a
        zero-learner ensemble and never the full one.
        """
        training_errs, test_errs = [], []
        for t in range(1, 501):
            training_errs.append(self.a_boost.error(tx, ty, t))
            test_errs.append(self.a_boost.error(x, y, t))
        return training_errs, test_errs

    def plt_q_8(self, training_errs, test_errs):
        """Plot both error curves against T = 1..len(errors)."""
        plt.plot(np.arange(1, len(training_errs) + 1), training_errs,
                 label="training error")
        plt.plot(np.arange(1, len(test_errs) + 1), test_errs,
                 label="test error")
        plt.title("Adaboost errors as function of (T)")
        plt.legend()
        plt.show()

    def q_9(self):
        """Draw one decision-boundary panel per ensemble size in ``self.ts``."""
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        for i, t in enumerate(self.ts, start=1):
            a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
            a_boost.train(tx, ty)
            plt.subplot(2, 3, i)
            ex4_tools.decision_boundaries(a_boost, x, y, t)
        plt.show()

    def q_10(self):
        """Plot the boundaries of the size in ``self.ts`` with the lowest error."""
        tx, ty = ex4_tools.generate_data(5000, noise_ratio=0)
        x, y = ex4_tools.generate_data(200, noise_ratio=0)
        errors = self.get_ab_errors(tx, ty, x, y)
        min_t = np.argmin(errors)
        a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=self.ts[min_t])
        a_boost.train(tx, ty)
        ex4_tools.decision_boundaries(a_boost, tx, ty, self.ts[min_t])
        plt.title("min error is " + str(errors[min_t]) + " with " +
                  str(self.ts[min_t]) + " classifiers")
        plt.show()

    def get_ab_errors(self, tx, ty, x, y):
        """Train one AdaBoost per size in ``self.ts``; return its error on (x, y)."""
        errors = []
        for t in self.ts:
            a_boost = AdaBoost(WL=ex4_tools.DecisionStump, T=t)
            a_boost.train(tx, ty)
            errors.append(a_boost.error(x, y, t))
        return errors
# encoding=utf-8
# @Author: wendesi
# @Date:   15-11-16
# @Email:  [email protected]
# @Last modified by:   wendesi
# @Last modified time: 15-11-16

import logging

from generate_dataset import *
from adaboost import AdaBoost
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    # Train AdaBoost on a generated data set and report its test accuracy.
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    train_features, train_labels, test_features, test_labels = generate_dataset(
        200)

    ada = AdaBoost()
    ada.train(train_features, train_labels)
    # Single-argument print(...) produces the same text on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print('end train')

    test_predict = ada.predict(test_features)
    score = accuracy_score(test_labels, test_predict)
    print('%s %s' % ("ada boost the accruacy socre is ", score))