import numpy as np

import adaboost

# train_data_, train_label_ and N_fold are module-level globals set by the caller.


def cross_validation(iter_):
    """Return the N_fold cross-validation error of AdaBoost run for T = iter_ rounds."""
    data_num, data_size = train_data_.shape
    fold_size = data_num // N_fold  # samples beyond N_fold * fold_size always stay in training
    train_data = np.zeros((data_num - fold_size, data_size))
    train_label = np.zeros(data_num - fold_size)
    test_data = np.zeros((fold_size, data_size))
    test_label = np.zeros(fold_size)
    total_error = 0
    for i in range(N_fold):
        if i == 0:
            train_data[:, :] = train_data_[fold_size:, :]
            train_label[:] = train_label_[fold_size:]
            test_data[:, :] = train_data_[0:fold_size, :]
            test_label[:] = train_label_[0:fold_size]
        else:
            # Fold i is held out; everything before and after it is training data.
            train_data[:, :] = np.append(train_data_[0:i * fold_size, :],
                                         train_data_[(i + 1) * fold_size:, :], axis=0)
            train_label[:] = np.append(train_label_[0:i * fold_size],
                                       train_label_[(i + 1) * fold_size:])
            test_data[:, :] = train_data_[i * fold_size:(i + 1) * fold_size, :]
            test_label[:] = train_label_[i * fold_size:(i + 1) * fold_size]
        best = adaboost.Adaboost(train_data, train_label, 'validation', iter_)
        error = adaboost.adaClassify(test_data, test_label, 'validation', best)
        total_error += error
    CV_error = total_error / N_fold
    print('[Result] Cross-Validation Error of T =', iter_, 'is', CV_error)
    return iter_, CV_error
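# For reference: a minimal sketch of the reweighting that one standard AdaBoost
# round performs (generic AdaBoost, not necessarily what adaboost.Adaboost does
# internally). `preds` are the weak learner's +/-1 predictions on the samples.
def adaboost_round(preds, labels, weights):
    """One boosting round: weighted error -> alpha -> reweighted samples."""
    miss = (preds != labels)                       # samples the weak learner gets wrong
    eps = np.dot(weights, miss)                    # weighted training error
    alpha = 0.5 * np.log((1.0 - eps) / max(eps, 1e-16))
    weights = weights * np.exp(-alpha * labels * preds)
    return alpha, weights / weights.sum()          # renormalize to a distribution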
def fit(self, trainFeatures, trainLabels):
    # Each round trains an AdaBoost on sqrt(d) randomly chosen features
    # (random-subspace style) and a bootstrap sample of the rows.
    self.number_feature = int(np.sqrt(trainFeatures.shape[1]))
    # dtype=object so rows can hold the selected column names.
    self.list_subfeatures = np.empty((self.rounds, self.number_feature), dtype=object)
    for i in range(self.rounds):
        features_selected = np.random.choice(trainFeatures.columns.values,
                                             self.number_feature, replace=False)
        # np.random.choice needs an integer sample size.
        bootstrapped_index = np.random.choice(
            trainFeatures.index.values,
            int(self.bootstrap_percentage * trainFeatures.shape[0]))
        self.list_subfeatures[i] = features_selected
        my_adaboost = adaboost.Adaboost(self.iteration, random=self.random)
        X_boot = trainFeatures.loc[bootstrapped_index][features_selected]
        y_boot = trainLabels.loc[bootstrapped_index]
        my_adaboost.fit(X_boot, y_boot)
        # Assumed signature: error(X, y, T) returns the training error after T rounds
        # (the original call was truncated here).
        self.trainError[i] = my_adaboost.error(X_boot, y_boot, self.iteration)
        self.list_adaboost_classifier = np.append(self.list_adaboost_classifier,
                                                  [my_adaboost])
        if self.weighted:
            # Weight each learner by its accuracy on the out-of-bag rows.
            oob_index = np.delete(trainFeatures.index.values, bootstrapped_index)
            self.weights = np.append(
                self.weights,
                [my_adaboost.score(trainFeatures.loc[oob_index][features_selected],
                                   trainLabels.loc[oob_index])])
    return self.weights, self.trainError
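# A possible companion prediction step for the ensemble trained above (an
# assumption about the intended use, not the author's code): each per-round
# AdaBoost votes on its own feature subset, weighted by its out-of-bag score
# when `weighted` is set.
import numpy as np

def ensemble_predict(ensemble, testFeatures):
    votes = np.zeros(testFeatures.shape[0])
    for i, clf in enumerate(ensemble.list_adaboost_classifier):
        sub = testFeatures[ensemble.list_subfeatures[i]]  # same subset used in fit()
        w = ensemble.weights[i] if ensemble.weighted else 1.0
        votes += w * clf.predict(sub)  # assumes predict() returns labels in {-1, +1}
    return np.sign(votes)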
def main():
    (training_data_face, training_data_nonface,
     testing_data_face, testing_data_nonface) = dataloadaer.loadandwrite()
    training_data = training_data_face + training_data_nonface
    training_output_face = np.full(500, 1)
    training_output_nonface = np.full(500, -1)
    training_output = np.concatenate((training_output_face, training_output_nonface))
    (feature_coords, feature_type, max_index_arr, haar_feature_stack,
     haar_features_stack_face, haar_features_stack_nonface) = \
        generate_weak_classifier(training_data_face, training_data_nonface)
    model = adaboost.Adaboost(len(training_data), haar_feature_stack)
    # Run 20 boosting rounds: pick the lowest weighted-error Haar feature,
    # compute its alpha, and reweight the training samples.
    for t in range(20):
        model.calculate_weighted_error(haar_features_stack_face,
                                       haar_features_stack_nonface)
        model.calculate_alpha()
        model.update_weight(training_output)
    boosted_feature_coords_display = np.take(feature_coords, model.haar_index[0:10])
    boosted_feature_type = np.take(feature_type, model.haar_index)
    boosted_feature_coords = np.take(feature_coords, model.haar_index)
    utilities.draw_and_plot_haar_features(training_data_face[0],
                                          boosted_feature_coords_display)
    face_predict, non_face_predict = model.test(testing_data_face,
                                                testing_data_nonface,
                                                boosted_feature_coords,
                                                boosted_feature_type)
    utilities.plotROG(face_predict, non_face_predict)
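# For context: Haar feature values are normally computed from an integral
# image so that each rectangle sum costs only four array lookups. A minimal
# sketch of the standard technique (the repo's generate_weak_classifier may
# compute this differently):
import numpy as np

def integral_image(img):
    return img.cumsum(axis=0).cumsum(axis=1)

def rect_sum(ii, r0, c0, r1, c1):
    """Sum of pixels in img[r0:r1, c0:c1], given the integral image ii."""
    total = ii[r1 - 1, c1 - 1]
    if r0 > 0:
        total -= ii[r0 - 1, c1 - 1]
    if c0 > 0:
        total -= ii[r1 - 1, c0 - 1]
    if r0 > 0 and c0 > 0:
        total += ii[r0 - 1, c0 - 1]
    return total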
print('Show the first and last images of the training dataset')
fig, ax = plt.subplots(1, 2)
ax[0].axis('off')
ax[0].set_title('Face')
ax[0].imshow(trainData[1][0], cmap='gray')
ax[1].axis('off')
ax[1].set_title('Non face')
ax[1].imshow(trainData[-1][0], cmap='gray')
plt.show()

# Part 2: Implement the selectBest function in adaboost.py and test the
# following code (a sketch of selectBest follows this snippet).
# Part 3: Try different values of the parameter T of the Adaboost algorithm
# and look for better results. Please test at least the values 1~10.
print('Start training your classifier')
clf = adaboost.Adaboost(T=1)
clf.train(trainData)

print('\nEvaluate your classifier with training dataset')
utils.evaluate(clf, trainData)

print('\nEvaluate your classifier with test dataset')
utils.evaluate(clf, testData)

# Part 4: Implement the detect function in detection.py and test the following code.
print('\nDetect faces at the assigned location using your classifier')
detection.detect('data/detect/detectData.txt', clf)

# Part 5: Test the classifier on your own images.
print('\nDetect faces on your own images')
detection.detect('yourOwnImages', clf)
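# Part 2 hint: a minimal sketch of what a selectBest implementation usually
# does -- scan the candidate weak classifiers and keep the one with the lowest
# weighted error. The names, the (sample, label) dataset layout, and the
# classify() interface here are assumptions, not the assignment's required
# signature.
def selectBest(candidates, dataset, weights):
    best_clf, best_error = None, float('inf')
    for clf in candidates:
        error = sum(w for (x, y), w in zip(dataset, weights) if clf.classify(x) != y)
        if error < best_error:
            best_clf, best_error = clf, error
    return best_clf, best_error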
print(ten_fold(cls, examples, labels))
print(bootstrap(cls, examples, labels))

cls = decision_tree.Decision_Tree(split_method="mce")
examples, labels = car_data.read_car_data()
print(holdout(cls, examples, labels))
print(ten_fold(cls, examples, labels))
print(bootstrap(cls, examples, labels))

cls = decision_tree.Decision_Tree(split_method="gini")
examples, labels = car_data.read_car_data()
print(holdout(cls, examples, labels))
print(ten_fold(cls, examples, labels))
print(bootstrap(cls, examples, labels))

cls = decision_tree.Decision_Tree(pruning_method="post", pruning_rate=0.8)
examples, labels = car_data.read_car_data()
print(holdout(cls, examples, labels))
print(ten_fold(cls, examples, labels))
print(bootstrap(cls, examples, labels))

cls = decision_tree.Decision_Tree(pruning_method="pre", pruning_rate=0.8)
examples, labels = car_data.read_car_data()
print(holdout(cls, examples, labels))
print(ten_fold(cls, examples, labels))
print(bootstrap(cls, examples, labels))

cls = adaboost.Adaboost(decision_tree.Decision_Tree, 5)
examples, labels = car_data.read_car_data()
print(holdout(cls, examples, labels))
print(ten_fold(cls, examples, labels))
print(bootstrap(cls, examples, labels))
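# The evaluators called here (holdout, ten_fold, bootstrap) are defined
# elsewhere in the repo; for orientation, minimal sketches of two of them,
# assuming the classifier exposes fit(X, y) and predict(x) (an assumption
# about the interface, not the repo's actual signatures):
import random

def holdout(cls, examples, labels, train_ratio=0.7):
    n = int(len(examples) * train_ratio)
    cls.fit(examples[:n], labels[:n])
    hits = sum(cls.predict(x) == y for x, y in zip(examples[n:], labels[n:]))
    return hits / float(len(examples) - n)

def bootstrap(cls, examples, labels, rounds=10):
    accs = []
    for _ in range(rounds):
        # Train on a bootstrap resample, test on the out-of-bag examples.
        idx = [random.randrange(len(examples)) for _ in range(len(examples))]
        oob = [i for i in range(len(examples)) if i not in set(idx)]
        if not oob:
            continue
        cls.fit([examples[i] for i in idx], [labels[i] for i in idx])
        hits = sum(cls.predict(examples[i]) == labels[i] for i in oob)
        accs.append(hits / float(len(oob)))
    return sum(accs) / len(accs)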
import sys

import heapq
import numpy as np
from nearest import Knn
from nnet import NeuralNetwork
from best import Best
import adaboost

train_or_test, file_name, model_file, model = sys.argv[1:]

if train_or_test == 'train':
    if model == 'nearest':
        knn = Knn()
        knn.train(file_name, model_file)
    elif model == 'adaboost':
        trial = adaboost.Adaboost(file_name, None)
        trial.training = file_name
        train_pixels = trial.prepare_data(trial.training)[0]
        weights = trial.train(train_pixels)
        # Shift every learned weight by a fixed offset before saving.
        for i in weights:
            weights[i] += 10
        with open(model_file, 'w') as myfile:
            for i in weights:
                if i == trial.learner1:
                    myfile.write('%s %.9f\n' % ('learner1', weights[i]))
                if i == trial.learner2:
                    myfile.write('%s %.9f\n' % ('learner2', weights[i]))
    elif model == 'nnet':
        nnet = NeuralNetwork()
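# Expected invocation, inferred from the sys.argv unpacking above (the script
# and data file names here are placeholders, not the repo's actual names):
#
#   python classify.py train train-data.txt adaboost_model.txt adaboost
#   python classify.py test  test-data.txt  adaboost_model.txt adaboost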
# Python 3: reload() lives in importlib.
from importlib import reload


def TryOuts(indexes_to_add_feature_noise, max_stds, pPos, pNeg):
    reload(noising)
    reload(utils)

    def prepare_noisy_split():
        """Add feature noise, rescale, split, then add label noise to the training half."""
        # Add noise on features (before scaling). Work on a copy so the noise
        # of one try does not accumulate into the shared `data` frame.
        noisy_data = data.copy()
        for idx in indexes_to_add_feature_noise:
            std = max_stds.loc[idx]
            if std > 0:
                noisy_data[idx] = noising.addNormalNoise(noisy_data[idx], avg, std)
        # Prepare dataset: map the target to {-1, +1} and scale the features.
        noisy_data["Y"] = np.where(noisy_data[Y_index] == 0, -1, 1)
        noisy_data = noisy_data.drop(Y_index, axis=1)
        features = noisy_data.drop("Y", axis=1).columns.values
        noisy_data[features] = scale(noisy_data[features])
        # Split train / test.
        X_train, X_test, y_train, y_test = train_test_split(
            noisy_data.drop("Y", axis=1), noisy_data["Y"], test_size=0.33)
        # Add noise on labels (on the training set only; noisy test labels
        # would not make sense).
        y_train = noising.switchLabels(y_train, pPos, pNeg)
        return X_train, X_test, y_train, y_test

    def run_trials(label, power=None, version=None):
        """Cross-validate, fit and score one Adaboost variant over several noisy tries."""
        val_error = np.empty(0)
        for p in range(number_of_tries):
            X_train, X_test, y_train, y_test = prepare_noisy_split()
            if power is None:
                # Plain Adaboost: cross-validate the number of rounds T only.
                best_T, _ = utils.cross_validate_adaboost_T(
                    X_train, y_train, Ts, None, None, 0, False)
                my_adaboost = adaboost.Adaboost(best_T)
            else:
                # Regularized variants: cross-validate T and C jointly. `power`
                # and `version` are simply forwarded positionally, exactly as in
                # the original calls; their meaning is internal to Adaboost.
                best_C, best_T, _ = utils.cross_validate_adaboost_T_C(
                    X_train, y_train, Ts, power, Cs, version, False)
                my_adaboost = adaboost.Adaboost(best_T, power, best_C, version)
            my_adaboost.fit(X_train, y_train)
            val_error = np.append(val_error, 1 - my_adaboost.score(X_test, y_test))
        print("{0} Avg. val error : {1}, Std. : {2}".format(
            label, val_error.mean(), val_error.std()))

    run_trials("Adaboost")
    run_trials("L1 regularized Adaboost (paper)", power=2, version=0)
    run_trials("Adaboost v1", power=2, version=1)
    run_trials("Adaboost v1 pow 3", power=3, version=1)
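# noising.switchLabels is used above but not shown; a plausible sketch of the
# label noise it injects -- flip each +1 with probability pPos and each -1
# with probability pNeg (inferred from the call site, not the module's code):
import numpy as np

def switchLabels(y, pPos, pNeg):
    y = y.copy()
    r = np.random.rand(len(y))
    flip_pos = (y == 1) & (r < pPos)   # positives to flip
    flip_neg = (y == -1) & (r < pNeg)  # negatives to flip
    y[flip_pos] = -1
    y[flip_neg] = 1
    return y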
    writer = csv.writer(f)
    writer.writerows(CV_error_list)

# Pick the T with the smallest cross-validation error.
min_error = math.inf
best_T = 0
for i in range(1, validation_iter + 1):
    if min_error > CV_error_list[i - 1][1]:
        min_error = CV_error_list[i - 1][1]
        best_T = CV_error_list[i - 1][0]
print('Choose optimal T =', best_T, '\n')
print('Time cost for cross-validation =', (time.time() - start_time) / 60, 'mins')

# Start our Adaboost: retrain on the full training set with the chosen T.
best_hypothesis = adaboost.Adaboost(train_data_, train_label_, 'training', best_T)
out_hypothesis = []
for i in range(best_T):
    out_hypothesis.append([
        best_hypothesis[i]['iter'], best_hypothesis[i]['dim'],
        best_hypothesis[i]['thresh'], best_hypothesis[i]['inequal'],
        best_hypothesis[i]['alpha']
    ])
with open(str(N_fold) + '_fold_output_AdaBoost_hypothesis_header.csv',
          'w', newline='') as f:
    w = csv.writer(f)
    w.writerow([
        'iteration_index', 'attribute_index', 'threshold', 'direction',
        'boosting_parameter'
    ])
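# The CSV written above stores one decision stump per boosting round; at
# prediction time each stored row is applied roughly like this (a sketch
# consistent with the 'dim'/'thresh'/'inequal'/'alpha' fields; 'lt' as the
# inequality encoding is an assumption):
import numpy as np

def apply_hypothesis(X, hypothesis):
    """Weighted vote of the stored stumps over the samples X (one row each)."""
    agg = np.zeros(X.shape[0])
    for h in hypothesis:
        if h['inequal'] == 'lt':
            pred = np.where(X[:, h['dim']] <= h['thresh'], -1.0, 1.0)
        else:
            pred = np.where(X[:, h['dim']] > h['thresh'], -1.0, 1.0)
        agg += h['alpha'] * pred
    return np.sign(agg)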