Example #1
def cross_validation(iter_):
    """N_fold-fold cross-validation of Adaboost run for iter_ boosting rounds."""
    data_num, data_size = train_data_.shape
    fold_size = int(data_num / N_fold)  # samples per fold; any remainder is never held out
    train_data = np.zeros((data_num - fold_size, data_size))
    train_label = np.zeros(data_num - fold_size)
    test_data = np.zeros((fold_size, data_size))
    test_label = np.zeros(fold_size)
    total_error = 0
    # Fold i is held out for testing; the remaining folds form the training set.
    for i in range(N_fold):
        if i == 0:
            train_data[:, :] = train_data_[fold_size:, :]
            train_label[:] = train_label_[fold_size:]
            test_data[:, :] = train_data_[0:fold_size, :]
            test_label[:] = train_label_[0:fold_size]
        else:
            train_data[:, :] = np.append(train_data_[0:i * fold_size, :],
                                         train_data_[(i + 1) * fold_size:, :],
                                         axis=0)
            train_label[:] = np.append(train_label_[0:i * fold_size],
                                       train_label_[(i + 1) * fold_size:])
            test_data[:, :] = train_data_[i * fold_size:(i + 1) * fold_size, :]
            test_label[:] = train_label_[i * fold_size:(i + 1) * fold_size]

        best = adaboost.Adaboost(train_data, train_label, 'validation', iter_)
        error = adaboost.adaClassify(test_data, test_label, 'validation', best)
        total_error = total_error + error
    CV_error = total_error / N_fold
    print('[Result] Cross-Validation Error of T =', iter_, 'is', CV_error)

    return iter_, CV_error
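
# A minimal driver sketch for cross_validation(), assuming the module-level
# train_data_, train_label_, and N_fold used above. validation_iter plays the
# same role as in Example #8; the value 20 here is an assumption.
validation_iter = 20
CV_error_list = [cross_validation(t) for t in range(1, validation_iter + 1)]
best_T, min_error = min(CV_error_list, key=lambda pair: pair[1])
print('Best T =', best_T, 'with cross-validation error', min_error)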
Example #2
    def fit(self, trainFeatures, trainLabels):
        # Each round trains an Adaboost classifier on a bootstrap sample
        # restricted to a random subset of sqrt(#features) features.
        self.number_feature = int(np.sqrt(trainFeatures.shape[1]))
        # Column names are strings, so store the per-round feature lists in an object array.
        self.list_subfeatures = np.empty((self.rounds, self.number_feature), dtype=object)

        for i in range(self.rounds):
            features_selected = np.random.choice(trainFeatures.columns.values, self.number_feature, replace=False)
            bootstraped_index = np.random.choice(trainFeatures.index.values,
                                                 int(self.bootstrap_percentage * trainFeatures.shape[0]))
            self.list_subfeatures[i] = features_selected
            my_adaboost = adaboost.Adaboost(self.iteration, random=self.random)
            my_adaboost.fit(trainFeatures.loc[bootstraped_index, features_selected],
                            trainLabels.loc[bootstraped_index])
            # The original call here was truncated; assuming error(X, y, T) evaluated on the bootstrap sample.
            self.trainError[i] = my_adaboost.error(trainFeatures.loc[bootstraped_index, features_selected],
                                                   trainLabels.loc[bootstraped_index], self.iteration)
            self.list_adaboost_classifier = np.append(self.list_adaboost_classifier, [my_adaboost])
            if self.weighted:
                # Weight each round's classifier by its score on the out-of-bag rows.
                oob_index = trainFeatures.index.difference(bootstraped_index)
                self.weights = np.append(self.weights, [my_adaboost.score(
                    trainFeatures.loc[oob_index, features_selected],
                    trainLabels.loc[oob_index])])
        return self.weights, self.trainError
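
# The two sampling steps in fit() seen in isolation: feature-subspace sampling
# without replacement, bootstrap row sampling with replacement. Toy sketch;
# the column names and the 0.8 bootstrap fraction are assumptions.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(10, 9), columns=['f%d' % i for i in range(9)])
k = int(np.sqrt(df.shape[1]))                                      # sqrt(#features), as in fit()
feats = np.random.choice(df.columns.values, k, replace=False)      # subspace: no replacement
rows = np.random.choice(df.index.values, int(0.8 * df.shape[0]))   # bootstrap: with replacement
subsample = df.loc[rows, feats]
print(subsample.shape)                                             # (8, 3)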
Example #3
def main():

    # Load face / non-face training and testing images.
    training_data_face, training_data_nonface, testing_data_face, testing_data_nonface = dataloadaer.loadandwrite()
    training_data = training_data_face + training_data_nonface
    # Labels: +1 for the 500 face images, -1 for the 500 non-face images.
    training_output_face = np.full(500, 1)
    training_output_nonface = np.full(500, -1)
    training_output = np.concatenate((training_output_face, training_output_nonface))
    (feature_coords, feature_type, max_index_arr, haar_feature_stack,
     haar_features_stack_face, haar_features_stack_nonface) = generate_weak_classifier(
        training_data_face, training_data_nonface)
    model = adaboost.Adaboost(len(training_data), haar_feature_stack)

    # Run 20 boosting rounds: pick the weak classifier with the lowest
    # weighted error, compute its alpha, and reweight the samples.
    for t in range(20):
        model.calculate_weighted_error(haar_features_stack_face, haar_features_stack_nonface)
        model.calculate_alpha()
        model.update_weight(training_output)

    # Visualize the ten strongest Haar features and evaluate on the test set.
    boosted_feature_coords_display = np.take(feature_coords, model.haar_index[0:10])
    boosted_feature_type = np.take(feature_type, model.haar_index)
    boosted_feature_coords = np.take(feature_coords, model.haar_index)
    utilities.draw_and_plot_haar_features(training_data_face[0], boosted_feature_coords_display)
    face_predict, non_face_predict = model.test(testing_data_face, testing_data_nonface, boosted_feature_coords, boosted_feature_type)
    utilities.plotROG(face_predict, non_face_predict)
Example #4
print('Show the first and last images of training dataset')
fig, ax = plt.subplots(1, 2)
ax[0].axis('off')
ax[0].set_title('Face')
ax[0].imshow(trainData[1][0], cmap='gray')
ax[1].axis('off')
ax[1].set_title('Non face')
ax[1].imshow(trainData[-1][0], cmap='gray')
plt.show()

# Part 2: Implement the selectBest function in adaboost.py and test the following code.
# Part 3: Try different values of the parameter T of the Adaboost algorithm
# to find better results; test values 1~10 at least (a sweep sketch follows below).
# print('Start training your classifier')
clf = adaboost.Adaboost(T=1)
clf.train(trainData)

print('\nEvaluate your classifier with training dataset')
utils.evaluate(clf, trainData)

print('\nEvaluate your classifier with test dataset')
utils.evaluate(clf, testData)
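
# A minimal sketch for the Part 3 sweep: retrain and re-evaluate for each T.
# It reuses only the calls already shown above and assumes nothing about
# utils.evaluate's return value (only its printed output is relied on).
for T in range(1, 11):
    print('\nTraining classifier with T =', T)
    clf_T = adaboost.Adaboost(T=T)
    clf_T.train(trainData)
    utils.evaluate(clf_T, testData)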

# Part 4: Implement detect function in detection.py and test the following code.
print('\nDetect faces at the assigned location using your classifier')
detection.detect('data/detect/detectData.txt', clf)

# Part 5: Test classifier on your own images
print('\nDetect faces on your own images')
detection.detect('yourOwnImages', clf)
Example #5
    print(ten_fold(cls, examples, labels))
    print(bootstrap(cls, examples, labels))
    cls = decision_tree.Decision_Tree(split_method="mce")
    examples, labels = car_data.read_car_data()
    print(holdout(cls, examples, labels))
    print(ten_fold(cls, examples, labels))
    print(bootstrap(cls, examples, labels))
    cls = decision_tree.Decision_Tree(split_method="gini")
    examples, labels = car_data.read_car_data()
    print(holdout(cls, examples, labels))
    print(ten_fold(cls, examples, labels))
    print(bootstrap(cls, examples, labels))
    cls = decision_tree.Decision_Tree(pruning_method="post",
                                      pruning_rate=0.8)
    examples, labels = car_data.read_car_data()
    print(holdout(cls, examples, labels))
    print(ten_fold(cls, examples, labels))
    print(bootstrap(cls, examples, labels))
    cls = decision_tree.Decision_Tree(pruning_method="pre",
                                      pruning_rate=0.8)
    examples, labels = car_data.read_car_data()
    print(holdout(cls, examples, labels))
    print(ten_fold(cls, examples, labels))
    print(bootstrap(cls, examples, labels))
    """
    # Boost five decision trees and report holdout, ten-fold, and bootstrap scores.
    cls = adaboost.Adaboost(decision_tree.Decision_Tree, 5)
    examples, labels = car_data.read_car_data()
    print(holdout(cls, examples, labels))
    print(ten_fold(cls, examples, labels))
    print(bootstrap(cls, examples, labels))
Example #6
import sys
import heapq
import numpy as np
from nearest import Knn
from nnet import NeuralNetwork
from best import Best
import adaboost

train_or_test, file_name, model_file, model = sys.argv[1:]

if train_or_test == 'train':
    if model == 'nearest':
        knn = Knn()
        knn.train(file_name, model_file)

    elif model == 'adaboost':
        trial = adaboost.Adaboost(file_name, None)
        trial.training = file_name
        train_pixels = trial.prepare_data(trial.training)[0]
        weights = trial.train(train_pixels)
        # Offset every learner weight by a constant before saving.
        for i in weights:
            weights[i] += 10

        with open(model_file, 'w') as myfile:
            for i in weights:
                if i == trial.learner1:
                    myfile.write('%s %.9f\n' % ('learner1', weights[i]))
                if i == trial.learner2:
                    myfile.write('%s %.9f\n' % ('learner2', weights[i]))
    elif model == 'nnet':
        nnet = NeuralNetwork()
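
# Hypothetical reader for the model file written by the training branch above;
# the name load_model is an assumption, but the 'learnerN <weight>' line format
# matches exactly what myfile.write produces.
def load_model(model_file):
    weights = {}
    with open(model_file) as fh:
        for line in fh:
            name, value = line.split()
            weights[name] = float(value)
    return weights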
Example #7
def TryOuts(indexes_to_add_feature_noise, max_stds, pPos, pNeg):
    # On Python 3, reload must first be imported: from importlib import reload.
    reload(noising)
    reload(utils)

    Val_error = np.empty(0)
    for p in range(number_of_tries):
        # Add noise on features (before scaling); copy so the source
        # DataFrame is not mutated across tries.
        noisy_data = data.copy()
        for idx in indexes_to_add_feature_noise:
            std = max_stds.loc[idx]
            if std > 0:
                noisy_data[idx] = noising.addNormalNoise(
                    noisy_data[idx], avg, std)
        # Prepare dataset: map labels {0, 1} to {-1, +1} and scale the features.
        noisy_data["Y"] = np.where(noisy_data[Y_index] == 0, -1, 1)
        noisy_data = noisy_data.drop(Y_index, axis=1)
        feature_cols = noisy_data.drop("Y", axis=1).columns.values
        noisy_data[feature_cols] = scale(noisy_data[feature_cols])
        # Split train / test
        X_train, X_test, y_train, y_test = train_test_split(
            noisy_data.drop("Y", axis=1), noisy_data["Y"], test_size=0.33)
        # Add label noise on the training set only; noising the test labels would bias evaluation.
        y_train = noising.switchLabels(y_train, pPos, pNeg)
        # Cross Validate
        best_T, cross_validate_accuracy_training = utils.cross_validate_adaboost_T(
            X_train, y_train, Ts, None, None, 0, False)
        # Val. Error
        my_adaboost = adaboost.Adaboost(best_T)
        my_adaboost.fit(X_train, y_train)
        error_val = 1 - my_adaboost.score(X_test, y_test)
        Val_error = np.append(Val_error, error_val)
        #print " Round : {0}, Best T : {1}, train error : {2}, val. error : {3}".format(p + 1, best_T, 1 - cross_validate_accuracy_training, error_val)

    print("Adaboost Avg. val error : {0}, Std. : {1}".format(
        Val_error.mean(), Val_error.std()))

    Val_error = np.empty(0)
    for p in range(number_of_tries):
        # Add noise on features (before scaling); copy so the source
        # DataFrame is not mutated across tries.
        noisy_data = data.copy()
        for idx in indexes_to_add_feature_noise:
            std = max_stds.loc[idx]
            if std > 0:
                noisy_data[idx] = noising.addNormalNoise(
                    noisy_data[idx], avg, std)
        # Prepare dataset: map labels {0, 1} to {-1, +1} and scale the features.
        noisy_data["Y"] = np.where(noisy_data[Y_index] == 0, -1, 1)
        noisy_data = noisy_data.drop(Y_index, axis=1)
        feature_cols = noisy_data.drop("Y", axis=1).columns.values
        noisy_data[feature_cols] = scale(noisy_data[feature_cols])
        # Split train / test
        X_train, X_test, y_train, y_test = train_test_split(
            noisy_data.drop("Y", axis=1), noisy_data["Y"], test_size=0.33)
        # Add label noise on the training set only; noising the test labels would bias evaluation.
        y_train = noising.switchLabels(y_train, pPos, pNeg)
        # Cross Validate
        best_C, best_T, cross_validate_accuracy_training = utils.cross_validate_adaboost_T_C(
            X_train, y_train, Ts, 2, Cs, 0, False)
        # Val. Error
        my_adaboost = adaboost.Adaboost(best_T, 2, best_C, 0)
        my_adaboost.fit(X_train, y_train)
        error_val = 1 - my_adaboost.score(X_test, y_test)
        Val_error = np.append(Val_error, error_val)
        #print " Round : {0}, Best T / C : {1}, {2}, train error : {3}, val. error : {4}".format(p + 1, best_T, best_C, 1 - cross_validate_accuracy_training, error_val)

    print("L1 regulariazed Adaboost (paper) Avg. val error : {0}, Std. : {1}".
          format(Val_error.mean(), Val_error.std()))

    Val_error = np.empty(0)
    for p in range(number_of_tries):
        # Add noise on features (before scaling); copy so the source
        # DataFrame is not mutated across tries.
        noisy_data = data.copy()
        for idx in indexes_to_add_feature_noise:
            std = max_stds.loc[idx]
            if std > 0:
                noisy_data[idx] = noising.addNormalNoise(
                    noisy_data[idx], avg, std)
        # Prepare dataset: map labels {0, 1} to {-1, +1} and scale the features.
        noisy_data["Y"] = np.where(noisy_data[Y_index] == 0, -1, 1)
        noisy_data = noisy_data.drop(Y_index, axis=1)
        feature_cols = noisy_data.drop("Y", axis=1).columns.values
        noisy_data[feature_cols] = scale(noisy_data[feature_cols])
        # Split train / test
        X_train, X_test, y_train, y_test = train_test_split(
            noisy_data.drop("Y", axis=1), noisy_data["Y"], test_size=0.33)
        # Add label noise on the training set only; noising the test labels would bias evaluation.
        y_train = noising.switchLabels(y_train, pPos, pNeg)
        # Cross Validate
        best_C, best_T, cross_validate_accuracy_training = utils.cross_validate_adaboost_T_C(
            X_train, y_train, Ts, 2, Cs, 1, False)
        # Val. Error
        my_adaboost = adaboost.Adaboost(best_T, 2, best_C, 1)
        my_adaboost.fit(X_train, y_train)
        error_val = 1 - my_adaboost.score(X_test, y_test)
        Val_error = np.append(Val_error, error_val)
        #print " Round : {0}, Best T : {1}, train error : {2}, val. error : {3}".format(p + 1, best_T, 1 - cross_validate_accuracy_training, error_val)

    print("Adaboost v1 Avg. val error : {0}, Std. : {1}".format(
        Val_error.mean(), Val_error.std()))

    Val_error = np.empty(0)
    for p in range(number_of_tries):
        # Add noise on features (before scaling); copy so the source
        # DataFrame is not mutated across tries.
        noisy_data = data.copy()
        for idx in indexes_to_add_feature_noise:
            std = max_stds.loc[idx]
            if std > 0:
                noisy_data[idx] = noising.addNormalNoise(
                    noisy_data[idx], avg, std)
        # Prepare dataset: map labels {0, 1} to {-1, +1} and scale the features.
        noisy_data["Y"] = np.where(noisy_data[Y_index] == 0, -1, 1)
        noisy_data = noisy_data.drop(Y_index, axis=1)
        feature_cols = noisy_data.drop("Y", axis=1).columns.values
        noisy_data[feature_cols] = scale(noisy_data[feature_cols])
        # Split train / test
        X_train, X_test, y_train, y_test = train_test_split(
            noisy_data.drop("Y", axis=1), noisy_data["Y"], test_size=0.33)
        # Add label noise on the training set only; noising the test labels would bias evaluation.
        y_train = noising.switchLabels(y_train, pPos, pNeg)
        # Cross Validate
        best_C, best_T, cross_validate_accuracy_training = utils.cross_validate_adaboost_T_C(
            X_train, y_train, Ts, 3, Cs, 1, False)
        # Val. Error
        my_adaboost = adaboost.Adaboost(best_T, 3, best_C, 1)
        my_adaboost.fit(X_train, y_train)
        error_val = 1 - my_adaboost.score(X_test, y_test)
        Val_error = np.append(Val_error, error_val)
        #print " Round : {0}, Best T : {1}, train error : {2}, val. error : {3}".format(p + 1, best_T, 1 - cross_validate_accuracy_training, error_val)

    print("Adaboost v1 pow 3Avg. val error : {0}, Std. : {1}".format(
        Val_error.mean(), Val_error.std()))
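
# Why the .copy() in the loops above matters: plain assignment aliases a
# DataFrame instead of copying it, so noise added in one try would leak into
# every later try. A self-contained demonstration:
import pandas as pd

df = pd.DataFrame({"x": [1.0, 2.0]})
alias = df                       # no copy: both names point at the same object
alias["x"] += 1.0
print(df["x"].tolist())          # [2.0, 3.0] - the "original" changed too
safe = df.copy()                 # independent copy: later mutations stay local
safe["x"] += 1.0
print(df["x"].tolist())          # still [2.0, 3.0]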
Example #8
    writer = csv.writer(f)
    writer.writerows(CV_error_list)

# Pick the number of boosting rounds T with the smallest cross-validation error.
best_T, min_error = min(CV_error_list, key=lambda pair: pair[1])

print('Choose optimal T =', best_T, '\n')
print('Time cost for cross-validation =', (time.time() - start_time) / 60,
      'mins')

# Train the final Adaboost on the full training set with the chosen T.
best_hypothesis = adaboost.Adaboost(train_data_, train_label_, 'training',
                                    best_T)
out_hypothesis = []
for i in range(best_T):
    out_hypothesis.append([
        best_hypothesis[i]['iter'], best_hypothesis[i]['dim'],
        best_hypothesis[i]['thresh'], best_hypothesis[i]['inequal'],
        best_hypothesis[i]['alpha']
    ])

with open(str(N_fold) + '_fold_output_AdaBoost_hypothesis_header.csv',
          'w',
          newline='') as f:
    w = csv.writer(f)
    w.writerow([
        'iteration_index', 'attribute_index', 'threshold', 'direction',
        'boosting_parameter'
    ])
    # One row per selected weak hypothesis, matching the header above.
    w.writerows(out_hypothesis)
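
# Minimal sketch of reading the hypothesis file back; it assumes only the
# layout written above (one header row, then one row per weak hypothesis).
import csv

with open(str(N_fold) + '_fold_output_AdaBoost_hypothesis_header.csv', newline='') as f:
    rows = list(csv.reader(f))
header, hypotheses = rows[0], rows[1:]
print(header)
print('Loaded', len(hypotheses), 'weak hypotheses')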