Example no. 1
def run():
    """
    Run KNN with a training sample size of 5000.
    :return: prints the accuracy and the time taken
    """
    data = DigitData(5000)
    start_time = time.time()
    model = KNN(X_train=data.X_train, Y_train=data.Y_train)
    y_predictions = model.predict(data.X_test)
    acc = calc_acc(data.Y_test, y_predictions)
    print([acc, time.time() - start_time])


def k_variation():
    """
    Study the variation of k in KNN over odd values from 1 to 13.
    :return: writes a CSV with the accuracy and time taken for each k
    """
    results = []
    data = DigitData(750, 250)
    for i in range(1, 15, 2):
        print "running for k : " + str(i)
        start_time = time.time()
        model = KNN(k=i, X_train=data.X_train, Y_train=data.Y_train)
        y_predictions = model.predict(data.X_test)
        acc = calc_acc(data.Y_test, y_predictions)
        results.append([i, acc, time.time() - start_time])
        if len(results) % BUFFER_SIZE == 0:
            write_results_to_file(results, MODULE + BASE_NAME + ".csv")
            results = []


def variate_sample_size():
    """
    Study the variation of the training sample size from 500 to MAX_TRAIN_DATA in steps of 1000.
    :return: writes a CSV with the accuracy and time taken for each sample size
    """
    results = []
    for i in range(500, MAX_TRAIN_DATA, 1000):
        print "running for sample" + str(i)
        data = DigitData(i)  # use the current sample size i, not MAX_TRAIN_DATA
        start_time = time.time()
        model = KNN(X_train=data.X_train, Y_train=data.Y_train)  # default k; i is the sample size, not a k value
        y_predictions = model.predict(data.X_test)
        acc = calc_acc(data.Y_test, y_predictions)
        results.append([i, acc, time.time() - start_time])
        if len(results) % BUFFER_SIZE == 0:
            write_results_to_file(results, MODULE + BASE_NAME + ".csv")
            results = []
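
The snippet above calls two helpers that are not shown, calc_acc and write_results_to_file. Below is a minimal sketch of what they might look like, assuming accuracy is the fraction of matching labels and results are appended row by row to a CSV file; only the names and call signatures come from the snippet, the bodies are hypothetical.

import csv


def calc_acc(y_true, y_pred):
    # Hypothetical helper: accuracy as the fraction of matching labels.
    correct = sum(1 for t, p in zip(y_true, y_pred) if t == p)
    return correct / float(len(y_true))


def write_results_to_file(results, path):
    # Hypothetical helper: append result rows to a CSV file.
    with open(path, 'a', newline='') as f:
        csv.writer(f).writerows(results)
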
Example no. 4
def experiment(samples_per_person: int, persons: int, verbose=False):
    hidden_dataset = vggDataset('/media/Datasets/vgg_face2',
                                'test',
                                verbose=verbose)
    gallery_dataset = vggDataset('/media/Datasets/vgg_face2',
                                 'train_aligned',
                                 verbose=verbose)

    hidden_data, _ = hidden_dataset.get_training_data(
        20)  # this gives 500*20 = 10k hidden samples
    #hidden_data, _ = hidden_dataset.get_training_data(1) # useful for debugging
    train_data, test_data = gallery_dataset.get_training_data(
        samples_per_person, samples_test=5,
        persons_limit=persons)  # this gives persons * 5 gallery test samples

    estimators = {  # dict of estimators, matching the structure returned by expand_evm()
        'EVM': EVM(open_set_threshold=0.0, tail=samples_per_person),
        'KNN': KNN(n_neighbors=1)
    }

    reload_models = False  # the train/test split is not guaranteed to be the same across runs, so do not reload saved models by default
    #reload_models=True # useful when debugging
    if reload_models:
        print("Reloading pretrained models")

        for name, estim in estimators.items():
            estimators[name] = load_model(name + '.pkl')
    else:  # plain train
        print("Training raw models")

        for name, estim in estimators.items():

            train(estim, train_data, verbose=True)
            save_model(name + '.pkl', estim)

    # logging test
    #OSTs = [0.0, 0.001, 0.005, 0.01, 0.02]
    #test_estimators = expand_evm(estimators['EVM'], OSTs)
    #test_estimators.update({'KNN': estimators['KNN']})
    test_estimators = {'KNN': estimators['KNN'], 'EVM': estimators['EVM']}

    test_logging(test_estimators, test_data,
                 os.path.join("%dpp" % samples_per_person, 'gallery_test.csv'))
    test_logging(test_estimators, hidden_data,
                 os.path.join("%dpp" % samples_per_person, 'hidden_test.csv'))
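
save_model and load_model are used above but not defined in the snippet. A minimal sketch assuming plain pickle serialization (the actual implementations in the repository may differ):

import pickle


def save_model(path, model):
    # Hypothetical helper: persist a fitted estimator with pickle.
    with open(path, 'wb') as f:
        pickle.dump(model, f)


def load_model(path):
    # Hypothetical helper: restore a previously pickled estimator.
    with open(path, 'rb') as f:
        return pickle.load(f)
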
Example no. 5
    print_dict(performance)


if __name__ == '__main__':
    mode = 'performance'
    mode = 'experiment'  # overrides the line above; set back to 'performance' to run the parameter sweep

    if mode == 'performance':
        #models = {("EVM_Red_%4d" % int(r)): EVM(redundancy_rate=r/1000.) for r in range(0, 1000, 25)} # just redundancy
        models = {("EVM_BiasD_%4d" % int(r)): EVM(redundancy_rate=r / 1000.,
                                                  biased_distance=r / 1000.)
                  for r in range(400, 1000, 50)
                  }  # biased distance (needs to change redundancy as well)

        models.update({"KNN": KNN(n_neighbors=1)})
        #print(models)

        persons = 15
        per_person_samples = 20
        performance_multieval(models,
                              per_person_samples,
                              persons,
                              n_samples=100,
                              verbose=True)

    if mode == 'experiment':
        persons = 8000
        per_person_samples = [10, 20, 50]

        #persons, per_person_samples = int(6*1e3), [10]#, 20, 50] # debug
        'stack_' + type(clf_b).__name__: stackls_b
    })
    return pd.concat([test_x, stack_feat], axis=1)


# Create classifiers
print('\nCreate classifier ...\n' + '*' * 50)
classifiers = [
    svm(max_iter=args.max_iter,
        C=args.svm_c,
        kernel=args.svm_kernel,
        gamma=args.svm_gamma,
        degree=args.svm_degree,
        coef0=args.svm_coef0),
    lr(num_steps=args.max_iter, learning_rate=args.lr_lr),
    KNN(n_neighbors=args.knn_n)
]

# Choose the 2 classifiers used to generate the stacking features
clf_stack = [clf for clf in range(3) if clf != args.third_clf]  # compare by value, not identity

# Start learning stacking
print('\nStart learning stacking using {0} and {1} ...\n'.format(
    type(classifiers[clf_stack[0]]).__name__,
    type(classifiers[clf_stack[1]]).__name__) + '*' * 50)
start_time = dt.datetime.now()
print('Start learning training dataset stacking at {0}.'.format(
    str(start_time)))
train_x_stack = stacking_train(classifiers[clf_stack[0]],
                               classifiers[clf_stack[1]])
start_time = dt.datetime.now()
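
stacking_train is called above but its body is not shown; the truncated fragment earlier in this example suggests that the stacking features are simply the base classifiers' predictions appended as extra "stack_*" columns. Below is a minimal sketch of that idea, assuming scikit-learn-style fit/predict estimators; the function name stacking_features and its signature are hypothetical, not the repository's API.

import pandas as pd


def stacking_features(clf_a, clf_b, fit_x, fit_y, eval_x):
    # Hypothetical sketch: fit both base classifiers, then append their
    # predictions on eval_x as extra "stack_*" columns, mirroring the
    # fragment above that builds stack_feat and concatenates it with test_x.
    clf_a.fit(fit_x, fit_y)
    clf_b.fit(fit_x, fit_y)
    stack_feat = pd.DataFrame({
        'stack_' + type(clf_a).__name__: clf_a.predict(eval_x),
        'stack_' + type(clf_b).__name__: clf_b.predict(eval_x),
    })
    return pd.concat([pd.DataFrame(eval_x), stack_feat], axis=1)
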
Example no. 7
filename = argument_parser.get_filename()
adwin_delta = argument_parser.get_delta()
training_set_ratio = argument_parser.get_training_set_ratio()
neighbors_number = argument_parser.get_neighbors_number()
kernel = argument_parser.get_kernel()
regulation = argument_parser.get_regulation()
max_iters = argument_parser.get_iterations()
n_of_hidden = argument_parser.get_n_of_hidden_layers()
algorithm = argument_parser.get_algorithm()
printing = argument_parser.is_printing()

data, labels = load_data(filename)

classifiers = {
    'bayes': Bayes(data, labels, training_set_ratio),
    'knn': KNN(data, labels, training_set_ratio, neighbors_number),
    'nn': NeuralNetwork(data, labels, training_set_ratio, n_of_hidden,
                        max_iters),
    'svm': SVM(data, labels, training_set_ratio, kernel, regulation)
}

classifier = classifiers[algorithm]

classifier.train()
classifier.test()

accuracy_table = classifier.get_accuracy_table()
precision_table = classifier.get_precision_table()
sensitivity_table = classifier.get_sensitivity_table()
specificity_table = classifier.get_specificity_table()
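
The dictionary dispatch above works because Bayes, KNN, NeuralNetwork, and SVM are assumed to expose the same wrapper interface. A minimal sketch of that interface follows; the base-class name and the split logic are hypothetical, only the method names come from the calls above.

class ClassifierWrapper:
    # Hypothetical base class illustrating the interface the dispatch relies on.
    def __init__(self, data, labels, training_set_ratio):
        split = int(len(data) * training_set_ratio)
        self.train_x, self.train_y = data[:split], labels[:split]
        self.test_x, self.test_y = data[split:], labels[split:]

    def train(self):
        raise NotImplementedError

    def test(self):
        raise NotImplementedError

    def get_accuracy_table(self):
        raise NotImplementedError

    def get_precision_table(self):
        raise NotImplementedError

    def get_sensitivity_table(self):
        raise NotImplementedError

    def get_specificity_table(self):
        raise NotImplementedError
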
# Scaler and decomposition
if args.normal:
    scaler = StandardScaler()
    train_x = scaler.fit_transform(train_x)
    test_x = scaler.transform(test_x)

pca = PCA(args.pca_percent)
train_x = pca.fit_transform(train_x)
test_x = pca.transform(test_x)

print('Shape of train dataset: {}'.format(train_x.shape))
print('Shape of test dataset: {}'.format(test_x.shape))

# Create classifiers
print('\nCreate classifier ...\n' + '*' * 50)
clf = KNN(n_neighbors=args.knn_n)

# Start fitting
print('\nStart fitting ...\n' + '*' * 50)
start_time = dt.datetime.now()
print('Start {0} classifier training at {1}.'.format(type(clf).__name__, str(start_time)))
clf.fit(train_x, train_y)
end_time = dt.datetime.now()
print('End {0} classifier training at {1}.'.format(type(clf).__name__, str(end_time)))
print('Duration: {0}'.format(str(end_time - start_time)))

# Prediction
print('\nStart prediction ...\n' + '*' * 50)
expected = test_y
if args.output:
    # Redirect stdout