def run():
    """
    Run KNN with a training sample size of 5000.

    :return: prints the accuracy and the time taken
    """
    data = DigitData(5000)
    start_time = time.time()
    model = KNN(X_train=data.X_train, Y_train=data.Y_train)
    y_predictions = model.predict(data.X_test)
    acc = calc_acc(data.Y_test, y_predictions)
    print([acc, time.time() - start_time])
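# A minimal sketch of what the calc_acc helper used above presumably does:
# the fraction of predictions that match the true labels. The real helper is
# defined elsewhere in the repo; this version assumes plain sequences or
# numpy arrays of equal length, and the _sketch name is illustrative only.
def calc_acc_sketch(y_true, y_pred):
    correct = sum(1 for t, p in zip(y_true, y_pred) if t == p)
    return correct / float(len(y_true))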
def k_variation():
    """
    Study the effect of varying k in KNN from 1 to 15 (odd values only).

    :return: writes a CSV with the accuracy and time taken for each k
    """
    results = []
    data = DigitData(750, 250)
    for i in range(1, 16, 2):  # odd k values: 1, 3, ..., 15
        print("running for k : " + str(i))
        start_time = time.time()
        model = KNN(k=i, X_train=data.X_train, Y_train=data.Y_train)
        y_predictions = model.predict(data.X_test)
        acc = calc_acc(data.Y_test, y_predictions)
        results.append([i, acc, time.time() - start_time])
        if len(results) % BUFFER_SIZE == 0:
            write_results_to_file(results, MODULE + BASE_NAME + ".csv")
            results = []
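# k_variation flushes its results buffer every BUFFER_SIZE rows via
# write_results_to_file. A plausible sketch of that helper, assuming it
# appends rows to a CSV; the actual implementation lives elsewhere in the
# repo and the _sketch name is illustrative only.
import csv

def write_results_to_file_sketch(rows, path):
    with open(path, 'a', newline='') as f:
        csv.writer(f).writerows(rows)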
def variate_sample_size():
    """
    Study the effect of varying the training sample size from 500 to
    MAX_TRAIN_DATA in steps of 1000.

    :return: writes a CSV with the accuracy and time taken for each sample size
    """
    results = []
    for i in range(500, MAX_TRAIN_DATA, 1000):
        print("running for sample size " + str(i))
        data = DigitData(i)  # i is the training sample size, not k
        start_time = time.time()
        model = KNN(X_train=data.X_train, Y_train=data.Y_train)
        y_predictions = model.predict(data.X_test)
        acc = calc_acc(data.Y_test, y_predictions)
        results.append([i, acc, time.time() - start_time])
        if len(results) % BUFFER_SIZE == 0:
            write_results_to_file(results, MODULE + BASE_NAME + ".csv")
            results = []
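# All three experiments above time a custom KNN constructed as
# KNN(k=..., X_train=..., Y_train=...) and queried with .predict(). A minimal
# sketch of such a classifier using brute-force Euclidean distances and a
# majority vote; the repo's actual implementation may differ, and the
# _Sketch name is illustrative only.
import numpy as np
from collections import Counter

class KNNSketch:
    def __init__(self, k=5, X_train=None, Y_train=None):
        self.k = k
        self.X_train = np.asarray(X_train)
        self.Y_train = np.asarray(Y_train)

    def predict(self, X_test):
        predictions = []
        for x in np.asarray(X_test):
            # distances from x to every training point
            dists = np.linalg.norm(self.X_train - x, axis=1)
            # labels of the k nearest neighbors, majority vote decides
            nearest = self.Y_train[np.argsort(dists)[:self.k]]
            predictions.append(Counter(nearest).most_common(1)[0][0])
        return predictions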
def experiment(samples_per_person: int, persons: int, verbose=False):
    hidden_dataset = vggDataset('/media/Datasets/vgg_face2', 'test', verbose=verbose)
    gallery_dataset = vggDataset('/media/Datasets/vgg_face2', 'train_aligned', verbose=verbose)

    # 500 hidden identities * 20 samples each = 10k hidden (open-set) samples
    hidden_data, _ = hidden_dataset.get_training_data(20)
    # hidden_data, _ = hidden_dataset.get_training_data(1)  # useful for debugging

    # persons * 5 gallery test samples
    train_data, test_data = gallery_dataset.get_training_data(
        samples_per_person, samples_test=5, persons_limit=persons)

    # a dict of estimators, matching the shape returned by expand_evm()
    estimators = {
        'EVM': EVM(open_set_threshold=0.0, tail=samples_per_person),
        'KNN': KNN(n_neighbors=1)
    }

    # The train/test split is not guaranteed to be identical between runs,
    # so reloading pretrained models is disabled by default.
    reload_models = False
    # reload_models = True  # useful when debugging

    if reload_models:
        print("Reloading pretrained models")
        for name, estim in estimators.items():
            estimators[name] = load_model(name + '.pkl')
    else:
        # plain training from scratch
        print("Training raw models")
        for name, estim in estimators.items():
            train(estim, train_data, verbose=True)
            save_model(name + '.pkl', estim)

    # logging test
    # OSTs = [0.0, 0.001, 0.005, 0.01, 0.02]
    # test_estimators = expand_evm(estimators['EVM'], OSTs)
    # test_estimators.update({'KNN': estimators['KNN']})
    test_estimators = {'KNN': estimators['KNN'], 'EVM': estimators['EVM']}

    test_logging(test_estimators, test_data,
                 os.path.join("%dpp" % samples_per_person, 'gallery_test.csv'))
    test_logging(test_estimators, hidden_data,
                 os.path.join("%dpp" % samples_per_person, 'hidden_test.csv'))
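# experiment() persists each estimator through save_model/load_model. A
# minimal sketch under the assumption that these are thin pickle wrappers;
# the real helpers are defined elsewhere in the repo and the _sketch names
# are illustrative only.
import pickle

def save_model_sketch(path, model):
    with open(path, 'wb') as f:
        pickle.dump(model, f)

def load_model_sketch(path):
    with open(path, 'rb') as f:
        return pickle.load(f)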
    print_dict(performance)


if __name__ == '__main__':
    mode = 'performance'
    mode = 'experiment'  # the later assignment wins; comment one out to switch modes

    if mode == 'performance':
        # just redundancy:
        # models = {("EVM_Red_%4d" % int(r)): EVM(redundancy_rate=r / 1000.)
        #           for r in range(0, 1000, 25)}

        # biased distance (needs to change redundancy as well)
        models = {("EVM_BiasD_%4d" % int(r)): EVM(redundancy_rate=r / 1000.,
                                                  biased_distance=r / 1000.)
                  for r in range(400, 1000, 50)}
        models.update({"KNN": KNN(n_neighbors=1)})
        # print(models)
        persons = 15
        per_person_samples = 20
        performance_multieval(models, per_person_samples, persons,
                              n_samples=100, verbose=True)

    if mode == 'experiment':
        persons = 8000
        per_person_samples = [10, 20, 50]
        # persons, per_person_samples = int(6 * 1e3), [10]  # , 20, 50]  # debug
        'stack_' + type(clf_b).__name__: stackls_b
    })
    return pd.concat([test_x, stack_feat], axis=1)


# Create classifiers
print('\nCreate classifier ...\n' + '*' * 50)
classifiers = [
    svm(max_iter=args.max_iter, C=args.svm_c, kernel=args.svm_kernel,
        gamma=args.svm_gamma, degree=args.svm_degree, coef0=args.svm_coef0),
    lr(num_steps=args.max_iter, learning_rate=args.lr_lr),
    KNN(n_neighbors=args.knn_n)
]

# Choose the 2 classifiers that generate the stacking features
# (use != rather than `is not`: identity comparison of ints is unreliable)
clf_stack = [clf for clf in range(3) if clf != args.third_clf]

# Start learning stacking
print('\nStart learning stacking using {0} and {1} ...\n'.format(
    type(classifiers[clf_stack[0]]).__name__,
    type(classifiers[clf_stack[1]]).__name__) + '*' * 50)
start_time = dt.datetime.now()
print('Start learning training dataset stacking at {0}.'.format(str(start_time)))
train_x_stack = stacking_train(classifiers[clf_stack[0]],
                               classifiers[clf_stack[1]])
start_time = dt.datetime.now()
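# The fragment above is the tail of a stacking helper: two base classifiers
# are fitted and their predictions become extra feature columns appended to
# the feature matrix. A condensed sketch of that idea, assuming the
# classifiers expose fit/predict and the features are pandas DataFrames;
# the _sketch name and exact signature are illustrative, not the repo's.
import pandas as pd

def stacking_features_sketch(clf_a, clf_b, train_x, train_y, test_x):
    clf_a.fit(train_x, train_y)
    clf_b.fit(train_x, train_y)
    # one new column per base classifier, holding its predictions
    stack_feat = pd.DataFrame({
        'stack_' + type(clf_a).__name__: clf_a.predict(test_x),
        'stack_' + type(clf_b).__name__: clf_b.predict(test_x),
    })
    return pd.concat([test_x.reset_index(drop=True), stack_feat], axis=1)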
filename = argument_parser.get_filename()
adwin_delta = argument_parser.get_delta()
training_set_ratio = argument_parser.get_training_set_ratio()
neighbors_number = argument_parser.get_neighbors_number()
kernel = argument_parser.get_kernel()
regulation = argument_parser.get_regulation()
max_iters = argument_parser.get_iterations()
n_of_hidden = argument_parser.get_n_of_hidden_layers()
algorithm = argument_parser.get_algorithm()
printing = argument_parser.is_printing()

data, labels = load_data(filename)

classifiers = {
    'bayes': Bayes(data, labels, training_set_ratio),
    'knn': KNN(data, labels, training_set_ratio, neighbors_number),
    'nn': NeuralNetwork(data, labels, training_set_ratio, n_of_hidden, max_iters),
    'svm': SVM(data, labels, training_set_ratio, kernel, regulation)
}

classifier = classifiers[algorithm]
classifier.train()
classifier.test()

accuracy_table = classifier.get_accuracy_table()
precision_table = classifier.get_precision_table()
sensitivity_table = classifier.get_sensitivity_table()
specificity_table = classifier.get_specificity_table()
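# The four tables retrieved above are standard per-class metrics derived from
# confusion-matrix counts. As a generic reference for how such values are
# typically computed (a sketch, not this repo's implementation):
#   accuracy    = (TP + TN) / (TP + TN + FP + FN)
#   precision   = TP / (TP + FP)
#   sensitivity = TP / (TP + FN)   (a.k.a. recall)
#   specificity = TN / (TN + FP)
def per_class_metrics_sketch(tp, tn, fp, fn):
    return {
        'accuracy': (tp + tn) / (tp + tn + fp + fn),
        'precision': tp / (tp + fp),
        'sensitivity': tp / (tp + fn),
        'specificity': tn / (tn + fp),
    }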
# Scaler and decomposition
if args.normal:
    scaler = StandardScaler()
    train_x = scaler.fit_transform(train_x)
    test_x = scaler.transform(test_x)

# a float argument makes PCA keep enough components to explain that
# fraction of the variance
pca = PCA(args.pca_percent)
train_x = pca.fit_transform(train_x)
test_x = pca.transform(test_x)
print('Shape of train dataset: {}'.format(train_x.shape))
print('Shape of test dataset: {}'.format(test_x.shape))

# Create classifiers
print('\nCreate classifier ...\n' + '*' * 50)
clf = KNN(n_neighbors=args.knn_n)

# Start fitting
print('\nStart fitting ...\n' + '*' * 50)
start_time = dt.datetime.now()
print('Start {0} classifier training at {1}.'.format(type(clf).__name__,
                                                     str(start_time)))
clf.fit(train_x, train_y)
end_time = dt.datetime.now()
print('End {0} classifier training at {1}.'.format(type(clf).__name__,
                                                   str(end_time)))
print('Duration: {0}'.format(str(end_time - start_time)))

# Prediction
print('\nStart prediction ...\n' + '*' * 50)
expected = test_y
if args.output:
    # Redirect stdout