def kNN_test(X_train, X_test, y_train, y_test, distance="euclidean", k=3):
    """Predict a class for every row of ``X_test`` using the ``knn`` helpers.

    Each test row is passed to ``knn.train`` together with the training
    features, the distance name and ``k``; whatever that call returns is
    then handed, with ``y_train``, to ``knn.predict``.

    ``y_test`` is accepted for signature compatibility but is not used here.

    Returns:
        A list holding one predicted class per test row, in row order.
    """
    row_count = X_test.shape[0]
    return [
        knn.predict(knn.train(X_train, X_test[row], distance, k), y_train)
        for row in range(row_count)
    ]
def train(train_x, train_y):
    '''
    Fit every classifier on the same data and report how long each fit took.

    The train() functions of the tree, svm, knn, nn and boost modules are
    called in that order with (train_x, train_y). After each call the elapsed
    wall-clock time, rounded to 3 decimals, is printed.

    Returns the five results as a list:
    [tree_clf, svm_clf, knn_clf, nn_clf, boost_clf].
    '''
    def timed_fit(message, fit):
        # Run one trainer and print its duration after the given message.
        began = time.time()
        fitted = fit(train_x, train_y)
        print(message, round(time.time() - began, 3), 's')
        return fitted

    tree_clf = timed_fit('Decision Tree - Training Time: ', tree.train)
    svm_clf = timed_fit('SVM - Training Time: ', svm.train)
    knn_clf = timed_fit('k-NN - Training Time: ', knn.train)
    nn_clf = timed_fit('Neural Network - Training Time: ', nn.train)
    boost_clf = timed_fit('Boosted Tree - Training Time: ', boost.train)

    return [tree_clf, svm_clf, knn_clf, nn_clf, boost_clf]
def train(folds_x, folds_y):
    '''
    Fit every classifier on the given folds.

    Calls the train() function of the tree, svm, knn, nn and boost modules,
    in that order, with (folds_x, folds_y).

    Returns the five results as a list:
    [tree_clf, svm_clf, knn_clf, nn_clf, boost_clf].
    '''
    trainers = (tree, svm, knn, nn, boost)
    return [module.train(folds_x, folds_y) for module in trainers]
def main():
    """Train the selected classifier on every CSV in ``data`` and write a report.

    For each ``data/*.csv`` file the rows are split into train and test
    samples (``test_size=0.2``). A model is trained with ``nb.train`` when
    ``args.option`` is 1 or with ``knn.train`` when it is 2; for any other
    option, or when training yields ``None``, no report is written for that
    file.

    The model predicts from the test sample's ``conteudo`` column and the
    predictions are compared with its ``saida`` column. The report is written
    to ``results/<csv name without extension>_classification.txt`` and holds
    the train/test row counts, the fraction of matching predictions, the
    confusion matrix, the classification report and the accuracy score.
    """
    # Imported once per call instead of once per processed file.
    from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

    args_parser = build_args_parser()
    args = args_parser.parse_args()

    results_dir_path = 'results'
    raw_data_dir_path = 'data'

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(results_dir_path, exist_ok=True)

    for file_path in glob.glob(raw_data_dir_path + '/*.csv'):
        # .stem drops only the final extension; str.replace(suffix, "") would
        # also remove any earlier occurrence of ".csv" inside the name.
        file_name = pathlib.Path(file_path).stem

        df = pd.read_csv(file_path)
        train_sample, test_sample = train_test_split(df, test_size=0.2)

        model = None
        if args.option == 1:
            model = nb.train(train_sample)
        elif args.option == 2:
            model = knn.train(train_sample)

        if model is None:
            continue

        expected = test_sample['saida']
        predicted = model.predict(test_sample['conteudo'])
        precision = np.mean(predicted == expected)

        report_path = results_dir_path + "/" + file_name + "_classification.txt"
        # The context manager closes the report even if a metric call raises.
        # NOTE(review): the file is written with the platform default
        # encoding although it contains a non-ASCII label - confirm that this
        # is intended before pinning an explicit encoding.
        with open(report_path, "w") as report:
            report.write("Quantidade de entradas para treino: " + str(len(train_sample.index)) + "\n")
            report.write("Quantidade de entradas para teste: " + str(len(test_sample.index)) + "\n")
            report.write("Precisão: " + str(precision) + "\n")
            report.write(str(confusion_matrix(expected, predicted)))
            report.write(str(classification_report(expected, predicted)))
            report.write(str(accuracy_score(expected, predicted)))
from knn import train

if __name__ == "__main__":
    # Train on the "data/train" directory and ask train() to save the model
    # to "data/model/knn.clf".
    print("Training KNN classifier...")
    classifier = train("data/train", model_save_path="data/model/knn.clf", n_neighbors=4)
    # TODO: set n_neighbors dynamically to the number of people
    print("Training complete!")

    # PREDICTION
    # Disabled example. It relies on os, predict and
    # show_prediction_labels_on_image, none of which are imported in the
    # visible code.
    # for image_file in os.listdir("data/train/biden"):
    #     full_file_path = os.path.join("data/train/biden", image_file)
    #     if 'DS_Store' in full_file_path:
    #         continue
    #     print("Looking for faces in {}".format(image_file))
    #     # Find all people in the image using a trained classifier model
    #     predictions = predict(
    #         full_file_path, model_path="more_class.clf")
    #     # Print results on the console
    #     for name, (top, right, bottom, left) in predictions:
    #         print("- Found {} at ({}, {})".format(name, left, top))
    #     # Display results overlaid on an image
    #     show_prediction_labels_on_image(os.path.join(
    #         "data/train/phu", image_file), predictions)
return LabeledSample(old_sample.label, new_features) def linear_project(dataset, d): n_features = len(dataset.training_set[0].features) new_basis = [] for i in range(d): new_basis.append(unit_normal(n_features)) new_training = list( transform_sample(s, new_basis) for s in dataset.training_set) new_validation = list( transform_sample(s, new_basis) for s in dataset.validation_set) new_testing = list( transform_sample(s, new_basis) for s in dataset.testing_set) return NumericalDataset(new_training, new_validation, new_testing, DatasetNumericalMetadata(d)) if __name__ == '__main__': # here, k is the parameter in the kNN classifier, # and *d* is the parameter for reducing the dimensionality of the dataset. dataset = pickle.load(open(sys.argv[1], "rb")).convert_to_numerical() k = int(sys.argv[2]) d = int(sys.argv[3]) dataset = linear_project(dataset, d) model = knn.train(dataset.training_set, k) evaluate_model(model, dataset)
# sys.stdout = open('sysout.txt', 'w') # Parsing the input file and creating the image objects image_list = list() with open(file_name, 'r') as t_file: for line in t_file: image_list.append(Image(line)) print 'Start time', time() if model == 'best': model = 'nnet' # K-Nearest neighbors if model == 'nearest' and phase == 'train': model = knn.train(image_list) serialize_to_file(model, model_file) elif model == 'nearest' and phase == 'test': model = deserialize_from_file(model_file) knn.test(image_list, model) # ADA boost elif model == "adaboost" and phase == "train": params = Adaboost(image_list).adaboost() serialize_to_file(params, model_file) elif model == "adaboost" and phase == "test": params = deserialize_from_file(model_file) Adaboost(image_list).adaboost_test(image_list, params) # Neural net elif model == 'nnet' and phase == 'train':
import numpy

DATASET_PATH = 'datasets/iris.data'

# Load the dataset: each row becomes its float feature values followed by the
# last CSV field, which is kept as a string.
dataset = []
with open(DATASET_PATH, 'r') as data_file:
    for line in data_file:
        fields = line.strip().split(',')
        if fields == ['']:
            # Blank line (for example a trailing newline at the end of the
            # file): it would otherwise become a bogus sample holding only ''.
            continue
        sample = [float(value) for value in fields[:-1]]
        sample.append(fields[-1])
        dataset.append(sample)
# dataset = numpy.loadtxt(DATASET_PATH)

# Normalize the feature values.
scaler = preprocess.MinMaxScaler(dataset)
dataset = scaler.scale(dataset)

# Train with kNN (find the best k).
# Disabled variant that prints the best k and the mean error instead:
# bestK, error = knn.train(dataset, True)
# print '最优k:%d' % bestK
# print '平均误差:%f' % error
bestK, accuracy = knn.train(dataset, False)
# Single-argument parenthesised prints behave the same on Python 2 and 3.
print('最优k:%d' % bestK)
print('平均精度:%f%%' % (accuracy * 100))

# Predict with kNN (hand-made test set) - disabled; prints the predicted class:
# testset = [[800, 0, 0.3048, 71.3, 0.00266337]]
# testset = scaler.scale(testset)
# classList = knn.classify(dataset, testset, bestK, True)
# print '预测类别:%s' % classList[0]
# Running totals of the values returned by knn.train for each prototype set
# (presumably per-fold accuracies as fractions - confirm against knn.train).
avg_acc_lvq1 = avg_acc_lvq2 = avg_acc_lvq3 = avg_acc_knn = 0

for i, test_set in enumerate(folded_dataset):
    # Train on every fold except the one held out as the test set.
    training_set = [
        sample
        for j, fold in enumerate(folded_dataset)
        if j != i
        for sample in fold
    ]

    # lvq2 and lvq3 both start from the lvq1 prototypes.
    lvq1_proto = lvq.lvq1(training_set, proto_num, .01)
    lvq2_proto = lvq.lvq2(lvq1_proto, training_set, .01)
    lvq3_proto = lvq.lvq3(lvq1_proto, training_set, .01)

    avg_acc_lvq1 += knn.train(lvq1_proto, test_set, k)
    avg_acc_lvq2 += knn.train(lvq2_proto, test_set, k)
    avg_acc_lvq3 += knn.train(lvq3_proto, test_set, k)
    # Baseline: k-NN on the full training set, without prototypes.
    avg_acc_knn += knn.train(training_set, test_set, k)

# Turn the totals into per-fold means scaled by 100.
fold_count = len(folded_dataset)
avg_acc_lvq1 = 100*avg_acc_lvq1/fold_count
avg_acc_lvq2 = 100*avg_acc_lvq2/fold_count
avg_acc_lvq3 = 100*avg_acc_lvq3/fold_count
avg_acc_knn = 100*avg_acc_knn/fold_count

report_template = """ DATASET {} - prototype number: {} lvq1: {:2.2f}% lvq2: {:2.2f}% lvq3: {:2.2f}% {}-nn: {:2.2f}%"""
print(report_template.format(c, proto_num, avg_acc_lvq1, avg_acc_lvq2,
                             avg_acc_lvq3, k, avg_acc_knn))
import knn import naiveBayes import sys import os trainingSet,lexicon = knn.train() naiveBayes.trainNaiveBayes("train_formatted.txt") k = 5 while(1): test_in = raw_input("Input: ") if test_in == "q": exit() #baseline pos = 0 neg = 0 words = test_in.split(" ") for word in words: try: if lexicon[word.lower()] == 1: pos += 1 elif lexicon[word.lower()] ==0: neg += 1 except KeyError: pass if pos > neg: pass #print("Baseline: positive") elif pos < neg: pass
print("output file has not been generated") if myBoost.isTrained: Xtest,yTest,XtestID = myBoost.getDataFromFile(train_test_file) finalPredictions = myBoost.predict(Xtest) myBoost.writeToFile(XtestID,finalPredictions,'output.txt') print("Accuracy is: " ,sum(finalPredictions==yTest)/len(yTest)) else: print("Untrained model being tested") #train train-data.txt knn_model.txt knn #test test-data.txt knn_model.txt knn if model == 'knn' : if trainOrTest == 'train': knn.train(train_test_file,model_file) if trainOrTest == 'test': try: myKnn = open(model_file,'rb') except: print("output file has not been generated") finalPredictions,yTest,XtestID= knn.test(48,model_file ,train_test_file) knn.writeToFile(XtestID,finalPredictions,'output.txt') print("Accuracy is: " ,knn.accuracy(finalPredictions,yTest))
# 201420907_homework1 main.py import knn import numpy as np train_data = np.loadtxt(fname='./digits_data/digits_train.csv', delimiter=',', dtype='float64') # Training Set load X_train, Y_train = knn.train(train_data) # KNN don't need training step. So, knn.train(data) just load train_data. test_data = np.loadtxt(fname='./digits_data/digits_test.csv', delimiter=',', dtype='float64') # Test Set load X_test = np.array(test_data[:, 1:], dtype='float64') # Features of Test set data Y_test = np.array(test_data[:, 0], dtype='int64') # Labels of Test set data # knn.predict(train_x, train_y, test_x, k) knn_Y_pred = knn.predict(X_train, Y_train, X_test, 1) # k means n of neighbors # Calculation of TP, TN, FP, FN TP = np.zeros(10) # True Positive (Correct Predicted target class) TN = np.zeros(10) # True Negative (Correct Predicted not-target class) FP = np.zeros(10) # False Positive (Incorrect Predicted target class) FN = np.zeros(10) # False Negative (Incorrect Predicted non-target class) Confusion_matrix = np.zeros((10, 10), dtype='int64') for idx in range(200): Confusion_matrix[Y_test[idx]][knn_Y_pred[idx]] += 1 for i in range(10): TP[i] = Confusion_matrix[i][i] for j in range(10): if i != j:
└── ... ''' if __name__ == "__main__": if len(sys.argv) < 3: print("usage: python3 %s <algorithm> <train_data_dir> [model_name]" % sys.argv[0]) sys.exit(2) face_algorithm = sys.argv[1] if face_algorithm not in ALGORITHM.keys(): print('Algorithm not found!') sys.exit(2) train_data_dir = sys.argv[2] if len(sys.argv) > 3: model_name = sys.argv[3] else: model_name = 'trained_knn_model' # Train the KNN classifier and save it to disk print("Training KNN classifier...") classifier = knn.train(train_data_dir, model_save_path=model_name + ALGORITHM[face_algorithm]['ext'], n_neighbors=2, face_algorithm=face_algorithm) print("Training complete!")