def main(dataset_name, testset_name, new_emails=False):
    """Run the kNN classifier for a training set and a test set.

    Both sets are looked up as sub-directories of the directory containing
    this file. A ``results`` directory is created inside the test-set
    directory when missing; its path is handed to ``knn.classify`` and
    printed at the end.

    Args:
        dataset_name: Name of the training-set directory.
        testset_name: Name of the test-set directory.
        new_emails: When true, a single empty folder name is passed to
            ``ex.loadTestset`` instead of the test set's sub-folders, and
            the accuracy report and plot are skipped.

    Raises:
        ValueError: If either set is empty or their rows differ in length.
    """
    current_path = os.path.dirname(os.path.abspath(__file__))
    # Joining with "" keeps the trailing separator the paths always had (they
    # are handed to helpers as directory prefixes) while using the platform
    # separator instead of a hard-coded "\\".
    trainingset_path = os.path.join(current_path, dataset_name, "")
    testset_path = os.path.join(current_path, testset_name, "")
    results_path = os.path.join(testset_path, "results", "")
    if not os.path.exists(results_path):
        os.mkdir(results_path)

    if new_emails:
        folder_names = [""]
    else:
        # Immediate sub-folders of the test set, minus our own output folder.
        folder_names = [name for name in next(os.walk(testset_path))[1]
                        if name != 'results']

    workfilename = 'mergedworkfile.csv'
    wordfilename = 'wordfile.csv'
    # Alternative, wider sweep: [1, 3, 7, 15, 24, 33, 42, 50]
    klist = [1, 3]
    acc = []
    ks = []
    trainingSet = []  # filled in place by ex.loadTrainingset

    print("Loading Training Set...")
    wordsd, subd, digramsd, trigramsd = ex.loadTrainingset(
        trainingset_path, workfilename, wordfilename, trainingSet)
    print("Training Set loaded.")

    label = 'New' if new_emails else 'Test'
    print('Collecting ' + label + ' Emails...')
    testSet, _all_files = ex.loadTestset(
        testset_path, folder_names, wordsd, subd, digramsd, trigramsd)
    print(label + ' Emails Collected.')

    # Explicit checks instead of ``assert`` so they survive ``python -O`` and
    # report an empty set clearly rather than failing with an IndexError.
    if len(trainingSet) == 0 or len(testSet) == 0:
        raise ValueError('training set and test set must both be non-empty')
    if len(trainingSet[0]) != len(testSet[0]):
        raise ValueError(
            'training rows have ' + str(len(trainingSet[0])) +
            ' entries but test rows have ' + str(len(testSet[0])))

    list_of_predictions = knn.classify(klist, trainingSet, testSet,
                                       results_path)

    if not new_emails:
        # list_of_predictions[x][i] is the prediction for test mail x with
        # klist[i]; collect one column per k and score it.
        for i, k in enumerate(klist):
            predictions = [list_of_predictions[x][i]
                           for x in range(len(testSet))]
            accuracy = knn.getAccuracy(testSet, predictions)
            acc.append(accuracy)
            ks.append(k)
            print('K: ' + repr(k))
            print('Accuracy: ' + repr(accuracy) + '%')
        print('Overall Accuracy: ' + str(sum(acc) / len(acc)) + "%")
        plt.plot(ks, acc)
        plt.xlabel('K')
        plt.ylabel('Accuracy')
        plt.show()

    print('Find the results at: ' + results_path)
def main(dataset_name, testset_name, new_emails=False):
    """Run the kNN classifier for a training set and a test set.

    Both sets are looked up as sub-directories of the directory containing
    this file. A ``results`` directory is created inside the test-set
    directory when missing; its path is handed to ``knn.classify`` and
    printed at the end.

    Args:
        dataset_name: Name of the training-set directory.
        testset_name: Name of the test-set directory.
        new_emails: When true, a single empty folder name is passed to
            ``ex.loadTestset`` instead of the test set's sub-folders, and
            the accuracy report and plot are skipped.

    Raises:
        ValueError: If either set is empty or their rows differ in length.
    """
    current_path = os.path.dirname(os.path.abspath(__file__))
    # Joining with "" keeps the trailing separator the paths always had (they
    # are handed to helpers as directory prefixes) while using the platform
    # separator instead of a hard-coded "\\".
    trainingset_path = os.path.join(current_path, dataset_name, "")
    testset_path = os.path.join(current_path, testset_name, "")
    results_path = os.path.join(testset_path, "results", "")
    if not os.path.exists(results_path):
        os.mkdir(results_path)

    if new_emails:
        folder_names = [""]
    else:
        # Immediate sub-folders of the test set, minus our own output folder.
        folder_names = [name for name in next(os.walk(testset_path))[1]
                        if name != 'results']

    workfilename = 'mergedworkfile.csv'
    wordfilename = 'wordfile.csv'
    # Alternative, wider sweep: [1, 3, 7, 15, 24, 33, 42, 50]
    klist = [1, 3]
    acc = []
    ks = []
    trainingSet = []  # filled in place by ex.loadTrainingset

    print("Loading Training Set...")
    wordsd, subd, digramsd, trigramsd = ex.loadTrainingset(
        trainingset_path, workfilename, wordfilename, trainingSet)
    print("Training Set loaded.")

    label = 'New' if new_emails else 'Test'
    print('Collecting ' + label + ' Emails...')
    testSet, _all_files = ex.loadTestset(
        testset_path, folder_names, wordsd, subd, digramsd, trigramsd)
    print(label + ' Emails Collected.')

    # Explicit checks instead of ``assert`` so they survive ``python -O`` and
    # report an empty set clearly rather than failing with an IndexError.
    if len(trainingSet) == 0 or len(testSet) == 0:
        raise ValueError('training set and test set must both be non-empty')
    if len(trainingSet[0]) != len(testSet[0]):
        raise ValueError(
            'training rows have ' + str(len(trainingSet[0])) +
            ' entries but test rows have ' + str(len(testSet[0])))

    list_of_predictions = knn.classify(klist, trainingSet, testSet,
                                       results_path)

    if not new_emails:
        # list_of_predictions[x][i] is the prediction for test mail x with
        # klist[i]; collect one column per k and score it.
        for i, k in enumerate(klist):
            predictions = [list_of_predictions[x][i]
                           for x in range(len(testSet))]
            accuracy = knn.getAccuracy(testSet, predictions)
            acc.append(accuracy)
            ks.append(k)
            print('K: ' + repr(k))
            print('Accuracy: ' + repr(accuracy) + '%')
        print('Overall Accuracy: ' + str(sum(acc) / len(acc)) + "%")
        plt.plot(ks, acc)
        plt.xlabel('K')
        plt.ylabel('Accuracy')
        plt.show()

    print('Find the results at: ' + results_path)
# Class label -> row/column index used for the confusion matrix below.
dictClasses = {
    'carettacaretta': 0,
    'cheloniamydas': 1,
    'dermochelyscoriacea': 2,
    'eretmochelysimbricata': 3,
    'lepidochelysolivacea': 4,
}
# Square matrix of counts indexed as preds[actual][predicted].
preds = [[0] * len(dictClasses) for _ in dictClasses]
# Images per class, going by the name — presumably used when the matrix is
# normalised; confirm against the code that follows.
numeroImagensPorClasse = 30.00

# Classify every test sample, record the prediction and tally it in the matrix.
for sample in testSet:
    actual = sample[-1]  # the label is the last entry of a sample
    result = knn.getResponse(knn.getNeighbors(trainingSet, sample, k))
    predictions.append(result)
    predicted_idx = dictClasses[result]
    actual_idx = dictClasses[actual]
    preds[actual_idx][predicted_idx] += 1
    print('> predicted=' + repr(result) + ', actual=' + repr(actual))

accuracy = knn.getAccuracy(testSet, predictions)
print('Accuracy: ' + repr(accuracy) + '%')

conf_arr = preds
norm_conf = []
for i in conf_arr:
    a = 0
    tmp_arr = []
# -*- coding: utf-8 -*-
"""Load ``ordo_2.csv`` via ``Dataset`` and print a k-NN classifier's accuracy."""
import numpy as np
import cv2 as cv
import Dataset
import knn

path = "ordo_2.csv"
DS = Dataset.Dataset(path)
df = DS.getDF()
print(df.head())

X, Y = DS.getXY()
print(X)

k = 7
# Disabled alternative (GNB is not imported in this script):
#gnb = GNB.GNB(X, Y)
# Use a separate name for the instance so the imported ``knn`` module is not
# overwritten and stays usable afterwards.
classifier = knn.kNN(k, X, Y)
accuracy = classifier.getAccuracy()
print(accuracy)
# Train the prototype sets in sequence: the LVQ2 trainer receives the LVQ1
# prototypes and the LVQ3 trainer receives the LVQ2 prototypes.
prototypes_lvq1 = train_prototypes_lvq1(
    lvq_training_set, n_prototypes, lrate, epochs)
prototypes_lvq2 = train_prototypes_lvq2(
    prototypes_lvq1, lvq_training_set, lrate, epochs)
prototypes_lvq3 = train_prototypes_lvq3(
    prototypes_lvq2, lvq_training_set, lrate, epochs)

# Classify the test set against the LVQ1 prototypes for every k in kn.
for k in kn:
    predictions = [
        getResponse(getNeighbors(prototypes_lvq1, knn_test_set[idx], k))
        for idx in range(len(knn_test_set))
    ]
    accuracy = getAccuracy(knn_test_set, predictions)
    # Any k other than 1 is recorded in the k=3 list.
    (accuracy_lvq1_k1 if k == 1 else accuracy_lvq1_k3).append(accuracy)
    print('With LVQ1 for ' + str(n_prototypes)
          + ' and for dataset -> ' + str(filename)
          + ' accuracy for k= ' + str(k)
          + ': ' + repr(accuracy) + '%')

# Same evaluation against the LVQ2 prototypes.
for k in kn:
    predictions = [
        getResponse(getNeighbors(prototypes_lvq2, knn_test_set[idx], k))
        for idx in range(len(knn_test_set))
    ]
    accuracy = getAccuracy(knn_test_set, predictions)