Esempio n. 1
0
def main(dataset_name, testset_name, new_emails=False):
    '''Run the kNN classifier for a training set and a test set.

    Both sets are looked up as sub-directories of the directory that
    contains this file. Predictions are produced for every k in a fixed
    list; unless ``new_emails`` is set, the accuracy for each k is printed
    and plotted against k.

    Args:
        dataset_name: Name of the training-set directory, relative to the
            directory of this file.
        testset_name: Name of the test-set directory, relative to the
            directory of this file. A ``results`` sub-directory is created
            inside it when missing.
        new_emails: When true, the test directory itself is used as the only
            email folder and the accuracy report and plot are skipped.
    '''
    base_dir = os.path.dirname(os.path.abspath(__file__))
    # Joining with a trailing "" keeps a trailing separator on every path
    # (the helpers receive these as directory prefixes) while using the
    # platform's separator instead of a hard-coded Windows backslash.
    trainingset_path = os.path.join(base_dir, dataset_name, "")
    testset_path = os.path.join(base_dir, testset_name, "")
    results_path = os.path.join(testset_path, "results", "")

    if not os.path.exists(results_path):
        os.mkdir(results_path)

    # Immediate sub-directories of the test set, excluding our own output.
    folder_names = [
        name for name in next(os.walk(testset_path))[1] if name != 'results'
    ]
    if new_emails:
        folder_names = [""]

    workfilename = 'mergedworkfile.csv'
    wordfilename = 'wordfile.csv'
    # klist = [1, 3, 7, 15, 24, 33, 42, 50]
    klist = [1, 3]
    acc = []
    # Filled in place by ex.loadTrainingset.
    trainingSet = []
    # Explicit conditional so any truthy flag yields exactly one word.
    email_kind = 'New' if new_emails else 'Test'

    print("Loading Training Set...")
    wordsd, subd, digramsd, trigramsd = ex.loadTrainingset(
        trainingset_path, workfilename, wordfilename, trainingSet)
    print("Training Set loaded.")

    print('Collecting ' + email_kind + ' Emails...')
    testSet, all_files = ex.loadTestset(testset_path, folder_names, wordsd,
                                        subd, digramsd, trigramsd)
    print(email_kind + ' Emails Collected.')

    assert len(trainingSet[0]) == len(testSet[0]), \
        'training and test rows must have the same number of columns'

    list_of_predictions = knn.classify(klist, trainingSet, testSet,
                                       results_path)

    if not new_emails:
        # list_of_predictions is indexed [test row][position of k in klist];
        # pull out one column per k and score it against the test set.
        for i, k in enumerate(klist):
            predictions = [
                list_of_predictions[x][i] for x in range(len(testSet))
            ]
            accuracy = knn.getAccuracy(testSet, predictions)
            acc.append(accuracy)
            print('K: ' + repr(k))
            print('Accuracy: ' + repr(accuracy) + '%')

        print('Overall Accuracy: ' + str(sum(acc) / len(acc)) + "%")
        plt.plot(klist, acc)
        plt.xlabel('K')
        plt.ylabel('Accuracy')
        plt.show()

    print('Find the results at: ' + results_path)
def main(dataset_name, testset_name, new_emails=False):
    '''Run the kNN classifier for a training set and a test set.

    Both sets are looked up as sub-directories of the directory that
    contains this file. Predictions are produced for every k in a fixed
    list; unless ``new_emails`` is set, the accuracy for each k is printed
    and plotted against k.

    Args:
        dataset_name: Name of the training-set directory, relative to the
            directory of this file.
        testset_name: Name of the test-set directory, relative to the
            directory of this file. A ``results`` sub-directory is created
            inside it when missing.
        new_emails: When true, the test directory itself is used as the only
            email folder and the accuracy report and plot are skipped.
    '''
    here = os.path.dirname(os.path.abspath(__file__))
    # A trailing "" component makes os.path.join end each path with the
    # platform separator, so the paths still work as directory prefixes
    # without hard-coding the Windows backslash.
    trainingset_path = os.path.join(here, dataset_name, "")
    testset_path = os.path.join(here, testset_name, "")
    results_path = os.path.join(testset_path, "results", "")

    if not os.path.exists(results_path):
        os.mkdir(results_path)

    # First os.walk entry lists the immediate sub-directories; the output
    # directory is not an email folder, so leave it out.
    subdirs = next(os.walk(testset_path))[1]
    folder_names = [d for d in subdirs if d != 'results']
    if new_emails:
        folder_names = [""]

    workfilename = 'mergedworkfile.csv'
    wordfilename = 'wordfile.csv'
    # klist = [1, 3, 7, 15, 24, 33, 42, 50]
    klist = [1, 3]
    acc = []
    # Filled in place by ex.loadTrainingset.
    trainingSet = []
    # Explicit conditional so any truthy flag yields exactly one word.
    label = 'New' if new_emails else 'Test'

    print("Loading Training Set...")
    wordsd, subd, digramsd, trigramsd = ex.loadTrainingset(
        trainingset_path, workfilename, wordfilename, trainingSet)
    print("Training Set loaded.")

    print('Collecting ' + label + ' Emails...')
    testSet, all_files = ex.loadTestset(
        testset_path, folder_names, wordsd, subd, digramsd, trigramsd)
    print(label + ' Emails Collected.')

    assert len(trainingSet[0]) == len(testSet[0]), \
        'training and test rows must have the same number of columns'

    list_of_predictions = knn.classify(
        klist, trainingSet, testSet, results_path)

    if not new_emails:
        # list_of_predictions is indexed [test row][position of k in klist];
        # pull out one column per k and score it against the test set.
        for i, k in enumerate(klist):
            predictions = [
                list_of_predictions[x][i] for x in range(len(testSet))
            ]
            accuracy = knn.getAccuracy(testSet, predictions)
            acc.append(accuracy)
            print('K: ' + repr(k))
            print('Accuracy: ' + repr(accuracy) + '%')

        print('Overall Accuracy: ' + str(sum(acc) / len(acc)) + "%")
        plt.plot(klist, acc)
        plt.xlabel('K')
        plt.ylabel('Accuracy')
        plt.show()

    print('Find the results at: ' + results_path)
Esempio n. 3
0
# Class label -> row/column index used in the count matrix below.
dictClasses = {
    'carettacaretta': 0,
    'cheloniamydas': 1,
    'dermochelyscoriacea': 2,
    'eretmochelysimbricata': 3,
    'lepidochelysolivacea': 4,
}

# 5x5 matrix of counts, indexed as preds[actual class][predicted class].
preds = [[0] * 5 for _ in range(5)]

# Not used in the visible part of this script; presumably the number of
# images per class for a later normalization step -- confirm where used.
numeroImagensPorClasse = 30.00

# Classify every test row, record the prediction and tally it in the matrix.
# The true label is the last element of each test row.
for x, sample in enumerate(testSet):
    neighbors = knn.getNeighbors(trainingSet, sample, k)
    result = knn.getResponse(neighbors)
    predictions.append(result)
    indiceClassePred = dictClasses[result]
    indiceClasseActual = dictClasses[sample[-1]]
    preds[indiceClasseActual][indiceClassePred] += 1
    print('> predicted=' + repr(result) + ', actual=' + repr(sample[-1]))

accuracy = knn.getAccuracy(testSet, predictions)
print('Accuracy: ' + repr(accuracy) + '%')








# Alias (not a copy) of the preds count matrix.
conf_arr = preds

# Output list for the loop below. NOTE(review): the loop body is cut off in
# this view; presumably it builds a normalized copy of each row -- confirm
# against the full script.
norm_conf = []
for i in conf_arr:  # i is one row of conf_arr
    a = 0
    tmp_arr = []
Esempio n. 4
0
# -*- coding: utf-8 -*-

import numpy as np
import cv2 as cv

import Dataset
import knn

# Load the dataset from a CSV file resolved against the current working
# directory and show the first rows.
path = "ordo_2.csv"
DS = Dataset.Dataset(path)
df = DS.getDF()
print(df.head())

# Presumably X holds the features and Y the labels -- confirm against
# Dataset.getXY.
X, Y = DS.getXY()
print(X)

k = 7
#gnb = GNB.GNB(X, Y)
# Give the classifier instance its own name: assigning it to `knn` would
# rebind the imported module and make it unreachable for any later use.
classifier = knn.kNN(k, X, Y)
accuracy = classifier.getAccuracy()
print(accuracy)
Esempio n. 5
0
        prototypes_lvq1 = train_prototypes_lvq1(lvq_training_set, n_prototypes,
                                                lrate, epochs)
        prototypes_lvq2 = train_prototypes_lvq2(prototypes_lvq1,
                                                lvq_training_set, lrate,
                                                epochs)
        prototypes_lvq3 = train_prototypes_lvq3(prototypes_lvq2,
                                                lvq_training_set, lrate,
                                                epochs)

        for k in kn:
            predictions = []
            for row in range(len(knn_test_set)):
                neighbors = getNeighbors(prototypes_lvq1, knn_test_set[row], k)
                results = getResponse(neighbors)
                predictions.append(results)
            accuracy = getAccuracy(knn_test_set, predictions)
            if k == 1:
                accuracy_lvq1_k1.append(accuracy)
            else:
                accuracy_lvq1_k3.append(accuracy)
            print('With LVQ1 for ' + str(n_prototypes) +
                  '  and for dataset -> ' + str(filename) +
                  ' accuracy for k= ' + str(k) + ': ' + repr(accuracy) + '%')

        for k in kn:
            predictions = []
            for row in range(len(knn_test_set)):
                neighbors = getNeighbors(prototypes_lvq2, knn_test_set[row], k)
                results = getResponse(neighbors)
                predictions.append(results)
            accuracy = getAccuracy(knn_test_set, predictions)