def main():
    """Evaluate kNN classification accuracy on the dating data set.

    Reads the samples from ``datingTestSet2.txt``, min-max normalizes the
    features, holds out the first ``part`` fraction as the test set, and
    classifies each held-out sample against the remainder with k=3.
    Prints each prediction next to its ground-truth label, then the
    overall error rate.
    """
    # Fraction of the data reserved for testing.
    part = 0.1
    # Read the samples and their labels from the file (project helper).
    datatest, label = fp.filereader("datingTestSet2.txt")
    # Scale every feature into [0, 1] so no single feature dominates the
    # distance computation.
    datatest, ranges, minval = am.autonorm(datatest)
    m = datatest.shape[0]      # total number of samples
    errorcount = 0             # misclassified test samples
    k = int(m * part)          # number of test samples (first k rows)
    for i in range(k):
        # Classify test sample i against the remaining (training) rows.
        p = kNN.knn(datatest[i], datatest[k:m], label[k:m], 3)
        if p != label[i]:
            errorcount += 1
        # BUGFIX: the arguments were swapped relative to the message --
        # the prediction belongs to "The label is", the ground truth to
        # "the real label".
        print("The label is %s , the real label is %s" % (p, int(label[i])))
    print("The error count of the percentile is %s" % (errorcount * 1.0 / k))
''' This example file shows how to tune the hyper parameter k. ''' #get trainer and model import kNN as knn import modeler as model #using pyplotlib to plot error with k import matplotlib.pyplot as plt #get trainer trainer = knn.knn() #get training data, with some noise X, Y = model.generateData(100, noiseFactor=.2) #get validation data, we will assert that this data has no noise, even though this is not accurate in real data validX, validY = model.generateData(25, noiseFactor=0) #get test data, also with no noise testX, testY = model.generateData(25, noiseFactor=0) #load training data trainer.loadData(X, Y) #holds error on validation set for each k validErrors = {} #setup k's to test ks = range(1, 100, 2) for k in ks: #holds the error count for this k
'''
Minimal kNN usage example (no graphs -- see example.py for the plotted
version).

Generates a noisy training set and a clean test set with the bundled
``modeler`` helper, trains a k-nearest-neighbour classifier, and tallies
how many test points it misclassifies.  Adjust k, the noise factor, and
the sample counts to observe how accuracy is affected.
'''
import kNN as knn
import modeler

# Build the classifier; k is the hyper-parameter you tune here.
trainer = knn.knn(k=5)

# Training data: 100 points with 25% noise injected.
parameters, labels = modeler.generateData(100, noiseFactor=.25)

# Test data: zero noise, so the accuracy measurement itself is clean.
testParameters, testLabel = modeler.generateData(25, noiseFactor=0)

# Hand the training set to the classifier.
trainer.loadData(parameters, labels)

# Running count of misclassified test points.
error = 0
for i in range(len(testParameters)):
    # Ask the trained model for its best guess on this test point.
    confidence, guess = trainer.predict(testParameters[i], negativeValue=0)
    # A wrong guess bumps the error tally.
    if guess != testLabel[i]:
        error += 1
# Compute and report the error rate.
import naive_bayes as nb
import kNN
from datetime import datetime

# Time the naive Bayes run.
start_time = datetime.now()
# print(start_time)
nb.nb()
end_time = datetime.now()
# NOTE(review): if the duration prints below are re-enabled, reset the
# clock here (start_time = end_time), otherwise the second measurement
# includes the naive Bayes run as well.
# start_time = end_time
# print('Duration: {}'.format(end_time - start_time))

# Visual separator between the two classifier runs.
# Converted from the Python 2 print statement to the parenthesized call:
# identical output under Python 2 for a single argument, and valid
# Python 3.
print('==========================================')

# Time the kNN run.
kNN.knn()
end_time = datetime.now()
# print('Duration: {}'.format(end_time - start_time))