예제 #1
0
파일: test.py 프로젝트: pang43/Study
def main():
    """Run a hold-out evaluation of the kNN classifier on the dating data set.

    The first ``part`` fraction of the records is held out as the test set;
    the remaining records serve as the training set.  Prints each prediction
    next to the true label, then the overall error rate.
    """
    # Fraction of the data set held out for testing.
    part = 0.1
    # Use the file reader to get the data from the file.
    datatest, label = fp.filereader("datingTestSet2.txt")
    # Normalize each feature so no single feature dominates the distance metric.
    datatest, ranges, minval = am.autonorm(datatest)

    m = datatest.shape[0]  # total number of records
    errorcount = 0  # number of misclassified test records

    k = int(m * part)  # size of the test set

    # Guard against an empty test split (m * part < 1) to avoid a
    # ZeroDivisionError when the error rate is reported below.
    if k == 0:
        print("Data set too small for a %s test split" % part)
        return

    for i in range(k):
        # Classify test record i against the training portion [k:m].
        p = kNN.knn(datatest[i], datatest[k:m], label[k:m], 3)
        if p != label[i]:
            errorcount += 1
        # BUGFIX: the original message printed the real label in the "label"
        # slot and the prediction in the "real label" slot — the format
        # arguments were swapped relative to the message text.
        print("The predicted label is %s , the real label is %s" % (p, int(label[i])))
    # BUGFIX: the original message said "error count" but the printed value
    # is the error rate (errors divided by the number of test records).
    print("The error rate of the test split is %s" % (errorcount * 1.0 / k))
예제 #2
0
'''
    This example file shows how to tune the hyper parameter k.
'''

# Project-local modules: the kNN trainer and the synthetic-data modeler.
import kNN as knn
import modeler as model
# matplotlib is used to plot the validation error as a function of k.
import matplotlib.pyplot as plt

# Build an untuned kNN trainer; candidate values of k are swept below.
trainer = knn.knn()

# Training data, with some noise so the problem is not trivially separable.
X, Y = model.generateData(100, noiseFactor=.2)

# Validation data.  NOTE(review): generated noise-free here for simplicity,
# even though real-world validation data would carry noise like the
# training data does.
validX, validY = model.generateData(25, noiseFactor=0)

# Test data, also noise-free.
testX, testY = model.generateData(25, noiseFactor=0)

# Load the training data into the trainer.
trainer.loadData(X, Y)

# Maps each candidate k to its error on the validation set.
validErrors = {}
# Candidate k values: odd numbers only — presumably to avoid ties in the
# majority vote (TODO confirm against kNN.knn's voting rule).
ks = range(1, 100, 2)
for k in ks:
    #holds the error count for this k
예제 #3
0
'''
    This file serves to be an example on how to use kNN. This file is the simplified version with no graphs. For an example with graphs, check example.py
    Change k, noise, and amount of data to see how accuracy is affected.
'''

import kNN as knn
import modeler

# Set up the trainer.  Tune your k parameter here.
trainer = knn.knn(k=5)

# Using the basic modeler provided, define how many elements and how much
# noise we want in the training data.
parameters, labels = modeler.generateData(100, noiseFactor=.25)

# Get parameters to test on.  These should have 0 noise so we can
# accurately score the classifier.
testParameters, testLabel = modeler.generateData(25, noiseFactor=0)

# Load the training data into the trainer.
trainer.loadData(parameters, labels)

# Number of incorrect predictions.
error = 0
# Walk the test samples and their true labels in lockstep — zip is the
# idiomatic replacement for indexing via range(len(...)).
for sample, actual in zip(testParameters, testLabel):
    # Use the trainer to get a prediction for this sample.
    confidence, guess = trainer.predict(sample, negativeValue=0)
    # Count the prediction as an error if it does not match the true label.
    if guess != actual:
        error += 1

# calculate and print error
예제 #4
0
import naive_bayes as nb
import kNN
from datetime import datetime

# Time the naive Bayes run.
start_time = datetime.now()
nb.nb()
end_time = datetime.now()
# BUGFIX: the duration reports were commented out (and written as Python 2
# print statements); restore them using the print() function, consistent
# with the rest of the file, so each benchmark actually reports its time.
print('Duration: {}'.format(end_time - start_time))

print('==========================================')

# BUGFIX: reset the start time before the second benchmark so the kNN
# duration does not include the naive Bayes run (the original left the
# reset commented out).
start_time = datetime.now()
kNN.knn()
end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))