def testwine():
    """Measure FastKNN accuracy on the red and white wine-quality CSV data.

    The rows returned by ``dataifywine`` for both files are combined with
    ``+``; the first 10% become the testing set and the rest the training
    set.  Every testing row is queried with its last element removed and the
    guess is compared against that last element.  Progress is printed every
    50 queries, followed by the accuracy and the average number of checks
    per query (``foo.num_checks / total``).

    Returns None; results are only printed.
    """
    wine_rows = (dataifywine('wine/winequality-red.csv') +
                 dataifywine('wine/winequality-white.csv'))
    leftoutperc = 0.1
    leftout = int(len(wine_rows) * leftoutperc)
    testing = wine_rows[:leftout]
    training = wine_rows[leftout:]
    # Single-argument print(...) with %-formatting emits the same text under
    # both Python 2 and Python 3.
    print("Training: %s" % len(training))
    print("Testing: %s" % len(testing))

    # NOTE(review): the meaning of the constructor argument 10 is not visible
    # from this function -- confirm against FastKNN.
    foo = FastKNN(10)
    foo.addEvidence(numpy.array([row[:-1] for row in training]),
                    [row[-1] for row in training])
    # The reference knn learner is still fed the full rows (label included)
    # even though it is not queried here; kept so that state is unchanged.
    knn.addEvidence(numpy.array(training))

    total = 0
    correct = 0
    for total, row in enumerate(testing, 1):
        # NOTE(review): 3 is presumably the neighbour count -- confirm.
        guess = foo.query(numpy.array(row[:-1]), 3)
        if guess == row[-1]:
            correct += 1
        if total % 50 == 0:
            print("%s / %s" % (total, len(testing)))

    if total == 0:
        # An empty hold-out set used to end in ZeroDivisionError below.
        print("No testing rows; accuracy is undefined")
        return

    print("%s / %s : %s" % (correct, total, float(correct) / float(total)))
    print("Average checks per query: %s"
          % (float(foo.num_checks) / float(total)))
def testwine():
    """Measure FastKNN accuracy on the red and white wine-quality CSV data.

    The rows returned by ``dataifywine`` for both files are combined with
    ``+``; the first 10% become the testing set and the rest the training
    set.  Every testing row is queried with its last element removed and the
    guess is compared against that last element.  Progress is printed every
    50 queries, followed by the accuracy and the average number of checks
    per query (``foo.num_checks / total``).

    Returns None; results are only printed.
    """
    wine_rows = (dataifywine('wine/winequality-red.csv') +
                 dataifywine('wine/winequality-white.csv'))
    leftoutperc = 0.1
    leftout = int(len(wine_rows) * leftoutperc)
    testing = wine_rows[:leftout]
    training = wine_rows[leftout:]
    # Single-argument print(...) with %-formatting emits the same text under
    # both Python 2 and Python 3.
    print("Training: %s" % len(training))
    print("Testing: %s" % len(testing))

    # NOTE(review): the meaning of the constructor argument 10 is not visible
    # from this function -- confirm against FastKNN.
    foo = FastKNN(10)
    foo.addEvidence(numpy.array([row[:-1] for row in training]),
                    [row[-1] for row in training])
    # The reference knn learner is still fed the full rows (label included)
    # even though it is not queried here; kept so that state is unchanged.
    knn.addEvidence(numpy.array(training))

    total = 0
    correct = 0
    for total, row in enumerate(testing, 1):
        # NOTE(review): 3 is presumably the neighbour count -- confirm.
        guess = foo.query(numpy.array(row[:-1]), 3)
        if guess == row[-1]:
            correct += 1
        if total % 50 == 0:
            print("%s / %s" % (total, len(testing)))

    if total == 0:
        # An empty hold-out set used to end in ZeroDivisionError below.
        print("No testing rows; accuracy is undefined")
        return

    print("%s / %s : %s" % (correct, total, float(correct) / float(total)))
    print("Average checks per query: %s"
          % (float(foo.num_checks) / float(total)))
def _knnResult(naData):
    """Return the fraction of held-out rows for which knn's answer is correct.

    The first 70% of the rows of ``naData`` are passed to ``knn.addEvidence``
    (cast to float); the remaining rows are queried with their last column
    omitted, using 5 neighbours and the ``'mode'`` method.  Each result is
    compared with the last column of the corresponding held-out row.

    naData -- array indexed as ``naData[row, col]``; the last column holds
              the value each query result is compared against.

    Returns the number of matching results divided by ``naResults.size``,
    as a float.
    """
    # Slice bounds must be integers: a float bound is rejected by current
    # NumPy.  int() truncates, as older NumPy did implicitly.
    lSplit = int(naData.shape[0] * .7)
    naTrain = naData[:lSplit, :]
    naTest = naData[lSplit:, :]

    # NOTE(review): the second argument (1) presumably tells knn that the
    # last column is the class label -- confirm against knn.addEvidence.
    knn.addEvidence(naTrain.astype(float), 1)

    # Query with last column omitted and 5 nearest neighbors.
    naResults = knn.query(naTest[:, :-1], 5, 'mode')

    # Count returns which are correct.
    lCount = sum(1 for i, dVal in enumerate(naResults)
                 if dVal == naTest[i, -1])
    return float(lCount) / naResults.size