def testwine():
    """Score FastKNN on the wine-quality CSVs and print the hit rate.

    Rows from the red and white files are concatenated; the first 10% are
    held out for testing and the rest are used as training evidence. Each
    row is treated as feature values followed by the label in the last
    position. Progress is printed every 50 queries, followed by the
    fraction of exact label matches and the average of `foo.num_checks`
    per query.

    Returns:
        None. All results are printed.
    """
    samples = dataifywine('wine/winequality-red.csv') + dataifywine('wine/winequality-white.csv')
    leftoutperc = 0.1
    leftout = int(len(samples) * leftoutperc)
    # NOTE(review): the rows are not shuffled, so the held-out slice is taken
    # from the front of the concatenation, i.e. it starts with the red-wine
    # rows. Confirm that this split is intended.
    testing = samples[:leftout]
    training = samples[leftout:]
    # Single-argument print(...) emits the same text as the Python 2 print
    # statement and also parses on Python 3.
    print("Training: %s" % (len(training),))
    print("Testing: %s" % (len(testing),))
    foo = FastKNN(10)  # meaning of the 10 is defined by FastKNN -- TODO confirm
    foo.addEvidence(numpy.array([row[:-1] for row in training]), [row[-1] for row in training])
    # The module-level `knn` learner is loaded with the full rows as well.
    # Nothing in this function queries it; presumably it is kept so results
    # can be compared against it -- confirm before removing.
    knn.addEvidence(numpy.array(training))
    total = 0
    correct = 0
    for thing in testing:
        # Second argument is presumably the neighbour count -- TODO confirm.
        guess = foo.query(numpy.array(thing[:-1]), 3)
        if guess == thing[-1]:
            correct += 1
        total += 1
        if total % 50 == 0:
            print("%s / %s" % (total, len(testing)))
    if total == 0:
        # An empty held-out slice would otherwise end in ZeroDivisionError.
        print("No testing samples; nothing to score")
        return
    print("%s / %s : %s" % (correct, total, float(correct) / float(total)))
    print("Average checks per query: %s" % (float(foo.num_checks) / float(total),))
# Example #2
# 0
def testwine():
    """Score FastKNN on the wine-quality CSVs and print the hit rate.

    Rows from the red and white files are concatenated; the first 10% are
    held out for testing and the rest are used as training evidence. Each
    row is treated as feature values followed by the label in the last
    position. Progress is printed every 50 queries, followed by the
    fraction of exact label matches and the average of `foo.num_checks`
    per query.

    Returns:
        None. All results are printed.
    """
    samples = dataifywine('wine/winequality-red.csv') + dataifywine(
        'wine/winequality-white.csv')
    leftoutperc = 0.1
    leftout = int(len(samples) * leftoutperc)
    # NOTE(review): the rows are not shuffled, so the held-out slice is taken
    # from the front of the concatenation, i.e. it starts with the red-wine
    # rows. Confirm that this split is intended.
    testing = samples[:leftout]
    training = samples[leftout:]
    # Single-argument print(...) emits the same text as the Python 2 print
    # statement and also parses on Python 3.
    print("Training: %s" % (len(training),))
    print("Testing: %s" % (len(testing),))
    foo = FastKNN(10)  # meaning of the 10 is defined by FastKNN -- TODO confirm
    features = numpy.array([row[:-1] for row in training])
    labels = [row[-1] for row in training]
    foo.addEvidence(features, labels)
    # The module-level `knn` learner is loaded with the full rows as well.
    # Nothing in this function queries it; presumably it is kept so results
    # can be compared against it -- confirm before removing.
    knn.addEvidence(numpy.array(training))
    total = 0
    correct = 0
    for thing in testing:
        # Second argument is presumably the neighbour count -- TODO confirm.
        guess = foo.query(numpy.array(thing[:-1]), 3)
        if guess == thing[-1]:
            correct += 1
        total += 1
        if total % 50 == 0:
            print("%s / %s" % (total, len(testing)))
    if total == 0:
        # An empty held-out slice would otherwise end in ZeroDivisionError.
        print("No testing samples; nothing to score")
        return
    print("%s / %s : %s" % (correct, total, float(correct) / float(total)))
    print("Average checks per query: %s" %
          (float(foo.num_checks) / float(total),))
def _knnResult(naData):
    """Return the fraction of held-out rows whose label `knn` reproduces.

    The first 70% of the rows of `naData` are passed to `knn.addEvidence`;
    the remaining rows are queried with their last column removed, and each
    returned value is compared with that row's last column.

    Args:
        naData: 2-D array exposing `.shape`, `[rows, cols]` slicing and
            `.astype` (presumably a numpy array whose last column holds the
            label -- confirm against callers).

    Returns:
        float: number of exact matches divided by `naResults.size`.
    """
    # A slice bound must be an integer: shape[0] * .7 is a float, which
    # current NumPy rejects. int() truncates, matching what older NumPy
    # releases did silently.
    lSplit = int(naData.shape[0] * .7)
    naTrain = naData[:lSplit, :]
    naTest = naData[lSplit:, :]

    # Second argument's meaning is defined by knn.addEvidence -- TODO confirm.
    knn.addEvidence(naTrain.astype(float), 1)

    # Query with the last column omitted and 5 nearest neighbors.
    naResults = knn.query(naTest[:, :-1], 5, 'mode')

    # Count the returned values that equal the held-out last column.
    lCount = sum(1 for i, dVal in enumerate(naResults)
                 if dVal == naTest[i, -1])

    return float(lCount) / naResults.size
def _knnResult(naData):
    """Return the fraction of held-out rows whose label `knn` reproduces.

    The first 70% of the rows of `naData` are passed to `knn.addEvidence`;
    the remaining rows are queried with their last column removed, and each
    returned value is compared with that row's last column.

    Args:
        naData: 2-D array exposing `.shape`, `[rows, cols]` slicing and
            `.astype` (presumably a numpy array whose last column holds the
            label -- confirm against callers).

    Returns:
        float: number of exact matches divided by `naResults.size`.
    """
    # A slice bound must be an integer: shape[0] * .7 is a float, which
    # current NumPy rejects. int() truncates, matching what older NumPy
    # releases did silently.
    lSplit = int(naData.shape[0] * .7)
    naTrain = naData[:lSplit, :]
    naTest = naData[lSplit:, :]

    # Second argument's meaning is defined by knn.addEvidence -- TODO confirm.
    knn.addEvidence(naTrain.astype(float), 1)

    # Query with the last column omitted and 5 nearest neighbors.
    naResults = knn.query(naTest[:, :-1], 5, 'mode')

    # Count the returned values that equal the held-out last column.
    lCount = sum(1 for i, dVal in enumerate(naResults)
                 if dVal == naTest[i, -1])

    dResult = float(lCount) / naResults.size

    return dResult