def classifyPerson():
    """Ask for three numeric traits and print the predicted label.

    The three answers are read from standard input, scaled with the minimum
    values and ranges that ``normalizer.autoNorm`` returns for the matrix
    loaded from 'datingTestSet2.txt', and handed to ``kNN.classify``.  The
    classifier result, minus one, is used as an index into the label table.
    """
    strLabels = ['not at all', 'small doses', 'large doses']
    # Prompts are asked in this order; the answers keep the same order in the
    # vector that is classified.
    prompts = (
        'frequent flier miles earned per year: ',
        'percentage of time spent playing video games: ',
        'liters of ice cream consumed per year: ',
    )

    print('')
    answers = [float(raw_input(prompt)) for prompt in prompts]

    datingDataMat, datingLabels = fileUtil.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = normalizer.autoNorm(datingDataMat)

    # Scale the new sample the same way the data set was scaled.
    scaledSample = (numpy.array(answers) - minVals) / ranges
    # NOTE(review): the trailing 3 is presumably the neighbour count k -
    # confirm against kNN.classify.
    classifierResult = kNN.classify(scaledSample, normMat, datingLabels, 3)

    print('')
    print('%s %s' % ('You will probably like this person:',
                     strLabels[classifierResult - 1]))
def classifyPerson():
    """Classify one interactively described person and print the verdict.

    Three floats are read from standard input.  They are normalised with the
    ``minVals`` and ``ranges`` produced by ``normalizer.autoNorm`` for the
    data in 'datingTestSet2.txt' and passed to ``kNN.classify``; the printed
    label is ``strLabels[result - 1]``.
    """
    def readNumber(question):
        # One prompt, one float; a non-numeric answer raises ValueError.
        return float(raw_input(question))

    strLabels = ['not at all', 'small doses', 'large doses']

    print('')
    ffMiles = readNumber('frequent flier miles earned per year: ')
    percentTats = readNumber('percentage of time spent playing video games: ')
    iceCream = readNumber('liters of ice cream consumed per year: ')

    datingDataMat, datingLabels = fileUtil.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = normalizer.autoNorm(datingDataMat)

    testVec = numpy.array([ffMiles, percentTats, iceCream])
    # Apply the data set's own offset and scale to the new vector.
    normalisedVec = (testVec - minVals) / ranges
    # NOTE(review): 3 looks like the number of neighbours consulted - verify
    # against kNN.classify.
    classifierResult = kNN.classify(normalisedVec, normMat, datingLabels, 3)
    verdict = strLabels[classifierResult - 1]

    print('')
    print(' '.join(['You will probably like this person:', verdict]))
def handwritingClassTest():
    """Classify every file in 'testDigits' against 'trainingDigits'.

    Each file name supplies its expected label: the integer that precedes
    the first '_' once everything from the first '.' onward is dropped.
    Prints one line per test file, then the error count and the error rate.
    """
    # load the training set: one 1024-wide row per training file
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    hwLabels = []
    for row, trainingName in enumerate(trainingFileList):
        hwLabels.append(int(trainingName.split('.')[0].split('_')[0]))
        trainingMat[row, :] = fileUtil.img2vector(
            'trainingDigits/%s' % trainingName)

    # iterate through the test set
    testFileList = listdir('testDigits')
    mTest = len(testFileList)
    errorCount = 0.0
    for testName in testFileList:
        classNumStr = int(testName.split('.')[0].split('_')[0])
        vectorUnderTest = fileUtil.img2vector('testDigits/%s' % testName)
        # NOTE(review): 3 is presumably the neighbour count k - confirm
        # against kNN.classify.
        classifierResult = kNN.classify(
            vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" % (
            classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0

    print("\ntotal number of errors: %d" % errorCount)
    print("\ntotal error rate: %f" % (errorCount / float(mTest)))
def datingClassTest():
    """Measure the classifier's error rate on a held-out part of the data.

    Loads 'datingTestSet2.txt', normalises it with ``normalizer.autoNorm``
    and treats the first ``int(m * hoRatio)`` rows as test vectors.  Each of
    them is classified against the remaining rows and their labels.  Prints
    one line per test vector, then the totals and the error rate.
    """
    # hold out for test
    hoRatio = 0.50

    # load data set from file
    datingDataMat, datingLabels = fileUtil.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = normalizer.autoNorm(datingDataMat)

    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)

    errorCount = 0.0
    for testRow in range(numTestVecs):
        # Rows and labels from numTestVecs onward form the reference set.
        # NOTE(review): 3 is presumably the neighbour count k - confirm
        # against kNN.classify.
        classifierResult = kNN.classify(
            normMat[testRow, :],
            normMat[numTestVecs:m, :],
            datingLabels[numTestVecs:m],
            3)
        print("classifier result: %d, real answer: %d" % (
            classifierResult, datingLabels[testRow]))
        if classifierResult != datingLabels[testRow]:
            errorCount += 1.0

    print("total count: %s, error count: %s" % (numTestVecs, errorCount))
    print("total error rate: %f" % (errorCount / float(numTestVecs)))
def handwritingClassTest():
    """Run the digit classifier over 'testDigits' and report its errors.

    Training vectors come from the files in 'trainingDigits'; the expected
    label of any file is parsed from its name.  For every test file the
    classifier result and the expected label are printed, followed by the
    total number of errors and the error rate over all test files.
    """
    def labelOf(fileName):
        # Drop everything from the first '.' on, then keep the integer that
        # comes before the first '_'.
        baseName = fileName.split('.')[0]
        return int(baseName.split('_')[0])

    # load the training set
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    hwLabels = []
    for i, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(labelOf(fileNameStr))
        trainingPath = 'trainingDigits/%s' % fileNameStr
        trainingMat[i, :] = fileUtil.img2vector(trainingPath)

    # iterate through the test set
    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        classNumStr = labelOf(fileNameStr)
        testPath = 'testDigits/%s' % fileNameStr
        vectorUnderTest = fileUtil.img2vector(testPath)
        # NOTE(review): the final argument 3 looks like the neighbour count -
        # verify against kNN.classify.
        classifierResult = kNN.classify(vectorUnderTest, trainingMat, hwLabels, 3)
        report = "the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr)
        print(report)
        if classifierResult != classNumStr:
            errorCount += 1.0

    print("\ntotal number of errors: %d" % errorCount)
    print("\ntotal error rate: %f" % (errorCount/float(mTest)))