예제 #1
0
def handwritingClassTest():
    """Classify every file in ``testDigits`` with kNN and print the error rate.

    Each file in ``trainingDigits`` is loaded with ``kNN.img2vector`` into one
    1024-column row of the training matrix. Its label is the integer that
    precedes the first ``_`` in the file name. Every file in ``testDigits``
    is then classified with ``kNN.classify0`` (last argument 3) and compared
    with the label parsed from its own file name.

    One line is printed per test file, followed by the total number of errors
    and the error rate. When ``testDigits`` is empty the error rate is
    reported as undefined instead of raising ``ZeroDivisionError``.

    Returns:
        None. All results are printed.
    """
    trainingFileList = listdir('trainingDigits')
    hwLabels = []
    trainingMat = zeros((len(trainingFileList), 1024))
    for row, fileNameStr in enumerate(trainingFileList):
        # Keep only the part before the first '.', then the part before the
        # first '_' of that: this is the class label.
        hwLabels.append(int(fileNameStr.split('.')[0].split('_')[0]))
        trainingMat[row, :] = kNN.img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    mTest = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = kNN.img2vector('testDigits/%s' % fileNameStr)
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat,
                                         hwLabels, 3)
        print("the classifier came back with: {0}, the real answer is: {1}".format(classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0

    print("\nthe total number of errors is: {0}".format(errorCount))
    if mTest == 0:
        # No test files: a rate cannot be computed, so say so explicitly.
        print("\nthe total error rate is undefined: no test files found")
        return
    print("\nthe total error rate is: {0}".format(errorCount / mTest))
예제 #2
0
def mainTest():
    """Build a kd-tree from ``trainingDigits`` and evaluate it on ``testDigits``.

    Each training file is loaded with ``img2vector`` into one 1024-column row
    of the training matrix; its label is the integer before the first ``_``
    of the file name. The matrix and labels are handed to ``createKDTree``
    together with a fresh ``KDNode``. Each test file is then classified with
    ``KNN(kd_tree, 5, vector)`` and compared with the label parsed from its
    file name.

    Progress, one line per test file, the total error count and the error
    rate are printed. All output uses single-argument ``print(...)`` calls,
    which behave the same under Python 2 and Python 3. When ``testDigits`` is
    empty the rate is reported as undefined instead of dividing by zero.

    Returns:
        None. All results are printed.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    trainingMat = np.zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    # build the kdtree
    print("start to build kd tree ...")
    print(hwLabels)
    kd_tree = createKDTree(KDNode(), trainingMat, hwLabels)
    print("kd tree finished")

    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        # Only row 0 of the loaded vector is passed to the classifier.
        classifierResult = KNN(kd_tree, 5, vectorUnderTest[0])

        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    if mTest == 0:
        # No test files: a rate cannot be computed, so say so explicitly.
        print("\nthe total error rate is undefined: no test files found")
        return
    print("\nthe total error rate is: %f" % (errorCount / float(mTest)))
예제 #3
0
	def testKNN3(self):
		"""Check that kNN assigns one ``testDigits`` file the label in its name.

		The training matrix is built from every file in ``trainingDigits``
		(one 1024-column row per file, labelled with the integer before the
		first ``_`` of the file name). The alphabetically first file of
		``testDigits`` is then classified with ``kNN.classify0`` and the
		result must equal the label parsed from that file's name.
		"""
		trainingFileList = os.listdir('trainingDigits')
		hwLabels = []
		trainingMat = numpy.zeros((len(trainingFileList), 1024))
		for i, fileNameStr in enumerate(trainingFileList):
			fileStr = fileNameStr.split('.')[0]     #take off the extension
			hwLabels.append(int(fileStr.split('_')[0]))
			trainingMat[i,:] = kNN.img2vector('trainingDigits/%s' % fileNameStr)
		# os.listdir() returns entries in arbitrary order; sort so the same
		# file is picked on every run and every platform.
		fileNameStr = sorted(os.listdir('testDigits'))[0]
		fileStr = fileNameStr.split('.')[0]     #take off the extension
		classNumStr = int(fileStr.split('_')[0])
		vectorUnderTest = kNN.img2vector('testDigits/%s' % fileNameStr)
		c = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
		# Compare with the label taken from the file name, not a hard-coded 0,
		# so the assertion matches whichever file was actually classified.
		self.assertEqual(c, classNumStr)
예제 #4
0
def img2vectorTest():
    """Load ``testDigits/0_13.txt`` and print entries 0..31 of row 0."""
    loaded = kNN.img2vector('testDigits/0_13.txt')
    leading_entries = loaded[0, 0:32]
    print(leading_entries)
예제 #5
0
    line = line.strip()
    print line
    line = line.split()
    print line

    print zero[1, ]
##########Ch02###########
# Load the dating data set; kNN.file2matrix's result is unpacked into a data
# matrix and a label sequence.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')

# Disabled exploration code: the condition is the constant False, so nothing
# inside this branch ever runs.
if (False):
    print datingDataMat
    print datingLabels[0:20]

    # Scatter plot of columns 1 and 2. The third and fourth positional
    # arguments are both the labels scaled by 15.0 (matplotlib's scatter takes
    # marker size and colour in those positions).
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
               15.0 * array(datingLabels), 15.0 * array(datingLabels))
    plt.show()

    # kNN.autoNorm's result is unpacked into three values, printed in turn.
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    print normMat
    print ranges
    print minVals

    kNN.datingClassTest()
    kNN.classifyPerson()
    testVector = kNN.img2vector('digits/testDigits/0_13.txt')
    # NOTE(review): 0:31 selects indices 0..30 and 32:63 selects 32..62, i.e.
    # 31 entries each and index 31 is skipped. If full 32-entry rows were
    # intended the slices would be 0:32 and 32:64 - confirm.
    print testVector[0, 0:31]
    print testVector[0, 32:63]

# The only call outside the disabled branch besides the data load above.
kNN.handwritingClassTest()
예제 #6
0
from numpy import *
from os import listdir

# kNN.handwritingClassTest()

# Load the training data: one 1024-column row per file in trainingDigits,
# labelled with the integer that precedes the first '_' of the file name.
trainingFileList = listdir('trainingDigits')
m = len(trainingFileList)
hwLabels = []
trainingMat = zeros((m, 1024))

for i, fileNameStr in enumerate(trainingFileList):
    fileStr = fileNameStr.split('.')[0]  # drop everything from the first '.'
    classNumStr = int(fileStr.split('_')[0])
    hwLabels.append(classNumStr)
    trainingMat[i, :] = kNN.img2vector('trainingDigits/%s' % fileNameStr)

# Walk through the test set, classify each file and count the mismatches.
testFileList = listdir('testDigits')
mTest = len(testFileList)
errorCount = 0.0

for i, fileNameStr in enumerate(testFileList):
    fileStr = fileNameStr.split('.')[0]  # drop everything from the first '.'
    classNumStr = int(fileStr.split('_')[0])
    vectorUnderTest = kNN.img2vector('testDigits/%s' % fileNameStr)
    classifierResult = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
    print("the classifier came back with: %d, the real answer is: %d" %
          (classifierResult, classNumStr))
    if classifierResult != classNumStr:
        errorCount += 1.0
예제 #7
0
ax = fig.add_subplot(111)
ax.scatter(datamat[:,1], datamat[:,2], 15.0*array(labels), 15.0*array(labels))
plt.show()
'''

# Normalise `datamat` (defined earlier in the script); kNN.autoNorm's result
# is unpacked into three values, each printed on its own.
normmat, ranges, minvals = kNN.autoNorm(datamat)
print(normmat)
print(ranges)
print(minvals)

# kNN.datingClassTest(0.2,7)


def classifyperson():
    """Prompt for three numeric features and print the predicted category.

    The three answers are read with ``input`` and converted to ``float``.
    ``datingTestSet.txt`` is loaded with ``kNN.file2matrix`` and normalised
    with ``kNN.autoNorm``; the same minimum/range values are applied to the
    answers before they are passed to ``kNN.classify0`` (last argument 3).
    The classifier's result minus one is used as an index into the list of
    category names.

    Returns:
        None. The prediction is printed.
    """
    categories = ['not at all', 'small doses', 'large dose']

    # NOTE(review): the first prompt says "filter"; "flier" was probably
    # meant. Left unchanged because it is user-facing text.
    miles = float(input('frequent filter miles earned per year:'))
    game_share = float(input('% of time spent on game:'))
    ice_cream = float(input('liters of ice cream consumed per year:'))

    samples, sample_labels = kNN.file2matrix('datingTestSet.txt')
    scaled_samples, spans, minimums = kNN.autoNorm(samples)

    # Apply the training set's shift and scale to the new sample.
    raw_query = array([miles, game_share, ice_cream])
    scaled_query = (raw_query - minimums) / spans

    predicted = kNN.classify0(scaled_query, scaled_samples, sample_labels, 3)
    print("you like this person:", categories[predicted - 1])


# classifyperson()

# Load one digit file as a vector (the result is not used again in the lines
# shown here), then run the handwriting classification test.
testvector = kNN.img2vector('testDigits/0_13.txt')
kNN.handwritingClassTest()
예제 #8
0
#!/usr/bin/python
# -*- coding:utf-8 -*-

import kNN
'''
我们用来测试数据k-紧邻算法
'''
'''
从文件中拿数据
'''
# Reload the kNN module (Python 2 builtin `reload`) so that edits made to it
# since the import are picked up.
reload(kNN)

# Load the dating data set and show the raw matrix.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print datingDataMat

# Normalise the matrix; the doubled parentheses around the argument are
# redundant and pass the matrix itself, not a tuple.
normMat, ranges, minVals = kNN.autoNorm((datingDataMat))
print normMat

# Run the dating classifier test, then print the normalised matrix again.
kNN.datingClassTest()
print normMat

# Load a digit file from the current directory and print part of row 0.
# NOTE(review): 0:31 selects indices 0..30 (31 entries); use 0:32 if a full
# 32-entry row was intended - confirm.
testvector = kNN.img2vector('0_13.txt')
print testvector[0, 0:31]
예제 #9
0
import kNN

from numpy import *
import operator

# 2.2
import matplotlib
import matplotlib.pyplot as plt

# 2.3
from os import listdir

# Exercise the kNN helpers in turn: build the toy data set, load and
# normalise the dating data, and load one digit file as a vector.
group, labels = kNN.createDataSet()
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
testVector = kNN.img2vector(
    'machinelearninginaction/Ch02/digits/testDigits/0_0.txt')

# Debug output for the intermediate results, currently disabled.
# print(group)
# print(labels)
# print(datingDataMat)
# print("=====I'm split line.==========")
# print(datingLabels[0:20])
# print(normMat)
# print(ranges)
# print("=====I'm split line.==========")
# print(minVals)
# print("=====I'm split line.==========")
# Only the digit vector is printed.
print(testVector)


# 2.1
#coding:utf-8
import kNN

# Load one digit file as a vector and print two slices of row 0.
# NOTE(review): 0:31 selects indices 0..30 and 32:63 selects 32..62, so each
# slice has 31 entries and index 31 is skipped; 0:32 / 32:64 would give full
# 32-entry rows - confirm the intent.
testVector = kNN.img2vector('testDigits/0_13.txt')
print testVector[0, 0:31]
print testVector[0, 32:63]

# Run the handwriting classification test.
kNN.handwritingClassTest()
예제 #11
0
import kNN

# Build the toy data set and classify the point [0, 0] (last argument 3).
group, labels = kNN.create_dataset()

print(kNN.classify0([0, 0], group, labels, 3))

# Load the dating data; the result is unpacked into a matrix and its labels.
dating_data_mat, dating_labels = kNN.file2matrix('./CH02/datingTestSet.txt')

# Normalise the matrix; the result is unpacked into three values.
norm_mat, ranges, min_values = kNN.auto_norm(dating_data_mat)

# NOTE(review): the class test reads datingTestSet.txt while classify_person
# reads datingTestSet2.txt - confirm that the two different files are intended.
kNN.dating_class_test('./CH02/datingTestSet.txt')

kNN.classify_person('./CH02/datingTestSet2.txt')

# Load one digit file as a vector (not used again in the lines shown here).
test_vec = kNN.img2vector('./CH02/digits/testDigits/0_13.txt')

# Run the handwriting test with explicit training and test directories.
kNN.handwriting_class_test('./CH02/digits/trainingDigits',
                           './CH02/digits/testDigits')
예제 #12
0
# for i in range(numTestVecs):
#     classifierResult = kNN.classify0(normanDataSet[i, :], normanDataSet[numTestVecs:, :], datingLabels[numTestVecs:], 3)
#     # print(classifierResult)
#     if (classifierResult != datingLabels[i]) : errorCount += 1
#
# print("错误率为:%f" % (errorCount/(m*hoRatio)))

# persentTags = input('测试')
# print(persentTags)

# Training set: parse the class digit from each file name, load the features
# into the matrix and collect the labels in a list.
trainingFileList = os.listdir('trainingDigits')
m = len(trainingFileList)
trainingMat = np.zeros((m, 1024))
hwLabels = []
for i, fileName in enumerate(trainingFileList):
    # The label is the integer before the first '_' of the part of the name
    # that precedes the first '.'.
    classNumStr = int(fileName.split('.')[0].split('_')[0])
    trainingMat[i, :] = kNN.img2vector('trainingDigits/%s' % fileName)
    hwLabels.append(classNumStr)

# Test set: parse the class digit from each file name and classify the file.
testFileList = os.listdir('testDigits')
mTest = len(testFileList)
# One row per test file, so the matrix is sized with mTest (not the training
# count m) and filled with the vectors as they are loaded.
testMat = np.zeros((mTest, 1024))
errorCount = 0
for i, fileName in enumerate(testFileList):
    classNumStr = int(fileName.split('.')[0].split('_')[0])
    vectorUnderTest = kNN.img2vector('testDigits/%s' % fileName)
    testMat[i, :] = vectorUnderTest
    classifierResult = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
    if classNumStr != classifierResult:
        errorCount += 1

if mTest:
    # float() keeps this a true division under Python 2 as well, where
    # int / int would truncate the rate to 0.
    print('错误率为:%f' % (errorCount / float(mTest)))
else:
    # An empty test directory has no error rate; avoid ZeroDivisionError.
    print('no test files found in testDigits; error rate is undefined')
예제 #13
0
# NOTE(review): `ax` and `datingDataMat` are created earlier in the script,
# before the lines shown here.
# Plain scatter plot of columns 1 and 2.
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])
plt.show()

# Same columns again; the third and fourth positional arguments (marker size
# and colour in matplotlib's scatter) are the labels scaled by 15.0.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
           15.0 * np.array(datingLabels), 15.0 * np.array(datingLabels))
plt.show()

# Columns 0 and 1 with the same label-driven size and colour.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           15.0 * np.array(datingLabels), 15.0 * np.array(datingLabels))
plt.show()

# Call the normalisation function on the data set. The bare names below only
# display a value in an interactive session; in a script they do nothing.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
normMat
ranges
minVals

# Run the classifier test.
kNN.datingClassTest()

# Convert one digit image file into a vector. The directory is spelled
# 'testDigits'; the previous spelling 'testDights' pointed at a directory
# that nothing else in these scripts uses.
tectVector = kNN.img2vector('testDigits/0_13.txt')
tectVector

# Predict the handwritten digits in the image files.
kNN.handwritingClassTest()
#2.2.3 Prepare data: normalise the values
# `datingDataMat` is defined earlier in the script, before the lines shown here.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
print 'normMat:',normMat
print 'ranges:',ranges
print 'minVals:',minVals

#2.2.4 Test the algorithm: validate the classifier as a complete program
kNN.datingClassTest()


#2.2.5 Use the algorithm: build a complete, usable system
kNN.classifyPerson()

#2.3.1 Prepare data: convert an image into a test vector
# `homedir` is defined elsewhere; it is concatenated directly with the
# relative path, so it presumably ends with a path separator - confirm.
testVector=kNN.img2vector(homedir+'testDigits/0_13.txt')
# NOTE(review): 0:31 selects indices 0..30 (31 entries); use 0:32 if a full
# 32-entry row was intended - confirm.
print 'testVector[0,0:31]:',testVector[0,0:31]

#2.3.2 Test the algorithm: recognise handwritten digits with k-nearest neighbours
kNN. handwritingClassTest()

# inX=[0,0]
# dataSet=group
# k=3
#
# dataSetSize = dataSet.shape[0]
# print 'dataSetSize:',dataSetSize
# diffMat = tile(inX, (dataSetSize, 1)) - dataSet
# print 'diffMat:',diffMat
# sqDiffMat = diffMat ** 2
# print 'sqDiffMat:',sqDiffMat
예제 #15
0
# Load the dating data set and show the matrix and the first 20 labels.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels[0:20])

# scatter plot
# Columns 0 and 1; the third and fourth positional arguments (marker size and
# colour in matplotlib's scatter) are the labels scaled by 15.0.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           15.0 * np.array(datingLabels), 15.0 * np.array(datingLabels))
plt.show()

# normalization
# kNN.autoNorm's result is unpacked into three values, printed in turn.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
print(normMat)
print(ranges)
print(minVals)

# test error rate
kNN.datingClassTest()

# predict
kNN.classifyPerson()

# handwriting nums recognition
# load data
# NOTE(review): 0:31 selects indices 0..30 (31 entries); use 0:32 if a full
# 32-entry row was intended - confirm.
testVector = kNN.img2vector('dataset/testDigits/0_13.txt')
print(testVector[0, 0:31])

# handwriting class test
kNN.handwritingClassTest()
예제 #16
0
def main7():
    """Handwritten digit recognition demo.

    Loads ``testDigits/0_0.txt`` through ``kNN.img2vector`` (the returned
    vector is not used afterwards) and then runs
    ``kNN.handwritingClassTest``.
    """
    # The vector is loaded only for the call itself; nothing reads it later.
    kNN.img2vector('testDigits/0_0.txt')
    kNN.handwritingClassTest()
예제 #17
0
#coding=utf-8
from numpy import *
import kNN

# Reload the kNN module so that edits made since the import are picked up.
# NOTE(review): `reload` is a builtin only in Python 2; on Python 3 it must be
# imported (importlib.reload) - confirm the target interpreter.
reload(kNN)
testVector = kNN.img2vector('testDigits/0_13.txt')  # read the file's data
# NOTE(review): 0:31 selects indices 0..30 and 32:63 selects 32..62, so each
# slice has 31 entries and index 31 is skipped - confirm the intent.
print(testVector[0, 0:31])
print(testVector[0, 32:63])
예제 #18
0
# coding: gbk

""" This is a test for recognition of handwritten numbers with kNN  """

import kNN

################################################################################
#                                                                              #
#                                  手写数字识别                                 #
#                                                                              #
################################################################################

# 测试函数
#testVector = kNN.img2vector('./data/digits/0_0.txt')
#print(testVector[0,0:31])
#print(testVector[0,32:63])

# 分类器 + 测试
#kNN.handwritingTest()

# Digit recognition on PNG images
# Build the training matrix and labels once, then classify the ten images
# 0.png .. 9.png; the loop index doubles as the expected digit in the output.
trainingMat, hwLabels = kNN.handwritingClassTraining()
for i in range(10):
    # kNN.png2vec's return value is passed on to kNN.img2vector as a path.
    filepath = kNN.png2vec("./data/digits/MyDigits/%d.png" % i)
    vecUnderTest = kNN.img2vector(filepath)
    res = kNN.classify0(vecUnderTest, trainingMat, hwLabels, 3)
    print("The handwriting number is: %d (%d for real)" % (res, i))