def main2():
    """Run the classifier on the sample data set and print the result.

    Classifies the point ``[0, 0]`` against the group/labels returned by
    ``kNN.createDataSet()``, passing 3 as the last ``classify0`` argument.
    """
    group, labels = kNN.createDataSet()
    result = kNN.classify0([0, 0], group, labels, 3)
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(result)
def handwritingClassTest():
    """Classify every file in ``testDigits`` and print the error statistics.

    One 1024-element row is built per file in ``trainingDigits``; the label is
    the integer before the first underscore of the file name. Each test file
    is then classified with ``classify0`` (last argument 3) and compared with
    the label parsed from its own name.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorcount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # print() with one pre-formatted argument works on Python 2 and 3.
        print('the classifier came back with:%d,the real answer is :%d'
              % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorcount += 1.0
    print("\nThe local number of errors is :%d" % errorcount)
    print("\nThe total error rate is: %f " % (errorcount / float(mTest)))
def handwritingClassTest(k=3):
    """Measure the classifier's error rate on ``testDigits``.

    :param k: value forwarded as the last argument of ``kNN.classify0``.
    :return: number of misclassified test files divided by the number of
        test files.
    """
    trainNames = os.listdir('trainingDigits')
    trainingMat = np.zeros((len(trainNames), 1024))
    hwLabels = []
    for row, name in enumerate(trainNames):
        # The label is the integer in front of the first underscore.
        hwLabels.append(int(name.split('.')[0].split('_')[0]))
        trainingMat[row, :] = img2Vector('trainingDigits/%s' % name)

    testNames = os.listdir('testDigits')
    misses = 0.0
    for name in testNames:
        expected = int(name.split('.')[0].split('_')[0])
        sample = img2Vector('testDigits/%s' % name)
        predicted = kNN.classify0(sample, trainingMat, hwLabels, k)
        if predicted != expected:
            misses += 1.0

    errorRate = misses / float(len(testNames))
    print('k = %d, and the total error rate is: %f' % (k, errorRate))
    return errorRate
def handWritingClassTest():
    """Classify the files in ``digits/testDigits`` and print diagnostics.

    Prints the number of training files, every ``index:label`` pair, the full
    label list, each classification result, and finally the error count and
    error rate.
    """
    hwLables = []
    # File names found in the training directory.
    trainingFileList = listdir('digits/trainingDigits')
    trainingDatalength = len(trainingFileList)
    # One 1024-element row per training file.
    trainingMat = np.zeros((trainingDatalength, 1024))
    print(trainingDatalength)
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split(".")[0]
        classNumStr = int(fileStr.split('_')[0])
        print("%d:%d" % (i, classNumStr))
        hwLables.append(classNumStr)
        trainingMat[i, :] = img2Vector("digits/trainingDigits/%s" % fileNameStr)
    print('hwLabels')
    print(hwLables)

    testFileList = listdir('digits/testDigits')
    testDataLength = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split(".")[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2Vector("digits/testDigits/%s" % fileNameStr)
        classfiyResult = kNN.classify0(vectorUnderTest, trainingMat, hwLables, 3)
        # Call-form print with one argument behaves the same on Python 2 and 3.
        print("the clasifier came back with : %d ,the real answer is %d"
              % (classfiyResult, classNumStr))
        if classNumStr != classfiyResult:
            errorCount += 1.0
    print('the total error count is %d\n the error rate is %f'
          % (errorCount, errorCount / float(testDataLength)))
def datingClassTest():
    """Hold-out test of the classifier on ``datingTestSet2.txt``.

    The first 10% of the normalised rows are classified against the remaining
    rows; each result, the error rate and the error count are printed.

    :return: None
    """
    # Fraction of rows used as test samples.
    hoRatio = 0.1
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    # Number of rows in the normalised matrix.
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    print('numTestVecs = ', numTestVecs)

    errorCount = 0.0
    for idx in range(numTestVecs):
        sample = normMat[idx, :]
        actual = datingLabels[idx]
        classifierResult = kNN.classify0(
            sample,
            normMat[numTestVecs:m, :],
            datingLabels[numTestVecs:m],
            3,
        )
        print('The classifier came back with %d, the real answer is: %d'
              % (classifierResult, actual))
        if classifierResult != actual:
            errorCount += 1.0
    print('The total error rate is %f' % (errorCount / float(numTestVecs)))
    print(errorCount)
def classifyImg(trainingdir, testdir):
    """Classify every file in ``testdir`` against the files in ``trainingdir``.

    Labels are the (string) part of each file name before the first
    underscore. Misclassified files are reported as they occur and the error
    rate is printed at the end.

    :param trainingdir: directory holding the training files.
    :param testdir: directory holding the files to classify.
    """
    trainingfiles = listdir(trainingdir)
    filecount = len(trainingfiles)
    trainMatrixData = zeros((filecount, 1024))
    label = []
    for i, filename in enumerate(trainingfiles):
        label.append(filename.split('_')[0])
        trainMatrixData[i, :] = kNN.img2matrix(trainingdir + '/' + filename)

    testfiles = listdir(testdir)
    filecount = len(testfiles)
    errcount = 0.0
    for filename in testfiles:
        num = filename.split('_')[0]
        testMatrix = kNN.img2matrix(testdir + '/' + filename)
        retval = kNN.classify0(testMatrix, trainMatrixData, label, 3)
        if retval != num:
            errcount += 1
            # Call-form print with one argument works on Python 2 and 3.
            print("error")
            print("retval:%s,num:%s" % (retval, num))
    print("rate:%f" % (errcount / float(filecount)))
def handwritingClassTest():
    """Classify the files in ``testDigits`` and print the error statistics.

    Training rows come from ``trainingDigits``; the label of a file is the
    integer before the first underscore of its name.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')  # files in the training directory
    m = len(trainingFileList)
    # The shape must be a single tuple: zeros(m, 1024) would treat 1024 as the
    # dtype argument and raise TypeError.
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])  # class number parsed from the file name
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')  # files in the test directory
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # Call-form print with one argument works on Python 2 and 3.
        print('分类结果是:%d, 真是结果是:%d' % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print('分类错误的总数是:%d' % errorCount)
    print('总的错误率:%f' % (errorCount / float(mTest)))
def handwritting_test():
    """Classify every file in ``TEST_DATA_DIR`` and print the error statistics.

    Training rows are built from ``TRAINING_DATA_DIR``; ``K`` is forwarded as
    the last argument of ``classify0``.
    """
    train_names = os.listdir(TRAINING_DATA_DIR)
    training_mat = np.zeros((len(train_names), 1024))
    hw_labels = []
    # Build the training matrix and its labels.
    for row, name in enumerate(train_names):
        stem = name.split('.')[0]
        hw_labels.append(int(stem.split('_')[0]))
        training_mat[row, :] = img2vector(TRAINING_DATA_DIR + '/' + name)

    # Walk the test directory and classify each file in it.
    test_names = os.listdir(TEST_DATA_DIR)
    m_test = len(test_names)
    error_count = 0
    for name in test_names:
        stem = name.split('.')[0]
        test_num_class = int(stem.split('_')[0])
        test_image_vec = img2vector(TEST_DATA_DIR + '/' + name)
        classifier_res = classify0(test_image_vec, training_mat, hw_labels, K)
        print("the classifier came back with: %d,the real answer is: %d"
              % (classifier_res, test_num_class))
        if classifier_res != test_num_class:
            error_count += 1.0
    print("\nthe total number of errors is: %d" % error_count)
    print("\nthe total rate is:%f" % (error_count / float(m_test)))
def datingClassTest():
    """Try k = 1..20 on ``datingTestSet.txt`` and print the best k.

    The first half of the normalised rows is classified against the second
    half for every k. The error rate and error count are printed per k; on a
    tie the larger k wins because the comparison uses ``<=``.
    """
    hoRatio = 0.50  # half of the rows are test samples, the other half is the reference set
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')  # load data set from file
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    # Start with an error count no run can exceed, so the first k is always accepted.
    errorAns = m
    for k in range(1, 21):
        errorCount = 0
        for i in range(numTestVecs):
            classifierResult = kNN.classify0(normMat[i, :],
                                             normMat[numTestVecs:m, :],
                                             datingLabels[numTestVecs:m], k)
            if classifierResult != datingLabels[i]:
                errorCount += 1.0
        # Call-form print with one argument works on Python 2 and 3.
        print("when k is %d the total error rate is: %f"
              % (k, errorCount / float(numTestVecs)))
        print(errorCount)
        if errorCount <= errorAns:
            errorAns = errorCount
            ans = k
    print("the best k is %s" % ans)
def hand_write_class_test(self):
    """Classify the files in the test-digit directory and print the results.

    Training rows are built with ``self.img_to_vector`` from every file in the
    training directory; the label is the integer before the first underscore
    of the file name.
    """
    train_dir = (
        "/home/zhangzhiliang/Documents/my_git/DATA-SCIENTIST-/machine_learing_algorithm/"
        "machine_learning_in_action/2_KNN/digits/trainingDigits")
    test_dir = (
        "/home/zhangzhiliang/Documents/my_git/DATA-SCIENTIST-/machine_learing_algorithm/"
        "machine_learning_in_action/2_KNN/digits/testDigits")

    hand_write_labels = []
    train_file_list = listdir(train_dir)
    train_num_of_digits = len(train_file_list)
    train_data = np.zeros((train_num_of_digits, 1024))
    for i, file_name_str in enumerate(train_file_list):
        file_str = file_name_str.split('.')[0]
        hand_write_labels.append(int(file_str.split('_')[0]))
        # Read the file from the directory that was listed; the previous
        # 'train_Digits <name>' path did not point at it.
        train_data[i, :] = self.img_to_vector(
            "{}/{}".format(train_dir, file_name_str))

    test_file_list = listdir(test_dir)
    # Integer counter: '{:d}' raises ValueError when given a float.
    error_count = 0
    test_num_of_digits = len(test_file_list)
    for file_name_str in test_file_list:
        file_str = file_name_str.split('.')[0]
        class_digits_str = int(file_str.split('_')[0])
        test_data = self.img_to_vector(
            "{}/{}".format(test_dir, file_name_str))
        result = kNN.classify0(test_data, train_data, hand_write_labels, 3)
        print(
            "the classifier came back with: {:d}, the real answer is: {:d}"
            .format(result, class_digits_str))
        if result != class_digits_str:
            error_count += 1
    print("the total number of errors is {:d}".format(error_count))
    print("the total number of error rate is : {:f}".format(
        error_count / float(test_num_of_digits)))
def handwritingClassTest():
    """Classify the files in ``testDigits`` and print the error statistics.

    Training rows come from ``trainingDigits``; the label of a file is the
    integer before the first underscore of its name.
    """
    hwLabels = []
    trainingFileList = os.listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = np.zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        # Fill row i; rebinding trainingMat here would discard the matrix and
        # leave only the last vector.
        trainingMat[i, :] = img2vector('trainingDigits/' + fileNameStr)

    testFileList = os.listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    # Iterate over the test files themselves, not over the training count.
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/' + fileNameStr)
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat,
                                         hwLabels, 3)
        print(
            'the classifier came back with:{0},the real answer is:{1}'.format(
                classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print('the total number of errors is:{}'.format(errorCount))
    print('the total erroe rate is:{:.2f}'.format(errorCount / float(mTest)))
def hand_writing_class_test():
    """Classify the files in ``testDigits`` and print the error statistics.

    Training rows come from ``trainingDigits``; the label of a file is the
    integer before the first underscore of its name.
    """
    hw_labels = []
    training_file_list = listdir('trainingDigits')
    m = len(training_file_list)
    training_mat = zeros((m, 1024))
    for i, file_name_str in enumerate(training_file_list):
        file_str = file_name_str.split('.')[0]
        hw_labels.append(int(file_str.split('_')[0]))
        training_mat[i, :] = img2vector('trainingDigits/' + file_name_str)

    test_file_list = listdir('testDigits')
    error_count = 0.0
    m_test = len(test_file_list)
    for file_name_str in test_file_list:
        file_str = file_name_str.split('.')[0]
        class_num_str = int(file_str.split('_')[0])
        vector_under_test = img2vector('testDigits/' + file_name_str)
        classifier_result = classify0(vector_under_test, training_mat, hw_labels, 3)
        # Call-form print with one argument works on Python 2 and 3.
        print('the classifier came back with: %d, the real answer is: %d,'
              % (classifier_result, class_num_str))
        if classifier_result != class_num_str:
            error_count += 1
    print("the total number of errors is: %d" % error_count)
    print("the total error rate is: %f" % (error_count / float(m_test)))
def handwriting_test_class():
    """Classify the files in ``digits_dir_1`` against those in ``digits_dir_0``.

    Labels are obtained with ``get_class_num_from_filename``. Each result, the
    error count and the error rate are printed.
    """
    hw_labels = []
    training_filter_list = os.listdir(digits_dir_0)
    m = len(training_filter_list)
    training_mat = numpy.zeros((m, 1024))  # storage for the training vectors
    for i, filename in enumerate(training_filter_list):
        hw_labels.append(get_class_num_from_filename(filename))
        training_mat[i, :] = img2vector(os.path.join(digits_dir_0, filename))

    test_file_list = os.listdir(digits_dir_1)
    error_count = 0.0
    m_test = len(test_file_list)
    for filename in test_file_list:
        class_num = get_class_num_from_filename(filename)
        vector_under_test = img2vector(os.path.join(digits_dir_1, filename))
        classifier_result = classify0(vector_under_test, training_mat, hw_labels, 3)
        # Both values must be in one tuple: "fmt" % a, b formats only a and
        # raises "not enough arguments for format string".
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifier_result, class_num))
        if classifier_result != class_num:
            error_count += 1.0
    print("\nthe total number of errors is: %d" % error_count)
    print("\nthe total error rate of is: %f" % (error_count / float(m_test)))
def handwriting_test_class():
    """Classify the files in ``digits_dir_1`` against those in ``digits_dir_0``.

    Labels are obtained with ``get_class_num_from_filename``. Each result, the
    error count and the error rate are printed.
    """
    hw_labels = []
    training_filter_list = os.listdir(digits_dir_0)
    m = len(training_filter_list)
    training_mat = numpy.zeros((m, 1024))  # storage for the training vectors
    for i, filename in enumerate(training_filter_list):
        class_num = get_class_num_from_filename(filename)
        hw_labels.append(class_num)
        training_mat[i, :] = img2vector(os.path.join(digits_dir_0, filename))

    test_file_list = os.listdir(digits_dir_1)
    error_count = 0.0
    m_test = len(test_file_list)
    for filename in test_file_list:
        class_num = get_class_num_from_filename(filename)
        vector_under_test = img2vector(os.path.join(digits_dir_1, filename))
        classifier_result = classify0(vector_under_test, training_mat,
                                      hw_labels, 3)
        # The % operator needs both values in a tuple; passing class_num as a
        # second print argument left the format string one value short.
        print(
            "the classifier came back with: %d, the real answer is: %d"
            % (classifier_result, class_num))
        if classifier_result != class_num:
            error_count += 1.0
    print("\nthe total number of errors is: %d" % error_count)
    print("\nthe total error rate of is: %f" % (error_count / float(m_test)))
def hwClassifyTest():
    """Classify the first (at most) 10 files of ``testDigits`` and print the error rate.

    Training rows come from ``trainingDigits``. A file name such as
    ``0_12.txt`` is split on '.' and '_' so that its label is 0.
    """
    hwlabels = []
    # listdir returns the file names found in the directory.
    trainfilelist = listdir('trainingDigits')
    m = len(trainfilelist)
    traindata = zeros((m, 1024))
    for i, filenamestr in enumerate(trainfilelist):
        filestr = filenamestr.split('.')[0]
        hwlabels.append(int(filestr.split('_')[0]))
        traindata[i, :] = img2vector('trainingDigits/%s' % filenamestr)

    testfilelist = listdir('testDigits')
    errorcount = 0
    mtest = len(testfilelist)
    # Only a small sample is classified; cap it so a directory with fewer
    # than 10 files does not raise IndexError.
    tested = min(10, mtest)
    for i in range(tested):
        filenamestr = testfilelist[i]
        filestr = filenamestr.split('.')[0]
        numclass = int(filestr.split('_')[0])
        testvect = img2vector('testDigits/%s' % filenamestr)
        result = kNN.classify0(testvect, traindata, hwlabels, 3)
        # Call-form print with one argument works on Python 2 and 3.
        print("handwriting classifier result: %d, the resl answer is: %d"
              % (result, numclass))
        if result != numclass:
            errorcount += 1.0
    # Divide by the number of files actually classified, not by the size of
    # the whole test directory.
    print("\nthe error rate is:%f " % (errorcount / float(tested)))
def handwriteringClassTest():
    """Classify the files in ``digits/testDigits`` and print the error rate.

    Training rows are produced by ``imgtxt2vector`` from every file in
    ``digits/trainingDigits``; the label is the integer before the first
    underscore of the file name.
    """
    trainNames = os.listdir('digits/trainingDigits')  # directory contents
    trainingMat = np.zeros([len(trainNames), 1024])
    hwLabels = []
    for row, name in enumerate(trainNames):
        hwLabels.append(int(name.split('.')[0].split('_')[0]))
        trainingMat[row, :] = imgtxt2vector('digits/trainingDigits/' + name)

    testNames = os.listdir('digits/testDigits')
    mTest = len(testNames)
    errorCount = 0.0
    for name in testNames:
        truth = int(name.split('.')[0].split('_')[0])
        sample = imgtxt2vector('digits/testDigits/' + name)
        guess = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print('the predicted answer is %d,the true answer is %d.' % (guess, truth))
        if guess != truth:
            errorCount += 1
    print('the total error rate is %f.' % (errorCount / mTest))
def hand_writing_class_test():
    """Classify the files in ``testDigits`` and print the error statistics.

    Training rows are produced by ``img_to_vector`` from ``trainingDigits``;
    the label is the integer before the first underscore of the file name.
    """
    train_names = listdir('trainingDigits')
    training_mat = zeros((len(train_names), 1024))
    hwlabels = []
    for row, name in enumerate(train_names):
        hwlabels.append(int(name.split('.')[0].split('_')[0]))
        training_mat[row, :] = img_to_vector('trainingDigits/%s' % name)

    test_names = listdir('testDigits')
    m_test = len(test_names)
    error_count = 0.0
    for name in test_names:
        expected = int(name.split('.')[0].split('_')[0])
        sample = img_to_vector('testDigits/%s' % name)
        predicted = kNN.classify0(sample, training_mat, hwlabels, 3)
        print("the classifier came back with: %d , the real answer is:%d"
              % (predicted, expected))
        if predicted != expected:
            error_count += 1.0
    print("\nthe total number of error is :%d" % error_count)
    print("\nthe total error rate is: %f" % (error_count / float(m_test)))
def handwritingClassTest():
    """Classify the files in the ``testDigits`` directory and print the error statistics.

    Training rows are produced by ``img2verctor`` from the ``trainingDigits``
    directory; the label is the integer before the first underscore of the
    file name.
    """
    hwLabels = []
    trainingFileList = listdir('/home/jimlee/Documents/Git/kNN/HandwritingRecognitionSystem/trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = img2verctor('/home/jimlee/Documents/Git/kNN/HandwritingRecognitionSystem/trainingDigits/%s' % fileNameStr)

    testFileList = listdir('/home/jimlee/Documents/Git/kNN/HandwritingRecognitionSystem/testDigits')
    errorCount = 0.0
    m_test = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2verctor('/home/jimlee/Documents/Git/kNN/HandwritingRecognitionSystem/testDigits/%s' % fileNameStr)
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # Call-form print with one argument works on Python 2 and 3.
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(m_test)))
def handwritingClassTest():
    """Classify the files in ``testDigits`` and print the error statistics.

    Training rows come from ``trainingDigits``; the label is the integer
    before the first underscore of the file name.
    """
    # Contents of the training directory.
    trainNames = os.listdir('trainingDigits')
    trainingMat = np.zeros((len(trainNames), 1024))
    hwLabels = []
    for row, name in enumerate(trainNames):
        # Parse the class number out of the file name.
        hwLabels.append(int(name.split(".")[0].split('_')[0]))
        trainingMat[row, :] = img2vector('trainingDigits/%s' % name)

    testNames = os.listdir('testDigits')
    mTest = len(testNames)
    errorCount = 0.0
    for name in testNames:
        truth = int(name.split(".")[0].split('_')[0])
        sample = img2vector('testDigits/%s' % name)
        guess = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (guess, truth))
        if guess != truth:
            errorCount += 1
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(mTest)))
def testKNN2(self):
    """Assert that one fixed sample from the dating data is classified as 1.

    The sample is scaled with the ``minVals`` and ``ranges`` returned by
    ``kNN.autoNorm`` before it is classified against the normalised data.
    """
    dataSet, labels = file2matrix('datingTestSet.txt')
    normDataSet, ranges, minVals = kNN.autoNorm(dataSet)
    rawSample = numpy.array([51052, 4.680098, 0.625224])
    scaledSample = (rawSample - minVals) / ranges
    predicted = kNN.classify0(scaledSample, normDataSet, labels, 3)
    self.assertEqual(predicted, 1)
def handwritingClassTest():
    """Classify the files in ``testDigits`` and print the error statistics.

    Training rows come from ``trainingDigits``; the label is the integer
    before the first underscore of the file name.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorcount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # Call-form print with one argument works on Python 2 and 3.
        print('the classifier came back with:%d,the real answer is :%d' % (
            classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorcount += 1.0
    print("\nThe local number of errors is :%d" % errorcount)
    print("\nThe total error rate is: %f " % (errorcount / float(mTest)))
def handwritingClassTest():
    """Classify the files in ``resources/testDigits`` and print the error statistics.

    Training rows come from ``resources/trainingDigits``; the label is the
    integer before the first underscore of the file name.
    """
    # Load the training set.
    trainNames = os.listdir('resources/trainingDigits')
    trainingMat = zeros((len(trainNames), 1024))
    hwLabels = []
    for row, name in enumerate(trainNames):
        stem = name.split('.')[0]  # drop the extension
        hwLabels.append(int(stem.split('_')[0]))
        trainingMat[row, :] = img2vector('resources/trainingDigits/%s' % name)

    # Iterate through the test set.
    testNames = os.listdir('resources/testDigits')
    mTest = len(testNames)
    errorCount = 0.0
    for name in testNames:
        stem = name.split('.')[0]  # drop the extension
        truth = int(stem.split('_')[0])
        sample = img2vector('resources/testDigits/%s' % name)
        guess = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (guess, truth))
        if guess != truth:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(mTest)))
def handwritingClassTest():
    """Classify the files in ``digits/txt`` and print the error statistics.

    Training rows come from ``digits/trainingDigits``; the label is the
    integer before the first underscore of the file name. ``.DS_Store``
    entries are ignored in both directories.
    """
    hwLabels = []
    # Drop .DS_Store up front: its name has no integer prefix to parse, and it
    # must not occupy a row of the training matrix.
    trainingFileList = [name for name in listdir('digits/trainingDigits')
                        if name != '.DS_Store']
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = img2vector('digits/trainingDigits/%s' % fileNameStr)

    # Filtering here keeps skipped entries out of the error-rate denominator.
    testFileList = [name for name in listdir('digits/txt')  # testDigits
                    if name != '.DS_Store']
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('digits/txt/%s' % fileNameStr)  # testDigits
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print('the classifier came back with: %d, the real answer is: %d'
              % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    # Call-form print with one argument works on Python 2 and 3.
    print('\n the total number of errors is: %d' % errorCount)
    # Report 0.0 when there was nothing to classify instead of dividing by zero.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print('\n the total error rate is: %f' % errorRate)
def handwritingClassTest():
    """Classify the files in ``testDigits`` and print error and accuracy rates.

    Every result is printed; a misclassified file is printed a second time
    together with its file name.
    """
    trainNames = listdir("trainingDigits")
    trainingMat = zeros((len(trainNames), 1024))
    hwLabels = []
    for row, name in enumerate(trainNames):
        hwLabels.append(int(name.split('.')[0].split('_')[0]))
        trainingMat[row, :] = img2vector("trainingDigits/%s" % name)

    testNames = listdir("testDigits")
    mTest = len(testNames)
    errorCount = 0.0
    for fileNameStr in testNames:
        truth = int(fileNameStr.split('.')[0].split('_')[0])
        sample = img2vector("testDigits/%s" % fileNameStr)
        guess = classify0(sample, trainingMat, hwLabels, 3)
        print("came back with %d, real is %d" % (guess, truth))
        if guess != truth:
            errorCount += 1.0
            # Repeat the line for failures and append the offending file name.
            print("came back with %d, real is %d" % (guess, truth), end=" ")
            print("file name : %s " % fileNameStr)
    print("\n number of error : %d" % errorCount)
    print("\n error rate : %f " % (errorCount / float(mTest)))
    print("\n acc rate : %f" % (1.0 - (errorCount / float(mTest))))
def handWritingClassTest():
    """Classify the files in ``testDigits`` and print the error statistics.

    Training rows come from ``trainingDigits``; the label is the integer
    before the first underscore of the file name.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros(shape=(m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]  # drop the extension
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        # Load the vector from the test directory; the name comes from
        # testDigits, so looking it up under trainingDigits was wrong.
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat,
                                         hwLabels, 3)
        print(
            f"The classifier came back with: {classifierResult}, the real answer is {classNumStr}."
        )
        if classifierResult != classNumStr:
            errorCount += 1
    print(f"\nThe total number of errors is: {errorCount}.")
    print(f"\nThe total error rate is: {errorCount / float(mTest)}.")
def handwritingClassTest():
    """Classify the files in ``digits/testDigits`` and print the error statistics.

    Training rows come from ``digits/trainingDigits``; the label is the
    integer before the first underscore of the file name.
    """
    trainNames = listdir('digits/trainingDigits')
    trainingMat = np.zeros([len(trainNames), 1024])
    hwLabels = []
    for row, name in enumerate(trainNames):
        hwLabels.append(int(name.split('.')[0].split('_')[0]))
        trainingMat[row, :] = img2vector('digits/trainingDigits/' + name)

    testNames = listdir('digits/testDigits')
    mTest = len(testNames)
    errorCount = 0.0
    for name in testNames:
        truth = int(name.split('.')[0].split('_')[0])
        sample = img2vector('digits/testDigits/' + name)
        guess = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print('the classifier came back with: ' + str(guess)
              + ', the real answer is: ' + str(truth))
        if guess != truth:
            errorCount += 1.0

    countLine = '\nthe total number of error is: ' + str(errorCount)
    rateLine = '\nthe total error rate is: ' + str(errorCount / float(mTest))
    print(countLine + rateLine)
def handwritingClassTest(height, width):
    """Classify the files in ``testDigits`` and print the error statistics.

    :param height: image height, forwarded to ``img2vector``.
    :param width: image width, forwarded to ``img2vector``; each training row
        holds ``int(height * width)`` values.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, int(height * width)))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr,
                                       height, width)

    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        # The file name has the form '<class name>_<...>'.
        classNumStr = int(fileStr.split('_')[0])
        vecUnderTest = img2vector('testDigits/%s' % fileNameStr, height, width)
        classifierResult = kNN.classify0(vecUnderTest, trainingMat, hwLabels, 5)
        # Call-form print with one argument works on Python 2 and 3.
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(mTest)))
def calcuteError(train_X, train_y, test_X, test_y):
    """Print the share of test samples that ``kNN.classify0`` gets wrong.

    :param train_X: reference samples handed to ``classify0``.
    :param train_y: labels of the reference samples.
    :param test_X: samples to classify; indexed from 0 to ``len(test_X) - 1``.
    :param test_y: expected label of each test sample.
    :return: None; the rate is printed. An empty test set is reported as 0.0.
    """
    testSize = len(test_X)
    errorCount = 0.0
    for i in range(testSize):
        preLabel = kNN.classify0(test_X[i], train_X, train_y, 3)
        if preLabel != test_y[i]:
            errorCount += 1
    # Nothing to classify means nothing to divide by: report 0.0 rather than
    # raising ZeroDivisionError.
    errorRate = errorCount / testSize if testSize else 0.0
    # Call-form print with one argument works on Python 2 and 3.
    print("error rate: %f" % errorRate)
def classify0Test():
    """Exercise the first classifier on the sample data set.

    Prints the group, the labels and the result of classifying ``[0.1, 0.1]``.

    :return: None
    """
    group, labels = kNN.createDataSet()
    for caption, value in (('group:', group), ('labels:', labels)):
        print(caption, value)
    outcome = kNN.classify0([0.1, 0.1], group, labels, 3)
    print('result:', outcome)
def calcErrorRate(features, lables, holdOutRatio):
    """Return the error rate of ``kNN.classify0`` on the leading rows of ``features``.

    :param features: matrix with a ``shape`` attribute; the first
        ``int(rows * holdOutRatio)`` rows are classified.
    :param lables: label of every row of ``features`` (parameter name kept
        for callers that pass it by keyword).
    :param holdOutRatio: fraction of rows to classify.
    :return: misclassified rows divided by classified rows, or 0.0 when no
        row is selected.
    """
    testCount = int(features.shape[0] * holdOutRatio)
    if testCount <= 0:
        # No row selected: avoid dividing by zero.
        return 0.0
    errorCount = 0.0
    for i in range(testCount):
        # NOTE(review): the row being classified is also part of the reference
        # set passed here, which may bias the rate downward. Confirm whether
        # the held-out rows should be excluded.
        # The body previously read the undefined name `labels`; the parameter
        # is spelled `lables`.
        predictedLabel = kNN.classify0(features[i, :], features, lables, 3)
        realLabel = lables[i]
        # %s formatting accepts non-string labels, unlike str concatenation.
        print("predicted : %s\t\treal : %s" % (predictedLabel, realLabel))
        if predictedLabel != realLabel:
            errorCount += 1.0
    return errorCount / testCount
def classifyPerson():
    """Ask for three numbers, classify them and print the matching description.

    The answers are scaled with the ``minVals`` and ``ranges`` returned by
    ``normalization.autoNorm`` before being classified; the result minus one
    indexes the description list.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    questions = (
        "percentage of time spent playing video games?",
        "frequent flier miles earned per year?",
        "liters of ice cream consumed per year?",
    )
    # Prompts are asked in this order; the feature vector uses a different one.
    percentTats, ffMiles, iceCream = [float(input(q)) for q in questions]

    datingDataMat, classLabelVector = txtfile2matrix.file2matrix(
        "C:/Users/jasper/iCloudDrive/newbie_programming/python/ml/chapter2/datingTestSet.txt",
        3)
    datingLabels = txtfile2matrix.char2int(classLabelVector)
    normMat, ranges, minVals = normalization.autoNorm(datingDataMat)

    inArr = np.array([ffMiles, percentTats, iceCream], dtype=float)
    scaled = (inArr - minVals) / ranges
    classifierResult = kNN.classify0(scaled, normMat, datingLabels, 3)
    print("You will probably like this person: %s"
          % (resultList[classifierResult - 1]))
def classifyPerson():
    """Ask for three numbers, classify them and print the matching description.

    The answers are scaled with the ``minVals`` and ``ranges`` returned by
    ``autoNorm`` before being classified against ``datingTestSet2.txt``.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    questions = (
        "percent of time spent playing video games?",
        "frequent flier miles earned per year?",
        "liters of ice Cream consumed per year?",
    )
    percentTats, ffMiles, iceCream = [float(input(q)) for q in questions]

    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')  # load the data
    normDataSet, ranges, minVals = autoNorm(datingDataMat)  # normalise it

    inArr = np.array([ffMiles, percentTats, iceCream])
    scaled = (inArr - minVals) / ranges
    classifierResult = classify0(scaled, normDataSet, datingLabels, 3)
    print("You will probably like this person:", resultList[classifierResult - 1])
def classifyPerson():
    """Ask for three numbers, classify them and print the matching description.

    The answers are scaled with the ``minVals`` and ``ranges`` returned by
    ``kNN.autoNorm`` before being classified against ``datingTestSet2.txt``.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent filter miles earned per year"))
    iceCream = float(input("liters of ice cream consumed per year"))
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    inArr = numpy.array([ffMiles, percentTats, iceCream])
    # The third argument is the label vector; the raw data matrix was being
    # passed here by mistake.
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normMat,
                                     datingLabels, 3)
    # Concatenate the description; "...".resultList was an attribute lookup on
    # a str and raised AttributeError.
    print("you will probably like this person: "
          + resultList[classifierResult - 1])
def classifyPerson():
    """Ask for three numbers, classify them and print the matching description.

    The answers are scaled with the minimum and range values returned by
    ``kNN.autonorm`` before being classified against ``datingTestSet2.txt``.
    The normalised matrix and the raw classifier result are printed as
    diagnostics.
    """
    # Use raw_input where it exists (Python 2), otherwise input (Python 3),
    # so the answer is always read as plain text.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    resultlist = ['not at all', 'in small doss', 'in large does']
    percentTats = float(read_line("percentage of time spent playing video game?"))
    ffMiles = float(read_line("frequent filer miles earned per year?"))
    icecream = float(read_line("liters of ice cream consumed per year?"))
    datingDataMat, datingLabel = kNN.file2matrix('datingTestSet2.txt')
    normat, rangeval, minval = kNN.autonorm(datingDataMat)
    # Wrap in a tuple so the value is always treated as a single argument.
    print("normat:%s" % (normat,))
    inX = array([ffMiles, percentTats, icecream])
    retVal = kNN.classify0((inX - minval) / rangeval, normat, datingLabel, 3)
    print("retval[%d]" % (retVal))
    # Subtract one before indexing the three descriptions, as the sibling
    # classifyPerson implementations do; indexing with the raw result is off
    # by one. Presumably the labels are 1-based -- TODO confirm against the
    # data file.
    print("resutl:%s " % (resultlist[retVal - 1]))
def classifyperson():
    """Ask for three numbers, classify them and print the matching description.

    The answers are scaled with the ``minvals`` and ``ranges`` returned by
    ``kNN.autoNorm`` before being classified against ``datingTestSet.txt``.
    """
    result = ['not at all', 'small doses', 'large dose']
    questions = (
        'frequent filter miles earned per year:',
        '% of time spent on game:',
        'liters of ice cream consumed per year:',
    )
    ffmiles, gametimepercent, icecream = [float(input(q)) for q in questions]

    datamat, labels = kNN.file2matrix('datingTestSet.txt')
    normmat, ranges, minvals = kNN.autoNorm(datamat)

    raw = array([ffmiles, gametimepercent, icecream])
    inarry = (raw - minvals) / ranges
    classifyresult = kNN.classify0(inarry, normmat, labels, 3)
    print("you like this person:", result[classifyresult - 1])
    return
def classifyPerson():
    """Ask for three numbers, classify them and print the matching description.

    The answers are scaled with the ``minVals`` and ``ranges`` returned by
    ``autoNorm`` before being classified against ``datingTestSet2.txt``.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input('percentage of time spent playing video games?'))
    ffMiles = float(input('frequent flier miles earned per year?'))
    iceCream = float(input('liters of ice cream consumed per year?'))
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normData, ranges, minVals = autoNorm(datingDataMat)
    inArr = array([ffMiles, percentTats, iceCream])
    # Scale the query like the reference data. Passing the raw values against
    # a normalised matrix left ranges and minVals unused and compared values
    # on different scales.
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normData,
                                     datingLabels, 3)
    print('You will probably like this person: ', resultList[classifierResult - 1])
def classifyPerson():
    """Ask for three numbers, classify them and print the matching description.

    The answers are scaled with the minimum and range values returned by
    ``kNN.autoNorm`` before being classified against ``datingTestSet2.txt``.
    """
    # Use raw_input where it exists (Python 2), otherwise input (Python 3),
    # so the answer is always read as plain text.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    resultlist = ['not at all', 'in small doses', 'in large doses']
    games = float(read_line(
        "percentage of time spent playing video games?"))
    flymiles = float(read_line(
        "frequent flier miles earned per year?"))
    icecream = float(read_line(
        "liters of ice cream consumed per year?"))
    datingdata, datinglabel = kNN.file2matrix('datingTestSet2.txt')
    normdata, ranges, minv = kNN.autoNorm(datingdata)
    inarr = array([flymiles, games, icecream])
    result = kNN.classify0((inarr - minv) / ranges, normdata, datinglabel, 3)
    # One pre-formatted argument prints the same text on Python 2 and 3.
    print("you will probably like this person: %s" % resultlist[result - 1])
def datingClassTest():
    """Hold-out test of the classifier on ``datingTestSet.txt``.

    The first 55% of the normalised rows are classified against the remaining
    rows; each result and the final error rate are printed.
    """
    hoRatio = 0.550
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = kNN.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m], 3)
        # Call-form print with one argument works on Python 2 and 3.
        print('the classifier came back with: %s, the real answer is: %s'
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
def showDatingInput():
    """Read one test sample from the user and print its predicted category.

    The sample is rescaled with the range and minimum returned by
    ``kNN.autoNum`` for 'datingTestSet2.txt' and classified with
    ``kNN.classify0`` using k=5.
    """
    verdicts = ['not at all', 'in small doses', 'in large doses']

    # Read the test data from the user.
    game_share = float(raw_input('the percentange of time spent playing vedio games:'))
    miles = float(raw_input('the miles earned every year:'))
    ice_cream = float(raw_input('the ice cream consumed per year:'))
    sample = [miles, game_share, ice_cream]

    data, tags = kNN.file2matrix('datingTestSet2.txt')
    scaled, span, low = kNN.autoNum(data)

    # `sample` is a plain list; the subtraction relies on `low` supporting
    # element-wise arithmetic with a list (presumably a numpy array).
    scaled_sample = (sample - low) / span
    kind = kNN.classify0(scaled_sample, scaled, tags, 5)

    # Single-argument print emits the same text as the two-item print statement.
    print('This guy is mostly' + ' ' + verdicts[int(kind) - 1])
def dating_class_test():
    """Hold-out evaluation of the kNN classifier on the dating data set.

    Loads DATING_DATA, normalises it with ``auto_norm`` and classifies the
    first ``HO_RATIO`` fraction of rows against the remaining rows using K
    neighbours. Every prediction and the final error rate are printed.
    """
    samples, answers = file2matrix(DATING_DATA)
    scaled, _ranges, _min_val = auto_norm(samples)

    total = scaled.shape[0]
    held_out = int(total * HO_RATIO)

    # Everything after the held-out rows is the reference set.
    reference_rows = scaled[held_out:total, :]
    reference_answers = answers[held_out:total]

    misses = 0.0
    for row in range(held_out):
        guess = classify0(scaled[row, :], reference_rows, reference_answers, K)
        print('the classifier came back with: %d, the real answer is: %d' % (guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print("the total error rate is: %f" % (misses / float(held_out)))
def datingClassTest():
    """Hold-out evaluation of the kNN classifier on 'datingTestSet.txt'.

    The data is loaded with ``fileLoad.filematrix0`` and normalised with
    ``norm.normal``. The first 10% of rows are classified (k=3) against the
    rest; each prediction and the final error rate are printed.
    """
    test_fraction = 0.10
    samples, answers = fileLoad.filematrix0('datingTestSet.txt')
    scaled, _ranges, _min_vals = norm.normal(samples)

    total = scaled.shape[0]
    test_count = int(total * test_fraction)
    reference_rows = scaled[test_count:total, :]
    reference_answers = answers[test_count:total]

    misses = 0.0
    for row in range(test_count):
        guess = kNN.classify0(scaled[row, :], reference_rows, reference_answers, 3)
        print("the classifier came back with: %d, the real answer is: %d" % (guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print("the total error rate is %f" % (misses / float(test_count)))
def datingClassTest():
    """Hold-out evaluation of the kNN classifier on 'datingTestSet2.txt'.

    The first 10% of the normalised rows are classified (k=3) against the
    remaining rows; each prediction and the final error rate are printed.
    """
    hold_out = 0.10
    samples, answers = file2matrix('datingTestSet2.txt')
    scaled, _ranges, _lows = autoNorm(samples)

    total = scaled.shape[0]
    test_count = int(total * hold_out)
    reference_rows = scaled[test_count:total, :]
    reference_answers = answers[test_count:total]

    misses = 0
    for row in range(test_count):
        guess = kNN.classify0(scaled[row, :], reference_rows, reference_answers, 3)
        print("the classifier came back with: %d, the real answer is: %d" % (guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print("the total error rate is: %f" % (misses / float(test_count)))
def classifyPerson():
    """Read someone's information and predict how much she will like this person.

    Three numbers are read from standard input, 'datingTestSet.txt' is loaded
    and normalised with ``kNN.autoNorm``, and the query is classified with
    ``kNN.classify0`` using k=3. The returned class is used as a 1-based index
    into ``resultList``.
    """
    # Local import keeps this block self-contained; numpy is already required
    # by the matrix arithmetic performed on the data set.
    from numpy import array

    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent fliter miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))

    datingDataMat, datingLabels = file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)

    # The training rows are normalised, so the query has to be rescaled with
    # the same minimums and ranges before distances are computed. Passing the
    # raw list made the comparison against normMat meaningless.
    inArr = (array([ffMiles, percentTats, iceCream]) - minVals) / ranges

    classifierResult = kNN.classify0(inArr, normMat, datingLabels, 3)
    print("you will probably like this person : %s" % (resultList[classifierResult - 1]))
def datingSetTest(horate):
    """Hold-out test of the kNN classifier on 'datingTestSet2.txt'.

    The first ``int(m * horate)`` rows of the normalised data are classified
    (k=3) against the remaining rows. The raw data, the normalised data, every
    prediction and the final error rate are printed.

    Args:
        horate: fraction of the rows to use as test vectors.
    """
    datingDataMat, datingLabel = kNN.file2matrix('datingTestSet2.txt')
    print("data[%d]:%s,\nlabel:%s" % (datingDataMat.shape[0], datingDataMat, datingLabel))

    datingDataMat, rangeval, minval = kNN.autonorm(datingDataMat)
    print("data[%d]:%s" % (datingDataMat.shape[0], datingDataMat))

    m = datingDataMat.shape[0]
    count = int(m * horate)
    errcount = 0.0
    # Start at row 0: the loop used to begin at 1, which skipped the first test
    # vector while the error rate below was still divided by `count`.
    for i in range(count):
        retVal = kNN.classify0(datingDataMat[i, :], datingDataMat[count:m, :], datingLabel[count:m], 3)
        print("orignal:%d,calculate:%d" % (datingLabel[i], retVal))
        if retVal != datingLabel[i]:
            errcount += 1.0
            print("error.")
    print("error rate:%f" % (errcount / float(count)))
def datingClassTest():
    """Hold-out evaluation of the kNN classifier on "datingTestSet2.txt".

    ``hold_out`` is the fraction of rows used as the test set (10%). Those
    rows are classified with k=5 against the remaining rows; each prediction
    and the final error rate are printed.
    """
    hold_out = 0.10
    samples, answers = file2matrix("datingTestSet2.txt")
    scaled, _ranges, _min_vals = autoNorm(samples)

    total = scaled.shape[0]
    test_count = int(total * hold_out)
    reference_rows = scaled[test_count:total, :]
    reference_answers = answers[test_count:total]

    misses = 0.0
    for row in range(test_count):
        guess = kNN.classify0(scaled[row, :], reference_rows, reference_answers, 5)
        print("the classifier came back with %d, the real answer is: %d" % (guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print("the total error rate is: %f" % (misses / float(test_count)))
def datingClassTest():
    """Hold-out evaluation of the kNN classifier on 'datingTestSet.txt'.

    The first 10% of the normalised rows are classified (k=3) against the
    remaining rows; each prediction and the final error rate are printed.
    """
    hold_out = 0.10
    samples, answers = kNN.file2matrix('datingTestSet.txt')
    scaled, _ranges, _min_vals = kNN.autoNorm(samples)

    total = scaled.shape[0]
    test_count = int(total * hold_out)
    reference_rows = scaled[test_count:total, :]
    reference_answers = answers[test_count:total]

    misses = 0.0
    for row in range(test_count):
        guess = kNN.classify0(scaled[row, :], reference_rows, reference_answers, 3)
        print("the classifier came back with: {0}, the real answer is: {1}".format(guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print("the total error rate is: {0}".format(misses / float(test_count)))
def calcErrorRate(trainingFeatureSet, trainingLabelSet, testDataDir):
    """Return the fraction of files in ``testDataDir`` that are misclassified.

    Each file is converted with ``img2vector`` and classified with
    ``kNN.classify0`` using ``KNN_K`` neighbours. The first character of the
    file name is taken as the true label. Every mismatch is printed.

    Args:
        trainingFeatureSet: training samples passed through to ``kNN.classify0``.
        trainingLabelSet: labels of the training samples.
        testDataDir: directory whose files are the test samples.

    Returns:
        Number of mismatches divided by the number of test files.
    """
    names = os.listdir(testDataDir)
    total = len(names)
    wrong = 0.0
    for name in names:
        sample = img2vector(os.path.join(testDataDir, name))
        actual = name[0]  # true label of the test sample
        guess = kNN.classify0(sample, trainingFeatureSet, trainingLabelSet, KNN_K)
        if guess == actual:
            continue
        wrong += 1
        # Joining with single spaces reproduces the multi-item print statement.
        print(" ".join(["Predicted : ", str(guess), " True : ", actual, " File : ", name]))
    return wrong / total
def classifyHandWriting(path, trainingPath):
    """Classify the image file at ``path`` using the samples in ``trainingPath``.

    Every file in ``trainingPath`` becomes one 1024-column row of the training
    matrix via ``kNN.imgVector``. Its label is the part of the file name before
    the first '.' and then before the first '_'. The image at ``path`` is
    classified with ``kNN.classify0`` using k=3.

    Args:
        path: file to classify.
        trainingPath: directory containing the training files.

    Returns:
        Whatever ``kNN.classify0`` returns for the query.
    """
    names = listdir(trainingPath)
    trainingMat = zeros((len(names), 1024))
    labels = []
    for row, name in enumerate(names):
        trainingMat[row, :] = kNN.imgVector(trainingPath + '/' + name)
        stem = name.strip().split('.')[0]
        labels.append(stem.split('_')[0])
    return kNN.classify0(kNN.imgVector(path), trainingMat, labels, 3)
def testKNN3(self):
    """Train on 'trainingDigits' and assert the first test file classifies as 0.

    Each training file becomes one 1024-column row via ``kNN.img2vector``; its
    label is the integer before the '_' in the file name. Only the first entry
    returned by ``os.listdir('testDigits')`` is classified (k=3).
    """
    train_names = os.listdir('trainingDigits')
    trainingMat = numpy.zeros((len(train_names), 1024))
    hwLabels = []
    for row, name in enumerate(train_names):
        stem = name.split('.')[0]  # take off .txt
        hwLabels.append(int(stem.split('_')[0]))
        trainingMat[row, :] = kNN.img2vector('trainingDigits/%s' % name)

    first_test = os.listdir('testDigits')[0]
    stem = first_test.split('.')[0]  # take off .txt
    # The digit is parsed but not used: the assertion below is hard-coded to 0.
    _digit = int(stem.split('_')[0])
    sample = kNN.img2vector('testDigits/%s' % first_test)

    c = kNN.classify0(sample, trainingMat, hwLabels, 3)
    self.assertEqual(c, 0)
def handwritingClassTest():
    """Evaluate the kNN digit classifier on the files under 'testDigits'.

    Every file under 'trainingDigits' is turned into one 1024-column row with
    ``img2vector``. Each test file is then classified with ``kNN.classify0``
    (k=3) and compared with the digit encoded in its file name. Every
    prediction, the number of errors and the error rate are printed.
    """
    def digit_of(file_name):
        # File names look like "0_3.txt": the part before "_" is the real digit.
        return int(file_name.split('.')[0].split('_')[0])

    train_names = os.listdir('trainingDigits')  # listing of the training directory
    trainingMat = zeros((len(train_names), 1024))
    hwLabels = []  # digit of each training row
    for row, name in enumerate(train_names):
        hwLabels.append(digit_of(name))
        trainingMat[row, :] = img2vector('trainingDigits/%s' % name)

    test_names = os.listdir('testDigits')
    test_count = len(test_names)
    wrong = 0.0
    for name in test_names:
        actual = digit_of(name)
        # `sample` is the vector to classify; trainingMat acts as the reference
        # set and hwLabels holds the matching digits.
        sample = img2vector('testDigits/%s' % name)
        guess = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real anser is: %d " % (guess, actual))
        if guess != actual:
            wrong += 1.0
    print("\nthe total number of errors is: %d" % wrong)
    print("\nthe total error rate is: %f" % (wrong / float(test_count)))
#kNN简单例子 from numpy import * import operator random.rand(4,4) randMat=mat(random.rand(4,4)) randMatI=randMat.I import kNN group,labels=kNN.createDataSet() print(group) print(labels) result=kNN.classify0([0,0],group,labels,3) print(result) print(kNN.classify0([1,1],group,labels,3)) #reload(kNN) datingDataMat,datingLabels=kNN.file2matrix('datingTestSet2.txt') print(datingDataMat) print(datingLabels[0:20]) #kNN约会推荐系统 import matplotlib import matplotlib.pyplot as plt fig=plt.figure() ax=fig.add_subplot(111) ax.scatter(datingDataMat[:,1],datingDataMat[:,2])
# Classify every training row with kNN (k=4) and write the predictions to
# out.csv under a "Label" header.
train = train_raw.values
test = test_raw.values

# Column 0 holds the labels. Take a copy: a plain slice is a view into `train`,
# so the in-place clamp below would otherwise overwrite the labels as well.
labels = train[:, 0].copy()
i, j = train.shape

# Clamp every value greater than 1 down to 1 in one vectorised pass. This
# replaces the element-by-element double loop.
train[train > 1] = 1

# NOTE(review): each row is classified against the full training matrix, which
# contains the row itself and still includes column 0; `test` is never used.
# Confirm whether `test` rows and feature columns only were intended.
result = []
for iii in range(i):
    # `result` is a list, so results are appended; the earlier
    # `result[iii, 0] = ...` raised TypeError on a list.
    result.append(kNN.classify0(train[iii:iii + 1, :], train, labels, 4))

# NOTE(review): "wb" is what the Python 2 csv module expects; under Python 3
# this must become open("out.csv", "w", newline="").
with open("out.csv", "wb") as predictions_file:
    open_file_object = csv.writer(predictions_file)
    # writerow writes the header as one row; writerows(["Label"]) iterated over
    # the string and produced one row per character.
    open_file_object.writerow(["Label"])
    # The predictions were computed but never written; emit one per row.
    open_file_object.writerows([label] for label in result)
# Classifying movie genres with k-Nearest Neighbors import kNN group, labels = kNN.createDataSet() print(group) print(labels) result = kNN.classify0([0, 0], group, labels, 3) print(result)
import kNN
import numpy as np
import operator

# kNN example
# Four 2-D training points: the first two are labelled 'A', the last two 'B'.
data_set = np.array([[1.0, 1.0], [1.0, 0.9], [0.1, 0.1], [0.1, 0.2]])
labels = ['A', 'A', 'B', 'B']

# Classify the query point using the 3 nearest neighbours.
in_x = [0.2, 0.3]
classfied = kNN.classify0(in_x, data_set, labels, 3)
print('in_x classified: %s' % classfied)

import matplotlib.pyplot as plt

# Plot both classes in different colours.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('kNN Algorithm Example')
ax.scatter(data_set[0:2, 0], data_set[0:2, 1], c='r', label='A')
ax.scatter(data_set[2:4, 0], data_set[2:4, 1], c='b', label='B')

# Write each point's label slightly to the left of and below the marker.
for i in range(4):
    ax.text(data_set[i][0]-0.04, data_set[i][1]-0.01, labels[i])

# Draw the query point. The colour is hard-coded to 'b' rather than derived
# from the classification result.
ax.scatter([0.2], [0.3], c='b')
ax.text(0.21, 0.29, '<-- Data classfied here')
ax.legend(loc='upper left')
plt.show()
FigDating = plt.figure()

# Sample data set from the kNN module: 2-D points, each tagged 'A' or 'B'.
group, labels = kNN.createDataSet()

# Colour used for each label, and the per-point colour list derived from it.
colormap1 = {'A': 'red', 'B': 'blue'}
ColoredGroupLabels = [colormap1[things] for things in labels]

# Subplot position 311 of the figure, with fixed axis limits.
ax1 = FigDating.add_subplot(311, xlim=(-0.1, 1.1), ylim=(-.05, 1.15))

# Scatter the known points, coloured according to their label.
ax1.scatter(group[:, 0], group[:, 1], s=20, c=ColoredGroupLabels, marker='o')

# Classify three new points (k=3) and draw each one as an 'x' in the colour of
# its predicted label. `testvector` and `answer` keep the last point's values
# after the loop, exactly as the unrolled version left them.
for testvector in ([.2, .2], [.5, .5], [.75, .75]):
    answer = kNN.classify0(testvector, group, labels, 3)
    ax1.scatter(testvector[0], testvector[1], s=20, c=colormap1[answer], marker='x')

'''Perform K-Nearest Neighbor classification on the datingTestSet2 data set. Do not forget to include the data set in the working directory'''
# Load data values and labels from datingTestSet2.txt.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
datingLabelArray = np.array(datingLabels)
import kNN

# Build the sample data set provided by the kNN module and show it.
group, labels = kNN.createDataSet()
print (group)
print (labels)

# Classify a single query point.
# NOTE(review): classify0 is called without a neighbour count here, unlike the
# four-argument calls elsewhere; confirm this kNN module defaults k.
oneToTest=[0.0,1.0]
print (kNN.classify0(oneToTest,group,labels))
def testKNN(self):
    """Assert that the point [0, 0] is classified as 'B' with k=3."""
    samples, tags = createDataSet()
    predicted = kNN.classify0([0, 0], samples, tags, 3)
    self.assertEqual(predicted, 'B')
#coding:utf-8 import kNN group, labels = kNN.createDataSet() print kNN.classify0([0,0],group,labels,3) datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt') print datingDataMat print datingLabels[0:20] ''' import matplotlib import matplotlib.pyplot as plt from numpy import array fig = plt.figure() ax = fig.add_subplot(121) ax.scatter(datingDataMat[:,1], datingDataMat[:,2], 15.0*array(datingLabels), 15.0*array(datingLabels)) ax.axis([-2, 25, -0.2, 2.0]) plt.xlabel(u'玩视频游戏所耗时间百分比') plt.ylabel(u'每周消费的冰淇淋公升数') ax = fig.add_subplot(122) ax.scatter(datingDataMat[:,0], datingDataMat[:,1], 15.0*array(datingLabels), 15.0*array(datingLabels)) ax.axis([-5000, 100000, -1, 25]) plt.xlabel(u'每年获取的飞行常客里程数') plt.ylabel(u'玩视频游戏所耗时间百分比') plt.show() ''' normMat, ranges, minVals = kNN.autoNorm(datingDataMat) print normMat
import kNN
import numpy
import matplotlib
import matplotlib.pyplot as plt

# Smoke test: classify [0, 0] against two labelled points with k=1.
print kNN.classify0([0, 0], numpy.array([[1, 0], [2, 1]]), ['A', 'B'], 1)

# Load the feature matrix and labels from 'test2.txt'.
datingDataMat, datingLabels = kNN.file2matrix('test2.txt')
# datingDataMat = numpy.zeros((3,3))
# datingDataMat[2,:] = [2,1,0]
# print datingDataMat ,datingDataMat[:,2]
print datingLabels
print 15.0*numpy.array(datingLabels)

# Scatter column 1 against column 2. The labels scaled by 15 are passed as
# the third and fourth positional arguments, which matplotlib's scatter takes
# as marker size and colour.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],15.0*numpy.array(datingLabels),15.0*numpy.array(datingLabels))
plt.show()
# ax.scatter([2,3,1],[3,1,2])
import os

import kNN

# Directory that contains this script, so the data file is located relative to
# the script rather than to the current working directory.
CURRENT_DIR = os.path.dirname(__file__)

# Classify a 3-component query against the sample data set using 3 neighbours.
groups, labels = kNN.createDataset()
print(kNN.classify0([0, 0, 0], groups, labels, 3))

# Let os.path.join insert the separator. The previous CURRENT_DIR + '/...' form
# produced the root-level path '/datingTestSet.txt' whenever dirname(__file__)
# was the empty string (script started by bare file name).
dataSetFile = os.path.join(CURRENT_DIR, 'datingTestSet.txt')
datingDataMat, datingLabels = kNN.file2matrix(dataSetFile)

# NOTE(review): this query looks like a dating sample but is still classified
# against `groups`/`labels`; datingDataMat and datingLabels are loaded and not
# used. Confirm which data set was intended.
print(kNN.classify0([40920, 8.326976, 0.953952], groups, labels, 3))