# Load the dating data set and preview the matrix and the first 20 labels.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels[0:20])

# Scatter plot of the first two feature columns; the label value, scaled by
# 15, drives both the marker size and the marker colour.
fig = plt.figure()
ax = fig.add_subplot(111)
scaledLabels = 15.0 * np.array(datingLabels)
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           s=scaledLabels, c=scaledLabels)
plt.show()

# Normalization
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
print(normMat)
print(ranges)
print(minVals)

# Error rate of the classifier on the dating data
kNN.datingClassTest()

# Prediction
kNN.classifyPerson()

# Handwritten digit recognition
# Load one sample and preview its first 31 values.
testVector = kNN.img2vector('dataset/testDigits/0_13.txt')
print(testVector[0, 0:31])

# Handwriting classifier test
kNN.handwritingClassTest()
import sys

# Extend the module search path before importing kNN (presumably this is the
# directory that holds kNN.py -- confirm on the target machine).
sys.path.append("G:\\Roliy_ML\\")

import kNN

kNN.datingClassTest()
#print kNN2.classifyPerson(10000, 10, 0.5)
output: >>> normMat array([[ 0.33060119, 0.58918886, 0.69043973], [ 0.49199139, 0.50262471, 0.13468257], [ 0.34858782, 0.68886842, 0.59540619], ..., [ 0.93077422, 0.52696233, 0.58885466], [ 0.76626481, 0.44109859, 0.88192528], [ 0.0975718 , 0.02096883, 0.02443895]]) >>> ranges array([ 8.78430000e+04, 2.02823930e+01, 1.69197100e+00]) >>> minVals array([ 0. , 0. , 0.001818]) """ KNN.datingClassTest() """ output: the total error rate is: 0.080000 16.0 """ KNN.classifyPerson() """ output: percentage of time spent playing video games?4 frequent flier miles earned per year?5569 liters of ice cream consumed per year?1.213192 You will probably like this person: in small doses """
# 2018-05-29-15:00 page27
# -*- coding: UTF-8 -*-
# Program description
# Title:   page 27 -- test program/algorithm for the classifier
# Content: use the front part of the data set as the test set and the rest as
#          the training set; print each prediction next to the real class and
#          compute the prediction error rate and the number of errors
# Date:    2018-05-29
'''
Recorded run output (kept verbatim; the two Chinese lines read
"number of wrong predictions: 5.0" and "size of the prediction set: 100"):

Running /home/yzn/PycharmProjects/kNN_meet_0529_4/main.py
line 22,the classifier came back with: 1, the real answer is: 2
line 74,the classifier came back with: 3, the real answer is: 1
line 83,the classifier came back with: 3, the real answer is: 1
line 91,the classifier came back with: 2, the real answer is: 3
line 99,the classifier came back with: 3, the real answer is: 1
预测出错的次数是 5.0
预测集的大小是 100
the total error rate is: 5/100 = 0.050000
'''

import kNN
#from numpy import *
#import operator

# Run the classifier test code; per the original author's note it has no
# return value.
kNN.datingClassTest()
# Plot the first point (created and classified before this statement).
ax1.scatter(testvector[0], testvector[1], s=20, c=colormap1[answer], marker='x')

# Second and third points: create, classify with k=3 and plot each one.
for testvector in ([.5, .5], [.75, .75]):
    answer = kNN.classify0(testvector, group, labels, 3)
    ax1.scatter(testvector[0], testvector[1], s=20, c=colormap1[answer], marker='x')

'''Perform K-Nearest Neighbor classification on the datingTestSet2 data set. Do not forget to include the data set in the working directory'''

# Load data values and labels from datingTestSet2.txt.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
datingLabelArray = np.array(datingLabels)

# One colour per label value; build the per-row colour list from it.
colormap2 = {1: 'red', 2: 'blue', 3: 'green'}
ColoredDatingLabel = [colormap2[things] for things in datingLabelArray]

# Second subplot: raw data, first two columns.
ax2 = FigDating.add_subplot(312, xlim=(0, 100000), ylim=(0, 25))
ax2.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
            s=20, c=ColoredDatingLabel, marker='o')

# Third subplot: the same two columns after normalization.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
ax3 = FigDating.add_subplot(313, xlim=(0, 1), ylim=(0, 1))
ax3.scatter(normMat[:, 0], normMat[:, 1],
            s=20, c=ColoredDatingLabel, marker='o')
plt.show()

# NOTE(review): 0.1 is presumably the hold-out fraction -- confirm against
# kNN.datingClassTest.
NumberBad = kNN.datingClassTest(0.1)
def test_acquire(self):
    """Invoke ``kNN.datingClassTest()``; no assertion is made on its result."""
    kNN.datingClassTest()
import matplotlib.pyplot as plt
#import Least_squares_fitting as lsf
import numpy as np
from scipy import optimize

# Sweep k = 1..100, record the value returned by kNN.datingClassTest(k) for
# each k (plotted as 'error'), and scatter-plot the result against k.
fig = plt.figure()
ax = fig.add_subplot(111)
plt.xlabel('k')
plt.ylabel('error')

# The tested k values are 1..100, so the x-axis must use that range too.
# The original arange(0, 100) put every point one unit left of its real k.
x = np.arange(1, 101, 1.0)
x_list = []
y = []
for k in range(1, 101):
    s = kNN.datingClassTest(k)
    x_list.append(k)
    y.append(s)

# Plot against the k values actually used, so x and y are paired by
# construction.
ax.scatter(x_list, y)

# linear fitting (disabled; needs the Least_squares_fitting import above)
"""a0,a1 = lsf.linear_regression(x,y)
_X = [0, 100]
_Y = [a0 + a1 * m for m in _X]
plt.plot(x, y, 'ro', _X, _Y, 'b', linewidth=2)
plt.title("y = {} + {}x".format(a0, a1))
plt.show()
"""
def main5():
    """Test the classifier on the dating-website data."""
    kNN.datingClassTest()
'datingTestSet2.txt' ) # Load data values and labels from the datingTestSet2.txt datingLabelArray = np.array(datingLabels) colormap2 = {1: 'red', 2: 'blue', 3: 'green'} #Define color map with 3 colors ColoredDatingLabel = [] for things in datingLabelArray: #Get a vector representing the colors ColoredDatingLabel.append(colormap2[things]) #for each data item ax2 = FigDating.add_subplot(312, xlim=(0, 100000), ylim=(0, 25)) #create second sub plot ax2.scatter(datingDataMat[:, 0], datingDataMat[:, 1], s=20, c=ColoredDatingLabel, marker='o') #Plot a scatter diagram for the data loaded normMat, ranges, minVals = kNN.autoNorm(datingDataMat) #normalize the data ax3 = FigDating.add_subplot(313, xlim=(0, 1), ylim=(0, 1)) #create third sub plot ax3.scatter(normMat[:, 0], normMat[:, 1], s=20, c=ColoredDatingLabel, marker='o') #Plot normalized data plt.show() NumberBad = kNN.datingClassTest(0.1)
from numpy import array

import kNN

try:
    # Python 3.4+: reload() is no longer a builtin and lives in importlib.
    from importlib import reload
except ImportError:
    # Python 2: importlib has no reload(); the builtin is used instead.
    pass

# Pick up edits made to kNN.py since it was first imported, then print what
# the test run and the interactive classifier return. A single parenthesized
# argument prints the same on Python 2 and Python 3.
reload(kNN)
print(kNN.datingClassTest())
print(kNN.classifyPerson())
# Marker settings shared by both scatter plots: fixed size, one colour per
# label, round markers.
scatterStyle = dict(s=20, c=ColoredDatingLabel, marker='o')

# Plot the first two columns of the raw data, coloured as per the labelling.
ax2.scatter(datingDataMat[:, 0], datingDataMat[:, 1], **scatterStyle)

# Normalize the data set for the third subplot.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)

# Third subplot with explicit axes limits, showing the normalized data with
# the same colouring.
ax3 = FigDating.add_subplot(313, xlim=(0, 1), ylim=(0, 1))
ax3.scatter(normMat[:, 0], normMat[:, 1], **scatterStyle)
plt.show()

# NOTE(review): 0.1 is presumably the hold-out fraction -- confirm against
# kNN.datingClassTest.
totalErorr = kNN.datingClassTest(0.1)
# lowercase ColoredGroupLabels