def work(self):
    """Generate ``self.number`` random dating records and write them to 'datingtest'.

    Each record has three random features (miles, game time, ice cream).
    Each feature column is passed through ``kNN.autoNorm`` on its own, and
    the label is 1, 2 or 3 depending on which normalised feature is the
    largest (ties resolve in that order). Records are written
    tab-separated, one per line, using the raw feature values.
    """
    mile_vals = []
    game_vals = []
    cream_vals = []
    # Keep the three draws interleaved per sample so the random sequence
    # is consumed in the same order as before.
    for _ in range(self.number):
        mile_vals.append(random.randint(500, 3000))
        game_vals.append(random.random())
        cream_vals.append(random.randint(10, 150))

    def first_row(values):
        # Normalise one feature column and keep row 0 of the result.
        scaled, _span, _lows = kNN.autoNorm(np.array(values))
        return list(scaled[0])

    miles_scaled = first_row(mile_vals)
    games_scaled = first_row(game_vals)
    cream_scaled = first_row(cream_vals)

    tags = []
    for idx in range(self.number):
        candidates = (miles_scaled[idx], games_scaled[idx], cream_scaled[idx])
        top = max(candidates)
        if top == candidates[0]:
            tags.append(1)
        elif top == candidates[1]:
            tags.append(2)
        else:
            tags.append(3)

    with open('datingtest', 'w') as out:
        for idx in range(self.number):
            fields = (mile_vals[idx], game_vals[idx], cream_vals[idx], tags[idx])
            out.write('\t'.join(str(value) for value in fields) + '\n')
def testKNN2(self):
    """Classify one fixed sample against datingTestSet.txt and expect class 1."""
    samples, classes = file2matrix('datingTestSet.txt')
    scaled, span, lows = kNN.autoNorm(samples)
    # Scale the probe with the data set's own minimum and range.
    probe = numpy.array([51052, 4.680098, 0.625224])
    probe = (probe - lows) / span
    predicted = kNN.classify0(probe, scaled, classes, 3)
    self.assertEqual(predicted, 1)
def test_autoNorm(self):
    """Load datingTestSet.txt, normalise it and print the raw and normalised data."""
    features, _labels = kNN.file2matrix("datingTestSet.txt")
    print("\n datingDataMat == %s" % (features,))
    scaled, span, lows = kNN.autoNorm(features)
    report = "\n normDataSet == %s \n ranges == %s \n minVals == %s \n"
    print(report % (scaled, span, lows))
def datingClassTest():
    """Sweep k = 1..20 on the dating data and print the best k.

    The first half of the normalised rows is classified against the second
    half. For every k the error rate and error count are printed; the
    last k with the fewest errors is reported as the best one.
    """
    hold_out = 0.50  # half of the rows are used as the test set
    features, answers = kNN.file2matrix('datingTestSet.txt')
    scaled, _span, _lows = kNN.autoNorm(features)
    total = scaled.shape[0]
    test_count = int(total * hold_out)
    fewest = total
    for neighbours in range(1, 21):
        misses = 0
        for row in range(test_count):
            guess = kNN.classify0(scaled[row, :],
                                  scaled[test_count:total, :],
                                  answers[test_count:total],
                                  neighbours)
            if guess != answers[row]:
                misses += 1.0
        print("when k is %d the total error rate is: %f"
              % (neighbours, misses / float(test_count)))
        print(misses)
        # "<=" means a later k wins ties.
        if misses <= fewest:
            fewest = misses
            best = neighbours
    print(" ".join(("the best k is", str(best))))
def test_knn_dating():
    """Train scikit-learn's k-NN classifier on the dating data and print per-sample hits.

    The data is loaded and normalised with the ``knn`` module, shuffled,
    and split 90% / 10% into disjoint training and test sets. For every
    test sample ``True`` is printed when the prediction matches the label
    and ``False`` otherwise.
    """
    # Load features and labels with the knn module's reader, then normalise.
    x, y = knn.file2matrix('datingTestSet2.txt')
    norm_x, _ranges, _min_vals = knn.autoNorm(x)
    norm_x = np.array(norm_x)
    y = np.array(y)

    # Split a random permutation at the training ratio. The test part must
    # start where the training part ends; slicing from the 10% mark would
    # reuse most of the training rows as test rows.
    train_ratio = 0.9
    size_data = len(norm_x)
    split = int(train_ratio * size_data)
    indices = np.random.permutation(size_data)
    train_idx, test_idx = indices[:split], indices[split:]
    x_train, y_train = norm_x[train_idx], y[train_idx]
    x_test, y_test = norm_x[test_idx], y[test_idx]

    # Build the classifier and fit it on the training data.
    knn_classfier = neighbors.KNeighborsClassifier()
    knn_classfier.fit(x_train, y_train)

    # Evaluate every held-out sample instead of a hard-coded 100.
    result = knn_classfier.predict(x_test)
    for predicted, actual in zip(result, y_test):
        print(bool(predicted == actual))
def autoNormTest():
    """Load the dating data set, normalise it and print the three results."""
    # Raw string: the previous literal depended on unrecognised backslash
    # escapes, which newer Python versions warn about. The path value is
    # unchanged.
    returnMat, classLabelVector = kNN.file2matrix(
        r"C:\Users\yangy\PycharmProjects\MLIA\kNN\datingTestSet2.txt")
    normMat, ranges, minVals = kNN.autoNorm(returnMat)
    print('normMat:', normMat)
    print('ranges:', ranges)
    print('minVals:', minVals)
def datingClassTest():
    """
    Dating-site classifier test.

    Classifies the first 10% of the normalised rows against the remaining
    rows with k = 3, printing each prediction, the error rate and the
    error count.
    :return: None
    """
    # Fraction of the rows used as test data.
    hold_out = 0.1
    # Load the data from file and normalise it.
    features, answers = kNN.file2matrix('datingTestSet2.txt')
    scaled, _span, _lows = kNN.autoNorm(features)
    # Number of rows (first dimension of the matrix).
    total = scaled.shape[0]
    # Number of test samples.
    test_count = int(total * hold_out)
    print('numTestVecs = ', test_count)
    misses = 0.0
    for row in range(test_count):
        guess = kNN.classify0(scaled[row, :],
                              scaled[test_count:total, :],
                              answers[test_count:total],
                              3)
        print('The classifier came back with %d, the real answer is: %d'
              % (guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print('The total error rate is %f' % (misses / float(test_count)))
    print(misses)
def classifyPerson():
    """Prompt for a person's three features and print the predicted liking level.

    The input is scaled with the data set's minimum and range before it is
    classified with k = 3 against the normalised data.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent filter miles earned per year"))
    iceCream = float(input("liters of ice cream consumed per year"))
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    inArr = numpy.array([ffMiles, percentTats, iceCream])
    # The third argument must be the label vector, not the feature matrix.
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normMat,
                                     datingLabels, 3)
    # Labels are 1-based, hence the "- 1" when indexing resultList.
    print("you will probably like this person: "
          + resultList[classifierResult - 1])
def classifyperson():
    """Prompt for three features and print how much the person is likely to be liked."""
    levels = ['not at all', 'small doses', 'large dose']
    miles = float(input('frequent filter miles earned per year:'))
    game_share = float(input('% of time spent on game:'))
    litres = float(input('liters of ice cream consumed per year:'))
    samples, classes = kNN.file2matrix('datingTestSet.txt')
    scaled, span, lows = kNN.autoNorm(samples)
    # Scale the query with the data set's own minimum and range.
    query = array([miles, game_share, litres])
    query = (query - lows) / span
    verdict = kNN.classify0(query, scaled, classes, 3)
    print("you like this person:", levels[verdict - 1])
def classifyPerson():
    """Prompt for three features and print the predicted liking level (k = 3)."""
    levels = ['not at all', 'in small doses', 'in large doses']
    game_share = float(raw_input(
        "percentage of time spent playing video games?"))
    miles = float(raw_input("frequent flier miles earned per year?"))
    litres = float(raw_input("liters of ice cream consumed per year?"))
    samples, classes = kNN.file2matrix('datingTestSet2.txt')
    scaled, span, lows = kNN.autoNorm(samples)
    # Scale the query with the data set's own minimum and range.
    query = (array([miles, game_share, litres]) - lows) / span
    verdict = kNN.classify0(query, scaled, classes, 3)
    # Single-argument print() gives the same output on Python 2 and 3.
    print(" ".join(("you will probably like this person:",
                    levels[verdict - 1])))
def datingClassTest():
    """Classify the first 55% of the dating rows against the rest and print the error rate."""
    hold_out = 0.550
    features, answers = kNN.file2matrix('datingTestSet.txt')
    scaled, _span, _lows = kNN.autoNorm(features)
    total = scaled.shape[0]
    test_count = int(total * hold_out)
    misses = 0.0
    for row in range(test_count):
        guess = kNN.classify0(scaled[row, :],
                              scaled[test_count:total, :],
                              answers[test_count:total],
                              3)
        print('the classifier came back with: %s, the real answer is: %s'
              % (guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print("the total error rate is: %f" % (misses / float(test_count)))
def datingClassTest():
    """Classify the first 10% of the dating rows against the rest and print the error rate."""
    hold_out = 0.10
    features, answers = kNN.file2matrix('datingTestSet2.txt')
    scaled, _span, _lows = kNN.autoNorm(features)
    total = scaled.shape[0]
    test_count = int(total * hold_out)
    misses = 0.0
    for row in range(test_count):
        guess = classify0(scaled[row, :],
                          scaled[test_count:total, :],
                          answers[test_count:total],
                          3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print("the total error rate is: %f" % (misses / float(test_count)))
def classifyPerson():
    """Prompt for three features and print the predicted category (k = 3)."""
    game_share = float(raw_input("percentage of time playing video games?"))
    miles = float(raw_input("frequent flyer miles earned per year?"))
    litres = float(raw_input("liters of ice cream consumed per year?"))
    samples, classes = parseDatingData('datingTestSet.txt')
    scaled, span, lows = kNN.autoNorm(samples)
    # Scale the query with the data set's own minimum and range.
    query = (numpy.array([miles, game_share, litres]) - lows) / span
    verdict = kNN.classify0(inX=query, dataSet=scaled, labels=classes, k=3)
    print("Your probable result: {}".format(intToCategory(verdict)))
def classifyPerson():
    """Ask for a person's features and predict how much she will like them.

    The answers are scaled with the data set's minimum and range before
    classification, so they are comparable with the normalised training
    rows. Previously the raw values were classified directly.
    """
    # Local import: the block only needs NumPy for building the query array.
    import numpy as np

    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input(
        "percentage of time spent playing video games?"))
    ffMiles = float(input("frequent fliter miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    inArr = np.array([ffMiles, percentTats, iceCream])
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normMat,
                                     datingLabels, 3)
    # Labels are 1-based, hence the "- 1" when indexing resultList.
    print("you will probably like this person : %s"
          % (resultList[classifierResult - 1]))
def classifyPerson():
    """Prompt for three features and print the raw class returned by the classifier."""
    # resultList = ['not at all','in small doses', 'in large doses']
    game_share = float(raw_input(
        "percentage of time spent playing video games?"))
    # The python27 environment configured in Sublime cannot read input data.
    miles = float(raw_input("frequent flier miles earned per year?"))
    # Run the *.py file directly instead; a pending input is added at the
    # end of the program.
    litres = float(raw_input("liters of ice cream consumed per year?"))
    samples, classes = kNN.file2matrix('datingTestSet.txt')
    scaled, span, lows = kNN.autoNorm(samples)
    # Scale the query with the data set's own minimum and range.
    query = (array([miles, game_share, litres]) - lows) / span
    verdict = kNN.classify0(query, scaled, classes, 3)
    # Single-argument print() gives the same output on Python 2 and 3.
    print(" ".join(("You will probably like this person: ", str(verdict))))
def TradingClassTest():
    """Classify the first half of the '000875.csv' rows against the second half.

    Prints every prediction with the real answer, then the error rate and
    the error count.
    """
    hold_out = 0.50  # half of the rows are used as the test set
    features, answers = HandleOverData('000875.csv')
    scaled, _span, _lows = kNN.autoNorm(features)
    total = scaled.shape[0]
    test_count = int(total * hold_out)
    misses = 0.0
    for row in range(test_count):
        guess = kNN.classify0(scaled[row, :],
                              scaled[test_count:total, :],
                              answers[test_count:total],
                              3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print("the total error rate is: %f" % (misses / float(test_count)))
    print(misses)
def classifyPerson():
    """Classify one hard-coded sample and print the predicted liking level."""
    levels = ['not at all', 'in small doses', 'in large doses']
    # percentTats = float(input("percentage of time spent playing video games?"))
    # ffMiles = float(input("frequent flier miles earned per year?"))
    # iceCream = float(input("liters of ice cream consumed per year?"))
    samples, classes = file2matrix('resources/datingTestSet2.txt')
    scaled, span, lows = kNN.autoNorm(samples)
    query = array([9289, 9.666576, 1.370330])
    # Scale the query with the data set's own minimum and range.
    query = (query - lows) / span
    verdict = kNN.classify0(query, scaled, classes, 3)
    print("You will probably like this person: %s" % levels[verdict - 1])
def datingClassTest():
    """Classify the first half of the dating rows against the second half.

    Prints every prediction with the real answer, then the error rate and
    the error count.
    """
    hold_out = 0.50  # half of the rows are used as the test set
    features, answers = file2matrix("resources/datingTestSet.txt")
    scaled, _span, _lows = kNN.autoNorm(features)
    total = scaled.shape[0]
    test_count = int(total * hold_out)
    misses = 0.0
    for row in range(test_count):
        guess = kNN.classify0(scaled[row, :],
                              scaled[test_count:total, :],
                              answers[test_count:total],
                              3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (guess, answers[row]))
        if guess != answers[row]:
            misses += 1.0
    print("the total error rate is: %f" % (misses / float(test_count)))
    print(misses)
def classifyPerson():
    """
    Prompt for a person's three features and print the predicted liking level.

    The query is normalised and then classified (k = 3) against the
    normalised data set.
    :return: None
    """
    ll = ["不喜欢的人", "魅力一般的人", "具有魅力的人"]
    x1 = float(raw_input("玩视频游戏所耗时间的百分比?"))
    x2 = float(raw_input("每年获得的飞行常客里程数为?"))
    x3 = float(raw_input("每周消费的冰淇淋的功升数为?"))
    x, y = kNN.file2matrix("datingTestSet2.txt")
    normX, rage, minV = kNN.autoNorm(x)
    # x1 is the game-time percentage and x2 the flier miles (see prompts).
    # NOTE(review): datingTestSet2.txt columns are assumed to be ordered
    # (flier miles, game-time %, ice cream), so x2 goes first - confirm
    # against file2matrix.
    inX = (array([x2, x1, x3]) - minV) / rage
    # The query is normalised, so compare it with the normalised matrix,
    # not the raw one.
    ret = kNN.classify0(inX, normX, y, 3)
    # Single-argument print() gives the same output on Python 2 and 3.
    print(" ".join(("你对的喜欢程度可能是:", ll[int(ret) - 1])))
def datingClassTest():
    """
    Classifier test on the dating-site user data.

    The first 10% of the normalised rows are classified (k = 3) against the
    remaining rows. Each prediction is printed, followed by the error count
    and the error rate as a percentage of the tested rows.
    :return: None
    """
    hoRet = 0.10
    x, y = kNN.file2matrix("datingTestSet.txt")
    normX, rage, minV = kNN.autoNorm(x)
    m = normX.shape[0]
    errorCount = 0
    testNum = int(hoRet * m)
    for i in range(testNum):
        # Slice the labels like the training rows so each row keeps its own
        # label; passing the full label list misaligned them by testNum.
        yr = kNN.classify0(normX[i, :], normX[testNum:m, :], y[testNum:m], 3)
        print("第%d个分类为%s,原来分类为%s" % (i, yr, y[i]))
        if yr != y[i]:
            errorCount += 1
    # The rate is relative to the rows actually tested, not the whole set.
    errorRate = float(errorCount) * 100 / testNum if testNum else 0.0
    print("错误数为:%d,数错误率为:%f%% " % (errorCount, errorRate))
    return
import sys import kNN from pylab import * from numpy import * import numpy as np import matplotlib from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt mat,lab = kNN.file2matrix('datingTestSet2.txt') normMat, ranges, minVals = kNN.autoNorm(mat) def randrange(n, vmin, vmax): return (vmax - vmin)*np.random.rand(n) + vmin fig = plt.figure() ax = fig.add_subplot(111,projection='3d') #ax.scatter(normMat[:,0], normMat[:,1], normMat[:,2], 'o', 'c') n = 1 for c, m, zl, zh in [('r', 'o', -50, -25), ('b', '^', -30, -5)]: xs = randrange(n, 23, 32) ys = randrange(n, 0, 100) zs = randrange(n, zl, zh) ClassSet=lab colorSet = [] for label in ClassSet: if label is '1': colorSet.append('r') elif label is '2': colorSet.append('b') elif label is '3':
print('d: ', d) print('e', e) group, labels = kNN.createDataSet() print("I'm group: ", group) print("I'm labels: ", labels) k = kNN.classify0([0, 0], group, labels, 3) print('I am k: ', k) print('b.min: ', b.min(0)) print('b.max: ', b.max(0)) print('b.mean: ', b.mean(0)) b_m = (b - b.mean(0)) / (b.std(0)) print('b_m: ', b_m) m = b.shape[0] x = (b.max(0) - b.min(0)) b_a = (b - np.tile(b.min(0), (m, 1))) / np.tile(x, (m, 1)) print('b_a: ', b_a) n, o, p = kNN.autoNorm(b) print('n', n) print('o: ', o) print('p: ', p) #fig = plt.figure() #ax = fig.add_subplot(111) #ax.scatter(b[:, 0], b[:, 1], 15*c, 15*c) #plt.show() kNN.handwritingClassTest()
'''
File Name: main
Description: Entry script; mainly calls the functions defined in kNN.py
Author: jwj
Date: 2018/1/18
'''

__author__ = 'jwj'

import kNN

if __name__ == '__main__':
    # Classify the point [0, 0] against the toy data set with k = 3.
    group, labels = kNN.createDataSet()
    label = kNN.classify([0, 0], group, labels, 3)
    print(label)
    # Load the dating data set and normalise it.
    dataArray, dataLabels = kNN.file2matrix("datingTestSet2.txt")
    # NOTE(review): the result of this first call is discarded and the same
    # call is repeated on the next line; it looks redundant - confirm that
    # autoNorm has no side effects before removing it.
    kNN.autoNorm(dataArray)
    normMat, ranges, minVals = kNN.autoNorm(dataArray)
    # print(normMat)
    # kNN.dataClassTest()
    # kNN.classifyPerson()
    kNN.handwritingClassTest()
from numpy import array
import kNN

reload(kNN)
# autoNorm needs the numeric feature matrix; the string literal
# 'datingDataMat' was passed before, so load the data first.
# NOTE(review): the data file name is an assumption - adjust if this script
# is meant to use a different data set.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
# Single-argument print() gives the same output on Python 2 and 3.
print(normMat)
import kNN

# Load the dating data set and normalise it.
dateDataMat, labels = kNN.file2matrix('./ml/2_kNN/datingTestSet2.txt')
normMat, ranges, minVals = kNN.autoNorm(dateDataMat)
# NOTE(review): 0:0 and 2:2 are empty slices, so (assuming normMat is a 2-D
# NumPy array) this prints an empty array - confirm the intended range.
print(normMat[0:0, 2:2])
print(ranges)
print(minVals)
import pandas as pd
import kNN

# Read the tab-separated dating data; column 'd' holds the class label.
data = pd.read_table('datingTestSet2.txt', names=['a', 'b', 'c', 'd'])
normData, ranges, minVals = kNN.autoNorm(data.iloc[:, :-1])
datingLabels = data.d
# The first 10% of the rows are tested against the remaining rows.
numTestVecs = int(0.1 * normData.shape[0])
errorCount = 0
for i in range(numTestVecs):
    classifierResult = kNN.classify0(normData.iloc[i].values,
                                     normData.iloc[numTestVecs:].values,
                                     datingLabels.iloc[numTestVecs:].values,
                                     3)
    print("the classifier came back with: %d, the real answer is: %d" % (
        classifierResult, data.d[i]))
    if (classifierResult != data.d[i]):
        errorCount += 1
# float() forces true division; int / int truncates under Python 2, which
# made the reported rate 0 unless every sample was misclassified.
print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
def autoNormTests():
    """Normalise datingTestSet2.txt and print the matrix, ranges and minimums on one line."""
    features, _labels = kNN.file2matrix('datingTestSet2.txt')
    results = kNN.autoNorm(features)
    scaled, span, lows = results
    # Single-argument print() gives the same output on Python 2 and 3.
    print("%s %s %s" % (scaled, span, lows))
text = document.original_text text = re.sub("((http:|https:|ftp:|ftps:)//[\w$-_.+!*'(),%=]+)", '', text) text = re.sub("(@[\w_]+)", '', text) text = re.sub("(#[\w!$-_.+!*'(),%=]+)", '', text) text = re.sub("\p{P}+", '', text) text = re.sub("[\'\":#,!&]+", '', text) pos = classify_tweet(text).prob('positive') for category in categories: sim = np.append(sim, [pos]) if count == 1: group = np.array([sim]) else: group = np.append(group, [sim], axis = 0) labels.append(document.id) group = kNN.autoNorm(group) tweet_label = {} count = 0 count_exer = 0 for document in doc: count += 1 count_inner = 0 for category in categories: if count_inner == 0: sim = np.array([TrainSet.similar(category, document)]) else: sim = np.append(sim, [TrainSet.similar(category, document)]) count_inner += 1 text = document.original_text
# -*- coding: utf-8 -*-
'''
Created on 29 September 2015

@author: rains
'''
import matplotlib.pyplot as plt
import numpy as np
import os
import kNN

curdir = 'f:\\project\\python\\machine-learning-in-action/Ch02'
mat1, fab1 = kNN.file2matrix(curdir + "/datingTestSet.txt")

# Inspect the training set
# fig=plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(mat1[:,0],mat1[:,1],15.0*np.array(fab1),15.0*np.array(fab1))
# plt.show()

# Test normalisation. autoNorm's result is unpacked so that mat1 stays a
# matrix instead of being rebound to the whole returned tuple.
# NOTE(review): assumes autoNorm returns (normalised matrix, ranges,
# minimums) - confirm against kNN.py.
mat1, ranges1, minVals1 = kNN.autoNorm(mat1)

# Test the accuracy of the simple classifier
kNN.datingClassTest()

# Handwritten digit recognition
#kNN.handwritingClassTest()
## kNN test function group, labels = kNN.createDataSet() result = kNN.classify0([0, 0], group, labels, 3) ### 1.yuehui wangzhan peidui ## load data and dating datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt') ## plot dataSet # fig = plt.figure() # ax = fig.add_subplot(111) # ax.scatter(datingDataMat[:,0], datingDataMat[:,1], s=15.0*num.array(datingLabels), c=15.0*num.array(datingLabels)) # plt.show() ## normalization normMat, valueRange, minVals = kNN.autoNorm(datingDataMat) ## test the model # kNN.datingClassTest() ### 2.shouxie shibei xitong kNN.handwritingClassTest() ## a complete classifier system # kNN.classifyPerson() # print(normMat) # print(range) # print(minVals)
from numpy import array features, labels = kNN.createDataSet() features kNN.classify0([0, 0], features, labels, 3) datamat, labels = kNN.file2matrix('datingTestSet.txt') ''' fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(datamat[:,1], datamat[:,2], 15.0*array(labels), 15.0*array(labels)) plt.show() ''' normmat, ranges, minvals = kNN.autoNorm(datamat) print(normmat) print(ranges) print(minvals) # kNN.datingClassTest(0.2,7) def classifyperson(): result = ['not at all', 'small doses', 'large dose'] ffmiles = float(input('frequent filter miles earned per year:')) gametimepercent = float(input('% of time spent on game:')) icecream = float(input('liters of ice cream consumed per year:')) datamat, labels = kNN.file2matrix('datingTestSet.txt') normmat, ranges, minvals = kNN.autoNorm(datamat)
'datingTestSet2.txt' ) # Load data values and labels from the datingTestSet2.txt datingLabelArray = np.array(datingLabels) colormap2 = {1: 'red', 2: 'blue', 3: 'green'} #Define color map with 3 colors ColoredDatingLabel = [] for things in datingLabelArray: #Get a vector representing the colors ColoredDatingLabel.append(colormap2[things]) #for each data item ax2 = FigDating.add_subplot(312, xlim=(0, 100000), ylim=(0, 25)) #create second sub plot ax2.scatter(datingDataMat[:, 0], datingDataMat[:, 1], s=20, c=ColoredDatingLabel, marker='o') #Plot a scatter diagram for the data loaded normMat, ranges, minVals = kNN.autoNorm(datingDataMat) #normalize the data ax3 = FigDating.add_subplot(313, xlim=(0, 1), ylim=(0, 1)) #create third sub plot ax3.scatter(normMat[:, 0], normMat[:, 1], s=20, c=ColoredDatingLabel, marker='o') #Plot normalized data plt.show() NumberBad = kNN.datingClassTest(0.1)
dictLabel = {'largeDoses': 1, 'smallDoses': 2, 'didntLike': 3} for line in lines: line = line.strip() listFromLine = line.split('\t') returnMat[index, :] = listFromLine[0:3] classLabelVector.append(int(dictLabel.get(listFromLine[-1]))) index += 1 print returnMat[1, 2] import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(returnMat[:, 1], returnMat[:, 2], 15.0 * array(classLabelVector), 15.0 * array(classLabelVector)) #plt.show() dataSet = array([[3, 4, 5], [1, 2, 6], [4, 5, 6]], dtype=float) minVal = dataSet.min(0) maxVal = dataSet.max(0) ranges = maxVal - minVal normDataSet = zeros(shape(dataSet), dtype=float) m = dataSet.shape[0] normDataSet2 = dataSet - tile(minVal, (m, 1)) normDataSet2.astype(float) print(normDataSet2) print(tile(ranges, (m, 1))) normDataSet3 = normDataSet2 / tile(ranges, (m, 1)) import kNN normMat, ranges, minVal = kNN.autoNorm(returnMat) print(normMat)
#for the picture import matplotlib import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2]) plt.show() ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2], 15.0 * array(datingLabels), 15.0 * array(datingLabels)) ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1], 15.0 * array(datingLabels), 15.0 * array(datingLabels)) #for the autoNorm reload(kNN) norMat, ranges, minVals = kNN.autoNorm(datingDataMat) norMat ranges minVals #for the datingClassTest kNN.datingClassTest() #for the clasdifyPerson kNN.classifyPerson() #for the img2vector testVector = kNN.img2vector('testDigits/0_13.txt') testVector[0, 0:31] testVector[0, 31:63]
# encoding: utf-8 from numpy import * import kNN import matplotlib import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt') ## 由于每个指标的范围不一致, 我们这里需要进行归一化特征值 normMat, ranges, minVals = kNN.autoNorm(datingDataMat) ax.scatter(normMat[:, 1], normMat[:,2]) # 添加坐标轴的labels plt.xlabel('Percentage of Time Spent Playing Video Games') plt.ylabel('Liters of Ice Cream Consumed Per Week') plt.show()
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 21:20:35 2018

@author: ldz
"""

# =============================================================================
'''testDatingClassifier'''
# =============================================================================

from kNN import file2matrix, autoNorm, classify0

hoRatio = 0.10  # hold out 10%
k = 3
datingDataMat, datingLabels = file2matrix(
    'datingTestSet2.txt')  # load data set from file
normMat, ranges, minVals = autoNorm(datingDataMat)
m = normMat.shape[0]
numTestVecs = int(m * hoRatio)
errorCount = 0.0
# Classify each held-out row against the remaining rows.
for i in range(numTestVecs):
    classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                 datingLabels[numTestVecs:m], k)
    # Function-call form, like the other prints in this script; the bare
    # print statement is a syntax error on Python 3.
    print("the classifier came back with: %d, the real answer is: %d" % (
        classifierResult, datingLabels[i]))
    if (classifierResult != datingLabels[i]):
        errorCount += 1.0
print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
print("number of error:" + str(errorCount))
print("number of test:" + str(numTestVecs))
def main4():
    """Load datingTestSet2.txt and compute its normalisation parameters."""
    loaded = kNN.file2matrix('datingTestSet2.txt')
    features, _labels = loaded
    # Obtain the normalisation parameters.
    _scaled, _span, _lows = kNN.autoNorm(features)
import sys import kNN from pylab import * from numpy import * import numpy as np import matplotlib from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt mat, lab = kNN.file2matrix('datingTestSet2.txt') normMat, ranges, minVals = kNN.autoNorm(mat) def randrange(n, vmin, vmax): return (vmax - vmin) * np.random.rand(n) + vmin fig = plt.figure() ax = fig.add_subplot(111, projection='3d') #ax.scatter(normMat[:,0], normMat[:,1], normMat[:,2], 'o', 'c') n = 1 for c, m, zl, zh in [('r', 'o', -50, -25), ('b', '^', -30, -5)]: xs = randrange(n, 23, 32) ys = randrange(n, 0, 100) zs = randrange(n, zl, zh) ClassSet = lab colorSet = [] for label in ClassSet: if label is '1': colorSet.append('r')
# type "print answer" to see result ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' ) #plot first point #second point - created, classified and plotted testvector = [.5, .5] answer = kNN.classify0(testvector,group, labels, 3) ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' ) #third point - created, classified and plotted testvector = [.75, .75] answer = kNN.classify0(testvector,group, labels, 3) ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' ) '''Perform K-Nearest Neighbor classification on the datingTestSet2 data set. Do not forget to include the data set in the working directory''' datingDataMat,datingLabels = kNN.file2matrix('datingTestSet2.txt') # Load data values and labels from the datingTestSet2.txt datingLabelArray = np.array(datingLabels) colormap2 = { 1:'red', 2:'blue', 3:'green' } #Define color map with 3 colors ColoredDatingLabel = [] for things in datingLabelArray: #Get a vector representing the colors ColoredDatingLabel.append(colormap2[things]) #for each data item ax2 = FigDating.add_subplot(312, xlim=(0,100000), ylim=(0,25)) #create second sub plot ax2.scatter(datingDataMat[:,0], datingDataMat[:,1], s= 20, c= ColoredDatingLabel, marker = 'o' ) #Plot a scatter diagram for the data loaded normMat, ranges, minVals = kNN.autoNorm(datingDataMat) #normalize the data ax3 = FigDating.add_subplot(313, xlim=(0,1), ylim=(0,1)) #create third sub plot ax3.scatter(normMat[:,0], normMat[:,1], s = 20, c= ColoredDatingLabel, marker = 'o' ) #Plot normalized data plt.show() NumberBad = kNN.datingClassTest(0.1)