def show_file2matrix(file_name):
    """Scatter-plot columns 1 and 2 of the data set stored in ``file_name``.

    Reloads the ``knn`` module, parses the file with ``knn.file2matrix``
    and shows a scatter plot in which each point's marker size and colour
    are both 15 times its label value.
    """
    reload(knn)
    features, labels = knn.file2matrix(file_name)

    figure = plt.figure()
    axes = figure.add_subplot(111)

    x_values = features[:, 1]
    y_values = features[:, 2]
    # The same scaled label array drives both marker size and colour.
    scaled_labels = 15.0 * array(labels)
    axes.scatter(x_values, y_values, scaled_labels, scaled_labels)
    plt.show()
def train():
    """Evaluate the kNN classifier on a hold-out slice of the dating data.

    Loads ``datingTestSet2.txt``, normalises the features with
    ``knn.autoNorm`` and treats the first 10% of the rows as the test set;
    the remaining rows are the reference samples passed to
    ``knn.classify0`` with k = 3. Each prediction is printed next to the
    true label, followed by the overall error rate.

    Raises:
        ZeroDivisionError: if the data set has fewer than 10 rows, because
            the hold-out set is then empty.
    """
    dating_data, dating_labels = knn.file2matrix('datingTestSet2.txt')
    norm_mat, _range_vals, _min_vals = knn.autoNorm(dating_data)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(norm_mat)
    print(dating_labels)

    # Fraction of the data set held out for testing.
    ho_ratio = 0.10
    # Total number of samples.
    total = norm_mat.shape[0]
    # Size of the test set.
    num_test_vecs = int(total * ho_ratio)

    # The reference samples never change, so slice them once up front.
    reference_mat = norm_mat[num_test_vecs:total, :]
    reference_labels = dating_labels[num_test_vecs:total]

    error_count = 0.0
    for i in range(num_test_vecs):
        classifier_result = knn.classify0(norm_mat[i, :], reference_mat,
                                          reference_labels, 3)
        print('分类器返回: %d, 实际的结果是:%d'
              % (classifier_result, dating_labels[i]))
        if classifier_result != dating_labels[i]:
            error_count += 1.0
    print('错误率是: %f' % (error_count / (float(num_test_vecs))))
def test_auto_norm(self):
    """Check the ranges and minimums ``knn.auto_norm`` reports for the dating data."""
    date_mat, _date_label = knn.file2matrix('datingTestSet2.txt')
    _norm_mat, ranges, min_val = knn.auto_norm(date_mat)

    min_exp = np.array([0., 0., 0.001156])
    ranges_exp = np.array([9.1273000e+04, 2.0919349e+01, 1.6943610e+00])

    # Compare with a relative tolerance instead of exact ``==``: the expected
    # values are rounded literals, while the actual ones come out of
    # floating-point arithmetic and may differ in the last bits.
    # assert_allclose raises AssertionError with a per-element report.
    np.testing.assert_allclose(min_val, min_exp)
    np.testing.assert_allclose(ranges, ranges_exp)
def testknn_matplotlib():
    """Scatter-plot columns 1 and 2 of ``../datingTestSet.txt``.

    Marker size and colour are both set to 15 times the label returned by
    ``knn.file2matrix`` for each sample.
    """
    samples, labels = knn.file2matrix('../datingTestSet.txt')

    canvas = plt.figure()
    axes = canvas.add_subplot(111)

    x_values = samples[:, 1]
    y_values = samples[:, 2]
    # An unscaled variant (passing the raw labels as size and colour) was
    # tried previously and left disabled.
    axes.scatter(x_values, y_values, 15 * array(labels), 15 * array(labels))
    plt.show()
def show_file2matrix(file_name):
    """Display a scatter plot of the data set read from ``file_name``.

    The ``knn`` module is reloaded, the file is parsed with
    ``knn.file2matrix``, and column 1 is plotted against column 2 with
    marker size and colour both equal to 15 times the sample's label.
    """
    reload(knn)
    data_matrix, label_values = knn.file2matrix(file_name)

    figure = plt.figure()
    plot_axes = figure.add_subplot(111)

    # One scaled label array serves as both the size and the colour argument.
    emphasis = 15.0 * array(label_values)
    plot_axes.scatter(data_matrix[:, 1], data_matrix[:, 2], emphasis, emphasis)
    plt.show()
def classfiy_person():
    """Ask for three traits on the console and print the predicted class.

    The answers are normalised with the minimums and ranges that
    ``knn.auto_normal`` returns for ``./knn/datingTestSet2.txt`` and then
    classified by ``knn.classify0`` with k = 3. The result is used as a
    1-based index into the list of descriptions.
    """
    descriptions = ['not at all', 'in small doses', 'in large doses']

    game_share = float(input("percentage of time spent playing video games ?"))
    flier_miles = float(input("frequent flier miles earned per year ?"))
    ice_cream_liters = float(input("liter of ice cream consumed per year ?"))

    samples, sample_labels = knn.file2matrix('./knn/datingTestSet2.txt')
    normalised_samples, spans, minimums = knn.auto_normal(samples)

    # Feature order used for the query: miles, game share, ice cream.
    query = array([flier_miles, game_share, ice_cream_liters])
    normalised_query = (query - minimums) / spans
    predicted = knn.classify0(normalised_query, normalised_samples,
                              sample_labels, 3)

    print("You will probably like this person: ",
          descriptions[predicted - 1],
          "(" + str(predicted) + ")")
def test_file2matrix(self):
    """Plot the parsed dating data, then check its shape and first labels."""
    date_mat, date_label = knn.file2matrix('datingTestSet2.txt')

    # Show the data first; marker size and colour are 15 times the label.
    figure = plt.figure()
    axes = figure.add_subplot(111)
    x_values = date_mat[:, 1]
    y_values = date_mat[:, 2]
    axes.scatter(x_values, y_values,
                 15.0 * np.array(date_label), 15.0 * np.array(date_label))
    plt.show()

    # 1000 samples with 3 features each, and one label per sample.
    self.assertEqual((1000, 3), date_mat.shape)
    self.assertEqual(1000, len(date_label))

    first_ten_expected = [3, 2, 1, 1, 1, 1, 3, 3, 1, 3]
    self.assertEqual(True, first_ten_expected == date_label[0:10])
def test():
    """Exercise the ``knn`` helpers end to end and print what they return.

    Builds the data set from ``knn.create_dataset``, classifies the point
    ``[0, 0]`` with k = 3, then loads ``./knn/datingTestSet2.txt`` and hands
    it to ``knn.show_plt``.
    """
    samples, sample_labels = knn.create_dataset()
    print(samples)
    print(sample_labels)

    origin = [0, 0]
    outcome = knn.classify0(origin, samples, sample_labels, 3)
    print("distance is %s !" % outcome)

    # PyCharm resolves relative paths differently: check the "Working
    # directory" setting under Run -> Edit Configurations.
    # './knn/datingTestSet.txt' was used here previously.
    dating_path = './knn/datingTestSet2.txt'
    dating_mat, dating_labels = knn.file2matrix(dating_path)
    print(dating_mat)
    print(dating_labels)
    knn.show_plt(dating_mat, dating_labels)
def predict():
    """Interactively predict how much the user would like a person.

    Prompts for three features, normalises them with the minimums and
    ranges that ``knn.autoNorm`` returns for ``datingTestSet2.txt``,
    classifies the result with ``knn.classify0`` (k = 3) and prints the
    matching description. The classifier result is used as a 1-based index
    into the description list.
    """
    # ``raw_input`` exists only on Python 2; Python 3 renamed it to ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    result_list = ['一点也不喜欢', '有点喜欢', '非常喜欢']
    percent_tats = float(read_line('玩游戏的时间是: '))
    ff_miles = float(read_line('每年的飞行公里数: '))
    ice_cream = float(read_line('每年消耗的冰淇淋: '))

    dating_data, dating_labels = knn.file2matrix('datingTestSet2.txt')
    norm_mat, ranges, min_vals = knn.autoNorm(dating_data)

    in_arr = np.array([ff_miles, percent_tats, ice_cream])
    # Normalise the input the same way as the training data.
    in_arr_norm = (in_arr - min_vals) / ranges
    classifier_result = knn.classify0(in_arr_norm, norm_mat, dating_labels, 3)

    # One pre-formatted argument keeps the output ("label value") identical
    # on Python 2 and Python 3.
    print('%s %s' % ('预测你可能喜欢这个人的程度:',
                     result_list[classifier_result - 1]))
def predict():
    """Interactively predict how much the user would like a person.

    Prompts for three features, normalises them with the minimums and
    ranges that ``knn.autoNorm`` returns for ``datingTestSet2.txt``,
    classifies the result with ``knn.classify0`` (k = 3) and prints the
    matching description. The classifier result is used as a 1-based index
    into the description list.
    """
    # ``raw_input`` exists only on Python 2; Python 3 renamed it to ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    result_list = ['一点也不喜欢', '有点喜欢', '非常喜欢']
    percent_tats = float(read_line('玩游戏的时间是: '))
    ff_miles = float(read_line('每年的飞行公里数: '))
    ice_cream = float(read_line('每年消耗的冰淇淋: '))

    dating_data, dating_labels = knn.file2matrix('datingTestSet2.txt')
    norm_mat, ranges, min_vals = knn.autoNorm(dating_data)

    in_arr = np.array([ff_miles, percent_tats, ice_cream])
    # Normalise the input the same way as the training data.
    in_arr_norm = (in_arr - min_vals) / ranges
    classifier_result = knn.classify0(in_arr_norm, norm_mat, dating_labels, 3)

    # One pre-formatted argument keeps the output ("label value") identical
    # on Python 2 and Python 3.
    print('%s %s' % ('预测你可能喜欢这个人的程度:',
                     result_list[classifier_result - 1]))
def train():
    """Evaluate the kNN classifier on a hold-out slice of the dating data.

    Loads ``datingTestSet2.txt``, normalises the features with
    ``knn.autoNorm`` and treats the first 10% of the rows as the test set;
    the remaining rows are the reference samples passed to
    ``knn.classify0`` with k = 3. Each prediction is printed next to the
    true label, followed by the overall error rate.

    Raises:
        ZeroDivisionError: if the data set has fewer than 10 rows, because
            the hold-out set is then empty.
    """
    dating_data, dating_labels = knn.file2matrix('datingTestSet2.txt')
    norm_mat, _range_vals, _min_vals = knn.autoNorm(dating_data)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(norm_mat)
    print(dating_labels)

    # Fraction of the data set held out for testing.
    ho_ratio = 0.10
    # Total number of samples.
    total = norm_mat.shape[0]
    # Size of the test set.
    num_test_vecs = int(total * ho_ratio)

    # The reference samples never change, so slice them once up front.
    reference_mat = norm_mat[num_test_vecs:total, :]
    reference_labels = dating_labels[num_test_vecs:total]

    error_count = 0.0
    for i in range(num_test_vecs):
        classifier_result = knn.classify0(norm_mat[i, :], reference_mat,
                                          reference_labels, 3)
        print('分类器返回: %d, 实际的结果是:%d'
              % (classifier_result, dating_labels[i]))
        if classifier_result != dating_labels[i]:
            error_count += 1.0
    print('错误率是: %f' % (error_count / (float(num_test_vecs))))
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

import knn
import mnist_test

# ``reload`` is a builtin only on Python 2; on Python 3 it lives in importlib.
try:
    reload
except NameError:
    from importlib import reload

# Classify the origin with both kNN implementations on the built-in data set.
group, labels = knn.createDataSet()
bb = knn.classify0([0, 0], group, labels, 3)
print(bb)
cc = knn.knn2([0, 0], group, labels, 3)

# Visualise the data.
dataSet, labels = knn.file2matrix('data/datingTestSet2.txt')
fig = plt.figure()
ax = fig.add_subplot(111)
# np.asarray makes the scaling work whether file2matrix returns a list or an
# ndarray (``15.0 * some_list`` raises TypeError).
marker_scale = 15.0 * np.asarray(labels)
ax.scatter(dataSet[:, 0], dataSet[:, 1], s=marker_scale, c=marker_scale)
plt.show()

# Measure the misclassification rate.
reload(knn)
# testRatio is the fraction of samples used for testing; k is the number of
# neighbours.
# NOTE(review): this path starts with '../data/' while the plot above reads
# 'data/' -- confirm which working directory is intended.
knn.knnTest('../data/datingTestSet2.txt', testRatio=0.2, k=3)

# Test handwritten digit recognition.
mnist_test.mnist_test(500, 100, k=7)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/11/30 16:52
# @Author  : zhcf1ess
# @Site    :
# @File    : createFirstPLT.py
# @Software: PyCharm
"""Scatter-plot columns 1 and 2 of the dating data set, styled by label."""
from numpy import *
import knn
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)

datingDataMat, datingLabels = knn.file2matrix(
    '../data/ch02/datingTestSet2.txt')

x_values = datingDataMat[:, 1]
y_values = datingDataMat[:, 2]
# Marker size and colour are both 15 times the sample's label.
ax.scatter(x_values, y_values,
           15.0 * array(datingLabels), 15.0 * array(datingLabels))

# Fixed viewport: [x_min, x_max, y_min, y_max].
ax.axis([-2, 25, -0.2, 2.0])
plt.xlabel('Percentage of Time Spent Playing Video Games')
plt.ylabel('Liters of Ice Cream Consumed Per Week')
plt.show()
def testknn():
    """Parse ``../datingTestSet.txt`` and print column 1 of the data matrix."""
    features, _labels = knn.file2matrix('../datingTestSet.txt')
    second_column = features[:, 1]
    print(second_column)
"""Scatter-plot columns 1 and 2 of the Ch02 dating data set, styled by label."""
import knn
from numpy import *
import matplotlib
import matplotlib.pyplot as plt

DATA_PATH = "machinelearninginaction/Ch02/datingTestSet2.txt"

datingDataMat, datingLabels = knn.file2matrix(DATA_PATH)

fig = plt.figure()
ax = fig.add_subplot(111)

x_values = datingDataMat[:, 1]
y_values = datingDataMat[:, 2]
# Marker size and colour are both 15 times the sample's label.
ax.scatter(x_values, y_values,
           15.0 * array(datingLabels), 15.0 * array(datingLabels))
plt.show()
# -*- coding: utf-8 -*-
"""
Load ``input.txt``, print its normalisation data and classify one point.

Created on Fri Sep 21 10:38:54 2018

@author: fsxn2
"""
import knn
import matplotlib
import matplotlib.pyplot as plt

# Disabled: quick check against the built-in data set.
# group, labels = knn.createDataSet()
# print(knn.classify0([0, 0], group, labels, 3))

group, labels = knn.file2matrix("input.txt")
auto, ranges, minval = knn.autoNorm(group)

# Print the normalised matrix, the per-column ranges and the minimums.
for value in (auto, ranges, minval):
    print(value)

# Disabled: scatter plot of the raw data.
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(group[:, 1], group[:2])
# plt.show()

# NOTE(review): the query is classified against the raw ``group`` matrix, not
# the normalised ``auto`` computed above -- confirm this is intended.
prediction = knn.classify0([1, 0, 3], group, labels, 3)
print(prediction)
# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import knn
import matplotlib
import matplotlib.pyplot as plt
from array import array
# Order matters: this star import rebinds ``array`` to numpy's array, which is
# the one the plotting code below relies on.
from numpy import *


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # Load the data.
    datingDataMat, datingLabels = knn.file2matrix('datingTestSet.txt')
    print(datingDataMat)
    print(datingLabels)

    # Display it.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    x_values = datingDataMat[:, 1]
    y_values = datingDataMat[:, 2]
    # Plain scatter: no useful pattern is visible.
    ax.scatter(x_values, y_values)
    # Same points again, with size and colour taken from the labels.
    ax.scatter(x_values, y_values,
               15.0 * array(datingLabels), 15.0 * array(datingLabels))
    plt.show()
    plt.close()

    # Normalise the data.
    normMat, ranges, minVals = knn.autoNorm(datingDataMat)
    print('norm mat:')
"""Show three side-by-side scatter plots of the dating data set."""
import matplotlib
import matplotlib.pyplot as plt
from numpy import array
import knn

datingDataMat, datingLabels, vector = knn.file2matrix('datingTestSet.txt')
fig = plt.figure()

# Each entry: (subplot position, x column, y column, style by label?).
PANELS = (
    (131, 1, 2, False),  # columns 1 vs 2, default markers
    (132, 1, 2, True),   # columns 1 vs 2, size/colour = 15 * label
    (133, 0, 1, True),   # columns 0 vs 1, size/colour = 15 * label
)

for position, x_col, y_col, styled in PANELS:
    ax = fig.add_subplot(position)
    x_values = datingDataMat[:, x_col]
    y_values = datingDataMat[:, y_col]
    if styled:
        ax.scatter(x_values, y_values,
                   15.0 * array(vector), 15.0 * array(vector))
    else:
        ax.scatter(x_values, y_values)

plt.show()