def test():
    """Smoke-test the kNN classifier on the sample data set.

    Builds the sample data with ``kNN.createDataSet()`` and prints the class
    that ``kNN.classify0`` assigns to the point ``[0, 0]`` when called with 3
    as its last argument.

    :return: None
    """
    group, labels = kNN.createDataSet()
    # print() with one argument behaves identically on Python 2 and Python 3,
    # unlike the Python 2-only print statement.
    print(kNN.classify0([0, 0], group, labels, 3))
def main2():
    """Test the classifier.

    Classifies the point ``[0, 0]`` against the sample data set returned by
    ``kNN.createDataSet()`` and prints the predicted label.
    """
    group, labels = kNN.createDataSet()
    result = kNN.classify0([0, 0], group, labels, 3)
    # Single-argument print() works on both Python 2 and Python 3.
    print(result)
def classify0Test():
    """Exercise the first classifier.

    Prints the sample data set, its labels, and the label predicted for the
    point ``[0.1, 0.1]``.

    :return: None
    """
    samples, classes = kNN.createDataSet()
    print('group:', samples)
    print('labels:', classes)
    # Classify only after the inputs have been echoed, as the original did.
    prediction = kNN.classify0([0.1, 0.1], samples, classes, 3)
    print('result:', prediction)
def __test__():
    """Classify two sample points with ``kNN.kNNClassify`` and print the results.

    The points ``[1.2, 1.1]`` and ``[0.1, 0.3]`` are classified in that order
    against the data set returned by ``kNN.createDataSet()``.
    """
    dataSet, labels = kNN.createDataSet()
    # Passed as the last argument of kNNClassify (presumably the neighbour
    # count); previously assigned but never used.
    k = 3
    for point in ([1.2, 1.1], [0.1, 0.3]):
        inX = array(point)
        outputLabel = kNN.kNNClassify(inX, dataSet, labels, k)
        print("Your input is:", inX, "and classified to class: ", outputLabel)
def kNNtest():
    """Classify every record of ``test_24.json`` and append the predictions.

    Each input line is parsed as a JSON list whose first element is the record
    id and whose remaining elements form the feature vector.  The vector is
    classified with ``kNN.kNNClassify`` (last argument 1) and the pair
    ``[label, id]`` is appended as one JSON line to ``testPredict24.json``.
    """
    # Build the reference data set and its class labels.
    dataSet, labels = kNN.createDataSet()

    with open('test_24.json', 'r') as source:
        # Open the output once, under its own name: the original rebound
        # ``file`` inside the loop that was iterating over ``file`` and
        # reopened the output for every record.
        with open('testPredict24.json', 'a') as sink:
            for line in source:
                record = json.loads(line)
                testid = record[0]
                testtarget = record[1:]
                # Wrapped in an outer list, i.e. a single-row 2-D array.
                testX = array([testtarget])
                # Classify the unknown sample.
                outputLabel = kNN.kNNClassify(testX, dataSet, labels, 1)
                result = [outputLabel, testid]
                sink.write(json.dumps(result) + '\n')
import kNN

# Sample data set: feature rows and their class labels.
grps, lbls = kNN.createDataSet()

# Classify the point [2, 0.8], passing 3 as the last argument of classify0.
# (An alternative query point, [1, 0.8], was tried previously.)
rate = kNN.classify0([2, 0.8], grps, lbls, 3)
print(rate)
# -*- coding: utf-8 -*-
# writer : lgy
# date : 2017-08-19
import kNN
from numpy import *

# Demo: classify two sample points against the data set from kNN.createDataSet().
dataSet, labels = kNN.createDataSet()

# Passed as the last argument of kNNClassify; previously assigned but unused.
k = 3

for point in ([1.2, 1.0], [0.1, 0.3]):
    testX = array(point)
    outputLabel = kNN.kNNClassify(testX, dataSet, labels, k)
    # print() call form; the Python 2 print statement is a syntax error on
    # Python 3.
    print("Your input is:", testX, "and classified to class: ", outputLabel)
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import kNN

if __name__ == '__main__':
    # Show whatever createDataSet() returns.
    data = kNN.createDataSet()
    # Single-argument print() works on both Python 2 and Python 3.
    print(data)

    # Earlier experiments, kept for reference:
    # matrix = kNN.file2matrix('datingTestSet2.txt')
    # print(matrix)
    # kNN.datingClassTest()

    kNN.handwritingClassTest()
# Helpers that create a small sample data set and classify points with the
# k-nearest-neighbours algorithm.
from numpy import *
import operator


def createDataSet():
    """Create the sample data set used for analysis.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 array of points and
        ``labels`` is the list of their class names, in the same order.

    Example of calling this function from another module::

        import kNN
        group, labels = kNN.createDataSet()
    """
    # array() takes ONE nested sequence; passing the rows as separate
    # positional arguments is an error.
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ["A", "A", "B", "B"]
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    For the point to classify:
      1. compute the distance from it to every point of the labelled data set;
      2. sort the points by increasing distance;
      3. take the ``k`` closest points;
      4. count how often each class occurs among them;
      5. return the most frequent class as the prediction.

    Args:
        inX: the point to classify (sequence or 1-D array).
        dataSet: 2-D array with one labelled point per row.
        labels: class label of each row of ``dataSet``.
        k: number of nearest neighbours that vote.

    Returns:
        The label that occurs most often among the ``k`` nearest neighbours.
    """
    dataSetSize = dataSet.shape[0]

    # Euclidean distance between inX and every row of dataSet.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5

    # Row indices ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()

    # Tally the classes of the k nearest points.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Most frequent class first; sorted() is stable, so on a tie the class
    # that was encountered first (i.e. nearer) wins.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
def main1():
    """Generate the sample data.

    Calls ``kNN.createDataSet()`` and unpacks its two-element result; nothing
    is returned or printed.
    """
    samples, classes = kNN.createDataSet()
def test_kNN(self):
    """classify0 must label the point [0, 0] as 'B' on the sample data set."""
    samples, classes = kNN.createDataSet()
    predicted = kNN.classify0([0, 0], samples, classes, 3)
    self.assertEqual(predicted, 'B')
def test1(self):
    """Print the sample data set, then the label predicted for [0, 0]."""
    samples, classes = kNN.createDataSet()
    print(samples)
    prediction = kNN.classify0([0, 0], samples, classes, 3)
    print(prediction)
'''A wrapper around kNN.py.
Include the kNN.py and datingTestSet2 in the current working directory'''
# Methods and variables from kNN.py are accessed as kNN.MethodName() or
# kNN.variableName.
import kNN
import matplotlib.pyplot as plt
import numpy as np

FigDating = plt.figure()

# Sample data set: four 2-D vectors, each carrying one of two labels.
group, labels = kNN.createDataSet()

# Map every class label to a plot colour, one colour per data item.
colormap1 = {'A': 'red', 'B': 'blue'}
ColoredGroupLabels = [colormap1[things] for things in labels]

# Split the figure into 3 stacked sub-plots and select the top-most one.
ax1 = FigDating.add_subplot(311, xlim=(-0.1, 1.1), ylim=(-.05, 1.15))

# Scatter plot of the data, coloured according to the labelling.
ax1.scatter(group[:, 0], group[:, 1], s=20, c=ColoredGroupLabels, marker='o')

# Testing with a new point: classify it (print `answer` to see the result)
# and plot it in the colour of its predicted class.
testvector = [0.75, 0.75]
answer = kNN.classify0(testvector, group, labels, 3)
# The original call was cut off after the colour argument; the 'x' marker
# sets the test point apart from the 'o' training points.
ax1.scatter(testvector[0], testvector[1], s=20, c=colormap1[answer],
            marker='x')
# Sample script: print a greeting, then classify the point [0, 0] against the
# sample data set.
from classify0 import classify0
from kNN import createDataSet


def print_hi(name):
    """Print the greeting ``Hi, <name>``."""
    greeting = f'Hi, {name}'
    print(greeting)


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    print_hi('PyCharm')
    group, labels = createDataSet()
    prediction = classify0([0, 0], group, labels, 3)
    print(prediction)
import sys

# Raw string: in an ordinary literal "\1" is an octal escape (chr(1)) and
# "\a" is the bell character, so the appended directory would not be the
# intended Windows path.
sys.path.append(r'F:\1masterpiece\python test\apriori')

import kNN

kNN.createDataSet()
from numpy import *
import sys
import matplotlib
import matplotlib.pyplot as plt

try:
    # ``imp`` is deprecated and removed in Python 3.12.
    from importlib import reload
except ImportError:  # very old interpreters only
    from imp import reload

sys.path.append(r"C:\Python34\code\machinelearninginaction\Ch02")
import kNN

# Pick up edits to kNN.py made since it was first imported.
reload(kNN)

df = kNN.createDataSet()
inputt = array([0.7, 0.8])
K = 3
output = kNN.classify(inputt, df, K)
print("测试数据为:", inputt, "分类结果为:", output)

# Square figure so that the X and Y axes share the same scale and proportion.
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(1, 1, 1)

# Plot the data set in red and the query point in green.
plt.plot(df['x'], df['y'], 'ro')
plt.plot(inputt[0], inputt[1], 'go')

# Annotate every point with its index label.  enumerate() advances the row
# position together with the label; the original counter was never
# incremented, so every annotation pointed at the first row.
for count, label in enumerate(df.index):
    ax.annotate(label,
                xy=df.values[count],
                xytext=(df.values[count][0] + 0.1,
                        df.values[count][1] + 0.05),
                arrowprops=(dict(facecolor='b', width=0.05, shrink=0.05,
                                 headwidth=1, connectionstyle="arc3")))
import kNN
from numpy import *

# Demo: classify two sample points against the data set from kNN.createDataSet().
dataSet, labels = kNN.createDataSet()

# Passed as the last argument of kNNClassify; previously assigned but unused.
k = 3

for point in ([1.9, 3.2], [4.1, 3.3]):
    testX = array(point)
    outputLabel = kNN.kNNClassify(testX, dataSet, labels, k)
    print("Your input is:", testX, "and classified to class: ", outputLabel)
from numpy import *
import matplotlib.pyplot as plt
import kNN

# Create 5 neighbors.
neighbors, names = kNN.createDataSet(5)

# Find the two nearest neighbors to me (I am at the origin).
result = kNN.classify([0, 0], neighbors, names, 2)

# x and y hold the positions of my neighbors.
x = [row[0] for row in neighbors]
y = [row[1] for row in neighbors]

# Display my neighbors in blue.
plt.plot(x, y, 'bo')
plt.axis([-0.2, 1.2, -0.2, 1.2])

# Write each neighbor's name next to its point.
for i, name in enumerate(names):
    plt.annotate(name, (x[i], y[i]), (x[i] - 0.08, y[i] + 0.01))

# Display me in red.
plt.plot([0], [0], 'ro')

# Highlight the nearest neighbors in yellow, with a message.
for i, name in enumerate(names):
    for r in result:
        # Compare by value: ``is`` tests object identity, which is not
        # guaranteed to hold for two equal strings.
        if name == r[0]:
            plt.plot([x[i]], [y[i]], 'yo')
            plt.annotate('I am here', (x[i], y[i]),
                         (x[i] + 0.01, y[i] - 0.05))

plt.show()
import kNN
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Quick sanity check on the built-in sample data set.
group, labels = kNN.createDataSet()
predict = kNN.classify0([0, 0], group, labels, 3)
print(predict)

# Load the dating data and show the matrix plus the first 20 labels.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels[:20])

# Scatter plot of the first two columns; the label values, scaled by 15,
# drive both the marker size and the marker colour.
fig = plt.figure()
ax = fig.add_subplot(111)
scaled_labels = 15.0 * np.array(datingLabels)
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           s=scaled_labels, c=scaled_labels)
plt.show()

# Normalise the data and show the three values autoNorm returns.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
for item in (normMat, ranges, minVals):
    print(item)

# Run the classifier's error-rate test.
kNN.datingClassTest()
'''A wrapper around kNN.py.
Include the kNN.py and datingTestSet2 in the current working directory'''
# Methods and variables from kNN.py are accessed as kNN.MethodName() or
# kNN.variableName.
import kNN
import matplotlib.pyplot as plt
import numpy as np

FigDating = plt.figure()

# Sample data set: four 2-D vectors, each carrying one of two labels.
group, labels = kNN.createDataSet()

# Map every class label to a plot colour, one colour per data item.
colormap1 = {'A': 'red', 'B': 'blue'}
ColoredGroupLabels = [colormap1[things] for things in labels]

# Split the figure into 3 stacked sub-plots and select the top-most one.
ax1 = FigDating.add_subplot(311, xlim=(-0.1, 1.1), ylim=(-.05, 1.15))

# Scatter plot of the data, coloured according to the labelling.
ax1.scatter(group[:, 0], group[:, 1], s=20, c=ColoredGroupLabels, marker='o')

# Testing with new points: each one is created, classified and plotted with an
# 'x' marker in the colour of its predicted class (print `answer` to see the
# label).  The third point used to be classified but never drawn.
for testvector in ([.2, .2], [.5, .5], [.75, .75]):
    answer = kNN.classify0(testvector, group, labels, 3)
    ax1.scatter(testvector[0], testvector[1], s=20, c=colormap1[answer],
                marker='x')
# coding=utf-8
__author__ = 'wuwen'
import kNN
import numpy as np
import operator

group, labels = kNN.createDataSet()


# k-nearest-neighbours classification.
# n-dimensional distance: d = ((x1-x2)^2 + (y1-y2)^2 + ... + (n1-n2)^2)^0.5
# Idea: compute the distance from the input to every sample; the closer a
# sample is, the more likely the input shares its class.
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Args:
        inX: the point to classify (sequence or 1-D array).
        dataSet: 2-D array with one labelled sample per row.
        labels: class label of each row of ``dataSet``.
        k: number of nearest samples that vote.

    Returns:
        The label occurring most often among the ``k`` nearest samples.
    """
    dataSetSize = dataSet.shape[0]

    # Repeat the input once per sample and subtract the samples.  This is done
    # out of place on purpose: tiling an integer input such as [0, 0] yields
    # an integer array, and an in-place ``-=`` / ``**= 0.5`` with float
    # operands then fails with a casting error.
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet

    # Sum the squared per-dimension differences, then take the square root.
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5

    # Sample indices ordered from nearest to farthest.
    sortedDistances = distances.argsort()

    # Tally the classes of the k nearest samples and return the most common;
    # sorted() is stable, so on a tie the class encountered first wins.
    classCount = {}
    for index in sortedDistances[:k]:
        voteLabel = labels[index]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
import kNN
import matplotlib
import matplotlib.pyplot as plt
from numpy import array

# Sample data set; the bare ``features`` expression and the unassigned
# classify0 call below only show a value in an interactive session.
features, labels = kNN.createDataSet()
features
kNN.classify0([0, 0], features, labels, 3)

# Load the dating data; note that ``labels`` is rebound here.
datamat, labels = kNN.file2matrix('datingTestSet.txt')
# The string literal below is a disabled scatter plot of columns 1 and 2.
'''
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datamat[:,1], datamat[:,2], 15.0*array(labels), 15.0*array(labels))
plt.show()
'''
# Normalise the data and print the three values autoNorm returns.
normmat, ranges, minvals = kNN.autoNorm(datamat)
print(normmat)
print(ranges)
print(minvals)
# kNN.datingClassTest(0.2,7)


def classifyperson():
    """Prompt for a person's features on standard input.

    Reads two numbers as floats.
    NOTE(review): the function appears to be cut off here - ``result`` and both
    inputs are never used and nothing is returned; confirm against the full
    source before relying on it.
    """
    # Candidate answers; presumably indexed by the predicted class - TODO confirm.
    result = ['not at all', 'small doses', 'large dose']
    ffmiles = float(input('frequent filter miles earned per year:'))
    gametimepercent = float(input('% of time spent on game:'))