def work(self):
        """Generate ``self.number`` random records and write them to 'datingtest'.

        Each record has three random features (miles, game time, ice cream)
        and a label 1/2/3 naming whichever normalised feature is largest.
        Records are written tab-separated, one per line.
        """
        mile_values, game_values, ice_values = [], [], []

        # Draw the three features record by record so the sequence of calls
        # into ``random`` is miles, game time, ice cream for every record.
        for _ in range(self.number):
            mile_values.append(random.randint(500,3000))
            game_values.append(random.random())
            ice_values.append(random.randint(10,150))

        def _first_row(values):
            # Normalise one feature column and keep row 0 of the result.
            scaled, _ranges, _mins = kNN.autoNorm(np.array(values))
            return list(scaled[0])

        miles_scaled = _first_row(mile_values)
        games_scaled = _first_row(game_values)
        ice_scaled = _first_row(ice_values)

        tags = []
        for idx in range(self.number):
            top = max(miles_scaled[idx], games_scaled[idx], ice_scaled[idx])
            if top == miles_scaled[idx]:
                tags.append(1)
            elif top == games_scaled[idx]:
                tags.append(2)
            else:
                tags.append(3)

        with open('datingtest', 'w') as out:
            for idx in range(self.number):
                fields = (mile_values[idx], game_values[idx],
                          ice_values[idx], tags[idx])
                out.write('\t'.join(str(v) for v in fields) + '\n')
Beispiel #2
0
	def testKNN2(self):
		"""Classify one fixed sample against the normalised dating set (k=3) and expect label 1."""
		samples, sampleLabels = file2matrix('datingTestSet.txt')
		scaled, spans, lows = kNN.autoNorm(samples)
		# Scale the query with the same minimum and range as the data set.
		query = numpy.array([51052, 4.680098, 0.625224])
		query = (query - lows) / spans
		predicted = kNN.classify0(query, scaled, sampleLabels, 3)
		self.assertEqual(predicted, 1)
Beispiel #3
0
 def test_autoNorm(self):
     """Load the dating set, normalise it and print the raw and normalised data."""
     rawFeatures, _rawLabels = kNN.file2matrix("datingTestSet.txt")
     print("\n datingDataMat == %s" % rawFeatures)
     scaled, spans, lows = kNN.autoNorm(rawFeatures)
     report = "\n normDataSet == %s \n ranges == %s \n minVals == %s \n"
     print(report % (scaled, spans, lows))
def datingClassTest():
    """Search k = 1..20 for the k with the fewest hold-out errors.

    Loads 'datingTestSet.txt', normalises it with ``kNN.autoNorm``, uses the
    first half of the rows as test vectors and the remaining rows as the
    training set. For each k the error rate and error count are printed,
    then the best k. Ties go to the larger k because the comparison is ``<=``.
    """
    hoRatio = 0.50  # hold out 50% of the rows for testing
    datingDataMat, datingLabels = kNN.file2matrix(
        'datingTestSet.txt')  # load data set from file
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)  # 50% train set, 50% test set
    errorAns = m  # upper bound: at most numTestVecs <= m errors are possible
    ans = None
    for k in range(1, 21):
        errorCount = 0
        for i in range(numTestVecs):
            classifierResult = kNN.classify0(normMat[i, :],
                                             normMat[numTestVecs:m, :],
                                             datingLabels[numTestVecs:m],
                                             k)
            if classifierResult != datingLabels[i]:
                errorCount += 1.0
        # print(...) with a single pre-formatted string gives the same output
        # on Python 2 and Python 3.
        print("when k is %d the total error rate is: %f" %
              (k, errorCount / float(numTestVecs)))
        print(errorCount)
        if errorCount <= errorAns:
            errorAns = errorCount
            ans = k
    print("the best k is %s" % ans)
Beispiel #5
0
def test_knn_dating():
    """Train a scikit-learn kNN classifier on the dating data and report hits.

    The normalised data is shuffled and split 90% / 10% into disjoint
    training and test sets. For every test sample, ``True`` is printed when
    the prediction matches the label and ``False`` otherwise.
    """
    #%% Read the data with the knn module's loader
    x, y = knn.file2matrix('datingTestSet2.txt')
    norm_x, ranges, minVals = knn.autoNorm(x)
    norm_x = np.array(norm_x)
    y = np.array(y)
    #%% Split into training and test data
    train_ratio = 0.9
    size_data = len(norm_x)
    indices = np.random.permutation(size_data)
    # One split point for both halves, so the test rows never overlap the
    # training rows (the test set is the remaining 1 - train_ratio share).
    split = int(train_ratio * size_data)
    train_idx, test_idx = indices[:split], indices[split:]
    x_train, y_train = norm_x[train_idx], y[train_idx]
    x_test, y_test = norm_x[test_idx], y[test_idx]
    #%%

    # Create the classifier and fit it on the training data
    knn_classfier = neighbors.KNeighborsClassifier()
    knn_classfier.fit(x_train, y_train)
    #%%
    # Predict the held-out samples and report each one
    result = knn_classfier.predict(x_test)
    for predicted, actual in zip(result, y_test):
        print(bool(predicted == actual))
def autoNormTest():
    """Load the dating data set, normalise it and print the three results."""
    # Raw string: the path contains backslashes followed by letters that are
    # not valid escape sequences. The resulting path is unchanged.
    returnMat, classLabelVector = kNN.file2matrix(
        r"C:\Users\yangy\PycharmProjects\MLIA\kNN\datingTestSet2.txt")
    normMat, ranges, minVals = kNN.autoNorm(returnMat)
    print('normMat:', normMat)
    print('ranges:', ranges)
    print('minVals:', minVals)
def datingClassTest():
    """
    Hold-out test of the kNN classifier on the dating-site data.

    :return: None; predictions, the error rate and the error count are printed.
    """
    # Share of the rows used as test samples
    holdout_fraction = 0.1

    # Load the data from file and normalise it
    features, labels = kNN.file2matrix('datingTestSet2.txt')
    scaled, _ranges, _mins = kNN.autoNorm(features)

    # Number of rows (first dimension of the matrix)
    total_rows = scaled.shape[0]

    # Number of samples to test
    test_count = int(total_rows * holdout_fraction)
    print('numTestVecs = ', test_count)

    # Everything after the test rows serves as the training set
    train_rows = scaled[test_count:total_rows, :]
    train_labels = labels[test_count:total_rows]

    misses = 0.0
    for row in range(test_count):
        predicted = kNN.classify0(scaled[row, :], train_rows, train_labels, 3)
        actual = labels[row]
        print('The classifier came back with %d, the real answer is: %d' %
              (predicted, actual))
        if predicted != actual:
            misses += 1.0
    print('The total error rate is %f' % (misses / float(test_count)))
    print(misses)
def classifyPerson():
    """Prompt for one person's features and print the predicted liking.

    The three inputs are normalised with the minimum and range of the dating
    data set, classified with k=3, and the resulting label (1-based) is
    mapped to a description in ``resultList``.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent filter miles earned per year"))
    iceCream = float(input("liters of ice cream consumed per year"))
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    inArr = numpy.array([ffMiles, percentTats, iceCream])
    # The third argument must be the label vector, not the feature matrix.
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normMat,
                                     datingLabels, 3)
    # Concatenate the description; a str has no ``resultList`` attribute.
    print("you will probably like this person: " +
          resultList[classifierResult - 1])
Beispiel #9
0
def classifyperson():
    """Ask for three features, classify them with k=3 and print the verdict."""
    verdicts = ['not at all', 'small doses', 'large dose']

    miles = float(input('frequent filter miles earned per year:'))
    game_share = float(input('% of time spent on game:'))
    ice = float(input('liters of ice cream consumed per year:'))

    samples, sample_labels = kNN.file2matrix('datingTestSet.txt')
    scaled, spans, lows = kNN.autoNorm(samples)

    # Scale the query with the data set's minimum and range.
    query = array([miles, game_share, ice])
    query = (query - lows) / spans

    predicted = kNN.classify0(query, scaled, sample_labels, 3)
    print("you like this person:", verdicts[predicted - 1])
def classifyPerson():
    resultlist = ['not at all','in small doses','in large doses']
    games = float(raw_input(
        "percentage of time spent playing video games?"))
    flymiles = float(raw_input(
        "frequent flier miles earned per year?"))
    icecream = float(raw_input(
        "liters of ice cream consumed per year?"))
    datingdata, datinglabel = kNN.file2matrix('datingTestSet2.txt')
    normdata, ranges, minv = kNN.autoNorm(datingdata)
    inarr = array([flymiles, games, icecream])
    result = kNN.classify0((inarr - minv)/ranges, normdata, datinglabel, 3)
    print "you will probably like this person:", resultlist[result-1]
Beispiel #11
0
def datingClassTest():
    """Hold-out test of the kNN classifier (k=3) on 'datingTestSet.txt'.

    The first ``hoRatio`` share of the normalised rows are test vectors and
    the remaining rows form the training set. Each prediction is printed,
    followed by the overall error rate.
    """
    hoRatio = 0.550
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = kNN.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m], 3)
        # print(...) with one formatted string works on Python 2 and 3 alike.
        print('the classifier came back with: %s, the real answer is: %s' %
              (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
Beispiel #12
0
def datingClassTest():
    """Hold-out test of the kNN classifier (k=3) on 'datingTestSet2.txt'.

    The first 10% of the normalised rows are test vectors and the remaining
    rows form the training set. Each prediction is printed, followed by the
    overall error rate.
    """
    hoRatio = 0.10
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m*hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], 3)
        # print(...) with one formatted string works on Python 2 and 3 alike.
        print("the classifier came back with: %d, the real answer is: %d" %
              (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount/float(numTestVecs)))
def classifyPerson():
    percentTats = float(raw_input("percentage of time playing video games?"))
    ffMiles = float(raw_input("frequent flyer miles earned per year?"))
    iceCream = float(raw_input("liters of ice cream consumed per year?"))
    datingDataMatrix, datingLabels = parseDatingData('datingTestSet.txt')
    normalizedMatrix, ranges, minimumValues = kNN.autoNorm(datingDataMatrix)

    classifierResult = kNN.classify0(
        inX=(numpy.array([ffMiles, percentTats, iceCream]) - minimumValues) /
        ranges,
        dataSet=normalizedMatrix,
        labels=datingLabels,
        k=3)
    print "Your probable result: {}".format(intToCategory(classifierResult))
Beispiel #14
0
def classifyPerson():
    """
    Read one person's features from stdin and predict how much she will like them.

    The query is scaled with the minimum and range returned by
    ``kNN.autoNorm`` before it is compared against the normalised data set;
    the label (1-based) is then mapped to a description.
    """
    import numpy as np  # local import: no array constructor is visibly in scope here

    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent fliter miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    # Normalise the query exactly like the training data; comparing raw
    # values against normMat would let the largest-scale feature dominate.
    inArr = np.array([ffMiles, percentTats, iceCream])
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normMat,
                                     datingLabels, 3)
    print("you will probably like this person : %s" % (resultList[classifierResult - 1]))
def classifyPerson():
    #resultList = ['not at all','in small doses', 'in large doses']
    percentTats = float(raw_input(\
                "percentage of time spent playing video games?"))
    ffMiles = float(raw_input("frequent flier miles earned per year?")
                    )  #使用sumlime配置的环境python27无法读取输入数据
    iceCream = float(raw_input(
        "liters of ice cream consumed per year?"))  #点击*.py运行即可,程序末尾添加待输入
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    inArr = array([ffMiles, percentTats, iceCream])
    classifierResult = kNN.classify0((inArr-\
                        minVals)/ranges,normMat,datingLabels,3)
    print "You will probably like this person: ",\
    classifierResult
Beispiel #16
0
def TradingClassTest():
    """Hold-out test of the kNN classifier (k=3) on data from '000875.csv'.

    The first half of the normalised rows are test vectors and the remaining
    rows form the training set. Each prediction is printed, followed by the
    error rate and the error count.
    """
    hoRatio = 0.50  # hold out 50% of the rows for testing
    datingDataMat, datingLabels = HandleOverData(
        '000875.csv')  # load data set from file
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = kNN.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m], 3)
        # print(...) with one formatted string works on Python 2 and 3 alike.
        print("the classifier came back with: %d, the real answer is: %d" %
              (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
    print(errorCount)
Beispiel #17
0
def classifyPerson():
    """Classify one hard-coded sample against the dating set (k=3) and print the verdict."""
    verdicts = ['not at all', 'in small doses', 'in large doses']
    # The interactive prompts are disabled; a fixed sample is used instead.
    samples, sampleLabels = file2matrix('resources/datingTestSet2.txt')
    scaled, spans, lows = kNN.autoNorm(samples)
    sample = array([9289, 9.666576, 1.370330])
    # Scale the sample with the data set's minimum and range.
    scaledSample = (sample - lows) / spans
    predicted = kNN.classify0(scaledSample, scaled, sampleLabels, 3)
    print("You will probably like this person: %s" % verdicts[predicted - 1])
Beispiel #18
0
def datingClassTest():
    """Hold-out test of the kNN classifier (k=3) on the dating data set.

    The first half of the normalised rows are test vectors and the remaining
    rows form the training set. Each prediction is printed, followed by the
    error rate and the error count.
    """
    hoRatio = 0.50  # hold out 50% of the rows for testing
    # Load the data set from file (the extra chained targets were never used).
    datingDataMat, datingLabels = file2matrix("resources/datingTestSet.txt")
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = kNN.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m], 3)
        print("the classifier came back with: %d, the real answer is: %d" %
              (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
    print(errorCount)
Beispiel #19
0
def classifyPerson():
    """
    根据提示输入数据
    :return:
    """
    ll = ["不喜欢的人", "魅力一般的人", "具有魅力的人"]
    x1 = float(raw_input("玩视频游戏所耗时间的百分比?"))
    x2 = float(raw_input("每年获得的飞行常客里程数为?"))
    x3 = float(raw_input("每周消费的冰淇淋的功升数为?"))

    x, y = kNN.file2matrix("datingTestSet2.txt")
    normX, rage, minV = kNN.autoNorm(x)

    inX = (array([x1, x2, x3]) - minV) / rage

    ret = kNN.classify0(inX, x, y, 3)

    print "你对的喜欢程度可能是:", ll[int(ret) - 1]
Beispiel #20
0
def datingClassTest():
    """
    Hold-out test of the kNN classifier (k=3) on the dating-site user data.

    The first 10% of the normalised rows are test vectors; the remaining rows
    and their labels form the training set. Each prediction is printed,
    followed by the error count and the error rate over the tested samples.

    :return: None
    """
    hoRet = 0.10
    x, y = kNN.file2matrix("datingTestSet.txt")
    normX, rage, minV = kNN.autoNorm(x)
    m = normX.shape[0]
    errorCount = 0
    testNum = int(hoRet * m)
    for i in range(testNum):
        # Labels must be sliced like the training rows so that row j of the
        # training matrix lines up with label j.
        yr = kNN.classify0(normX[i, :], normX[testNum:m, :], y[testNum:m], 3)
        print("第%d个分类为%s,原来分类为%s" % (i, yr, y[i]))
        if yr != y[i]:
            errorCount += 1
    # The rate is taken over the samples actually tested, not the whole set.
    errorRate = float(errorCount) * 100 / testNum if testNum else 0.0
    print("错误数为:%d,数错误率为:%f%% " % (errorCount, errorRate))
    return
Beispiel #21
0
import sys
import kNN
from pylab import *
from numpy import *
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
# Load the dating data set and normalise it with kNN.autoNorm.
mat,lab = kNN.file2matrix('datingTestSet2.txt')
normMat, ranges, minVals = kNN.autoNorm(mat)

def randrange(n, vmin, vmax):
    """Return an array of ``n`` random values scaled from [0, 1) onto vmin..vmax."""
    span = vmax - vmin
    unit_samples = np.random.rand(n)
    return span * unit_samples + vmin

fig = plt.figure()
ax = fig.add_subplot(111,projection='3d')
#ax.scatter(normMat[:,0], normMat[:,1], normMat[:,2], 'o', 'c')
n = 1
for c, m, zl, zh in [('r', 'o', -50, -25), ('b', '^', -30, -5)]:
    xs = randrange(n, 23, 32)
    ys = randrange(n, 0, 100)
    zs = randrange(n, zl, zh)

ClassSet=lab
colorSet = []
for label in ClassSet:
	if label is '1':
		colorSet.append('r')
	elif label is '2':
		colorSet.append('b')
	elif label is '3':
Beispiel #22
0
print('d: ', d)
print('e', e)
group, labels = kNN.createDataSet()
print("I'm group: ", group)
print("I'm labels: ", labels)

k = kNN.classify0([0, 0], group, labels, 3)
print('I am k: ', k)

print('b.min: ', b.min(0))
print('b.max: ', b.max(0))
print('b.mean: ', b.mean(0))
b_m = (b - b.mean(0)) / (b.std(0))
print('b_m: ', b_m)

m = b.shape[0]
x = (b.max(0) - b.min(0))
b_a = (b - np.tile(b.min(0), (m, 1))) / np.tile(x, (m, 1))
print('b_a: ', b_a)

n, o, p = kNN.autoNorm(b)
print('n', n)
print('o: ', o)
print('p: ', p)
#fig = plt.figure()
#ax = fig.add_subplot(111)
#ax.scatter(b[:, 0], b[:, 1], 15*c, 15*c)
#plt.show()

kNN.handwritingClassTest()
'''
File Name:    main
Description:  Main script; mainly calls the functions in kNN.py
Author:       jwj
Date:         2018/1/18
'''
__author__ = 'jwj'

import kNN

if __name__ == '__main__':
    # Classify the point [0, 0] against the toy data set with k=3.
    group, labels = kNN.createDataSet()
    label = kNN.classify([0, 0], group, labels, 3)
    print(label)

    # Load the dating data and normalise it once; the earlier duplicate call
    # discarded its result.
    dataArray, dataLabels = kNN.file2matrix("datingTestSet2.txt")
    normMat, ranges, minVals = kNN.autoNorm(dataArray)
    # print(normMat)

    # kNN.dataClassTest()
    # kNN.classifyPerson()

    kNN.handwritingClassTest()
Beispiel #24
0
from numpy import array
import kNN
# Reload the kNN module before using it.
reload(kNN)
# NOTE(review): the string literal 'datingDataMat' is passed here rather than
# a data matrix; presumably the matrix returned by kNN.file2matrix was
# intended — confirm and fix.
normMat, ranges, minVals = kNN.autoNorm('datingDataMat')
print normMat
import kNN

# Load the dating data, normalise it and print the results.
dateDataMat, labels = kNN.file2matrix('./ml/2_kNN/datingTestSet2.txt')
normMat, ranges, minVals = kNN.autoNorm(dateDataMat)
# NOTE(review): both slices (0:0 and 2:2) are empty, so this selects no
# elements — confirm which rows/columns were meant to be shown.
print(normMat[0:0, 2:2])
print(ranges)
print(minVals)
import pandas as pd
import kNN

# Hold-out test of the kNN classifier (k=3) using pandas for loading.
# Columns a, b, c are the features; column d is the class label.
data = pd.read_table('datingTestSet2.txt', names=['a', 'b', 'c', 'd'])
normData, ranges, minVals = kNN.autoNorm(data.iloc[:, :-1])
datingLabels = data.d
# The first 10% of the rows are test vectors; the rest is the training set.
numTestVecs = int(0.1 * normData.shape[0])
errorCount = 0
for i in range(numTestVecs):
    classifierResult = kNN.classify0(normData.iloc[i].values,
                                     normData.iloc[numTestVecs:].values,
                                     datingLabels.iloc[numTestVecs:].values, 3)
    # print(...) with one formatted string works on Python 2 and 3 alike.
    print("the classifier came back with: %d, the real answer is: %d" % (
        classifierResult, data.d[i]))
    if classifierResult != data.d[i]:
        errorCount += 1
# float(): int / int truncates on Python 2 and would report a rate of 0.
print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
Beispiel #27
0
def autoNormTests():
    """Load the dating data set, normalise it and print the three results on one line."""
    datingDataArray, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normArray, ranges, minVals = kNN.autoNorm(datingDataArray)
    # One pre-formatted string: same space-separated output as the Python 2
    # print statement, and also valid on Python 3.
    print("%s %s %s" % (normArray, ranges, minVals))
    text = document.original_text
    text = re.sub("((http:|https:|ftp:|ftps:)//[\w$-_.+!*'(),%=]+)", '', text)
    text = re.sub("(@[\w_]+)", '', text)
    text = re.sub("(#[\w!$-_.+!*'(),%=]+)", '', text)
    text = re.sub("\p{P}+", '', text)
    text = re.sub("[\'\":#,!&]+", '', text)
    pos = classify_tweet(text).prob('positive')
    for category in categories:
        sim = np.append(sim, [pos])
    if count == 1:
        group = np.array([sim])
    else:
        group = np.append(group, [sim], axis = 0)
    labels.append(document.id)

group = kNN.autoNorm(group)

tweet_label = {}

count = 0
count_exer = 0
for document in doc:
    count += 1
    count_inner = 0
    for category in categories:
        if count_inner == 0:
            sim = np.array([TrainSet.similar(category, document)])
        else:
            sim = np.append(sim, [TrainSet.similar(category, document)])
        count_inner += 1
    text = document.original_text
# -*- coding: utf-8 -*-
'''
Created on 2015-09-29

@author: rains
'''
import matplotlib.pyplot as plt
import numpy as np
import os

import kNN

# Directory that holds the chapter's data files.
curdir='f:\\project\\python\\machine-learning-in-action/Ch02'
mat1,fab1 = kNN.file2matrix(curdir+"/datingTestSet.txt")

# Inspect the training set
# fig=plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(mat1[:,0],mat1[:,1],15.0*np.array(fab1),15.0*np.array(fab1))
# plt.show()

# Test the normalisation
# NOTE(review): the whole return value of autoNorm is bound to mat1; if
# autoNorm returns several values, mat1 becomes that tuple — confirm.
mat1 = kNN.autoNorm(mat1)

# Test the accuracy of the simple classifier
kNN.datingClassTest()

# Handwritten digit recognition
#kNN.handwritingClassTest()
Beispiel #30
0
## kNN test function
# Classify the point [0, 0] against the toy data set with k=3.
group, labels = kNN.createDataSet()
result = kNN.classify0([0, 0], group, labels, 3)

### 1. Dating-site matching
## load data and dating
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')

## plot dataSet
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(datingDataMat[:,0], datingDataMat[:,1], s=15.0*num.array(datingLabels), c=15.0*num.array(datingLabels))
# plt.show()

## normalization
normMat, valueRange, minVals = kNN.autoNorm(datingDataMat)

## test the model
# kNN.datingClassTest()

### 2. Handwriting recognition system
kNN.handwritingClassTest()

## a complete classifier system
# kNN.classifyPerson()

# print(normMat)
# print(range)
# print(minVals)
Beispiel #31
0
from numpy import array

# Build the toy data set and classify the point [0, 0] with k=3.
features, labels = kNN.createDataSet()
features

kNN.classify0([0, 0], features, labels, 3)

# Load the dating data; ``labels`` is rebound to the dating labels here.
datamat, labels = kNN.file2matrix('datingTestSet.txt')
'''
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datamat[:,1], datamat[:,2], 15.0*array(labels), 15.0*array(labels))
plt.show()
'''

# Normalise the data and print the three values returned by autoNorm.
normmat, ranges, minvals = kNN.autoNorm(datamat)
print(normmat)
print(ranges)
print(minvals)

# kNN.datingClassTest(0.2,7)


def classifyperson():
    """Prompt for three features, then load and normalise the dating data.

    NOTE(review): as visible here the function stops after normalisation and
    never classifies the input or uses ``result`` — the remainder appears to
    be missing; confirm.
    """
    result = ['not at all', 'small doses', 'large dose']

    ffmiles = float(input('frequent filter miles earned per year:'))
    gametimepercent = float(input('% of time spent on game:'))
    icecream = float(input('liters of ice cream consumed per year:'))
    datamat, labels = kNN.file2matrix('datingTestSet.txt')
    normmat, ranges, minvals = kNN.autoNorm(datamat)
Beispiel #32
0
    'datingTestSet2.txt'
)  # Load data values and labels from the datingTestSet2.txt
datingLabelArray = np.array(datingLabels)

colormap2 = {1: 'red', 2: 'blue', 3: 'green'}  #Define color map with 3 colors

ColoredDatingLabel = []
for things in datingLabelArray:  #Get a vector representing the colors
    ColoredDatingLabel.append(colormap2[things])  #for each data item

ax2 = FigDating.add_subplot(312, xlim=(0, 100000),
                            ylim=(0, 25))  #create second sub plot
ax2.scatter(datingDataMat[:, 0],
            datingDataMat[:, 1],
            s=20,
            c=ColoredDatingLabel,
            marker='o')  #Plot a scatter diagram for the data loaded

normMat, ranges, minVals = kNN.autoNorm(datingDataMat)  #normalize the data
ax3 = FigDating.add_subplot(313, xlim=(0, 1),
                            ylim=(0, 1))  #create third sub plot
ax3.scatter(normMat[:, 0],
            normMat[:, 1],
            s=20,
            c=ColoredDatingLabel,
            marker='o')  #Plot normalized data

plt.show()

NumberBad = kNN.datingClassTest(0.1)
Beispiel #33
0
dictLabel = {'largeDoses': 1, 'smallDoses': 2, 'didntLike': 3}
for line in lines:
    line = line.strip()
    listFromLine = line.split('\t')
    returnMat[index, :] = listFromLine[0:3]
    classLabelVector.append(int(dictLabel.get(listFromLine[-1])))
    index += 1
print returnMat[1, 2]

import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(returnMat[:, 1], returnMat[:, 2], 15.0 * array(classLabelVector),
           15.0 * array(classLabelVector))
#plt.show()

dataSet = array([[3, 4, 5], [1, 2, 6], [4, 5, 6]], dtype=float)
minVal = dataSet.min(0)
maxVal = dataSet.max(0)
ranges = maxVal - minVal
normDataSet = zeros(shape(dataSet), dtype=float)
m = dataSet.shape[0]
normDataSet2 = dataSet - tile(minVal, (m, 1))
normDataSet2.astype(float)
print(normDataSet2)
print(tile(ranges, (m, 1)))
normDataSet3 = normDataSet2 / tile(ranges, (m, 1))

import kNN
normMat, ranges, minVal = kNN.autoNorm(returnMat)
print(normMat)
Beispiel #34
0
#for the picture
import matplotlib
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])
plt.show()
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
           15.0 * array(datingLabels), 15.0 * array(datingLabels))
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           15.0 * array(datingLabels), 15.0 * array(datingLabels))

#for the autoNorm
reload(kNN)
norMat, ranges, minVals = kNN.autoNorm(datingDataMat)
norMat
ranges
minVals

#for the datingClassTest
kNN.datingClassTest()

#for the clasdifyPerson
kNN.classifyPerson()

#for the img2vector
testVector = kNN.img2vector('testDigits/0_13.txt')
testVector[0, 0:31]
testVector[0, 31:63]
# encoding: utf-8
from numpy import *
import kNN
import matplotlib
import matplotlib.pyplot as plt

# Scatter-plot columns 1 and 2 of the normalised dating data.
fig = plt.figure()

ax = fig.add_subplot(111)
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')

## The features have different value ranges, so normalise them here
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)

ax.scatter(normMat[:, 1], normMat[:,2])

# Add the axis labels
plt.xlabel('Percentage of Time Spent Playing Video Games')
plt.ylabel('Liters of Ice Cream Consumed Per Week')

plt.show()

Beispiel #36
0
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 21:20:35 2018

@author: ldz
"""
# =============================================================================
'''testDatingClassifier'''
# =============================================================================
from kNN import file2matrix, autoNorm, classify0
# Hold-out test of the kNN classifier on the dating data: the first 10% of
# the normalised rows are test vectors, the rest is the training set.
hoRatio = 0.10  # hold out 10%
k = 3
datingDataMat, datingLabels = file2matrix(
    'datingTestSet2.txt')  # load data set from file
normMat, ranges, minVals = autoNorm(datingDataMat)
m = normMat.shape[0]
numTestVecs = int(m * hoRatio)
errorCount = 0.0
for i in range(numTestVecs):
    classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                 datingLabels[numTestVecs:m], k)
    # Use the call form like the other prints in this script; the statement
    # form is a syntax error on Python 3.
    print("the classifier came back with: %d, the real answer is: %d" % (
        classifierResult, datingLabels[i]))
    if classifierResult != datingLabels[i]:
        errorCount += 1.0
print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
print("number of error:" + str(errorCount))
print("number of test:" + str(numTestVecs))
Beispiel #37
0
def main4():
    """Load the dating data set and normalise it; the results are not used further."""
    features, featureLabels = kNN.file2matrix('datingTestSet2.txt')
    # Obtain the normalisation parameters
    scaled, spans, lows = kNN.autoNorm(features)
Beispiel #38
0
import sys
import kNN
from pylab import *
from numpy import *
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

# Load the dating data set and normalise it with kNN.autoNorm.
mat, lab = kNN.file2matrix('datingTestSet2.txt')
normMat, ranges, minVals = kNN.autoNorm(mat)


def randrange(n, vmin, vmax):
    """Return an array of ``n`` random values scaled from [0, 1) onto vmin..vmax."""
    span = vmax - vmin
    unit_samples = np.random.rand(n)
    return span * unit_samples + vmin


# Create a figure with a single 3-D axes.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
#ax.scatter(normMat[:,0], normMat[:,1], normMat[:,2], 'o', 'c')
n = 1
# Draw one random point per (colour, marker, z-range) entry; the values are
# computed but not plotted in the code visible here.
for c, m, zl, zh in [('r', 'o', -50, -25), ('b', '^', -30, -5)]:
    xs = randrange(n, 23, 32)
    ys = randrange(n, 0, 100)
    zs = randrange(n, zl, zh)

# Map each class label to a plot colour. Labels are compared with ``==``:
# ``is`` tests object identity and is unreliable for string values.
ClassSet = lab
colorSet = []
for label in ClassSet:
    if label == '1':
        colorSet.append('r')
Beispiel #39
0
# type "print answer" to see result
ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' ) #plot first point
#second point -  created, classified and plotted
testvector = [.5, .5]                                                   
answer = kNN.classify0(testvector,group, labels, 3)
ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' )
#third point -  created, classified and plotted
testvector = [.75, .75]
answer = kNN.classify0(testvector,group, labels, 3)
ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' )

'''Perform K-Nearest Neighbor classification on the datingTestSet2 data set. Do not forget to include the data set in the working directory'''
datingDataMat,datingLabels = kNN.file2matrix('datingTestSet2.txt')                          # Load data values and labels from the datingTestSet2.txt
datingLabelArray = np.array(datingLabels)                                                   

colormap2 = { 1:'red', 2:'blue', 3:'green' }                                                    #Define color map with 3 colors

ColoredDatingLabel = [] 
for things in datingLabelArray:                                                                     #Get a vector representing the colors
    ColoredDatingLabel.append(colormap2[things])                                                    #for each data item

ax2 = FigDating.add_subplot(312, xlim=(0,100000), ylim=(0,25))                                      #create second sub plot 
ax2.scatter(datingDataMat[:,0], datingDataMat[:,1], s= 20, c= ColoredDatingLabel, marker = 'o' )    #Plot a scatter diagram for the data loaded

normMat, ranges, minVals = kNN.autoNorm(datingDataMat)                                              #normalize the data
ax3 = FigDating.add_subplot(313, xlim=(0,1), ylim=(0,1))                                            #create third sub plot
ax3.scatter(normMat[:,0], normMat[:,1], s = 20, c= ColoredDatingLabel, marker = 'o' )               #Plot normalized data

plt.show()

NumberBad = kNN.datingClassTest(0.1)