Ejemplo n.º 1
0
def show_file2matrix(file_name):
    """Scatter-plot columns 1 and 2 of the matrix loaded from ``file_name``.

    The ``knn`` module is reloaded first, then ``knn.file2matrix`` parses the
    file. Marker size and marker colour are both set to 15 times the label
    values it returns.
    """
    reload(knn)
    features, labels = knn.file2matrix(file_name)
    label_scale = 15.0 * array(labels)
    axes = plt.figure().add_subplot(111)
    axes.scatter(features[:, 1], features[:, 2], s=label_scale, c=label_scale)
    plt.show()
Ejemplo n.º 2
0
def train():
    """Print the hold-out error rate of ``knn.classify0`` on the dating data.

    Loads ``datingTestSet2.txt`` with ``knn.file2matrix``, passes the matrix
    through ``knn.autoNorm``, and then classifies each of the first 10% of
    the rows against the remaining rows (the trailing ``3`` is presumably
    the neighbour count k -- confirm against ``knn.classify0``). Each
    prediction is printed next to the stored label, followed by the fraction
    of mismatches.

    Raises:
        ZeroDivisionError: if the data set has fewer than 10 rows, because
            the held-out set is then empty.
    """
    datingDataMat, datingLabels = knn.file2matrix('datingTestSet2.txt')

    # Only the normalised matrix is needed below; the ranges and minimums
    # returned alongside it are ignored.
    normMat, _rangeVals, _minVals = knn.autoNorm(datingDataMat)

    # print(...) with one argument behaves identically on Python 2 and
    # Python 3; the bare print statement is a syntax error on Python 3.
    print(normMat)
    print(datingLabels)

    # Fraction of the rows held out for testing.
    hoRatio = 0.10
    # Total number of rows in the data set.
    m = normMat.shape[0]
    # Number of held-out rows.
    numTestVecs = int(m * hoRatio)

    errorCount = 0.0

    for i in range(numTestVecs):
        classifierResult = knn.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m], 3)

        print('分类器返回: %d, 实际的结果是:%d' % (classifierResult, datingLabels[i]))

        if classifierResult != datingLabels[i]:
            errorCount += 1.0

    print('错误率是: %f' % (errorCount / (float(numTestVecs))))
 def test_auto_norm(self):
     """Check the ranges and minimums returned by ``knn.auto_norm``.

     Loads ``datingTestSet2.txt`` and compares the second and third return
     values of ``knn.auto_norm`` with the expected per-column constants.
     """
     date_mat, date_label = knn.file2matrix('datingTestSet2.txt')
     norm_mat, ranges, min_val = knn.auto_norm(date_mat)
     min_exp = np.array([0., 0., 0.001156])
     ranges_exp = np.array([9.1273000e+04, 2.0919349e+01, 1.6943610e+00])
     # Compare with a tolerance rather than bit-for-bit: the values are
     # floats produced by knn.auto_norm (implementation not visible here),
     # and exact equality against decimal literals is fragile.
     self.assertTrue(np.allclose(min_exp, min_val))
     self.assertTrue(np.allclose(ranges_exp, ranges))
Ejemplo n.º 4
0
def testknn_matplotlib():
    """Load ``../datingTestSet.txt`` and scatter-plot columns 1 and 2.

    Marker size and colour are both 15 times the labels returned by
    ``knn.file2matrix``.
    """
    samples, sample_labels = knn.file2matrix('../datingTestSet.txt')
    axes = plt.figure().add_subplot(111)
    scaled = 15 * array(sample_labels)
    axes.scatter(samples[:, 1], samples[:, 2], s=scaled, c=scaled)
    plt.show()
Ejemplo n.º 5
0
def show_file2matrix(file_name):
    """Reload ``knn``, parse ``file_name`` and display a labelled scatter plot.

    Columns 1 and 2 of the matrix from ``knn.file2matrix`` are used as the
    x and y coordinates. The labels, multiplied by 15, set both the marker
    size and the marker colour.
    """
    reload(knn)
    data_matrix, data_labels = knn.file2matrix(file_name)
    x_values = data_matrix[:, 1]
    y_values = data_matrix[:, 2]
    emphasis = 15.0 * array(data_labels)
    figure = plt.figure()
    subplot = figure.add_subplot(111)
    subplot.scatter(x_values, y_values, s=emphasis, c=emphasis)
    plt.show()
Ejemplo n.º 6
0
def classfiy_person():
    """Prompt for three feature values and print the predicted category.

    The answers are read in the order: video-game percentage, frequent
    flier miles, ice cream litres. They are rescaled with the minimums and
    ranges returned by ``knn.auto_normal`` for ``./knn/datingTestSet2.txt``
    and handed to ``knn.classify0``. The result, minus one, indexes the
    list of category names.
    """
    result_list = ['not at all', 'in small doses', 'in large doses']
    prompts = (
        "percentage of time spent playing video games ?",
        "frequent flier miles earned per year ?",
        "liter of ice cream consumed per year ?",
    )
    # Prompts are asked, and converted to float, one at a time in this order.
    percent_tats, ffmiles, ice_cream = [float(input(text)) for text in prompts]

    dating_mat, dating_labels = knn.file2matrix('./knn/datingTestSet2.txt')
    normal_mat, ranges, min_values = knn.auto_normal(dating_mat)

    # Feature order passed to the classifier: miles, game percentage, ice cream.
    sample = array([ffmiles, percent_tats, ice_cream])
    scaled_sample = (sample - min_values) / ranges
    classfiy_result = knn.classify0(scaled_sample, normal_mat, dating_labels, 3)

    verdict = result_list[classfiy_result - 1]
    print("You will probably like this person: ", verdict, "(" + str(classfiy_result) + ")")
 def test_file2matrix(self):
     """Check the shape and the first labels parsed by ``knn.file2matrix``.

     Loads ``datingTestSet2.txt``, shows a scatter plot of columns 1 and 2
     (marker size and colour are 15 times the labels), then asserts a
     1000x3 matrix, 1000 labels, and the first ten label values.
     """
     date_mat, date_label = knn.file2matrix('datingTestSet2.txt')
     fig = plt.figure()
     ax = fig.add_subplot(111)
     label_scale = 15.0 * np.array(date_label)
     ax.scatter(date_mat[:, 1], date_mat[:, 2], s=label_scale, c=label_scale)
     # NOTE(review): plt.show() may block until the window is closed,
     # depending on the matplotlib backend -- confirm this is wanted in a
     # unit test.
     plt.show()
     self.assertEqual((1000, 3), date_mat.shape)
     self.assertEqual(1000, len(date_label))
     labels_exp = [3, 2, 1, 1, 1, 1, 3, 3, 1, 3]
     # Compare the sequences directly so a failure reports the differing
     # labels; list() also makes this work if the labels are an ndarray.
     self.assertEqual(labels_exp, list(date_label[0:10]))
Ejemplo n.º 8
0
def test():
    """Exercise the ``knn`` helpers: toy data set, classifier, file loader, plot.

    Prints the toy data set from ``knn.create_dataset``, the result of
    classifying ``[0, 0]`` against it, and the matrix and labels loaded
    from ``./knn/datingTestSet2.txt``, then calls ``knn.show_plt``.
    """
    group, labels = knn.create_dataset()
    for item in (group, labels):
        print(item)

    predicted = knn.classify0([0, 0], group, labels, 3)
    print("distance is %s !" % predicted)

    # Relative paths resolve differently under PyCharm; check the working
    # directory in Run -> Edit Configurations.
    # Alternative data file: './knn/datingTestSet.txt'
    dating_mat, dating_labels = knn.file2matrix('./knn/datingTestSet2.txt')
    for item in (dating_mat, dating_labels):
        print(item)

    knn.show_plt(dating_mat, dating_labels)
Ejemplo n.º 9
0
def predict():
    """Prompt for three feature values and print the predicted preference.

    The three answers are converted to float and rescaled with the minimums
    and ranges that ``knn.autoNorm`` returns for ``datingTestSet2.txt``.
    They are then classified with ``knn.classify0`` against the normalised
    data. The result, minus one, indexes the list of preference levels.
    """
    # raw_input exists only on Python 2; Python 3 renamed it to input.
    try:
        ask = raw_input
    except NameError:
        ask = input

    resultList = ['一点也不喜欢', '有点喜欢', '非常喜欢']
    percentTats = float(ask('玩游戏的时间是: '))
    ffMiles = float(ask('每年的飞行公里数: '))
    iceCream = float(ask('每年消耗的冰淇淋: '))

    datingDataMat, datingLabels = knn.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = knn.autoNorm(datingDataMat)

    inArr = np.array([ffMiles, percentTats, iceCream])

    # Normalise the input the same way as the reference data.
    inArrNorm = (inArr - minVals) / ranges

    classifierResult = knn.classify0(inArrNorm, normMat, datingLabels, 3)

    # Build one string so the output is the same on Python 2 and Python 3:
    # the print statement is a syntax error on 3, and print(a, b) would
    # print a tuple on 2.
    print('预测你可能喜欢这个人的程度:' + ' ' + resultList[classifierResult - 1])
def predict():
    """Read three feature values from the console and print the prediction.

    Inputs are asked in the order: game time, flight distance per year,
    ice cream per year. They are rescaled with the ``minVals`` and
    ``ranges`` returned by ``knn.autoNorm`` for ``datingTestSet2.txt`` and
    passed to ``knn.classify0``. The result, minus one, selects the entry
    of ``resultList`` to print.
    """
    # Pick whichever line reader this interpreter provides: raw_input on
    # Python 2, input on Python 3.
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    resultList = ['一点也不喜欢', '有点喜欢', '非常喜欢']
    percentTats = float(readLine('玩游戏的时间是: '))
    ffMiles = float(readLine('每年的飞行公里数: '))
    iceCream = float(readLine('每年消耗的冰淇淋: '))

    datingDataMat, datingLabels = knn.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = knn.autoNorm(datingDataMat)

    inArr = np.array([ffMiles, percentTats, iceCream])

    # Apply the same normalisation to the input as to the reference data.
    inArrNorm = (inArr - minVals) / ranges

    classifierResult = knn.classify0(inArrNorm, normMat, datingLabels, 3)

    # A single pre-joined string prints identically on Python 2 and 3; the
    # original print statement does not parse on Python 3.
    print(' '.join(['预测你可能喜欢这个人的程度:', resultList[classifierResult-1]]))
def train():
    """Evaluate ``knn.classify0`` on a 10% hold-out of the dating data.

    ``datingTestSet2.txt`` is loaded with ``knn.file2matrix`` and passed
    through ``knn.autoNorm``. The first 10% of the rows are classified
    against the remaining rows, with ``3`` as the last argument (presumably
    k -- confirm against ``knn.classify0``). Every prediction is printed
    with the stored label, followed by the fraction of mismatches.

    Raises:
        ZeroDivisionError: if fewer than 10 rows are loaded, since the
            held-out set is then empty.
    """
    datingDataMat, datingLabels = knn.file2matrix('datingTestSet2.txt')

    # The ranges and minimums returned by autoNorm are not used here.
    normMat, _rangeVals, _minVals = knn.autoNorm(datingDataMat)

    # Parenthesised single-argument print is valid on both Python 2 and
    # Python 3; the statement form fails to parse on Python 3.
    print(normMat)
    print(datingLabels)

    # Fraction of the rows held out for testing.
    hoRatio = 0.10
    # Total number of rows in the data set.
    m = normMat.shape[0]

    # Number of held-out rows.
    numTestVecs = int(m*hoRatio)

    errorCount = 0.0

    for i in range(numTestVecs):
        classifierResult = knn.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m],
                                         3)

        print('分类器返回: %d, 实际的结果是:%d' % (classifierResult, datingLabels[i]))

        if classifierResult != datingLabels[i]:
            errorCount += 1.0

    print('错误率是: %f' % (errorCount / (float(numTestVecs))))
Ejemplo n.º 12
0

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

import knn
import mnist_test

# Run knn.classify0 and knn.knn2 on the point [0, 0] against the toy data set.
group, labels = knn.createDataSet()
bb = knn.classify0([0, 0], group, labels, 3)
# Parenthesised single-argument print works on Python 2 and Python 3 alike.
print(bb)
cc = knn.knn2([0, 0], group, labels, 3)

# Visualise the data.
dataSet, labels = knn.file2matrix('data/datingTestSet2.txt')
# Coerce the labels to an ndarray before scaling: multiplying a plain list
# by a float raises TypeError, and asarray leaves an existing array as is.
labelScale = 15.0 * np.asarray(labels)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(dataSet[:, 0], dataSet[:, 1], s=labelScale, c=labelScale)
plt.show()


# Measure the misclassification rate.
# NOTE(review): reload is a builtin on Python 2 only; on Python 3 it lives
# in importlib.
reload(knn)
# testRatio is the share of rows used for testing, k the number of neighbours.
knn.knnTest('../data/datingTestSet2.txt', testRatio=0.2, k=3)


# Handwritten-digit recognition test.

mnist_test.mnist_test(500, 100, k=7)
Ejemplo n.º 13
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/11/30 16:52
# @Author  : zhcf1ess
# @Site    :
# @File    : createFirstPLT.py
# @Software: PyCharm
from numpy import *
import knn
import matplotlib.pyplot as plt

# Create the figure, load the data set, and draw columns 1 and 2 with marker
# size and colour both set to 15 times the labels.
fig = plt.figure()
ax = fig.add_subplot(111)
datingDataMat, datingLabels = knn.file2matrix('../data/ch02/datingTestSet2.txt')
labelScale = 15.0 * array(datingLabels)
ax.scatter(
    datingDataMat[:, 1],
    datingDataMat[:, 2],
    s=labelScale,
    c=labelScale,
)
# Fixed view window: x from -2 to 25, y from -0.2 to 2.0.
ax.axis([-2, 25, -0.2, 2.0])
plt.xlabel('Percentage of Time Spent Playing Video Games')
plt.ylabel('Liters of Ice Cream Consumed Per Week')
plt.show()
Ejemplo n.º 14
0
def testknn():
    """Load ``../datingTestSet.txt`` and print column 1 of the returned matrix."""
    matrix, _labels = knn.file2matrix('../datingTestSet.txt')
    second_column = matrix[:, 1]
    print(second_column)
Ejemplo n.º 15
0
import knn

from numpy import *
import matplotlib
import matplotlib.pyplot as plt

# Load the data set and scatter-plot columns 1 and 2; marker size and colour
# are both 15 times the labels.
datingDataMat, datingLabels = knn.file2matrix("machinelearninginaction/Ch02/datingTestSet2.txt")

fig = plt.figure()
ax = fig.add_subplot(111)
labelScale = 15.0 * array(datingLabels)
ax.scatter(
    datingDataMat[:, 1],
    datingDataMat[:, 2],
    s=labelScale,
    c=labelScale,
)
plt.show()
Ejemplo n.º 16
0
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 21 10:38:54 2018

@author: fsxn2
"""

import knn
import matplotlib
import matplotlib.pyplot as plt
#group,labels=knn.createDataSet()
#print(knn.classify0([0,0],group,labels,3))
# Load the samples from input.txt and print the three values that
# knn.autoNorm returns for them, one after another.
group, labels = knn.file2matrix("input.txt")
auto, ranges, minval = knn.autoNorm(group)
for value in (auto, ranges, minval):
    print(value)
# Disabled scatter plot kept for reference:
#fig=plt.figure()
#ax=fig.add_subplot(111)
#ax.scatter(group[:,1],group[:2])
#plt.show()
# Classify [1, 0, 3] against the samples as loaded (not the autoNorm output).
query = [1, 0, 3]
print(knn.classify0(query, group, labels, 3))
Ejemplo n.º 17
0
# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

import knn
import matplotlib
import matplotlib.pyplot as plt
from array import array
from numpy import *

# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # Load the data set and echo the matrix and the labels.
    datingDataMat, datingLabels = knn.file2matrix('datingTestSet.txt')
    for loaded in (datingDataMat, datingLabels):
        print(loaded)

    # Display columns 1 and 2 on a single set of axes.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    xValues = datingDataMat[:, 1]
    yValues = datingDataMat[:, 2]
    # Plain scatter: no useful pattern is visible.
    ax.scatter(xValues, yValues)
    # Same points again, with size and colour set from the labels.
    labelScale = 15.0 * array(datingLabels)
    ax.scatter(xValues, yValues, s=labelScale, c=labelScale)
    plt.show()
    plt.close()

    # Normalise the data.
    normMat, ranges, minVals = knn.autoNorm(datingDataMat)
    print('norm mat:')
Ejemplo n.º 18
0
import matplotlib
import matplotlib.pyplot as plt
from numpy import array
import knn

# Load the data; this knn.file2matrix variant returns three values, and the
# third one (vector) sets the marker size and colour below.
datingDataMat, datingLabels, vector = knn.file2matrix('datingTestSet.txt')

fig = plt.figure()
vectorScale = 15.0 * array(vector)

# Left panel: columns 1 and 2, unstyled.
ax = fig.add_subplot(131)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])

# Middle panel: columns 1 and 2, size and colour from vector.
ax = fig.add_subplot(132)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2], s=vectorScale, c=vectorScale)

# Right panel: columns 0 and 1, size and colour from vector.
ax = fig.add_subplot(133)
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1], s=vectorScale, c=vectorScale)
plt.show()