예제 #1
0
def datingClassTest():
    """Try k = 1..20 on the dating data and report the k with the fewest errors.

    The first half of the normalised rows is used as the test set and the
    second half as the training set.  For every k the error rate and the raw
    error count are printed; ties are resolved in favour of the larger k
    because the comparison below uses ``<=``.
    """
    hoRatio = 0.50  # hold out 50% of the rows for testing
    datingDataMat, datingLabels = kNN.file2matrix(
        'datingTestSet.txt')  # load data set from file
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)  # 50% train set, 50% test set
    # The error count can never exceed m, so the first k always becomes the
    # initial best candidate.
    errorAns = m
    for k in range(20):
        errorCount = 0
        for i in range(numTestVecs):
            classifierResult = kNN.classify0(normMat[i, :],
                                             normMat[numTestVecs:m, :],
                                             datingLabels[numTestVecs:m],
                                             k + 1)
            if classifierResult != datingLabels[i]:
                errorCount += 1.0
        # Single-argument print() calls produce the same output on
        # Python 2 and Python 3.
        print("when k is %d the total error rate is: %f" % (
            k + 1, errorCount / float(numTestVecs)))
        print(errorCount)
        if errorCount <= errorAns:
            errorAns = errorCount
            ans = k + 1
    print("the best k is %d" % ans)
예제 #2
0
 def test_autoNorm(self):
     """Load the dating data set, normalise it and print both stages."""
     rawMatrix, _labels = kNN.file2matrix("datingTestSet.txt")
     print("\n datingDataMat == %s" % (rawMatrix))
     scaled, spans, lows = kNN.autoNorm(rawMatrix)
     # One template for the three values returned by autoNorm.
     report = "\n normDataSet == %s \n ranges == %s \n minVals == %s \n"
     print(report % (scaled, spans, lows))
예제 #3
0
def paintDataSet():
    """Scatter-plot columns 1 and 2 of the dating data, sized and coloured by label."""
    features, classes = kNN.file2matrix('datingTestSet.txt')

    canvas = plt.figure()
    axes = canvas.add_subplot(111)
    xs = features[:, 1]
    ys = features[:, 2]
    # The scaled label vector is passed twice: once as marker size, once as colour.
    axes.scatter(xs, ys, 20.0*array(classes), 20.0*array(classes))
    plt.show()
예제 #4
0
def test_knn_dating():
    """Fit scikit-learn's k-NN on 90% of the dating data and check the other 10%.

    The samples are read and normalised with the ``knn`` module, shuffled,
    split into disjoint train/test parts, and for every test sample ``True``
    or ``False`` is printed depending on whether the prediction matches.
    """
    # Read the samples with the knn module's loader and normalise them.
    x, y = knn.file2matrix('datingTestSet2.txt')
    norm_x, ranges, minVals = knn.autoNorm(x)
    norm_x = np.array(norm_x)
    y = np.array(y)

    # Shuffle once and cut at the 90% mark.  The original started the test
    # slice at the 10% mark, so most test rows were also training rows.
    train_ratio = 0.9
    size_data = len(norm_x)
    indices = np.random.permutation(size_data)
    split = int(train_ratio * size_data)
    train_idx = indices[:split]
    test_idx = indices[split:]
    x_train, y_train = norm_x[train_idx], y[train_idx]
    x_test, y_test = norm_x[test_idx], y[test_idx]

    # Configure the classifier and feed it the training data.
    knn_classfier = neighbors.KNeighborsClassifier()
    knn_classfier.fit(x_train, y_train)

    # Evaluate every held-out sample instead of a hard-coded 100.
    result = knn_classfier.predict(x_test)
    for predicted, actual in zip(result, y_test):
        print(bool(predicted == actual))
예제 #5
0
def autoNormTest():
    """Normalise the dating data set and print the matrix, ranges and minimums."""
    # Raw string: the Windows path contains backslashes followed by letters
    # that are not valid escape sequences; the resulting value is unchanged.
    returnMat, classLabelVector = kNN.file2matrix(
        r"C:\Users\yangy\PycharmProjects\MLIA\kNN\datingTestSet2.txt")
    normMat, ranges, minVals = kNN.autoNorm(returnMat)
    print('normMat:', normMat)
    print('ranges:', ranges)
    print('minVals:', minVals)
예제 #6
0
def datingClassTest():
    """
    Evaluate the k-NN classifier (k = 3) on the dating-site data.

    The first 10% of the normalised rows are classified against the remaining
    rows; each prediction, the error rate and the error count are printed.

    :return: None
    """
    holdOutFraction = 0.1

    # Load the raw samples and bring every feature onto the same scale.
    rawData, allLabels = kNN.file2matrix('datingTestSet2.txt')
    scaled, _spans, _lows = kNN.autoNorm(rawData)

    # Number of rows, and how many of them serve as test vectors.
    rowCount = scaled.shape[0]
    testCount = int(rowCount * holdOutFraction)
    print('numTestVecs = ', testCount)

    misses = 0.0
    for idx in range(testCount):
        predicted = kNN.classify0(scaled[idx, :],
                                  scaled[testCount:rowCount, :],
                                  allLabels[testCount:rowCount], 3)
        expected = allLabels[idx]
        print('The classifier came back with %d, the real answer is: %d' %
              (predicted, expected))
        if predicted != expected:
            misses += 1.0
    print('The total error rate is %f' % (misses / float(testCount)))
    print(misses)
예제 #7
0
def test2():
    """Print the labels as an array and scatter-plot feature columns 1 and 2."""
    samples, tags = kNN.file2matrix('datingTestSet.txt')
    print(array(tags))
    canvas = plt.figure()
    axes = canvas.add_subplot(111)
    xs, ys = samples[:, 1], samples[:, 2]
    axes.scatter(xs, ys)
    plt.show()
예제 #8
0
def draw():
    """Plot columns 0 and 1 of the dating data by label and save it as 0_1.png."""
    canvas = plt.figure()
    axes = canvas.add_subplot(111)

    features, tags = kNN.file2matrix('datingTestSet2.txt')

    # Scaled labels drive both the marker size and the marker colour.
    xs = features[:, 0]
    ys = features[:, 1]
    axes.scatter(xs, ys, 15.0 * np.array(tags), 15.0 * np.array(tags))
    canvas.savefig('0_1.png')
예제 #9
0
def matplotlibTest():
    """Scatter-plot columns 0 and 1 of the dating data, sized and coloured by label."""
    # Raw string: the Windows path contains backslashes followed by letters
    # that are not valid escape sequences; the resulting value is unchanged.
    returnMat, classLabelVector = kNN.file2matrix(
        r"C:\Users\yangy\PycharmProjects\MLIA\kNN\datingTestSet2.txt")
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(returnMat[:, 0], returnMat[:, 1],
               15.0 * np.array(classLabelVector),
               15.0 * np.array(classLabelVector))
    plt.show()
예제 #10
0
def feature_show():
    """Show two side-by-side scatter plots of the dating features.

    Left panel: column 1 against column 2.  Right panel: column 0 against
    column 1.  In both, the scaled labels set marker size and colour.
    """
    data, tags = file2matrix(DATING_DATA)
    canvas = plt.figure()

    left = canvas.add_subplot(121)
    left.scatter(data[:, 1], data[:, 2], 15.0 * array(tags), 15.0 * array(tags))

    right = canvas.add_subplot(122)
    right.scatter(data[:, 0], data[:, 1], 15.0 * array(tags), 15.0 * array(tags))

    plt.show()
예제 #11
0
def main3():
    """Read the text data file and plot it."""
    samples, tags = kNN.file2matrix('datingTestSet2.txt')
    canvas = plt.figure()
    axes = canvas.add_subplot(111)
    xs = samples[:, 1]
    ys = samples[:, 2]
    # First pass: plain points with default size and colour.
    axes.scatter(xs, ys)
    # Second pass: the same points, drawn differently per class label.
    axes.scatter(xs, ys, 15.0*array(tags), 15.0*array(tags))
    plt.show()
예제 #12
0
def classifyperson():
    """Ask for three traits, classify them with k-NN (k = 3) and print the verdict."""
    verdicts = ['not at all', 'small doses', 'large dose']

    miles = float(input('frequent filter miles earned per year:'))
    gameShare = float(input('% of time spent on game:'))
    iceCreamLitres = float(input('liters of ice cream consumed per year:'))

    samples, tags = kNN.file2matrix('datingTestSet.txt')
    scaled, spans, lows = kNN.autoNorm(samples)

    # Scale the query with the same minimums and ranges as the training data.
    query = array([miles, gameShare, iceCreamLitres])
    query = (query - lows) / spans
    prediction = kNN.classify0(query, scaled, tags, 3)
    print("you like this person:", verdicts[prediction - 1])
예제 #13
0
def classifyPerson():
    """Prompt for three traits and print the predicted level of interest.

    The answers are scaled with the minimums and ranges returned by
    ``kNN.autoNorm`` and classified against the normalised dating data
    with k = 3.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent filter miles earned per year"))
    iceCream = float(input("liters of ice cream consumed per year"))
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    inArr = numpy.array([ffMiles, percentTats, iceCream])
    # The third argument is the label vector; the original passed the raw
    # data matrix here, so whole feature rows were returned as "labels".
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normMat,
                                     datingLabels, 3)
    # The original wrote "...".resultList[...], an attribute lookup on a str
    # that raises AttributeError.  Concatenate the description instead.
    # Labels are presumably 1-based, hence the -1 (TODO confirm against the data file).
    print("you will probably like this person: " +
          resultList[classifierResult - 1])
예제 #14
0
def classifyPerson():
    """Prompt for three traits, classify them (k = 3) and print the outcome.

    Prints the normalised matrix, the numeric class returned by
    ``kNN.classify0`` and the matching description.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    resultlist = ['not at all', 'in small doss', 'in large does']
    percentTats = float(read_line("percentage of time spent playing video game?"))
    ffMiles = float(read_line("frequent filer miles earned per year?"))
    icecream = float(read_line("liters of ice cream consumed per year?"))
    datingDataMat, datingLabel = kNN.file2matrix('datingTestSet2.txt')
    normat, rangeval, minval = kNN.autonorm(datingDataMat)
    print("normat:%s" % (normat))
    inX = array([ffMiles, percentTats, icecream])
    retVal = kNN.classify0((inX - minval) / rangeval, normat, datingLabel, 3)
    print("retval[%d]" % (retVal))
    # The sibling implementations index the description list with label - 1;
    # the unshifted index would run off the end for the highest class.
    # Assumes 1-based class labels - TODO confirm against the data file.
    print("resutl:%s " % (resultlist[retVal - 1]))
def classifyPerson():
    """Prompt for three traits and print the predicted level of interest.

    The answers are scaled with the minimums and ranges returned by
    ``kNN.autoNorm`` and classified against the normalised data with k = 3.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    resultlist = ['not at all', 'in small doses', 'in large doses']
    games = float(read_line(
        "percentage of time spent playing video games?"))
    flymiles = float(read_line(
        "frequent flier miles earned per year?"))
    icecream = float(read_line(
        "liters of ice cream consumed per year?"))
    datingdata, datinglabel = kNN.file2matrix('datingTestSet2.txt')
    normdata, ranges, minv = kNN.autoNorm(datingdata)
    inarr = array([flymiles, games, icecream])
    result = kNN.classify0((inarr - minv)/ranges, normdata, datinglabel, 3)
    # Single-argument print() gives the same text on Python 2 and Python 3.
    print("you will probably like this person: %s" % resultlist[result-1])
예제 #16
0
def datingClassTest():
    """Classify the first 10% of the dating rows against the rest (k = 3).

    Prints every prediction next to the true label, then the error rate.
    """
    hoRatio = 0.10
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m*hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        # NOTE(review): classify0 is called unqualified while the other
        # helpers come from the kNN module - confirm it is imported by name.
        classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], 3)
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("the classifier came back with: %d, the real answer is: %d" % (
            classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount/float(numTestVecs)))
예제 #17
0
 def test_matplot(self):
     """Scatter-plot two dating features, sized and coloured by class label."""
     samples, tags = kNN.file2matrix("datingTestSet.txt")
     # Create a figure with a single set of axes.
     canvas = plt.figure()
     axes = canvas.add_subplot(111)
     # The plot uses columns 1 and 2 (0-based) of the data matrix.  Per the
     # original author's notes, the x axis is the percentage of time spent
     # playing video games and the y axis is the litres of ice cream consumed
     # per week.
     xs = samples[:, 1]  # every row, column 1
     ys = samples[:, 2]  # every row, column 2
     axes.scatter(xs, ys, 15.0 * array(tags), 15.0 * array(tags))
     plt.show()
예제 #18
0
def datingClassTest():
    """Classify the first 55% of the dating rows against the rest (k = 3).

    Prints every prediction next to the true label, then the error rate.
    """
    hoRatio = 0.550
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = kNN.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m], 3)
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print('the classifier came back with: %s, the real answer is: %s' % (
            classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
예제 #19
0
def showDatingInput():
    """Read three traits from the console and print the predicted category (k = 5)."""
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    # Collect the sample to classify.
    resultList = ['not at all', 'in small doses', 'in large doses']
    mPercentGame = float(
        read_line('the percentange of time spent playing vedio games:'))
    mPercentMiles = float(read_line('the miles earned every year:'))
    mpercentIce = float(read_line('the ice cream consumed per year:'))
    testArray = [mPercentMiles, mPercentGame, mpercentIce]

    mat, labels = kNN.file2matrix('datingTestSet2.txt')
    normMat, mRange, mMin = kNN.autoNum(mat)
    # testArray is a plain list; the subtraction presumably relies on mMin
    # being a NumPy array - TODO confirm.
    ansType = kNN.classify0((testArray - mMin) / mRange, normMat, labels, 5)
    # Single-argument print() gives the same text on Python 2 and Python 3.
    print('This guy is mostly %s' % resultList[int(ansType) - 1])
예제 #20
0
def dating_class_test():
    """Hold out the first HO_RATIO share of rows and report the k-NN error rate.

    Every held-out row is classified against the remaining rows with K
    neighbours; each prediction and the final error rate are printed.
    """
    raw, tags = file2matrix(DATING_DATA)
    scaled, _spans, _lows = auto_norm(raw)
    total = scaled.shape[0]
    held_out = int(total * HO_RATIO)
    misses = 0.0
    for row in range(held_out):
        guess = classify0(scaled[row, :],
                          scaled[held_out:total, :],
                          tags[held_out:total], K)
        truth = tags[row]
        print('the classifier came back with: %d, the real answer is: %d' %
              (guess, truth))
        if guess != truth:
            misses += 1.0
    print("the total error rate is: %f" % (misses / float(held_out)))
예제 #21
0
def classifyPerson():
    """Prompt for three traits and print the class returned by k-NN (k = 3)."""
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    # Original author's notes (translated): the python27 environment
    # configured in Sublime cannot read console input; run the .py file
    # directly and add a wait-for-input at the end of the program.
    percentTats = float(read_line(
        "percentage of time spent playing video games?"))
    ffMiles = float(read_line("frequent flier miles earned per year?"))
    iceCream = float(read_line("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    inArr = array([ffMiles, percentTats, iceCream])
    classifierResult = kNN.classify0((inArr - minVals) / ranges,
                                     normMat, datingLabels, 3)
    # Two spaces after the colon reproduce the Python 2 output, where the
    # trailing space of the literal was followed by print's own separator.
    print("You will probably like this person:  %s" % (classifierResult,))
예제 #22
0
def datingSetTest(horate):
    """Hold out the leading rows of the dating data and report the k-NN error rate.

    :param horate: fraction of the rows, taken from the start of the data
        set, that is classified against the remaining rows with k = 3.
    """
    # Indentation is normalised to four spaces; the original mixed tabs and
    # spaces, which Python 3 rejects with TabError.
    datingDataMat, datingLabel = kNN.file2matrix('datingTestSet2.txt')
    print("data[%d]:%s,\nlabel:%s" % (
        datingDataMat.shape[0], datingDataMat, datingLabel))
    datingDataMat, rangeval, minval = kNN.autonorm(datingDataMat)
    print("data[%d]:%s" % (datingDataMat.shape[0], datingDataMat))
    m = datingDataMat.shape[0]
    count = int(m * horate)
    errcount = 0.0
    # Start at row 0: the original began at 1, so the first test row was
    # never classified although the rate below is divided by count.
    for i in range(count):
        retVal = kNN.classify0(datingDataMat[i, :], datingDataMat[count:m, :],
                               datingLabel[count:m], 3)
        print("orignal:%d,calculate:%d" % (datingLabel[i], retVal))
        if retVal != datingLabel[i]:
            errcount += 1.0
            print("error.")
    print("error rate:%f" % (errcount / float(count)))
예제 #23
0
def classifyPerson():
    """
    Read three traits as prompted and print the predicted level of interest.

    :return: None
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    ll = ["不喜欢的人", "魅力一般的人", "具有魅力的人"]
    x1 = float(read_line("玩视频游戏所耗时间的百分比?"))
    x2 = float(read_line("每年获得的飞行常客里程数为?"))
    x3 = float(read_line("每周消费的冰淇淋的功升数为?"))

    x, y = kNN.file2matrix("datingTestSet2.txt")
    normX, rage, minV = kNN.autoNorm(x)

    # The prompts ask for game time (x1) before flier miles (x2), but the
    # sibling snippets build the vector as miles, game time, ice cream.
    # Presumably the data file uses that column order - TODO confirm.
    inX = (array([x2, x1, x3]) - minV) / rage

    # Compare the normalised query with the normalised matrix; the original
    # passed the raw matrix, so the distances mixed two different scales.
    ret = kNN.classify0(inX, normX, y, 3)

    # Single-argument print() gives the same text on Python 2 and Python 3.
    print("你对的喜欢程度可能是: %s" % ll[int(ret) - 1])
0
def datingClassTest():
    """
    Classifier test: classify the held-out dating-site rows with k-NN (k = 3).

    The first 10% of the normalised rows are tested against the rest; each
    prediction, the error count and the error percentage are printed.

    :return: None
    """
    hoRet = 0.10
    x, y = kNN.file2matrix("datingTestSet.txt")
    normX, rage, minV = kNN.autoNorm(x)
    m = normX.shape[0]
    errorCount = 0
    testNum = int(hoRet * m)
    for i in range(testNum):
        # The labels are sliced to match the training rows; the original
        # passed the full label list, so neighbour i was paired with label i
        # instead of label testNum + i.
        yr = kNN.classify0(normX[i, :], normX[testNum:m, :], y[testNum:m], 3)
        print("第%d个分类为%s,原来分类为%s" % (i, yr, y[i]))
        if yr != y[i]:
            errorCount += 1
    # The rate is relative to the number of rows tested, not to the whole
    # data set.  Guard the empty-test case so tiny data sets still report 0%.
    errorRate = float(errorCount) * 100 / testNum if testNum else 0.0
    print("错误数为:%d,数错误率为:%f%% " % (errorCount, errorRate))
    return
예제 #25
0
def showDatingTestData():
    """
    Test the dating example: load the file into a matrix and use the first
    10% of the rows as the test set (k = 5).

    :return: None
    """
    mRatio = 0.1
    mat, labels = kNN.file2matrix('datingTestSet2.txt')
    normMat, mRange, mMin = kNN.autoNum(mat)  # normalise the data
    mCount = mat.shape[0]  # number of rows
    mTestCount = int(mRatio * mCount)  # size of the test set
    mError = 0  # number of misclassified rows
    for i in range(mTestCount):
        mResult = kNN.classify0(normMat[i, :], normMat[mTestCount:mCount, :],
                                labels[mTestCount:mCount], 5)
        if mResult != labels[i]:
            mError += 1
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print('The error rate is: %f' % (mError * 1.0 / mTestCount))
    print('The total test count is %d and the error count is %d' % (
        mTestCount, mError))
예제 #26
0
def datingClassTest():
    """Classify the first 10% of the dating rows against the rest (k = 3).

    Prints every prediction next to the true label, then the error rate.
    """
    # Share of the data used to test the classifier.
    holdOut = 0.10
    # Convert the raw text file, then normalise it.
    rawData, tags = kNN.file2matrix('datingTestSet2.txt')
    # NOTE(review): autoNorm and classify0 are called unqualified while
    # file2matrix comes from kNN - confirm they are imported by name.
    scaled, _spans, _lows = autoNorm(rawData)
    # Number of test vectors.
    total = scaled.shape[0]
    testCount = int(total * holdOut)

    misses = 0.0
    for row in range(testCount):
        guess = classify0(scaled[row, :], scaled[testCount:total, :],
                          tags[testCount:total], 3)
        truth = tags[row]
        print("the classifier came back with: %d, the real answer is: %d"
              % (guess, truth))
        if guess != truth:
            misses += 1.0
    print("the total error rate is:%f" % (misses / float(testCount)))
import os
import kNN

# Directory that contains this script; the data file is looked up next to it.
CURRENT_DIR = os.path.dirname(__file__)

groups, labels = kNN.createDataset()

# Single-argument print() behaves the same on Python 2 and Python 3.
print(kNN.classify0([0, 0, 0], groups, labels, 3))

# Pass the parts separately: concatenating '/datingTestSet.txt' produced an
# absolute path at the filesystem root whenever CURRENT_DIR was empty.
dataSetFile = os.path.join(CURRENT_DIR, 'datingTestSet.txt')

datingDataMat, datingLabels = kNN.file2matrix(dataSetFile)

# NOTE(review): this classifies a dating-style sample against the toy
# groups/labels rather than the dating data loaded just above - confirm intent.
print(kNN.classify0([40920, 8.326976, 0.953952], groups, labels, 3))
예제 #28
0
def main4():
    """Load the dating data set and compute its normalisation parameters."""
    samples, _tags = kNN.file2matrix('datingTestSet2.txt')
    # Obtain the normalised matrix together with the ranges and minimums.
    scaled, spans, lows = kNN.autoNorm(samples)
예제 #29
0
def file2matrixTest():
    """Load the dating data set and print the feature matrix and label vector."""
    # Raw string: the Windows path contains backslashes followed by letters
    # that are not valid escape sequences; the resulting value is unchanged.
    returnMat, classLabelVector = kNN.file2matrix(
        r"C:\Users\yangy\PycharmProjects\MLIA\kNN\datingTestSet2.txt")
    print('returnMat:', returnMat)
    print('classLabelVector:', classLabelVector)
'''
Created on Jul 26, 2015

@author: selaselah
'''
import numpy as np
import kNN
import matplotlib
import matplotlib.pyplot as plt
# Scatter-plot columns 1 and 2 of the dating data, sized and coloured by label.
fig = plt.figure()
ax = fig.add_subplot(111)
data,labels = kNN.file2matrix('datingTestSet.txt')
# Only `numpy as np` is imported in this script, so the bare name `array`
# raised NameError; use np.array.
ax.scatter(data[:,1], data[:,2], 15.0*np.array(labels), 15.0*np.array(labels))
ax.axis([-2,25,-0.2,2.0])
plt.xlabel('Percentage of Time Spent Playing Video Games')
plt.ylabel('Liters of Ice Cream Consumed Per Week')
plt.show()
예제 #31
0
from numpy import *
import kNN as knnnn
# Small 3x2 sample array; it is not used by the call below.
array1 = array([[1, 2], [3, 4], [5, 6]])
# Load the dating data set from an absolute path; the return value is discarded.
knnnn.file2matrix(
    '/Users/Colin_Zhang/Desktop/machinelearninginaction/Ch02/datingTestSet2.txt'
)
예제 #32
0
import numpy
import kNN
import matplotlib
import matplotlib.pyplot as plt

# Three stacked scatter plots, one per pair of feature columns; the scaled
# labels set both marker size and colour.
fig = plt.figure()
# Top panel: column 0 against column 1.
ax1 = fig.add_subplot(311)
datingDataMat, datingLabels = kNN.file2matrix('f:\\datingTestSet.txt')
ax1.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
            15.0 * numpy.array(datingLabels), 15.0 * numpy.array(datingLabels))
ax1.set_xlabel('fly')
# Middle panel: column 0 against column 2.
ax2 = fig.add_subplot(312)
ax2.scatter(datingDataMat[:, 0], datingDataMat[:, 2],
            15.0 * numpy.array(datingLabels), 15.0 * numpy.array(datingLabels))
# Bottom panel: column 1 against column 2 (the name ax2 is rebound here).
ax2 = fig.add_subplot(313)
ax2.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
            15.0 * numpy.array(datingLabels), 15.0 * numpy.array(datingLabels))
plt.show()
예제 #33
0
import matplotlib
import matplotlib.pyplot as plt
import kNN
from numpy import *
# Load the dating data set from an absolute path.
datingDataMat,datingLabels=kNN.file2matrix('E:/Personal/BOOK/机器学习/机器学习实战源代码/machinelearninginaction/Ch02/datingTestSet2.txt')
# Build a scatter plot of columns 1 and 2, sized and coloured by label; the
# plt.show() call is commented out, so the figure is never displayed.
fig=plt.figure()
ax=fig.add_subplot(111)
ax.scatter(datingDataMat[:,1],datingDataMat[:,2],15.0*array(datingLabels),15.0*array(datingLabels))
#plt.show()
#print(datingDataMat)

# Normalise the features; the results are not used further in this script.
norMat,ranges,minVals=kNN.autoNorm(datingDataMat)
#print(norMat)

# Run the classifier test that ships with the kNN module.
kNN.datingClassTest()
예제 #34
0
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 21:20:35 2018

@author: ldz
"""
# =============================================================================
'''testDatingClassifier'''
# =============================================================================
from kNN import file2matrix, autoNorm, classify0
# Classify the first 10% of the dating rows against the rest and report the
# error rate, the number of errors and the number of test rows.
hoRatio = 0.10  # hold out 10%
k = 3
datingDataMat, datingLabels = file2matrix(
    'datingTestSet2.txt')  # load data set from file
normMat, ranges, minVals = autoNorm(datingDataMat)
m = normMat.shape[0]
numTestVecs = int(m * hoRatio)
errorCount = 0.0
for i in range(numTestVecs):
    classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                 datingLabels[numTestVecs:m], k)
    # This was the only Python 2 print statement in the script; the call
    # form matches the other prints and works on both interpreter lines.
    print("the classifier came back with: %d, the real answer is: %d" % (
        classifierResult, datingLabels[i]))
    if (classifierResult != datingLabels[i]): errorCount += 1.0
print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
print("number of error:" + str(errorCount))
print("number of test:" + str(numTestVecs))
'''
File Name:    main
Description:  Main entry point; mainly calls the functions defined in kNN.py
Author:       jwj
Date:         2018/1/18
'''
__author__ = 'jwj'

import kNN

if __name__ == '__main__':
    # Classify the point [0, 0] against the toy data set with k = 3.
    group, labels = kNN.createDataSet()
    label = kNN.classify([0, 0], group, labels, 3)
    print(label)

    # Load the dating data set from the working directory.
    dataArray, dataLabels = kNN.file2matrix("datingTestSet2.txt")
    # NOTE(review): the result of this call is discarded and the same call is
    # repeated right below - presumably redundant; confirm autoNorm has no
    # side effects before removing it.
    kNN.autoNorm(dataArray)

    normMat, ranges, minVals = kNN.autoNorm(dataArray)
    # print(normMat)

    # kNN.dataClassTest()
    # kNN.classifyPerson()

    # Run the handwriting test that ships with the kNN module.
    kNN.handwritingClassTest()
예제 #36
0
import sys
import kNN
from numpy import *
import matplotlib
import matplotlib.pyplot as plt

# Plot the two columns of '123.txt' against each other as small blue points.
fig = plt.figure()
ax = fig.add_subplot(111)
# NOTE(review): file2matrix is called with a second argument (2) and a single
# return value here - presumably a customised loader; confirm its signature.
mat = kNN.file2matrix('123.txt', 2)

# The string below is an earlier variant with the axes the other way round;
# as a bare string expression it has no effect.
'''ax.scatter(mat[:,0], mat[:,1], 2, color ='blue') '''
ax.scatter(mat[:,1], mat[:,0], 2, color ='blue')
plt.show()
예제 #37
0
# Classify three new points against the toy data set (k = 3) and mark each one
# with an 'x' in the colour of its predicted class. Inspect `answer` after a
# pass to see the predicted label.
for testvector in ([.2, .2], [.5, .5], [.75, .75]):
    answer = kNN.classify0(testvector,group, labels, 3)
    ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' )

'''Perform K-Nearest Neighbor classification on the datingTestSet2 data set. Do not forget to include the data set in the working directory'''
# Load the feature matrix and labels, and keep the labels as an array.
datingDataMat,datingLabels = kNN.file2matrix('datingTestSet2.txt')
datingLabelArray = np.array(datingLabels)

# One colour per class label.
colormap2 = { 1:'red', 2:'blue', 3:'green' }

# Translate every label into its colour, in data order.
ColoredDatingLabel = []
for things in datingLabelArray:
    ColoredDatingLabel.append(colormap2[things])

# Second sub plot: the raw data, columns 0 and 1.
ax2 = FigDating.add_subplot(312, xlim=(0,100000), ylim=(0,25))
ax2.scatter(datingDataMat[:,0], datingDataMat[:,1], s= 20, c= ColoredDatingLabel, marker = 'o' )

# Third sub plot: the same two columns after normalisation.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
ax3 = FigDating.add_subplot(313, xlim=(0,1), ylim=(0,1))
ax3.scatter(normMat[:,0], normMat[:,1], s = 20, c= ColoredDatingLabel, marker = 'o' )
예제 #38
0
import sys
import kNN
from pylab import *
from numpy import *
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

# Load the dating data set and normalise its features.
mat, lab = kNN.file2matrix('datingTestSet2.txt')
normMat, ranges, minVals = kNN.autoNorm(mat)


def randrange(n, vmin, vmax):
    """Return ``n`` random values scaled from ``np.random.rand`` into the span vmin..vmax."""
    span = vmax - vmin
    unit = np.random.rand(n)
    return span * unit + vmin


# Prepare a 3-D set of axes.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
#ax.scatter(normMat[:,0], normMat[:,1], normMat[:,2], 'o', 'c')
n = 1
# Draw one random sample per (colour, marker, z-range) entry; the values are
# not used further in the visible part of this script.
for c, m, zl, zh in [('r', 'o', -50, -25), ('b', '^', -30, -5)]:
    xs = randrange(n, 23, 32)
    ys = randrange(n, 0, 100)
    zs = randrange(n, zl, zh)

# Map every class label to a plot colour.
ClassSet = lab
colorSet = []
for label in ClassSet:
    # Compare by value: `is` tests object identity, which only matches a
    # string literal by accident of interning.
    if label == '1':
        colorSet.append('r')
예제 #39
0
import kNN as KNN

# Classify the point [0, 0] against the toy data set with k = 3.
group, labels = KNN.createDataSet()
clas = KNN.classify0([0, 0], group, labels, 3)
"""
output: B
"""
"""
this file has 3 columns
  ■ Number of frequent flyer miles earned per year
  ■ Percentage of time spent playing video games
  ■ Liters of ice cream consumed per week
"""
# Load the dating data set; the string below records a sample of the result.
datingDataMat, datingLabels = KNN.file2matrix('data/datingTestSet.txt')
"""
output:
  >>> datingDataMat
    array([[ 7.29170000e+04, 7.10627300e+00, 2.23600000e-01],
    [ 1.42830000e+04, 2.44186700e+00, 1.90838000e-01],
    [ 7.34750000e+04, 8.31018900e+00, 8.52795000e-01],
    ...,
    [ 1.24290000e+04, 4.43233100e+00, 9.24649000e-01],
    [ 2.52880000e+04, 1.31899030e+01, 1.05013800e+00],
    [ 4.91800000e+03, 3.01112400e+00, 1.90663000e-01]])

  >>> datingLabels[0:20]
    ['didntLike', 'smallDoses', 'didntLike', 'largeDoses', 'smallDoses',
    'smallDoses', 'didntLike', 'smallDoses', 'didntLike', 'didntLike',
    'largeDoses', 'largeDose s', 'largeDoses', 'didntLike', 'didntLike',
    'smallDoses', 'smallDoses', 'didntLike', 'smallDoses', 'didntLike']
"""
예제 #40
0
import kNN
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# easy try: classify the point [0, 0] against the toy data set with k = 3
group, labels = kNN.createDataSet()
predict = kNN.classify0([0, 0], group, labels, 3)
print(predict)

# load data: print the feature matrix and the first 20 labels
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels[0:20])

# scatter plot: columns 0 and 1, with the scaled labels as marker size and colour
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           15.0 * np.array(datingLabels), 15.0 * np.array(datingLabels))
plt.show()

# normalization: print the three values returned by autoNorm
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
print(normMat)
print(ranges)
print(minVals)

# test error rate: run the classifier test that ships with the kNN module
kNN.datingClassTest()
예제 #41
0
        if node.right_child != None:
            self.search(item, node.right_child, nodeList, k)

        return


if __name__ == '__main__':
    # Small hand-made data set: seven 2-D points with binary labels.
    dataList = np.array([[2, 4], [5, 1], [3, 6], [7, 3], [6, 4.3], [2, 1],
                         [1, 7]])
    labelList = np.array([0, 0, 0, 0, 1, 1, 1])

    # Build the tree, print its dict form and length, then query k = 3.
    kdRoot = kdTree_heap(dataList, labelList)
    print(kdRoot.transfer_dict(kdRoot.root))
    print(kdRoot.length)
    label, nodeList = kdRoot.knn_algo([6, 3.8], k=3)
    print(nodeList)

    # Dating data: scale the features with MinMaxScaler, build the tree from
    # everything after the first 10% of rows and test on that first 10%.
    datingDataMat, datingLabels = file2matrix('./data/datingTestSet.txt')
    from sklearn import preprocessing
    datingDataMat = preprocessing.MinMaxScaler().fit_transform(datingDataMat)
    numTest = int(0.1 * datingDataMat.shape[0])
    kdRoot = kdTree_heap(datingDataMat[numTest:], datingLabels[numTest:])
    errorCount = 0
    for i in range(numTest):
        classifierResult, nodeList = kdRoot.knn_algo(datingDataMat[i], k=3)
        print('the classifier came back with : {}, the real answer is : {}'.
              format(classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    # Only the raw number of misclassified rows is reported, not a rate.
    print(errorCount)
예제 #42
0
import kNN
import matplotlib
import matplotlib.pyplot as plt
from numpy import *
from pylab import *

# Two stacked scatter plots of the dating data; the scaled labels set both
# marker size and colour.
data, label = kNN.file2matrix('datingTestSet2.txt')
fig = plt.figure(1)
# Upper panel: column 0 against column 1.
ax = fig.add_subplot(211)
ax.scatter(data[:, 0], data[:, 1], 15 * array(label), 15 * array(label))
xlabel('fly km')
ylabel('play game')

#fig=plt.figure(1)
# Lower panel: column 1 against column 2.
ax = fig.add_subplot(212)
ax.scatter(data[:, 1], data[:, 2], 15 * array(label), 15 * array(label))
xlabel('play game')
ylabel('consume')
plt.show()
예제 #43
0
import sys
import kNN
from pylab import *
from numpy import *
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
# Load the dating data set and normalise its features.
mat,lab = kNN.file2matrix('datingTestSet2.txt')
normMat, ranges, minVals = kNN.autoNorm(mat)

def randrange(n, vmin, vmax):
    """Return ``n`` random values scaled from ``np.random.rand`` into the span vmin..vmax."""
    span = vmax - vmin
    unit = np.random.rand(n)
    return span * unit + vmin

fig = plt.figure()
ax = fig.add_subplot(111,projection='3d')
#ax.scatter(normMat[:,0], normMat[:,1], normMat[:,2], 'o', 'c')
n = 1
for c, m, zl, zh in [('r', 'o', -50, -25), ('b', '^', -30, -5)]:
    xs = randrange(n, 23, 32)
    ys = randrange(n, 0, 100)
    zs = randrange(n, zl, zh)

ClassSet=lab
colorSet = []
for label in ClassSet:
	if label is '1':
		colorSet.append('r')
	elif label is '2':
		colorSet.append('b')
	elif label is '3':
예제 #44
0
파일: main.py 프로젝트: thatwaylw/pycl
'''
@author: laiwei
'''
import kNN
from numpy import *
#group, labels = kNN.createDataSet()
#print (group, labels)
#print (kNN.classify0([0, 0], group, labels, 3))
#print (kNN.classify0([0.7, 0.8], group, labels, 3))

# Load the dating data set from the working directory.
datingMat, datingLabels = kNN.file2matrix("datingTestSet2.txt")
#print(datingMat)
#print(datingMat[:,0])
#print(datingLabels)

import matplotlib
import matplotlib.pyplot as plt
# Scatter-plot columns 0 and 1; the scaled labels set marker size and colour.
fig = plt.figure()
ax = fig.add_subplot(111)
#ax.scatter(datingMat[:,1], datingMat[:,2])
ax.scatter(datingMat[:, 0], datingMat[:, 1], 15.0 * array(datingLabels),
           15.0 * array(datingLabels))
plt.show()

# Run the classifier test that ships with the kNN module.
kNN.datingClassTest()
# kNN.handwritingClassTest()
예제 #45
0
import sys
import kNN
from numpy import *
import matplotlib
import matplotlib.pyplot as plt

# Scatter-plot columns 1 and 2 of the dating data, sized and coloured by label.
fig = plt.figure()
ax = fig.add_subplot(111)
mat, lab = kNN.file2matrix("datingTestSet2.txt")
# map() returns a lazy iterator on Python 3, which array() would wrap as a
# single object instead of a numeric vector; materialise it as a list first.
ax.scatter(mat[:, 1], mat[:, 2], 15.0 * array(list(map(int, lab))),
           15.0 * array(list(map(int, lab))))
plt.show()
예제 #46
0
# encoding: utf-8
from numpy import *
import kNN
import matplotlib
import matplotlib.pyplot as plt

# Scatter-plot columns 1 and 2 of the normalised dating data.
fig = plt.figure()

ax = fig.add_subplot(111)
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')

## The features have different ranges, so the feature values are normalised here.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)

ax.scatter(normMat[:, 1], normMat[:,2])

# Add the axis labels.
plt.xlabel('Percentage of Time Spent Playing Video Games')
plt.ylabel('Liters of Ice Cream Consumed Per Week')

plt.show()

예제 #47
0
파일: __init__.py 프로젝트: ccravens/ml
import kNN
import matplotlib
import matplotlib.pyplot as plt
from numpy import all
import operator
from array import array

# Load "test.txt" and scatter-plot columns 1 and 2 with default markers.
datingDataMat,datingLabels = kNN.file2matrix("test.txt")
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:,1], datingDataMat[:,2])
plt.show()
예제 #48
0
파일: Test.py 프로젝트: CrazyRacer/python2
import kNN
import numpy
import matplotlib
import matplotlib.pyplot as plt

# Nearest-neighbour sanity check on a two-point data set (k = 1).
# Single-argument print() behaves the same on Python 2 and Python 3.
print(kNN.classify0([0, 0], numpy.array([[1, 0], [2, 1]]), ['A', 'B'], 1))

# Load the data, then show the labels and their scaled form.
datingDataMat, datingLabels = kNN.file2matrix('test2.txt')
print(datingLabels)
print(15.0*numpy.array(datingLabels))

# Scatter-plot columns 1 and 2; the scaled labels set marker size and colour.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
           15.0*numpy.array(datingLabels), 15.0*numpy.array(datingLabels))
plt.show()
예제 #49
0
# -*- coding: utf-8 -*-
'''
Created on 2015年9月29日

@author: rains
'''
import matplotlib.pyplot as plt
import numpy as np
import os

import kNN

# Absolute directory of the chapter's data files.
curdir='f:\\project\\python\\machine-learning-in-action/Ch02'
mat1,fab1 = kNN.file2matrix(curdir+"/datingTestSet.txt")

# Inspect the training set
# fig=plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(mat1[:,0],mat1[:,1],15.0*np.array(fab1),15.0*np.array(fab1))
# plt.show()

# Test the normalisation
# NOTE(review): other callers unpack three values from kNN.autoNorm; here the
# whole return value is bound to mat1 - confirm what this autoNorm returns.
mat1 = kNN.autoNorm(mat1)

# Test the accuracy of the simple classifier
kNN.datingClassTest()

# Handwritten digit recognition
#kNN.handwritingClassTest()