def train():
    '''Evaluate the kNN classifier on a hold-out slice of the dating data.

    Loads 'datingTestSet2.txt' through knn.file2matrix, normalises it with
    knn.autoNorm and prints the normalised matrix and the labels.  The first
    10% of the rows are then classified one by one against the remaining
    rows (knn.classify0, fourth argument 3); every prediction is printed
    next to the recorded label, followed by the overall error rate.
    '''
    features, labels = knn.file2matrix('datingTestSet2.txt')
    scaled, _value_ranges, _minimums = knn.autoNorm(features)

    print(scaled)
    print(labels)

    # Fraction of the rows held out for validation.
    holdout_fraction = 0.10
    # Total number of rows in the data set.
    row_count = scaled.shape[0]
    # Number of leading rows used as the test set.
    holdout_size = int(row_count * holdout_fraction)

    mistakes = 0.0
    for idx in range(holdout_size):
        # Everything after the hold-out rows serves as the reference set.
        reference_rows = scaled[holdout_size:row_count, :]
        reference_labels = labels[holdout_size:row_count]
        predicted = knn.classify0(scaled[idx, :], reference_rows,
                                  reference_labels, 3)
        actual = labels[idx]

        print('分类器返回: %d, 实际的结果是:%d' % (predicted, actual))

        mistakes += 1.0 if predicted != actual else 0.0

    print('错误率是: %f' % (mistakes / float(holdout_size)))
def predict():
    '''Ask for three feature values and print the predicted liking level.

    Reads three numbers from standard input, normalises them with the
    minimum and range values that knn.autoNorm reports for
    'datingTestSet2.txt', classifies the result with knn.classify0 (fourth
    argument 3) and prints the matching entry of the description list.
    '''
    game_share = float(raw_input('玩游戏的时间是: '))
    flight_distance = float(raw_input('每年的飞行公里数: '))
    ice_cream = float(raw_input('每年消耗的冰淇淋: '))

    features, labels = knn.file2matrix('datingTestSet2.txt')
    scaled, spans, minimums = knn.autoNorm(features)

    # Normalise the input like the data set: subtract the minimums, then
    # divide by the ranges.
    raw_sample = np.array([flight_distance, game_share, ice_cream])
    sample = (raw_sample - minimums) / spans

    label = knn.classify0(sample, scaled, labels, 3)

    # The classifier result minus one is the index into the descriptions.
    verdicts = ['一点也不喜欢', '有点喜欢', '非常喜欢']
    print('预测你可能喜欢这个人的程度:' + ' ' + verdicts[label - 1])
def predict():
    '''Prompt for a person's three features and print the predicted verdict.

    The three answers are read from standard input in the order of the
    prompts, rescaled with the minimum and range values returned by
    knn.autoNorm for 'datingTestSet2.txt', and handed to knn.classify0
    (fourth argument 3).  The description selected by the result is printed.
    '''
    descriptions = ['一点也不喜欢', '有点喜欢', '非常喜欢']
    questions = ('玩游戏的时间是: ', '每年的飞行公里数: ', '每年消耗的冰淇淋: ')
    # Answers are collected strictly in prompt order.
    answers = [float(raw_input(question)) for question in questions]
    play_time, miles_flown, ice_cream_eaten = answers

    data_matrix, data_labels = knn.file2matrix('datingTestSet2.txt')
    norm_matrix, value_ranges, value_mins = knn.autoNorm(data_matrix)

    # The feature vector is assembled as (miles, play time, ice cream).
    query = np.array([miles_flown, play_time, ice_cream_eaten])
    # Apply the same normalisation as the data set received.
    query = (query - value_mins) / value_ranges

    outcome = knn.classify0(query, norm_matrix, data_labels, 3)

    # The classifier result minus one selects the description.
    chosen = descriptions[outcome - 1]
    print('%s %s' % ('预测你可能喜欢这个人的程度:', chosen))
def datingClassTest(
        dataFile='/home/matija/Projects/personal_projects/show-me-the-code/data-science/CollectiveIntelligence/dataSets/datingTestSet1.txt',
        hoRatio=0.10,
        k=3):
    """Run a hold-out evaluation of the kNN classifier on the dating data.

    The data set is loaded with knn.file2matrix and normalised with
    knn.autoNorm.  The first ``int(m * hoRatio)`` rows are classified against
    the remaining rows, each prediction is printed next to the recorded
    label, and the overall error rate is printed at the end.

    Args:
        dataFile: path handed to knn.file2matrix.  Defaults to the path the
            function originally had hard-coded.
        hoRatio: fraction of the rows (taken from the start of the data set)
            used as test vectors.
        k: value passed as the fourth argument of knn.classify0.

    Raises:
        ZeroDivisionError: if the hold-out slice is empty (``int(m *
            hoRatio)`` is 0), because the error rate divides by its size.
    """
    # NOTE(review): this was spelled `knn.file2matix`; every other caller of
    # the module uses `file2matrix`, so the spelling is aligned here.
    datingDataMat, datingLabels = knn.file2matrix(dataFile)
    normMat, _ranges, _minVals = knn.autoNorm(datingDataMat)
    m = normMat.shape[0]
    # Number of leading vectors used to test the classifier.
    numTestVecs = int(m * hoRatio)

    # The training rows and the training labels must cover the same slice;
    # passing the unsliced label list pairs neighbours with the wrong labels.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = knn.classify0(normMat[i, :], trainMat,
                                         trainLabels, k)
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, datingLabels[i]))

        if classifierResult != datingLabels[i]:
            errorCount += 1.0

    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
def train():
    '''Measure the kNN error rate on the leading 10% of the dating data.

    The data set 'datingTestSet2.txt' is read with knn.file2matrix and
    normalised with knn.autoNorm; the normalised matrix and the labels are
    printed.  Each of the leading hold-out rows is then classified against
    the rest of the rows (knn.classify0, fourth argument 3), the prediction
    and the recorded label are printed, and finally the error rate.
    '''
    data_matrix, data_labels = knn.file2matrix('datingTestSet2.txt')
    norm_matrix, _spans, _mins = knn.autoNorm(data_matrix)

    print(norm_matrix)
    print(data_labels)

    # Share of the data set reserved for validation.
    test_share = 0.10
    # Size of the whole data set.
    total = norm_matrix.shape[0]
    # Size of the test set (the leading rows).
    test_total = int(total * test_share)

    wrong = 0.0
    for row in range(test_total):
        guess = knn.classify0(
            norm_matrix[row, :],
            norm_matrix[test_total:total, :],
            data_labels[test_total:total],
            3,
        )
        truth = data_labels[row]

        print('分类器返回: %d, 实际的结果是:%d' % (guess, truth))

        # Only mismatches count towards the error total.
        if not (guess != truth):
            continue
        wrong += 1.0

    print('错误率是: %f' % (wrong / (float(test_total))))
Exemple #6
0
    # NOTE(review): the enclosing `def` line is not part of this excerpt; the
    # statements below are the body of that function as far as it is visible.
    # Load the data set and echo the matrix and the labels.
    datingDataMat, datingLabels = knn.file2matrix('datingTestSet.txt')
    print(datingDataMat)
    print(datingLabels)

    # Display
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # No useful pattern is visible in this plain scatter plot
    ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])
    # Annotate with colour: the third and fourth positional arguments (marker
    # size and colour in matplotlib's scatter signature) are 15.0 * label.
    ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2], 15.0 * array(datingLabels), 15.0 * array(datingLabels))
    plt.show()
    plt.close()

    # Normalise the data
    normMat, ranges, minVals = knn.autoNorm(datingDataMat)
    print('norm mat:')
    print(normMat)
    print('range:')
    print(ranges)
    # NOTE(review): this heading repeats 'norm mat:' although the value
    # printed next is minVals -- looks like a copy-paste slip; confirm.
    print('norm mat:')
    print(minVals)

    # Test the classifier, using the first hoRatio fraction of the data set
    # as the test set
    hoRatio = 0.10
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    # NOTE(review): errorCount is initialised but never updated in the
    # visible part of the loop -- the rest may have been cut off; confirm.
    errorCount = 0.0
    for i in range(numTestVecs):
        # Classify row i against the rows (and labels) after the test slice.
        classifierResult = knn.classify0(normMat[i, :], normMat[numTestVecs:m, :], datingLabels[numTestVecs:m], 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]))
Exemple #7
0
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 21 10:38:54 2018

@author: fsxn2
"""

import knn
import matplotlib
import matplotlib.pyplot as plt
# Disabled alternative: classify the point [0, 0] against the data returned
# by knn.createDataSet().
#group,labels=knn.createDataSet()
#print(knn.classify0([0,0],group,labels,3))
# Load the feature matrix and its labels from "input.txt".
group, labels = knn.file2matrix("input.txt")
# autoNorm returns three values; they are printed below for inspection.
auto, ranges, minval = knn.autoNorm(group)
print(auto)
print(ranges)
print(minval)
# Disabled scatter plot of the loaded data.
# NOTE(review): `group[:2]` in the disabled call below takes the first two
# rows; `group[:,2]` (third column) was presumably intended -- confirm before
# re-enabling.
#fig=plt.figure()
#ax=fig.add_subplot(111)
#ax.scatter(group[:,1],group[:2])
#plt.show()
# Classify the sample [1, 0, 3] (fourth argument 3).
# NOTE(review): the call uses the raw `group` matrix and a raw sample; the
# normalised `auto` matrix computed above is only printed -- confirm that
# classifying without normalisation is intended.
print(knn.classify0([1, 0, 3], group, labels, 3))