Exemple #1
0
def datingClassTest():
    """
    Run a hold-out test of the kNN classifier on the dating-site data.

    Loads the data file, hands it to ``draw``, normalizes it with
    ``autoNorm0``, and then classifies the first ``hoRatio`` share of the
    rows against the remaining rows with k=3.  Each prediction is printed
    next to the real label, followed by the overall error rate and the raw
    error count.
    """

    # Load the samples and their labels from the data file.
    datingDataMat, datingLabels = file2matrix(
        '../../../Data/KNN/dating/dating.txt')
    draw(datingDataMat, datingLabels)

    # Normalize the data; the other two returned values are not needed here.
    normMat, _ranges, _minVals = autoNorm0(datingDataMat)
    # m is the number of rows, i.e. the first dimension of the matrix.
    m = normMat.shape[0]

    # Share of rows held out for testing; the training share is 1 - hoRatio.
    hoRatio = 0.1
    numTestVecs = int(m * hoRatio)
    print('numTestVecs=', numTestVecs)

    # The training rows and labels are the same for every test vector, so
    # slice them once instead of on each iteration.
    trainingMat = normMat[numTestVecs:m, :]
    trainingLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        # Classify one held-out row against the training rows.
        classifierResult = classify_knn.classify0(normMat[i, :],
                                                  trainingMat,
                                                  trainingLabels,
                                                  3)
        print("the classifier came back with: %d, the real answer is: %d" %
              (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0

    # With fewer than 1 / hoRatio rows the hold-out set is empty; report a
    # rate of 0.0 rather than raising ZeroDivisionError.
    errorRate = errorCount / float(numTestVecs) if numTestVecs else 0.0
    print("the total error rate is: %f" % errorRate)
    print(errorCount)
Exemple #2
0
def handwritingClassTest():
    """
    Test the kNN classifier on the handwritten-digit files.

    Every file in the training directory is converted with ``img2vector``
    into one 1024-wide row of the training matrix.  Every file in the test
    directory is then classified against that matrix with k=3.  Each
    prediction is printed next to the real label, followed by the total
    number of errors and the error rate.

    The label of a file is the integer that precedes the first ``_`` in its
    name (after dropping everything from the first ``.`` onwards).
    """
    trainingDir = '../../../Data/KNN/handwriting/trainingDigits'
    testDir = '../../../Data/KNN/handwriting/testDigits'

    # Load the training data.
    hwLabels = []
    trainingFileList = listdir(trainingDir)
    m = len(trainingFileList)
    trainingMat = np.zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        # Store the image as one row (original note: 32*32 matrix -> 1*1024).
        trainingMat[i, :] = img2vector('%s/%s' % (trainingDir, fileNameStr))

    # Load the test data and classify each file.
    testFileList = listdir(testDir)
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('%s/%s' % (testDir, fileNameStr))
        classifierResult = classify_knn.classify0(vectorUnderTest, trainingMat,
                                                  hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" %
              (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0

    print("\nthe total number of errors is: %d" % errorCount)
    # An empty test directory gives mTest == 0; report a rate of 0.0 rather
    # than raising ZeroDivisionError.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print("\nthe total error rate is: %f" % errorRate)
Exemple #3
0
def testMovies():
    """
    Classify the new coordinate [18, 90] against the sample data set.

    Prints the samples and labels returned by ``createDataSet``, then the
    class predicted by ``classify0`` and by ``classify1``, both with k=3.
    """
    k = 3
    point = (18, 90)
    samples, sampleLabels = createDataSet()

    # print() applies str() to its argument, so the output is unchanged.
    print(samples)
    print(sampleLabels)

    # Each classifier gets its own fresh list built from the same point.
    firstGuess = classify_knn.classify0(list(point), samples, sampleLabels, k)
    print(firstGuess)
    secondGuess = classify_knn.classify1(list(point), samples, sampleLabels, k)
    print(secondGuess)
Exemple #4
0
def testPoints():
    """
    Classify the new coordinate [0.1, 0.1] against the sample data set.

    Prints the samples and labels returned by ``createDataSet``, then the
    class predicted by ``classify0`` and by ``classify1``, both with k=3.
    """
    k = 3
    point = (0.1, 0.1)
    samples, sampleLabels = createDataSet()

    # print() applies str() to its argument, so the output is unchanged.
    print(samples)
    print(sampleLabels)

    # Each classifier gets its own fresh list built from the same point.
    firstGuess = classify_knn.classify0(list(point), samples, sampleLabels, k)
    print(firstGuess)
    secondGuess = classify_knn.classify1(list(point), samples, sampleLabels, k)
    print(secondGuess)
Exemple #5
0
def classifyPerson(percentTats=10, ffMiles=10000, iceCream=0.5):
    """
    Predict how much the user would like a person described by three values.

    The query is normalized with the minimum values and ranges that
    ``autoNorm0`` returns for the dating data set, classified against the
    normalized data with k=3, and the matching description is printed.

    The three features used to be hard-coded locals; they are now keyword
    parameters with the same defaults, so a call without arguments behaves
    as before.

    Args:
        percentTats: second feature of the query vector (default 10).
        ffMiles: first feature of the query vector (default 10000).
        iceCream: third feature of the query vector (default 0.5).

    NOTE(review): the real-world meaning of each feature is not visible
    here; confirm it against the column order of the data file.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    datingDataMat, datingLabels = file2matrix(
        '../../../Data/KNN/dating/dating.txt')
    normMat, ranges, minVals = autoNorm0(datingDataMat)

    # The query uses the order ffMiles, percentTats, iceCream.
    inArr = np.array([ffMiles, percentTats, iceCream])
    # Scale the query the same way as the data before classifying it.
    classifierResult = classify_knn.classify0((inArr - minVals) / ranges,
                                              normMat, datingLabels, 3)
    # Presumably labels are 1..3, hence the -1 offset -- TODO confirm.
    print("You will probably like this person: ",
          resultList[classifierResult - 1])