Beispiel #1
0
def classifyPerson():
    """Ask for three traits of a person and print how much they would be liked.

    The three answers are read from standard input as floats, scaled with the
    minimum values and ranges that ``normalizer.autoNorm`` returns for the
    data loaded from 'datingTestSet2.txt', and handed to ``kNN.classify``
    (with 3 as its last argument). The returned label, minus one, indexes the
    list of answers that is printed.
    """
    outcomes = ['not at all', 'small doses', 'large doses']
    print('')

    # The prompts are asked in this order, and the answers keep that order
    # when they are turned into the query vector.
    prompts = [
        'frequent flier miles earned per year: ',
        'percentage of time spent playing video games: ',
        'liters of ice cream consumed per year: ',
    ]
    answers = [float(raw_input(prompt)) for prompt in prompts]

    samples, sampleLabels = fileUtil.file2matrix('datingTestSet2.txt')
    scaledSamples, spans, floors = normalizer.autoNorm(samples)

    # Scale the query the same way the reference samples were scaled.
    query = (numpy.array(answers) - floors) / spans
    predicted = kNN.classify(query, scaledSamples, sampleLabels, 3)

    print('')
    print('%s %s' % ('You will probably like this person:',
                     outcomes[predicted - 1]))
Beispiel #2
0
def classifyPerson():
    """Interactively classify one person and print the predicted liking level.

    Reads three numbers from standard input, normalizes them with the
    ``ranges`` and ``minVals`` returned by ``normalizer.autoNorm`` for the
    'datingTestSet2.txt' data, and prints the entry of the liking-level table
    selected by the result of ``kNN.classify`` (label minus one).
    """
    likingLevels = ['not at all', 'small doses', 'large doses']
    print('')

    milesPerYear = float(
        raw_input('frequent flier miles earned per year: '))
    gamingPercent = float(
        raw_input('percentage of time spent playing video games: '))
    iceCreamLiters = float(
        raw_input('liters of ice cream consumed per year: '))

    rawData, knownLabels = fileUtil.file2matrix('datingTestSet2.txt')
    normalizedData, valueRanges, minimums = normalizer.autoNorm(rawData)

    person = numpy.array([milesPerYear, gamingPercent, iceCreamLiters])
    # Apply the same min/range scaling to the new sample before classifying.
    normalizedPerson = (person - minimums) / valueRanges
    label = kNN.classify(normalizedPerson, normalizedData, knownLabels, 3)

    # Labels are used as 1-based positions in the liking-level table.
    verdict = likingLevels[label - 1]

    print('')
    print('%s %s' % ('You will probably like this person:', verdict))
Beispiel #3
0
def handwritingClassTest(trainingDir='trainingDigits', testDir='testDigits',
                         k=3):
    """Classify every file of a test directory and print the error statistics.

    Each file in ``trainingDir`` is converted with ``fileUtil.img2vector`` and
    stored as one 1024-column row of the training matrix. Each file in
    ``testDir`` is then classified with ``kNN.classify`` and the result is
    compared with the expected label, which is the integer found before the
    first '_' of the file name (after dropping everything from the first '.').

    Args:
        trainingDir: directory listing the training files.
        testDir: directory listing the files to classify.
        k: passed as the last argument of ``kNN.classify`` (presumably the
            number of neighbours -- confirm against kNN).

    Returns:
        None. One line is printed per test file, followed by the number of
        errors and the error rate. When ``testDir`` contains no files the
        error rate is reported as 0.0 instead of raising ZeroDivisionError.
    """
    def expectedLabel(fileName):
        # e.g. '3_17.txt' -> '3_17' -> 3
        return int(fileName.split('.')[0].split('_')[0])

    # load the training set
    trainingFileList = listdir(trainingDir)
    trainingMat = zeros((len(trainingFileList), 1024))
    hwLabels = []
    for row, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(expectedLabel(fileNameStr))
        trainingMat[row, :] = fileUtil.img2vector(
            '%s/%s' % (trainingDir, fileNameStr))

    # iterate through the test set
    testFileList = listdir(testDir)
    mTest = len(testFileList)
    errorCount = 0
    for fileNameStr in testFileList:
        classNumStr = expectedLabel(fileNameStr)
        vectorUnderTest = fileUtil.img2vector(
            '%s/%s' % (testDir, fileNameStr))
        classifierResult = kNN.classify(vectorUnderTest, trainingMat,
                                        hwLabels, k)

        print("the classifier came back with: %d, the real answer is: %d" % (
            classifierResult, classNumStr))

        if classifierResult != classNumStr:
            errorCount += 1

    # An empty test directory has no meaningful rate; report 0.0 rather than
    # dividing by zero.
    errorRate = errorCount / float(mTest) if mTest else 0.0

    print("\ntotal number of errors: %d" % errorCount)
    print("\ntotal error rate: %f" % errorRate)
Beispiel #4
0
def datingClassTest(hoRatio=0.50, dataFile='datingTestSet2.txt', k=3):
    """Hold out the first rows of the dating data and print the error rate.

    The data returned by ``fileUtil.file2matrix`` is normalized with
    ``normalizer.autoNorm``. The first ``int(m * hoRatio)`` rows are used as
    test vectors; the remaining rows and their labels are passed to
    ``kNN.classify`` as the reference set.

    Args:
        hoRatio: fraction of the rows that is held out for testing.
        dataFile: file name handed to ``fileUtil.file2matrix``.
        k: passed as the last argument of ``kNN.classify`` (presumably the
            number of neighbours -- confirm against kNN).

    Returns:
        None. One line is printed per test vector, followed by the totals.
        When no row is held out the error rate is reported as 0.0 instead of
        raising ZeroDivisionError.
    """
    # load data set from file
    datingDataMat, datingLabels = fileUtil.file2matrix(dataFile)

    # Only the normalized matrix is needed here.
    normMat, _ranges, _minVals = normalizer.autoNorm(datingDataMat)

    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)

    # The reference set is the same for every test vector, so slice it once.
    # NOTE(review): assumes kNN.classify does not modify its arguments.
    referenceMat = normMat[numTestVecs:m, :]
    referenceLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = kNN.classify(normMat[i, :], referenceMat,
                                        referenceLabels, k)
        print("classifier result: %d, real answer: %d" % (classifierResult,
                                                          datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0

    # With nothing held out there is no rate to compute; avoid dividing by 0.
    errorRate = errorCount / float(numTestVecs) if numTestVecs else 0.0

    print("total count: %s, error count: %s" % (numTestVecs, errorCount))
    print("total error rate: %f" % errorRate)
Beispiel #5
0
def handwritingClassTest(trainingDir='trainingDigits', testDir='testDigits',
                         k=3):
    """Run the digit classifier over a test directory and print its errors.

    Files listed in ``trainingDir`` are loaded with ``fileUtil.img2vector``
    into a matrix with 1024 columns, one row per file. Files listed in
    ``testDir`` are classified with ``kNN.classify`` against that matrix. The
    expected label of a file is the integer before the first '_' in the part
    of its name that precedes the first '.'.

    Args:
        trainingDir: directory listing the training files.
        testDir: directory listing the files to classify.
        k: passed as the last argument of ``kNN.classify`` (presumably the
            number of neighbours -- confirm against kNN).

    Returns:
        None. Prints one line per test file, then the error count and the
        error rate. An empty ``testDir`` yields an error rate of 0.0 instead
        of a ZeroDivisionError.
    """
    # load the training set
    trainingFileList = listdir(trainingDir)
    m = len(trainingFileList)

    hwLabels = []
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        # take off the extension, then keep the label in front of the '_'
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))

        trainingMat[i, :] = fileUtil.img2vector(
            '%s/%s' % (trainingDir, fileNameStr))

    # iterate through the test set
    testFileList = listdir(testDir)
    mTest = len(testFileList)

    errorCount = 0
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])

        vectorUnderTest = fileUtil.img2vector(
            '%s/%s' % (testDir, fileNameStr))
        classifierResult = kNN.classify(vectorUnderTest, trainingMat,
                                        hwLabels, k)

        print("the classifier came back with: %d, the real answer is: %d" % (
            classifierResult, classNumStr))

        if classifierResult != classNumStr:
            errorCount += 1

    # Guard the division: with no test files the rate is reported as 0.0.
    if mTest:
        errorRate = errorCount / float(mTest)
    else:
        errorRate = 0.0

    print("\ntotal number of errors: %d" % errorCount)
    print("\ntotal error rate: %f" % errorRate)
Beispiel #6
0
def datingClassTest(hoRatio=0.50, dataFile='datingTestSet2.txt', k=3):
    """Evaluate the classifier on a held-out slice of the dating data.

    Loads the data with ``fileUtil.file2matrix``, normalizes it with
    ``normalizer.autoNorm`` and classifies each of the first
    ``int(m * hoRatio)`` rows with ``kNN.classify``, using the remaining rows
    and labels as the reference set.

    Args:
        hoRatio: fraction of the rows that is held out for testing.
        dataFile: file name handed to ``fileUtil.file2matrix``.
        k: passed as the last argument of ``kNN.classify`` (presumably the
            number of neighbours -- confirm against kNN).

    Returns:
        None. Prints one line per held-out row, then the totals. If no row
        is held out, the error rate is printed as 0.0 rather than raising
        ZeroDivisionError.
    """
    # load data set from file
    datingDataMat, datingLabels = fileUtil.file2matrix(dataFile)

    # autoNorm also returns ranges and minimum values; they are unused here.
    normMat, _ranges, _minVals = normalizer.autoNorm(datingDataMat)

    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)

    # Rows after the held-out slice never change inside the loop, so they are
    # sliced once up front.
    # NOTE(review): assumes kNN.classify leaves its arguments untouched.
    keptMat = normMat[numTestVecs:m, :]
    keptLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        realAnswer = datingLabels[i]
        classifierResult = kNN.classify(normMat[i, :], keptMat, keptLabels, k)
        print("classifier result: %d, real answer: %d" % (classifierResult,
                                                          realAnswer))
        if classifierResult != realAnswer:
            errorCount += 1.0

    # Guard the division for the case where nothing was held out.
    if numTestVecs:
        errorRate = errorCount / float(numTestVecs)
    else:
        errorRate = 0.0

    print("total count: %s, error count: %s" % (numTestVecs, errorCount))
    print("total error rate: %f" % errorRate)