예제 #1
0
def main2():
    """Smoke-test the classifier on the sample data from ``kNN.createDataSet``.

    Classifies the point ``[0, 0]`` with k = 3 and prints the predicted label.
    """
    group, labels = kNN.createDataSet()
    result = kNN.classify0([0, 0], group, labels, 3)
    # Call form of print: same output on Python 2, valid syntax on Python 3
    # (the bare ``print result`` statement is a SyntaxError there).
    print(result)
예제 #2
0
def handwritingClassTest():
    """Run the kNN handwritten-digit test and print the error statistics.

    Each file in ``trainingDigits`` fills one row of a 1024-column training
    matrix (via ``img2vector``); its label is the integer that precedes the
    first ``_`` in the file name.  Every file in ``testDigits`` is then
    classified with ``classify0`` using k = 3 and compared with the label
    parsed from its own name.

    Raises:
        ZeroDivisionError: if ``testDigits`` contains no files.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    trainingMat = zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        # Drop the extension, then keep the part before the first underscore.
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorcount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # print(<one formatted string>) prints the same text on Python 2 and is
        # valid syntax on Python 3, unlike the old print statement.
        print('the classifier came back with:%d,the real answer is :%d' % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorcount += 1.0

    print("\nThe local number of errors is :%d" % errorcount)
    print("\nThe total error rate is: %f " % (errorcount / float(mTest)))
예제 #3
0
def handwritingClassTest(k=3):
    """Measure the kNN error rate on the handwritten-digit test files.

    Files in ``trainingDigits`` are loaded with ``img2Vector`` into a
    1024-column matrix, one row per file; the label of a file is the integer
    before the first ``_`` in its name.  Files in ``testDigits`` are then
    classified with ``kNN.classify0``.

    Args:
        k: number of neighbours passed to ``kNN.classify0``.

    Returns:
        The fraction of test files whose predicted label differs from the
        label encoded in the file name.
    """
    trainNames = os.listdir('trainingDigits')
    trainingMat = np.zeros((len(trainNames), 1024))
    hwLabels = []
    for row, name in enumerate(trainNames):
        hwLabels.append(int(name.split('.')[0].split('_')[0]))
        trainingMat[row, :] = img2Vector('trainingDigits/%s' % name)

    testNames = os.listdir('testDigits')
    misses = 0.0
    for name in testNames:
        expected = int(name.split('.')[0].split('_')[0])
        sample = img2Vector('testDigits/%s' % name)
        predicted = kNN.classify0(sample, trainingMat, hwLabels, k)
        if predicted != expected:
            misses += 1.0

    errorRate = misses / float(len(testNames))
    print('k = %d, and the total error rate is: %f' % (k, errorRate))
    return errorRate
예제 #4
0
def handWritingClassTest():
    """Train on ``digits/trainingDigits`` and score ``digits/testDigits`` with kNN.

    Each training file fills one row of a 1024-column matrix via
    ``img2Vector``; its label is the integer before the first ``_`` in the
    file name.  Progress (the file count, an ``index:label`` line per training
    file and the full label list) is printed while loading.  Every test file
    is then classified with ``kNN.classify0`` (k = 3); the per-file result,
    the error count and the error rate are printed.

    Raises:
        ZeroDivisionError: if ``digits/testDigits`` contains no files.
    """
    hwLables = []
    # Get the list of file names in the training directory.
    trainingFileList = listdir('digits/trainingDigits')
    trainingDatalength = len(trainingFileList)
    # Allocate the training matrix: one row per training file.
    trainingMat = np.zeros((trainingDatalength, 1024))
    # All prints use the call form with a single argument so the output is the
    # same on Python 2 while the code also parses on Python 3.
    print(trainingDatalength)
    for i, fileNameStr in enumerate(trainingFileList):
        classNumStr = int(fileNameStr.split(".")[0].split('_')[0])
        print("%d:%d" % (i, classNumStr))
        hwLables.append(classNumStr)
        trainingMat[i, :] = img2Vector("digits/trainingDigits/%s" % fileNameStr)
    print('hwLabels')
    print(hwLables)

    testFileList = listdir('digits/testDigits')
    testDataLength = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split(".")[0].split('_')[0])
        vectorUnderTest = img2Vector("digits/testDigits/%s" % fileNameStr)
        classfiyResult = kNN.classify0(vectorUnderTest, trainingMat, hwLables, 3)
        print("the clasifier came back with : %d ,the real answer is %d" % (classfiyResult, classNumStr))
        if classNumStr != classfiyResult:
            errorCount += 1.0
    print('the total error count is %d\n the error rate is %f' % (errorCount, errorCount / float(testDataLength)))
예제 #5
0
def datingClassTest():
    """
    Hold-out test of the kNN classifier on the dating-site data.

    The first 10% of the normalized rows are classified (k = 3) against the
    remaining rows; each prediction, the error rate and the raw error count
    are printed.

    :return: None
    """
    # Fraction of the rows used as test samples.
    testFraction = 0.1

    # Load features and labels from file, then normalize the features.
    features, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, _ranges, _minVals = kNN.autoNorm(features)

    # Row count of the matrix and the number of rows to test.
    rowCount = normMat.shape[0]
    numTestVecs = int(rowCount * testFraction)
    print('numTestVecs = ', numTestVecs)

    # Everything after the test rows serves as the training set.
    trainRows = normMat[numTestVecs:rowCount, :]
    trainLabels = datingLabels[numTestVecs:rowCount]

    misses = 0.0
    for idx in range(numTestVecs):
        predicted = kNN.classify0(normMat[idx, :], trainRows, trainLabels, 3)
        actual = datingLabels[idx]
        print('The classifier came back with %d, the real answer is: %d' %
              (predicted, actual))
        if predicted != actual:
            misses += 1.0
    print('The total error rate is %f' % (misses / float(numTestVecs)))
    print(misses)
예제 #6
0
def classifyImg(trainingdir, testdir):
    """Classify every file in *testdir* with kNN trained on *trainingdir*.

    The label of a file is the text before the first ``_`` in its name and is
    kept as a string.  Each training file fills one row of a 1024-column
    matrix via ``kNN.img2matrix``.  Each test file is classified with
    ``kNN.classify0`` (k = 3); mismatches are printed as they occur, followed
    by the overall error rate.

    Args:
        trainingdir: directory containing the training files.
        testdir: directory containing the files to classify.

    Raises:
        ZeroDivisionError: if *testdir* contains no files.
    """
    trainingfiles = listdir(trainingdir)
    trainMatrixData = zeros((len(trainingfiles), 1024))
    label = []
    for i, filename in enumerate(trainingfiles):
        label.append(filename.split('_')[0])
        trainMatrixData[i, :] = kNN.img2matrix(trainingdir + '/' + filename)

    testfiles = listdir(testdir)
    filecount = len(testfiles)
    errcount = 0.0
    for filename in testfiles:
        num = filename.split('_')[0]
        testMatrix = kNN.img2matrix(testdir + '/' + filename)
        retval = kNN.classify0(testMatrix, trainMatrixData, label, 3)
        if retval != num:
            errcount += 1
            # Call-form prints: same output on Python 2, valid on Python 3.
            print("error")
            print("retval:%s,num:%s" % (retval, num))
    print("rate:%f" % (errcount / float(filecount)))
예제 #7
0
def handwritingClassTest():
    """Run the kNN handwritten-digit test and print the error statistics.

    Each file in ``trainingDigits`` fills one row of a 1024-column training
    matrix via ``img2vector``; its label is the integer before the first ``_``
    in the file name.  Every file in ``testDigits`` is classified with
    ``classify0`` (k = 3) and compared with the label parsed from its name.

    Raises:
        ZeroDivisionError: if ``testDigits`` contains no files.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')  # list the training directory
    m = len(trainingFileList)
    # The shape must be ONE tuple argument: zeros(m, 1024) would pass 1024 as
    # the dtype and raise TypeError.
    trainingMat = zeros((m, 1024))

    for i, fileNameStr in enumerate(trainingFileList):
        # Parse the class digit from the file name.
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')  # list the test directory
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # Call-form prints: same output on Python 2, valid on Python 3.
        print('分类结果是:%d, 真是结果是:%d' % (classifierResult, classNumStr))

        if classifierResult != classNumStr:
            errorCount += 1.0
    print('分类错误的总数是:%d' % errorCount)
    print('总的错误率:%f' % (errorCount / float(mTest)))
예제 #8
0
def handwritting_test():
    """Score the kNN digit classifier on the files in ``TEST_DATA_DIR``.

    Files in ``TRAINING_DATA_DIR`` are loaded with ``img2vector`` into a
    1024-column matrix, one row per file, labelled with the integer before
    the first ``_`` in the file name.  Each test file is classified with
    ``classify0`` using ``K`` neighbours; every prediction, the error count
    and the error rate are printed.
    """
    # Build the training matrix and its label list.
    train_names = os.listdir(TRAINING_DATA_DIR)
    training_mat = np.zeros((len(train_names), 1024))
    hw_labels = []
    for row, name in enumerate(train_names):
        hw_labels.append(int(name.split('.')[0].split('_')[0]))
        training_mat[row, :] = img2vector(TRAINING_DATA_DIR + '/' + name)

    # Classify every image in the test set.
    test_names = os.listdir(TEST_DATA_DIR)
    misses = 0
    for name in test_names:
        expected = int(name.split('.')[0].split('_')[0])
        sample = img2vector(TEST_DATA_DIR + '/' + name)
        predicted = classify0(sample, training_mat, hw_labels, K)
        print("the classifier came back with: %d,the real answer is: %d" %
              (predicted, expected))
        if predicted != expected:
            misses += 1.0
    print("\nthe total number of errors is: %d" % misses)
    print("\nthe total rate is:%f" % (misses / float(len(test_names))))
예제 #9
0
def datingClassTest():
    """Try k = 1..20 on the dating data and report the k with the fewest errors.

    The data from ``datingTestSet.txt`` is normalized with ``kNN.autoNorm``.
    The first half of the rows is classified against the second half for each
    k; the error rate and raw error count are printed per k, and finally the
    best k.  On ties the larger k wins because the comparison is ``<=``.

    Raises:
        ZeroDivisionError: if the data set is too small to yield a test row.
    """
    hoRatio = 0.50  # hold out 50% of the rows as the test set
    datingDataMat, datingLabels = kNN.file2matrix(
        'datingTestSet.txt')  # load data set from file
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)  # first half is tested, second half trains
    # Start above any reachable error count so the first k is always accepted.
    errorAns = m
    for k in range(20):
        errorCount = 0
        for i in range(numTestVecs):
            classifierResult = kNN.classify0(normMat[i, :],
                                             normMat[numTestVecs:m, :],
                                             datingLabels[numTestVecs:m],
                                             k + 1)
            if classifierResult != datingLabels[i]:
                errorCount += 1.0
        # Single-argument print calls give the same output on Python 2 and are
        # valid syntax on Python 3.
        print("when k is %d the total error rate is: %f" % (
            k + 1, errorCount / float(numTestVecs)))
        print(errorCount)
        if errorCount <= errorAns:
            errorAns = errorCount
            ans = k + 1
    print("the best k is %s" % ans)
예제 #10
0
    def hand_write_class_test(self):
        """Score the kNN digit classifier and print the error statistics.

        Files in the training directory are loaded with ``self.img_to_vector``
        into a 1024-column matrix, one row per file, labelled with the integer
        before the first ``_`` in the file name.  Each file in the test
        directory is classified with ``kNN.classify0`` (k = 3).

        Raises:
            ZeroDivisionError: if the test directory contains no files.
        """
        train_dir = (
            "/home/zhangzhiliang/Documents/my_git/DATA-SCIENTIST-/machine_learing_algorithm/"
            "machine_learning_in_action/2_KNN/digits/trainingDigits")
        test_dir = (
            "/home/zhangzhiliang/Documents/my_git/DATA-SCIENTIST-/machine_learing_algorithm/"
            "machine_learning_in_action/2_KNN/digits/testDigits")

        hand_write_labels = []
        train_file_list = listdir(train_dir)
        train_data = np.zeros((len(train_file_list), 1024))
        for i, file_name_str in enumerate(train_file_list):
            class_digit = int(file_name_str.split('.')[0].split('_')[0])
            hand_write_labels.append(class_digit)
            # Read the file from the directory that was just listed; the former
            # 'train_Digits <name>' string did not name any listed file.
            train_data[i, :] = self.img_to_vector(
                '{}/{}'.format(train_dir, file_name_str))

        test_file_list = listdir(test_dir)
        # Integer counter: the '{:d}' format below rejects floats (ValueError).
        error_count = 0
        test_num_of_digits = len(test_file_list)
        for file_name_str in test_file_list:
            class_digit = int(file_name_str.split('.')[0].split('_')[0])
            test_data = self.img_to_vector(
                '{}/{}'.format(test_dir, file_name_str))
            result = kNN.classify0(test_data, train_data, hand_write_labels, 3)
            print(
                "the classifier came back with: {:d}, the real answer is: {:d}"
                .format(result, class_digit))
            if result != class_digit:
                error_count += 1
        print("the total number of errors is {:d}".format(error_count))
        print("the total number of error rate is : {:f}".format(
            error_count / float(test_num_of_digits)))
def handwritingClassTest():
    """Run the kNN handwritten-digit test and print the error statistics.

    Each file in ``trainingDigits`` fills one row of a 1024-column training
    matrix via ``img2vector``; its label is the integer before the first ``_``
    in the file name.  Every file in ``testDigits`` is classified with
    ``kNN.classify0`` (k = 3) and compared with the label parsed from its name.

    Raises:
        ZeroDivisionError: if ``testDigits`` contains no files.
    """
    hwLabels = []
    trainingFileList = os.listdir('trainingDigits')
    trainingMat = np.zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        hwLabels.append(classNumStr)
        # Fill row i; assigning to ``trainingMat`` itself would discard the
        # matrix and keep only the last file's vector.
        trainingMat[i, :] = img2vector('trainingDigits/' + fileNameStr)

    testFileList = os.listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    # Iterate over the TEST files (the old loop ran over the training count,
    # which skips files or raises IndexError when the two counts differ).
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = img2vector('testDigits/' + fileNameStr)
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat,
                                         hwLabels, 3)
        print(
            'the classifier came back with:{0},the real answer is:{1}'.format(
                classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print('the total number of errors is:{}'.format(errorCount))
    print('the total erroe rate is:{:.2f}'.format(errorCount / float(mTest)))
예제 #12
0
def hand_writing_class_test():
    """Run the kNN handwritten-digit test and print the error statistics.

    Each file in ``trainingDigits`` fills one row of a 1024-column training
    matrix via ``img2vector``; its label is the integer before the first ``_``
    in the file name.  Every file in ``testDigits`` is classified with
    ``classify0`` (k = 3) and compared with the label parsed from its name.

    Raises:
        ZeroDivisionError: if ``testDigits`` contains no files.
    """
    hw_labels = []
    training_file_list = listdir('trainingDigits')
    training_mat = zeros((len(training_file_list), 1024))
    for i, file_name_str in enumerate(training_file_list):
        class_num = int(file_name_str.split('.')[0].split('_')[0])
        hw_labels.append(class_num)
        training_mat[i, :] = img2vector('trainingDigits/' + file_name_str)

    test_file_list = listdir('testDigits')
    error_count = 0.0
    m_test = len(test_file_list)
    for file_name_str in test_file_list:
        class_num = int(file_name_str.split('.')[0].split('_')[0])
        vector_under_test = img2vector('testDigits/' + file_name_str)
        classifier_result = classify0(vector_under_test, training_mat, hw_labels, 3)
        # Call-form prints: same output on Python 2, valid syntax on Python 3.
        print('the classifier came back with: %d, the real answer is: %d,' % (classifier_result, class_num))
        if classifier_result != class_num:
            error_count += 1
    print("the total number of errors is: %d" % error_count)
    print("the total error rate is: %f" % (error_count / float(m_test)))
예제 #13
0
def handwriting_test_class():
    """Score the kNN digit classifier on the files in ``digits_dir_1``.

    Files in ``digits_dir_0`` are loaded with ``img2vector`` into a
    1024-column matrix, one row per file, labelled by
    ``get_class_num_from_filename``.  Each file in ``digits_dir_1`` is
    classified with ``classify0`` (k = 3); every prediction, the error count
    and the error rate are printed.

    Raises:
        ZeroDivisionError: if ``digits_dir_1`` contains no files.
    """
    hw_labels = []
    training_filter_list = os.listdir(digits_dir_0)
    # Pre-allocated storage: one row per training file.
    training_mat = numpy.zeros((len(training_filter_list), 1024))

    for i, filename in enumerate(training_filter_list):
        hw_labels.append(get_class_num_from_filename(filename))
        training_mat[i, :] = img2vector(os.path.join(digits_dir_0, filename))

    test_file_list = os.listdir(digits_dir_1)
    error_count = 0.0
    m_test = len(test_file_list)

    for filename in test_file_list:
        class_num = get_class_num_from_filename(filename)
        vector_under_test = img2vector(os.path.join(digits_dir_1, filename))
        classifier_result = classify0(vector_under_test,
                                      training_mat,
                                      hw_labels,
                                      3)
        # Both values must be passed to % as one tuple; without the
        # parentheses only the first is formatted and % raises TypeError.
        print("the classifier came back with: %d, the real answer is: %d" %
              (classifier_result, class_num))
        if classifier_result != class_num:
            error_count += 1.0
    print("\nthe total number of errors is: %d" % error_count)
    print("\nthe total error rate of is: %f" % (error_count / float(m_test)))
예제 #14
0
def handwriting_test_class():
    """Train kNN on ``digits_dir_0`` and report its errors on ``digits_dir_1``.

    Every training file fills one row of a 1024-column matrix via
    ``img2vector`` and is labelled by ``get_class_num_from_filename``.  Test
    files are classified with ``classify0`` (k = 3); each prediction, the
    total error count and the error rate are printed.

    Raises:
        ZeroDivisionError: if ``digits_dir_1`` contains no files.
    """
    hw_labels = []
    training_filter_list = os.listdir(digits_dir_0)
    m = len(training_filter_list)
    training_mat = numpy.zeros((m, 1024))  # storage for the training vectors

    for i, filename in enumerate(training_filter_list):
        class_num = get_class_num_from_filename(filename)
        hw_labels.append(class_num)
        training_mat[i, :] = img2vector(os.path.join(digits_dir_0, filename))

    test_file_list = os.listdir(digits_dir_1)
    error_count = 0.0
    m_test = len(test_file_list)

    for filename in test_file_list:
        class_num = get_class_num_from_filename(filename)
        vector_under_test = img2vector(os.path.join(digits_dir_1, filename))
        classifier_result = classify0(vector_under_test, training_mat,
                                      hw_labels, 3)
        # The format arguments are wrapped in a tuple: "fmt % a, b" applies %
        # to ``a`` alone and raises "not enough arguments for format string".
        print(
            "the classifier came back with: %d, the real answer is: %d" %
            (classifier_result, class_num))
        if classifier_result != class_num:
            error_count += 1.0
    print("\nthe total number of errors is: %d" % error_count)
    print("\nthe total error rate of is: %f" % (error_count / float(m_test)))
def hwClassifyTest(maxTests=10):
    """Classify a sample of the handwritten-digit test files and print the error rate.

    Each file in ``trainingDigits`` fills one row of a 1024-column matrix via
    ``img2vector``.  A name such as ``0_12.txt`` yields the label ``0``: the
    name is split on ``.`` and then on ``_``.  The first *maxTests* files of
    ``testDigits`` are classified with ``kNN.classify0`` (k = 3).

    Args:
        maxTests: maximum number of test files to classify (default 10, the
            previously hard-coded sample size); ``None`` classifies them all.

    Raises:
        ZeroDivisionError: if no test file is classified.
    """
    hwlabels = []
    # listdir is an os function returning the file names in the directory.
    trainfilelist = listdir('trainingDigits')
    traindata = zeros((len(trainfilelist), 1024))
    for i, filenamestr in enumerate(trainfilelist):
        numclass = int(filenamestr.split('.')[0].split('_')[0])
        hwlabels.append(numclass)
        traindata[i, :] = img2vector('trainingDigits/%s' % filenamestr)

    testfilelist = listdir('testDigits')
    mtest = len(testfilelist)
    # Never index past the end of the list, and remember how many files were
    # really classified so the rate is not diluted by untested files.
    numtested = mtest if maxTests is None else min(maxTests, mtest)
    errorcount = 0
    for filenamestr in testfilelist[:numtested]:
        numclass = int(filenamestr.split('.')[0].split('_')[0])
        testvect = img2vector('testDigits/%s' % filenamestr)
        result = kNN.classify0(testvect, traindata, hwlabels, 3)
        # The format arguments must be one parenthesized tuple.
        print("handwriting classifier result: %d, the resl answer is: %d"
              % (result, numclass))
        if result != numclass:
            errorcount += 1.0
    print("\nthe error rate is:%f " % (errorcount / float(numtested)))
def handwriteringClassTest():
    """Score the kNN digit classifier on ``digits/testDigits``.

    Files in ``digits/trainingDigits`` are loaded with ``imgtxt2vector`` into
    a 1024-column matrix, one row per file, labelled with the integer before
    the first ``_`` in the file name.  Each test file is classified with
    ``kNN.classify0`` (k = 3); every prediction and the final error rate are
    printed.
    """
    # Read the directory listing of the training set.
    trainNames = os.listdir('digits/trainingDigits')
    trainingMat = np.zeros([len(trainNames), 1024])
    hwLabels = []
    for row, name in enumerate(trainNames):
        hwLabels.append(int(name.split('.')[0].split('_')[0]))
        trainingMat[row, :] = imgtxt2vector('digits/trainingDigits/' + name)

    testNames = os.listdir('digits/testDigits')
    misses = 0.0
    for name in testNames:
        expected = int(name.split('.')[0].split('_')[0])
        sample = imgtxt2vector('digits/testDigits/' + name)
        predicted = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print('the predicted answer is %d,the true answer is %d.' %
              (predicted, expected))
        if predicted != expected:
            misses += 1
    print('the total error rate is %f.' % (misses / len(testNames)))
예제 #17
0
def hand_writing_class_test():
    """Score the kNN digit classifier on the files in ``testDigits``.

    Files in ``trainingDigits`` are loaded with ``img_to_vector`` into a
    1024-column matrix, one row per file, labelled with the integer before
    the first ``_`` in the file name.  Each test file is classified with
    ``kNN.classify0`` (k = 3); every prediction, the error count and the
    error rate are printed.
    """
    train_names = listdir('trainingDigits')
    training_mat = zeros((len(train_names), 1024))
    hwlabels = []
    for row, name in enumerate(train_names):
        hwlabels.append(int(name.split('.')[0].split('_')[0]))
        training_mat[row, :] = img_to_vector('trainingDigits/%s' % name)

    test_names = listdir('testDigits')
    misses = 0.0
    for name in test_names:
        expected = int(name.split('.')[0].split('_')[0])
        sample = img_to_vector('testDigits/%s' % name)
        predicted = kNN.classify0(sample, training_mat, hwlabels, 3)
        print("the classifier came back with: %d , the real answer is:%d" %
              (predicted, expected))
        if predicted != expected:
            misses += 1.0
    print("\nthe total number of error is :%d" % misses)
    print("\nthe total error rate is: %f" % (misses / float(len(test_names))))
def handwritingClassTest():
    """Run the kNN handwritten-digit test and print the error statistics.

    Each file in the training directory fills one row of a 1024-column matrix
    via ``img2verctor``; its label is the integer before the first ``_`` in
    the file name.  Every file in the test directory is classified with
    ``kNN.classify0`` (k = 3) and compared with the label parsed from its name.

    Raises:
        ZeroDivisionError: if the test directory contains no files.
    """
    # Each directory is named once so listing and loading cannot drift apart.
    trainingDir = '/home/jimlee/Documents/Git/kNN/HandwritingRecognitionSystem/trainingDigits'
    testDir = '/home/jimlee/Documents/Git/kNN/HandwritingRecognitionSystem/testDigits'

    hwLabels = []
    trainingFileList = listdir(trainingDir)
    trainingMat = zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2verctor(trainingDir + '/%s' % fileNameStr)

    testFileList = listdir(testDir)
    errorCount = 0.0
    m_test = len(testFileList)
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = img2verctor(testDir + '/%s' % fileNameStr)
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # Call-form prints: same output on Python 2, valid syntax on Python 3.
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(m_test)))
예제 #19
0
def handwritingClassTest():
    """Score the kNN digit classifier on the files in ``testDigits``.

    Files in ``trainingDigits`` are loaded with ``img2vector`` into a
    1024-column matrix, one row per file, labelled with the integer before
    the first ``_`` in the file name.  Each test file is classified with
    ``kNN.classify0`` (k = 3); every prediction, the error count and the
    error rate are printed.
    """
    # Read the contents of the training directory.
    trainNames = os.listdir('trainingDigits')
    trainingMat = np.zeros((len(trainNames), 1024))
    hwLabels = []
    for row, name in enumerate(trainNames):
        # The class digit is encoded in the file name.
        hwLabels.append(int(name.split(".")[0].split('_')[0]))
        trainingMat[row, :] = img2vector('trainingDigits/%s' % name)

    testNames = os.listdir('testDigits')
    misses = 0.0
    for name in testNames:
        expected = int(name.split(".")[0].split('_')[0])
        sample = img2vector('testDigits/%s' % name)
        predicted = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" %
              (predicted, expected))
        if predicted != expected:
            misses += 1
    print("\nthe total number of errors is: %d" % misses)
    print("\nthe total error rate is: %f" % (misses / float(len(testNames))))
예제 #20
0
	def testKNN2(self):
		"""Check that one hand-picked dating sample is classified as label 1.

		The sample is scaled with the minimum values and ranges returned by
		``kNN.autoNorm`` before being classified with k = 3.
		"""
		samples, sampleLabels = file2matrix('datingTestSet.txt')
		normalized, spans, floors = kNN.autoNorm(samples)
		query = numpy.array([51052, 4.680098, 0.625224])
		query = (query - floors) / spans
		predicted = kNN.classify0(query, normalized, sampleLabels, 3)
		self.assertEqual(predicted, 1)
예제 #21
0
def handwritingClassTest():
    """Evaluate the kNN digit classifier on ``testDigits`` and print the results.

    Training vectors come from ``img2vector`` applied to every file in
    ``trainingDigits`` (one 1024-column row each); a file's label is the
    integer before the first ``_`` in its name.  Test files are classified
    with ``classify0`` (k = 3).

    Raises:
        ZeroDivisionError: if ``testDigits`` contains no files.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    trainingMat = zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorcount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # Single-argument print calls behave the same on Python 2 and, unlike
        # the print statement, also parse on Python 3.
        print('the classifier came back with:%d,the real answer is :%d' % (
            classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorcount += 1.0

    print("\nThe local number of errors is :%d" % errorcount)
    print("\nThe total error rate is: %f " % (errorcount / float(mTest)))
예제 #22
0
def handwritingClassTest():
    """Score the kNN digit classifier on ``resources/testDigits``.

    Files in ``resources/trainingDigits`` are loaded with ``img2vector`` into
    a 1024-column matrix, one row per file, labelled with the integer before
    the first ``_`` in the file name.  Each test file is classified with
    ``kNN.classify0`` (k = 3); every prediction, the error count and the
    error rate are printed.
    """
    # Load the training set.
    trainNames = os.listdir('resources/trainingDigits')
    trainingMat = zeros((len(trainNames), 1024))
    hwLabels = []
    for row, name in enumerate(trainNames):
        stem = name.split('.')[0]  # strip the extension
        hwLabels.append(int(stem.split('_')[0]))
        trainingMat[row, :] = img2vector('resources/trainingDigits/%s' % name)

    # Iterate through the test set.
    testNames = os.listdir('resources/testDigits')
    misses = 0.0
    for name in testNames:
        stem = name.split('.')[0]  # strip the extension
        expected = int(stem.split('_')[0])
        sample = img2vector('resources/testDigits/%s' % name)
        predicted = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" %
              (predicted, expected))
        if predicted != expected:
            misses += 1.0
    print("\nthe total number of errors is: %d" % misses)
    print("\nthe total error rate is: %f" % (misses / float(len(testNames))))
예제 #23
0
def handwritingClassTest():
    """Score the kNN digit classifier on the files in ``digits/txt``.

    Files in ``digits/trainingDigits`` are loaded with ``img2vector`` into a
    1024-column matrix, one row per file, labelled with the integer before
    the first ``_`` in the file name.  Each file in ``digits/txt`` other than
    ``.DS_Store`` is classified with ``kNN.classify0`` (k = 3) and compared
    with the label parsed from its own name.

    Raises:
        ZeroDivisionError: if ``digits/txt`` holds no file to classify.
    """
    hwLabels = []
    trainingFileList = listdir('digits/trainingDigits')
    trainingMat = zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('digits/trainingDigits/%s' %
                                       fileNameStr)

    # Skip the macOS metadata file up front so it is neither classified nor
    # counted in the error-rate denominator.
    testFileList = [name for name in listdir('digits/txt')  # testDigits
                    if name != '.DS_Store']
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        # The expected label must come from THIS file's name; reusing the
        # value left over from the training loop compared every prediction
        # with the last training file's label.
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('digits/txt/%s' %
                                     fileNameStr)  # testDigits
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat,
                                         hwLabels, 3)
        print('the classifier came back with: %d, the real answer is: %d' %
              (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    # Call-form prints: same output on Python 2, valid syntax on Python 3.
    print('\n the total number of errors is: %d' % errorCount)
    print('\n the total error rate is: %f' % (errorCount / float(mTest)))
예제 #24
0
def handwritingClassTest():
    """Score the kNN digit classifier on ``testDigits`` and print a report.

    Files in ``trainingDigits`` are loaded with ``img2vector`` into a
    1024-column matrix, one row per file, labelled with the integer before
    the first ``_`` in the file name.  Each test file is classified with
    ``classify0`` (k = 3).  Every prediction is printed; a misclassified file
    is printed a second time together with its file name.  Finally the error
    count, error rate and accuracy are printed.

    Raises:
        ZeroDivisionError: if ``testDigits`` contains no files.
    """
    hwLabels = []
    trainingFileList = listdir("trainingDigits")
    trainingMat = zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector("trainingDigits/%s" % fileNameStr)

    # Test-set setup belongs after the training loop: inside it the directory
    # was re-listed for every training file and the names stayed undefined
    # when the training directory was empty.
    testFileList = listdir("testDigits")
    errorCount = 0.0
    mTest = len(testFileList)

    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = img2vector("testDigits/%s" % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("came back with %d, real is %d" %
              (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
            print("came back with %d, real is %d" %
                  (classifierResult, classNumStr),
                  end="  ")
            print("file name : %s " % fileNameStr)

    errorRate = errorCount / float(mTest)
    print("\n number of error : %d" % errorCount)
    print("\n error rate : %f " % errorRate)
    print("\n acc rate : %f" % (1.0 - errorRate))
예제 #25
0
def handWritingClassTest():
    """Score the kNN digit classifier on the files in ``testDigits``.

    Files in ``trainingDigits`` are loaded with ``img2vector`` into a
    1024-column matrix, one row per file, labelled with the integer before
    the first ``_`` in the file name.  Each test file is classified with
    ``kNN.classify0`` (k = 3); every prediction, the error count and the
    error rate are printed.

    Raises:
        ZeroDivisionError: if ``testDigits`` contains no files.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    trainingMat = zeros(shape=(len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        # Take off the extension, then keep the part before the underscore.
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        # Read the vector from testDigits, where the name was listed; loading
        # it from trainingDigits picks up the wrong (or a missing) file.
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = kNN.classify0(vectorUnderTest, trainingMat,
                                         hwLabels, 3)
        print(
            f"The classifier came back with: {classifierResult}, the real answer is {classNumStr}."
        )
        if classifierResult != classNumStr:
            errorCount += 1
    print(f"\nThe total number of errors is: {errorCount}.")
    print(f"\nThe total error rate is: {errorCount / float(mTest)}.")
예제 #26
0
def handwritingClassTest():
    """Score the kNN digit classifier on ``digits/testDigits``.

    Files in ``digits/trainingDigits`` are loaded with ``img2vector`` into a
    1024-column matrix, one row per file, labelled with the integer before
    the first ``_`` in the file name.  Each test file is classified with
    ``kNN.classify0`` (k = 3); every prediction is printed, followed by the
    error count and the error rate.
    """
    trainNames = listdir('digits/trainingDigits')
    trainingMat = np.zeros([len(trainNames), 1024])
    hwLabels = []
    for row, name in enumerate(trainNames):
        hwLabels.append(int(name.split('.')[0].split('_')[0]))
        trainingMat[row, :] = img2vector('digits/trainingDigits/' + name)

    testNames = listdir('digits/testDigits')
    misses = 0.0
    for name in testNames:
        expected = int(name.split('.')[0].split('_')[0])
        sample = img2vector('digits/testDigits/' + name)
        predicted = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print('the classifier came back with: ' + str(predicted) +
              ', the real answer is: ' + str(expected))
        if predicted != expected:
            misses += 1.0

    errorRate = misses / float(len(testNames))
    print('\nthe total number of error is: ' + str(misses) +
          '\nthe total error rate is: ' + str(errorRate))
def handwritingClassTest(height, width):
    """Score the kNN digit classifier for images of a given size.

    Files in ``trainingDigits`` are loaded with ``img2vector`` into a matrix
    with ``int(height * width)`` columns, one row per file, labelled with the
    integer before the first ``_`` in the file name.  Each file in
    ``testDigits`` is classified with ``kNN.classify0`` (k = 5); every
    prediction, the error count and the error rate are printed.

    Args:
        height: the height of the image; forwarded to ``img2vector``.
        width: the width of the image; forwarded to ``img2vector``.

    Raises:
        ZeroDivisionError: if ``testDigits`` contains no files.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    trainingMat = zeros((len(trainingFileList), int(height * width)))
    for i, fileNameStr in enumerate(trainingFileList):
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr, height, width)

    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        # The file name consists of '<class name>_<...>'.
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vecUnderTest = img2vector('testDigits/%s' % fileNameStr, height, width)
        classifierResult = kNN.classify0(vecUnderTest, trainingMat, hwLabels, 5)
        # Call-form prints: same output on Python 2, valid syntax on Python 3.
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1

    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(mTest)))
def calcuteError(train_X, train_y, test_X, test_y):
    """Print the kNN (k = 3) error rate of the test samples.

    Args:
        train_X: training samples passed to ``kNN.classify0``.
        train_y: labels of the training samples.
        test_X: samples to classify; indexed from 0 to ``len(test_X) - 1``.
        test_y: expected labels, indexed in step with *test_X*.

    Raises:
        ZeroDivisionError: if *test_X* is empty.
    """
    testSize = len(test_X)
    errorCount = 0.0
    for i in range(testSize):
        preLabel = kNN.classify0(test_X[i], train_X, train_y, 3)
        if preLabel != test_y[i]:
            errorCount += 1
    # Call-form print: same output on Python 2, valid syntax on Python 3.
    print("error rate: %f" % (errorCount / testSize))
예제 #29
0
def classify0Test():
    """
    Exercise the first classifier on the sample data set.

    :return: None; the data set and the prediction are printed.
    """
    samples, tags = kNN.createDataSet()
    print('group:', samples)
    print('labels:', tags)
    prediction = kNN.classify0([0.1, 0.1], samples, tags, 3)
    print('result:', prediction)
예제 #30
0
def calcErrorRate(features, lables, holdOutRatio):
    """Return the kNN (k=3) error rate over the first hold-out rows.

    features: 2-D array; the first int(rows * holdOutRatio) rows are
        classified against the whole of `features`.
    lables: per-row labels. The misspelled parameter name is kept so that
        existing keyword callers keep working.
    holdOutRatio: fraction of the rows used as test samples.

    Labels must be strings because they are concatenated into the
    progress message. Raises ZeroDivisionError when the hold-out count
    is zero.
    """
    # `lables` is the public (misspelled) parameter name; bind the
    # correctly spelled local that the body uses.
    labels = lables
    testCount = int(features.shape[0]*holdOutRatio)
    errorCount = 0.0
    for i in range(testCount):
        predictedLabel = kNN.classify0(features[i, :], features, labels, 3)
        realLabel = labels[i]
        print("predicted : " + predictedLabel + "\t\treal : " + realLabel)
        if predictedLabel != realLabel:
            errorCount += 1.0
    return errorCount/testCount
예제 #31
0
def classifyPerson():
    """Prompt for three traits and print the predicted level of interest.

    The answers are scaled with the minimums and ranges returned by
    normalization.autoNorm and classified with kNN (k=3).
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    gamePct = float(input("percentage of time spent playing video games?"))
    flierMiles = float(input("frequent flier miles earned per year?"))
    iceCreamLiters = float(input("liters of ice cream consumed per year?"))

    rawData, rawLabels = txtfile2matrix.file2matrix("C:/Users/jasper/iCloudDrive/newbie_programming/python/ml/chapter2/datingTestSet.txt",3)
    intLabels = txtfile2matrix.char2int(rawLabels)
    scaled, spans, lows = normalization.autoNorm(rawData)

    sample = np.array([flierMiles, gamePct, iceCreamLiters], dtype=float)
    scaledSample = (sample - lows)/spans
    answer = kNN.classify0(scaledSample, scaled, intLabels, 3)
    # The predicted label is used as a 1-based index into resultList.
    print("You will probably like this person: %s" % (resultList[answer - 1]))
예제 #32
0
def classifyPerson():
    """Ask for three traits and print how much the user may like the person."""
    resultList = ['not at all', 'in small doses', 'in large doses']
    gamePct = float(input("percent of time spent playing video games?"))
    flierMiles = float(input("frequent flier miles earned per year?"))
    iceCreamLiters = float(input("liters of ice Cream consumed per year?"))

    # Load the data, then normalise it.
    features, datingLabels = file2matrix('datingTestSet2.txt')
    normDataSet, ranges, minVals = autoNorm(features)

    sample = np.array([flierMiles, gamePct, iceCreamLiters])
    scaledSample = (sample - minVals) / ranges
    answer = classify0(scaledSample, normDataSet, datingLabels, 3)
    # The predicted label is used as a 1-based index into resultList.
    print("You will probably like this person:",
          resultList[answer - 1])
예제 #33
0
def classifyPerson():
    """Prompt for three traits and print how much the user may like the person.

    The answers are scaled with the ranges and minimums returned by
    kNN.autoNorm and classified (k=3) against 'datingTestSet2.txt'.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent filter miles earned per year"))
    iceCream = float(input("liters of ice cream consumed per year"))
    datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    inArr = numpy.array([ffMiles, percentTats, iceCream])
    # The third argument is the label vector, not the feature matrix.
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normMat,
                                     datingLabels, 3)
    # The predicted label is used as a 1-based index into resultList.
    print("you will probably like this person: " +
          resultList[classifierResult - 1])
예제 #34
0
def classifyPerson():
    """Prompt for three traits and print the predicted level of interest.

    The answers are scaled with the values returned by kNN.autonorm and
    classified (k=3) against 'datingTestSet2.txt'. The normalised matrix
    and the raw prediction are printed as debugging output.
    """
    resultlist=['not at all','in small doss','in large does']
    percentTats=float(raw_input("percentage of time spent playing video game?"))
    ffMiles=float(raw_input("frequent filer miles earned per year?"))
    icecream=float(raw_input("liters of ice cream consumed per year?"))
    datingDataMat,datingLabel = kNN.file2matrix('datingTestSet2.txt')
    normat,rangeval,minval=kNN.autonorm(datingDataMat)
    print("normat:%s" %(normat))
    inX=array([ffMiles,percentTats,icecream])
    retVal=kNN.classify0((inX - minval)/rangeval,normat,datingLabel,3)
    print("retval[%d]" %(retVal))
    # Labels are presumably 1..3 (as in the other dating examples), so
    # shift to a 0-based index; indexing with retVal directly picks the
    # wrong text and overruns the list for label 3.
    # NOTE(review): confirm against kNN.file2matrix.
    print("resutl:%s " %(resultlist[retVal - 1]))
예제 #35
0
def classifyperson():
    """Ask for three traits and print the predicted level of interest.

    The sample is scaled with the minimums and ranges from kNN.autoNorm
    before it is classified with k=3.
    """
    result = ['not at all', 'small doses', 'large dose']

    miles = float(input('frequent filter miles earned per year:'))
    gameshare = float(input('% of time spent on game:'))
    liters = float(input('liters of ice cream consumed per year:'))

    rawmat, labels = kNN.file2matrix('datingTestSet.txt')
    normmat, ranges, minvals = kNN.autoNorm(rawmat)

    sample = array([miles, gameshare, liters])
    scaled = (sample - minvals) / ranges
    predicted = kNN.classify0(scaled, normmat, labels, 3)
    # The predicted label is used as a 1-based index into `result`.
    print("you like this person:", result[predicted - 1])
예제 #36
0
def classifyPerson():
    """Prompt for three traits and print how much the user may like the person.

    The answers are scaled with the ranges and minimums returned by
    autoNorm and classified (k=3) against 'datingTestSet2.txt'.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input('percentage of time spent playing video games?'))
    ffMiles = float(input('frequent flier miles earned per year?'))
    iceCream = float(input('liters of ice cream consumed per year?'))

    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normData, ranges, minVals = autoNorm(datingDataMat)
    inArr = array([ffMiles, percentTats, iceCream])
    # The training data is normalised, so the query must be scaled the
    # same way; otherwise the largest raw feature dominates the distance.
    classifierResult = kNN.classify0((inArr - minVals) / ranges, normData,
                                     datingLabels, 3)
    # The predicted label is used as a 1-based index into resultList.
    print('You will probably like this person: ',
          resultList[classifierResult - 1])
def classifyPerson():
    resultlist = ['not at all','in small doses','in large doses']
    games = float(raw_input(
        "percentage of time spent playing video games?"))
    flymiles = float(raw_input(
        "frequent flier miles earned per year?"))
    icecream = float(raw_input(
        "liters of ice cream consumed per year?"))
    datingdata, datinglabel = kNN.file2matrix('datingTestSet2.txt')
    normdata, ranges, minv = kNN.autoNorm(datingdata)
    inarr = array([flymiles, games, icecream])
    result = kNN.classify0((inarr - minv)/ranges, normdata, datinglabel, 3)
    print "you will probably like this person:", resultlist[result-1]
예제 #38
0
def datingClassTest():
    """Hold-out test of the dating classifier.

    The first 55% of the normalised rows are classified (k=3) against the
    remaining rows; every prediction and the error rate are printed.
    """
    hoRatio = 0.550
    features, labels = kNN.file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(features)
    rowCount = normMat.shape[0]
    numTestVecs = int(rowCount * hoRatio)
    errorCount = 0.0
    for idx in range(numTestVecs):
        predicted = kNN.classify0(normMat[idx, :],
                                  normMat[numTestVecs:rowCount, :],
                                  labels[numTestVecs:rowCount],
                                  3)
        expected = labels[idx]
        print('the classifier came back with: %s, the real answer is: %s' % (predicted, expected))
        if predicted != expected:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
예제 #39
0
def showDatingInput():
    # 输入测试数据
    resultList = ['not at all', 'in small doses', 'in large doses']
    mPercentGame = float(
        raw_input('the percentange of time spent playing vedio games:'))
    mPercentMiles = float(raw_input('the miles earned every year:'))
    mpercentIce = float(raw_input('the ice cream consumed per year:'))
    testArray = [mPercentMiles, mPercentGame, mpercentIce]

    mat, labels = kNN.file2matrix('datingTestSet2.txt')
    normMat, mRange, mMin = kNN.autoNum(mat)
    ansType = kNN.classify0((testArray - mMin) / mRange, normMat, labels, 5)
    print 'This guy is mostly', resultList[int(ansType) - 1]
예제 #40
0
def dating_class_test():
    """Hold-out evaluation of the dating classifier.

    The first int(rows * HO_RATIO) normalised rows are classified against
    the remaining rows with K neighbours; every result and the final
    error rate are printed.
    """
    features, dating_labels = file2matrix(DATING_DATA)
    norm_data, ranges, min_val = auto_norm(features)
    total = norm_data.shape[0]
    num_test_vecs = int(total * HO_RATIO)
    error_count = 0.0
    for idx in range(num_test_vecs):
        predicted = classify0(norm_data[idx, :],
                              norm_data[num_test_vecs:total, :],
                              dating_labels[num_test_vecs:total], K)
        expected = dating_labels[idx]
        print('the classifier came back with: %d, the real answer is: %d' %
              (predicted, expected))
        if predicted != expected:
            error_count += 1.0
    print("the total error rate is: %f" % (error_count / float(num_test_vecs)))
예제 #41
0
파일: main.py 프로젝트: thjking/inAction
def datingClassTest():
    """Hold-out test of the dating classifier.

    The first 10% of the normalised rows are classified (k=3) against the
    remaining rows; every prediction and the error rate are printed.
    """
    x = 0.10
    Mat, Labels = fileLoad.filematrix0('datingTestSet.txt')
    normMat, ranges, minVals = norm.normal(Mat)
    rowCount = normMat.shape[0]
    numTestVecs = int(rowCount * x)
    errorCount = 0.0
    for idx in range(numTestVecs):
        predicted = kNN.classify0(normMat[idx,:],
                                  normMat[numTestVecs:rowCount,:],
                                  Labels[numTestVecs:rowCount],
                                  3)
        expected = Labels[idx]
        print("the classifier came back with: %d, the real answer is: %d" % (predicted, expected))
        if predicted != expected:
            errorCount += 1.0
    print("the total error rate is %f" % (errorCount / float(numTestVecs)))
def datingClassTest():
    """Hold-out test of the dating classifier on 'datingTestSet2.txt'.

    The first 10% of the normalised rows are classified (k=3) against the
    remaining rows; every prediction and the error rate are printed.
    """
    hoRatio = 0.10
    rawData, labels = file2matrix('datingTestSet2.txt')
    scaled, ranges, minv = autoNorm(rawData)
    rowCount = scaled.shape[0]
    testCount = int(rowCount*hoRatio)
    misses = 0
    for idx in range(testCount):
        predicted = kNN.classify0(scaled[idx,:],
                                  scaled[testCount:rowCount,:],
                                  labels[testCount:rowCount],
                                  3)
        expected = labels[idx]
        print("the classifier came back with: %d, the real answer is: %d" % (predicted, expected))
        if predicted != expected:
            misses += 1.0
    print("the total error rate is: %f" % (misses/float(testCount)))
예제 #43
0
파일: kNN.py 프로젝트: zhlei99/MLStudy
def classifyPerson():
    """
    Read one person's traits from the user and print how much the user
    will probably like that person.

    The answers are scaled with the ranges and minimums returned by
    kNN.autoNorm and classified (k=3) against 'datingTestSet.txt'.
    """
    resultList = ['not at all','in small doses','in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent fliter miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
    # The training data is normalised, so the query must be scaled the
    # same way. minVals and ranges are presumably numpy arrays, so the
    # list operand is broadcast element-wise — TODO confirm against
    # kNN.autoNorm.
    inArr = ([ffMiles, percentTats, iceCream] - minVals) / ranges
    classifierResult = kNN.classify0(inArr, normMat, datingLabels, 3)
    # The predicted label is used as a 1-based index into resultList.
    print ("you will probably like this person : %s" %(resultList[classifierResult - 1]))
예제 #44
0
def datingSetTest(horate):
    """Hold-out test of the dating classifier on 'datingTestSet2.txt'.

    horate: fraction of the rows used as test samples; the remaining rows
    form the training set.

    Prints the raw and normalised data, every comparison and the final
    error rate.
    """
    datingDataMat,datingLabel=kNN.file2matrix('datingTestSet2.txt')
    print("data[%d]:%s,\nlabel:%s" %(datingDataMat.shape[0],datingDataMat,datingLabel))
    datingDataMat,rangeval,minval=kNN.autonorm(datingDataMat)
    print("data[%d]:%s" %(datingDataMat.shape[0],datingDataMat))
    m=datingDataMat.shape[0]
    count=int(m*horate)
    errcount=0.0
    # Start at row 0: the rate below is divided by `count`, so all
    # `count` hold-out rows have to be tested.
    for i in range(count):
        retVal=kNN.classify0(datingDataMat[i,:],datingDataMat[count:m,:],datingLabel[count:m],3)
        print("orignal:%d,calculate:%d"%(datingLabel[i],retVal))
        if retVal != datingLabel[i]:
            errcount+=1.0
            print("error.")
    print("error rate:%f" %(errcount/float(count)))
def datingClassTest():
    """Hold-out test of the dating classifier on "datingTestSet2.txt".

    hoRatio is the share of rows used as the test set; those rows are
    classified (k=5) against the remaining rows. Every prediction and the
    error rate are printed.
    """
    hoRatio = 0.10
    features, labels = file2matrix("datingTestSet2.txt")
    normMat, ranges, minVals = autoNorm(features)
    rowCount = normMat.shape[0]
    numTestVecs = int(rowCount * hoRatio)
    errorCount = 0.0
    for idx in range(numTestVecs):
        predicted = kNN.classify0(normMat[idx, :],
                                  normMat[numTestVecs:rowCount, :],
                                  labels[numTestVecs:rowCount],
                                  5)
        expected = labels[idx]
        print("the classifier came back with %d, the real answer is: %d" % (predicted, expected))
        if predicted != expected:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
예제 #46
0
def datingClassTest():
    """Hold-out test of the dating classifier on 'datingTestSet.txt'.

    The first 10% of the normalised rows are classified (k=3) against the
    remaining rows; every prediction and the error rate are printed.
    """
    hoRatio = 0.10
    features, labels = kNN.file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = kNN.autoNorm(features)
    rowCount = normMat.shape[0]
    numTestVecs = int(rowCount*hoRatio)
    errorCount = 0.0
    for idx in range(numTestVecs):
        predicted = kNN.classify0(normMat[idx,:],
                                  normMat[numTestVecs:rowCount,:],
                                  labels[numTestVecs:rowCount],
                                  3)
        expected = labels[idx]
        print("the classifier came back with: {0}, the real answer is: {1}".format(predicted, expected))
        if predicted != expected:
            errorCount += 1.0

    print("the total error rate is: {0}".format(errorCount/float(numTestVecs)))
예제 #47
0
def calcErrorRate(trainingFeatureSet, trainingLabelSet, testDataDir):
    testFiles = os.listdir(testDataDir)
    testDataSetSize = len(testFiles)

    errorCount = 0.0

    for file in testFiles:
        filePath = os.path.join(testDataDir, file)
        testData = img2vector(filePath)
        trueLabel = file[0] # 测试数据的真实label
        predictedLabel = kNN.classify0(testData, trainingFeatureSet, trainingLabelSet, KNN_K)
        if predictedLabel != trueLabel:
            errorCount += 1
            print "Predicted : ", predictedLabel, "    True : " , trueLabel, "   File : ",  file

    return errorCount/testDataSetSize
예제 #48
0
파일: handwriting.py 프로젝트: bingSz/AI
def classifyHandWriting(path, trainingPath):
    """Classify the image file at `path` with kNN (k=3).

    Every file in `trainingPath` becomes one training row; its label is
    the part of the file name before the first '.' and the first '_'.
    Returns whatever kNN.classify0 returns.
    """
    trainingNames = listdir(trainingPath)
    trainingMat = zeros((len(trainingNames), 1024))
    labels = []

    for row, name in enumerate(trainingNames):
        trainingMat[row, :] = kNN.imgVector(trainingPath + '/' + name)
        stem = name.strip().split('.')[0]
        labels.append(stem.split('_')[0])

    sample = kNN.imgVector(path)
    return kNN.classify0(sample, trainingMat, labels, 3)
예제 #49
0
	def testKNN3(self):
		"""Classify the first file in 'testDigits' and expect the digit 0."""
		trainNames = os.listdir('trainingDigits')
		trainCount = len(trainNames)
		trainingMat = numpy.zeros((trainCount,1024))
		hwLabels = []
		for row, name in enumerate(trainNames):
			# Drop the extension, then take the digit before '_'.
			hwLabels.append(int(name.split('.')[0].split('_')[0]))
			trainingMat[row,:] = kNN.img2vector('trainingDigits/%s' % name)
		firstTest = os.listdir('testDigits')[0]
		# Parsed from the file name but not used by the assertion below.
		classNumStr = int(firstTest.split('.')[0].split('_')[0])
		vectorUnderTest = kNN.img2vector('testDigits/%s' % firstTest)
		predicted = kNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
		self.assertEqual(predicted, 0)
예제 #50
0
def handwritingClassTest():
    """Evaluate the kNN digit classifier (k=3) on the files in 'testDigits'.

    Prints every prediction, the number of errors and the error rate.
    """
    trainNames = os.listdir('trainingDigits')   # training data directory listing
    trainCount = len(trainNames)                # number of files in the directory
    trainingMat = zeros((trainCount, 1024))
    hwLabels = []                               # class of every training file
    for row, name in enumerate(trainNames):
        # File names look like 0_3.txt; the first part is the actual digit.
        hwLabels.append(int(name.split('.')[0].split('_')[0]))
        trainingMat[row, :] = img2vector('trainingDigits/%s' % name)

    testNames = os.listdir('testDigits')
    mTest = len(testNames)
    errorCount = 0.0
    for name in testNames:
        # The expected digit is derived from the file name.
        expected = int(name.split('.')[0].split('_')[0])
        # Vector built from the test image.
        sample = img2vector('testDigits/%s' % name)
        # sample is the data to classify, trainingMat acts as the
        # dictionary and hwLabels holds the matching class numbers.
        predicted = kNN.classify0(sample, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real anser is: %d " % (predicted, expected))
        if predicted != expected:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount/float(mTest)))

    
    
    
    
예제 #51
0
파일: 2.py 프로젝트: niumeng07/ML
# Simple kNN example script: numpy warm-up, a toy classification, then
# loading and plotting the dating data set.
from numpy import *
import operator

# numpy warm-up: the first random matrix is discarded; the second is
# wrapped as a numpy matrix and inverted via its .I attribute.
random.rand(4,4)
randMat=mat(random.rand(4,4))
randMatI=randMat.I

import kNN
# Sample data set shipped with the kNN module.
group,labels=kNN.createDataSet()

print(group)
print(labels)

# Classify the point [0,0] using the 3 nearest neighbours.
result=kNN.classify0([0,0],group,labels,3)

print(result)
print(kNN.classify0([1,1],group,labels,3))

#reload(kNN)
# Load the dating data set and show the matrix and the first 20 labels.
datingDataMat,datingLabels=kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels[0:20])

# kNN dating recommendation system: scatter-plot column 1 against
# column 2 of the data matrix. plt.show() is not called in this snippet.
import matplotlib
import matplotlib.pyplot as plt
fig=plt.figure()
ax=fig.add_subplot(111)
ax.scatter(datingDataMat[:,1],datingDataMat[:,2])
예제 #52
0
# Classify every training row with kNN (k=4) and write the predictions
# to out.csv under a "Label" header.
train=train_raw.values
test=test_raw.values

# Column 0 holds the labels. Take a copy: a plain slice is a view of
# `train`, and the binarisation below would overwrite the labels too.
labels=train[:,0].copy()

i,j=train.shape

result=[]

# Binarise: every value greater than 1 becomes 1. Column 0 is included.
train[train>1]=1

# A list has no 2-D item assignment, so collect the predictions by
# appending one per row.
for iii in range(i):
    result.append(kNN.classify0(train[iii:iii+1,0::],train,labels,4))

# NOTE(review): "wb" is the Python 2 idiom for csv output; under
# Python 3 open the file with mode "w" and newline="" instead.
with open("out.csv","wb") as predictions_file:
    open_file_object = csv.writer(predictions_file)
    # writerow writes a single header row; writerows(["Label"]) would
    # emit one row per character of the string.
    open_file_object.writerow(["Label"])
    open_file_object.writerows([label] for label in result)





# Classifying movie genres with k-Nearest Neighbors:
# build the sample data set and classify one point.

import kNN

# Sample data set shipped with the kNN module.
group, labels = kNN.createDataSet()
print(group)
print(labels)

# Classify the point [0, 0] using the 3 nearest neighbours.
result = kNN.classify0([0, 0], group, labels, 3)
print(result)
예제 #54
0
import kNN
import numpy as np
import operator

# kNN example: four 2-D points, two per class, and one query point.
data_set = np.array([[1.0, 1.0], [1.0, 0.9], [0.1, 0.1], [0.1, 0.2]])
labels = ['A', 'A', 'B', 'B']
in_x = [0.2, 0.3]

# Classify the query point using the 3 nearest neighbours.
classfied = kNN.classify0(in_x, data_set, labels, 3)
print('in_x classified: %s' % classfied)

import matplotlib.pyplot as plt

# Plot class A (rows 0-1) in red and class B (rows 2-3) in blue.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('kNN Algorithm Example')
ax.scatter(data_set[0:2, 0], data_set[0:2, 1], c='r', label='A')
ax.scatter(data_set[2:4, 0], data_set[2:4, 1], c='b', label='B')
# Write each point's label slightly to the left of and below the point.
for i in range(4):
    ax.text(data_set[i][0]-0.04, data_set[i][1]-0.01, labels[i])
# The query point is always drawn in blue, whatever `classfied` holds.
ax.scatter([0.2], [0.3], c='b')
ax.text(0.21, 0.29, '<-- Data classfied here')
ax.legend(loc='upper left')
plt.show()
예제 #55
0
# Plot the sample data set and three classified test points, then load
# the dating data set.
FigDating = plt.figure()

# Sample data set from the kNN module; the colour map below expects the
# labels to be 'A' or 'B'.
group,labels = kNN.createDataSet()
# Colour used for each label.
colormap1 = { 'A':'red', 'B':'blue'}
ColoredGroupLabels = []

# Build the list of colours, one per data item.
for things in labels:
    ColoredGroupLabels.append(colormap1[things])

# First of three stacked subplots (3 rows, 1 column), with fixed limits.
ax1 = FigDating.add_subplot(311, xlim=(-0.1,1.1), ylim=(-.05,1.15))
# Scatter-plot the data set, coloured according to the labels.
ax1.scatter(group[:,0], group[:,1], s= 20, c= ColoredGroupLabels, marker = 'o' )

# Testing with new points: each one is classified with k=3 and drawn as
# an 'x' in the colour of the predicted label.
# First point.
testvector = [.2, .2]
answer = kNN.classify0(testvector,group, labels, 3)
# Print `answer` to see the result.
ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' )
# Second point - created, classified and plotted.
testvector = [.5, .5]
answer = kNN.classify0(testvector,group, labels, 3)
ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' )
# Third point - created, classified and plotted.
testvector = [.75, .75]
answer = kNN.classify0(testvector,group, labels, 3)
ax1.scatter(testvector[0], testvector[1], s= 20, c= colormap1[answer], marker = 'x' )

'''Perform K-Nearest Neighbor classification on the datingTestSet2 data set. Do not forget to include the data set in the working directory'''
# Load the data values and labels from datingTestSet2.txt and keep the
# labels as a numpy array as well.
datingDataMat,datingLabels = kNN.file2matrix('datingTestSet2.txt')
datingLabelArray = np.array(datingLabels)
예제 #56
0
# Build the sample data set, print it and classify one point.
import kNN

group, labels = kNN.createDataSet()

print (group)

print (labels)
oneToTest=[0.0,1.0]
# NOTE(review): no neighbour count is passed here, while the other
# examples call classify0 with an explicit k (e.g. 3). Confirm that
# kNN.classify0 has a default for it; otherwise this raises TypeError.
print (kNN.classify0(oneToTest,group,labels))
예제 #57
0
	def testKNN(self):
		"""The sample point [0, 0] is expected to be classified as 'B' (k=3)."""
		samples, tags = createDataSet()
		predicted = kNN.classify0([0, 0], samples, tags, 3)
		self.assertEqual(predicted, 'B')
#coding:utf-8
# Python 2 script: classify a sample point, load the dating data set
# and print its normalised form.
import kNN

# Classify [0,0] against the sample data set with k=3.
group, labels = kNN.createDataSet()
print kNN.classify0([0,0],group,labels,3)

# Load the dating data and show the matrix and the first 20 labels.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet.txt')
print datingDataMat
print datingLabels[0:20]

# The triple-quoted block below is disabled plotting code. It is kept as
# a string literal, so it is never executed.
'''
import matplotlib
import matplotlib.pyplot as plt
from numpy import array
fig = plt.figure()
ax = fig.add_subplot(121)
ax.scatter(datingDataMat[:,1], datingDataMat[:,2], 15.0*array(datingLabels), 15.0*array(datingLabels))
ax.axis([-2, 25, -0.2, 2.0])
plt.xlabel(u'玩视频游戏所耗时间百分比')
plt.ylabel(u'每周消费的冰淇淋公升数')

ax = fig.add_subplot(122)
ax.scatter(datingDataMat[:,0], datingDataMat[:,1], 15.0*array(datingLabels), 15.0*array(datingLabels))
ax.axis([-5000, 100000, -1, 25])
plt.xlabel(u'每年获取的飞行常客里程数')
plt.ylabel(u'玩视频游戏所耗时间百分比')
plt.show()
'''

# Normalise the data matrix; ranges and minVals are returned as well
# but are not used in this snippet.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
print normMat
예제 #59
0
파일: Test.py 프로젝트: CrazyRacer/python2
# Python 2 script: one-neighbour sanity check, then a scatter plot of
# the data loaded from test2.txt.
import kNN
import numpy
import matplotlib
import matplotlib.pyplot as plt

# Classify [0, 0] against two labelled points using a single neighbour.
print kNN.classify0([0, 0], numpy.array([[1, 0], [2, 1]]), ['A', 'B'], 1)

datingDataMat, datingLabels = kNN.file2matrix('test2.txt')
# datingDataMat = numpy.zeros((3,3))
# datingDataMat[2,:] = [2,1,0]
# print datingDataMat ,datingDataMat[:,2]
print datingLabels
print 15.0*numpy.array(datingLabels)

# Scatter column 1 against column 2. The scaled labels are passed as the
# third and fourth positional arguments of scatter (marker size and
# colour in matplotlib).
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],15.0*numpy.array(datingLabels),15.0*numpy.array(datingLabels))
plt.show()

# ax.scatter([2,3,1],[3,1,2])
# Classify two sample points and load the dating data set that sits
# next to this script.
import os
import kNN

CURRENT_DIR = os.path.dirname(__file__)

groups, labels = kNN.createDataset()

print(kNN.classify0([0,0,0],groups,labels,3))

# Join the directory and the file name as separate components. Gluing
# '/datingTestSet.txt' onto an empty CURRENT_DIR (script started from
# its own directory) would point at the filesystem root.
dataSetFile = os.path.join(CURRENT_DIR, 'datingTestSet.txt')

datingDataMat,datingLabels = kNN.file2matrix(dataSetFile)

# NOTE(review): this sample is classified against the toy `groups` and
# `labels`, not the datingDataMat and datingLabels loaded just above.
# Confirm which data set was intended.
print(kNN.classify0([40920, 8.326976, 0.953952],groups,labels,3))