    def testDimByLevel(self):
        tree = kdTree.KdTree()
        self.assertEqual(2, tree._dim)
        self.assertEqual(0, tree._dim_by_level(0))
        self.assertEqual(1, tree._dim_by_level(1))
        self.assertEqual(0, tree._dim_by_level(2))

        tree = kdTree.KdTree(5)
        self.assertEqual(5, tree._dim)
        self.assertEqual(0, tree._dim_by_level(0))
        self.assertEqual(1, tree._dim_by_level(1))
        self.assertEqual(2, tree._dim_by_level(2))
        self.assertEqual(0, tree._dim_by_level(5))
        self.assertEqual(1, tree._dim_by_level(6))
        self.assertEqual(4, tree._dim_by_level(9))
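The expected values above cycle through 0 .. dim-1 and then wrap around, which matches the usual kd-tree convention of deriving the splitting axis from the node's depth. A minimal sketch of what _dim_by_level is assumed to compute (the real implementation is not part of this listing):

    def _dim_by_level(self, level):
        # Cycle through the axes: level 0 splits on x, level 1 on y, ...,
        # wrapping around after every self._dim levels.
        return level % self._dim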
Example #2
def datingClassTest():
    """
    Desc:
        对约会网站的测试方法,并将分类错误的数量和分类错误率打印出来
    Args:
        None
    Returns:
        None
    """
    # Fraction of the data held out for testing (training fraction = 1 - hoRatio)
    hoRatio = 0.1  # test range: part of the rows are tested, the rest serve as training samples
    # Load the data set from file
    datingDataMat, datingLabels = file2matrix(
        "datingTestSet2.txt")  # load data set from file
    # Normalize the data
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # m is the number of rows of data, i.e. the first dimension of the matrix
    m = normMat.shape[0]
    # Number of test samples; rows numTestVecs:m are the training samples
    numTestVecs = int(m * hoRatio)
    print('numTestVecs=', numTestVecs)
    errorCount = 0
    # Build the kd-tree from the training rows, paired with their labels
    kd = kt.KdTree(normMat[numTestVecs:].tolist(), datingLabels[numTestVecs:])
    for i in range(numTestVecs):
        # Classify the test sample with the kd-tree
        res = kt.find_nearest(kd, normMat[i])
        classifierResult = res.node_type
        # classifierResult = classify0(normMat[i], normMat[numTestVecs:m],
        #                              datingLabels[numTestVecs:m], 3)
        print("the classifier came back with: %d, the real answer is: %d" %
              (classifierResult, datingLabels[i]))
        errorCount += classifierResult != datingLabels[i]
    print("the total error rate is: %f" % (errorCount / numTestVecs))
    print(errorCount)
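autoNorm is not shown in this listing; judging by the (normMat, ranges, minVals) triple it returns, it is assumed to min-max scale every feature column into [0, 1]. A sketch under that assumption:

def autoNorm(dataSet):
    # Assumed behaviour: min-max scale every feature column, (x - min) / (max - min).
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals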
    def testInsert(self):
        tree = kdTree.KdTree()
        self.assertTrue(tree.is_empty())

        p = kdTree.Point([1, 2])
        tree.insert(p)
        self.assertFalse(tree.is_empty())
        self.assertIsNotNone(tree._root)
    def testSearch3(self):
        p = kdTree.Point([1, 2])
        pp = kdTree.Point([1, 2])

        tree = kdTree.KdTree()
        tree.insert(p)

        self.assertEqual(p, tree.search(pp))
        self.assertTrue(
            tree.search(pp) is p, "Returned point should be the node's point")
        self.assertFalse(
            tree.search(pp) is pp,
            "Returned point should be the node's point, not the given point")
    def testLeveledDist(self):
        tree = kdTree.KdTree()

        p1 = kdTree.Point([3, 4])
        p2 = kdTree.Point([0, 0])
        node = kdTree.Node(p2)

        self.assertEqual(25, p1.squared_distance_to(p2))
        self.assertEqual(5, p1.distance_to(p2))
        self.assertEqual(3, tree.leveled_distance(node, p1,
                                                  0))  # 0 => X compare
        self.assertEqual(4, tree.leveled_distance(node, p1,
                                                  1))  # 1 => Y compare
        self.assertEqual(3, tree.leveled_distance(node, p1,
                                                  2))  # 0 => X compare
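The assertions above pick out 3 (the x difference between p1 and p2) at levels 0 and 2, and 4 (the y difference) at level 1, so leveled_distance is assumed to return the absolute coordinate difference along the axis used at the given level. A possible sketch, assuming the node exposes its point as node.point and points are indexable:

    def leveled_distance(self, node, point, level):
        # Distance from the query point to the node's splitting plane:
        # only the coordinate of the axis used at this level matters.
        d = self._dim_by_level(level)
        return abs(point[d] - node.point[d])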
    def testSearch(self):
        p1 = kdTree.Point([3, 4])
        p2 = kdTree.Point([0, 0])
        p3 = kdTree.Point([1, 2])

        tree = kdTree.KdTree()
        tree.insert(p1)
        tree.insert(p2)

        self.assertEqual(p1, tree.search(p1))
        self.assertTrue(tree.contains(p1))

        self.assertEqual(p2, tree.search(p2))
        self.assertTrue(tree.contains(p2))

        self.assertEqual(None, tree.search(p3))
        self.assertFalse(tree.contains(p3))
Example #7
def handwritingClassTest():
    """
    Desc:
        手写数字识别分类器,并将分类错误数和分类错误率打印出来
    Args:
        None
    Returns:
        None
    """
    # 1. Load the training data
    hwLabels = []
    trainingFileList = os.listdir("trainingDigits")  # load the training set
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    # hwLabels stores the digit label (0-9) for each index; trainingMat stores the image vector for each row
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]  # take off .txt
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        # Flatten the 32*32 matrix into a 1*1024 vector
        trainingMat[i] = img2vector('trainingDigits/%s' % fileNameStr)

    # 2. Load the test data
    testFileList = os.listdir('testDigits')  # iterate through the test set
    errorCount = 0
    mTest = len(testFileList)
    kd = kt.KdTree(trainingMat.tolist(), hwLabels)

    for i in range(mTest):
        fileNameStr = testFileList[i]
        fileStr = fileNameStr.split('.')[0]  # take off .txt
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        result = kt.find_nearest(kd, vectorUnderTest[0])
        classifierResult = result.node_type
        # classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" %
              (classifierResult, classNumStr))
        errorCount += classifierResult != classNumStr
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / mTest))
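img2vector is assumed to flatten a 32x32 text image (one digit character per pixel) into a 1x1024 row vector, which is why trainingMat is allocated as zeros((m, 1024)) above. A sketch under that assumption:

from numpy import zeros

def img2vector(filename):
    # Read 32 lines of 32 characters each and write them into a 1 x 1024 vector.
    returnVect = zeros((1, 1024))
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect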
    def testRange(self):
        p1 = kdTree.Point([3, 1])
        p2 = kdTree.Point([4, 4])
        p3 = kdTree.Point([2, 3])
        p4 = kdTree.Point([0.5, 0.5])

        tree = kdTree.KdTree()
        tree.insert(p1)
        tree.insert(p2)
        tree.insert(p3)
        tree.insert(p4)

        points = tree.range(kdTree.Point([0, 0]), kdTree.Point([1, 1]))
        self.assertEqual(1, len(points))
        self.assertTrue(p4 in points)

        points = tree.range(kdTree.Point([1, 2]), kdTree.Point([5, 5]))
        self.assertEqual(2, len(points))
        self.assertTrue(p2 in points)
        self.assertTrue(p3 in points)
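range(lo, hi) is expected to return exactly the stored points whose coordinates all lie inside the axis-aligned box spanned by the two corner points. The containment test it relies on can be sketched as follows, assuming point coordinates are accessible as a sequence (the helper name is hypothetical):

def _in_box(coords, lo, hi):
    # A point is inside the box when every coordinate sits between the
    # corresponding coordinates of the lower and upper corners.
    return all(lo[d] <= coords[d] <= hi[d] for d in range(len(coords)))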
    def testSearch2(self):
        p1 = kdTree.Point([3, 1, 3])
        p2 = kdTree.Point([4, 4, 2])
        p3 = kdTree.Point([2, 3, 4])

        tree = kdTree.KdTree(dimensions=3)
        tree.insert(p1)
        tree.insert(p2)
        tree.insert(p3)

        self.assertEqual(p1, tree.search(p1))
        self.assertEqual(p2, tree.search(p2))
        self.assertEqual(p3, tree.search(p3))

        p = kdTree.Point([1, 2, 3])
        self.assertEqual(None, tree.search(p))

        self.assertTrue(tree.contains(p1))
        self.assertTrue(tree.contains(p2))
        self.assertTrue(tree.contains(p3))
        self.assertFalse(tree.contains(p))
    def testNearest3D(self):
        p1 = kdTree.Point([3, 1, 0])
        p2 = kdTree.Point([4, 4, 0])
        p3 = kdTree.Point([2, 3, 1])
        p4 = kdTree.Point([0.5, 0.5, 10])

        tree = kdTree.KdTree(dimensions=3)
        tree.insert(p1)
        tree.insert(p2)
        tree.insert(p3)
        tree.insert(p4)
        self.assertEqual(4, tree.size())

        p = kdTree.Point([2, 2, 1])
        nn = tree.nearest(p)
        self.assertIsNotNone(nn)
        self.assertEqual(p3, nn)

        p = kdTree.Point([2, 2, 8])
        nn = tree.nearest(p)
        self.assertIsNotNone(nn)
        self.assertEqual(p4, nn)
    def testNearest(self):
        p1 = kdTree.Point([3, 1])
        p2 = kdTree.Point([4, 4])
        p3 = kdTree.Point([2, 3])
        p4 = kdTree.Point([0.5, 0.5])

        tree = kdTree.KdTree()
        tree.insert(p1)
        tree.insert(p2)
        tree.insert(p3)
        tree.insert(p4)
        self.assertEqual(4, tree.size())

        p = kdTree.Point([1, 2])
        nn = tree.nearest(p)
        self.assertIsNotNone(nn)
        self.assertEqual(p3, nn)

        p = kdTree.Point([5, 5])
        nn = tree.nearest(p)
        self.assertIsNotNone(nn)
        self.assertEqual(p2, nn)
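Both nearest-neighbour tests verify that the search crosses over to the far side of a splitting plane when a closer point may live there. The pruning rule behind that behaviour can be sketched with the leveled_distance helper tested above; the function name below is hypothetical:

def _other_side_may_be_closer(tree, node, query, level, best_dist):
    # The far subtree can only hold a closer point if the query is nearer
    # to the node's splitting plane than to the best candidate found so far.
    return tree.leveled_distance(node, query, level) < best_dist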
Example #12
import kdTree as kd1  # module providing the KdTree class; the exact module name is an assumption
import kdTreeFind as kf
from time import perf_counter
from random import random

def random_point(k):
    return [random() for _ in range(k)]



def random_points(k, n):
    return [random_point(k) for _ in range(n)]


if __name__ == "__main__":
    data = [[2, 3], [5, 4], [9, 6], [4, 7], [8, 1], [7, 2]]  # samples

    kd = kd1.KdTree(data)

    ret = kf.find_nearest(kd, [3.0, 4.5])
    print(ret)

    N = 400000
    t0 = perf_counter()
    kd2 = kd1.KdTree(random_points(3, N))
    ret2 = kf.find_nearest(kd2, [0.1, 0.5, 0.8])
    t1 = perf_counter()
    print("time: ", t1 - t0, "s")
    print(ret2)
def acc(pred, gt):
    # Print classification accuracy as a percentage; the signature is assumed
    # from the acc(...) calls below.
    print((1 - np.count_nonzero(np.subtract(pred, gt)) / gt.shape[0]) * 100,
          "% accuracy")


if __name__ == '__main__':
    if len(sys.argv) < 10:
        print(
            'Usage: python digitClassifer.py [1] [training file] [path to training data] [ground truth file] '
            '[testing file] [training junk file] [path to junk data] [ground truth file] [testing junk file]'
        )
    else:
        trainVec, testVec, train_labels, test_labels, train_int_classes, test_int_classes, training, testing = pre_processing_pipeline(
        )
        if int(sys.argv[1]) == 1:
            model, model2 = training_pipeline(trainVec, train_labels,
                                              lambda x: kd.KdTree(x, 1),
                                              lambda x1, x2: svm.SVM(x1, x2))
        else:
            load = open('kdTreeModel.txt', 'rb')
            model = pk.load(load)
            load = open('svmModel.txt', 'rb')
            model2 = pk.load(load)

        # kdtree training and testing results
        print("\nTesting KDTREE->")
        kdtest_classified = classify_pipeline(trainVec, model)
        print("\nTraining Accuracy:", end="")
        acc(kdtest_classified, train_labels)
        outputs(training, kdtest_classified, train_int_classes,
                'kdTree-training')
    def testIsEmpty(self):
        tree = kdTree.KdTree()
        self.assertTrue(tree.is_empty())