def testDimByLevel(self):
    """The splitting dimension must cycle with level, modulo the tree's dimensionality."""
    tree = kdTree.KdTree()  # default tree is 2-D
    self.assertEqual(2, tree._dim)
    for level, expected in ((0, 0), (1, 1), (2, 0)):
        self.assertEqual(expected, tree._dim_by_level(level))
    tree = kdTree.KdTree(5)  # a 5-D tree wraps around every five levels
    self.assertEqual(5, tree._dim)
    for level, expected in ((0, 0), (1, 1), (2, 2), (5, 0), (6, 1), (9, 4)):
        self.assertEqual(expected, tree._dim_by_level(level))
def datingClassTest():
    """
    Desc:
        Evaluate the dating-site classifier: print each kd-tree prediction next
        to the ground truth, then the error count and error rate.
    Args:
        None
    Returns:
        None
    """
    # Fraction of the data held out for testing (training fraction = 1 - hoRatio).
    hoRatio = 0.1
    # Load raw samples and labels from file.
    datingDataMat, datingLabels = file2matrix("datingTestSet2.txt")
    # Normalize features so no single feature dominates the distance metric.
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # m: total number of samples (rows of the matrix).
    m = normMat.shape[0]
    # First numTestVecs rows are the test set; the remainder is the training set.
    numTestVecs = int(m * hoRatio)
    print('numTestVecs=', numTestVecs)
    errorCount = 0
    # BUG FIX: labels must be sliced the same way as the data — previously the
    # full datingLabels list was paired with the truncated training matrix,
    # misaligning every label (classify0 below uses the sliced labels, which
    # confirms the intended pairing).
    kd = kt.KdTree(normMat[numTestVecs:].tolist(), datingLabels[numTestVecs:])
    for i in range(numTestVecs):
        # Classify via nearest neighbour in the kd-tree.
        res = kt.find_nearest(kd, normMat[i])
        classifierResult = res.node_type
        # BUG FIX: this brute-force call used to overwrite classifierResult,
        # silently discarding the kd-tree result. Kept for reference only,
        # matching handwritingClassTest:
        # classifierResult = classify0(normMat[i], normMat[numTestVecs:m], datingLabels[numTestVecs:m], 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]))
        errorCount += classifierResult != datingLabels[i]
    print("the total error rate is: %f" % (errorCount / numTestVecs))
    print(errorCount)
def testInsert(self):
    """A fresh tree is empty; inserting one point populates the root."""
    tree = kdTree.KdTree()
    self.assertTrue(tree.is_empty())
    point = kdTree.Point([1, 2])
    tree.insert(point)
    # After a single insert the tree is non-empty and has a root node.
    self.assertFalse(tree.is_empty())
    self.assertIsNotNone(tree._root)
def testSearch3(self):
    """search() must hand back the stored point object, never the query key."""
    stored = kdTree.Point([1, 2])
    query = kdTree.Point([1, 2])  # equal to `stored`, but a distinct object
    tree = kdTree.KdTree()
    tree.insert(stored)
    self.assertEqual(stored, tree.search(query))
    # Identity checks: the result is the node's own point, not the query.
    self.assertTrue(
        tree.search(query) is stored,
        "Returned point should be the node's point")
    self.assertFalse(
        tree.search(query) is query,
        "Returned point should be the node's point, not the given point")
def testLeveledDist(self):
    """leveled_distance compares one coordinate, chosen by level mod dimension."""
    tree = kdTree.KdTree()
    probe = kdTree.Point([3, 4])
    origin = kdTree.Point([0, 0])
    node = kdTree.Node(origin)
    # Full metrics between (3, 4) and the origin: a 3-4-5 triangle.
    self.assertEqual(25, probe.squared_distance_to(origin))
    self.assertEqual(5, probe.distance_to(origin))
    # Per-level distance alternates between the x and y axes.
    self.assertEqual(3, tree.leveled_distance(node, probe, 0))  # level 0 -> x
    self.assertEqual(4, tree.leveled_distance(node, probe, 1))  # level 1 -> y
    self.assertEqual(3, tree.leveled_distance(node, probe, 2))  # level 2 wraps -> x
def testSearch(self):
    """Inserted points are found by search/contains; an absent point is not."""
    inserted = [kdTree.Point([3, 4]), kdTree.Point([0, 0])]
    missing = kdTree.Point([1, 2])
    tree = kdTree.KdTree()
    for point in inserted:
        tree.insert(point)
    # Every inserted point is retrievable and reported as contained.
    for point in inserted:
        self.assertEqual(point, tree.search(point))
        self.assertTrue(tree.contains(point))
    # A point never inserted yields None / False.
    self.assertEqual(None, tree.search(missing))
    self.assertFalse(tree.contains(missing))
def handwritingClassTest():
    """
    Desc:
        Evaluate the handwritten-digit classifier and print the number of
        misclassifications and the overall error rate.
    Args:
        None
    Returns:
        None
    """
    # 1. Load the training set: file names look like "<digit>_<index>.txt",
    #    each holding a 32x32 image flattened into a 1x1024 vector.
    hwLabels = []
    trainingFileList = os.listdir("trainingDigits")
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]  # take off .txt
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i] = img2vector('trainingDigits/%s' % fileNameStr)
    # 2. Classify every test image with a kd-tree nearest-neighbour lookup.
    testFileList = os.listdir('testDigits')
    errorCount = 0
    mTest = len(testFileList)
    kd = kt.KdTree(trainingMat.tolist(), hwLabels)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]  # take off .txt
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        result = kt.find_nearest(kd, vectorUnderTest[0])
        classifierResult = result.node_type
        # classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        errorCount += classifierResult != classNumStr
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / mTest))
def testRange(self):
    """range() must return exactly the stored points inside the query box."""
    samples = [
        kdTree.Point([3, 1]),
        kdTree.Point([4, 4]),
        kdTree.Point([2, 3]),
        kdTree.Point([0.5, 0.5]),
    ]
    tree = kdTree.KdTree()
    for sample in samples:
        tree.insert(sample)
    # Only (0.5, 0.5) lies inside the unit box [0, 1] x [0, 1].
    hits = tree.range(kdTree.Point([0, 0]), kdTree.Point([1, 1]))
    self.assertEqual(1, len(hits))
    self.assertTrue(samples[3] in hits)
    # The box [1, 5] x [2, 5] contains (4, 4) and (2, 3) but not (3, 1).
    hits = tree.range(kdTree.Point([1, 2]), kdTree.Point([5, 5]))
    self.assertEqual(2, len(hits))
    self.assertTrue(samples[1] in hits)
    self.assertTrue(samples[2] in hits)
def testSearch2(self):
    """search/contains must work in a 3-dimensional tree as well."""
    inserted = [
        kdTree.Point([3, 1, 3]),
        kdTree.Point([4, 4, 2]),
        kdTree.Point([2, 3, 4]),
    ]
    tree = kdTree.KdTree(dimensions=3)
    for point in inserted:
        tree.insert(point)
    # Each stored point is found by search.
    for point in inserted:
        self.assertEqual(point, tree.search(point))
    # A point that was never inserted is not found.
    missing = kdTree.Point([1, 2, 3])
    self.assertEqual(None, tree.search(missing))
    for point in inserted:
        self.assertTrue(tree.contains(point))
    self.assertFalse(tree.contains(missing))
def testNearest3D(self):
    """Nearest-neighbour queries must account for the third dimension."""
    samples = [
        kdTree.Point([3, 1, 0]),
        kdTree.Point([4, 4, 0]),
        kdTree.Point([2, 3, 1]),
        kdTree.Point([0.5, 0.5, 10]),
    ]
    tree = kdTree.KdTree(dimensions=3)
    for sample in samples:
        tree.insert(sample)
    self.assertEqual(4, tree.size())
    # Near the ground plane: (2, 3, 1) is closest to (2, 2, 1).
    nearest = tree.nearest(kdTree.Point([2, 2, 1]))
    self.assertIsNotNone(nearest)
    self.assertEqual(samples[2], nearest)
    # High above the plane: only (0.5, 0.5, 10) is close in z.
    nearest = tree.nearest(kdTree.Point([2, 2, 8]))
    self.assertIsNotNone(nearest)
    self.assertEqual(samples[3], nearest)
def testNearest(self):
    """nearest() returns the stored point closest to the query point."""
    samples = [
        kdTree.Point([3, 1]),
        kdTree.Point([4, 4]),
        kdTree.Point([2, 3]),
        kdTree.Point([0.5, 0.5]),
    ]
    tree = kdTree.KdTree()
    for sample in samples:
        tree.insert(sample)
    self.assertEqual(4, tree.size())
    # (2, 3) is the closest stored point to (1, 2).
    nearest = tree.nearest(kdTree.Point([1, 2]))
    self.assertIsNotNone(nearest)
    self.assertEqual(samples[2], nearest)
    # (4, 4) is the closest stored point to (5, 5).
    nearest = tree.nearest(kdTree.Point([5, 5]))
    self.assertIsNotNone(nearest)
    self.assertEqual(samples[1], nearest)
# TODO(review): `kd1` was used below but never imported (NameError at runtime);
# confirm this is the module that defines KdTree(data).
import kdTree as kd1
import kdTreeFind as kf
from random import random
from time import perf_counter
import sys


def random_point(k):
    """Return one random point with k coordinates in [0, 1)."""
    return [random() for _ in range(k)]


def random_points(k, n):
    """Return n random k-dimensional points."""
    return [random_point(k) for _ in range(n)]


if __name__ == "__main__":
    # Small hand-built 2-D sample set.
    data = [[2, 3], [5, 4], [9, 6], [4, 7], [8, 1], [7, 2]]  # samples
    kd = kd1.KdTree(data)
    ret = kf.find_nearest(kd, [3.0, 4.5])
    print(ret)  # Python 2 print statements converted to Python 3 calls

    # Benchmark: build a tree over 400k random 3-D points, then query once.
    N = 400000
    t0 = perf_counter()  # time.clock() was removed in Python 3.8
    kd2 = kd1.KdTree(random_points(3, N))
    ret2 = kf.find_nearest(kd2, [0.1, 0.5, 0.8])
    t1 = perf_counter()
    print("time: ", t1 - t0, "s")
    print(ret2)
    # Tail of a definition that begins above this chunk — presumably acc(pred, gt);
    # prints the percentage of predictions matching the ground truth.
    print((1 - np.count_nonzero(np.subtract(pred, gt)) / gt.shape[0]) * 100, "% accuracy")


if __name__ == '__main__':
    # Entry point: expects 9 positional arguments describing the data layout.
    if len(sys.argv) < 10:
        print(
            'Usage: [[Python]] digitClassifer.py [1] [training file] [path to training data] [ground truth file] '
            '[testing file] [training junk file] [path to junk data] [ground truth file] [testing junk file]'
        )
    else:
        # Load and vectorize the training/testing data sets.
        trainVec, testVec, train_labels, test_labels, train_int_classes, test_int_classes, training, testing = pre_processing_pipeline(
        )
        if int(sys.argv[1]) == 1:
            # argv[1] == 1: train fresh kd-tree and SVM models.
            model, model2 = training_pipeline(trainVec, train_labels,
                                              lambda x: kd.KdTree(x, 1),
                                              lambda x1, x2: svm.SVM(x1, x2))
        else:
            # Otherwise unpickle previously saved models from disk.
            # NOTE(review): the files are opened without a context manager and
            # the first handle is rebound before being closed — confirm this is
            # acceptable here.
            load = open('kdTreeModel.txt', 'rb')
            model = pk.load(load)
            load = open('svmModel.txt', 'rb')
            model2 = pk.load(load)
        # kdtree training and testing results
        # (evaluates the kd-tree model on the training set and writes outputs;
        # indentation reconstructed from a collapsed source — assumed to run in
        # both branches above, TODO confirm)
        print("\nTesting KDTREE->")
        kdtest_classified = classify_pipeline(trainVec, model)
        print("\nTraining Accuracy:", end="")
        acc(kdtest_classified, train_labels)
        outputs(training, kdtest_classified, train_int_classes, 'kdTree-training')
def testIsEmpty(self):
    """A tree with no insertions reports itself as empty."""
    self.assertTrue(kdTree.KdTree().is_empty())