def testBuild(self):
    """Build a 3-d KD-tree from a small labelled point set and print
    the stored data of the nearest neighbour of (4, 4, 6).

    searchKNN returns (node, distance) pairs, so [0][0] is the best node.
    """
    kd = KDTree(dimensions=3)
    points = array([[5, 4, 3], [10, 2, 1], [4, 5, 6], [6, 1, 5], [1, 2, 3]])
    labels = array([[1], [1], [1], [-1], [-1]])
    kd.build(points, labels)
    print(kd.searchKNN(([4, 4, 6]), 3)[0][0]._data)
def maisProximo():
    """Benchmark nearest-neighbour queries on KD-trees of growing
    dimensionality.

    For each dimensionality k (startN..maxN step increment) the query is
    repeated N times; the mean wall-clock time of one nearestNeighbour
    call is written to 'dados.dat' (with k in 'labels.dat') and finally
    plotted via drawGraph.
    """
    maxN = 1000
    startN = 1
    increment = 100
    N = 40
    dados = []
    labels = []
    # Context managers guarantee both files are closed even on error.
    with open('labels.dat', 'w') as f1, open('dados.dat', 'w') as f2:
        for k in range(startN, maxN, increment):
            b = 0.0
            for j in range(N):
                lista = []
                inserirDados(lista, k)
                arv = KDTree(k, 2)
                construirArvore(arv, lista)
                t1 = time.time()
                # randint is inclusive on both ends, so the upper bound
                # must be len(lista) - 1 to avoid an IndexError.
                arv.nearestNeighbour(lista[randint(0, len(lista) - 1)])
                t2 = time.time()
                b += t2 - t1
            f1.write(str(k))
            f2.write(str(b / N))
            print(k, (b / N))
            dados.append(b / N)
            labels.append(k)
    drawGraph(dados, labels, "titulo")
def test_point_types(self):
    """Trees must accept tuples, lists and namedtuples interchangeably."""
    point1 = (2, 3, 4)
    point2 = [4, 5, 6]
    Point = collections.namedtuple('Point', 'x y z')
    point3 = Point(5, 3, 2)
    tree = KDTree.create([point1, point2, point3])
    res, dist = tree.search_nn((1, 2, 3))
    self.assertEqual(res, KDTree.KDNode((2, 3, 4)))
def test_invalid_child(self):
    """ Children on wrong subtree invalidate Tree """
    # A node whose axis disagrees with its would-be parent's split.
    bad_child = KDTree.KDNode((3, 2))
    bad_child.axis = 2
    # Attached as a left child, the tree must report itself invalid.
    left_tree = KDTree.create([(2, 3)])
    left_tree.left = bad_child
    self.assertFalse(left_tree.is_valid())
    # Same misplaced node as a right child must also invalidate the tree.
    right_tree = KDTree.create([(4, 1)])
    right_tree.right = bad_child
    self.assertFalse(right_tree.is_valid())
def contact_residues(A, B, cutoff=4.0, pointing=45.0):
    """Return the sorted residue numbers of atoms in A contacting B.

    An atom of A counts as a contact when its nearest atom in B lies
    within `cutoff` of it and the side-chain angle toward that contact
    is below `pointing` degrees.

    NOTE(review): the distance test compares against cutoff squared, so
    tree.nearest presumably reports squared distances — confirm.
    """
    tree = KDTree([b.co for b in B])
    atoms = []
    for a in A:
        d, co = tree.nearest(a.co)
        # Squared comparison avoids a sqrt per atom.
        if d < cutoff * cutoff:
            b = tree.index_of(co)
            ang = side_chain_angle_to_contact(a, A, B[b])
            # -1 flags "no angle available"; keep only inward-pointing atoms.
            if ang != -1 and ang < pointing:
                atoms.append(a)
    # Deduplicate residue numbers and return them in ascending order.
    return sorted({x.rnum for x in atoms})
def get_knn_e_dist_with_kdtree(train_samples, new_inst, k):
    """Return (indices, distances) of the k nearest training samples
    to `new_inst`, using the cached module-level KD-tree.

    NOTE(review): entry 0 of knn_heaps.knns is skipped — presumably a
    heap sentinel slot; confirm against KDTree.find_knn_from_kd_tree.
    The heap stores squared distances, hence the final sqrt.
    """
    kdtree = get_kd_tree(train_samples)
    knn_heaps = KDTree.find_knn_from_kd_tree(new_inst, kdtree, k)
    idx = np.array([item[2] for item in knn_heaps.knns[1:]])
    dist_sq = np.array([item[0] for item in knn_heaps.knns[1:]])
    return idx, np.sqrt(dist_sq)
def get_kd_tree(train_samples):
    """Build (once) and return the module-level KD-tree over train_samples.

    The tree is cached in the global `kd_tree`; subsequent calls ignore
    `train_samples` and return the cached tree unchanged.
    """
    global kd_tree
    if kd_tree is None:
        m, _ = train_samples.shape
        # Row indices double as the labels attached to each sample.
        labels = np.arange(m)
        kd_tree = KDTree.construct_kd_tree(train_samples, labels)
    return kd_tree
def testBalanced(self):
    """Build a 2-d tree from three labelled random clusters and print
    whether it came out balanced."""
    t = array([[0, 1, 2]])
    t = t.T
    samples = 5000
    # Three clusters of uniform noise shifted to distinct centres.
    x1 = rand(samples, 2) + array([3, 3])
    y1 = ones((samples, 1), int) * t[0][0]
    x2 = rand(samples, 2) + array([6, 6])
    y2 = ones((samples, 1), int) * t[1][0]
    x3 = rand(samples, 2) + array([7, 2])
    y3 = ones((samples, 1), int) * t[2][0]
    x = append(append(x1, x2, axis=0), x3, axis=0)
    y = append(append(y1, y2, axis=0), y3, axis=0)
    kd = KDTree(dimensions=2)
    kd.build(x, y)
    # NOTE(review): 'isBlanced' looks like a typo for isBalanced — kept
    # as-is since it must match the KDTree API.
    print(kd.isBlanced())
def build(self, points, labels):
    '''
    Fit the underlying KD-tree to the training set.

    With n samples and m features:
    points: array, training data, n * m
    labels: array, n * 1
    '''
    feature_count = points.shape[1]
    self._kdTree = KDTree(dimensions=feature_count)
    self._kdTree.build(points, labels)
def test_search_nn(self, nodes=100):
    """search_nn must agree with a brute-force scan on random points."""
    sampled = list(islice(random_points(), 0, nodes))
    tree = KDTree.create(sampled)
    query = random_point()
    nn, dist = tree.search_nn(query)
    best, best_dist = self.find_best(tree, query)
    # On failure, dump the full point set and the query for replay.
    diagnostic = ', '.join(repr(p) for p in sampled) + ' / ' + repr(query)
    self.assertEqual(best_dist, dist, msg=diagnostic)
def test_remove_duplicates(self):
    """ creates a tree with only duplicate points, and removes them all """
    points = [(1, 1)] * 100
    tree = KDTree.create(points)
    self.assertTrue(tree.is_valid())
    random.shuffle(points)
    while points:
        tree = tree.remove(points.pop(0))
        # The tree must stay valid after every single removal...
        self.assertTrue(tree.is_valid())
        # ...and shrink by exactly one node per removed point.
        self.assertEqual(len(list(tree.inorder())), len(points))
def do_random_remove(self):
    """ Creates a random tree, removes all points in random order """
    points = list(set(islice(random_points(), 0, 20)))
    tree = KDTree.create(points)
    self.assertTrue(tree.is_valid())
    random.shuffle(points)
    while points:
        victim = points.pop(0)
        tree = tree.remove(victim)
        # Tree must remain valid after each removal.
        self.assertTrue(tree.is_valid())
        remaining = [n.data for n in tree.inorder()]
        # The removed point must no longer appear in a traversal...
        self.assertTrue(victim not in remaining)
        # ...and exactly one node must be gone (not more, not less).
        self.assertEqual(len(remaining), len(points))
def test_search_knn(self):
    """search_knn must rank neighbours exactly like a brute-force sort."""
    points = [(50, 20), (51, 19), (1, 80)]
    tree = KDTree.create(points)
    query = (48, 18)
    # Brute-force ranking of every node by its distance to the query.
    expected = sorted(([node, node.dist(query)] for node in tree.inorder()),
                      key=lambda pair: pair[1])
    # For k = 1, 2, 3 the reported distances must match position by position.
    for k in (1, 2, 3):
        result = tree.search_knn(query, k)
        for i in range(k):
            self.assertEqual(result[i][1], expected[i][1])
def test_remove_empty_tree(self):
    """Removing from an empty tree is a no-op and leaves the tree falsy."""
    empty = KDTree.create(dimensions=2)
    empty.remove((1, 2))
    self.assertFalse(bool(empty))
def random_tree(nodes=20, dimensions=3, minval=0, maxval=100):
    """Create a KD-tree from `nodes` random points.

    NOTE(review): dimensions/minval/maxval are accepted but never used
    here — random_points() presumably fixes those itself; confirm.
    """
    return KDTree.create(list(islice(random_points(), 0, nodes)))
def testAdd(self):
    """Adding a single labelled point to an empty 3-d tree must not raise."""
    kd = KDTree(dimensions=3)
    # The original also built unused points/labels arrays; removed.
    kd.add([5, 4, 3], 1)
class KNN(object):
    """K-nearest-neighbour classifier backed by a KD-tree.

    The original defined __init__ and add twice each; in Python the last
    definition silently wins, so the no-argument __init__ and the first
    add stub were dead code. Both pairs are merged here into single,
    backward-compatible definitions.
    """

    def __init__(self, points=None, labels=None):
        '''
        Optionally build the classifier immediately.

        suppose n is samples' amount, m is features' amount.
        points: array, training data, n * m
        labels: array, n * 1

        If both are omitted the classifier starts empty; call build()
        before querying.
        '''
        self._kdTree = None
        if points is not None and labels is not None:
            self.build(points, labels)

    def build(self, points, labels):
        '''
        Fit the KD-tree to the training set.

        suppose n is samples' amount, m is features' amount.
        points: array, training data, n * m
        labels: array, n * 1
        '''
        dimensions = points.shape[1]
        self._kdTree = KDTree(dimensions=dimensions)
        self._kdTree.build(points, labels)

    def add(self, x, y):
        '''
        Add training data: a single point (x: array, y: int) or a batch
        (x: n * m array, y: n * 1 array).

        NOTE(review): unimplemented in the original (both duplicate
        definitions were stubs); kept as a stub.
        '''
        pass

    def getKNN(self, point, k=5):
        '''
        get nearest n points
        '''
        return self._kdTree.searchKNN(point, k)

    def getLabel(self, point, k=5):
        '''
        get label of point by majority vote among its k nearest neighbours
        '''
        nns = self._kdTree.searchKNN(point, k)
        votes = {}
        for item in nns:
            label = item[0]._label[0]
            votes[label] = votes.get(label, 0) + 1
        # Highest vote count wins.
        return sorted(votes.items(), key=lambda x: x[1], reverse=True)[0][0]
#!/usr/bin/env python
# coding=utf-8
"""Smoke-test the custom KDTree module against sklearn's NearestNeighbors.

Converted from Python 2 print statements to the print() function used by
the rest of the codebase; output (space-separated fields) is unchanged.
"""
T = [[2, 3], [5, 4], [9, 6], [4, 7], [8, 1], [7, 2]]
T1 = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
import KDTree

kd_tree = KDTree.build_kdtree(T, 0, 2)
print('-' * 15, '[9,4]', '-' * 15)
print(KDTree.search_kdtree(kd_tree, 0, [9, 4], 2))
kd_tree = KDTree.build_kdtree(T1, 0, 3)
print('-' * 15, '[1.,1.,1.]', '-' * 15)
print(KDTree.search_kdtree(kd_tree, 0, [1., 1., 1.], 3))

# Cross-check against the nearest-neighbour implementation in sklearn.
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(n_neighbors=1)
neigh.fit(T)
print('-' * 15, '[9,4]', '-' * 15)
print(T)
print(neigh.kneighbors([[9, 4]]))
print('-' * 15, '[1.,1.,1.]', '-' * 15)
print(T1)
neigh.fit(T1)
print(neigh.kneighbors([[1., 1., 1.]]))