def test_find_closest_distant_point():
    """A query point far outside the data should resolve to (70, 70) in 3 calls."""
    kd = KDTree([(10, 12), (70, 70), (35, 45)])
    query = (0, 10000)
    # Wrap find_closest so recursive invocations are counted.
    KDTree.find_closest = counted(KDTree.find_closest)
    nearest = kd.find_closest(query)
    assert_equals(nearest, (70, 70))
    assert_equals(kd.find_closest.calls, 3)
def fit(self, points):
    """Main clustering entry point (OPTICS).

    Clustering is driven by expand_cluste_order: given a start point pt,
    compute pt's core_distance and eps-neighborhood, update the
    reachability_distance of the neighborhood points, then process the
    unvisited neighbors in increasing reachability_distance order (same
    procedure). Expanding every unvisited point of the dataset completes
    the clustering; results are stored in self.results and the visiting
    order in self.results_order — together they yield the cluster info.

    :param points: [list] input data; each element is a fixed-length 1-D np array
    :returns: self
    :rtype: OPTICS
    """
    """
    results holds [processed flag, core distance, reachability distance];
    results_order records the order in which points were processed.
    """
    self.point_num = len(points)
    self.point_size = points[0].size
    self.results = [[None, np.inf, np.inf] for x in range(self.point_num)]
    self.results_order = []
    # Store data in a kd-tree for lookups (original note: lookup seemingly unused).
    self.kd_tree = KDTree(self.point_size)
    self.kd_tree.create(points)
    for point_id in range(self.point_num):
        # Expand only points that have not been processed yet.
        if not self.results[point_id][0]:
            self._expand_cluste_order(point_id)
    return self
def test_find_closest_negative_numbers():
    """Negative coordinates are handled; (-40, 60) maps to (-35, 45) in 2 calls."""
    kd = KDTree([(10, 12), (70, -70), (-35, 45)])
    query = (-40, 60)
    KDTree.find_closest = counted(KDTree.find_closest)
    nearest = kd.find_closest(query)
    assert_equals(nearest, (-35, 45))
    assert_equals(kd.find_closest.calls, 2)
def test_find_closest_very_close_point():
    """A query next to (10, 12) finds it without visiting the far branch."""
    kd = KDTree([(10, 12), (70, 70), (35, 45)])
    query = (8, 10)
    KDTree.find_closest = counted(KDTree.find_closest)
    nearest = kd.find_closest(query)
    assert_equals(nearest, (10, 12))
    assert_equals(kd.find_closest.calls, 2)
def test_find_closest_three_points():
    """A central query forces all three nodes to be visited."""
    kd = KDTree([(10, 12), (70, 70), (35, 45)])
    query = (50, 50)
    KDTree.find_closest = counted(KDTree.find_closest)
    nearest = kd.find_closest(query)
    assert_equals(nearest, (35, 45))
    assert_equals(kd.find_closest.calls, 3)
def test_find_closest_same_dimension():
    """Points sharing an x coordinate still split correctly on the y axis."""
    kd = KDTree([(30, 40), (30, 4), (30, 60)])
    query = (30, 55)
    KDTree.find_closest = counted(KDTree.find_closest)
    nearest = kd.find_closest(query)
    assert_equals(nearest, (30, 60))
    assert_equals(kd.find_closest.calls, 3)
def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
    """Compare KDTree.query distances against brute force for one configuration."""
    tree = KDTree(X, leaf_size=1, metric=metric, **kwargs)
    dist_tree, _ind_tree = tree.query(Y, k, dualtree=dualtree,
                                      breadth_first=breadth_first)
    dist_brute, _ind_brute = brute_force_neighbors(X, Y, k, metric, **kwargs)
    # Indices are deliberately not compared: duplicate distances may be
    # ordered differently. Distances are unambiguous.
    assert_allclose(dist_tree, dist_brute)
def test_find_closest_identical_point():
    """Querying with a point already in the tree returns it at distance zero."""
    kd = KDTree([(10, 12), (70, 70), (35, 45)])
    query = (10, 12)
    KDTree.find_closest = counted(KDTree.find_closest)
    nearest = kd.find_closest(query)
    gap = KDTree._get_distance(nearest, query)
    assert_equals(nearest, (10, 12))
    assert_equals(gap, 0)
    assert_equals(kd.find_closest.calls, 2)
def make_temp_tree(self, new_list):
    """Build a throwaway KDTree from a list of Node objects.

    Each node's point and linked_object are copied into the new tree.
    (Original note: move this to KDTree at some point.)
    """
    tree = KDTree()
    for node in new_list:
        tree.insert(node.point, linked_object=node.linked_object)
    return tree
def fit(self, X, y):
    """Pair each sample with its label and index everything in a KDTree."""
    self.label_set = set(y)
    data = []
    for features, label in it.izip(X, y):
        if not self.dense:
            # Sparse rows are densified to a flat 1-D array.
            features = features.toarray()
            features = features[0]
        data.append((features, label))
    # Store the training data inside the tree for later queries.
    self.data = KDTree(data, self.k_neighbours, balanced=self.balanced)
def test_find_closest_3k():
    """3-D data: nearest to (34, 100, 50) is (70, 70, 10), found in 5 calls."""
    points = [(30, 40, 10), (5, 25, 2), (10, 12, 30),
              (70, 70, 10), (50, 30, 5), (35, 45, 15)]
    kd = KDTree(points)
    query = (34, 100, 50)
    KDTree.find_closest = counted(KDTree.find_closest)
    nearest = kd.find_closest(query)
    assert_equals(nearest, (70, 70, 10))
    assert_equals(kd.find_closest.calls, 5)
class KDTreeTest(unittest.TestCase):
    """Unit tests for the KDTree median/append/nearest API.

    NOTE(review): `len(self.data)/2` and `nearest.values()[0]` are Python 2
    idioms (int division; indexable dict views) — this suite will not run
    unchanged on Python 3.
    """

    def setUp(self):
        # Three 1-D vectors, deliberately out of sorted order.
        self.data = [Vector([1]), Vector([2]), Vector([0])]
        self.kd_tree = KDTree()

    def median_test(self):
        # median() should return the middle element of the list.
        median = self.kd_tree.median(self.data)
        m = self.data[len(self.data)/2]
        self.assertEquals(m, median)

    def append_test(self):
        # Appending in input order: 1 becomes root, 2 its right child,
        # 0 its left child.
        def f(data):
            for x in data:
                self.kd_tree.append(x, self.kd_tree.root)
        f(self.data)
        self.assertEquals(self.data[0], self.kd_tree.root.data)
        self.assertEquals(self.data[1], self.kd_tree.root.children[1].data)
        self.assertEquals(self.data[2], self.kd_tree.root.children[0].data)

    def append_medians_test(self):
        # Smoke test only — the structural assertions were disabled upstream.
        self.kd_tree.append_medians(self.data)
        #self.assertEquals(Vector([2]), self.kd_tree.root.data)
        #self.assertEquals(Vector([1]), self.kd_tree.root.children[1].data)
        #self.assertEquals(Vector([0]), self.kd_tree.root.children[0].data)

    def nearest_test(self):
        # Nearest neighbour of 3 among {0, 1, 2} is 2.
        self.kd_tree.append_medians(self.data)
        nearest = self.kd_tree.nearest(Vector([3]), self.kd_tree.root)
        self.assertEquals(Vector([2]), nearest.values()[0])
def test_find_closest_randomized():
    """Randomized check: the tree's answer matches a brute-force minimum,
    and the search visits strictly fewer nodes than the dataset size.

    NOTE(review): uses `xrange` and a print statement — Python 2 only.
    """
    RANGES = (-1000, 1000)
    DATA_SIZE = 6000
    data = [(uniform(*RANGES), uniform(*RANGES)) for _ in xrange(DATA_SIZE)]
    tree = KDTree(data)
    new_point = (uniform(*RANGES), uniform(*RANGES))
    # Wrap find_closest so recursive invocations are counted.
    KDTree.find_closest = counted(KDTree.find_closest)
    result = tree.find_closest(new_point)
    result_distance = KDTree._get_distance(result, new_point)
    # Brute-force reference: distance to every point.
    distances = [KDTree._get_distance(point, new_point) for point in data]
    assert_equals(result_distance, min(distances))
    # Number of calls will be inconsistent, but should not check all points
    print "Number of calls: ", tree.find_closest.calls
    assert tree.find_closest.calls < len(data)
def test_kd_buildup(points: List[Point]) -> float:
    """Return the peak memory (bytes) consumed while building a KDTree."""
    tracemalloc.start()
    baseline, _ = tracemalloc.get_traced_memory()
    tree = KDTree(points)  # kept alive until after the measurement
    _, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    return peak - baseline
def test_kd_tree_KDE(n_samples=100, n_features=3):
    """Nose-style generator test: KDTree kernel density estimates must match
    a slow reference implementation for every kernel / bandwidth / tolerance
    combination.

    NOTE(review): `check_results` closes over `dens_true`, which is rebound
    on every (kernel, h) iteration — correctness relies on the test runner
    consuming each yielded case before the generator advances; verify with
    the runner in use.
    """
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    kdt = KDTree(X, leaf_size=10)
    for kernel in ['gaussian', 'tophat', 'epanechnikov', 'exponential',
                   'linear', 'cosine']:
        for h in [0.001, 0.01, 0.1]:
            # Reference density, recomputed per kernel/bandwidth pair.
            dens_true = compute_kernel_slow(Y, X, kernel, h)

            def check_results(kernel, h, atol, rtol, dualtree, breadth_first):
                dens = kdt.kernel_density(Y, h, atol=atol, rtol=rtol,
                                          kernel=kernel, dualtree=dualtree,
                                          breadth_first=breadth_first)
                assert_allclose(dens, dens_true, atol=atol, rtol=rtol)

            for rtol in [0, 1E-5]:
                for atol in [1E-10, 1E-5, 0.1]:
                    for dualtree in (True, False):
                        # Dual-tree mode does not support relative tolerance.
                        if dualtree and rtol > 0:
                            continue
                        for breadth_first in (True, False):
                            yield (check_results, kernel, h, atol, rtol,
                                   dualtree, breadth_first)
def generate_b_camera_view(self):
    """Rebuild self.b_tree with only the blocks a frontal camera can see.

    For each of the 16 front-layer slots, if the front block is "none"
    the back-layer block (index i+16) becomes visible one row deeper
    (y + dy); otherwise the front block itself is inserted.
    Publishes the resulting point cloud and returns the tree.
    """
    self.b_tree = KDTree(.0381/2)
    base_x = .90
    base_z = .25
    # Top row of the front face (upper z level).
    for i in range(8):
        this_x = base_x + self.dx*i
        if self.b_blocks[i][1] == "none":
            self.b_tree.insert_unique([this_x, self.y+self.dy, base_z+self.dz],self.b_blocks[i+16][1])
        else:
            self.b_tree.insert_unique([this_x, self.y, base_z+self.dz],self.b_blocks[i][1])
    # Bottom row of the front face (lower z level).
    for i in range(8,16):
        this_x = base_x + self.dx*(i-8)
        if self.b_blocks[i][1] == "none":
            self.b_tree.insert_unique([this_x, self.y+self.dy, base_z],self.b_blocks[i+16][1])
        else:
            self.b_tree.insert_unique([this_x, self.y, base_z],self.b_blocks[i][1])
    self.publish_points(self.b_tree,self.point_cloud_pub)
    return self.b_tree
def test_kd_tree_pickle():
    """A pickled/unpickled KDTree must answer queries identically (protocols 0-2)."""
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    original = KDTree(X, leaf_size=1)
    ind_ref, dist_ref = original.query(X)

    def check_pickle_protocol(protocol):
        clone = pickle.loads(pickle.dumps(original, protocol=protocol))
        ind, dist = clone.query(X)
        assert_allclose(ind_ref, ind)
        assert_allclose(dist_ref, dist)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol
def test_kd_tree_query_radius(n_samples=100, n_features=10):
    """query_radius must return exactly the points within r of the origin."""
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)
    eps = 1E-15  # absorb float roundoff right at the radius boundary
    kdt = KDTree(X, leaf_size=5)
    rad = np.sqrt(((X - query_pt) ** 2).sum(1))
    for r in np.linspace(rad[0], rad[-1], 100):
        found = np.sort(kdt.query_radius(query_pt, r + eps)[0])
        expected = np.sort(np.where(rad <= r + eps)[0])
        assert_allclose(expected, found)
def test_kd_tree_query_radius(n_samples=100, n_features=10):
    """Radius queries agree with a brute-force distance filter at 100 radii."""
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)
    eps = 1E-15  # roundoff guard at the boundary
    kdt = KDTree(X, leaf_size=5)
    rad = np.sqrt(((X - query_pt) ** 2).sum(1))
    for r in np.linspace(rad[0], rad[-1], 100):
        ind = kdt.query_radius(query_pt, r + eps)[0]
        expected = np.where(rad <= r + eps)[0]
        assert_allclose(np.sort(expected), np.sort(ind))
def test_kd_tree_query_radius_distance(n_samples=100, n_features=10):
    """Distances reported by query_radius must match explicit recomputation."""
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)
    eps = 1E-15  # roundoff guard at the boundary
    kdt = KDTree(X, leaf_size=5)
    rad = np.sqrt(((X - query_pt) ** 2).sum(1))
    for r in np.linspace(rad[0], rad[-1], 100):
        ind, dist = kdt.query_radius(query_pt, r + eps, return_distance=True)
        recomputed = np.sqrt(((query_pt - X[ind[0]]) ** 2).sum(1))
        assert_allclose(recomputed, dist[0])
def test_kd_tree_query_radius_distance(n_samples=100, n_features=10):
    """Returned radius-query distances equal the Euclidean distances to the hits."""
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)
    eps = 1E-15  # roundoff guard
    kdt = KDTree(X, leaf_size=5)
    rad = np.sqrt(((X - query_pt) ** 2).sum(1))
    for r in np.linspace(rad[0], rad[-1], 100):
        ind, dist = kdt.query_radius(query_pt, r + eps, return_distance=True)
        hits = ind[0]
        reported = dist[0]
        d = np.sqrt(((query_pt - X[hits]) ** 2).sum(1))
        assert_allclose(d, reported)
def __init__(self, *cameras):
    """Set up publishers/subscribers for grouping block detections.

    :param cameras: camera objects; matched later by frame_id name.
    """
    self.point_pub = rospy.Publisher("/block_test", PointStamped, queue_size=10)
    self.cameras = cameras
    # Make kd-tree with a tolerance when trying to add duplicated blocks
    self.k = KDTree(.02)  # tolerance in meters
    self.intersector = PointIntersector()
    # Brief pause so the publisher is registered before detections arrive.
    time.sleep(1)
    rospy.Subscriber("/camera/block_detection", BlockStamped, self.got_block, queue_size=1)
def test_kd_search(points: List[Point], rectangles: List[Rectangle]) -> List[float]:
    """Time a KDTree range search for each rectangle; returns seconds per query."""
    tree = KDTree(points)

    def time_individual(rectangle: Rectangle) -> float:
        lo_x, hi_x, lo_y, hi_y = rectangle.to_tuple()
        begin = default_timer()
        tree.search(lo_x, hi_x, lo_y, hi_y)
        return default_timer() - begin

    return [time_individual(rect) for rect in rectangles]
def generate_a_blocks(self):
    """Rebuild the A-zone tree: two stacked rows of all-blue blocks.

    Publishes the resulting point cloud and returns the tree.
    """
    self.a_tree = KDTree(.0381/2)
    base_x = 1.73
    base_z = .14
    for col in range(self.initial_blocks/2):
        x = base_x + self.dx*col
        # Lower and upper block of this column.
        self.a_tree.insert_unique([x, self.y, base_z], "blue")
        self.a_tree.insert_unique([x, self.y, base_z+self.dz], "blue")
    self.publish_points(self.a_tree, self.point_cloud_pub)
    return self.a_tree
def hospitals():
    """Load hospital coordinates from hospitals.txt into a KDTree.

    Each file line is a comma-separated coordinate tuple (presumably
    lat,lon — confirm against the data file). Every entry becomes
    ``[[float, ...], line_index]``.

    :returns: (KDTree built over the points, the raw points list)
    """
    points = []
    # 'with' guarantees the file handle is closed; the original leaked it.
    with open('hospitals.txt', 'r') as infile:
        # enumerate replaces the hand-maintained counter `c`.
        for idx, line in enumerate(infile):
            coords = list(map(float, line.rstrip().split(",")))
            points.append([coords, idx])
    return KDTree(points), points
def petrol_bunk():
    """Load petrol-bunk coordinates from petrol_bunk.txt into a KDTree.

    Each file line is a comma-separated coordinate tuple (presumably
    lat,lon — confirm against the data file). Every entry becomes
    ``[[float, ...], line_index]``.

    :returns: (KDTree built over the points, the raw points list)
    """
    points = []
    # 'with' guarantees the file handle is closed; the original leaked it.
    with open('petrol_bunk.txt', 'r') as infile:
        # enumerate replaces the hand-maintained counter `c`.
        for idx, line in enumerate(infile):
            coords = list(map(float, line.rstrip().split(",")))
            points.append([coords, idx])
    return KDTree(points), points
def main():
    """Benchmark KDTree nearest-neighbour search against exhaustive search.

    Runs 100 trials on random 2-D data; asserts both methods find points at
    the same distance from the target, then prints the accumulated timings.
    """
    print("Testing KD Tree...")
    test_times = 100
    run_time_1 = run_time_2 = 0
    for _ in range(test_times):
        # Generate random data.
        low = 0
        high = 100
        n_rows = 1000
        n_cols = 2
        X = gen_data(low, high, n_rows, n_cols)
        y = gen_data(low, high, n_rows)
        Xi = gen_data(low, high, n_cols)
        # Build the kd-tree.
        tree = KDTree()
        tree.build_tree(X, y)
        # kd-tree search.
        start = time()
        nd = tree.nearest_neighbour_search(Xi)
        run_time_1 += time() - start
        ret1 = get_eu_dist(Xi, nd.split[0])
        # Plain linear search.
        start = time()
        row = exhausted_search(X, Xi)
        run_time_2 += time() - start
        ret2 = get_eu_dist(Xi, row)
        # Compare results (fixed "restult" typos in the failure message).
        assert ret1 == ret2, "target:%s\nresult1:%s\nresult2:%s\ntree:\n%s" % (
            Xi, nd, row, tree)
    print("%d tests passed!" % test_times)
    print("KD Tree Search %.2f s" % run_time_1)
    print("Exhausted search %.2f s" % run_time_2)
class BlockServer():
    '''
    Not used anymore.
    BlockServer acts as a server to deal with block detection from the cameras.
    Given a BlockStamped message, the server will group them into blocks.

    NOTE(review): `print self.k.nodes` is a Python 2 print statement.
    '''
    def __init__(self, *cameras):
        # Publisher for visual debugging of grouped block positions.
        self.point_pub = rospy.Publisher("/block_test", PointStamped, queue_size=10)
        self.cameras = cameras
        # Make kd-tree with a tolerance when trying to add duplicated blocks
        self.k = KDTree(.02)  # tolerance in meters
        self.intersector = PointIntersector()
        # Brief pause so the publisher registers before detections arrive.
        time.sleep(1)
        rospy.Subscriber("/camera/block_detection", BlockStamped, self.got_block, queue_size=1)

    def got_block(self, msg):
        """Project a detection into map space and merge it into the tree."""
        # Find the camera that this image was taken in and transform points appropriately.
        camera = [c for c in self.cameras if c.name == msg.header.frame_id][0]
        map_point = self.intersector.intersect_point(camera, msg.point, offset=msg.offset)
        self.publish_point(map_point)
        # Nearby detections are averaged into one node; color and rotation kept as payload.
        #try:
        self.k.insert_unique_average(map_point, [msg.color, msg.rotation_index])
        # except:
        #     rospy.logwarn("An ERROR was found and excepted.")
        print self.k.nodes
        # print

    def publish_point(self, point):
        """Publish a single xyz point in the map frame for visualization."""
        p = Point(x=point[0], y=point[1], z=point[2])
        h = Header(stamp=rospy.Time.now(), frame_id="map")
        self.point_pub.publish(header=h, point=p)
def test_kd_tree_two_point(n_samples=100, n_features=3):
    """Two-point correlation counts must match a pairwise-distance reference."""
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    radii = np.linspace(0, 1, 10)
    kdt = KDTree(X, leaf_size=10)
    pairwise = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    counts_true = [(pairwise <= radius).sum() for radius in radii]

    def check_two_point(r, dualtree):
        counts = kdt.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_allclose(counts, counts_true)

    for dualtree in (True, False):
        yield check_two_point, radii, dualtree
def test_random(self):
    """Plot a dense Gaussian blob and highlight the 50 points nearest a probe."""
    count, sigma1, sigma2 = 10000, 0.6, 0.5
    np.random.seed(0)
    xs = np.random.normal(3, sigma1, count)
    ys = np.random.normal(3, sigma2, count)
    probe = [3.01, 3.01]
    # Only draw the samples inside a small window around the probe.
    for px, py in zip(xs, ys):
        if 2.98 < px < 3.03 and 2.98 < py < 3.03:
            ax.scatter(px, py, c='b', marker='s', s=10, alpha=0.7)
    # ax.scatter(x, y, c='b', marker='s', s=10, alpha=0.7)
    tree = KDTree(np.c_[xs, ys])
    show_closest(tree, probe, 50, 'm')
    plt.show()
def generate_c_blocks(self):
    """Rebuild the C-zone tree: two rows of blocks with shuffled colors.

    Publishes the resulting point cloud and returns the tree.
    """
    self.c_tree = KDTree(.0381/2)
    base_x = .19
    base_z = .20
    # Four blocks of each color, shuffled into a random layout.
    colors = ["blue"]*4 + ["red"]*4 + ["green"]*4 + ["yellow"]*4
    random.shuffle(colors)
    half = self.initial_blocks/2  # Python 2 integer division
    for col in range(half):
        x = base_x + self.dx*col
        # Lower block takes colors[col]; upper block takes the second half.
        self.c_tree.insert_unique([x, self.y, base_z], colors[col])
        self.c_tree.insert_unique([x, self.y, base_z+self.dz], colors[col+half])
    self.publish_points(self.c_tree, self.point_cloud_pub)
    return self.c_tree
class TestKDTreeAnalyze(unittest.TestCase):
    """Exercises traversal, nearest-neighbour search, and plotting helpers
    on a small fixed 2-D point set."""

    def setUp(self):
        # Classic 6-point example set.
        points = [(2, 3), (5, 4), (9, 6), (4, 7), (8, 1), (7, 2)]
        self.tree = KDTree(points)

    def test_traversal(self):
        # Smoke test: log all three traversal orders.
        logger.info("PreOrder:")
        self.tree.traversal(lambda n: logger.info(n), "preorder")
        logger.info("InOrder:")
        self.tree.traversal(lambda n: logger.info(n), "inorder")
        logger.info("PostOrder:")
        self.tree.traversal(lambda n: logger.info(n), "postorder")

    def test_closest(self):
        # Nearest neighbour of (5.5, 9.5) should be (4, 7).
        dist, node, count, nodes = self.tree.closest((5.5, 9.5))
        logger.info("dist = {0}, node = {1}, visit_count = {2}".format(dist, node, count))
        logger.info("visited nodes = {0}".format(nodes))
        self.assertTrue(np.all(node.point == (4, 7)), "actual is {0}".format(node.point))

    def test_visualization(self):
        # Draws the tree, the closest point, and the 3 closest points.
        self.tree.traversal(draw_point, 'preorder')
        show_closest(self.tree, (5.5, 9.5), 'm')
        show_kclosest(self.tree, (5.5, 9.5), 3, 'b')
        plt.show()
class TestKDTree1d(unittest.TestCase):
    """Smoke tests for a KDTree built over 1-D data (shape (10, 1))."""

    def setUp(self):
        np.random.seed(0)
        points = np.random.randint(0, 50, 10)
        # Transpose turns the flat vector into a column of 1-D points.
        self.tree = KDTree(np.transpose([points]))

    def test_traversal(self):
        # Smoke test: log all three traversal orders.
        logger.info("PreOrder:")
        self.tree.traversal(lambda n: logger.info(n), "preorder")
        logger.info("InOrder:")
        self.tree.traversal(lambda n: logger.info(n), "inorder")
        logger.info("PostOrder:")
        self.tree.traversal(lambda n: logger.info(n), "postorder")

    def test_closest(self):
        # Log-only check of the nearest neighbour of 15.
        dist, node, count, nodes = self.tree.closest([15])
        logger.info("dist = {0}, node = {1}, visit_count = {2}".format(
            dist, node, count))
        logger.info("visited nodes = {0}".format(nodes))
def test_randoms(self):
    """Plot three Gaussian clusters and the closest point to a random probe."""
    count, sigma1, sigma2 = 500, 0.6, 0.5
    np.random.seed(0)
    # NOTE: the draw order below fixes the RNG stream — do not reorder.
    x1 = np.random.normal(5, sigma1, count)
    y1 = np.random.normal(5, sigma2, count)
    x2 = np.random.normal(3, sigma1, count)
    y2 = np.random.normal(4, sigma2, count)
    x3 = np.random.normal(4.5, sigma1, count)
    y3 = np.random.normal(2.5, sigma2, count)
    probe = [np.random.normal(5, 0.6), np.random.normal(5, 0.5)]
    ax.scatter(x1, y1, c='b', marker='s', s=10, alpha=0.7)
    ax.scatter(x2, y2, c='r', marker='^', s=10, alpha=0.7)
    ax.scatter(x3, y3, c='g', s=10, alpha=0.7)
    all_points = np.c_[np.r_[x1, x2, x3], np.r_[y1, y2, y3]]
    tree = KDTree(all_points)
    show_closest(tree, probe, 'm')
    plt.show()
class temp_GenerateBlockPoints():
    '''
    Temp way of generating blocks. Generate point cloud and a block_tree.

    NOTE(review): Python 2 code — `self.initial_blocks/2` relies on integer
    division, and the `generate_b_blocks` helper builds the diagnostics view
    while `generate_b_camera_view` builds the camera-visible subset.
    '''
    def __init__(self, inital_blocks):
        self.point_cloud_pub = rospy.Publisher("/camera/block_point_cloud", PointCloud, queue_size=2)
        self.point_cloud_pub_diag = rospy.Publisher("/camera/diag_block_point_cloud", PointCloud, queue_size=2)
        self.initial_blocks = inital_blocks
        # Block spacing (meters) along x, y (half-block depth), and z.
        self.dx = .0635
        self.dy = .0629  # Used for half blocks
        self.dz = .0381
        self.y = 2.153

    def generate_c_blocks(self):
        """Build the C-zone tree: two rows of randomly colored blocks."""
        self.c_tree = KDTree(.0381/2)
        base_x = .19
        base_z = .20
        colors = ["blue", "blue", "blue", "blue",
                  "red", "red", "red", "red",
                  "green", "green", "green", "green",
                  "yellow", "yellow", "yellow", "yellow"]
        random.shuffle(colors)
        for i in range(self.initial_blocks/2):
            this_x = base_x + self.dx*i
            # Lower block uses colors[i]; upper block uses the second half.
            self.c_tree.insert_unique([this_x, self.y, base_z], colors[i])
            self.c_tree.insert_unique([this_x, self.y, base_z+self.dz], colors[i+self.initial_blocks/2])
        self.publish_points(self.c_tree, self.point_cloud_pub)
        return self.c_tree

    def generate_b_blocks(self):
        '''
        Build the diagnostics B-zone tree (ground truth, not camera view).

        Block order (from front view)
        (first front layer)
        00 01 02 03 04 05 06 07
        08 09 10 11 12 13 14 15
        (second back layer)
        16 17 18 19 20 21 22 23
        24 25 26 27 28 29 30 31
        '''
        self.b_tree_diag = KDTree(.0381/2)
        base_x = .90
        base_z = .25
        # blocks[0]: 16 full blocks; blocks[1]: 8 half blocks (front/back pairs).
        blocks = [["red", "red", "red", "red",
                   "blue", "blue", "blue", "blue",
                   "green", "green", "green", "green",
                   "yellow", "yellow", "yellow", "yellow"],
                  ["red", "red", "blue", "blue",
                   "green", "green", "yellow", "yellow"]]
        random.shuffle(blocks[1])
        random.shuffle(blocks[0])
        # At these indicies are where we are going to put the half blocks.
        half_locations = np.array(random.sample(range(0, 16), 4))
        # Populate blocks list with full and half blocks.
        self.b_blocks = np.full(32, "none", dtype=object)
        for i in range(16):
            if i in half_locations:
                # A half block in front gets its partner directly behind (i+16).
                self.b_blocks[i] = blocks[1][0]
                del blocks[1][0]
                self.b_blocks[i+16] = blocks[1][0]
                del blocks[1][0]
            else:
                self.b_blocks[i] = blocks[0][0]
                del blocks[0][0]
        # Go through each dimension and add it to the tree. This is the
        # diagnostics tree, not what's visible to the camera. Each b_blocks
        # entry is rewritten as [position, color].
        for i in range(8):
            this_x = base_x + self.dx*i
            self.b_tree_diag.insert_unique([this_x, self.y, base_z+self.dz], self.b_blocks[i])
            self.b_blocks[i] = [[this_x, self.y, base_z+self.dz], self.b_blocks[i]]
        for i in range(8, 16):
            this_x = base_x + self.dx*(i-8)
            self.b_tree_diag.insert_unique([this_x, self.y, base_z], self.b_blocks[i])
            self.b_blocks[i] = [[this_x, self.y, base_z], self.b_blocks[i]]
        for i in range(16, 24):
            this_x = base_x + self.dx*(i-16)
            self.b_tree_diag.insert_unique([this_x, self.y+self.dy, base_z+self.dz], self.b_blocks[i])
            self.b_blocks[i] = [[this_x, self.y+self.dy, base_z+self.dz], self.b_blocks[i]]
        for i in range(24, 32):
            this_x = base_x + self.dx*(i-24)
            self.b_tree_diag.insert_unique([this_x, self.y+self.dy, base_z], self.b_blocks[i])
            self.b_blocks[i] = [[this_x, self.y+self.dy, base_z], self.b_blocks[i]]
        self.publish_points(self.b_tree_diag, self.point_cloud_pub_diag)

    def generate_b_camera_view(self):
        # This will take the current list of points and generate the frontal
        # view, i.e. what the camera can see. Where the front block is "none",
        # the back block (i+16) is visible one row deeper (y + dy).
        self.b_tree = KDTree(.0381/2)
        base_x = .90
        base_z = .25
        # Populates tree with the frontmost blocks
        for i in range(8):
            this_x = base_x + self.dx*i
            if self.b_blocks[i][1] == "none":
                self.b_tree.insert_unique([this_x, self.y+self.dy, base_z+self.dz], self.b_blocks[i+16][1])
            else:
                self.b_tree.insert_unique([this_x, self.y, base_z+self.dz], self.b_blocks[i][1])
        for i in range(8, 16):
            this_x = base_x + self.dx*(i-8)
            if self.b_blocks[i][1] == "none":
                self.b_tree.insert_unique([this_x, self.y+self.dy, base_z], self.b_blocks[i+16][1])
            else:
                self.b_tree.insert_unique([this_x, self.y, base_z], self.b_blocks[i][1])
        self.publish_points(self.b_tree, self.point_cloud_pub)
        return self.b_tree

    def remove_b_blocks(self, indicies):
        """Mark the given slots empty and regenerate the camera view."""
        for i in indicies:
            self.b_blocks[i] = "none"
        return self.generate_b_camera_view()

    def generate_a_blocks(self):
        """Build the A-zone tree: two stacked rows of all-blue blocks."""
        self.a_tree = KDTree(.0381/2)
        base_x = 1.73
        base_z = .14
        for i in range(self.initial_blocks/2):
            this_x = base_x + self.dx*i
            self.a_tree.insert_unique([this_x, self.y, base_z], "blue")
            self.a_tree.insert_unique([this_x, self.y, base_z+self.dz], "blue")
        self.publish_points(self.a_tree, self.point_cloud_pub)
        return self.a_tree

    def publish_points(self, tree, topic):
        """Publish every tree node as a colored PointCloud on `topic`.

        The node's linked_object color name is mapped to r/g/b channels.
        """
        points = []
        channels = [[], [], []]
        for p in tree.nodes:
            if p.linked_object == "blue":
                channels[0].append(0)  # R
                channels[1].append(0)  # G
                channels[2].append(1)  # B
            elif p.linked_object == "red":
                channels[0].append(1)  # R
                channels[1].append(0)  # G
                channels[2].append(0)  # B
            elif p.linked_object == "green":
                channels[0].append(0)  # R
                channels[1].append(1)  # G
                channels[2].append(0)  # B
            elif p.linked_object == "yellow":
                channels[0].append(1)  # R
                channels[1].append(1)  # G
                channels[2].append(0)  # B
            elif p.linked_object == "none":
                channels[0].append(0)  # R
                channels[1].append(0)  # G
                channels[2].append(0)  # B
            points.append(Point32(*p.point))
        rgb_channels = [ChannelFloat32(name="r", values=channels[0]), ChannelFloat32(name="g", values=channels[1]), ChannelFloat32(name="b", values=channels[2])]
        # Brief pause so subscribers are connected before publishing.
        time.sleep(1.5)
        topic.publish(PointCloud(
            header=Header(
                stamp=rospy.Time.now(),
                frame_id="map"
            ),
            points=points,
            channels=rgb_channels
        )
        )
def setUp(self):
    # Three 1-D vectors, deliberately out of sorted order, and an empty tree.
    self.data = [Vector([1]), Vector([2]), Vector([0])]
    self.kd_tree = KDTree()
from data import Data
from kd_tree import KDTree

# Demo script (Python 2): build a 2-D kd-tree from the first 7 keys of the
# example dataset and query the nearest neighbour of (7, 7).
kd = KDTree(2)
d = Data()
d.extract('example.dat')
print d.data
kd.build_tree(d.data.keys()[:7])
print kd
print kd.nearest((7,7), kd.root)
class KNearest:
    """k-nearest-neighbours classifier backed by a KDTree.

    NOTE(review): Python 2 code (`it.izip`, `iteritems`). Ties in the
    majority vote are broken by dict iteration order, and an empty vote
    returns -1.
    """

    def __init__(self, k_neighbours=5, dense=False, balanced=False):
        # k_neighbours: votes considered per prediction.
        # dense: whether inputs are already dense arrays.
        # balanced: forwarded to the KDTree constructor.
        self.k_neighbours = k_neighbours
        self.dense = dense
        self.balanced = balanced
        self.data = None

    def fit(self, X, y):
        """Pair samples with labels and index them in a KDTree."""
        self.label_set = set(y)
        data = list()
        for con, lab in it.izip(X, y):
            if not self.dense:
                # Densify sparse rows to flat 1-D arrays.
                con = con.toarray()
                con = con[0]
            data.append((con, lab))
        # Create a KDTree using the data given and store it
        self.data = KDTree(data, self.k_neighbours, balanced=self.balanced)

    def predict(self, X_test):
        """Return the majority label among each sample's nearest neighbours."""
        predictions = list()
        if not self.dense:
            X_test = toArray(X_test)
        for u in X_test:
            dists = list()
            # neighbours = bucket of vectors to compare u with
            neighbours = self.data.search(u)
            # Make a list of distances between u and each neighbour
            for n in neighbours:
                dists.append((self.__distance(u, n[0]), n[1]))
            # Sort the list so we can get the k closest neighbours
            dists = sorted(dists, key=lambda dist: dist[0])
            nearest = dists[:self.k_neighbours]
            # Find Majority
            predictions.append(self.__findMajority(nearest))
        return predictions

    def __findMajority(self, dists):
        """Most frequent label among (distance, label) pairs; -1 if none."""
        labels = dict()
        # Make a dict of occurences of each label in dists
        for l in self.label_set:
            labels[l] = 0
        for __dist, lab in dists:
            labels[lab] += 1
        # Find max label
        maxval = 0
        maxkey = -1
        for key, value in labels.iteritems():
            if value > maxval:
                maxval = value
                maxkey = key
        return maxkey

    # Euclidean distance of 2 vectors
    def __distance(self, U, V):
        s = 0
        for xu, xv in it.izip(U, V):
            s += (xu - xv)**2
        dist = sqrt(s)
        return dist
class OPTICS(Verbose):
    """OPTICS density-based clustering.

    Produces an ordering of the points together with core and reachability
    distances (self.results / self.results_order), from which flat clusters
    can be extracted for any eps <= max_eps via `extract`.
    """

    def __init__(self, max_eps=0.5, min_samples=10, metric=euclidean, verbose=True):
        # max_eps: neighborhood radius; min_samples: core-point threshold.
        self.max_eps = max_eps
        self.min_samples = min_samples
        self.metric = metric
        super(OPTICS, self).__init__(verbose)

    def _get_neighbors(self, point_id):
        """Compute a point's eps-neighborhood and its core distance.

        :param point_id: index of the point in the kd-tree
        :returns: (list of neighbor ids, core distance — np.inf if not a core point)
        :rtype: tuple
        """
        point = self.kd_tree[point_id]
        neighbors = self.kd_tree.query_ball_point(point, self.max_eps)
        # Exclude the query point itself (assumes it is returned first —
        # TODO confirm the kd-tree's query_ball_point contract).
        neighbors.pop(0)
        if len(neighbors) < self.min_samples:
            core_distance = np.inf
        else:
            # Entries appear to be (distance, id) pairs sorted by distance;
            # the min_samples-th distance is the core distance.
            core_distance = neighbors[self.min_samples - 1][0]
        return [x[1] for x in neighbors], core_distance

    def _update(self, order_seeds, neighbors, point_id):
        """Refresh the reachability distances of `neighbors` seen from `point_id`.

        :param order_seeds: priority queue keyed by reachability distance
        :param neighbors: neighbor ids of point_id
        :param point_id: id of a core point (core_distance < np.inf guaranteed
            by the callers)
        """
        core_distance = self.results[point_id][1]
        for neighbor in neighbors:
            # Only unprocessed neighbors get their reachability updated.
            # Once a point has been processed (its core_distance computed and
            # used as point_id here), its reachability is frozen —
            # first-come-first-served.
            if not self.results[neighbor][0]:
                self.printer("节点{}尚未处理,计算可达距离".format(neighbor))
                new_reachability_distance = max(
                    core_distance,
                    self.metric(self.kd_tree[point_id], self.kd_tree[neighbor]))
                # Shrink-only update: keep the smaller reachability distance.
                if new_reachability_distance < self.results[neighbor][2]:
                    self.printer("节点{}的可达距离从{}缩短至{}".format(
                        neighbor, self.results[neighbor][2],
                        new_reachability_distance))
                    self.results[neighbor][2] = new_reachability_distance
                    # push inserts new items and decrease-keys existing ones.
                    order_seeds.push([new_reachability_distance, neighbor])

    def _expand_cluste_order(self, point_id):
        """Process `point_id` and, if it is a core point, sweep its
        neighborhood in increasing reachability-distance order.

        :param point_id: index of the starting point
        """
        neighbors, core_distance = self._get_neighbors(point_id)
        self.printer("节点{}的邻域点数量为{},核心距离为{}".format(point_id, len(neighbors), core_distance))
        self.results[point_id][0] = True  # mark processed
        self.results[point_id][1] = core_distance
        if (not self.results_order.count(point_id)):
            self.results_order.append(point_id)  # record processing order
        if core_distance < np.inf:
            self.printer("节点{}为核心点,递归处理其邻域".format(point_id))
            # order_seeds is a priority queue (heap) keyed by reachability
            # distance with point_id as the handle.
            order_seeds = Heap(verbose=False)
            data = [[self.results[x][2], x] for x in neighbors]
            order_seeds.heapify(data)
            self._update(order_seeds, neighbors, point_id)
            while not order_seeds.is_empty:
                _, current_point_id = order_seeds.pop()
                neighbors, core_distance = self._get_neighbors(
                    current_point_id)
                self.printer("节点{}的邻域点数量为{},核心距离为{}".format(
                    current_point_id, len(neighbors), core_distance))
                self.results[current_point_id][0] = True  # mark processed
                self.results[current_point_id][1] = core_distance
                if (not self.results_order.count(current_point_id)):
                    self.results_order.append(current_point_id)
                if core_distance < np.inf:
                    # NOTE(review): this log line formats point_id; it likely
                    # should be current_point_id (logging only, no logic impact).
                    self.printer("节点{}为核心点,递归处理其邻域".format(point_id))
                    self._update(order_seeds, neighbors, current_point_id)

    def fit(self, points):
        """Main clustering entry point.

        Driven by expand_cluste_order: for a start point pt, compute its
        core_distance and eps-neighborhood, update the neighborhood's
        reachability distances, then process unvisited neighbors in
        increasing reachability order (same procedure). Expanding every
        unvisited point completes the clustering; results live in
        self.results and the visiting order in self.results_order.

        :param points: [list] input data; each element is a fixed-length 1-D np array
        :returns: self
        :rtype: OPTICS
        """
        """
        results holds [processed flag, core distance, reachability distance];
        results_order records the order in which points were processed.
        """
        self.point_num = len(points)
        self.point_size = points[0].size
        self.results = [[None, np.inf, np.inf] for x in range(self.point_num)]
        self.results_order = []
        # Store data in a kd-tree for lookups (original note: seemingly unused).
        self.kd_tree = KDTree(self.point_size)
        self.kd_tree.create(points)
        for point_id in range(self.point_num):
            # Expand only points that have not been processed yet.
            if not self.results[point_id][0]:
                self._expand_cluste_order(point_id)
        return self

    def extract(self, eps):
        """Extract a flat clustering from the computed ordering.

        Scans points in processing order, comparing each point's core and
        reachability distances against eps:
        1. reachability_distance < eps: the point joins the current cluster
        2. otherwise:
           2-1. core_distance < eps: the point starts a new cluster
           2-2. core_distance > eps: the point is treated as noise
        Note: the scan order matches the processing order used in fit.

        :param eps: extraction radius, must not exceed max_eps
        :returns: integer label per point (0 = noise)
        :rtype: np.ndarray
        """
        if eps > self.max_eps:
            raise ValueError("eps参数不能大于{},当前值为{}".format(self.max_eps, eps))
        labels = np.zeros(self.point_num, dtype=np.int64)
        counter = count()
        idx = next(counter)
        for point_id in self.results_order:
            # for point_id in range(self.point_num):
            _, core_distance, reachability_distance = self.results[point_id]
            # Reachability above eps: either a new core point or noise.
            if reachability_distance > eps:
                # Core distance below eps: the point seeds a new cluster.
                if core_distance < eps:
                    idx = next(counter)
                    labels[point_id] = idx
                # Otherwise it is noise.
                else:
                    labels[point_id] = 0
            # Reachability below eps: belongs to the current cluster; the
            # ordering maintained by fit's main loop makes this correct.
            else:
                labels[point_id] = idx
        return labels
def test_should_find_closest_1():
    """Nearest neighbour of (9, 2) in the 2-D dataset is index 4."""
    tree = KDTree(dataset_2d)
    dist, ind = tree.query(np.array([9, 2]), n=1)
    assert ind == [4]
def test_should_find_n_closest_9():
    """Three nearest to (4, 5, 6) expected in order [2, 3, 1]."""
    tree = KDTree(dataset_3d)
    dist, ind = tree.query(np.array([4, 5, 6]), n=3)
    # NOTE: this assertion is known to fail (original comment: "падает!" = "it fails!")
    assert ind == [2, 3, 1]
def test_should_find_n_closest_8():
    """Three nearest to (3, 2) in the 2-D dataset are [0, 1, 5]."""
    tree = KDTree(dataset_2d)
    dist, ind = tree.query(np.array([3, 2]), n=3)
    assert ind == [0, 1, 5]
def test_should_find_n_closest_7():
    """Three nearest to (7, 1) in the 2-D dataset are [5, 4, 1]."""
    tree = KDTree(dataset_2d)
    dist, ind = tree.query(np.array([7, 1]), n=3)
    assert ind == [5, 4, 1]
def test_should_find_closest_6():
    """Nearest neighbour of (8, 8, 8) in the 3-D dataset is index 4."""
    tree = KDTree(dataset_3d)
    dist, ind = tree.query(np.array([8, 8, 8]), n=1)
    assert ind == [4]
def test_should_find_closest_5():
    """Nearest neighbour of (4, 5, 6) in the 3-D dataset is index 2."""
    tree = KDTree(dataset_3d)
    dist, ind = tree.query(np.array([4, 5, 6]), n=1)
    assert ind == [2]
def test_should_find_closest_4():
    """Nearest neighbour of (1, 2, 3) in the 3-D dataset is index 3."""
    tree = KDTree(dataset_3d)
    dist, ind = tree.query(np.array([1, 2, 3]), n=1)
    assert ind == [3]
def main():
    """Demo: build a tree over the sample 2-D data and query near (9, 2)."""
    dataset_2d = np.array([[2, 3], [5, 4], [9, 6], [4, 7], [8, 1], [7, 2]])
    tree = KDTree(dataset_2d)
    # Result is intentionally unused — this is a smoke-test entry point.
    dist, ind = tree.query(np.array([9, 2]), n=1)
def __create_kdtree(self):
    """Index every cluster's representative points in a fresh KDTree.

    Each representative is stored with its owning cluster as payload.
    """
    self.__KDTree_T = KDTree()
    for cluster in self.__heap_q:
        for representative in cluster.rep:
            self.__KDTree_T.insert(representative, cluster)
def build_tree(pixels):
    """Insert every pixel into a 3-dimensional KDTree and return it."""
    tree = KDTree(3)
    for pixel in pixels:
        tree.insert(pixel)
    return tree
def generate_b_blocks(self):
    '''
    Build the diagnostics B-zone tree (ground truth, not the camera view).

    Block order (from front view)
    (first front layer)
    00 01 02 03 04 05 06 07
    08 09 10 11 12 13 14 15
    (second back layer)
    16 17 18 19 20 21 22 23
    24 25 26 27 28 29 30 31
    '''
    self.b_tree_diag = KDTree(.0381/2)
    base_x = .90
    base_z = .25
    # blocks[0]: 16 full blocks; blocks[1]: 8 half blocks (front/back pairs).
    blocks = [["red", "red", "red", "red",
               "blue", "blue", "blue", "blue",
               "green", "green", "green", "green",
               "yellow", "yellow", "yellow", "yellow"],
              ["red", "red", "blue", "blue",
               "green", "green", "yellow", "yellow"]]
    random.shuffle(blocks[1])
    random.shuffle(blocks[0])
    # At these indicies are where we are going to put the half blocks.
    half_locations = np.array(random.sample(range(0, 16), 4))
    # Populate blocks list with full and half blocks.
    self.b_blocks = np.full(32, "none", dtype=object)
    for i in range(16):
        if i in half_locations:
            # A half block in front gets its partner directly behind (i+16).
            self.b_blocks[i] = blocks[1][0]
            del blocks[1][0]
            self.b_blocks[i+16] = blocks[1][0]
            del blocks[1][0]
        else:
            self.b_blocks[i] = blocks[0][0]
            del blocks[0][0]
    # Go through each dimension and add it to the tree. This is the
    # diagnostics tree, not what's visible to the camera. Each b_blocks
    # entry is rewritten as [position, color].
    for i in range(8):
        this_x = base_x + self.dx*i
        self.b_tree_diag.insert_unique([this_x, self.y, base_z+self.dz], self.b_blocks[i])
        self.b_blocks[i] = [[this_x, self.y, base_z+self.dz], self.b_blocks[i]]
    for i in range(8, 16):
        this_x = base_x + self.dx*(i-8)
        self.b_tree_diag.insert_unique([this_x, self.y, base_z], self.b_blocks[i])
        self.b_blocks[i] = [[this_x, self.y, base_z], self.b_blocks[i]]
    for i in range(16, 24):
        this_x = base_x + self.dx*(i-16)
        self.b_tree_diag.insert_unique([this_x, self.y+self.dy, base_z+self.dz], self.b_blocks[i])
        self.b_blocks[i] = [[this_x, self.y+self.dy, base_z+self.dz], self.b_blocks[i]]
    for i in range(24, 32):
        this_x = base_x + self.dx*(i-24)
        self.b_tree_diag.insert_unique([this_x, self.y+self.dy, base_z], self.b_blocks[i])
        self.b_blocks[i] = [[this_x, self.y+self.dy, base_z], self.b_blocks[i]]
    self.publish_points(self.b_tree_diag, self.point_cloud_pub_diag)
class Cure(Clustering):
    """CURE (Clustering Using REpresentatives) agglomerative clustering.

    Each cluster is summarized by up to ``c`` scatter points that are shrunk
    toward the cluster center by factor ``alpha``.  Clusters are merged
    greedily (closest pair first) until ``number_of_clusters`` remain.
    Representative points are indexed in a KD-tree to speed up
    nearest-cluster queries.
    """

    def __init__(self, data, number_of_clusters, alpha, c, sample_size=None):
        """
        :param data: input points; list of points or ``np.ndarray``.
        :param number_of_clusters: target number of clusters (positive int).
        :param alpha: shrink factor for representative points (>= 0).
        :param c: number of representative points per cluster (> 0).
        :param sample_size: optional reservoir-sample size; when given, the
            algorithm clusters a random sample of ``data`` instead of all of it.
        :raises ValueError: on empty data or out-of-range parameters.
        """
        self.__data = data.tolist() if isinstance(data, np.ndarray) else data
        self.__k = number_of_clusters
        self.__alpha = alpha
        self.__c = c  # Representative points per cluster
        self.__sampled_data = None
        self.__sampling_reservoir = reservoir_sampling(sample_size) if sample_size is not None else None
        if self.__sampling_reservoir is not None:
            self.__sample_data()
        self.__dimension = len(data[0]) if len(data) > 0 else 0
        self.__clusters = None
        self.__representors = None
        self.__centers = None
        self.__validate_arguments()

    def clustering(self):
        """Run CURE until only ``k`` clusters remain.

        Results are exposed afterwards via :meth:`get_clusters`,
        :meth:`get_indexes`, :meth:`get_representors` and :meth:`get_centers`.
        """
        self.__create_heap()
        self.__create_kdtree()
        while len(self.__heap_q) > self.__k:
            # The heap is kept sorted by distance to the closest cluster, so
            # the head and its closest partner are the best merge candidates.
            cluster_u = self.__heap_q[0]
            cluster_v = cluster_u.closest
            self.__heap_q.remove(cluster_u)
            self.__heap_q.remove(cluster_v)
            self.__delete_rep(cluster_u)
            self.__delete_rep(cluster_v)
            cluster_w = self.__merge_clusters(cluster_u, cluster_v)
            self.__insert_rep(cluster_w)
            if len(self.__heap_q) > 0:
                cluster_w.closest = self.__heap_q[0]  # arbitrary cluster from heap
                cluster_w.distance_closest = cluster_w.distance(cluster_w.closest)
                for curr_cluster in self.__heap_q:
                    distance = cluster_w.distance(curr_cluster)
                    # Track the cluster closest to the freshly merged one.
                    if distance < cluster_w.distance_closest:
                        cluster_w.closest = curr_cluster
                        cluster_w.distance_closest = distance
                    # Repair clusters whose closest neighbor was just merged away.
                    if curr_cluster.closest is cluster_u or curr_cluster.closest is cluster_v:
                        if curr_cluster.distance_closest < distance:
                            curr_cluster.closest, curr_cluster.distance_closest = \
                                self.__closest_cluster(curr_cluster, distance)
                            if curr_cluster.closest is None:
                                curr_cluster.closest = cluster_w
                                # BUG FIX: original assigned ``curr_cluster.distance = distance``,
                                # clobbering the ``distance`` *method* with a float.
                                curr_cluster.distance_closest = distance
                        else:
                            curr_cluster.closest = cluster_w
                            curr_cluster.distance_closest = distance
                    elif curr_cluster.distance_closest > distance:
                        curr_cluster.closest = cluster_w
                        curr_cluster.distance_closest = distance
            self.__heap_q.append(cluster_w)
            self.__heap_q.sort(key=lambda x: x.distance_closest)
        self.__clusters = list(self.__heap_q)
        self.__representors = [cluster.rep for cluster in self.__heap_q]
        self.__centers = [cluster.center for cluster in self.__heap_q]

    def __closest_cluster(self, x, dist):
        """Find the nearest cluster (other than ``x``) within distance ``dist``.

        :param x: cluster whose neighborhood is searched (via its rep points).
        :param dist: search radius; appears to be a *squared* distance, since
            its square root is used as the KD-tree query radius — TODO confirm
            against ``CureCluster.distance``.
        :returns: tuple ``(closest cluster or None, distance to it)``.
        """
        closest_distance = dist
        closest_cluster = None
        euclidean_dist = dist ** 0.5
        for point in x.rep:
            candidates = self.__KDTree_T.find_closest_nodes(point, euclidean_dist)
            for candidate_distance, kdtree_node in candidates:
                if candidate_distance < closest_distance and kdtree_node is not None \
                        and kdtree_node.payload is not x:
                    closest_distance = candidate_distance
                    closest_cluster = kdtree_node.payload
        return closest_cluster, closest_distance

    def __merge_clusters(self, cluster_u, cluster_v):
        """Merge two clusters; recompute center and representative points."""
        cluster_w = CureCluster(None, None)
        cluster_w.points = cluster_u.points + cluster_v.points
        cluster_w.indexes = cluster_u.indexes + cluster_v.indexes

        cluster_w.center = [0] * self.__dimension
        if cluster_w.points[1:] == cluster_w.points[:-1]:
            # All merged points are identical — the center is that point.
            cluster_w.center = cluster_w.points[0]
        else:
            # Size-weighted mean of the two cluster centers.
            total = len(cluster_u.points) + len(cluster_v.points)
            for index in range(self.__dimension):
                cluster_w.center[index] = (len(cluster_u.points) * cluster_u.center[index]
                                           + len(cluster_v.points) * cluster_v.center[index]) / total

        # Farthest-point selection of up to ``c`` well-scattered points:
        # first the point farthest from the center, then points maximizing
        # the minimum distance to those already chosen.
        temp_set = []
        for index in range(self.__c):
            max_distance = 0
            max_point = None
            for point in cluster_w.points:
                if index == 0:
                    min_distance = squared_euclidean_distance(point, cluster_w.center)
                else:
                    min_distance = min(squared_euclidean_distance(point, p) for p in temp_set)
                if min_distance >= max_distance:
                    max_distance = min_distance
                    max_point = point
            if max_point not in temp_set:
                temp_set.append(max_point)

        # Shrink the scatter points toward the center by factor alpha.
        cluster_w.rep = [[val + self.__alpha * (cluster_w.center[idx] - val)
                          for idx, val in enumerate(point)]
                         for point in temp_set]
        return cluster_w

    def __insert_rep(self, cluster):
        """Index all representative points of ``cluster`` in the KD-tree."""
        for p in cluster.rep:
            self.__KDTree_T.insert(p, cluster)

    def __delete_rep(self, cluster):
        """Remove all representative points of ``cluster`` from the KD-tree."""
        for p in cluster.rep:
            self.__KDTree_T.remove(p, payload=cluster)

    def __create_heap(self):
        """Initialize the heap with one single-point cluster per input point,
        each linked to its nearest neighbor, sorted by that distance."""
        source = self.__sampled_data if self.__sampled_data is not None else self.__data
        self.__heap_q = [CureCluster(point, index) for index, point in enumerate(source)]
        for curr_cluster in self.__heap_q:
            curr_cluster.closest = min((other for other in self.__heap_q if curr_cluster != other),
                                       key=curr_cluster.distance)
            curr_cluster.distance_closest = curr_cluster.distance(curr_cluster.closest)
        self.__heap_q.sort(key=lambda x: x.distance_closest)

    def __create_kdtree(self):
        """Build the KD-tree over every cluster's representative points."""
        self.__KDTree_T = KDTree()
        for curr_cluster in self.__heap_q:
            for rep_point in curr_cluster.rep:
                self.__KDTree_T.insert(rep_point, curr_cluster)

    def __sample_data(self):
        """Draw a reservoir sample of the input into ``self.__sampled_data``."""
        next(self.__sampling_reservoir)  # prime the generator before send()
        samples = []
        for idx in range(len(self.__data)):
            samples = self.__sampling_reservoir.send(idx)
        samples.sort()  # keep sampled points in their original order
        self.__sampled_data = [self.__data[sample] for sample in samples]

    def __validate_arguments(self):
        """Validate constructor arguments; raise ``ValueError`` on bad input.

        The integer-type check runs before the ``<=`` comparison so a
        non-numeric ``k`` raises the intended ValueError instead of a
        TypeError; ``{}`` is used instead of ``{:d}`` so formatting the
        offending (possibly non-int) value cannot itself crash.
        """
        if len(self.__data) == 0:
            raise ValueError("Empty input data. Data should contain at least one point.")
        if not type(self.__k) == int:
            raise ValueError(
                "Incorrect type for amount of clusters '{}'. Amount of cluster should be an integer.".format(
                    self.__k))
        elif self.__k <= 0:
            raise ValueError(
                "Incorrect amount of clusters '{}'. Amount of cluster should be greater than 0.".format(self.__k))
        if self.__alpha < 0:
            raise ValueError(
                "Incorrect compression (k) level '{}'. Compression should not be negative.".format(self.__alpha))
        if self.__c <= 0:
            raise ValueError(
                "Incorrect amount of representatives '{}'. Amount of representatives should be greater than 0.".format(
                    self.__c))

    def get_clusters(self):
        """Return the final list of cluster objects (None before clustering)."""
        return self.__clusters

    def get_indexes(self):
        """Return, per cluster, the indexes of its member points."""
        return [cluster.indexes for cluster in self.__clusters]

    def get_representors(self):
        """Return, per cluster, its shrunken representative points."""
        return self.__representors

    def get_centers(self):
        """Return, per cluster, its center point."""
        return self.__centers