def kdtree_knn_search(root: Node, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Search the kd-tree below ``root`` for the neighbours of ``query``.

    Points stored in a leaf are scored by Euclidean distance to ``query`` and
    offered to ``result_set.add_point``. At an internal node the child on the
    query's side of the split is searched first; the opposite child is only
    searched when the distance to the splitting value is smaller than
    ``result_set.worst_dist``.

    Returns whatever the recursion propagates; both terminal cases (missing
    node and leaf) return False.
    """
    # Case 1: the node does not exist.
    if root is None:
        return False

    # Case 2: leaf node -- compare the query against every stored point.
    if root.is_leaf():
        candidates = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - candidates, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        return False

    # Case 3: internal node -- pick the child on the query's side first.
    if root.value >= query[root.axis]:
        near_child, far_child = root.left, root.right
    else:
        near_child, far_child = root.right, root.left

    if kdtree_knn_search(near_child, db, result_set, query):
        return True
    if math.fabs(root.value - query[root.axis]) < result_set.worst_dist:
        # The current worst distance still reaches across the split.
        return kdtree_knn_search(far_child, db, result_set, query)
    # Neither subtree asked to stop; hand control back to the caller.
    return False
def kdtree_knn_search(root: Node, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Recursively collect the nearest neighbours of ``query`` into ``result_set``.

    A leaf contributes all of its points (Euclidean distance, point index)
    through ``result_set.add_point``. An internal node searches the side of
    the split that contains the query, then the other side only if the
    distance to the splitting value is below ``result_set.worstDist()``.

    Always returns False.
    """
    if root is None:
        return False

    if root.is_leaf():
        # Compare the query with every point stored in this leaf.
        candidates = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - candidates, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        return False

    # Search the half that contains the query first, then the other half
    # when the current worst distance still crosses the split.
    query_on_left = query[root.axis] <= root.value
    near_child, far_child = (
        (root.left, root.right) if query_on_left else (root.right, root.left)
    )
    kdtree_knn_search(near_child, db, result_set, query)
    if math.fabs(query[root.axis] - root.value) < result_set.worstDist():
        kdtree_knn_search(far_child, db, result_set, query)

    return False
def kdtree_knn_search(root: Node, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Fill ``result_set`` with the nearest neighbours of ``query`` in the kd-tree.

    Leaves push every stored point (distance, index) into the result set.
    Internal nodes descend into the query's side of the split and visit the
    opposite side only while the distance to the split is smaller than
    ``result_set.get_worst_dist()``.

    Always returns False.
    """
    if root is None:
        return False

    if root.is_leaf():
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for slot, distance in enumerate(distances):
            result_set.add_point(distance, root.point_indices[slot])
        return False

    # Distance from the query to the splitting value along the split axis.
    split_gap = math.fabs(query[root.axis] - root.value)

    if query[root.axis] <= root.value:
        first, second = root.left, root.right
    else:
        first, second = root.right, root.left

    kdtree_knn_search(first, db, result_set, query)
    if split_gap < result_set.get_worst_dist():
        kdtree_knn_search(second, db, result_set, query)
    return False
def octree_knn_search(root: Octant, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Search the octree below ``root`` for the nearest neighbours of ``query``.

    Args:
        root: octant to search; ``None`` is treated as an empty subtree.
        db: point array indexed by the octants' ``point_indices``.
        result_set: receives ``add_point(distance, index)`` for each candidate.
        query: query point; its first three components are compared with
            ``root.center``.

    Returns:
        True when the search can stop, i.e. when ``inside(query, worst, octant)``
        reported true for a visited octant (presumably: the worst-distance
        ball lies entirely inside that octant -- confirm against ``inside``);
        False otherwise.
    """
    if root is None:
        return False

    if root.is_leaf and len(root.point_indices) > 0:
        # Compare the query with every point stored in this leaf.
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        # Check whether the search can stop now.
        return inside(query, result_set.worstDist(), root)

    # Child index: one bit per axis, set when the query is above the centre.
    morton_code = 0
    for axis, bit in enumerate((1, 2, 4)):
        if query[axis] > root.center[axis]:
            morton_code |= bit

    # Search the child that contains the query first.
    if octree_knn_search(root.children[morton_code], db, result_set, query):
        return True

    # Then the remaining children that the worst-distance ball still reaches.
    for c, child in enumerate(root.children):
        if c == morton_code or child is None:
            continue
        if not overlaps(query, result_set.worstDist(), child):
            continue
        if octree_knn_search(child, db, result_set, query):
            return True

    return inside(query, result_set.worstDist(), root)
def knn_search(root: Node, result_set: KNNResultSet, key):
    """Collect the nearest neighbours of ``key`` in the binary search tree.

    Every visited node is offered to ``result_set`` with distance
    ``|root.key - key|`` and payload ``root.value``. The subtree on the
    query's side is visited first; the other subtree is visited only when the
    distance to the current node is below ``result_set.worstDist()``.

    Returns True once the worst distance reaches 0 (nothing closer can be
    found), otherwise False.
    """
    if root is None:
        return False

    # Step 1: score the current node itself.
    gap = math.fabs(root.key - key)
    result_set.add_point(gap, root.value)

    # Special case: a worst distance of 0 cannot be improved.
    if result_set.worstDist() == 0:
        return True

    # The query lies in the left subtree when it is not larger than the key.
    if root.key >= key:
        near_child, far_child = root.left, root.right
    else:
        near_child, far_child = root.right, root.left

    if knn_search(near_child, result_set, key):
        return True
    if gap < result_set.worstDist():
        # The other subtree may still hold something closer than the worst.
        return knn_search(far_child, result_set, key)
    return False
def kdtree_knn_search(root: Node, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Recursive kd-tree nearest-neighbour search.

    A leaf is not split any further, so all of its points are pushed into
    ``result_set``. An internal node first searches the partition holding the
    query and then, if ``|query[axis] - splitting value|`` is below
    ``result_set.worstDist()``, the other partition as well.

    Always returns False.
    """
    if root is None:
        return False

    if root.is_leaf():
        # Leaf: compare against its contents and feed the result set.
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        return False

    children = (root.left, root.right)
    # Index 0 (left) when the query is on the left of the split, else 1.
    near = 0 if query[root.axis] <= root.value else 1

    kdtree_knn_search(children[near], db, result_set, query)
    if math.fabs(query[root.axis] - root.value) < result_set.worstDist():
        kdtree_knn_search(children[1 - near], db, result_set, query)

    return False
def kdtree_knn_search(root: Node, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Recursively search the kd-tree below ``root`` for neighbours of ``query``.

    Args:
        root: current node; ``None`` is treated as an empty subtree.
        db: point array indexed by the nodes' ``point_indices``.
        result_set: receives ``add_point(distance, index)`` for each leaf
            point and supplies the pruning bound through ``worstDist()``.
        query: query point.

    Returns:
        Always False.
    """
    if root is None:
        return False

    if root.is_leaf():
        # Compare the query with every point stored in this leaf.
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        return False

    # Internal node: this section previously held tree-construction code that
    # referenced undefined names and did not parse. Search the side holding
    # the query first, then the other side if the current worst distance
    # still crosses the splitting value.
    if query[root.axis] <= root.value:
        near_child, far_child = root.left, root.right
    else:
        near_child, far_child = root.right, root.left

    kdtree_knn_search(near_child, db, result_set, query)
    if math.fabs(query[root.axis] - root.value) < result_set.worstDist():
        kdtree_knn_search(far_child, db, result_set, query)

    return False
def knn_search(root: Node, result_set: KNNResultSet, key):
    """Search the binary search tree for the nearest neighbours of ``key``.

    The current node is added to ``result_set`` (distance ``|root.key - key|``,
    payload ``root.value``), which also updates the worst distance. One
    subtree is always explored; the other only if the distance to the current
    node is smaller than ``result_set.worstDist()``.

    Returns True when the worst distance has dropped to 0, else False.
    """
    if root is None:
        return False

    # Score the node itself; this also refreshes the worst distance.
    result_set.add_point(math.fabs(root.key - key), root.value)

    # A worst distance of 0 means there is no need to search any further.
    if result_set.worstDist() == 0:
        return True

    query_on_left = root.key >= key
    first = root.left if query_on_left else root.right
    second = root.right if query_on_left else root.left

    # At least one subtree must be visited.
    if knn_search(first, result_set, key):
        return True

    # The other subtree is only needed if it can beat the worst distance.
    if math.fabs(root.key - key) < result_set.worstDist():
        return knn_search(second, result_set, key)
    return False
def kdtree_knn_search(root: Node, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Gather the nearest neighbours of ``query`` from the kd-tree into ``result_set``.

    Leaf contents are compared with the query and put into the result set.
    At an internal node the left child is searched first when
    ``query[axis]`` is strictly below the splitting value, otherwise the
    right child; the remaining child follows only when
    ``|query[axis] - splitting value|`` is below ``result_set.worstDist()``.

    Always returns False.
    """
    if root is None:
        return False

    if root.is_leaf():
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for slot, distance in enumerate(distances):
            result_set.add_point(distance, root.point_indices[slot])
        return False

    # Visiting order: the partition containing the query comes first.
    if query[root.axis] < root.value:
        visit_order = (root.left, root.right)
    else:
        visit_order = (root.right, root.left)

    kdtree_knn_search(visit_order[0], db, result_set, query)
    if math.fabs(query[root.axis] - root.value) < result_set.worstDist():
        kdtree_knn_search(visit_order[1], db, result_set, query)

    return False
def main():
    """Build a kd-tree from a Velodyne scan and run one kNN and one radius query."""
    # Configuration.
    leaf_size, min_extent = 32, 0.0001
    k, radius = 8, 1
    N = 100000  # number of points to search over

    # Read the data and build the tree.
    db_np = read_velodyne_bin("../000000.bin")
    root = kd_tree_construction(db_np, leaf_size)

    depth, max_depth = [0], [0]
    traverse_kdtree(root, depth, max_depth)

    # The first point of the scan is used as the query for both searches.
    query = db_np[0, :]

    knn_result_set = KNNResultSet(k)
    kd_tree_knn_search(root, db_np, knn_result_set, query)
    print(knn_result_set)

    radius_result_set = RadiusNNResultSet(radius)
    kd_tree_radius_search(root, db_np, radius_result_set, query)
    print(radius_result_set)
def main():
    """Demo: kd-tree kNN and radius search on random points, with a brute-force check."""
    # Configuration.
    db_size, dim = 64, 3
    leaf_size = 4
    k = 1

    db_np = np.random.rand(db_size, dim)
    root = kdtree_construction(db_np, leaf_size=leaf_size)

    depth, max_depth = [0], [0]
    traverse_kdtree(root, depth, max_depth)
    print("tree max depth: %d" % max_depth[0])

    # kNN search through the tree.
    query = np.asarray([0, 0, 0])
    knn_results = KNNResultSet(capacity=k)
    kdtree_knn_search(root, db_np, knn_results, query)
    print(knn_results)

    # Brute-force reference: indices and distances of the k closest points.
    diff = np.linalg.norm(np.expand_dims(query, 0) - db_np, axis=1)
    closest = np.argsort(diff)[0:k]
    print(closest)
    print(diff[closest])

    print("Radius search:")
    query = np.asarray([0, 0, 0])
    radius_results = RadiusNNResultSet(radius=0.5)
    kdtree_radius_search(root, db_np, radius_results, query)
    print(radius_results)
def main():
    """Build a BST from a random permutation and run a kNN and a radius query."""

    def report(title, results):
        # Shared output layout for both searches.
        print(title)
        print('index - distance')
        print(results)

    # Configuration.
    db_size = 100
    k = 5
    radius = 2.0

    data = np.random.permutation(db_size).tolist()

    # The stored value is the position of the key in ``data``; the
    # nearest-neighbour searches report it back.
    root = None
    for position, key in enumerate(data):
        root = insert(root, key, position)
    print("data = ", data)

    query_key = 6

    # Find the k entries closest to query_key.
    knn_results = KNNResultSet(capacity=k)
    knn_search(root, knn_results, query_key)
    report('kNN Search:', knn_results)

    radius_results = RadiusNNResultSet(radius=radius)
    radius_search(root, radius_results, query_key)
    report('Radius NN Search:', radius_results)
def main():
    """Exercise kd-tree traversal, kNN search and radius search on random data."""
    # Generate simulated data.
    db_size, dim = 64, 3
    leaf_size = 4
    k = 8
    db_np = np.random.rand(db_size, dim)
    root = kdtree_construction(db_np, leaf_size=leaf_size)

    # Kd-tree traversal.
    depth, max_depth = [0], [0]
    traverse_kdtree(root, depth, max_depth)
    print("Tree max depth: %d" % max_depth[0])

    # kNN search.
    query = np.asarray([0, 0, 0])
    knn_results = KNNResultSet(capacity=k)
    kdtree_knn_search(root, db_np, knn_results, query)
    print(knn_results)

    # Brute-force reference.
    diff = np.linalg.norm(np.expand_dims(query, 0) - db_np, axis=1)
    closest = np.argsort(diff)[0:k]
    print(closest)
    print(diff[closest])

    # Radius search.
    radius_results = RadiusNNResultSet(radius=0.5)
    kdtree_radius_search(root, db_np, radius_results, query)
    print(radius_results)
def main():
    """Build a BST from a fixed key list and run a kNN and a radius query."""

    def report(title, results):
        # Shared output layout for both searches.
        print(title)
        print('index - distance')
        print(results)

    # Configuration.
    db_size = 100
    k = 5
    radius = 2.0

    # Fixed keys instead of np.random.permutation(db_size).tolist().
    data = [3, 4, 7, 0, 6, 5, 1, 2]

    root = None
    for position, key in enumerate(data):
        root = insert(root, key, position)

    query_key = 6

    knn_results = KNNResultSet(capacity=k)
    knn_search(root, knn_results, query_key)
    report('kNN Search:', knn_results)

    radius_results = RadiusNNResultSet(radius=radius)
    radius_search(root, radius_results, query_key)
    report('Radius NN Search:', radius_results)
def kdtree_contribute_Matrix(S, K):
    """Build an N x N 0/1 matrix marking the K nearest neighbours of each point.

    For every row ``i`` of ``S`` a kNN query is run through a kd-tree built
    over ``S``; entry ``(i, j)`` is set to 1 for each returned index ``j``.
    The diagonal is kept at 0. The matrix is not symmetrised.
    """
    num_points = len(S)
    adjacency = np.zeros((num_points, num_points))

    tree = kdtree.kdtree_construction(S, leaf_size=4)

    for row in range(num_points):
        neighbours = KNNResultSet(capacity=K)
        kdtree.kdtree_knn_search(tree, S, neighbours, S[row])
        for col in neighbours.knn_output_index():
            # A point is never marked as its own neighbour.
            adjacency[row][col] = 0 if row == col else 1

    return adjacency
def main():
    """Check OCTree kNN / RNN search against brute force, then time two RNN variants."""

    def time_rnn(banner, search):
        # Time 100 radius queries at random points with the given search method.
        begin_t = time.time()
        print(banner)
        for _ in range(100):
            query = np.random.rand(3)
            rnn_result_set = RadiusNNResultSet(radius=0.5)
            search(query, rnn_result_set)
        print("\tSearch takes %.3fms\n" % ((time.time() - begin_t) * 1000))

    # Configuration.
    N, D = 64000, 3
    leaf_size = 4
    min_extent = 0.05
    k = 8
    r = 0.372

    # Generate the point cloud and build the tree.
    point_cloud = np.random.rand(N, D)
    octree = OCTree(point_cloud=point_cloud, leaf_size=leaf_size, min_extent=min_extent)

    # Random query test.
    for _ in range(100):
        query = np.random.rand(D)
        dists = np.linalg.norm(point_cloud - query, axis=1)

        # 01 -- kNN, brute force as baseline.
        expected_knn = set(np.argsort(dists)[:k])
        knn_result_set = KNNResultSet(capacity=k)
        octree.knn_search(query, knn_result_set)
        found_knn = {entry.index for entry in knn_result_set.dist_index_list}
        assert expected_knn.issubset(found_knn)

        # 02 -- RNN, brute force as baseline.
        expected_rnn = {i for i, d in enumerate(dists) if d <= r}
        rnn_result_set = RadiusNNResultSet(radius=r)
        octree.rnn_search(query, rnn_result_set)
        found_rnn = {entry.index for entry in rnn_result_set.dist_index_list}
        assert expected_rnn.issubset(found_rnn)

    print('[OCTree kNN & RNN Random Query Test]: Successful')

    time_rnn("[OCTree]: RNN search normal:", octree.rnn_search)
    time_rnn("[OCTree]: RNN search fast:", octree.rnn_fast_search)
def octree_knn_search(root: Octant, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Search the octree below ``root`` for the nearest neighbours of ``query``.

    Args:
        root: octant to search; ``None`` is treated as an empty subtree.
        db: point array indexed by the octants' ``point_indices``.
        result_set: receives ``add_point(distance, index)`` for each candidate.
        query: query point; its first three components are compared with
            ``root.center``.

    Returns:
        True when the search can stop early, False when the caller has to
        keep checking other octants. A True result from any child is passed
        straight up, ending the whole search.
    """
    if root is None:
        return False

    if root.is_leaf and len(root.point_indices) > 0:
        # Compare the query with the contents of the leaf.
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        # Check whether the search can stop now.
        return inside(query, result_set.worstDist(), root)

    # 1. Search the most relevant child first: the one selected by the
    #    Morton code of the query relative to this octant's centre.
    morton_code = 0
    if query[0] > root.center[0]:
        morton_code |= 1
    if query[1] > root.center[1]:
        morton_code |= 2
    if query[2] > root.center[2]:
        morton_code |= 4

    if octree_knn_search(root.children[morton_code], db, result_set, query):
        return True

    # 2. Check the other children, skipping those the worst-distance ball
    #    does not overlap.
    for c, child in enumerate(root.children):
        if c == morton_code or child is None:
            continue
        if not overlaps(query, result_set.worstDist(), child):
            continue
        if octree_knn_search(child, db, result_set, query):
            return True

    # Final check: if this test passes for the current octant, the callers
    # do not need to backtrack into its siblings.
    return inside(query, result_set.worstDist(), root)
def octree_knn_search(root: Octant, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Search the octree below ``root`` for the nearest neighbours of ``query``.

    Args:
        root: octant to search; ``None`` is treated as an empty subtree.
        db: point array indexed by the octants' ``point_indices``.
        result_set: receives ``add_point(distance, index)`` for each candidate.
        query: query point; its first three components are compared with
            ``root.center``.

    Returns:
        True when the search can stop, False when other octants still have
        to be examined.
    """
    if root is None:
        return False

    if root.is_leaf and len(root.point_indices) > 0:
        # Compare the query with the contents of the leaf.
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        # Stop check for this leaf octant: ``inside`` is given the query, the
        # current worst distance and the octant. If it reports true, no other
        # octant needs to be searched.
        return inside(query, result_set.worstDist(), root)

    # Not a leaf: find the child octant the query point belongs to.
    children_idx = 0
    if query[0] > root.center[0]:  # x axis
        children_idx |= 1
    if query[1] > root.center[1]:  # y axis
        children_idx |= 2
    if query[2] > root.center[2]:  # z axis
        children_idx |= 4

    # If that child reports that the search is finished, return right away.
    if octree_knn_search(root.children[children_idx], db, result_set, query):
        return True

    # Otherwise the remaining child octants have to be visited as well.
    for c, child in enumerate(root.children):
        if c == children_idx or child is None:
            continue
        if not overlaps(query, result_set.worstDist(), child):
            continue
        if octree_knn_search(child, db, result_set, query):
            return True

    # Final check of whether the search can stop.
    return inside(query, result_set.worstDist(), root)
def octree_knn_search(root: Octant, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Search the octree below ``root`` for the nearest neighbours of ``query``.

    Args:
        root: octant to search; ``None`` is treated as an empty subtree.
        db: point array indexed by the octants' ``point_indices``.
        result_set: receives ``add_point(distance, index)`` for each candidate.
        query: query point; its first three components are compared with
            ``root.center``.

    Returns:
        True when the search can stop, False when other octants still have
        to be examined.
    """
    if root is None:
        return False

    if root.is_leaf and len(root.point_indices) > 0:
        # Compare the query with the contents of the leaf.
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        # Check whether the search can stop now.
        return inside(query, result_set.worstDist(), root)

    # Locate the query point among the eight children of this octant.
    child_idx = 0
    for axis, bit in enumerate((1, 2, 4)):
        if query[axis] > root.center[axis]:
            child_idx |= bit

    # Recurse into that child; it may already settle the search.
    if octree_knn_search(root.children[child_idx], db, result_set, query):
        return True

    # Not settled in the query's own region: search the other regions.
    for c, child in enumerate(root.children):
        if c == child_idx or child is None:
            # Already searched, or no such child.
            continue
        if not overlaps(query, result_set.worstDist(), child):
            # The worst-distance ball around the query does not reach it.
            continue
        if octree_knn_search(child, db, result_set, query):
            return True

    # Final check of whether the search can stop.
    return inside(query, result_set.worstDist(), root)
def octree_knn_search(root: Octant, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Search the octree below ``root`` for the nearest neighbours of ``query``.

    Args:
        root: octant to search; ``None`` is treated as an empty subtree.
        db: point array indexed by the octants' ``point_indices``.
        result_set: receives ``add_point(distance, index)`` for each candidate.
        query: query point; its first three components are compared with
            ``root.center``.

    Returns:
        True when the search can stop, False when other octants still have
        to be examined.
    """
    if root is None:
        return False

    if root.is_leaf and len(root.point_indices) > 0:
        # Compare the query with the contents of the leaf.
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        # Check whether the search can stop now.
        return inside(query, result_set.worstDist(), root)

    # Determine and search the most relevant child.
    morton_code = (
        (1 if query[0] > root.center[0] else 0)
        | (2 if query[1] > root.center[1] else 0)
        | (4 if query[2] > root.center[2] else 0)
    )
    if octree_knn_search(root.children[morton_code], db, result_set, query):
        return True

    # Check the other children.
    for c, child in enumerate(root.children):
        if c == morton_code or child is None:
            continue
        # Skip an octant that does not overlap the query ball.
        if not overlaps(query, result_set.worstDist(), child):
            continue
        if octree_knn_search(child, db, result_set, query):
            return True

    # Final check of whether the search can stop.
    return inside(query, result_set.worstDist(), root)
def kdtree_knn_search(root: Node, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Recursively search the kd-tree below ``root`` for neighbours of ``query``.

    Args:
        root: current node; ``None`` is treated as an empty subtree.
        db: point array indexed by the nodes' ``point_indices``.
        result_set: receives ``add_point(distance, index)`` for each leaf
            point and supplies the pruning bound through ``worstDist()``.
        query: query point.

    Returns:
        Always False.
    """
    if root is None:
        return False

    if root.is_leaf():
        # Compare the query with every point stored in this leaf.
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        return False

    # Internal node (previously an empty placeholder, so nothing below a
    # non-leaf root was ever visited): search the side holding the query
    # first, then the other side if the current worst distance still crosses
    # the splitting value.
    if query[root.axis] <= root.value:
        near_child, far_child = root.left, root.right
    else:
        near_child, far_child = root.right, root.left

    kdtree_knn_search(near_child, db, result_set, query)
    if math.fabs(query[root.axis] - root.value) < result_set.worstDist():
        kdtree_knn_search(far_child, db, result_set, query)

    return False
def octree_knn_search(root: Octant, db: np.ndarray, result_set: KNNResultSet, query: np.ndarray):
    """Search the octree below ``root`` for the nearest neighbours of ``query``.

    Args:
        root: octant to search; ``None`` is treated as an empty subtree.
        db: point array indexed by the octants' ``point_indices``.
        result_set: receives ``add_point(distance, index)`` for each candidate.
        query: query point; its first three components are compared with
            ``root.center``.

    Returns:
        True when the search can stop, False when other octants still have
        to be examined.
    """
    if root is None:
        return False

    if root.is_leaf and len(root.point_indices) > 0:
        # Compare the query with the contents of the leaf.
        leaf_points = db[root.point_indices, :]
        distances = np.linalg.norm(np.expand_dims(query, 0) - leaf_points, axis=1)
        for distance, point_index in zip(distances, root.point_indices):
            result_set.add_point(distance, point_index)
        # Check whether the search can stop now.
        return inside(query, result_set.worstDist(), root)

    # Internal octant (previously an empty placeholder, so children were
    # never visited). Child index: one bit per axis, set when the query is
    # above the centre on that axis.
    morton_code = 0
    for axis, bit in enumerate((1, 2, 4)):
        if query[axis] > root.center[axis]:
            morton_code |= bit

    # Search the child that contains the query first.
    if octree_knn_search(root.children[morton_code], db, result_set, query):
        return True

    # Then every other existing child the worst-distance ball still reaches.
    for c, child in enumerate(root.children):
        if c == morton_code or child is None:
            continue
        if not overlaps(query, result_set.worstDist(), child):
            continue
        if octree_knn_search(child, db, result_set, query):
            return True

    # Final check of whether the search can stop.
    return inside(query, result_set.worstDist(), root)
def KDTreeBenchmark(root_dir, files, k, leaf_size, radius, feature=None, feature2=None):
    """Benchmark kd-tree construction, kNN, radius and brute-force search.

    Every entry of ``files`` whose name contains ``'bin'`` is loaded with
    ``read_velodyne_bin``; the first point of each scan is used as the query.
    Per-file tree depth and the kNN result are printed, followed by the
    average time per file in milliseconds.

    Args:
        root_dir: directory that the entries of ``files`` are relative to.
        files: iterable of file names.
        k: number of neighbours for the kNN query.
        leaf_size: forwarded to ``kdtree.kdtree_construction``.
        radius: radius for the radius query.
        feature, feature2: forwarded unchanged to
            ``kdtree.kdtree_construction``.

    Returns:
        None. When no file qualifies, a notice is printed instead of the
        averages.
    """
    construction_time_sum = 0
    knn_time_sum = 0
    radius_time_sum = 0
    brute_time_sum = 0
    iteration_num = 0

    for file in files:
        if 'bin' not in file:
            continue
        iteration_num += 1
        db_np = read_velodyne_bin(os.path.join(root_dir, file))

        begin_t = time.time()
        root = kdtree.kdtree_construction(db_np, leaf_size, feature, feature2)
        construction_time_sum += time.time() - begin_t

        query = db_np[0, :]

        begin_t = time.time()
        result_set = KNNResultSet(capacity=k)
        kdtree.kdtree_knn_search(root, db_np, result_set, query)
        knn_time_sum += time.time() - begin_t
        # Printed after the timer stops so console I/O is not counted as
        # search time.
        print("result set from KD Tree\n", result_set)

        begin_t = time.time()
        result_set = RadiusNNResultSet(radius=radius)
        kdtree.kdtree_radius_search(root, db_np, result_set, query)
        radius_time_sum += time.time() - begin_t

        # Brute force: the results are unused, only the elapsed time matters.
        begin_t = time.time()
        diff = np.linalg.norm(np.expand_dims(query, 0) - db_np, axis=1)
        nn_idx = np.argsort(diff)
        nn_dist = diff[nn_idx]
        brute_time_sum += time.time() - begin_t

        depth = [0]
        max_depth = [0]
        kdtree.traverse_kdtree(root, depth, max_depth)
        print("tree depth: %d, max depth: %d" % (depth[0], max_depth[0]))

    if iteration_num == 0:
        # Nothing was benchmarked; averaging would divide by zero.
        print("Kdtree: no 'bin' files found, nothing to benchmark")
        return

    print("Kdtree: build %.3f, knn %.3f, radius %.3f, brute %.3f" % (
        construction_time_sum * 1000 / iteration_num,
        knn_time_sum * 1000 / iteration_num,
        radius_time_sum * 1000 / iteration_num,
        brute_time_sum * 1000 / iteration_num))
def correctness_base():
    """Build a kd-tree from the test file, run one kNN query and print timings."""
    # Alternative input: read_velodyne_bin("000000.bin")[0:100000, :]
    points = read_test_file("../test.txt")
    point_indices = np.array(range(points.shape[0]))
    print(points.shape)

    K = 8
    leaf_size = 32
    radius = 0.5
    query_point = points[1000]

    print("******** KNN search (build with median) ***********")
    start = time.time()
    root = kdtree_construction(points, leaf_size)
    build = time.time()

    knn_result_set = KNNResultSet(K)
    kdtree_knn_search(root, points, knn_result_set, query_point)
    end = time.time()

    build_ms = 1000 * (build - start)
    search_ms = 1000 * (end - build)
    print("KDTree build takes {}ms".format(build_ms))
    print("KDTree KNN search takes {}ms".format(search_ms))
    print("Comparision times = {}".format(knn_result_set.comparision_count))
    knn_result_set.list()
    print("")
def fit(self, data):
    """Run K-means on ``data`` and store the cluster centres in ``self.centers_``.

    Step 1 picks ``self.k_`` distinct rows of ``data`` at random as initial
    centres. Each iteration then assigns every point to its nearest centre
    (1-NN query on a kd-tree built over the centres) and moves each centre
    to the mean of its assigned points. Iteration stops after
    ``self.max_iter_`` rounds or once the summed absolute change of the
    centres is below ``self.tolerance_ * self.k_``.

    A cluster that receives no points keeps its previous centre; taking the
    mean of an empty array would otherwise turn that centre into NaN.

    Sets ``self.fitted`` and finally passes the centres to ``Point_Show``.
    """
    # Step 1: choose K random data points as the initial centres.
    self.centers_ = data[random.sample(range(data.shape[0]), self.k_)]
    old_centers = np.copy(self.centers_)

    # kd-tree configuration: one centre per leaf, one neighbour per query.
    leaf_size = 1
    k = 1

    for _ in range(self.max_iter_):
        # Step 2 (E-step): assign each point to its nearest centre.
        labels = [[] for _ in range(self.k_)]
        root = kdtree.kdtree_construction(self.centers_, leaf_size=leaf_size)
        for i in range(data.shape[0]):
            result_set = KNNResultSet(capacity=k)
            kdtree.kdtree_knn_search(root, self.centers_, result_set, data[i])
            nearest_center = result_set.knn_output_index()[0]
            labels[nearest_center].append(data[i])

        # Step 3 (M-step): move each centre to the mean of its members.
        for i, members in enumerate(labels):
            if not members:
                # Empty cluster: keep the old centre instead of writing NaN.
                continue
            self.centers_[i] = np.array(members).mean(axis=0)

        # Converged when the centres have (almost) stopped moving.
        if np.sum(np.abs(self.centers_ - old_centers)) < self.tolerance_ * self.k_:
            break
        old_centers = np.copy(self.centers_)

    self.fitted = True
    Point_Show(self.centers_)
def main():
    """Compare kd-tree radius search time against a brute-force distance sort."""

    def brute_force(point):
        # Distances to every database point, sorted ascending.
        diff = np.linalg.norm(np.expand_dims(point, 0) - db_np, axis=1)
        order = np.argsort(diff)
        return order, diff[order]

    # Configuration.
    db_size, dim = 1280, 3
    leaf_size = 4
    k = 1

    # db_size x dim array of uniform random numbers in [0, 1).
    db_np = np.random.rand(db_size, dim)
    root = kdtree_construction(db_np, leaf_size)

    # kNN search plus brute-force reference (results are not printed).
    query = np.asarray([0, 0, 0])
    result_set = KNNResultSet(capacity=k)
    kdtree_knn_search(root, db_np, result_set, query)
    nn_idx, nn_dist = brute_force(query)

    print("Radius search:")
    query = np.asarray([7, 3, 4])

    begin_t = time.time()
    result_set = RadiusNNResultSet(radius = 2)
    kdtree_radius_search(root, db_np, result_set, query)
    radius_time_sum = time.time() - begin_t

    begin_t = time.time()
    nn_idx, nn_dist = brute_force(query)
    brute_time_sum = time.time() - begin_t

    print('KD-tree: radius_search/brute',radius_time_sum/brute_time_sum)
def knn_search(root: Node, result_set: KNNResultSet, key):
    """Nearest-neighbour search for ``key`` in a binary search tree.

    Each visited node is added to ``result_set`` with distance
    ``|root.key - key|`` and payload ``root.value``. The branch on the
    query's side is walked first; the opposite branch only when the distance
    to the current node is below ``result_set.worstDist()``.

    Returns True as soon as the worst distance is 0, otherwise False.
    """
    if root is None:
        return False

    # Compare the root itself.
    result_set.add_point(math.fabs(root.key - key), root.value)
    if result_set.worstDist() == 0:
        return True

    # (first, second): left branch first when the key is not above the root.
    branches = (root.left, root.right)
    if root.key < key:
        branches = (root.right, root.left)

    if knn_search(branches[0], result_set, key):
        return True
    if math.fabs(root.key - key) < result_set.worstDist():
        return knn_search(branches[1], result_set, key)
    return False
def main():
    """Exercise octree traversal, kNN search and two radius-search variants on random data."""

    def time_radius_search(banner, search):
        # Time 100 radius queries at random points with the given function.
        begin_t = time.time()
        print(banner)
        for _ in range(100):
            query = np.random.rand(3)
            result_set = RadiusNNResultSet(radius=0.5)
            search(root, db_np, result_set, query)
        print("Search takes %.3fms\n" % ((time.time() - begin_t) * 1000))

    # Generate simulated data.
    db_size, dim = 3000, 3
    leaf_size = 4
    min_extent = 0.0001
    k = 8
    db_np = np.random.rand(db_size, dim)
    root = octree_construction(db_np, leaf_size, min_extent)

    # Octree traversal.
    depth, max_depth = [0], [0]
    traverse_octree(root, depth, max_depth)
    print("Tree max depth: %d" % max_depth[0])

    # kNN search.
    query = np.asarray([0, 0, 0])
    knn_results = KNNResultSet(capacity=k)
    octree_knn_search(root, db_np, knn_results, query)
    print(knn_results)

    # Brute-force reference.
    diff = np.linalg.norm(np.expand_dims(query, 0) - db_np, axis=1)
    closest = np.argsort(diff)[0:k]
    print(closest)
    print(diff[closest])

    # Radius search, normal and fast variants.
    time_radius_search("Radius search normal:", octree_radius_search)
    time_radius_search("Radius search fast:", octree_radius_search_fast)
def main():
    """Check KDTree kNN and RNN search against brute force on random queries."""
    # Configuration.
    N, D = 64000, 3
    leaf_size = 4
    k = 8
    r = 0.372

    point_cloud = np.random.rand(N, D)
    kd_tree = KDTree(point_cloud=point_cloud, init_axis=0, leaf_size=leaf_size)

    # Random query test.
    for _ in range(100):
        query = np.random.rand(D)
        dists = np.linalg.norm(point_cloud - query, axis=1)

        # 01 -- kNN, brute force as baseline.
        expected_knn = set(np.argsort(dists)[:k])
        knn_result_set = KNNResultSet(capacity=k)
        kd_tree.knn_search(query, knn_result_set)
        found_knn = {entry.index for entry in knn_result_set.dist_index_list}
        assert expected_knn.issubset(found_knn)

        # 02 -- RNN, brute force as baseline.
        expected_rnn = {i for i, d in enumerate(dists) if d <= r}
        rnn_result_set = RadiusNNResultSet(radius=r)
        kd_tree.rnn_search(query, rnn_result_set)
        found_rnn = {entry.index for entry in rnn_result_set.dist_index_list}
        assert expected_rnn.issubset(found_rnn)

    print('[KDTree kNN & RNN Random Query Test]: Successful')
def main():
    """Build a kd-tree over random points, report its depth and run one kNN query."""
    # Configuration.
    db_size, dim = 64, 3
    leaf_size = 4
    k = 1

    db_np = np.random.rand(db_size, dim)
    print('db_np = ', db_np)

    root = kdtree_construction(db_np, leaf_size=leaf_size)

    depth, max_depth = [0], [0]
    traverse_kdtree(root, depth, max_depth)
    print("tree max depth: %d" % max_depth[0])

    # Nearest neighbour of the origin.
    query = np.asarray([0, 0, 0])
    knn_results = KNNResultSet(capacity=k)
    kdtree_knn_search(root, db_np, knn_results, query)
    print(knn_results)