def main():
    """Benchmark KD Tree nearest-neighbour search against exhaustive search.

    Runs `test_times` rounds.  Each round builds a KD tree on randomly
    generated data, finds the nearest neighbour of a random target both
    via the tree and via a linear scan of every node, and asserts that
    the two resulting distances agree.  Accumulated wall-clock times for
    both strategies are printed at the end.
    """
    print("Testing KD Tree...")
    test_times = 100
    run_time_1 = run_time_2 = 0
    for _ in range(test_times):
        # Generate dataset randomly
        low = 0
        high = 100
        n_rows = 10000
        n_cols = 3
        X = gen_data(low, high, n_rows, n_cols)
        y = gen_data(low, high, n_rows)
        Xi = gen_data(low, high, n_cols)

        # Build KD Tree
        tree = KDTree()
        tree.build_tree(X, y)

        # KD Tree search
        start = time()
        nd1 = tree.nearest_neighbour_search(Xi)
        run_time_1 += time() - start
        ret1 = get_euclidean_distance(Xi, nd1.split[0])

        # Exhaustive search over every node of the same tree
        start = time()
        nd2 = exhausted_search(tree, Xi)
        run_time_2 += time() - start
        ret2 = get_euclidean_distance(Xi, nd2.split[0])

        # Compare results.  (Fixed "restult" typo in the failure message.)
        assert ret1 == ret2, "target:%s\nresult1:%s\nresult2:%s\ntree:\n%s" % (
            str(Xi), str(nd1), str(nd2), str(tree))
    print("%d tests passed!" % test_times)
    print("KD Tree Search %.2f s" % run_time_1)
    print("Exhausted search %.2f s" % run_time_2)
def exhausted_search(tree, Xi):
    """Linear (breadth-first) search for the nearest neighbour in the tree.

    Visits every node of the tree, so the result is exact and serves as a
    reference for the KD-tree search.

    Arguments:
        tree {KDTree} -- tree whose nodes are all examined.
        Xi {list} -- 1d list with int or float; the query point.

    Returns:
        Node -- the node whose split point is closest to Xi
            (None when the tree has no nodes to visit).
    """
    # deque.popleft() is O(1); the original list.pop(0) was O(n) per pop,
    # making the traversal accidentally quadratic.
    from collections import deque

    dist_best = float('inf')
    nd_best = None
    que = deque([tree.root])
    while que:
        nd = que.popleft()
        dist = get_euclidean_distance(Xi, nd.split[0])
        if dist < dist_best:
            dist_best = dist
            nd_best = nd
        if nd.left is not None:
            que.append(nd.left)
        if nd.right is not None:
            que.append(nd.right)
    return nd_best
def main():
    """Benchmark KNN (KD-tree based) search against exhaustive search.

    Runs `test_times` rounds.  Each round fits a KNeighborsBase model on
    random data, retrieves the k nearest neighbours of a random target
    both via the model and via a linear scan, and asserts that the two
    sorted distance lists agree.  Accumulated wall-clock times for both
    strategies are printed at the end.
    """
    print("Testing KD Tree...")
    test_times = 100
    run_time_1 = run_time_2 = 0
    for _ in range(test_times):
        # Generate dataset randomly
        low = 0
        high = 100
        n_rows = 1000
        n_cols = 2
        X = gen_data(low, high, n_rows, n_cols)
        y = gen_data(low, high, n_rows)
        Xi = gen_data(low, high, n_cols)

        # Build KNN model
        k = 2
        model = KNeighborsBase()
        model.fit(X, y, k_neighbors=k)

        # KD Tree search
        start = time()
        heap = model._knn_search(Xi)
        run_time_1 += time() - start
        ret1 = [get_euclidean_distance(Xi, nd.split[0]) for nd in heap.items]
        ret1.sort()

        # Exhaustive search
        start = time()
        ret2 = exhausted_search(X, Xi, k)
        run_time_2 += time() - start
        ret2 = [get_euclidean_distance(Xi, row) for row in ret2]
        ret2.sort()

        # Compare results.  (Fixed "restult" typo in the failure message.)
        assert ret1 == ret2, "target:%s\nresult1:%s\nresult2:%s\ntree:\n%s" \
            % (Xi, ret1, ret2, model.tree)
    print("%d tests passed!" % test_times)
    print("KNN Search %.2f s" % run_time_1)
    print("Exhausted search %.2f s" % run_time_2)
def exhausted_search(X, Xi):
    """Linear search the nearest neighbour.

    Arguments:
        X {list} -- 2d list with int or float.
        Xi {list} -- 1d list with int or float; the query point.

    Returns:
        list -- the row of X closest to Xi (None when X is empty).
    """
    # min() with a distance key replaces the manual best-so-far loop.
    # Ties keep the first row (same as the original strict '<' test) and
    # default=None preserves the original behaviour on an empty X.
    return min(X, key=lambda row: get_euclidean_distance(Xi, row), default=None)
def exhausted_search(X, Xi, k):
    """Linear search for the k nearest neighbours.

    Arguments:
        X {list} -- 2d list with int or float.
        Xi {list} -- 1d list with int or float; the query point.
        k {int} -- number of neighbours.

    Returns:
        list -- the rows of X for the k nearest neighbours, ordered from
            nearest to farthest (fewer than k rows when len(X) < k).
    """
    # Local import keeps this fix self-contained in one block.
    import heapq

    # heapq.nsmallest is O(n log k) and stable on ties (earlier rows win,
    # matching the original strict '<' selection).  It replaces the original
    # O(n * k) repeated scan whose `i not in idxs` list-membership test was
    # itself O(k), and it degrades gracefully (returns fewer rows) when
    # k > len(X), where the original raised a TypeError via X[None].
    return heapq.nsmallest(k, X, key=lambda row: get_euclidean_distance(Xi, row))