def test():
    """Evaluate kd-tree nearest-neighbour classification on a hold-out split.

    Loads ``datingTestSet.txt`` through ``process_data``, holds out the first
    10% of the rows as the test set, builds a kd-tree from the remaining rows
    of the normalized dataset, and labels every held-out row with the label
    of its nearest neighbour in the tree.

    Returns:
        tuple: ``(error_count, test_dataset_size)`` -- the number of held-out
        rows whose predicted label differs from the recorded one, and the
        number of held-out rows that were evaluated.
    """
    data, normalizing, labeling = process_data('datingTestSet.txt')
    normalized_dataset, ranges, min_vals, _max_vals = normalizing
    label_indices, labels = labeling

    # set size of test set
    ho_ratio = 0.1
    m = data.shape[0]
    test_dataset_size = int(m * ho_ratio)

    # Everything after the hold-out rows is used to build the tree.
    training_set = normalized_dataset[test_dataset_size:]
    training_set_label_indices = label_indices[test_dataset_size:]
    tree = kd_tree(training_set)

    error_count = 0
    for i in range(test_dataset_size):
        # find nearest neighbor of the (min/range normalized) held-out row
        normalized_test_node = (data[i] - min_vals) / ranges
        expected_label = labels[label_indices[i]]
        search_results = nn_search_tree(tree, normalized_test_node)

        # Map the returned point back to its row(s) in the training set so
        # that the label recorded for that row can be looked up.
        matching_rows = where(
            (training_set[:, 0] == search_results[0])
            & (training_set[:, 1] == search_results[1])
            & (training_set[:, 2] == search_results[2])
        )[0]
        kd_label = labels[training_set_label_indices[matching_rows]][0]

        # Compare by value: an identity check (`is not`) on two separately
        # looked-up labels reports a mismatch even when the labels are equal.
        if kd_label != expected_label:
            error_count += 1

    return error_count, test_dataset_size
def time_test(self, n, max_depth=10):
    """Time brute-force search against kd-tree search and print a report.

    Generates ``n`` data points and ``n`` query points with ``self.gen_data``,
    then times three ways of answering the queries: ``nn_brute.nn``, a
    ``kd_tree`` built without a depth argument, and a ``kd_tree`` built with
    ``max_depth``. Tree construction is not included in the kd-tree timings.

    If all three result lists agree, the three timings are printed.
    Otherwise an error is printed and ``test_kd.find_mismatch`` is called on
    the first pair of result lists that disagree.

    Args:
        n: Number of data points and number of query points to generate.
        max_depth: Depth argument passed to the second ``kd_tree``.
    """
    # time.clock() was removed in Python 3.8; use perf_counter when it exists
    # and only fall back to clock() on interpreters that lack it.
    timer = getattr(time, "perf_counter", None) or time.clock

    data = self.gen_data(n)
    queries = self.gen_data(n)

    # time brute force
    start = timer()
    brute_res = nn_brute.nn(data, queries)
    brute_time = timer() - start

    # time kd_tree - no max depth
    tree = kd_tree(data)
    start = timer()
    kd_res = [tree.nns(query) for query in queries]
    kd_time = timer() - start

    # time kd_tree - with max depth
    tree = kd_tree(data, max_depth)
    start = timer()
    kd_md_res = [tree.nns(query) for query in queries]
    kd_md_time = timer() - start

    if brute_res == kd_res and kd_res == kd_md_res:
        print("brute time: {0:.2f}".format(brute_time))
        print("kd time: {0:.2f}".format(kd_time))
        print("kd with max_depth time: {0:.2f}".format(kd_md_time))
        return

    flag1 = brute_res == kd_res
    flag2 = kd_res == kd_md_res
    print("ERROR: Mismatch in results")
    print("brute_res == kd_res: {} kd_res == kd_md_res: {}".format(flag1, flag2))
    # Report details for the first pair of result lists that disagree.
    if not flag1:
        test_kd.find_mismatch(brute_res, kd_res)
    else:
        test_kd.find_mismatch(kd_res, kd_md_res)
def test0(self):
    """Check one kd-tree query against the brute-force result.

    Builds a ``kd_tree`` from ``self.gen_data(10, 0, 15)``, queries it for
    the point ``[7, 7]`` and compares the answer with ``nn_brute.nn`` on the
    same data. Prints ``TEST 0: PASS`` or ``TEST 0: FAIL``; when the
    module-level ``DEBUG`` flag is truthy, both results are printed before
    the pass message.
    """
    # Fixed data set that can be swapped in while debugging:
    # data = [[1,1], [2,2], [3,3], [4,4], [5,5], [6,6]]
    data = self.gen_data(10, 0, 15)
    queries = [[7, 7]]

    tree = kd_tree(data)
    kd_res = tree.nns(queries[0])
    brute_res = nn_brute.nn(data, queries)

    # nn_brute.nn answers a list of queries, so wrap the single kd result.
    if [kd_res] != brute_res:
        print("TEST 0: FAIL")
        return

    if DEBUG:
        # {0} and {1} are the two coordinates; using {0} twice would print
        # the first coordinate in both positions.
        print("kd_res: ({0:.2f},{1:.2f})".format(*kd_res))
        print("brute_res: ({0:.2f}, {1:.2f})".format(*brute_res[0]))
    print("TEST 0: PASS")
def test_radius_search():
    """Check that ``radius_search`` returns exactly the points inside radius 1.

    Builds a 2-D point cloud around the origin: 10 points scaled by a factor
    drawn from ``random.rand()`` (so they lie inside the unit circle) and
    1000 points scaled by ``random.rand() + 1`` (so they lie on or outside
    it). The cloud is shuffled, indexed with ``kd_tree`` and searched from
    the origin with radius 1. The search must return 10 points and each one
    must be one of the 10 inner points.
    """
    # 10 neighbors
    neighbors_unit_circle = array([[cos(x), sin(x)] for x in random.rand(10)])
    expected_neighbors = apply_along_axis(
        lambda x: random.rand() * x, 1, neighbors_unit_circle)

    # 1000 points pushed out to a distance of at least 1 from the origin
    randoms_unit_circle = array([[cos(x), sin(x)] for x in random.rand(1000)])
    other_points = apply_along_axis(
        lambda x: (random.rand() + 1) * x, 1, randoms_unit_circle)

    point_cloud = append(expected_neighbors, other_points, axis=0)
    random.shuffle(point_cloud)

    node = kd_tree(point_cloud)
    neighbors = radius_search([0., 0.], 1., node)

    assert len(neighbors) == 10
    for neighbor in neighbors:
        # `neighbor in expected_neighbors` on a 2-D array is true as soon as
        # any single coordinate matches; require a whole row to match.
        # NOTE(review): assumes each returned neighbor is a coordinate pair
        # -- confirm against radius_search.
        assert (expected_neighbors == array(neighbor)).all(axis=1).any()
# NOTE(review): this fragment continues a script whose beginning is not
# visible here; x1, y1, ax, sigma1 and count are defined earlier.

# Draw `count` normally distributed samples for the second cluster
# (centred on x=3, y=4) and the third cluster (centred on x=4.5, y=2.5).
x2 = np.random.normal(3, sigma1, count)
y2 = np.random.normal(4, sigma2, count)
x3 = np.random.normal(4.5, sigma1, count)
y3 = np.random.normal(2.5, sigma2, count)

# Plot each cluster with its own colour/marker.
ax.scatter(x1, y1, c='b', marker='s', s=10, alpha=0.7)
ax.scatter(x2, y2, c='r', marker='^', s=10, alpha=0.7)
ax.scatter(x3, y3, c='g', s=10, alpha=0.7)

# Random query point, drawn as a large magenta star.
point = [np.random.normal(5, 0.6), np.random.normal(5, 0.5)]
ax.scatter(*point, c='m', marker='*', s=100, alpha=1.0)

# Stack the three clusters into one (x, y) array and index it.
points = np.c_[np.r_[x1, x2, x3], np.r_[y1, y2, y3]]
# The second argument lists 1, 2 or 3 for each point in cluster order --
# presumably per-point class labels; verify against kd_tree.
tree = kd_tree(points, [1] * count + [2] * count + [3] * count)

# Circle colours, cycled through by index in show_closests.
colors = ['r', 'y', 'g', 'b', 'm', 'c', 'k']


def show_closests(k, i):
    """Draw a circle around ``point`` for each result of a ``k`` query.

    Calls ``closest(tree, point, k=k)`` and, for every ``(d, node)`` pair in
    the first returned value, adds an unfilled ``Circle`` of radius ``d``
    centred on ``point`` to ``ax``. ``i`` selects the circle colour from
    ``colors`` (wrapping around).
    """
    closest_points, _, _ = closest(tree, point, k=k)
    # First element of the last pair; presumably the largest distance if the
    # results are ordered nearest-first -- TODO confirm against closest().
    max_dist = closest_points[-1][0]
    print("draw circle with radius {0}".format(max_dist))
    for d, node in closest_points:
        # print("point = {0}, distance = {1}".format(node, d))
        ax.add_patch(
            Circle(point, d, color=colors[i % len(colors)], fill=False))


# Draw the circle for the single nearest neighbour using the first colour.
show_closests(1, 0)