コード例 #1
0
ファイル: search_kd_tree.py プロジェクト: gr4vytr0n/ml
def test():
    """Measure the nearest-neighbour error count of the kd-tree search.

    Loads ``datingTestSet.txt`` through ``process_data``, holds out the first
    10% of the rows as the test set, builds a kd-tree from the remaining
    normalized rows and classifies every held-out row by the label of its
    nearest neighbour in the tree.

    Returns:
        tuple: ``(error_count, test_dataset_size)`` -- the number of
        misclassified held-out rows and the number of rows tested.
    """
    data, normalizing, labeling = process_data('datingTestSet.txt')
    # The maximum values are part of the normalizing tuple but are not
    # needed here: normalization only uses the minimum and the range.
    normalized_dataset, ranges, min_vals, _max_vals = normalizing
    label_indices, labels = labeling

    # set size of test set
    ho_ratio = 0.1
    m = data.shape[0]
    test_dataset_size = int(m * ho_ratio)

    training_set = normalized_dataset[test_dataset_size:]
    training_set_label_indices = label_indices[test_dataset_size:]

    tree = kd_tree(training_set)

    error_count = 0
    for i in range(test_dataset_size):
        # Normalize the raw held-out row the same way the training rows were.
        normalized_test_node = (data[i] - min_vals) / ranges
        expected_label = labels[label_indices[i]]

        # find nearest neighbor
        search_results = nn_search_tree(tree, normalized_test_node)

        # Map the returned point back to its row(s) in the training set so
        # its label can be looked up; the first matching row is used.
        matching_rows = where(
            (training_set[:, 0] == search_results[0]) &
            (training_set[:, 1] == search_results[1]) &
            (training_set[:, 2] == search_results[2]))[0]
        kd_label = labels[training_set_label_indices[matching_rows]][0]

        # Compare label *values*.  The previous identity check (`is not`)
        # compared object identity of two separately indexed values rather
        # than their contents.
        if kd_label != expected_label:
            error_count += 1
    return error_count, test_dataset_size
コード例 #2
0
    def time_test(self, n, max_depth=10):
        """Time brute-force search against kd-tree search and compare results.

        Generates ``n`` data points and ``n`` query points with
        ``self.gen_data``, then runs nearest-neighbour search three ways:
        brute force, a kd-tree built without a depth limit, and a kd-tree
        built with ``max_depth``.  Tree construction is not included in the
        kd-tree timings; only the queries are timed.

        Prints the three timings when all results agree; otherwise prints
        which comparison failed and hands the first differing pair of result
        lists to ``test_kd.find_mismatch``.

        Args:
            n: Number of data points and of query points to generate.
            max_depth: Depth limit passed to the third ``kd_tree`` build.
        """
        data = self.gen_data(n)
        queries = self.gen_data(n)

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring short durations.
        # time brute force
        start = time.perf_counter()
        brute_res = nn_brute.nn(data, queries)
        brute_time = time.perf_counter() - start

        # time kd_tree - no max depth
        tree = kd_tree(data)
        start = time.perf_counter()
        kd_res = [tree.nns(query) for query in queries]
        kd_time = time.perf_counter() - start

        # time kd_tree - with max depth
        tree = kd_tree(data, max_depth)
        start = time.perf_counter()
        kd_md_res = [tree.nns(query) for query in queries]
        kd_md_time = time.perf_counter() - start

        flag1 = brute_res == kd_res
        flag2 = kd_res == kd_md_res
        if flag1 and flag2:
            print("brute time: {0:.2f}".format(brute_time))
            print("kd time: {0:.2f}".format(kd_time))
            print("kd with max_depth time: {0:.2f}".format(kd_md_time))
        else:
            print("ERROR: Mismatch in results")
            print("brute_res == kd_res: {}   kd_res == kd_md_res: {}".format(flag1, flag2))
            # Report the first comparison that failed.
            if not flag1:
                test_kd.find_mismatch(brute_res, kd_res)
            else:
                test_kd.find_mismatch(kd_res, kd_md_res)
コード例 #3
0
 def test0(self):
     """Check one kd-tree nearest-neighbour query against brute force.

     Builds a tree from data produced by ``self.gen_data(10, 0, 15)``,
     queries it with the point ``[7, 7]`` and prints ``TEST 0: PASS`` when
     the result equals the brute-force answer, ``TEST 0: FAIL`` otherwise.
     When ``DEBUG`` is truthy the matching results are printed as well.
     """
     # Fixed alternative input: [[1,1], [2,2], [3,3], [4,4], [5,5], [6,6]]
     data = self.gen_data(10, 0, 15)
     queries = [[7,7]]
     tree = kd_tree(data)
     kd_res = tree.nns(queries[0])
     brute_res = nn_brute.nn(data, queries)
     # The brute-force search returns one result per query, so wrap the
     # single kd-tree result before comparing.
     if [kd_res] != brute_res:
         print("TEST 0: FAIL")
     else:
         if DEBUG:
             # Second placeholder must be {1}; {0} twice printed the first
             # coordinate in both positions.
             print("kd_res: ({0:.2f},{1:.2f})".format(*kd_res))
             print("brute_res: ({0:.2f}, {1:.2f})".format(*brute_res[0]))
         print("TEST 0: PASS")
コード例 #4
0
def test_radius_search():
    """Check that a radius search around the origin returns the inner points.

    Builds a point cloud of 10 points scaled by a factor drawn from
    ``random.rand()`` (so they lie inside the unit circle) plus 1000 points
    scaled by ``random.rand() + 1`` (so they lie at distance 1 or more),
    shuffles it, and asserts that ``radius_search`` with radius 1 around
    the origin returns exactly the 10 inner points.
    """
    # 10 neighbors: unit vectors scaled by a random factor below 1.
    neighbors_unit_circle = array([[cos(x), sin(x)] for x in random.rand(10)])
    expected_neighbors = apply_along_axis(lambda x: random.rand() * x, 1,
                                          neighbors_unit_circle)

    # 1000 distractors: unit vectors scaled by a random factor of at least 1.
    randoms_unit_circle = array([[cos(x), sin(x)] for x in random.rand(1000)])
    other_points = apply_along_axis(lambda x: (random.rand() + 1) * x, 1,
                                    randoms_unit_circle)

    point_cloud = append(expected_neighbors, other_points, axis=0)
    random.shuffle(point_cloud)

    node = kd_tree(point_cloud)
    neighbors = radius_search([0., 0.], 1., node)
    assert len(neighbors) == 10
    for neighbor in neighbors:
        # `neighbor in expected_neighbors` on a 2-D array is true as soon as
        # any single coordinate matches; require a whole row to match.
        # NOTE(review): assumes each neighbour is a coordinate pair -- confirm
        # against radius_search's return type.
        assert (expected_neighbors == neighbor).all(axis=1).any()
0
# Second cluster: `count` samples with mean (3, 4) and standard deviations
# sigma1 (x) and sigma2 (y).
x2 = np.random.normal(3, sigma1, count)
y2 = np.random.normal(4, sigma2, count)

# Third cluster: `count` samples with mean (4.5, 2.5) and the same spreads.
x3 = np.random.normal(4.5, sigma1, count)
y3 = np.random.normal(2.5, sigma2, count)

# Plot the three clusters with distinct colours/markers.
# NOTE(review): x1, y1 and ax are defined before this excerpt.
ax.scatter(x1, y1, c='b', marker='s', s=10, alpha=0.7)
ax.scatter(x2, y2, c='r', marker='^', s=10, alpha=0.7)
ax.scatter(x3, y3, c='g', s=10, alpha=0.7)

# Random query point drawn around (5, 5).
point = [np.random.normal(5, 0.6), np.random.normal(5, 0.5)]

# Highlight the query point with a large magenta star.
ax.scatter(*point, c='m', marker='*', s=100, alpha=1.0)

# Stack all x and y coordinates into a two-column array of points.
points = np.c_[np.r_[x1, x2, x3], np.r_[y1, y2, y3]]
# The second argument holds `count` entries each of 1, 2 and 3 -- presumably
# the cluster label of every point (assumes x1/y1 also have `count` samples;
# verify against kd_tree's signature).
tree = kd_tree(points, [1] * count + [2] * count + [3] * count)

# Colour palette that show_closests indexes into (modulo its length).
colors = ['r', 'y', 'g', 'b', 'm', 'c', 'k']


def show_closests(k, i):
    """Draw a circle around ``point`` for each of its ``k`` closest points.

    Queries ``closest(tree, point, k=k)``, prints the distance stored in the
    last returned entry, and adds one unfilled circle per returned entry to
    ``ax``, centred on ``point`` with that entry's distance as radius.

    Args:
        k: Number of closest points requested from ``closest``.
        i: Index into ``colors`` (taken modulo its length) for the circles.
    """
    neighbours, _, _ = closest(tree, point, k=k)
    farthest = neighbours[-1][0]
    print("draw circle with radius {0}".format(farthest))
    ring_color = colors[i % len(colors)]
    for radius, _node in neighbours:
        ring = Circle(point, radius, color=ring_color, fill=False)
        ax.add_patch(ring)


# Draw the circle for the single closest point (k=1) using colors[0].
show_closests(1, 0)