예제 #1
0
    def search_knn(self, point, k, dist=None):
        """Collect up to ``k`` search results for ``point``, nearest first.

        A ``BoundedPriorityQueue`` of capacity ``k`` is handed to
        ``self._search_node`` together with a per-node distance callable;
        whatever ends up in the queue is returned as a list sorted in
        ascending order of the second element of each entry (entries are
        expected to be ``(<KdNode>, distance)`` tuples).

        :param point: the query point, forwarded to the distance callable
            and to ``self._search_node``
        :param k: capacity of the result queue
        :param dist: optional callable ``dist(node.data, point)``; when it
            is ``None`` each node's own ``node.dist(point)`` is used
        :return: list of the queue's items sorted by their second element
        """
        if dist is not None:
            def measure(node):
                # Caller-supplied metric works on the node payload.
                return dist(node.data, point)
        else:
            def measure(node):
                # Fall back to the node's built-in distance method.
                return node.dist(point)

        queue = BoundedPriorityQueue(k)
        self._search_node(point, k, queue, measure)

        def second_field(entry):
            # Entries are (<KdNode>, distance); order by the distance.
            return entry[1]

        ordered = list(queue.items())
        ordered.sort(key=second_field)
        return ordered
    def knn_search(root, query_point, k_neighbors, k_dimensions, distance_func):
        """Find the ``k_neighbors`` tree points closest to ``query_point``.

        The k-d tree is walked depth-first.  At every node the subtree on
        the query's side of the splitting hyperplane is searched first, the
        node itself is offered to a bounded max-heap, and the subtree on the
        far side is searched only if it can still contribute: either the
        heap is not full yet, or the current worst kept distance is larger
        than the query's distance to this node's splitting hyperplane.

        Each node decides about its own far subtree using its own
        hyperplane, so a far subtree is still searched when the near child
        is missing, and pruning is always tested against the plane that
        actually separates the two subtrees.

        Args:
            root: root node of the tree, or ``None``.  Nodes must expose
                ``point``, ``left`` and ``right``.
            query_point: sequence indexable on axes ``0..k_dimensions-1``.
            k_neighbors: capacity passed to ``BoundedPriorityQueue``.
            k_dimensions: number of leading coordinates of ``node.point``
                used for splitting and for the distance computation.
            distance_func: callable ``(split_point, query_point)`` returning
                the distance stored as heap priority.
                NOTE(review): its result is compared directly with the
                absolute per-axis offset to the hyperplane, so it should be
                a plain (not squared) distance -- confirm with callers.

        Returns:
            Tuple ``(nearest_points, distances, num_points_visited)`` where
            the first two come from ``bpq.get_elements()`` and
            ``bpq.get_priorities()`` and the last is 1 plus the number of
            non-root nodes visited.
        """
        bpq = BoundedPriorityQueue(k_neighbors)
        # One-element dict so the nested function can update the counter
        # without ``nonlocal``; it starts at 1 to account for the root.
        num_points_visited = {'count': 1}

        def knn_search_rec(current_kdnode, depth):
            if current_kdnode is None:
                return
            if current_kdnode is not root:
                num_points_visited['count'] += 1

            # determine on which side of this node's splitting hyperplane
            # the query point lies
            split_point = current_kdnode.point[:k_dimensions]
            axis = depth % k_dimensions
            splitting_hyperplane = split_point[axis]

            if query_point[axis] < splitting_hyperplane:
                near_subtree = current_kdnode.left
                far_subtree = current_kdnode.right
            else:
                near_subtree = current_kdnode.right
                far_subtree = current_kdnode.left

            # search the query's own side first so the heap fills with
            # good candidates before any pruning decision is made
            knn_search_rec(near_subtree, depth + 1)

            # keep track of the K nearest distances and corresponding points
            d = distance_func(split_point, query_point)
            bpq.maxheap_insert(d, current_kdnode.point)

            if far_subtree is None:
                return

            if not bpq.is_full():
                # fewer than K candidates so far: the far side must be searched
                knn_search_rec(far_subtree, depth + 1)
                return

            # if the hypersphere around the query (radius = worst kept
            # distance) crosses this node's hyperplane, a nearer neighbor
            # may lie on the other side
            hypersphere_radius = max(bpq.get_priorities())
            hyperplane_distance = abs(splitting_hyperplane - query_point[axis])
            if hypersphere_radius > hyperplane_distance:
                knn_search_rec(far_subtree, depth + 1)

        knn_search_rec(root, 0)
        nearest_points = bpq.get_elements()
        distances = bpq.get_priorities()

        return nearest_points, distances, num_points_visited['count']
    def knn_search(root, query_point, k_neighbors, k_dimensions,
                   distance_func):
        """Return the ``k_neighbors`` points of a k-d tree nearest the query.

        Depth-first search: for each node, descend first into the child on
        the query's side of the node's splitting hyperplane, then offer the
        node to a bounded max-heap, then descend into the opposite child
        only when the heap still has room or the worst kept distance exceeds
        the query's distance to that hyperplane.

        The far-side decision is made by the node that owns the hyperplane,
        which guarantees that a far subtree is not skipped merely because
        the near child is absent, and that pruning is tested against the
        plane that really separates the two subtrees.

        Args:
            root: root node of the tree, or ``None``.  Nodes must expose
                ``point``, ``left`` and ``right``.
            query_point: sequence indexable on axes ``0..k_dimensions-1``.
            k_neighbors: capacity passed to ``BoundedPriorityQueue``.
            k_dimensions: number of leading coordinates of ``node.point``
                used for splitting and for the distance computation.
            distance_func: callable ``(split_point, query_point)`` whose
                result is stored as the heap priority.
                NOTE(review): the result is compared directly against the
                absolute per-axis offset to the hyperplane, so a plain (not
                squared) distance is presumably expected -- confirm.

        Returns:
            Tuple ``(nearest_points, distances, num_points_visited)``; the
            first two are ``bpq.get_elements()`` and
            ``bpq.get_priorities()``, the third is 1 plus the number of
            non-root nodes visited.
        """
        bpq = BoundedPriorityQueue(k_neighbors)
        # Mutable holder lets the nested function bump the counter without
        # ``nonlocal``; the initial 1 accounts for the root.
        num_points_visited = {'count': 1}

        def knn_search_rec(current_kdnode, depth):
            if current_kdnode is None:
                return
            if current_kdnode is not root:
                num_points_visited['count'] += 1

            # locate the query relative to this node's splitting hyperplane
            split_point = current_kdnode.point[:k_dimensions]
            axis = depth % k_dimensions
            splitting_hyperplane = split_point[axis]

            if query_point[axis] < splitting_hyperplane:
                near_subtree = current_kdnode.left
                far_subtree = current_kdnode.right
            else:
                near_subtree = current_kdnode.right
                far_subtree = current_kdnode.left

            # the query's own half-space is searched first
            knn_search_rec(near_subtree, depth + 1)

            # keep track of the K nearest distances and corresponding points
            d = distance_func(split_point, query_point)
            bpq.maxheap_insert(d, current_kdnode.point)

            if far_subtree is None:
                return

            if not bpq.is_full():
                # not enough candidates yet, so nothing may be pruned
                knn_search_rec(far_subtree, depth + 1)
                return

            # a nearer neighbor can only exist across the hyperplane if the
            # hypersphere of the current worst distance reaches over it
            hypersphere_radius = max(bpq.get_priorities())
            hyperplane_distance = abs(splitting_hyperplane - query_point[axis])
            if hypersphere_radius > hyperplane_distance:
                knn_search_rec(far_subtree, depth + 1)

        knn_search_rec(root, 0)
        nearest_points = bpq.get_elements()
        distances = bpq.get_priorities()

        return nearest_points, distances, num_points_visited['count']