Ejemplo n.º 1
0
    def initialize_heaps(data, n_neighbors, leaf_array):
        """Seed a graph heap and a search heap from the rows of ``leaf_array``.

        Every pair of entries that share a row of ``leaf_array`` has its
        distance computed with the enclosing scope's ``dist``/``dist_args``
        and is pushed, in both directions, onto both heaps.  A negative
        entry terminates the scan of its row.  A pair that was already
        pushed (in the same order) by an earlier row is skipped.

        Parameters
        ----------
        data : array
            Indexed by the entries of ``leaf_array``; ``data.shape[0]`` sizes
            both heaps.
        n_neighbors : int
            The search heap is created with ``n_neighbors * 2`` slots.
        leaf_array : 2-D integer array
            One row per leaf; negative values mark the end of a row.

        Returns
        -------
        (graph_heap, search_heap)
        """
        n_points = data.shape[0]
        n_leaves = leaf_array.shape[0]
        leaf_width = leaf_array.shape[1]

        graph_heap = make_heap(n_points, 10)
        search_heap = make_heap(n_points, n_neighbors * 2)

        # Seeded with a dummy pair, as in the original construction.
        # NOTE(review): presumably so a compiler can infer the element type.
        tried = set([(-1, -1)])

        for leaf in range(n_leaves):
            for a in range(leaf_width):
                p = leaf_array[leaf, a]
                if p < 0:
                    break
                for b in range(a + 1, leaf_width):
                    q = leaf_array[leaf, b]
                    if q < 0:
                        break

                    pair = (p, q)
                    if pair not in tried:
                        d = dist(data[p], data[q], *dist_args)
                        unchecked_heap_push(graph_heap, p, d, q, 1)
                        unchecked_heap_push(graph_heap, q, d, p, 1)
                        unchecked_heap_push(search_heap, p, d, q, 1)
                        unchecked_heap_push(search_heap, q, d, p, 1)
                        tried.add(pair)

        return graph_heap, search_heap
Ejemplo n.º 2
0
    def initialized_nnd_search(data, indptr, indices, initialization, query_points):
        """Refine an initialised heap by expanding flagged vertices per query.

        For each query point, ``smallest_flagged`` is called repeatedly on
        the query's heap row until it returns ``-1``.  For each vertex it
        yields, the slice ``indices[indptr[vertex]:indptr[vertex + 1]]`` is
        scanned; every candidate that is not the vertex itself, is not ``-1``
        and has not been tried yet for this query is pushed onto the heap
        with its distance to the query.

        Parameters
        ----------
        data : array
            Indexed by candidate ids to obtain the points compared to queries.
        indptr, indices : arrays
            Row-pointer / column-index pair describing each vertex's
            candidate list.
        initialization : heap
            Updated in place and also returned.
        query_points : array
            One row per query.

        Returns
        -------
        The ``initialization`` heap.
        """
        n_queries = query_points.shape[0]

        for i in numba.prange(n_queries):
            query = query_points[i]

            # Start from whatever ids the heap row for this query already holds.
            tried = set(initialization[0, i])

            # Keep expanding until smallest_flagged reports -1.
            vertex = smallest_flagged(initialization, i)
            while vertex != -1:
                row_start = indptr[vertex]
                row_stop = indptr[vertex + 1]
                candidates = indices[row_start:row_stop]

                for j in range(candidates.shape[0]):
                    candidate = candidates[j]
                    if (
                        candidate != vertex
                        and candidate != -1
                        and candidate not in tried
                    ):
                        d = dist(data[candidate], query, *dist_args)
                        unchecked_heap_push(initialization, i, d, candidate, 1)
                        tried.add(candidate)

                vertex = smallest_flagged(initialization, i)

        return initialization
Ejemplo n.º 3
0
 def init_from_random(n_neighbors, data, query_points, heap, rng_state):
     """Push randomly sampled data points onto each query's heap row.

     For every query point, ``rejection_sample`` draws ``n_neighbors``
     indices out of ``data.shape[0]``; each non-negative index is pushed
     onto ``heap`` together with its distance to the query (computed with
     the enclosing scope's ``dist``/``dist_args``).

     ``heap`` is modified in place; nothing is returned.
     """
     n_points = data.shape[0]
     n_queries = query_points.shape[0]

     for query_id in range(n_queries):
         sampled = rejection_sample(n_neighbors, n_points, rng_state)
         for s in range(sampled.shape[0]):
             point_id = sampled[s]
             # Negative ids are skipped rather than pushed.
             if point_id >= 0:
                 d = dist(data[point_id], query_points[query_id], *dist_args)
                 heap_push(heap, query_id, d, point_id, 1)
Ejemplo n.º 4
0
    def init_from_tree(tree, data, query_points, heap, rng_state):
        """Push the points returned by a tree search onto each query's heap row.

        For every query point, ``search_flat_tree`` is called with the
        tree's ``hyperplanes``, ``offsets``, ``children`` and ``indices``;
        each non-negative index it returns is pushed onto ``heap`` together
        with its distance to the query (computed with the enclosing scope's
        ``dist``/``dist_args``).

        ``heap`` is modified in place; nothing is returned.
        """
        n_queries = query_points.shape[0]

        for query_id in range(n_queries):
            query = query_points[query_id]
            leaf_members = search_flat_tree(
                query,
                tree.hyperplanes,
                tree.offsets,
                tree.children,
                tree.indices,
                rng_state,
            )

            for m in range(leaf_members.shape[0]):
                point_id = leaf_members[m]
                # Negative ids are skipped rather than pushed.
                if point_id >= 0:
                    d = dist(data[point_id], query, *dist_args)
                    heap_push(heap, query_id, d, point_id, 1)
Ejemplo n.º 5
0
    def nn_descent(
        data,
        n_neighbors,
        rng_state,
        max_candidates=50,
        n_iters=10,
        delta=0.001,
        rho=0.5,
        rp_tree_init=True,
        leaf_array=None,
        verbose=False,
    ):
        """Iteratively refine a neighbour heap and return it sorted.

        Steps, in order:

        1. A heap with ``n_neighbors`` slots per data point is filled from
           ``rejection_sample`` draws, pushing each sampled pair in both
           directions.
        2. If ``rp_tree_init`` is true, every pair of entries sharing a row
           of ``leaf_array`` is pushed as well (a negative entry ends a row).
        3. Up to ``n_iters`` rounds: ``build_candidates`` yields "new" and
           "old" candidate arrays; each new candidate is paired with the new
           candidates at the same or later slot and with every old
           candidate, and both directions are pushed.  The values returned
           by ``heap_push`` are summed, and the loop stops early once that
           sum is at most ``delta * n_neighbors * data.shape[0]``.

        Parameters
        ----------
        data : array
            Points to build the graph over.
        n_neighbors : int
            Heap slots per point.
        rng_state : array
            Passed to ``rejection_sample`` and ``build_candidates``.
        max_candidates : int
            Number of candidate slots scanned per vertex.
        n_iters : int
            Maximum number of refinement rounds.
        delta : float
            Early-stopping factor (see step 3).
        rho : float
            Forwarded to ``build_candidates``.
        rp_tree_init : bool
            Whether to run step 2.
        leaf_array : 2-D integer array
            Read only when ``rp_tree_init`` is true.
        verbose : bool
            Not read by this function body.

        Returns
        -------
        The result of ``deheap_sort`` applied to the refined heap.
        """
        n_vertices = data.shape[0]

        # --- 1. random initialisation -------------------------------------
        current_graph = make_heap(n_vertices, n_neighbors)
        for v in range(n_vertices):
            sampled = rejection_sample(n_neighbors, n_vertices, rng_state)
            for s in range(sampled.shape[0]):
                w = sampled[s]
                d = dist(data[v], data[w], *dist_args)
                heap_push(current_graph, v, d, w, 1)
                heap_push(current_graph, w, d, v, 1)

        # --- 2. optional initialisation from leaf rows --------------------
        if rp_tree_init:
            n_leaves = leaf_array.shape[0]
            leaf_width = leaf_array.shape[1]
            for leaf in range(n_leaves):
                # The pair set is reset for every leaf row.
                tried = set([(-1, -1)])
                for a in range(leaf_width):
                    p = leaf_array[leaf, a]
                    if p < 0:
                        break
                    for b in range(a + 1, leaf_width):
                        q = leaf_array[leaf, b]
                        if q < 0:
                            break
                        pair = (p, q)
                        if pair not in tried:
                            d = dist(data[p], data[q], *dist_args)
                            heap_push(current_graph, p, d, q, 1)
                            heap_push(current_graph, q, d, p, 1)
                            tried.add(pair)

        # --- 3. refinement rounds -----------------------------------------
        stop_threshold = delta * n_neighbors * n_vertices

        for iteration in range(n_iters):
            new_candidate_neighbors, old_candidate_neighbors = build_candidates(
                current_graph,
                n_vertices,
                n_neighbors,
                max_candidates,
                rng_state,
                rho,
            )

            c = 0
            for i in range(n_vertices):
                for j in range(max_candidates):
                    p = int(new_candidate_neighbors[0, i, j])
                    if p < 0:
                        continue

                    # new x new: slot j onwards (includes pairing p with itself)
                    for k in range(j, max_candidates):
                        q = int(new_candidate_neighbors[0, i, k])
                        if q >= 0:
                            d = dist(data[p], data[q], *dist_args)
                            c += heap_push(current_graph, p, d, q, 1)
                            c += heap_push(current_graph, q, d, p, 1)

                    # new x old: every old slot
                    for k in range(max_candidates):
                        q = int(old_candidate_neighbors[0, i, k])
                        if q >= 0:
                            d = dist(data[p], data[q], *dist_args)
                            c += heap_push(current_graph, p, d, q, 1)
                            c += heap_push(current_graph, q, d, p, 1)

            if c <= stop_threshold:
                break

        return deheap_sort(current_graph)
Ejemplo n.º 6
0
    def nn_descent(
        data,
        n_neighbors,
        rng_state,
        max_candidates=50,
        n_iters=10,
        delta=0.001,
        rho=0.5,
        rp_tree_init=True,
        leaf_array=None,
        verbose=False,
    ):
        """Iteratively refine a neighbour heap and return it sorted.

        Steps, in order:

        1. A heap with ``n_neighbors`` slots per data point is filled from
           ``rejection_sample`` draws, pushing each sampled pair in both
           directions.
        2. If ``rp_tree_init`` is true, every pair of entries sharing a row
           of ``leaf_array`` is pushed as well (a negative entry ends a row).
        3. Up to ``n_iters`` rounds: ``build_candidates`` yields one
           candidate array.  A candidate ``p`` is skipped when it is negative
           or when ``tau_rand(rng_state) < rho``; otherwise it is paired with
           every non-negative candidate ``q`` of the same vertex, provided
           at least one of the two has a truthy entry in plane 2 of the
           candidate array.  Both directions are pushed, the values returned
           by ``heap_push`` are summed, and the loop stops early once that
           sum is at most ``delta * n_neighbors * data.shape[0]``.

        Parameters
        ----------
        data : array
            Points to build the graph over.
        n_neighbors : int
            Heap slots per point.
        rng_state : array
            Passed to ``rejection_sample``, ``build_candidates`` and
            ``tau_rand``.
        max_candidates : int
            Number of candidate slots scanned per vertex.
        n_iters : int
            Maximum number of refinement rounds.
        delta : float
            Early-stopping factor (see step 3).
        rho : float
            Threshold compared against ``tau_rand`` draws (see step 3).
        rp_tree_init : bool
            Whether to run step 2.
        leaf_array : 2-D integer array
            Read only when ``rp_tree_init`` is true.
        verbose : bool
            When true, prints the round counter at the start of each round.

        Returns
        -------
        The result of ``deheap_sort`` applied to the refined heap.
        """
        n_vertices = data.shape[0]

        # --- 1. random initialisation -------------------------------------
        current_graph = make_heap(n_vertices, n_neighbors)
        for v in range(n_vertices):
            sampled = rejection_sample(n_neighbors, n_vertices, rng_state)
            for s in range(sampled.shape[0]):
                w = sampled[s]
                d = dist(data[v], data[w], *dist_args)
                heap_push(current_graph, v, d, w, 1)
                heap_push(current_graph, w, d, v, 1)

        # --- 2. optional initialisation from leaf rows --------------------
        if rp_tree_init:
            n_leaves = leaf_array.shape[0]
            leaf_width = leaf_array.shape[1]
            for leaf in range(n_leaves):
                for a in range(leaf_width):
                    p = leaf_array[leaf, a]
                    if p < 0:
                        break
                    for b in range(a + 1, leaf_width):
                        q = leaf_array[leaf, b]
                        if q < 0:
                            break
                        d = dist(data[p], data[q], *dist_args)
                        heap_push(current_graph, p, d, q, 1)
                        heap_push(current_graph, q, d, p, 1)

        # --- 3. refinement rounds -----------------------------------------
        stop_threshold = delta * n_neighbors * n_vertices

        for iteration in range(n_iters):
            if verbose:
                print("\t", iteration, " / ", n_iters)

            candidate_neighbors = build_candidates(
                current_graph, n_vertices, n_neighbors, max_candidates, rng_state
            )

            c = 0
            for i in range(n_vertices):
                for j in range(max_candidates):
                    p = int(candidate_neighbors[0, i, j])
                    if p < 0:
                        continue
                    # The random draw happens only for valid candidates.
                    if tau_rand(rng_state) < rho:
                        continue

                    for k in range(max_candidates):
                        q = int(candidate_neighbors[0, i, k])
                        if q < 0:
                            continue
                        # Only pairs where at least one side is flagged in
                        # plane 2 are evaluated.
                        if candidate_neighbors[2, i, j] or candidate_neighbors[2, i, k]:
                            d = dist(data[p], data[q], *dist_args)
                            c += heap_push(current_graph, p, d, q, 1)
                            c += heap_push(current_graph, q, d, p, 1)

            if c <= stop_threshold:
                break

        return deheap_sort(current_graph)