Example #1
0
    def initialized_nnd_search(data, indptr, indices, initialization, query_points):
        """Refine ``initialization`` for every query point via graph search.

        For each row ``i`` of ``query_points`` the loop repeatedly asks
        ``smallest_flagged(initialization, i)`` for a vertex and, for each
        entry of ``indices[indptr[vertex]:indptr[vertex + 1]]`` that has not
        been tried yet, computes its distance to the query and pushes it with
        ``unchecked_heap_push``. The search for a query ends when
        ``smallest_flagged`` returns -1.

        ``dist`` and ``dist_args`` are not parameters; they are taken from the
        enclosing scope.

        Returns the ``initialization`` object that was passed in.
        """
        n_queries = query_points.shape[0]
        for query in numba.prange(n_queries):

            # Everything already listed in this query's row counts as tried.
            seen = set(initialization[0, query])

            # Expand flagged vertices until none is reported for this query.
            vertex = smallest_flagged(initialization, query)
            while vertex != -1:
                neighbors = indices[indptr[vertex] : indptr[vertex + 1]]
                for pos in range(neighbors.shape[0]):
                    candidate = neighbors[pos]
                    # Ignore the vertex itself, -1 entries and repeats.
                    if (
                        candidate != vertex
                        and candidate != -1
                        and candidate not in seen
                    ):
                        d = dist(data[candidate], query_points[query], *dist_args)
                        # NOTE(review): the trailing 1 is presumably the heap
                        # "flag" value -- confirm against unchecked_heap_push.
                        unchecked_heap_push(initialization, query, d, candidate, 1)
                        seen.add(candidate)

                vertex = smallest_flagged(initialization, query)

        return initialization
Example #2
0
def nn_descent_internal_high_memory(
    current_graph,
    data,
    n_neighbors,
    rng_state,
    tried,
    max_candidates=50,
    dist=dist.euclidean,
    n_iters=10,
    delta=0.001,
    rho=0.5,
    verbose=False,
):
    """Run up to ``n_iters`` rounds of NN-descent using a ``tried`` pair set.

    Each round:

    1. briefly drops into object mode to log and sleep for 0.05 s;
    2. rebuilds the new/old candidate arrays with ``new_build_candidates``;
    3. for every vertex, pairs each valid new candidate ``p`` with the new
       candidates from its own slot onwards (so ``p`` is also paired with
       itself) and with every old candidate, skipping pairs already in
       ``tried``;
    4. pushes each fresh pair into ``current_graph`` in both directions and
       records both orientations in ``tried``.

    The loop returns early once the summed return values of
    ``unchecked_heap_push`` for a round are at most
    ``delta * n_neighbors * data.shape[0]``.

    Returns ``None``; ``tried`` is mutated, and ``current_graph`` is
    presumably updated in place by ``unchecked_heap_push``.
    """
    n_vertices = data.shape[0]

    for iteration in range(n_iters):
        with numba.objmode():
            # Object-mode detour: sleeping here is meant to release the GIL
            # for a moment.
            logging.info("(obj mode) high mem nn descent iter.")
            time.sleep(0.05)

        if verbose:
            print("\t", iteration, " / ", n_iters)

        new_candidate_neighbors, old_candidate_neighbors = new_build_candidates(
            current_graph, n_vertices, n_neighbors, max_candidates, rng_state, rho
        )

        n_updates = 0
        for vertex in range(n_vertices):
            for slot in range(max_candidates):
                p = int(new_candidate_neighbors[0, vertex, slot])
                if p < 0:
                    continue

                # New-vs-new pairs; starting at ``slot`` avoids revisiting
                # earlier slots and includes the (p, p) pair.
                for other in range(slot, max_candidates):
                    q = int(new_candidate_neighbors[0, vertex, other])
                    if q < 0:
                        continue
                    if (p, q) in tried:
                        continue

                    distance = dist(data[p], data[q])
                    n_updates += unchecked_heap_push(current_graph, p, distance, q, 1)
                    tried.add((p, q))
                    if p != q:
                        n_updates += unchecked_heap_push(
                            current_graph, q, distance, p, 1
                        )
                        tried.add((q, p))

                # New-vs-old pairs.
                for other in range(max_candidates):
                    q = int(old_candidate_neighbors[0, vertex, other])
                    if q < 0:
                        continue
                    if (p, q) in tried:
                        continue

                    distance = dist(data[p], data[q])
                    n_updates += unchecked_heap_push(current_graph, p, distance, q, 1)
                    tried.add((p, q))
                    if p != q:
                        n_updates += unchecked_heap_push(
                            current_graph, q, distance, p, 1
                        )
                        tried.add((q, p))

        # Too few updates this round: stop iterating.
        if n_updates <= delta * n_neighbors * n_vertices:
            return
Example #3
0
def nn_descent_internal_high_memory(
    current_graph,
    data,
    n_neighbors,
    rng_state,
    tried,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    verbose=False,
):
    """Run up to ``n_iters`` rounds of NN-descent using a ``tried`` pair set.

    Every round rebuilds the new/old candidate arrays with
    ``new_build_candidates``. For each vertex, every valid new candidate
    ``p`` is paired with the new candidates from its own slot onwards (so
    ``p`` is also paired with itself) and with every old candidate. Pairs
    already in ``tried`` are skipped. Otherwise the distance
    ``dist(data[p], data[q], *dist_args)`` is pushed into ``current_graph``
    in both directions and both orientations are added to ``tried``.

    The loop returns early once the summed return values of
    ``unchecked_heap_push`` for a round are at most
    ``delta * n_neighbors * data.shape[0]``.

    Returns ``None``; ``tried`` is mutated, and ``current_graph`` is
    presumably updated in place by ``unchecked_heap_push``.
    """
    n_vertices = data.shape[0]

    for iteration in range(n_iters):
        if verbose:
            print("\t", iteration, " / ", n_iters)

        new_candidate_neighbors, old_candidate_neighbors = new_build_candidates(
            current_graph, n_vertices, n_neighbors, max_candidates, rng_state, rho
        )

        n_updates = 0
        for vertex in range(n_vertices):
            for slot in range(max_candidates):
                p = int(new_candidate_neighbors[0, vertex, slot])
                if p < 0:
                    continue

                # New-vs-new pairs; starting at ``slot`` avoids revisiting
                # earlier slots and includes the (p, p) pair.
                for other in range(slot, max_candidates):
                    q = int(new_candidate_neighbors[0, vertex, other])
                    if q < 0:
                        continue
                    if (p, q) in tried:
                        continue

                    distance = dist(data[p], data[q], *dist_args)
                    n_updates += unchecked_heap_push(current_graph, p, distance, q, 1)
                    tried.add((p, q))
                    if p != q:
                        n_updates += unchecked_heap_push(
                            current_graph, q, distance, p, 1
                        )
                        tried.add((q, p))

                # New-vs-old pairs.
                for other in range(max_candidates):
                    q = int(old_candidate_neighbors[0, vertex, other])
                    if q < 0:
                        continue
                    if (p, q) in tried:
                        continue

                    distance = dist(data[p], data[q], *dist_args)
                    n_updates += unchecked_heap_push(current_graph, p, distance, q, 1)
                    tried.add((p, q))
                    if p != q:
                        n_updates += unchecked_heap_push(
                            current_graph, q, distance, p, 1
                        )
                        tried.add((q, p))

        # Too few updates this round: stop iterating.
        if n_updates <= delta * n_neighbors * n_vertices:
            return
Example #4
0
def sparse_initialized_nnd_search(
    inds,
    indptr,
    data,
    search_indptr,
    search_inds,
    initialization,
    query_inds,
    query_indptr,
    query_data,
    sparse_dist,
    dist_args,
):
    """Refine ``initialization`` for each sparse query row via graph search.

    Query ``i`` is the slice ``query_indptr[i]:query_indptr[i + 1]`` of
    ``query_inds`` / ``query_data``. For that query the loop repeatedly takes
    the vertex returned by ``smallest_flagged(initialization, i)`` and walks
    ``search_inds[search_indptr[vertex]:search_indptr[vertex + 1]]``. Every
    entry that is not the vertex itself, not -1 and not already tried is
    sliced out of ``inds`` / ``data`` via ``indptr``, measured with
    ``sparse_dist(..., *dist_args)`` and pushed with ``unchecked_heap_push``.
    The search for a query stops when ``smallest_flagged`` returns -1.

    Returns the ``initialization`` object that was passed in.
    """
    n_queries = query_indptr.shape[0] - 1
    for query in numba.prange(n_queries):

        # Everything already listed in this query's row counts as tried.
        seen = set(initialization[0, query])

        q_start = query_indptr[query]
        q_stop = query_indptr[query + 1]
        to_inds = query_inds[q_start:q_stop]
        to_data = query_data[q_start:q_stop]

        # Expand flagged vertices until none is reported for this query.
        vertex = smallest_flagged(initialization, query)
        while vertex != -1:
            neighbors = search_inds[search_indptr[vertex]:search_indptr[vertex + 1]]

            for pos in range(neighbors.shape[0]):
                candidate = neighbors[pos]
                if candidate == vertex or candidate == -1 or candidate in seen:
                    continue

                row_start = indptr[candidate]
                row_stop = indptr[candidate + 1]
                from_inds = inds[row_start:row_stop]
                from_data = data[row_start:row_stop]

                d = sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)
                # NOTE(review): the trailing 1 is presumably the heap "flag"
                # value -- confirm against unchecked_heap_push.
                unchecked_heap_push(initialization, query, d, candidate, 1)
                seen.add(candidate)

            vertex = smallest_flagged(initialization, query)

    return initialization
Example #5
0
def sparse_nn_descent_internal_high_memory(
    current_graph,
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    tried,
    max_candidates=50,
    sparse_dist=umap.sparse.sparse_euclidean,
    n_iters=10,
    delta=0.001,
    rho=0.5,
    verbose=False,
):
    """Run up to ``n_iters`` rounds of NN-descent over CSR-style sparse rows.

    Row ``r`` of the data is the slice ``indptr[r]:indptr[r + 1]`` of
    ``inds`` / ``data``. Every round rebuilds the new/old candidate arrays
    with ``new_build_candidates``. For each vertex, every valid new candidate
    ``p`` is paired with the new candidates from its own slot onwards (so
    ``p`` is also paired with itself) and with every old candidate. Pairs
    already in ``tried`` are skipped. Otherwise ``sparse_dist`` is evaluated
    on the two rows, the result is pushed into ``current_graph`` in both
    directions, and both orientations are added to ``tried``.

    The loop returns early once the summed return values of
    ``unchecked_heap_push`` for a round are at most
    ``delta * n_neighbors * n_vertices``.

    Returns ``None``; ``tried`` is mutated, and ``current_graph`` is
    presumably updated in place by ``unchecked_heap_push``.
    """
    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors,
         old_candidate_neighbors) = new_build_candidates(
             current_graph, n_vertices, n_neighbors, max_candidates, rng_state,
             rho)

        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    continue

                # The row for ``p`` does not depend on ``k``: slice it once
                # here instead of on every iteration of both inner loops.
                from_inds = inds[indptr[p]:indptr[p + 1]]
                from_data = data[indptr[p]:indptr[p + 1]]

                # New-vs-new pairs; starting at ``j`` avoids revisiting
                # earlier slots and includes the (p, p) pair.
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(from_inds, from_data, to_inds, to_data)

                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

                # New-vs-old pairs.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(from_inds, from_data, to_inds, to_data)

                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

        # Too few updates this round: stop iterating.
        if c <= delta * n_neighbors * n_vertices:
            return