Beispiel #1
0
def initialise_search(forest, data, query_points, n_neighbors, rng_state, dist):
    """Build the starting candidate heap for a batch of query points.

    A heap with one row per query point (``query_points.shape[0]``) and
    ``n_neighbors`` slots is created with ``make_heap``. It is handed to
    ``init_from_random`` and then, when a forest is supplied, to
    ``init_from_tree`` once per tree. The helpers' return values are
    ignored, so they are expected to update the heap in place.

    Parameters
    ----------
    forest : iterable or None
        Trees passed one at a time to ``init_from_tree``. ``None`` skips
        the tree-based step entirely.
    data
        Reference data, forwarded unchanged to both helpers.
    query_points
        Query batch; only ``shape[0]`` is read here, the rest is forwarded.
    n_neighbors : int
        Second argument to ``make_heap``; also forwarded to
        ``init_from_random``.
    rng_state
        Random state forwarded to both helpers.
    dist
        Distance measure forwarded to both helpers.

    Returns
    -------
    The heap created by ``make_heap`` after the helpers have run.
    """
    n_queries = query_points.shape[0]
    heap = make_heap(n_queries, n_neighbors)
    init_from_random(n_neighbors, data, query_points, heap, rng_state, dist)

    # Without a forest the random initialisation is all there is.
    if forest is None:
        return heap

    for rp_tree in forest:
        init_from_tree(rp_tree, data, query_points, heap, rng_state, dist)
    return heap
Beispiel #2
0
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    low_memory=False,
    verbose=False,
):
    """Build a neighbour graph for ``data`` and return it sorted.

    Steps, in order:

    1. Create a heap with ``data.shape[0]`` rows and ``n_neighbors`` slots.
    2. For every point, draw a sample with ``rejection_sample`` and push
       each (point, sample) pair into the heap in both directions,
       recording both orderings of the pair in ``tried``.
    3. If ``rp_tree_init`` is true, call ``init_rp_tree`` with
       ``leaf_array`` and the ``tried`` set.
    4. Run either the low-memory or the high-memory internal descent
       routine. Only the high-memory routine receives ``tried``.
    5. Return ``deheap_sort(current_graph)``.

    Parameters
    ----------
    data
        Indexable by row; ``data.shape[0]`` is the number of points.
    n_neighbors : int
        Heap width and the sample size passed to ``rejection_sample``.
    rng_state
        Random state forwarded to ``rejection_sample`` and the internal
        descent routine.
    max_candidates, n_iters, delta, rho, verbose
        Forwarded unchanged (as keywords) to the internal descent routine.
    dist
        Called as ``dist(data[i], data[j])``; also forwarded to
        ``init_rp_tree`` and the internal descent routine.
    rp_tree_init : bool
        Whether to call ``init_rp_tree`` before the descent.
    leaf_array
        Forwarded to ``init_rp_tree``.
        NOTE(review): with the defaults (``rp_tree_init=True``,
        ``leaf_array=None``) ``None`` is passed through -- confirm that
        ``init_rp_tree`` accepts it.
    low_memory : bool
        Selects ``nn_descent_internal_low_memory`` over
        ``nn_descent_internal_high_memory``.

    Returns
    -------
    Whatever ``deheap_sort`` returns for the final heap.
    """
    n_points = data.shape[0]

    # The set starts with a dummy (-1, -1) pair before any real pair is
    # added -- presumably to fix the element type; confirm before changing.
    tried = set([(-1, -1)])

    current_graph = make_heap(n_points, n_neighbors)
    for point in range(n_points):
        sampled = rejection_sample(n_neighbors, n_points, rng_state)
        for other in sampled:
            distance = dist(data[point], data[other])
            # Push the edge in both directions; the trailing 1 is passed
            # through to heap_push as its flag argument.
            heap_push(current_graph, point, distance, other, 1)
            heap_push(current_graph, other, distance, point, 1)
            tried.add((point, other))
            tried.add((other, point))

    if rp_tree_init:
        init_rp_tree(data, dist, current_graph, leaf_array, tried=tried)

    # Keyword arguments shared by both internal descent variants.
    descent_options = dict(
        max_candidates=max_candidates,
        dist=dist,
        n_iters=n_iters,
        delta=delta,
        rho=rho,
        verbose=verbose,
    )

    if low_memory:
        nn_descent_internal_low_memory(
            current_graph, data, n_neighbors, rng_state, **descent_options
        )
    else:
        # Only the high-memory variant is given the set of tried pairs.
        nn_descent_internal_high_memory(
            current_graph, data, n_neighbors, rng_state, tried, **descent_options
        )

    return deheap_sort(current_graph)
Beispiel #3
0
def init_current_graph(data, dist, n_neighbors, rng_state):
    """Create a neighbour heap seeded from sampled pairs.

    For every row of ``data`` a sample is drawn with ``rejection_sample``;
    the distance between the row and each sampled row is computed with
    ``dist`` and pushed into the heap in both directions.

    Parameters
    ----------
    data
        Indexable by row; ``data.shape[0]`` is the number of points.
    dist
        Called as ``dist(data[i], data[j])``.
    n_neighbors : int
        Heap width and the sample size passed to ``rejection_sample``.
    rng_state
        Random state forwarded to ``rejection_sample``.

    Returns
    -------
    The heap created by ``make_heap`` after all pushes.
    """
    n_points = data.shape[0]
    graph = make_heap(n_points, n_neighbors)

    for point in range(n_points):
        for other in rejection_sample(n_neighbors, n_points, rng_state):
            distance = dist(data[point], data[other])
            # Record the edge for both endpoints; the trailing 1 is passed
            # through to heap_push as its flag argument.
            heap_push(graph, point, distance, other, 1)
            heap_push(graph, other, distance, point, 1)

    return graph
Beispiel #4
0
def sparse_initialise_search(
    forest,
    inds,
    indptr,
    data,
    query_inds,
    query_indptr,
    query_data,
    n_neighbors,
    rng_state,
    sparse_dist,
):
    """Build the starting candidate heap for a batch of sparse queries.

    A heap with ``query_indptr.shape[0] - 1`` rows and ``n_neighbors``
    slots is created with ``make_heap``. It is handed to
    ``sparse_init_from_random`` and then, when a forest is supplied, to
    ``sparse_init_from_tree`` once per tree. The helpers' return values
    are ignored, so they are expected to update the heap in place.

    Parameters
    ----------
    forest : iterable or None
        Trees passed one at a time to ``sparse_init_from_tree``. ``None``
        skips the tree-based step.
    inds, indptr, data
        Sparse representation of the reference data (presumably CSR
        components -- confirm against the caller); forwarded unchanged.
    query_inds, query_indptr, query_data
        Sparse representation of the queries. Only
        ``query_indptr.shape[0]`` is read here; the rest is forwarded.
    n_neighbors : int
        Second argument to ``make_heap``; also forwarded to
        ``sparse_init_from_random``.
    rng_state
        Random state forwarded to both helpers.
    sparse_dist
        Distance measure forwarded to both helpers.

    Returns
    -------
    The heap created by ``make_heap`` after the helpers have run.
    """
    # One heap row per query: the row-pointer array has one more entry
    # than there are rows.
    n_queries = query_indptr.shape[0] - 1
    heap = make_heap(n_queries, n_neighbors)

    sparse_init_from_random(
        n_neighbors, inds, indptr, data,
        query_inds, query_indptr, query_data,
        heap, rng_state, sparse_dist,
    )

    # Without a forest the random initialisation is all there is.
    if forest is None:
        return heap

    for rp_tree in forest:
        sparse_init_from_tree(
            rp_tree, inds, indptr, data,
            query_inds, query_indptr, query_data,
            heap, rng_state, sparse_dist,
        )
    return heap
Beispiel #5
0
def sparse_nn_descent(
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    max_candidates=50,
    sparse_dist=Jvis.sparse.sparse_euclidean,
    n_iters=10,
    delta=0.001,
    rho=0.5,
    low_memory=False,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
):
    """Build a neighbour graph for sparse data and return it sorted.

    Steps, in order:

    1. Create a heap with ``n_vertices`` rows and ``n_neighbors`` slots.
    2. For every vertex, draw a sample with ``rejection_sample`` and push
       each (vertex, sample) pair into the heap in both directions,
       recording both orderings of the pair in ``tried``.
    3. If ``rp_tree_init`` is true, call ``sparse_init_rp_tree`` with
       ``leaf_array`` and the ``tried`` set.
    4. Run either the low-memory or the high-memory internal descent
       routine. Only the high-memory routine receives ``tried``.
    5. Return ``deheap_sort(current_graph)``.

    Parameters
    ----------
    inds, indptr, data
        Sparse representation of the input: the entries of row ``i`` are
        ``inds[indptr[i]:indptr[i + 1]]`` and
        ``data[indptr[i]:indptr[i + 1]]``.
    n_vertices : int
        Number of rows; heap height and the sampling range.
    n_neighbors : int
        Heap width and the sample size passed to ``rejection_sample``.
    rng_state
        Random state forwarded to ``rejection_sample`` and the internal
        descent routine.
    max_candidates, n_iters, delta, rho, verbose
        Forwarded unchanged (as keywords) to the internal descent routine.
    sparse_dist
        Called as ``sparse_dist(inds_a, data_a, inds_b, data_b)``; also
        forwarded to ``sparse_init_rp_tree`` and the descent routine.
    low_memory : bool
        Selects ``sparse_nn_descent_internal_low_memory`` over
        ``sparse_nn_descent_internal_high_memory``.
    rp_tree_init : bool
        Whether to call ``sparse_init_rp_tree`` before the descent.
    leaf_array
        Forwarded to ``sparse_init_rp_tree``.
        NOTE(review): with the defaults (``rp_tree_init=True``,
        ``leaf_array=None``) ``None`` is passed through -- confirm that
        ``sparse_init_rp_tree`` accepts it.

    Returns
    -------
    Whatever ``deheap_sort`` returns for the final heap.
    """
    # The set starts with a dummy (-1, -1) pair before any real pair is
    # added -- presumably to fix the element type; confirm before changing.
    tried = set([(-1, -1)])

    current_graph = make_heap(n_vertices, n_neighbors)
    for i in range(n_vertices):
        indices = rejection_sample(n_neighbors, n_vertices, rng_state)

        # Row i's entries do not depend on the sampled neighbour, so slice
        # them once per vertex rather than once per (vertex, neighbour).
        row_start, row_end = indptr[i], indptr[i + 1]
        from_inds = inds[row_start:row_end]
        from_data = data[row_start:row_end]

        for j in range(indices.shape[0]):
            neighbor = indices[j]

            to_start, to_end = indptr[neighbor], indptr[neighbor + 1]
            to_inds = inds[to_start:to_end]
            to_data = data[to_start:to_end]

            d = sparse_dist(from_inds, from_data, to_inds, to_data)

            # Push the edge in both directions; the trailing 1 is passed
            # through to heap_push as its flag argument.
            heap_push(current_graph, i, d, neighbor, 1)
            heap_push(current_graph, neighbor, d, i, 1)
            tried.add((i, neighbor))
            tried.add((neighbor, i))

    if rp_tree_init:
        sparse_init_rp_tree(
            inds,
            indptr,
            data,
            sparse_dist,
            current_graph,
            leaf_array,
            tried=tried,
        )

    if low_memory:
        sparse_nn_descent_internal_low_memory(
            current_graph,
            inds,
            indptr,
            data,
            n_vertices,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            sparse_dist=sparse_dist,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )
    else:
        # Only the high-memory variant is given the set of tried pairs.
        sparse_nn_descent_internal_high_memory(
            current_graph,
            inds,
            indptr,
            data,
            n_vertices,
            n_neighbors,
            rng_state,
            tried,
            max_candidates=max_candidates,
            sparse_dist=sparse_dist,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )

    return deheap_sort(current_graph)