コード例 #1
0
def sparse_current_graph_map_jit(
    heap,
    rows,
    n_neighbors,
    inds,
    indptr,
    data,
    rng_state,
    seed_per_row,
    sparse_dist,
):
    """Top up under-filled heap rows with randomly chosen rows of a CSR matrix.

    For each ``i`` in ``rows`` whose first heap slot is negative
    (``heap[0, i, 0] < 0``), one random row is drawn for every entry of
    ``heap[0, i]`` that is still negative, its distance to row ``i`` is
    computed with ``sparse_dist`` and the result is pushed with
    ``heap_push(heap, i, d, idx, 1)``.

    Parameters
    ----------
    heap
        Heap structure indexed as ``heap[0, i, k]``; modified in place
        through ``heap_push``.
    rows
        Iterable of row indices to process.
    n_neighbors
        Target number of filled entries per heap row.
    inds, indptr, data
        Column indices, row pointer and values of the input; row ``r`` is
        the slice ``indptr[r]:indptr[r + 1]`` of ``inds`` and ``data``.
    rng_state
        Random state; a copy is used so the caller's array is untouched.
    seed_per_row
        When true, the local random state is re-seeded with the row index
        before each row is processed.
    sparse_dist
        Callable ``sparse_dist(inds_a, data_a, inds_b, data_b)`` returning
        the distance between two sparse rows.

    Returns
    -------
    bool
        Always ``True``.
    """
    rng_state_local = rng_state.copy()
    # The number of rows is one less than the length of the row pointer.
    # ``data.shape[0]`` is the number of stored values, not the number of
    # rows, so using it as the modulus could yield an index whose
    # ``indptr[idx + 1]`` is out of range (or never reach some rows).
    n_vertices = indptr.shape[0] - 1
    for i in rows:
        if seed_per_row:
            seed(rng_state_local, i)
        if heap[0, i, 0] < 0.0:
            # Row i's slices do not depend on the draw; take them once.
            from_inds = inds[indptr[i]:indptr[i + 1]]
            from_data = data[indptr[i]:indptr[i + 1]]

            n_missing = n_neighbors - np.sum(heap[0, i] >= 0.0)
            for _ in range(n_missing):
                idx = np.abs(tau_rand_int(rng_state_local)) % n_vertices

                to_inds = inds[indptr[idx]:indptr[idx + 1]]
                to_data = data[indptr[idx]:indptr[idx + 1]]

                d = sparse_dist(from_inds, from_data, to_inds, to_data)

                heap_push(heap, i, d, idx, 1)

    return True
コード例 #2
0
def candidates_map_jit(rows, n_neighbors, current_graph, heap_updates, offset,
                       rng_state, seed_per_row):
    """Emit candidate heap updates for every filled slot of the given rows.

    For each ``i`` in ``rows`` and each slot ``j`` whose entry
    ``current_graph[0, i - offset, j]`` is non-negative, a random priority
    is drawn with ``tau_rand`` and two records are written to consecutive
    rows of ``heap_updates``: ``(i, d, idx, isn)`` and ``(idx, d, i, isn)``,
    where ``isn`` is read from ``current_graph[2, i - offset, j]``.

    A copy of ``rng_state`` is used, optionally re-seeded with the row
    index when ``seed_per_row`` is true.

    Returns the number of records written.
    """
    local_rng = rng_state.copy()
    n_written = 0
    for vertex in rows:
        if seed_per_row:
            seed(local_rng, vertex)
        local_row = vertex - offset
        for slot in range(n_neighbors):
            neighbor = current_graph[0, local_row, slot]
            if neighbor < 0:
                # Negative entries are skipped without consuming a draw.
                continue
            flag = current_graph[2, local_row, slot]
            priority = tau_rand(local_rng)

            # The same priority and flag are used for both directions.
            forward = heap_updates[n_written]
            forward[0] = vertex
            forward[1] = priority
            forward[2] = neighbor
            forward[3] = flag

            backward = heap_updates[n_written + 1]
            backward[0] = neighbor
            backward[1] = priority
            backward[2] = vertex
            backward[3] = flag

            n_written += 2
    return n_written
コード例 #3
0
ファイル: threaded.py プロジェクト: sleighsoft/pynndescent
def current_graph_map_jit(
    rows,
    n_vertices,
    n_neighbors,
    data,
    heap_updates,
    rng_state,
    seed_per_row,
    dist,
    dist_args,
):
    """Write random-initialisation heap updates for the given rows.

    For each ``i`` in ``rows``, indices are obtained from
    ``rejection_sample(n_neighbors, n_vertices, rng)``; for each sampled
    index the distance ``dist(data[i], data[index], *dist_args)`` is
    computed and two records are stored in consecutive rows of
    ``heap_updates``: ``(i, d, index, 1)`` and ``(index, d, i, 1)``.

    A copy of ``rng_state`` is used, optionally re-seeded with the row
    index when ``seed_per_row`` is true.

    Returns the number of records written.
    """
    local_rng = rng_state.copy()
    n_written = 0
    for vertex in rows:
        if seed_per_row:
            seed(local_rng, vertex)
        sampled = rejection_sample(n_neighbors, n_vertices, local_rng)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]
            distance = dist(data[vertex], data[other], *dist_args)

            # One record per direction, sharing the same distance.
            forward = heap_updates[n_written]
            forward[0] = vertex
            forward[1] = distance
            forward[2] = other
            forward[3] = 1

            backward = heap_updates[n_written + 1]
            backward[0] = other
            backward[1] = distance
            backward[2] = vertex
            backward[3] = 1

            n_written += 2
    return n_written
コード例 #4
0
def current_graph_map_jit(heap, rows, n_neighbors, data, rng_state,
                          seed_per_row, dist, dist_args):
    """Top up under-filled heap rows with randomly chosen data rows.

    For each ``i`` in ``rows`` whose first heap slot is negative
    (``heap[0, i, 0] < 0``), one random index in ``[0, data.shape[0])`` is
    drawn for every entry of ``heap[0, i]`` that is still negative; the
    distance ``dist(data[i], data[idx], *dist_args)`` is computed and
    pushed with ``heap_push(heap, i, d, idx, 1)``.

    A copy of ``rng_state`` is used, optionally re-seeded with the row
    index when ``seed_per_row`` is true.

    Always returns ``True``.
    """
    local_rng = rng_state.copy()
    for row in rows:
        if seed_per_row:
            seed(local_rng, row)
        if not (heap[0, row, 0] < 0.0):
            # First slot is not negative: leave this row alone.
            continue
        n_missing = n_neighbors - np.sum(heap[0, row] >= 0.0)
        for _ in range(n_missing):
            draw = tau_rand_int(local_rng)
            other = np.abs(draw) % data.shape[0]
            distance = dist(data[row], data[other], *dist_args)
            heap_push(heap, row, distance, other, 1)

    return True
コード例 #5
0
ファイル: pynndescent_.py プロジェクト: batermj/pynndescent
def init_current_graph(
    data, dist, dist_args, n_neighbors, rng_state, seed_per_row=False
):
    """Build a heap initialised with randomly sampled neighbors.

    A heap is created with ``make_heap(data.shape[0], n_neighbors)``. For
    every row ``i`` of ``data``, indices are obtained from
    ``rejection_sample(n_neighbors, data.shape[0], rng_state)`` and, for
    each sampled index, the distance to row ``i`` is pushed onto the heap
    in both directions.

    ``rng_state`` is passed to the helpers directly (no copy is taken);
    when ``seed_per_row`` is true it is re-seeded with the row index before
    each row.

    Returns the populated heap.
    """
    n_points = data.shape[0]
    graph = make_heap(n_points, n_neighbors)
    for row in range(n_points):
        if seed_per_row:
            seed(rng_state, row)
        sampled = rejection_sample(n_neighbors, n_points, rng_state)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]
            distance = dist(data[row], data[other], *dist_args)
            # Push the pair onto both endpoints' heaps.
            heap_push(graph, row, distance, other, 1)
            heap_push(graph, other, distance, row, 1)
    return graph
コード例 #6
0
def sparse_current_graph_map_jit(
    rows,
    n_vertices,
    n_neighbors,
    inds,
    indptr,
    data,
    heap_updates,
    rng_state,
    seed_per_row,
    sparse_dist,
    dist_args,
):
    """Write random-initialisation heap updates for rows of a sparse input.

    For each ``i`` in ``rows``, indices are obtained from
    ``rejection_sample(n_neighbors, n_vertices, rng)``. Row ``r`` of the
    input is the slice ``indptr[r]:indptr[r + 1]`` of ``inds`` and
    ``data``; the distance between row ``i`` and each sampled row is
    computed with ``sparse_dist(..., *dist_args)`` and two records are
    stored in consecutive rows of ``heap_updates``: ``(i, d, index, 1)``
    and ``(index, d, i, 1)``.

    A copy of ``rng_state`` is used, optionally re-seeded with the row
    index when ``seed_per_row`` is true.

    Returns the number of records written.
    """
    local_rng = rng_state.copy()
    n_written = 0
    for vertex in rows:
        if seed_per_row:
            seed(local_rng, vertex)
        sampled = rejection_sample(n_neighbors, n_vertices, local_rng)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]

            src_lo = indptr[vertex]
            src_hi = indptr[vertex + 1]
            dst_lo = indptr[other]
            dst_hi = indptr[other + 1]

            distance = sparse_dist(
                inds[src_lo:src_hi],
                data[src_lo:src_hi],
                inds[dst_lo:dst_hi],
                data[dst_lo:dst_hi],
                *dist_args
            )

            # One record per direction, sharing the same distance.
            forward = heap_updates[n_written]
            forward[0] = vertex
            forward[1] = distance
            forward[2] = other
            forward[3] = 1

            backward = heap_updates[n_written + 1]
            backward[0] = other
            backward[1] = distance
            backward[2] = vertex
            backward[3] = 1

            n_written += 2
    return n_written
コード例 #7
0
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
    seed_per_row=False,
):
    """Build a neighbor heap by random initialisation plus iterative refinement.

    Steps performed, in order:

    1. Create a heap with ``make_heap`` and fill it with randomly sampled
       pairs (both directions), recording every evaluated pair in ``tried``.
    2. If ``rp_tree_init`` is true, call ``init_rp_tree`` with
       ``leaf_array`` and the same ``tried`` set.
    3. For up to ``n_iters`` rounds, obtain "new" and "old" candidate
       arrays from ``new_build_candidates`` and, for each vertex, evaluate
       new/new and new/old candidate pairs not yet in ``tried``, pushing
       each result onto both endpoints' heaps.
    4. Stop early once the accumulated push result ``c`` for a round is at
       most ``delta * n_neighbors * data.shape[0]``.

    Parameters
    ----------
    data
        Indexable by row; ``data.shape[0]`` is the number of points.
    n_neighbors
        Heap width passed to ``make_heap`` and the sampling helpers.
    rng_state
        Random state handed directly to ``seed``, ``rejection_sample`` and
        ``new_build_candidates`` (no local copy is taken here).
    max_candidates
        Number of candidate slots scanned per vertex.
    dist, dist_args
        Distance callable, invoked as ``dist(a, b, *dist_args)``.
    n_iters
        Maximum number of refinement rounds.
    delta
        Early-stopping factor (see step 4).
    rho
        Forwarded unchanged to ``new_build_candidates``.
    rp_tree_init, leaf_array
        Whether to call ``init_rp_tree`` and the leaf array given to it.
    verbose
        Print the round counter each iteration.
    seed_per_row
        Re-seed ``rng_state`` with the row index before sampling each row;
        also forwarded to ``new_build_candidates``.

    Returns
    -------
    The result of ``deheap_sort(current_graph)``.
    """
    n_vertices = data.shape[0]
    # Seeded with a dummy pair; presumably so a JIT compiler can infer the
    # element type of the set — TODO confirm.
    tried = set([(-1, -1)])

    # Random initialisation: every sampled pair is pushed in both directions
    # and remembered so it is not evaluated again later.
    current_graph = make_heap(data.shape[0], n_neighbors)
    for i in range(data.shape[0]):
        if seed_per_row:
            seed(rng_state, i)
        indices = rejection_sample(n_neighbors, data.shape[0], rng_state)
        for j in range(indices.shape[0]):
            d = dist(data[i], data[indices[j]], *dist_args)
            heap_push(current_graph, i, d, indices[j], 1)
            heap_push(current_graph, indices[j], d, i, 1)
            tried.add((i, indices[j]))
            tried.add((indices[j], i))

    if rp_tree_init:
        init_rp_tree(data,
                     dist,
                     dist_args,
                     current_graph,
                     leaf_array,
                     tried=tried)

    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors,
         old_candidate_neighbors) = new_build_candidates(
             current_graph,
             n_vertices,
             n_neighbors,
             max_candidates,
             rng_state,
             rho,
             seed_per_row,
         )

        # Accumulates the values returned by unchecked_heap_push during this
        # round (presumably the number of heap changes — verify in helper).
        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    # Negative entries are skipped.
                    continue
                # new/new pairs: k starts at j, so each unordered pair is
                # visited once and the self pair (p, p) is included.
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    d = dist(data[p], data[q], *dist_args)
                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

                # new/old pairs: every old candidate of vertex i against p.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    d = dist(data[p], data[q], *dist_args)
                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

        # Early termination once a round contributes too little.
        if c <= delta * n_neighbors * data.shape[0]:
            break

    return deheap_sort(current_graph)
コード例 #8
0
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    low_memory=False,
    verbose=False,
    seed_per_row=False,
):
    """Initialise a neighbor heap and refine it with one of two descent loops.

    The heap is created with ``make_heap`` and filled with randomly sampled
    pairs (pushed in both directions); every evaluated pair is recorded in
    a ``tried`` set. If ``rp_tree_init`` is true, ``init_rp_tree`` is then
    called with ``leaf_array`` and that set.

    Refinement is delegated to ``nn_descent_internal_low_memory`` when
    ``low_memory`` is true, otherwise to
    ``nn_descent_internal_high_memory`` (which additionally receives the
    ``tried`` set). Their return values are ignored.

    ``rng_state`` is passed to the helpers directly; no copy is taken.

    Returns the result of ``deheap_sort`` applied to the heap.
    """
    n_points = data.shape[0]
    # Starts with a dummy pair, exactly as the set is later extended.
    tried = set([(-1, -1)])

    graph = make_heap(n_points, n_neighbors)
    for row in range(n_points):
        if seed_per_row:
            seed(rng_state, row)
        sampled = rejection_sample(n_neighbors, n_points, rng_state)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]
            distance = dist(data[row], data[other], *dist_args)
            heap_push(graph, row, distance, other, 1)
            heap_push(graph, other, distance, row, 1)
            tried.add((row, other))
            tried.add((other, row))

    if rp_tree_init:
        init_rp_tree(data, dist, dist_args, graph, leaf_array, tried=tried)

    if not low_memory:
        nn_descent_internal_high_memory(
            graph,
            data,
            n_neighbors,
            rng_state,
            tried,
            max_candidates=max_candidates,
            dist=dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )
    else:
        nn_descent_internal_low_memory(
            graph,
            data,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            dist=dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )

    return deheap_sort(graph)