Esempio n. 1
0
def init_rp_tree_reduce_jit(n_tasks, current_graph, heap_updates, offsets,
                            index):
    """Push one slice of every task's buffered updates onto ``current_graph``.

    For each of the ``n_tasks`` tasks, the rows of ``heap_updates[task]``
    located between ``offsets[task][index]`` (inclusive) and
    ``offsets[task][index + 1]`` (exclusive) are handed to ``heap_push``.
    Columns 0, 2 and 3 of an update row are converted with ``int`` before
    the push; column 1 is forwarded unchanged.
    """
    for task in range(n_tasks):
        task_offsets = offsets[task]
        for pos in range(task_offsets[index], task_offsets[index + 1]):
            update = heap_updates[task, pos]
            row = int(update[0])
            weight = update[1]
            neighbor = int(update[2])
            flag = int(update[3])
            heap_push(current_graph, row, weight, neighbor, flag)
Esempio n. 2
0
def init_current_graph(
    data, dist, dist_args, n_neighbors, rng_state, seed_per_row=False
):
    """Create a heap and seed it with sampled neighbors for every row.

    A heap is created with ``make_heap(data.shape[0], n_neighbors)``. For
    each row, ``rejection_sample`` supplies a set of row indices; the
    distance between the row and each sampled row is computed with
    ``dist(..., *dist_args)`` and pushed in both directions.

    When ``seed_per_row`` is true, ``seed(rng_state, row)`` is called before
    sampling for that row.

    Returns the populated heap.
    """
    n_rows = data.shape[0]
    graph = make_heap(n_rows, n_neighbors)
    for row in range(n_rows):
        if seed_per_row:
            seed(rng_state, row)
        sampled = rejection_sample(n_neighbors, n_rows, rng_state)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]
            distance = dist(data[row], data[other], *dist_args)
            # Record the pair on both endpoints.
            heap_push(graph, row, distance, other, 1)
            heap_push(graph, other, distance, row, 1)
    return graph
Esempio n. 3
0
    def init_from_tree(tree, data, query_points, heap, rng_state):
        """Seed ``heap`` for every query point from a flat-tree search.

        Each query point is passed to ``search_flat_tree`` together with the
        tree's ``hyperplanes``, ``offsets``, ``children`` and ``indices``.
        Every non-negative index that comes back is scored against the query
        with ``dist`` and pushed onto the query's row of ``heap``.

        ``dist`` and ``dist_args`` are not parameters; they are taken from
        the enclosing scope.
        """
        n_queries = query_points.shape[0]
        for query_idx in range(n_queries):
            query = query_points[query_idx]
            candidates = search_flat_tree(
                query,
                tree.hyperplanes,
                tree.offsets,
                tree.children,
                tree.indices,
                rng_state,
            )

            for pos in range(candidates.shape[0]):
                candidate = candidates[pos]
                # Negative entries are skipped.
                if candidate >= 0:
                    distance = dist(data[candidate], query, *dist_args)
                    heap_push(heap, query_idx, distance, candidate, 1)

        return
Esempio n. 4
0
def init_random(n_neighbors, inds, indptr, data, heap, dist, rng_state):
    """Top up heap rows with randomly drawn samples (sparse input).

    The number of samples is ``indptr.shape[0] - 1``. A row is processed
    only when ``heap[0][row, 0]`` is negative. For such a row, the number of
    random draws is ``n_neighbors`` minus the count of non-negative entries
    in ``heap[0][row]`` (evaluated once, before any push). Each draw picks
    ``abs(tau_rand_int(rng_state)) % n_samples``, computes ``dist`` between
    the sparse slices of the drawn sample and of the row, and pushes the
    result onto the row.
    """
    n_samples = indptr.shape[0] - 1
    for row in range(n_samples):
        if not (heap[0][row, 0] < 0.0):
            continue

        n_missing = n_neighbors - np.sum(heap[0][row] >= 0.0)
        for _ in range(n_missing):
            pick = np.abs(tau_rand_int(rng_state)) % n_samples

            from_inds = inds[indptr[pick] : indptr[pick + 1]]
            from_data = data[indptr[pick] : indptr[pick + 1]]

            to_inds = inds[indptr[row] : indptr[row + 1]]
            to_data = data[indptr[row] : indptr[row + 1]]
            distance = dist(from_inds, from_data, to_inds, to_data)

            heap_push(heap, row, distance, pick, 1)

    return
Esempio n. 5
0
def current_graph_map_jit(
    heap,
    rows,
    n_neighbors,
    data,
    rng_state,
    seed_per_row,
    dist,
):
    """Top up the given heap rows with randomly drawn points.

    Works on a private copy of ``rng_state`` so the caller's state is not
    modified by this function. For every ``row`` in ``rows`` (after an
    optional ``seed(copy, row)`` when ``seed_per_row`` is true), the row is
    processed only when ``heap[0][row, 0]`` is negative. The number of draws
    is ``n_neighbors`` minus the count of non-negative entries in
    ``heap[0][row]``; each draw picks
    ``abs(tau_rand_int(copy)) % data.shape[0]`` and pushes its distance to
    the row.

    Always returns ``True``.
    """
    local_rng = rng_state.copy()
    n_points = data.shape[0]
    for row in rows:
        if seed_per_row:
            seed(local_rng, row)
        if not (heap[0][row, 0] < 0.0):
            continue

        n_missing = n_neighbors - np.sum(heap[0][row] >= 0.0)
        for _ in range(n_missing):
            pick = np.abs(tau_rand_int(local_rng)) % n_points
            distance = dist(data[row], data[pick])
            heap_push(heap, row, distance, pick, 1)

    return True
Esempio n. 6
0
def init_rp_tree(data, dist, dist_args, current_graph, leaf_array, tried=None):
    """Push all pairwise distances within each leaf onto ``current_graph``.

    Every row of ``leaf_array`` is scanned left to right; a negative entry
    ends the scan of that row (for both the outer and the inner position).
    For each pair ``(p, q)`` with ``q`` to the right of ``p`` that is not
    already in ``tried``, the distance is computed and pushed on ``p``;
    when ``p != q`` it is pushed on ``q`` as well. Both orderings are then
    recorded in ``tried``.

    If ``tried`` is ``None`` a fresh set containing ``(-1, -1)`` is used;
    otherwise the caller's set is updated in place.
    """
    if tried is None:
        tried = set([(-1, -1)])

    n_leaves = leaf_array.shape[0]
    leaf_size = leaf_array.shape[1]
    for leaf in range(n_leaves):
        for first in range(leaf_size):
            p = leaf_array[leaf, first]
            if p < 0:
                break
            for second in range(first + 1, leaf_size):
                q = leaf_array[leaf, second]
                if q < 0:
                    break
                if (p, q) not in tried:
                    distance = dist(data[p], data[q], *dist_args)
                    heap_push(current_graph, p, distance, q, 1)
                    tried.add((p, q))
                    if p != q:
                        heap_push(current_graph, q, distance, p, 1)
                        tried.add((q, p))
Esempio n. 7
0
def sparse_init_from_tree(
    tree,
    inds,
    indptr,
    data,
    query_inds,
    query_indptr,
    query_data,
    heap,
    rng_state,
    sparse_dist,
    dist_args,
):
    """Seed ``heap`` for every sparse query row from a flat-tree search.

    There are ``query_indptr.shape[0] - 1`` query rows. Each one is sliced
    out of ``query_inds`` / ``query_data`` and passed to
    ``search_sparse_flat_tree`` with the tree's ``hyperplanes``,
    ``offsets``, ``children`` and ``indices``. Every non-negative index
    returned is sliced out of ``inds`` / ``data``, scored against the query
    with ``sparse_dist(..., *dist_args)`` and pushed onto the query's row of
    ``heap``.
    """
    n_queries = query_indptr.shape[0] - 1
    for query in range(n_queries):
        q_start = query_indptr[query]
        q_stop = query_indptr[query + 1]
        to_inds = query_inds[q_start:q_stop]
        to_data = query_data[q_start:q_stop]

        candidates = search_sparse_flat_tree(
            to_inds,
            to_data,
            tree.hyperplanes,
            tree.offsets,
            tree.children,
            tree.indices,
            rng_state,
        )

        for pos in range(candidates.shape[0]):
            candidate = candidates[pos]
            # Negative entries are skipped.
            if candidate < 0:
                continue

            c_start = indptr[candidate]
            c_stop = indptr[candidate + 1]
            from_inds = inds[c_start:c_stop]
            from_data = data[c_start:c_stop]

            distance = sparse_dist(
                from_inds, from_data, to_inds, to_data, *dist_args
            )
            heap_push(heap, query, distance, candidate, 1)

    return
Esempio n. 8
0
def nn_decent_reduce_jit(n_tasks, current_graph, heap_updates, offsets, index):
    """Apply one slice of every task's updates and total the push results.

    For each of the ``n_tasks`` tasks, the rows of ``heap_updates[task]``
    between ``offsets[task][index]`` (inclusive) and
    ``offsets[task][index + 1]`` (exclusive) are pushed onto
    ``current_graph``. The first four columns of each row are passed to
    ``heap_push`` as-is (no integer conversion).

    Returns the sum of the values returned by ``heap_push``.
    """
    total = 0
    for task in range(n_tasks):
        task_offsets = offsets[task]
        for pos in range(task_offsets[index], task_offsets[index + 1]):
            update = heap_updates[task, pos]
            total += heap_push(
                current_graph, update[0], update[1], update[2], update[3]
            )
    return total
Esempio n. 9
0
def candidates_reduce_jit(
    n_tasks,
    current_graph,
    new_candidate_neighbors,
    old_candidate_neighbors,
    heap_updates,
    offsets,
    index,
):
    """Route one slice of every task's updates into the candidate heaps.

    For each task, the update rows between ``offsets[task][index]`` and
    ``offsets[task][index + 1]`` are examined. A row whose column 3 is
    truthy is pushed onto ``new_candidate_neighbors``; otherwise it is
    pushed onto ``old_candidate_neighbors``.

    When a push onto the new-candidate heap returns a positive value,
    column 4 selects an entry of ``current_graph[2]`` to reset to 0:
    a non-negative value ``k`` addresses ``[column 0, k]``, while a negative
    value addresses ``[column 2, -k - 1]``.
    """
    for task in range(n_tasks):
        task_offsets = offsets[task]
        for pos in range(task_offsets[index], task_offsets[index + 1]):
            update = heap_updates[task, pos]

            if not update[3]:
                heap_push(
                    old_candidate_neighbors,
                    int(update[0]),
                    update[1],
                    int(update[2]),
                    int(update[3]),
                )
                continue

            pushed = heap_push(
                new_candidate_neighbors,
                int(update[0]),
                update[1],
                int(update[2]),
                int(update[3]),
            )
            if pushed > 0:
                slot = int(update[4])
                if slot >= 0:
                    vertex = int(update[0])
                else:
                    # Negative slots encode the other endpoint as -slot - 1.
                    vertex = int(update[2])
                    slot = -slot - 1
                current_graph[2, vertex, slot] = 0
Esempio n. 10
0
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
    seed_per_row=False,
):
    """Build a neighbor heap for ``data`` and return it sorted.

    The heap is seeded with ``rejection_sample`` picks for every row,
    optionally extended from ``leaf_array`` through ``init_rp_tree``, and
    then updated for at most ``n_iters`` rounds using the candidate arrays
    returned by ``new_build_candidates``. Every ordered pair whose distance
    has been evaluated is recorded in ``tried`` and never evaluated again.

    Parameters
    ----------
    data : array whose first axis indexes the points.
    n_neighbors : heap width passed to ``make_heap``.
    rng_state : state passed to ``rejection_sample``, ``seed`` and
        ``new_build_candidates``.
    max_candidates : number of candidate slots scanned per vertex.
    dist, dist_args : distance callable and extra positional arguments.
    n_iters : maximum number of update rounds.
    delta : early-stop factor; a round whose accumulated push results are
        at most ``delta * n_neighbors * data.shape[0]`` ends the loop.
    rho : forwarded to ``new_build_candidates``.
    rp_tree_init : when true, ``init_rp_tree`` is called with ``leaf_array``.
    leaf_array : passed to ``init_rp_tree``; only used if ``rp_tree_init``.
    verbose : print the round counter at the start of each round.
    seed_per_row : when true, ``seed(rng_state, i)`` is called before
        sampling for row ``i``; also forwarded to ``new_build_candidates``.

    Returns
    -------
    The result of ``deheap_sort(current_graph)``.
    """
    n_vertices = data.shape[0]
    # Pairs already evaluated; starts with a (-1, -1) placeholder entry.
    tried = set([(-1, -1)])

    # Random initialisation: push each sampled pair in both directions.
    current_graph = make_heap(data.shape[0], n_neighbors)
    for i in range(data.shape[0]):
        if seed_per_row:
            seed(rng_state, i)
        indices = rejection_sample(n_neighbors, data.shape[0], rng_state)
        for j in range(indices.shape[0]):
            d = dist(data[i], data[indices[j]], *dist_args)
            heap_push(current_graph, i, d, indices[j], 1)
            heap_push(current_graph, indices[j], d, i, 1)
            tried.add((i, indices[j]))
            tried.add((indices[j], i))

    if rp_tree_init:
        init_rp_tree(data,
                     dist,
                     dist_args,
                     current_graph,
                     leaf_array,
                     tried=tried)

    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors,
         old_candidate_neighbors) = new_build_candidates(
             current_graph,
             n_vertices,
             n_neighbors,
             max_candidates,
             rng_state,
             rho,
             seed_per_row,
         )

        # c accumulates the return values of the pushes made this round.
        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    continue
                # Pair p with the new candidates from slot j onwards
                # (k == j makes q equal to p).
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    d = dist(data[p], data[q], *dist_args)
                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

                # Pair p with every old candidate of the same vertex.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    d = dist(data[p], data[q], *dist_args)
                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

        # Stop early once a round produces few enough changes.
        if c <= delta * n_neighbors * data.shape[0]:
            break

    return deheap_sort(current_graph)
Esempio n. 11
0
def sparse_nn_descent_internal_low_memory(
        current_graph,
        inds,
        indptr,
        data,
        n_vertices,
        n_neighbors,
        rng_state,
        max_candidates=50,
        sparse_dist=sparse_euclidean,
        dist_args=(),
        n_iters=10,
        delta=0.001,
        rho=0.5,
        verbose=False,
):
    """Run the update rounds on ``current_graph`` in place (sparse input).

    Each round obtains new and old candidate arrays from
    ``new_build_candidates``. For every vertex and every non-negative new
    candidate ``p``, ``p`` is paired with the new candidates from its own
    slot onwards and with all old candidates. Each pair's distance is
    computed with ``sparse_dist`` on the sparse row slices and pushed with
    ``heap_push`` (on ``p``, and on ``q`` when ``p != q``). No record of
    already-evaluated pairs is kept.

    Parameters
    ----------
    current_graph : heap updated in place.
    inds, indptr, data : sparse rows; row ``r`` is the slice
        ``indptr[r]:indptr[r + 1]`` of ``inds`` and ``data``.
    n_vertices : number of vertices to scan.
    n_neighbors : forwarded to ``new_build_candidates`` and used in the
        stopping threshold.
    rng_state : forwarded to ``new_build_candidates``.
    max_candidates : number of candidate slots scanned per vertex.
    sparse_dist, dist_args : distance callable and extra arguments.
    n_iters : maximum number of rounds.
    delta : a round whose accumulated push results are at most
        ``delta * n_neighbors * n_vertices`` ends the function.
    rho : forwarded to ``new_build_candidates``.
    verbose : print the round counter at the start of each round.

    Returns
    -------
    None. The result is the mutated ``current_graph``.
    """
    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors,
         old_candidate_neighbors) = new_build_candidates(
             current_graph,
             n_vertices,
             n_neighbors,
             max_candidates,
             rng_state,
             rho,
             False,
         )

        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    continue

                # p is fixed for both inner loops, so slice its sparse row
                # once instead of once per (p, q) pair.
                from_inds = inds[indptr[p]:indptr[p + 1]]
                from_data = data[indptr[p]:indptr[p + 1]]

                # New candidates from slot j onwards (k == j gives q == p).
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                    *dist_args)

                    c += heap_push(current_graph, p, d, q, 1)
                    if p != q:
                        c += heap_push(current_graph, q, d, p, 1)

                # Every old candidate of the same vertex.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                    *dist_args)

                    c += heap_push(current_graph, p, d, q, 1)
                    if p != q:
                        c += heap_push(current_graph, q, d, p, 1)

        # Stop once a round produces few enough changes.
        if c <= delta * n_neighbors * n_vertices:
            return
Esempio n. 12
0
def sparse_nn_descent(
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    max_candidates=50,
    sparse_dist=sparse_euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    low_memory=False,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
):
    """Build a neighbor heap for sparse rows and return it sorted.

    Steps, in order:

    1. Create a heap and seed every vertex with ``rejection_sample`` picks,
       pushing each pair in both directions and recording both orderings
       in ``tried``.
    2. If ``rp_tree_init`` is true, call ``sparse_init_rp_tree`` with
       ``leaf_array`` and the same ``tried`` set.
    3. Run ``sparse_nn_descent_internal_low_memory`` when ``low_memory`` is
       true, otherwise ``sparse_nn_descent_internal_high_memory`` (which
       additionally receives ``tried``).

    Row ``r`` of the sparse input is the slice ``indptr[r]:indptr[r + 1]``
    of ``inds`` and ``data``.

    Returns the result of ``deheap_sort`` on the heap.
    """

    tried = set([(-1, -1)])

    current_graph = make_heap(n_vertices, n_neighbors)
    for vertex in range(n_vertices):
        sampled = rejection_sample(n_neighbors, n_vertices, rng_state)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]

            from_inds = inds[indptr[vertex]:indptr[vertex + 1]]
            from_data = data[indptr[vertex]:indptr[vertex + 1]]

            to_inds = inds[indptr[other]:indptr[other + 1]]
            to_data = data[indptr[other]:indptr[other + 1]]

            distance = sparse_dist(
                from_inds, from_data, to_inds, to_data, *dist_args
            )

            heap_push(current_graph, vertex, distance, other, 1)
            heap_push(current_graph, other, distance, vertex, 1)
            tried.add((vertex, other))
            tried.add((other, vertex))

    if rp_tree_init:
        sparse_init_rp_tree(
            inds, indptr, data, sparse_dist, dist_args, current_graph,
            leaf_array, tried=tried,
        )

    if not low_memory:
        # The high-memory variant also receives the evaluated-pair set.
        sparse_nn_descent_internal_high_memory(
            current_graph, inds, indptr, data, n_vertices, n_neighbors,
            rng_state, tried,
            max_candidates=max_candidates,
            sparse_dist=sparse_dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )
    else:
        sparse_nn_descent_internal_low_memory(
            current_graph, inds, indptr, data, n_vertices, n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            sparse_dist=sparse_dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )

    return deheap_sort(current_graph)
Esempio n. 13
0
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    low_memory=False,
    verbose=False,
    seed_per_row=False,
):
    """Build a neighbor heap for ``data`` and return it sorted.

    Steps, in order:

    1. Create a heap and seed every row with ``rejection_sample`` picks
       (calling ``seed(rng_state, row)`` first when ``seed_per_row`` is
       true), pushing each pair in both directions and recording both
       orderings in ``tried``.
    2. If ``rp_tree_init`` is true, call ``init_rp_tree`` with
       ``leaf_array`` and the same ``tried`` set.
    3. Run ``nn_descent_internal_low_memory`` when ``low_memory`` is true,
       otherwise ``nn_descent_internal_high_memory`` (which additionally
       receives ``tried``).

    Returns the result of ``deheap_sort`` on the heap.
    """
    tried = set([(-1, -1)])

    n_rows = data.shape[0]
    current_graph = make_heap(n_rows, n_neighbors)
    for row in range(n_rows):
        if seed_per_row:
            seed(rng_state, row)
        sampled = rejection_sample(n_neighbors, n_rows, rng_state)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]
            distance = dist(data[row], data[other], *dist_args)
            heap_push(current_graph, row, distance, other, 1)
            heap_push(current_graph, other, distance, row, 1)
            tried.add((row, other))
            tried.add((other, row))

    if rp_tree_init:
        init_rp_tree(
            data, dist, dist_args, current_graph, leaf_array, tried=tried
        )

    if not low_memory:
        # The high-memory variant also receives the evaluated-pair set.
        nn_descent_internal_high_memory(
            current_graph, data, n_neighbors, rng_state, tried,
            max_candidates=max_candidates,
            dist=dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )
    else:
        nn_descent_internal_low_memory(
            current_graph, data, n_neighbors, rng_state,
            max_candidates=max_candidates,
            dist=dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )

    return deheap_sort(current_graph)
Esempio n. 14
0
def nn_descent(data,
               n_neighbors,
               rng_state,
               max_candidates=50,
               dist=dist.euclidean,
               dist_args=(),
               n_iters=10,
               delta=0.001,
               rho=0.5,
               rp_tree_init=True,
               leaf_array=None,
               verbose=False):
    """Build a neighbor heap for ``data`` and return it sorted.

    The heap is seeded with ``rejection_sample`` picks for every row,
    optionally extended with all pairwise distances inside each row of
    ``leaf_array``, and then updated for at most ``n_iters`` rounds using
    the candidate arrays returned by ``build_candidates``.

    Parameters
    ----------
    data : array whose first axis indexes the points.
    n_neighbors : heap width passed to ``make_heap``.
    rng_state : state passed to ``rejection_sample`` and
        ``build_candidates``.
    max_candidates : number of candidate slots scanned per vertex.
    dist, dist_args : distance callable and extra positional arguments.
    n_iters : maximum number of update rounds.
    delta : a round whose accumulated push results are at most
        ``delta * n_neighbors * data.shape[0]`` ends the loop.
    rho : forwarded to ``build_candidates``.
    rp_tree_init : when true, ``leaf_array`` is used for the leaf-based
        initialisation.
    leaf_array : 2-D array of point indices; a negative entry ends a row.
    verbose : accepted for interface compatibility; not used here.

    Returns
    -------
    The result of ``deheap_sort(current_graph)``.
    """
    n_vertices = data.shape[0]

    # Random initialisation: push each sampled pair in both directions.
    current_graph = make_heap(data.shape[0], n_neighbors)
    for i in range(data.shape[0]):
        indices = rejection_sample(n_neighbors, data.shape[0], rng_state)
        for j in range(indices.shape[0]):
            d = dist(data[i], data[indices[j]], *dist_args)
            heap_push(current_graph, i, d, indices[j], 1)
            heap_push(current_graph, indices[j], d, i, 1)

    if rp_tree_init:
        for n in range(leaf_array.shape[0]):
            # The evaluated-pair record is reset for every leaf.
            tried = set([(-1, -1)])
            for i in range(leaf_array.shape[1]):
                if leaf_array[n, i] < 0:
                    break
                for j in range(i + 1, leaf_array.shape[1]):
                    if leaf_array[n, j] < 0:
                        break
                    if (leaf_array[n, i], leaf_array[n, j]) in tried:
                        continue
                    d = dist(data[leaf_array[n, i]], data[leaf_array[n, j]],
                             *dist_args)
                    heap_push(current_graph, leaf_array[n, i], d,
                              leaf_array[n, j], 1)
                    heap_push(current_graph, leaf_array[n, j], d,
                              leaf_array[n, i], 1)
                    tried.add((leaf_array[n, i], leaf_array[n, j]))
                    tried.add((leaf_array[n, j], leaf_array[n, i]))

    for n in range(n_iters):

        (new_candidate_neighbors, old_candidate_neighbors) = build_candidates(
            current_graph, n_vertices, n_neighbors, max_candidates, rng_state,
            rho)

        # c accumulates the return values of the pushes made this round.
        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    continue
                # New candidates from slot j onwards; k == j gives q == p.
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0:
                        continue

                    d = dist(data[p], data[q], *dist_args)
                    c += heap_push(current_graph, p, d, q, 1)
                    # When p == q the reverse push would repeat the exact
                    # same call, so it is skipped.
                    if p != q:
                        c += heap_push(current_graph, q, d, p, 1)

                # Every old candidate of the same vertex.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0:
                        continue

                    d = dist(data[p], data[q], *dist_args)
                    c += heap_push(current_graph, p, d, q, 1)
                    if p != q:
                        c += heap_push(current_graph, q, d, p, 1)

        # Stop early once a round produces few enough changes.
        if c <= delta * n_neighbors * data.shape[0]:
            break

    return deheap_sort(current_graph)
Esempio n. 15
0
def sparse_nn_descent(
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    max_candidates=50,
    sparse_dist=sparse_euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
):
    """Build a neighbor heap for sparse rows and return it sorted.

    The heap is seeded with ``rejection_sample`` picks for every vertex,
    optionally extended from ``leaf_array`` through ``sparse_init_rp_tree``,
    and then updated for at most ``n_iters`` rounds using the candidate
    arrays returned by ``new_build_candidates``. Every ordered pair whose
    distance has been evaluated is recorded in ``tried`` and never
    evaluated again.

    Parameters
    ----------
    inds, indptr, data : sparse rows; row ``r`` is the slice
        ``indptr[r]:indptr[r + 1]`` of ``inds`` and ``data``.
    n_vertices : number of rows.
    n_neighbors : heap width passed to ``make_heap``.
    rng_state : state passed to ``rejection_sample`` and
        ``new_build_candidates``.
    max_candidates : number of candidate slots scanned per vertex.
    sparse_dist, dist_args : distance callable and extra arguments.
    n_iters : maximum number of update rounds.
    delta : a round whose accumulated push results are at most
        ``delta * n_neighbors * n_vertices`` ends the loop.
    rho : forwarded to ``new_build_candidates``.
    rp_tree_init : when true, ``sparse_init_rp_tree`` is called with
        ``leaf_array``.
    leaf_array : passed to ``sparse_init_rp_tree``.
    verbose : print the round counter at the start of each round.

    Returns
    -------
    The result of ``deheap_sort(current_graph)``.
    """

    # Pairs already evaluated; starts with a (-1, -1) placeholder entry.
    tried = set([(-1, -1)])

    # Random initialisation: push each sampled pair in both directions.
    current_graph = make_heap(n_vertices, n_neighbors)
    for i in range(n_vertices):
        indices = rejection_sample(n_neighbors, n_vertices, rng_state)
        for j in range(indices.shape[0]):

            from_inds = inds[indptr[i]:indptr[i + 1]]
            from_data = data[indptr[i]:indptr[i + 1]]

            to_inds = inds[indptr[indices[j]]:indptr[indices[j] + 1]]
            to_data = data[indptr[indices[j]]:indptr[indices[j] + 1]]

            d = sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)

            heap_push(current_graph, i, d, indices[j], 1)
            heap_push(current_graph, indices[j], d, i, 1)
            tried.add((i, indices[j]))
            tried.add((indices[j], i))

    if rp_tree_init:
        sparse_init_rp_tree(
            inds,
            indptr,
            data,
            sparse_dist,
            dist_args,
            current_graph,
            leaf_array,
            tried=tried,
        )

    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors,
         old_candidate_neighbors) = new_build_candidates(
             current_graph,
             n_vertices,
             n_neighbors,
             max_candidates,
             rng_state,
             rho,
             False,
         )

        # c accumulates the return values of the pushes made this round.
        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    continue
                # Pair p with the new candidates from slot j onwards
                # (k == j makes q equal to p).
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    from_inds = inds[indptr[p]:indptr[p + 1]]
                    from_data = data[indptr[p]:indptr[p + 1]]

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                    *dist_args)

                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

                # Pair p with every old candidate of the same vertex.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    from_inds = inds[indptr[p]:indptr[p + 1]]
                    from_data = data[indptr[p]:indptr[p + 1]]

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                    *dist_args)

                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

        # Stop early once a round produces few enough changes.
        if c <= delta * n_neighbors * n_vertices:
            break

    return deheap_sort(current_graph)
Esempio n. 16
0
    def nn_descent(
        inds,
        indptr,
        data,
        n_vertices,
        n_neighbors,
        rng_state,
        max_candidates=50,
        n_iters=10,
        delta=0.001,
        rho=0.5,
        rp_tree_init=True,
        leaf_array=None,
        verbose=False,
    ):
        """Build a neighbor heap for sparse rows and return it sorted.

        ``sparse_dist`` and ``dist_args`` are not parameters; they are
        taken from the enclosing scope.

        The heap is seeded with ``rejection_sample`` picks for every
        vertex, optionally extended with all pairwise distances inside each
        row of ``leaf_array``, and then updated for at most ``n_iters``
        rounds using the single candidate array returned by
        ``build_candidates``.

        Parameters
        ----------
        inds, indptr, data : sparse rows; row ``r`` is the slice
            ``indptr[r]:indptr[r + 1]`` of ``inds`` and ``data``.
        n_vertices : number of rows.
        n_neighbors : heap width passed to ``make_heap``.
        rng_state : state passed to ``rejection_sample``,
            ``build_candidates`` and ``tau_rand``.
        max_candidates : number of candidate slots scanned per vertex.
        n_iters : maximum number of update rounds.
        delta : a round whose accumulated push results are at most
            ``delta * n_neighbors * n_vertices`` ends the loop.
        rho : a non-negative candidate is skipped when
            ``tau_rand(rng_state) < rho``.
        rp_tree_init : when true, ``leaf_array`` is used for the
            leaf-based initialisation.
        leaf_array : 2-D array of row indices; a negative entry ends a row.
        verbose : print the round counter at the start of each round.

        Returns
        -------
        The result of ``deheap_sort(current_graph)``.
        """
        # Random initialisation: push each sampled pair in both directions.
        current_graph = make_heap(n_vertices, n_neighbors)
        for i in range(n_vertices):
            indices = rejection_sample(n_neighbors, n_vertices, rng_state)
            for j in range(indices.shape[0]):

                from_inds = inds[indptr[i]:indptr[i + 1]]
                from_data = data[indptr[i]:indptr[i + 1]]

                to_inds = inds[indptr[indices[j]]:indptr[indices[j] + 1]]
                to_data = data[indptr[indices[j]]:indptr[indices[j] + 1]]

                d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                *dist_args)

                heap_push(current_graph, i, d, indices[j], 1)
                heap_push(current_graph, indices[j], d, i, 1)

        if rp_tree_init:
            # Push every pair (i < j) within a leaf; a negative entry ends
            # the scan of that leaf row.
            for n in range(leaf_array.shape[0]):
                for i in range(leaf_array.shape[1]):
                    if leaf_array[n, i] < 0:
                        break
                    for j in range(i + 1, leaf_array.shape[1]):
                        if leaf_array[n, j] < 0:
                            break

                        from_inds = inds[indptr[leaf_array[
                            n, i]]:indptr[leaf_array[n, i] + 1]]
                        from_data = data[indptr[leaf_array[
                            n, i]]:indptr[leaf_array[n, i] + 1]]

                        to_inds = inds[indptr[leaf_array[
                            n, j]]:indptr[leaf_array[n, j] + 1]]
                        to_data = data[indptr[leaf_array[
                            n, j]]:indptr[leaf_array[n, j] + 1]]

                        d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                        *dist_args)

                        heap_push(current_graph, leaf_array[n, i], d,
                                  leaf_array[n, j], 1)
                        heap_push(current_graph, leaf_array[n, j], d,
                                  leaf_array[n, i], 1)

        for n in range(n_iters):
            if verbose:
                print("\t", n, " / ", n_iters)

            candidate_neighbors = build_candidates(current_graph, n_vertices,
                                                   n_neighbors, max_candidates,
                                                   rng_state)

            # c accumulates the return values of the pushes made this round.
            c = 0
            for i in range(n_vertices):
                for j in range(max_candidates):
                    p = int(candidate_neighbors[0, i, j])
                    # tau_rand is only drawn when p is non-negative
                    # (short-circuit), and skips p when the draw is < rho.
                    if p < 0 or tau_rand(rng_state) < rho:
                        continue
                    for k in range(max_candidates):
                        q = int(candidate_neighbors[0, i, k])
                        # `and` binds tighter than `or`: skip when q is
                        # negative, or when the entries at [2, i, j] and
                        # [2, i, k] are both falsy.
                        if (q < 0 or not candidate_neighbors[2, i, j]
                                and not candidate_neighbors[2, i, k]):
                            continue

                        from_inds = inds[indptr[p]:indptr[p + 1]]
                        from_data = data[indptr[p]:indptr[p + 1]]

                        to_inds = inds[indptr[q]:indptr[q + 1]]
                        to_data = data[indptr[q]:indptr[q + 1]]

                        d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                        *dist_args)

                        c += heap_push(current_graph, p, d, q, 1)
                        c += heap_push(current_graph, q, d, p, 1)

            # Stop early once a round produces few enough changes.
            if c <= delta * n_neighbors * n_vertices:
                break

        return deheap_sort(current_graph)