Exemple #1
0
def candidates_map_jit(rows, n_neighbors, current_graph, heap_updates, offset,
                       rho, rng_state, seed_per_row):
    """Emit randomly sampled candidate update records for a set of graph rows.

    For every row ``i`` in ``rows`` and every neighbour slot ``j`` whose index
    ``current_graph[0, i - offset, j]`` is non-negative, two ``tau_rand`` draws
    are taken from a private copy of ``rng_state``. The first draw becomes the
    record value ``d``; the second is compared with ``rho`` to decide whether
    the neighbour is emitted at all.

    Each emitted neighbour produces two consecutive records in
    ``heap_updates``::

        (i,   d, idx, isn, j)
        (idx, d, i,   isn, -j - 1)

    where ``idx`` is the neighbour index and ``isn`` is the flag read from
    ``current_graph[2, i - offset, j]``. The negative last field marks the
    mirrored record.

    Parameters
    ----------
    rows : iterable of int
        Row ids to process; ``offset`` is subtracted before indexing
        ``current_graph``.
    n_neighbors : int
        Number of neighbour slots scanned per row.
    current_graph : indexable with ``[plane, row, slot]``
        Plane 0 supplies neighbour indices, plane 2 supplies the flag.
    heap_updates : indexable of writable 5-field records
        Output buffer. No bounds check is done here, so it must have room
        for two records per emitted neighbour.
    offset : int
        Value subtracted from each row id to obtain the local row.
    rho : float
        Threshold for the second random draw; the neighbour is emitted only
        when that draw is strictly below ``rho``.
    rng_state : object with ``copy()``
        Random state; only a copy of it is used inside this function.
    seed_per_row : bool
        When true, ``seed`` is called on the local state with the row id
        before the row is scanned.

    Returns
    -------
    int
        Number of records written to ``heap_updates``.
    """
    local_rng = rng_state.copy()
    n_written = 0
    for i in rows:
        if seed_per_row:
            seed(local_rng, i)
        local_row = i - offset
        for j in range(n_neighbors):
            idx = current_graph[0, local_row, j]
            if idx < 0:
                # empty slot
                continue
            isn = current_graph[2, local_row, j]

            # Draw order matters: first the record value, then the
            # accept/reject sample.
            d = tau_rand(local_rng)
            if not (tau_rand(local_rng) < rho):
                continue

            # updates are common to old and new - decided by 'isn' flag
            forward = heap_updates[n_written]
            forward[0] = i
            forward[1] = d
            forward[2] = idx
            forward[3] = isn
            forward[4] = j

            mirrored = heap_updates[n_written + 1]
            mirrored[0] = idx
            mirrored[1] = d
            mirrored[2] = i
            mirrored[3] = isn
            mirrored[4] = -j - 1  # means i is at index 2

            n_written += 2
    return n_written
Exemple #2
0
def diversify_csr(
    graph_indptr,
    graph_indices,
    graph_data,
    data_indptr,
    data_indices,
    data_data,
    dist,
    rng_state,
    prune_probability=1.0,
):
    """Prune occluded edges of a CSR graph by zeroing their weights in place.

    For each node the outgoing edges are visited in ascending order of their
    stored value (``np.argsort`` of the node's slice of ``graph_data``).  An
    edge to ``p`` is dropped when some earlier, still-retained edge to ``q``
    has a stored value above ``FLOAT32_EPS`` and ``dist(p, q)`` is smaller
    than the stored value of the edge to ``p`` -- and a ``tau_rand`` draw is
    below ``prune_probability``.  Dropped edges get ``graph_data`` set to 0;
    ``graph_indptr`` and ``graph_indices`` are never written.

    Parameters
    ----------
    graph_indptr, graph_indices, graph_data : arrays
        The graph, sliced per node as ``[graph_indptr[i]:graph_indptr[i + 1]]``.
    data_indptr, data_indices, data_data : arrays
        The point data, sliced per point the same way and handed to ``dist``.
    dist : callable
        Called as ``dist(from_inds, from_data, to_inds, to_data)``.
    rng_state : random state
        Passed to ``tau_rand``.
        NOTE(review): the same state object is used from every ``prange``
        iteration -- confirm this is acceptable when run in parallel.
    prune_probability : float, default 1.0
        An occluded edge is pruned only when the random draw is strictly
        below this value.

    Returns
    -------
    None
        The result is the in-place modification of ``graph_data``.
    """
    n_nodes = graph_indptr.shape[0] - 1

    for i in numba.prange(n_nodes):
        row_start = graph_indptr[i]
        row_end = graph_indptr[i + 1]
        current_indices = graph_indices[row_start:row_end]
        current_data = graph_data[row_start:row_end]

        order = np.argsort(current_data)
        n_edges = order.shape[0]
        retained = np.ones(n_edges, dtype=np.int8)

        for rank in range(1, n_edges):
            j = order[rank]

            # The candidate edge's endpoint is the same for every comparison
            # below, so slice its data once.
            p = current_indices[j]
            from_inds = data_indices[data_indptr[p]:data_indptr[p + 1]]
            from_data = data_data[data_indptr[p]:data_indptr[p + 1]]

            for earlier in range(rank):
                kept = order[earlier]
                if retained[kept] != 1:
                    # only edges that survived pruning can occlude others
                    continue

                q = current_indices[kept]
                to_inds = data_indices[data_indptr[q]:data_indptr[q + 1]]
                to_data = data_data[data_indptr[q]:data_indptr[q + 1]]
                d = dist(from_inds, from_data, to_inds, to_data)

                if current_data[kept] > FLOAT32_EPS and d < current_data[j]:
                    if tau_rand(rng_state) < prune_probability:
                        retained[j] = 0
                        break

        # Zeroing happens only after all comparisons, because current_data
        # is a slice of graph_data and is still being read above.
        for rank in range(n_edges):
            j = order[rank]
            if retained[j] == 0:
                graph_data[row_start + j] = 0
Exemple #3
0
def diversify(
    indices,
    distances,
    data_indices,
    data_indptr,
    data_data,
    dist,
    rng_state,
    prune_probability=1.0,
):
    """Prune occluded neighbours from each row of a neighbour table, in place.

    Each row of ``indices`` / ``distances`` is scanned left to right.  The
    first entry is always kept.  A later entry is dropped when some already
    kept entry ``c`` has a kept distance above ``FLOAT32_EPS`` and
    ``dist(entry, c)`` is smaller than the entry's own distance -- and a
    ``tau_rand`` draw is below ``prune_probability``.  Scanning of a row stops
    at the first negative index.  The row is then rewritten with the kept
    entries packed at the front and the remainder filled with ``-1`` /
    ``np.inf``.

    Parameters
    ----------
    indices, distances : 2-d arrays
        Neighbour ids and their distances; both are overwritten.
    data_indices, data_indptr, data_data : arrays
        Point data; point ``v`` is sliced as
        ``[data_indptr[v]:data_indptr[v + 1]]`` from ``data_indices`` and
        ``data_data``.  Note the argument order (indices before indptr).
    dist : callable
        Called as ``dist(from_ind, from_data, to_ind, to_data)``.
    rng_state : random state
        Passed to ``tau_rand``.
        NOTE(review): shared by all ``prange`` iterations -- confirm this is
        acceptable when run in parallel.
    prune_probability : float, default 1.0
        An occluded entry is dropped only when the random draw is strictly
        below this value.

    Returns
    -------
    tuple
        ``(indices, distances)`` -- the same objects that were passed in.
    """
    for i in numba.prange(indices.shape[0]):
        kept_indices = [indices[i, 0]]
        kept_distances = [distances[i, 0]]

        for j in range(1, indices.shape[1]):
            candidate = indices[i, j]
            if candidate < 0:
                break
            candidate_dist = distances[i, j]

            # The candidate's own data does not change across comparisons.
            from_ind = data_indices[
                data_indptr[candidate]:data_indptr[candidate + 1]
            ]
            from_data = data_data[
                data_indptr[candidate]:data_indptr[candidate + 1]
            ]

            keep = True
            for k in range(len(kept_indices)):
                c = kept_indices[k]
                to_ind = data_indices[data_indptr[c]:data_indptr[c + 1]]
                to_data = data_data[data_indptr[c]:data_indptr[c + 1]]

                d = dist(from_ind, from_data, to_ind, to_data)
                if kept_distances[k] > FLOAT32_EPS and d < candidate_dist:
                    if tau_rand(rng_state) < prune_probability:
                        keep = False
                        break

            if keep:
                kept_indices.append(candidate)
                kept_distances.append(candidate_dist)

        # Pack survivors at the front of the row, then pad the tail.
        n_kept = len(kept_indices)
        for j in range(n_kept):
            indices[i, j] = kept_indices[j]
            distances[i, j] = kept_distances[j]
        for j in range(n_kept, indices.shape[1]):
            indices[i, j] = -1
            distances[i, j] = np.inf

    return indices, distances
Exemple #4
0
    def nn_descent(
        inds,
        indptr,
        data,
        n_vertices,
        n_neighbors,
        rng_state,
        max_candidates=50,
        n_iters=10,
        delta=0.001,
        rho=0.5,
        rp_tree_init=True,
        leaf_array=None,
        verbose=False,
    ):
        """Iteratively refine a neighbour heap over sparse rows and return it sorted.

        Row ``v`` of the input is read as ``inds[indptr[v]:indptr[v + 1]]``
        together with ``data[indptr[v]:indptr[v + 1]]`` (a CSR-style layout).
        ``sparse_dist`` and ``dist_args`` are not parameters of this function;
        they are picked up from the enclosing scope, which is not shown here.

        The function proceeds in three phases:

        1. Random initialisation: for every vertex, ``rejection_sample`` picks
           vertices, and each (vertex, sample) pair is pushed onto the heap in
           both directions.
        2. Optional leaf initialisation (``rp_tree_init``): every pair of
           non-negative entries within a row of ``leaf_array`` is pushed in
           both directions.
        3. Up to ``n_iters`` refinement rounds: ``build_candidates`` produces a
           candidate table, every admissible candidate pair of every vertex is
           pushed in both directions, and the loop stops early once the summed
           ``heap_push`` return values of a round do not exceed
           ``delta * n_neighbors * n_vertices``.

        Parameters
        ----------
        inds, indptr, data : arrays
            Sparse input, sliced per row as described above.
        n_vertices : int
            Number of rows; sizes the heap and bounds the vertex loops.
        n_neighbors : int
            Heap width per vertex; also the sample count given to
            ``rejection_sample``.
        rng_state : random state
            Passed to ``rejection_sample``, ``build_candidates`` and
            ``tau_rand``.
        max_candidates : int, default 50
            Passed to ``build_candidates`` and used as the number of candidate
            slots scanned per vertex.
        n_iters : int, default 10
            Maximum number of refinement rounds.
        delta : float, default 0.001
            Early-stopping factor (see phase 3).
        rho : float, default 0.5
            Threshold for the per-candidate ``tau_rand`` draw; a candidate is
            skipped when the draw is below ``rho``.
        rp_tree_init : bool, default True
            Whether to run phase 2.
        leaf_array : 2-d array or None, default None
            Rows of vertex ids for phase 2.  It is dereferenced whenever
            ``rp_tree_init`` is true, so it must be supplied in that case
            despite the ``None`` default.
        verbose : bool, default False
            Print the round counter at the start of every refinement round.

        Returns
        -------
        The value of ``deheap_sort(current_graph)``.
        """
        current_graph = make_heap(n_vertices, n_neighbors)

        # Phase 1: seed the heap with randomly sampled neighbours per vertex.
        for i in range(n_vertices):
            indices = rejection_sample(n_neighbors, n_vertices, rng_state)
            for j in range(indices.shape[0]):

                from_inds = inds[indptr[i]:indptr[i + 1]]
                from_data = data[indptr[i]:indptr[i + 1]]

                to_inds = inds[indptr[indices[j]]:indptr[indices[j] + 1]]
                to_data = data[indptr[indices[j]]:indptr[indices[j] + 1]]

                d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                *dist_args)

                # Push the pair in both directions with the flag argument 1.
                heap_push(current_graph, i, d, indices[j], 1)
                heap_push(current_graph, indices[j], d, i, 1)

        # Phase 2: push every pair of vertices that share a leaf_array row.
        if rp_tree_init:
            for n in range(leaf_array.shape[0]):
                for i in range(leaf_array.shape[1]):
                    # A negative entry ends the scan of this row.
                    if leaf_array[n, i] < 0:
                        break
                    for j in range(i + 1, leaf_array.shape[1]):
                        if leaf_array[n, j] < 0:
                            break

                        from_inds = inds[indptr[leaf_array[
                            n, i]]:indptr[leaf_array[n, i] + 1]]
                        from_data = data[indptr[leaf_array[
                            n, i]]:indptr[leaf_array[n, i] + 1]]

                        to_inds = inds[indptr[leaf_array[
                            n, j]]:indptr[leaf_array[n, j] + 1]]
                        to_data = data[indptr[leaf_array[
                            n, j]]:indptr[leaf_array[n, j] + 1]]

                        d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                        *dist_args)

                        heap_push(current_graph, leaf_array[n, i], d,
                                  leaf_array[n, j], 1)
                        heap_push(current_graph, leaf_array[n, j], d,
                                  leaf_array[n, i], 1)

        # Phase 3: refinement rounds over candidate pairs.
        for n in range(n_iters):
            if verbose:
                print("\t", n, " / ", n_iters)

            candidate_neighbors = build_candidates(current_graph, n_vertices,
                                                   n_neighbors, max_candidates,
                                                   rng_state)

            # c accumulates the heap_push return values of this round
            # (presumably the number of heap changes -- confirm in heap_push).
            c = 0
            for i in range(n_vertices):
                for j in range(max_candidates):
                    p = int(candidate_neighbors[0, i, j])
                    # Skip empty slots, and skip the candidate when the random
                    # draw falls below rho.  The draw is only made for
                    # non-negative p because of short-circuit evaluation.
                    # NOTE(review): a larger rho therefore skips more
                    # candidates -- confirm this is the intended direction.
                    if p < 0 or tau_rand(rng_state) < rho:
                        continue
                    for k in range(max_candidates):
                        q = int(candidate_neighbors[0, i, k])
                        # Parsed as: q < 0 or (both plane-2 flags are falsy);
                        # i.e. a pair is skipped when neither member is
                        # flagged.
                        if (q < 0 or not candidate_neighbors[2, i, j]
                                and not candidate_neighbors[2, i, k]):
                            continue

                        from_inds = inds[indptr[p]:indptr[p + 1]]
                        from_data = data[indptr[p]:indptr[p + 1]]

                        to_inds = inds[indptr[q]:indptr[q + 1]]
                        to_data = data[indptr[q]:indptr[q + 1]]

                        d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                        *dist_args)

                        c += heap_push(current_graph, p, d, q, 1)
                        c += heap_push(current_graph, q, d, p, 1)

            # Early stop once a round's accumulated count is at or below the
            # delta-scaled heap size.
            if c <= delta * n_neighbors * n_vertices:
                break

        return deheap_sort(current_graph)