def candidates_map_jit(rows, n_neighbors, current_graph, heap_updates, offset, rho, rng_state, seed_per_row):
    """Collect candidate-neighbor heap updates for a chunk of rows.

    For every stored neighbor of every row in ``rows`` this emits (with
    probability ``rho``) a pair of update records into ``heap_updates`` --
    one for each direction of the edge -- tagged with a random priority so
    that downstream heap pushes perform reservoir-style sampling of
    candidates.

    Parameters
    ----------
    rows: iterable of int
        Row (vertex) indices of the chunk to process.
    n_neighbors: int
        Number of neighbor slots per row in ``current_graph``.
    current_graph: ndarray of shape (3, n_rows, n_neighbors)
        Heap structure; plane 0 holds neighbor indices (negative means an
        empty slot), plane 2 holds the old/new flag.
        NOTE(review): plane layout inferred from usage here -- confirm
        against the heap constructor elsewhere in this module.
    heap_updates: 2D ndarray
        Output buffer; each record is (row, priority, index, isn-flag, slot).
    offset: int
        Offset subtracted from global row ids to index into ``current_graph``
        (chunked/parallel processing).
    rho: float
        Sampling probability for emitting a candidate pair.
    rng_state: ndarray
        Tau-random generator state; copied so the caller's state is untouched.
    seed_per_row: bool
        If True, reseed the local RNG per row for reproducibility across
        chunkings.

    Returns
    -------
    count: int
        Number of records written into ``heap_updates``.
    """
    # Work on a private copy so parallel callers sharing rng_state stay deterministic.
    rng_state_local = rng_state.copy()
    count = 0
    for i in rows:
        if seed_per_row:
            seed(rng_state_local, i)
        for j in range(n_neighbors):
            # Negative index marks an unused heap slot -- nothing to propose.
            if current_graph[0, i - offset, j] < 0:
                continue
            idx = current_graph[0, i - offset, j]
            isn = current_graph[2, i - offset, j]
            # Random priority: later heap pushes keep a uniform sample of candidates.
            d = tau_rand(rng_state_local)
            if tau_rand(rng_state_local) < rho:
                # updates are common to old and new - decided by 'isn' flag
                hu = heap_updates[count]
                hu[0] = i
                hu[1] = d
                hu[2] = idx
                hu[3] = isn
                hu[4] = j
                count += 1
                # Mirror record for the reverse direction of the edge.
                hu = heap_updates[count]
                hu[0] = idx
                hu[1] = d
                hu[2] = i
                hu[3] = isn
                hu[4] = -j - 1  # means i is at index 2
                count += 1
    return count
def diversify_csr(
    graph_indptr,
    graph_indices,
    graph_data,
    data_indptr,
    data_indices,
    data_data,
    dist,
    rng_state,
    prune_probability=1.0,
):
    """Prune "occluded" edges of a CSR k-NN graph, in place.

    For each node the edges are visited in increasing distance order; an
    edge to ``j`` is dropped (its weight zeroed) when some already-retained
    closer neighbor ``l`` is itself closer to ``j`` than the node is --
    the standard occlusion rule used to diversify neighbor graphs.  The
    data points are in sparse CSR form, so distances are computed from
    (indices, values) pairs.

    Parameters
    ----------
    graph_indptr, graph_indices, graph_data: CSR arrays of the k-NN graph;
        ``graph_data`` is mutated in place (pruned entries set to 0).
    data_indptr, data_indices, data_data: CSR arrays of the raw dataset.
    dist: callable(from_inds, from_data, to_inds, to_data) -> float
        Sparse distance function.
    rng_state: ndarray
        Tau-random generator state for stochastic pruning.
    prune_probability: float, default 1.0
        Probability of actually dropping an occluded edge (1.0 = always).

    Returns
    -------
    None -- ``graph_data`` is modified in place.
    """
    n_nodes = graph_indptr.shape[0] - 1
    # NOTE(review): iterations share rng_state across prange threads, so
    # results are only reproducible for prune_probability == 1.0.
    for i in numba.prange(n_nodes):
        current_indices = graph_indices[graph_indptr[i]:graph_indptr[i + 1]]
        current_data = graph_data[graph_indptr[i]:graph_indptr[i + 1]]
        # Visit edges nearest-first; the nearest edge (position order[0]) is always kept.
        order = np.argsort(current_data)
        retained = np.ones(order.shape[0], dtype=np.int8)
        for idx in range(1, order.shape[0]):
            j = order[idx]
            for k in range(idx):
                l = order[k]
                if retained[l] == 1:
                    p = current_indices[j]
                    q = current_indices[l]
                    # Slice out the CSR rows of the two endpoints.
                    from_inds = data_indices[data_indptr[p]:data_indptr[p + 1]]
                    from_data = data_data[data_indptr[p]:data_indptr[p + 1]]
                    to_inds = data_indices[data_indptr[q]:data_indptr[q + 1]]
                    to_data = data_data[data_indptr[q]:data_indptr[q + 1]]
                    d = dist(from_inds, from_data, to_inds, to_data)
                    # Occlusion test: a retained closer neighbor l "shadows" j.
                    # The FLOAT32_EPS guard skips duplicate points (zero-length edges).
                    if current_data[l] > FLOAT32_EPS and d < current_data[j]:
                        if tau_rand(rng_state) < prune_probability:
                            retained[j] = 0
                            break
        # Zero the weights of pruned edges; j is a position within this row's slice.
        for idx in range(order.shape[0]):
            j = order[idx]
            if retained[j] == 0:
                graph_data[graph_indptr[i] + j] = 0
    return
def diversify(
    indices,
    distances,
    data_indices,
    data_indptr,
    data_data,
    dist,
    rng_state,
    prune_probability=1.0,
):
    """Occlusion-prune a dense (n_points, n_neighbors) k-NN array, in place.

    Neighbors are assumed sorted by distance per row.  A neighbor is kept
    only if no already-kept closer neighbor occludes it (i.e. lies closer
    to it than the query point does), each drop happening with probability
    ``prune_probability``.  Pruned slots are filled with index -1 and
    distance ``inf``.  Data points are CSR-encoded; ``dist`` consumes
    (indices, values) pairs.  Returns the mutated ``indices, distances``.
    """
    n_points = indices.shape[0]
    n_neighbors = indices.shape[1]
    for row in numba.prange(n_points):
        # The nearest neighbor is unconditionally retained.
        kept_inds = [indices[row, 0]]
        kept_dists = [distances[row, 0]]
        for col in range(1, n_neighbors):
            cand = indices[row, col]
            if cand < 0:
                break
            cand_dist = distances[row, col]
            # CSR row of the candidate point (invariant over the inner loop).
            cand_start = data_indptr[cand]
            cand_end = data_indptr[cand + 1]
            cand_col_inds = data_indices[cand_start:cand_end]
            cand_col_data = data_data[cand_start:cand_end]
            keep = True
            for pos in range(len(kept_inds)):
                other = kept_inds[pos]
                other_col_inds = data_indices[data_indptr[other]:data_indptr[other + 1]]
                other_col_data = data_data[data_indptr[other]:data_indptr[other + 1]]
                d = dist(cand_col_inds, cand_col_data, other_col_inds, other_col_data)
                # Occluded by a non-duplicate kept neighbor -> drop with given probability.
                if (
                    kept_dists[pos] > FLOAT32_EPS
                    and d < cand_dist
                    and tau_rand(rng_state) < prune_probability
                ):
                    keep = False
                    break
            if keep:
                kept_inds.append(cand)
                kept_dists.append(cand_dist)
        # Write survivors back; pad the tail with sentinel values.
        n_kept = len(kept_inds)
        for col in range(n_neighbors):
            if col < n_kept:
                indices[row, col] = kept_inds[col]
                distances[row, col] = kept_dists[col]
            else:
                indices[row, col] = -1
                distances[row, col] = np.inf
    return indices, distances
def nn_descent(
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    max_candidates=50,
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
):
    """Approximate k-NN graph construction by NN-descent over sparse data.

    Builds a heap of the ``n_neighbors`` nearest neighbors for each of the
    ``n_vertices`` CSR-encoded points (``inds``/``indptr``/``data``), seeded
    by random sampling and optionally by RP-tree leaves, then iteratively
    refined by comparing candidate pairs (neighbors-of-neighbors).

    NOTE(review): ``sparse_dist`` and ``dist_args`` are free variables --
    presumably bound by an enclosing factory closure; confirm in the full
    module.

    Parameters
    ----------
    inds, indptr, data: CSR arrays of the dataset.
    n_vertices: int
        Number of points.
    n_neighbors: int
        Neighbors per point in the output graph.
    rng_state: ndarray
        Tau-random generator state.
    max_candidates: int, default 50
        Candidates considered per vertex per iteration.
    n_iters: int, default 10
        Maximum refinement iterations.
    delta: float, default 0.001
        Early-termination threshold on the fraction of heap updates.
    rho: float, default 0.5
        Per-vertex candidate-list sampling rate (skip probability).
    rp_tree_init: bool, default True
        If True, also seed the heap from ``leaf_array`` leaf co-membership.
    leaf_array: ndarray or None
        RP-tree leaves, rows padded with negative entries; required when
        ``rp_tree_init`` is True.
    verbose: bool, default False
        Print iteration progress.

    Returns
    -------
    Sorted (indices, distances) arrays from ``deheap_sort``.
    """
    current_graph = make_heap(n_vertices, n_neighbors)
    # --- Random initialization: push distances to sampled vertices both ways.
    for i in range(n_vertices):
        indices = rejection_sample(n_neighbors, n_vertices, rng_state)
        for j in range(indices.shape[0]):
            from_inds = inds[indptr[i]:indptr[i + 1]]
            from_data = data[indptr[i]:indptr[i + 1]]
            to_inds = inds[indptr[indices[j]]:indptr[indices[j] + 1]]
            to_data = data[indptr[indices[j]]:indptr[indices[j] + 1]]
            d = sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)
            heap_push(current_graph, i, d, indices[j], 1)
            heap_push(current_graph, indices[j], d, i, 1)
    # --- Optional RP-tree seeding: all pairs within each leaf.
    if rp_tree_init:
        for n in range(leaf_array.shape[0]):
            for i in range(leaf_array.shape[1]):
                # Rows are padded with negative entries past the leaf's end.
                if leaf_array[n, i] < 0:
                    break
                for j in range(i + 1, leaf_array.shape[1]):
                    if leaf_array[n, j] < 0:
                        break
                    from_inds = inds[indptr[leaf_array[
                        n, i]]:indptr[leaf_array[n, i] + 1]]
                    from_data = data[indptr[leaf_array[
                        n, i]]:indptr[leaf_array[n, i] + 1]]
                    to_inds = inds[indptr[leaf_array[
                        n, j]]:indptr[leaf_array[n, j] + 1]]
                    to_data = data[indptr[leaf_array[
                        n, j]]:indptr[leaf_array[n, j] + 1]]
                    d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                    *dist_args)
                    heap_push(current_graph, leaf_array[n, i], d,
                              leaf_array[n, j], 1)
                    heap_push(current_graph, leaf_array[n, j], d,
                              leaf_array[n, i], 1)
    # --- NN-descent refinement iterations.
    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)
        candidate_neighbors = build_candidates(current_graph, n_vertices,
                                               n_neighbors, max_candidates,
                                               rng_state)
        c = 0  # number of successful heap updates this iteration
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(candidate_neighbors[0, i, j])
                # Skip empty slots, and sample candidates at rate (1 - rho).
                if p < 0 or tau_rand(rng_state) < rho:
                    continue
                for k in range(max_candidates):
                    q = int(candidate_neighbors[0, i, k])
                    # Skip if both candidates are "old" (flag plane 2); note
                    # `or` binds looser than `and`, so this reads as
                    # q < 0 or (both flags unset).
                    if (q < 0 or not candidate_neighbors[2, i, j] and
                            not candidate_neighbors[2, i, k]):
                        continue
                    from_inds = inds[indptr[p]:indptr[p + 1]]
                    from_data = data[indptr[p]:indptr[p + 1]]
                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]
                    d = sparse_dist(from_inds, from_data, to_inds, to_data,
                                    *dist_args)
                    c += heap_push(current_graph, p, d, q, 1)
                    c += heap_push(current_graph, q, d, p, 1)
        # Converged: too few updates relative to total heap size.
        if c <= delta * n_neighbors * n_vertices:
            break
    return deheap_sort(current_graph)