def nn_descent(
    inds,
    indptr,
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=sparse_euclidean,
    n_iters=10,
    delta=0.001,
    rp_tree_init=True,
    leaf_array=None,
    low_memory=False,
    verbose=False,
    seed_per_row=False,
):
    """Run NN-descent on index/pointer/value arrays and return the sorted result.

    A fresh heap is created with ``make_heap``, optionally seeded from
    ``leaf_array`` through ``init_rp_tree``, then passed to ``init_random``
    and finally refined in place by one of the two
    ``nn_descent_internal_*_parallel`` routines.

    Parameters
    ----------
    inds, indptr, data
        Arrays describing the input rows; presumably the ``indices``,
        ``indptr`` and ``data`` members of a CSR matrix (verify against the
        caller). The number of rows is taken to be ``indptr.shape[0] - 1``.
    n_neighbors
        Number of neighbour slots requested from ``make_heap`` per row.
    rng_state
        Random state handed to ``init_random`` and to the refinement routine.
    max_candidates, n_iters, delta, verbose, seed_per_row
        Forwarded unchanged to the refinement routine.
    dist
        Distance callable; passed to the initialisation helpers and to the
        refinement routine.
    rp_tree_init
        When true, ``init_rp_tree`` is run with ``leaf_array`` before the
        random initialisation.
    leaf_array
        Only read when ``rp_tree_init`` is true.
    low_memory
        Chooses the low-memory refinement routine over the high-memory one.

    Returns
    -------
    The value of ``deheap_sort(current_graph)``.
    """
    n_samples = indptr.shape[0] - 1
    current_graph = make_heap(n_samples, n_neighbors)

    if rp_tree_init:
        init_rp_tree(inds, indptr, data, dist, current_graph, leaf_array)

    # Random initialisation always runs, whether or not the tree-based
    # seeding above was performed.
    init_random(n_neighbors, inds, indptr, data, current_graph, dist, rng_state)

    # The two refinement routines receive exactly the same arguments; only
    # the implementation differs.
    if not low_memory:
        nn_descent_internal_high_memory_parallel(
            current_graph,
            inds,
            indptr,
            data,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            dist=dist,
            n_iters=n_iters,
            delta=delta,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )
    else:
        nn_descent_internal_low_memory_parallel(
            current_graph,
            inds,
            indptr,
            data,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            dist=dist,
            n_iters=n_iters,
            delta=delta,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )

    return deheap_sort(current_graph)
def query(self, query_data, k=10, queue_size=5.0):
    """Find the ``k`` nearest training points for each query point.

    Parameters
    ----------
    query_data: array-like, last dimension self.dim
        The points to look up. They are converted to a ``float32`` array
        before searching.

    k: integer (default = 10)
        How many nearest neighbors to return per query point.

    queue_size: float (default 5.0)
        Multiplier applied to ``k`` to size the internal search queue
        (``int(k * queue_size)``). It trades speed against accuracy:
        smaller values search faster but more approximately, larger
        values search more accurately at greater cost. Values should
        generally be in the range 1.0 to 10.0.

    Returns
    -------
    indices, distances: array (n_query_points, k), array (n_query_points, k)
        ``indices[i, j]`` is the index into the training data of the jth
        nearest neighbor of the ith query point, and ``distances[i, j]``
        is the distance from the ith query point to that neighbor.
    """
    query_data = np.asarray(query_data).astype(np.float32)
    self._init_search_graph()

    # The search queue is wider than k; the surplus is trimmed on return.
    initial_heap = initialise_search(
        self._rp_forest,
        self._raw_data,
        query_data,
        int(k * queue_size),
        self._random_init,
        self._tree_init,
        self.rng_state,
    )

    search_graph = self._search_graph
    searched = self._search(
        self._raw_data,
        search_graph.indptr,
        search_graph.indices,
        initial_heap,
        query_data,
    )

    neighbor_indices, neighbor_dists = deheap_sort(searched)
    return neighbor_indices[:, :k], neighbor_dists[:, :k]
def find_component_connection_edge(
    component1,
    component2,
    search_closure,
    raw_data,
    visited,
    rng_state,
    search_size=10,
    epsilon=0.0,
):
    """Search back and forth between two components for a short joining edge.

    ``search_size`` members are sampled from each component with
    ``rejection_sample``. The routine then alternates sides: points from
    one side are used as queries through ``search_closure`` against
    candidates from the other, the closest hit seen so far is recorded,
    and the first-column results become the next query set. Iteration
    stops once neither side's index set is flagged as changed.

    Parameters
    ----------
    component1, component2
        Index arrays for the two components; indexed by the sampled
        positions and used to index ``raw_data``.
    search_closure
        Callable invoked as ``search_closure(query_points,
        candidate_indices, search_size, epsilon, visited)``; it must return
        three values, of which the first two are passed to ``deheap_sort``.
    raw_data
        Array indexed by point index to obtain query points.
    visited
        Passed through to ``search_closure`` untouched.
    rng_state
        Random state for ``rejection_sample``.
    search_size
        Sample size per component, also forwarded to ``search_closure``.
    epsilon
        Forwarded to ``search_closure``.

    Returns
    -------
    tuple
        ``(a, b, dist)`` where ``a`` is the querying point and ``b`` the
        search result of the closest pair found, and ``dist`` their
        distance. ``a`` comes from whichever side was querying when the
        pair was found. If no finite distance is ever seen, the first
        sampled point of each component is returned with ``np.inf``.
    """
    # Placeholders are overwritten immediately below; presumably they only
    # exist to fix the list's element type -- TODO confirm.
    indices = [np.zeros(1, dtype=np.int64) for i in range(2)]
    indices[0] = component1[
        rejection_sample(np.int64(search_size), component1.shape[0], rng_state)
    ]
    indices[1] = component2[
        rejection_sample(np.int64(search_size), component2.shape[0], rng_state)
    ]

    query_side = 0
    query_points = raw_data[indices[query_side]]
    candidate_indices = indices[1 - query_side].copy()

    changed = [True, True]
    best_dist = np.inf
    best_edge = (indices[0][0], indices[1][0])

    while changed[0] or changed[1]:
        other_side = 1 - query_side

        found, found_dists, _ = search_closure(
            query_points, candidate_indices, search_size, epsilon, visited
        )
        found, found_dists = deheap_sort(found, found_dists)

        # Keep the closest (query point, result) pair seen so far.
        for row in range(found_dists.shape[0]):
            for col in range(found_dists.shape[1]):
                candidate_dist = found_dists[row, col]
                if candidate_dist < best_dist:
                    best_dist = candidate_dist
                    best_edge = (indices[query_side][row], found[row, col])

        # Swap roles: the current queries become the candidates, and the
        # first-column results become the next queries.
        candidate_indices = indices[query_side]
        new_indices = np.unique(found[:, 0])
        # The changed flag is only refreshed when the sizes match; on a size
        # mismatch the previous flag value is left as it was.
        if indices[other_side].shape[0] == new_indices.shape[0]:
            changed[other_side] = np.any(indices[other_side] != new_indices)
        indices[other_side] = new_indices
        query_points = raw_data[new_indices]
        query_side = other_side

    return best_edge[0], best_edge[1], best_dist
def __init__(
    self,
    data,
    metric="euclidean",
    metric_kwds=None,
    n_neighbors=15,
    n_trees=None,
    leaf_size=None,
    pruning_level=0,
    tree_init=True,
    random_state=np.random,
    algorithm="standard",
    max_candidates=20,
    n_iters=None,
    delta=0.001,
    rho=0.5,
    n_jobs=None,
    seed_per_row=False,
    verbose=False,
):
    """Validate the input, optionally build an RP forest, and run NN-descent.

    The resulting neighbour graph is stored in ``self._neighbor_graph``.

    Parameters
    ----------
    data
        Training data. Passed through ``check_array`` with
        ``dtype=np.float32`` and ``accept_sparse="csr"``.
    metric
        Either a callable or a key of ``dist.named_distances``. For CSR
        input the name must also be in ``sparse.sparse_named_distances``.
    metric_kwds
        Optional dict whose values (in order) become the extra distance
        arguments. The caller's dict is not modified.
    n_neighbors
        Number of neighbours per point in the graph.
    n_trees
        Number of trees in the forest. When ``None`` it defaults to
        ``5 + int(round(sqrt(n_samples) / 20))``.
    leaf_size
        Forwarded to ``make_forest``.
    pruning_level
        Stored as ``self.prune_level``; not otherwise used here.
    tree_init
        Whether to build the forest; also disabled when ``n_trees == 0``.
    random_state
        Anything accepted by ``check_random_state``.
    algorithm
        ``"standard"`` or ``"alternative"``. Only ``"standard"`` is allowed
        when more than one job is in effect.
    max_candidates, delta, rho
        Forwarded to the NN-descent routine.
    n_iters
        Iteration cap. When ``None`` it defaults to
        ``max(5, int(round(log2(n_samples))))``.
    n_jobs
        Passed to ``threaded.effective_n_jobs_with_context``; any result
        other than 1 selects the parallel code path.
    seed_per_row
        Forwarded to the NN-descent routines that accept it.
    verbose
        When true, progress messages are printed.

    Raises
    ------
    ValueError
        If ``metric`` is neither callable nor a recognised name, if the
        metric is unavailable for sparse input, if ``algorithm`` is
        unknown, or if the parallel path is requested with a
        non-"standard" algorithm or with sparse input.
    """
    # Validate first so that array-likes without a ``shape`` attribute
    # (e.g. nested lists) can be used for the size-dependent defaults below.
    data = check_array(data, dtype=np.float32, accept_sparse="csr")
    n_samples = data.shape[0]

    if n_trees is None:
        n_trees = 5 + int(round(n_samples ** 0.5 / 20.0))
    if n_iters is None:
        n_iters = max(5, int(round(np.log2(n_samples))))

    self.n_trees = n_trees
    self.n_neighbors = n_neighbors
    self.metric = metric
    self.metric_kwds = metric_kwds
    self.leaf_size = leaf_size
    self.prune_level = pruning_level
    self.max_candidates = max_candidates
    self.n_iters = n_iters
    self.delta = delta
    self.rho = rho
    self.dim = data.shape[1]
    self.verbose = verbose

    self._raw_data = data

    self.tree_init = bool(tree_init and n_trees != 0)

    # Work on a private copy: the sparse paths insert ``n_features`` below
    # and must not leak that into the dict owned by the caller.
    metric_kwds = dict(metric_kwds) if metric_kwds else {}
    self._dist_args = tuple(metric_kwds.values())

    self.random_state = check_random_state(random_state)

    if callable(metric):
        self._distance_func = metric
    elif metric in dist.named_distances:
        self._distance_func = dist.named_distances[metric]
    else:
        raise ValueError("Metric is neither callable, " + "nor a recognised string")

    self._angular_trees = metric in ("cosine", "correlation", "dice", "jaccard")

    self.rng_state = self.random_state.randint(INT32_MIN, INT32_MAX, 3).astype(
        np.int64
    )

    if self.tree_init:
        if verbose:
            print(ts(), "Building RP forest with", str(n_trees), "trees")
        self._rp_forest = make_forest(
            data,
            n_neighbors,
            n_trees,
            leaf_size,
            self.rng_state,
            self._angular_trees,
        )
        leaf_array = rptree_leaf_array(self._rp_forest)
    else:
        self._rp_forest = None
        leaf_array = np.array([[-1]])

    if threaded.effective_n_jobs_with_context(n_jobs) != 1:
        if algorithm != "standard":
            raise ValueError(
                "Algorithm {} not supported in parallel mode".format(algorithm)
            )
        if isspmatrix_csr(self._raw_data):
            raise ValueError(
                "Sparse input is not currently supported in parallel mode"
            )
        if verbose:
            print(ts(), "parallel NN descent for", str(n_iters), "iterations")

        if isspmatrix_csr(self._raw_data):
            # NOTE(review): currently unreachable -- sparse input is rejected
            # by the guard above. Kept so the sparse parallel path can be
            # re-enabled by removing that guard.
            self._is_sparse = True
            if metric in sparse.sparse_named_distances:
                self._distance_func = sparse.sparse_named_distances[metric]
                if metric in sparse.sparse_need_n_features:
                    metric_kwds["n_features"] = self._raw_data.shape[1]
                self._dist_args = tuple(metric_kwds.values())
            else:
                raise ValueError(
                    "Metric {} not supported for sparse data".format(metric)
                )
            self._neighbor_graph = sparse_threaded.sparse_nn_descent(
                self._raw_data.indices,
                self._raw_data.indptr,
                self._raw_data.data,
                self._raw_data.shape[0],
                self.n_neighbors,
                self.rng_state,
                self.max_candidates,
                self._distance_func,
                self._dist_args,
                self.n_iters,
                self.delta,
                self.rho,
                rp_tree_init=self.tree_init,
                leaf_array=leaf_array,
                verbose=verbose,
                n_jobs=n_jobs,
                seed_per_row=seed_per_row,
            )
        else:
            # Regular case
            self._is_sparse = False
            self._neighbor_graph = threaded.nn_descent(
                self._raw_data,
                self.n_neighbors,
                self.rng_state,
                self.max_candidates,
                self._distance_func,
                self._dist_args,
                self.n_iters,
                self.delta,
                self.rho,
                rp_tree_init=self.tree_init,
                leaf_array=leaf_array,
                verbose=verbose,
                n_jobs=n_jobs,
                seed_per_row=seed_per_row,
            )
    elif algorithm == "standard" or leaf_array.shape[0] == 1:
        if isspmatrix_csr(self._raw_data):
            self._is_sparse = True
            if metric in sparse.sparse_named_distances:
                self._distance_func = sparse.sparse_named_distances[metric]
                if metric in sparse.sparse_need_n_features:
                    metric_kwds["n_features"] = self._raw_data.shape[1]
                self._dist_args = tuple(metric_kwds.values())
            else:
                raise ValueError(
                    "Metric {} not supported for sparse data".format(metric)
                )
            if verbose:
                print(ts(), "metric NN descent for", str(n_iters), "iterations")

            # ``delta`` and ``rho`` are forwarded so that user-supplied
            # values are honoured for sparse input as they are for dense.
            self._neighbor_graph = sparse_nnd.sparse_nn_descent(
                self._raw_data.indices,
                self._raw_data.indptr,
                self._raw_data.data,
                self._raw_data.shape[0],
                self.n_neighbors,
                self.rng_state,
                self.max_candidates,
                sparse_dist=self._distance_func,
                dist_args=self._dist_args,
                n_iters=self.n_iters,
                delta=self.delta,
                rho=self.rho,
                rp_tree_init=False,
                leaf_array=leaf_array,
                verbose=verbose,
            )
        else:
            self._is_sparse = False
            if verbose:
                print(ts(), "NN descent for", str(n_iters), "iterations")
            self._neighbor_graph = nn_descent(
                self._raw_data,
                self.n_neighbors,
                self.rng_state,
                self.max_candidates,
                self._distance_func,
                self._dist_args,
                self.n_iters,
                self.delta,
                self.rho,
                rp_tree_init=True,
                leaf_array=leaf_array,
                verbose=verbose,
                seed_per_row=seed_per_row,
            )
    elif algorithm == "alternative":
        self._is_sparse = False
        if verbose:
            print(ts(), "Using alternative algorithm")

        graph_heap, search_heap = initialize_heaps(
            self._raw_data,
            self.n_neighbors,
            leaf_array,
            self._distance_func,
            self._dist_args,
        )
        graph = lil_matrix((data.shape[0], data.shape[0]))
        graph.rows, graph.data = deheap_sort(graph_heap)
        # Symmetrise the initial graph before searching over it.
        graph = graph.maximum(graph.transpose())
        self._neighbor_graph = deheap_sort(
            initialized_nnd_search(
                self._raw_data,
                graph.indptr,
                graph.indices,
                search_heap,
                self._raw_data,
                self._distance_func,
                self._dist_args,
            )
        )
    else:
        raise ValueError("Unknown algorithm selected")

    # A negative index in the result means a neighbour slot was never filled.
    if np.any(self._neighbor_graph[0] < 0):
        warn(
            "Failed to correctly find n_neighbors for some samples. "
            "Results may be less than ideal. Try re-running with "
            "different parameters."
        )
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
    seed_per_row=False,
):
    """Build an approximate neighbour graph for dense ``data`` by NN-descent.

    The heap is first filled with randomly sampled neighbours, optionally
    augmented through ``init_rp_tree``, and then refined for at most
    ``n_iters`` rounds. Each round compares pairs of candidates produced by
    ``new_build_candidates``. A set of already-evaluated ``(p, q)`` pairs
    prevents recomputing a distance.

    Parameters
    ----------
    data
        Array indexed by row; ``data.shape[0]`` is the number of points.
    n_neighbors
        Neighbour slots per point.
    rng_state
        Random state for ``rejection_sample`` and ``new_build_candidates``.
    max_candidates
        Number of candidate slots scanned per point in each round.
    dist, dist_args
        Distance callable and the extra positional arguments passed to it.
    n_iters
        Maximum number of refinement rounds.
    delta
        A round ending with at most ``delta * n_neighbors * n_points``
        counted heap updates stops the refinement.
    rho
        Forwarded to ``new_build_candidates``.
    rp_tree_init, leaf_array
        When ``rp_tree_init`` is true, ``init_rp_tree`` is called with
        ``leaf_array``.
    verbose
        Print the round counter at the start of every round.
    seed_per_row
        When true, ``seed(rng_state, i)`` is called before sampling for
        row ``i``; also forwarded to ``new_build_candidates``.

    Returns
    -------
    The value of ``deheap_sort(current_graph)``.
    """
    n_vertices = data.shape[0]
    # Starts with a dummy pair; presumably so the element type of the set is
    # known from the outset -- TODO confirm.
    tried = set([(-1, -1)])
    current_graph = make_heap(n_vertices, n_neighbors)

    # Random initialisation: every sampled pair is pushed in both directions.
    for vertex in range(n_vertices):
        if seed_per_row:
            seed(rng_state, vertex)
        sampled = rejection_sample(n_neighbors, n_vertices, rng_state)
        for slot in range(sampled.shape[0]):
            neighbor = sampled[slot]
            distance = dist(data[vertex], data[neighbor], *dist_args)
            heap_push(current_graph, vertex, distance, neighbor, 1)
            heap_push(current_graph, neighbor, distance, vertex, 1)
            tried.add((vertex, neighbor))
            tried.add((neighbor, vertex))

    if rp_tree_init:
        init_rp_tree(data, dist, dist_args, current_graph, leaf_array, tried=tried)

    stop_threshold = delta * n_neighbors * n_vertices

    for iteration in range(n_iters):
        if verbose:
            print("\t", iteration, " / ", n_iters)

        (new_candidates, old_candidates) = new_build_candidates(
            current_graph,
            n_vertices,
            n_neighbors,
            max_candidates,
            rng_state,
            rho,
            seed_per_row,
        )

        n_changes = 0
        for vertex in range(n_vertices):
            for a in range(max_candidates):
                p = int(new_candidates[0, vertex, a])
                if p < 0:
                    continue

                # New against new: start at ``a`` so each unordered pair is
                # visited once.
                for b in range(a, max_candidates):
                    q = int(new_candidates[0, vertex, b])
                    if q >= 0 and (p, q) not in tried:
                        distance = dist(data[p], data[q], *dist_args)
                        n_changes += unchecked_heap_push(
                            current_graph, p, distance, q, 1
                        )
                        tried.add((p, q))
                        if p != q:
                            n_changes += unchecked_heap_push(
                                current_graph, q, distance, p, 1
                            )
                            tried.add((q, p))

                # New against old: every old candidate slot is scanned.
                for b in range(max_candidates):
                    q = int(old_candidates[0, vertex, b])
                    if q >= 0 and (p, q) not in tried:
                        distance = dist(data[p], data[q], *dist_args)
                        n_changes += unchecked_heap_push(
                            current_graph, p, distance, q, 1
                        )
                        tried.add((p, q))
                        if p != q:
                            n_changes += unchecked_heap_push(
                                current_graph, q, distance, p, 1
                            )
                            tried.add((q, p))

        if n_changes <= stop_threshold:
            break

    return deheap_sort(current_graph)
def nn_descent(
    inds,
    indptr,
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=sparse_euclidean,
    n_iters=10,
    delta=0.001,
    init_graph=EMPTY_GRAPH,
    rp_tree_init=True,
    leaf_array=None,
    low_memory=False,
    verbose=False,
):
    """Run NN-descent on index/pointer/value arrays, optionally warm-started.

    Parameters
    ----------
    inds, indptr, data
        Arrays describing the input rows; presumably the components of a
        CSR matrix (verify against the caller). The row count is
        ``indptr.shape[0] - 1``.
    n_neighbors
        Neighbour slots per row.
    rng_state
        Random state for ``init_random`` and the refinement routine.
    max_candidates, n_iters, delta, verbose
        Forwarded to the refinement routine.
    dist
        Distance callable used for initialisation and refinement.
    init_graph
        Starting graph. If its first component has exactly one row it is
        treated as the empty sentinel and a new heap is built and
        initialised. Otherwise its first component must have shape
        ``(n_samples, n_neighbors)``, and it is then refined in place.
    rp_tree_init, leaf_array
        Only used when a new heap is built: if ``rp_tree_init`` is true,
        ``init_rp_tree`` is run with ``leaf_array``.
    low_memory
        Chooses the low-memory refinement routine over the high-memory one.

    Returns
    -------
    The value of ``deheap_sort(current_graph[0], current_graph[1])``.

    Raises
    ------
    ValueError
        If ``init_graph`` is neither the empty sentinel nor of the
        expected shape.
    """
    n_samples = indptr.shape[0] - 1
    init_rows = init_graph[0].shape[0]

    if init_rows == 1:
        # A single-row first component marks EMPTY_GRAPH: start from scratch.
        current_graph = make_heap(n_samples, n_neighbors)
        if rp_tree_init:
            init_rp_tree(inds, indptr, data, dist, current_graph, leaf_array)
        init_random(
            n_neighbors, inds, indptr, data, current_graph, dist, rng_state
        )
    elif init_rows == n_samples and init_graph[0].shape[1] == n_neighbors:
        current_graph = init_graph
    else:
        raise ValueError("Invalid initial graph specified!")

    if not low_memory:
        nn_descent_internal_high_memory_parallel(
            current_graph,
            inds,
            indptr,
            data,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            dist=dist,
            n_iters=n_iters,
            delta=delta,
            verbose=verbose,
        )
    else:
        nn_descent_internal_low_memory_parallel(
            current_graph,
            inds,
            indptr,
            data,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            dist=dist,
            n_iters=n_iters,
            delta=delta,
            verbose=verbose,
        )

    return deheap_sort(current_graph[0], current_graph[1])
def sparse_nn_descent(
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    max_candidates=50,
    sparse_dist=sparse_euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    low_memory=False,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
):
    """Initialise a neighbour heap for sparse rows and refine it by NN-descent.

    Row ``r`` is described by ``inds[indptr[r]:indptr[r + 1]]`` and
    ``data[indptr[r]:indptr[r + 1]]``. The heap is seeded with randomly
    sampled neighbours, optionally augmented via ``sparse_init_rp_tree``,
    and then handed to the low- or high-memory refinement routine.

    Parameters
    ----------
    inds, indptr, data
        Index, row-pointer and value arrays of the sparse input.
    n_vertices
        Number of rows.
    n_neighbors
        Neighbour slots per row.
    rng_state
        Random state for ``rejection_sample`` and the refinement routine.
    max_candidates, n_iters, delta, rho, verbose
        Forwarded to the refinement routine.
    sparse_dist, dist_args
        Distance callable, invoked as ``sparse_dist(from_inds, from_data,
        to_inds, to_data, *dist_args)``.
    low_memory
        Chooses the low-memory refinement routine; the high-memory one
        additionally receives the set of already-evaluated pairs.
    rp_tree_init, leaf_array
        When ``rp_tree_init`` is true, ``sparse_init_rp_tree`` is called
        with ``leaf_array``.

    Returns
    -------
    The value of ``deheap_sort(current_graph)``.
    """
    # Starts with a dummy pair; presumably so the element type of the set is
    # known from the outset -- TODO confirm.
    tried = set([(-1, -1)])
    current_graph = make_heap(n_vertices, n_neighbors)

    for i in range(n_vertices):
        indices = rejection_sample(n_neighbors, n_vertices, rng_state)

        # The slices for row i do not depend on the sampled neighbour, so
        # they are taken once per row rather than once per pair.
        from_inds = inds[indptr[i]:indptr[i + 1]]
        from_data = data[indptr[i]:indptr[i + 1]]

        for j in range(indices.shape[0]):
            neighbor = indices[j]
            to_inds = inds[indptr[neighbor]:indptr[neighbor + 1]]
            to_data = data[indptr[neighbor]:indptr[neighbor + 1]]

            d = sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)

            # Push the pair in both directions and remember it as evaluated.
            heap_push(current_graph, i, d, neighbor, 1)
            heap_push(current_graph, neighbor, d, i, 1)
            tried.add((i, neighbor))
            tried.add((neighbor, i))

    if rp_tree_init:
        sparse_init_rp_tree(
            inds,
            indptr,
            data,
            sparse_dist,
            dist_args,
            current_graph,
            leaf_array,
            tried=tried,
        )

    if low_memory:
        sparse_nn_descent_internal_low_memory(
            current_graph,
            inds,
            indptr,
            data,
            n_vertices,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            sparse_dist=sparse_dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )
    else:
        sparse_nn_descent_internal_high_memory(
            current_graph,
            inds,
            indptr,
            data,
            n_vertices,
            n_neighbors,
            rng_state,
            tried,
            max_candidates=max_candidates,
            sparse_dist=sparse_dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )

    return deheap_sort(current_graph)
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    low_memory=False,
    verbose=False,
    seed_per_row=False,
):
    """Initialise a neighbour heap for dense ``data`` and refine it.

    The heap is seeded with randomly sampled neighbours, optionally
    augmented through ``init_rp_tree``, and then passed to either
    ``nn_descent_internal_low_memory`` or
    ``nn_descent_internal_high_memory``.

    Parameters
    ----------
    data
        Array indexed by row; ``data.shape[0]`` is the number of points.
    n_neighbors
        Neighbour slots per point.
    rng_state
        Random state for ``rejection_sample`` and the refinement routine.
    max_candidates, n_iters, delta, rho, verbose
        Forwarded to the refinement routine.
    dist, dist_args
        Distance callable and the extra positional arguments passed to it.
    rp_tree_init, leaf_array
        When ``rp_tree_init`` is true, ``init_rp_tree`` is called with
        ``leaf_array``.
    low_memory
        Chooses the low-memory refinement routine; the high-memory one
        additionally receives the set of already-evaluated pairs.
    seed_per_row
        When true, ``seed(rng_state, i)`` is called before sampling for
        row ``i``; also forwarded to the refinement routine.

    Returns
    -------
    The value of ``deheap_sort(current_graph)``.
    """
    n_points = data.shape[0]
    # Starts with a dummy pair; presumably so the element type of the set is
    # known from the outset -- TODO confirm.
    tried = set([(-1, -1)])
    current_graph = make_heap(n_points, n_neighbors)

    for row in range(n_points):
        if seed_per_row:
            seed(rng_state, row)
        sampled = rejection_sample(n_neighbors, n_points, rng_state)
        for slot in range(sampled.shape[0]):
            neighbor = sampled[slot]
            distance = dist(data[row], data[neighbor], *dist_args)
            # Push the pair in both directions and remember it as evaluated.
            heap_push(current_graph, row, distance, neighbor, 1)
            heap_push(current_graph, neighbor, distance, row, 1)
            tried.add((row, neighbor))
            tried.add((neighbor, row))

    if rp_tree_init:
        init_rp_tree(data, dist, dist_args, current_graph, leaf_array, tried=tried)

    if not low_memory:
        nn_descent_internal_high_memory(
            current_graph,
            data,
            n_neighbors,
            rng_state,
            tried,
            max_candidates=max_candidates,
            dist=dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )
    else:
        nn_descent_internal_low_memory(
            current_graph,
            data,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            dist=dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )

    return deheap_sort(current_graph)
def nn_descent(data, n_neighbors, rng_state, max_candidates=50,
               dist=dist.euclidean, dist_args=(), n_iters=10, delta=0.001,
               rho=0.5, rp_tree_init=True, leaf_array=None, verbose=False):
    """Build an approximate neighbour graph for dense ``data`` by NN-descent.

    The heap is seeded with randomly sampled neighbours, optionally
    augmented with all pairs inside each row of ``leaf_array``, and then
    refined for at most ``n_iters`` rounds using the candidates returned
    by ``build_candidates``.

    Parameters
    ----------
    data
        Array indexed by row; ``data.shape[0]`` is the number of points.
    n_neighbors
        Neighbour slots per point.
    rng_state
        Random state for ``rejection_sample`` and ``build_candidates``.
    max_candidates
        Number of candidate slots scanned per point in each round.
    dist, dist_args
        Distance callable and the extra positional arguments passed to it.
    n_iters
        Maximum number of refinement rounds.
    delta
        A round ending with at most ``delta * n_neighbors * n_points``
        counted heap updates stops the refinement.
    rho
        Forwarded to ``build_candidates``.
    rp_tree_init
        When true, every pair of points within a row of ``leaf_array`` is
        pushed onto the heap.
    leaf_array
        2-D integer array; a negative entry ends the used part of a row.
        Only read when ``rp_tree_init`` is true.
    verbose
        Print the round counter at the start of every round.

    Returns
    -------
    The value of ``deheap_sort(current_graph)``.
    """
    n_vertices = data.shape[0]
    current_graph = make_heap(n_vertices, n_neighbors)

    # Random initialisation: every sampled pair is pushed in both directions.
    for i in range(n_vertices):
        indices = rejection_sample(n_neighbors, n_vertices, rng_state)
        for j in range(indices.shape[0]):
            d = dist(data[i], data[indices[j]], *dist_args)
            heap_push(current_graph, i, d, indices[j], 1)
            heap_push(current_graph, indices[j], d, i, 1)

    if rp_tree_init:
        for n in range(leaf_array.shape[0]):
            # The record of evaluated pairs is reset for every leaf.
            tried = set([(-1, -1)])
            for i in range(leaf_array.shape[1]):
                if leaf_array[n, i] < 0:
                    break
                for j in range(i + 1, leaf_array.shape[1]):
                    if leaf_array[n, j] < 0:
                        break
                    if (leaf_array[n, i], leaf_array[n, j]) in tried:
                        continue
                    d = dist(data[leaf_array[n, i]], data[leaf_array[n, j]],
                             *dist_args)
                    heap_push(current_graph, leaf_array[n, i], d,
                              leaf_array[n, j], 1)
                    heap_push(current_graph, leaf_array[n, j], d,
                              leaf_array[n, i], 1)
                    tried.add((leaf_array[n, i], leaf_array[n, j]))
                    tried.add((leaf_array[n, j], leaf_array[n, i]))

    for n in range(n_iters):
        # Honour ``verbose`` with the same progress line the sibling
        # NN-descent routines in this file print.
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors,
         old_candidate_neighbors) = build_candidates(current_graph,
                                                     n_vertices,
                                                     n_neighbors,
                                                     max_candidates,
                                                     rng_state, rho)

        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    continue

                # New against new: start at ``j`` so each unordered pair is
                # visited once.
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0:
                        continue
                    d = dist(data[p], data[q], *dist_args)
                    c += heap_push(current_graph, p, d, q, 1)
                    c += heap_push(current_graph, q, d, p, 1)

                # New against old: every old candidate slot is scanned.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0:
                        continue
                    d = dist(data[p], data[q], *dist_args)
                    c += heap_push(current_graph, p, d, q, 1)
                    c += heap_push(current_graph, q, d, p, 1)

        if c <= delta * n_neighbors * data.shape[0]:
            break

    return deheap_sort(current_graph)
def __init__(self, data, metric='euclidean', metric_kwds=None,
             n_neighbors=15, n_trees=8, leaf_size=15, pruning_level=0,
             tree_init=True, random_state=np.random, algorithm='standard',
             max_candidates=20, n_iters=10, delta=0.001, rho=0.5):
    """Build the neighbour graph and the search graph for ``data``.

    Parameters
    ----------
    data
        Training data; validated with ``check_array`` and converted to
        ``float32``.
    metric
        Either a callable or a key of ``dist.named_distances``.
    metric_kwds
        Optional dict whose values (in order) become the extra distance
        arguments. ``None`` is treated as an empty dict.
    n_neighbors
        Number of neighbours per point in the graph.
    n_trees, leaf_size
        Number of trees to build and the leaf size passed to the tree
        constructors.
    pruning_level
        Passed to ``prune`` as ``prune_level``.
    tree_init
        Whether to build trees; also disabled when ``n_trees == 0``.
    random_state
        Anything accepted by ``check_random_state``.
    algorithm
        ``'standard'`` or ``'alternative'``. ``'standard'`` is also used
        whenever no trees were built.
    max_candidates, n_iters, delta, rho
        Forwarded to ``nn_descent``.

    Raises
    ------
    ValueError
        If ``metric`` is neither callable nor a recognised name, or if
        ``algorithm`` is unknown.
    """
    # Validate first so that array-likes without a ``shape`` attribute
    # (e.g. nested lists) are accepted.
    data = check_array(data).astype(np.float32)
    if metric_kwds is None:
        metric_kwds = {}

    self.n_trees = n_trees
    self.n_neighbors = n_neighbors
    self.metric = metric
    self.metric_kwds = metric_kwds
    self.leaf_size = leaf_size
    self.prune_level = pruning_level
    self.max_candidates = max_candidates
    self.n_iters = n_iters
    self.delta = delta
    self.rho = rho
    self.dim = data.shape[1]

    self.tree_init = bool(tree_init and n_trees != 0)

    self._dist_args = tuple(metric_kwds.values())
    self.random_state = check_random_state(random_state)
    self._raw_data = data.copy()

    if callable(metric):
        self._distance_func = metric
    elif metric in dist.named_distances:
        self._distance_func = dist.named_distances[metric]
    else:
        # Fail here rather than with an AttributeError on
        # ``self._distance_func`` further down.
        raise ValueError('Metric is neither callable, '
                         'nor a recognised string')

    self._angular_trees = metric in ('cosine', 'correlation', 'dice',
                                     'jaccard')

    # Draw from the validated generator so that int / None seeds work too.
    self.rng_state = self.random_state.randint(
        INT32_MIN, INT32_MAX, 3).astype(np.int64)

    indices = np.arange(data.shape[0])

    if self.tree_init:
        if self._angular_trees:
            make_tree = make_angular_tree
        else:
            make_tree = make_euclidean_tree
        self._rp_forest = [
            flatten_tree(
                make_tree(data, indices, self.rng_state, self.leaf_size),
                self.leaf_size)
            for i in range(n_trees)
        ]
        leaf_array = np.vstack([tree.indices for tree in self._rp_forest])
    else:
        self._rp_forest = None
        leaf_array = np.array([[-1]])

    # Built once and shared by the 'alternative' construction path and by
    # later use of ``self._search``; both used the same factory arguments.
    self._search = make_initialized_nnd_search(self._distance_func,
                                               self._dist_args)

    if algorithm == 'standard' or leaf_array.shape[0] == 1:
        self._neighbor_graph = nn_descent(
            self._raw_data, self.n_neighbors, self.rng_state,
            self.max_candidates, self._distance_func, self._dist_args,
            self.n_iters, self.delta, self.rho, True, leaf_array)
    elif algorithm == 'alternative':
        graph_heap, search_heap = initialize_heaps(
            self._raw_data, self.n_neighbors, leaf_array,
            self._distance_func, self._dist_args)
        graph = lil_matrix((data.shape[0], data.shape[0]))
        graph.rows, graph.data = deheap_sort(graph_heap)
        # Symmetrise the initial graph before searching over it.
        graph = graph.maximum(graph.transpose())
        self._neighbor_graph = deheap_sort(
            self._search(self._raw_data, graph.indptr, graph.indices,
                         search_heap, self._raw_data))
    else:
        raise ValueError('Unknown algorithm selected')

    # Turn the neighbour graph into a symmetric, pruned, 0/1 search graph.
    self._search_graph = lil_matrix((data.shape[0], data.shape[0]),
                                    dtype=np.float32)
    self._search_graph.rows = self._neighbor_graph[0]
    self._search_graph.data = self._neighbor_graph[1]
    self._search_graph = self._search_graph.maximum(
        self._search_graph.transpose()).tocsr()
    self._search_graph = prune(self._search_graph,
                               prune_level=self.prune_level,
                               n_neighbors=self.n_neighbors)
    self._search_graph = (self._search_graph != 0).astype(np.int8)

    self._random_init, self._tree_init = make_initialisations(
        self._distance_func, self._dist_args)
def sparse_nn_descent(
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    max_candidates=50,
    sparse_dist=sparse_euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
):
    """Build an approximate neighbour graph for sparse rows by NN-descent.

    Row ``r`` is described by ``inds[indptr[r]:indptr[r + 1]]`` and
    ``data[indptr[r]:indptr[r + 1]]``. The heap is seeded with randomly
    sampled neighbours, optionally augmented via ``sparse_init_rp_tree``,
    and refined for at most ``n_iters`` rounds. A set of already-evaluated
    ``(p, q)`` pairs prevents recomputing a distance.

    Parameters
    ----------
    inds, indptr, data
        Index, row-pointer and value arrays of the sparse input.
    n_vertices
        Number of rows.
    n_neighbors
        Neighbour slots per row.
    rng_state
        Random state for ``rejection_sample`` and ``new_build_candidates``.
    max_candidates
        Number of candidate slots scanned per row in each round.
    sparse_dist, dist_args
        Distance callable, invoked as ``sparse_dist(from_inds, from_data,
        to_inds, to_data, *dist_args)``.
    n_iters
        Maximum number of refinement rounds.
    delta
        A round ending with at most ``delta * n_neighbors * n_vertices``
        counted heap updates stops the refinement.
    rho
        Forwarded to ``new_build_candidates``.
    rp_tree_init, leaf_array
        When ``rp_tree_init`` is true, ``sparse_init_rp_tree`` is called
        with ``leaf_array``.
    verbose
        Print the round counter at the start of every round.

    Returns
    -------
    The value of ``deheap_sort(current_graph)``.
    """
    # Starts with a dummy pair; presumably so the element type of the set is
    # known from the outset -- TODO confirm.
    tried = set([(-1, -1)])
    current_graph = make_heap(n_vertices, n_neighbors)

    for i in range(n_vertices):
        indices = rejection_sample(n_neighbors, n_vertices, rng_state)

        # The slices for row i do not depend on the sampled neighbour, so
        # they are taken once per row rather than once per pair.
        from_inds = inds[indptr[i]:indptr[i + 1]]
        from_data = data[indptr[i]:indptr[i + 1]]

        for j in range(indices.shape[0]):
            neighbor = indices[j]
            to_inds = inds[indptr[neighbor]:indptr[neighbor + 1]]
            to_data = data[indptr[neighbor]:indptr[neighbor + 1]]

            d = sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)

            heap_push(current_graph, i, d, neighbor, 1)
            heap_push(current_graph, neighbor, d, i, 1)
            tried.add((i, neighbor))
            tried.add((neighbor, i))

    if rp_tree_init:
        sparse_init_rp_tree(
            inds,
            indptr,
            data,
            sparse_dist,
            dist_args,
            current_graph,
            leaf_array,
            tried=tried,
        )

    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors, old_candidate_neighbors) = new_build_candidates(
            current_graph,
            n_vertices,
            n_neighbors,
            max_candidates,
            rng_state,
            rho,
            False,
        )

        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    continue

                # The slices for row p are shared by both inner loops.
                from_inds = inds[indptr[p]:indptr[p + 1]]
                from_data = data[indptr[p]:indptr[p + 1]]

                # New against new: start at ``j`` so each unordered pair is
                # visited once.
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]
                    d = sparse_dist(
                        from_inds, from_data, to_inds, to_data, *dist_args
                    )

                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

                # New against old: every old candidate slot is scanned.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]
                    d = sparse_dist(
                        from_inds, from_data, to_inds, to_data, *dist_args
                    )

                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

        if c <= delta * n_neighbors * n_vertices:
            break

    return deheap_sort(current_graph)
def nn_descent(
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    max_candidates=50,
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
):
    """Build an approximate neighbour graph for sparse rows by NN-descent.

    Row ``r`` is described by ``inds[indptr[r]:indptr[r + 1]]`` and
    ``data[indptr[r]:indptr[r + 1]]``. ``sparse_dist`` and ``dist_args``
    are not parameters of this function; they are taken from the
    surrounding scope.

    Parameters
    ----------
    inds, indptr, data
        Index, row-pointer and value arrays of the sparse input.
    n_vertices
        Number of rows.
    n_neighbors
        Neighbour slots per row.
    rng_state
        Random state for ``rejection_sample``, ``build_candidates`` and
        ``tau_rand``.
    max_candidates
        Number of candidate slots scanned per row in each round.
    n_iters
        Maximum number of refinement rounds.
    delta
        A round ending with at most ``delta * n_neighbors * n_vertices``
        counted heap updates stops the refinement.
    rho
        A valid candidate is skipped whenever ``tau_rand(rng_state) < rho``.
    rp_tree_init
        When true, every pair of rows within a row of ``leaf_array`` is
        pushed onto the heap.
    leaf_array
        2-D integer array; a negative entry ends the used part of a row.
        Only read when ``rp_tree_init`` is true.
    verbose
        Print the round counter at the start of every round.

    Returns
    -------
    The value of ``deheap_sort(current_graph)``.
    """
    current_graph = make_heap(n_vertices, n_neighbors)

    # Random initialisation: every sampled pair is pushed in both directions.
    for i in range(n_vertices):
        indices = rejection_sample(n_neighbors, n_vertices, rng_state)

        # The slices for row i do not depend on the sampled neighbour, so
        # they are taken once per row rather than once per pair.
        from_inds = inds[indptr[i]:indptr[i + 1]]
        from_data = data[indptr[i]:indptr[i + 1]]

        for j in range(indices.shape[0]):
            neighbor = indices[j]
            to_inds = inds[indptr[neighbor]:indptr[neighbor + 1]]
            to_data = data[indptr[neighbor]:indptr[neighbor + 1]]

            d = sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)

            heap_push(current_graph, i, d, neighbor, 1)
            heap_push(current_graph, neighbor, d, i, 1)

    if rp_tree_init:
        for n in range(leaf_array.shape[0]):
            for i in range(leaf_array.shape[1]):
                first = leaf_array[n, i]
                if first < 0:
                    break

                # Shared by every pair that has ``first`` as its first member.
                from_inds = inds[indptr[first]:indptr[first + 1]]
                from_data = data[indptr[first]:indptr[first + 1]]

                for j in range(i + 1, leaf_array.shape[1]):
                    second = leaf_array[n, j]
                    if second < 0:
                        break

                    to_inds = inds[indptr[second]:indptr[second + 1]]
                    to_data = data[indptr[second]:indptr[second + 1]]

                    d = sparse_dist(
                        from_inds, from_data, to_inds, to_data, *dist_args
                    )

                    heap_push(current_graph, first, d, second, 1)
                    heap_push(current_graph, second, d, first, 1)

    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        candidate_neighbors = build_candidates(
            current_graph, n_vertices, n_neighbors, max_candidates, rng_state
        )

        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(candidate_neighbors[0, i, j])
                # Short-circuit matters: the random draw is only consumed
                # for valid candidates.
                if p < 0 or tau_rand(rng_state) < rho:
                    continue

                # The slices for row p are shared by the whole inner loop.
                from_inds = inds[indptr[p]:indptr[p + 1]]
                from_data = data[indptr[p]:indptr[p + 1]]

                for k in range(max_candidates):
                    q = int(candidate_neighbors[0, i, k])
                    # Skip empty slots, and pairs in which neither member
                    # has its flag (layer 2 of the candidate array) set.
                    if (
                        q < 0
                        or not candidate_neighbors[2, i, j]
                        and not candidate_neighbors[2, i, k]
                    ):
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(
                        from_inds, from_data, to_inds, to_data, *dist_args
                    )

                    c += heap_push(current_graph, p, d, q, 1)
                    c += heap_push(current_graph, q, d, p, 1)

        if c <= delta * n_neighbors * n_vertices:
            break

    return deheap_sort(current_graph)