def search_init(
    query_inds,
    query_data,
    k,
    inds,
    indptr,
    data,
    forest,
    n_neighbors,
    tried,
    sparse_dist,
    rng_state,
):
    """Seed a ``k``-slot result heap for one sparse query.

    Each tree in ``forest`` is searched with ``search_sparse_flat_tree``; the
    rows it returns are scored against the query with ``sparse_dist`` and
    pushed onto the heap.  Afterwards random rows are tried as a top-up.

    Parameters
    ----------
    query_inds, query_data
        The query, handed unchanged to the tree search and to ``sparse_dist``
        (presumably the column indices / values of one sparse row -- confirm
        against the caller).
    k
        Number of slots in the returned heap arrays.
    inds, indptr, data
        Row ``r`` of the indexed data is read as
        ``inds[indptr[r]:indptr[r + 1]]`` / ``data[indptr[r]:indptr[r + 1]]``.
    forest
        Iterable of trees exposing ``hyperplanes``, ``offsets``, ``children``
        and ``indices``.
    n_neighbors
        Together with ``k`` bounds the seed budget at ``min(k, n_neighbors)``.
    tried
        Visited table, read with ``has_been_visited`` and updated with
        ``mark_visited``.
    sparse_dist
        Callable ``(from_inds, from_data, query_inds, query_data) -> distance``.
    rng_state
        Random state consumed by the tree search and by ``tau_rand_int``.

    Returns
    -------
    (heap_priorities, heap_indices)
        ``float32`` / ``int32`` arrays of length ``k``; slots that were never
        filled keep their initial ``inf`` / ``-1``.
    """
    heap_priorities = np.full(k, np.inf, dtype=np.float32)
    heap_indices = np.full(k, -1, dtype=np.int32)

    n_samples = indptr.shape[0] - 1
    seed_budget = min(k, n_neighbors)
    n_random_samples = seed_budget

    for tree in forest:
        indices = search_sparse_flat_tree(
            query_inds,
            query_data,
            tree.hyperplanes,
            tree.offsets,
            tree.children,
            tree.indices,
            rng_state,
        )

        n_initial_points = indices.shape[0]
        # NOTE: overwritten on every iteration, so only the last tree's leaf
        # size decides how many random rows are tried below.
        n_random_samples = seed_budget - n_initial_points

        for j in range(n_initial_points):
            candidate = indices[j]
            # Indices are distinct within one leaf, but nothing guarantees
            # that leaves coming from different trees are disjoint.  Skip
            # rows that were already scored so the same point cannot occupy
            # two heap slots.
            if has_been_visited(tried, candidate) == 0:
                row_start = indptr[candidate]
                row_end = indptr[candidate + 1]
                from_inds = inds[row_start:row_end]
                from_data = data[row_start:row_end]
                d = sparse_dist(from_inds, from_data, query_inds, query_data)
                simple_heap_push(heap_priorities, heap_indices, d, candidate)
                mark_visited(tried, candidate)

    # range() is empty for a non-positive count, so no explicit guard needed.
    for i in range(n_random_samples):
        candidate = np.abs(tau_rand_int(rng_state)) % n_samples
        if has_been_visited(tried, candidate) == 0:
            row_start = indptr[candidate]
            row_end = indptr[candidate + 1]
            from_inds = inds[row_start:row_end]
            from_data = data[row_start:row_end]
            d = sparse_dist(from_inds, from_data, query_inds, query_data)
            simple_heap_push(heap_priorities, heap_indices, d, candidate)
            mark_visited(tried, candidate)

    return heap_priorities, heap_indices
def search_from_init(
    query_inds,
    query_data,
    inds,
    indptr,
    data,
    search_indptr,
    search_inds,
    heap_priorities,
    heap_indices,
    epsilon,
    tried,
    sparse_dist,
    dist_args,
):
    """Refine a seeded result heap by best-first expansion over a search graph.

    The current heap contents are used as the initial seed set.  The closest
    unexpanded seed is popped repeatedly; each of its graph neighbours that is
    not yet marked in ``tried`` is scored with ``sparse_dist`` and, if it is
    closer than the current bound, pushed onto both the result heap and the
    seed set.  The search stops when the closest remaining seed is not below
    the bound or when no seeds remain.

    Parameters
    ----------
    query_inds, query_data
        The query, passed unchanged to ``sparse_dist``.
    inds, indptr, data
        Row ``r`` of the indexed data is read as
        ``inds[indptr[r]:indptr[r + 1]]`` / ``data[indptr[r]:indptr[r + 1]]``.
    search_indptr, search_inds
        Adjacency of the search graph: neighbours of vertex ``v`` are
        ``search_inds[search_indptr[v]:search_indptr[v + 1]]``.
    heap_priorities, heap_indices
        Result heap arrays; updated in place through ``simple_heap_push`` and
        also returned.
    epsilon
        Slack factor: the bound is ``(1 + epsilon) * heap_priorities[0]``.
    tried
        Visited table, read with ``has_been_visited`` and updated with
        ``mark_visited``.
    sparse_dist
        Distance callable, invoked as
        ``sparse_dist(from_inds, from_data, query_inds, query_data, *dist_args)``.
    dist_args
        Extra positional arguments forwarded to ``sparse_dist``.

    Returns
    -------
    (heap_priorities, heap_indices)
        The same array objects that were passed in.
    """
    heap_size = heap_priorities.shape[0]
    if heap_size == 0:
        # Nothing to seed from and no slot to fill; reading slot 0 or popping
        # from an empty seed list would raise.
        return heap_priorities, heap_indices

    distance_scale = 1.0 + epsilon
    # Slot 0 is read as the current pruning distance (presumably the worst
    # retained distance maintained by simple_heap_push -- confirm).
    distance_bound = distance_scale * heap_priorities[0]

    seed_set = [(heap_priorities[j], heap_indices[j]) for j in range(heap_size)]
    heapq.heapify(seed_set)

    while len(seed_set) > 0:
        # Closest seed that has not been expanded yet.
        d_vertex, vertex = heapq.heappop(seed_set)
        # Written as `not (a < b)` on purpose: a NaN distance ends the search
        # exactly as the `while a < b` form would.
        if not (d_vertex < distance_bound):
            break

        for j in range(search_indptr[vertex], search_indptr[vertex + 1]):
            candidate = search_inds[j]
            if has_been_visited(tried, candidate) == 0:
                mark_visited(tried, candidate)
                row_start = indptr[candidate]
                row_end = indptr[candidate + 1]
                from_inds = inds[row_start:row_end]
                from_data = data[row_start:row_end]
                d = sparse_dist(
                    from_inds, from_data, query_inds, query_data, *dist_args
                )
                if d < distance_bound:
                    simple_heap_push(heap_priorities, heap_indices, d, candidate)
                    heapq.heappush(seed_set, (d, candidate))
                    # The push may have changed slot 0; refresh the bound.
                    distance_bound = distance_scale * heap_priorities[0]

    return heap_priorities, heap_indices
def custom_search_closure(query_points, candidate_indices, k, epsilon, visited):
    """Run a seeded best-first graph search for every row of ``query_points``.

    Relies on names from the enclosing scope: ``dist``, ``data``, ``indptr``,
    ``indices``, ``make_heap``, ``alternative_dot`` and ``alternative_cosine``.

    For each query the rows listed in ``candidate_indices`` are scored with
    ``dist`` and used as seeds.  The closest unexpanded seed is then popped
    repeatedly and its neighbours ``indices[indptr[v]:indptr[v + 1]]`` are
    scored, until the closest remaining seed is not below
    ``(1 + epsilon) * heap_priorities[0]`` or no seeds remain.

    Parameters
    ----------
    query_points
        2-D array-like; one query per row (``shape[0]`` queries).
    candidate_indices
        Row ids used to seed every query.  The seeding loop does not check
        for repeats, so they are assumed to be distinct.
    k
        Heap size handed to ``make_heap``.
    epsilon
        Slack factor applied to the pruning bound.
    visited
        Scratch visited table; cleared with ``visited[:] = 0`` at the start
        of each query.

    Returns
    -------
    The object produced by ``make_heap(query_points.shape[0], k)``, with
    ``result[0][i]`` (indices) and ``result[1][i]`` (priorities) filled in for
    each query ``i``.  Queries with zero norm under the dot/cosine variants
    are skipped and keep whatever ``make_heap`` initialised.
    """
    n_queries = query_points.shape[0]
    result = make_heap(n_queries, k)
    distance_scale = 1.0 + epsilon

    for i in range(n_queries):
        visited[:] = 0

        if dist == alternative_dot or dist == alternative_cosine:
            # These two metrics are evaluated on a unit-length query.
            norm = np.sqrt((query_points[i] ** 2).sum())
            if norm > 0.0:
                current_query = query_points[i] / norm
            else:
                # A zero vector cannot be normalised; leave this query's
                # heap as initialised.
                continue
        else:
            current_query = query_points[i]

        heap_priorities = result[1][i]
        heap_indices = result[0][i]
        # Empty list built from a comprehension over range(0).
        # NOTE(review): presumably so a JIT compiler (e.g. numba) can infer
        # the (float32, int32) element type -- keep this form.
        seed_set = [(np.float32(np.inf), np.int32(-1)) for j in range(0)]

        # ---- Init: score the supplied candidates ----
        n_initial_points = candidate_indices.shape[0]
        for j in range(n_initial_points):
            candidate = np.int32(candidate_indices[j])
            d = dist(data[candidate], current_query)
            # indices are guaranteed different
            simple_heap_push(heap_priorities, heap_indices, d, candidate)
            heapq.heappush(seed_set, (d, candidate))
            mark_visited(visited, candidate)

        # ---- Search: expand the closest seed until the bound is hit ----
        distance_bound = distance_scale * heap_priorities[0]

        # Pop only while seeds remain, so an empty candidate list yields an
        # untouched heap instead of an IndexError from heappop.
        while len(seed_set) > 0:
            d_vertex, vertex = heapq.heappop(seed_set)
            # `not (a < b)` keeps NaN distances terminating the search.
            if not (d_vertex < distance_bound):
                break

            for j in range(indptr[vertex], indptr[vertex + 1]):
                candidate = indices[j]
                if has_been_visited(visited, candidate) == 0:
                    mark_visited(visited, candidate)
                    d = dist(data[candidate], current_query)
                    if d < distance_bound:
                        simple_heap_push(
                            heap_priorities, heap_indices, d, candidate
                        )
                        heapq.heappush(seed_set, (d, candidate))
                        # The push may have changed slot 0; refresh the bound.
                        distance_bound = distance_scale * heap_priorities[0]

    return result