def _dijkstra(self, start, targets):
    """Run Dijkstra's algorithm from ``start`` until all ``targets`` are settled.

    The search stops as soon as every node in ``targets`` has been popped
    from the priority queue, or when the queue runs empty.

    Returns a dict mapping each node that was relaxed at least once to the
    node it was reached from along the cheapest path known so far; ``start``
    maps to ``None``.
    """
    remaining = set(targets)
    predecessors = {start: None}

    # Every node enters the queue at cost INF; the start node is then set to 0.
    # NOTE(review): this relies on HeapQueue.push re-prioritising a node that
    # is already queued -- confirm against the HeapQueue implementation.
    queue = HeapQueue((vertex, INF) for vertex in self.nodes)
    queue.push(start, 0)

    while queue:
        # A popped node is treated as settled: it is never relaxed again
        # because it is no longer a member of the queue.
        settled, settled_cost = queue.pop()
        remaining.discard(settled)
        if not remaining:
            # Every requested target has been settled.
            break

        for neighbor, edge_weight in self.neighbors(settled):
            # Nodes no longer in the queue are already settled; skip them.
            if neighbor not in queue:
                continue
            candidate = settled_cost + edge_weight
            # Keep the new route only when it is strictly cheaper.
            if candidate < queue[neighbor]:
                queue.push(neighbor, candidate)
                predecessors[neighbor] = settled

    return predecessors
def balanced_partition(promoted_data1, promoted_data2, data_objects, distance_function):
    """Split ``data_objects`` between two promoted objects, taking turns.

    Two priority queues are built over the same objects, one keyed by the
    distance to ``promoted_data1`` and one by the distance to
    ``promoted_data2``. The two sides then alternate: on its turn a side pops
    objects from its own queue until it finds one the other side has not
    already claimed, and claims it.

    Args:
        promoted_data1: Reference object for the first partition.
        promoted_data2: Reference object for the second partition.
        data_objects: Iterable of hashable objects to distribute. It may be
            a one-shot iterable such as a generator.
        distance_function: Callable ``(data, promoted) -> distance`` used as
            the queue ordering key.

    Returns:
        A ``(partition1, partition2)`` tuple of sets.
    """
    # Materialise once: both queues must see every object, and a one-shot
    # iterable would be exhausted by the first queue, leaving the second
    # queue empty and every object in partition1.
    data_objects = tuple(data_objects)

    partition1 = set()
    partition2 = set()
    # NOTE(review): presumably HeapQueue.pop() returns the smallest key first,
    # so each side claims its nearest unclaimed object -- confirm.
    queue1 = HeapQueue(data_objects, key=lambda data: distance_function(data, promoted_data1))
    queue2 = HeapQueue(data_objects, key=lambda data: distance_function(data, promoted_data2))

    turns = (
        (queue1, partition1, partition2),
        (queue2, partition2, partition1),
    )
    while queue1 or queue2:
        for queue, own, other in turns:
            # Discard objects the other side already holds; stop after
            # claiming exactly one object, or when this queue is drained.
            while queue:
                data = queue.pop()
                if data not in other:
                    own.add(data)
                    break

    return partition1, partition2
def get_nearest(self, query_data, range=_INFINITY, limit=_INFINITY):
    """Yield the indexed data nearest to ``query_data``, closest first.

    This is a generator. Each yielded item is a ``self.ResultItem`` holding
    the indexed ``data`` and its ``distance`` to ``query_data``; items are
    produced in increasing distance order.

    Args:
        query_data: The object to search around.
        range: Maximum distance from ``query_data`` for a result to be
            reported. (The name shadows the builtin but is part of the
            public signature.)
        limit: Maximum number of results to yield. A limit of zero or less
            yields nothing.
    """
    if limit <= 0:
        # The limit is otherwise only checked after a yield, which would
        # let one result through even when none were requested.
        return

    if self.root is None:
        # No indexed data!
        return

    root_distance = self.distance_function(query_data, self.root.data)
    root_min_distance = max(root_distance - self.root.radius, 0)

    # Nodes still to expand, ordered by the smallest distance anything
    # inside them could have to the query.
    pending_queue = HeapQueue(
        content=[
            _ItemWithDistances(
                item=self.root,
                distance=root_distance,
                min_distance=root_min_distance,
            )
        ],
        key=lambda iwd: iwd.min_distance,
    )
    # Entries found so far but not yet yielded, ordered by actual distance.
    nearest_queue = HeapQueue(key=lambda iwd: iwd.distance)

    yielded_count = 0

    while pending_queue:
        pending = pending_queue.pop()
        node = pending.item
        assert isinstance(node, _Node)

        # ``values()`` rather than the Python-2-only ``itervalues()`` so the
        # traversal works on both Python 2 and Python 3 mappings.
        for child in node.children.values():
            # Cheap pre-filter that avoids a distance computation.
            # NOTE(review): presumably distance_to_parent is the distance
            # from child.data to node.data, making this a triangle-inequality
            # lower bound on the child's distance to the query -- confirm.
            if abs(pending.distance - child.distance_to_parent) - child.radius > range:
                continue

            child_distance = self.distance_function(query_data, child.data)
            child_min_distance = max(child_distance - child.radius, 0)
            if child_min_distance > range:
                continue

            iwd = _ItemWithDistances(
                item=child,
                distance=child_distance,
                min_distance=child_min_distance,
            )
            if isinstance(child, _Entry):
                nearest_queue.push(iwd)
            else:
                pending_queue.push(iwd)

        # Tries to yield known results so far: an entry is yielded only when
        # it is at least as close as the best remaining pending node's
        # minimum distance.
        if pending_queue:
            next_pending_min_distance = pending_queue.head().min_distance
        else:
            next_pending_min_distance = _INFINITY

        while nearest_queue:
            next_nearest = nearest_queue.head()
            assert isinstance(next_nearest, _ItemWithDistances)
            if next_nearest.distance > next_pending_min_distance:
                # Expand more of the tree before yielding this entry.
                break

            popped = nearest_queue.pop()
            assert popped is next_nearest
            yield self.ResultItem(data=next_nearest.item.data, distance=next_nearest.distance)
            yielded_count += 1
            if yielded_count >= limit:
                # Limit reached
                return