def test_ids_basic():
    """Smoke-test DHTID: value range, XOR self-distance, and byte round-trips."""
    for _ in range(100):
        first, second = DHTID.generate(), DHTID.generate()

        # Generated ids must lie within the DHTID value range.
        # NOTE(review): the upper bound is strict for the first id but inclusive
        # for the second -- confirm which comparison against DHTID.MAX is intended.
        assert DHTID.MIN <= first < DHTID.MAX
        assert DHTID.MIN <= second <= DHTID.MAX

        # The XOR distance from any id to itself is zero.
        own_distance_first = DHTID.xor_distance(first, first)
        own_distance_second = DHTID.xor_distance(second, second)
        assert own_distance_first == own_distance_second == 0

        # Two ids are at a positive distance unless they happen to be equal.
        assert DHTID.xor_distance(first, second) > 0 or (first == second)

        # Serialising to bytes (via bytes() or .to_bytes()) and parsing back is lossless.
        assert DHTID.from_bytes(bytes(first)) == first
        assert DHTID.from_bytes(second.to_bytes()) == second
def _claim_unvisited(node_ids: Collection[DHTID], visited_nodes: Set[DHTID]) -> List[DHTID]:
    """Return ids from node_ids not yet in visited_nodes (first-seen order), adding each one to visited_nodes."""
    fresh_ids = []
    for node_id in node_ids:
        if node_id not in visited_nodes:
            # Marking the id immediately also filters repeats *within* node_ids itself.
            visited_nodes.add(node_id)
            fresh_ids.append(node_id)
    return fresh_ids


async def simple_traverse_dht(
    query_id: DHTID,
    initial_nodes: Collection[DHTID],
    beam_size: int,
    get_neighbors: Callable[[DHTID], Awaitable[Tuple[Collection[DHTID], bool]]],
    visited_nodes: Collection[DHTID] = (),
) -> Tuple[List[DHTID], Set[DHTID]]:
    """
    Traverse the DHT graph using get_neighbors function, find :beam_size: nearest nodes according to DHTID.xor_distance.

    :note: This is a simplified (but working) algorithm provided for documentation purposes. Actual DHTNode uses
       `traverse_dht` - a generalization of this algorithm that allows multiple queries and concurrent workers.

    :param query_id: search query, find k_nearest neighbors of this DHTID
    :param initial_nodes: nodes used to pre-populate beam search heap, e.g. [my_own_DHTID, ...maybe_some_peers]
    :param beam_size: beam search will not give up until it exhausts this many nearest nodes (to query_id) from the heap
        Recommended value: A beam size of k_nearest * (2-5) will yield near-perfect results.
    :param get_neighbors: A function that returns neighbors of a given node and controls beam search stopping criteria.
        async def get_neighbors(node: DHTID) -> neighbors_of_that_node: List[DHTID], should_continue: bool
        If should_continue is False, beam search will halt and return k_nearest of whatever it found by then.
    :param visited_nodes: beam search will neither call get_neighbors on these nodes, nor return them as nearest
    :returns: a list of k nearest nodes (nearest to farthest), and a set of all visited nodes (including visited_nodes)
    :raises ValueError: if beam_size is less than 1 while there is at least one unvisited initial node
    """
    # Copy visited_nodes: more ids are added below and the caller's collection must not be modified.
    visited_nodes = set(visited_nodes)

    # Keep only initial nodes that are neither visited already nor repeated in the input, so that
    # no node can be queued, queried or returned more than once.
    start_nodes = _claim_unvisited(initial_nodes, visited_nodes)
    if not start_nodes:
        return [], visited_nodes
    if beam_size < 1:
        # Without this check the empty beam below fails with an opaque IndexError.
        raise ValueError("beam_size must be a positive integer, got {!r}".format(beam_size))

    # Nearest-first heap of candidates that still have to be queried, unlimited size.
    unvisited_nodes = [
        (distance, node_id) for node_id, distance in zip(start_nodes, query_id.xor_distance(start_nodes))
    ]
    heapq.heapify(unvisited_nodes)

    # Farthest-first heap (distances negated) of at most beam_size entries; used for early stopping
    # and to select the results. nsmallest already caps its length at beam_size.
    nearest_nodes = [(-distance, node_id) for distance, node_id in heapq.nsmallest(beam_size, unvisited_nodes)]
    heapq.heapify(nearest_nodes)

    upper_bound = -nearest_nodes[0][0]  # distance to the farthest element that is still in the beam
    was_interrupted = False  # becomes True once get_neighbors asks the search to stop

    while not was_interrupted and unvisited_nodes and unvisited_nodes[0][0] <= upper_bound:
        _, node_id = heapq.heappop(unvisited_nodes)  # closest candidate that has not been queried yet
        neighbors, was_interrupted = await get_neighbors(node_id)
        neighbors = _claim_unvisited(neighbors, visited_nodes)

        for neighbor_id, distance in zip(neighbors, query_id.xor_distance(neighbors)):
            if distance <= upper_bound or len(nearest_nodes) < beam_size:
                heapq.heappush(unvisited_nodes, (distance, neighbor_id))

                if len(nearest_nodes) < beam_size:
                    heapq.heappush(nearest_nodes, (-distance, neighbor_id))
                else:
                    # Beam is full: insert the neighbor and evict the farthest entry.
                    heapq.heappushpop(nearest_nodes, (-distance, neighbor_id))
                upper_bound = -nearest_nodes[0][0]  # distance to beam_size-th nearest element found so far

    # Largest negated distance == smallest distance, so this yields nearest-to-farthest order.
    return [node_id for _, node_id in heapq.nlargest(beam_size, nearest_nodes)], visited_nodes