async def get_account(
        self, block_hash: bytes, address: bytes, cancel_token: CancelToken) -> Account:
    key = keccak(address)
    proof = await self._get_proof(cancel_token, block_hash, account_key=b'', key=key)
    header = await self.get_block_header_by_hash(block_hash, cancel_token)
    rlp_account = HexaryTrie.get_from_proof(header.state_root, key, proof)
    return rlp.decode(rlp_account, sedes=Account)
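# A minimal sketch of the proof round-trip used above, exercising py-trie's
# get_proof/get_from_proof pair directly (the key and value are made up):
from trie import HexaryTrie

# Build a tiny trie, extract a Merkle proof for one key, then re-derive the
# value from nothing but the root hash and the proof nodes.
trie = HexaryTrie(db={})
trie[b'some-key'] = b'some-value'

proof = trie.get_proof(b'some-key')
proved = HexaryTrie.get_from_proof(trie.root_hash, b'some-key', proof)
assert proved == b'some-value'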
def _request_tracking_trie_items(
        self,
        request_tracker: TrieNodeRequestTracker,
        root_hash: Hash32) -> Iterator[Tuple[Nibbles, Nibbles, bytes]]:
    """
    Walk through the supplied trie, yielding every key/value found, and raising
    when a trie node needed for the walk is missing.

    :yield: path to leaf node, a key (as nibbles), and the value found in the trie
    :raise: MissingTraversalNode if a node is missing while walking the trie
    """
    if self._next_trie_root_hash is None:
        # We haven't started beam syncing, so don't know which root to start at
        return
    trie = HexaryTrie(self._db, root_hash)

    starting_index = bytes_to_nibbles(root_hash)

    while self.manager.is_running:
        try:
            path_to_node = request_tracker.next_path_to_explore(starting_index)
        except trie_exceptions.PerfectVisibility:
            # This doesn't necessarily mean we are finished.
            # Any active prefixes might still be hiding some significant portion of the trie.
            # But it's all we're able to explore for now, until more node data arrives.
            return

        try:
            cached_node, uncached_key = request_tracker.get_cached_parent(path_to_node)
        except KeyError:
            cached_node = None
            node_getter = partial(trie.traverse, path_to_node)
        else:
            node_getter = partial(trie.traverse_from, cached_node, uncached_key)

        try:
            node = node_getter()
        except trie_exceptions.MissingTraversalNode as exc:
            # Found missing account trie node
            if path_to_node == exc.nibbles_traversed:
                raise
            elif cached_node is None:
                # The path and nibbles traversed should always match in a non-cached traversal
                raise RuntimeError(
                    f"Unexpected: on a non-cached traversal to {path_to_node}, the"
                    f" exception only claimed to traverse {exc.nibbles_traversed} -- {exc}"
                ) from exc
            else:
                # We need to re-raise a version of the exception that includes the whole path
                # from the root node (when using cached nodes, we only have the path from
                # the parent node to the child node).
                # We could always raise this re-wrapped version, but skipping it (probably?)
                # improves performance.
                missing_hash = exc.missing_node_hash
                raise trie_exceptions.MissingTraversalNode(missing_hash, path_to_node) from exc
        except trie_exceptions.TraversedPartialPath as exc:
            node = exc.simulated_node

        if node.value:
            full_key_nibbles = path_to_node + node.suffix

            if len(node.sub_segments):
                # It shouldn't be a problem to skip handling this case, because all keys
                # are hashed to 32 bytes.
                raise NotImplementedError(
                    "The state backfiller doesn't handle keys of different lengths, where"
                    f" one key is a prefix of another. But found {node} in trie with"
                    f" {root_hash!r}"
                )

            yield path_to_node, full_key_nibbles, node.value
            # Note that we do not mark value nodes as completed. It is up to the caller
            # to do that when it is ready. For example, the storage iterator will
            # immediately treat the key as completed. The account iterator will
            # not treat the key as completed until all of its storage and bytecode
            # are also marked as complete.
        else:
            # If this is just an intermediate node, then we can mark it as confirmed.
            request_tracker.confirm_prefix(path_to_node, node)
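# The walk above leans on py-trie's annotated traversal API. A small sketch of
# `traverse` under the same assumptions; the two keys are illustrative, chosen
# so their first nibbles differ and the root becomes a branch node:
from trie import HexaryTrie

trie = HexaryTrie(db={})
trie[b'\x01\x23'] = b'first-value'
trie[b'\x12\x34'] = b'second-value'

root = trie.traverse(())  # the empty nibble path addresses the root node
for segment in root.sub_segments:
    child = trie.traverse(segment)
    # Leaf children report their stored value plus any remaining key suffix
    print(segment, child.suffix, child.value)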
from trie import HexaryTrie
from web3.auto import w3
import rlp
from eth_utils import (
    keccak,
)

trie = HexaryTrie(db={})
print(w3.toHex(trie.root_hash))
print(w3.toHex(keccak(trie.root_node)))
print(w3.toHex(keccak(rlp.encode(trie.root_node))))

print("1", w3.toHex(rlp.encode(1)))
print("ONE", w3.toHex(rlp.encode("ONE")))
print("2", w3.toHex(rlp.encode(2)))
print("TWO", w3.toHex(rlp.encode("TWO")))

trie.set(rlp.encode(1), rlp.encode("ONE"))
print(w3.toHex(trie.root_hash))
print(w3.toHex(keccak(rlp.encode(trie.get_node(trie.root_hash)))))
assert trie.root_hash == keccak(rlp.encode(trie.get_node(trie.root_hash)))
print(trie.get_node(trie.root_hash))

trie.set(rlp.encode(2), rlp.encode("TWO"))
print(w3.toHex(trie.root_hash))
print(w3.toHex(keccak(rlp.encode(trie.get_node(trie.root_hash)))))
assert trie.root_hash == keccak(rlp.encode(trie.get_node(trie.root_hash)))
print(trie.get_node(trie.root_hash))
class BeamDownloader(BaseService, PeerSubscriber):
    """
    Coordinate the request of needed state data: accounts, storage, bytecodes,
    and other arbitrary intermediate nodes in the trie.
    """
    _total_processed_nodes = 0
    _urgent_processed_nodes = 0
    _predictive_processed_nodes = 0
    _total_timeouts = 0
    _timer = Timer(auto_start=False)
    _report_interval = 10  # Number of seconds between progress reports.
    _reply_timeout = 20  # seconds

    # We are only interested in peers entering or leaving the pool
    subscription_msg_types: FrozenSet[Type[Command]] = frozenset()

    # This is a rather arbitrary value, but when the sync is operating normally we never see
    # the msg queue grow past a few hundred items, so this should be a reasonable limit for
    # now.
    msg_queue_maxsize: int = 2000

    def __init__(
            self,
            db: BaseAsyncDB,
            peer_pool: ETHPeerPool,
            event_bus: TrinityEventBusEndpoint,
            token: CancelToken = None) -> None:
        super().__init__(token)
        self._db = db
        self._trie_db = HexaryTrie(db)
        self._node_data_peers = WaitingPeers[ETHPeer](NodeData)
        self._event_bus = event_bus

        # Track the needed node data that is urgent and important:
        buffer_size = MAX_STATE_FETCH * REQUEST_BUFFER_MULTIPLIER
        self._node_tasks = TaskQueue[Hash32](buffer_size, lambda task: 0)

        # list of events waiting on new data
        self._new_data_events: Set[asyncio.Event] = set()

        self._peer_pool = peer_pool

        # Track node data that might be useful: hashes we bumped into while getting urgent nodes
        self._hash_to_priority: Dict[Hash32, int] = {}
        self._maybe_useful_nodes = TaskQueue[Hash32](
            buffer_size,
            lambda node_hash: self._hash_to_priority[node_hash],
        )
        self._predicted_nodes: Dict[Hash32, bytes] = {}
        self._prediction_successes = 0

        self._peers_without_full_trie: Set[ETHPeer] = set()

        # It's possible that you are connected to a peer that doesn't have a full state DB.
        # In that case, we may get stuck requesting predictive nodes from them over and over
        # because they don't have anything but the nodes required to prove recent block
        # executions. If we get stuck in that scenario, turn off allow_predictive_only.
        # For now, we just turn it off for all peers, for simplicity.
        self._allow_predictive_only = True

    async def ensure_node_present(self, node_hash: Hash32) -> int:
        """
        Wait until the node that is the preimage of `node_hash` is available in the database.
        If it is not available in the first check, request it from peers.

        Mark this node as urgent and important (rather than predictive), which increases
        request priority.

        Note that if your ultimate goal is an account or storage data, it's probably better
        to use download_account or download_storage. This method is useful for other
        scenarios, like bytecode lookups or intermediate node lookups.

        :return: whether the node was missing from the database on the first check
        """
        if self._is_node_missing(node_hash):
            if node_hash not in self._node_tasks:
                await self._node_tasks.add((node_hash, ))
            await self._node_hashes_present((node_hash, ))
            return 1
        else:
            return 0

    async def predictive_node_present(self, node_hash: Hash32) -> int:
        """
        Wait until the node that is the preimage of `node_hash` is available in the database.
        If it is not available in the first check, request it from peers.

        Mark this node as predictive, which reduces request priority.

        :return: whether the node was missing from the database on the first check
        """
        if self._is_node_missing(node_hash):
            if node_hash not in self._node_tasks and node_hash not in self._maybe_useful_nodes:
                self._hash_to_priority[node_hash] = 1
                await self._maybe_useful_nodes.add((node_hash, ))
            await self._node_hashes_present((node_hash, ))
            return 1
        else:
            return 0

    async def ensure_nodes_present(self, node_hashes: Iterable[Hash32]) -> int:
        """
        Like :meth:`ensure_node_present`, but waits for multiple nodes to be available.

        :return: how many nodes had to be downloaded
        """
        missing_nodes = tuple(
            set(node_hash for node_hash in node_hashes if self._is_node_missing(node_hash)))
        await self._node_tasks.add(missing_nodes)
        await self._node_hashes_present(missing_nodes)
        return len(missing_nodes)

    async def predictive_nodes_present(self, node_hashes: Iterable[Hash32]) -> int:
        """
        Like :meth:`predictive_node_present`, but waits for multiple nodes to be available.

        :return: how many nodes had to be downloaded
        """
        missing_nodes = tuple(
            set(node_hash for node_hash in node_hashes if self._is_node_missing(node_hash)))
        await self._maybe_useful_nodes.add(tuple(
            node_hash for node_hash in missing_nodes
            if node_hash not in self._maybe_useful_nodes
        ))
        await self._node_hashes_present(missing_nodes)
        return len(missing_nodes)

    def _is_node_missing(self, node_hash: Hash32) -> bool:
        if len(node_hash) != 32:
            raise ValidationError(f"Must request node by its 32-byte hash: 0x{node_hash.hex()}")

        self.logger.debug2("checking if node 0x%s is present", node_hash.hex())

        if node_hash not in self._db:
            # Instead of immediately storing predicted nodes, we keep them in memory.
            # So when we check if a node is available, we also check if the prediction
            # is in memory.
            if node_hash in self._predicted_nodes:
                # Part of the benefit is that we can identify how effective our predictions are
                self._prediction_successes += 1
                # Now we store the predicted node in the database
                self._db[node_hash] = self._predicted_nodes.pop(node_hash)
                return False
            else:
                return True
        else:
            return False

    async def download_accounts(
            self,
            account_addresses: Iterable[Hash32],
            root_hash: Hash32,
            predictive: bool = False) -> int:
        """
        Like :meth:`download_account`, but waits for multiple addresses to be available.

        :return: total number of trie node downloads that were required to locally prove
        """
        missing_account_hashes = set(keccak(address) for address in account_addresses)
        completed_account_hashes = set()
        nodes_downloaded = 0
        # will never take more than 64 attempts to get a full account
        for _ in range(64):
            need_nodes = set()
            with self._trie_db.at_root(root_hash) as snapshot:
                for account_hash in missing_account_hashes:
                    try:
                        snapshot[account_hash]
                    except MissingTrieNode as exc:
                        need_nodes.add(exc.missing_node_hash)
                    else:
                        completed_account_hashes.add(account_hash)

            if predictive:
                await self.predictive_nodes_present(need_nodes)
            else:
                await self.ensure_nodes_present(need_nodes)
            nodes_downloaded += len(need_nodes)
            missing_account_hashes -= completed_account_hashes

            if not missing_account_hashes:
                return nodes_downloaded
        else:
            raise Exception(
                f"State Downloader failed to download {account_addresses!r} at "
                f"state root 0x{root_hash.hex()} in 64 runs"
            )

    async def download_account(
            self,
            account_hash: Hash32,
            root_hash: Hash32,
            predictive: bool = False) -> Tuple[bytes, int]:
        """
        Check the given account address for presence in the state database.
        Wait until we have the state proof for the given address.
        If the account is not available in the first check, then request any trie nodes
        that we need to determine and prove the account rlp.

        Mark these nodes as urgent and important, which increases request priority.

        :return: The downloaded account rlp, and how many state trie node downloads were required
        """
        # will never take more than 64 attempts to get a full account
        for num_downloads_required in range(64):
            try:
                with self._trie_db.at_root(root_hash) as snapshot:
                    account_rlp = snapshot[account_hash]
            except MissingTrieNode as exc:
                if predictive:
                    await self.predictive_node_present(exc.missing_node_hash)
                else:
                    await self.ensure_node_present(exc.missing_node_hash)
            else:
                # Account is fully available within the trie
                return account_rlp, num_downloads_required
        else:
            raise Exception(
                f"State Downloader failed to download 0x{account_hash.hex()} at "
                f"state root 0x{root_hash.hex()} in 64 runs"
            )

    async def download_storage(
            self,
            storage_key: Hash32,
            storage_root_hash: Hash32,
            account: Address,
            predictive: bool = False) -> int:
        """
        Check the given storage key for presence in the account's storage database.
        Wait until we have a trie proof for the given storage key.
        If the storage key value is not available in the first check, then request
        any trie nodes that we need to determine and prove the storage value.

        Mark these nodes as urgent and important, which increases request priority.

        :return: how many storage trie node downloads were required
        """
        # should never take more than 64 attempts to get a full account
        for num_downloads_required in range(64):
            try:
                with self._trie_db.at_root(storage_root_hash) as snapshot:
                    # request the data just to see which part is missing
                    snapshot[storage_key]
            except MissingTrieNode as exc:
                if predictive:
                    await self.predictive_node_present(exc.missing_node_hash)
                else:
                    await self.ensure_node_present(exc.missing_node_hash)
            else:
                # Storage value is fully available within the trie
                return num_downloads_required
        else:
            raise Exception(
                f"State Downloader failed to download storage 0x{storage_key.hex()} in "
                f"{to_checksum_address(account)} at storage root "
                f"0x{storage_root_hash.hex()} in 64 runs."
            )

    async def _match_node_requests_to_peers(self) -> None:
        """
        Monitor the TaskQueue for needed trie nodes, and request them from peers.
        Repeat as necessary. Prefer urgent nodes over predictive ones.
        """
        while self.is_operational:
            urgent_batch_id, urgent_hashes = await self._get_waiting_urgent_hashes()

            predictive_batch_id, predictive_hashes = self._maybe_add_predictive_nodes(
                urgent_hashes)

            # combine into a single tuple of hashes
            node_hashes = self._combine_urgent_predictive(urgent_hashes, predictive_hashes)
            if not node_hashes:
                self.logger.warning("restarting because empty node hashes")
                await self.sleep(0.02)
                continue

            # Get an available peer, preferring the one that gives us the most
            # node data throughput
            peer = await self._node_data_peers.get_fastest()

            if urgent_batch_id is None:
                # We will make a request of all-predictive nodes
                if peer in self._peers_without_full_trie:
                    self.logger.warning("Skipping all-predictive loading on %s", peer)
                    self._node_data_peers.put_nowait(peer)
                    self._maybe_useful_nodes.complete(predictive_batch_id, ())
                    self._allow_predictive_only = False
                    continue

            if any(len(h) != 32 for h in node_hashes):
                # This was inserted to identify and resolve a buggy situation
                short_node_urgent_hashes = tuple(h for h in node_hashes if len(h) != 32)
                raise ValidationError(
                    f"Some of the requested node hashes are too short! "
                    f"{short_node_urgent_hashes!r}"
                )

            # Request all the nodes from the given peer, and immediately move on to
            # try to request other nodes from another peer.
            self.run_task(self._get_nodes_from_peer(
                peer,
                node_hashes,
                urgent_batch_id,
                urgent_hashes,
                predictive_hashes,
                predictive_batch_id,
            ))

    async def _get_waiting_urgent_hashes(self) -> Tuple[int, Tuple[Hash32, ...]]:
        # if any predictive nodes are waiting, then time out after a short pause to grab them
        if self._allow_predictive_only and self._maybe_useful_nodes.num_pending():
            timeout = 0.05
        else:
            timeout = None
        try:
            return await self.wait(
                self._node_tasks.get(eth_constants.MAX_STATE_FETCH),
                timeout=timeout,
            )
        except TimeoutError:
            return None, ()

    def _maybe_add_predictive_nodes(
            self,
            urgent_hashes: Tuple[Hash32, ...]) -> Tuple[int, Tuple[Hash32, ...]]:
        # how many predictive nodes should we request?
        num_predictive_backfills = min(
            eth_constants.MAX_STATE_FETCH - len(urgent_hashes),
            self._maybe_useful_nodes.num_pending(),
        )
        if num_predictive_backfills:
            return self._maybe_useful_nodes.get_nowait(num_predictive_backfills)
        else:
            return None, ()

    def _combine_urgent_predictive(
            self,
            urgent_hashes: Tuple[Hash32, ...],
            predictive_hashes: Tuple[Hash32, ...]) -> Tuple[Hash32, ...]:
        non_urgent_predictive_hashes = tuple(set(predictive_hashes).difference(urgent_hashes))
        request_urgent_hashes = tuple(
            h for h in urgent_hashes if h not in self._predicted_nodes)
        return request_urgent_hashes + non_urgent_predictive_hashes

    async def _get_nodes_from_peer(
            self,
            peer: ETHPeer,
            node_hashes: Tuple[Hash32, ...],
            urgent_batch_id: int,
            urgent_node_hashes: Tuple[Hash32, ...],
            predictive_node_hashes: Tuple[Hash32, ...],
            predictive_batch_id: int) -> None:

        nodes = await self._request_nodes(peer, node_hashes)

        if len(nodes) == 0 and urgent_batch_id is None:
            self.logger.debug("Shutting off all-predictive loading on %s", peer)
            self._peers_without_full_trie.add(peer)

        urgent_nodes = {
            node_hash: node
            for node_hash, node in nodes
            if node_hash in urgent_node_hashes
        }
        predictive_nodes = {
            node_hash: node
            for node_hash, node in nodes
            if node_hash in predictive_node_hashes
        }
        if len(urgent_nodes) + len(predictive_nodes) < len(nodes):
            raise ValidationError("All nodes must be either urgent or predictive")

        if len(urgent_nodes) == 0 and urgent_batch_id is not None:
            self.logger.info("%s returned no urgent nodes from %r", peer, urgent_node_hashes)

        for node_hash, node in urgent_nodes.items():
            self._db[node_hash] = node
            await self._spawn_predictive_nodes(node, priority=1)

        if urgent_batch_id is not None:
            self._node_tasks.complete(urgent_batch_id, tuple(urgent_nodes.keys()))

        self._predicted_nodes.update(predictive_nodes)
        for node_hash, node in predictive_nodes.items():
            priority = self._hash_to_priority.pop(node_hash)
            await self._spawn_predictive_nodes(node, priority=priority + 1)

        if predictive_batch_id is not None:
            # retire all predictions; if the responding node doesn't have them, then we
            # don't want to keep asking
            self._maybe_useful_nodes.complete(predictive_batch_id, predictive_node_hashes)

        self._urgent_processed_nodes += len(urgent_nodes)
        for node_hash in predictive_nodes.keys():
            if node_hash not in urgent_node_hashes:
                self._predictive_processed_nodes += 1
        self._total_processed_nodes += len(nodes)

        if len(nodes):
            for new_data in self._new_data_events:
                new_data.set()

    async def _spawn_predictive_nodes(self, node: bytes, priority: int) -> None:
        """
        Identify node hashes for nodes we might need in the future, and insert them
        into the predictive node queue.
        """
        # priority is the depth of the node away from an urgent node, plus one.
        # For example, the child of an urgent node has priority 2.
        if priority > 3:
            # We would simply download all nodes if we kept adding predictions, so
            # instead we cut it off at a certain depth
            return

        try:
            decoded_node = rlp.decode(node)
        except rlp.DecodingError:
            # Could not decode rlp, it's probably bytecode, carry on...
            return

        if len(decoded_node) == 17 and (priority <= 2 or all(decoded_node[:16])):
            # if this is a fully-filled branch node, then spawn predictive node tasks
            predictive_room = min(
                self._maybe_useful_nodes._maxsize - len(self._maybe_useful_nodes),
                16,
            )
            request_nodes = tuple(
                Hash32(h) for h in decoded_node[:16]
                if _is_hash(h) and Hash32(h) not in self._maybe_useful_nodes
            )
            queue_hashes = set(request_nodes[:predictive_room])
            for sub_hash in queue_hashes:
                self._hash_to_priority[sub_hash] = priority

            new_nodes = tuple(h for h in queue_hashes if h not in self._maybe_useful_nodes)
            # this should always complete immediately because of the drop above
            await self._maybe_useful_nodes.add(new_nodes)
        else:
            self.logger.debug2("Not predicting node: %r", decoded_node)

    def _is_node_present(self, node_hash: Hash32) -> bool:
        """
        Check if node_hash has data in the database or in the predicted node set.
        """
        return node_hash in self._db or node_hash in self._predicted_nodes

    async def _node_hashes_present(self, node_hashes: Tuple[Hash32, ...]) -> None:
        remaining_hashes = set(node_hashes)

        # save an event that gets triggered when new data comes in
        new_data = asyncio.Event()
        self._new_data_events.add(new_data)

        iterations = itertools.count()
        while remaining_hashes and next(iterations) < 1000:
            await new_data.wait()

            found_hashes = set(
                found for found in remaining_hashes if self._is_node_present(found))
            remaining_hashes -= found_hashes

            new_data.clear()

        if remaining_hashes:
            self.logger.error("Never collected node data for hashes %r", remaining_hashes)

        self._new_data_events.remove(new_data)

    def register_peer(self, peer: BasePeer) -> None:
        super().register_peer(peer)
        # when a new peer is added to the pool, add it to the idle peer list
        self._node_data_peers.put_nowait(peer)  # type: ignore

    async def _request_nodes(
            self,
            peer: ETHPeer,
            node_hashes: Tuple[Hash32, ...]) -> NodeDataBundles:
        try:
            completed_nodes = await self._make_node_request(peer, node_hashes)
        except BaseP2PError as exc:
            self.logger.warning(
                "Unexpected p2p err while downloading nodes from %s: %s", peer, exc)
            self.logger.debug("Problem downloading nodes from peer, dropping...", exc_info=True)
            return tuple()
        except OperationCancelled:
            self.logger.debug(
                "Service cancellation while fetching segment, dropping %s from queue",
                peer,
                exc_info=True,
            )
            return tuple()
        except PeerConnectionLost:
            self.logger.debug(
                "%s went away, cancelling the nodes request and moving on...", peer)
            return tuple()
        except CancelledError:
            self.logger.debug("Pending nodes call to %r future cancelled", peer)
            return tuple()
        except Exception as exc:
            self.logger.info("Unexpected err while downloading nodes from %s: %s", peer, exc)
            self.logger.debug("Problem downloading nodes from peer, dropping...", exc_info=True)
            return tuple()
        else:
            if len(completed_nodes) > 0:
                # peer completed successfully, so have it get back in line for processing
                self._node_data_peers.put_nowait(peer)
            else:
                # peer didn't return enough results, wait a while before trying again
                delay = EMPTY_PEER_RESPONSE_PENALTY
                self.logger.debug(
                    "Pausing %s for %.1fs, for replying with no node data "
                    "to request for: %r",
                    peer,
                    delay,
                    [encode_hex(h) for h in node_hashes],
                )
                self.call_later(delay, self._node_data_peers.put_nowait, peer)
            return completed_nodes

    async def _make_node_request(
            self,
            peer: ETHPeer,
            original_node_hashes: Tuple[Hash32, ...]) -> NodeDataBundles:
        node_hashes = tuple(set(original_node_hashes))
        num_nodes = len(node_hashes)
        self.logger.debug2("Requesting %d nodes from %s", num_nodes, peer)
        try:
            return await peer.requests.get_node_data(node_hashes, timeout=self._reply_timeout)
        except TimeoutError:
            # This kind of exception shouldn't necessarily *drop* the peer,
            # so capture the error, log, and swallow
            self.logger.debug("Timed out requesting %d nodes from %s", num_nodes, peer)
            self._total_timeouts += 1
            return tuple()

    async def _run(self) -> None:
        """
        Request all nodes in the queue, running indefinitely
        """
        self._timer.start()
        self.logger.info("Starting incremental state sync")
        self.run_task(self._periodically_report_progress())
        with self.subscribe(self._peer_pool):
            await self.wait(self._match_node_requests_to_peers())

    async def _periodically_report_progress(self) -> None:
        while self.is_operational:
            msg = "processed=%d " % self._total_processed_nodes
            msg += "urgent=%d " % self._urgent_processed_nodes
            msg += "predictive=%d " % self._predictive_processed_nodes
            msg += "pred_success=%d " % self._prediction_successes
            msg += "tnps=%d " % (self._total_processed_nodes / self._timer.elapsed)
            msg += "timeouts=%d" % self._total_timeouts
            self.logger.info("Beam-Sync: %s", msg)
            await self.sleep(self._report_interval)
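# The `_node_hashes_present` loop and the `new_data.set()` calls above form a
# plain asyncio wake-up pattern: waiters re-check shared state each time a
# producer signals. The same pattern in isolation (all names illustrative):
import asyncio

async def waiter(present: set, wanted: set, new_data: asyncio.Event) -> None:
    # Block until every wanted item has shown up, re-checking on each wake-up.
    remaining = set(wanted)
    while remaining:
        await new_data.wait()
        remaining -= present
        new_data.clear()

async def main() -> None:
    present: set = set()
    new_data = asyncio.Event()
    task = asyncio.ensure_future(waiter(present, {"a", "b"}, new_data))
    for item in ("a", "b"):
        await asyncio.sleep(0.01)
        present.add(item)   # simulate a node arriving from a peer
        new_data.set()      # wake any waiters to re-check
    await task

asyncio.run(main())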
def add_receipt(self, block_header: BlockHeader, index_key: int, receipt: Receipt) -> bytes:
    receipt_db = HexaryTrie(db=self.db, root_hash=block_header.receipt_root)
    receipt_db[index_key] = rlp.encode(receipt)
    return receipt_db.root_hash
def test_hexary_trie_missing_node():
    db = {}
    trie = HexaryTrie(db, prune=True)
    key1 = to_bytes(0x0123)
    trie.set(
        key1,
        b'use a value long enough that it must be hashed according to trie spec',
    )

    key2 = to_bytes(0x1234)
    trie.set(key2, b'val2')

    trie_root_hash = trie.root_hash

    # delete first child of the root
    root_node = trie.root_node.raw

    first_child_hash = root_node[0]

    del db[first_child_hash]

    # Get exception with relevant info about key
    with pytest.raises(MissingTrieNode) as exc_info:
        trie.get(key1)
    message = str(exc_info.value)

    assert encode_hex(key1) in message
    assert encode_hex(trie_root_hash) in message
    assert encode_hex(first_child_hash) in message

    # Get exception when trying to write into key with shared prefix
    key1_shared_prefix = to_bytes(0x0234)
    with pytest.raises(MissingTrieNode) as set_exc_info:
        trie.set(key1_shared_prefix, b'val2')
    set_exc_message = str(set_exc_info.value)

    assert encode_hex(key1_shared_prefix) in set_exc_message
    assert encode_hex(trie_root_hash) in set_exc_message
    assert encode_hex(first_child_hash) in set_exc_message

    # Get exception when trying to delete key with missing data
    with pytest.raises(MissingTrieNode) as delete_exc_info:
        trie.delete(key1)
    delete_exc_message = str(delete_exc_info.value)

    assert encode_hex(key1) in delete_exc_message
    assert encode_hex(trie_root_hash) in delete_exc_message
    assert encode_hex(first_child_hash) in delete_exc_message

    # Get exception when checking if key exists with missing data
    key1_shared_prefix2 = to_bytes(0x0345)
    with pytest.raises(MissingTrieNode) as existence_exc_info:
        key1_shared_prefix2 in trie
    existence_exc_message = str(existence_exc_info.value)

    assert encode_hex(key1_shared_prefix2) in existence_exc_message
    assert encode_hex(trie_root_hash) in existence_exc_message
    assert encode_hex(first_child_hash) in existence_exc_message

    # Other keys are still accessible
    assert trie.get(key2) == b'val2'
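# The `missing_node_hash` attribute asserted above is what the downloaders in
# this section rely on. A hedged sketch of the fetch-and-retry loop it enables;
# `fetch_node` is a hypothetical callback that returns the node body for a hash:
from trie.exceptions import MissingTrieNode

def get_with_fetch(trie, key, fetch_node):
    # Keep retrying the read, installing each missing node as it is reported,
    # until the lookup can be fully proven from the local database.
    while True:
        try:
            return trie.get(key)
        except MissingTrieNode as exc:
            trie.db[exc.missing_node_hash] = fetch_node(exc.missing_node_hash)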
def add_transaction(self,
                    block_header: BlockHeaderAPI,
                    index_key: int,
                    transaction: SignedTransactionAPI) -> Hash32:
    transaction_db = HexaryTrie(self.db, root_hash=block_header.transaction_root)
    transaction_db[index_key] = transaction.encode()
    return transaction_db.root_hash
def __init__(self, db, state_root=BLANK_ROOT_HASH):
    # Keep a reference to the original db instance to use it as part of
    # _get_account()'s cache key.
    self._unwrapped_db = db
    self.db = JournalDB(db)
    self._trie = HashTrie(HexaryTrie(self.db, state_root))
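# `HashTrie` keys the underlying trie by `keccak(key)`, which is why the proof
# lookups in this section hash the address first. A minimal illustrative
# wrapper in the same spirit (not the py-evm class itself):
from eth_utils import keccak
from trie import HexaryTrie

class KeccakKeyedTrie:
    """Illustrative stand-in: hash every key before touching the trie."""

    def __init__(self, trie: HexaryTrie) -> None:
        self._trie = trie

    def __setitem__(self, key: bytes, value: bytes) -> None:
        self._trie[keccak(key)] = value

    def __getitem__(self, key: bytes) -> bytes:
        return self._trie[keccak(key)]

wrapped = KeccakKeyedTrie(HexaryTrie(db={}))
wrapped[b'address'] = b'account-rlp'
assert wrapped[b'address'] == b'account-rlp'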
class BeamDownloader(Service, PeerSubscriber):
    """
    Coordinate the request of needed state data: accounts, storage, bytecodes,
    and other arbitrary intermediate nodes in the trie.
    """
    _total_processed_nodes = 0
    _urgent_processed_nodes = 0
    _predictive_processed_nodes = 0
    _total_timeouts = 0
    _predictive_requests = 0
    _urgent_requests = 0
    _time_on_urgent = 0.0
    _timer = Timer(auto_start=False)
    _report_interval = 10  # Number of seconds between progress reports.
    _reply_timeout = 10  # seconds
    _num_urgent_requests_by_peer: typing.Counter[ETHPeer]
    _num_predictive_requests_by_peer: typing.Counter[ETHPeer]

    # We are only interested in peers entering or leaving the pool
    subscription_msg_types: FrozenSet[Type[CommandAPI[Any]]] = frozenset()

    # This is a rather arbitrary value, but when the sync is operating normally we never see
    # the msg queue grow past a few hundred items, so this should be a reasonable limit for
    # now.
    msg_queue_maxsize: int = 2000

    def __init__(
            self,
            db: AtomicDatabaseAPI,
            peer_pool: ETHPeerPool,
            queen_tracker: QueenTrackerAPI,
            event_bus: EndpointAPI) -> None:
        self.logger = get_logger('trinity.sync.beam.BeamDownloader')
        self._db = db
        self._trie_db = HexaryTrie(db)
        self._event_bus = event_bus

        # Track the needed node data that is urgent and important:
        buffer_size = MAX_STATE_FETCH * REQUEST_BUFFER_MULTIPLIER
        self._node_tasks = TaskQueue[Hash32](buffer_size, lambda task: 0)

        # list of events waiting on new data
        self._new_data_events: Set[asyncio.Event] = set()

        self._peer_pool = peer_pool

        # Track node data that might be useful: hashes we bumped into while getting urgent nodes
        self._maybe_useful_nodes = TaskQueue[Hash32](
            buffer_size,
            # Everything is the same priority, for now
            lambda node_hash: 0,
        )

        self._num_urgent_requests_by_peer = Counter()
        self._num_predictive_requests_by_peer = Counter()

        self._queen_tracker = queen_tracker

    async def ensure_nodes_present(
            self,
            node_hashes: Iterable[Hash32],
            urgent: bool = True) -> int:
        """
        Wait until the nodes that are the preimages of `node_hashes` are available in the
        database. If one is not available in the first check, request it from peers.

        :param urgent: Should this node be downloaded urgently? If False, download as backfill

        Note that if your ultimate goal is an account or storage data, it's probably better
        to use download_account or download_storage. This method is useful for other
        scenarios, like bytecode lookups or intermediate node lookups.

        :return: how many nodes had to be downloaded
        """
        if urgent:
            queue = self._node_tasks
        else:
            queue = self._maybe_useful_nodes
        return await self._wait_for_nodes(node_hashes, queue)

    async def _wait_for_nodes(
            self,
            node_hashes: Iterable[Hash32],
            queue: TaskQueue[Hash32]) -> int:
        missing_nodes = set(
            node_hash
            for node_hash in node_hashes
            if self._is_node_missing(node_hash)
        )
        unrequested_nodes = tuple(
            node_hash
            for node_hash in missing_nodes
            if node_hash not in queue
        )
        if unrequested_nodes:
            await queue.add(unrequested_nodes)
        if missing_nodes:
            await self._node_hashes_present(missing_nodes)
        return len(unrequested_nodes)

    def _is_node_missing(self, node_hash: Hash32) -> bool:
        if len(node_hash) != 32:
            raise ValidationError(f"Must request node by its 32-byte hash: 0x{node_hash.hex()}")

        self.logger.debug2("checking if node 0x%s is present", node_hash.hex())

        return node_hash not in self._db

    async def download_accounts(
            self,
            account_addresses: Collection[Address],
            root_hash: Hash32,
            urgent: bool = True) -> int:
        """
        Like :meth:`download_account`, but waits for multiple addresses to be available.

        :return: total number of trie node downloads that were required to locally prove
        """
        if len(account_addresses) == 0:
            return 0

        last_log_time = time.monotonic()

        missing_account_hashes = set(keccak(address) for address in account_addresses)
        completed_account_hashes = set()
        nodes_downloaded = 0
        # will never take more than 64 attempts to get a full account
        for _ in range(64):
            need_nodes = set()
            with self._trie_db.at_root(root_hash) as snapshot:
                for account_hash in missing_account_hashes:
                    try:
                        snapshot[account_hash]
                    except MissingTrieNode as exc:
                        need_nodes.add(exc.missing_node_hash)
                    else:
                        completed_account_hashes.add(account_hash)

            # Log if taking a long time to download addresses
            now = time.monotonic()
            if urgent and now - last_log_time > ESTIMATED_BEAMABLE_SECONDS:
                self.logger.info(
                    "Beam account download: %d/%d (%.0f%%)",
                    len(completed_account_hashes),
                    len(account_addresses),
                    100 * len(completed_account_hashes) / len(account_addresses),
                )
                last_log_time = now

            await self.ensure_nodes_present(need_nodes, urgent)
            nodes_downloaded += len(need_nodes)
            missing_account_hashes -= completed_account_hashes

            if not missing_account_hashes:
                return nodes_downloaded
        else:
            raise Exception(
                f"State Downloader failed to download {account_addresses!r} at "
                f"state root 0x{root_hash.hex()} in 64 runs"
            )

    async def download_account(
            self,
            account_hash: Hash32,
            root_hash: Hash32,
            urgent: bool = True) -> Tuple[bytes, int]:
        """
        Check the given account address for presence in the state database.
        Wait until we have the state proof for the given address.
        If the account is not available in the first check, then request any trie nodes
        that we need to determine and prove the account rlp.

        Mark these nodes as urgent and important, which increases request priority.

        :return: The downloaded account rlp, and how many state trie node downloads were required
        """
        # will never take more than 64 attempts to get a full account
        for num_downloads_required in range(64):
            try:
                with self._trie_db.at_root(root_hash) as snapshot:
                    account_rlp = snapshot[account_hash]
            except MissingTrieNode as exc:
                await self.ensure_nodes_present({exc.missing_node_hash}, urgent)
            else:
                # Account is fully available within the trie
                return account_rlp, num_downloads_required
        else:
            raise Exception(
                f"State Downloader failed to download 0x{account_hash.hex()} at "
                f"state root 0x{root_hash.hex()} in 64 runs"
            )

    async def download_storage(
            self,
            storage_key: Hash32,
            storage_root_hash: Hash32,
            account: Address,
            urgent: bool = True) -> int:
        """
        Check the given storage key for presence in the account's storage database.
        Wait until we have a trie proof for the given storage key.
        If the storage key value is not available in the first check, then request
        any trie nodes that we need to determine and prove the storage value.

        Mark these nodes as urgent and important, which increases request priority.

        :return: how many storage trie node downloads were required
        """
        # should never take more than 64 attempts to get a full account
        for num_downloads_required in range(64):
            try:
                with self._trie_db.at_root(storage_root_hash) as snapshot:
                    # request the data just to see which part is missing
                    snapshot[storage_key]
            except MissingTrieNode as exc:
                await self.ensure_nodes_present({exc.missing_node_hash}, urgent)
            else:
                # Storage value is fully available within the trie
                return num_downloads_required
        else:
            raise Exception(
                f"State Downloader failed to download storage 0x{storage_key.hex()} in "
                f"{to_checksum_address(account)} at storage root 0x{storage_root_hash.hex()} "
                f"in 64 runs."
            )

    async def _match_urgent_node_requests_to_peers(self) -> None:
        """
        Monitor for urgent trie node needs. An urgent node means that a current block import
        is paused until that trie node is retrieved. Ask our best peer for that trie node,
        and then wait for the next urgent node need. Repeat indefinitely.
        """
        while self.manager.is_running:
            urgent_batch_id, urgent_hashes = await self._node_tasks.get(
                eth_constants.MAX_STATE_FETCH
            )

            # Get best peer, by GetNodeData speed
            peer = await self._queen_tracker.get_queen_peer()

            peer_is_requesting = peer.eth_api.get_node_data.is_requesting

            if peer_is_requesting:
                # Our best peer for node data has an in-flight GetNodeData request.
                # Probably, backfill is asking this peer for data.
                # This is right in the critical path, so we'd prefer this never happen.
                self.logger.debug(
                    "Want to download urgent data, but %s is locked on other request",
                    peer,
                )
                # Don't do anything different, allow the request lock to handle the situation

            self._num_urgent_requests_by_peer[peer] += 1
            self._urgent_requests += 1

            # Request all the urgent nodes from the queen peer. EVM execution is blocked
            # anyway, so just hang on this peer to return.
            await self._get_nodes_from_peer(
                peer,
                urgent_hashes,
                urgent_batch_id,
                urgent=True,
            )

    async def _match_predictive_node_requests_to_peers(self) -> None:
        """
        Monitor for predictive nodes. These might be required by future blocks. They might
        not, because we run a speculative execution which might follow a different code
        path than the final block import does.

        When predictive nodes are queued up, ask the fastest available peasant (non-queen)
        peer for them. Without waiting for a response from the peer, continue and check if
        more predictive trie nodes are requested. Repeat indefinitely.
        """
        while self.manager.is_running:
            batch_id, hashes = await self._maybe_useful_nodes.get(
                eth_constants.MAX_STATE_FETCH)
            peer = await self._queen_tracker.pop_fastest_peasant()

            self._num_predictive_requests_by_peer[peer] += 1
            self._predictive_requests += 1

            self.manager.run_task(
                self._get_predictive_nodes_from_peer,
                peer,
                hashes,
                batch_id,
            )

    @staticmethod
    def _append_unique_hashes(
            first_hashes: Tuple[Hash32, ...],
            non_unique_hashes: Tuple[Hash32, ...]) -> Tuple[Hash32, ...]:
        unique_hashes_to_add = tuple(set(non_unique_hashes).difference(first_hashes))
        return first_hashes + unique_hashes_to_add

    async def _get_predictive_nodes_from_peer(
            self,
            peer: ETHPeer,
            node_hashes: Tuple[Hash32, ...],
            batch_id: int) -> None:
        await self._get_nodes_from_peer(peer, node_hashes, batch_id, urgent=False)
        self._queen_tracker.insert_peer(peer)

    async def _get_nodes_from_peer(
            self,
            peer: ETHPeer,
            node_hashes: Tuple[Hash32, ...],
            batch_id: int,
            urgent: bool) -> None:
        if urgent:
            urgent_timer = Timer()

        nodes, new_nodes = await self._store_nodes(peer, node_hashes, urgent)

        if len(nodes) == 0 and urgent:
            self.logger.debug("%s returned no urgent nodes from %r", peer, node_hashes)

        self._total_processed_nodes += len(nodes)
        if urgent:
            self._node_tasks.complete(batch_id, tuple(node_hash for node_hash, _ in nodes))
            self._urgent_processed_nodes += len(nodes)

            time_on_urgent = urgent_timer.elapsed
            self.logger.debug(
                "beam-rtt: got %d/%d +%d urgent nodes in %.3fs from %s (%s)",
                len(nodes),
                len(node_hashes),
                len(new_nodes),
                time_on_urgent,
                peer.remote,
                node_hashes[0][:2].hex(),
            )
            self._time_on_urgent += time_on_urgent
        else:
            self._maybe_useful_nodes.complete(
                batch_id,
                tuple(node_hash for node_hash, _ in nodes),
            )
            self._predictive_processed_nodes += len(nodes)

    async def _store_nodes(
            self,
            peer: ETHPeer,
            node_hashes: Tuple[Hash32, ...],
            urgent: bool) -> Tuple[NodeDataBundles, NodeDataBundles]:
        nodes = await self._request_nodes(peer, node_hashes)

        new_nodes = tuple(
            (node_hash, node)
            for node_hash, node in nodes
            if self._is_node_missing(node_hash)
        )

        if new_nodes:
            # batch all DB writes into one, for performance
            with self._db.atomic_batch() as batch:
                for node_hash, node in new_nodes:
                    batch[node_hash] = node

        # If there are any new nodes returned, then notify any coros that are waiting on
        # node data to resume.
        # Note that we notify waiting coros even when no new data was returned, as long
        # as the request was urgent. We do this in case the urgent data was retrieved by
        # backfill, or generated locally. That way, urgent coros don't get stuck hanging
        # until a timeout. This can cause an especially flaky
        # test_beam_syncer_backfills_all_state[42].
        if urgent or new_nodes:
            for new_data in self._new_data_events:
                new_data.set()

        return nodes, new_nodes

    def _is_node_present(self, node_hash: Hash32) -> bool:
        """
        Check if node_hash has data in the database.
        """
        return node_hash in self._db

    async def _node_hashes_present(self, node_hashes: Set[Hash32]) -> None:
        remaining_hashes = node_hashes.copy()

        # save an event that gets triggered when new data comes in
        new_data = asyncio.Event()
        self._new_data_events.add(new_data)

        iterations = itertools.count()
        while remaining_hashes and next(iterations) < 1000:
            await new_data.wait()

            found_hashes = set(
                found for found in remaining_hashes if self._is_node_present(found))
            remaining_hashes -= found_hashes

            new_data.clear()

        if remaining_hashes:
            self.logger.error("Never collected node data for hashes %r", remaining_hashes)

        self._new_data_events.remove(new_data)

    def register_peer(self, peer: BasePeer) -> None:
        super().register_peer(peer)

    async def _request_nodes(
            self,
            peer: ETHPeer,
            original_node_hashes: Tuple[Hash32, ...]) -> NodeDataBundles:
        node_hashes = tuple(set(original_node_hashes))
        num_nodes = len(node_hashes)
        self.logger.debug2("Requesting %d nodes from %s", num_nodes, peer)
        try:
            completed_nodes = await peer.eth_api.get_node_data(
                node_hashes, timeout=self._reply_timeout)
        except PeerConnectionLost:
            self.logger.debug(
                "%s went away, cancelling the nodes request and moving on...", peer)
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        except BaseP2PError as exc:
            self.logger.warning(
                "Unexpected p2p err while downloading nodes from %s: %s", peer, exc)
            self.logger.debug("Problem downloading nodes from peer, dropping...", exc_info=True)
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        except CancelledError:
            self.logger.debug("Pending nodes call to %r future cancelled", peer)
            self._queen_tracker.penalize_queen(peer)
            raise
        except asyncio.TimeoutError:
            # This kind of exception shouldn't necessarily *drop* the peer,
            # so capture the error, log, and swallow
            self.logger.debug("Timed out requesting %d nodes from %s", num_nodes, peer)
            self._queen_tracker.penalize_queen(peer)
            self._total_timeouts += 1
            return tuple()
        except Exception as exc:
            self.logger.info("Unexpected err while downloading nodes from %s: %s", peer, exc)
            self.logger.debug(
                "Problem downloading nodes from %s",
                peer,
                exc_info=True,
            )
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        else:
            if len(completed_nodes) > 0:
                # peer completed successfully, so have it get back in line for processing
                pass
            else:
                # peer didn't return enough results, wait a while before trying again
                self.logger.debug("%s returned 0 state trie nodes, penalize...", peer)
                self._queen_tracker.penalize_queen(peer)
            return completed_nodes

    async def run(self) -> None:
        """
        Request all nodes in the queue, running indefinitely
        """
        self._timer.start()
        self.logger.info("Starting beam state sync")
        self.manager.run_task(self._periodically_report_progress)
        with self.subscribe(self._peer_pool):
            self.manager.run_daemon_task(self._match_predictive_node_requests_to_peers)
            await self._match_urgent_node_requests_to_peers()

    async def _periodically_report_progress(self) -> None:
        while self.manager.is_running:
            self._time_on_urgent = 0
            interval_timer = Timer()
            await asyncio.sleep(self._report_interval)

            msg = "all=%d " % self._total_processed_nodes
            msg += "urgent=%d " % self._urgent_processed_nodes
            # The percent of time spent in the last interval waiting on an urgent node
            # from the queen peer:
            msg += "crit=%.0f%% " % (100 * self._time_on_urgent / interval_timer.elapsed)
            msg += "pred=%d " % self._predictive_processed_nodes
            msg += "all/sec=%d " % (self._total_processed_nodes / self._timer.elapsed)
            msg += "urgent/sec=%d " % (self._urgent_processed_nodes / self._timer.elapsed)
            msg += "urg_reqs=%d " % (self._urgent_requests)
            msg += "pred_reqs=%d " % (self._predictive_requests)
            msg += "timeouts=%d" % self._total_timeouts
            msg += " u_pend=%d" % self._node_tasks.num_pending()
            msg += " u_prog=%d" % self._node_tasks.num_in_progress()
            msg += " p_pend=%d" % self._maybe_useful_nodes.num_pending()
            msg += " p_prog=%d" % self._maybe_useful_nodes.num_in_progress()
            self.logger.debug("beam-sync: %s", msg)

            # log peer counts
            show_top_n_peers = 5
            self.logger.debug(
                "beam-queen-usage-top-%d: urgent=%s, predictive=%s",
                show_top_n_peers,
                [
                    (str(peer.remote), num)
                    for peer, num in
                    self._num_urgent_requests_by_peer.most_common(show_top_n_peers)
                ],
                [
                    (str(peer.remote), num)
                    for peer, num in
                    self._num_predictive_requests_by_peer.most_common(show_top_n_peers)
                ],
            )
            self._num_urgent_requests_by_peer.clear()
            self._num_predictive_requests_by_peer.clear()
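# The `atomic_batch` write in `_store_nodes` above groups every returned node
# into a single commit, so either all nodes land or none do. A minimal sketch
# against py-evm's in-memory AtomicDB (assuming that backend):
from eth.db.atomic import AtomicDB

db = AtomicDB()
new_nodes = [(b'\x00' * 32, b'node-a'), (b'\x11' * 32, b'node-b')]

# One commit for the whole batch, mirroring _store_nodes.
with db.atomic_batch() as batch:
    for node_hash, node in new_nodes:
        batch[node_hash] = node

assert db[b'\x00' * 32] == b'node-a'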
def _insert_squash_test():
    trie = HexaryTrie(db={})
    with trie.squash_changes() as memory_trie:
        for k, v in sorted(TEST_DATA.items()):
            memory_trie[k] = v
    return trie
def _insert_test():
    trie = HexaryTrie(db={})
    for k, v in sorted(TEST_DATA.items()):
        trie[k] = v
    return trie
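# The two helpers above differ only in whether writes are batched through
# squash_changes. A hedged timing harness for comparing them (TEST_DATA and
# the repetition count are assumptions):
import timeit

plain = timeit.timeit(_insert_test, number=3)
squashed = timeit.timeit(_insert_squash_test, number=3)
print(f"plain: {plain:.2f}s  squashed: {squashed:.2f}s")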
def add_receipt(self, block_header, index_key, receipt):
    receipt_db = HexaryTrie(db=self.db, root_hash=block_header.receipt_root)
    receipt_db[index_key] = rlp.encode(receipt)
    return receipt_db.root_hash
def add_transaction(self, block_header, index_key, transaction):
    transaction_db = HexaryTrie(self.db, root_hash=block_header.transaction_root)
    transaction_db[index_key] = rlp.encode(transaction)
    return transaction_db.root_hash
class _Transaction(rlp.Serializable):
    fields = [
        ('nonce', big_endian_int),
        ('gas_price', big_endian_int),
        ('gas', big_endian_int),
        ('to', address),
        ('value', big_endian_int),
        ('data', binary),
        ('v', big_endian_int),
        ('r', big_endian_int),
        ('s', big_endian_int),
    ]


BLANK_ROOT_HASH = Hash32(
    b'V\xe8\x1f\x17\x1b\xccU\xa6\xff\x83E\xe6\x92\xc0\xf8n'
    b'\x5bH\xe0\x1b\x99l\xad\xc0\x01b/\xb5\xe3c\xb4!'
)
Transactions = Sequence[_Transaction]
TrieRootAndData = Tuple[Hash32, Dict[Hash32, bytes]]

block = w3.eth.getBlock(8290728)
trie = HexaryTrie(db={})
assert trie.root_hash == BLANK_ROOT_HASH
print(w3.toHex(trie.root_hash))

txs = []
B = len(block.transactions)
for key in range(B):
    tx = w3.eth.getTransaction(block.transactions[key])
    raw_tx = _Transaction(
        tx.nonce,
        tx.gasPrice,
        tx.gas,
        w3.toBytes(hexstr=tx.to),
        tx.value,
        w3.toBytes(hexstr=w3.toHex(hexstr=tx.input)),
        tx.v,
        w3.toInt(tx.r),
        w3.toInt(tx.s),
    )
    rlp_tx = rlp.encode(raw_tx)
    assert tx.hash == keccak(rlp_tx)
    txs.append(raw_tx)
    trie.set(rlp.encode(key), rlp_tx)
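# The natural closing check for the loop above, hedged on the block containing
# only legacy transactions (which is all `_Transaction` models): the rebuilt
# trie should reproduce the transaction root in the block header.
assert trie.root_hash == block.transactionsRoot
print("transaction root matches:", w3.toHex(trie.root_hash))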
async def get_account(self, block_hash: bytes, address: bytes) -> Account:
    key = keccak(address)
    proof = await self._get_proof(block_hash, account_key=b'', key=key)
    header = await self.get_block_header_by_hash(block_hash)
    rlp_account = HexaryTrie.get_from_proof(header.state_root, key, proof)
    return rlp.decode(rlp_account, sedes=Account)
class BeamDownloader(Service, PeerSubscriber): """ Coordinate the request of needed state data: accounts, storage, bytecodes, and other arbitrary intermediate nodes in the trie. """ do_predictive_downloads = False _total_processed_nodes = 0 _urgent_processed_nodes = 0 _predictive_processed_nodes = 0 _total_timeouts = 0 _predictive_only_requests = 0 _total_requests = 0 _timer = Timer(auto_start=False) _report_interval = 10 # Number of seconds between progress reports. _reply_timeout = 10 # seconds _num_urgent_requests_by_peer: typing.Counter[ETHPeer] _num_predictive_requests_by_peer: typing.Counter[ETHPeer] # We are only interested in peers entering or leaving the pool subscription_msg_types: FrozenSet[Type[CommandAPI[Any]]] = frozenset() # This is a rather arbitrary value, but when the sync is operating normally we never see # the msg queue grow past a few hundred items, so this should be a reasonable limit for # now. msg_queue_maxsize: int = 2000 def __init__(self, db: AtomicDatabaseAPI, peer_pool: ETHPeerPool, queen_tracker: QueenTrackerAPI, event_bus: EndpointAPI) -> None: self.logger = get_logger('trinity.sync.beam.BeamDownloader') self._db = db self._trie_db = HexaryTrie(db) self._node_data_peers = WaitingPeers[ETHPeer](NodeDataV65) self._event_bus = event_bus # Track the needed node data that is urgent and important: buffer_size = MAX_STATE_FETCH * REQUEST_BUFFER_MULTIPLIER self._node_tasks = TaskQueue[Hash32](buffer_size, lambda task: 0) # list of events waiting on new data self._new_data_events: Set[asyncio.Event] = set() self._peer_pool = peer_pool # Track node data that might be useful: hashes we bumped into while getting urgent nodes self._maybe_useful_nodes = TaskQueue[Hash32]( buffer_size, # Everything is the same priority, for now lambda node_hash: 0, ) # It's possible that you are connected to a peer that doesn't have a full state DB # In that case, we may get stuck requesting predictive nodes from them over and over # because they don't have anything but the nodes required to prove recent block # executions. If we get stuck in that scenario, turn off allow_predictive_only. # For now, we just turn it off for all peers, for simplicity. self._allow_predictive_only = True self._num_urgent_requests_by_peer = Counter() self._num_predictive_requests_by_peer = Counter() self._queen_tracker = queen_tracker async def ensure_nodes_present(self, node_hashes: Iterable[Hash32], urgent: bool = True) -> int: """ Wait until the nodes that are the preimages of `node_hashes` are available in the database. If one is not available in the first check, request it from peers. :param urgent: Should this node be downloaded urgently? If False, download as backfill Note that if your ultimate goal is an account or storage data, it's probably better to use download_account or download_storage. This method is useful for other scenarios, like bytecode lookups or intermediate node lookups. 
:return: how many nodes had to be downloaded """ if urgent: queue = self._node_tasks else: queue = self._maybe_useful_nodes return await self._wait_for_nodes(node_hashes, queue) async def _wait_for_nodes(self, node_hashes: Iterable[Hash32], queue: TaskQueue[Hash32]) -> int: missing_nodes = set(node_hash for node_hash in node_hashes if self._is_node_missing(node_hash)) unrequested_nodes = tuple(node_hash for node_hash in missing_nodes if node_hash not in queue) if unrequested_nodes: await queue.add(unrequested_nodes) if missing_nodes: await self._node_hashes_present(missing_nodes) return len(unrequested_nodes) def _is_node_missing(self, node_hash: Hash32) -> bool: if len(node_hash) != 32: raise ValidationError( f"Must request node by its 32-byte hash: 0x{node_hash.hex()}") self.logger.debug2("checking if node 0x%s is present", node_hash.hex()) return node_hash not in self._db async def download_accounts(self, account_addresses: Iterable[Address], root_hash: Hash32, urgent: bool = True) -> int: """ Like :meth:`download_account`, but waits for multiple addresses to be available. :return: total number of trie node downloads that were required to locally prove """ missing_account_hashes = set( keccak(address) for address in account_addresses) completed_account_hashes = set() nodes_downloaded = 0 # will never take more than 64 attempts to get a full account for _ in range(64): need_nodes = set() with self._trie_db.at_root(root_hash) as snapshot: for account_hash in missing_account_hashes: try: snapshot[account_hash] except MissingTrieNode as exc: need_nodes.add(exc.missing_node_hash) else: completed_account_hashes.add(account_hash) await self.ensure_nodes_present(need_nodes, urgent) nodes_downloaded += len(need_nodes) missing_account_hashes -= completed_account_hashes if not missing_account_hashes: return nodes_downloaded else: raise Exception( f"State Downloader failed to download {account_addresses!r} at " f"state root 0x{root_hash.hex} in 64 runs") async def download_account(self, account_hash: Hash32, root_hash: Hash32, urgent: bool = True) -> Tuple[bytes, int]: """ Check the given account address for presence in the state database. Wait until we have the state proof for the given address. If the account is not available in the first check, then request any trie nodes that we need to determine and prove the account rlp. Mark these nodes as urgent and important, which increases request priority. :return: The downloaded account rlp, and how many state trie node downloads were required """ # will never take more than 64 attempts to get a full account for num_downloads_required in range(64): try: with self._trie_db.at_root(root_hash) as snapshot: account_rlp = snapshot[account_hash] except MissingTrieNode as exc: await self.ensure_nodes_present({exc.missing_node_hash}, urgent) else: # Account is fully available within the trie return account_rlp, num_downloads_required else: raise Exception( f"State Downloader failed to download 0x{account_hash.hex()} at " f"state root 0x{root_hash.hex} in 64 runs") async def download_storage(self, storage_key: Hash32, storage_root_hash: Hash32, account: Address, urgent: bool = True) -> int: """ Check the given storage key for presence in the account's storage database. Wait until we have a trie proof for the given storage key. If the storage key value is not available in the first check, then request any trie nodes that we need to determine and prove the storage value. Mark these nodes as urgent and important, which increases request priority. 
:return: how many storage trie node downloads were required """ # should never take more than 64 attempts to get a full account for num_downloads_required in range(64): try: with self._trie_db.at_root(storage_root_hash) as snapshot: # request the data just to see which part is missing snapshot[storage_key] except MissingTrieNode as exc: await self.ensure_nodes_present({exc.missing_node_hash}, urgent) else: # Account is fully available within the trie return num_downloads_required else: raise Exception( f"State Downloader failed to download storage 0x{storage_key.hex()} in " f"{to_checksum_address(account)} at storage root 0x{storage_root_hash.hex()} " f"in 64 runs.") async def _match_node_requests_to_peers(self) -> None: """ Monitor TaskQueue for needed trie nodes, and request them from peers. Repeat as necessary. Prefer urgent nodes over predictive ones. """ while self.manager.is_running: urgent_batch_id, urgent_hashes = await self._get_waiting_urgent_hashes( ) predictive_batch_id, predictive_hashes = self._maybe_add_predictive_nodes( urgent_hashes) # combine to single tuple of unique hashes node_hashes = self._append_unique_hashes(urgent_hashes, predictive_hashes) if not node_hashes: # There are no urgent or predictive hashes waiting, retry continue # Get best peer, by GetNodeData speed peer = await self._queen_tracker.get_queen_peer() if urgent_batch_id is not None and peer.eth_api.get_node_data.is_requesting: # Our best peer for node data has an in-flight GetNodeData request # Probably, backfill is asking this peer for data # This is right in the critical path, so we'd prefer this never happen self.logger.debug( "Want to download urgent data, but %s is locked on other request", peer, ) # Don't do anything different, allow the request lock to handle the situation if any(len(h) != 32 for h in node_hashes): # This was inserted to identify and resolve a buggy situation short_node_urgent_hashes = tuple(h for h in node_hashes if len(h) != 32) raise ValidationError( f"Some of the requested node hashes are too short! {short_node_urgent_hashes!r}" ) if urgent_batch_id is None: self._predictive_only_requests += 1 self._num_predictive_requests_by_peer[peer] += 1 else: self._num_urgent_requests_by_peer[peer] += 1 self._total_requests += 1 # Request all the nodes from the given peer, and immediately move on to # try to request other nodes from another peer. await self._get_nodes_from_peer( peer, node_hashes, urgent_batch_id, urgent_hashes, predictive_hashes, predictive_batch_id, ) async def _get_waiting_urgent_hashes( self) -> Tuple[int, Tuple[Hash32, ...]]: # if any predictive nodes are waiting, then time out after a short pause to grab them try: return await asyncio.wait_for( self._node_tasks.get(eth_constants.MAX_STATE_FETCH), timeout=DELAY_BEFORE_NON_URGENT_REQUEST, ) except asyncio.TimeoutError: return None, () def _maybe_add_predictive_nodes( self, urgent_hashes: Tuple[Hash32, ...]) -> Tuple[int, Tuple[Hash32, ...]]: # how many predictive nodes should we request? 
        num_predictive_backfills = min(
            eth_constants.MAX_STATE_FETCH - len(urgent_hashes),
            self._maybe_useful_nodes.num_pending(),
        )
        if num_predictive_backfills:
            return self._maybe_useful_nodes.get_nowait(
                num_predictive_backfills,
            )
        else:
            return None, ()

    @staticmethod
    def _append_unique_hashes(
            first_hashes: Tuple[Hash32, ...],
            non_unique_hashes: Tuple[Hash32, ...]) -> Tuple[Hash32, ...]:
        unique_hashes_to_add = tuple(set(non_unique_hashes).difference(first_hashes))
        return first_hashes + unique_hashes_to_add

    async def _get_nodes_from_peer(self,
                                   peer: ETHPeer,
                                   node_hashes: Tuple[Hash32, ...],
                                   urgent_batch_id: int,
                                   urgent_node_hashes: Tuple[Hash32, ...],
                                   predictive_node_hashes: Tuple[Hash32, ...],
                                   predictive_batch_id: int) -> None:
        nodes = await self._request_nodes(peer, node_hashes)

        urgent_nodes = {
            node_hash: node
            for node_hash, node in nodes
            if node_hash in urgent_node_hashes
        }
        predictive_nodes = {
            node_hash: node
            for node_hash, node in nodes
            if node_hash in predictive_node_hashes
        }
        if len(urgent_nodes) + len(predictive_nodes) < len(nodes):
            raise ValidationError("All nodes must be either urgent or predictive")

        if len(urgent_nodes) == 0 and urgent_batch_id is not None:
            self.logger.debug("%s returned no urgent nodes from %r", peer, urgent_node_hashes)

        # batch all DB writes into one, for performance
        with self._db.atomic_batch() as batch:
            for node_hash, node in nodes:
                batch[node_hash] = node

        if urgent_batch_id is not None:
            self._node_tasks.complete(urgent_batch_id, tuple(urgent_nodes.keys()))
        if predictive_batch_id is not None:
            self._maybe_useful_nodes.complete(predictive_batch_id, tuple(predictive_nodes.keys()))

        self._urgent_processed_nodes += len(urgent_nodes)
        for node_hash in predictive_nodes.keys():
            if node_hash not in urgent_node_hashes:
                self._predictive_processed_nodes += 1
        self._total_processed_nodes += len(nodes)

        if len(nodes):
            for new_data in self._new_data_events:
                new_data.set()

    def _is_node_present(self, node_hash: Hash32) -> bool:
        """
        Check if node_hash has data in the database or in the predicted node set.
        """
        return node_hash in self._db

    async def _node_hashes_present(self, node_hashes: Set[Hash32]) -> None:
        remaining_hashes = node_hashes.copy()

        # save an event that gets triggered when new data comes in
        new_data = asyncio.Event()
        self._new_data_events.add(new_data)

        iterations = itertools.count()
        while remaining_hashes and next(iterations) < 1000:
            await new_data.wait()
            found_hashes = set(found for found in remaining_hashes if self._is_node_present(found))
            remaining_hashes -= found_hashes
            new_data.clear()

        if remaining_hashes:
            self.logger.error("Never collected node data for hashes %r", remaining_hashes)

        self._new_data_events.remove(new_data)

    def register_peer(self, peer: BasePeer) -> None:
        super().register_peer(peer)
        # when a new peer is added to the pool, add it to the idle peer list

    async def _request_nodes(
            self,
            peer: ETHPeer,
            node_hashes: Tuple[Hash32, ...]) -> NodeDataBundles:
        try:
            completed_nodes = await self._make_node_request(peer, node_hashes)
        except PeerConnectionLost:
            self.logger.debug("%s went away, cancelling the nodes request and moving on...", peer)
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        except BaseP2PError as exc:
            self.logger.warning("Unexpected p2p err while downloading nodes from %s: %s", peer, exc)
            self.logger.debug("Problem downloading nodes from peer, dropping...", exc_info=True)
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        except OperationCancelled:
            self.logger.debug(
                "Service cancellation while fetching nodes, dropping %s from queue",
                peer,
                exc_info=True,
            )
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        except CancelledError:
            self.logger.debug("Pending nodes call to %r future cancelled", peer)
            self._queen_tracker.penalize_queen(peer)
            raise
        except Exception as exc:
            self.logger.info("Unexpected err while downloading nodes from %s: %s", peer, exc)
            self.logger.debug("Problem downloading nodes from %s", peer, exc_info=True)
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        else:
            if len(completed_nodes) > 0:
                # peer completed successfully, so have it get back in line for processing
                pass
            else:
                # peer didn't return enough results, wait a while before trying again
                self.logger.debug("%s returned 0 state trie nodes, penalize...", peer)
                self._queen_tracker.penalize_queen(peer)
            return completed_nodes

    async def _make_node_request(
            self,
            peer: ETHPeer,
            original_node_hashes: Tuple[Hash32, ...]) -> NodeDataBundles:
        node_hashes = tuple(set(original_node_hashes))
        num_nodes = len(node_hashes)
        self.logger.debug2("Requesting %d nodes from %s", num_nodes, peer)
        try:
            return await peer.eth_api.get_node_data(node_hashes, timeout=self._reply_timeout)
        except asyncio.TimeoutError:
            # This kind of exception shouldn't necessarily *drop* the peer,
            # so capture error, log and swallow
            self.logger.debug("Timed out requesting %d nodes from %s", num_nodes, peer)
            self._total_timeouts += 1
            return tuple()

    async def run(self) -> None:
        """
        Request all nodes in the queue, running indefinitely
        """
        self._timer.start()
        self.logger.info("Starting beam state sync")
        self.manager.run_task(self._periodically_report_progress)
        with self.subscribe(self._peer_pool):
            await self._match_node_requests_to_peers()

    async def _periodically_report_progress(self) -> None:
        while self.manager.is_running:
            msg = "all=%d " % self._total_processed_nodes
            msg += "urgent=%d " % self._urgent_processed_nodes
            msg += "pred=%d " % self._predictive_processed_nodes
            msg += "all/sec=%d " % (self._total_processed_nodes / self._timer.elapsed)
            msg += "urgent/sec=%d " % (self._urgent_processed_nodes / self._timer.elapsed)
            msg += "reqs=%d " % self._total_requests
            msg += "pred_reqs=%d " % self._predictive_only_requests
            msg += "timeouts=%d" % self._total_timeouts
            msg += " u_pend=%d" % self._node_tasks.num_pending()
            msg += " u_prog=%d" % self._node_tasks.num_in_progress()
            msg += " p_pend=%d" % self._maybe_useful_nodes.num_pending()
            msg += " p_prog=%d" % self._maybe_useful_nodes.num_in_progress()
            self.logger.debug("Beam-Sync: %s", msg)

            # log peer counts
            show_top_n_peers = 3
            self.logger.debug(
                "Beam-Sync-Peer-Usage-Top-%d: urgent=%s, predictive=%s",
                show_top_n_peers,
                self._num_urgent_requests_by_peer.most_common(show_top_n_peers),
                self._num_predictive_requests_by_peer.most_common(show_top_n_peers),
            )
            self._num_urgent_requests_by_peer.clear()
            self._num_predictive_requests_by_peer.clear()

            await asyncio.sleep(self._report_interval)
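The `_node_hashes_present` coroutine above waits on a shared `asyncio.Event` and re-checks the database each time new data arrives, bounding the number of wake-ups. A minimal, self-contained sketch of that wake-and-recheck pattern (the names `fake_db`/`wait_for_hashes` are illustrative, not from the snippet):

import asyncio
import itertools

async def wait_for_hashes(db: dict, wanted: set, new_data: asyncio.Event) -> set:
    # Re-check the database every time a producer signals new data,
    # giving up after a bounded number of wake-ups (as the snippet does).
    remaining = set(wanted)
    iterations = itertools.count()
    while remaining and next(iterations) < 1000:
        await new_data.wait()
        remaining -= {h for h in remaining if h in db}
        new_data.clear()
    return remaining

async def demo() -> None:
    fake_db: dict = {}
    event = asyncio.Event()
    waiter = asyncio.create_task(wait_for_hashes(fake_db, {b'a', b'b'}, event))
    fake_db[b'a'] = b'node-a'
    fake_db[b'b'] = b'node-b'
    event.set()  # simulate _get_nodes_from_peer announcing new data
    assert await waiter == set()

asyncio.run(demo())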
    @classmethod
    def load_state(cls, dbfile=None):
        """
        Create or load State.

        returns: State
        """
        if not dbfile:
            return cls(HexaryTrie(db={}), 0, 0, BLANK_ROOT_HASH)
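When no database file is supplied, `load_state` bootstraps from an empty in-memory trie, whose root is the canonical blank root hash. A quick check of that invariant, assuming `BLANK_NODE_HASH` is py-trie's empty-root constant (the pruning test later in this listing uses it the same way):

from trie import HexaryTrie
from trie.constants import BLANK_NODE_HASH
from eth_utils import keccak
import rlp

trie = HexaryTrie(db={})
# The empty trie root is keccak(rlp.encode(b'')), i.e. the blank root hash.
assert trie.root_hash == keccak(rlp.encode(b'')) == BLANK_NODE_HASH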
    def add_receipt(self,
                    block_header: BlockHeaderAPI,
                    index_key: int,
                    receipt: ReceiptAPI) -> Hash32:
        receipt_db = HexaryTrie(db=self.db, root_hash=block_header.receipt_root)
        receipt_db[index_key] = receipt.encode()
        return receipt_db.root_hash
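`add_receipt` updates the receipt trie incrementally: open a `HexaryTrie` at the previous root, write one entry, and return the new root. A standalone sketch of the same pattern against a plain dict database; `add_entry` and `payload` are illustrative stand-ins, and the key is RLP-encoded here on the assumption that callers follow the usual py-evm convention of RLP-encoding the index:

from trie import HexaryTrie
import rlp

db: dict = {}

def add_entry(root_hash: bytes, index: int, payload: bytes) -> bytes:
    # Open the trie at the prior root, insert one entry, return the new root.
    index_trie = HexaryTrie(db=db, root_hash=root_hash)
    index_trie[rlp.encode(index)] = payload
    return index_trie.root_hash

root = HexaryTrie(db={}).root_hash  # blank root
root = add_entry(root, 0, b'first receipt rlp')
root = add_entry(root, 1, b'second receipt rlp')
assert HexaryTrie(db=db, root_hash=root)[rlp.encode(0)] == b'first receipt rlp'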
    def __init__(self, db, root_hash=BLANK_ROOT_HASH, read_only=False):
        if read_only:
            self.db = ImmutableDB(db)
        else:
            self.db = TrackedDB(db)
        self._trie = HashTrie(HexaryTrie(self.db, root_hash))
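The `read_only` branch wraps the backing store so any write fails fast while reads pass through. A minimal sketch of an `ImmutableDB`-style wrapper; this `ReadOnlyDB` class is hypothetical, not the project's implementation:

class ReadOnlyDB:
    """Hypothetical stand-in: forwards reads, rejects writes."""

    def __init__(self, wrapped):
        self._wrapped = wrapped

    def __getitem__(self, key):
        return self._wrapped[key]

    def __contains__(self, key):
        return key in self._wrapped

    def __setitem__(self, key, value):
        raise TypeError("database is read-only")

    def __delitem__(self, key):
        raise TypeError("database is read-only")

db = ReadOnlyDB({b'k': b'v'})
assert db[b'k'] == b'v'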
def assert_proof(trie, key):
    proof = trie.get_proof(key)
    proof_value = HexaryTrie.get_from_proof(trie.root_hash, key, proof)
    assert proof_value == trie.get(key)
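A usage sketch for `assert_proof`, relying only on the py-trie `get_proof`/`get_from_proof` API the helper itself calls: the proof nodes plus the root hash are enough to verify a value without the full database.

from trie import HexaryTrie

trie = HexaryTrie(db={})
trie[b'dog'] = b'puppy'
trie[b'horse'] = b'stallion'

# Each key's Merkle proof verifies against the current root hash.
assert_proof(trie, b'dog')
assert_proof(trie, b'horse')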
class BeamDownloader(Service, PeerSubscriber):
    """
    Coordinate the request of needed state data: accounts, storage, bytecodes,
    and other arbitrary intermediate nodes in the trie.
    """
    _total_processed_nodes = 0
    _urgent_processed_nodes = 0
    _predictive_processed_nodes = 0
    _predictive_found_nodes_woke_up = 0
    _predictive_found_nodes_during_timeout = 0
    _total_timeouts = 0
    _predictive_requests = 0
    _urgent_requests = 0
    _time_on_urgent = 0.0
    _timer = Timer(auto_start=False)
    _report_interval = 10  # Number of seconds between progress reports.
    _reply_timeout = 10  # seconds

    _num_urgent_requests_by_peer: typing.Counter[ETHPeer]
    _num_predictive_requests_by_peer: typing.Counter[ETHPeer]

    _preview_events: Dict[asyncio.Event, Set[Hash32]]

    _num_peers = 0

    # How many extra peers (besides the queen) should we ask for the urgently-needed trie node?
    _spread_factor = 0

    # We periodically reduce the "spread factor" once every N seconds:
    _reduce_spread_factor_interval = 120

    # We might reserve some peers to ask for predictive nodes, if we start to fall behind
    _min_predictive_peers = 0

    # Keep track of the block number for each predictive missing node hash
    _block_number_lookup: DefaultDict[Hash32, BlockNumber]

    # We are only interested in peers entering or leaving the pool
    subscription_msg_types: FrozenSet[Type[CommandAPI[Any]]] = frozenset()

    # This is a rather arbitrary value, but when the sync is operating normally we never see
    # the msg queue grow past a few hundred items, so this should be a reasonable limit for
    # now.
    msg_queue_maxsize: int = 2000

    def __init__(
            self,
            db: AtomicDatabaseAPI,
            peer_pool: ETHPeerPool,
            queen_tracker: QueenTrackerAPI,
            event_bus: EndpointAPI) -> None:
        self.logger = get_logger('trinity.sync.beam.BeamDownloader')
        self._db = db
        self._trie_db = HexaryTrie(db)
        self._event_bus = event_bus

        # Track the needed node data that is urgent and important:
        buffer_size = MAX_STATE_FETCH * REQUEST_BUFFER_MULTIPLIER
        self._node_tasks = TaskQueue[Hash32](buffer_size, lambda task: 0)

        # list of events waiting on new data
        self._new_data_event: asyncio.Event = asyncio.Event()
        self._preview_events = {}

        self._peer_pool = peer_pool

        # Track node data for upcoming blocks
        self._block_number_lookup = defaultdict(lambda: BlockNumber(0))
        self._maybe_useful_nodes = TaskQueue[Hash32](
            buffer_size,
            # Prefer trie nodes from earliest blocks
            lambda node_hash: self._block_number_lookup[node_hash],
        )

        self._num_urgent_requests_by_peer = Counter()
        self._num_predictive_requests_by_peer = Counter()

        self._queen_tracker = queen_tracker

        self._threadpool = ThreadPoolExecutor()
        asyncio.get_event_loop().set_default_executor(self._threadpool)

    async def ensure_nodes_present(
            self,
            node_hashes: Collection[Hash32],
            block_number: BlockNumber,
            urgent: bool = True) -> int:
        """
        Wait until the nodes that are the preimages of `node_hashes` are available in the
        database. If one is not available in the first check, request it from peers.

        :param urgent: Should this node be downloaded urgently? If False, download as backfill

        Note that if your ultimate goal is account or storage data, it's probably better to use
        download_account or download_storage. This method is useful for other scenarios,
        like bytecode lookups or intermediate node lookups.

        :return: how many nodes had to be downloaded
        """
        if urgent:
            num_nodes_found = await self._wait_for_nodes(node_hashes, urgent)
        else:
            for node_hash in node_hashes:
                # Priority is based on lowest block number that needs the given node
                if self._block_number_lookup.get(node_hash, block_number + 1) > block_number:
                    self._block_number_lookup[node_hash] = block_number
            num_nodes_found = await self._wait_for_nodes(node_hashes, urgent)

        requested_node_count = len(node_hashes)
        if num_nodes_found == requested_node_count:
            for node_hash in node_hashes:
                self._block_number_lookup.pop(node_hash, None)
        elif num_nodes_found < requested_node_count:
            found_hashes = await asyncio.get_event_loop().run_in_executor(
                None,
                self._get_unique_present_hashes,
                node_hashes,
            )
            for node_hash in found_hashes:
                self._block_number_lookup.pop(node_hash, None)

        return num_nodes_found

    def _max_spread_beam_factor(self) -> int:
        max_factor = self._num_peers - 1 - self._min_predictive_peers
        return max(0, max_factor)

    def _get_unique_missing_hashes(self, hashes: Iterable[Hash32]) -> Set[Hash32]:
        return set(node_hash for node_hash in hashes if node_hash not in self._db)

    def _get_unique_present_hashes(self, hashes: Iterable[Hash32]) -> Set[Hash32]:
        return set(node_hash for node_hash in hashes if node_hash in self._db)

    async def _wait_for_nodes(
            self,
            node_hashes: Iterable[Hash32],
            urgent: bool) -> int:
        """
        Insert the given node hashes into the queue to be retrieved, then block
        until they become present in the database.

        :return: number of new nodes received -- might be smaller than len(node_hashes) on timeout
        """
        missing_nodes = await self._run_preview_in_thread(
            urgent,
            self._get_unique_missing_hashes,
            node_hashes,
        )
        if urgent:
            queue = self._node_tasks
        else:
            queue = self._maybe_useful_nodes
        unrequested_nodes = tuple(
            node_hash for node_hash in missing_nodes if node_hash not in queue
        )
        if missing_nodes:
            if unrequested_nodes:
                await queue.add(unrequested_nodes)
            return await self._node_hashes_present(missing_nodes, urgent)
        else:
            return 0

    def _account_review(
            self,
            account_address_hashes: Iterable[Hash32],
            root_hash: Hash32) -> Tuple[Set[Hash32], Dict[Hash32, bytes]]:
        """
        Check these accounts in the trie.

        :return: (missing trie nodes, completed_hashes->encoded_account_rlp)
        """
        need_nodes = set()
        completed_accounts = {}
        with self._trie_db.at_root(root_hash) as snapshot:
            for account_hash in account_address_hashes:
                try:
                    account_rlp = snapshot[account_hash]
                except MissingTrieNode as exc:
                    need_nodes.add(exc.missing_node_hash)
                else:
                    completed_accounts[account_hash] = account_rlp
        return need_nodes, completed_accounts

    def _get_unique_hashes(self, addresses: Collection[Address]) -> Set[Hash32]:
        uniques = set(addresses)
        return {keccak(address) for address in uniques}

    async def download_accounts(
            self,
            account_addresses: Collection[Address],
            root_hash: Hash32,
            block_number: BlockNumber,
            urgent: bool = True) -> int:
        """
        Like :meth:`download_account`, but waits for multiple addresses to be available.

        :return: total number of trie node downloads that were required to locally prove
        """
        if len(account_addresses) == 0:
            return 0

        last_log_time = time.monotonic()
        missing_account_hashes = await self._run_preview_in_thread(
            urgent,
            self._get_unique_hashes,
            account_addresses,
        )
        completed_account_hashes: Set[Hash32] = set()
        nodes_downloaded = 0
        # will never take more than 64 attempts to get a full account
        for _ in range(64):
            need_nodes, newly_completed = await self._run_preview_in_thread(
                urgent,
                self._account_review,
                missing_account_hashes,
                root_hash,
            )
            completed_account_hashes.update(newly_completed.keys())

            # Log if taking a long time to download addresses
            now = time.monotonic()
            if urgent and now - last_log_time > ESTIMATED_BEAMABLE_SECONDS:
                self.logger.info(
                    "Beam account download: %d/%d (%.0f%%)",
                    len(completed_account_hashes),
                    len(account_addresses),
                    100 * len(completed_account_hashes) / len(account_addresses),
                )
                last_log_time = now

            await self.ensure_nodes_present(need_nodes, block_number, urgent)
            nodes_downloaded += len(need_nodes)
            missing_account_hashes -= completed_account_hashes

            if not missing_account_hashes:
                return nodes_downloaded
        else:
            raise Exception(
                f"State Downloader failed to download {account_addresses!r} at "
                f"state root 0x{root_hash.hex()} in 64 runs"
            )

    async def download_account(
            self,
            account_hash: Hash32,
            root_hash: Hash32,
            block_number: BlockNumber,
            urgent: bool = True) -> Tuple[bytes, int]:
        """
        Check the given account address for presence in the state database.
        Wait until we have the state proof for the given address.
        If the account is not available in the first check, then request any trie nodes
        that we need to determine and prove the account rlp.

        Mark these nodes as urgent and important, which increases request priority.

        :return: The downloaded account rlp, and how many state trie node downloads were required
        """
        # will never take more than 64 attempts to get a full account
        for num_downloads_required in range(64):
            need_nodes, newly_completed = await self._run_preview_in_thread(
                urgent,
                self._account_review,
                [account_hash],
                root_hash,
            )
            if need_nodes:
                await self.ensure_nodes_present(need_nodes, block_number, urgent)
            else:
                # Account is fully available within the trie
                return newly_completed[account_hash], num_downloads_required
        else:
            raise Exception(
                f"State Downloader failed to download 0x{account_hash.hex()} at "
                f"state root 0x{root_hash.hex()} in 64 runs"
            )

    async def download_storage(
            self,
            storage_key: Hash32,
            storage_root_hash: Hash32,
            account: Address,
            block_number: BlockNumber,
            urgent: bool = True) -> int:
        """
        Check the given storage key for presence in the account's storage database.
        Wait until we have a trie proof for the given storage key.
        If the storage key value is not available in the first check, then request
        any trie nodes that we need to determine and prove the storage value.

        Mark these nodes as urgent and important, which increases request priority.

        :return: how many storage trie node downloads were required
        """
        # should never take more than 64 attempts to get a full account
        for num_downloads_required in range(64):
            need_nodes = await self._run_preview_in_thread(
                urgent,
                self._storage_review,
                storage_key,
                storage_root_hash,
            )
            if need_nodes:
                await self.ensure_nodes_present(need_nodes, block_number, urgent)
            else:
                # Storage value is fully available within the trie
                return num_downloads_required
        else:
            raise Exception(
                f"State Downloader failed to download storage 0x{storage_key.hex()} in "
                f"{to_checksum_address(account)} at storage root 0x{storage_root_hash.hex()} "
                f"in 64 runs."
            )

    def _storage_review(
            self,
            storage_key: Hash32,
            storage_root_hash: Hash32) -> Set[Hash32]:
        """
        Check this storage slot in the trie.

        :return: missing trie nodes
        """
        with self._trie_db.at_root(storage_root_hash) as snapshot:
            try:
                # request the data just to see which part is missing
                snapshot[storage_key]
            except MissingTrieNode as exc:
                return {exc.missing_node_hash}
            else:
                return set()

    async def _match_urgent_node_requests_to_peers(self) -> None:
        """
        Monitor for urgent trie node needs. An urgent node means that a current
        block import is paused until that trie node is retrieved. Ask our best
        peer for that trie node, and then wait for the next urgent node need.
        Repeat indefinitely.
        """
        while self.manager.is_running:
            urgent_batch_id, urgent_hashes = await self._node_tasks.get(
                eth_constants.MAX_STATE_FETCH
            )

            # Get best peer, by GetNodeData speed
            queen = await self._queen_tracker.get_queen_peer()

            queen_is_requesting = queen.eth_api.get_node_data.is_requesting
            if queen_is_requesting:
                # Our best peer for node data has an in-flight GetNodeData request
                # Probably, backfill is asking this peer for data
                # This is right in the critical path, so we'd prefer this never happen
                self.logger.debug(
                    "Want to download urgent data, but %s is locked on other request",
                    queen,
                )
                # Don't do anything different, allow the request lock to handle the situation

            self._num_urgent_requests_by_peer[queen] += 1
            self._urgent_requests += 1

            await self._find_urgent_nodes(queen, urgent_hashes, urgent_batch_id)

    async def _find_urgent_nodes(
            self,
            queen: ETHPeer,
            urgent_hashes: Tuple[Hash32, ...],
            batch_id: int) -> None:
        # Generate and schedule the tasks to request the urgent node(s) from multiple peers
        knights = tuple(self._queen_tracker.pop_knights())
        urgent_requests = [
            create_task(
                self._get_nodes(peer, urgent_hashes, urgent=True),
                name=f"BeamDownloader._get_nodes({peer.remote}, ...)",
            )
            for peer in (queen,) + knights
        ]

        # Process the returned nodes, in the order they complete
        urgent_timer = Timer()
        async with cleanup_tasks(*urgent_requests):
            for result_coro in asyncio.as_completed(urgent_requests):
                nodes_returned, new_nodes, peer = await result_coro
                time_on_urgent = urgent_timer.elapsed

                # After the first peer returns something, cancel all other pending tasks
                if len(nodes_returned) > 0:
                    # Stop waiting for other peer responses
                    break
                elif peer == queen:
                    self.logger.debug(
                        "queen %s returned 0 urgent nodes of %r", peer, urgent_hashes)
                # Wait for the next peer response

        # Log the received urgent nodes
        if peer == queen:
            log_header = "beam-queen-urgent-rtt"
        else:
            log_header = "spread-beam-urgent-rtt"
        self.logger.debug(
            "%s: got %d/%d +%d nodes in %.3fs from %s (%s)",
            log_header,
            len(nodes_returned),
            len(urgent_hashes),
            len(new_nodes),
            time_on_urgent,
            peer.remote,
            urgent_hashes[0][:2].hex(),
        )

        # Stat updates
        self._total_processed_nodes += len(new_nodes)
        self._urgent_processed_nodes += len(new_nodes)
        self._time_on_urgent += time_on_urgent

        # If it took too long to get a single urgent node, then increase the "spread" factor
        if len(urgent_hashes) == 1 and time_on_urgent > MAX_ACCEPTABLE_WAIT_FOR_URGENT_NODE:
            new_spread_factor = clamp(
                0,
                self._max_spread_beam_factor(),
                self._spread_factor + 1,
            )
            if new_spread_factor != self._spread_factor:
                self.logger.debug(
                    "spread-beam-update: Urgent node latency=%.3fs, update factor %d to %d",
                    time_on_urgent,
                    self._spread_factor,
                    new_spread_factor,
                )
                self._queen_tracker.set_desired_knight_count(new_spread_factor)
                self._spread_factor = new_spread_factor

        # Complete the task in the TaskQueue
        task_hashes = tuple(node_hash for node_hash, _ in nodes_returned)
        await self._node_tasks.complete(batch_id, task_hashes)

        # Re-insert the peers for the next request
        for knight in knights:
            self._queen_tracker.insert_peer(knight)

    async def _match_predictive_node_requests_to_peers(self) -> None:
        """
        Monitor for predictive nodes. These might be required by future blocks. They
        might not, because we run a speculative execution which might follow a
        different code path than the final block import does.

        When predictive nodes are queued up, ask the fastest available peasant
        (non-queen) peer for them. Without waiting for a response from the peer,
        continue and check if more predictive trie nodes are requested. Repeat
        indefinitely.
        """
        while self.manager.is_running:
            try:
                batch_id, hashes = await asyncio.wait_for(
                    self._maybe_useful_nodes.get(eth_constants.MAX_STATE_FETCH),
                    timeout=TOO_LONG_PREDICTIVE_PEER_DELAY,
                )
            except asyncio.TimeoutError:
                # Reduce the number of predictive peers, we seem to have plenty
                if self._min_predictive_peers > 0:
                    self._min_predictive_peers -= 1
                    self.logger.debug(
                        "Decremented predictive peers to %d",
                        self._min_predictive_peers,
                    )
                # Re-attempt
                continue

            # Find any hashes that were discovered through other means, like urgent requests:
            existing_hashes = await asyncio.get_event_loop().run_in_executor(
                None,
                self._get_unique_present_hashes,
                hashes,
            )

            # If any hashes are already found, clear them out and retry
            if existing_hashes:
                # Wake up any paused preview threads
                await self._wakeup_preview_waiters(existing_hashes)
                # Clear out any tasks that are no longer necessary
                await self._maybe_useful_nodes.complete(batch_id, tuple(existing_hashes))
                # Restart from the top
                continue

            try:
                peer = await asyncio.wait_for(
                    self._queen_tracker.pop_fastest_peasant(),
                    timeout=TOO_LONG_PREDICTIVE_PEER_DELAY,
                )
            except asyncio.TimeoutError:
                # Increase the minimum number of predictive peers, we seem to not have enough
                new_predictive_peers = min(
                    self._min_predictive_peers + 1,
                    # Don't reserve more than half the peers for prediction
                    self._num_peers // 2,
                )
                if new_predictive_peers != self._min_predictive_peers:
                    self.logger.debug(
                        "Updating predictive peer count from %d to %d",
                        self._min_predictive_peers,
                        new_predictive_peers,
                    )
                    self._min_predictive_peers = new_predictive_peers

                cancel_attempt = True
            else:
                if peer.eth_api.get_node_data.is_requesting:
                    self.logger.debug(
                        "Want predictive nodes from %s, but it has an active request,"
                        " skipping...",
                        peer,
                    )
                    self._queen_tracker.insert_peer(peer, NON_IDEAL_RESPONSE_PENALTY)
                    cancel_attempt = True
                else:
                    cancel_attempt = False

            if cancel_attempt:
                # Prepare to restart
                await self._maybe_useful_nodes.complete(batch_id, ())
                continue

            self._num_predictive_requests_by_peer[peer] += 1
            self._predictive_requests += 1
            self.manager.run_task(
                self._get_predictive_nodes_from_peer,
                peer,
                hashes,
                batch_id,
            )

    async def _get_predictive_nodes_from_peer(
            self,
            peer: ETHPeer,
            node_hashes: Tuple[Hash32, ...],
            batch_id: int) -> None:
        nodes, new_nodes, _ = await self._get_nodes(peer, node_hashes, urgent=False)
        self._total_processed_nodes += len(nodes)
        self._predictive_processed_nodes += len(new_nodes)

        task_hashes = tuple(node_hash for node_hash, _ in nodes)
        await self._maybe_useful_nodes.complete(batch_id, task_hashes)

        # Re-insert the peasant into the tracker
        if len(nodes):
            delay = 0.0
        else:
            delay = 8.0
        self._queen_tracker.insert_peer(peer, delay)

    async def _get_nodes(
            self,
            peer: ETHPeer,
            node_hashes: Tuple[Hash32, ...],
            urgent: bool) -> Tuple[NodeDataBundles, NodeDataBundles, ETHPeer]:
        nodes = await self._request_nodes(peer, node_hashes)

        new_nodes, found_independent = await self._run_preview_in_thread(
            urgent,
            self._store_nodes,
            node_hashes,
            nodes,
            urgent,
        )

        if urgent:
            if new_nodes or found_independent:
                # If there are any new nodes returned, then notify any coros that are
                # waiting on node data to resume.
                # If the data was retrieved another way (like backfilled), then still
                # trigger a new data event. That way, urgent coros don't get stuck
                # hanging until a timeout. This can cause an especially flaky
                # test_beam_syncer_backfills_all_state[42].
                self._new_data_event.set()
        elif new_nodes:
            new_hashes = set(node_hash for node_hash, _ in new_nodes)
            await self._wakeup_preview_waiters(new_hashes)

        return nodes, new_nodes, peer

    async def _wakeup_preview_waiters(self, node_hashes: Iterable[Hash32]) -> None:
        # Wake up any coroutines waiting for the particular data that was returned.
        # (If no data returned, then no coros should wake up, and we can skip the block)
        preview_waiters = await asyncio.get_event_loop().run_in_executor(
            None,
            self._get_preview_waiters,
            node_hashes,
        )
        for waiter in preview_waiters:
            waiter.set()

    def _get_preview_waiters(self, node_hashes: Iterable[Hash32]) -> Tuple[asyncio.Event, ...]:
        # Convert to set for a faster presence-test in the returned tuple comprehension
        new_hashes = set(node_hashes)

        # defensive copy of _preview_events, since this method runs in a thread
        waiters = tuple(self._preview_events.items())
        return tuple(
            waiting_event
            for waiting_event, node_hashes in waiters
            if new_hashes & node_hashes
        )

    def _store_nodes(
            self,
            node_hashes: Tuple[Hash32, ...],
            nodes: NodeDataBundles,
            urgent: bool) -> Tuple[NodeDataBundles, bool]:
        """
        Store the supplied nodes in the database, and return the subset of them that are new.

        Also return whether the requested nodes were found another way, if the
        nodes are urgently needed.
        """
        new_nodes = tuple(
            (node_hash, node) for node_hash, node in nodes if node_hash not in self._db
        )
        if new_nodes:
            # batch all DB writes into one, for performance
            with self._db.atomic_batch() as batch:
                for node_hash, node in new_nodes:
                    batch[node_hash] = node
            # Don't bother checking if the nodes were found another way
            found_independent = False
        elif urgent:
            # Check if the nodes were found another way, if they are urgently needed
            for requested_hash in node_hashes:
                if requested_hash in self._db:
                    found_independent = True
                    break
            else:
                found_independent = False
        else:
            # Don't bother checking if the nodes were found another way, if they are predictive
            found_independent = False

        return new_nodes, found_independent

    async def _node_hashes_present(self, node_hashes: Set[Hash32], urgent: bool) -> int:
        """
        Block until the supplied node hashes have been inserted into the database.

        :return: number of new nodes received -- might be smaller than len(node_hashes) on timeout
        """
        remaining_hashes = node_hashes.copy()
        timeout = BLOCK_IMPORT_MISSING_STATE_TIMEOUT
        start_time = time.monotonic()

        if not urgent:
            wait_event = asyncio.Event()
            self._preview_events[wait_event] = node_hashes

        while remaining_hashes and time.monotonic() - start_time < timeout:
            if urgent:
                await self._new_data_event.wait()
                self._new_data_event.clear()
            else:
                try:
                    await asyncio.wait_for(
                        wait_event.wait(),
                        timeout=CHECK_PREVIEW_STATE_TIMEOUT,
                    )
                except asyncio.TimeoutError:
                    # Check if the data showed up due to an urgent import
                    preview_timeout = True
                else:
                    preview_timeout = False
                finally:
                    wait_event.clear()

            found_hashes = await self._run_preview_in_thread(
                urgent,
                self._get_unique_present_hashes,
                remaining_hashes,
            )

            if not urgent:
                if preview_timeout:
                    self._predictive_found_nodes_during_timeout += len(found_hashes)
                else:
                    self._predictive_found_nodes_woke_up += len(found_hashes)

            if found_hashes:
                remaining_hashes -= found_hashes
                if not urgent and remaining_hashes:
                    self._preview_events[wait_event] = remaining_hashes

        if not urgent:
            del self._preview_events[wait_event]

        if remaining_hashes:
            if urgent:
                logger = self.logger.error
            else:
                logger = self.logger.warning
            logger(
                "Could not collect node data for %d %s hashes %r within %.0f seconds"
                " (took %.1fs)",
                len(remaining_hashes),
                "urgent" if urgent else "preview",
                list(remaining_hashes)[0:2],
                timeout,
                time.monotonic() - start_time,
            )

        return len(node_hashes) - len(remaining_hashes)

    def register_peer(self, peer: BasePeer) -> None:
        self._num_peers += 1

    def deregister_peer(self, peer: BasePeer) -> None:
        self._num_peers -= 1

    async def _request_nodes(
            self,
            peer: ETHPeer,
            original_node_hashes: Tuple[Hash32, ...]) -> NodeDataBundles:
        node_hashes = tuple(set(original_node_hashes))
        num_nodes = len(node_hashes)
        self.logger.debug2("Requesting %d nodes from %s", num_nodes, peer)
        try:
            completed_nodes = await peer.eth_api.get_node_data(
                node_hashes,
                timeout=self._reply_timeout,
            )
        except PeerConnectionLost:
            self.logger.debug("%s went away, cancelling the nodes request and moving on...", peer)
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        except BaseP2PError as exc:
            self.logger.warning("Unexpected p2p err while downloading nodes from %s: %s", peer, exc)
            self.logger.debug("Problem downloading nodes from peer, dropping...", exc_info=True)
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        except CancelledError:
            self.logger.debug("Pending nodes call to %r future cancelled", peer)
            self._queen_tracker.penalize_queen(peer)
            raise
        except asyncio.TimeoutError:
            # This kind of exception shouldn't necessarily *drop* the peer,
            # so capture error, log and swallow
            self.logger.debug("Timed out requesting %d nodes from %s", num_nodes, peer)
            self._queen_tracker.penalize_queen(peer)
            self._total_timeouts += 1
            return tuple()
        except Exception as exc:
            self.logger.info("Unexpected err while downloading nodes from %s: %s", peer, exc)
            self.logger.debug("Problem downloading nodes from %s", peer, exc_info=True)
            self._queen_tracker.penalize_queen(peer)
            return tuple()
        else:
            if len(completed_nodes) > 0:
                # peer completed successfully, so have it get back in line for processing
                pass
            else:
                # peer didn't return enough results, wait a while before trying again
                self.logger.debug("%s returned 0 state trie nodes, penalize...", peer)
                self._queen_tracker.penalize_queen(peer)
            return completed_nodes

    async def _run_preview_in_thread(
            self,
            urgent: bool,
            method: Callable[..., TReturn],
            *args: Any) -> TReturn:
        if urgent:
            return method(*args)
        else:
            return await asyncio.get_event_loop().run_in_executor(
                None,
                method,
                *args,
            )

    async def run(self) -> None:
        """
        Request all nodes in the queue, running indefinitely
        """
        self._timer.start()
        self.logger.info("Starting beam state sync")
        self.manager.run_daemon_task(self._periodically_report_progress)
        self.manager.run_daemon_task(self._reduce_spread_factor)
        with self.subscribe(self._peer_pool):
            self.manager.run_daemon_task(self._match_predictive_node_requests_to_peers)
            await self._match_urgent_node_requests_to_peers()

    async def _reduce_spread_factor(self) -> None:
        # The number of backup urgent requester peers increases when the RTT is too high.
        # This method makes sure that it eventually drops back to 0 in a healthy
        # sync environment.
        while self.manager.is_running:
            await asyncio.sleep(self._reduce_spread_factor_interval)
            if self._spread_factor > 0:
                self.logger.debug(
                    "spread-beam-update: Reduce spread beam factor %d to %d",
                    self._spread_factor,
                    self._spread_factor - 1,
                )
                self._spread_factor -= 1
                self._queen_tracker.set_desired_knight_count(self._spread_factor)

    async def _periodically_report_progress(self) -> None:
        try:
            # _work_queue is only defined in python 3.8 -- don't report the stat otherwise
            threadpool_queue = self._threadpool._work_queue  # type: ignore
        except AttributeError:
            threadpool_queue = None

        while self.manager.is_running:
            self._time_on_urgent = 0
            interval_timer = Timer()
            await asyncio.sleep(self._report_interval)

            if threadpool_queue:
                threadpool_queue_len = threadpool_queue.qsize()
            else:
                threadpool_queue_len = "?"

            msg = "all=%d " % self._total_processed_nodes
            msg += "urgent=%d " % self._urgent_processed_nodes
            # The percent of time spent in the last interval waiting on an urgent node
            # from the queen peer:
            msg += "crit=%.0f%% " % (100 * self._time_on_urgent / interval_timer.elapsed)
            msg += "pred=%d " % self._predictive_processed_nodes
            msg += "all/sec=%d " % (self._total_processed_nodes / self._timer.elapsed)
            msg += "urgent/sec=%d " % (self._urgent_processed_nodes / self._timer.elapsed)
            msg += "urg_reqs=%d " % self._urgent_requests
            msg += "pred_reqs=%d " % self._predictive_requests
            msg += "timeouts=%d" % self._total_timeouts
            msg += " u_pend=%d" % self._node_tasks.num_pending()
            msg += " u_prog=%d" % self._node_tasks.num_in_progress()
            msg += " p_pend=%d" % self._maybe_useful_nodes.num_pending()
            msg += " p_prog=%d" % self._maybe_useful_nodes.num_in_progress()
            msg += " p_wait=%d" % len(self._preview_events)
            msg += " p_woke=%d" % self._predictive_found_nodes_woke_up
            msg += " p_found=%d" % self._predictive_found_nodes_during_timeout
            msg += " thread_Q=20+%s" % threadpool_queue_len
            self.logger.debug("beam-sync: %s", msg)

            self._predictive_found_nodes_woke_up = 0
            self._predictive_found_nodes_during_timeout = 0

            # log peer counts
            show_top_n_peers = 5
            self.logger.debug(
                "beam-queen-usage-top-%d: urgent=%s, predictive=%s, spread=%d, reserve_pred=%d",
                show_top_n_peers,
                [
                    (str(peer.remote), num)
                    for peer, num
                    in self._num_urgent_requests_by_peer.most_common(show_top_n_peers)
                ],
                [
                    (str(peer.remote), num)
                    for peer, num
                    in self._num_predictive_requests_by_peer.most_common(show_top_n_peers)
                ],
                self._spread_factor,
                self._min_predictive_peers,
            )
            self._num_urgent_requests_by_peer.clear()
            self._num_predictive_requests_by_peer.clear()
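`_run_preview_in_thread` is the concurrency hinge of the class: urgent checks run inline on the event loop for minimum latency, while predictive (preview) checks are pushed to the default executor so they never stall urgent work. A distilled, standalone version of that dispatch (the names `run_maybe_in_thread` and `find_present` are illustrative, not from the class):

import asyncio
from typing import Any, Callable, TypeVar

TReturn = TypeVar('TReturn')

async def run_maybe_in_thread(
        urgent: bool,
        method: Callable[..., TReturn],
        *args: Any) -> TReturn:
    if urgent:
        # Urgent lookups run inline: lowest latency, at the cost of
        # briefly occupying the event loop.
        return method(*args)
    else:
        # Predictive lookups are offloaded so they never block urgent work.
        return await asyncio.get_event_loop().run_in_executor(None, method, *args)

def find_present(db: dict, hashes: set) -> set:
    return {h for h in hashes if h in db}

async def demo() -> None:
    db = {b'a': b'node'}
    assert await run_maybe_in_thread(True, find_present, db, {b'a', b'b'}) == {b'a'}
    assert await run_maybe_in_thread(False, find_present, db, {b'a', b'b'}) == {b'a'}

asyncio.run(demo())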
def test_hexary_trie_raises_on_pruning_snapshot():
    trie = HexaryTrie({}, prune=True)

    with pytest.raises(ValidationError):
        with trie.at_root(BLANK_NODE_HASH):
            pass
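This test pins down that `at_root` snapshots are disallowed on a pruning trie, since pruning may have deleted the very nodes a historical snapshot would need. On a non-pruning trie the same call works, because stale nodes stay in the database; a sketch using the same `at_root` context manager that `_account_review` relies on:

from trie import HexaryTrie

trie = HexaryTrie(db={})
trie[b'key'] = b'old-value'
old_root = trie.root_hash

trie[b'key'] = b'new-value'

# Without pruning, the old nodes remain in the db, so historical reads work.
with trie.at_root(old_root) as snapshot:
    assert snapshot[b'key'] == b'old-value'
assert trie[b'key'] == b'new-value'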
    def add_transaction(self,
                        block_header: BlockHeader,
                        index_key: int,
                        transaction: 'BaseTransaction') -> bytes:
        transaction_db = HexaryTrie(self.db, root_hash=block_header.transaction_root)
        transaction_db[index_key] = rlp.encode(transaction)
        return transaction_db.root_hash
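`add_transaction` mirrors `add_receipt`: the header's `transaction_root` commits to the full set of (index, transaction) pairs, so rebuilding the trie from the same entries reproduces the same root. Note that despite the `int` annotation, the trie key must be bytes; callers typically pass the RLP-encoded index. A sketch of that determinism (`transaction_root` is an illustrative helper, and the byte strings stand in for encoded transactions):

from trie import HexaryTrie
import rlp

def transaction_root(encoded_txns) -> bytes:
    trie = HexaryTrie(db={})
    for index, encoded in enumerate(encoded_txns):
        trie[rlp.encode(index)] = encoded
    return trie.root_hash

txns = [b'tx-0', b'tx-1', b'tx-2']
# The root is a pure function of the (key, value) set, so rebuilding
# from scratch reproduces the header's transaction_root.
assert transaction_root(txns) == transaction_root(list(txns))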