async def _find_launch_headers(self, peer: TChainPeer) -> Tuple[BlockHeaderAPI, ...]:
    """
    Locate the first headers from ``peer`` that the local header database lacks.

    A contiguous run of headers is requested from the peer, starting right after
    the newest skeleton header that matches what we already have. Headers that are
    already imported are skipped, and the remaining ones are validated against
    their parent in the database before being returned.

    An exact starting point cannot always be found: for instance, the canonical
    head may advance while we wait on the peer. When every fetched header turns
    out to be known already, a single *stale* header (the last one fetched) is
    returned instead, which means some headers will be downloaded twice.

    :raise ValidationError: if the peer returns no headers, or if the first
        unknown header's parent is not present in the database.
    """
    matching_header = await self._find_newest_matching_skeleton_header(peer)

    # The segment that follows must contain at least one header we have not seen:
    # it overlaps with the next skeleton header of the earlier skeleton request,
    # and the starting skeleton header was picked to reach past our canonical head.
    first_num = BlockNumber(matching_header.block_number + 1)
    candidates = await self._fetch_headers_from(peer, first_num, skip=0)

    if len(candidates) == 0:
        raise ValidationError(
            f"{peer} gave 0 headers when seeking common meat ancestors from {first_num}"
        )

    # Split the response into headers we already hold and headers still missing
    already_stored, missing = await skip_complete_headers(
        candidates,
        self._is_header_imported,
    )

    if already_stored:
        self.logger.debug(
            "During header sync launch, skipping over (%d) already stored headers %s: %s..%s",
            len(already_stored),
            humanize_integer_sequence(h.block_number for h in already_stored),
            already_stored[0],
            already_stored[-1],
        )

    if not missing:
        # Nothing new in the response: fall back to the last (stale) header fetched
        stale_header = candidates[-1]
        self.logger.debug(
            "Canonical head updated while finding new head from %s, returning old %s instead",
            peer,
            stale_header,
        )
        return (stale_header, )

    first_missing = missing[0]
    try:
        launch_parent = await self._db.coro_get_block_header_by_hash(
            first_missing.parent_hash,
        )
    except HeaderNotFound as exc:
        raise ValidationError(
            f"First header {first_missing} did not have parent in DB"
        ) from exc

    # Check the missing headers against the parent we hold locally
    await self._chain.coro_validate_chain(
        launch_parent,
        missing,
        SEAL_CHECK_RANDOM_SAMPLE_RATE,
    )
    return missing
async def next_header_batch(self) -> AsyncIterator[Tuple[BlockHeaderAPI, ...]]:
    """
    Yield successive batches of validated headers fetched from ``self._peer``.

    Nothing is yielded when the peer's announced total difficulty is not higher
    than that of our canonical head. Otherwise headers are requested in a loop,
    and the generator finishes as soon as one of these happens:

    - this service or the peer's manager stops running
    - the header request is cancelled, times out, or fails validation
    - the peer returns no new headers
    - the first batch has no parent in our DB, or a later batch does not
      directly follow the previous one
    - chain validation of a batch fails
    """
    peer = self._peer

    local_head = await self.db.coro_get_canonical_head()
    known_td = await self.db.coro_get_score(local_head.hash)

    if peer.head_info.head_td <= known_td:
        self.logger.info(
            "Head TD (%d) announced by %s not higher than ours (%d), not syncing",
            peer.head_info.head_td,
            peer,
            known_td,
        )
        return

    self.logger.debug(
        "%s announced Head TD %d, which is higher than ours (%d), starting sync",
        peer,
        peer.head_info.head_td,
        known_td,
    )

    self.sync_progress = SyncProgress(
        local_head.block_number,
        local_head.block_number,
        peer.head_info.head_number,
    )
    self.logger.info("Starting sync with %s", peer)

    previous_tip: BlockHeaderAPI = None

    # The first request reaches back up to MAX_REORG_DEPTH headers behind our
    # current head, to cope with chain reorgs that happened since the last time
    # _sync() was called. Whatever is already in our DB gets dropped again by
    # skip_complete_headers(), so it is not processed a second time.
    next_number = BlockNumber(
        max(GENESIS_BLOCK_NUMBER + 1, local_head.block_number - MAX_REORG_DEPTH)
    )

    while self.manager.is_running:
        if not peer.manager.is_running:
            self.logger.info("%s disconnected, aborting sync", peer)
            break

        try:
            batch = await self._request_headers(peer, next_number)

            if previous_tip is None:
                # Only the very first pass needs to filter out headers we already hold
                stored, fresh = await skip_complete_headers(
                    batch,
                    self.db.coro_header_exists,
                )

                if not fresh and stored:
                    # Whole batch was redundant: re-read the head and request further along
                    local_head = await self.db.coro_get_canonical_head()
                    next_number = BlockNumber(
                        max(
                            batch[-1].block_number + 1,
                            local_head.block_number - MAX_REORG_DEPTH,
                        )
                    )
                    self.logger.debug(
                        "All %d headers redundant, head at %s, fetching from #%d",
                        len(stored),
                        local_head,
                        next_number,
                    )
                    continue

                if stored:
                    self.logger.debug(
                        "Header sync skipping over (%d) already stored headers %s: %s..%s",
                        len(stored),
                        humanize_integer_sequence(h.block_number for h in stored),
                        stored[0],
                        stored[-1],
                    )
            else:
                fresh = batch

            self.logger.debug2('sync received new headers: %s', fresh)
        except OperationCancelled:
            self.logger.info("Sync with %s completed", peer)
            break
        except asyncio.TimeoutError:
            self.logger.warning(
                "Timeout waiting for header batch from %s, aborting sync",
                peer,
            )
            await peer.disconnect(DisconnectReason.TIMEOUT)
            break
        except ValidationError as validation_err:
            self.logger.warning(
                "Invalid header response sent by peer %s disconnecting: %s",
                peer,
                validation_err,
            )
            await peer.disconnect(DisconnectReason.USELESS_PEER)
            break

        if not fresh:
            request_parent = local_head if previous_tip is None else previous_tip

            if known_td < peer.head_info.head_td:
                # The peer advertised a heavier chain yet sent nothing for it: drop the peer
                # TODO ... also blacklist, because it keeps trying to reconnect
                self.logger.warning(
                    "%s announced difficulty %s, but didn't return any headers after %r@%s",
                    peer,
                    peer.head_info.head_td,
                    request_parent,
                    known_td,
                )
                await peer.disconnect(DisconnectReason.SUBPROTOCOL_ERROR)
            else:
                self.logger.info("Got no new headers from %s, aborting sync", peer)
            break

        first = fresh[0]
        first_parent = None

        if previous_tip is None:
            # First batch: its earliest header must hang off a header stored in our DB
            try:
                first_parent = await self.db.coro_get_block_header_by_hash(
                    first.parent_hash,
                )
            except HeaderNotFound:
                self.logger.warning(
                    "Unable to find common ancestor betwen our chain and %s",
                    peer,
                )
                break
        elif previous_tip.hash != first.parent_hash:
            # Later batches: must continue directly from the header we received last
            self.logger.warning(
                "Header batch starts with %r, with parent %s, but last header was %r",
                first,
                encode_hex(first.parent_hash[:4]),
                previous_tip,
            )
            break

        self.logger.debug(
            "Got new header chain from %s: %s..%s",
            peer,
            first,
            fresh[-1],
        )

        try:
            await self.chain.coro_validate_chain(
                previous_tip or first_parent,
                fresh,
                self._seal_check_random_sample_rate,
            )
        except ValidationError as chain_err:
            self.logger.warning(
                "Received invalid headers from %s, disconnecting: %s",
                peer,
                chain_err,
            )
            await peer.disconnect(DisconnectReason.SUBPROTOCOL_ERROR)
            break

        # Keep a running total of the difficulty covered by the headers handed out
        known_td += sum(header.difficulty for header in fresh)

        # Record the peer's latest head hash before the batch is handed on for processing
        self._target_header_hash = peer.head_info.head_hash

        yield fresh

        previous_tip = fresh[-1]
        self.sync_progress = self.sync_progress.update_current_block(
            previous_tip.block_number,
        )
        next_number = BlockNumber(previous_tip.block_number + 1)
async def _sync_from_headers(
        self,
        task_integrator: BaseOrderedTaskPreparation[BlockHeader, Hash32],
        completion_check: Callable[[BlockHeader], Awaitable[bool]],
) -> AsyncIterator[Tuple[BlockHeader, ...]]:
    """
    Consume header batches from the header syncer, register each batch as tasks
    with ``task_integrator``, and yield the headers that were registered.

    When registration fails because a parent is missing from the integrator,
    headers for which ``completion_check`` reports completion are skipped, the
    parent of the first remaining header is loaded from the DB and marked as a
    finished dependency, and the remaining headers are registered again. If that
    parent is not in the DB either, the ``HeaderNotFound`` propagates.
    """
    header_source = self._header_syncer.new_sync_headers(HEADER_QUEUE_SIZE_TARGET)

    # Highest block number registered so far; only used for stats/logging
    top_block_num = -1

    async for headers in self.wait_iter(header_source):
        self._got_first_header.set()

        try:
            # Duplicates are harmless and ignored. They typically show up after the
            # header-downloading peer was switched and the new peer does not know
            # about some of the headers already in progress.
            task_integrator.register_tasks(headers, ignore_duplicates=True)
        except MissingDependency as missing_exc:
            # The first header's parent has not been registered as a dependency.
            # Possible causes, roughly from most to least likely:
            #   - an ordinary fork: the first synced header does not build on the canonical head
            #   - a bug: new_sync_headers queued headers out of order
            #   - a bug: old headers were pruned from the tracker before reaching the DB

            # Drop every header already in the DB (can happen on a long backtrack)
            finished, pending = await self.wait(
                skip_complete_headers(headers, completion_check)
            )

            if finished:
                self.logger.debug(
                    "Chain sync skipping over (%d) already stored headers %s: %s..%s",
                    len(finished),
                    humanize_integer_sequence(h.block_number for h in finished),
                    finished[0],
                    finished[-1],
                )

            if not pending:
                # Nothing left to handle in this batch; move on to the next one
                continue

            # A parent that is absent from the DB points at a real bug, not a fork,
            # so the HeaderNotFound is allowed to propagate after logging.
            try:
                parent_header = await self.wait(
                    self.db.coro_get_block_header_by_hash(pending[0].parent_hash)
                )
            except HeaderNotFound:
                await self._log_missing_parent(pending[0], top_block_num, missing_exc)
                # No way to recover from this point
                raise

            # Anything beyond the trivial case gets logged as a potential fork
            canonical_head = await self.db.coro_get_canonical_head()
            if canonical_head not in pending and canonical_head != parent_header:
                self.logger.info(
                    "Received a header before processing its parent during regular sync. "
                    "Canonical head is %s, the received header "
                    "is %s, with parent %s. This might be a fork, importing to determine if it "
                    "is the longest chain",
                    canonical_head,
                    pending[0],
                    parent_header,
                )

            # Mark the parent of the first header as done ...
            task_integrator.set_finished_dependency(parent_header)

            # ... so that registering the header tasks goes through this time
            task_integrator.register_tasks(pending, ignore_duplicates=True)

            # Overwrite `headers` so the steps below behave the same whether or not
            # the exception path ran (ie~ only headers missing from the DB go on)
            headers = pending

        yield headers

        # Block while the persistence queue is too long, so we don't outrun the database
        await self._db_buffer_capacity.wait()

        top_block_num = max(headers[-1].block_number, top_block_num)
def test_humanize_integer_sequence(seq, expected):
    """Check that ``humanize_integer_sequence(seq)`` equals ``expected``."""
    assert humanize_integer_sequence(seq) == expected