async def test_header_gap_fill_detects_invalid_attempt(caplog,
                                                       event_loop,
                                                       event_bus,
                                                       chaindb_with_gaps,
                                                       chaindb_1000,
                                                       chaindb_uncle):

    client_context = ChainContextFactory(headerdb__db=chaindb_with_gaps.db)
    server_context = ChainContextFactory(headerdb__db=chaindb_uncle.db)
    peer_pair = LatestETHPeerPairFactory(
        alice_peer_context=client_context,
        bob_peer_context=server_context,
        event_bus=event_bus,
    )

    async with peer_pair as (client_peer, server_peer):

        client = SequentialHeaderChainGapSyncer(
            LatestTestChain(chaindb_with_gaps.db),
            chaindb_with_gaps,
            MockPeerPoolWithConnectedPeers([client_peer], event_bus=event_bus)
        )
        server_peer_pool = MockPeerPoolWithConnectedPeers([server_peer], event_bus=event_bus)
        uncle_chaindb = AsyncChainDB(chaindb_uncle.db)

        async with run_peer_pool_event_server(
            event_bus, server_peer_pool, handler_type=ETHPeerPoolEventServer
        ), background_asyncio_service(ETHRequestServer(
            event_bus,
            TO_NETWORKING_BROADCAST_CONFIG,
            uncle_chaindb,
        )):

            server_peer.logger.info("%s is serving 1000 blocks", server_peer)
            client_peer.logger.info("%s is syncing up 1000", client_peer)

            # We check for 499 because 500 exists from the very beginning (the checkpoint)
            expected_block_number = 499
            async with background_asyncio_service(client):
                try:
                    await wait_for_head(
                        chaindb_with_gaps,
                        chaindb_1000.get_canonical_block_header_by_number(expected_block_number),
                        sync_timeout=5,
                    )
                except asyncio.TimeoutError:
                    assert "Attempted to fill gap with invalid header" in caplog.text
                    # Monkey patch the uncle chaindb to effectively make the attacker peer
                    # switch to the correct chain.
                    uncle_chaindb.db = chaindb_1000.db
                    await wait_for_head(
                        chaindb_with_gaps,
                        chaindb_1000.get_canonical_block_header_by_number(expected_block_number)
                    )
                else:
                    raise AssertionError("Succeeded when it was expected to fail")
async def test_sequential_header_gapfill_syncer(request,
                                                event_loop,
                                                event_bus,
                                                chaindb_with_gaps,
                                                chaindb_1000):
    client_context = ChainContextFactory(headerdb__db=chaindb_with_gaps.db)
    server_context = ChainContextFactory(headerdb__db=chaindb_1000.db)
    peer_pair = LatestETHPeerPairFactory(
        alice_peer_context=client_context,
        bob_peer_context=server_context,
        event_bus=event_bus,
    )
    async with peer_pair as (client_peer, server_peer):

        chain_with_gaps = LatestTestChain(chaindb_with_gaps.db)
        client = SequentialHeaderChainGapSyncer(
            chain_with_gaps,
            chaindb_with_gaps,
            MockPeerPoolWithConnectedPeers([client_peer], event_bus=event_bus)
        )
        server_peer_pool = MockPeerPoolWithConnectedPeers([server_peer], event_bus=event_bus)

        async with run_peer_pool_event_server(
            event_bus, server_peer_pool, handler_type=ETHPeerPoolEventServer
        ), background_asyncio_service(ETHRequestServer(
            event_bus,
            TO_NETWORKING_BROADCAST_CONFIG,
            AsyncChainDB(chaindb_1000.db),
        )):

            server_peer.logger.info("%s is serving 1000 blocks", server_peer)
            client_peer.logger.info("%s is syncing up 1000", client_peer)

            async with background_asyncio_service(client):
                await wait_for_head(
                    # We check for 499 because 500 is there from the very beginning (the checkpoint)
                    chaindb_with_gaps,
                    chaindb_1000.get_canonical_block_header_by_number(499)
                )
                # This test is supposed to only fill in headers, so the following should fail.
                # If this ever succeeds it probably means the fixture was re-created with trivial
                # blocks and the test will fail and remind us what kind of fixture we want here.
                with pytest.raises(BlockNotFound):
                    chain_with_gaps.get_canonical_block_by_number(499)
class BeamSyncer(Service):
    """
    Organizes several moving parts to coordinate beam sync. Roughly:

        - Sync *only* headers up until you have caught up with a peer, i.e. the launchpoint
        - Launch a service responsible for serving event bus requests for missing state data
        - When you catch up with a peer, start downloading transactions needed to execute a block
        - At the launchpoint, switch to full block imports, with a custom importer

    This syncer relies on a separately orchestrated beam sync component, which:

        - listens for DoStatelessBlockImport events
        - emits events when data is missing, like CollectMissingAccount
        - emits StatelessBlockImportDone when the block import is completed in the DB

    There is an option, currently only used for testing, to force beam sync at a particular
    block number (rather than trigger it when catching up with a peer).
    """
    def __init__(
            self,
            chain: AsyncChainAPI,
            db: AtomicDatabaseAPI,
            chain_db: BaseAsyncChainDB,
            peer_pool: ETHPeerPool,
            event_bus: EndpointAPI,
            metrics_registry: MetricsRegistry,
            checkpoint: Checkpoint = None,
            force_beam_block_number: BlockNumber = None,
            enable_backfill: bool = True,
            enable_state_backfill: bool = True) -> None:
        self.logger = get_logger('trinity.sync.beam.chain.BeamSyncer')
        self.metrics_registry = metrics_registry

        self._body_for_header_exists = body_for_header_exists(chain_db, chain)

        if checkpoint is None:
            self._launch_strategy: SyncLaunchStrategyAPI = FromGenesisLaunchStrategy(chain_db)
        else:
            self._launch_strategy = FromCheckpointLaunchStrategy(
                chain_db,
                chain,
                checkpoint,
                peer_pool,
            )

        self._header_syncer = ETHHeaderChainSyncer(
            chain,
            chain_db,
            peer_pool,
            self._launch_strategy,
        )
        self._header_persister = HeaderOnlyPersist(
            self._header_syncer,
            chain_db,
            force_beam_block_number,
            self._launch_strategy,
        )

        self._backfiller = BeamStateBackfill(db, peer_pool)

        if enable_state_backfill:
            self._queen_queue: QueenTrackerAPI = self._backfiller
        else:
            self._queen_queue = QueeningQueue(peer_pool)

        self._state_downloader = BeamDownloader(
            db,
            peer_pool,
            self._queen_queue,
            event_bus,
        )
        self._data_hunter = MissingDataEventHandler(
            self._state_downloader,
            event_bus,
            self.metrics_registry,
        )

        self._block_importer = BeamBlockImporter(
            chain,
            db,
            self._state_downloader,
            self._backfiller,
            event_bus,
            self.metrics_registry,
        )
        self._launchpoint_header_syncer = HeaderLaunchpointSyncer(self._header_syncer)
        self._body_syncer = RegularChainBodySyncer(
            chain,
            chain_db,
            peer_pool,
            self._launchpoint_header_syncer,
            self._block_importer,
        )

        self._manual_header_syncer = ManualHeaderSyncer()
        self._fast_syncer = RigorousFastChainBodySyncer(
            chain,
            chain_db,
            peer_pool,
            self._manual_header_syncer,
        )

        self._header_backfill = SequentialHeaderChainGapSyncer(chain, chain_db, peer_pool)
        self._block_backfill = BodyChainGapSyncer(chain, chain_db, peer_pool)

        self._chain = chain
        self._enable_backfill = enable_backfill
        self._enable_state_backfill = enable_state_backfill

    async def run(self) -> None:
        try:
            await self._launch_strategy.fulfill_prerequisites()
        except asyncio.TimeoutError as exc:
            self.logger.exception(
                "Timed out while trying to fulfill prerequisites of "
                f"sync launch strategy: {exc} from {self._launch_strategy}"
            )
            self.manager.cancel()
            return

        self.manager.run_daemon_child_service(self._block_importer)
        self.manager.run_daemon_child_service(self._header_syncer)

        # Kick off the body syncer early (it hangs on the launchpoint header syncer anyway)
        # It needs to start early because we want to "re-run" the header at the tip,
        # which it gets grumpy about. (it doesn't want to receive the canonical header tip
        # as a header to process)
        self.manager.run_daemon_child_service(self._body_syncer)

        # Launch the state syncer endpoint early
        self.manager.run_daemon_child_service(self._data_hunter)

        # Only persist headers at start
        async with background_asyncio_service(self._header_persister) as manager:
            await manager.wait_finished()
        # When header store exits, we have caught up

        # We want to trigger beam sync on the last block received,
        # not wait for the next one to be broadcast
        final_headers = self._header_persister.get_final_headers()

        # First, download block bodies for previous 6 blocks, for validation
        await self._download_blocks(final_headers[0])

        # Now, tell the MissingDataEventHandler about the minimum acceptable block number for
        # data requests. This helps during pivots to quickly reject requests from old block imports
        self._data_hunter.minimum_beam_block_number = min(
            header.block_number for header in final_headers
        )

        # Now let the beam sync importer kick in
        self._launchpoint_header_syncer.set_launchpoint_headers(final_headers)

        # We wait until beam sync has launched before starting backfill, because
        # they both request block bodies, but beam sync needs them urgently.
        if self._enable_backfill:
            # There's no chance to introduce new gaps after this point. Therefore we can run this
            # until it has filled all gaps and let it finish.
            self.manager.run_child_service(self._header_backfill)

            # In contrast, block gap fill needs to run indefinitely because of beam sync pivoting.
            self.manager.run_daemon_child_service(self._block_backfill)

            # Now we can check the lag (presumably ~0) and start backfill
            self.manager.run_daemon_task(self._monitor_historical_backfill)

        # Will start the state background service or the basic queen queue
        self.manager.run_child_service(self._queen_queue)

        # TODO wait until first header with a body comes in?...
        # Start state downloader service
        self.manager.run_daemon_child_service(self._state_downloader)

        # run sync until cancelled
        await self.manager.wait_finished()

    def get_block_count_lag(self) -> int:
        """
        :return: the difference in block number between the currently importing block and
            the latest known block
        """
        return self._body_syncer.get_block_count_lag()

    async def _download_blocks(self, before_header: BlockHeaderAPI) -> None:
        """
        When importing a block, we need to validate uncles against the previous
        six blocks, so download those bodies and persist them to the database.
        """
        parents_needed = FULL_BLOCKS_NEEDED_TO_START_BEAM

        self.logger.info(
            "Downloading %d block bodies for uncle validation, before %s",
            parents_needed,
            before_header,
        )

        # select the recent ancestors to sync block bodies for
        parent_headers = tuple(reversed([
            header async for header
            in self._get_ancestors(parents_needed, header=before_header)
        ]))

        # identify starting tip and headers with possible uncle conflicts for validation
        if len(parent_headers) < parents_needed:
            self.logger.info(
                "Collecting %d blocks to genesis for uncle validation",
                len(parent_headers),
            )
            sync_from_tip = await self._chain.coro_get_canonical_block_header_by_number(
                BlockNumber(0)
            )
            uncle_conflict_headers = parent_headers
        else:
            sync_from_tip = parent_headers[0]
            uncle_conflict_headers = parent_headers[1:]

        # check if we already have the blocks for the uncle conflict headers
        if await self._all_verification_bodies_present(uncle_conflict_headers):
            self.logger.debug("All needed block bodies are already available")
        else:
            # tell the header syncer to emit those headers
            self._manual_header_syncer.emit(uncle_conflict_headers)

            # tell the fast syncer which tip to start from
            self._fast_syncer.set_starting_tip(sync_from_tip)

            # run the fast syncer (which downloads block bodies and then exits)
            self.logger.info("Getting recent block data for uncle validation")
            async with background_asyncio_service(self._fast_syncer) as manager:
                await manager.wait_finished()

        # When this completes, we have all the uncles needed to validate
        self.logger.info("Have all data needed for Beam validation, continuing...")

    async def _get_ancestors(self,
                             limit: int,
                             header: BlockHeaderAPI) -> AsyncIterator[BlockHeaderAPI]:
        """
        Return `limit` number of ancestor headers from the specified header.
        """
        headers_returned = 0

        while header.parent_hash != GENESIS_PARENT_HASH and headers_returned < limit:
            parent = await self._chain.coro_get_block_header_by_hash(header.parent_hash)
            yield parent
            headers_returned += 1
            header = parent

    async def _all_verification_bodies_present(
            self,
            headers_with_potential_conflicts: Iterable[BlockHeaderAPI]) -> bool:

        for header in headers_with_potential_conflicts:
            if not await self._body_for_header_exists(header):
                return False
        return True

    async def _monitor_historical_backfill(self) -> None:
        while self.manager.is_running:
            await asyncio.sleep(PREDICTED_BLOCK_TIME)
            if self._block_backfill.get_manager().is_cancelled:
                return
            else:
                lag = self.get_block_count_lag()
                if lag >= PAUSE_BACKFILL_AT_LAG and not self._block_backfill.is_paused:
                    self.logger.debug(
                        "Pausing historical header/block sync because we lag %s blocks",
                        lag,
                    )
                    self._block_backfill.pause()
                    self._header_backfill.pause()
                elif lag <= RESUME_BACKFILL_AT_LAG and self._block_backfill.is_paused:
                    self.logger.debug(
                        "Resuming historical header/block sync because we lag %s blocks",
                        lag,
                    )
                    self._block_backfill.resume()
                    self._header_backfill.resume()
async def test_header_gap_fill_detects_invalid_attempt(caplog,
                                                       event_loop,
                                                       event_bus,
                                                       chaindb_with_gaps,
                                                       chaindb_1000,
                                                       chaindb_uncle):

    client_context = ChainContextFactory(headerdb__db=chaindb_with_gaps.db)
    server_context = ChainContextFactory(headerdb__db=chaindb_uncle.db)
    peer_pair = LatestETHPeerPairFactory(
        alice_peer_context=client_context,
        bob_peer_context=server_context,
        event_bus=event_bus,
    )

    async with peer_pair as (client_peer, server_peer):
        client_peer_pool = MockPeerPoolWithConnectedPeers([client_peer], event_bus=event_bus)
        client = SequentialHeaderChainGapSyncer(
            LatestTestChain(chaindb_with_gaps.db),
            chaindb_with_gaps,
            client_peer_pool)
        server_peer_pool = MockPeerPoolWithConnectedPeers([server_peer], event_bus=event_bus)
        uncle_chaindb = AsyncChainDB(chaindb_uncle.db)

        async with run_peer_pool_event_server(
            event_bus, server_peer_pool, handler_type=ETHPeerPoolEventServer
        ), background_asyncio_service(
            ETHRequestServer(
                event_bus,
                TO_NETWORKING_BROADCAST_CONFIG,
                uncle_chaindb,
            )):

            server_peer.logger.info("%s is serving 1000 blocks", server_peer)
            client_peer.logger.info("%s is syncing up 1000", client_peer)

            # We check for 499 because 500 exists from the very beginning (the checkpoint)
            expected_block_number = 499
            final_header = chaindb_1000.get_canonical_block_header_by_number(
                expected_block_number)
            async with background_asyncio_service(client):
                try:
                    await wait_for_head(
                        chaindb_with_gaps,
                        final_header,
                        sync_timeout=5,
                    )
                except asyncio.TimeoutError:
                    assert "Attempted to fill gap with invalid header" in caplog.text
                    # Monkey patch the uncle chaindb to effectively make the attacker peer
                    # switch to the correct chain.
                    uncle_chaindb.db = chaindb_1000.db
                    # The hack goes on: Now that our attacker peer turned friendly we may be stuck
                    # waiting for a new skeleton peer forever. This isn't a real life scenario
                    # because: a.) an attacker probably won't turn friendly and b.) new blocks and
                    # peers will constantly yield new skeleton peers.
                    # This ugly hack will tick the chain tip monitor as we simulate a joining peer.
                    for subscriber in client_peer_pool._subscribers:
                        subscriber.register_peer(client_peer)
                    await wait_for_head(
                        chaindb_with_gaps,
                        final_header,
                        sync_timeout=20,
                    )
                else:
                    raise AssertionError(
                        "Succeeded when it was expected to fail")
async def test_sequential_header_gapfill_syncer(request,
                                                event_loop,
                                                event_bus,
                                                chaindb_with_gaps,
                                                chaindb_1000):
    client_context = ChainContextFactory(headerdb__db=chaindb_with_gaps.db)
    server_context = ChainContextFactory(headerdb__db=chaindb_1000.db)
    peer_pair = LatestETHPeerPairFactory(
        alice_peer_context=client_context,
        bob_peer_context=server_context,
        event_bus=event_bus,
    )
    async with peer_pair as (client_peer, server_peer):

        chain_with_gaps = LatestTestChain(chaindb_with_gaps.db)
        client = SequentialHeaderChainGapSyncer(
            chain_with_gaps,
            chaindb_with_gaps,
            MockPeerPoolWithConnectedPeers([client_peer], event_bus=event_bus))
        # Ensure we use small chunks to be able to test pause/resume properly
        client._max_backfill_header_at_once = 100
        server_peer_pool = MockPeerPoolWithConnectedPeers([server_peer], event_bus=event_bus)

        async with run_peer_pool_event_server(
            event_bus, server_peer_pool, handler_type=ETHPeerPoolEventServer
        ), background_asyncio_service(
            ETHRequestServer(
                event_bus,
                TO_NETWORKING_BROADCAST_CONFIG,
                AsyncChainDB(chaindb_1000.db),
            )):

            server_peer.logger.info("%s is serving 1000 blocks", server_peer)
            client_peer.logger.info("%s is syncing up 1000", client_peer)

            async with background_asyncio_service(client):
                # We intentionally only sync up to a block *below* the first gap to have a more
                # difficult scenario for pause/resume. We want to make sure we not only can pause
                # at the times where we synced up to an actual gap. Instead we want to be sure
                # we can pause after we synced up to the `_max_backfill_header_at_once` limit which
                # may be shorter than the actual gap in the chain.
                await wait_for_head(
                    chaindb_with_gaps,
                    chaindb_1000.get_canonical_block_header_by_number(100))
                # Pause the syncer for a moment and check if it continued syncing (it should not!)
                client.pause()
                # Verify that we stopped the chain fast enough, before the gap was fully filled
                # This is a potential source of test flakiness
                with pytest.raises(HeaderNotFound):
                    chaindb_with_gaps.get_canonical_block_header_by_number(249)
                await asyncio.sleep(1)
                # Make sure that the gap filling doesn't complete for a while. We could
                # theoretically get false positives if it's not paused but very slow to fill headers
                with pytest.raises(HeaderNotFound):
                    chaindb_with_gaps.get_canonical_block_header_by_number(249)
                # Now resume syncing
                client.resume()
                await wait_for_head(
                    # We check for 499 because 500 is there from the very beginning (the checkpoint)
                    chaindb_with_gaps,
                    chaindb_1000.get_canonical_block_header_by_number(499))
                # This test is supposed to only fill in headers, so the following should fail.
                # If this ever succeeds it probably means the fixture was re-created with trivial
                # blocks and the test will fail and remind us what kind of fixture we want here.
                with pytest.raises(BlockNotFound):
                    chain_with_gaps.get_canonical_block_by_number(499)