Esempio n. 1
0
async def test_get_nowait(tasks, get_size, expected_tasks):
    """get_nowait() returns the expected tasks, and completing them removes them from the queue."""
    queue = TaskQueue()
    await queue.add(tasks)

    batch_id, fetched = queue.get_nowait(get_size)
    assert fetched == expected_tasks

    queue.complete(batch_id, fetched)

    # none of the completed tasks may still be tracked by the queue
    still_tracked = [task for task in fetched if task in queue]
    assert not still_tracked
Esempio n. 2
0
async def test_unfinished_tasks_readded():
    """Tasks handed out in a batch but not completed are returned by the next get()."""
    queue = TaskQueue()
    await wait(queue.add((2, 1, 3)))

    first_batch, _ = await wait(queue.get())

    # only task 2 gets finished; tasks 1 and 3 are left incomplete
    queue.complete(first_batch, (2, ))

    _, remaining = await wait(queue.get())
    assert remaining == (1, 3)
Esempio n. 3
0
async def test_cannot_complete_batch_with_wrong_task():
    """complete() must reject any task set containing tasks that were not in the batch."""
    queue = TaskQueue()
    await wait(queue.add((1, 2)))

    batch_id, _ = await wait(queue.get())

    # (3, 4): a valid batch cannot be completed with tasks it wasn't given
    # (1, 3): a partially invalid call is rejected too, leaving the valid task incomplete
    for wrong_tasks in ((3, 4), (1, 3)):
        with pytest.raises(ValidationError):
            queue.complete(batch_id, wrong_tasks)

    assert 1 in queue
Esempio n. 4
0
async def test_cannot_complete_batch_unless_pending():
    """complete() must reject calls that do not reference a batch that was handed out."""
    queue = TaskQueue()
    await wait(queue.add((1, 2)))

    # valid tasks, but no batch id at all
    with pytest.raises(ValidationError):
        queue.complete(None, (1, 2))
    assert 1 in queue

    batch_id, _ = await wait(queue.get())

    # valid tasks, but a batch id that was never issued
    unknown_batch_id = batch_id + 1
    with pytest.raises(ValidationError):
        queue.complete(unknown_batch_id, (1, 2))
    assert 1 in queue
Esempio n. 5
0
async def test_unfinished_tasks_readded():
    """num_pending() tracks tasks as they are added, handed out, and re-added when unfinished."""
    queue = TaskQueue()
    await wait(queue.add((2, 1, 3)))
    assert queue.num_pending() == 3

    # handing out the whole batch leaves nothing pending
    first_batch, _ = await wait(queue.get())
    assert queue.num_pending() == 0

    # finishing only task 2 makes the two unfinished tasks pending again
    queue.complete(first_batch, (2, ))
    assert queue.num_pending() == 2

    _, remaining = await wait(queue.get())
    assert remaining == (1, 3)
    assert queue.num_pending() == 0
Esempio n. 6
0
async def test_queue_contains_task_until_complete(tasks):
    """A task counts as "in" the queue from add() until it is completed, even while in flight."""
    queue = TaskQueue(order_fn=id)
    probe = tasks[0]

    # before adding: unknown to the queue
    assert probe not in queue

    # after adding: tracked
    await wait(queue.add(tasks))
    assert probe in queue

    # while handed out in a batch: still tracked
    batch_id, in_flight = await wait(queue.get())
    assert probe in queue

    # after completion: forgotten
    queue.complete(batch_id, in_flight)
    assert probe not in queue
Esempio n. 7
0
async def test_queue_size_reset_after_complete():
    """Tasks that are handed out still occupy queue capacity until they are completed."""
    queue = TaskQueue(maxsize=2)
    await wait(queue.add((1, 2)))

    batch_id, in_flight = await wait(queue.get())

    # both slots are taken by in-flight tasks, so adding a third must time out
    add_timed_out = False
    try:
        await wait(queue.add((3, )))
    except asyncio.TimeoutError:
        add_timed_out = True

    if not add_timed_out:
        raise AssertionError("should not be able to add task past maxsize")

    # pretend the work was done, and complete everything
    queue.complete(batch_id, in_flight)

    # capacity is freed up, so this add must go through
    await wait(queue.add((3, )))
Esempio n. 8
0
class HeaderMeatSyncer(BaseService, PeerSubscriber, Generic[TChainPeer]):
    """
    Download the header segments that fill the gaps following known parent headers.

    Gaps are queued with :meth:`schedule_segment` as
    ``(parent_header, gap_length, skeleton_peer)`` tasks. Tasks are taken one at a time and
    handed to a peer from the idle-peer queue. Segments that match their parent and pass
    chain validation are registered with the stitcher passed to the constructor.
    """
    # We are only interested in peers entering or leaving the pool
    subscription_msg_types: FrozenSet[Type[CommandAPI]] = frozenset()
    msg_queue_maxsize = 2000

    _filler_header_tasks: TaskQueue[Tuple[BlockHeader, int, TChainPeer]]

    def __init__(self, chain: BaseAsyncChain, peer_pool: BaseChainPeerPool,
                 stitcher: HeaderStitcher, token: CancelToken) -> None:
        """
        :param chain: used to validate downloaded header segments
        :param peer_pool: pool that is subscribed to while the service runs
        :param stitcher: receives every validated header segment
        :param token: cancel token passed on to the base service
        """
        super().__init__(token=token)
        self._chain = chain
        self._stitcher = stitcher
        max_pending_fillers = 50
        self._filler_header_tasks = TaskQueue(
            max_pending_fillers,
            # order by block number of the parent header
            compose(attrgetter('block_number'), itemgetter(0)),
        )

        # queue up idle peers; the queue is built on the block headers response command
        # (presumably ranking peers by how quickly they answer header requests)
        self._waiting_peers: WaitingPeers[TChainPeer] = WaitingPeers(
            BaseBlockHeaders)
        self._peer_pool = peer_pool

    def register_peer(self, peer: BasePeer) -> None:
        """
        Register the peer with the parent class, then add it to the idle-peer queue.
        """
        super().register_peer(peer)
        # when a new peer is added to the pool, add it to the idle peer list
        self._waiting_peers.put_nowait(peer)  # type: ignore

    async def schedule_segment(self, parent_header: BlockHeader,
                               gap_length: int,
                               skeleton_peer: TChainPeer) -> None:
        """
        Queue a header gap for download.

        :param parent_header: the parent of the gap to fill
        :param gap_length: how long is the header gap
        :param skeleton_peer: the peer that provided the parent_header. It is carried along
            with the task. NOTE(review): nothing in this class prevents the same peer from
            being picked to fill the gap - confirm whether that exclusion is still intended.
        """
        await self.wait(
            self._filler_header_tasks.add(
                ((parent_header, gap_length, skeleton_peer), )))

    async def _run(self) -> None:
        """
        Start the stats logger, then match queued gaps to peers while subscribed to the pool.
        """
        self.run_daemon_task(self._display_stats())
        with self.subscribe(self._peer_pool):
            await self.wait(self._match_header_dls_to_peers())

    async def _display_stats(self) -> None:
        """
        Log the state of the gap queue every 5 seconds while the service is operational.
        """
        q = self._filler_header_tasks
        while self.is_operational:
            await self.sleep(5)
            self.logger.debug(
                "Header Skeleton Gaps: active=%d queued=%d max=%d",
                q.num_in_progress(),
                len(q),
                q._maxsize,
            )

    async def _match_header_dls_to_peers(self) -> None:
        """
        Take queued gaps one at a time, and assign each of them to a peer.
        """
        while self.is_operational:
            # request a batch of exactly one task, and unpack that single task
            batch_id, (
                (parent_header, gap,
                 skeleton_peer), ) = await self._filler_header_tasks.get(1)

            await self._match_dl_to_peer(batch_id, parent_header, gap,
                                         skeleton_peer)

    async def _match_dl_to_peer(self, batch_id: int,
                                parent_header: BlockHeader, gap: int,
                                skeleton_peer: TChainPeer) -> None:
        """
        Wait for an idle peer, then launch the download of a single gap as a background task.

        :param batch_id: id of the task queue batch holding this single gap task
        :param parent_header: the parent of the gap to fill
        :param gap: number of headers in the gap
        :param skeleton_peer: peer that provided parent_header (part of the task identity)
        """
        def fail_task() -> None:
            # close the batch without marking any task as complete
            self._filler_header_tasks.complete(batch_id, tuple())

        peer = await self._waiting_peers.get_fastest()

        def complete_task() -> None:
            # close the batch, marking its only task as complete
            self._filler_header_tasks.complete(
                batch_id, ((parent_header, gap, skeleton_peer), ))

        self.run_task(
            self._run_fetch_segment(peer, parent_header, gap, complete_task,
                                    fail_task))

    async def _run_fetch_segment(self, peer: TChainPeer,
                                 parent_header: BlockHeader, length: int,
                                 complete_task_fn: Callable[[], None],
                                 fail_task_fn: Callable[[], None]) -> None:
        """
        Fetch one segment from the peer, and report the outcome through the callbacks.

        Exactly one of the callbacks is invoked on every path. On any exception the peer is
        not put back in the idle-peer queue. OperationCancelled is re-raised after the task
        is failed; other exceptions are logged and swallowed.

        :param peer: the peer to download from
        :param parent_header: the parent of the segment to download
        :param length: number of headers expected in the segment
        :param complete_task_fn: called when all ``length`` headers were retrieved
        :param fail_task_fn: called on any error, or when the segment came back short
        """
        try:
            completed_headers = await peer.wait(
                self._fetch_segment(peer, parent_header, length))
        except BaseP2PError as exc:
            self.logger.info(
                "Unexpected p2p err while downloading headers from %s: %s",
                peer, exc)
            self.logger.debug(
                "Problem downloading headers from peer, dropping...",
                exc_info=True)
            fail_task_fn()
        except OperationCancelled:
            self.logger.debug(
                "Service cancellation while fetching segment, dropping %s from queue",
                peer,
                exc_info=True,
            )
            fail_task_fn()
            raise
        except Exception as exc:
            self.logger.info(
                "Unexpected err while downloading headers from %s: %s", peer,
                exc)
            self.logger.debug(
                "Problem downloading headers from peer, dropping...",
                exc_info=True)
            fail_task_fn()
        else:
            if len(completed_headers) == length:
                # peer completed successfully, so have it get back in line for processing
                self._waiting_peers.put_nowait(peer)
                complete_task_fn()
            else:
                # peer didn't return enough results, wait a while before trying again
                delay = EMPTY_PEER_RESPONSE_PENALTY
                self.logger.debug(
                    "Pausing %s for %.1fs, for sending %d headers",
                    peer,
                    delay,
                    len(completed_headers),
                )
                self.call_later(delay, self._waiting_peers.put_nowait, peer)
                fail_task_fn()

    async def _fetch_segment(self, peer: TChainPeer,
                             parent_header: BlockHeader,
                             length: int) -> Tuple[BlockHeader, ...]:
        """
        Request ``length`` headers following parent_header, and validate the response.

        :return: the validated headers, after registering them with the stitcher; or an empty
            tuple if the response was empty, did not attach to parent_header, had the wrong
            length, or failed chain validation (in which case the peer is disconnected)
        :raise ValidationError: if ``length`` exceeds the peer's ``max_headers_fetch``
        """
        if length > peer.max_headers_fetch:
            raise ValidationError(
                f"Can't request {length} headers, because peer maximum is {peer.max_headers_fetch}"
            )

        headers = await self._request_headers(peer,
                                              parent_header.block_number + 1,
                                              length)
        if not headers:
            return tuple()

        if headers[0].parent_hash != parent_header.hash:
            # Segment doesn't match leading peer, drop this peer
            # Eventually, we'll do something smarter, in case the leading peer is the divergent one
            self.logger.warning(
                "%s returned segment starting %s & parent %s, doesn't match %s, ignoring result...",
                peer,
                headers[0],
                humanize_hash(headers[0].parent_hash),
                parent_header,
            )
            return tuple()

        if len(headers) != length:
            self.logger.debug(
                "Ignoring %d headers from %s, because wanted %d",
                len(headers),
                peer,
                length,
            )
            return tuple()

        try:
            await self.wait(
                self._chain.coro_validate_chain(
                    parent_header,
                    headers,
                    SEAL_CHECK_RANDOM_SAMPLE_RATE,
                ))
        except ValidationError as e:
            self.logger.warning(
                "Received invalid header segment from %s against known parent %s, "
                "disconnecting: %s",
                peer,
                parent_header,
                e,
            )
            await peer.disconnect(DisconnectReason.subprotocol_error)
            return tuple()

        # stitch headers together in order, ignoring duplicates
        self._stitcher.register_tasks(headers, ignore_duplicates=True)
        return headers

    async def _request_headers(self, peer: TChainPeer, start_at: BlockNumber,
                               length: int) -> Tuple[BlockHeader, ...]:
        """
        Issue the header request to the peer.

        :return: the headers returned by the peer, or an empty tuple on a timeout, a
            cancellation, or a lost peer connection
        :raise Exception: any other error is logged and re-raised
        """
        self.logger.debug("Requesting %d headers from %s", length, peer)
        try:
            return await peer.requests.get_block_headers(start_at,
                                                         length,
                                                         skip=0,
                                                         reverse=False)
        except TimeoutError:
            self.logger.debug("Timed out requesting %d headers from %s",
                              length, peer)
            return tuple()
        except CancelledError:
            self.logger.debug("Pending headers call to %r future cancelled",
                              peer)
            return tuple()
        except OperationCancelled:
            self.logger.debug2(
                "Pending headers call to %r operation cancelled", peer)
            return tuple()
        except PeerConnectionLost:
            self.logger.debug(
                "Peer went away, cancelling the headers request and moving on..."
            )
            return tuple()
        except Exception:
            self.logger.exception("Unknown error when getting headers")
            raise
Esempio n. 9
0
class BaseBodyChainSyncer(BaseService, PeerSubscriber):
    """
    Shared machinery for syncers that download block bodies from peers.

    Headers that need a body are tracked in ``self._block_body_tasks``. Idle peers are
    tracked in ``self._body_peers``. Downloaded bodies are kept in ``self._pending_bodies``,
    keyed by header, until a subclass consumes them.
    """

    NO_PEER_RETRY_PAUSE = 5.0
    "If no peers are available for downloading the chain data, retry after this many seconds"

    # We are only interested in peers entering or leaving the pool
    subscription_msg_types: FrozenSet[Type[Command]] = frozenset()

    # This is a rather arbitrary value, but when the sync is operating normally we never see
    # the msg queue grow past a few hundred items, so this should be a reasonable limit for
    # now.
    msg_queue_maxsize = 2000

    tip_monitor_class = ETHChainTipMonitor

    _pending_bodies: Dict[BlockHeader, BlockBody]

    def __init__(self,
                 chain: BaseAsyncChain,
                 db: BaseAsyncChainDB,
                 peer_pool: ETHPeerPool,
                 token: CancelToken = None) -> None:
        """
        :param chain: chain object, stored for use by subclasses
        :param db: chain database, stored for use by subclasses
        :param peer_pool: pool that is subscribed to while the service runs
        :param token: cancel token passed on to the base service
        """
        super().__init__(token=token)
        self.chain = chain
        self.db = db
        self._peer_pool = peer_pool
        self._pending_bodies = {}

        # queue up any idle peers, in order of how fast they return block bodies
        self._body_peers: WaitingPeers[ETHPeer] = WaitingPeers(
            commands.BlockBodies)

        # Track incomplete block body download tasks
        # - arbitrarily allow several requests-worth of headers queued up
        # - try to get bodies from lower block numbers first
        buffer_size = MAX_BODIES_FETCH * REQUEST_BUFFER_MULTIPLIER
        self._block_body_tasks = TaskQueue(buffer_size,
                                           attrgetter('block_number'))

    async def _run(self) -> None:
        """
        Stay subscribed to the peer pool until the service is cancelled.
        """
        with self.subscribe(self._peer_pool):
            await self.events.cancelled.wait()

    async def _assign_body_download_to_peers(self) -> None:
        """
        Loop indefinitely, assigning idle peers to download any block bodies needed for syncing.
        """
        while self.is_operational:
            # from all the peers that are not currently downloading block bodies, get the fastest
            peer = await self.wait(self._body_peers.get_fastest())

            # get headers for bodies that we need to download, preferring lowest block number
            batch_id, headers = await self.wait(
                self._block_body_tasks.get(MAX_BODIES_FETCH))

            # schedule the body download and move on
            peer.run_task(
                self._run_body_download_batch(peer, batch_id, headers))

    async def _block_body_bundle_processing(
            self, bundles: Tuple[BlockBodyBundle, ...]) -> None:
        """
        By default, no body bundle processing is needed.

        Subclasses may choose to do some post-processing. Notably, fast sync immediately saves
        block body bundles to the database.
        """
        pass

    async def _run_body_download_batch(
            self, peer: ETHPeer, batch_id: int,
            all_headers: Tuple[BlockHeader, ...]) -> None:
        """
        Given a single batch retrieved from self._block_body_tasks, get as many of the block bodies
        as possible, and mark them as complete.

        Headers with an empty body ("trivial") are completed without any request. Afterwards
        the peer is put back in the idle queue immediately if it had nothing to download or
        delivered at least one body. It is put back after EMPTY_PEER_RESPONSE_PENALTY seconds
        if it delivered nothing. It is not put back at all after a BaseP2PError.
        """

        non_trivial_headers = tuple(header for header in all_headers
                                    if not _is_body_empty(header))
        trivial_headers = tuple(header for header in all_headers
                                if _is_body_empty(header))

        if trivial_headers:
            self.logger.debug2(
                "Found %d/%d trivial block bodies, skipping those requests",
                len(trivial_headers),
                len(all_headers),
            )

        # even if trivial_headers is (), assign it so the finally block can run, in case of error
        completed_headers = trivial_headers

        # Headers whose bodies the peer actually delivered. This is tracked separately from
        # completed_headers, because trivial headers are completed locally and must not count
        # as a response from the peer.
        received_headers: Tuple[BlockHeader, ...] = tuple()

        try:
            if non_trivial_headers:
                bundles, received_headers = await peer.wait(
                    self._get_block_bodies(peer, non_trivial_headers))
                await self._block_body_bundle_processing(bundles)
                completed_headers = trivial_headers + received_headers

        except BaseP2PError as exc:
            self.logger.info(
                "Unexpected p2p perror while downloading body from peer: %s",
                exc)
            self.logger.debug(
                "Problem downloading body from peer, dropping...",
                exc_info=True)
        else:
            if not non_trivial_headers or received_headers:
                # peer either had nothing to do, or delivered at least 1 body, so have it
                # get back in line for processing
                self._body_peers.put_nowait(peer)
            else:
                # peer returned no results, wait a while before trying again
                delay = EMPTY_PEER_RESPONSE_PENALTY
                self.logger.debug(
                    "Pausing %s for %.1fs, for sending 0 block bodies", peer,
                    delay)
                loop = self.get_event_loop()
                loop.call_later(delay,
                                partial(self._body_peers.put_nowait, peer))
        finally:
            self._mark_body_download_complete(batch_id, completed_headers)

    def _mark_body_download_complete(
            self, batch_id: int, completed_headers: Tuple[BlockHeader,
                                                          ...]) -> None:
        """
        Close the task queue batch, marking only completed_headers as done.
        """
        self._block_body_tasks.complete(batch_id, completed_headers)

    async def _get_block_bodies(
        self,
        peer: ETHPeer,
        headers: Tuple[BlockHeader, ...],
    ) -> Tuple[Tuple[BlockBodyBundle, ...], Tuple[BlockHeader, ...]]:
        """
        Request and return block bodies, pairing them with the associated headers.
        Store the bodies for later use, during block import (or persist).

        Note the difference from _request_block_bodies, which only issues the request,
        and doesn't pair the results with the associated block headers that were successfully
        delivered.

        :return: a pair of (all bundles returned by the peer, the headers whose
            transaction root and uncles hash match one of the returned bodies)
        """
        block_body_bundles = await self.wait(
            self._request_block_bodies(peer, headers))

        if len(block_body_bundles) == 0:
            self.logger.debug(
                "Got block bodies for 0/%d headers from %s, from %r..%r",
                len(headers),
                peer,
                headers[0],
                headers[-1],
            )
            return tuple(), tuple()

        # index the returned bodies by the pair of roots that identifies them in a header
        bodies_by_root = {
            (transaction_root, uncles_hash): block_body
            for block_body, (transaction_root,
                             _), uncles_hash in block_body_bundles
        }

        header_roots = {
            header: (header.transaction_root, header.uncles_hash)
            for header in headers
        }

        # keep only the headers for which a matching body was returned
        completed_header_roots = valfilter(lambda root: root in bodies_by_root,
                                           header_roots)

        completed_headers = tuple(completed_header_roots.keys())

        # store bodies for later usage, during block import
        pending_bodies = {
            header: bodies_by_root[root]
            for header, root in completed_header_roots.items()
        }
        self._pending_bodies = merge(self._pending_bodies, pending_bodies)

        self.logger.debug(
            "Got block bodies for %d/%d headers from %s, from %r..%r",
            len(completed_header_roots),
            len(headers),
            peer,
            headers[0],
            headers[-1],
        )

        return block_body_bundles, completed_headers

    async def _request_block_bodies(
            self, peer: ETHPeer,
            batch: Tuple[BlockHeader, ...]) -> Tuple[BlockBodyBundle, ...]:
        """
        Requests the batch of block bodies from the given peer, returning the
        returned block bodies data, or an empty tuple on an error.

        Timeouts, cancellations and lost connections produce the empty tuple; any other
        exception is logged and re-raised.
        """
        self.logger.debug("Requesting block bodies for %d headers from %s",
                          len(batch), peer)
        try:
            return await peer.requests.get_block_bodies(batch)
        except TimeoutError:
            self.logger.debug(
                "Timed out requesting block bodies for %d headers from %s",
                len(batch),
                peer,
            )
            return tuple()
        except CancelledError:
            self.logger.debug(
                "Pending block bodies call to %r future cancelled", peer)
            return tuple()
        except OperationCancelled:
            self.logger.debug2(
                "Pending block bodies call to %r operation cancelled", peer)
            return tuple()
        except PeerConnectionLost:
            self.logger.debug(
                "Peer went away, cancelling the block body request and moving on..."
            )
            return tuple()
        except Exception:
            self.logger.exception("Unknown error when getting block bodies")
            raise
Esempio n. 10
0
class FastChainBodySyncer(BaseBodyChainSyncer):
    """
    Sync with the Ethereum network by fetching block headers/bodies and storing them in our DB.

    Here, the run() method returns as soon as we complete a sync with the peer that announced the
    highest TD, at which point we must run the StateDownloader to fetch the state for our chain
    head.
    """
    def __init__(self,
                 chain: BaseAsyncChain,
                 db: BaseAsyncChainDB,
                 peer_pool: ETHPeerPool,
                 header_syncer: HeaderSyncerAPI,
                 token: CancelToken = None) -> None:
        """
        Set up receipt download tracking and block persistence tracking, on top of the
        body download machinery initialized by the parent class.

        :param header_syncer: source of the headers to sync, and of the sync target hash
        """
        super().__init__(chain, db, peer_pool, token)

        self._header_syncer = header_syncer

        # idle peers, queued up in order of how fast they return receipts
        self._receipt_peers: WaitingPeers[ETHPeer] = WaitingPeers(
            commands.Receipts)

        # Receipt download tasks:
        # - the queue arbitrarily holds several requests-worth of headers
        # - receipts for lower block numbers are handed out first
        receipt_queue_size = MAX_RECEIPTS_FETCH * REQUEST_BUFFER_MULTIPLIER
        order_by_block_number = attrgetter('block_number')
        self._receipt_tasks = TaskQueue(receipt_queue_size,
                                        order_by_block_number)

        # A block is ready to persist when both its body and its receipts are collected,
        # and its parent block (identified by parent_hash) has been persisted already
        self._block_persist_tracker = OrderedTaskPreparation(
            BlockPersistPrereqs,
            id_extractor=attrgetter('hash'),
            dependency_extractor=attrgetter('parent_hash'),
        )

        # Flipped to True once the fast chain syncer has completed its goal
        self.is_complete = False

    async def _run(self) -> None:
        """
        Anchor the sync at the current canonical head, start all background loops, then
        hand over to the parent class's run loop.
        """
        canonical_head = await self.wait(self.db.coro_get_canonical_head())
        self.tracker = ChainSyncPerformanceTracker(canonical_head)

        # the canonical head is already persisted, so its children may be persisted next
        self._block_persist_tracker.set_finished_dependency(canonical_head)

        daemon_loops = (
            self._launch_prerequisite_tasks,
            self._assign_receipt_download_to_peers,
            self._assign_body_download_to_peers,
            self._persist_ready_blocks,
            self._display_stats,
        )
        for start_loop in daemon_loops:
            self.run_daemon_task(start_loop())

        await super()._run()

    def register_peer(self, peer: BasePeer) -> None:
        """
        Register the peer with the parent class, then queue it as idle for both body and
        receipt downloads.
        """
        super().register_peer(peer)
        eth_peer = cast(ETHPeer, peer)
        for idle_peers in (self._body_peers, self._receipt_peers):
            idle_peers.put_nowait(eth_peer)

    async def _launch_prerequisite_tasks(self) -> None:
        """
        Watch for new headers to be added to the queue, and add the prerequisite
        tasks as they become available.

        Each group of headers is registered with the block persist tracker, then queued for
        body and receipt download. If the first header's parent is not a known dependency,
        the parent is looked up in the database and used as the new starting point; a missing
        parent makes that lookup raise, which ends this loop.
        """
        async for headers in self.wait_iter(
                self._header_syncer.new_sync_headers()):
            try:
                # We might end up with duplicates that can be safely ignored.
                # Likely scenario: switched which peer downloads headers, and the new peer isn't
                # aware of some of the in-progress headers
                self._block_persist_tracker.register_tasks(
                    headers, ignore_duplicates=True)
            except MissingDependency:
                # The parent of this header is not registered as a dependency yet.
                # Some reasons this might happen, in rough descending order of likelihood:
                #   - a normal fork: the canonical head isn't the parent of the first header synced
                #   - a bug: the DB has inconsistent state, say saved headers but not block bodies
                #   - a bug: headers were queued out of order in new_sync_headers

                # If the parent header doesn't exist yet, this is a legit bug instead of a fork,
                # let the HeaderNotFound exception bubble up
                parent_header = await self.wait(
                    self.db.coro_get_block_header_by_hash(
                        headers[0].parent_hash))

                # Go through self.wait() like every other DB call here, so that a service
                # cancellation can interrupt the lookup
                canonical_head = await self.wait(
                    self.db.coro_get_canonical_head())

                # This appears to be a fork, since the parent header is persisted,
                self.logger.info(
                    "Fork found while starting fast sync. Canonical head was %s, but the next "
                    "header %s, has parent %s. Importing fork in case it's the longest chain.",
                    canonical_head,
                    headers[0],
                    parent_header,
                )
                # Set first header's parent as finished
                self._block_persist_tracker.set_finished_dependency(
                    parent_header)
                # Re-register the header tasks, which will now succeed. Duplicates must be
                # ignored here too: the headers are the same ones as in the first attempt,
                # so any duplicate tolerated there would otherwise fail the retry.
                self._block_persist_tracker.register_tasks(
                    headers, ignore_duplicates=True)

            # Sometimes duplicates are added to the queue, when switching from one sync to another.
            # We can simply ignore them.
            new_body_tasks = tuple(h for h in headers
                                   if h not in self._block_body_tasks)
            new_receipt_tasks = tuple(h for h in headers
                                      if h not in self._receipt_tasks)

            # if any one of the output queues gets full, hang until there is room
            await self.wait(
                asyncio.gather(
                    self._block_body_tasks.add(new_body_tasks),
                    self._receipt_tasks.add(new_receipt_tasks),
                ))

    async def _display_stats(self) -> None:
        """
        Every 5 seconds while the service is operational, log the state of the download
        queues (debug level) and the sync progress reported by the tracker (info level).
        """
        while self.is_operational:
            await self.sleep(5)
            self.logger.debug(
                "(in progress, queued, max size) of bodies, receipts: %r",
                [(q.num_in_progress(), len(q), q._maxsize) for q in (
                    self._block_body_tasks,
                    self._receipt_tasks,
                )],
            )

            stats = self.tracker.report()

            # Build the current time as an *aware* UTC datetime. A naive value, like the one
            # returned by utcnow(), is interpreted as local time by timestamp(), which skews
            # the result by the host's UTC offset.
            # NOTE(review): assumes the header timestamp is in Unix epoch seconds - confirm
            utcnow = int(
                datetime.datetime.now(datetime.timezone.utc).timestamp())
            head_age = utcnow - stats.latest_head.timestamp
            self.logger.info(
                ("blks=%-4d  "
                 "txs=%-5d  "
                 "bps=%-3d  "
                 "tps=%-4d  "
                 "elapsed=%0.1f  "
                 "head=#%d %s  "
                 "age=%s"),
                stats.num_blocks,
                stats.num_transactions,
                stats.blocks_per_second,
                stats.transactions_per_second,
                stats.elapsed,
                stats.latest_head.block_number,
                humanize_hash(stats.latest_head.hash),
                humanize_elapsed(head_age),
            )

    async def _persist_ready_blocks(self) -> None:
        """
        Persist every block whose body and receipts have both been downloaded, in an order
        that guarantees a block's parent is persisted before the block itself.

        After each round, check whether the header syncer's target hash was among the
        persisted headers. If so, mark the sync as complete and stop.
        """
        while self.is_operational:
            # This tracker waits for all prerequisites to be complete, and returns headers in
            # order, so that each header's parent is already persisted.
            ready_headers = await self.wait(
                self._block_persist_tracker.ready_tasks())

            await self.wait(self._persist_blocks(ready_headers))

            target_hash = self._header_syncer.get_target_header_hash()
            reached_target = any(header.hash == target_hash
                                 for header in ready_headers)

            if reached_target:
                # exit the service when reaching the target hash
                self._mark_complete()
                return

    def _mark_complete(self) -> None:
        """
        Flag the fast sync as having reached its goal, then trigger cancellation of this
        service without waiting for it.
        """
        self.is_complete = True
        self.cancel_nowait()

    async def _persist_blocks(self, headers: Tuple[BlockHeader, ...]) -> None:
        """
        Build a block for each of the given headers, and write it directly to the database.

        Headers with an empty body produce a block with no transactions and no uncles. For
        all others, the body is taken (and removed) from the pending bodies.

        :param headers: headers for which block bodies and receipts have been downloaded
        """
        for header in headers:
            block_class = self.chain.get_vm_class(header).get_block_class()

            # start from an empty body, and fill it in only if the header calls for one
            transactions: List[BaseTransaction] = []
            uncles: List[BlockHeader] = []

            if not _is_body_empty(header):
                body = self._pending_bodies.pop(header)
                uncles = body.uncles

                # transaction data was already persisted in _block_body_bundle_processing, but
                # we need to include the transactions for them to be added to the hash->txn lookup
                tx_class = block_class.get_transaction_class()
                transactions = list(
                    map(tx_class.from_base_transaction, body.transactions))

                # record progress in the tracker
                self.tracker.record_transactions(len(transactions))

            await self.wait(
                self.db.coro_persist_block(
                    block_class(header, transactions, uncles)))
            self.tracker.set_latest_head(header)

    async def _assign_receipt_download_to_peers(self) -> None:
        """
        While the service is operational, keep pairing idle peers with batches of headers
        that still need their receipts downloaded.
        """
        while self.is_operational:
            # take the fastest of the peers that are not busy downloading receipts
            idle_peer = await self.wait(self._receipt_peers.get_fastest())

            # take the next batch of headers needing receipts, lowest block numbers first
            next_batch = self._receipt_tasks.get(MAX_RECEIPTS_FETCH)
            batch_id, headers = await self.wait(next_batch)

            # launch the download on the peer, without waiting for it to finish
            download = self._run_receipt_download_batch(
                idle_peer, batch_id, headers)
            idle_peer.run_task(download)

    def _mark_body_download_complete(
            self, batch_id: int, completed_headers: Tuple[BlockHeader,
                                                          ...]) -> None:
        """
        Complete the body download batch through the parent implementation, then record
        the StoreBlockBodies prerequisite as finished for the completed headers.

        :param batch_id: id of the body download batch being closed
        :param completed_headers: headers whose bodies are now available
        """
        super()._mark_body_download_complete(batch_id, completed_headers)
        self._block_persist_tracker.finish_prereq(
            BlockPersistPrereqs.StoreBlockBodies,
            completed_headers,
        )

    async def _run_receipt_download_batch(
            self, peer: ETHPeer, batch_id: int, headers: Tuple[BlockHeader,
                                                               ...]) -> None:
        """
        Download the receipts for one batch taken from self._receipt_tasks, record the
        StoreReceipts prerequisite for every header that was collected, and close the batch.

        The peer goes back in the idle queue right away if it delivered anything, and after
        EMPTY_PEER_RESPONSE_PENALTY seconds if it delivered nothing. It is not put back at
        all after a BaseP2PError.
        """
        # Start from "nothing collected", so that the finally clause can close the batch even
        # if _process_receipts raises
        completed_headers: Tuple[BlockHeader, ...] = tuple()
        try:
            completed_headers = await peer.wait(
                self._process_receipts(peer, headers))

            self._block_persist_tracker.finish_prereq(
                BlockPersistPrereqs.StoreReceipts,
                completed_headers,
            )
        except BaseP2PError as exc:
            self.logger.info(
                "Unexpected p2p perror while downloading receipt from peer: %s",
                exc)
            self.logger.debug(
                "Problem downloading receipt from peer, dropping...",
                exc_info=True)
        else:
            if len(completed_headers) == 0:
                # peer returned no results, wait a while before trying again
                delay = EMPTY_PEER_RESPONSE_PENALTY
                self.logger.debug(
                    "Pausing %s for %.1fs, for sending 0 receipts", peer,
                    delay)
                self.call_later(delay, self._receipt_peers.put_nowait, peer)
            else:
                # peer completed successfully, so have it get back in line for processing
                self._receipt_peers.put_nowait(peer)
        finally:
            self._receipt_tasks.complete(batch_id, completed_headers)

    async def _block_body_bundle_processing(
            self, bundles: Tuple[BlockBodyBundle, ...]) -> None:
        """
        Persist the trie data carried by each block body bundle straight to the database,
        one bundle at a time, which is what keeps fast sync fast.
        """
        for bundle in bundles:
            # only the trie data nested in the middle element of the bundle is needed here
            _, (_, trie_nodes), _ = bundle
            await self.wait(self.db.coro_persist_trie_data_dict(trie_nodes))

    async def _process_receipts(
            self, peer: ETHPeer,
            all_headers: Tuple[BlockHeader, ...]) -> Tuple[BlockHeader, ...]:
        """
        Downloads and persists the receipts for the given set of block headers.
        Some receipts may be trivial, having a blank root hash, and will not be requested.

        Headers that share a receipt root are grouped, and only one header per root is sent
        in the request. If the peer returns nothing, only the trivial headers are reported
        back. If the returned receipts fail validation, the peer is disconnected and, again,
        only the trivial headers are reported back.

        :param peer: to issue the receipt request to
        :param all_headers: attempt to get receipts for as many of these headers as possible
        :return: the headers for receipts that were successfully downloaded (or were trivial)
        """
        # Post-Byzantium blocks may have identical receipt roots (e.g. when they have the same
        # number of transactions and all succeed/failed: ropsten blocks 2503212 and 2503284),
        # so we do this to avoid requesting the same receipts multiple times.

        # combine headers with the same receipt root, so we can mark them as completed, later
        receipt_root_to_headers = groupby(attrgetter('receipt_root'),
                                          all_headers)

        # Ignore headers that have an empty receipt root
        trivial_headers = tuple(
            receipt_root_to_headers.pop(BLANK_ROOT_HASH, tuple()))

        # pick one of the headers for each missing receipt root
        unique_headers_needed = tuple(
            first(headers) for headers in receipt_root_to_headers.values())

        if not unique_headers_needed:
            return trivial_headers

        receipt_bundles = await self._request_receipts(peer,
                                                       unique_headers_needed)

        if not receipt_bundles:
            return trivial_headers

        try:
            await self._validate_receipts(unique_headers_needed,
                                          receipt_bundles)
        except ValidationError as err:
            self.logger.info(
                "Disconnecting from %s: sent invalid receipt: %s",
                peer,
                err,
            )
            await peer.disconnect(DisconnectReason.bad_protocol)
            return trivial_headers

        # process all of the returned receipts, storing their trie data
        # dicts in the database; each bundle is (receipts, (receipt_root, trie_data_dict)),
        # and the receipts themselves are not needed here
        _, trie_roots_and_data_dicts = zip(*receipt_bundles)
        receipt_roots, trie_data_dicts = zip(*trie_roots_and_data_dicts)
        for trie_data in trie_data_dicts:
            await self.wait(self.db.coro_persist_trie_data_dict(trie_data))

        # Identify which headers have the receipt roots that are now complete.
        # A set keeps each membership test constant-time, instead of scanning the tuple of
        # returned roots once per group of headers.
        returned_roots = frozenset(receipt_roots)
        newly_completed_headers = tuple(concat(
            headers for root, headers in receipt_root_to_headers.items()
            if root in returned_roots))

        self.logger.debug(
            "Got receipts for %d/%d headers from %s, with %d trivial headers",
            len(newly_completed_headers),
            len(all_headers) - len(trivial_headers),
            peer,
            len(trivial_headers),
        )
        return newly_completed_headers + trivial_headers

    async def _validate_receipts(
            self, headers: Tuple[BlockHeader, ...],
            receipt_bundles: Tuple[ReceiptBundle, ...]) -> None:
        """
        Check every returned receipt against the header that shares its receipt root.

        Headers with empty receipts, bundles with a blank root, and headers whose receipts the
        peer did not return are all skipped. Any exception raised by
        ``self.chain.coro_validate_receipt`` propagates to the caller.
        """
        headers_to_check = {}
        for header in headers:
            if _is_receipts_empty(header):
                continue
            headers_to_check[header.receipt_root] = header

        returned_receipts = {}
        for receipts, (root, _) in receipt_bundles:
            if root != BLANK_ROOT_HASH:
                returned_receipts[root] = receipts

        for root, header in headers_to_check.items():
            # receipt groups that the peer did not return are simply not validated
            if root in returned_receipts:
                for receipt in returned_receipts[root]:
                    await self.chain.coro_validate_receipt(receipt, header)

    async def _request_receipts(
            self, peer: ETHPeer,
            batch: Tuple[BlockHeader, ...]) -> Tuple[ReceiptBundle, ...]:
        """
        Ask the given peer for the receipts that belong to the batch of headers.

        Timeouts, cancellations and a lost peer connection are logged and reported as an empty
        tuple, as is a falsy response. Any other exception is logged and re-raised.
        """
        self.logger.debug("Requesting receipts for %d headers from %s",
                          len(batch), peer)
        try:
            response = await peer.requests.get_receipts(batch)
        except TimeoutError:
            self.logger.debug(
                "Timed out requesting receipts for %d headers from %s",
                len(batch),
                peer,
            )
            return tuple()
        except CancelledError:
            self.logger.debug("Pending receipts call to %r future cancelled",
                              peer)
            return tuple()
        except OperationCancelled:
            self.logger.debug2(
                "Pending receipts call to %r operation cancelled", peer)
            return tuple()
        except PeerConnectionLost:
            self.logger.debug(
                "Peer went away, cancelling the receipts request and moving on..."
            )
            return tuple()
        except Exception:
            self.logger.exception("Unknown error when getting receipts")
            raise
        else:
            # normalize any falsy response into an empty tuple
            return response if response else tuple()
Esempio n. 11
0
async def test_two_pending_adds_one_release():
    """
    Two add() calls are blocked on a queue of size 2; completing a batch lets them proceed.

    Walks through: a first add() that only partially fits, a second add() that has to wait,
    a consumer that takes and completes the first batch, and a second consumer that takes the
    next batch, which is assembled from the leftovers of both adds in priority order.
    """
    # a queue with room for only two tasks at a time
    q = TaskQueue(2)

    asyncio.ensure_future(q.add((3, 1, 2)))

    # wait for ^ to run and pause
    await asyncio.sleep(0)
    # note that the highest-priority items are queued first
    assert 1 in q
    assert 2 in q
    assert 3 not in q

    # two tasks are queued, none are started
    assert len(q) == 2
    assert q.num_in_progress() == 0

    asyncio.ensure_future(q.add((0, 4)))
    # wait for ^ to run and pause
    await asyncio.sleep(0)

    # task consumer 1 takes the first two pending tasks
    batch, tasks = await wait(q.get())
    assert tasks == (1, 2)

    # both tasks started; in-progress tasks still count towards len(q)
    assert len(q) == 2
    assert q.num_in_progress() == 2

    q.complete(batch, tasks)

    # tasks are drained, but new ones aren't added yet...
    assert q.num_in_progress() == 0
    assert len(q) == 0

    # give the two paused add() calls a chance to run
    await asyncio.sleep(0.01)

    # Now the tasks are added
    assert q.num_in_progress() == 0
    assert len(q) == 2

    # task consumer 2 gets the next two, in priority order
    batch, tasks = await wait(q.get())

    assert len(tasks) == 2

    # one leftover from each of the two add() calls
    assert tasks == (0, 3)

    assert q.num_in_progress() == 2
    assert len(q) == 2

    # finish the batch, which frees up room for the add() call that is still pending
    q.complete(batch, tasks)

    # All current tasks finished
    assert q.num_in_progress() == 0

    await asyncio.sleep(0)

    # only task 4 remains
    assert q.num_in_progress() == 0
    assert len(q) == 1
Esempio n. 12
0
class BaseBodyChainSyncer(BaseService, PeerSubscriber):

    NO_PEER_RETRY_PAUSE = 5.0
    "If no peers are available for downloading the chain data, retry after this many seconds"

    # We are only interested in peers entering or leaving the pool
    subscription_msg_types: FrozenSet[Type[Command]] = frozenset()

    # This is a rather arbitrary value, but when the sync is operating normally we never see
    # the msg queue grow past a few hundred items, so this should be a reasonable limit for
    # now.
    msg_queue_maxsize = 2000

    tip_monitor_class = ETHChainTipMonitor

    _pending_bodies: Dict[BlockHeader, BlockBody]

    def __init__(self,
                 chain: BaseAsyncChain,
                 db: BaseAsyncChainDB,
                 peer_pool: ETHPeerPool,
                 header_syncer: HeaderSyncerAPI,
                 token: CancelToken = None) -> None:
        """
        Set up the state used to download block bodies.

        :param chain: stored as ``self.chain``
        :param db: stored as ``self.db``; the syncer's database calls go through it
        :param peer_pool: the pool that this service subscribes to in ``_run``
        :param header_syncer: source of the header batches consumed by ``_sync_from_headers``
        :param token: forwarded to the parent class constructor
        """
        super().__init__(token=token)

        # collaborators handed in by the caller
        self.chain = chain
        self.db = db
        self._peer_pool = peer_pool
        self._header_syncer = header_syncer

        # downloaded bodies, keyed by header, held until they are needed later on
        self._pending_bodies = {}

        # idle peers, queued in order of how fast they return block bodies
        self._body_peers: WaitingPeers[ETHPeer] = WaitingPeers(
            commands.BlockBodies)

        # Incomplete block body download tasks:
        # - the queue size is an arbitrary several requests-worth of headers
        # - bodies for lower block numbers are handed out first
        self._block_body_tasks = TaskQueue(
            MAX_BODIES_FETCH * REQUEST_BUFFER_MULTIPLIER,
            attrgetter('block_number'),
        )

        # Set once any headers have been received
        self._got_first_header = asyncio.Event()

        # Set while there is capacity for more block importing; starts out set
        db_buffer_capacity = asyncio.Event()
        db_buffer_capacity.set()
        self._db_buffer_capacity = db_buffer_capacity

    async def _run(self) -> None:
        """
        Stay subscribed to the peer pool until ``self.cancellation()`` returns.
        """
        pool_subscription = self.subscribe(self._peer_pool)
        with pool_subscription:
            await self.cancellation()

    async def _sync_from_headers(
        self,
        task_integrator: BaseOrderedTaskPreparation[BlockHeader, Hash32],
        completion_check: Callable[[BlockHeader], Awaitable[bool]],
    ) -> AsyncIterator[Tuple[BlockHeader, ...]]:
        """
        Watch for new headers to be added to the queue, and add the prerequisite
        tasks as they become available.

        Each batch received from the header syncer is registered with ``task_integrator`` and
        then yielded. If registration raises MissingDependency, the batch is split by
        ``skip_complete_headers`` (using ``completion_check``), the parent of the first
        remaining header is loaded from the database and set as a finished dependency, and
        only the remaining headers are registered and yielded.

        After each yield, the generator waits on ``self._db_buffer_capacity`` before asking
        for the next batch.

        :param task_integrator: the tracker that header tasks get registered with
        :param completion_check: handed to ``skip_complete_headers`` to identify headers that
            can be skipped
        :raises HeaderNotFound: if the parent of the first remaining header is not in the
            database (after logging the details via ``_log_missing_parent``)
        """
        get_headers_coro = self._header_syncer.new_sync_headers(
            HEADER_QUEUE_SIZE_TARGET)

        # Track the highest registered block header by number, purely for stats/logging
        highest_block_num = -1

        async for headers in self.wait_iter(get_headers_coro):
            self._got_first_header.set()
            try:
                # We might end up with duplicates that can be safely ignored.
                # Likely scenario: switched which peer downloads headers, and the new peer isn't
                # aware of some of the in-progress headers
                task_integrator.register_tasks(headers, ignore_duplicates=True)
            except MissingDependency as missing_exc:
                # The parent of this header is not registered as a dependency yet.
                # Some reasons this might happen, in rough descending order of likelihood:
                #   - a normal fork: the canonical head isn't the parent of the first header synced
                #   - a bug: headers were queued out of order in new_sync_headers
                #   - a bug: old headers were pruned out of the tracker, but not in DB yet

                # Skip over all headers found in db, (could happen with a long backtrack)
                completed_headers, new_headers = await self.wait(
                    skip_complete_headers(headers, completion_check))
                if completed_headers:
                    self.logger.debug(
                        "Chain sync skipping over (%d) already stored headers %s: %s..%s",
                        len(completed_headers),
                        humanize_integer_sequence(h.block_number
                                                  for h in completed_headers),
                        completed_headers[0],
                        completed_headers[-1],
                    )
                    if not new_headers:
                        # no new headers to process, wait for next batch to come in
                        continue

                # If the parent header doesn't exist yet, this is a legit bug instead of a fork,
                # let the HeaderNotFound exception bubble up
                try:
                    parent_header = await self.wait(
                        self.db.coro_get_block_header_by_hash(
                            new_headers[0].parent_hash))
                except HeaderNotFound:
                    await self._log_missing_parent(new_headers[0],
                                                   highest_block_num,
                                                   missing_exc)

                    # Nowhere to go from here, re-raise
                    raise

                # If this isn't a trivial case, log it as a possible fork
                # (trivial: the canonical head is the parent, or is among the new headers)
                canonical_head = await self.db.coro_get_canonical_head()
                if canonical_head not in new_headers and canonical_head != parent_header:
                    self.logger.info(
                        "Received a header before processing its parent during regular sync. "
                        "Canonical head is %s, the received header "
                        "is %s, with parent %s. This might be a fork, importing to determine if it "
                        "is the longest chain",
                        canonical_head,
                        new_headers[0],
                        parent_header,
                    )

                # Set first header's parent as finished
                task_integrator.set_finished_dependency(parent_header)
                # Re-register the header tasks, which will now succeed
                task_integrator.register_tasks(new_headers,
                                               ignore_duplicates=True)
                # Clobber the headers variable so that the follow-up work below is consistent with
                # or without exceptions (ie~ only add headers not in DB to body/receipt queue)
                headers = new_headers

            # Hand the registered headers to the consumer of this generator
            yield headers

            # Don't race ahead of the database, by blocking when the persistence queue is too long
            await self._db_buffer_capacity.wait()

            # Only updated once the batch was yielded and capacity is available again, so a
            # failure on the next batch reports the highest number from earlier batches
            highest_block_num = max(headers[-1].block_number,
                                    highest_block_num)

    async def _assign_body_download_to_peers(self) -> None:
        """
        Pair idle peers with pending block body tasks for as long as the service is
        operational.

        Each pass waits for a peer from ``self._body_peers``, then for a batch of at most
        ``MAX_BODIES_FETCH`` headers from ``self._block_body_tasks``, and hands the download
        coroutine to the peer. The download itself is not awaited here.
        """
        while self.is_operational:
            # wait for the fastest peer that is not already busy downloading block bodies
            idle_peer = await self.wait(self._body_peers.get_fastest())

            # wait for the next batch of headers that still need bodies
            # (the task queue hands out the lowest block numbers first)
            next_batch = await self.wait(
                self._block_body_tasks.get(MAX_BODIES_FETCH))
            batch_id, headers = next_batch

            # hand the batch over to the peer, then loop around for the next pairing
            idle_peer.run_task(
                self._run_body_download_batch(idle_peer, batch_id, headers))

    async def _block_body_bundle_processing(
            self, bundles: Tuple[BlockBodyBundle, ...]) -> None:
        """
        Hook that is awaited with the downloaded body bundles; a no-op by default.

        Subclasses may override it to post-process the bundles. Notably, fast sync saves the
        block body bundles to the database right away.
        """

    async def _run_body_download_batch(
            self, peer: ETHPeer, batch_id: int,
            all_headers: Tuple[BlockHeader, ...]) -> None:
        """
        Process one batch taken from self._block_body_tasks: request the bodies that are not
        trivially empty from the peer, and mark every resolved header as complete.

        Headers for which ``_is_body_empty`` is true are never requested; they count as
        completed from the start. The batch is always reported through
        ``_mark_body_download_complete``, even when the download raises. After a
        BaseP2PError the peer is not put back into ``self._body_peers`` by this method.

        NOTE(review): the "no results" pause is decided on the combined tuple of trivial and
        received headers, so a peer that returns no bodies is only paused when the batch had
        no trivial headers -- confirm that this is intended.
        """
        # split the batch into headers with empty bodies, and headers that need a request
        empty_bodied, needs_request = [], []
        for header in all_headers:
            bucket = empty_bodied if _is_body_empty(header) else needs_request
            bucket.append(header)
        trivial_headers = tuple(empty_bodied)
        non_trivial_headers = tuple(needs_request)

        if trivial_headers:
            self.logger.debug2(
                "Found %d/%d trivial block bodies, skipping those requests",
                len(trivial_headers),
                len(all_headers),
            )

        # Assigned up front (even when it is empty), so that the finally clause always has
        # something to report, whatever happens during the download
        completed_headers = trivial_headers

        try:
            if non_trivial_headers:
                download = self._get_block_bodies(peer, non_trivial_headers)
                bundles, received_headers = await peer.wait(download)
                await self._block_body_bundle_processing(bundles)
                completed_headers = trivial_headers + received_headers

        except BaseP2PError as exc:
            self.logger.info(
                "Unexpected p2p perror while downloading body from peer: %s",
                exc)
            self.logger.debug(
                "Problem downloading body from peer, dropping...",
                exc_info=True)
        else:
            if completed_headers or not non_trivial_headers:
                # either the peer had nothing to do, or the batch produced at least one
                # result: have it get back in line for processing
                self._body_peers.put_nowait(peer)
            else:
                # no results at all: wait a while before using this peer again
                penalty = EMPTY_PEER_RESPONSE_PENALTY
                self.logger.debug(
                    "Pausing %s for %.1fs, for sending 0 block bodies", peer,
                    penalty)
                requeue_peer = partial(self._body_peers.put_nowait, peer)
                self.get_event_loop().call_later(penalty, requeue_peer)
        finally:
            self._mark_body_download_complete(batch_id, completed_headers)

    def _mark_body_download_complete(
            self,
            batch_id: int,
            completed_headers: Tuple[BlockHeader, ...]) -> None:
        """
        Complete the given batch on the block body task queue, reporting which of its
        headers were finished.
        """
        body_tasks = self._block_body_tasks
        body_tasks.complete(batch_id, completed_headers)

    async def _get_block_bodies(
        self,
        peer: ETHPeer,
        headers: Tuple[BlockHeader, ...],
    ) -> Tuple[Tuple[BlockBodyBundle, ...], Tuple[BlockHeader, ...]]:
        """
        Fetch block bodies from the peer and work out which of the headers they belong to.

        Bodies are matched to headers by the pair (transaction root, uncles hash). Every
        matched body is added to ``self._pending_bodies``, for later use.

        Unlike _request_block_bodies, which only issues the request, this also pairs the
        response with the headers that were actually delivered.

        :return: the bundles as returned by the peer, and the headers that a body was found
            for; two empty tuples if the peer returned nothing
        """
        bundles = await self.wait(self._request_block_bodies(peer, headers))

        if len(bundles) == 0:
            self.logger.debug(
                "Got block bodies for 0/%d headers from %s, from %r..%r",
                len(headers),
                peer,
                headers[0],
                headers[-1],
            )
            return tuple(), tuple()

        # index the returned bodies by the roots that identify them
        body_for_roots = {}
        for body, (transaction_root, _), uncles_hash in bundles:
            body_for_roots[(transaction_root, uncles_hash)] = body

        # the roots that each requested header expects
        roots_for_header = {
            header: (header.transaction_root, header.uncles_hash)
            for header in headers
        }

        # keep only the headers whose roots showed up in the response
        delivered = {
            header: roots
            for header, roots in roots_for_header.items()
            if roots in body_for_roots
        }
        completed_headers = tuple(delivered)

        # store bodies for later usage, building a new dict instead of mutating the old one
        newly_pending = {
            header: body_for_roots[roots]
            for header, roots in delivered.items()
        }
        self._pending_bodies = merge(self._pending_bodies, newly_pending)

        self.logger.debug(
            "Got block bodies for %d/%d headers from %s, from %r..%r",
            len(delivered),
            len(headers),
            peer,
            headers[0],
            headers[-1],
        )

        return bundles, completed_headers

    async def _request_block_bodies(
            self, peer: ETHPeer,
            batch: Tuple[BlockHeader, ...]) -> Tuple[BlockBodyBundle, ...]:
        """
        Ask the given peer for the block bodies that belong to the batch of headers.

        Timeouts, cancellations and a lost peer connection are logged and reported as an empty
        tuple. Any other exception is logged and re-raised. On success, the peer's response
        is returned unchanged.
        """
        self.logger.debug("Requesting block bodies for %d headers from %s",
                          len(batch), peer)
        try:
            return await peer.requests.get_block_bodies(batch)
        except TimeoutError:
            self.logger.debug(
                "Timed out requesting block bodies for %d headers from %s",
                len(batch),
                peer,
            )
        except CancelledError:
            self.logger.debug(
                "Pending block bodies call to %r future cancelled", peer)
        except OperationCancelled:
            self.logger.debug2(
                "Pending block bodies call to %r operation cancelled", peer)
        except PeerConnectionLost:
            self.logger.debug(
                "Peer went away, cancelling the block body request and moving on..."
            )
        except Exception:
            self.logger.exception("Unknown error when getting block bodies")
            raise

        # only reached after one of the tolerated failures above
        return tuple()

    async def _log_missing_parent(self, first_header: BlockHeader,
                                  highest_block_num: int,
                                  missing_exc: Exception) -> None:
        """
        Log the local chain state around a header whose parent could not be found.

        Looks up the canonical header at the same block number, the canonical header one
        number below it, and the canonical head. Each lookup that raises HeaderNotFound is
        logged and shown as None in the final debug message.

        :param first_header: the header whose parent is missing
        :param highest_block_num: included in the log message as the highest received header
        :param missing_exc: the exception that triggered this report
        """
        self.logger.warning(
            "Parent missing for header %r, restarting header sync",
            first_header)
        number = first_header.block_number

        # what we have stored as canonical at the same height
        header_at_number = None
        try:
            header_at_number = await self.db.coro_get_canonical_block_header_by_number(
                number)
        except HeaderNotFound as exc:
            self.logger.debug("Could not find canonical header at #%d: %s",
                              number, exc)

        # what we have stored as canonical one block below
        header_below = None
        try:
            header_below = await self.db.coro_get_canonical_block_header_by_number(
                number - 1)
        except HeaderNotFound as exc:
            self.logger.debug(
                "Could not find canonical header parent at #%d: %s", number,
                exc)

        # our current canonical head
        tip = None
        try:
            tip = await self.db.coro_get_canonical_head()
        except HeaderNotFound as exc:
            self.logger.debug("Could not find canonical tip: %s", exc)

        self.logger.debug(
            ("Header syncer returned header %s, which has no parent in our DB. "
             "Instead at #%d, our header is %s, whose parent is %s, with canonical tip %s. "
             "The highest received header is %d. Triggered by missing dependency: %s"
             ),
            first_header,
            number,
            header_at_number,
            header_below,
            tip,
            highest_block_num,
            missing_exc,
        )
Esempio n. 13
0
class HeaderMeatSyncer(Service, PeerSubscriber, Generic[TChainPeer]):
    # We are only interested in peers entering or leaving the pool
    subscription_msg_types: FrozenSet[Type[CommandAPI[Any]]] = frozenset()
    msg_queue_maxsize = 2000

    _filler_header_tasks: TaskQueue[Tuple[BlockHeader, int, TChainPeer]]

    def __init__(self, chain: AsyncChainAPI, peer_pool: BaseChainPeerPool,
                 stitcher: HeaderStitcher) -> None:
        """
        Set up the state used to fill the gaps between skeleton headers.

        :param chain: used to validate the downloaded header segments
        :param peer_pool: the pool that this service subscribes to while running
        :param stitcher: receives the validated headers of each completed segment
        """
        self.logger = get_logger('trinity.sync.common.headers.SkeletonSyncer')
        self._chain = chain
        self._stitcher = stitcher
        self._peer_pool = peer_pool
        self.sync_progress: SyncProgress = None

        # Each task is a tuple that starts with the parent header of the gap;
        # tasks are ordered by the block number of that parent header
        parent_header_of = itemgetter(0)
        block_number_of = attrgetter('block_number')
        max_pending_fillers = 50
        self._filler_header_tasks = TaskQueue(
            max_pending_fillers,
            compose(block_number_of, parent_header_of),
        )

        # idle peers, queued up against the ETH and LES block header response commands
        self._waiting_peers: WaitingPeers[TChainPeer] = WaitingPeers(
            (ETHBlockHeaders, LESBlockHEaders), )

    def register_peer(self, peer: BasePeer) -> None:
        """
        Register the peer with the parent class, then add it to the idle peers so that it
        can be picked for header downloads.
        """
        super().register_peer(peer)
        idle_peers = self._waiting_peers
        idle_peers.put_nowait(peer)  # type: ignore

    async def schedule_segment(self, parent_header: BlockHeader,
                               gap_length: int,
                               skeleton_peer: TChainPeer) -> None:
        """
        Queue up the download of one gap in the header skeleton.

        A ValidationError raised while adding the task is logged and dropped, because it means
        that the same task is already queued.

        :param parent_header: the parent of the gap to fill
        :param gap_length: how long is the header gap
        :param skeleton_peer: the peer that provided the parent_header - will not use to fill gaps
        """
        filler_task = (parent_header, gap_length, skeleton_peer)
        try:
            await self._filler_header_tasks.add((filler_task, ))
        except ValidationError as exc:
            # The task is already queued up, so there is no value in re-adding it, and it is
            # safe to drop the exception after logging it. One example of when this happens:
            # the skeleton sync restarts and happens to choose the same skeleton structure,
            # so it tries to reinsert the same meat filler tasks.
            self.logger.debug(
                "Tried to re-add a duplicate list of headers to the download queue: %s",
                exc,
            )

    async def run(self) -> None:
        """
        Start the periodic stats logging as a daemon task, then match queued header
        downloads to peers while subscribed to the peer pool.
        """
        self.manager.run_daemon_task(self._display_stats)

        pool_subscription = self.subscribe(self._peer_pool)
        with pool_subscription:
            await self._match_header_dls_to_peers()

    async def _display_stats(self) -> None:
        """
        Every 5 seconds, while the service is running, log how many gap-filling tasks are
        active and queued, along with the maximum size of the task queue.
        """
        filler_tasks = self._filler_header_tasks
        while self.manager.is_running:
            await asyncio.sleep(5)

            active = filler_tasks.num_in_progress()
            queued = len(filler_tasks)
            # reads the private size limit of the queue, purely for display
            limit = filler_tasks._maxsize
            self.logger.debug(
                "Header Skeleton Gaps: active=%d queued=%d max=%d",
                active,
                queued,
                limit,
            )

    async def _match_header_dls_to_peers(self) -> None:
        """
        While the service is running, take gap-filling tasks off the queue one at a time,
        and match each of them to a peer.
        """
        while self.manager.is_running:
            batch_id, tasks = await self._filler_header_tasks.get(1)

            # exactly one task was asked for, so the batch must hold exactly one
            (task, ) = tasks
            parent_header, gap, skeleton_peer = task

            await self._match_dl_to_peer(batch_id, parent_header, gap,
                                         skeleton_peer)

    async def _match_dl_to_peer(self, batch_id: int,
                                parent_header: BlockHeader, gap: int,
                                skeleton_peer: TChainPeer) -> None:
        """
        Wait for an idle peer, and have it fetch the header segment that follows
        ``parent_header``.

        The fetch runs as a task on the peer's manager, and is handed two callbacks: one that
        completes the batch with its task, and one that completes it with no tasks finished.
        Sync progress is initialized first, if that has not happened yet.

        NOTE(review): ``skeleton_peer`` is only passed along inside the completed task; the
        peer picked here is not compared against it -- confirm whether it should be excluded.
        """
        def mark_failed() -> None:
            # complete the batch without any finished task
            self._filler_header_tasks.complete(batch_id, tuple())

        def mark_done() -> None:
            # complete the batch, with its single task finished
            finished_task = (parent_header, gap, skeleton_peer)
            self._filler_header_tasks.complete(batch_id, (finished_task, ))

        peer = await self._waiting_peers.get_fastest()
        if not self.sync_progress:
            await self._init_sync_progress(parent_header, peer)

        peer.manager.run_task(
            self._run_fetch_segment,
            peer,
            parent_header,
            gap,
            mark_done,
            mark_failed,
        )

    async def _run_fetch_segment(self, peer: TChainPeer,
                                 parent_header: BlockHeader, length: int,
                                 complete_task_fn: Callable[[], None],
                                 fail_task_fn: Callable[[], None]) -> None:
        """
        Fetch one header segment from the peer, and report the outcome through the callbacks.

        - Any exception during the fetch is logged and the task is failed; the peer is not
          put back in line by this method.
        - A segment of exactly ``length`` headers puts the peer back in line right away and
          completes the task.
        - Any other segment length puts the peer back in line only after a delay of
          ``EMPTY_PEER_RESPONSE_PENALTY``, and fails the task.
        """
        try:
            segment = await self._fetch_segment(peer, parent_header, length)
        except BaseP2PError as exc:
            self.logger.info(
                "Unexpected p2p err while downloading headers from %s: %s",
                peer, exc)
            self.logger.debug(
                "Problem downloading headers from peer, dropping...",
                exc_info=True)
            fail_task_fn()
            return
        except Exception as exc:
            self.logger.info(
                "Unexpected err while downloading headers from %s: %s", peer,
                exc)
            self.logger.debug(
                "Problem downloading headers from peer, dropping...",
                exc_info=True)
            fail_task_fn()
            return

        if len(segment) == length:
            # the full segment arrived, so the peer can take on more work right away
            self._waiting_peers.put_nowait(peer)
            complete_task_fn()
            return

        # not enough results: keep the peer out of rotation for a while
        penalty = EMPTY_PEER_RESPONSE_PENALTY
        self.logger.debug(
            "Pausing %s for %.1fs, for sending %d headers",
            peer,
            penalty,
            len(segment),
        )
        requeue_peer = partial(self._waiting_peers.put_nowait, peer)
        asyncio.get_event_loop().call_later(penalty, requeue_peer)
        fail_task_fn()

    async def _fetch_segment(self, peer: TChainPeer,
                             parent_header: BlockHeader,
                             length: int) -> Tuple[BlockHeader, ...]:
        """
        Request ``length`` headers that follow ``parent_header``, and validate the response.

        An empty tuple is returned when the peer sends nothing, when the first header does
        not point back at ``parent_header``, when the number of headers is not ``length``, or
        when chain validation fails. Otherwise the headers are registered with the stitcher,
        the sync progress (if any) is moved up to the last header, and the headers are
        returned.

        :raises ValidationError: if ``length`` is more than the peer's ``max_headers_fetch``
        """
        if length > peer.max_headers_fetch:
            raise ValidationError(
                f"Can't request {length} headers, because peer maximum is {peer.max_headers_fetch}"
            )

        first_number = BlockNumber(parent_header.block_number + 1)
        headers = await self._request_headers(peer, first_number, length)

        if not headers:
            return tuple()

        if headers[0].parent_hash != parent_header.hash:
            # Segment doesn't match leading peer, drop this peer
            # Eventually, we'll do something smarter, in case the leading peer is the divergent one
            self.logger.warning(
                "%s returned segment starting %s & parent %s, doesn't match %s, ignoring result...",
                peer,
                headers[0],
                humanize_hash(headers[0].parent_hash),
                parent_header,
            )
            return tuple()

        if len(headers) != length:
            self.logger.debug(
                "Ignoring %d headers from %s, because wanted %d",
                len(headers),
                peer,
                length,
            )
            return tuple()

        try:
            await self._chain.coro_validate_chain(
                parent_header,
                headers,
                SEAL_CHECK_RANDOM_SAMPLE_RATE,
            )
        except ValidationError as e:
            self.logger.warning(
                "Received invalid header segment from %s against known parent %s, "
                ": %s",
                peer,
                parent_header,
                e,
            )
            return tuple()

        # stitch headers together in order, ignoring duplicates
        self._stitcher.register_tasks(headers, ignore_duplicates=True)
        if self.sync_progress:
            newest_header = headers[-1]
            self.sync_progress = self.sync_progress.update_current_block(
                newest_header.block_number, )
        return headers

    async def _request_headers(self, peer: TChainPeer,
                               start_at: BlockIdentifier,
                               length: int) -> Tuple[BlockHeader, ...]:
        """
        Ask ``peer`` for ``length`` headers starting at ``start_at``.

        The request is made through ``peer.chain_api.get_block_headers`` with
        ``skip=0`` and ``reverse=False``.

        A timeout, a cancelled request, or a lost peer connection is logged at
        debug level and reported to the caller as an empty tuple. Any other
        exception is logged with its traceback and re-raised.

        :return: whatever the peer API returned, or an empty tuple on one of the
            handled failures
        """
        self.logger.debug("Requesting %d headers from %s", length, peer)
        try:
            response = await peer.chain_api.get_block_headers(
                start_at,
                length,
                skip=0,
                reverse=False,
            )
        except asyncio.TimeoutError:
            self.logger.debug(
                "Timed out requesting %d headers from %s",
                length,
                peer,
            )
        except CancelledError:
            # NOTE: the cancellation is not propagated; it is reported as "no headers"
            self.logger.debug(
                "Pending headers call to %r future cancelled",
                peer,
            )
        except PeerConnectionLost:
            self.logger.debug(
                "Peer went away, cancelling the headers request and moving on..."
            )
        except Exception:
            self.logger.exception("Unknown error when getting headers")
            raise
        else:
            return response

        # Every handled failure above falls through to the same empty result
        return tuple()

    async def _init_sync_progress(self, parent_header: BlockHeader,
                                  peer: TChainPeer) -> None:
        """
        Set ``self.sync_progress`` to a new ``SyncProgress`` for a sync against ``peer``.

        The peer's head block number is read from ``peer.head_info.head_number``.
        If that lookup raises ``AttributeError``, the head header is requested
        from the peer by ``peer.head_info.head_hash`` instead. When that request
        yields nothing, ``self.sync_progress`` is left untouched.
        """
        try:
            target_block_number = peer.head_info.head_number
        except AttributeError:
            # Head number is not available: fetch the single head header by hash
            head_headers = await self._request_headers(
                peer,
                peer.head_info.head_hash,
                1,
            )
            if not head_headers:
                return
            target_block_number = head_headers[0].block_number

        # The first two arguments are both the parent's block number
        # (presumably start and current block -- confirm against SyncProgress).
        start_block_number = parent_header.block_number
        self.sync_progress = SyncProgress(
            start_block_number,
            start_block_number,
            target_block_number,
        )