Example #1
async def test_two_pending_adds_one_release():
    q = TaskQueue(2)

    asyncio.ensure_future(q.add((3, 1, 2)))

    # wait for ^ to run and pause
    await asyncio.sleep(0)
    # note that the highest-priority items are queued first
    assert 1 in q
    assert 2 in q
    assert 3 not in q

    asyncio.ensure_future(q.add((0, 4)))
    # wait for ^ to run and pause
    await asyncio.sleep(0)

    # task consumer 1 completes the first two pending
    batch, tasks = await wait(q.get())
    assert tasks == (1, 2)
    q.complete(batch, tasks)

    # task consumer 2 gets the next two, in priority order
    batch, tasks = await wait(q.get())
    assert len(tasks) in {1, 2}

    if len(tasks) == 1:
        _, tasks2 = await wait(q.get())
        all_tasks = tuple(sorted(tasks + tasks2))
    elif len(tasks) == 2:
        all_tasks = tasks

    assert all_tasks == (0, 3)

    # clean up, so the pending get() call can complete
    q.complete(batch, tasks)
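These tests call a small wait() helper that this page does not show. A minimal sketch, assuming it simply bounds each queue operation with a short timeout so that a blocked add() or get() fails fast instead of hanging the test run (the timeout value is an assumption):

import asyncio

DEFAULT_TIMEOUT = 0.05  # assumed value, not taken from the original suite

async def wait(coro):
    # bound the coroutine so a blocked add()/get() raises
    # asyncio.TimeoutError instead of stalling the test
    return await asyncio.wait_for(coro, timeout=DEFAULT_TIMEOUT)

This is also why Examples #3 and #6 catch asyncio.TimeoutError: an add() past maxsize, or a get() on an empty queue, blocks until the timeout fires.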
Example #2
async def test_unfinished_tasks_readded():
    q = TaskQueue()
    await wait(q.add((2, 1, 3)))

    batch, tasks = await wait(q.get())

    q.complete(batch, (2, ))

    batch, tasks = await wait(q.get())

    assert tasks == (1, 3)
Example #3
async def test_queue_size_reset_after_complete():
    q = TaskQueue(maxsize=2)

    await wait(q.add((1, 2)))

    batch, tasks = await wait(q.get())

    # there should not be room to add another task
    try:
        await wait(q.add((3, )))
    except asyncio.TimeoutError:
        pass
    else:
        assert False, "should not be able to add task past maxsize"

    # do imaginary work here, then complete it all

    q.complete(batch, tasks)

    # there should be room to add more now
    await wait(q.add((3, )))
Example #4
async def test_cannot_complete_batch_with_wrong_task():
    q = TaskQueue()

    await wait(q.add((1, 2)))

    batch, tasks = await wait(q.get())

    # cannot complete a valid task with a task it wasn't given
    with pytest.raises(ValidationError):
        q.complete(batch, (3, 4))

    # partially invalid completion calls leave the valid task in an incomplete state
    with pytest.raises(ValidationError):
        q.complete(batch, (1, 3))

    assert 1 in q
Example #5
async def test_queue_contains_task_until_complete():
    q = TaskQueue()

    assert 2 not in q

    await wait(q.add((2, )))

    assert 2 in q

    batch, tasks = await wait(q.get())

    assert 2 in q

    q.complete(batch, tasks)

    assert 2 not in q
Example #6
async def test_queue_get_cap(start_tasks, get_max, expected, remainder):
    q = TaskQueue()

    await wait(q.add(start_tasks))

    batch, tasks = await wait(q.get(get_max))
    assert tasks == expected

    if remainder:
        _, tasks2 = await wait(q.get())
        assert tasks2 == remainder
    else:
        try:
            _, tasks2 = await wait(q.get())
        except asyncio.TimeoutError:
            pass
        else:
            assert False, f"No more tasks to get, but got {tasks2!r}"
Example #7
async def test_cannot_complete_batch_unless_pending():
    q = TaskQueue()

    await wait(q.add((1, 2)))

    # cannot complete a valid task without a batch id
    with pytest.raises(ValidationError):
        q.complete(None, (1, 2))

    assert 1 in q

    batch, tasks = await wait(q.get())

    # cannot complete a valid task with an invalid batch id
    with pytest.raises(ValidationError):
        q.complete(batch + 1, (1, 2))

    assert 1 in q
Example #8
async def test_queue_contains_task_until_complete(tasks):
    q = TaskQueue(order_fn=id)

    first_task = tasks[0]

    assert first_task not in q

    await wait(q.add(tasks))

    assert first_task in q

    batch, pending_tasks = await wait(q.get())

    assert first_task in q

    q.complete(batch, pending_tasks)

    assert first_task not in q
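The tasks argument to this variant is supplied by parametrization that is not shown here. Because the queue is built with order_fn=id, the items themselves never need to support comparison; only the keys returned by order_fn must sort. A hypothetical parametrization that exercises exactly that:

import pytest

@pytest.mark.parametrize(
    'tasks',
    (
        (object(), object()),  # plain objects: hashable, but with no ordering
        (1j, 2j),              # complex numbers also define no ordering
    ),
)
async def test_queue_contains_task_until_complete(tasks):
    ...

Since id() returns an arbitrary (but sortable) integer per object, the test can only check membership and completion, not any particular task order.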
Example #9
async def test_two_pending_adds_one_release():
    q = TaskQueue(2)

    asyncio.ensure_future(q.add((3, 1, 2)))

    # wait for ^ to run and pause
    await asyncio.sleep(0)
    # note that the highest-priority items are queued first
    assert 1 in q
    assert 2 in q
    assert 3 not in q

    # two tasks are queued, none are started
    assert len(q) == 2
    assert q.num_in_progress() == 0

    asyncio.ensure_future(q.add((0, 4)))
    # wait for ^ to run and pause
    await asyncio.sleep(0)

    # task consumer 1 completes the first two pending
    batch, tasks = await wait(q.get())
    assert tasks == (1, 2)

    # both tasks started
    assert len(q) == 2
    assert q.num_in_progress() == 2

    q.complete(batch, tasks)

    # tasks are drained, but new ones aren't added yet...
    assert q.num_in_progress() == 0
    assert len(q) == 0

    await asyncio.sleep(0.01)

    # Now the tasks are added
    assert q.num_in_progress() == 0
    assert len(q) == 2

    # task consumer 2 gets the next two, in priority order
    batch, tasks = await wait(q.get())

    assert len(tasks) == 2

    assert tasks == (0, 3)

    assert q.num_in_progress() == 2
    assert len(q) == 2

    # clean up, so the pending get() call can complete
    q.complete(batch, tasks)

    # All current tasks finished
    assert q.num_in_progress() == 0

    await asyncio.sleep(0)

    # only task 4 remains
    assert q.num_in_progress() == 0
    assert len(q) == 1
Example #10
class FastChainSyncer(BaseBodyChainSyncer):
    """
    Sync with the Ethereum network by fetching block headers/bodies and storing them in our DB.

    Here, the run() method returns as soon as we complete a sync with the peer that announced the
    highest TD, at which point we must run the StateDownloader to fetch the state for our chain
    head.
    """
    db: AsyncChainDB

    def __init__(self,
                 chain: AsyncChain,
                 db: AsyncHeaderDB,
                 peer_pool: ETHPeerPool,
                 token: CancelToken = None) -> None:
        super().__init__(chain, db, peer_pool, token)

        # queue up any idle peers, in order of how fast they return receipts
        self._receipt_peers = WaitingPeers(commands.Receipts)

        # Track receipt download tasks
        # - arbitrarily allow several requests-worth of headers queued up
        # - try to get receipts from lower block numbers first
        buffer_size = MAX_RECEIPTS_FETCH * REQUEST_BUFFER_MULTIPLIER
        self._receipt_tasks = TaskQueue(buffer_size,
                                        attrgetter('block_number'))

        # track when both bodies and receipts are collected, so that blocks can be persisted
        self._block_persist_tracker = OrderedTaskPreparation(
            BlockPersistPrereqs,
            id_extractor=attrgetter('hash'),
            # make sure that a block is not persisted until the parent block is persisted
            dependency_extractor=attrgetter('parent_hash'),
        )

    async def _run(self) -> None:
        head = await self.wait(self.db.coro_get_canonical_head())
        self._block_persist_tracker.set_finished_dependency(head)
        self.run_daemon_task(self._launch_prerequisite_tasks())
        self.run_daemon_task(self._assign_receipt_download_to_peers())
        self.run_daemon_task(self._assign_body_download_to_peers())
        self.run_daemon_task(self._persist_ready_blocks())
        self.run_daemon_task(self._display_stats())
        await super()._run()

    def register_peer(self, peer: BasePeer) -> None:
        # when a new peer is added to the pool, add it to the idle peer lists
        super().register_peer(peer)
        peer = cast(ETHPeer, peer)
        self._body_peers.put_nowait(peer)
        self._receipt_peers.put_nowait(peer)

    async def _launch_prerequisite_tasks(self) -> None:
        """
        Watch for new headers to be added to the queue, and add the prerequisite
        tasks as they become available.
        """
        while self.is_operational:
            batch_id, headers = await self.wait(self.header_queue.get())

            try:
                self._block_persist_tracker.register_tasks(headers)
            except DuplicateTasks as exc:
                # Likely scenario: switched which peer downloads headers, and the new peer isn't
                # aware of some of the in-progress headers
                self.logger.debug(
                    "Duplicate headers during fast sync %r, skipping",
                    exc.duplicates)
                duplicates = cast(Tuple[BlockHeader, ...], exc.duplicates)
                self.header_queue.complete(batch_id, duplicates)
                continue
            except MissingDependency:
                # The parent of this header is not registered as a dependency yet.
                # Some reasons this might happen, in rough descending order of likelihood:
                #   - a normal fork: the canonical head isn't the parent of the first header synced
                #   - a bug: the DB has inconsistent state, say saved headers but not block bodies
                #   - a bug: headers were queued out of order in header_queue

                # If the parent header doesn't exist yet, this is a legit bug instead of a fork,
                # so let the HeaderNotFound exception bubble up
                parent_header = await self.wait(
                    self.db.coro_get_block_header_by_hash(
                        headers[0].parent_hash))

                # This appears to be a fork, since the parent header is persisted.
                self.logger.info(
                    "Fork found while starting fast sync. Canonical head was %s, but the next "
                    "header %s, has parent %s. Importing fork in case it's the longest chain.",
                    await self.db.coro_get_canonical_head(),
                    headers[0],
                    parent_header,
                )
                # Set first header's parent as finished
                self._block_persist_tracker.set_finished_dependency(
                    parent_header)
                # Re-register the header tasks, which will now succeed
                self._block_persist_tracker.register_tasks(headers)

            # Sometimes duplicates are added to the queue, when switching from one sync to another.
            # We can simply ignore them.
            new_body_tasks = tuple(h for h in headers
                                   if h not in self._block_body_tasks)
            new_receipt_tasks = tuple(h for h in headers
                                      if h not in self._receipt_tasks)

            # if any one of the output queues gets full, hang until there is room
            await self.wait(
                asyncio.gather(
                    self._block_body_tasks.add(new_body_tasks),
                    self._receipt_tasks.add(new_receipt_tasks),
                ))
            self.header_queue.complete(batch_id, headers)

    async def _display_stats(self) -> None:
        last_head = await self.wait(self.db.coro_get_canonical_head())
        timer = Timer()

        while self.is_operational:
            await self.sleep(5)
            self.logger.debug(
                "(in progress, queued, max size) of headers, bodies, receipts: %r",
                [(q.num_in_progress(), len(q), q._maxsize) for q in (
                    self.header_queue,
                    self._block_body_tasks,
                    self._receipt_tasks,
                )],
            )

            head = await self.wait(self.db.coro_get_canonical_head())
            if head == last_head:
                continue
            else:
                block_num_change = head.block_number - last_head.block_number
                last_head = head

                self.logger.info(
                    "Advanced by %d blocks in %0.1f seconds, new head: #%d",
                    block_num_change, timer.pop_elapsed(), head.block_number)

    async def _persist_ready_blocks(self) -> None:
        """
        Persist blocks as soon as all their prerequisites are done: body and receipt downloads.
        Persisting must happen in order, so that the block's parent has already been persisted.

        Also, determine if fast sync with this peer should end, having reached (or surpassed)
        its target hash. If so, shut down this service.
        """
        while self.is_operational:
            # this tracker waits for all prerequisites to be complete, and returns headers in
            # order, so that each header's parent is already persisted.
            completed_headers = await self.wait(
                self._block_persist_tracker.ready_tasks())

            await self._persist_blocks(completed_headers)

            target_hash = self.get_target_header_hash()

            if target_hash in [header.hash for header in completed_headers]:
                # simply exit the service when reaching the target hash
                self.cancel_nowait()
                break

    async def _persist_blocks(self, headers: Tuple[BlockHeader, ...]) -> None:
        """
        Persist blocks for the given headers, directly to the database

        :param headers: headers for which block bodies and receipts have been downloaded
        """
        for header in headers:
            vm_class = self.chain.get_vm_class(header)
            block_class = vm_class.get_block_class()

            if _is_body_empty(header):
                transactions: List[BaseTransaction] = []
                uncles: List[BlockHeader] = []
            else:
                body = self._pending_bodies.pop(header)
                uncles = body.uncles

                # transaction data was already persisted in _block_body_bundle_processing, but
                # we need to include the transactions for them to be added to the hash->txn lookup
                tx_class = block_class.get_transaction_class()
                transactions = [
                    tx_class.from_base_transaction(tx)
                    for tx in body.transactions
                ]

            block = block_class(header, transactions, uncles)
            await self.wait(self.db.coro_persist_block(block))

    async def _assign_receipt_download_to_peers(self) -> None:
        """
        Loop indefinitely, assigning idle peers to download receipts needed for syncing.
        """
        while self.is_operational:
            # from all the peers that are not currently downloading receipts, get the fastest
            peer = await self.wait(self._receipt_peers.get_fastest())

            # get headers for receipts that we need to download, preferring lowest block number
            batch_id, headers = await self.wait(
                self._receipt_tasks.get(MAX_RECEIPTS_FETCH))

            # schedule the receipt download and move on
            peer.run_task(
                self._run_receipt_download_batch(peer, batch_id, headers))

    def _mark_body_download_complete(
            self, batch_id: int, completed_headers: Tuple[BlockHeader,
                                                          ...]) -> None:
        super()._mark_body_download_complete(batch_id, completed_headers)
        self._block_persist_tracker.finish_prereq(
            BlockPersistPrereqs.StoreBlockBodies,
            completed_headers,
        )

    async def _run_receipt_download_batch(
            self, peer: ETHPeer, batch_id: int, headers: Tuple[BlockHeader,
                                                               ...]) -> None:
        """
        Given a single batch retrieved from self._receipt_tasks, get as many of the receipt bundles
        as possible, and mark them as complete.
        """
        # If there is an exception during _process_receipts, prepare to mark the task as finished
        # with no headers collected:
        completed_headers: Tuple[BlockHeader, ...] = tuple()
        try:
            completed_headers = await peer.wait(
                self._process_receipts(peer, headers))

            self._block_persist_tracker.finish_prereq(
                BlockPersistPrereqs.StoreReceipts,
                completed_headers,
            )
        except BaseP2PError as exc:
            self.logger.info(
                "Unexpected p2p error while downloading receipts from peer: %s",
                exc)
            self.logger.debug(
                "Problem downloading receipts from peer, dropping...",
                exc_info=True)
        else:
            if len(completed_headers) > 0:
                # peer completed successfully, so have it get back in line for processing
                self._receipt_peers.put_nowait(peer)
            else:
                # peer returned no results, wait a while before trying again
                delay = self.EMPTY_PEER_RESPONSE_PENALTY
                self.logger.debug(
                    "Pausing %s for %.1fs, for sending 0 receipts", peer,
                    delay)
                self.call_later(delay, self._receipt_peers.put_nowait, peer)
        finally:
            self._receipt_tasks.complete(batch_id, completed_headers)

    async def _block_body_bundle_processing(
            self, bundles: Tuple[BlockBodyBundle, ...]) -> None:
        """
        Fast sync writes all the block body bundle data directly to the database,
        in order to make it... fast.
        """
        for (_, (_, trie_data_dict), _) in bundles:
            await self.wait(self.db.coro_persist_trie_data_dict(trie_data_dict))

    async def _process_receipts(
            self, peer: ETHPeer,
            all_headers: Tuple[BlockHeader, ...]) -> Tuple[BlockHeader, ...]:
        """
        Downloads and persists the receipts for the given set of block headers.
        Some receipts may be trivial, having a blank root hash, and will not be requested.

        :param peer: to issue the receipt request to
        :param all_headers: attempt to get receipts for as many of these headers as possible
        :return: the headers for receipts that were successfully downloaded (or were trivial)
        """
        # Post-Byzantium blocks may have identical receipt roots (e.g. when they have the same
        # number of transactions and all succeed/failed: ropsten blocks 2503212 and 2503284),
        # so we do this to avoid requesting the same receipts multiple times.

        # combine headers with the same receipt root, so we can mark them as completed, later
        receipt_root_to_headers = groupby(attrgetter('receipt_root'),
                                          all_headers)

        # Ignore headers that have an empty receipt root
        trivial_headers = tuple(
            receipt_root_to_headers.pop(BLANK_ROOT_HASH, tuple()))

        # pick one of the headers for each missing receipt root
        unique_headers_needed = tuple(
            first(headers)
            for root, headers in receipt_root_to_headers.items())

        if not unique_headers_needed:
            return trivial_headers

        receipt_bundles = await self._request_receipts(peer,
                                                       unique_headers_needed)

        if not receipt_bundles:
            return trivial_headers

        try:
            await self._validate_receipts(unique_headers_needed,
                                          receipt_bundles)
        except ValidationError as err:
            self.logger.info(
                "Disconnecting from %s: sent invalid receipt: %s",
                peer,
                err,
            )
            await peer.disconnect(DisconnectReason.bad_protocol)
            return trivial_headers

        # process all of the returned receipts, storing their trie data
        # dicts in the database
        receipts, trie_roots_and_data_dicts = zip(*receipt_bundles)
        receipt_roots, trie_data_dicts = zip(*trie_roots_and_data_dicts)
        for trie_data in trie_data_dicts:
            await self.wait(self.db.coro_persist_trie_data_dict(trie_data))

        # Identify which headers have the receipt roots that are now complete.
        completed_header_groups = tuple(
            headers for root, headers in receipt_root_to_headers.items()
            if root in receipt_roots)
        newly_completed_headers = tuple(concat(completed_header_groups))

        self.logger.debug(
            "Got receipts for %d/%d headers from %s, with %d trivial headers",
            len(newly_completed_headers),
            len(all_headers) - len(trivial_headers),
            peer,
            len(trivial_headers),
        )
        return newly_completed_headers + trivial_headers

    async def _validate_receipts(
            self, headers: Tuple[BlockHeader, ...],
            receipt_bundles: Tuple[ReceiptBundle, ...]) -> None:

        header_by_root = {
            header.receipt_root: header
            for header in headers if not _is_receipts_empty(header)
        }
        receipts_by_root = {
            receipt_root: receipts
            for (receipts, (receipt_root, _)) in receipt_bundles
            if receipt_root != BLANK_ROOT_HASH
        }
        for receipt_root, header in header_by_root.items():
            if receipt_root not in receipts_by_root:
                # this receipt group was not returned by the peer, skip validation
                continue
            for receipt in receipts_by_root[receipt_root]:
                await self.chain.coro_validate_receipt(receipt, header)

    async def _request_receipts(
            self, peer: ETHPeer,
            batch: Tuple[BlockHeader, ...]) -> Tuple[ReceiptBundle, ...]:
        """
        Requests the batch of receipts from the given peer, returning the
        received receipt data.
        """
        self.logger.debug("Requesting receipts for %d headers from %s",
                          len(batch), peer)
        try:
            receipt_bundles = await peer.requests.get_receipts(batch)
        except TimeoutError:
            self.logger.debug(
                "Timed out requesting receipts for %d headers from %s",
                len(batch),
                peer,
            )
            return tuple()
        except CancelledError:
            self.logger.debug("Pending receipts call to %r future cancelled",
                              peer)
            return tuple()
        except OperationCancelled:
            self.logger.trace(
                "Pending receipts call to %r operation cancelled", peer)
            return tuple()
        except PeerConnectionLost:
            self.logger.debug(
                "Peer went away, cancelling the receipts request and moving on..."
            )
            return tuple()
        except Exception:
            self.logger.exception("Unknown error when getting receipts")
            raise

        if not receipt_bundles:
            return tuple()

        return receipt_bundles
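_run_receipt_download_batch above is the canonical TaskQueue consumer: claim a batch, attempt the work, and always call complete() so that any tasks which failed are released back to the queue for another consumer. A minimal sketch of that pattern detached from the syncer (do_work and max_batch are hypothetical):

async def do_work(tasks):
    # hypothetical stand-in: pretend every claimed task succeeds
    return tasks

async def consume(q: 'TaskQueue', max_batch: int = 10) -> None:
    while True:
        batch_id, tasks = await q.get(max_batch)
        finished = ()
        try:
            finished = await do_work(tasks)
        finally:
            # complete() must always run: tasks not in 'finished' are
            # re-queued, as Example #2 demonstrates
            q.complete(batch_id, finished)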
Example #11
async def test_invalid_priority_order(order_fn):
    q = TaskQueue(order_fn=order_fn)

    with pytest.raises(ValidationError):
        await wait(q.add((1, )))
Example #12
async def test_valid_priority_order(order_fn):
    q = TaskQueue(order_fn=order_fn)

    # this just needs to not crash, when testing sortability
    await wait(q.add((1, )))
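Examples #11 and #12 share the order_fn parameter name but exercise opposite fixtures: add() fails when the keys produced by order_fn cannot be sorted (the "testing sortability" the comment above refers to). Hypothetical parametrizations for each (illustrative only):

import pytest

# for test_invalid_priority_order: keys with no defined ordering
@pytest.mark.parametrize('order_fn', (lambda x: object(), lambda x: {}))
async def test_invalid_priority_order(order_fn):
    ...

# for test_valid_priority_order: any function returning sortable keys
@pytest.mark.parametrize('order_fn', (lambda x: x, lambda x: -x, str))
async def test_valid_priority_order(order_fn):
    ...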
Example #13
async def test_custom_priority_order():
    q = TaskQueue(maxsize=4, order_fn=lambda x: 0 - x)

    await wait(q.add((2, 1, 3)))
    (batch, tasks) = await wait(q.get())
    assert tasks == (3, 2, 1)
Example #14
async def test_default_priority_order():
    q = TaskQueue(maxsize=4)
    await wait(q.add((2, 1, 3)))
    (batch, tasks) = await wait(q.get())
    assert tasks == (1, 2, 3)
Example #15
class BaseHeaderChainSyncer(BaseService, PeerSubscriber):
    """
    Sync with the Ethereum network by fetching/storing block headers.

    Here, the run() method will execute the sync loop until our local head is the same as the one
    with the highest TD announced by any of our peers.
    """
    # We'll only sync if we are connected to at least min_peers_to_sync.
    min_peers_to_sync = 1
    # TODO: Instead of a fixed timeout, we should use a variable one that gets adjusted based on
    # the round-trip times from our download requests.
    _reply_timeout = 60
    _seal_check_random_sample_rate = SEAL_CHECK_RANDOM_SAMPLE_RATE
    # the latest header hash of the peer on the current sync
    _target_header_hash = None
    header_queue: TaskQueue[BlockHeader]

    def __init__(self,
                 chain: AsyncChain,
                 db: AsyncHeaderDB,
                 peer_pool: AnyPeerPool,
                 token: CancelToken = None) -> None:
        super().__init__(token)
        self.chain = chain
        self.db = db
        self.peer_pool = peer_pool
        self._handler = PeerRequestHandler(self.db, self.logger,
                                           self.cancel_token)
        self._syncing = False
        self._sync_complete = asyncio.Event()
        self._sync_requests: asyncio.Queue[
            HeaderRequestingPeer] = asyncio.Queue()

        # pending queue size should be big enough to avoid starving the processing consumers, but
        # small enough to avoid wasteful over-requests before post-processing can happen
        max_pending_headers = ETHPeer.max_headers_fetch * 8
        self.header_queue = TaskQueue(max_pending_headers,
                                      attrgetter('block_number'))

    @property
    def msg_queue_maxsize(self) -> int:
        # This is a rather arbitrary value, but when the sync is operating normally we never see
        # the msg queue grow past a few hundred items, so this should be a reasonable limit for
        # now.
        return 2000

    def get_target_header_hash(self) -> Hash32:
        if self._target_header_hash is None:
            raise ValidationError(
                "Cannot check the target hash when there is no active sync")
        else:
            return self._target_header_hash

    def register_peer(self, peer: BasePeer) -> None:
        self._sync_requests.put_nowait(
            cast(HeaderRequestingPeer, self.peer_pool.highest_td_peer))

    async def _handle_msg_loop(self) -> None:
        while self.is_operational:
            peer, cmd, msg = await self.wait(self.msg_queue.get())
            # Our handle_msg() method runs cpu-intensive tasks in sub-processes so that the main
            # loop can keep processing msgs, and that's why we use self.run_task() instead of
            # waiting for it to finish here.
            self.run_task(
                self.handle_msg(cast(HeaderRequestingPeer, peer), cmd, msg))

    async def handle_msg(self, peer: HeaderRequestingPeer,
                         cmd: protocol.Command,
                         msg: protocol._DecodedMsgType) -> None:
        try:
            await self._handle_msg(peer, cmd, msg)
        except OperationCancelled:
            # Silently swallow OperationCancelled exceptions because otherwise they'll be caught
            # by the except below and treated as unexpected.
            pass
        except Exception:
            self.logger.exception(
                "Unexpected error when processing msg from %s", peer)

    async def _run(self) -> None:
        self.run_task(self._handle_msg_loop())
        with self.subscribe(self.peer_pool):
            while self.is_operational:
                try:
                    peer = await self.wait(self._sync_requests.get())
                except OperationCancelled:
                    # In the case of a fast sync, we return once the sync is completed, and our
                    # caller must then run the StateDownloader.
                    return
                else:
                    self.run_task(self.sync(peer))

    async def sync(self, peer: HeaderRequestingPeer) -> None:
        if self._syncing:
            self.logger.debug(
                "Got a NewBlock or a new peer, but already syncing so doing nothing"
            )
            return
        elif len(self.peer_pool) < self.min_peers_to_sync:
            self.logger.info(
                "Connected to less peers (%d) than the minimum (%d) required to sync, "
                "doing nothing", len(self.peer_pool), self.min_peers_to_sync)
            return

        self._syncing = True
        try:
            await self._sync(peer)
            self.logger.debug('Sync with peer %s finished normally', peer)
        except OperationCancelled as e:
            self.logger.info("Sync with %s was shut down: %s", peer, e)
        finally:
            self._syncing = False

    async def _sync(self, peer: HeaderRequestingPeer) -> None:
        """Try to fetch/process blocks until the given peer's head_hash.

        Returns when the peer's head_hash is available in our ChainDB, or if any error occurs
        during the sync.

        If in fast-sync mode, the _sync_complete event will be set upon successful completion of
        a sync.
        """
        head = await self.wait(self.db.coro_get_canonical_head())
        head_td = await self.wait(self.db.coro_get_score(head.hash))
        if peer.head_td <= head_td:
            self.logger.info(
                "Head TD (%d) announced by %s not higher than ours (%d), not syncing",
                peer.head_td, peer, head_td)
            return
        else:
            self.logger.debug(
                "%s announced Head TD %d, which is higher than ours (%d), starting sync",
                peer, peer.head_td, head_td)

        self.logger.info("Starting sync with %s", peer)
        last_received_header: BlockHeader = None
        # When we start the sync with a peer, we always request up to MAX_REORG_DEPTH extra
        # headers before our current head's number, in case there were chain reorgs since the last
        # time _sync() was called. All of the extra headers that are already present in our DB
        # will be discarded by _fetch_missing_headers() so we don't unnecessarily process them
        # again.
        start_at = max(GENESIS_BLOCK_NUMBER + 1,
                       head.block_number - MAX_REORG_DEPTH)
        while self.is_operational:
            if not peer.is_operational:
                self.logger.info("%s disconnected, aborting sync", peer)
                break

            try:
                fetch_headers_coro = self._fetch_missing_headers(
                    peer, start_at)
                headers = await self.wait(fetch_headers_coro)
                self.logger.trace('sync received new headers: %r', headers)
            except OperationCancelled:
                self.logger.info("Sync with %s completed", peer)
                break
            except TimeoutError:
                self.logger.warn(
                    "Timeout waiting for header batch from %s, aborting sync",
                    peer)
                await peer.disconnect(DisconnectReason.timeout)
                break
            except ValidationError as err:
                self.logger.warn(
                    "Invalid header response sent by peer %s disconnecting: %s",
                    peer,
                    err,
                )
                await peer.disconnect(DisconnectReason.useless_peer)
                break

            if not headers:
                if last_received_header is None:
                    request_parent = head
                else:
                    request_parent = last_received_header
                if head_td < peer.head_td:
                    # peer claims to have a better header, but didn't return it. Boot peer
                    # TODO ... also blacklist, because it keeps trying to reconnect
                    self.logger.warning(
                        "%s announced difficulty %s, but didn't return any headers after %r@%s",
                        peer,
                        peer.head_td,
                        request_parent,
                        head_td,
                    )
                    await peer.disconnect(DisconnectReason.subprotocol_error)
                else:
                    self.logger.info(
                        "Got no new headers from %s, aborting sync", peer)
                break

            first = headers[0]
            first_parent = None
            if last_received_header is None:
                # on the first request, make sure that the earliest ancestor has a parent in our db
                try:
                    first_parent = await self.wait(
                        self.db.coro_get_block_header_by_hash(
                            first.parent_hash))
                except HeaderNotFound:
                    self.logger.warn(
                        "Unable to find common ancestor betwen our chain and %s",
                        peer)
                    break
            elif last_received_header.hash != first.parent_hash:
                # on follow-ups, require the first header in this batch to be next in succession
                self.logger.warn(
                    "Header batch starts with %r, with parent %s, but last header was %r",
                    first,
                    encode_hex(first.parent_hash[:4]),
                    last_received_header,
                )
                break

            self.logger.debug(
                "Got new header chain from %s starting at #%d",
                peer,
                first.block_number,
            )
            try:
                await self.chain.coro_validate_chain(
                    last_received_header or first_parent,
                    headers,
                    self._seal_check_random_sample_rate,
                )
            except ValidationError as e:
                self.logger.warn(
                    "Received invalid headers from %s, disconnecting: %s",
                    peer, e)
                await peer.disconnect(DisconnectReason.subprotocol_error)
                break

            for header in headers:
                head_td += header.difficulty

            # Setting the latest header hash for the peer, before queuing header processing tasks
            self._target_header_hash = peer.head_hash

            new_headers = tuple(h for h in headers
                                if h not in self.header_queue)
            await self.wait(self.header_queue.add(new_headers))
            last_received_header = headers[-1]
            start_at = last_received_header.block_number + 1

    async def _fetch_missing_headers(self, peer: HeaderRequestingPeer,
                                     start_at: int) -> Tuple[BlockHeader, ...]:
        """Fetch a batch of headers starting at start_at and return the ones we're missing."""
        self.logger.debug("Requsting chain of headers from %s starting at #%d",
                          peer, start_at)

        headers = await peer.requests.get_block_headers(
            start_at,
            peer.max_headers_fetch,
            skip=0,
            reverse=False,
        )

        # We only want headers that are missing, so we iterate over the list
        # until we find the first missing header, after which we return all of
        # the remaining headers.
        async def get_missing_tail(
            self: 'BaseHeaderChainSyncer',
            headers: Tuple[BlockHeader,
                           ...]) -> AsyncGenerator[BlockHeader, None]:
            iter_headers = iter(headers)
            for header in iter_headers:
                if header in self.header_queue:
                    self.logger.debug(
                        "Discarding header that is already queued: %s", header)
                    continue

                is_present = await self.wait(
                    self.db.coro_header_exists(header.hash))
                if is_present:
                    self.logger.debug(
                        "Discarding header that we already have: %s", header)
                else:
                    yield header
                    break

            for header in iter_headers:
                yield header

        # The inner list comprehension is needed because async_generators
        # cannot be cast to a tuple.
        tail_headers = tuple(
            [header async for header in get_missing_tail(self, headers)])

        return tail_headers

    @abstractmethod
    async def _handle_msg(self, peer: HeaderRequestingPeer,
                          cmd: protocol.Command,
                          msg: protocol._DecodedMsgType) -> None:
        raise NotImplementedError("Must be implemented by subclasses")
Example #16
class BaseHeaderChainSyncer(BaseService, PeerSubscriber):
    """
    Sync with the Ethereum network by fetching/storing block headers.

    Here, the run() method will execute the sync loop until our local head is the same as the one
    with the highest TD announced by any of our peers.
    """
    # We'll only sync if we are connected to at least min_peers_to_sync.
    min_peers_to_sync = 1
    header_queue: TaskQueue[BlockHeader]

    # This is a rather arbitrary value, but when the sync is operating normally we never see
    # the msg queue grow past a few hundred items, so this should be a reasonable limit for
    # now.
    msg_queue_maxsize = 2000

    def __init__(self,
                 chain: AsyncChain,
                 db: AsyncHeaderDB,
                 peer_pool: BaseChainPeerPool,
                 token: CancelToken = None) -> None:
        super().__init__(token)
        self.chain = chain
        self.db = db
        self.peer_pool = peer_pool
        self._handler = PeerRequestHandler(self.db, self.logger, self.cancel_token)
        self._peer_header_syncer: 'PeerHeaderSyncer' = None
        self._last_target_header_hash = None
        self._tip_monitor = self.tip_monitor_class(peer_pool, token=self.cancel_token)

        # pending queue size should be big enough to avoid starving the processing consumers, but
        # small enough to avoid wasteful over-requests before post-processing can happen
        max_pending_headers = ETHPeer.max_headers_fetch * 8
        self.header_queue = TaskQueue(max_pending_headers, attrgetter('block_number'))

    def get_target_header_hash(self) -> Hash32:
        if self._peer_header_syncer is None and self._last_target_header_hash is None:
            raise ValidationError("Cannot check the target hash before a sync has run")
        elif self._peer_header_syncer is not None:
            return self._peer_header_syncer.get_target_header_hash()
        else:
            return self._last_target_header_hash

    @property
    @abstractmethod
    def tip_monitor_class(self) -> Type[BaseChainTipMonitor]:
        pass

    async def _handle_msg_loop(self) -> None:
        while self.is_operational:
            peer, cmd, msg = await self.wait(self.msg_queue.get())
            # Our handle_msg() method runs cpu-intensive tasks in sub-processes so that the main
            # loop can keep processing msgs, and that's why we use self.run_task() instead of
            # waiting for it to finish here.
            self.run_task(self.handle_msg(cast(BaseChainPeer, peer), cmd, msg))

    async def handle_msg(self, peer: BaseChainPeer, cmd: protocol.Command,
                         msg: protocol._DecodedMsgType) -> None:
        try:
            await self._handle_msg(peer, cmd, msg)
        except OperationCancelled:
            # Silently swallow OperationCancelled exceptions because otherwise they'll be caught
            # by the except below and treated as unexpected.
            pass
        except Exception:
            self.logger.exception("Unexpected error when processing msg from %s", peer)

    async def _run(self) -> None:
        self.run_daemon(self._tip_monitor)
        self.run_daemon_task(self._handle_msg_loop())
        with self.subscribe(self.peer_pool):
            try:
                async for highest_td_peer in self._tip_monitor.wait_tip_info():
                    self.run_task(self.sync(highest_td_peer))
            except OperationCancelled:
                # In the case of a fast sync, we return once the sync is completed, and our
                # caller must then run the StateDownloader.
                return
            else:
                self.logger.debug("chain tip monitor stopped returning tip info to %s", self)

    @property
    def _syncing(self) -> bool:
        return self._peer_header_syncer is not None

    @contextmanager
    def _get_peer_header_syncer(self, peer: BaseChainPeer) -> Iterator['PeerHeaderSyncer']:
        if self._syncing:
            raise ValidationError("Cannot sync headers from two peers at the same time")

        self._peer_header_syncer = PeerHeaderSyncer(
            self.chain,
            self.db,
            peer,
            self.cancel_token,
        )
        self.run_child_service(self._peer_header_syncer)
        try:
            yield self._peer_header_syncer
        except OperationCancelled:
            pass
        else:
            self._peer_header_syncer.cancel_nowait()
        finally:
            self.logger.info("Header Sync with %s ended", peer)
            self._last_target_header_hash = self._peer_header_syncer.get_target_header_hash()
            self._peer_header_syncer = None

    async def sync(self, peer: BaseChainPeer) -> None:
        if self._syncing:
            self.logger.debug(
                "Got a NewBlock or a new peer, but already syncing so doing nothing")
            return
        elif len(self.peer_pool) < self.min_peers_to_sync:
            self.logger.info(
                "Connected to less peers (%d) than the minimum (%d) required to sync, "
                "doing nothing", len(self.peer_pool), self.min_peers_to_sync)
            return

        with self._get_peer_header_syncer(peer) as syncer:
            async for header_batch in syncer.next_header_batch():
                new_headers = tuple(h for h in header_batch if h not in self.header_queue)
                await self.wait(self.header_queue.add(new_headers))

    @abstractmethod
    async def _handle_msg(self, peer: BaseChainPeer, cmd: protocol.Command,
                          msg: protocol._DecodedMsgType) -> None:
        raise NotImplementedError("Must be implemented by subclasses")
Example #17
async def test_cannot_add_single_non_tuple_task():
    q = TaskQueue()
    with pytest.raises(ValidationError):
        await wait(q.add(1))
Example #18
class BaseHeaderChainSyncer(BaseService):
    """
    Sync with the Ethereum network by fetching/storing block headers.

    Here, the run() method will execute the sync loop until our local head is the same as the one
    with the highest TD announced by any of our peers.
    """
    # We'll only sync if we are connected to at least min_peers_to_sync.
    min_peers_to_sync = 1
    header_queue: TaskQueue[BlockHeader]

    def __init__(self,
                 chain: AsyncChain,
                 db: AsyncHeaderDB,
                 peer_pool: BaseChainPeerPool,
                 token: CancelToken = None) -> None:
        super().__init__(token)
        self.chain = chain
        self.db = db
        self.peer_pool = peer_pool
        self._peer_header_syncer: 'PeerHeaderSyncer' = None
        self._last_target_header_hash: Hash32 = None
        self._tip_monitor = self.tip_monitor_class(peer_pool, token=self.cancel_token)

        # pending queue size should be big enough to avoid starving the processing consumers, but
        # small enough to avoid wasteful over-requests before post-processing can happen
        max_pending_headers = ETHPeer.max_headers_fetch * 8
        self.header_queue = TaskQueue(max_pending_headers, attrgetter('block_number'))

    def get_target_header_hash(self) -> Hash32:
        if self._peer_header_syncer is None and self._last_target_header_hash is None:
            raise ValidationError("Cannot check the target hash before a sync has run")
        elif self._peer_header_syncer is not None:
            return self._peer_header_syncer.get_target_header_hash()
        else:
            return self._last_target_header_hash

    @property
    @abstractmethod
    def tip_monitor_class(self) -> Type[BaseChainTipMonitor]:
        pass

    async def _run(self) -> None:
        self.run_daemon(self._tip_monitor)
        if self.peer_pool.event_bus is not None:
            self.run_daemon_task(self.handle_sync_status_requests())
        try:
            async for highest_td_peer in self._tip_monitor.wait_tip_info():
                self.run_task(self.sync(highest_td_peer))
        except OperationCancelled:
            # In the case of a fast sync, we return once the sync is completed, and our
            # caller must then run the StateDownloader.
            return
        else:
            self.logger.debug("chain tip monitor stopped returning tip info to %s", self)

    @property
    def _syncing(self) -> bool:
        return self._peer_header_syncer is not None

    @contextmanager
    def _get_peer_header_syncer(self, peer: BaseChainPeer) -> Iterator['PeerHeaderSyncer']:
        if self._syncing:
            raise ValidationError("Cannot sync headers from two peers at the same time")

        self._peer_header_syncer = PeerHeaderSyncer(
            self.chain,
            self.db,
            peer,
            self.cancel_token,
        )
        self.run_child_service(self._peer_header_syncer)
        try:
            yield self._peer_header_syncer
        except OperationCancelled:
            pass
        else:
            self._peer_header_syncer.cancel_nowait()
        finally:
            self.logger.info("Header Sync with %s ended", peer)
            self._last_target_header_hash = self._peer_header_syncer.get_target_header_hash()
            self._peer_header_syncer = None

    async def sync(self, peer: BaseChainPeer) -> None:
        if self._syncing:
            self.logger.debug(
                "Got a NewBlock or a new peer, but already syncing so doing nothing")
            return
        elif len(self.peer_pool) < self.min_peers_to_sync:
            self.logger.info(
                "Connected to less peers (%d) than the minimum (%d) required to sync, "
                "doing nothing", len(self.peer_pool), self.min_peers_to_sync)
            return

        with self._get_peer_header_syncer(peer) as syncer:
            async for header_batch in syncer.next_header_batch():
                new_headers = tuple(h for h in header_batch if h not in self.header_queue)
                await self.wait(self.header_queue.add(new_headers))

    def get_sync_status(self) -> Tuple[bool, Optional[SyncProgress]]:
        if not self._syncing:
            return False, None
        return True, self._peer_header_syncer.sync_progress

    async def handle_sync_status_requests(self) -> None:
        async for req in self.peer_pool.event_bus.stream(SyncingRequest):
            self.peer_pool.event_bus.broadcast(SyncingResponse(*self.get_sync_status()),
                                               req.broadcast_config())
Example #19
async def test_unlimited_queue_by_default():
    q = TaskQueue()
    await wait(q.add(tuple(range(100001))))
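Every example on this page is a coroutine function; the original suite presumably relies on a pytest asyncio plugin to run them (an assumption). To drive a single example by hand, a plain event loop suffices:

import asyncio

# run one example directly on a fresh event loop
asyncio.run(test_unlimited_queue_by_default())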