Example no. 1
0
class ViewChanger(HasActionQueue, MessageProcessor):
    def __init__(self, node):
        self.node = node

        self.view_no = 0  # type: int

        HasActionQueue.__init__(self)

        self.inBox = deque()
        self.outBox = deque()
        self.inBoxRouter = Router(
            (InstanceChange, self.process_instance_change_msg),
            (ViewChangeDone, self.process_vchd_msg))

        self.instanceChanges = InstanceChanges()

        # The quorum of `ViewChangeDone` msgs is different depending on whether we're doing a real view change,
        # or just propagating view_no and Primary from `CurrentState` messages sent to a newly joined Node.
        # TODO: separate real view change and Propagation of Primary
        # TODO: separate catch-up, view-change and primary selection so that
        # they are really independent.
        self.propagate_primary = False

        # Tracks whether other nodes are indicating that this node is in a
        # lower view than they are. Keeps a map of view no to senders.
        # TODO: if sufficient ViewChangeDone messages are received for 2
        # different (and higher) views, consider whether the ongoing view
        # change should be interrupted.
        self._next_view_indications = SortedDict()

        self._view_change_in_progress = False

        self.previous_master_primary = None

        self.set_defaults()

        self.initInsChngThrottling()

    def __repr__(self):
        return "{}".format(self.name)

    # PROPERTIES

    @property
    def name(self):
        return self.node.name

    @property
    def config(self):
        return self.node.config

    @property
    def quorums(self):
        return self.node.quorums

    @property
    def view_change_in_progress(self):
        return self._view_change_in_progress

    @view_change_in_progress.setter
    def view_change_in_progress(self, value):
        self._view_change_in_progress = value

    @property
    def quorum(self) -> int:
        # TODO: re-factor this and separate these two states (selection of a
        # new primary and propagation of an existing one)
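        # When no view change is in progress, or we are only propagating an
        # already elected primary, the propagate-primary quorum is used;
        # a real view change requires the view_change_done quorum.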
        if not self.view_change_in_progress:
            return self.node.quorums.propagate_primary.value
        if self.propagate_primary:
            return self.node.quorums.propagate_primary.value
        return self.node.quorums.view_change_done.value

    @property
    def _hasViewChangeQuorum(self):
        # This property is only meaningful for the master instance.
        """
        Checks whether a quorum of nodes have completed the view change;
        whether one of them is the next primary is checked separately in
        `has_acceptable_view_change_quorum`.
        """
        num_of_ready_nodes = len(self._view_change_done)
        diff = self.quorum - num_of_ready_nodes
        if diff > 0:
            logger.debug('{} needs {} ViewChangeDone messages'.format(
                self, diff))
            return False

        logger.debug("{} got view change quorum ({} >= {})".format(
            self.name, num_of_ready_nodes, self.quorum))
        return True

    @property
    def _is_propagated_view_change_completed(self):
        if not self._propagated_view_change_completed and \
                self.node.poolLedger is not None and \
                self.propagate_primary:

            accepted = self.get_sufficient_same_view_change_done_messages()
            if accepted is not None:
                accepted_pool_ledger_i = \
                    next(filter(lambda x: x[0] == POOL_LEDGER_ID,
                                accepted[1]))
                self_pool_ledger_i = \
                    next(filter(lambda x: x[0] == POOL_LEDGER_ID,
                                self.node.ledger_summary))
                logger.debug("{} Primary selection has been already completed "
                             "on pool ledger info = {}, primary {}, self pool "
                             "ledger info {}".format(self,
                                                     accepted_pool_ledger_i,
                                                     accepted[0],
                                                     self_pool_ledger_i))
                self._propagated_view_change_completed = True

        return self._propagated_view_change_completed

    @property
    def has_view_change_from_primary(self) -> bool:
        if not self._has_view_change_from_primary:
            next_primary_name = self.node.elector.next_primary_node_name(0)

            if next_primary_name not in self._view_change_done:
                logger.debug(
                    "{} has not received ViewChangeDone from the next "
                    "primary {} (view_no: {}, totalNodes: {})".format(
                        self.name, next_primary_name, self.view_no,
                        self.node.totalNodes))
            else:
                logger.debug(
                    '{} received ViewChangeDone from primary {}'.format(
                        self, next_primary_name))
                self._has_view_change_from_primary = True

        return self._has_view_change_from_primary

    @property
    def has_acceptable_view_change_quorum(self):
        if not self._has_acceptable_view_change_quorum:
            self._has_acceptable_view_change_quorum = (
                self._hasViewChangeQuorum
                and (self._is_propagated_view_change_completed
                     or self.has_view_change_from_primary))
        return self._has_acceptable_view_change_quorum

    @property
    def is_behind_for_view(self) -> bool:
        # Checks if the node is currently behind the accepted state for this
        # view, only makes sense to call when the node has an acceptable
        # view change quorum
        _, accepted_ledger_summary = \
            self.get_sufficient_same_view_change_done_messages()
        for (ledgerId, own_ledger_size, _), (_, accepted_ledger_size, _) in \
                zip(self.node.ledger_summary, accepted_ledger_summary):
            if own_ledger_size < accepted_ledger_size:
                logger.debug(
                    "{} ledger {} sizes are differ: own {} accepted {}"
                    "".format(self, ledgerId, own_ledger_size,
                              accepted_ledger_size))
                return True
        return False

    # __ PROPERTIES __

    # EXTERNAL EVENTS

    def on_master_degradation(self):
        """
        """
        view_no = self.view_no + 1
        logger.info(
            "{} sending instance with view_no = {} and trying to start "
            "view change since performance of master instance degraded"
            "".format(self, view_no))
        self.sendInstanceChange(view_no)
        self.do_view_change_if_possible(view_no)

    def on_primary_loss(self):
        view_no = self.view_no + 1
        logger.info("{} sending instance with view_no = {} and trying "
                    "to start view change since primary was lost"
                    "".format(self, view_no))
        self.sendInstanceChange(view_no, Suspicions.PRIMARY_DISCONNECTED)
        self.do_view_change_if_possible(view_no)

    # TODO: we already have `on_primary_loss`; do we need this one as well?
    def on_primary_about_to_be_disconnected(self):
        view_no = self.view_no + 1
        logger.info("{} sending instance with view_no = {} "
                    "since primary is about to be disconnected"
                    "".format(self, view_no))
        self.sendInstanceChange(view_no,
                                Suspicions.PRIMARY_ABOUT_TO_BE_DISCONNECTED)

    def on_suspicious_primary(self, suspicion: Suspicions):
        view_no = self.view_no + 1
        logger.info("{} sending instance with view_no = {} since primary "
                    "seems suspicious, reason {}"
                    "".format(self, view_no, suspicion.reason))
        self.sendInstanceChange(view_no, suspicion)
        # TODO: why don't we try to start a view change here?

    def on_view_change_not_completed_in_time(self):
        view_no = self.view_no + 1
        logger.info("{} sending instance with view_no = {} since "
                    "view change to view {} is not completed in time"
                    "".format(self, view_no, self.view_no))
        self.sendInstanceChange(view_no, Suspicions.INSTANCE_CHANGE_TIMEOUT)

    def on_catchup_complete(self):
        if self.node.is_synced and self.node.master_replica.isPrimary is None and \
                not self._is_propagated_view_change_completed:
            self._send_view_change_done_message()

        self._start_selection()

    def on_future_view_vchd_msg(self, view_no, frm):
        assert view_no > self.view_no
        if view_no not in self._next_view_indications:
            self._next_view_indications[view_no] = set()
        self._next_view_indications[view_no].add(frm)
        self._start_view_change_if_possible(view_no)

    # __ EXTERNAL EVENTS __

    def process_instance_change_msg(self, instChg: InstanceChange,
                                    frm: str) -> None:
        """
        Validate and process an instance change request.

        :param instChg: the instance change request
        :param frm: the name of the node that sent this `msg`
        """
        logger.debug("{} received instance change request: {} from {}".format(
            self, instChg, frm))

        # TODO: add sender to blacklist?
        if not isinstance(instChg.viewNo, int):
            self.node.discard(
                instChg, "{}field view_no has incorrect type: {}".format(
                    VIEW_CHANGE_PREFIX, type(instChg.viewNo)))
        elif instChg.viewNo <= self.view_no:
            self.node.discard(
                instChg, "Received instance change request with view no {} "
                "which is not more than its view no {}".format(
                    instChg.viewNo, self.view_no), logger.info)
        else:
            # Record the instance change vote. An instance change is sent only
            # when the master is found to be degraded, but if a quorum of
            # instance change votes is reached, the view is changed even if
            # the master is not degraded.
            self._on_verified_instance_change_msg(instChg, frm)

            if self.instanceChanges.hasInstChngFrom(instChg.viewNo, self.name):
                logger.debug(
                    "{} received instance change message {} but has already "
                    "sent an instance change message".format(self, instChg))
            elif not self.node.monitor.isMasterDegraded():
                logger.debug(
                    "{} received instance change message {} but did not "
                    "find the master to be slow".format(self, instChg))
            else:
                logger.info(
                    "{}{} found master degraded after receiving instance change"
                    " message from {}".format(VIEW_CHANGE_PREFIX, self, frm))
                self.sendInstanceChange(instChg.viewNo)

    def process_vchd_msg(self, msg: ViewChangeDone, sender: str) -> bool:
        """
        Processes ViewChangeDone messages. Once n-f messages have been
        received, decides on a primary for a specific replica.

        :param msg: ViewChangeDone message
        :param sender: the name of the node from which this message was sent
        """

        logger.debug("{}'s primary selector started processing of "
                     "ViewChangeDone msg from {} : {}".format(
                         self.name, sender, msg))

        view_no = msg.viewNo

        if self.view_no != view_no:
            self.discard(msg,
                         '{} got Primary from {} for view no {} '
                         'whereas current view no is {}'.format(
                             self, sender, view_no, self.view_no),
                         logMethod=logger.debug)
            return False

        new_primary_name = msg.name
        if new_primary_name == self.previous_master_primary:
            self.discard(msg,
                         '{} got Primary from {} for {} who was primary of '
                         'master in previous view too'.format(
                             self, sender, new_primary_name),
                         logMethod=logger.debug)
            return False

        # A node can send ViewChangeDone more than once, so track it first
        self._on_verified_view_change_done_msg(msg, sender)
        # TODO: why is this checked after the message has been tracked?
        if self.node.master_replica.hasPrimary:
            self.discard(
                msg, "it already decided primary which is {}".format(
                    self.node.master_replica.primaryName), logger.debug)
            return False

        self._start_selection()
        return True

    def send(self, msg):
        """
        Send a message to the node.

        :param msg: the message to send
        """
        logger.debug("{}'s view_changer sending {}".format(self.name, msg))
        self.outBox.append(msg)

    async def serviceQueues(self, limit=None) -> int:
        """
        Service at most `limit` messages from the inBox.

        :param limit: the maximum number of messages to service
        :return: the number of messages successfully processed
        """

        return await self.inBoxRouter.handleAll(self.inBox, limit)

    def sendInstanceChange(self,
                           view_no: int,
                           suspicion=Suspicions.PRIMARY_DEGRADED):
        """
        Broadcast an instance change request to all the remaining nodes

        :param view_no: the view number when the instance change is requested
        :param suspicion: the reason for requesting the instance change
        """

        # An instance change message may be sent only if no instance change
        # has been sent within the last `ViewChangeWindowSize` seconds, i.e.
        # the previously sent message is old enough.
        canSendInsChange, cooldown = self.insChngThrottler.acquire()
        if canSendInsChange:
            logger.info("{}{} sending an instance change with view_no {}"
                        " since {}".format(VIEW_CHANGE_PREFIX, self, view_no,
                                           suspicion.reason))
            logger.info("{}{} metrics for monitor: {}".format(
                MONITORING_PREFIX, self, self.node.monitor.prettymetrics))
            msg = self._create_instance_change_msg(view_no, suspicion.code)
            self.send(msg)
            # record instance change vote for self and try to change the view
            # if quorum is reached
            self._on_verified_instance_change_msg(msg, self.name)
        else:
            logger.debug(
                "{} cannot send an instance change for another {} "
                "seconds".format(self, cooldown))

    # noinspection PyAttributeOutsideInit
    def initInsChngThrottling(self):
        windowSize = self.node.config.ViewChangeWindowSize
        ratchet = Ratchet(a=2, b=0.05, c=1, base=2, peak=windowSize)
        self.insChngThrottler = Throttler(windowSize, ratchet.get)

    def _create_instance_change_msg(self, view_no, suspicion_code):
        return InstanceChange(view_no, suspicion_code)

    def _on_verified_instance_change_msg(self, msg, frm):
        view_no = msg.viewNo

        if not self.instanceChanges.hasInstChngFrom(view_no, frm):
            self.instanceChanges.addVote(msg, frm)
            if view_no > self.view_no:
                self.do_view_change_if_possible(view_no)

    def do_view_change_if_possible(self, view_no):
        # TODO: Need to handle skewed distributions which can arise due to
        # malicious nodes sending messages early on
        can, whyNot = self._canViewChange(view_no)
        if can:
            logger.info("{}{} initiating a view change to {} from {}".format(
                VIEW_CHANGE_PREFIX, self, view_no, self.view_no))
            self.propagate_primary = False
            self.startViewChange(view_no)
        else:
            logger.debug(whyNot)
        return can

    def _start_view_change_if_possible(self, view_no) -> bool:
        ind_count = len(self._next_view_indications[view_no])
        if self.quorums.propagate_primary.is_reached(ind_count):
            logger.info(
                '{}{} starting view change for {} after {} view change '
                'indications from other nodes'.format(VIEW_CHANGE_PREFIX, self,
                                                      view_no, ind_count))
            self.propagate_primary = True
            self.startViewChange(view_no)
            return True
        return False

    def _canViewChange(self, proposedViewNo: int) -> (bool, str):
        """
        Return whether there's quorum for view change for the proposed view
        number and its view is less than or equal to the proposed view
        """
        msg = None
        quorum = self.quorums.view_change.value
        if not self.instanceChanges.hasQuorum(proposedViewNo, quorum):
            msg = '{} has no quorum for view {}'.format(self, proposedViewNo)
        elif not proposedViewNo > self.view_no:
            msg = '{} is in higher view more than {}'.format(
                self, proposedViewNo)

        return not bool(msg), msg

    def startViewChange(self, proposed_view_no: int):
        """
        Trigger the view change process.

        :param proposed_view_no: the new view number after view change.
        """
        # TODO: consider moving this to pool manager
        # TODO: view change is a special case, which can have different
        # implementations - we need to make this logic pluggable

        for view_no in tuple(self._next_view_indications.keys()):
            if view_no > proposed_view_no:
                break
            self._next_view_indications.pop(view_no)

        self.view_no = proposed_view_no
        self.view_change_in_progress = True
        self.previous_master_primary = self.node.master_primary_name
        self.set_defaults()
        self.initInsChngThrottling()

        self.node.on_view_change_start()
        self.node.start_catchup()

    def _on_verified_view_change_done_msg(self, msg, frm):
        new_primary_name = msg.name
        ledger_summary = msg.ledgerInfo

        # TODO: what should happen if a node sends several different
        # ViewChangeDone messages?
        data = (new_primary_name, ledger_summary)
        self._view_change_done[frm] = data

    def _start_selection(self):

        error = None

        if not self.node.is_synced:
            error = "mode is {}".format(self.node.mode)
        elif not self.has_acceptable_view_change_quorum:
            error = "has no view change quorum or no message from next primary"
        else:
            rv = self.get_sufficient_same_view_change_done_messages()
            if rv is None:
                error = "there are not sufficient same ViewChangeDone messages"
            elif not (self._is_propagated_view_change_completed
                      or self._verify_primary(*rv)):
                error = "failed to verify primary"

        if error is not None:
            logger.debug('{} cannot start primary selection because {}'.format(
                self, error))
            return

        if self.is_behind_for_view:
            logger.debug(
                '{} is synced and has an acceptable view change quorum '
                'but is behind the accepted state'.format(self))
            self.node.start_catchup()
            return

        logger.debug("{} starting selection".format(self))

        nodeReg = None
        # if the view change has already been completed elsewhere, use the
        # node registry as it was at the moment when that happened
        if self._is_propagated_view_change_completed:
            assert self._accepted_view_change_done_message is not None
            ledger_summary = self._accepted_view_change_done_message[1]
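            # each ledger summary entry is (ledger_id, size, root_hash),
            # so element [1] is the ledger size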
            pool_ledger_size = ledger_summary[POOL_LEDGER_ID][1]
            nodeReg = self.node.poolManager.getNodeRegistry(pool_ledger_size)

        self.node.select_primaries(nodeReg)

        if self.view_change_in_progress:
            self.view_change_in_progress = False
            self.node.on_view_change_complete()
            self.instanceChanges.pop(self.view_no - 1, None)
            self.previous_master_primary = None
            self.propagate_primary = False

    def set_defaults(self):
        # Tracks ViewChangeDone messages
        self._view_change_done = {}  # replica name -> data

        # Set when an appropriate view change quorum is found which has
        # sufficient same ViewChangeDone messages
        self._primary_verified = False

        self._has_view_change_from_primary = False

        self._has_acceptable_view_change_quorum = False

        self._accepted_view_change_done_message = None

        # accept any primary if propagating view change done msgs
        # for already completed view change
        self._propagated_view_change_completed = False

    def get_sufficient_same_view_change_done_messages(self) -> Optional[Tuple]:
        # Returns the accepted (primary, ledger_info) pair if there is a
        # quorum of identical ViewChangeDone messages, otherwise None.
        # TODO: this does not look like an optimal implementation.
        if self._accepted_view_change_done_message is None and \
                self._view_change_done:
            votes = self._view_change_done.values()
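            # convert the ledger info to nested tuples so that identical
            # votes hash and compare equal for mostCommonElement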
            votes = [(nm, tuple(tuple(i) for i in info)) for nm, info in votes]
            (new_primary, ledger_info), vote_count = mostCommonElement(votes)
            if vote_count >= self.quorum:
                logger.debug(
                    '{} found acceptable primary {} and ledger info {}'.format(
                        self, new_primary, ledger_info))
                self._accepted_view_change_done_message = (new_primary,
                                                           ledger_info)
            else:
                logger.debug('{} does not have acceptable primary, only {} '
                             'votes for {}'.format(self, vote_count,
                                                   (new_primary, ledger_info)))

        return self._accepted_view_change_done_message

    def _verify_primary(self, new_primary, ledger_info):
        """
        This method is called when sufficient number of ViewChangeDone
        received and makes steps to switch to the new primary
        """
        expected_primary = self.node.elector.next_primary_node_name(0)
        if new_primary != expected_primary:
            logger.error("{}{} expected next primary to be {}, but majority "
                         "declared {} instead for view {}".format(
                             PRIMARY_SELECTION_PREFIX, self.name,
                             expected_primary, new_primary, self.view_no))
            return False

        # TODO: check if the ledger status is the expected one
        self._primary_verified = True
        return True

    def _send_view_change_done_message(self):
        """
        Sends ViewChangeDone message to other protocol participants
        """
        new_primary_name = self.node.elector.next_primary_node_name(0)
        ledger_summary = self.node.ledger_summary
        message = ViewChangeDone(self.view_no, new_primary_name,
                                 ledger_summary)

        logger.debug("{} is sending ViewChangeDone msg to all : {}"
                     "".format(self, message))

        self.send(message)
        self._on_verified_view_change_done_msg(message, self.name)

    # overridden method of PrimaryDecider
    def get_msgs_for_lagged_nodes(self) -> List[ViewChangeDone]:
        # Should not really return a list; it does so only for compatibility
        # with the PrimaryDecider interface.
        """
        Returns the last accepted `ViewChangeDone` message. If no view change
        has happened, returns a ViewChangeDone with view no 0 for a newly
        joined node.
        """
        # TODO: consider the case where more than one node joins at the same
        # time; one of the nodes might not have an accepted ViewChangeDone
        # message.
        messages = []
        accepted = self._accepted_view_change_done_message
        if accepted:
            messages.append(ViewChangeDone(self.view_no, *accepted))
        elif self.name in self._view_change_done:
            messages.append(
                ViewChangeDone(self.view_no,
                               *self._view_change_done[self.name]))
        else:
            logger.debug(
                '{} has no ViewChangeDone message to send for view {}'.format(
                    self, self.view_no))
        return messages
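
# The following is a minimal, self-contained sketch of the instance-change
# vote bookkeeping that do_view_change_if_possible / _canViewChange rely on
# above. SimpleInstanceChanges, its method names and the node names are
# hypothetical stand-ins for illustration only, not the real InstanceChanges
# API.
from collections import defaultdict


class SimpleInstanceChanges:
    """Counts InstanceChange votes per view and answers quorum queries."""

    def __init__(self):
        self._voters = defaultdict(set)  # view_no -> set of voter names

    def add_vote(self, view_no, voter):
        self._voters[view_no].add(voter)

    def has_vote_from(self, view_no, voter):
        return voter in self._voters[view_no]

    def has_quorum(self, view_no, quorum):
        return len(self._voters[view_no]) >= quorum


# With N = 4 nodes and f = 1 the assumed view-change quorum is N - f = 3:
# a view change to view 1 may start only after three distinct votes.
ic = SimpleInstanceChanges()
for sender in ("Alpha", "Beta"):
    ic.add_vote(1, sender)
assert not ic.has_quorum(1, quorum=3)
ic.add_vote(1, "Gamma")
assert ic.has_quorum(1, quorum=3)
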
Example no. 2
0
class Replicas:
    _replica_class = Replica

    def __init__(self, node, monitor: Monitor, config=None, metrics: MetricsCollector = NullMetricsCollector()):
        # passing full node because Replica requires it
        self._node = node
        self._monitor = monitor
        self._metrics = metrics
        self._config = config
        self._replicas = SortedDict()  # type: SortedDict[int, Replica]
        self._messages_to_replicas = dict()  # type: Dict[int, deque]
        self.register_monitor_handler()

    def add_replica(self, instance_id):
        is_master = instance_id == 0
        description = "master" if is_master else "backup"
        bls_bft = self._create_bls_bft_replica(is_master)
        replica = self._new_replica(instance_id, is_master, bls_bft)
        self._replicas[instance_id] = replica
        self._messages_to_replicas[instance_id] = deque()
        self._monitor.addInstance(instance_id)

        logger.display("{} added replica {} to instance {} ({})"
                       .format(self._node.name,
                               replica,
                               instance_id,
                               description),
                       extra={"tags": ["node-replica"]})

    def remove_replica(self, inst_id: int):
        if inst_id not in self._replicas:
            return
        replica = self._replicas.pop(inst_id)

        # Aggregate all the currently forwarded requests
        req_keys = set()
        for msg in replica.inBox:
            if isinstance(msg, ReqKey):
                req_keys.add(msg.digest)
        for req_queue in replica.requestQueues.values():
            for req_key in req_queue:
                req_keys.add(req_key)
        for pp in replica.sentPrePrepares.values():
            for req_key in pp.reqIdr:
                req_keys.add(req_key)
        for pp in replica.prePrepares.values():
            for req_key in pp.reqIdr:
                req_keys.add(req_key)

        for req_key in req_keys:
            if req_key in replica.requests:
                replica.requests.free(req_key)

        self._messages_to_replicas.pop(inst_id, None)
        self._monitor.removeInstance(inst_id)
        logger.display("{} removed replica {} from instance {}".
                       format(self._node.name, replica, replica.instId),
                       extra={"tags": ["node-replica"]})

    # TODO unit test
    @property
    def some_replica_is_primary(self) -> bool:
        return any([r.isPrimary for r in self._replicas.values()])

    @property
    def master_replica_is_primary(self):
        if self.num_replicas > 0:
            return self._master_replica.isPrimary

    @property
    def _master_replica(self):
        return self._replicas[MASTER_REPLICA_INDEX]

    def service_inboxes(self, limit: int = None):
        number_of_processed_messages = \
            sum(replica.serviceQueues(limit) for replica in self._replicas.values())
        return number_of_processed_messages

    def pass_message(self, message, instance_id=None):
        if instance_id is not None:
            if instance_id not in self._replicas.keys():
                return
            self._replicas[instance_id].inBox.append(message)
        else:
            for replica in self._replicas.values():
                replica.inBox.append(message)

    def get_output(self, limit: int = None) -> Generator:
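        # split the overall limit roughly evenly across replicas so a single
        # replica with a long outBox cannot starve the others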
        if limit is None:
            per_replica = None
        else:
            per_replica = round(limit / self.num_replicas)
            if per_replica == 0:
                logger.debug("{} forcibly setting replica "
                             "message limit to {}"
                             .format(self._node.name,
                                     per_replica))
                per_replica = 1
        for replica in list(self._replicas.values()):
            num = 0
            while replica.outBox:
                yield replica.outBox.popleft()
                num += 1
                if per_replica and num >= per_replica:
                    break

    def take_ordereds_out_of_turn(self) -> tuple:
        """
        Takes all Ordered messages from outbox out of turn
        """
        for replica in self._replicas.values():
            yield replica.instId, replica._remove_ordered_from_queue()

    def _new_replica(self, instance_id: int, is_master: bool, bls_bft: BlsBft) -> Replica:
        """
        Create a new replica with the specified parameters.
        """
        return self._replica_class(self._node, instance_id, self._config, is_master, bls_bft, self._metrics)

    def _create_bls_bft_replica(self, is_master):
        bls_factory = create_default_bls_bft_factory(self._node)
        bls_bft_replica = bls_factory.create_bls_bft_replica(is_master)
        return bls_bft_replica

    @property
    def num_replicas(self):
        return len(self._replicas)

    @property
    def sum_inbox_len(self):
        return sum(len(replica.inBox) for replica in self._replicas.values())

    @property
    def all_instances_have_primary(self) -> bool:
        return all(replica.primaryName is not None
                   for replica in self._replicas.values())

    @property
    def primary_name_by_inst_id(self) -> dict:
        return {r.instId: r.primaryName.split(":", maxsplit=1)[0] if r.primaryName else None
                for r in self._replicas.values()}

    @property
    def inst_id_by_primary_name(self) -> dict:
        return {r.primaryName.split(":", maxsplit=1)[0]: r.instId
                for r in self._replicas.values() if r.primaryName}

    def register_new_ledger(self, ledger_id):
        for replica in self._replicas.values():
            replica.register_ledger(ledger_id)

    def register_monitor_handler(self):
        # note: the handlers are invoked only once per unordered request
        self._monitor.unordered_requests_handlers.append(
            self.unordered_request_handler_logging)

    def unordered_request_handler_logging(self, unordereds):
        replica = self._master_replica
        for unordered in unordereds:
            reqId, duration = unordered

            # get ppSeqNo and viewNo
            preprepares = replica.sentPrePrepares if replica.isPrimary else replica.prePrepares
            ppSeqNo = None
            viewNo = None
            for key in preprepares:
                if any([pre_pre_req == reqId for pre_pre_req in preprepares[key].reqIdr]):
                    ppSeqNo = preprepares[key].ppSeqNo
                    viewNo = preprepares[key].viewNo
                    break
            if ppSeqNo is None or viewNo is None:
                logger.warning('Unordered request with reqId: {} was not found in prePrepares. '
                               'Prepares count: {}, Commits count: {}'.format(reqId,
                                                                              len(replica.prepares),
                                                                              len(replica.commits)))
                continue

            # get pre-prepare sender
            prepre_sender = replica.primaryNames[viewNo]

            # get prepares info
            prepares = replica.prepares[(viewNo, ppSeqNo)][0] \
                if (viewNo, ppSeqNo) in replica.prepares else []
            n_prepares = len(prepares)
            str_prepares = 'noone'
            if n_prepares:
                str_prepares = ', '.join(prepares)

            # get commits info
            commits = replica.commits[(viewNo, ppSeqNo)][0] \
                if (viewNo, ppSeqNo) in replica.commits else []
            n_commits = len(commits)
            str_commits = 'noone'
            if n_commits:
                str_commits = ', '.join(commits)

            # get txn content
            content = replica.requests[reqId].finalised.as_dict \
                if reqId in replica.requests else 'no content saved'

            logger.warning('Consensus for digest {} was not achieved within {} seconds. '
                           'Primary node is {}. '
                           'Received Pre-Prepare from {}. '
                           'Received {} valid Prepares from {}. '
                           'Received {} valid Commits from {}. '
                           'Transaction contents: {}. '
                           .format(reqId, duration, replica.primaryName.split(':')[0], prepre_sender,
                                   n_prepares, str_prepares, n_commits, str_commits, content))

    def keys(self):
        return self._replicas.keys()

    def values(self):
        return self._replicas.values()

    def items(self):
        return self._replicas.items()

    def __getitem__(self, item):
        if not isinstance(item, int):
            raise PlenumTypeError('item', item, int)
        return self._replicas[item]

    def __len__(self):
        return self.num_replicas

    def __iter__(self):
        return self._replicas.__iter__()
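
# A minimal, hypothetical sketch of the draining policy implemented by
# Replicas.get_output above: an overall message limit is split roughly
# evenly across replicas so that one replica with a long outBox cannot
# starve the others. FakeReplica and drain() are illustration-only names.
from collections import deque


class FakeReplica:
    def __init__(self, messages):
        self.outBox = deque(messages)


def drain(replicas, limit=None):
    if limit is None:
        per_replica = None
    else:
        per_replica = max(1, round(limit / len(replicas)))
    for replica in replicas:
        num = 0
        while replica.outBox:
            yield replica.outBox.popleft()
            num += 1
            if per_replica and num >= per_replica:
                break


replicas = [FakeReplica(["a1", "a2", "a3"]), FakeReplica(["b1"])]
# With limit=2 each replica may emit at most one message per call.
assert list(drain(replicas, limit=2)) == ["a1", "b1"]
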
Example no. 3
0
class Replicas:
    _replica_class = Replica

    def __init__(self,
                 node,
                 monitor: Monitor,
                 config=None,
                 metrics: MetricsCollector = NullMetricsCollector()):
        # passing full node because Replica requires it
        self._node = node
        self._monitor = monitor
        self._metrics = metrics
        self._config = config
        self._replicas = SortedDict()  # type: SortedDict[int, Replica]
        self._messages_to_replicas = dict()  # type: Dict[int, deque]
        self.register_monitor_handler()

    def add_replica(self, instance_id):
        is_master = instance_id == 0
        description = "master" if is_master else "backup"
        bls_bft = self._create_bls_bft_replica(is_master)
        replica = self._new_replica(instance_id, is_master, bls_bft)
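        # a newly created replica starts in the node's current view
        # (or view 0 if the node has not settled on a view yet)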
        replica.set_view_no(
            self._node.viewNo if self._node.viewNo is not None else 0)
        self._replicas[instance_id] = replica
        self._messages_to_replicas[instance_id] = deque()
        self._monitor.addInstance(instance_id)

        logger.display("{} added replica {} to instance {} ({})".format(
            self._node.name, replica, instance_id, description),
                       extra={"tags": ["node-replica"]})

        logger.info('reset monitor due to replica addition')
        self._monitor.reset()

    def remove_replica(self, inst_id: int):
        if inst_id not in self._replicas:
            return
        replica = self._replicas.pop(inst_id)
        replica.cleanup()

        self._messages_to_replicas.pop(inst_id, None)
        self._monitor.removeInstance(inst_id)
        logger.display("{} removed replica {} from instance {}".format(
            self._node.name, replica, replica.instId),
                       extra={"tags": ["node-replica"]})

    def send_to_internal_bus(self, msg, inst_id: int = None):
        if inst_id is None:
            for replica in self._replicas.values():
                replica.internal_bus.send(msg)
        else:
            if inst_id in self._replicas:
                self._replicas[inst_id].internal_bus.send(msg)
            else:
                logger.info("Cannot send msg ({}) to the replica {} "
                            "because it does not exist.".format(msg, inst_id))

    def subscribe_to_internal_bus(self,
                                  message_type: Type,
                                  handler: Callable,
                                  inst_id: int = None):
        if inst_id is None:
            for replica in self._replicas.values():
                replica.internal_bus.subscribe(message_type, handler)
        else:
            if inst_id in self._replicas:
                self._replicas[inst_id].internal_bus.subscribe(
                    message_type, handler)
            else:
                logger.info("Cannot subscribe for {} for the replica {} "
                            "because it does not exist.".format(
                                message_type, inst_id))

    # TODO unit test
    @property
    def some_replica_is_primary(self) -> bool:
        return any([r.isPrimary for r in self._replicas.values()])

    @property
    def master_replica_is_primary(self):
        if self.num_replicas > 0:
            return self._master_replica.isPrimary

    @property
    def _master_replica(self):
        return self._replicas[MASTER_REPLICA_INDEX]

    def service_inboxes(self, limit: int = None):
        number_of_processed_messages = \
            sum(replica.serviceQueues(limit) for replica in self._replicas.values())
        return number_of_processed_messages

    def pass_message(self, message, instance_id=None):
        if instance_id is not None:
            if instance_id not in self._replicas.keys():
                return
            self._replicas[instance_id].inBox.append(message)
        else:
            for replica in self._replicas.values():
                replica.inBox.append(message)

    def get_output(self, limit: int = None) -> Generator:
        if limit is None:
            per_replica = None
        else:
            per_replica = round(limit / self.num_replicas)
            if per_replica == 0:
                logger.debug("{} forcibly setting replica "
                             "message limit to {}".format(
                                 self._node.name, per_replica))
                per_replica = 1
        for replica in list(self._replicas.values()):
            num = 0
            while replica.outBox:
                yield replica.outBox.popleft()
                num += 1
                if per_replica and num >= per_replica:
                    break

    def take_ordereds_out_of_turn(self) -> tuple:
        """
        Takes all Ordered messages from outbox out of turn
        """
        for replica in self._replicas.values():
            yield replica.instId, replica._remove_ordered_from_queue()

    def _new_replica(self, instance_id: int, is_master: bool,
                     bls_bft: BlsBft) -> Replica:
        """
        Create a new replica with the specified parameters.
        """
        return self._replica_class(self._node, instance_id, self._config,
                                   is_master, bls_bft, self._metrics)

    def _create_bls_bft_replica(self, is_master):
        bls_factory = create_default_bls_bft_factory(self._node)
        bls_bft_replica = bls_factory.create_bls_bft_replica(is_master)
        return bls_bft_replica

    @property
    def num_replicas(self):
        return len(self._replicas)

    @property
    def sum_inbox_len(self):
        return sum(len(replica.inBox) for replica in self._replicas.values())

    @property
    def all_instances_have_primary(self) -> bool:
        return all(replica.primaryName is not None
                   for replica in self._replicas.values())

    @property
    def primary_name_by_inst_id(self) -> dict:
        return {
            r.instId: replica_name_to_node_name(r.primaryName)
            for r in self._replicas.values()
        }

    @property
    def inst_id_by_primary_name(self) -> dict:
        return {
            replica_name_to_node_name(r.primaryName): r.instId
            for r in self._replicas.values() if r.primaryName
        }

    def register_new_ledger(self, ledger_id):
        for replica in self._replicas.values():
            replica.register_ledger(ledger_id)

    def register_monitor_handler(self):
        # note: the handlers are invoked only once per unordered request
        self._monitor.unordered_requests_handlers.append(
            self.unordered_request_handler_logging)

    def unordered_request_handler_logging(self, unordereds):
        replica = self._master_replica
        for unordered in unordereds:
            reqId, duration = unordered

            # get ppSeqNo and viewNo
            preprepares = replica._ordering_service.sent_preprepares \
                if replica.isPrimary else replica._ordering_service.prePrepares
            ppSeqNo = None
            viewNo = None
            for key in preprepares:
                if any([
                        pre_pre_req == reqId
                        for pre_pre_req in preprepares[key].reqIdr
                ]):
                    ppSeqNo = preprepares[key].ppSeqNo
                    viewNo = preprepares[key].viewNo
                    break
            if ppSeqNo is None or viewNo is None:
                logger.warning(
                    'Unordered request with reqId: {} was not found in prePrepares. '
                    'Prepares count: {}, Commits count: {}'.format(
                        reqId, len(replica._ordering_service.prepares),
                        len(replica._ordering_service.commits)))
                continue

            # get pre-prepare sender
            prepre_sender = replica.primaryNames.get(viewNo, 'UNKNOWN')

            # get prepares info
            prepares = replica._ordering_service.prepares[(viewNo, ppSeqNo)][0] \
                if (viewNo, ppSeqNo) in replica._ordering_service.prepares else []
            n_prepares = len(prepares)
            str_prepares = 'noone'
            if n_prepares:
                str_prepares = ', '.join(prepares)

            # get commits info
            commits = replica._ordering_service.commits[(viewNo, ppSeqNo)][0] \
                if (viewNo, ppSeqNo) in replica._ordering_service.commits else []
            n_commits = len(commits)
            str_commits = 'noone'
            if n_commits:
                str_commits = ', '.join(commits)

            # get txn content
            content = replica.requests[reqId].request.as_dict \
                if reqId in replica.requests else 'no content saved'

            logger.warning(
                'Consensus for digest {} was not achieved within {} seconds. '
                'Primary node is {}. '
                'Received Pre-Prepare from {}. '
                'Received {} valid Prepares from {}. '
                'Received {} valid Commits from {}. '
                'Transaction contents: {}. '.format(
                    reqId, duration,
                    replica_name_to_node_name(replica.primaryName),
                    prepre_sender, n_prepares, str_prepares, n_commits,
                    str_commits, content))

    def keys(self):
        return self._replicas.keys()

    def values(self):
        return self._replicas.values()

    def items(self):
        return self._replicas.items()

    def __getitem__(self, item):
        if not isinstance(item, int):
            raise PlenumTypeError('item', item, int)
        return self._replicas[item]

    def __len__(self):
        return self.num_replicas

    def __iter__(self):
        return self._replicas.__iter__()
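
# The internal_bus used by send_to_internal_bus / subscribe_to_internal_bus
# above can be pictured as a small publish/subscribe helper. TinyBus and
# ViewChangeStarted below are hypothetical, illustration-only stand-ins,
# not the real bus or message classes.
from collections import defaultdict


class TinyBus:
    def __init__(self):
        self._handlers = defaultdict(list)  # message type -> list of handlers

    def subscribe(self, message_type, handler):
        self._handlers[message_type].append(handler)

    def send(self, msg):
        for handler in self._handlers[type(msg)]:
            handler(msg)


class ViewChangeStarted:
    def __init__(self, view_no):
        self.view_no = view_no


received = []
bus = TinyBus()
bus.subscribe(ViewChangeStarted, lambda msg: received.append(msg.view_no))
bus.send(ViewChangeStarted(view_no=2))
assert received == [2]
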
Example no. 4
0
class CheckpointService:
    STASHED_CHECKPOINTS_BEFORE_CATCHUP = 1

    def __init__(
            self,
            data: ConsensusSharedData,
            bus: InternalBus,
            network: ExternalBus,
            stasher: StashingRouter,
            db_manager: DatabaseManager,
            metrics: MetricsCollector = NullMetricsCollector(),
    ):
        self._data = data
        self._bus = bus
        self._network = network
        self._checkpoint_state = SortedDict(lambda k: k[1])
        self._stasher = stasher
        self._subscription = Subscription()
        self._validator = CheckpointMsgValidator(self._data)
        self._db_manager = db_manager
        self.metrics = metrics

        # Stashed checkpoints for each view. The key of the outermost
        # dictionary is the view_no, value being a dictionary with key as the
        # range of the checkpoint and its value again being a mapping between
        # senders and their sent checkpoint
        # Dict[view_no, Dict[(seqNoStart, seqNoEnd),  Dict[sender, Checkpoint]]]
        self._stashed_recvd_checkpoints = {}

        self._config = getConfig()
        self._logger = getlogger()

        self._subscription.subscribe(stasher, Checkpoint,
                                     self.process_checkpoint)

        self._subscription.subscribe(bus, Ordered, self.process_ordered)
        self._subscription.subscribe(bus, BackupSetupLastOrdered,
                                     self.process_backup_setup_last_ordered)
        self._subscription.subscribe(bus, NewViewAccepted,
                                     self.process_new_view_accepted)

    def cleanup(self):
        self._subscription.unsubscribe_all()

    @property
    def view_no(self):
        return self._data.view_no

    @property
    def is_master(self):
        return self._data.is_master

    @property
    def last_ordered_3pc(self):
        return self._data.last_ordered_3pc

    @measure_consensus_time(MetricsName.PROCESS_CHECKPOINT_TIME,
                            MetricsName.BACKUP_PROCESS_CHECKPOINT_TIME)
    def process_checkpoint(self, msg: Checkpoint, sender: str) -> (bool, str):
        """
        Process checkpoint messages
        :return: whether processed (True) or stashed (False)
        """
        if msg.instId != self._data.inst_id:
            return None, None
        self._logger.info('{} processing checkpoint {} from {}'.format(
            self, msg, sender))
        result, reason = self._validator.validate(msg)
        if result == PROCESS:
            self._do_process_checkpoint(msg, sender)
        return result, reason

    def _do_process_checkpoint(self, msg: Checkpoint, sender: str) -> bool:
        """
        Process checkpoint messages

        :return: whether processed (True) or stashed (False)
        """
        seqNoEnd = msg.seqNoEnd
        seqNoStart = msg.seqNoStart
        key = (seqNoStart, seqNoEnd)

        if key not in self._checkpoint_state \
                or not self._checkpoint_state[key].digest:
            self._stash_checkpoint(msg, sender)
            self._remove_stashed_checkpoints(self.last_ordered_3pc)
            self._start_catchup_if_needed()
            return False

        checkpoint_state = self._checkpoint_state[key]
        # Warn about a wrong digest only on the master, since only the
        # master's last ordered 3PC is communicated during view change
        if self.is_master and checkpoint_state.digest != msg.digest:
            self._logger.warning("{} received an incorrect digest {} for "
                                 "checkpoint {} from {}".format(
                                     self, msg.digest, key, sender))
            return True

        checkpoint_state.receivedDigests[sender] = msg.digest
        self._check_if_checkpoint_stable(key)
        return True

    def process_backup_setup_last_ordered(self, msg: BackupSetupLastOrdered):
        if msg.inst_id != self._data.inst_id:
            return
        self.update_watermark_from_3pc()

    def process_ordered(self, ordered: Ordered):
        if ordered.instId != self._data.inst_id:
            return
        for batch_id in reversed(self._data.preprepared):
            if batch_id.pp_seq_no == ordered.ppSeqNo:
                self._add_to_checkpoint(batch_id.pp_seq_no, batch_id.pp_digest,
                                        ordered.ledgerId, batch_id.view_no,
                                        ordered.auditTxnRootHash)
                return
        raise LogicError(
            "CheckpointService | Can't process Ordered msg because "
            "ppSeqNo {} not in preprepared".format(ordered.ppSeqNo))

    def _start_catchup_if_needed(self):
        stashed_checkpoint_ends = self._stashed_checkpoints_with_quorum()
        lag_in_checkpoints = len(stashed_checkpoint_ends)
        if self._checkpoint_state:
            (s, e) = firstKey(self._checkpoint_state)
            # If our first stored checkpoint has a misaligned lower bound
            # (meaning it was started right after a catch-up), is complete,
            # and there is a stashed checkpoint with a quorum from other
            # replicas ending at the same point, then do not count that
            # stashed checkpoint towards the lag
            if s % self._config.CHK_FREQ != 0 \
                    and self._checkpoint_state[(s, e)].seqNo == e \
                    and e in stashed_checkpoint_ends:
                lag_in_checkpoints -= 1
        is_stashed_enough = \
            lag_in_checkpoints > self.STASHED_CHECKPOINTS_BEFORE_CATCHUP
        if not is_stashed_enough:
            return

        if self.is_master:
            self._logger.display(
                '{} has lagged for {} checkpoints so updating watermarks to {}'
                .format(self, lag_in_checkpoints, stashed_checkpoint_ends[-1]))
            self.set_watermarks(low_watermark=stashed_checkpoint_ends[-1])
            if not self._data.is_primary:
                self._logger.display(
                    '{} has lagged for {} checkpoints so the catchup procedure starts'
                    .format(self, lag_in_checkpoints))
                self._bus.send(NeedMasterCatchup())
        else:
            self._logger.info(
                '{} has lagged for {} checkpoints so adjust last_ordered_3pc to {}, '
                'shift watermarks and clean collections'.format(
                    self, lag_in_checkpoints, stashed_checkpoint_ends[-1]))
            # Adjust last_ordered_3pc, shift watermarks, clean operational
            # collections and process stashed messages which now fit between
            # watermarks
            key_3pc = (self.view_no, stashed_checkpoint_ends[-1])
            self._bus.send(
                NeedBackupCatchup(inst_id=self._data.inst_id,
                                  caught_up_till_3pc=key_3pc))
            self.caught_up_till_3pc(key_3pc)

    def gc_before_new_view(self):
        self._reset_checkpoints()
        self._remove_stashed_checkpoints(till_3pc_key=(self.view_no, 0))

    def caught_up_till_3pc(self, caught_up_till_3pc):
        self._reset_checkpoints()
        self._remove_stashed_checkpoints(till_3pc_key=caught_up_till_3pc)
        self.update_watermark_from_3pc()

    def catchup_clear_for_backup(self):
        self._reset_checkpoints()
        self._remove_stashed_checkpoints()
        self.set_watermarks(low_watermark=0, high_watermark=sys.maxsize)

    def _add_to_checkpoint(self, ppSeqNo, digest, ledger_id, view_no,
                           audit_txn_root_hash):
        for (s, e) in self._checkpoint_state.keys():
            if s <= ppSeqNo <= e:
                state = self._checkpoint_state[s, e]  # type: CheckpointState
                state.digests.append(digest)
                state = updateNamedTuple(state, seqNo=ppSeqNo)
                self._checkpoint_state[s, e] = state
                break
        else:
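            # no existing checkpoint window covers this ppSeqNo, so open a
            # new one ending at the next multiple of CHK_FREQ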
            s, e = ppSeqNo, math.ceil(
                ppSeqNo / self._config.CHK_FREQ) * self._config.CHK_FREQ
            self._logger.debug("{} adding new checkpoint state for {}".format(
                self, (s, e)))
            state = CheckpointState(ppSeqNo, [
                digest,
            ], None, {}, False)
            self._checkpoint_state[s, e] = state

        if state.seqNo == e:
            if len(state.digests) == self._config.CHK_FREQ:
                self._do_checkpoint(state, s, e, ledger_id, view_no,
                                    audit_txn_root_hash)
            self._process_stashed_checkpoints((s, e), view_no)

    @measure_consensus_time(MetricsName.SEND_CHECKPOINT_TIME,
                            MetricsName.BACKUP_SEND_CHECKPOINT_TIME)
    def _do_checkpoint(self, state, s, e, ledger_id, view_no,
                       audit_txn_root_hash):
        # TODO CheckpointState/Checkpoint is not a namedtuple anymore
        # 1. check if updateNamedTuple works for the new message type
        # 2. choose another name

        # TODO: This is a hack of hacks and should be removed when the
        # refactoring is complete
        if not self.is_master and audit_txn_root_hash is None:
            audit_txn_root_hash = "7RJ5bkAKRy2CCvarRij2jiHC16SVPjHcrpVdNsboiQGv"

        state = updateNamedTuple(state, digest=audit_txn_root_hash, digests=[])
        self._checkpoint_state[s, e] = state
        self._logger.info(
            "{} sending Checkpoint {} view {} checkpointState digest {}. Ledger {} "
            "txn root hash {}. Committed state root hash {} Uncommitted state root hash {}"
            .format(
                self, (s, e), view_no, state.digest, ledger_id,
                self._db_manager.get_txn_root_hash(ledger_id),
                self._db_manager.get_state_root_hash(ledger_id,
                                                     committed=True),
                self._db_manager.get_state_root_hash(ledger_id,
                                                     committed=False)))
        checkpoint = Checkpoint(self._data.inst_id, view_no, s, e,
                                state.digest)
        self._network.send(checkpoint)
        self._data.checkpoints.append(checkpoint)

    def _mark_checkpoint_stable(self, seqNo):
        previousCheckpoints = []
        for (s, e), state in self._checkpoint_state.items():
            if e == seqNo:
                # TODO CheckpointState/Checkpoint is not a namedtuple anymore
                # 1. check if updateNamedTuple works for the new message type
                # 2. choose another name
                state = updateNamedTuple(state, isStable=True)
                self._checkpoint_state[s, e] = state
                self._set_stable_checkpoint(e)
                break
            else:
                previousCheckpoints.append((s, e))
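        # for/else: this branch runs only if the loop above found no
        # checkpoint ending at seqNo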
        else:
            self._logger.debug("{} could not find {} in checkpoints".format(
                self, seqNo))
            return
        self.set_watermarks(low_watermark=seqNo)
        for k in previousCheckpoints:
            self._logger.trace("{} removing previous checkpoint {}".format(
                self, k))
            self._checkpoint_state.pop(k)
        self._remove_stashed_checkpoints(till_3pc_key=(self.view_no, seqNo))
        self._bus.send(
            CheckpointStabilized(self._data.inst_id, (self.view_no, seqNo)))
        self._logger.info("{} marked stable checkpoint {}".format(
            self, (s, e)))

    def _check_if_checkpoint_stable(self, key: Tuple[int, int]):
        ckState = self._checkpoint_state[key]
        if self._data.quorums.checkpoint.is_reached(
                len(ckState.receivedDigests)):
            self._mark_checkpoint_stable(ckState.seqNo)
            return True
        else:
            self._logger.debug('{} has state.receivedDigests as {}'.format(
                self, ckState.receivedDigests.keys()))
            return False

    def _stash_checkpoint(self, ck: Checkpoint, sender: str):
        self._logger.debug('{} stashing {} from {}'.format(self, ck, sender))
        seqNoStart, seqNoEnd = ck.seqNoStart, ck.seqNoEnd
        if ck.viewNo not in self._stashed_recvd_checkpoints:
            self._stashed_recvd_checkpoints[ck.viewNo] = {}
        stashed_for_view = self._stashed_recvd_checkpoints[ck.viewNo]
        if (seqNoStart, seqNoEnd) not in stashed_for_view:
            stashed_for_view[seqNoStart, seqNoEnd] = {}
        stashed_for_view[seqNoStart, seqNoEnd][sender] = ck

    def _stashed_checkpoints_with_quorum(self):
        end_pp_seq_numbers = []
        quorum = self._data.quorums.checkpoint
        for (_, seq_no_end), senders in self._stashed_recvd_checkpoints.get(
                self.view_no, {}).items():
            if quorum.is_reached(len(senders)):
                end_pp_seq_numbers.append(seq_no_end)
        return sorted(end_pp_seq_numbers)

    def _process_stashed_checkpoints(self, key, view_no):
        # Remove all checkpoints from previous views if any
        self._remove_stashed_checkpoints(till_3pc_key=(self.view_no, 0))

        if key not in self._stashed_recvd_checkpoints.get(view_no, {}):
            self._logger.trace("{} have no stashed checkpoints for {}")
            return

        # Get a snapshot of all the senders of stashed checkpoints for `key`
        senders = list(self._stashed_recvd_checkpoints[view_no][key].keys())
        total_processed = 0
        consumed = 0

        for sender in senders:
            # Check if the checkpoint from `sender` is still in
            # `stashed_recvd_checkpoints` because it might be removed from there
            # in case own checkpoint was stabilized when we were processing
            # stashed checkpoints from previous senders in this loop
            if view_no in self._stashed_recvd_checkpoints \
                    and key in self._stashed_recvd_checkpoints[view_no] \
                    and sender in self._stashed_recvd_checkpoints[view_no][key]:
                if self.process_checkpoint(
                        self._stashed_recvd_checkpoints[view_no][key].pop(
                            sender), sender):
                    consumed += 1
                # Note that if `process_checkpoint` returned False then the
                # checkpoint from `sender` was re-stashed back to
                # `stashed_recvd_checkpoints`
                total_processed += 1

        # If we have consumed stashed checkpoints for `key` from all the
        # senders then remove entries which have become empty
        if view_no in self._stashed_recvd_checkpoints \
                and key in self._stashed_recvd_checkpoints[view_no] \
                and len(self._stashed_recvd_checkpoints[view_no][key]) == 0:
            del self._stashed_recvd_checkpoints[view_no][key]
            if len(self._stashed_recvd_checkpoints[view_no]) == 0:
                del self._stashed_recvd_checkpoints[view_no]

        restashed = total_processed - consumed
        self._logger.info('{} processed {} stashed checkpoints for {}, '
                          '{} of them were stashed again'.format(
                              self, total_processed, key, restashed))

        return total_processed

    def reset_watermarks_before_new_view(self):
        # Reset any previous view watermarks since for view change to
        # successfully complete, the node must have reached the same state
        # as other nodes
        self.set_watermarks(low_watermark=0)

    def should_reset_watermarks_before_new_view(self):
        if self.view_no <= 0:
            return False
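        # if something has already been ordered in the current view, the
        # watermarks do not need to be reset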
        if self.last_ordered_3pc[0] == self.view_no \
                and self.last_ordered_3pc[1] > 0:
            return False
        return True

    def set_watermarks(self, low_watermark: int, high_watermark: int = None):
        self._data.low_watermark = low_watermark
        self._data.high_watermark = self._data.low_watermark + self._config.LOG_SIZE \
            if high_watermark is None else \
            high_watermark

        self._logger.info('{} set watermarks as {} {}'.format(
            self, self._data.low_watermark, self._data.high_watermark))
        self._stasher.process_all_stashed(STASH_WATERMARKS)

    def update_watermark_from_3pc(self):
        last_ordered_3pc = self.last_ordered_3pc
        if (last_ordered_3pc is not None) and (last_ordered_3pc[0]
                                               == self.view_no):
            self._logger.info(
                "update_watermark_from_3pc to {}".format(last_ordered_3pc))
            self.set_watermarks(last_ordered_3pc[1])
        else:
            self._logger.info(
                "try to update_watermark_from_3pc but last_ordered_3pc "
                "is None or not from the current view")

    def _remove_stashed_checkpoints(self, till_3pc_key=None):
        """
        Remove stashed received checkpoints up to `till_3pc_key` if provided,
        otherwise remove all stashed received checkpoints
        """
        if till_3pc_key is None:
            self._stashed_recvd_checkpoints.clear()
            self._logger.info(
                '{} removing all stashed checkpoints'.format(self))
            return

        for view_no in list(self._stashed_recvd_checkpoints.keys()):

            if view_no < till_3pc_key[0]:
                self._logger.info(
                    '{} removing stashed checkpoints for view {}'.format(
                        self, view_no))
                del self._stashed_recvd_checkpoints[view_no]

            elif view_no == till_3pc_key[0]:
                for (s, e) in list(
                        self._stashed_recvd_checkpoints[view_no].keys()):
                    if e <= till_3pc_key[1]:
                        self._logger.info(
                            '{} removing stashed checkpoints: '
                            'viewNo={}, seqNoStart={}, seqNoEnd={}'.format(
                                self, view_no, s, e))
                        del self._stashed_recvd_checkpoints[view_no][(s, e)]
                if len(self._stashed_recvd_checkpoints[view_no]) == 0:
                    del self._stashed_recvd_checkpoints[view_no]

    def _reset_checkpoints(self):
        # This function is most probably redundant in the PBFT approach:
        # according to the paper, checkpoints are cleared only when the next
        # one is stabilized. Avoid using it while implementing other services.
        self._checkpoint_state.clear()
        self._data.checkpoints.clear()
        # TODO: change to = 1 in ViewChangeService integration.
        self._data.stable_checkpoint = 0

    def _set_stable_checkpoint(self, end_seq_no):
        if not list(self._data.checkpoints.irange_key(end_seq_no, end_seq_no)):
            raise LogicError('Stable checkpoint must be in checkpoints')
        self._data.stable_checkpoint = end_seq_no

        self._data.checkpoints = \
            SortedListWithKey([c for c in self._data.checkpoints if c.seqNoEnd >= end_seq_no],
                              key=lambda checkpoint: checkpoint.seqNoEnd)

    def __str__(self) -> str:
        return "{} - checkpoint_service".format(self._data.name)

    # TODO: move to OrderingService as a handler for Cleanup messages
    # def _clear_batch_till_seq_no(self, seq_no):
    #     self._data.preprepared = [pp for pp in self._data.preprepared if pp.ppSeqNo >= seq_no]
    #     self._data.prepared = [p for p in self._data.prepared if p.ppSeqNo >= seq_no]

    def discard(self, msg, reason, sender):
        self._logger.trace("{} discard message {} from {} "
                           "with the reason: {}".format(
                               self, msg, sender, reason))

    def process_new_view_accepted(self, msg: NewViewAccepted):
        # 1. update shared data
        cp = msg.checkpoint
        if cp not in self._data.checkpoints:
            self._data.checkpoints.append(cp)
        self._set_stable_checkpoint(cp.seqNoEnd)
        self.set_watermarks(low_watermark=cp.seqNoEnd)

        # 2. send NewViewCheckpointsApplied
        self._bus.send(
            NewViewCheckpointsApplied(view_no=msg.view_no,
                                      view_changes=msg.view_changes,
                                      checkpoint=msg.checkpoint,
                                      batches=msg.batches))
        return PROCESS, None
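
# A hedged arithmetic sketch of two calculations used by CheckpointService
# above: the checkpoint window opened by _add_to_checkpoint when no existing
# window covers a pp_seq_no, and the watermark update done by set_watermarks.
# The CHK_FREQ and LOG_SIZE values are illustrative assumptions, not the
# production configuration.
import math

CHK_FREQ = 100  # assumed checkpoint frequency
LOG_SIZE = 300  # assumed high-watermark offset


def checkpoint_window(pp_seq_no, chk_freq=CHK_FREQ):
    # The window ends at the next multiple of chk_freq at or above pp_seq_no.
    return pp_seq_no, math.ceil(pp_seq_no / chk_freq) * chk_freq


def watermarks(low, log_size=LOG_SIZE):
    # Stabilizing a checkpoint at `low` shifts the ordering window
    # to (low, low + log_size].
    return low, low + log_size


assert checkpoint_window(101) == (101, 200)
assert watermarks(200) == (200, 500)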