def __init__(self, node):
    self.node = node
    self._view_no = 0  # type: int

    HasActionQueue.__init__(self)

    self.inBox = deque()
    self.outBox = deque()
    self.inBoxRouter = Router(
        (InstanceChange, self.process_instance_change_msg),
        (ViewChangeDone, self.process_vchd_msg),
        (FutureViewChangeDone, self.process_future_view_vchd_msg))

    self.instanceChanges = InstanceChanges()

    # The quorum of `ViewChangeDone` msgs is different depending on whether we're doing a real view change,
    # or just propagating view_no and Primary from `CurrentState` messages sent to a newly joined Node.
    # TODO: separate real view change and Propagation of Primary
    # TODO: separate catch-up, view-change and primary selection so that
    # they are really independent.
    self.propagate_primary = False

    # Tracks if other nodes are indicating that this node is in a lower view
    # than others. Keeps a map of view no to senders.
    # TODO: Consider if sufficient ViewChangeDone for 2 different (and
    # higher) views are received, should one view change be interrupted in
    # between.
    self._next_view_indications = {}

    self._view_change_in_progress = False

    self.previous_master_primary = None

    self.set_defaults()

    self.initInsChngThrottling()

    # Action for _schedule instanceChange messages
    self.instance_change_action = None
    # Count of instance change rounds
    self.instance_change_rounds = 0
    # Time for view_change_starting
    self.start_view_change_ts = 0

    # Last successful viewNo.
    # In some cases the view_change process may not complete in time.
    # In that case we want to know which viewNo was successful (last completed view_change).
    self.last_completed_view_no = 0

    # Force periodic view change if enabled in config
    force_view_change_freq = node.config.ForceViewChangeFreq
    if force_view_change_freq > 0:
        self.startRepeating(self.on_master_degradation, force_view_change_freq)
def __init__(self, node):
    self.node = node
    self.view_no = 0  # type: int

    HasActionQueue.__init__(self)

    self.inBox = deque()
    self.outBox = deque()
    self.inBoxRouter = Router(
        (InstanceChange, self.process_instance_change_msg),
        (ViewChangeDone, self.process_vchd_msg)
    )

    self.instanceChanges = InstanceChanges()

    # The quorum of `ViewChangeDone` msgs is different depending on whether we're doing a real view change,
    # or just propagating view_no and Primary from `CurrentState` messages sent to a newly joined Node.
    # TODO: separate real view change and Propagation of Primary
    # TODO: separate catch-up, view-change and primary selection so that
    # they are really independent.
    self.propagate_primary = False

    # Tracks if other nodes are indicating that this node is in lower view
    # than others. Keeps a map of view no to senders
    # TODO: Consider if sufficient ViewChangeDone for 2 different (and
    # higher views) are received, should one view change be interrupted in
    # between.
    self._next_view_indications = SortedDict()

    self._view_change_in_progress = False

    self.previous_master_primary = None

    self.set_defaults()

    self.initInsChngThrottling()

    # Action for _schedule instanceChange messages
    self.instance_change_action = None
    # Count of instance change rounds
    self.instance_change_rounds = 0
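# Both constructors above wire message handling through a Router that maps a message
# type to a handler and is drained via `handleAll(inBox, limit)`. The class below is
# only a minimal sketch of that dispatch pattern, inferred from the calls shown here;
# it is not the plenum Router implementation, and the real `handleAll` signature and
# queue contents may differ. `SimpleRouter` is a made-up name for illustration.
from collections import deque


class SimpleRouter:
    def __init__(self, *routes):
        # routes are (message_type, handler) pairs, as in Router(...) above
        self._handlers = dict(routes)

    async def handleAll(self, queue: deque, limit=None) -> int:
        # Pop (msg, sender) pairs off the queue and dispatch by message type,
        # stopping after `limit` messages if a limit is given.
        handled = 0
        while queue and (limit is None or handled < limit):
            msg, frm = queue.popleft()
            handler = self._handlers.get(type(msg))
            if handler is not None:
                handler(msg, frm)
                handled += 1
        return handled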
def test_too_old_messages_dont_count_towards_quorum(instance_changes, tconf):
    frm1 = "Node1"
    frm2 = "Node2"
    view_no = 1
    quorum = 2
    time_provider = MockTimestamp(0)
    instance_changes = InstanceChanges(tconf, time_provider)
    msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code)

    instance_changes.add_vote(msg, frm1)
    time_provider.value += (tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL / 2)

    instance_changes.add_vote(msg, frm2)
    time_provider.value += (tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL / 2) + 1

    assert not instance_changes.has_quorum(view_no, quorum)
    assert instance_changes.has_view(view_no)
    assert instance_changes[view_no].msg == msg
    assert not instance_changes.has_inst_chng_from(view_no, frm1)
    assert instance_changes.has_inst_chng_from(view_no, frm2)
def test_equal_votes_dont_accumulate_when_added(instance_changes, tconf):
    frm = "Node1"
    view_no = 1
    time_provider = MockTimestamp(0)
    second_vote_time = 1
    instance_changes = InstanceChanges(tconf, time_provider)
    msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code)

    instance_changes.add_vote(msg, frm)
    time_provider.value = second_vote_time
    instance_changes.add_vote(msg, frm)

    assert instance_changes[view_no].voters[frm] == second_vote_time
    assert len(instance_changes[view_no].voters) == 1
    assert len(instance_changes) == 1
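# A rough mental model of the vote store these tests exercise: votes are grouped by
# view number, a repeated vote from the same sender overwrites its previous one (so
# equal votes don't accumulate), and votes older than
# OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL are pruned before any quorum check. The
# sketch below is inferred from the assertions above; it is not the actual plenum
# InstanceChanges implementation, and the class/attribute names are made up.
class VoteStoreSketch:
    def __init__(self, outdated_interval, time_provider):
        self._outdated_interval = outdated_interval
        self._now = time_provider   # object exposing the current time via .value
        self._views = {}            # view_no -> {"msg": msg, "voters": {frm: ts}}

    def add_vote(self, msg, frm):
        entry = self._views.setdefault(msg.viewNo, {"msg": msg, "voters": {}})
        # A repeated vote from the same sender just refreshes its timestamp.
        entry["voters"][frm] = self._now.value

    def _prune(self, view_no):
        # Drop voters whose vote is older than the outdated-check interval;
        # drop the whole view entry once no fresh voters remain.
        entry = self._views.get(view_no)
        if entry is None:
            return
        fresh = {frm: ts for frm, ts in entry["voters"].items()
                 if self._now.value - ts <= self._outdated_interval}
        if fresh:
            entry["voters"] = fresh
        else:
            del self._views[view_no]

    def has_quorum(self, view_no, quorum):
        self._prune(view_no)
        entry = self._views.get(view_no)
        return entry is not None and len(entry["voters"]) >= quorum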
class ViewChanger(HasActionQueue, MessageProcessor):

    def __init__(self, node):
        self.node = node
        self.view_no = 0  # type: int

        HasActionQueue.__init__(self)

        self.inBox = deque()
        self.outBox = deque()
        self.inBoxRouter = Router(
            (InstanceChange, self.process_instance_change_msg),
            (ViewChangeDone, self.process_vchd_msg))

        self.instanceChanges = InstanceChanges()

        # The quorum of `ViewChangeDone` msgs is different depending on whether we're doing a real view change,
        # or just propagating view_no and Primary from `CurrentState` messages sent to a newly joined Node.
        # TODO: separate real view change and Propagation of Primary
        # TODO: separate catch-up, view-change and primary selection so that
        # they are really independent.
        self.propagate_primary = False

        # Tracks if other nodes are indicating that this node is in a lower view
        # than others. Keeps a map of view no to senders.
        # TODO: Consider if sufficient ViewChangeDone for 2 different (and
        # higher) views are received, should one view change be interrupted in
        # between.
        self._next_view_indications = SortedDict()

        self._view_change_in_progress = False

        self.previous_master_primary = None

        self.set_defaults()

        self.initInsChngThrottling()

    def __repr__(self):
        return "{}".format(self.name)

    # PROPERTIES

    @property
    def name(self):
        return self.node.name

    @property
    def config(self):
        return self.node.config

    @property
    def quorums(self):
        return self.node.quorums

    @property
    def view_change_in_progress(self):
        return self._view_change_in_progress

    @view_change_in_progress.setter
    def view_change_in_progress(self, value):
        self._view_change_in_progress = value

    @property
    def quorum(self) -> int:
        # TODO: re-factor this, separate these two states (selection of a new
        # primary and propagation of an existing one)
        if not self.view_change_in_progress:
            return self.node.quorums.propagate_primary.value
        if self.propagate_primary:
            return self.node.quorums.propagate_primary.value
        return self.node.quorums.view_change_done.value

    @property
    def _hasViewChangeQuorum(self):
        # This method should only be present for the master instance.
        """
        Checks whether n-f nodes completed view change and whether one
        of them is the next primary
        """
        num_of_ready_nodes = len(self._view_change_done)
        diff = self.quorum - num_of_ready_nodes
        if diff > 0:
            logger.debug('{} needs {} ViewChangeDone messages'.format(self, diff))
            return False

        logger.debug("{} got view change quorum ({} >= {})".format(
            self.name, num_of_ready_nodes, self.quorum))
        return True

    @property
    def _is_propagated_view_change_completed(self):
        if not self._propagated_view_change_completed and \
                self.node.poolLedger is not None and \
                self.propagate_primary:
            accepted = self.get_sufficient_same_view_change_done_messages()
            if accepted is not None:
                accepted_pool_ledger_i = \
                    next(filter(lambda x: x[0] == POOL_LEDGER_ID, accepted[1]))
                self_pool_ledger_i = \
                    next(filter(lambda x: x[0] == POOL_LEDGER_ID,
                                self.node.ledger_summary))
                logger.debug("{} Primary selection has already been completed "
                             "on pool ledger info = {}, primary {}, self pool "
                             "ledger info {}".format(self, accepted_pool_ledger_i,
                                                     accepted[0],
                                                     self_pool_ledger_i))
                self._propagated_view_change_completed = True

        return self._propagated_view_change_completed

    @property
    def has_view_change_from_primary(self) -> bool:
        if not self._has_view_change_from_primary:
            next_primary_name = self.node.elector.next_primary_node_name(0)

            if next_primary_name not in self._view_change_done:
                logger.debug(
                    "{} has not received ViewChangeDone from the next "
                    "primary {} (view_no: {}, totalNodes: {})".format(
                        self.name, next_primary_name, self.view_no,
                        self.node.totalNodes))
            else:
                logger.debug('{} received ViewChangeDone from primary {}'.format(
                    self, next_primary_name))
                self._has_view_change_from_primary = True

        return self._has_view_change_from_primary

    @property
    def has_acceptable_view_change_quorum(self):
        if not self._has_acceptable_view_change_quorum:
            self._has_acceptable_view_change_quorum = (
                self._hasViewChangeQuorum and
                (self._is_propagated_view_change_completed or
                 self.has_view_change_from_primary))
        return self._has_acceptable_view_change_quorum

    @property
    def is_behind_for_view(self) -> bool:
        # Checks if the node is currently behind the accepted state for this
        # view; only makes sense to call when the node has an acceptable
        # view change quorum
        _, accepted_ledger_summary = self.get_sufficient_same_view_change_done_messages()
        for (ledgerId, own_ledger_size, _), (_, accepted_ledger_size, _) in \
                zip(self.node.ledger_summary, accepted_ledger_summary):
            if own_ledger_size < accepted_ledger_size:
                logger.debug("{} ledger {} sizes differ: own {} accepted {}"
                             "".format(self, ledgerId, own_ledger_size,
                                       accepted_ledger_size))
                return True
        return False

    # __ PROPERTIES __

    # EXTERNAL EVENTS

    def on_master_degradation(self):
        """ """
        view_no = self.view_no + 1
        logger.info(
            "{} sending instance change with view_no = {} and trying to start "
            "view change since performance of master instance degraded"
            "".format(self, view_no))
        self.sendInstanceChange(view_no)
        self.do_view_change_if_possible(view_no)

    def on_primary_loss(self):
        view_no = self.view_no + 1
        logger.info("{} sending instance change with view_no = {} and trying "
                    "to start view change since primary was lost"
                    "".format(self, view_no))
        self.sendInstanceChange(view_no, Suspicions.PRIMARY_DISCONNECTED)
        self.do_view_change_if_possible(view_no)

    # TODO we have `on_primary_loss`, do we need that one?
    def on_primary_about_to_be_disconnected(self):
        view_no = self.view_no + 1
        logger.info("{} sending instance change with view_no = {} "
                    "since primary is about to be disconnected"
                    "".format(self, view_no))
        self.sendInstanceChange(view_no,
                                Suspicions.PRIMARY_ABOUT_TO_BE_DISCONNECTED)

    def on_suspicious_primary(self, suspicion: Suspicions):
        view_no = self.view_no + 1
        logger.info("{} sending instance change with view_no = {} since primary "
                    "seems suspicious, reason {}"
                    "".format(self, view_no, suspicion.reason))
        self.sendInstanceChange(view_no, suspicion)
        # TODO why don't we try to start view change here

    def on_view_change_not_completed_in_time(self):
        view_no = self.view_no + 1
        logger.info("{} sending instance change with view_no = {} since "
                    "view change to view {} is not completed in time"
                    "".format(self, view_no, self.view_no))
        self.sendInstanceChange(view_no, Suspicions.INSTANCE_CHANGE_TIMEOUT)

    def on_catchup_complete(self):
        if self.node.is_synced and self.node.master_replica.isPrimary is None and \
                not self._is_propagated_view_change_completed:
            self._send_view_change_done_message()
        self._start_selection()

    def on_future_view_vchd_msg(self, view_no, frm):
        assert view_no > self.view_no
        if view_no not in self._next_view_indications:
            self._next_view_indications[view_no] = set()
        self._next_view_indications[view_no].add(frm)
        self._start_view_change_if_possible(view_no)

    # __ EXTERNAL EVENTS __

    def process_instance_change_msg(self, instChg: InstanceChange, frm: str) -> None:
        """
        Validate and process an instance change request.

        :param instChg: the instance change request
        :param frm: the name of the node that sent this `msg`
        """
        logger.debug("{} received instance change request: {} from {}".format(
            self, instChg, frm))

        # TODO: add sender to blacklist?
        if not isinstance(instChg.viewNo, int):
            self.node.discard(
                instChg, "{}field view_no has incorrect type: {}".format(
                    VIEW_CHANGE_PREFIX, type(instChg.viewNo)))
        elif instChg.viewNo <= self.view_no:
            self.node.discard(
                instChg,
                "Received instance change request with view no {} "
                "which is not more than its view no {}".format(
                    instChg.viewNo, self.view_no), logger.info)
        else:
            # Record instance changes for views, but send an instance change
            # only when the master is found to be degraded. If a quorum of view
            # changes is found, change the view even if the master is not degraded.
            self._on_verified_instance_change_msg(instChg, frm)

            if self.instanceChanges.hasInstChngFrom(instChg.viewNo, self.name):
                logger.debug(
                    "{} received instance change message {} but has already "
                    "sent an instance change message".format(self, instChg))
            elif not self.node.monitor.isMasterDegraded():
                logger.debug(
                    "{} received instance change message {} but did not "
                    "find the master to be slow".format(self, instChg))
            else:
                logger.info(
                    "{}{} found master degraded after receiving instance change"
                    " message from {}".format(VIEW_CHANGE_PREFIX, self, frm))
                self.sendInstanceChange(instChg.viewNo)

    def process_vchd_msg(self, msg: ViewChangeDone, sender: str) -> bool:
        """
        Processes ViewChangeDone messages. Once n-f messages have been
        received, decides on a primary for a specific replica.

        :param msg: ViewChangeDone message
        :param sender: the name of the node from which this message was sent
        """
        logger.debug("{}'s primary selector started processing of "
                     "ViewChangeDone msg from {} : {}".format(
                         self.name, sender, msg))

        view_no = msg.viewNo

        if self.view_no != view_no:
            self.discard(msg,
                         '{} got Primary from {} for view no {} '
                         'whereas current view no is {}'.format(
                             self, sender, view_no, self.view_no),
                         logMethod=logger.debug)
            return False

        new_primary_name = msg.name
        if new_primary_name == self.previous_master_primary:
            self.discard(msg,
                         '{} got Primary from {} for {} who was primary of '
                         'master in previous view too'.format(
                             self, sender, new_primary_name),
                         logMethod=logger.debug)
            return False

        # Since a node can send ViewChangeDone more than once
        self._on_verified_view_change_done_msg(msg, sender)

        # TODO why do we check that after the message tracking
        if self.node.master_replica.hasPrimary:
            self.discard(
                msg, "it already decided primary which is {}".format(
                    self.node.master_replica.primaryName), logger.debug)
            return False

        self._start_selection()

    def send(self, msg):
        """
        Send a message to the node.

        :param msg: the message to send
        """
        logger.debug("{}'s view_changer sending {}".format(self.name, msg))
        self.outBox.append(msg)

    async def serviceQueues(self, limit=None) -> int:
        """
        Service at most `limit` messages from the inBox.

        :param limit: the maximum number of messages to service
        :return: the number of messages successfully processed
        """
        return await self.inBoxRouter.handleAll(self.inBox, limit)

    def sendInstanceChange(self, view_no: int,
                           suspicion=Suspicions.PRIMARY_DEGRADED):
        """
        Broadcast an instance change request to all the remaining nodes

        :param view_no: the view number when the instance change is requested
        """
        # An instance change message can be sent only if no instance change
        # message was sent in the last `ViewChangeWindowSize` seconds, or if
        # the last one was sent long enough ago.
        canSendInsChange, cooldown = self.insChngThrottler.acquire()
        if canSendInsChange:
            logger.info("{}{} sending an instance change with view_no {}"
                        " since {}".format(VIEW_CHANGE_PREFIX, self, view_no,
                                           suspicion.reason))
            logger.info("{}{} metrics for monitor: {}".format(
                MONITORING_PREFIX, self, self.node.monitor.prettymetrics))
            msg = self._create_instance_change_msg(view_no, suspicion.code)
            self.send(msg)
            # Record the instance change vote for self and try to change the
            # view if quorum is reached
            self._on_verified_instance_change_msg(msg, self.name)
        else:
            logger.debug(
                "{} cannot send instance change sooner than {} seconds".format(
                    self, cooldown))

    # noinspection PyAttributeOutsideInit
    def initInsChngThrottling(self):
        windowSize = self.node.config.ViewChangeWindowSize
        ratchet = Ratchet(a=2, b=0.05, c=1, base=2, peak=windowSize)
        self.insChngThrottler = Throttler(windowSize, ratchet.get)

    def _create_instance_change_msg(self, view_no, suspicion_code):
        return InstanceChange(view_no, suspicion_code)

    def _on_verified_instance_change_msg(self, msg, frm):
        view_no = msg.viewNo
        if not self.instanceChanges.hasInstChngFrom(view_no, frm):
            self.instanceChanges.addVote(msg, frm)
            if view_no > self.view_no:
                self.do_view_change_if_possible(view_no)

    def do_view_change_if_possible(self, view_no):
        # TODO: Need to handle skewed distributions which can arise due to
        # malicious nodes sending messages early on
        can, whyNot = self._canViewChange(view_no)
        if can:
            logger.info("{}{} initiating a view change to {} from {}".format(
                VIEW_CHANGE_PREFIX, self, view_no, self.view_no))
            self.propagate_primary = False
            self.startViewChange(view_no)
        else:
            logger.debug(whyNot)
        return can

    def _start_view_change_if_possible(self, view_no) -> bool:
        ind_count = len(self._next_view_indications[view_no])
        if self.quorums.propagate_primary.is_reached(ind_count):
            logger.info(
                '{}{} starting view change for {} after {} view change '
                'indications from other nodes'.format(VIEW_CHANGE_PREFIX, self,
                                                      view_no, ind_count))
            self.propagate_primary = True
            self.startViewChange(view_no)
            return True
        return False

    def _canViewChange(self, proposedViewNo: int) -> (bool, str):
        """
        Return whether there's a quorum for a view change to the proposed view
        number and the current view is lower than the proposed view
        """
        msg = None
        quorum = self.quorums.view_change.value
        if not self.instanceChanges.hasQuorum(proposedViewNo, quorum):
            msg = '{} has no quorum for view {}'.format(self, proposedViewNo)
        elif not proposedViewNo > self.view_no:
            msg = '{} is already in a view not lower than {}'.format(
                self, proposedViewNo)

        return not bool(msg), msg

    def startViewChange(self, proposed_view_no: int):
        """
        Trigger the view change process.

        :param proposed_view_no: the new view number after view change.
        """
        # TODO: consider moving this to pool manager
        # TODO: view change is a special case, which can have different
        # implementations - we need to make this logic pluggable

        for view_no in tuple(self._next_view_indications.keys()):
            if view_no > proposed_view_no:
                break
            self._next_view_indications.pop(view_no)

        self.view_no = proposed_view_no
        self.view_change_in_progress = True
        self.previous_master_primary = self.node.master_primary_name
        self.set_defaults()
        self.initInsChngThrottling()

        self.node.on_view_change_start()
        self.node.start_catchup()

    def _on_verified_view_change_done_msg(self, msg, frm):
        new_primary_name = msg.name
        ledger_summary = msg.ledgerInfo

        # TODO what is the case when a node sends several different
        # view change done messages
        data = (new_primary_name, ledger_summary)
        self._view_change_done[frm] = data

    def _start_selection(self):
        error = None

        if not self.node.is_synced:
            error = "mode is {}".format(self.node.mode)
        elif not self.has_acceptable_view_change_quorum:
            error = "has no view change quorum or no message from next primary"
        else:
            rv = self.get_sufficient_same_view_change_done_messages()
            if rv is None:
                error = "there are not sufficient same ViewChangeDone messages"
            elif not (self._is_propagated_view_change_completed or
                      self._verify_primary(*rv)):
                error = "failed to verify primary"

        if error is not None:
            logger.debug('{} cannot start primary selection because {}'.format(
                self, error))
            return

        if self.is_behind_for_view:
            logger.debug(
                '{} is synced and has an acceptable view change quorum '
                'but is behind the accepted state'.format(self))
            self.node.start_catchup()
            return

        logger.debug("{} starting selection".format(self))

        nodeReg = None
        # In case of an already completed view change,
        # use the node registry actual for the moment when it happened
        if self._is_propagated_view_change_completed:
            assert self._accepted_view_change_done_message is not None
            ledger_summary = self._accepted_view_change_done_message[1]
            pool_ledger_size = ledger_summary[POOL_LEDGER_ID][1]
            nodeReg = self.node.poolManager.getNodeRegistry(pool_ledger_size)

        self.node.select_primaries(nodeReg)

        if self.view_change_in_progress:
            self.view_change_in_progress = False
            self.node.on_view_change_complete()
            self.instanceChanges.pop(self.view_no - 1, None)
            self.previous_master_primary = None
            self.propagate_primary = False

    def set_defaults(self):
        # Tracks view change done messages
        self._view_change_done = {}  # replica name -> data

        # Set when an appropriate view change quorum is found which has
        # sufficient same ViewChangeDone messages
        self._primary_verified = False

        self._has_view_change_from_primary = False

        self._has_acceptable_view_change_quorum = False

        self._accepted_view_change_done_message = None

        # Accept any primary if propagating view change done msgs
        # for an already completed view change
        self._propagated_view_change_completed = False

    def get_sufficient_same_view_change_done_messages(self) -> Optional[Tuple]:
        # Returns whether we have a quorum of ViewChangeDone messages that are the same
        # TODO: Does not look like an optimal implementation.
        if self._accepted_view_change_done_message is None and \
                self._view_change_done:
            votes = self._view_change_done.values()
            votes = [(nm, tuple(tuple(i) for i in info)) for nm, info in votes]
            (new_primary, ledger_info), vote_count = mostCommonElement(votes)
            if vote_count >= self.quorum:
                logger.debug(
                    '{} found acceptable primary {} and ledger info {}'.format(
                        self, new_primary, ledger_info))
                self._accepted_view_change_done_message = (new_primary,
                                                           ledger_info)
            else:
                logger.debug('{} does not have acceptable primary, only {} '
                             'votes for {}'.format(self, vote_count,
                                                   (new_primary, ledger_info)))

        return self._accepted_view_change_done_message

    def _verify_primary(self, new_primary, ledger_info):
        """
        This method is called when a sufficient number of ViewChangeDone
        messages have been received and makes steps to switch to the new primary
        """
        expected_primary = self.node.elector.next_primary_node_name(0)
        if new_primary != expected_primary:
            logger.error("{}{} expected next primary to be {}, but majority "
                         "declared {} instead for view {}".format(
                             PRIMARY_SELECTION_PREFIX, self.name,
                             expected_primary, new_primary, self.view_no))
            return False

        self._primary_verified = True
        return True
        # TODO: check if ledger status is expected

    def _send_view_change_done_message(self):
        """
        Sends ViewChangeDone message to other protocol participants
        """
        new_primary_name = self.node.elector.next_primary_node_name(0)
        ledger_summary = self.node.ledger_summary
        message = ViewChangeDone(self.view_no,
                                 new_primary_name,
                                 ledger_summary)

        logger.debug("{} is sending ViewChangeDone msg to all : {}"
                     "".format(self, message))

        self.send(message)
        self._on_verified_view_change_done_msg(message, self.name)

    # overridden method of PrimaryDecider
    def get_msgs_for_lagged_nodes(self) -> List[ViewChangeDone]:
        # Should not return a list; only done for compatibility with the interface
        """
        Returns the last accepted `ViewChangeDone` message.
        If no view change has happened, returns a ViewChangeDone with view no 0
        for a newly joined node
        """
        # TODO: Consider a case where more than one node joins immediately;
        # then one of the nodes might not have an accepted
        # ViewChangeDone message
        messages = []
        accepted = self._accepted_view_change_done_message
        if accepted:
            messages.append(ViewChangeDone(self.view_no, *accepted))
        elif self.name in self._view_change_done:
            messages.append(
                ViewChangeDone(self.view_no,
                               *self._view_change_done[self.name]))
        else:
            logger.debug(
                '{} has no ViewChangeDone message to send for view {}'.format(
                    self, self.view_no))
        return messages
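# get_sufficient_same_view_change_done_messages above canonicalises each vote into a
# hashable (primary_name, ledger_info) tuple and accepts the most common one only if it
# reaches the quorum. The snippet below reproduces just that counting step with
# collections.Counter; it is an illustrative sketch, not the plenum mostCommonElement
# helper, and `pick_accepted_vote` plus the sample data are made up.
from collections import Counter


def pick_accepted_vote(view_change_done: dict, quorum: int):
    """Return (primary_name, ledger_info) if some identical vote reaches quorum, else None."""
    votes = [(nm, tuple(tuple(i) for i in info))
             for nm, info in view_change_done.values()]
    if not votes:
        return None
    (candidate, count), = Counter(votes).most_common(1)
    return candidate if count >= quorum else None


# Example: three of four nodes report the same primary and ledger summary
# (each ledger entry is a (ledger_id, size, root_hash) triple, as in is_behind_for_view).
sample = {
    'Alpha': ('Beta', ((0, 10, 'root0'), (1, 25, 'root1'))),
    'Gamma': ('Beta', ((0, 10, 'root0'), (1, 25, 'root1'))),
    'Delta': ('Beta', ((0, 10, 'root0'), (1, 25, 'root1'))),
    'Epsilon': ('Zeta', ((0, 9, 'rootX'), (1, 25, 'root1'))),
}
assert pick_accepted_vote(sample, quorum=3)[0] == 'Beta'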
def instance_changes(tconf): return InstanceChanges(tconf)
def test_old_ic_discard(instance_changes, tconf):
    frm = "Node1"
    view_no = 1
    quorum = 2
    time_provider = MockTimestamp(0)
    instance_changes = InstanceChanges(tconf, time_provider)
    msg = InstanceChange(view_no, Suspicions.PRIMARY_DEGRADED.code)

    time_provider.value = 0
    instance_changes.add_vote(msg, frm)
    time_provider.value += tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 1
    assert not instance_changes.has_view(view_no)

    instance_changes.add_vote(msg, frm)
    time_provider.value += tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 1
    assert not instance_changes.has_inst_chng_from(view_no, frm)

    instance_changes.add_vote(msg, frm)
    time_provider.value += tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 1
    assert not instance_changes.has_quorum(view_no, quorum)
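# The tests above drive expiry purely through a controllable clock: MockTimestamp is
# passed to InstanceChanges as its time provider and the test advances `.value` by hand.
# A minimal stand-in with that behaviour might look like the sketch below (an assumed
# shape, not the actual test helper; `FakeTimestamp` is a made-up name): a callable
# object whose return value the caller mutates directly.
class FakeTimestamp:
    def __init__(self, value=0):
        self.value = value

    def __call__(self):
        # The vote store is assumed to call its time provider to get "now".
        return self.value


# Usage mirrors the tests: advance time explicitly instead of sleeping.
now = FakeTimestamp(0)
now.value += 300
assert now() == 300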
class ViewChanger(HasActionQueue, MessageProcessor): def __init__(self, node): self.node = node self._view_no = 0 # type: int HasActionQueue.__init__(self) self.pre_vc_strategy = None if hasattr(self.config, 'PRE_VC_STRATEGY'): self.pre_vc_strategy = preVCStrategies.get(self.config.PRE_VC_STRATEGY)(self) self.inBox = deque() self.outBox = deque() self.inBoxRouter = Router( (InstanceChange, self.process_instance_change_msg), (ViewChangeDone, self.process_vchd_msg), (FutureViewChangeDone, self.process_future_view_vchd_msg) ) self.instanceChanges = InstanceChanges(node.config) # The quorum of `ViewChangeDone` msgs is different depending on whether we're doing a real view change, # or just propagating view_no and Primary from `CurrentState` messages sent to a newly joined Node. # TODO: separate real view change and Propagation of Primary # TODO: separate catch-up, view-change and primary selection so that # they are really independent. self.propagate_primary = False # Tracks if other nodes are indicating that this node is in lower view # than others. Keeps a map of view no to senders # TODO: Consider if sufficient ViewChangeDone for 2 different (and # higher views) are received, should one view change be interrupted in # between. self._next_view_indications = {} self._view_change_in_progress = False self.previous_view_no = None self.previous_master_primary = None self.set_defaults() self.initInsChngThrottling() # Action for _schedule instanceChange messages self.instance_change_action = None # Count of instance change rounds self.instance_change_rounds = 0 # Time for view_change_starting self.start_view_change_ts = 0 # Last successful viewNo. # In some cases view_change process can be uncompleted in time. # In that case we want to know, which viewNo was successful (last completed view_change) self.last_completed_view_no = 0 # Force periodic view change if enabled in config force_view_change_freq = node.config.ForceViewChangeFreq if force_view_change_freq > 0: self.startRepeating(self.on_master_degradation, force_view_change_freq) # Start periodic freshness check state_freshness_update_interval = node.config.STATE_FRESHNESS_UPDATE_INTERVAL if state_freshness_update_interval > 0: self.startRepeating(self.check_freshness, state_freshness_update_interval) def __repr__(self): return "{}".format(self.name) # PROPERTIES @property def view_no(self): return self._view_no @view_no.setter def view_no(self, value): logger.info("{} setting view no to {}".format(self.name, value)) self._view_no = value @property def name(self): return self.node.name @property def config(self): return self.node.config @property def quorums(self): return self.node.quorums @property def view_change_in_progress(self): return self._view_change_in_progress @view_change_in_progress.setter def view_change_in_progress(self, value): self._view_change_in_progress = value @property def quorum(self) -> int: # TODO: re-factor this, separate this two states (selection of a new # primary and propagation of existing one) if not self.view_change_in_progress: return self.node.quorums.propagate_primary.value if self.propagate_primary: return self.node.quorums.propagate_primary.value return self.node.quorums.view_change_done.value @property def _hasViewChangeQuorum(self): # This method should just be present for master instance. 
""" Checks whether n-f nodes completed view change and whether one of them is the next primary """ num_of_ready_nodes = len(self._view_change_done) diff = self.quorum - num_of_ready_nodes if diff > 0: logger.info('{} needs {} ViewChangeDone messages'.format(self, diff)) return False logger.info("{} got view change quorum ({} >= {})". format(self.name, num_of_ready_nodes, self.quorum)) return True @property def _is_propagated_view_change_completed(self): if not self._propagated_view_change_completed and \ self.node.poolLedger is not None and \ self.propagate_primary: accepted = self.get_sufficient_same_view_change_done_messages() if accepted is not None: accepted_pool_ledger_i = \ next(filter(lambda x: x[0] == POOL_LEDGER_ID, accepted[1])) self_pool_ledger_i = \ next(filter(lambda x: x[0] == POOL_LEDGER_ID, self.node.ledger_summary)) logger.info("{} Primary selection has been already completed " "on pool ledger info = {}, primary {}, self pool ledger info {}". format(self, accepted_pool_ledger_i, accepted[0], self_pool_ledger_i)) self._propagated_view_change_completed = True return self._propagated_view_change_completed @property def has_view_change_from_primary(self) -> bool: if not self._has_view_change_from_primary: next_primary_name = self.node.elector._next_primary_node_name_for_master() if next_primary_name not in self._view_change_done: logger.info("{} has not received ViewChangeDone from the next " "primary {} (view_no: {}, totalNodes: {})". format(self.name, next_primary_name, self.view_no, self.node.totalNodes)) else: logger.info('{} received ViewChangeDone from primary {}'.format(self, next_primary_name)) self._has_view_change_from_primary = True return self._has_view_change_from_primary @property def has_acceptable_view_change_quorum(self): if not self._has_acceptable_view_change_quorum: self._has_acceptable_view_change_quorum = ( self._hasViewChangeQuorum and (self._is_propagated_view_change_completed or self.has_view_change_from_primary) ) return self._has_acceptable_view_change_quorum @property def is_behind_for_view(self) -> bool: # Checks if the node is currently behind the accepted state for this # view, only makes sense to call when the node has an acceptable # view change quorum _, accepted_ledger_summary = self.get_sufficient_same_view_change_done_messages() for (ledgerId, own_ledger_size, _), (_, accepted_ledger_size, _) in \ zip(self.node.ledger_summary, accepted_ledger_summary): if own_ledger_size < accepted_ledger_size: logger.info("{} ledger {} sizes are differ: own {} accepted {}". format(self, ledgerId, own_ledger_size, accepted_ledger_size)) return True return False # __ PROPERTIES __ # EXTERNAL EVENTS def on_master_degradation(self): self.propose_view_change() def check_freshness(self): if self.is_state_fresh_enough(): logger.debug("{} not sending instance change because found state to be fresh enough".format(self)) return self.propose_view_change(Suspicions.STATE_SIGS_ARE_NOT_UPDATED) def send_instance_change_if_needed(self, proposed_view_no, reason): can, whyNot = self._canViewChange(proposed_view_no) # if scheduled action will be awakened after view change completed, # then this action must be stopped also. 
if not can and self.view_no < proposed_view_no and self.is_primary_disconnected(): # Resend the same instance change message if we are not archive # InstanceChange quorum logger.info("Resend instance change message to all recipients") self.sendInstanceChange(proposed_view_no, reason) self._schedule(action=self.instance_change_action, seconds=self.config.INSTANCE_CHANGE_TIMEOUT) logger.info("Count of rounds without quorum of " "instance change messages: {}".format(self.instance_change_rounds)) self.instance_change_rounds += 1 else: # ViewChange procedure was started, therefore stop scheduling # resending instanceChange messages logger.info("Stop scheduling instance change resending") self._cancel(action=self.instance_change_action) self.instance_change_action = None self.instance_change_rounds = 0 def on_primary_loss(self): view_no = self.propose_view_change(Suspicions.PRIMARY_DISCONNECTED) if self.instance_change_action: # It's an action, scheduled for previous instanceChange round logger.info("Stop previous instance change resending schedule") self._cancel(action=self.instance_change_action) self.instance_change_rounds = 0 self.instance_change_action = partial(self.send_instance_change_if_needed, view_no, Suspicions.PRIMARY_DISCONNECTED) self._schedule(self.instance_change_action, seconds=self.config.INSTANCE_CHANGE_TIMEOUT) # TODO we have `on_primary_loss`, do we need that one? def on_primary_about_to_be_disconnected(self): self.propose_view_change(Suspicions.PRIMARY_ABOUT_TO_BE_DISCONNECTED) def on_suspicious_primary(self, suspicion: Suspicions): self.propose_view_change(suspicion) def on_view_change_not_completed_in_time(self): self.propose_view_change(Suspicions.INSTANCE_CHANGE_TIMEOUT) def on_catchup_complete(self): if self.node.is_synced and self.node.master_replica.isPrimary is None and \ not self._is_propagated_view_change_completed: self._send_view_change_done_message() self._start_selection() def process_future_view_vchd_msg(self, future_vcd_msg: FutureViewChangeDone, frm): # if we already started a view change then do not decide on a new one if self.view_change_in_progress: return from_current_state = future_vcd_msg.from_current_state view_no = future_vcd_msg.vcd_msg.viewNo # ToDo maybe we should compare with last_completed_view_no instead of viewNo. if not ((view_no > self.view_no) or (self.view_no == 0 and from_current_state)): # it means we already processed this future View Change Done return if self.view_no == 0 and from_current_state: # This is the first Propagate Primary, # so we need to make sure that we connected to the real primary for the proposed view # see test_view_change_after_back_to_quorum_with_disconnected_primary self.node.schedule_initial_propose_view_change() if view_no not in self._next_view_indications: self._next_view_indications[view_no] = {} self._next_view_indications[view_no][frm] = future_vcd_msg.vcd_msg self._start_view_change_if_possible(view_no, propagate_primary=from_current_state) # __ EXTERNAL EVENTS __ def process_instance_change_msg(self, instChg: InstanceChange, frm: str) -> None: """ Validate and process an instance change request. :param instChg: the instance change request :param frm: the name of the node that sent this `msg` """ if frm not in self.node.nodestack.connecteds: self.node.discard( instChg, "received instance change request: {} from {} " "which is not in connected list: {}". 
format(instChg, frm, self.node.nodestack.connecteds), logger.info) return logger.info("{} received instance change request: {} from {}".format(self, instChg, frm)) # TODO: add sender to blacklist? if not isinstance(instChg.viewNo, int): self.node.discard( instChg, "{}field view_no has incorrect type: {}". format(VIEW_CHANGE_PREFIX, type(instChg.viewNo))) elif instChg.viewNo <= self.view_no: self.node.discard( instChg, "Received instance change request with view no {} " "which is not more than its view no {}". format(instChg.viewNo, self.view_no), logger.info) else: # Record instance changes for views but send instance change # only when found master to be degraded. if quorum of view changes # found then change view even if master not degraded self._on_verified_instance_change_msg(instChg, frm) if self.instanceChanges.has_inst_chng_from(instChg.viewNo, self.name): logger.info("{} received instance change message {} but has already " "sent an instance change message".format(self, instChg)) elif not self.node.monitor.isMasterDegraded(): logger.info("{} received instance change message {} but did not " "find the master to be slow".format(self, instChg)) else: logger.display("{}{} found master degraded after receiving instance change" " message from {}".format(VIEW_CHANGE_PREFIX, self, frm)) self.sendInstanceChange(instChg.viewNo) def process_vchd_msg(self, msg: ViewChangeDone, sender: str) -> bool: """ Processes ViewChangeDone messages. Once n-f messages have been received, decides on a primary for specific replica. :param msg: ViewChangeDone message :param sender: the name of the node from which this message was sent """ logger.info("{}'s primary selector started processing of ViewChangeDone msg from {} : {}". format(self.name, sender, msg)) view_no = msg.viewNo if self.view_no != view_no: self.discard(msg, '{} got Primary from {} for view no {} ' 'whereas current view no is {}'. format(self, sender, view_no, self.view_no), logMethod=logger.info) return False new_primary_name = msg.name if new_primary_name == self.previous_master_primary: self.discard(msg, '{} got Primary from {} for {} who was primary of ' 'master in previous view too'. format(self, sender, new_primary_name), logMethod=logger.info) return False # Since a node can send ViewChangeDone more than one time self._on_verified_view_change_done_msg(msg, sender) # TODO why do we check that after the message tracking if self.node.master_replica.hasPrimary: self.discard(msg, "it already decided primary which is {}". format(self.node.master_replica.primaryName), logger.info) return False self._start_selection() def send(self, msg): """ Send a message to the node. :param msg: the message to send """ logger.debug("{}'s view_changer sending {}".format(self.name, msg)) self.outBox.append(msg) async def serviceQueues(self, limit=None) -> int: """ Service at most `limit` messages from the inBox. :param limit: the maximum number of messages to service :return: the number of messages successfully processed """ # do not start any view changes until catch-up is finished! 
if not Mode.is_done_syncing(self.node.mode): return 0 return await self.inBoxRouter.handleAll(self.inBox, limit) def sendInstanceChange(self, view_no: int, suspicion=Suspicions.PRIMARY_DEGRADED): """ Broadcast an instance change request to all the remaining nodes :param view_no: the view number when the instance change is requested """ # If not found any sent instance change messages in last # `ViewChangeWindowSize` seconds or the last sent instance change # message was sent long enough ago then instance change message can be # sent otherwise no. canSendInsChange, cooldown = self.insChngThrottler.acquire() if canSendInsChange: logger.info( "{}{} sending an instance change with view_no {}" " since {}".format( VIEW_CHANGE_PREFIX, self, view_no, suspicion.reason)) logger.info("{}{} metrics for monitor: {}" .format(MONITORING_PREFIX, self, self.node.monitor.prettymetrics)) msg = self._create_instance_change_msg(view_no, suspicion.code) self.send(msg) # record instance change vote for self and try to change the view # if quorum is reached self._on_verified_instance_change_msg(msg, self.name) else: logger.info("{} cannot send instance change sooner then {} seconds".format(self, cooldown)) # noinspection PyAttributeOutsideInit def initInsChngThrottling(self): windowSize = self.node.config.ViewChangeWindowSize ratchet = Ratchet(a=2, b=0.05, c=1, base=2, peak=windowSize) self.insChngThrottler = Throttler(windowSize, ratchet.get) def _create_instance_change_msg(self, view_no, suspicion_code): return InstanceChange(view_no, suspicion_code) def _on_verified_instance_change_msg(self, msg, frm): view_no = msg.viewNo if not self.instanceChanges.has_inst_chng_from(view_no, frm): self.instanceChanges.add_vote(msg, frm) if view_no > self.view_no: self.do_view_change_if_possible(view_no) def do_view_change_if_possible(self, view_no): # TODO: Need to handle skewed distributions which can arise due to # malicious nodes sending messages early on can, whyNot = self._canViewChange(view_no) if can: logger.display("{}{} initiating a view change to {} from {}". format(VIEW_CHANGE_PREFIX, self, view_no, self.view_no)) self.propagate_primary = False self.startViewChange(view_no) else: logger.info(whyNot) return can def _qourum_is_reached(self, count, propagate_primary): if propagate_primary: return self.quorums.propagate_primary.is_reached(count) else: return self.quorums.view_change_done.is_reached(count) def _start_view_change_if_possible(self, view_no, propagate_primary=False) -> bool: ind_count = len(self._next_view_indications[view_no]) if self._qourum_is_reached(ind_count, propagate_primary): logger.display('{}{} starting view change for {} after {} view change ' 'indications from other nodes'.format(VIEW_CHANGE_PREFIX, self, view_no, ind_count)) self.propagate_primary = propagate_primary self.startViewChange(view_no) return True return False def _canViewChange(self, proposedViewNo: int) -> (bool, str): """ Return whether there's quorum for view change for the proposed view number and its view is less than or equal to the proposed view """ msg = None quorum = self.quorums.view_change.value if not self.instanceChanges.has_quorum(proposedViewNo, quorum): msg = '{} has no quorum for view {}'.format(self, proposedViewNo) elif not proposedViewNo > self.view_no: msg = '{} is in higher view more than {}'.format( self, proposedViewNo) return not bool(msg), msg def startViewChange(self, proposed_view_no: int, continue_vc=False): """ Trigger the view change process. 
:param proposed_view_no: the new view number after view change. """ # TODO: consider moving this to pool manager # TODO: view change is a special case, which can have different # implementations - we need to make this logic pluggable if self.pre_vc_strategy and (not self.propagate_primary) and (not continue_vc): self.pre_vc_strategy.prepare_view_change(proposed_view_no) return elif self.pre_vc_strategy: self.pre_vc_strategy.on_strategy_complete() self.previous_view_no = self.view_no self.view_no = proposed_view_no self.view_change_in_progress = True self.previous_master_primary = self.node.master_primary_name self.set_defaults() self._process_vcd_for_future_view() self.initInsChngThrottling() self.node.on_view_change_start() self.node.start_catchup() def _process_vcd_for_future_view(self): # make sure that all received VCD messages for future view # (including the current view) are stored, as they will be needed for a quorum # to finish the View Change and start selection. # This is especially critical for Propagate Primary mode (on receiving CURRENT_STATE by a new node). if self.view_no in self._next_view_indications: for frm, vcd in self._next_view_indications[self.view_no].items(): # we call _on_verified_view_change_done_msg, not process_vchd_msg, # since we may be in propagate primary mode where some of validation inside process_vchd_msg # is not correct (for example, checking that the new primary differs from the current one) self._on_verified_view_change_done_msg(vcd, frm) # remove all for previous views for view_no in tuple(self._next_view_indications.keys()): if view_no <= self.view_no: del self._next_view_indications[view_no] def _on_verified_view_change_done_msg(self, msg, frm): new_primary_name = msg.name ledger_summary = msg.ledgerInfo # TODO what is the case when node sends several different # view change done messages data = (new_primary_name, ledger_summary) self._view_change_done[frm] = data def _start_selection(self): error = None if not self.node.is_synced: error = "mode is {}".format(self.node.mode) elif not self.has_acceptable_view_change_quorum: error = "has no view change quorum or no message from next primary" else: rv = self.get_sufficient_same_view_change_done_messages() if rv is None: error = "there are not sufficient same ViewChangeDone messages" elif not (self._is_propagated_view_change_completed or self._verify_primary(*rv)): error = "failed to verify primary" if error is not None: logger.info('{} cannot start primary selection because {}'.format(self, error)) return if self.is_behind_for_view: logger.info('{} is synced and has an acceptable view change quorum ' 'but is behind the accepted state'.format(self)) self.node.start_catchup() return logger.info("{} starting selection".format(self)) nodeReg = None # in case of already completed view change # use node registry actual for the moment when it happened if self._is_propagated_view_change_completed: assert self._accepted_view_change_done_message is not None ledger_summary = self._accepted_view_change_done_message[1] pool_ledger_size = ledger_summary[POOL_LEDGER_ID][1] nodeReg = self.node.poolManager.getNodeRegistry(pool_ledger_size) if self.view_change_in_progress: self.node.backup_instance_faulty_processor.restore_replicas() self.node.select_primaries(nodeReg) if self.view_change_in_progress: self.view_change_in_progress = False self.node.on_view_change_complete() # when we had INSTANCE_CHANGE message, they added into instanceChanges # by msg.view_no. 
When view change was occured and view_no is changed, # then we should delete all INSTANCE_CHANGE messages with current (already changed) # view_no (which used in corresponded INSTANCE_CHANGE messages) # Therefore we delete all INSTANCE_CHANGE messages from previous and current view number for view_number in list(self.instanceChanges.keys()): if view_number <= self.view_no: self.instanceChanges.pop(view_number, None) self.previous_view_no = None self.previous_master_primary = None self.propagate_primary = False def set_defaults(self): # Tracks view change done message self._view_change_done = {} # replica name -> data # Set when an appropriate view change quorum is found which has # sufficient same ViewChangeDone messages self._primary_verified = False self._has_view_change_from_primary = False self._has_acceptable_view_change_quorum = False self._accepted_view_change_done_message = None # accept any primary if propagating view change done msgs # for already completed view change self._propagated_view_change_completed = False def get_sufficient_same_view_change_done_messages(self) -> Optional[Tuple]: # Returns whether has a quorum of ViewChangeDone messages that are same # TODO: Does not look like optimal implementation. if self._accepted_view_change_done_message is None and \ self._view_change_done: votes = self._view_change_done.values() votes = [(nm, tuple(tuple(i) for i in info)) for nm, info in votes] (new_primary, ledger_info), vote_count = mostCommonElement(votes) if vote_count >= self.quorum: logger.info('{} found acceptable primary {} and ledger info {}'. format(self, new_primary, ledger_info)) self._accepted_view_change_done_message = (new_primary, ledger_info) else: logger.info('{} does not have acceptable primary, only {} votes for {}'. format(self, vote_count, (new_primary, ledger_info))) return self._accepted_view_change_done_message def _verify_primary(self, new_primary, ledger_info): """ This method is called when sufficient number of ViewChangeDone received and makes steps to switch to the new primary """ expected_primary = self.node.elector._next_primary_node_name_for_master() if new_primary != expected_primary: logger.error("{}{} expected next primary to be {}, but majority " "declared {} instead for view {}" .format(PRIMARY_SELECTION_PREFIX, self.name, expected_primary, new_primary, self.view_no)) return False self._primary_verified = True return True # TODO: check if ledger status is expected def _send_view_change_done_message(self): """ Sends ViewChangeDone message to other protocol participants """ new_primary_name = self.node.elector._next_primary_node_name_for_master() ledger_summary = self.node.ledger_summary message = ViewChangeDone(self.view_no, new_primary_name, ledger_summary) logger.info("{} is sending ViewChangeDone msg to all : {}".format(self, message)) self.send(message) self._on_verified_view_change_done_msg(message, self.name) # overridden method of PrimaryDecider def get_msgs_for_lagged_nodes(self) -> List[ViewChangeDone]: # Should not return a list, only done for compatibility with interface """ Returns the last accepted `ViewChangeDone` message. 
If no view change has happened returns ViewChangeDone with view no 0 to a newly joined node """ # TODO: Consider a case where more than one node joins immediately, # then one of the node might not have an accepted # ViewChangeDone message messages = [] accepted = self._accepted_view_change_done_message if accepted: messages.append(ViewChangeDone(self.last_completed_view_no, *accepted)) elif self.name in self._view_change_done: messages.append(ViewChangeDone(self.last_completed_view_no, *self._view_change_done[self.name])) else: logger.info('{} has no ViewChangeDone message to send for view {}'. format(self, self.view_no)) return messages def propose_view_change(self, suspicion=Suspicions.PRIMARY_DEGRADED): proposed_view_no = self.view_no # TODO: For some reason not incrementing view_no in most cases leads to lots of failing/flaky tests # if suspicion == Suspicions.INSTANCE_CHANGE_TIMEOUT or not self.view_change_in_progress: if suspicion != Suspicions.STATE_SIGS_ARE_NOT_UPDATED or not self.view_change_in_progress: proposed_view_no += 1 self.sendInstanceChange(proposed_view_no, suspicion) return proposed_view_no def is_primary_disconnected(self): return \ self.node.primaries_disconnection_times[self.node.master_replica.instId] and self.node.master_primary_name and \ self.node.master_primary_name not in self.node.nodestack.conns def is_state_fresh_enough(self): replica = self.node.master_replica timestamps = replica.get_ledgers_last_update_time().values() oldest_timestamp = min(timestamps) time_elapsed = replica.get_time_for_3pc_batch() - oldest_timestamp threshold = self.config.ACCEPTABLE_FRESHNESS_INTERVALS_COUNT * self.config.STATE_FRESHNESS_UPDATE_INTERVAL return time_elapsed < threshold
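# is_state_fresh_enough above boils down to simple arithmetic: take the oldest
# last-update timestamp across ledgers and require that the time elapsed since then is
# below ACCEPTABLE_FRESHNESS_INTERVALS_COUNT * STATE_FRESHNESS_UPDATE_INTERVAL. The
# standalone version below is only an illustration; the function name and the default
# values (3 intervals of 300 seconds) are assumptions, not the real config defaults.
def state_fresh_enough(last_update_times, now,
                       intervals_count=3, update_interval=300):
    """With the assumed values, state is considered stale after 900s without updates."""
    oldest = min(last_update_times)
    return (now - oldest) < intervals_count * update_interval


# One ledger last updated 1000s ago => not fresh; all updated within 200s => fresh.
assert not state_fresh_enough([2000, 2800], now=3000)
assert state_fresh_enough([2800, 2900], now=3000)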
class Node(HasActionQueue, NodeStacked, ClientStacked, Motor, Propagator, MessageProcessor): suspicions = {s.code: s.reason for s in Suspicions.getList()} def __init__(self, name: str, nodeRegistry: Dict[str, HA], clientAuthNr: ClientAuthNr=None, ha: HA=None, cliname: str=None, cliha: HA=None, basedirpath: str=None, primaryDecider: PrimaryDecider = None, opVerifiers: Iterable[Any]=None): """ Create a new node. :param nodeRegistry: names and host addresses of all nodes in the pool :param clientAuthNr: client authenticator implementation to be used :param basedirpath: path to the base directory used by `nstack` and `cstack` :param primaryDecider: the mechanism to be used to decide the primary of a protocol instance """ self.opVerifiers = opVerifiers or [] self.primaryDecider = primaryDecider me = nodeRegistry[name] self.allNodeNames = list(nodeRegistry.keys()) if isinstance(me, NodeDetail): sha = me.ha scliname = me.cliname scliha = me.cliha nodeReg = {k: v.ha for k, v in nodeRegistry.items()} else: sha = me if isinstance(me, HA) else HA(*me) scliname = None scliha = None nodeReg = {k: HA(*v) for k, v in nodeRegistry.items()} if not ha: # pull it from the registry ha = sha if not cliname: # default to the name plus the suffix cliname = scliname if scliname else name + CLIENT_STACK_SUFFIX if not cliha: # default to same ip, port + 1 cliha = scliha if scliha else HA(ha[0], ha[1]+1) nstack = dict(name=name, ha=ha, main=True, auto=AutoMode.never) cstack = dict(name=cliname, ha=cliha, main=True, auto=AutoMode.always) if basedirpath: nstack['basedirpath'] = basedirpath cstack['basedirpath'] = basedirpath self.clientAuthNr = clientAuthNr or SimpleAuthNr() self.nodeInBox = deque() self.clientInBox = deque() self.created = time.perf_counter() HasActionQueue.__init__(self) NodeStacked.__init__(self, nstack, nodeReg) ClientStacked.__init__(self, cstack) Motor.__init__(self) Propagator.__init__(self) self.totalNodes = len(nodeRegistry) self.f = getMaxFailures(self.totalNodes) self.requiredNumberOfInstances = self.f + 1 # per RBFT self.minimumNodes = (2 * self.f) + 1 # minimum for a functional pool self.txnStore = TransactionStore() self.replicas = [] # type: List[replica.Replica] self.instanceChanges = InstanceChanges() self.viewNo = 0 # type: int self.rank = self.getRank(self.name, nodeRegistry) self.elector = None # type: PrimaryDecider self.forwardedRequests = set() # type: Set[Tuple[(str, int)]] self.instances = Instances() self.monitor = Monitor(self.name, Delta=.8, Lambda=60, Omega=5, instances=self.instances) # Requests that are to be given to the replicas by the node. 
Each # element of the list is a deque for the replica with number equal to # its index in the list and each element of the deque is a named tuple self.msgsToReplicas = [] # type: List[deque] # Requests that are to be given to the elector by the node self.msgsToElector = deque() nodeRoutes = [(Propagate, self.processPropagate), (InstanceChange, self.processInstanceChange)] nodeRoutes.extend((msgTyp, self.sendToElector) for msgTyp in [Nomination, Primary, Reelection]) nodeRoutes.extend((msgTyp, self.sendToReplica) for msgTyp in [PrePrepare, Prepare, Commit]) self.nodeMsgRouter = Router(*nodeRoutes) self.clientMsgRouter = Router((Request, self.processRequest)) self.perfCheckFreq = 10 self._schedule(self.checkPerformance, self.perfCheckFreq) self.clientBlacklister = SimpleBlacklister( self.name + CLIENT_BLACKLISTER_SUFFIX) # type: Blacklister self.nodeBlacklister = SimpleBlacklister( self.name + NODE_BLACKLISTER_SUFFIX) # type: Blacklister # BE CAREFUL HERE # This controls which message types are excluded from signature # verification. These are still subject to RAET's signature verification # but client signatures will not be checked on these. Expressly # prohibited from being in this is ClientRequest and Propagation, # which both require client signature verification self.authnWhitelist = (Nomination, Primary, Reelection, Batch, PrePrepare, Prepare, Commit, InstanceChange) self.addReplicas() def start(self): oldstatus = self.status super().start() if oldstatus in Status.going(): logger.info("{} is already {}, so start has no effect". format(self, self.status.name)) else: self.startNodestack() self.startClientstack() self.elector = self.newPrimaryDecider() # if first time running this node if not self.nodestack.remotes: logger.info("{} first time running; waiting for key sharing..." "".format(self)) else: self.maintainConnections() @staticmethod def getRank(name: str, allNames: Sequence[str]): return sorted(allNames).index(name) def newPrimaryDecider(self): if self.primaryDecider: return self.primaryDecider else: return primary_elector.PrimaryElector(self) @property def nodeCount(self) -> int: """ The plus one is for this node, for example, if this node has three connections, then there would be four total nodes :return: number of connected nodes this one """ return len(self._conns) + 1 def onStopping(self): """ Actions to be performed on stopping the node. - Close the UDP socket of the nodestack """ if self.nodestack: self.nodestack.close() self.nodestack = None if self.clientstack: self.clientstack.close() self.clientstack = None self.reset() self.logstats() self.conns.clear() def reset(self): logger.info("{} reseting...".format(self), extra={"cli": False}) self.nextCheck = 0 self.aqStash.clear() self.actionQueue.clear() self.elector = None async def prod(self, limit: int=None) -> int: """ This function is executed by the node each time it gets its share of CPU time from the event loop. :param limit: the number of items to be serviced in this attempt :return: total number of messages serviced by this node """ await self.serviceLifecycle() c = 0 if self.status is not Status.stopped: c += await self.serviceNodeMsgs(limit) c += self.serviceReplicas(limit) c += await self.serviceClientMsgs(limit) c += self._serviceActions() c += await self.serviceElector() self.flushOutBoxes() return c def serviceReplicas(self, limit) -> int: """ Execute `serviceReplicaMsgs`, `serviceReplicaOutBox` and `serviceReplicaInBox` with `limit` number of messages. See the respective functions for more information. 
    def serviceReplicas(self, limit) -> int:
        """
        Execute `serviceReplicaMsgs`, `serviceReplicaOutBox` and
        `serviceReplicaInBox` with `limit` number of messages. See the
        respective functions for more information.

        :param limit: the maximum number of messages to process
        :return: the sum of messages successfully processed by
            serviceReplicaMsgs, serviceReplicaInBox and serviceReplicaOutBox
        """
        a = self.serviceReplicaMsgs(limit)
        b = self.serviceReplicaOutBox(limit)
        c = self.serviceReplicaInBox(limit)
        return a + b + c

    async def serviceNodeMsgs(self, limit: int) -> int:
        """
        Process `limit` number of messages from the nodeInBox.

        :param limit: the maximum number of messages to process
        :return: the number of messages successfully processed
        """
        n = await self.nodestack.service(limit)
        await self.processNodeInBox()
        return n

    async def serviceClientMsgs(self, limit: int) -> int:
        """
        Process `limit` number of messages from the clientInBox.

        :param limit: the maximum number of messages to process
        :return: the number of messages successfully processed
        """
        c = await self.clientstack.service(limit)
        await self.processClientInBox()
        return c

    async def serviceElector(self) -> int:
        """
        Service the elector's inBox, outBox and action queues.

        :return: the number of messages successfully serviced
        """
        if not self.isReady():
            return 0
        o = self.serviceElectorOutBox()
        i = await self.serviceElectorInbox()
        a = self.elector._serviceActions()
        return o + i + a

    def onConnsChanged(self, newConns: Set[str], staleConns: Set[str]):
        """
        A series of operations to perform once the connection count has
        changed.

        - Set f to the max number of failures this system can handle.
        - Set status to one of started, started_hungry or starting depending
          on the number of connected nodes.
        - Check protocol instances. See `checkInstances()`.
        """
        if self.isGoing():
            if self.nodeCount >= self.totalNodes:
                self.status = Status.started
                self.stopKeySharing()
            elif self.nodeCount >= self.minimumNodes:
                self.status = Status.started_hungry
            else:
                self.status = Status.starting
        self.elector.nodeCount = self.nodeCount

        if self.isReady():
            self.checkInstances()
            if isinstance(self.elector, PrimaryElector):
                msgs = self.elector.getElectionMsgsForLaggedNodes()
                logger.debug("{} has msgs {} for new nodes {}".
                             format(self, msgs, newConns))
                for n in newConns:
                    self.sendElectionMsgsToLaggedNode(n, msgs)

    def sendElectionMsgsToLaggedNode(self, nodeName: str, msgs: List[Any]):
        rid = self.nodestack.getRemote(nodeName).uid
        for msg in msgs:
            logger.debug("{} sending election message {} to lagged node {}"
                         .format(self, msg, nodeName))
            self.send(msg, rid)

    def _statusChanged(self, old: Status, new: Status) -> None:
        """
        Perform some actions based on whether this node is ready or not.

        :param old: the previous status
        :param new: the current status
        """
        pass

    def checkInstances(self) -> None:
        """
        Check if this node has the minimum required number of protocol
        instances, i.e. f+1. If not, add a replica. If no election is in
        progress, this node will try to nominate one of its replicas as
        primary. This method is called whenever a connection with a new node
        is established.
        """
        logger.debug("{} choosing to start election on the basis of count {} "
                     "and nodes {}".format(self, self.nodeCount, self.conns))
        self._schedule(self.decidePrimaries)

    def addReplicas(self):
        while len(self.replicas) < self.requiredNumberOfInstances:
            self.addReplica()

    def decidePrimaries(self):
        """
        Choose the primary replica for each protocol instance in the system
        using a PrimaryDecider.
        """
        self.elector.decidePrimaries()
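    # --- Illustrative sketch (not part of the original class) -------------
    # Worked example of the connection-count thresholds used by
    # onConnsChanged above, assuming the usual BFT bound f = (N - 1) // 3,
    # which getMaxFailures is expected to provide. For a 4-node pool:
    # f = 1 and minimumNodes = 2f + 1 = 3, so the node is `started` with all
    # 4 nodes connected, `started_hungry` with 3, and `starting` below that.
    @staticmethod
    def _example_status_for(connected: int, total: int = 4) -> Status:
        f = (total - 1) // 3           # assumed behaviour of getMaxFailures
        minimum = (2 * f) + 1          # minimum for a functional pool
        if connected >= total:
            return Status.started
        if connected >= minimum:
            return Status.started_hungry
        return Status.starting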
    def createReplica(self, instId: int, isMaster: bool) -> 'replica.Replica':
        """
        Create a new replica with the specified parameters.

        This is a convenience method used to create replicas from a node
        instead of passing in replicas in the Node's constructor.

        :param instId: protocol instance number
        :param isMaster: does this replica belong to the master protocol
            instance?
        :return: a new instance of Replica
        """
        return replica.Replica(self, instId, isMaster)

    def addReplica(self):
        """
        Create and add a new replica to this node. If this is the first
        replica on this node, it will belong to the Master protocol instance.
        """
        instId = len(self.replicas)
        if len(self.replicas) == 0:
            isMaster = True
            instDesc = "master"
        else:
            isMaster = False
            instDesc = "backup"
        replica = self.createReplica(instId, isMaster)
        self.replicas.append(replica)
        self.msgsToReplicas.append(deque())
        self.monitor.addInstance()
        logger.info("{} added replica {} to instance {} ({})".
                    format(self, replica, instId, instDesc),
                    extra={"cli": True})
        return replica

    def serviceReplicaMsgs(self, limit: int=None) -> int:
        """
        Process `limit` number of replica messages. Here processing means
        appending to the replica inbox.

        :param limit: the maximum number of replica messages to process
        :return: the number of replica messages processed
        """
        msgCount = 0
        for idx, replicaMsgs in enumerate(self.msgsToReplicas):
            while replicaMsgs and (not limit or msgCount < limit):
                msgCount += 1
                msg = replicaMsgs.popleft()
                self.replicas[idx].inBox.append(msg)
        return msgCount

    def serviceReplicaOutBox(self, limit: int=None) -> int:
        """
        Process `limit` number of messages from the replica outboxes. Here
        processing means sending consensus messages to other nodes, and
        handling Ordered messages and escalated exceptions on this node.

        :param limit: the maximum number of replica messages to process
        :return: the number of replica messages processed
        """
        msgCount = 0
        for replica in self.replicas:
            while replica.outBox and (not limit or msgCount < limit):
                msgCount += 1
                msg = replica.outBox.popleft()
                if isinstance(msg, (PrePrepare, Prepare, Commit)):
                    self.send(msg)
                elif isinstance(msg, Ordered):
                    self.processOrdered(msg)
                elif isinstance(msg, Exception):
                    self.processEscalatedException(msg)
                else:
                    logger.error("Received msg {} and don't know how to "
                                 "handle it".format(msg))
        return msgCount

    def serviceReplicaInBox(self, limit: int=None):
        """
        Process `limit` number of messages in the replica inbox for each
        replica on this node.

        :param limit: the maximum number of replica messages to process
        :return: the number of replica messages processed successfully
        """
        msgCount = 0
        for replica in self.replicas:
            msgCount += replica.serviceQueues(limit)
        return msgCount

    def serviceElectorOutBox(self, limit: int=None) -> int:
        """
        Service at most `limit` number of messages from the elector's outBox.

        :return: the number of messages successfully serviced.
        """
        msgCount = 0
        while self.elector.outBox and (not limit or msgCount < limit):
            msgCount += 1
            msg = self.elector.outBox.popleft()
            if isinstance(msg, (Nomination, Primary, Reelection)):
                self.send(msg)
            elif isinstance(msg, BlacklistMsg):
                nodeName = getattr(msg, f.NODE_NAME.nm)
                code = getattr(msg, f.SUSP_CODE.nm)
                self.reportSuspiciousNode(nodeName, code=code)
            else:
                logger.error("Received msg {} and don't know how to handle it".
                             format(msg))
        return msgCount

    async def serviceElectorInbox(self, limit: int=None) -> int:
        """
        Service at most `limit` number of messages from the elector's inBox.

        :return: the number of messages successfully serviced.
""" msgCount = 0 while self.msgsToElector and (not limit or msgCount < limit): msgCount += 1 msg = self.msgsToElector.popleft() self.elector.inBox.append(msg) await self.elector.serviceQueues(limit) return msgCount @property def hasPrimary(self) -> bool: """ Does this node have a primary replica? :return: whether this node has a primary """ return any(replica.isPrimary for replica in self.replicas) @property def primaryReplicaNo(self) -> Optional[int]: """ Return the index of the primary or None if there's no primary among the replicas on this node. :return: index of the primary """ for idx, replica in enumerate(self.replicas): if replica.isPrimary: return idx return None def isValidNodeMsg(self, msg) -> bool: """ Return whether the node message is valid. :param msg: the node message to validate """ if msg.instId >= len(self.msgsToReplicas): # TODO should we raise suspicion here? self.discard(msg, "non-existent protocol instance {}" .format(msg.instId)) return False return True def sendToReplica(self, msg, frm): """ Send the message to the intended replica. :param msg: the message to send :param frm: the name of the node which sent this `msg` """ if self.isValidNodeMsg(msg): self.msgsToReplicas[msg.instId].append((msg, frm)) def sendToElector(self, msg, frm): """ Send the message to the intended elector. :param msg: the message to send :param frm: the name of the node which sent this `msg` """ if self.isValidNodeMsg(msg): logger.debug("{} sending message to elector: {}". format(self, (msg, frm))) self.msgsToElector.append((msg, frm)) def handleOneNodeMsg(self, wrappedMsg): """ Validate and process one message from a node. :param wrappedMsg: Tuple of message and the name of the node that sent the message """ try: vmsg = self.validateNodeMsg(wrappedMsg) if vmsg: self.unpackNodeMsg(*vmsg) except SuspiciousNode as ex: self.reportSuspiciousNodeEx(ex) except Exception as ex: msg, frm = wrappedMsg self.discard(msg, ex) def validateNodeMsg(self, wrappedMsg): """ Validate another node's message sent to this node. :param wrappedMsg: Tuple of message and the name of the node that sent the message :return: Tuple of message from node and name of the node """ msg, frm = wrappedMsg if self.isNodeBlacklisted(frm): self.discard(msg, "received from blacklisted node {}" .format(frm), logger.info) return None op = msg.pop(OP_FIELD_NAME, None) if not op: raise MissingNodeOp cls = TaggedTuples.get(op, None) if not cls: raise InvalidNodeOp(op) try: cMsg = cls(**msg) except Exception as ex: raise InvalidNodeMsg from ex try: self.verifySignature(cMsg) # TODO why must we catch and raise? Is there a way to know earlier that # the signature exception is suspicious? If so, how suspicious? except BaseExc as ex: raise SuspiciousNode(frm, ex, cMsg) from ex # TODO are both needed? logger.debug("{} received node message from {}: {}". format(self, frm, cMsg), extra={"cli": False}) return cMsg, frm def unpackNodeMsg(self, msg, frm) -> None: """ If the message is a batch message validate each message in the batch, otherwise add the message to the node's inbox. 
:param msg: a node message :param frm: the name of the node that sent this `msg` """ if isinstance(msg, Batch): for m in msg.messages: self.handleOneNodeMsg((m, frm)) else: self.postToNodeInBox(msg, frm) def postToNodeInBox(self, msg, frm): """ Append the message to the node inbox :param msg: a node message :param frm: the name of the node that sent this `msg` """ self.nodeInBox.append((msg, frm)) async def processNodeInBox(self): """ Process the messages in the node inbox asynchronously. """ while self.nodeInBox: m = self.nodeInBox.popleft() try: await self.nodeMsgRouter.handle(m) except SuspiciousNode as ex: self.reportSuspiciousNodeEx(ex) self.discard(m, ex) def handleOneClientMsg(self, wrappedMsg): """ Validate and process a client message :param wrappedMsg: a message from a client """ try: vmsg = self.validateClientMsg(wrappedMsg) if vmsg: self.unpackClientMsg(*vmsg) except SuspiciousClient as ex: msg, frm = wrappedMsg exc = ex.__cause__ if ex.__cause__ else ex self.reportSuspiciousClient(frm, exc) self.discard(wrappedMsg, exc) except InvalidClientRequest as ex: exc = ex.__cause__ if ex.__cause__ else ex reason = "client request invalid: {} {}".\ format(exc.__class__.__name__, exc) self.discard(wrappedMsg, reason, cliOutput=True) def validateClientMsg(self, wrappedMsg): """ Validate a message sent by a client. :param wrappedMsg: a message from a client :return: Tuple of clientMessage and client address """ msg, frm = wrappedMsg if self.isClientBlacklisted(frm): self.discard(msg, "received from blacklisted client {}" .format(frm), logger.info) return None if all(attr in msg.keys() for attr in [OPERATION, 'clientId', 'reqId']): self.checkValidOperation(msg[OPERATION]) cls = Request elif OP_FIELD_NAME in msg: op = msg.pop(OP_FIELD_NAME) cls = TaggedTuples.get(op, None) if not cls: raise InvalidClientOp(op) if cls is not Batch: raise InvalidClientMsgType(cls) else: raise InvalidClientRequest try: cMsg = cls(**msg) except Exception as ex: raise InvalidClientRequest from ex try: self.verifySignature(cMsg) except Exception as ex: raise SuspiciousClient from ex logger.trace("{} received CLIENT message: {}". format(self.clientstack.name, cMsg)) return cMsg, frm def unpackClientMsg(self, msg, frm): """ If the message is a batch message validate each message in the batch, otherwise add the message to the node's clientInBox. :param msg: a client message :param frm: the clientId of the client that sent this `msg` """ if isinstance(msg, Batch): for m in msg.messages: self.handleOneClientMsg((m, frm)) else: self.postToClientInBox(msg, frm) def postToClientInBox(self, msg, frm): """ Append the message to the node's clientInBox :param msg: a client message :param frm: the name of the node that sent this `msg` """ self.clientInBox.append((msg, frm)) async def processClientInBox(self): """ Process the messages in the node's clientInBox asynchronously. All messages in the inBox have already been validated, including signature check. """ while self.clientInBox: m = self.clientInBox.popleft() req, frm = m logger.debug("{} processing {} request {}". format(self.clientstack.name, frm, req.reqId), extra={"cli": True}) await self.clientMsgRouter.handle(m) async def processRequest(self, request: Request, frm: str): """ Handle a REQUEST from the client. If the request has already been executed, the node re-sends the reply to the client. Otherwise, the node acknowledges the client request, adds it to its list of client requests, and sends a PROPAGATE to the remaining nodes. 
        :param request: the REQUEST from the client
        :param frm: the clientId of the client that sent this REQUEST
        """
        logger.debug("Node {} received client request: {}".
                     format(self.name, request))

        # If the request has already been processed (there is a reply for it
        # in the node's transaction store), return that reply from the
        # transaction store
        txnId = self.txnStore.isRequestAlreadyProcessed(request)
        if txnId:
            logger.debug("{} returning REPLY from already processed "
                         "REQUEST: {}".format(self, request))
            reply = self.txnStore.transactions[txnId]
            self.transmitToClient(reply, request.clientId)
        else:
            self.transmitToClient(RequestAck(request.reqId), frm)
            # If the PROPAGATE for the corresponding client REQUEST has not
            # been received yet, record the request and propagate it
            self.recordAndPropagate(request)

    # noinspection PyUnusedLocal
    async def processPropagate(self, msg: Propagate, frm):
        """
        Process one propagateRequest sent to this node asynchronously.

        - If this propagateRequest hasn't been seen by this node, then
          broadcast it to all nodes after verifying the signature.
        - Add the client to the blacklist if its signature is invalid.

        :param msg: the propagateRequest
        :param frm: the name of the node which sent this `msg`
        """
        logger.debug("Node {} received propagated request: {}".
                     format(self.name, msg))
        reqDict = msg.request
        request = Request(**reqDict)
        self.requests.addPropagate(request, frm)
        self.propagate(request)
        self.tryForwarding(request)

    def processOrdered(self, ordered: Ordered, retryNo: int = 0):
        """
        Process an Ordered message. Execute the client request, retrying if
        the client request hasn't yet reached this node but the corresponding
        PROPAGATE, PRE-PREPARE, PREPARE and COMMIT messages did.

        :param ordered: an Ordered message
        :param retryNo: the retry number used in recursion
        :return: True if successful, None otherwise
        """
        instId, viewNo, clientId, reqId, digest = tuple(ordered)
        self.monitor.requestOrdered(clientId, reqId, instId,
                                    byMaster=(instId == self.instances.masterId))

        # Only requests ordered by the master protocol instance are executed
        if instId == self.instances.masterId:
            key = (clientId, reqId)
            if key in self.requests:
                req = self.requests[key].request
                self.executeRequest(viewNo, req)
                logger.debug("Node {} executing client request {} {}".
                             format(self.name, clientId, reqId))
            # If the client request hasn't reached the node but the
            # corresponding PROPAGATE, PRE-PREPARE, PREPARE and COMMIT
            # messages did, then retry 3 times
            elif retryNo < 3:
                retryNo += 1
                p = partial(self.processOrdered, ordered, retryNo)
                self._schedule(p, random.randint(2, 4))
                logger.debug("Node {} retrying executing client request {} {}".
                             format(self.name, clientId, reqId))
            return True
        else:
            logger.trace("{} got ordered request from backup replica".
                         format(self))

    def processEscalatedException(self, ex):
        """
        Process an exception escalated from a Replica
        """
        if isinstance(ex, SuspiciousNode):
            self.reportSuspiciousNodeEx(ex)
        else:
            raise RuntimeError("unhandled replica-escalated exception") from ex
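    # --- Illustrative sketch (not part of the original class) -------------
    # Worked example of the decision processOrdered above makes: an Ordered
    # message is unpacked as (instId, viewNo, clientId, reqId, digest), and
    # only orderings from the master protocol instance lead to execution;
    # backup-instance orderings are used for monitoring only. The tuple
    # passed in is a stand-in with the same field order, not a real Ordered
    # message.
    @staticmethod
    def _example_should_execute(ordered, masterId: int) -> bool:
        instId, viewNo, clientId, reqId, digest = tuple(ordered)
        return instId == masterId
    # e.g. _example_should_execute((0, 0, 'client1', 1, 'digest'), 0) is
    # True, while the same ordering coming from a backup instance
    # (instId=1) is not executed.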
    def processInstanceChange(self, instChg: InstanceChange, frm: str) -> None:
        """
        Validate and process an instance change request.

        :param instChg: the instance change request
        :param frm: the name of the node that sent this `msg`
        """
        logger.debug("Node {} received instance change request: {} from {}".
                     format(self, instChg, frm))
        if instChg.viewNo < self.viewNo:
            self.discard(instChg,
                         "Received instance change request with view no {} "
                         "which is less than its view no {}".
                         format(instChg.viewNo, self.viewNo), logger.debug)
        else:
            if not self.instanceChanges.hasView(instChg.viewNo):
                if self.monitor.isMasterDegraded():
                    self.instanceChanges.addVote(instChg.viewNo, frm)
                    self.sendInstanceChange(instChg.viewNo)
                else:
                    self.discard(instChg,
                                 "received instance change message from {} but "
                                 "did not find the master to be slow".
                                 format(frm), logger.debug)
                    return
            else:
                if self.instanceChanges.hasInstChngFrom(instChg.viewNo, frm):
                    self.reportSuspiciousNode(
                        frm, code=Suspicions.DUPLICATE_INST_CHNG)
                else:
                    self.instanceChanges.addVote(instChg.viewNo, frm)

            if self.canViewChange(instChg.viewNo):
                logger.debug("{} initiating a view change with view "
                             "no {}".format(self, self.viewNo))
                self.startViewChange(instChg.viewNo)
            else:
                logger.trace("{} cannot initiate a view change".
                             format(self))

    def checkPerformance(self):
        """
        Check if the master instance is slow and send an instance change
        request if it is.

        :return: True if master performance is OK, otherwise False
        """
        logger.debug("{} checking its performance".format(self))
        self._schedule(self.checkPerformance, self.perfCheckFreq)

        if self.instances.masterId is not None:
            if self.monitor.isMasterDegraded():
                logger.info("{} master has lower performance than backups. "
                            "Sending an instance change with viewNo {}".
                            format(self, self.viewNo))
                logger.info("{} metrics for monitor: {}".
                            format(self, self.monitor.prettymetrics))
                self.sendInstanceChange(self.viewNo)
                return False
            else:
                logger.debug("{}'s master has higher performance than backups".
                             format(self))
                return True

    def executeRequest(self, viewNo: int, req: Request) -> None:
        """
        Execute the REQUEST sent to this Node

        :param viewNo: the view number (See glossary)
        :param req: the client REQUEST
        """
        reply = self.generateReply(viewNo, req)
        self.transmitToClient(reply, req.clientId)
        txnId = reply.result['txnId']
        self.txnStore.addToProcessedRequests(req.clientId, req.reqId,
                                             txnId, reply)
        asyncio.ensure_future(self.txnStore.addReply(
            clientId=req.clientId, reply=reply, txnId=txnId))

    def sendInstanceChange(self, viewNo: int):
        """
        Broadcast an instance change request to all the remaining nodes

        :param viewNo: the view number when the instance change is requested
        """
        self.send(InstanceChange(viewNo))
        self.instanceChanges.addVote(viewNo, self.name)

    @property
    def quorum(self) -> int:
        r"""
        Return the quorum of this RBFT system. Equal to :math:`2f + 1`.
        """
        return (2 * self.f) + 1

    def canViewChange(self, proposedViewNo: int) -> bool:
        """
        Return whether there is a quorum of instance change votes for the
        proposed view number and this node's view is less than or equal to
        the proposed view.
        """
        return self.instanceChanges.hasQuorum(proposedViewNo, self.f) and \
            self.viewNo <= proposedViewNo

    def startViewChange(self, proposedViewNo: int):
        """
        Trigger the view change process.

        :param proposedViewNo: the new view number after view change.
        """
        self.viewNo = proposedViewNo + 1
        logger.debug("{} resetting monitor stats after view change".
format(self)) self.monitor.reset() # Now communicate the view change to the elector which will # contest primary elections across protocol all instances self.elector.viewChanged(self.viewNo) def verifySignature(self, msg) -> bool: """ Validate the signature of the request Note: Batch is whitelisted because the inner messages are checked :param msg: a message requiring signature verification :return: None; raises an exception if the signature is not valid """ if isinstance(msg, self.authnWhitelist): return # whitelisted message types rely on RAET for authn if isinstance(msg, Propagate): typ = 'propagate ' req = msg.request else: typ = '' req = msg if not isinstance(req, Mapping): req = msg.__getstate__() key = (req['clientId'], req['reqId']) if key in self.requests and self.requests[key].forwarded: return identifier = self.clientAuthNr.authenticate(req) logger.debug("{} authenticated {} signature on {}request {}". format(self, identifier, typ, req['reqId']), extra={"cli": True}) def generateReply(self, viewNo: int, req: Request) -> Reply: """ Return a new clientReply created using the viewNo, request and the computed txnId of the request :param viewNo: the view number (See glossary) :param req: the REQUEST :return: a clientReply generated from the request """ logger.debug("{} replying request {}".format(self, req)) txnId = sha256("{}{}{}".format(viewNo, req.clientId, req.reqId). encode('utf-8')).hexdigest() return Reply(viewNo, req.reqId, {"txnId": txnId}) def startKeySharing(self, timeout=60): """ Start key sharing till the timeout is reached. Other nodes will be able to join this node till the timeout is reached. :param timeout: the time till which key sharing is active """ if self.isKeySharing: logger.info("{} already key sharing".format(self), extra={"cli": "LOW_STATUS"}) else: logger.info("{} starting key sharing".format(self), extra={"cli": "STATUS"}) self.nodestack.keep.auto = AutoMode.always self._schedule(partial(self.stopKeySharing, timedOut=True), timeout) # remove any unjoined remotes for r in list(self.nodestack.remotes.values()): if not r.joined: logger.debug("{} removing unjoined remote {}" .format(self, r)) self.nodestack.removeRemote(r) # if just starting, then bootstrap force = time.perf_counter() - self.created > 5 self.maintainConnections(force=force) def stopKeySharing(self, timedOut=False): """ Stop key sharing, i.e don't allow any more nodes to join this node. """ if self.isKeySharing: if timedOut: logger.info("{} key sharing timed out; was not able to " "connect to {}". format(self, ", ".join(self.notConnectedNodes())), extra={"cli": "WARNING"}) else: logger.info("{} completed key sharing".format(self), extra={"cli": "STATUS"}) self.nodestack.keep.auto = AutoMode.never def reportSuspiciousNodeEx(self, ex: SuspiciousNode): self.reportSuspiciousNode(ex.node, ex.reason, ex.code, ex.offendingMsg) def reportSuspiciousNode(self, nodeName: str, reason=None, code: int=None, offendingMsg=None): logger.warning("{} suspicion raised on node {} for {}; suspicion code " "is {}".format(self, nodeName, reason, code)) # TODO need a more general solution here if code == InvalidSignature.code: self.blacklistNode(nodeName, reason=InvalidSignature.reason, code=InvalidSignature.code) if code in self.suspicions: self.blacklistNode(nodeName, reason=self.suspicions[code], code=code) if offendingMsg: self.discard(offendingMsg, reason, logger.warning) def reportSuspiciousClient(self, clientName: str, reason): logger.warning("{} suspicion raised on client {} for {}; " "doing nothing for now". 
format(self, clientName, reason)) self.blacklistClient(clientName) def isClientBlacklisted(self, clientName: str): return self.clientBlacklister.isBlacklisted(clientName) def blacklistClient(self, clientName: str, reason: str=None): msg = "{} blacklisting client {}".format(self, clientName) if reason: msg += " for reason {}".format(reason) logger.debug(msg) self.clientBlacklister.blacklist(clientName) def isNodeBlacklisted(self, nodeName: str): return self.nodeBlacklister.isBlacklisted(nodeName) def blacklistNode(self, nodeName: str, reason: str=None, code: int=None): msg = "{} blacklisting node {}".format(self, nodeName) if reason: msg += " for reason {}".format(reason) if code: msg += " for code {}".format(code) logger.debug(msg) self.nodeBlacklister.blacklist(nodeName) def __enter__(self): return self # noinspection PyUnusedLocal def __exit__(self, exc_type, exc_val, exc_tb): self.stop() def logstats(self): lines = [] l = lines.append l("node {} current stats".format(self)) l("--------------------------------------------------------") l("node inbox size : {}".format(len(self.nodeInBox))) l("client inbox size : {}". format(len(self.clientInBox))) l("age (seconds) : {}". format(time.perf_counter() - self.created)) l("next check for reconnect: {}". format(time.perf_counter() - self.nextCheck)) l("node connections : {}".format(self._conns)) l("f : {}".format(self.f)) l("master instance : {}".format(self.instances.masterId)) l("replicas : {}".format(len(self.replicas))) l("view no : {}".format(self.viewNo)) l("rank : {}".format(self.rank)) l("msgs to replicas : {}". format(len(self.msgsToReplicas))) l("msgs to elector : {}". format(len(self.msgsToElector))) l("action queue : {} {}". format(len(self.actionQueue), id(self.actionQueue))) l("action queue stash : {} {}". format(len(self.aqStash), id(self.aqStash))) logger.info("\n".join(lines), extra={"cli": False}) def checkValidOperation(self, msg): if self.opVerifiers: try: for v in self.opVerifiers: v.verify(msg) except Exception as ex: raise InvalidClientRequest from ex
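# --- Illustrative sketch (not part of the original module) ----------------
# Worked example of the deterministic txnId scheme used by generateReply in
# the Node class above: every node hashes the same (viewNo, clientId, reqId)
# triple, so all replies for one request agree on the txnId across the pool.
# This relies on the module's existing sha256 import (hashlib.sha256); the
# values in the comments below are made up for illustration.
def _example_txn_id(viewNo: int, clientId: str, reqId: int) -> str:
    return sha256("{}{}{}".format(viewNo, clientId, reqId).
                  encode('utf-8')).hexdigest()

# _example_txn_id(0, "client1", 1) == _example_txn_id(0, "client1", 1)
# _example_txn_id(0, "client1", 1) != _example_txn_id(0, "client1", 2)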