Example #1
    def __init__(self,
                 nodeId,
                 nodeName,
                 dataDir,
                 config,
                 ledger=None,
                 actionLog=None,
                 actionFailedCallback: Callable = None,
                 action_start_callback: Callable = None):

        self.nodeId = nodeId
        self.nodeName = nodeName
        self.config = config
        self.dataDir = dataDir
        self.ledger = ledger
        self.scheduledAction = None
        self._notifier = notifier_plugin_manager.PluginManager()
        self._actionLog = actionLog if actionLog else \
            self._defaultLog(dataDir, config)
        self._actionFailedCallback = \
            actionFailedCallback if actionFailedCallback else lambda: None
        self._action_start_callback = \
            action_start_callback if action_start_callback else lambda: None

        self.retry_timeout = 5
        self.retry_limit = 3

        self.process_action_log_for_first_run()

        HasActionQueue.__init__(self)
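
One detail worth noting in Examples #1 and #2: the optional callbacks default to a no-op `lambda: None`, so later code can invoke `self._actionFailedCallback()` unconditionally instead of checking for `None` first. A minimal sketch of that null-object idiom, with purely illustrative names:

from typing import Callable, Optional

class ActionRunner:
    """Illustrative only -- not a class from the examples above."""

    def __init__(self, on_failed: Optional[Callable] = None):
        # Fall back to a no-op so later call sites never need an `if` guard,
        # mirroring `actionFailedCallback if actionFailedCallback else lambda: None`.
        self._on_failed = on_failed if on_failed else (lambda: None)

    def run(self, action: Callable):
        try:
            action()
        except Exception:
            self._on_failed()

ActionRunner().run(lambda: 1 / 0)                          # failure ignored via the no-op default
ActionRunner(lambda: print("failed")).run(lambda: 1 / 0)   # prints "failed"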
Example #2
    def __init__(self,
                 nodeId,
                 nodeName,
                 dataDir,
                 config,
                 ledger,
                 upgradeLog: UpgradeLog = None,
                 upgradeFailedCallback: Callable = None,
                 upgrade_start_callback: Callable = None):

        self.nodeId = nodeId
        self.nodeName = nodeName
        self.config = config
        self.dataDir = dataDir
        self.ledger = ledger
        self.scheduledUpgrade = None  # type: Tuple[str, int, str]
        self._notifier = notifier_plugin_manager.PluginManager()
        self._upgradeLog = upgradeLog if upgradeLog else \
            self.__defaultLog(dataDir, config)
        self._upgradeFailedCallback = \
            upgradeFailedCallback if upgradeFailedCallback else lambda: None
        self._upgrade_start_callback = \
            upgrade_start_callback if upgrade_start_callback else lambda: None

        self.retry_timeout = 5
        self.retry_limit = 3

        self.process_upgrade_log_for_first_run()

        HasActionQueue.__init__(self)
Example #3
    def __init__(self,
                 owner,
                 ownedByNode: bool = True,
                 postAllLedgersCaughtUp: Optional[Callable] = None,
                 preCatchupClbk: Optional[Callable] = None,
                 ledger_sync_order: Optional[List] = None):
        # If ledger_sync_order is not provided (is None), it is assumed that
        # `postCatchupCompleteClbk` of the LedgerInfo will be used
        self.owner = owner
        self.ownedByNode = ownedByNode
        self.postAllLedgersCaughtUp = postAllLedgersCaughtUp
        self.preCatchupClbk = preCatchupClbk
        self.ledger_sync_order = ledger_sync_order

        self.config = getConfig()
        # Needs to schedule actions. The owner of the manager has the
        # responsibility of calling its `_serviceActions` method periodically
        HasActionQueue.__init__(self)

        # Holds ledgers of different types with
        # their info like callbacks, state, etc
        self.ledgerRegistry = {}  # type: Dict[int, LedgerInfo]

        # Largest 3 phase key received during catchup.
        # This field is needed to discard any stashed 3PC messages or
        # ordered messages since the transactions part of those messages
        # will be applied when they are received through the catchup process
        self.last_caught_up_3PC = (0, 0)
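
The constructors above all mix in the action queue with an explicit `HasActionQueue.__init__(self)` call, and the comment in Example #3 notes that the owner is responsible for calling `_serviceActions()` periodically. Below is a minimal, self-contained sketch of that pattern; the `HasActionQueue` here is a simplified stand-in written for illustration, not plenum's actual implementation.

import time
from collections import deque

class HasActionQueue:
    """Simplified stand-in for the mixin (assumption, for illustration only)."""

    def __init__(self):
        self.aqStash = deque()  # pairs of (run_at, callable)

    def _schedule(self, action, seconds=0):
        # Queue `action` to run `seconds` from now.
        self.aqStash.append((time.perf_counter() + seconds, action))

    def _serviceActions(self):
        # Run every action whose time has come; the owner calls this periodically.
        now = time.perf_counter()
        due = [a for t, a in self.aqStash if t <= now]
        self.aqStash = deque((t, a) for t, a in self.aqStash if t > now)
        for action in due:
            action()
        return len(due)

class Pinger(HasActionQueue):
    def __init__(self):
        HasActionQueue.__init__(self)   # explicit mixin init, as in the examples
        self._schedule(self.ping, seconds=0)

    def ping(self):
        print("ping")

p = Pinger()
p._serviceActions()  # the owner drives the queue, e.g. from its event loop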
Example #4
    def __init__(self,
                 nodeId,
                 nodeName,
                 dataDir,
                 config,
                 ledger,
                 upgradeLog: UpgradeLog = None,
                 upgradeFailedCallback: Callable = None,
                 upgrade_start_callback: Callable = None):

        self.nodeId = nodeId
        self.nodeName = nodeName
        self.config = config
        self.dataDir = dataDir
        self.ledger = ledger
        self.scheduledUpgrade = None  # type: Tuple[str, int, str]
        self._notifier = notifier_plugin_manager.PluginManager()
        self._upgradeLog = upgradeLog if upgradeLog else \
            self.__defaultLog(dataDir, config)
        self._upgradeFailedCallback = \
            upgradeFailedCallback if upgradeFailedCallback else lambda: None
        self._upgrade_start_callback = \
            upgrade_start_callback if upgrade_start_callback else lambda: None

        self.retry_timeout = 5
        self.retry_limit = 3

        self.process_upgrade_log_for_first_run()

        HasActionQueue.__init__(self)
Example #5
    def __init__(self, node) -> None:
        self._node = node
        super().__init__([ObserverSyncPolicyEachBatch(self._node)])
        HasActionQueue.__init__(self)

        self._inbox = deque()
        self._inbox_router = Router((ObservedData, self.apply_data), )
Example #6
    def __init__(self,
                 nodeId,
                 nodeName,
                 dataDir,
                 config,
                 ledger=None,
                 actionLog=None,
                 actionFailedCallback: Callable = None,
                 action_start_callback: Callable = None):

        self.nodeId = nodeId
        self.nodeName = nodeName
        self.config = config
        self.dataDir = dataDir
        self.ledger = ledger
        self.scheduledAction = None
        self._notifier = notifier_plugin_manager.PluginManager()
        self._actionLog = actionLog if actionLog else \
            self._defaultLog(dataDir, config)
        self._actionFailedCallback = \
            actionFailedCallback if actionFailedCallback else lambda: None
        self._action_start_callback = \
            action_start_callback if action_start_callback else lambda: None

        self.retry_timeout = 5
        self.retry_limit = 3

        self.process_action_log_for_first_run()

        HasActionQueue.__init__(self)
Example #7
    def __init__(self, node):
        self.node = node

        self._view_no = 0  # type: int

        HasActionQueue.__init__(self)

        self.inBox = deque()
        self.outBox = deque()
        self.inBoxRouter = Router(
            (InstanceChange, self.process_instance_change_msg),
            (ViewChangeDone, self.process_vchd_msg),
            (FutureViewChangeDone, self.process_future_view_vchd_msg))

        self.instanceChanges = InstanceChanges()

        # The quorum of `ViewChangeDone` msgs is different depending on whether we're doing a real view change,
        # or just propagating view_no and Primary from `CurrentState` messages sent to a newly joined Node.
        # TODO: separate real view change and Propagation of Primary
        # TODO: separate catch-up, view-change and primary selection so that
        # they are really independent.
        self.propagate_primary = False

        # Tracks if other nodes are indicating that this node is in a lower
        # view than others. Keeps a map of view no to senders
        # TODO: Consider if sufficient ViewChangeDone for 2 different (and
        # higher views) are received, should one view change be interrupted in
        # between.
        self._next_view_indications = {}

        self._view_change_in_progress = False

        self.previous_master_primary = None

        self.set_defaults()

        self.initInsChngThrottling()

        # Action for _schedule instanceChange messages
        self.instance_change_action = None

        # Count of instance change rounds
        self.instance_change_rounds = 0

        # Time for view_change_starting
        self.start_view_change_ts = 0

        # Last successful viewNo.
        # In some cases the view_change process may not complete in time.
        # In that case we want to know which viewNo was successful (the last completed view_change)
        self.last_completed_view_no = 0

        # Force periodic view change if enabled in config
        force_view_change_freq = node.config.ForceViewChangeFreq
        if force_view_change_freq > 0:
            self.startRepeating(self.on_master_degradation,
                                force_view_change_freq)
Example #8
    def __init__(self, owner, ownedByNode: bool = True):
        self.owner = owner
        self.ownedByNode = ownedByNode
        self.config = getConfig()
        # Needs to schedule actions. The owner of the manager has the
        # responsibility of calling its `_serviceActions` method periodically.
        HasActionQueue.__init__(self)

        # Holds ledgers of different types with their info like the ledger
        # object, various callbacks, state (can be synced, is already synced,
        # etc).
        self.ledgers = {}  # type: Dict[int, Dict[str, Any]]

        # Ledger statuses received while the ledger was not ready to be synced
        # (`canSync` was set to False)
        self.stashedLedgerStatuses = {}  # type: Dict[int, deque]

        # Dict of sets with each set corresponding to a ledger.
        # Each set tracks which nodes claim that this node's ledger status is
        # ok; if a quorum of nodes (2f+1) says it is up to date, then mark the
        # catchup process as completed
        self.ledgerStatusOk = {}  # type: Dict[int, Set]

        # Consistency proofs received in process of catching up.
        # Each element of the dict is the dictionary of consistency proofs
        # received for the ledger. For each dictionary key is the node name and
        # value is a consistency proof.
        self.recvdConsistencyProofs = {}  # type: Dict[int, Dict[str, ConsistencyProof]]

        self.catchUpTill = {}

        # Catchup replies that need to be applied to the ledger. First element
        # of the list is a list of transactions that need to be applied to the
        # pool transaction ledger and the second element is the list of
        # transactions that need to be applied to the domain transaction ledger
        self.receivedCatchUpReplies = {}  # type: Dict[int, List]

        # Keep track of received replies from different senders
        self.recvdCatchupRepliesFrm = {}
        # type: Dict[int, Dict[str, List[CatchupRep]]]

        # Tracks the beginning of consistency proof timer. Timer starts when the
        #  node gets f+1 consistency proofs. If the node is not able to begin
        # the catchup process even after the timer expires then it requests
        # consistency proofs.
        self.consistencyProofsTimers = {}
        # type: Dict[int, Optional[float]]

        # Tracks the beginning of the catchup reply timer. Timer starts after
        # the node sends catchup requests. If the node is not able to finish
        # the catchup process even after the timer expires, then it requests
        # missing transactions.
        self.catchupReplyTimers = {}
Example #9
    def __init__(self, node) -> None:
        self._node = node
        super().__init__(
            [ObserverSyncPolicyEachBatch(self._node)]
        )
        HasActionQueue.__init__(self)

        self._inbox = deque()
        self._inbox_router = Router(
            (ObservedData, self.apply_data),
        )
Example #10
    def __init__(self) -> None:
        HasActionQueue.__init__(self)
        self._inbox = deque()
        self._outbox = deque()
        self._inbox_router = Router((BatchCommitted, self.process_new_batch), )

        # TODO: support other policies
        self.__sync_policies = {
            ObserverSyncPolicyType.EACH_BATCH:
            ObservableSyncPolicyEachBatch(self)
        }
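
Examples #5, #10, and #13 combine the action queue with a `deque` inbox and a `Router` that maps message types to handlers. Below is a rough sketch of that dispatch idea, using a simplified `Router` stand-in and a `BatchCommitted` message type defined here just for the demo, rather than plenum's real classes.

from collections import deque
from typing import NamedTuple

class BatchCommitted(NamedTuple):
    """Stand-in message type for the demo (assumption)."""
    ledger_id: int
    txn_count: int

class Router:
    """Simplified stand-in: routes a message to the handler registered for its type."""

    def __init__(self, *routes):
        self._routes = dict(routes)   # {message_type: handler}

    def handleSync(self, msg):
        handler = self._routes[type(msg)]
        return handler(msg)

class Observable:
    def __init__(self):
        self._inbox = deque()
        self._inbox_router = Router((BatchCommitted, self.process_new_batch))

    def append_input(self, msg):
        self._inbox.append(msg)

    def serviceQueues(self):
        # Drain the inbox, dispatching each message through the router.
        while self._inbox:
            self._inbox_router.handleSync(self._inbox.popleft())

    def process_new_batch(self, msg):
        print("committed batch of", msg.txn_count, "txns for ledger", msg.ledger_id)

o = Observable()
o.append_input(BatchCommitted(ledger_id=1, txn_count=3))
o.serviceQueues()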
Example #11
    def __init__(self, owner, ownedByNode: bool=True):
        self.owner = owner
        self.ownedByNode = ownedByNode
        self.config = getConfig()
        # Needs to schedule actions. The owner of the manager has the
        # responsibility of calling its `_serviceActions` method periodically.
        HasActionQueue.__init__(self)

        # Holds ledgers of different types with their info like the ledger
        # object, various callbacks, state (can be synced, is already synced,
        # etc).
        self.ledgers = {}   # type: Dict[int, Dict[str, Any]]

        # Ledger statuses received while the ledger was not ready to be synced
        # (`canSync` was set to False)
        self.stashedLedgerStatuses = {}  # type: Dict[int, deque]

        # Dict of sets with each set corresponding to a ledger.
        # Each set tracks which nodes claim that this node's ledger status is
        # ok; if a quorum of nodes (2f+1) says it is up to date, then mark the
        # catchup process as completed
        self.ledgerStatusOk = {}        # type: Dict[int, Set]

        # Consistency proofs received in process of catching up.
        # Each element of the dict is the dictionary of consistency proofs
        # received for the ledger. For each dictionary key is the node name and
        # value is a consistency proof.
        self.recvdConsistencyProofs = {}  # type: Dict[int, Dict[str, ConsistencyProof]]

        self.catchUpTill = {}

        # Catchup replies that need to be applied to the ledger. First element
        # of the list is a list of transactions that need to be applied to the
        # pool transaction ledger and the second element is the list of
        # transactions that need to be applied to the domain transaction ledger
        self.receivedCatchUpReplies = {}    # type: Dict[int, List]

        self.recvdCatchupRepliesFrm = {}
        # type: Dict[int, Dict[str, List[CatchupRep]]]

        # Tracks the beginning of consistency proof timer. Timer starts when the
        #  node gets f+1 consistency proofs. If the node is not able to begin
        # the catchup process even after the timer expires then it requests
        # consistency proofs.
        self.consistencyProofsTimers = {}
        # type: Dict[int, Optional[float]]

        # Tracks the beginning of the catchup reply timer. Timer starts after
        # the node sends catchup requests. If the node is not able to finish
        # the catchup process even after the timer expires, then it requests
        # missing transactions.
        self.catchupReplyTimers = {}
Example #12
    def __init__(self, node):
        HasActionQueue.__init__(self)

        self.name = node.name
        self.f = node.f
        self.replicas = node.replicas
        self.viewNo = node.viewNo
        self.rank = node.rank
        self.nodeNames = node.allNodeNames
        self.nodeCount = 0
        self.inBox = deque()
        self.outBox = deque()
Example #13
    def __init__(self) -> None:
        HasActionQueue.__init__(self)
        self._inbox = deque()
        self._outbox = deque()
        self._inbox_router = Router(
            (BatchCommitted, self.process_new_batch),
        )

        # TODO: support other policies
        self.__sync_policies = {
            ObserverSyncPolicyType.EACH_BATCH: ObservableSyncPolicyEachBatch(self)
        }
Example #14
    def __init__(self, node):
        HasActionQueue.__init__(self)
        self.node = node

        self.name = node.name
        self.replicas = node.replicas
        self.nodeCount = 0
        self.inBox = deque()
        self.outBox = deque()
        self.inBoxRouter = Router(*self.routes)

        # Need to keep track of who was primary for the master protocol
        # instance for the previous view. This variable only matters between
        # elections; the elector will set it before triggering a new
        # election and will reset it after the primary is decided for the
        # master instance
        self.previous_master_primary = None
Example #15
    def __init__(self, node):
        HasActionQueue.__init__(self)
        self.node = node

        self.name = node.name
        self.replicas = node.replicas
        self.nodeCount = 0
        self.inBox = deque()
        self.outBox = deque()
        self.inBoxRouter = Router(*self.routes)

        # Need to keep track of who was primary for the master protocol
        # instance for the previous view. This variable only matters between
        # elections; the elector will set it before triggering a new
        # election and will reset it after the primary is decided for the
        # master instance
        self.previous_master_primary = None
Example #16
    def __init__(self, node):
        self.node = node

        self.view_no = 0  # type: int

        HasActionQueue.__init__(self)

        self.inBox = deque()
        self.outBox = deque()
        self.inBoxRouter = Router(
            (InstanceChange, self.process_instance_change_msg),
            (ViewChangeDone, self.process_vchd_msg)
        )

        self.instanceChanges = InstanceChanges()

        # The quorum of `ViewChangeDone` msgs is different depending on whether we're doing a real view change,
        # or just propagating view_no and Primary from `CurrentState` messages sent to a newly joined Node.
        # TODO: separate real view change and Propagation of Primary
        # TODO: separate catch-up, view-change and primary selection so that
        # they are really independent.
        self.propagate_primary = False

        # Tracks if other nodes are indicating that this node is in a lower
        # view than others. Keeps a map of view no to senders
        # TODO: Consider if sufficient ViewChangeDone for 2 different (and
        # higher views) are received, should one view change be interrupted in
        # between.
        self._next_view_indications = SortedDict()

        self._view_change_in_progress = False

        self.previous_master_primary = None

        self.set_defaults()

        self.initInsChngThrottling()

        # Action for _schedule instanceChange messages
        self.instance_change_action = None

        # Count of instance change rounds
        self.instance_change_rounds = 0
Example #17
    def __init__(self,
                 nodeId,
                 nodeName,
                 dataDir,
                 config,
                 ledger,
                 upgradeLog: UpgradeLog = None,
                 upgradeFailedCallback = None):

        self.nodeId = nodeId
        self.nodeName = nodeName
        self.config = config
        self.dataDir = dataDir
        self.ledger = ledger
        self.scheduledUpgrade = None  # type: Tuple[str, int]
        self._notifier = notifier_plugin_manager.PluginManager()
        self._upgradeLog = upgradeLog if upgradeLog else \
            self.__defaultLog(dataDir, config)
        self._upgradeFailedCallback = \
            upgradeFailedCallback if upgradeFailedCallback else lambda: None

        self.__isItFirstRunAfterUpgrade = None

        if self.isItFirstRunAfterUpgrade:
            (when, version) = self.lastExecutedUpgradeInfo
            if self.didLastExecutedUpgradeSucceeded:
                self._upgradeLog.appendSucceeded(when, version)
                logger.debug("Node '{}' successfully upgraded to version {}"
                             .format(nodeName, version))
                self._notifier.sendMessageUponNodeUpgradeComplete(
                    "Upgrade of node '{}' to version {} scheduled on {} "
                    "completed successfully"
                    .format(nodeName, version, when))
            else:
                self._upgradeLog.appendFailed(when, version)
                logger.error("Failed to upgrade node '{}' to version {}"
                             .format(nodeName, version))
                self._notifier.sendMessageUponNodeUpgradeFail(
                    "Upgrade of node '{}' to version {} "
                    "scheduled on {} failed"
                    .format(nodeName, version, when))
        HasActionQueue.__init__(self)
Example #18
    def __init__(self,
                 nodeId,
                 nodeName,
                 dataDir,
                 config,
                 ledger,
                 upgradeLog: UpgradeLog = None,
                 upgradeFailedCallback = None):

        self.nodeId = nodeId
        self.nodeName = nodeName
        self.config = config
        self.dataDir = dataDir
        self.ledger = ledger
        self.scheduledUpgrade = None  # type: Tuple[str, int]
        self._notifier = notifier_plugin_manager.PluginManager()
        self._upgradeLog = upgradeLog if upgradeLog else \
            self.__defaultLog(dataDir, config)
        self._upgradeFailedCallback = \
            upgradeFailedCallback if upgradeFailedCallback else lambda: None

        self.__isItFirstRunAfterUpgrade = None

        if self.isItFirstRunAfterUpgrade:
            (when, version) = self.lastExecutedUpgradeInfo
            if self.didLastExecutedUpgradeSucceeded:
                self._upgradeLog.appendSucceeded(when, version)
                logger.debug("Node '{}' successfully upgraded to version {}"
                             .format(nodeName, version))
                self._notifier.sendMessageUponNodeUpgradeComplete(
                    "Upgrade of node '{}' to version {} scheduled on {} "
                    "completed successfully"
                    .format(nodeName, version, when))
            else:
                self._upgradeLog.appendFailed(when, version)
                logger.error("Failed to upgrade node '{}' to version {}"
                             .format(nodeName, version))
                self._notifier.sendMessageUponNodeUpgradeFail(
                    "Upgrade of node '{}' to version {} "
                    "scheduled on {} failed"
                    .format(nodeName, version, when))
        HasActionQueue.__init__(self)
Example #19
    def __init__(self, nodeId, config, baseDir, ledger):
        self.nodeId = nodeId
        self.config = config
        self.baseDir = baseDir
        self.ledger = ledger

        # TODO: Rename to `upgradedVersion`
        self.hasCodeBeenUpgraded = self._hasCodeBeenUpgraded()
        self.storeCurrentVersion()

        # TODO: Rename to `failedToUpgrade`
        self.didLastUpgradeFail = self._didLastUpgradeFail()

        if self.didLastUpgradeFail:
            # TODO: Call `lastUpgradeFailed` to tell the agent and then agent
            # should remove file
            pass
        else:
            self.removeNextVersionFile()
        self.scheduledUpgrade = None  # type: Tuple[str, int]
        HasActionQueue.__init__(self)
Example #20
    def __init__(self,
                 name: str,
                 nodeReg: Dict[str, HA] = None,
                 ha: Union[HA, Tuple[str, int]] = None,
                 basedirpath: str = None,
                 genesis_dir: str = None,
                 ledger_dir: str = None,
                 keys_dir: str = None,
                 plugins_dir: str = None,
                 config=None,
                 sighex: str = None):
        """
        Creates a new client.

        :param name: unique identifier for the client
        :param nodeReg: names and host addresses of all nodes in the pool
        :param ha: tuple of host and port
        """
        self.config = config or getConfig()

        dataDir = self.config.clientDataDir or "data/clients"
        self.basedirpath = basedirpath or self.config.CLI_BASE_DIR
        self.basedirpath = os.path.expanduser(self.basedirpath)

        signer = Signer(sighex)
        sighex = signer.keyraw
        verkey = rawToFriendly(signer.verraw)

        self.stackName = verkey
        # TODO: Have a way for a client to have a user-friendly name. Does it
        # matter now? It used to matter in some CLI examples in the past.
        # self.name = name
        self.name = self.stackName or 'Client~' + str(id(self))

        self.genesis_dir = genesis_dir or self.basedirpath
        self.ledger_dir = ledger_dir or os.path.join(self.basedirpath, dataDir, self.name)
        self.plugins_dir = plugins_dir or self.basedirpath
        _keys_dir = keys_dir or self.basedirpath
        self.keys_dir = os.path.join(_keys_dir, "keys")

        cha = None
        if self.exists(self.stackName, self.keys_dir):
            cha = self.nodeStackClass.getHaFromLocal(
                self.stackName, self.keys_dir)
            if cha:
                cha = HA(*cha)
                logger.debug("Client {} ignoring given ha {} and using {}".
                             format(self.name, ha, cha))
        if not cha:
            cha = ha if isinstance(ha, HA) else HA(*ha)

        self.reqRepStore = self.getReqRepStore()
        self.txnLog = self.getTxnLogStore()

        HasFileStorage.__init__(self, self.ledger_dir)

        # TODO: Find a proper name
        self.alias = name

        if not nodeReg:
            self.mode = None
            HasPoolManager.__init__(self)
            self.ledgerManager = LedgerManager(self, ownedByNode=False)
            self.ledgerManager.addLedger(
                POOL_LEDGER_ID,
                self.ledger,
                preCatchupStartClbk=self.prePoolLedgerCatchup,
                postCatchupCompleteClbk=self.postPoolLedgerCaughtUp,
                postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger)
        else:
            cliNodeReg = OrderedDict()
            for nm, (ip, port) in nodeReg.items():
                cliNodeReg[nm] = HA(ip, port)
            self.nodeReg = cliNodeReg
            self.mode = Mode.discovered

        HasActionQueue.__init__(self)

        self.setPoolParams()

        stackargs = dict(name=self.stackName,
                         ha=cha,
                         main=False,  # stops incoming vacuous joins
                         auth_mode=AuthMode.ALLOW_ANY.value)
        stackargs['basedirpath'] = self.keys_dir
        self.created = time.perf_counter()

        # noinspection PyCallingNonCallable
        # TODO I think this is a bug here, sighex is getting passed in the seed
        # parameter
        self.nodestack = self.nodeStackClass(stackargs,
                                             self.handleOneNodeMsg,
                                             self.nodeReg,
                                             sighex)
        self.nodestack.onConnsChanged = self.onConnsChanged

        if self.nodeReg:
            logger.info(
                "Client {} initialized with the following node registry:".format(
                    self.alias))
            lengths = [max(x) for x in zip(*[
                (len(name), len(host), len(str(port)))
                for name, (host, port) in self.nodeReg.items()])]
            fmt = "    {{:<{}}} listens at {{:<{}}} on port {{:>{}}}".format(
                *lengths)
            for name, (host, port) in self.nodeReg.items():
                logger.info(fmt.format(name, host, port))
        else:
            logger.info(
                "Client {} found an empty node registry:".format(self.alias))

        Motor.__init__(self)

        self.inBox = deque()

        self.nodestack.connectNicelyUntil = 0  # don't need to connect
        # nicely as a client

        # TODO: Need to have couple of tests around `reqsPendingConnection`
        # where we check with and without pool ledger

        # Stores the requests that need to be sent to the nodes when the client
        # has made sufficient connections to the nodes.
        self.reqsPendingConnection = deque()

        # Tuple of identifier and reqId as key and value as tuple of set of
        # nodes which are expected to send REQACK
        self.expectingAcksFor = {}

        # Tuple of identifier and reqId as key and value as tuple of set of
        # nodes which are expected to send REPLY
        self.expectingRepliesFor = {}

        self._observers = {}  # type: Dict[str, Callable]
        self._observerSet = set()  # makes it easier to guard against duplicates

        plugins_to_load = self.config.PluginsToLoad if hasattr(self.config, "PluginsToLoad") else None
        tp = loadPlugins(self.plugins_dir, plugins_to_load)

        logger.debug("total plugins loaded in client: {}".format(tp))

        self._multi_sig_verifier = self._create_multi_sig_verifier()
        self._read_only_requests = set()
Example #21
    def __init__(self,
                 name: str,
                 nodeReg: Dict[str, HA]=None,
                 ha: Union[HA, Tuple[str, int]]=None,
                 basedirpath: str=None,
                 config=None,
                 sighex: str=None):
        """
        Creates a new client.

        :param name: unique identifier for the client
        :param nodeReg: names and host addresses of all nodes in the pool
        :param ha: tuple of host and port
        """
        self.config = config or getConfig()
        basedirpath = self.config.baseDir if not basedirpath else basedirpath
        self.basedirpath = basedirpath

        signer = Signer(sighex)
        sighex = signer.keyhex
        verkey = rawToFriendly(signer.verraw)

        self.name = name
        self.stackName = verkey

        cha = None
        # If client information already exists in RAET then use that
        if self.exists(self.stackName, basedirpath):
            cha = getHaFromLocalEstate(self.stackName, basedirpath)
            if cha:
                cha = HA(*cha)
                logger.debug("Client {} ignoring given ha {} and using {}".
                             format(self.name, ha, cha))
        if not cha:
            cha = ha if isinstance(ha, HA) else HA(*ha)

        self.reqRepStore = self.getReqRepStore()
        self.txnLog = self.getTxnLogStore()

        self.dataDir = self.config.clientDataDir or "data/clients"
        HasFileStorage.__init__(self, self.name, baseDir=self.basedirpath,
                                dataDir=self.dataDir)

        self._ledger = None

        if not nodeReg:
            self.mode = None
            HasPoolManager.__init__(self)
            self.ledgerManager = LedgerManager(self, ownedByNode=False)
            self.ledgerManager.addLedger(0, self.ledger,
                postCatchupCompleteClbk=self.postPoolLedgerCaughtUp,
                postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger)
        else:
            cliNodeReg = OrderedDict()
            for nm, (ip, port) in nodeReg.items():
                cliNodeReg[nm] = HA(ip, port)
            self.nodeReg = cliNodeReg
            self.mode = Mode.discovered

        HasActionQueue.__init__(self)

        self.setF()

        stackargs = dict(name=self.stackName,
                         ha=cha,
                         main=False,  # stops incoming vacuous joins
                         auto=AutoMode.always)
        stackargs['basedirpath'] = basedirpath
        self.created = time.perf_counter()

        # noinspection PyCallingNonCallable
        self.nodestack = self.nodeStackClass(stackargs,
                                             self.handleOneNodeMsg,
                                             self.nodeReg,
                                             sighex)
        self.nodestack.onConnsChanged = self.onConnsChanged

        if self.nodeReg:
            logger.info("Client {} initialized with the following node registry:"
                        .format(self.name))
            lengths = [max(x) for x in zip(*[
                (len(name), len(host), len(str(port)))
                for name, (host, port) in self.nodeReg.items()])]
            fmt = "    {{:<{}}} listens at {{:<{}}} on port {{:>{}}}".format(
                *lengths)
            for name, (host, port) in self.nodeReg.items():
                logger.info(fmt.format(name, host, port))
        else:
            logger.info(
                "Client {} found an empty node registry:".format(self.name))

        Motor.__init__(self)

        self.inBox = deque()

        self.nodestack.connectNicelyUntil = 0  # don't need to connect
        # nicely as a client

        # TODO: Need to have couple of tests around `reqsPendingConnection`
        # where we check with and without pool ledger

        # Stores the requests that need to be sent to the nodes when the client
        # has made sufficient connections to the nodes.
        self.reqsPendingConnection = deque()

        # Tuple of identifier and reqId as key and value as tuple of set of
        # nodes which are expected to send REQACK
        self.expectingAcksFor = {}

        # Tuple of identifier and reqId as key and value as tuple of set of
        # nodes which are expected to send REPLY
        self.expectingRepliesFor = {}

        tp = loadPlugins(self.basedirpath)
        logger.debug("total plugins loaded in client: {}".format(tp))
Example #22
    def __init__(self, name: str, Delta: float, Lambda: float, Omega: float,
                 instances: Instances, nodestack: NodeStack,
                 blacklister: Blacklister, nodeInfo: Dict, pluginPaths: Iterable[str]=None):
        self.name = name
        self.instances = instances
        self.nodestack = nodestack
        self.blacklister = blacklister
        self.nodeInfo = nodeInfo

        self.Delta = Delta
        self.Lambda = Lambda
        self.Omega = Omega
        self.statsConsumers = self.getPluginsByType(pluginPaths,
                                                    PLUGIN_TYPE_STATS_CONSUMER)

        # Number of ordered requests by each replica. The value at index `i` in
        # the list is a tuple of the number of ordered requests by replica and
        # the time taken to order those requests by the replica of the `i`th
        # protocol instance
        self.numOrderedRequests = []  # type: List[Tuple[int, int]]

        # Requests that have been sent for ordering. Key of the dictionary is a
        # tuple of client id and request id and the value is the time at which
        # the request was submitted for ordering
        self.requestOrderingStarted = {}  # type: Dict[Tuple[str, int], float]

        # Request latencies for the master protocol instances. Key of the
        # dictionary is a tuple of client id and request id and the value is
        # the time the master instance took for ordering it
        self.masterReqLatencies = {}  # type: Dict[Tuple[str, int], float]

        # Indicates that request latency in previous snapshot of master req
        # latencies was too high
        self.masterReqLatencyTooHigh = False

        # Request latency(time taken to be ordered) for the client. The value
        # at index `i` in the list is the dictionary where the key of the
        # dictionary is the client id and the value is a tuple of number of
        # requests and average time taken by that number of requests for the
        # `i`th protocol instance
        self.clientAvgReqLatencies = []  # type: List[Dict[str, Tuple[int, float]]]

        # TODO: Set this if this monitor belongs to a node which has primary
        # of master. Will be used to set `totalRequests`
        self.hasMasterPrimary = None

        # Total requests that have been ordered since the node started
        self.totalRequests = 0

        self.started = datetime.utcnow().isoformat()

        # Times of requests ordered by master in last
        # `ThroughputWindowSize` seconds. `ThroughputWindowSize` is
        # defined in config
        self.orderedRequestsInLast = []

        # Times and latencies (as a tuple) of requests ordered by master in last
        # `LatencyWindowSize` seconds. `LatencyWindowSize` is
        # defined in config
        self.latenciesByMasterInLast = []

        # Times and latencies (as a tuple) of requests ordered by backups in last
        # `LatencyWindowSize` seconds. `LatencyWindowSize` is
        # defined in config. Dictionary where key corresponds to instance id and
        #  value is a tuple of ordering time and latency of a request
        self.latenciesByBackupsInLast = {}

        psutil.cpu_percent(interval=None)
        self.lastKnownTraffic = self.calculateTraffic()

        self.totalViewChanges = 0
        self._lastPostedViewChange = 0
        HasActionQueue.__init__(self)

        if config.SendMonitorStats:
            self._schedule(self.sendPeriodicStats, config.DashboardUpdateFreq)
Example #23
    def __init__(self, name: str, Delta: float, Lambda: float, Omega: float,
                 instances: Instances, nodestack: NodeRStack,
                 blacklister: Blacklister, nodeInfo: Dict,
                 notifierEventTriggeringConfig: Dict,
                 pluginPaths: Iterable[str]=None):
        self.name = name
        self.instances = instances
        self.nodestack = nodestack
        self.blacklister = blacklister
        self.nodeInfo = nodeInfo
        self.notifierEventTriggeringConfig = notifierEventTriggeringConfig

        self.Delta = Delta
        self.Lambda = Lambda
        self.Omega = Omega
        self.statsConsumers = self.getPluginsByType(pluginPaths,
                                                    PLUGIN_TYPE_STATS_CONSUMER)

        # Number of ordered requests by each replica. The value at index `i` in
        # the list is a tuple of the number of ordered requests by replica and
        # the time taken to order those requests by the replica of the `i`th
        # protocol instance
        self.numOrderedRequests = []  # type: List[Tuple[int, int]]

        # Requests that have been sent for ordering. Key of the dictionary is a
        # tuple of client id and request id and the value is the time at which
        # the request was submitted for ordering
        self.requestOrderingStarted = {}  # type: Dict[Tuple[str, int], float]

        # Request latencies for the master protocol instances. Key of the
        # dictionary is a tuple of client id and request id and the value is
        # the time the master instance took for ordering it
        self.masterReqLatencies = {}  # type: Dict[Tuple[str, int], float]

        # Indicates that request latency in previous snapshot of master req
        # latencies was too high
        self.masterReqLatencyTooHigh = False

        # Request latency(time taken to be ordered) for the client. The value
        # at index `i` in the list is the dictionary where the key of the
        # dictionary is the client id and the value is a tuple of number of
        # requests and average time taken by that number of requests for the
        # `i`th protocol instance
        self.clientAvgReqLatencies = []  # type: List[Dict[str, Tuple[int, float]]]

        # TODO: Set this if this monitor belongs to a node which has primary
        # of master. Will be used to set `totalRequests`
        self.hasMasterPrimary = None

        # Total requests that have been ordered since the node started
        self.totalRequests = 0

        self.started = datetime.utcnow().isoformat()

        # Times of requests ordered by master in last
        # `ThroughputWindowSize` seconds. `ThroughputWindowSize` is
        # defined in config
        self.orderedRequestsInLast = []

        # Times and latencies (as a tuple) of requests ordered by master in last
        # `LatencyWindowSize` seconds. `LatencyWindowSize` is
        # defined in config
        self.latenciesByMasterInLast = []

        # Times and latencies (as a tuple) of requests ordered by backups in last
        # `LatencyWindowSize` seconds. `LatencyWindowSize` is
        # defined in config. Dictionary where key corresponds to instance id and
        #  value is a tuple of ordering time and latency of a request
        self.latenciesByBackupsInLast = {}

        # Monitoring suspicious spikes in cluster throughput
        self.clusterThroughputSpikeMonitorData = {
            'value': 0,
            'cnt': 0,
            'accum': []
        }

        psutil.cpu_percent(interval=None)
        self.lastKnownTraffic = self.calculateTraffic()

        self.totalViewChanges = 0
        self._lastPostedViewChange = 0
        HasActionQueue.__init__(self)

        if config.SendMonitorStats:
            self.startRepeating(self.sendPeriodicStats,
                                config.DashboardUpdateFreq)

        self.startRepeating(self.checkPerformance,
                            config.notifierEventTriggeringConfig['clusterThroughputSpike']['freq'])
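
Several of the examples (e.g. #7, #23, #29) start periodic work with `startRepeating(callback, seconds)`, usually behind a config flag such as `SendMonitorStats` or `ForceViewChangeFreq`. The sketch below shows one plausible way such a repeating action can be built on top of an action queue; it is a simplified stand-in, not plenum's implementation.

import time
from collections import deque

class HasActionQueue:
    """Simplified stand-in (assumption), analogous to the sketch after Example #3."""

    def __init__(self):
        self.aqStash = deque()  # pairs of (run_at, callable)

    def startRepeating(self, action, seconds):
        # Run `action` and re-schedule it every `seconds` after each run.
        def repeat():
            action()
            self.aqStash.append((time.perf_counter() + seconds, repeat))
        self.aqStash.append((time.perf_counter() + seconds, repeat))

    def _serviceActions(self):
        now = time.perf_counter()
        due = [a for t, a in self.aqStash if t <= now]
        self.aqStash = deque((t, a) for t, a in self.aqStash if t > now)
        for action in due:
            action()

class Monitor(HasActionQueue):
    def __init__(self, send_stats_freq=60):
        HasActionQueue.__init__(self)
        if send_stats_freq > 0:                       # config-style guard, as in Example #23
            self.startRepeating(self.sendPeriodicStats, send_stats_freq)

    def sendPeriodicStats(self):
        print("periodic stats")

m = Monitor(send_stats_freq=0.1)
time.sleep(0.2)
m._serviceActions()   # the owner's loop; prints "periodic stats" once the action is due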
Example #24
    def __init__(self, name):
        self.name = name
        self.results = {}
        Motor.__init__(self)
        HasActionQueue.__init__(self)
Example #25
    def __init__(self,
                 name: str,
                 nodeRegistry: Dict[str, HA],
                 clientAuthNr: ClientAuthNr=None,
                 ha: HA=None,
                 cliname: str=None,
                 cliha: HA=None,
                 basedirpath: str=None,
                 primaryDecider: PrimaryDecider = None,
                 opVerifiers: Iterable[Any]=None):
        """
        Create a new node.

        :param nodeRegistry: names and host addresses of all nodes in the pool
        :param clientAuthNr: client authenticator implementation to be used
        :param basedirpath: path to the base directory used by `nstack` and
            `cstack`
        :param primaryDecider: the mechanism to be used to decide the primary
        of a protocol instance
        """
        self.opVerifiers = opVerifiers or []

        self.primaryDecider = primaryDecider
        me = nodeRegistry[name]

        self.allNodeNames = list(nodeRegistry.keys())
        if isinstance(me, NodeDetail):
            sha = me.ha
            scliname = me.cliname
            scliha = me.cliha
            nodeReg = {k: v.ha for k, v in nodeRegistry.items()}
        else:
            sha = me if isinstance(me, HA) else HA(*me)
            scliname = None
            scliha = None
            nodeReg = {k: HA(*v) for k, v in nodeRegistry.items()}
        if not ha:  # pull it from the registry
            ha = sha
        if not cliname:  # default to the name plus the suffix
            cliname = scliname if scliname else name + CLIENT_STACK_SUFFIX
        if not cliha:  # default to same ip, port + 1
            cliha = scliha if scliha else HA(ha[0], ha[1]+1)

        nstack = dict(name=name,
                      ha=ha,
                      main=True,
                      auto=AutoMode.never)

        cstack = dict(name=cliname,
                      ha=cliha,
                      main=True,
                      auto=AutoMode.always)

        if basedirpath:
            nstack['basedirpath'] = basedirpath
            cstack['basedirpath'] = basedirpath

        self.clientAuthNr = clientAuthNr or SimpleAuthNr()

        self.nodeInBox = deque()
        self.clientInBox = deque()
        self.created = time.perf_counter()

        HasActionQueue.__init__(self)
        NodeStacked.__init__(self, nstack, nodeReg)
        ClientStacked.__init__(self, cstack)
        Motor.__init__(self)
        Propagator.__init__(self)

        self.totalNodes = len(nodeRegistry)
        self.f = getMaxFailures(self.totalNodes)
        self.requiredNumberOfInstances = self.f + 1  # per RBFT
        self.minimumNodes = (2 * self.f) + 1  # minimum for a functional pool

        self.txnStore = TransactionStore()

        self.replicas = []  # type: List[replica.Replica]

        self.instanceChanges = InstanceChanges()

        self.viewNo = 0                             # type: int

        self.rank = self.getRank(self.name, nodeRegistry)

        self.elector = None  # type: PrimaryDecider

        self.forwardedRequests = set()  # type: Set[Tuple[(str, int)]]

        self.instances = Instances()

        self.monitor = Monitor(self.name,
                               Delta=.8, Lambda=60, Omega=5,
                               instances=self.instances)

        # Requests that are to be given to the replicas by the node. Each
        # element of the list is a deque for the replica with number equal to
        # its index in the list and each element of the deque is a named tuple
        self.msgsToReplicas = []  # type: List[deque]

        # Requests that are to be given to the elector by the node
        self.msgsToElector = deque()

        nodeRoutes = [(Propagate, self.processPropagate),
                      (InstanceChange, self.processInstanceChange)]

        nodeRoutes.extend((msgTyp, self.sendToElector) for msgTyp in
                          [Nomination, Primary, Reelection])

        nodeRoutes.extend((msgTyp, self.sendToReplica) for msgTyp in
                          [PrePrepare, Prepare, Commit])

        self.nodeMsgRouter = Router(*nodeRoutes)

        self.clientMsgRouter = Router((Request,
                                       self.processRequest))

        self.perfCheckFreq = 10

        self._schedule(self.checkPerformance, self.perfCheckFreq)

        self.clientBlacklister = SimpleBlacklister(
            self.name + CLIENT_BLACKLISTER_SUFFIX)  # type: Blacklister

        self.nodeBlacklister = SimpleBlacklister(
            self.name + NODE_BLACKLISTER_SUFFIX)  # type: Blacklister

        # BE CAREFUL HERE
        # This controls which message types are excluded from signature
        # verification. These are still subject to RAET's signature verification
        # but client signatures will not be checked on these. Expressly
        # prohibited from being in this list are ClientRequest and Propagation,
        # which both require client signature verification
        self.authnWhitelist = (Nomination, Primary, Reelection,
                               Batch,
                               PrePrepare, Prepare,
                               Commit, InstanceChange)
        self.addReplicas()
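
Example #25 derives its fault-tolerance parameters from the pool size: `getMaxFailures` gives the number f of tolerated faulty nodes, the node runs f + 1 protocol instances (one master plus f backups, per RBFT), and 2f + 1 nodes are the minimum for a functional pool. A quick sanity check, assuming the usual BFT bound f = floor((N - 1) / 3) for `getMaxFailures`:

def getMaxFailures(total_nodes: int) -> int:
    # BFT bound N >= 3f + 1, so f = floor((N - 1) / 3) (assumed formula)
    return (total_nodes - 1) // 3

for n in (4, 7, 10):
    f = getMaxFailures(n)
    print(n, "nodes -> f =", f, ", instances =", f + 1, ", minimum nodes =", 2 * f + 1)
# 4 nodes -> f = 1 , instances = 2 , minimum nodes = 3
# 7 nodes -> f = 2 , instances = 3 , minimum nodes = 5
# 10 nodes -> f = 3 , instances = 4 , minimum nodes = 7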
Example #26
    def __init__(self, name):
        self.name = name
        self.results = {}
        Motor.__init__(self)
        HasActionQueue.__init__(self)
Example #27
    def __init__(self,
                 node: 'plenum.server.node.Node',
                 instId: int,
                 config=None,
                 isMaster: bool = False,
                 bls_bft_replica: BlsBftReplica = None,
                 metrics: MetricsCollector = NullMetricsCollector(),
                 get_current_time=None,
                 get_time_for_3pc_batch=None):
        """
        Create a new replica.

        :param node: Node on which this replica is located
        :param instId: the id of the protocol instance the replica belongs to
        :param isMaster: is this a replica of the master protocol instance
        """
        HasActionQueue.__init__(self)
        self.get_current_time = get_current_time or time.perf_counter
        self.get_time_for_3pc_batch = get_time_for_3pc_batch or node.utc_epoch
        # self.stats = Stats(TPCStat)
        self.config = config or getConfig()
        self.metrics = metrics
        self.node = node
        self.instId = instId
        self.name = self.generateName(node.name, self.instId)
        self.logger = getlogger(self.name)
        self.validator = ReplicaValidator(self)

        self.outBox = deque()
        """
        This queue is used by the replica to send messages to its node. Replica
        puts messages that are consumed by its node
        """

        self.inBox = deque()
        """
        This queue is used by the replica to receive messages from its node.
        Node puts messages that are consumed by the replica
        """

        self.inBoxStash = deque()
        """
        If messages need to go back on the queue, they go here temporarily and
        are put back on the queue on a state change
        """

        self._is_master = isMaster

        # Dictionary to keep track of which replica was primary during each
        # view. Key is the view no and value is the name of the primary
        # replica during that view
        self.primaryNames = OrderedDict()  # type: OrderedDict[int, str]

        # Flag being used for preterm exit from the loop in the method
        # `processStashedMsgsForNewWaterMarks`. See that method for details.
        self.consumedAllStashedMsgs = True

        self._freshness_checker = FreshnessChecker(
            freshness_timeout=self.config.STATE_FRESHNESS_UPDATE_INTERVAL)

        self._bls_bft_replica = bls_bft_replica
        self._state_root_serializer = state_roots_serializer

        # Did we log a message about getting request while absence of primary
        self.warned_no_primary = False

        self._consensus_data = ConsensusSharedData(
            self.name, self.node.poolManager.node_names_ordered_by_rank(),
            self.instId, self.isMaster)
        self._internal_bus = InternalBus()
        self._external_bus = ExternalBus(send_handler=self.send)
        self.stasher = self._init_replica_stasher()
        self._subscription = Subscription()
        self._bootstrap_consensus_data()
        self._subscribe_to_external_msgs()
        self._subscribe_to_internal_msgs()
        self._checkpointer = self._init_checkpoint_service()
        self._ordering_service = self._init_ordering_service()
        self._message_req_service = self._init_message_req_service()
        self._view_change_service = self._init_view_change_service()
        for ledger_id in self.ledger_ids:
            self.register_ledger(ledger_id)
Example #28
    def __init__(self,
                 name: str,
                 nodeReg: Dict[str, HA] = None,
                 ha: Union[HA, Tuple[str, int]] = None,
                 basedirpath: str = None,
                 config=None,
                 sighex: str = None):
        """
        Creates a new client.

        :param name: unique identifier for the client
        :param nodeReg: names and host addresses of all nodes in the pool
        :param ha: tuple of host and port
        """
        self.config = config or getConfig()
        basedirpath = self.config.baseDir if not basedirpath else basedirpath
        self.basedirpath = basedirpath

        signer = Signer(sighex)
        sighex = signer.keyraw
        verkey = rawToFriendly(signer.verraw)

        self.stackName = verkey
        # TODO: Have a way for a client to have a user-friendly name. Does it
        # matter now? It used to matter in some CLI examples in the past.
        # self.name = name
        self.name = self.stackName

        cha = None
        # If client information already exists in RAET then use that
        if self.exists(self.stackName, basedirpath):
            cha = getHaFromLocalEstate(self.stackName, basedirpath)
            if cha:
                cha = HA(*cha)
                logger.debug(
                    "Client {} ignoring given ha {} and using {}".format(
                        self.name, ha, cha))
        if not cha:
            cha = ha if isinstance(ha, HA) else HA(*ha)

        self.reqRepStore = self.getReqRepStore()
        self.txnLog = self.getTxnLogStore()

        self.dataDir = self.config.clientDataDir or "data/clients"
        HasFileStorage.__init__(self,
                                self.name,
                                baseDir=self.basedirpath,
                                dataDir=self.dataDir)

        # TODO: Find a proper name
        self.alias = name

        self._ledger = None

        if not nodeReg:
            self.mode = None
            HasPoolManager.__init__(self)
            self.ledgerManager = LedgerManager(self, ownedByNode=False)
            self.ledgerManager.addLedger(
                0,
                self.ledger,
                postCatchupCompleteClbk=self.postPoolLedgerCaughtUp,
                postTxnAddedToLedgerClbk=self.postTxnFromCatchupAddedToLedger)
        else:
            cliNodeReg = OrderedDict()
            for nm, (ip, port) in nodeReg.items():
                cliNodeReg[nm] = HA(ip, port)
            self.nodeReg = cliNodeReg
            self.mode = Mode.discovered

        HasActionQueue.__init__(self)

        self.setF()

        stackargs = dict(
            name=self.stackName,
            ha=cha,
            main=False,  # stops incoming vacuous joins
            auto=2)
        stackargs['basedirpath'] = basedirpath
        self.created = time.perf_counter()

        # noinspection PyCallingNonCallable
        self.nodestack = self.nodeStackClass(stackargs, self.handleOneNodeMsg,
                                             self.nodeReg, sighex)
        self.nodestack.onConnsChanged = self.onConnsChanged

        if self.nodeReg:
            logger.info(
                "Client {} initialized with the following node registry:".
                format(self.alias))
            lengths = [
                max(x)
                for x in zip(*[(len(name), len(host), len(str(port)))
                               for name, (host, port) in self.nodeReg.items()])
            ]
            fmt = "    {{:<{}}} listens at {{:<{}}} on port {{:>{}}}".format(
                *lengths)
            for name, (host, port) in self.nodeReg.items():
                logger.info(fmt.format(name, host, port))
        else:
            logger.info("Client {} found an empty node registry:".format(
                self.alias))

        Motor.__init__(self)

        self.inBox = deque()

        self.nodestack.connectNicelyUntil = 0  # don't need to connect
        # nicely as a client

        # TODO: Need to have couple of tests around `reqsPendingConnection`
        # where we check with and without pool ledger

        # Stores the requests that need to be sent to the nodes when the client
        # has made sufficient connections to the nodes.
        self.reqsPendingConnection = deque()

        # Tuple of identifier and reqId as key and value as tuple of set of
        # nodes which are expected to send REQACK
        self.expectingAcksFor = {}

        # Tuple of identifier and reqId as key and value as tuple of set of
        # nodes which are expected to send REPLY
        self.expectingRepliesFor = {}

        tp = loadPlugins(self.basedirpath)
        logger.debug("total plugins loaded in client: {}".format(tp))
Example #29
    def __init__(self, name: str, Delta: float, Lambda: float, Omega: float,
                 instances: Instances, nodestack,
                 blacklister: Blacklister, nodeInfo: Dict,
                 notifierEventTriggeringConfig: Dict,
                 pluginPaths: Iterable[str] = None,
                 notifierEventsEnabled: bool = True):
        self.name = name
        self.instances = instances
        self.nodestack = nodestack
        self.blacklister = blacklister
        self.nodeInfo = nodeInfo
        self.notifierEventTriggeringConfig = notifierEventTriggeringConfig
        self.notifierEventsEnabled = notifierEventsEnabled

        self.Delta = Delta
        self.Lambda = Lambda
        self.Omega = Omega
        self.statsConsumers = self.getPluginsByType(pluginPaths,
                                                    PLUGIN_TYPE_STATS_CONSUMER)

        self.config = getConfig()

        # Number of ordered requests by each replica. The value at key `i` in
        # the dict is a tuple of the number of ordered requests by replica and
        # the time taken to order those requests by the replica of the `i`th
        # protocol instance
        self.numOrderedRequests = dict()  # type: Dict[int, Tuple[int, int]]

        # Dict(instance_id, throughput) of throughputs for replicas. Key is an instId and value is an instance of
        # the ThroughputMeasurement class, which provides the throughput evaluation mechanism
        self.throughputs = dict()   # type: Dict[int, ThroughputMeasurement]

        # Utility object for tracking requests order start and end
        # TODO: Has very similar cleanup logic to propagator.Requests
        self.requestTracker = RequestTimeTracker(instances.ids)

        # Request latencies for the master protocol instances. Key of the
        # dictionary is a tuple of client id and request id and the value is
        # the time the master instance took for ordering it
        self.masterReqLatencies = {}  # type: Dict[Tuple[str, int], float]

        # Indicates that request latency in previous snapshot of master req
        # latencies was too high
        self.masterReqLatencyTooHigh = False

        # Request latency (time taken to be ordered) for the client. The value
        # at key `i` in the dict is the LatencyMeasurement object which accumulates
        # the average latency and total requests for each client.
        self.clientAvgReqLatencies = dict()  # type: Dict[int, LatencyMeasurement]

        # TODO: Set this if this monitor belongs to a node which has primary
        # of master. Will be used to set `totalRequests`
        self.hasMasterPrimary = None

        # Total requests that have been ordered since the node started
        self.totalRequests = 0

        self.started = datetime.utcnow().isoformat()

        self.orderedRequestsInLast = []

        # Attention: handlers will be run over unordered requests only once
        self.unordered_requests_handlers = []  # type: List[Callable]

        # Monitoring suspicious spikes in cluster throughput
        self.clusterThroughputSpikeMonitorData = {
            'value': 0,
            'cnt': 0,
            'accum': []
        }

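        # Prime psutil's CPU-usage tracking: with interval=None the first call
        # returns a meaningless value, and each later call reports usage since
        # the previous call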
        psutil.cpu_percent(interval=None)
        self.lastKnownTraffic = self.calculateTraffic()

        self.totalViewChanges = 0
        self._lastPostedViewChange = 0
        HasActionQueue.__init__(self)

        if self.config.SendMonitorStats:
            self.startRepeating(self.sendPeriodicStats,
                                self.config.DashboardUpdateFreq)

        self.startRepeating(
            self.checkPerformance,
            self.config.notifierEventTriggeringConfig['clusterThroughputSpike']['freq'])

        self.startRepeating(self.check_unordered, self.config.UnorderedCheckFreq)

        if 'disable_view_change' in self.config.unsafe:
            self.isMasterDegraded = lambda: False
        if 'disable_monitor' in self.config.unsafe:
            self.requestOrdered = lambda *args, **kwargs: {}
            self.sendPeriodicStats = lambda: None
            self.checkPerformance = lambda: None

        self.latency_avg_for_backup_cls = self.config.LatencyAveragingStrategyClass
        self.latency_measurement_cls = self.config.LatencyMeasurementCls
        self.throughput_avg_strategy_cls = self.config.throughput_averaging_strategy_class

        self.acc_monitor = None

        if self.config.ACC_MONITOR_ENABLED:
            self.acc_monitor = AccumulatingMonitorStrategy(
                start_time=time.perf_counter(),
                instances=instances.ids,
                txn_delta_k=self.config.ACC_MONITOR_TXN_DELTA_K,
                timeout=self.config.ACC_MONITOR_TIMEOUT,
                input_rate_reaction_half_time=self.config.ACC_MONITOR_INPUT_RATE_REACTION_HALF_TIME)
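The throughputs dict in this example keeps one ThroughputMeasurement per protocol instance. As a rough, assumption-based illustration of what such a tracker computes, here is a simple sliding-window throughput sketch; WindowedThroughput and its 15-second default window are hypothetical stand-ins, not plenum's actual class or configuration.

    import time
    from collections import deque


    class WindowedThroughput:
        """Hypothetical throughput tracker; not plenum's ThroughputMeasurement."""

        def __init__(self, window_size=15.0):
            self.window_size = window_size  # seconds (assumed default)
            self.ordered_ts = deque()       # timestamps of ordered requests

        def add_request(self, ts=None):
            # Record the moment a request was ordered
            self.ordered_ts.append(ts if ts is not None else time.perf_counter())

        def throughput(self, now=None):
            # Drop requests that fell out of the window, then report the rate
            now = now if now is not None else time.perf_counter()
            while self.ordered_ts and now - self.ordered_ts[0] > self.window_size:
                self.ordered_ts.popleft()
            return len(self.ordered_ts) / self.window_size


    # Per-instance bookkeeping similar in spirit to `self.throughputs` above
    throughputs = {inst_id: WindowedThroughput() for inst_id in (0, 1, 2)}
    throughputs[0].add_request()
    print(throughputs[0].throughput())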
Example #30
0
    def __init__(self, name: str, Delta: float, Lambda: float, Omega: float,
                 instances: Instances, nodestack,
                 blacklister: Blacklister, nodeInfo: Dict,
                 notifierEventTriggeringConfig: Dict,
                 pluginPaths: Iterable[str] = None,
                 notifierEventsEnabled: bool = True):
        self.name = name
        self.instances = instances
        self.nodestack = nodestack
        self.blacklister = blacklister
        self.nodeInfo = nodeInfo
        self.notifierEventTriggeringConfig = notifierEventTriggeringConfig
        self.notifierEventsEnabled = notifierEventsEnabled

        self.Delta = Delta
        self.Lambda = Lambda
        self.Omega = Omega
        self.statsConsumers = self.getPluginsByType(pluginPaths,
                                                    PLUGIN_TYPE_STATS_CONSUMER)

        self.config = getConfig()

        # Number of ordered requests by each replica. The value at key `i` in
        # the dict is a tuple of the number of ordered requests by replica and
        # the time taken to order those requests by the replica of the `i`th
        # protocol instance
        self.numOrderedRequests = dict()  # type: Dict[int, Tuple[int, int]]

        # Dict of throughputs for replicas, keyed by instance id. The value is
        # an instance of the ThroughputMeasurement class, which provides the
        # throughput evaluation mechanism
        self.throughputs = dict()   # type: Dict[int, ThroughputMeasurement]

        # Utility object for tracking requests order start and end
        # TODO: Has very similar cleanup logic to propagator.Requests
        self.requestTracker = RequestTimeTracker(instances.ids)

        # Request latencies for the master protocol instances. Key of the
        # dictionary is a tuple of client id and request id and the value is
        # the time the master instance took for ordering it
        self.masterReqLatencies = {}  # type: Dict[Tuple[str, int], float]

        # Indicates that request latency in previous snapshot of master req
        # latencies was too high
        self.masterReqLatencyTooHigh = False

        # Request latency (time taken to be ordered) for the client. The value
        # at key `i` in the dict is the LatencyMeasurement object which
        # accumulates the average latency and total request count for each client.
        self.clientAvgReqLatencies = dict()  # type: Dict[int, LatencyMeasurement]

        # TODO: Set this if this monitor belongs to a node which has primary
        # of master. Will be used to set `totalRequests`
        self.hasMasterPrimary = None

        # Total requests that have been ordered since the node started
        self.totalRequests = 0

        self.started = datetime.utcnow().isoformat()

        # Note: handlers will process an unordered request only once
        self.unordered_requests_handlers = []  # type: List[Callable]

        # Monitoring suspicious spikes in cluster throughput
        self.clusterThroughputSpikeMonitorData = {
            'value': 0,
            'cnt': 0,
            'accum': []
        }

        psutil.cpu_percent(interval=None)
        self.lastKnownTraffic = self.calculateTraffic()

        self.totalViewChanges = 0
        self._lastPostedViewChange = 0
        HasActionQueue.__init__(self)

        if self.config.SendMonitorStats:
            self.startRepeating(self.sendPeriodicStats,
                                self.config.DashboardUpdateFreq)

        self.startRepeating(
            self.checkPerformance,
            self.config.notifierEventTriggeringConfig['clusterThroughputSpike']['freq'])

        self.startRepeating(self.check_unordered, self.config.UnorderedCheckFreq)

        if 'disable_view_change' in self.config.unsafe:
            self.isMasterDegraded = lambda: False
        if 'disable_monitor' in self.config.unsafe:
            self.requestOrdered = lambda *args, **kwargs: {}
            self.sendPeriodicStats = lambda: None
            self.checkPerformance = lambda: None

        self.latency_avg_for_backup_cls = self.config.LatencyAveragingStrategyClass
        self.latency_measurement_cls = self.config.LatencyMeasurementCls
        self.throughput_avg_strategy_cls = self.config.throughput_averaging_strategy_class

        self.acc_monitor = None

        if self.config.ACC_MONITOR_ENABLED:
            self.acc_monitor = AccumulatingMonitorStrategy(
                start_time=time.perf_counter(),
                instances=instances.ids,
                txn_delta_k=self.config.ACC_MONITOR_TXN_DELTA_K,
                timeout=self.config.ACC_MONITOR_TIMEOUT,
                input_rate_reaction_half_time=self.config.ACC_MONITOR_INPUT_RATE_REACTION_HALF_TIME)
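Both Monitor constructors above take Delta, which bounds how far the master instance's throughput may lag behind the backup instances before the master is treated as degraded. A simplified, hedged sketch of that comparison follows; master_degraded is an illustrative helper, not the actual isMasterDegraded implementation.

    from statistics import mean
    from typing import Dict


    def master_degraded(throughputs: Dict[int, float], delta: float,
                        master_inst_id: int = 0) -> bool:
        # The master is flagged when its throughput drops below Delta times
        # the average throughput of the backup instances
        backups = [t for i, t in throughputs.items() if i != master_inst_id]
        if not backups:
            return False
        return throughputs[master_inst_id] < delta * mean(backups)


    # Master orders 40 req/s while backups average 100 req/s; with Delta = 0.8
    # the master would be reported as degraded
    print(master_degraded({0: 40.0, 1: 100.0, 2: 100.0}, delta=0.8))  # True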
Example #31
0
    def __init__(self,
                 node: 'plenum.server.node.Node',
                 instId: int,
                 isMaster: bool = False):
        """
        Create a new replica.

        :param node: Node on which this replica is located
        :param instId: the id of the protocol instance the replica belongs to
        :param isMaster: is this a replica of the master protocol instance
        """
        HasActionQueue.__init__(self)
        self.stats = Stats(TPCStat)

        self.config = getConfig()

        routerArgs = [(ReqDigest, self._preProcessReqDigest)]

        for r in [PrePrepare, Prepare, Commit]:
            routerArgs.append((r, self.processThreePhaseMsg))

        routerArgs.append((Checkpoint, self.processCheckpoint))
        routerArgs.append((ThreePCState, self.process3PhaseState))

        self.inBoxRouter = Router(*routerArgs)

        self.threePhaseRouter = Router((PrePrepare, self.processPrePrepare),
                                       (Prepare, self.processPrepare),
                                       (Commit, self.processCommit))

        self.node = node
        self.instId = instId

        self.name = self.generateName(node.name, self.instId)

        self.outBox = deque()
        """
        This queue is used by the replica to send messages to its node. Replica
        puts messages that are consumed by its node
        """

        self.inBox = deque()
        """
        This queue is used by the replica to receive messages from its node.
        Node puts messages that are consumed by the replica
        """

        self.inBoxStash = deque()
        """
        If messages need to go back on the queue, they go here temporarily and
        are put back on the queue on a state change
        """

        self.isMaster = isMaster

        # Indicates name of the primary replica of this protocol instance.
        # None in case the replica does not know who the primary of the
        # instance is
        self._primaryName = None  # type: Optional[str]

        # Requests waiting to be processed once the replica is able to decide
        # whether it is primary or not
        self.postElectionMsgs = deque()

        # PRE-PREPAREs that are waiting to be processed but do not have the
        # corresponding request digest. Happens when replica has not been
        # forwarded the request by the node but is getting 3-phase messages.
        # The value is a list since a malicious entity might send PRE-PREPAREs
        # with different digests and, since we don't have the request
        # finalised, we store all PRE-PREPAREs
        self.prePreparesPendingReqDigest = {}  # type: Dict[Tuple[str, int], List]

        # PREPAREs that are stored by non primary replica for which it has not
        #  got any PRE-PREPARE. Dictionary that stores a tuple of view no and
        #  prepare sequence number as key and a deque of PREPAREs as value.
        # This deque is attempted to be flushed on receiving every
        # PRE-PREPARE request.
        self.preparesWaitingForPrePrepare = {}
        # type: Dict[Tuple[int, int], deque]

        # COMMITs that are stored for which there are no PRE-PREPARE or PREPARE
        # received
        self.commitsWaitingForPrepare = {}
        # type: Dict[Tuple[int, int], deque]

        # Dictionary of sent PRE-PREPARE that are stored by primary replica
        # which it has broadcasted to all other non primary replicas
        # Key of dictionary is a 2 element tuple with elements viewNo,
        # pre-prepare seqNo and value is a tuple of Request Digest and time
        self.sentPrePrepares = {}
        # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]]

        # Dictionary of received PRE-PREPAREs. Key of dictionary is a 2
        # element tuple with elements viewNo, pre-prepare seqNo and value is
        # a tuple of Request Digest and time
        self.prePrepares = {}
        # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]]

        # Dictionary of received Prepare requests. Key of dictionary is a 2
        # element tuple with elements viewNo, seqNo and value is a 2 element
        #  tuple containing request digest and set of sender node names (sender
        # replica names in case of multiple protocol instances)
        # (viewNo, seqNo) -> ((identifier, reqId), {senders})
        self.prepares = Prepares()
        # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]]

        self.commits = Commits()  # type: Dict[Tuple[int, int],
        # Tuple[Tuple[str, int], Set[str]]]

        # Set of tuples to keep track of ordered requests. Each tuple is
        # (viewNo, ppSeqNo)
        self.ordered = OrderedSet()  # type: OrderedSet[Tuple[int, int]]

        # Dictionary to keep track of which replica was primary during each
        # view. Key is the view no and value is the name of the primary
        # replica during that view
        self.primaryNames = {}  # type: Dict[int, str]

        # Holds msgs that are for later views
        self.threePhaseMsgsForLaterView = deque()
        # type: deque[(ThreePhaseMsg, str)]

        # Holds tuple of view no and prepare seq no of 3-phase messages it
        # received while it was not participating
        self.stashingWhileCatchingUp = set()  # type: Set[Tuple]

        # Commits which are not being ordered since commits with lower view
        # numbers and sequence numbers have not been ordered yet. Key is the
        # viewNo and value a map of pre-prepare sequence number to commit
        self.stashedCommitsForOrdering = {}  # type: Dict[int,
        # Dict[int, Commit]]

        self.checkpoints = SortedDict(lambda k: k[0])

        self.stashingWhileOutsideWaterMarks = deque()

        # Low water mark
        self._h = 0  # type: int

        # High water mark
        self.H = self._h + self.config.LOG_SIZE  # type: int

        self.lastPrePrepareSeqNo = self.h  # type: int
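The _h/H pair at the end of this constructor defines the replica's working window; 3-phase messages whose pre-prepare sequence numbers fall outside it end up in stashingWhileOutsideWaterMarks. A minimal sketch of that window check, assuming a LOG_SIZE of 300 (the helper name is hypothetical):

    LOG_SIZE = 300  # assumed value; the real bound comes from config.LOG_SIZE


    def within_watermarks(pp_seq_no: int, h: int, H: int) -> bool:
        # A 3-phase message is handled only if its sequence number lies inside
        # the current working window (h, H]; otherwise it would be stashed
        return h < pp_seq_no <= H


    h = 0
    H = h + LOG_SIZE
    print(within_watermarks(1, h, H))      # True  -> process
    print(within_watermarks(H + 1, h, H))  # False -> stash until the window moves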
Example #32
0
    def __init__(self, name: str, Delta: float, Lambda: float, Omega: float,
                 instances: Instances, nodestack: NodeStack, blacklister: SimpleBlacklister,
                 pluginPaths: Iterable[str] = None):
        self.name = name
        self.instances = instances
        self.nodestack = nodestack
        self.blacklister = blacklister

        self.Delta = Delta
        self.Lambda = Lambda
        self.Omega = Omega
        self.statsConsumers = self.getPluginsByType(pluginPaths, PLUGIN_TYPE_STATS_CONSUMER)

        # Number of ordered requests by each replica. The value at index `i` in
        # the list is a tuple of the number of ordered requests by replica and
        # the time taken to order those requests by the replica of the `i`th
        # protocol instance
        self.numOrderedRequests = []  # type: List[Tuple[int, int]]

        # Requests that have been sent for ordering. Key of the dictionary is a
        # tuple of client id and request id and the value is the time at which
        # the request was submitted for ordering
        self.requestOrderingStarted = {}  # type: Dict[Tuple[str, int], float]

        # Request latencies for the master protocol instances. Key of the
        # dictionary is a tuple of client id and request id and the value is
        # the time the master instance took for ordering it
        self.masterReqLatencies = {}  # type: Dict[Tuple[str, int], float]

        # Request latency (time taken to be ordered) for the client. The value
        # at index `i` in the list is the dictionary where the key of the
        # dictionary is the client id and the value is a tuple of number of
        # requests and average time taken by that number of requests for the
        # `i`th protocol instance
        self.clientAvgReqLatencies = []  # type: List[Dict[str, Tuple[int, float]]]

        # TODO: Set this if this monitor belongs to a node which has primary
        # of master. Will be used to set `totalRequests`
        self.hasMasterPrimary = None

        # Total requests that have been ordered since the node started
        self.totalRequests = 0

        self.started = datetime.utcnow().isoformat()

        # Times of requests ordered by master in last
        # `ThroughputWindowSize` seconds. `ThroughputWindowSize` is
        # defined in config
        self.orderedRequestsInLast = []

        # Times and latencies (as a tuple) of requests ordered by master in last
        # `LatencyWindowSize` seconds. `LatencyWindowSize` is
        # defined in config
        self.latenciesByMasterInLast = []

        # Times and latencies (as a tuple) of requests ordered by backups in last
        # `LatencyWindowSize` seconds. `LatencyWindowSize` is
        # defined in config. Dictionary where key corresponds to instance id and
        #  value is a tuple of ordering time and latency of a request
        self.latenciesByBackupsInLast = {}

        self.totalViewChanges = 0
        self._lastPostedViewChange = 0
        HasActionQueue.__init__(self)

        if config.SendMonitorStats:
            self._schedule(self.sendPeriodicStats, config.DashboardUpdateFreq)