def test_node_requests_missing_three_phase_messages(looper, txnPoolNodeSet,
                                                    wallet1, client1Connected):
    """
    2 of 4 nodes go down, so the pool cannot process any more incoming
    requests. A new request comes in. After a while those 2 nodes come back
    alive. Another request comes in. Check that the two previously
    disconnected nodes request the missing PREPREPAREs and PREPAREs and that
    the pool successfully handles both transactions after that.
    """
    INIT_REQS_CNT = 10
    MISSING_REQS_CNT = 1
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1,
                                              client1Connected, INIT_REQS_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node,
                                                stopNode=False)

    sendRandomRequests(wallet1, client1Connected, MISSING_REQS_CNT)

    def check_pp_out_of_sync(alive_nodes, disconnected_nodes):

        def get_last_pp(node):
            return node.replicas._master_replica.lastPrePrepare

        last_3pc_key_alive = get_last_pp(alive_nodes[0])
        for node in alive_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_alive

        last_3pc_key_disconnected = get_last_pp(disconnected_nodes[0])
        assert last_3pc_key_disconnected != last_3pc_key_alive
        for node in disconnected_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_disconnected

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          disconnected_nodes,
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(
                              len(txnPoolNodeSet))))

    for node in disconnected_nodes:
        reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, node)

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1,
                                              client1Connected,
                                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)

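# The docstring above claims the reconnected nodes request the missing
# PREPREPAREs and PREPAREs; the test itself only checks ledger sizes. Below
# is a minimal sketch of a direct assertion, reusing the
# count_msg_reqs_of_type helper seen in testNodeRequestingConsProof later in
# this listing and assuming (a) PREPREPARE/PREPARE message-type constants are
# importable and (b) the helper counts message requests a node has received,
# as in that test. These names and assumptions are not part of the original.
def check_alive_nodes_received_3pc_message_requests(alive_nodes):
    # Hypothetical extra check: the nodes that stayed up should have received
    # requests for the 3-phase messages the reconnected nodes missed.
    for node in alive_nodes:
        assert count_msg_reqs_of_type(node, PREPREPARE) > 0, node
        assert count_msg_reqs_of_type(node, PREPARE) > 0, node
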
def test_msg_max_length_check_node_to_node(looper, txnPoolNodeSet, client1,
                                           wallet1, client1Connected,
                                           clientAndWallet2):
    """
    Two clients send 2*N requests each at the same time. N < MSG_LEN_LIMIT
    but 2*N > MSG_LEN_LIMIT, so the requests pass the max-length check for
    client-node messages but do not pass the check for node-node messages.
    """
    N = 10
    # an empirical value for N random requests;
    # it has to be adjusted if the world changes (see pydoc)
    max_len_limit = 3000

    patch_msg_len_validators(max_len_limit, txnPoolNodeSet)

    client2, wallet2 = clientAndWallet2

    reqs1 = sendRandomRequests(wallet1, client1, N)
    reqs2 = sendRandomRequests(wallet2, client2, N)

    check_reqacks(client1, looper, reqs1, txnPoolNodeSet)
    check_reqacks(client2, looper, reqs2, txnPoolNodeSet)

    waitForSufficientRepliesForRequests(looper, client1, requests=reqs1)
    waitForSufficientRepliesForRequests(looper, client2, requests=reqs2)

def view_change_in_between_3pc(looper, nodes, slow_nodes, wallet, client,
                               slow_delay=1, wait=None):
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4)
    delay_3pc_messages(slow_nodes, 0, delay=slow_delay)

    sendRandomRequests(wallet, client, 10)
    if wait:
        looper.runFor(wait)

    ensure_view_change_complete(looper, nodes, customTimeout=60)

    reset_delays_and_process_delayeds(slow_nodes)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5,
                                        total_timeout=30)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 5,
                                              total_timeout=30)

def view_change_in_between_3pc_random_delays(looper, nodes, slow_nodes,
                                             wallet, client, tconf,
                                             min_delay=0, max_delay=0):
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4)

    # max delay should not be more than the catchup timeout.
    max_delay = max_delay or \
        tconf.MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE - 1

    delay_3pc_messages(slow_nodes, 0, min_delay=min_delay,
                       max_delay=max_delay)

    sendRandomRequests(wallet, client, 10)

    ensure_view_change_complete(
        looper, nodes,
        customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT + max_delay,
        exclude_from_check=['check_last_ordered_3pc'])

    reset_delays_and_process_delayeds(slow_nodes)

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 10)

def test_different_ledger_request_interleave(tconf, looper, txnPoolNodeSet,
                                             client1, wallet1, one_node_added,
                                             client1Connected, tdir,
                                             client_tdir, tdirWithPoolTxns,
                                             steward1, stewardWallet,
                                             allPluginsPath):
    """
    Send pool and domain ledger requests such that they interleave, do a
    view change in between, and verify that the pool stays functional.
    """
    new_node = one_node_added
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Send domain ledger requests but don't wait for replies
    requests = sendRandomRequests(wallet1, client1, 2)

    # Add another node by sending a pool ledger request
    _, _, new_theta = nodeThetaAdded(looper, txnPoolNodeSet, tdir,
                                     client_tdir, tconf, steward1,
                                     stewardWallet, allPluginsPath,
                                     name='new_theta')

    # Send more domain ledger requests but don't wait for replies
    requests.extend(sendRandomRequests(wallet1, client1, 3))

    # Do view change without waiting for replies
    ensure_view_change(looper, nodes=txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, client1, requests=requests)

    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)

    new_steward, new_steward_wallet = addNewSteward(looper, client_tdir,
                                                    steward1, stewardWallet,
                                                    'another_ste')

    # Send another pool ledger request (NODE) but don't wait for its
    # completion
    next_node_name = 'next_node'
    r = sendAddNewNode(tdir, tconf, next_node_name, new_steward,
                       new_steward_wallet)
    node_req = r[0]

    # Send more domain ledger requests but don't wait for replies
    requests = [node_req,
                *sendRandomRequests(new_steward_wallet, new_steward, 5)]

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, new_steward,
                                        requests=requests)

    # Make sure the pool is functional
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)

def testNodeRequestingTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes, but one of the nodes does not reply. The newly joined node cannot
    complete the process before the timeout and then requests the missing
    transactions.
    """
    looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns

    # Nodes won't tell the clients about the newly joined node, so the
    # clients don't send any requests to it
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)
    txnPoolNodeSet.append(newNode)

    def ignoreCatchupReq(self, req, frm):
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the nodes does not process catchup requests.
    txnPoolNodeSet[0].nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        ignoreCatchupReq, txnPoolNodeSet[0].ledgerManager)
    sendRandomRequests(wallet, client, 10)
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=60))

    looper.run(eventually(checkNodeLedgersForEquality, newNode,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=90))

def setup(tconf, looper, txnPoolNodeSet, client, wallet1):
    # Patch the 3-phase batch creation method to send an incorrect digest
    pr, otherR = getPrimaryReplica(txnPoolNodeSet, instId=0), \
        getNonPrimaryReplicas(txnPoolNodeSet, instId=0)

    reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize)
    waitForSufficientRepliesForRequests(
        looper, client, requests=reqs,
        customTimeoutPerReq=tconf.Max3PCBatchWait)
    stateRoot = pr.stateRootHash(DOMAIN_LEDGER_ID, to_str=False)

    origMethod = pr.create3PCBatch
    malignedOnce = None

    def badMethod(self, ledgerId):
        nonlocal malignedOnce
        pp = origMethod(ledgerId)
        if not malignedOnce:
            pp = updateNamedTuple(pp, digest=pp.digest + '123')
            malignedOnce = True
        return pp

    pr.create3PCBatch = types.MethodType(badMethod, pr)
    sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize)
    return pr, otherR, stateRoot

def testNodeRequestingConsProof(tconf, txnPoolNodeSet,
                                nodeCreatedAfterSomeTxns):
    """
    All of the 4 old nodes delay the processing of LEDGER_STATUS from the
    newly joined node while they are processing requests, which results in
    them sending consistency proofs which are not the same, so the newly
    joined node cannot conclude about the state of transactions in the
    system. The new node then requests a consistency proof for a particular
    range from all nodes.
    """
    looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns

    # Nodes won't tell the clients about the newly joined node, so the
    # clients don't send any requests to it
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)
    txnPoolNodeSet.append(newNode)

    # The new node sends different ledger statuses to every node so it
    # does not get enough similar consistency proofs
    next_size = 0
    origMethod = newNode.build_ledger_status

    def build_broken_ledger_status(self, ledger_id):
        nonlocal next_size
        if ledger_id != DOMAIN_LEDGER_ID:
            return origMethod(ledger_id)

        size = self.primaryStorage.size
        next_size = next_size + 1 if next_size < size else 1
        print("new size {}".format(next_size))

        newRootHash = Ledger.hashToStr(
            self.domainLedger.tree.merkle_tree_hash(0, next_size))
        three_pc_key = self.three_phase_key_for_txn_seq_no(ledger_id,
                                                           next_size)
        v, p = three_pc_key if three_pc_key else (None, None)
        ledgerStatus = LedgerStatus(1, next_size, v, p, newRootHash)
        print("dl status {}".format(ledgerStatus))
        return ledgerStatus

    newNode.build_ledger_status = types.MethodType(
        build_broken_ledger_status, newNode)
    logger.debug('Domain Ledger status sender of {} patched'.format(newNode))

    sendRandomRequests(wallet, client, 10)

    # wait more than `ConsistencyProofsTimeout`
    # TODO: apply a configurable timeout here
    # `ConsistencyProofsTimeout` is set to 60 sec, so we need to wait more
    # than 60 sec, hence the large timeout. Don't reduce it.
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1],
                         customTimeout=75)

    # Other nodes should have received a request for `CONSISTENCY_PROOF` and
    # processed it.
    for node in txnPoolNodeSet[:-1]:
        assert count_msg_reqs_of_type(node, CONSISTENCY_PROOF) > 0, node

def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper, wallet1,
                                              client1, client1Connected):
    """
    The node should do only a fixed number of catchup rounds. To simulate
    this, delay Prepares and Commits for 2 non-primary nodes by a large
    amount, which is equivalent to losing those Prepares and Commits. Make
    sure the 2 nodes have a last prepared certificate different from the
    other two. Then do a view change, make sure the view change completes
    and the pool does not process the requests that were prepared by only a
    subset of the nodes.
    """
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ledger_summary = txnPoolNodeSet[0].elector.ledger_summary

    slow_nodes = [r.node for r in
                  getNonPrimaryReplicas(txnPoolNodeSet, 0)[-2:]]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]

    # Make nodes slow to process Prepares and Commits
    for node in slow_nodes:
        node.nodeIbStasher.delay(pDelay(120, 0))
        node.nodeIbStasher.delay(cDelay(120, 0))

    sendRandomRequests(wallet1, client1, 5)
    looper.runFor(3)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    def last_prepared(nodes):
        lst = [n.master_replica.last_prepared_certificate_in_view()
               for n in nodes]
        # All nodes have the same last prepared certificate
        assert check_if_all_equal_in_list(lst)
        return lst[0]

    last_prepared_slow = last_prepared(slow_nodes)
    last_prepared_fast = last_prepared(fast_nodes)

    # Check that `slow_nodes` and `fast_nodes` have different last_prepared
    assert last_prepared_fast != last_prepared_slow

    # View change completes
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # The requests which were prepared by only a subset of the nodes were
    # not ordered
    assert txnPoolNodeSet[0].elector.ledger_summary == ledger_summary

    for node in slow_nodes:
        node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Make sure the pool is functional
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    last_prepared(txnPoolNodeSet)

def testNodeRejectingInvalidTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes, but one of the nodes replies with incorrect transactions. The
    newly joined node detects that, rejects the transactions and blacklists
    the node. It thus cannot complete the process before the timeout and
    then requests the missing transactions.
    """
    looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns

    # Nodes won't tell the clients about the newly joined node, so the
    # clients don't send any requests to it
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)

    def sendIncorrectTxns(self, req, frm):
        ledgerType = getattr(req, f.LEDGER_TYPE.nm)
        if ledgerType == 1:
            logger.info("{} being malicious and sending incorrect "
                        "transactions for catchup request {} from {}".format(
                            self, req, frm))
            start, end = getattr(req, f.SEQ_NO_START.nm), \
                getattr(req, f.SEQ_NO_END.nm)
            ledger = self.getLedgerForMsg(req)
            txns = ledger.getAllTxn(start, end)
            for seqNo in txns.keys():
                # Since the type of random request is `buy`
                if txns[seqNo].get(TXN_TYPE) == "buy":
                    txns[seqNo][TXN_TYPE] = "randomtype"
            consProof = [b64encode(p).decode() for p in
                         ledger.tree.consistency_proof(end, ledger.size)]
            self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_TYPE.nm), txns,
                                       consProof), to=frm)
        else:
            self.processCatchupReq(req, frm)

    # One of the nodes sends incorrect txns in its catchup reply.
    txnPoolNodeSet[0].nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        sendIncorrectTxns, txnPoolNodeSet[0].ledgerManager)
    logger.debug('Catchup request processor of {} patched'.format(
        txnPoolNodeSet[0]))

    sendRandomRequests(wallet, client, 10)
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=60))
    looper.run(eventually(checkNodeLedgersForEquality, newNode,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=45))
    assert newNode.isNodeBlacklisted(txnPoolNodeSet[0].name)

def test_no_requests_processed_during_view_change(looper, nodeSet, client1,
                                                  wallet1):
    for node in nodeSet:
        node.view_change_in_progress = True

    sendRandomRequests(wallet1, client1, 10)

    waitRejectFromPoolWithReason(looper, nodeSet, client1,
                                 'Can not process requests when view '
                                 'change is in progress')

    for node in nodeSet:
        check_replica_queue_empty(node)

def test_all_replicas_hold_request_keys(perf_chk_patched, looper,
                                        txnPoolNodeSet, client1, wallet1,
                                        client1Connected):
    """
    All replicas, whether primary or non-primary, hold the request keys of
    forwarded requests. Once the requests are ordered, the request keys are
    removed from the replica.
    """
    tconf = perf_chk_patched
    delay_3pc = 2
    delay_3pc_messages(txnPoolNodeSet, 0, delay_3pc)
    delay_3pc_messages(txnPoolNodeSet, 1, delay_3pc)

    def chk(count):
        # All replicas have the same number of forwarded request keys and
        # all keys are finalised.
        for node in txnPoolNodeSet:
            for r in node.replicas:
                if r.isPrimary is False:
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == count
                    for i in range(count):
                        k = r.requestQueues[DOMAIN_LEDGER_ID][i]
                        assert r.requests[k].finalised
                elif r.isPrimary is True:
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == 0

    reqs = sendRandomRequests(wallet1, client1, tconf.Max3PCBatchSize - 1)

    # Only non-primary replicas should have all request keys with them
    looper.run(eventually(chk, tconf.Max3PCBatchSize - 1))
    waitForSufficientRepliesForRequests(looper, client1, requests=reqs,
                                        add_delay_to_timeout=delay_3pc)
    # Replicas should have no request keys with them since the requests are
    # ordered
    looper.run(eventually(chk, 0))  # Need to wait since one node might not
    # have processed it.

    delay = 1
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(nom_delay(delay))

    ensure_view_change(looper, txnPoolNodeSet)
    reqs = sendRandomRequests(wallet1, client1, 2 * tconf.Max3PCBatchSize)
    looper.run(eventually(chk, 2 * tconf.Max3PCBatchSize))

    # Since each nomination is delayed and there will be multiple
    # nominations, add some extra time
    timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + \
        len(txnPoolNodeSet) * delay
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    waitForSufficientRepliesForRequests(looper, client1, requests=reqs,
                                        add_delay_to_timeout=delay_3pc)
    looper.run(eventually(chk, 0))

def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet,
                           nodeCreatedAfterSomeTxns):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes, but one of the nodes does not reply. The newly joined node cannot
    complete the process before the timeout and then requests the missing
    transactions.
    """
    looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns
    new_node_ledger = newNode.ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID]
    old_size = len(new_node_ledger.ledger)
    old_size_others = txnPoolNodeSet[0].ledgerManager.ledgerRegistry[
        DOMAIN_LEDGER_ID].ledger.size

    # Nodes won't tell the clients about the newly joined node, so the
    # clients don't send any requests to it
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)

    def ignoreCatchupReq(self, req, frm):
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the nodes does not process catchup requests.
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    badReplica = npr[0]
    badNode = badReplica.node
    txnPoolNodeSet.append(newNode)

    badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        ignoreCatchupReq, badNode.ledgerManager)
    more_requests = 10
    sendRandomRequests(wallet, client, more_requests)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
        reduced_catchup_timeout_conf.CatchupTransactionsTimeout
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)

    new_size = len(new_node_ledger.ledger)

    # The new node's ledger might catch up some transactions from the batch
    # of `more_requests` transactions
    assert old_size_others - \
        old_size <= new_node_ledger.num_txns_caught_up <= new_size - old_size
    sendRandomRequests(wallet, client, 2)
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)

def testNodeRequestingConsProof(txnPoolNodeSet, nodeCreatedAfterSomeTxns):
    """
    All of the 4 old nodes delay the processing of LEDGER_STATUS from the
    newly joined node while they are processing requests, which results in
    them sending consistency proofs which are not the same, so the newly
    joined node cannot conclude about the state of transactions in the
    system. The new node then requests a consistency proof for a particular
    range from all nodes.
    """
    looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns

    # Nodes won't tell the clients about the newly joined node, so the
    # clients don't send any requests to it
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)
    txnPoolNodeSet.append(newNode)

    # The new node sends different ledger statuses to every node so it
    # does not get enough similar consistency proofs
    sentSizes = set()

    def sendDLStatus(self, name):
        size = self.primaryStorage.size
        newSize = randint(1, size)
        while newSize in sentSizes:
            newSize = randint(1, size)
        print("new size {}".format(newSize))
        newRootHash = base64.b64encode(
            self.domainLedger.tree.merkle_tree_hash(0, newSize)).decode()
        ledgerStatus = LedgerStatus(1, newSize, newRootHash)
        print("dl status {}".format(ledgerStatus))
        rid = self.nodestack.getRemote(name).uid
        self.send(ledgerStatus, rid)
        sentSizes.add(newSize)

    newNode.sendDomainLedgerStatus = types.MethodType(sendDLStatus, newNode)
    print("sending 10 requests")
    sendRandomRequests(wallet, client, 10)
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=60))

    # `ConsistencyProofsTimeout` is set to 60 sec, so we need to wait more
    # than 60 sec.
    looper.run(eventually(checkNodeLedgersForEquality, newNode,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=75))
    for node in txnPoolNodeSet[:-1]:
        assert node.ledgerManager.spylog.count(
            TestLedgerManager.processConsistencyProofReq.__name__) > 0

def testNewNodeCatchupWhileIncomingRequests(looper, txnPoolNodeSet,
                                            tdir, tdirWithClientPoolTxns,
                                            tconf, steward1, stewardWallet,
                                            allPluginsPath):
    """
    A new node joins while transactions are happening. Its catchup requests
    include how far it has to catch up, which will be less than the other
    nodes' ledger size. In the meantime, the new node stashes all requests.
    """
    sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, steward1, 5)

    def chkAfterCall(self, req, frm):
        r = self.processCatchupReq(req, frm)
        typ = getattr(req, f.LEDGER_ID.nm)
        if typ == DOMAIN_LEDGER_ID:
            ledger = self.getLedgerForMsg(req)
            assert req.catchupTill <= ledger.size
        return r

    for node in txnPoolNodeSet:
        node.nodeMsgRouter.routes[CatchupReq] = \
            types.MethodType(chkAfterCall, node.ledgerManager)
        node.nodeIbStasher.delay(cqDelay(3))

    print('Sending 5 requests')
    sendRandomRequests(stewardWallet, steward1, 5)
    looper.runFor(1)
    newStewardName = randomString()
    newNodeName = "Epsilon"
    newStewardClient, newStewardWallet, newNode = addNewStewardAndNode(
        looper, steward1, stewardWallet, newStewardName, newNodeName,
        tdir, tdirWithClientPoolTxns, tconf, allPluginsPath=allPluginsPath,
        autoStart=True)
    txnPoolNodeSet.append(newNode)
    looper.runFor(2)
    sendRandomRequests(stewardWallet, steward1, 5)
    # TODO select or create a timeout for this case in 'waits'
    looper.run(eventually(checkNodeDataForEquality, newNode,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=80))
    assert newNode.spylog.count(TestNode.processStashedOrderedReqs) > 0

def test_no_ordering_during_syncup(tconf, looper, txnPoolNodeSet, client,
                                   wallet1):
    non_primary_replica = getNonPrimaryReplicas(txnPoolNodeSet, instId=0)[0]

    # Put the non-primary node into the syncing state once the first Prepare
    # is received
    make_node_syncing(non_primary_replica, Prepare)

    # Patch the non-primary node to fail if ordering is executed
    fail_on_execute_batch_on_master(non_primary_replica.node)

    # Send requests. The non-primary node should not fail since no ordering
    # is called while syncing
    sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize)
    looper.runFor(5)

def provoke_and_check_view_change(nodes, newViewNo, wallet, client):

    if {n.viewNo for n in nodes} == {newViewNo}:
        return True

    # If the throughput of every node has gone down, check that the view
    # has changed
    tr = [n.monitor.isMasterThroughputTooLow() for n in nodes]
    if all(tr):
        logger.info('Throughput ratio gone down, its {}'.format(tr))
        checkViewNoForNodes(nodes, newViewNo)
    else:
        logger.info('Master instance has not degraded yet, '
                    'sending more requests')
        sendRandomRequests(wallet, client, 10)
        assert False

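# provoke_and_check_view_change is written to be retried: it keeps sending
# requests and raising until the pool reaches the expected view. Below is a
# minimal usage sketch following the looper/eventually pattern used
# throughout these tests; the wrapper name, the old_view_no argument and the
# timeout values are illustrative assumptions, not part of the original.
def wait_for_provoked_view_change(looper, nodes, old_view_no, wallet, client):
    # Retry the check above until the pool reaches the next view or the
    # overall timeout expires.
    looper.run(eventually(provoke_and_check_view_change,
                          nodes, old_view_no + 1, wallet, client,
                          retryWait=5, timeout=120))
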
def chkViewChange(newViewNo):
    if {n.viewNo for n in step3.nodes} != {newViewNo}:
        tr = []
        for n in step3.nodes:
            tr.append(n.monitor.isMasterThroughputTooLow())
        if all(tr):
            logger.debug('Throughput ratio gone down')
            checkViewNoForNodes(step3.nodes, newViewNo)
        else:
            logger.debug('Master instance has not degraded yet, '
                         'sending more requests')
            sendRandomRequests(wallet1, client1, 1)
            assert False
    else:
        assert True

def test_make_proof_bls_enabled(looper, txnPoolNodeSet,
                                client1, client1Connected, wallet1):
    reqs = sendRandomRequests(wallet1, client1, 1)
    wait_for_requests_ordered(looper, txnPoolNodeSet, reqs)

    req = reqs[0]
    for node in txnPoolNodeSet:
        req_handler = node.get_req_handler(DOMAIN_LEDGER_ID)
        key = req_handler.prepare_buy_key(req.identifier, req.reqId)
        proof = req_handler.make_proof(key)

        assert proof
        assert ROOT_HASH in proof
        assert MULTI_SIGNATURE in proof
        assert PROOF_NODES in proof

        multi_sig = proof[MULTI_SIGNATURE]
        assert MULTI_SIGNATURE_SIGNATURE in multi_sig
        assert MULTI_SIGNATURE_PARTICIPANTS in multi_sig
        assert MULTI_SIGNATURE_VALUE in multi_sig

        multi_sig_value = multi_sig[MULTI_SIGNATURE_VALUE]
        assert MULTI_SIGNATURE_VALUE_LEDGER_ID in multi_sig_value
        assert MULTI_SIGNATURE_VALUE_STATE_ROOT in multi_sig_value
        assert MULTI_SIGNATURE_VALUE_TXN_ROOT in multi_sig_value
        assert MULTI_SIGNATURE_VALUE_POOL_STATE_ROOT in multi_sig_value
        assert MULTI_SIGNATURE_VALUE_TIMESTAMP in multi_sig_value

        # check that multi sig values are in order
        value_keys = list(multi_sig_value.keys())
        assert [MULTI_SIGNATURE_VALUE_LEDGER_ID,
                MULTI_SIGNATURE_VALUE_POOL_STATE_ROOT,
                MULTI_SIGNATURE_VALUE_STATE_ROOT,
                MULTI_SIGNATURE_VALUE_TIMESTAMP,
                MULTI_SIGNATURE_VALUE_TXN_ROOT] == value_keys

        assert client1.validate_multi_signature(proof)

def test_proof_in_reply(looper, txnPoolNodeSet,
                        client1, client1Connected, wallet1):
    reqs = sendRandomRequests(wallet1, client1, 1)
    waitForSufficientRepliesForRequests(looper, client1, requests=reqs)

    req = reqs[0]
    result = client1.getReply(req.identifier, req.reqId)[0]

    assert result
    assert result[TXN_TYPE] == "buy"
    assert result[f.IDENTIFIER.nm] == req.identifier
    assert result[f.REQ_ID.nm] == req.reqId
    assert result[f.SEQ_NO.nm]
    assert result[TXN_TIME]
    assert STATE_PROOF in result

    state_proof = result[STATE_PROOF]
    assert ROOT_HASH in state_proof
    assert MULTI_SIGNATURE in state_proof
    assert PROOF_NODES in state_proof

    multi_sig = state_proof[MULTI_SIGNATURE]
    assert MULTI_SIGNATURE_SIGNATURE in multi_sig
    assert MULTI_SIGNATURE_PARTICIPANTS in multi_sig
    assert MULTI_SIGNATURE_VALUE in multi_sig

    multi_sig_value = multi_sig[MULTI_SIGNATURE_VALUE]
    assert MULTI_SIGNATURE_VALUE_LEDGER_ID in multi_sig_value
    assert MULTI_SIGNATURE_VALUE_STATE_ROOT in multi_sig_value
    assert MULTI_SIGNATURE_VALUE_TXN_ROOT in multi_sig_value
    assert MULTI_SIGNATURE_VALUE_POOL_STATE_ROOT in multi_sig_value
    assert MULTI_SIGNATURE_VALUE_TIMESTAMP in multi_sig_value

    assert client1.validate_multi_signature(state_proof)
    assert client1.validate_proof(result)

def load():
    port = genHa()[1]
    ha = HA('0.0.0.0', port)
    name = "hello"
    wallet = Wallet(name)
    wallet.addIdentifier(signer=SimpleSigner(
        seed=b'000000000000000000000000Steward1'))
    client = Client(name, ha=ha)
    with Looper(debug=getConfig().LOOPER_DEBUG) as looper:
        looper.add(client)
        print('Will send {} reqs in all'.format(numReqs))
        requests = sendRandomRequests(wallet, client, numReqs)
        start = perf_counter()
        for i in range(0, numReqs, numReqs // splits):
            print('Will wait for {} now'.format(numReqs // splits))
            s = perf_counter()
            reqs = requests[i:i + numReqs // splits + 1]
            waitForSufficientRepliesForRequests(looper, client,
                                                requests=reqs, fVal=2,
                                                customTimeoutPerReq=3)
            print('>>> Got replies for {} requests << in {}'.format(
                numReqs // splits, perf_counter() - s))
        end = perf_counter()
        print('>>>{}<<<'.format(end - start))
        exit(0)

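# load() relies on module-level numReqs and splits that are not shown in this
# listing. A minimal sketch of how the surrounding script might define them
# and invoke the function; the values are illustrative assumptions, so the
# sketch is kept commented out to avoid overriding the script's own settings:
#
#     numReqs = 10000   # total requests to send
#     splits = 5        # wait for replies in chunks of numReqs // splits
#
#     if __name__ == '__main__':
#         load()
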
def nodeStashingOrderedRequests(txnPoolNodeSet, nodeCreatedAfterSomeTxns):
    looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(crDelay(5))
    txnPoolNodeSet.append(newNode)
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, client,
                                                  *txnPoolNodeSet[:-1])
    sendRandomRequests(wallet, client, 10)
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=15))

    def stashing():
        assert newNode.mode != Mode.participating
        assert len(newNode.stashedOrderedReqs) > 0
        assert len(newNode.reqsFromCatchupReplies) > 0

    looper.run(eventually(stashing, retryWait=1, timeout=20))

def testTreeRootsCorrectAfterEachBatch(tconf, looper, txnPoolNodeSet,
                                       client, wallet1):
    """
    Check that both the state root and the txn tree root are correct and the
    same on each node after each batch.
    """
    # Send 1 batch
    reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize)
    waitForSufficientRepliesForRequests(looper, client, requests=reqs)
    checkNodesHaveSameRoots(txnPoolNodeSet)

    # Send 2 batches
    reqs = sendRandomRequests(wallet1, client, 2 * tconf.Max3PCBatchSize)
    waitForSufficientRepliesForRequests(looper, client, requests=reqs)
    checkNodesHaveSameRoots(txnPoolNodeSet)

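# checkNodesHaveSameRoots is only called above, not defined in this listing.
# A minimal sketch of the kind of comparison it performs, assuming the
# per-ledger state objects used in the BLS tests below (node.states) and the
# ledger's merkle tree; the real helper may check more, e.g. uncommitted
# roots, so this is an illustration rather than the actual implementation.
def check_nodes_have_same_roots_sketch(nodes):
    # Every node should report the same committed state root and the same
    # txn tree root for the domain ledger.
    state_roots = {node.states[DOMAIN_LEDGER_ID].committedHeadHash
                   for node in nodes}
    txn_roots = {node.domainLedger.tree.root_hash for node in nodes}
    assert len(state_roots) == 1
    assert len(txn_roots) == 1
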
def testQueueingReqFromFutureView(delayed_perf_chk, looper, nodeSet, up,
                                  wallet1, client1):
    """
    Test that every node queues 3-phase requests (PRE-PREPARE, PREPARE and
    COMMIT) that come from a view which is greater than the current view.
    - Delay reception and processing of view change messages by a non-primary
      for the master instance => it starts receiving 3-phase commit messages
      for the next view
    """
    lagging_node = get_last_master_non_primary_node(nodeSet)
    old_view_no = lagging_node.viewNo

    # Delay processing of InstanceChange and ViewChangeDone so the node
    # stashes 3PC messages
    delay_ic = 60
    lagging_node.nodeIbStasher.delay(icDelay(delay_ic))
    lagging_node.nodeIbStasher.delay(vcd_delay(delay_ic))
    logger.debug('{} will delay its view change'.format(lagging_node))

    def chk_fut_view(view_no, is_empty):
        length = len(lagging_node.msgsForFutureViews.get(view_no, ()))
        if is_empty:
            assert length == 0
        else:
            assert length > 0
        return length

    # No messages queued for future view
    chk_fut_view(old_view_no + 1, is_empty=True)
    logger.debug('{} does not have any messages for future views'.format(
        lagging_node))

    # Every node except the lagging node should do a view change
    ensure_view_change(looper,
                       [n for n in nodeSet if n != lagging_node],
                       [lagging_node])

    # Send more requests that will be queued for the lagging node
    # sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 3)
    reqs = sendRandomRequests(wallet1, client1, 5)
    l = looper.run(eventually(chk_fut_view, old_view_no + 1, False,
                              retryWait=1))
    logger.debug('{} has {} messages for future views'.format(
        lagging_node, l))
    waitForSufficientRepliesForRequests(looper, client1, requests=reqs)

    # Reset delays for the lagging node so that it finally makes the view
    # change
    lagging_node.reset_delays_and_process_delayeds()

    # Eventually no messages queued for the future view
    looper.run(eventually(chk_fut_view, old_view_no + 1, True,
                          retryWait=1, timeout=delay_ic + 10))
    logger.debug('{} exhausted pending messages for future views'.format(
        lagging_node))

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)

def testNodeRejectingInvalidTxns(txnPoolNodeSet, nodeCreatedAfterSomeTxns):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes, but one of the nodes replies with incorrect transactions. The
    newly joined node detects that, rejects the transactions and blacklists
    the node. It thus cannot complete the process before the timeout and
    then requests the missing transactions.
    """
    looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns

    # Nodes won't tell the clients about the newly joined node, so the
    # clients don't send any requests to it
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)

    def sendIncorrectTxns(self, req, frm):
        ledgerType = getattr(req, f.LEDGER_TYPE.nm)
        if ledgerType == 1:
            logger.info("{} being malicious and sending incorrect "
                        "transactions for catchup request {} from {}".
                        format(self, req, frm))
            start, end = getattr(req, f.SEQ_NO_START.nm), \
                getattr(req, f.SEQ_NO_END.nm)
            ledger = self.getLedgerForMsg(req)
            txns = ledger.getAllTxn(start, end)
            for seqNo in txns.keys():
                # Since the type of random request is `buy`
                if txns[seqNo].get(TXN_TYPE) == "buy":
                    txns[seqNo][TXN_TYPE] = "randomtype"
            consProof = [b64encode(p).decode() for p in
                         ledger.tree.consistency_proof(end, ledger.size)]
            self.sendTo(msg=CatchupRep(getattr(req, f.LEDGER_TYPE.nm), txns,
                                       consProof), to=frm)
        else:
            self.processCatchupReq(req, frm)

    # One of the nodes sends incorrect txns in its catchup reply.
    txnPoolNodeSet[0].nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        sendIncorrectTxns, txnPoolNodeSet[0].ledgerManager)

    sendRandomRequests(wallet, client, 10)
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=60))
    looper.run(eventually(checkNodeLedgersForEquality, newNode,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=45))
    assert newNode.isNodeBlacklisted(txnPoolNodeSet[0].name)

def test3PCOverBatchWithLessThanThresholdReqs(tconf, looper, txnPoolNodeSet,
                                              client, wallet1):
    """
    Check that the 3-phase commit happens when fewer than the threshold
    number of requests are received but the threshold time has passed.
    """
    reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize - 1)
    waitForSufficientRepliesForRequests(looper, client, requests=reqs)

def test3PCOverBatchWithThresholdReqs(tconf, looper, txnPoolNodeSet, client,
                                      wallet1):
    """
    Check that the 3-phase commit happens when the threshold number of
    requests are received and propagated.
    """
    reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize)
    waitForSufficientRepliesForRequests(looper, client, requests=reqs)

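# Both batching tests above depend on the pool's 3PC batch settings. Below is
# a minimal sketch of how a module-level tconf fixture might pin them down,
# assuming pytest is imported and using the Max3PCBatchSize/Max3PCBatchWait
# config names seen elsewhere in this suite; the fixture these tests actually
# use may differ, and the chosen values are illustrative only.
@pytest.fixture(scope="module")
def tconf(tconf):
    old_size, old_wait = tconf.Max3PCBatchSize, tconf.Max3PCBatchWait
    # A small batch size and a short batch wait so that both the "threshold
    # reqs" path and the "threshold time" path are exercised quickly.
    tconf.Max3PCBatchSize = 3
    tconf.Max3PCBatchWait = 5
    yield tconf
    tconf.Max3PCBatchSize = old_size
    tconf.Max3PCBatchWait = old_wait
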
def test_make_result_bls_enabled(looper, txnPoolNodeSet,
                                 client1, client1Connected, wallet1):
    reqs = sendRandomRequests(wallet1, client1, 1)
    wait_for_requests_ordered(looper, txnPoolNodeSet, reqs)

    req = reqs[0]
    assert req.protocolVersion
    assert req.protocolVersion >= \
        PlenumProtocolVersion.STATE_PROOF_SUPPORT.value

    check_result(txnPoolNodeSet, req, client1, True)

def test_make_proof_bls_disabled(looper, txnPoolNodeSet,
                                 client1, client1Connected, wallet1):
    reqs = sendRandomRequests(wallet1, client1, 1)
    waitForSufficientRepliesForRequests(looper, client1, requests=reqs)

    req = reqs[0]
    for node in txnPoolNodeSet:
        key = node.reqHandler.prepare_buy_key(req.identifier, req.reqId)
        proof = node.reqHandler.make_proof(key)
        assert not proof

def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf, looper, txnPoolNodeSet, client1, wallet1, one_node_added,
        client1Connected):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including the primary) so the node that does not receive enough 3PC
    messages falls behind but learns of the view change quickly and starts
    catchup. The other nodes learn of the view change late and thus keep on
    processing requests.
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([1 for e in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID])

    s = start_count()
    requests = sendRandomRequests(wallet1, client1, 10 * Max3PCBatchSize)

    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    waitForSufficientRepliesForRequests(looper, client1, requests=requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)

def test_make_proof_committed_head_used(looper, txnPoolNodeSet, client1,
                                        client1Connected, wallet1):
    reqs = sendRandomRequests(wallet1, client1, 1)
    wait_for_requests_ordered(looper, txnPoolNodeSet, reqs)
    req = reqs[0]

    key = txnPoolNodeSet[0].reqHandler.prepare_buy_key(req.identifier)

    for node in txnPoolNodeSet:
        node.states[DOMAIN_LEDGER_ID].set(key, b'somevalue')

    check_result(txnPoolNodeSet, req, client1, True)

def testOrderingCase1(looper, nodeSet, up, client1, wallet1):
    """
    Scenario -> A client sends requests, and some nodes delay COMMITs to a
    few specific nodes such that those nodes achieve the commit quorum for
    the affected requests later than the other nodes. But all nodes `ORDER`
    the requests in the same order of ppSeqNos.
    https://www.pivotaltracker.com/n/projects/1889887/stories/133655009
    """
    pr, replicas = getPrimaryReplica(nodeSet, instId=0), \
        getNonPrimaryReplicas(nodeSet, instId=0)
    assert len(replicas) == 6

    rep0 = pr
    rep1 = replicas[0]
    rep2 = replicas[1]
    rep3 = replicas[2]
    rep4 = replicas[3]
    rep5 = replicas[4]
    rep6 = replicas[5]

    node0 = rep0.node
    node1 = rep1.node
    node2 = rep2.node
    node3 = rep3.node
    node4 = rep4.node
    node5 = rep5.node
    node6 = rep6.node

    requests = sendRandomRequests(wallet1, client1, 15)

    ppSeqsToDelay = 5
    delayedPpSeqNos = set()

    def specificCommits(wrappedMsg):
        nonlocal node3, node4, node5
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare):
            if len(delayedPpSeqNos) < ppSeqsToDelay:
                delayedPpSeqNos.add(msg.ppSeqNo)
                logger.debug('ppSeqNo {} corresponding to request id {} '
                             'would be delayed'.format(msg.ppSeqNo,
                                                       msg.reqId))
        if isinstance(msg, Commit) and msg.instId == 0 and \
                sender in (n.name for n in (node3, node4, node5)) and \
                msg.ppSeqNo in delayedPpSeqNos:
            return 3

    for node in (node1, node2):
        logger.debug('{} would be delaying commits'.format(node))
        node.nodeIbStasher.delay(specificCommits)

    checkSufficientRepliesForRequests(looper, client1, requests)

    def ensureSlowNodesHaveAllTxns():
        nonlocal node1, node2
        for node in node1, node2:
            assert len(node.domainLedger) == 15

    looper.run(eventually(ensureSlowNodesHaveAllTxns,
                          retryWait=1, timeout=15))

    checkAllLedgersEqual((n.domainLedger for n in
                          (node0, node3, node4, node5, node6)))

    for node in (node1, node2):
        for n in nodeSet:
            if n != node:
                checkLedgerEquality(node.domainLedger, n.domainLedger)

    checkAllLedgersEqual((n.domainLedger for n in nodeSet))
