def test_missing_pp_before_starting_vc(tconf, txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward):
    '''
    - all nodes delay PrePrepares for viewNo=1 with ppSeqNo<4
    - all nodes go to view=1
    - 10 batches are sent (they will not be ordered because of the missing (delayed) PrePrepares)
    - all nodes start a view change to view=2
    - all nodes finish the view change successfully and can continue ordering
    '''
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # 1. delay PrePrepares with ppSeqNo<4
    with delay_rules(all_stashers, delay_3pc(view_no=1, before=4, msgs=PrePrepare)):
        # 2. do view change for view=1
        ensure_view_change(looper, txnPoolNodeSet)
        looper.run(eventually(check_not_in_view_change, txnPoolNodeSet))

        # 3. send requests
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, 10)

        # 4. do view change for view=2
        ensure_view_change(looper, txnPoolNodeSet)

        # 5. ensure everything is fine
        ensureElectionsDone(looper, txnPoolNodeSet)
        ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
        sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)
def view_change_in_between_3pc(looper, nodes, slow_nodes, sdk_pool_handle, sdk_wallet_client, slow_delay=1, wait=None):
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 4)
    delay_3pc_messages(slow_nodes, 0, delay=slow_delay)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 10)
    if wait:
        looper.runFor(wait)

    ensure_view_change_complete(looper, nodes, customTimeout=60)

    reset_delays_and_process_delayeds(slow_nodes)

    sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 5, total_timeout=30)
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 5, total_timeout=30)
def view_change_in_between_3pc(looper, nodes, slow_nodes, sdk_pool_handle, sdk_wallet_client, slow_delay=1, wait=None):
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 4)
    delay_3pc_messages(slow_nodes, 0, delay=slow_delay)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 10)
    if wait:
        looper.runFor(wait)

    ensure_view_change(looper, nodes)
    looper.run(eventually(check_not_in_view_change, nodes))

    reset_delays_and_process_delayeds(slow_nodes)

    ensureElectionsDone(looper=looper, nodes=nodes)
    ensure_all_nodes_have_same_data(looper, nodes)

    sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 5, total_timeout=30)
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 5, total_timeout=30)
def test_backup_stabilized_checkpoint_on_view_change(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    # Delta:1
    backup = txnPoolNodeSet[-1].replicas[1]
    count_of_replicas = len(txnPoolNodeSet[0].replicas)
    with delay_rules_without_processing([n.nodeIbStasher for n in txnPoolNodeSet], ppDelay(instId=0)):
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, REQ_COUNT)
        looper.run(
            eventually(lambda r: assertExp(r.last_ordered_3pc == (0, REQ_COUNT)), backup))

        # assert that all requests are propagated
        for n in txnPoolNodeSet:
            for req in n.requests.values():
                assert req.forwardedTo == count_of_replicas

        ensure_view_change(looper, txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)

        # check that all requests were freed on backups
        for n in txnPoolNodeSet:
            for req in n.requests.values():
                assert req.forwardedTo == count_of_replicas - 1
def view_change_in_between_3pc_random_delays(looper, nodes, slow_nodes, sdk_pool_handle, sdk_wallet_client, tconf, min_delay=0, max_delay=0):
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 4)

    # max delay should not be more than catchup timeout.
    max_delay = max_delay or tconf.NEW_VIEW_TIMEOUT - 1
    delay_3pc_messages(slow_nodes, 0, min_delay=min_delay, max_delay=max_delay)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 10)

    ensure_view_change_complete(looper, nodes,
                                customTimeout=2 * tconf.NEW_VIEW_TIMEOUT + max_delay,
                                exclude_from_check=['check_last_ordered_3pc'])

    reset_delays_and_process_delayeds(slow_nodes)

    sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 10)
def testNodeRequestingConsProof(tconf, txnPoolNodeSet, sdk_node_created_after_some_txns_not_started):
    """
    All of the 4 old nodes delay the processing of LEDGER_STATUS from the newly
    joined node while they are processing requests, which results in them
    sending consistency proofs that are not the same, so the newly joined node
    cannot conclude the state of transactions in the system. So the new node
    requests a consistency proof for a particular range from all nodes.
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = sdk_node_created_after_some_txns_not_started

    # So nodes won't tell the clients about the newly joined node, and the
    # clients don't send any request to the newly joined node
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)

    # The new node sends different ledger statuses to every node so it
    # does not get enough similar consistency proofs
    next_size = 0
    origMethod = new_node.build_ledger_status

    def build_broken_ledger_status(self, ledger_id):
        nonlocal next_size
        if ledger_id != DOMAIN_LEDGER_ID:
            return origMethod(ledger_id)

        size = self.domainLedger.size
        next_size = next_size + 1 if next_size < size else 1
        print("new size {}".format(next_size))

        newRootHash = Ledger.hashToStr(
            self.domainLedger.tree.merkle_tree_hash(0, next_size))
        three_pc_key = self.three_phase_key_for_txn_seq_no(ledger_id, next_size)
        v, p = three_pc_key if three_pc_key else (None, None)
        ledgerStatus = LedgerStatus(1, next_size, v, p, newRootHash, CURRENT_PROTOCOL_VERSION)
        print("dl status {}".format(ledgerStatus))
        return ledgerStatus

    new_node.build_ledger_status = types.MethodType(build_broken_ledger_status, new_node)
    logger.debug('Domain Ledger status sender of {} patched'.format(new_node))

    looper.add(new_node)
    txnPoolNodeSet.append(new_node)

    sdk_send_random_requests(looper, sdk_pool_handle, new_steward_wallet_handle, 10)

    # wait more than `ConsistencyProofsTimeout`
    # TODO: apply configurable timeout here
    # `ConsistencyProofsTimeout` is set to 60 sec, so need to wait more than
    # 60 sec, hence the large timeout. Don't reduce it.
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1], customTimeout=75)

    # Other nodes should have received a request for `CONSISTENCY_PROOF` and
    # processed it.
    for node in txnPoolNodeSet[:-1]:
        assert count_msg_reqs_of_type(node, CONSISTENCY_PROOF) > 0, node
def setup(tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    # Patch the 3phase batch creation method so that it sends an incorrect digest once
    pr, otherR = getPrimaryReplica(txnPoolNodeSet, instId=0), \
        getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, tconf.Max3PCBatchSize)
    stateRoot = pr._ordering_service.get_state_root_hash(DOMAIN_LEDGER_ID, to_str=False)

    origMethod = pr._ordering_service.create_3pc_batch
    malignedOnce = None

    def badMethod(self, ledgerId):
        nonlocal malignedOnce
        pp = origMethod(ledgerId)
        if not malignedOnce:
            pp = updateNamedTuple(pp, digest=pp.digest + '123')
            malignedOnce = True
        return pp

    pr._ordering_service.create_3pc_batch = types.MethodType(badMethod, pr._ordering_service)
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, tconf.Max3PCBatchSize)
    return pr, otherR, stateRoot
def testNodeRequestingConsProof(tconf, txnPoolNodeSet, sdk_node_created_after_some_txns_not_started):
    """
    All of the 4 old nodes delay the processing of LEDGER_STATUS from the newly
    joined node while they are processing requests, which results in them
    sending consistency proofs that are not the same, so the newly joined node
    cannot conclude the state of transactions in the system. So the new node
    requests a consistency proof for a particular range from all nodes.
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = sdk_node_created_after_some_txns_not_started

    # So nodes won't tell the clients about the newly joined node, and the
    # clients don't send any request to the newly joined node
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)

    # The new node sends different ledger statuses to every node so it
    # does not get enough similar consistency proofs
    next_size = 0
    origMethod = new_node.build_ledger_status

    def build_broken_ledger_status(self, ledger_id):
        nonlocal next_size
        if ledger_id != DOMAIN_LEDGER_ID:
            return origMethod(ledger_id)

        size = self.primaryStorage.size
        next_size = next_size + 1 if next_size < size else 1
        print("new size {}".format(next_size))

        newRootHash = Ledger.hashToStr(
            self.domainLedger.tree.merkle_tree_hash(0, next_size))
        three_pc_key = self.three_phase_key_for_txn_seq_no(ledger_id, next_size)
        v, p = three_pc_key if three_pc_key else (None, None)
        ledgerStatus = LedgerStatus(1, next_size, v, p, newRootHash)
        print("dl status {}".format(ledgerStatus))
        return ledgerStatus

    new_node.build_ledger_status = types.MethodType(build_broken_ledger_status, new_node)
    logger.debug('Domain Ledger status sender of {} patched'.format(new_node))

    looper.add(new_node)
    txnPoolNodeSet.append(new_node)

    sdk_send_random_requests(looper, sdk_pool_handle, new_steward_wallet_handle, 10)

    # wait more than `ConsistencyProofsTimeout`
    # TODO: apply configurable timeout here
    # `ConsistencyProofsTimeout` is set to 60 sec, so need to wait more than
    # 60 sec, hence the large timeout. Don't reduce it.
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1], customTimeout=75)

    # Other nodes should have received a request for `CONSISTENCY_PROOF` and
    # processed it.
    for node in txnPoolNodeSet[:-1]:
        assert count_msg_reqs_of_type(node, CONSISTENCY_PROOF) > 0, node
def test_working_has_no_warn_log_msg(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    clear_unordered_requests(*txnPoolNodeSet)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)
    looper.runFor(1.2 * UNORDERED_CHECK_FREQ)

    assert all(len(node.monitor.unordered_requests) == 0
               for node in txnPoolNodeSet)
def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    The node should do only a fixed number of catchup rounds. To check this,
    delay Prepares and Commits for 2 non-primary nodes by a large amount, which
    is equivalent to the loss of Prepares and Commits. Make sure the 2 nodes
    have a different last prepared certificate from the other two. Then do a
    view change, make sure the view change completes and the pool does not
    process the requests that were prepared by only a subset of the nodes.
    """
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ledger_summary = txnPoolNodeSet[0].ledger_summary

    slow_nodes = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)[-2:]]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]

    # Make the nodes slow to process Prepares and Commits
    for node in slow_nodes:
        node.nodeIbStasher.delay(pDelay(120, 0))
        node.nodeIbStasher.delay(cDelay(120, 0))

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)
    looper.runFor(3)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    def last_prepared(nodes):
        lst = [n.master_replica.last_prepared_certificate_in_view() for n in nodes]
        # All nodes have the same last prepared
        assert check_if_all_equal_in_list(lst)
        return lst[0]

    last_prepared_slow = last_prepared(slow_nodes)
    last_prepared_fast = last_prepared(fast_nodes)

    # Check `slow_nodes` and `fast_nodes` set different last_prepared
    assert last_prepared_fast != last_prepared_slow

    # View change complete
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # The requests which were prepared by only a subset of the nodes were
    # not ordered
    assert txnPoolNodeSet[0].ledger_summary == ledger_summary

    for node in slow_nodes:
        node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Make sure the pool is functional
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    last_prepared(txnPoolNodeSet)
def test_node_requests_missing_three_phase_messages(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    2 of 4 nodes go down, so the pool cannot process any more incoming requests.
    A new request comes in. After a while those 2 nodes come back alive.
    Another request comes in. Check that the two previously disconnected nodes
    request the missing PREPAREs and PREPREPAREs and the pool successfully
    handles both transactions after that.
    """
    INIT_REQS_CNT = 10
    MISSING_REQS_CNT = 1
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node, stopNode=False)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, MISSING_REQS_CNT)

    def check_pp_out_of_sync(alive_nodes, disconnected_nodes):
        def get_last_pp(node):
            return node.replicas._master_replica.lastPrePrepare

        last_3pc_key_alive = get_last_pp(alive_nodes[0])
        for node in alive_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_alive

        last_3pc_key_disconnected = get_last_pp(disconnected_nodes[0])
        assert last_3pc_key_disconnected != last_3pc_key_alive
        for node in disconnected_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_disconnected

    looper.run(eventually(check_pp_out_of_sync, alive_nodes, disconnected_nodes,
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    for node in disconnected_nodes:
        reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, node)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
def testNewNodeCatchupWhileIncomingRequests(looper, txnPoolNodeSet, testNodeClass, tdir, tconf, sdk_pool_handle, sdk_wallet_steward, allPluginsPath):
    """
    A new node joins while transactions are happening. Its catchup requests
    specify how far it has to catch up, which would be less than the other
    nodes' ledger size. In the meantime, the new node will stash all requests.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)

    def chkAfterCall(self, req, frm):
        r = self.processCatchupReq(req, frm)
        typ = getattr(req, f.LEDGER_ID.nm)
        if typ == DOMAIN_LEDGER_ID:
            ledger = self.getLedgerForMsg(req)
            assert req.catchupTill <= ledger.size
        return r

    for node in txnPoolNodeSet:
        node.nodeMsgRouter.routes[CatchupReq] = \
            types.MethodType(chkAfterCall, node.ledgerManager)
        node.nodeIbStasher.delay(cqDelay(3))

    print('Sending 5 requests')
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, 5)
    looper.runFor(1)
    new_steward_name = randomString()
    new_node_name = "Epsilon"
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        nodeClass=testNodeClass, allPluginsPath=allPluginsPath, autoStart=True)
    sdk_pool_refresh(looper, sdk_pool_handle)
    txnPoolNodeSet.append(new_node)
    looper.runFor(2)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)

    # TODO select or create a timeout for this case in 'waits'
    looper.run(eventually(checkNodeDataForEquality, new_node, *txnPoolNodeSet[:-1],
                          retryWait=1, timeout=80))
    assert new_node.spylog.count(TestNode.processStashedOrderedReqs) > 0
def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet, sdk_node_created_after_some_txns, sdk_wallet_client):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes, but one of the nodes does not reply, so the newly joined node cannot
    complete the process till the timeout and then requests the missing
    transactions.
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = sdk_node_created_after_some_txns
    new_node_ledger = new_node.ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID]
    old_size = len(new_node_ledger.ledger)
    old_size_others = txnPoolNodeSet[0].ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID].ledger.size

    # So nodes won't tell the clients about the newly joined node, and the
    # clients don't send any request to the newly joined node
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)

    def ignoreCatchupReq(self, req, frm):
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the nodes does not process catchup requests.
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    badReplica = npr[0]
    badNode = badReplica.node
    txnPoolNodeSet.append(new_node)

    badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType(ignoreCatchupReq, badNode.ledgerManager)
    more_requests = 10
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, more_requests)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
        reduced_catchup_timeout_conf.CatchupTransactionsTimeout
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1], customTimeout=timeout)

    new_size = len(new_node_ledger.ledger)

    # The new node ledger might catch up some transactions from the batch of
    # `more_requests` transactions
    assert old_size_others - old_size <= new_node_ledger.num_txns_caught_up <= new_size - old_size
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1], customTimeout=timeout)
def test_backups_dont_order_while_reordering(txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, looper):
    """
    This test shows that, for now, we stop ordering on backups until the master
    has finished reordering after a view change.
    Steps:
    1. Delay ordering on the master replica to collect requests to reorder after the view change
    2. Make sure that the master didn't order
    3. Delay old_view_pp_request and force a view change
    4. Ensure that the backup replicas on all nodes cannot order because the primary
       is waiting for reordering on the master
    """

    def check_pp_count(node, expected_count, inst_id=0):
        assert node.replicas._replicas[inst_id].last_ordered_3pc[1] == expected_count, \
            "master last ordered: {}, backup_last_ordered: {}".format(
                node.master_replica._ordering_service.batches,
                node.replicas._replicas[1]._ordering_service.batches)

    # We expect that after the view change Gamma will be primary on the backup
    delayed_node = txnPoolNodeSet[-2]
    fast_nodes = [n for n in txnPoolNodeSet if n != delayed_node]
    master_pp_seq_no_before = delayed_node.master_replica.last_ordered_3pc[1]
    with delay_rules_without_processing(delayed_node.nodeIbStasher,
                                        pDelay(instId=MASTER_REPLICA_INDEX),
                                        cDelay(instId=MASTER_REPLICA_INDEX),
                                        msg_req_delay(),
                                        msg_rep_delay(),
                                        ppDelay(instId=MASTER_REPLICA_INDEX)):
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, REQS_FOR_REORDERING)
        looper.run(eventually(check_pp_count, delayed_node, REQS_FOR_REORDERING, BACKUP_INST_ID))
        assert delayed_node.master_replica.last_ordered_3pc[1] == master_pp_seq_no_before

        with delay_rules([n.nodeIbStasher for n in txnPoolNodeSet], old_view_pp_request_delay()):
            ensure_view_change(looper, txnPoolNodeSet)

            # check that the view change is finished on all nodes
            looper.run(eventually(check_not_in_view_change, txnPoolNodeSet))

            # check that the delayed node is selected as backup primary on all
            # fast nodes, but not on the delayed node itself
            def check_backup_primaries():
                assert delayed_node.replicas[BACKUP_INST_ID]._consensus_data.primary_name is None
                assert delayed_node.master_replica.last_ordered_3pc[1] == master_pp_seq_no_before
                assert all(
                    n.replicas[BACKUP_INST_ID]._consensus_data.primary_name ==
                    generateName(delayed_node.name, instId=BACKUP_INST_ID)
                    for n in fast_nodes
                )

            looper.run(eventually(check_backup_primaries))

            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, REQS_FOR_REORDERING)
            for node in txnPoolNodeSet:
                assert node.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1] == 0
def test_view_change_by_order_stashed_on_3_nodes_and_catchup_on_1_node(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward):
    '''
    - COMMITs are delayed on all nodes
    - All nodes start a view change with a prepared certificate (for the delayed message)
    - COMMITs come during the view change for 3 nodes
    - So these 3 nodes finish the view change by processing Commits and Ordered msgs
      during the view change (in between rounds of catchup).
    - The lagging (4th) node receives the missing txns as part of catch-up (during the
      view change) and also finishes the view change.
    '''
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = txnPoolNodeSet[:-1]
    slow_stasher = slow_node.nodeIbStasher
    fast_stashers = [n.nodeIbStasher for n in fast_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    initial_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    txns_count = 4
    eventual_last_ordered = initial_last_ordered[0], initial_last_ordered[1] + txns_count
    batches_count = initial_last_ordered[1]

    with delay_rules(all_stashers, vcd_delay()):
        # the lagging node is slow in receiving Commits and Catchup msgs
        with delay_rules(slow_stasher, cDelay()):
            with delay_rules(slow_stasher, lsDelay(), msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                # fast nodes will receive and order Commits for last_prepared_cert during the view change
                with delay_rules(fast_stashers, cDelay()):
                    with delay_rules(fast_stashers, lsDelay(), msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, txns_count)
                        batches_count += txns_count

                        looper.run(eventually(check_prepare_certificate, txnPoolNodeSet, batches_count))
                        check_last_ordered_3pc_on_master(txnPoolNodeSet, initial_last_ordered)

                        # trigger view change on all nodes
                        ensure_view_change(looper, txnPoolNodeSet)
                        looper.run(eventually(check_last_prepared_certificate_after_view_change_start,
                                              txnPoolNodeSet, eventual_last_ordered))

                # check that all txns are ordered till last prepared on fast nodes
                looper.run(eventually(check_last_ordered_3pc_on_master, fast_nodes,
                                      eventual_last_ordered, timeout=30))

        # check that all txns are ordered till last prepared on the slow node as a result of catchup
        looper.run(eventually(check_last_ordered_3pc_on_master, [slow_node],
                              eventual_last_ordered, timeout=30))

    # wait for view change done on all nodes
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)
def test_sdk_no_ordering_during_syncup(tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    non_primary_replica = getNonPrimaryReplicas(txnPoolNodeSet, instId=0)[0]

    # Put the non-primary node into syncing state once the first Prepare is received
    make_node_syncing(non_primary_replica, Prepare)

    # Patch the non-primary node to fail if ordering is executed
    fail_on_execute_batch_on_master(non_primary_replica.node)

    # Send requests. The non-primary node should not fail since no ordering is
    # called while syncing
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, tconf.Max3PCBatchSize)
    looper.runFor(5)
def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet, looper, tdir, tconf, allPluginsPath, sdk_pool_handle, sdk_wallet_steward, sdk_wallet_client):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes, but one of the nodes does not reply, so the newly joined node cannot
    complete the process till the timeout and then requests the missing
    transactions.
    """
    def ignoreCatchupReq(self, req, frm):
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the nodes does not process catchup requests.
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    badReplica = npr[0]
    badNode = badReplica.node
    badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType(ignoreCatchupReq, badNode.ledgerManager)

    more_requests = 10
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, more_requests)

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
        reduced_catchup_timeout_conf.CatchupTransactionsTimeout
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout,
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
def test_view_change_by_order_stashed_on_all(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward):
    '''
    - COMMITs are delayed on all nodes
    - All nodes start a view change with a prepared certificate (for the delayed message)
    - COMMITs come during the view change for all nodes
    - So all nodes finish the view change by processing Commits and Ordered msgs
      during the view change (in between rounds of catchup).
    '''
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    initial_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    txns_count = 4
    eventual_last_ordered = initial_last_ordered[0], initial_last_ordered[1] + txns_count

    with delay_rules(all_stashers, vcd_delay()):
        with delay_rules(all_stashers, cDelay()):
            with delay_rules(all_stashers, lsDelay(), msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, txns_count)

                looper.run(eventually(check_prepare_certificate, txnPoolNodeSet, txns_count))
                check_last_ordered_3pc_on_master(txnPoolNodeSet, initial_last_ordered)

                # trigger view change on all nodes
                ensure_view_change(looper, txnPoolNodeSet)
                looper.run(eventually(check_last_prepared_certificate_after_view_change_start,
                                      txnPoolNodeSet, eventual_last_ordered))

        # check that all txns are ordered till last prepared
        looper.run(eventually(check_last_ordered_3pc_on_master, txnPoolNodeSet,
                              eventual_last_ordered, timeout=30))

    # wait for view change done on all nodes
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)
def test_unstash_waiting_for_first_batch_ordered_after_catchup(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tconf):
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})
    other_stashers = [n.nodeIbStasher for n in other_nodes]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

    last_ordered_lagged_before = lagged_node.master_last_ordered_3PC
    # do not process any message reqs for PrePrepares
    with delay_rules_without_processing(lagged_node.nodeIbStasher,
                                        msg_rep_delay(types_to_delay=[PREPARE, PREPREPARE])):
        with delay_rules(lagged_node.nodeIbStasher, cDelay()):
            ensure_view_change(looper, txnPoolNodeSet)
            looper.run(eventually(check_not_in_view_change, txnPoolNodeSet))
            ensureElectionsDone(looper, other_nodes,
                                instances_list=range(getRequiredInstances(len(txnPoolNodeSet))))

            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

        # delay Commits on all nodes so that there are some PrePrepares still stashed after catchup
        with delay_rules(other_stashers, cDelay()):
            pre_prep_before = len(recvdPrePrepareForInstId(lagged_node, 0))
            sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
            # wait till the lagged node receives the new PrePrepares;
            # they will be stashed as WAITING_FIRST_BATCH_IN_VIEW
            looper.run(eventually(lambda: assertExp(
                len(recvdPrePrepareForInstId(lagged_node, 0)) == pre_prep_before + 2)))

            # catchup the lagged node
            # the latest 2 PrePrepares are still stashed
            lagged_node.start_catchup()
            looper.run(eventually(lambda: assertExp(
                lagged_node.master_last_ordered_3PC > last_ordered_lagged_before)))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2)

    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
def provoke_and_check_view_change(looper, nodes, newViewNo, sdk_pool_handle, sdk_wallet_client):
    if {n.viewNo for n in nodes} == {newViewNo}:
        return True

    # If the throughput of every node has gone down, check that the view has changed
    tr = [n.monitor.isMasterThroughputTooLow() for n in nodes]
    if all(tr):
        logger.info('Throughput ratio gone down, it is {}'.format(tr))
        checkViewNoForNodes(nodes, newViewNo)
    else:
        logger.info('Master instance has not degraded yet, '
                    'sending more requests')
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client)
        assert False
def do_view_change_with_delayed_commits_on_all_but_one(nodes, nodes_without_one_stashers, except_node, looper, sdk_pool_handle, sdk_wallet_client):
    new_view_no = except_node.viewNo + 1
    old_last_ordered = except_node.master_replica.last_ordered_3pc
    # delay commits for all nodes except node X
    with delay_rules(nodes_without_one_stashers, cDelay(sys.maxsize)):
        # send one request
        requests2 = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

        def last_ordered(node: Node, last_ordered):
            assert node.master_replica.last_ordered_3pc == last_ordered

        # wait until except_node has ordered the txn
        looper.run(eventually(last_ordered, except_node,
                              (except_node.viewNo, old_last_ordered[1] + 1)))

        # trigger view change on all nodes
        for node in nodes:
            node.view_changer.on_master_degradation()

        # wait for view change done on all nodes
        looper.run(eventually(view_change_done, nodes, new_view_no))

    sdk_get_replies(looper, requests2)
def testRequestDynamicValidation(tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Check that for requests which fail dynamic (state-based) validation,
    a REJECT is sent to the client
    :return:
    """
    names = {node.master_replica.name: 0 for node in txnPoolNodeSet}

    def rejectingMethod(self, req, pp_time):
        names[self.name] += 1
        # Raise a rejection for the last request of the batch
        if tconf.Max3PCBatchSize - names[self.name] == 0:
            raise UnauthorizedClientRequest(req.identifier,
                                            req.reqId,
                                            'Simulated rejection')

    for node in txnPoolNodeSet:
        for replica in node.replicas._replicas.values():
            replica._ordering_service._do_dynamic_validation = types.MethodType(
                rejectingMethod, replica._ordering_service)

    reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, tconf.Max3PCBatchSize)
    sdk_get_and_check_replies(looper, reqs[:-1])
    with pytest.raises(RequestRejectedException) as e:
        sdk_get_and_check_replies(looper, reqs[-1:])
    assert 'Simulated rejection' in e._excinfo[1].args[0]
def testRequestDynamicValidation(tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Check that for requests which fail dynamic (state-based) validation,
    a REJECT is sent to the client
    :return:
    """
    origMethods = []
    names = {node.name: 0 for node in txnPoolNodeSet}

    def rejectingMethod(self, req):
        names[self.name] += 1
        # Raise a rejection for the last request of the batch
        if tconf.Max3PCBatchSize - names[self.name] == 0:
            raise UnauthorizedClientRequest(req.identifier,
                                            req.reqId,
                                            'Simulated rejection')

    for node in txnPoolNodeSet:
        origMethods.append(node.doDynamicValidation)
        node.doDynamicValidation = types.MethodType(rejectingMethod, node)

    reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, tconf.Max3PCBatchSize)
    sdk_get_and_check_replies(looper, reqs[:-1])
    with pytest.raises(RequestRejectedException) as e:
        sdk_get_and_check_replies(looper, reqs[-1:])
    assert 'Simulated rejection' in e._excinfo[1].args[0]

    for i, node in enumerate(txnPoolNodeSet):
        node.doDynamicValidation = origMethods[i]
def testRequestDynamicValidation(tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Check that for requests which fail dynamic (state-based) validation,
    a REJECT is sent to the client
    :return:
    """
    # TODO: Change this test to use the SDK.
    # For now the SDK can't distinguish REJECTED messages and simply raises IndyError
    origMethods = []
    names = {node.name: 0 for node in txnPoolNodeSet}

    def rejectingMethod(self, req):
        names[self.name] += 1
        # Raise a rejection for the last request of the batch
        if tconf.Max3PCBatchSize - names[self.name] == 0:
            raise UnauthorizedClientRequest(req.identifier,
                                            req.reqId,
                                            'Simulated rejection')

    for node in txnPoolNodeSet:
        origMethods.append(node.doDynamicValidation)
        node.doDynamicValidation = types.MethodType(rejectingMethod, node)

    reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, tconf.Max3PCBatchSize)
    sdk_get_and_check_replies(looper, reqs[:-1])
    with pytest.raises(RequestRejectedException) as e:
        sdk_get_and_check_replies(looper, reqs[-1:])
    assert 'Simulated rejection' in e._excinfo[1].args[0]

    for i, node in enumerate(txnPoolNodeSet):
        node.doDynamicValidation = origMethods[i]
def test_replica_removing_in_ordering(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, chkFreqPatched, view_change):
    """
    1. Start ordering (send pre-prepares on backup)
    2. Remove replica
    3. Finish ordering
    4. Check monitor and replicas count
    """
    node = txnPoolNodeSet[0]
    start_replicas_count = node.replicas.num_replicas
    instance_id = start_replicas_count - 1
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    with delay_rules(stashers, cDelay(delay=sys.maxsize)):
        req = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

        def chk():
            assert len(node.requests) > 0

        looper.run(eventually(chk))
        digest = Request(**req[0][0]).digest
        old_forwarded_to = node.requests[digest].forwardedTo
        node.replicas.remove_replica(instance_id)
        assert old_forwarded_to - 1 == node.requests[digest].forwardedTo

    sdk_get_replies(looper, req)
    looper.run(eventually(check_checkpoint_finalize, txnPoolNodeSet))
    _check_replica_removed(node, start_replicas_count, instance_id)
    assert not node.monitor.isMasterDegraded()
    assert len(node.requests) == 0
def test_no_propagate_request_on_different_last_ordered_on_master_before_vc(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    Send a random request and do a view change; then fast_nodes (1, 4 - without
    the primary after the next view change) have already ordered the transaction
    on master and slow_nodes have not. Check ordering on slow_nodes.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)
    master_instance = txnPoolNodeSet[0].master_replica.instId
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    with delay_rules(nodes_stashers, cDelay(delay=sys.maxsize)):
        # send one request
        requests = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
        last_ordered_for_slow = slow_nodes[0].master_replica.last_ordered_3pc
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(eventually(check_last_ordered, fast_nodes, master_instance,
                              (old_view_no, old_last_ordered[1] + 1)))

        # trigger view change on all nodes
        for node in txnPoolNodeSet:
            node.view_changer.on_master_degradation()

        # wait for view change done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)

    sdk_get_replies(looper, requests)
    looper.run(eventually(check_last_ordered, slow_nodes, master_instance,
                          (old_view_no, last_ordered_for_slow[1] + 1)))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
def testQueueingReqFromFutureView(delayed_perf_chk, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Test if every node queues 3-phase requests (PRE-PREPARE, PREPARE and COMMIT)
    that come from a view which is greater than the current view.
    - Delay reception and processing of view change messages by a non-primary for the
      master instance => it starts receiving 3-phase commit messages for the next view
    """
    lagging_node = get_last_master_non_primary_node(txnPoolNodeSet)
    old_view_no = lagging_node.viewNo

    # Delay processing of InstanceChange and ViewChangeDone so the node stashes
    # 3PC messages
    delay_ic = 60
    lagging_node.nodeIbStasher.delay(icDelay(delay_ic))
    lagging_node.nodeIbStasher.delay(vcd_delay(delay_ic))
    logger.debug('{} will delay its view change'.format(lagging_node))

    def chk_fut_view(view_no, is_empty):
        length = len(lagging_node.msgsForFutureViews.get(view_no, ()))
        if is_empty:
            assert length == 0
        else:
            assert length > 0
        return length

    # No messages queued for the future view
    chk_fut_view(old_view_no + 1, is_empty=True)
    logger.debug('{} does not have any messages for future views'.format(lagging_node))

    # Every node except the lagging node should do a view change
    ensure_view_change(looper,
                       [n for n in txnPoolNodeSet if n != lagging_node],
                       [lagging_node])

    # send more requests that will be queued for the lagged node
    reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)
    l = looper.run(eventually(chk_fut_view, old_view_no + 1, False, retryWait=1))
    logger.debug('{} has {} messages for future views'.format(lagging_node, l))
    sdk_get_replies(looper, reqs)

    # reset delays for the lagging_node so that it finally makes the view change
    lagging_node.reset_delays_and_process_delayeds()

    # Eventually no messages queued for the future view
    looper.run(eventually(chk_fut_view, old_view_no + 1, True,
                          retryWait=1, timeout=delay_ic + 10))
    logger.debug('{} exhausted pending messages for future views'.format(lagging_node))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2)
def nodeStashingOrderedRequests(txnPoolNodeSet, sdk_node_created_after_some_txns):
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = sdk_node_created_after_some_txns
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(cqDelay(5))
    txnPoolNodeSet.append(new_node)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, new_steward_wallet_handle, sdk_pool_handle)
    sdk_send_random_requests(looper, sdk_pool_handle, new_steward_wallet_handle, 10)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    def stashing():
        assert new_node.mode != Mode.participating
        assert len(new_node.stashedOrderedReqs) > 0
        # assert len(newNode.reqsFromCatchupReplies) > 0

    timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.run(eventually(stashing, retryWait=1, timeout=timeout))
def test_sdk_many_stewards_send_many(looper, sdk_pool_handle, sdk_wallet_stewards):
    for sdk_wallet_steward in sdk_wallet_stewards:
        resp_task = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, 30)
        repl = sdk_get_and_check_replies(looper, resp_task, timeout=90)
        for _, resp in repl:
            assert resp['result']
def test_view_change_during_unstash(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, tconf):
    slow_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]
    slow_stasher = slow_node.nodeIbStasher
    other_stashers = [n.nodeIbStasher for n in other_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Prevent ordering of some requests
    start_delaying(all_stashers, delay_3pc(after=7, msgs=(Prepare, Commit)))

    # Stop ordering on the slow node and send requests
    slow_node_after_5 = start_delaying(slow_stasher, delay_3pc(after=5, msgs=Commit))
    slow_node_until_5 = start_delaying(slow_stasher, delay_3pc(after=0))
    reqs_view_0 = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 8)

    # Make the pool order the first 2 batches and pause
    pool_after_3 = start_delaying(other_stashers, delay_3pc(after=3))
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))

    # Start catchup, continue ordering everywhere (except the two last batches on the slow node)
    with delay_rules(slow_stasher, cr_delay()):
        slow_node._do_start_catchup(just_started=False)
        looper.run(eventually(check_catchup_is_started, slow_node))
        stop_delaying_and_process(pool_after_3)
        looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 7))

    # Finish catchup and continue processing on the slow node
    looper.run(eventually(check_catchup_is_finished, slow_node))
    stop_delaying_and_process(slow_node_until_5)
    looper.run(eventually(check_nodes_ordered_till, [slow_node], 0, 5))

    # Start a view change and allow the slow node to get the remaining commits
    with delay_rules(all_stashers, icDelay()):
        for node in txnPoolNodeSet:
            node.view_changer.on_master_degradation()
        looper.runFor(0.1)
    stop_delaying_and_process(slow_node_after_5)

    # Ensure that the expected number of requests was ordered
    replies = sdk_get_replies(looper, reqs_view_0)
    for rep in replies[:6]:
        sdk_check_reply(rep)

    # Ensure that everything is ok
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
def testOrderingCase2(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Scenario -> A client sends requests, and some nodes delay COMMITs to a few
    specific nodes such that some nodes achieve commit quorum later for those
    requests compared to other nodes. But all nodes ORDER the requests in the
    same order of ppSeqNos.
    https://www.pivotaltracker.com/n/projects/1889887/stories/133655009
    """
    pr, replicas = getPrimaryReplica(txnPoolNodeSet, instId=0), \
        getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    assert len(replicas) == 6

    rep0 = pr
    rep1 = replicas[0]
    rep2 = replicas[1]
    rep3 = replicas[2]
    rep4 = replicas[3]
    rep5 = replicas[4]
    rep6 = replicas[5]

    node0 = rep0.node
    node1 = rep1.node
    node2 = rep2.node
    node3 = rep3.node
    node4 = rep4.node
    node5 = rep5.node
    node6 = rep6.node

    ppSeqsToDelay = 5
    commitDelay = 3  # delay each COMMIT by this number of seconds
    delayedPpSeqNos = set()

    requestCount = 10

    def specificCommits(wrappedMsg):
        nonlocal node3, node4, node5
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare):
            if len(delayedPpSeqNos) < ppSeqsToDelay:
                delayedPpSeqNos.add(msg.ppSeqNo)
                logger.debug('ppSeqNo {} will be delayed'.format(msg.ppSeqNo))
        if isinstance(msg, Commit) and msg.instId == 0 and \
                sender in (n.name for n in (node3, node4, node5)) and \
                msg.ppSeqNo in delayedPpSeqNos:
            return commitDelay

    for node in (node1, node2):
        logger.debug('{} would be delaying commits'.format(node))
        node.nodeIbStasher.delay(specificCommits)

    sdk_reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, requestCount)

    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=timeout)
    sdk_get_and_check_replies(looper, sdk_reqs)
def test_make_result_bls_enabled(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    req_dict, _ = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)[0]
    req = sdk_json_to_request_object(req_dict)
    wait_for_requests_ordered(looper, txnPoolNodeSet, [req])

    assert req.protocolVersion
    assert req.protocolVersion >= PlenumProtocolVersion.STATE_PROOF_SUPPORT.value

    check_result(txnPoolNodeSet, req, True)
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including the primary) so the node that does not receive enough 3PC
    messages is behind but learns of the view change quickly and starts
    catchup. Other nodes learn of the view change late and thus keep on
    processing requests.
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([1 for e in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.startCatchUpProcess.__name__)
                    if e.params['ledgerId'] == DOMAIN_LEDGER_ID])

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 10 * Max3PCBatchSize)

    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2 * Max3PCBatchSize)
    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
def testNewNodeCatchupWhileIncomingRequests(looper, txnPoolNodeSet, testNodeClass, tdir, tconf, sdk_pool_handle, sdk_wallet_steward, allPluginsPath):
    """
    A new node joins while transactions are happening. Its catchup requests
    specify how far it has to catch up, which would be less than the other
    nodes' ledger size. In the meantime, the new node will stash all requests.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)

    def chkAfterCall(self, req, frm):
        r = self.processCatchupReq(req, frm)
        typ = getattr(req, f.LEDGER_ID.nm)
        if typ == DOMAIN_LEDGER_ID:
            ledger = self.getLedgerForMsg(req)
            assert req.catchupTill <= ledger.size
        return r

    for node in txnPoolNodeSet:
        node.nodeMsgRouter.routes[CatchupReq] = \
            types.MethodType(chkAfterCall, node.ledgerManager)
        node.nodeIbStasher.delay(cqDelay(3))

    print('Sending 5 requests')
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, 5)
    looper.runFor(1)
    new_steward_name = randomString()
    new_node_name = "Epsilon"
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        nodeClass=testNodeClass, allPluginsPath=allPluginsPath, autoStart=True)
    sdk_pool_refresh(looper, sdk_pool_handle)
    txnPoolNodeSet.append(new_node)
    looper.runFor(2)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)

    # TODO select or create a timeout for this case in 'waits'
    looper.run(eventually(checkNodeDataForEquality, new_node, *txnPoolNodeSet[:-1],
                          retryWait=1, timeout=150))
    assert new_node.spylog.count(TestNode.processStashedOrderedReqs) > 0
def setup(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    global initial_ledger_size
    A, B, C, D = txnPoolNodeSet  # type: TestNode
    lagged_node = A
    frm = [B, C, D]
    delay(Commit, frm=frm, to=lagged_node, howlong=howlong)
    initial_ledger_size = lagged_node.domainLedger.size
    request_couple_json = sdk_send_random_requests(
        looper, sdk_pool_handle, sdk_wallet_client, 1)
    return request_couple_json
def test_slow_node_has_warn_unordered_log_msg(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    clear_unordered_requests(*txnPoolNodeSet)

    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node
    delaysCommitProcessing(slow_node, delay=3 * UNORDERED_CHECK_FREQ)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)
    looper.runFor(2 * UNORDERED_CHECK_FREQ)

    assert all(len(node.monitor.unordered_requests) == 0
               for node in txnPoolNodeSet
               if node.name != slow_node.name)
    assert len(slow_node.monitor.unordered_requests) != 0

    # Check that after being ordered, a request is no longer logged
    looper.runFor(2 * UNORDERED_CHECK_FREQ)
    clear_unordered_requests(*txnPoolNodeSet)
    looper.runFor(2 * UNORDERED_CHECK_FREQ)
    assert all(len(node.monitor.unordered_requests) == 0
               for node in txnPoolNodeSet)
def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet, looper, tdir, tconf, allPluginsPath, sdk_pool_handle, sdk_wallet_steward, sdk_wallet_client):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes, but one of the nodes does not reply, so the newly joined node cannot
    complete the process till the timeout and then requests the missing
    transactions.
    """
    def ignoreCatchupReq(self, req, frm):
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the nodes does not process catchup requests.
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    badReplica = npr[0]
    badNode = badReplica.node
    badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType(ignoreCatchupReq, badNode.ledgerManager)

    more_requests = 10
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, more_requests)

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
        reduced_catchup_timeout_conf.CatchupTransactionsTimeout
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1], customTimeout=timeout)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1], customTimeout=timeout)
def setup(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    global initial_ledger_size
    A, B, C, D = txnPoolNodeSet  # type: TestNode
    lagged_node = A
    frm = [B, C, D]
    delay(Propagate, frm=frm, to=lagged_node, howlong=howlong)
    # Delay MessageRep for a long time, simulating loss, since a missing
    # Propagate is requested and would otherwise arrive via MessageRep
    A.nodeIbStasher.delay(msg_rep_delay(10 * howlong, [PROPAGATE, ]))
    initial_ledger_size = lagged_node.domainLedger.size
    request_couple_json = sdk_send_random_requests(
        looper, sdk_pool_handle, sdk_wallet_client, 1)
    return request_couple_json
def test_make_proof_committed_head_used(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    req_dict, _ = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)[0]
    req = sdk_json_to_request_object(req_dict)
    wait_for_requests_ordered(looper, txnPoolNodeSet, [req])

    req_handler = txnPoolNodeSet[0].get_req_handler(DOMAIN_LEDGER_ID)
    key = req_handler.prepare_buy_key(req.identifier, req.reqId)

    for node in txnPoolNodeSet:
        node.states[DOMAIN_LEDGER_ID].set(key, b'somevalue')

    check_result(txnPoolNodeSet, req, True)
def testRequestsSize(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, noRetryReq):
    clients = []
    for i in range(4):
        clients.append(sdk_add_new_nym(looper, sdk_pool_handle, sdk_wallet_steward))

    numRequests = 250
    for (_, nym) in clients:
        logger.debug("{} sending {} requests".format(nym, numRequests))
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, numRequests)
        logger.debug("{} sent {} requests".format(nym, numRequests))
    for node in txnPoolNodeSet:
        logger.debug("{} has requests {} with size {}".format(
            node, len(node.requests), get_size(node.requests)))
        for replica in node.replicas.values():
            logger.debug("{} has prepares {} with size {}".format(
                replica, len(replica.prepares), get_size(replica.prepares)))
            logger.debug("{} has commits {} with size {}".format(
                replica, len(replica.commits), get_size(replica.commits)))
def setup(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    def _clean(*args):
        pass

    A, B, C, D = txnPoolNodeSet  # type: TestNode
    delay(Propagate, frm=[B, C, D], to=A, howlong=howlong)
    # Delay MessageRep for a long time, simulating loss, since a missing
    # Propagate is requested and would otherwise arrive via MessageRep
    A.nodeIbStasher.delay(msg_rep_delay(10 * howlong, [PROPAGATE, ]))

    # disable the _clean method which removes req.key from the requests map
    A.requests._clean = types.MethodType(_clean, A.requests)

    request_couple_json = sdk_send_random_requests(
        looper, sdk_pool_handle, sdk_wallet_client, reqCount)
    return request_couple_json
def setup(tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    # Patch the 3phase batch creation method so that it sends an incorrect digest once
    pr, otherR = getPrimaryReplica(txnPoolNodeSet, instId=0), \
        getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, tconf.Max3PCBatchSize)
    stateRoot = pr.stateRootHash(DOMAIN_LEDGER_ID, to_str=False)

    origMethod = pr.create3PCBatch
    malignedOnce = None

    def badMethod(self, ledgerId):
        nonlocal malignedOnce
        pp = origMethod(ledgerId)
        if not malignedOnce:
            pp = updateNamedTuple(pp, digest=pp.digest + '123')
            malignedOnce = True
        return pp

    pr.create3PCBatch = types.MethodType(badMethod, pr)
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, tconf.Max3PCBatchSize)
    return pr, otherR, stateRoot
def testOneNodeAltersAClientRequest(looper, txnPoolNodeSet, evilAlpha, sdk_pool_handle, sdk_wallet_client):
    """
    The malicious Alpha node sends an incorrect Propagate. This test checks that
    the other nodes raise InsufficientCorrectSignatures when validating this Propagate.
    """
    alpha = txnPoolNodeSet[0]
    goodNodes = list(txnPoolNodeSet)
    goodNodes.remove(alpha)

    # delay incoming client messages for the good nodes for a very long time;
    # this gives Alpha a chance to send a propagate message
    for n in goodNodes:  # type: TestNode
        n.nodeIbStasher.delay(ppDelay(sys.maxsize))
        n.nodeIbStasher.delay(req_delay(sys.maxsize))

    pastNodes = []
    request_couple_json = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    sent1 = sdk_json_to_request_object(request_couple_json[0][0])
    checkPropagated(looper, txnPoolNodeSet, sent1, faultyNodes)

    def check():
        for node in goodNodes:
            if node not in pastNodes:
                # ensure the nodes are suspicious of Alpha
                params = node.spylog.getLastParams(TestNode.reportSuspiciousNode)
                frm = params["nodeName"]
                reason = params["reason"]
                assert frm == 'Alpha'
                assert reason == InsufficientCorrectSignatures.reason.format(0, 1)

                # ensure Alpha's propagates were ignored by the other nodes
                key = sent1.digest
                props = node.requests[key].propagates
                assert 'Alpha' not in props
                for good in goodNodes:
                    assert good.name in props
                pastNodes.append(node)

        for node in goodNodes:
            node.nodeIbStasher.resetDelays()

    timeout = waits.expectedClientRequestPropagationTime(len(txnPoolNodeSet))
    looper.run(eventually(check, retryWait=1, timeout=timeout))
def test_old_view_requests_processed_during_view_change(looper, txnPoolNodeSet,
                                                        sdk_pool_handle,
                                                        sdk_wallet_client):
    """
    Make sure that requests sent before the view change started are processed
    and replies are returned:
    - delay Propagates (to make sure that requests are not ordered before the
      view change is started)
    - send requests
    - check that the requests are ordered despite the view change being in
      progress
    """
    for node in txnPoolNodeSet:
        node.view_change_in_progress = False
        node.nodeIbStasher.delay(ppgDelay(3, 0))
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, 2)
    looper.runFor(1)
    for node in txnPoolNodeSet:
        node.view_change_in_progress = True
    sdk_get_replies(looper, requests)
def test_no_propagate_request_on_different_last_ordered_on_master_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    Send a random request and do a view change. The fast_nodes (nodes 1 and 4,
    which are left without a primary after the next view change) have already
    ordered the transaction on the master instance, while the slow_nodes have
    not. Check that ordering completes on the slow_nodes.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    master_instance = txnPoolNodeSet[0].master_replica.instId
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    with delay_rules(nodes_stashers, cDelay()):
        # Send one request
        requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                            sdk_wallet_client, 1)
        last_ordered_for_slow = slow_nodes[0].master_replica.last_ordered_3pc
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(
            eventually(check_last_ordered,
                       fast_nodes,
                       master_instance,
                       (old_view_no, old_last_ordered[1] + 1)))

        # Trigger view change on all nodes
        ensure_view_change(looper, txnPoolNodeSet)
        # Wait for the view change to finish on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)

    replies = sdk_get_replies(looper, requests)
    for reply in replies:
        sdk_check_reply(reply)

    check_last_ordered(slow_nodes,
                       master_instance,
                       (old_view_no, last_ordered_for_slow[1] + 1))

    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
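The test above calls a check_last_ordered helper that is not shown here; a hypothetical sketch matching the way it is invoked (the body is an assumption based only on that usage) would be:

def check_last_ordered(nodes, inst_id, expected_3pc):
    # Assert that the replica of the given instance on every node has reached
    # the expected (view_no, pp_seq_no) pair; eventually() retries this check
    # until it stops raising.
    for node in nodes:
        assert node.replicas[inst_id].last_ordered_3pc == expected_3pc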
def test_apply_stashed_partially_ordered(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client): test_node = getNonPrimaryReplicas(txnPoolNodeSet)[0].node test_stasher = test_node.nodeIbStasher ledger_size = max(node.domainLedger.size for node in txnPoolNodeSet) def check_pool_ordered_some_requests(): assert max(node.domainLedger.size for node in txnPoolNodeSet) > ledger_size def check_test_node_has_stashed_ordered_requests(): assert len(test_node.stashedOrderedReqs) > 0 # Delay COMMITs so requests are not ordered on test node with delay_rules(test_stasher, cDelay()): reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, TOTAL_REQUESTS) looper.run(eventually(check_pool_ordered_some_requests)) # Get some of txns that need to be ordered ledger_info = test_node.ledgerManager.getLedgerInfoByType(DOMAIN_LEDGER_ID) txns = ledger_info.ledger.uncommittedTxns txns = txns[:len(txns) // 2] assert len(txns) > 1 # Emulate incomplete catchup simultaneous with generation of ORDERED message test_node.mode = Mode.syncing test_node.master_replica.revert_unordered_batches() looper.run(eventually(check_test_node_has_stashed_ordered_requests)) for txn in txns: ledger_info.ledger.add(txn) ledger_info.postTxnAddedToLedgerClbk(DOMAIN_LEDGER_ID, txn) test_node.mode = Mode.participating test_node.processStashedOrderedReqs() ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) sdk_get_and_check_replies(looper, reqs)
def test_unordered_request_freed_on_replica_removal(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, chkFreqPatched, view_change): node = txnPoolNodeSet[0] stashers = [n.nodeIbStasher for n in txnPoolNodeSet] with delay_rules(stashers, cDelay(delay=sys.maxsize)): req = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) looper.runFor(waits.expectedPropagateTime(len(txnPoolNodeSet)) + waits.expectedPrePrepareTime(len(txnPoolNodeSet)) + waits.expectedPrepareTime(len(txnPoolNodeSet)) + waits.expectedCommittedTime(len(txnPoolNodeSet))) assert len(node.requests) == 1 forwardedToBefore = next(iter(node.requests.values())).forwardedTo node.replicas.remove_replica(node.replicas.num_replicas - 1) assert len(node.requests) == 1 forwardedToAfter = next(iter(node.requests.values())).forwardedTo assert forwardedToAfter == forwardedToBefore - 1 chkChkpoints(txnPoolNodeSet, 0) sdk_get_replies(looper, req) chkChkpoints(txnPoolNodeSet, 1) # Send one more request to stabilize checkpoint sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0)) assert len(node.requests) == 0
def test_make_proof_bls_enabled(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client): reqs = sdk_json_couples_to_request_list( sdk_send_random_requests( looper, sdk_pool_handle, sdk_wallet_client, 1)) wait_for_requests_ordered(looper, txnPoolNodeSet, reqs) req = reqs[0] for node in txnPoolNodeSet: req_handler = node.get_req_handler(DOMAIN_LEDGER_ID) key = req_handler.prepare_buy_key(req.identifier, req.reqId) _, proof = req_handler.get_value_from_state(key, with_proof=True) assert proof assert ROOT_HASH in proof assert MULTI_SIGNATURE in proof assert PROOF_NODES in proof multi_sig = proof[MULTI_SIGNATURE] assert MULTI_SIGNATURE_SIGNATURE in multi_sig assert MULTI_SIGNATURE_PARTICIPANTS in multi_sig assert MULTI_SIGNATURE_VALUE in multi_sig multi_sig_value = multi_sig[MULTI_SIGNATURE_VALUE] assert MULTI_SIGNATURE_VALUE_LEDGER_ID in multi_sig_value assert MULTI_SIGNATURE_VALUE_STATE_ROOT in multi_sig_value assert MULTI_SIGNATURE_VALUE_TXN_ROOT in multi_sig_value assert MULTI_SIGNATURE_VALUE_POOL_STATE_ROOT in multi_sig_value assert MULTI_SIGNATURE_VALUE_TIMESTAMP in multi_sig_value # check that multi sig values are in order value_keys = list(multi_sig_value.keys()) assert [MULTI_SIGNATURE_VALUE_LEDGER_ID, MULTI_SIGNATURE_VALUE_POOL_STATE_ROOT, MULTI_SIGNATURE_VALUE_STATE_ROOT, MULTI_SIGNATURE_VALUE_TIMESTAMP, MULTI_SIGNATURE_VALUE_TXN_ROOT] == value_keys assert validate_multi_signature(proof, txnPoolNodeSet)
def test_quorum_after_f_plus_2_nodes_including_primary_turned_off_and_later_on( looper, allPluginsPath, tdir, tconf, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client): timeout = sdk_eval_timeout(1, len(txnPoolNodeSet)) nodes = txnPoolNodeSet sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) stop_node(nodes[0], looper, nodes) waitForViewChange(looper, nodes[1:], expectedViewNo=1) ensureElectionsDone(looper, nodes[1:], instances_list=range(getRequiredInstances(nodeCount))) sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) stop_node(nodes[1], looper, nodes) looper.runFor(tconf.ToleratePrimaryDisconnection + waits.expectedPoolElectionTimeout(len(nodes))) checkViewNoForNodes(nodes[2:], expectedViewNo=1) sdk_reqs3 = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) with pytest.raises(PoolLedgerTimeoutException): req_res = sdk_get_replies(looper, sdk_reqs3, timeout=timeout) sdk_check_reply(req_res[0]) stop_node(nodes[2], looper, nodes) looper.runFor(tconf.ToleratePrimaryDisconnection + waits.expectedPoolElectionTimeout(len(nodes))) checkViewNoForNodes(nodes[3:], expectedViewNo=1) sdk_reqs4 = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) with pytest.raises(PoolLedgerTimeoutException): req_res = sdk_get_replies(looper, sdk_reqs4, timeout=timeout) sdk_check_reply(req_res[0]) nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath) looper.runFor(waits.expectedPoolElectionTimeout(len(nodes))) checkViewNoForNodes(nodes[3:], expectedViewNo=1) sdk_reqs5 = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) with pytest.raises(PoolLedgerTimeoutException): req_res = sdk_get_replies(looper, sdk_reqs5, timeout=timeout) sdk_check_reply(req_res[0]) nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir, allPluginsPath) ensureElectionsDone(looper, nodes[1:], instances_list=range(getRequiredInstances(nodeCount)), customTimeout=60) checkViewNoForNodes(nodes[1:], expectedViewNo=1) sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir, allPluginsPath) ensureElectionsDone(looper, nodes, instances_list=range(getRequiredInstances(nodeCount)), customTimeout=60) checkViewNoForNodes(nodes, expectedViewNo=1) sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)
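The PoolLedgerTimeoutException expectations above follow from the usual BFT quorum bounds; the sketch below (a hypothetical helper, with a 4-node pool assumed for the example) spells out that arithmetic:

def consensus_possible(total_nodes, stopped_nodes):
    # A pool of N nodes tolerates f = (N - 1) // 3 faulty nodes and needs
    # N - f reachable nodes to keep ordering requests.
    f = (total_nodes - 1) // 3
    return total_nodes - stopped_nodes >= total_nodes - f


# For a 4-node pool: one stopped node still allows ordering, while two or
# three stopped nodes do not, matching the timeout checks in the test above.
assert consensus_possible(4, 1)
assert not consensus_possible(4, 2)
assert not consensus_possible(4, 3)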
def test_backup_replica_resumes_ordering_on_lag_in_checkpoints( looper, chkFreqPatched, reqs_for_checkpoint, one_replica_and_others_in_backup_instance, sdk_pool_handle, sdk_wallet_client, view_change_done): """ Verifies resumption of ordering 3PC-batches on a backup replica on detection of a lag in checkpoints """ slow_replica, other_replicas = one_replica_and_others_in_backup_instance view_no = slow_replica.viewNo # Send a request and ensure that the replica orders the batch for it sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) looper.run( eventually(lambda: assertExp(slow_replica.last_ordered_3pc == (view_no, 1)), retryWait=1, timeout=waits.expectedTransactionExecutionTime(nodeCount))) # Don't receive Commits from two replicas slow_replica.node.nodeIbStasher.delay( cDelay(instId=1, sender_filter=other_replicas[0].node.name)) slow_replica.node.nodeIbStasher.delay( cDelay(instId=1, sender_filter=other_replicas[1].node.name)) # Send a request for which the replica will not be able to order the batch # due to an insufficient count of Commits sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) looper.runFor(waits.expectedTransactionExecutionTime(nodeCount)) # Recover reception of Commits slow_replica.node.nodeIbStasher.drop_delayeds() slow_replica.node.nodeIbStasher.resetDelays() # Send requests but in a quantity insufficient # for catch-up number of checkpoints sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP * reqs_for_checkpoint - 2) looper.runFor(waits.expectedTransactionExecutionTime(nodeCount)) # Ensure that the replica has not ordered any batches # after the very first one assert slow_replica.last_ordered_3pc == (view_no, 1) # Ensure that the watermarks have not been shifted since the view start assert slow_replica.h == 0 assert slow_replica.H == LOG_SIZE # Ensure that the collections related to requests, batches and # own checkpoints are not empty. # (Note that a primary replica removes requests from requestQueues # when creating a batch with them.) 
if slow_replica.isPrimary: assert slow_replica.sentPrePrepares else: assert slow_replica.requestQueues[DOMAIN_LEDGER_ID] assert slow_replica.prePrepares assert slow_replica.prepares assert slow_replica.commits assert slow_replica.batches assert slow_replica.checkpoints # Ensure that there are some quorumed stashed checkpoints assert slow_replica.stashed_checkpoints_with_quorum() # Send more requests to reach catch-up number of checkpoints sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint) # Ensure that the replica has adjusted last_ordered_3pc to the end # of the last checkpoint looper.run( eventually(lambda: assertExp(slow_replica.last_ordered_3pc == (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ)), retryWait=1, timeout=waits.expectedTransactionExecutionTime(nodeCount))) # Ensure that the watermarks have been shifted so that the lower watermark # has the same value as last_ordered_3pc assert slow_replica.h == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ assert slow_replica.H == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + LOG_SIZE # Ensure that the collections related to requests, batches and # own checkpoints have been cleared assert not slow_replica.requestQueues[DOMAIN_LEDGER_ID] assert not slow_replica.sentPrePrepares assert not slow_replica.prePrepares assert not slow_replica.prepares assert not slow_replica.commits assert not slow_replica.batches assert not slow_replica.checkpoints # Ensure that now there are no quorumed stashed checkpoints assert not slow_replica.stashed_checkpoints_with_quorum() # Send a request and ensure that the replica orders the batch for it sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) looper.run( eventually(lambda: assertExp(slow_replica.last_ordered_3pc == (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 1)), retryWait=1, timeout=waits.expectedTransactionExecutionTime(nodeCount)))
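To make the catch-up target in the assertions above concrete, here is the arithmetic with illustrative values; the real CHK_FREQ, LOG_SIZE and STASHED_CHECKPOINTS_BEFORE_CATCHUP come from the chkFreqPatched fixture and the Replica class, so these particular numbers are assumptions:

CHK_FREQ = 5                            # assumed checkpoint frequency
LOG_SIZE = 3 * CHK_FREQ                 # assumed 3PC log size
STASHED_CHECKPOINTS_BEFORE_CATCHUP = 1  # assumed lag threshold

catch_up_till = (STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
assert catch_up_till == 10              # expected pp_seq_no and lower watermark h
assert catch_up_till + LOG_SIZE == 25   # expected upper watermark H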
def test_new_primary_has_wrong_clock(tconf, looper, txnPoolNodeSet,
                                     sdk_wallet_client, sdk_pool_handle):
    """
    One of the non-primary nodes has a bad clock: it raises suspicions but
    still orders requests after getting PREPAREs. Then a view change happens
    and this non-primary with the bad clock becomes the new primary, but it is
    not able to get any of its PRE-PREPAREs ordered. Eventually another view
    change happens, a new primary is elected and the pool is functional again.
    """
    # The node having the bad clock; this node will be primary after the view
    # change
    faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node
    make_clock_faulty(faulty_node)
    assert not faulty_node.master_replica.isPrimary
    # faulty_node replies too
    ledger_sizes = {node.name: node.domainLedger.size
                    for node in txnPoolNodeSet}
    susp_counts = {node.name: get_timestamp_suspicion_count(node)
                   for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After the view change, faulty_node is primary
    assert faulty_node.master_replica.isPrimary
    old_view_no = txnPoolNodeSet[0].viewNo

    # Delay InstanceChange so a view change doesn't happen in the middle of
    # this test
    stashers = (n.nodeIbStasher for n in txnPoolNodeSet)
    with delay_rules(stashers, icDelay()):
        # Requests are sent
        for _ in range(5):
            sdk_send_random_requests(looper, sdk_pool_handle,
                                     sdk_wallet_client, count=2)
            looper.runFor(2)

        def chk():
            for node in txnPoolNodeSet:
                assert node.viewNo == old_view_no
            for node in [n for n in txnPoolNodeSet if n != faulty_node]:
                # Each non-faulty node raises a suspicion
                assert get_timestamp_suspicion_count(node) > susp_counts[node.name]
                # The ledger does not change
                assert node.domainLedger.size == ledger_sizes[node.name]
            assert faulty_node.domainLedger.size == ledger_sizes[faulty_node.name]

        looper.run(eventually(chk, retryWait=1))

    # Eventually another view change happens
    ensure_view_change(looper, txnPoolNodeSet)
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, old_view_no + 1,
                          retryWait=1, timeout=2 * tconf.PerfCheckFreq))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After the view change, faulty_node is no longer the primary
    assert not faulty_node.master_replica.isPrimary

    # All nodes reply
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, count=Max3PCBatchSize * 2)
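make_clock_faulty and get_timestamp_suspicion_count are taken as given above. As a hedged illustration of the idea only, a clock skew could be emulated by patching the hook the master replica uses to timestamp 3PC batches; both the helper name and the get_time_for_3pc_batch attribute are assumptions, not the suite's actual implementation:

import types


def make_clock_faulty_sketch(node, skew_seconds=180):
    # Hypothetical sketch: shift the timestamp placed into PRE-PREPAREs so the
    # other nodes see it as falling outside their accepted time window.
    orig = node.master_replica.get_time_for_3pc_batch  # assumed hook

    def skewed(self):
        return orig() + skew_seconds

    node.master_replica.get_time_for_3pc_batch = types.MethodType(
        skewed, node.master_replica)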
def test_different_ledger_request_interleave(tconf, looper, txnPoolNodeSet, sdk_one_node_added, tdir, tdirWithPoolTxns, allPluginsPath, sdk_pool_handle, sdk_wallet_client, sdk_wallet_steward): """ Send pool and domain ledger requests such that they interleave, and do view change in between and verify the pool is functional """ new_node = sdk_one_node_added sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2) ensure_all_nodes_have_same_data(looper, txnPoolNodeSet) # Send domain ledger requests but don't wait for replies requests = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2) # Add another node by sending pool ledger request _, new_theta = sdk_node_theta_added(looper, txnPoolNodeSet, tdir, tconf, sdk_pool_handle, sdk_wallet_steward, allPluginsPath, name='new_theta') # Send more domain ledger requests but don't wait for replies requests.extend(sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 3)) # Do view change without waiting for replies ensure_view_change(looper, nodes=txnPoolNodeSet) checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) # Make sure all requests are completed total_timeout = sdk_eval_timeout(len(requests), len(txnPoolNodeSet)) sdk_get_and_check_replies(looper, requests, timeout=total_timeout) sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle) new_steward_wallet, steward_did = sdk_add_new_nym(looper, sdk_pool_handle, sdk_wallet_steward, 'another_ste', role='STEWARD') # Send another pool ledger request (NODE) but don't wait for completion of # request next_node_name = 'next_node' sigseed, verkey, bls_key, nodeIp, nodePort, clientIp, clientPort, key_proof = \ prepare_new_node_data(tconf, tdir, next_node_name) node_req = looper.loop.run_until_complete( prepare_node_request(steward_did, new_node_name=next_node_name, clientIp=clientIp, clientPort=clientPort, nodeIp=nodeIp, nodePort=nodePort, bls_key=bls_key, sigseed=sigseed, key_proof=key_proof)) sdk_wallet = (new_steward_wallet, steward_did) request_couple = sdk_sign_and_send_prepared_request(looper, sdk_wallet, sdk_pool_handle, node_req) # Send more domain ledger requests but don't wait for replies request_couples = [request_couple, * sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)] # Make sure all requests are completed total_timeout = sdk_eval_timeout(len(request_couples), len(txnPoolNodeSet)) sdk_get_and_check_replies(looper, request_couples, timeout=total_timeout) # Make sure pool is functional sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
def test_stashed_messages_processed_on_backup_replica_ordering_resumption( looper, chkFreqPatched, reqs_for_checkpoint, one_replica_and_others_in_backup_instance, sdk_pool_handle, sdk_wallet_client, view_change_done): """ Verifies resumption of ordering 3PC-batches on a backup replica on detection of a lag in checkpoints in case it is detected after some 3PC-messages related to the next checkpoint have already been stashed as laying outside of the watermarks. Please note that to verify this case the config is set up so that LOG_SIZE == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ """ slow_replica, other_replicas = one_replica_and_others_in_backup_instance view_no = slow_replica.viewNo # Send a request and ensure that the replica orders the batch for it sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) looper.run( eventually(lambda: assertExp(slow_replica.last_ordered_3pc == (view_no, 1)), retryWait=1, timeout=waits.expectedTransactionExecutionTime(nodeCount))) # Don't receive Commits from two replicas slow_replica.node.nodeIbStasher.delay( cDelay(instId=1, sender_filter=other_replicas[0].node.name)) slow_replica.node.nodeIbStasher.delay( cDelay(instId=1, sender_filter=other_replicas[1].node.name)) # Send a request for which the replica will not be able to order the batch # due to an insufficient count of Commits sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) looper.runFor(waits.expectedTransactionExecutionTime(nodeCount)) # Receive further Commits from now on slow_replica.node.nodeIbStasher.drop_delayeds() slow_replica.node.nodeIbStasher.resetDelays() # Send requests but in a quantity insufficient # for catch-up number of checkpoints sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP * reqs_for_checkpoint - 2) looper.runFor(waits.expectedTransactionExecutionTime(nodeCount)) # Don't receive Checkpoints slow_replica.node.nodeIbStasher.delay(chk_delay(instId=1)) # Send more requests to reach catch-up number of checkpoints sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint) looper.runFor(waits.expectedTransactionExecutionTime(nodeCount)) # Ensure that there are no 3PC-messages stashed # as laying outside of the watermarks assert not slow_replica.stashingWhileOutsideWaterMarks # Send a request for which the batch will be outside of the watermarks sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1) looper.runFor(waits.expectedTransactionExecutionTime(nodeCount)) # Ensure that the replica has not ordered any batches # after the very first one assert slow_replica.last_ordered_3pc == (view_no, 1) # Ensure that the watermarks have not been shifted since the view start assert slow_replica.h == 0 assert slow_replica.H == LOG_SIZE # Ensure that there are some quorumed stashed checkpoints assert slow_replica.stashed_checkpoints_with_quorum() # Ensure that now there are 3PC-messages stashed # as laying outside of the watermarks assert slow_replica.stashingWhileOutsideWaterMarks # Receive belated Checkpoints slow_replica.node.nodeIbStasher.reset_delays_and_process_delayeds() # Ensure that the replica has ordered the batch for the last sent request looper.run( eventually(lambda: assertExp(slow_replica.last_ordered_3pc == (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 1)), retryWait=1, timeout=waits.expectedTransactionExecutionTime(nodeCount))) # Ensure that the watermarks have been shifted so that the 
    # lower watermark now equals the end of the last stable checkpoint in
    # the instance
    assert slow_replica.h == \
        (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
    assert slow_replica.H == \
        (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + LOG_SIZE

    # Ensure that now there are no quorumed stashed checkpoints
    assert not slow_replica.stashed_checkpoints_with_quorum()

    # Ensure that now there are no 3PC-messages stashed as lying outside of
    # the watermarks
    assert not slow_replica.stashingWhileOutsideWaterMarks

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc ==
                                     (view_no,
                                      (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                      CHK_FREQ + 2)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))