def test_view_change_by_order_stashed_on_3_nodes_and_catchup_on_1_node(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward):
    '''
    - COMMITS are delayed on all nodes
    - All nodes starts a view change with a prepared certificate (for delayed message)
    - COMMITS come during view change for 3 nodes
    - So these 3 nodes finish view change by processing Commits and Ordered msgs during view change (in between rounds of catchup).
    - The lagging (4th) node receives missing txns as part of catch-up (during view change) and also finishes view change.
    '''
    # NOTE(review): block nesting reconstructed from flattened source — confirm
    # the exact extent of each `with delay_rules(...)` context against history.
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = txnPoolNodeSet[:-1]
    slow_stasher = slow_node.nodeIbStasher
    fast_stashers = [n.nodeIbStasher for n in fast_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # Baseline 3PC position before any traffic; all counters below are relative to it.
    initial_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    txns_count = 4
    eventual_last_ordered = initial_last_ordered[0], initial_last_ordered[1] + txns_count
    batches_count = initial_last_ordered[1]

    # ViewChangeDone messages are delayed on the whole pool for the duration of the test.
    with delay_rules(all_stashers, vcd_delay()):
        # the lagging node is slow in receiving Commits and Catchup msgs
        with delay_rules(slow_stasher, cDelay()):
            with delay_rules(slow_stasher, lsDelay(), msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                # fast nodes will receive and order Commits for last_prepared_cert during view change
                with delay_rules(fast_stashers, cDelay()):
                    with delay_rules(fast_stashers, lsDelay(), msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, txns_count)
                        batches_count += txns_count

                        # The pool prepares the batches but cannot order them (Commits delayed).
                        looper.run(eventually(check_prepare_certificate, txnPoolNodeSet, batches_count))
                        check_last_ordered_3pc_on_master(txnPoolNodeSet, initial_last_ordered)

                        # trigger view change on all nodes
                        ensure_view_change(looper, txnPoolNodeSet)
                        looper.run(eventually(check_last_prepared_certificate_after_view_change_start,
                                              txnPoolNodeSet, eventual_last_ordered))

                # check that all txns are ordered till last prepared on fast nodes
                looper.run(eventually(check_last_ordered_3pc_on_master, fast_nodes,
                                      eventual_last_ordered, timeout=30))

            # check that all txns are ordered till last prepared on slow node as a result of catchup
            looper.run(eventually(check_last_ordered_3pc_on_master, [slow_node],
                                  eventual_last_ordered, timeout=30))

    # wait for view change done on all nodes
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)
def test_slow_catchup_while_ordering(tdir, tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    A lagging node starts catchup while it misses 3PC traffic; the domain-ledger
    part of catchup is held back on the other nodes so more transactions are
    ordered after the audit ledger is already caught up. The lagging node must
    still end up with the same data and the pool must stay functional.
    """
    lagging_node = txnPoolNodeSet[-1]
    other_lagging_node = txnPoolNodeSet[-2]
    other_nodes = txnPoolNodeSet[:-1]
    other_stashers = [node.nodeIbStasher for node in other_nodes]

    def lagging_node_state() -> NodeLeecherService.State:
        # Current catchup phase of the lagging node's leecher.
        return lagging_node.ledgerManager._node_leecher._state

    def check_lagging_node_is_not_syncing_audit():
        assert lagging_node_state() != NodeLeecherService.State.SyncingAudit

    # Prevent lagging node from ordering
    with delay_rules(lagging_node.nodeIbStasher, ppDelay(), pDelay(), cDelay()):
        # Order request on all nodes except lagging one
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

        # Prevent lagging node from catching up domain ledger (and finishing catchup)
        with delay_rules(other_stashers, delay_domain_ledger_catchup()):
            # Start catchup on lagging node
            lagging_node.ledgerManager.start_catchup()
            assert lagging_node_state() == NodeLeecherService.State.SyncingAudit

            # Ensure that audit ledger is caught up by lagging node
            looper.run(eventually(check_lagging_node_is_not_syncing_audit))
            assert lagging_node_state() != NodeLeecherService.State.Idle

            # Order one more request on all nodes except lagging one
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

        # Now lagging node can catch up domain ledger which contains more transactions
        # than it was when audit ledger was caught up

    # Now delayed 3PC messages reach lagging node, so any transactions missed during
    # catch up can be ordered, ensure that all nodes will have same data after that
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Ensure that even if we disable some other node pool is still functional
    # (it won't be the case if old lagging node is nonfunctional)
    with delay_rules(other_lagging_node.nodeIbStasher, ppDelay(), pDelay(), cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_unstash_waiting_for_first_batch_ordered_after_catchup(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tconf):
    """
    PrePrepares stashed as WAITING_FIRST_BATCH_IN_VIEW on a lagged node must be
    unstashed and processed after the node catches up.
    """
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})
    other_stashers = [n.nodeIbStasher for n in other_nodes]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

    last_ordered_lagged_before = lagged_node.master_last_ordered_3PC
    # do not process any message reqs for PrePrepares
    with delay_rules_without_processing(
            lagged_node.nodeIbStasher,
            msg_rep_delay(types_to_delay=[PREPARE, PREPREPARE])):
        with delay_rules(lagged_node.nodeIbStasher, cDelay()):
            ensure_view_change(looper, txnPoolNodeSet)
            looper.run(eventually(check_not_in_view_change, txnPoolNodeSet))
            ensureElectionsDone(looper, other_nodes,
                                instances_list=range(getRequiredInstances(len(txnPoolNodeSet))))

            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

            # delay Commits on all nodes so that there are some PrePrepares still stashed after catchup
            with delay_rules(other_stashers, cDelay()):
                pre_prep_before = len(recvdPrePrepareForInstId(lagged_node, 0))
                sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
                # wait till lagged node receives the new PrePrepares;
                # they will be stashed as WAITING_FIRST_BATCH_IN_VIEW
                looper.run(
                    eventually(lambda: assertExp(
                        len(recvdPrePrepareForInstId(lagged_node, 0)) == pre_prep_before + 2)))

                # catchup the lagged node
                # the latest 2 PrePrepares are still stashed
                lagged_node.start_catchup()
                looper.run(
                    eventually(
                        lambda: assertExp(lagged_node.master_last_ordered_3PC > last_ordered_lagged_before)))

            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2)

    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
def test_watermarks_after_view_change(tdir, tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Delay commit, checkpoint, InstanceChange and ViewChangeDone messages for lagging_node.
    Start ViewChange.
    Check that ViewChange finished.
    Reset delays.
    Check that lagging_node can order transactions and has same data with other nodes.
    """
    lagging_node = txnPoolNodeSet[-1]
    # Shrink the replica log so the watermark window is crossed by a few requests.
    lagging_node.master_replica.config.LOG_SIZE = LOG_SIZE
    start_view_no = lagging_node.viewNo

    with delay_rules(lagging_node.nodeIbStasher, cDelay(), chk_delay(), icDelay(), nv_delay()):
        trigger_view_change(txnPoolNodeSet)
        # View change completes on all nodes except the delayed one.
        waitForViewChange(looper, txnPoolNodeSet[:-1], expectedViewNo=start_view_no + 1,
                          customTimeout=waits.expectedPoolViewChangeStartedTimeout(len(txnPoolNodeSet)))
        ensure_all_nodes_have_same_data(looper, txnPoolNodeSet[:-1])
        # Enough requests to push past the (reduced) watermark window.
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 6)

    # After delays are lifted the lagging node must converge and keep ordering.
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def do_view_change_with_delayed_commits_on_all_but_one(nodes, nodes_without_one_stashers,
                                                       except_node, looper, sdk_pool_handle,
                                                       sdk_wallet_client):
    """
    Run a view change while COMMITs are delayed on every node except ``except_node``,
    so only ``except_node`` orders the in-flight request before the view change.
    Replies for the request are collected once the delays are lifted.
    """
    expected_view_no = except_node.viewNo + 1
    prev_last_ordered = except_node.master_replica.last_ordered_3pc

    def assert_last_ordered(node: Node, last_ordered):
        # Succeeds only once the node's master replica reaches the given 3PC key.
        assert node.master_replica.last_ordered_3pc == last_ordered

    # Delay commits for every node except ``except_node``.
    with delay_rules(nodes_without_one_stashers, cDelay(sys.maxsize)):
        # Send a single request.
        sent_requests = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

        # Wait until except_node has ordered the transaction.
        looper.run(
            eventually(assert_last_ordered, except_node,
                       (except_node.viewNo, prev_last_ordered[1] + 1)))

        # Trigger a view change on every node.
        for node in nodes:
            node.view_changer.on_master_degradation()

        # Wait until the view change completes everywhere.
        looper.run(eventually(view_change_done, nodes, expected_view_no))

    # Delayed commits are released on context exit, so client replies arrive now.
    sdk_get_replies(looper, sent_requests)
def test_taa_acceptance_valid_on_uncommitted(
    validate_taa_acceptance_func_api,
    txnPoolNodeSet,
    looper,
    sdk_wallet_trustee,
    sdk_pool_handle,
    add_taa_acceptance
):
    """
    TAA acceptance must validate against a transaction-author-agreement that is
    pre-prepared but not yet committed (Commits are delayed on the whole pool).
    """
    text, version = gen_random_txn_author_agreement()
    old_pp_seq_no = txnPoolNodeSet[0].master_replica.last_ordered_3pc[1]

    # Hold back Commits so the new TAA stays uncommitted for the whole check.
    with delay_rules([n.nodeIbStasher for n in txnPoolNodeSet], cDelay()):
        req = looper.loop.run_until_complete(
            build_txn_author_agreement_request(sdk_wallet_trustee[1], text, version,
                                               ratification_ts=get_utc_epoch() - 600))
        req = sdk_sign_and_submit_req(sdk_pool_handle, sdk_wallet_trustee, req)

        def check():
            # The TAA batch must be pre-prepared (uncommitted) on node 0.
            assert old_pp_seq_no + 1 == txnPoolNodeSet[0].master_replica._consensus_data.preprepared[-1].pp_seq_no

        looper.run(eventually(check))

        request_json = add_taa_acceptance(
            taa_text=text,
            taa_version=version,
            # Acceptance time rounded down to the start of the current day.
            taa_a_time=get_utc_epoch() // SEC_PER_DAY * SEC_PER_DAY
        )
        request_dict = dict(**json.loads(request_json))
        validate_taa_acceptance_func_api(request_dict)
def do_view_change_with_unaligned_prepare_certificates(
        slow_nodes, nodes, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Perform view change with some nodes reaching lower last prepared certificate than others.
    With current implementation of view change this can result with view change taking a lot of time.
    """
    fast_nodes = [n for n in nodes if n not in slow_nodes]

    all_stashers = [n.nodeIbStasher for n in nodes]
    slow_stashers = [n.nodeIbStasher for n in slow_nodes]

    # Delay some PREPAREs and all COMMITs
    with delay_rules(slow_stashers, pDelay()):
        with delay_rules(all_stashers, cDelay()):
            # Send request
            request = sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)

            # Wait until this request is prepared on fast nodes
            looper.run(eventually(check_last_prepared_certificate, fast_nodes, (0, 1)))
            # Make sure its not prepared on slow nodes
            looper.run(eventually(check_last_prepared_certificate, slow_nodes, None))

            # Trigger view change
            for n in nodes:
                n.view_changer.on_master_degradation()

        # Now commits are processed
        # Wait until view change is complete
        looper.run(eventually(check_view_change_done, nodes, 1, timeout=60))

    # Finish request gracefully
    sdk_get_reply(looper, request)
def test_revert_works_for_fees_before_catch_up_on_all_nodes(
        looper, helpers, nodeSetWithIntegratedTokenPlugin, sdk_pool_handle,
        sdk_wallet_trustee, fees_set, address_main, mint_tokens):
    """
    With Commits delayed on all nodes, an uncommitted NYM-with-fees batch flips
    the tracked state; a catchup on every node must revert the uncommitted batch
    and restore the original state, leaving the pool consistent.
    """
    node_set = [n.nodeIbStasher for n in nodeSetWithIntegratedTokenPlugin]

    with delay_rules(node_set, cDelay()):
        request = helpers.request.nym()
        request = add_fees_request_with_address(helpers, fees_set, request, address_main)

        # State is clean before the request is applied.
        for n in nodeSetWithIntegratedTokenPlugin:
            looper.run(eventually(check_state, n, True, retryWait=0.2, timeout=15))

        sdk_send_signed_requests(sdk_pool_handle, [json.dumps(request.as_dict)])

        # The uncommitted batch has been applied, so the state check flips.
        for n in nodeSetWithIntegratedTokenPlugin:
            looper.run(eventually(check_state, n, False, retryWait=0.2, timeout=15))

        # Catchup must revert the uncommitted batch on every node.
        for n in nodeSetWithIntegratedTokenPlugin:
            n.start_catchup()
        for n in nodeSetWithIntegratedTokenPlugin:
            looper.run(eventually(lambda: assertExp(n.mode == Mode.participating)))
        for n in nodeSetWithIntegratedTokenPlugin:
            looper.run(eventually(check_state, n, True, retryWait=0.2, timeout=15))

    ensure_all_nodes_have_same_data(looper, nodeSetWithIntegratedTokenPlugin)
def test_new_primary_lagging_behind(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tconf):
    """
    If the would-be primary of the next view lags (cannot stabilize a checkpoint),
    the pool performs one more view change and elects the primary expected for
    view N+2; the lagging node later catches up.
    """
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary_name = get_next_primary_name(txnPoolNodeSet, initial_view_no + 1)
    next_primary = [n for n in txnPoolNodeSet if n.name == next_primary_name][0]
    other_nodes = [n for n in txnPoolNodeSet if n != next_primary]
    expected_primary_name = get_next_primary_name(txnPoolNodeSet, initial_view_no + 2)

    # Next primary cannot stabilize 1 checkpoint
    with delay_rules(next_primary.nodeIbStasher, cDelay(), pDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, CHK_FREQ)
        ensure_view_change(looper, txnPoolNodeSet)
        looper.run(
            eventually(check_not_in_view_change, txnPoolNodeSet, timeout=2 * tconf.NEW_VIEW_TIMEOUT))
        ensureElectionsDone(looper=looper, nodes=other_nodes,
                            customTimeout=2 * tconf.NEW_VIEW_TIMEOUT, instances_list=[0, 1])

    # The pool skipped the lagging candidate and advanced two views.
    assert next_primary_name != expected_primary_name
    assert checkViewNoForNodes(txnPoolNodeSet) == initial_view_no + 2

    # send CHK_FREQ reqs so that slow node will start catch-up
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, CHK_FREQ)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
def test_lagged_checkpoint_completion(chkFreqPatched, looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    """
    One node in a pool lags to order the last 3PC-batch in a checkpoint so that
    when it eventually orders this 3PC-batch and thus completes the checkpoint
    it has already received and stashed the corresponding checkpoint messages
    from all the other nodes. The test verifies that the node successfully
    processes the stashed checkpoint messages and stabilizes the checkpoint.
    """
    slow_node = txnPoolNodeSet[-1]

    # All the nodes in the pool normally orders all the 3PC-batches in a
    # checkpoint except the last 3PC-batch. The last 3PC-batch in the
    # checkpoint is ordered by all the nodes except one slow node because this
    # node lags to receive Commits.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 4)
    slow_node.nodeIbStasher.delay(cDelay())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

    # All the other nodes complete the checkpoint and send Checkpoint messages
    # to others. The slow node receives and stashes these messages because it
    # has not completed the checkpoint.
    def check():
        # NOTE(review): iterates ``slow_node.replicas`` directly here, while a
        # sibling test uses ``.replicas.values()`` — confirm both yield replicas.
        for replica in slow_node.replicas:
            assert len(replica.checkpoints) == 1
            assert (1, 5) in replica.checkpoints
            # seqNo == 4: the checkpoint is one batch short of completion.
            assert replica.checkpoints[(1, 5)].seqNo == 4
            assert replica.checkpoints[(1, 5)].digest is None
            assert replica.checkpoints[(1, 5)].isStable is False
            assert len(replica.stashedRecvdCheckpoints) == 1
            assert 0 in replica.stashedRecvdCheckpoints
            assert len(replica.stashedRecvdCheckpoints[0]) == 1
            assert (1, 5) in replica.stashedRecvdCheckpoints[0]
            # Checkpoint messages from every other node are stashed.
            assert len(replica.stashedRecvdCheckpoints[0][(1, 5)]) == \
                len(txnPoolNodeSet) - 1

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.run(eventually(check, timeout=stabilization_timeout))

    # Eventually the slow node receives Commits, orders the last 3PC-batch in
    # the checkpoint and thus completes it, processes the stashed checkpoint
    # messages and stabilizes the checkpoint.
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(waits.expectedOrderingTime(len(txnPoolNodeSet)))
    for replica in slow_node.replicas:
        assert len(replica.checkpoints) == 1
        assert (1, 5) in replica.checkpoints
        assert replica.checkpoints[(1, 5)].seqNo == 5
        assert replica.checkpoints[(1, 5)].digest is not None
        assert replica.checkpoints[(1, 5)].isStable is True
        assert len(replica.stashedRecvdCheckpoints) == 0
def test_freeing_forwarded_preprepared_request(looper, chkFreqPatched, reqs_for_checkpoint,
                                               txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward):
    """
    A node that misses Prepares/Commits and catches up must still free forwarded
    pre-prepared requests and keep ordering new traffic afterwards.
    ``req_num`` is a module-level constant — TODO confirm against the full file.
    """
    # Case, when both backup and primary had problems
    behind_node = txnPoolNodeSet[-1]
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_steward, CHK_FREQ, CHK_FREQ)
    with delay_rules(
            behind_node.nodeIbStasher,
            pDelay(delay=sys.maxsize),
            cDelay(delay=sys.maxsize),
    ):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet, sdk_pool_handle,
                                   sdk_wallet_steward, req_num, req_num)
        # The node cannot order, so it falls back to catchup.
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))
        looper.run(
            eventually(lambda: assertExp(len(behind_node.requests) == req_num)))
        # Every request present in seqNoDB must be marked as executed.
        assert all(r.executed for r in behind_node.requests.values()
                   if behind_node.seqNoDB.get(r.request.key)[1])

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_steward, CHK_FREQ, CHK_FREQ)

    # Master and backup replicas do not stash new requests and successfully order them
    assert len(behind_node.requests) == req_num
def test_future_primaries_replicas_decrease(looper, txnPoolNodeSet, sdk_pool_handle,
                                            sdk_wallet_stewards, tdir, tconf, allPluginsPath):
    """
    After demoting a node (7 -> 6 nodes) the future-primaries handler must
    report one primary fewer for an uncommitted (Commit-delayed) 3PC batch,
    matching the pool's current primaries count.
    """
    assert len(txnPoolNodeSet) == 7
    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Decrease replicas count by demoting one node; this triggers a view change.
    demote_node(looper, sdk_wallet_stewards[-1], sdk_pool_handle, txnPoolNodeSet[-2])
    txnPoolNodeSet.remove(txnPoolNodeSet[-2])
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1

    node = txnPoolNodeSet[0]
    # Delay Commits so the applied batch stays uncommitted while we inspect it.
    with delay_rules(node.nodeIbStasher, cDelay()):
        req = sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                        sdk_wallet_stewards[0], 1)[0][0]
        req = Request(**req)
        three_pc_batch = ThreePcBatch(DOMAIN_LEDGER_ID, 0, 0, 1, time.time(),
                                      randomString(), randomString(),
                                      ['a', 'b', 'c'], [req.digest], pp_digest='')
        primaries = node.write_manager.future_primary_handler.post_batch_applied(three_pc_batch)
        assert len(primaries) + 1 == len(initial_primaries)
        assert len(primaries) == len(txnPoolNodeSet[0].primaries)
    # BUG FIX: removed the trailing loop that restored
    # ``node.write_manager.future_primary_handler.commit_batch = old_commit`` —
    # ``old_commit`` was never defined in this test (leftover of a removed
    # monkey-patch) and raised NameError at runtime. Also dropped the unused
    # ``last_ordered`` local.
def test_lagged_checkpoint_completion(chkFreqPatched, looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    """
    One node in a pool lags to order the last 3PC-batch in a checkpoint so that
    when it eventually orders this 3PC-batch and thus completes the checkpoint
    it has already received and stashed the corresponding checkpoint messages
    from all the other nodes. The test verifies that the node successfully
    processes the stashed checkpoint messages and stabilizes the checkpoint.
    """
    slow_node = txnPoolNodeSet[-1]

    # All the nodes in the pool normally orders all the 3PC-batches in a
    # checkpoint except the last 3PC-batch. The last 3PC-batch in the
    # checkpoint is ordered by all the nodes except one slow node because this
    # node lags to receive Commits.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 4)
    slow_node.nodeIbStasher.delay(cDelay())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

    # All the other nodes complete the checkpoint and send Checkpoint messages
    # to others. The slow node receives and stashes these messages because it
    # has not completed the checkpoint.
    def check():
        for replica in slow_node.replicas.values():
            assert len(replica.checkpoints) == 1
            assert (1, 5) in replica.checkpoints
            # seqNo == 4: the checkpoint is one batch short of completion.
            assert replica.checkpoints[(1, 5)].seqNo == 4
            assert replica.checkpoints[(1, 5)].digest is None
            assert replica.checkpoints[(1, 5)].isStable is False
            assert len(replica.stashedRecvdCheckpoints) == 1
            assert 0 in replica.stashedRecvdCheckpoints
            assert len(replica.stashedRecvdCheckpoints[0]) == 1
            assert (1, 5) in replica.stashedRecvdCheckpoints[0]
            # Checkpoint messages from every other node are stashed.
            assert len(replica.stashedRecvdCheckpoints[0][(1, 5)]) == \
                len(txnPoolNodeSet) - 1

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.run(eventually(check, timeout=stabilization_timeout))

    # Eventually the slow node receives Commits, orders the last 3PC-batch in
    # the checkpoint and thus completes it, processes the stashed checkpoint
    # messages and stabilizes the checkpoint.
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(waits.expectedOrderingTime(len(txnPoolNodeSet)))
    for replica in slow_node.replicas.values():
        assert len(replica.checkpoints) == 1
        assert (1, 5) in replica.checkpoints
        assert replica.checkpoints[(1, 5)].seqNo == 5
        assert replica.checkpoints[(1, 5)].digest is not None
        assert replica.checkpoints[(1, 5)].isStable is True
        assert len(replica.stashedRecvdCheckpoints) == 0
def test_use_modified_rules_from_uncommitted(looper, txnPoolNodeSet, sdk_wallet_trustee,
                                             sdk_wallet_steward, sdk_pool_handle):
    """
    An auth-rule change that is still uncommitted (Commits delayed) must already
    be applied to validation: a steward can add a new NYM under the modified
    rule before the rule itself is committed.
    """
    node_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    wh, _ = sdk_wallet_trustee
    new_steward_did, new_steward_verkey = create_verkey_did(looper, wh)
    # Allow a single steward signature for adding NYMs with the STEWARD role.
    changed_constraint = AuthConstraint(role=STEWARD, sig_count=1)

    with delay_rules(node_stashers, cDelay()):
        r_auth = sdk_send_and_check_auth_rule_request(
            looper, sdk_pool_handle, sdk_wallet_trustee,
            auth_action=ADD_PREFIX, auth_type=NYM,
            field=ROLE, new_value=STEWARD, old_value=None,
            constraint=changed_constraint.as_dict, no_wait=True)
        # Give the pool time to pre-prepare the auth-rule batch before sending
        # the request that depends on it.
        looper.runFor(waits.expectedPrePrepareTime(len(txnPoolNodeSet)))
        r_add_steward = sdk_add_new_nym(looper, sdk_pool_handle, sdk_wallet_steward,
                                        'newSteward2', STEWARD_STRING,
                                        dest=new_steward_did, verkey=new_steward_verkey,
                                        no_wait=True)

    # Replies arrive only after Commits are released.
    sdk_get_and_check_replies(looper, [r_auth])
    sdk_get_and_check_replies(looper, [r_add_steward])
def test_last_committed_after_catchup(looper, helpers,
                                      nodeSetWithIntegratedTokenPlugin,
                                      sdk_pool_handle, fees_set,
                                      address_main, mint_tokens):
    """
    A node whose Commits are delayed rejects its uncommitted batch on catchup
    and receives the NYM-with-fees transaction through catchup instead; its
    last-committed tracker must end up equal to a node that ordered normally.
    """
    node_set = nodeSetWithIntegratedTokenPlugin
    reverted_node = node_set[-1]

    amount = get_amount_from_token_txn(mint_tokens)
    init_seq_no = 1
    request_1, request_2 = nyms_with_fees(2, helpers, fees_set, address_main, amount,
                                          init_seq_no=init_seq_no)
    reverted_last_committed = get_last_committed_from_tracker(reverted_node)
    # BUG FIX: compare against a node that will NOT be reverted (node_set[0]).
    # The original read node_set[-1], i.e. reverted_node itself, which made the
    # assertion compare the tracker to itself and pass vacuously.
    not_reverted_last_committed = get_last_committed_from_tracker(node_set[0])
    assert reverted_last_committed == not_reverted_last_committed

    with delay_rules(reverted_node.nodeIbStasher, cDelay()):
        # Send NYM with FEES and wait for reply.
        r = sdk_sign_and_submit_req_obj(looper, sdk_pool_handle,
                                        helpers.request._steward_wallet, request_1)
        sdk_get_and_check_replies(looper, [r])
        # Start catchup. The uncommitted batch on reverted_node is rejected and
        # the node receives the NYM with FEES through the catchup procedure.
        reverted_node.start_catchup()
        looper.run(eventually(lambda: assertExp(reverted_node.mode == Mode.participating)))

        assert get_last_committed_from_tracker(reverted_node) == \
            get_last_committed_from_tracker(node_set[0])
def test_belated_request_not_processed_if_already_in_3pc_process(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    A client request that Delta receives late (after the same request already
    propagated into 3PC via the other nodes) must not be processed a second
    time: the ledger grows by exactly one transaction.
    """
    delta = txnPoolNodeSet[3]
    initial_ledger_size = delta.domainLedger.size
    # Delta does not see the client request; no node can finish 3PC (Commits delayed).
    delta.clientIbStasher.delay(req_delay(300))
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(cDelay(300))

    one_req = sdk_signed_random_requests(looper, sdk_wallet_client, 1)
    sdk_send_signed_requests(sdk_pool_handle, one_req)

    # Let the request travel the whole 3PC pipeline (minus delayed Commits).
    looper.runFor(
        waits.expectedPropagateTime(len(txnPoolNodeSet)) +
        waits.expectedPrePrepareTime(len(txnPoolNodeSet)) +
        waits.expectedPrepareTime(len(txnPoolNodeSet)) +
        waits.expectedCommittedTime(len(txnPoolNodeSet)))

    # Delta now receives the belated client request while 3PC is in flight.
    delta.clientIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(
        waits.expectedPropagateTime(len(txnPoolNodeSet)) +
        waits.expectedPrePrepareTime(len(txnPoolNodeSet)) +
        waits.expectedPrepareTime(len(txnPoolNodeSet)) +
        waits.expectedCommittedTime(len(txnPoolNodeSet)))

    # Release Commits so the single batch can be ordered.
    for node in txnPoolNodeSet:
        node.nodeIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(waits.expectedOrderingTime(delta.replicas.num_replicas))

    # Exactly one transaction was written on every node.
    for node in txnPoolNodeSet:
        assert node.domainLedger.size - initial_ledger_size == 1
def test_revert_xfer_with_fees_before_catchup(looper, helpers, nodeSetWithIntegratedTokenPlugin,
                                              sdk_pool_handle, fees, xfer_mint_tokens,
                                              xfer_addresses):
    """
    An XFER-with-fees request that only reached PrePrepare (Prepares/Commits are
    dropped) must be reverted by catchup on every node, leaving the pool with
    consistent state and data.
    """
    nodes = nodeSetWithIntegratedTokenPlugin
    node_stashers = [n.nodeIbStasher for n in nodes]
    helpers.general.do_set_fees(fees)
    [address_giver, address_receiver] = xfer_addresses
    inputs = helpers.general.get_utxo_addresses([address_giver])[0]
    # Transfer the whole minted amount minus the XFER fee.
    outputs = [{ADDRESS: address_receiver, AMOUNT: 1000 - fees[XFER_PUBLIC_FEES_ALIAS]}]
    request = helpers.request.transfer(inputs, outputs)

    # Delayed messages are dropped on context exit (without_processing), so the
    # transfer can never be ordered through 3PC.
    with delay_rules_without_processing(node_stashers, cDelay(), pDelay()):
        helpers.sdk.send_request_objects([request])
        looper.runFor(waits.expectedPrePrepareTime(len(nodes)))
        for n in nodes:
            n.start_catchup()
        for n in nodes:
            looper.run(eventually(lambda: assertExp(n.mode == Mode.participating)))
        # Uncommitted state must be reverted on every node.
        for n in nodes:
            looper.run(eventually(check_state, n, True, retryWait=0.2, timeout=15))

    ensure_all_nodes_have_same_data(looper, nodes)
def test_ordered_request_freed_on_replica_removal(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, chkFreqPatched, view_change): node = txnPoolNodeSet[0] # Stabilize checkpoint # Send one more request to stabilize checkpoint sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) old_stable_checkpoint = node.master_replica._consensus_data.stable_checkpoint with delay_rules(node.nodeIbStasher, cDelay(), msg_rep_delay(types_to_delay=[COMMIT])): sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) f_d, f_r = get_forwarded_to_all(node) assert f_d node.replicas.remove_replica(node.replicas.num_replicas - 1) assert node.requests[f_d].forwardedTo == node.replicas.num_replicas looper.run( eventually(check_for_nodes, txnPoolNodeSet, check_stable_checkpoint, old_stable_checkpoint)) # Send one more request to stabilize checkpoint sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, CHK_FREQ - 1) looper.run( eventually(check_for_nodes, txnPoolNodeSet, check_stable_checkpoint, old_stable_checkpoint + CHK_FREQ))
def test_belated_request_not_processed_if_already_in_3pc_process(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    A client request arriving late at Delta — after the same request has already
    entered 3PC via the other nodes — must not be processed twice: the domain
    ledger grows by exactly one transaction on every node.
    """
    delta = txnPoolNodeSet[3]
    initial_ledger_size = delta.domainLedger.size
    pool_size = len(txnPoolNodeSet)
    # Time for one request to traverse the full 3PC pipeline.
    full_3pc_time = (waits.expectedPropagateTime(pool_size) +
                     waits.expectedPrePrepareTime(pool_size) +
                     waits.expectedPrepareTime(pool_size) +
                     waits.expectedCommittedTime(pool_size))

    # Delta misses the client request; Commits are delayed pool-wide.
    delta.clientIbStasher.delay(req_delay(300))
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(cDelay(300))

    signed_reqs = sdk_signed_random_requests(looper, sdk_wallet_client, 1)
    sdk_send_signed_requests(sdk_pool_handle, signed_reqs)
    looper.runFor(full_3pc_time)

    # Now Delta sees the belated client request while 3PC is still pending.
    delta.clientIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(full_3pc_time)

    # Release Commits so the single batch gets ordered.
    for node in txnPoolNodeSet:
        node.nodeIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(waits.expectedOrderingTime(delta.replicas.num_replicas))

    for node in txnPoolNodeSet:
        assert node.domainLedger.size - initial_ledger_size == 1
def test_deletion_non_forwarded_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, tconf, tdir, allPluginsPath):
    """
    Requests that were never forwarded (propagate quorum made unreachable) must
    be fully cleaned up on the lagging node after it catches up: no entries in
    the request store, replica queues, auth-nr cache or request senders.
    ``req_num`` is a module-level constant — TODO confirm against the full file.
    """
    behind_node = txnPoolNodeSet[-1]

    # BUG FIX: ``behind_node.replicas.values()[1]`` subscripted the result of
    # ``values()`` directly, which fails with TypeError for a dict view;
    # materialize it first. A plain loop also replaces the side-effect-only
    # list comprehension.
    backup_replica = list(behind_node.replicas.values())[1]
    for key in list(behind_node.requests):
        backup_replica.discard_req_key(1, key)
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_steward, CHK_FREQ, CHK_FREQ)
    # Make the propagate quorum unreachable so new requests are never forwarded.
    behind_node.quorums.propagate = Quorum(len(txnPoolNodeSet) + 1)

    with delay_rules(behind_node.nodeIbStasher,
                     ppDelay(delay=sys.maxsize),
                     pDelay(delay=sys.maxsize),
                     cDelay(delay=sys.maxsize)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet, sdk_pool_handle,
                                   sdk_wallet_steward, req_num, req_num)
        # The node cannot order, so it falls back to catchup.
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

        # We clear caughtup requests
        assert len(behind_node.requests) == 0
        assert all(len(q) == 0 for r in behind_node.replicas.values()
                   for q in r.requestQueues.values())
        assert len(behind_node.clientAuthNr._verified_reqs) == 0
        assert len(behind_node.requestSender) == 0
def test_backup_can_order_after_catchup(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    A backup replica that became primary after a view change, while its node
    permanently missed master 3PC messages, cannot order until the node is
    caught up; after a forced catchup it must order again.
    """
    # NOTE(review): block nesting reconstructed from flattened source — the two
    # delay contexts are treated as siblings; confirm against history.
    # We expect that after VC Gamma will be primary on backup
    delayed_node = txnPoolNodeSet[-2]

    # Master-instance 3PC messages are dropped on context exit (without_processing).
    with delay_rules_without_processing(delayed_node.nodeIbStasher,
                                        pDelay(instId=MASTER_REPLICA_INDEX),
                                        cDelay(instId=MASTER_REPLICA_INDEX),
                                        ppDelay(instId=MASTER_REPLICA_INDEX)):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, REQUEST_COUNT)

    # Old-view PrePrepare requests are dropped pool-wide during the view change.
    with delay_rules_without_processing(
            [n.nodeIbStasher for n in txnPoolNodeSet],
            old_view_pp_request_delay()):
        ensure_view_change(looper, txnPoolNodeSet)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
        assert delayed_node.replicas._replicas[BACKUP_INST_ID].isPrimary

        # Check, that backup cannot order
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, REQUEST_COUNT)
        for n in txnPoolNodeSet:
            assert n.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1] == 0

        # Forcing catchup
        delayed_node.start_catchup()
        ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

        # Check, that backup can order after catchup
        b_pp_seq_no_before = delayed_node.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1]
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, REQUEST_COUNT)
        assert delayed_node.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1] == \
            b_pp_seq_no_before + REQUEST_COUNT
def test_no_propagate_request_on_different_last_ordered_on_master_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    Send random request and do view change then fast_nodes (1, 4 - without primary after next view change)
    are already ordered transaction on master and slow_nodes are not.
    Check ordering on slow_nodes.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)
    master_instance = txnPoolNodeSet[0].master_replica.instId
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc

    # Slow nodes never receive Commits while the context is active.
    with delay_rules(nodes_stashers, cDelay(delay=sys.maxsize)):
        # send one request
        requests = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
        last_ordered_for_slow = slow_nodes[0].master_replica.last_ordered_3pc
        old_view_no = txnPoolNodeSet[0].viewNo
        # Fast nodes order the request; slow nodes stay behind.
        looper.run(
            eventually(check_last_ordered, fast_nodes, master_instance,
                       (old_view_no, old_last_ordered[1] + 1)))

        # trigger view change on all nodes
        for node in txnPoolNodeSet:
            node.view_changer.on_master_degradation()

        # wait for view change done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)

    sdk_get_replies(looper, requests)
    # Slow nodes must catch up the missed ordering without asking for propagates.
    looper.run(
        eventually(check_last_ordered, slow_nodes, master_instance,
                   (old_view_no, last_ordered_for_slow[1] + 1)))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
def test_freeing_forwarded_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward):
    """
    A node that misses Prepares/Commits and catches up must still free forwarded
    pre-prepared requests and keep ordering new traffic afterwards.
    ``req_num`` is a module-level constant — TODO confirm against the full file.
    """
    # Case, when both backup and primary had problems
    behind_node = txnPoolNodeSet[-1]
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_steward, CHK_FREQ, CHK_FREQ)
    with delay_rules(behind_node.nodeIbStasher,
                     pDelay(delay=sys.maxsize),
                     cDelay(delay=sys.maxsize),
                     ):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet, sdk_pool_handle,
                                   sdk_wallet_steward, req_num, req_num)
        # The node cannot order, so it falls back to catchup.
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))
        assert len(behind_node.requests) == req_num
        # Every request present in seqNoDB must be marked as executed.
        assert all(r.executed for r in behind_node.requests.values()
                   if behind_node.seqNoDB.get(r.request.key)[1])

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_steward, CHK_FREQ, CHK_FREQ)

    # Master and backup replicas do not stash new requests and successfully order them
    assert len(behind_node.requests) == req_num
def test_revert_during_view_change_all_nodes_set_fees(
        tconf, nodeSetWithIntegratedTokenPlugin, fees_set, helpers, looper):
    """
    Check that SET_FEES transaction will be written after view change when
    PREPARE quorum for it is reached
    """
    nodes = nodeSetWithIntegratedTokenPlugin
    node_set = [n.nodeIbStasher for n in nodeSetWithIntegratedTokenPlugin]
    _old_pp_seq_no = get_ppseqno_from_all_nodes(
        nodeSetWithIntegratedTokenPlugin)
    helpers.general.set_fees_without_waiting({ATTRIB_FEES_ALIAS: 3})
    # The call above does not wait for ordering, so pp_seq_no is unchanged yet.
    assert _old_pp_seq_no == get_ppseqno_from_all_nodes(
        nodeSetWithIntegratedTokenPlugin)
    # Delay COMMITs on all nodes so the second SET_FEES reaches PREPARE
    # quorum but is not ordered when the view change starts.
    with delay_rules(node_set, cDelay()):
        # should be changed for auth rule
        helpers.general.set_fees_without_waiting({ATTRIB_FEES_ALIAS: 4})
        looper.run(
            eventually(
                functools.partial(check_batch_ordered, _old_pp_seq_no,
                                  nodeSetWithIntegratedTokenPlugin)))
        ensure_view_change(looper, nodes)
    ensureElectionsDone(looper=looper, nodes=nodes)
    ensure_all_nodes_have_same_data(looper, nodes)
    for n in nodes:
        looper.run(eventually(lambda: assertExp(n.mode == Mode.participating)))
    for n in nodes:
        looper.run(eventually(check_state, n, True, retryWait=0.2, timeout=15))
    # The prepared SET_FEES batch must survive the view change.
    fees = helpers.general.do_get_fees()
    assert fees[FEES][ATTRIB_FEES_ALIAS] == 4
def test_delay_commits_for_one_node(looper, txnPoolNodeSet,
                                    sdk_pool_handle, sdk_wallet_client,
                                    slow_node_is_next_primary, vc_counts):
    """
    One node (optionally the next primary) has its COMMITs dropped while
    txns are sent and one or two view changes happen; the pool must still
    elect and stay functional with equal data on all nodes.
    """
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    # NOTE(review): "excepted_view_no" looks like a typo for "expected_view_no".
    excepted_view_no = current_view_no + 1 if vc_counts == 'once' else current_view_no + 2
    next_primary = get_next_primary_name(txnPoolNodeSet, excepted_view_no)
    pretenders = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet)
                  if not r.isPrimary]
    if slow_node_is_next_primary:
        delayed_node = [n for n in pretenders if n.name == next_primary][0]
    else:
        delayed_node = [n for n in pretenders if n.name != next_primary][0]

    # delay_rules_without_processing drops (not replays) the delayed COMMITs
    # on exit, so the node must recover via the view change instead.
    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)
        trigger_view_change(txnPoolNodeSet)
        if vc_counts == 'twice':
            for node in txnPoolNodeSet:
                node.view_changer.start_view_change(current_view_no + 2)
        ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_lag_less_then_catchup(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_client):
    """
    A node whose COMMITs are dropped lags behind by less than a full
    checkpoint; after a view change it must re-order the missing txns and
    reach the pool's pp_seq_no.
    """
    delayed_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {delayed_node})
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    last_ordered_before = delayed_node.master_replica.last_ordered_3pc
    # Delayed COMMITs are dropped on exit, not replayed.
    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        # Send txns for stable checkpoint
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, CHK_FREQ)
        # Check, that all of not slowed nodes has a stable checkpoint
        for n in other_nodes:
            assert n.master_replica._consensus_data.stable_checkpoint == CHK_FREQ
        # Send another txn. This txn will be reordered after view_change
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        trigger_view_change(txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)
        # The slow node could not order anything while COMMITs were dropped.
        assert delayed_node.master_replica.last_ordered_3pc == last_ordered_before

    # Send txns for stabilize checkpoint on other nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, CHK_FREQ - 1)

    pool_pp_seq_no = get_pp_seq_no(other_nodes)
    looper.run(eventually(lambda: assertExp(
        delayed_node.master_replica.last_ordered_3pc[1] == pool_pp_seq_no)))
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_apply_several_batches(looper, helpers,
                               nodeSetWithIntegratedTokenPlugin,
                               sdk_pool_handle, fees_set,
                               address_main, mint_tokens):
    """
    Two NYM-with-fees requests are applied but not committed while COMMITs
    are delayed; once delays lift both must be committed to the token ledger.
    """
    node_set = [n.nodeIbStasher for n in nodeSetWithIntegratedTokenPlugin]
    amount = get_amount_from_token_txn(mint_tokens)
    init_seq_no = 1
    request1, request2 = nyms_with_fees(2, helpers, fees_set, address_main,
                                        amount, init_seq_no=init_seq_no)
    expected_txns_length = 2
    txns_count_before = get_committed_txns_count_for_pool(
        nodeSetWithIntegratedTokenPlugin, TOKEN_LEDGER_ID)
    # While COMMITs are delayed the txns stay uncommitted on every node.
    with delay_rules(node_set, cDelay()):
        r1 = sdk_send_signed_requests(sdk_pool_handle,
                                      [json.dumps(request1.as_dict)])
        r2 = sdk_send_signed_requests(sdk_pool_handle,
                                      [json.dumps(request2.as_dict)])
        for n in nodeSetWithIntegratedTokenPlugin:
            looper.run(
                eventually(check_uncommitted_txn, n, expected_txns_length,
                           TOKEN_LEDGER_ID, retryWait=0.2, timeout=15))
    # Delays lifted: both requests should now be ordered and replied.
    sdk_get_and_check_replies(looper, r1)
    sdk_get_and_check_replies(looper, r2)
    txns_count_after = get_committed_txns_count_for_pool(
        nodeSetWithIntegratedTokenPlugin, TOKEN_LEDGER_ID)
    assert txns_count_after - txns_count_before == expected_txns_length
def test_future_primaries_replicas_increase(looper, txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_stewards,
                                            tdir, tconf, allPluginsPath):
    """
    After a new node joins, the future-primaries handler applied to a fresh
    3PC batch must report one more primary than before the node was added.
    """
    # Don't delete NodeStates, so we could check them.
    global old_commit
    old_commit = txnPoolNodeSet[0].write_manager.future_primary_handler.commit_batch
    # Stub out commit_batch on every node so committed batches don't clear
    # the handler's state.
    for node in txnPoolNodeSet:
        node.write_manager.future_primary_handler.commit_batch = \
            lambda three_pc_batch, prev_handler_result=None: 0

    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Increase replicas count
    add_new_node(looper, txnPoolNodeSet, sdk_pool_handle,
                 sdk_wallet_stewards[0], tdir, tconf, allPluginsPath)

    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1
    # "seq_no + 2" because 1 domain and 1 pool txn.

    node = txnPoolNodeSet[0]
    # Delay COMMITs on this node so the batch stays applied-but-uncommitted
    # while we inspect post_batch_applied.
    with delay_rules(node.nodeIbStasher, cDelay()):
        req = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                        sdk_pool_handle,
                                        sdk_wallet_stewards[0], 1)[0][0]
        req = Request(**req)
        three_pc_batch = ThreePcBatch(DOMAIN_LEDGER_ID, 0, 0, 1, time.time(),
                                      randomString(), randomString(),
                                      ['a', 'b', 'c'], [req.digest],
                                      pp_digest='')
        primaries = node.write_manager.future_primary_handler.post_batch_applied(three_pc_batch)
        assert len(primaries) == len(initial_primaries) + 1
        assert len(primaries) == len(node.primaries)
def test_pp_obsolescence_check_fail_for_delayed(tdir, tconf, looper,
                                                txnPoolNodeSet,
                                                sdk_pool_handle,
                                                sdk_wallet_client):
    """
    A PrePrepare delivered after the acceptable time deviation must raise a
    timestamp suspicion on the lagging node — one per processed PrePrepare.
    """
    delay = PATCHED_ACCEPTABLE_DEVIATION_PREPREPARE_SECS + 1
    lagging_node = txnPoolNodeSet[-1]

    # Prevent lagging node from ordering
    with delay_rules(lagging_node.nodeIbStasher, ppDelay(), pDelay(), cDelay()):
        # Order request on all nodes except lagging one
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        # Hold the messages long enough for the PrePrepare to become obsolete.
        looper.run(asyncio.sleep(delay))

    # Now delayed 3PC messages reach lagging node, so any delayed transactions
    # can be processed (PrePrepare would be discarded but requested after that),
    # ensure that all nodes will have same data after that
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    pp_count = get_count(lagging_node.master_replica,
                         lagging_node.master_replica.processPrePrepare)
    assert pp_count > 0
    assert get_timestamp_suspicion_count(lagging_node) == pp_count
def test_clearing_forwarded_preprepared_request(looper, chkFreqPatched,
                                                reqs_for_checkpoint,
                                                txnPoolNodeSet,
                                                sdk_pool_handle,
                                                sdk_wallet_steward):
    # Case when backup ordered correctly, but primary had problems.
    # As a result, master will execute caughtup txns and will be removed
    # from requests queues
    behind_node = txnPoolNodeSet[-1]
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    # Delay only the master instance (instId=0): backups keep ordering,
    # master recovers the txns via catch-up.
    with delay_rules(behind_node.nodeIbStasher,
                     pDelay(delay=sys.maxsize, instId=0),
                     cDelay(delay=sys.maxsize, instId=0)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    # Caught-up-and-executed requests must be fully cleaned out everywhere.
    assert len(behind_node.requests) == 0
    assert all([len(q) == 0 for r in behind_node.replicas.values()
                for q in r.requestQueues.values()])
    assert len(behind_node.clientAuthNr._verified_reqs) == 0
    assert len(behind_node.requestSender) == 0
def test_deletion_non_forwarded_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, tconf, tdir, allPluginsPath):
    """
    Requests that were never forwarded (propagate quorum is made unreachable)
    must still be cleared from the node after it catches the txns up.
    """
    behind_node = txnPoolNodeSet[-1]
    # NOTE(review): list comprehension used purely for side effects —
    # a plain for-loop would be clearer.
    [behind_node.replicas.values()[1].discard_req_key(1, key)
     for key in behind_node.requests]
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    # Make the propagate quorum impossible so new requests are never forwarded.
    behind_node.quorums.propagate = Quorum(len(txnPoolNodeSet) + 1)

    with delay_rules(behind_node.nodeIbStasher,
                     ppDelay(delay=sys.maxsize),
                     pDelay(delay=sys.maxsize),
                     cDelay(delay=sys.maxsize)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    # We clear caughtup requests
    looper.run(eventually(lambda: assertExp(len(behind_node.requests) == 0)))
    assert all([len(q) == 0 for r in behind_node.replicas.values()
                for q in r._ordering_service.requestQueues.values()])
    assert len(behind_node.clientAuthNr._verified_reqs) == 0
    assert len(behind_node.requestSender) == 0
def test_revert_nym_with_fees_before_catchup(looper, helpers,
                                             nodeSetWithIntegratedTokenPlugin,
                                             fees_set, fees,
                                             xfer_mint_tokens, xfer_addresses):
    """
    A lagging node that applied (but could not commit) a NYM-with-fees batch
    must revert it on catch-up start and still converge with the pool.
    """
    nodes = nodeSetWithIntegratedTokenPlugin
    current_amount = get_amount_from_token_txn(xfer_mint_tokens)
    seq_no = get_seq_no(xfer_mint_tokens)
    lagging_node = nodes[-1]
    current_amount, seq_no, _ = send_and_check_nym_with_fees(
        helpers, fees_set, seq_no, looper, xfer_addresses, current_amount)
    # Drop (not replay) Commits and Prepares on the lagging node.
    with delay_rules_without_processing(lagging_node.nodeIbStasher,
                                        cDelay(), pDelay()):
        current_amount, seq_no, _ = send_and_check_nym_with_fees(
            helpers, fees_set, seq_no, looper, xfer_addresses, current_amount)
        looper.runFor(waits.expectedPrePrepareTime(len(nodes)))
        # Catch-up must first revert the uncommitted batch on the lagging node.
        lagging_node.start_catchup()
        for n in nodes:
            looper.run(
                eventually(lambda: assertExp(n.mode == Mode.participating)))
        for n in nodes:
            looper.run(
                eventually(check_state, n, True, retryWait=0.2, timeout=15))
    ensure_all_nodes_have_same_data(looper, nodes)
    # The pool must remain able to order further fee-bearing txns.
    current_amount, seq_no, _ = send_and_check_nym_with_fees(
        helpers, fees_set, seq_no, looper, xfer_addresses, current_amount)
    ensure_all_nodes_have_same_data(looper, nodes)
def test_freeing_forwarded_not_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, tconf, tdir, allPluginsPath):
    """
    Forwarded-but-not-preprepared requests must be marked executed after the
    node catches the txns up, even with checkpoints additionally delayed.
    """
    behind_node = txnPoolNodeSet[-1]
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    # NOTE(review): instId here is given a replica object
    # (replicas.values()[-1]) rather than an integer instance id —
    # looks suspicious; confirm against chk_delay's expected argument.
    with delay_rules(
            behind_node.nodeIbStasher,
            chk_delay(delay=sys.maxsize,
                      instId=behind_node.replicas.values()[-1])):
        with delay_rules(behind_node.nodeIbStasher,
                         ppDelay(delay=sys.maxsize),
                         pDelay(delay=sys.maxsize),
                         cDelay(delay=sys.maxsize)):
            count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
            sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                       sdk_pool_handle, sdk_wallet_steward,
                                       req_num, req_num)
            looper.run(
                eventually(node_caughtup, behind_node, count, retryWait=1))
            looper.run(
                eventually(
                    lambda: assertExp(len(behind_node.requests) == req_num)))

    # We execute caughtup requests
    looper.run(
        eventually(lambda: assertExp(len(behind_node.requests) == req_num)))
    assert all(r.executed for r in behind_node.requests.values()
               if behind_node.seqNoDB.get(r.request.key)[1])
def test_state_proof_for_get_fee(looper, helpers,
                                 nodeSetWithIntegratedTokenPlugin,
                                 sdk_pool_handle):
    """
    GET_FEES must return a state proof over the *committed* state: while a
    second SET_FEES is prepared but not committed, the proof still reflects
    the first fees value.
    """
    fees_1 = {NYM_FEES_ALIAS: 1}
    fees_2 = {NYM_FEES_ALIAS: 2}
    node_set = [n.nodeIbStasher for n in nodeSetWithIntegratedTokenPlugin]

    helpers.general.do_set_fees(fees_1)
    response1 = helpers.general.do_get_fees()
    check_state_proof(response1, build_path_for_set_fees(),
                      JsonSerializer().serialize(fees_1))

    # index 2 is the config state — TODO confirm ledger/state index mapping
    config_state = nodeSetWithIntegratedTokenPlugin[0].states[2]
    assert config_state.headHash == config_state.committedHeadHash
    # We delay commit messages to get different committed and uncommitted roots for ledger
    with delay_rules(node_set, cDelay()):
        helpers.general.set_fees_without_waiting(fees_2)
        looper.runFor(3)
        response2 = helpers.general.do_get_fees()
        # Returned state proof for first set_fees, which is committed
        check_state_proof(response2, build_path_for_set_fees(),
                          JsonSerializer().serialize(fees_1))
        # Let's check that uncommitted state differs from committed
        assert config_state.headHash != config_state.committedHeadHash
def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper,
                                              sdk_pool_handle,
                                              sdk_wallet_client):
    """
    The node should do only a fixed rounds of catchup. For this delay
    Prepares and Commits for 2 non-primary nodes by a large amount which is
    equivalent to loss of Prepares and Commits. Make sure 2 nodes have a
    different last prepared certificate from other two. Then do a view
    change, make sure view change completes and the pool does not process
    the request that were prepared by only a subset of the nodes
    """
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ledger_summary = txnPoolNodeSet[0].ledger_summary

    slow_nodes = [r.node for r in getNonPrimaryReplicas(
        txnPoolNodeSet, 0)[-2:]]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]

    # Make node slow to process Prepares and Commits
    for node in slow_nodes:
        node.nodeIbStasher.delay(pDelay(120, 0))
        node.nodeIbStasher.delay(cDelay(120, 0))

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)
    looper.runFor(3)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    def last_prepared(nodes):
        # Returns the (common) last prepared certificate of the given nodes,
        # asserting they all agree on it.
        lst = [n.master_replica.last_prepared_certificate_in_view()
               for n in nodes]
        # All nodes have same last prepared
        assert check_if_all_equal_in_list(lst)
        return lst[0]

    last_prepared_slow = last_prepared(slow_nodes)
    last_prepared_fast = last_prepared(fast_nodes)

    # Check `slow_nodes` and `fast_nodes` set different last_prepared
    assert last_prepared_fast != last_prepared_slow

    # View change complete
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # The requests which were prepared by only a subset of the nodes were
    # not ordered
    assert txnPoolNodeSet[0].ledger_summary == ledger_summary

    for node in slow_nodes:
        node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Make sure pool is functional
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    # NOTE(review): the call below duplicates the one above — probably
    # redundant; confirm before removing.
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    last_prepared(txnPoolNodeSet)
def do_view_change_with_propagate_primary_on_one_delayed_node(
        slow_node, nodes, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Drive a view change in which `slow_node` misses ViewChangeDone and
    InstanceChange messages while COMMITs are delayed pool-wide, so it orders
    the pending batch in the old view and then joins the new view through
    the propagate-primary path.
    """

    slow_stasher = slow_node.nodeIbStasher

    fast_nodes = [n for n in nodes if n != slow_node]

    stashers = [n.nodeIbStasher for n in nodes]

    # Get last prepared certificate in pool
    lpc = last_prepared_certificate(nodes)
    # Get pool current view no
    view_no = lpc[0]

    with delay_rules(slow_stasher, icDelay()):
        with delay_rules(slow_stasher, vcd_delay()):
            with delay_rules(stashers, cDelay()):
                # Send request
                request = sdk_send_random_request(looper, sdk_pool_handle,
                                                  sdk_wallet_client)

                # Wait until this request is prepared on N-f nodes
                looper.run(eventually(
                    check_last_prepared_certificate_on_quorum,
                    nodes, (lpc[0], lpc[1] + 1)))

                # Trigger view change
                for n in nodes:
                    n.view_changer.on_master_degradation()

                # Wait until view change is completed on all nodes except slow one
                waitForViewChange(
                    looper, fast_nodes, expectedViewNo=view_no + 1,
                    customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                        len(nodes)))
                wait_for_elections_done_on_given_nodes(
                    looper, fast_nodes, getRequiredInstances(len(nodes)),
                    timeout=waits.expectedPoolElectionTimeout(len(nodes)))

            # Now all the nodes receive Commits
            # The slow node will accept Commits and order the 3PC-batch in the old view
            looper.runFor(waits.expectedOrderingTime(
                getNoInstances(len(nodes))))

        # Now slow node receives ViewChangeDones
        waitForViewChange(
            looper, [slow_node], expectedViewNo=view_no + 1,
            customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                len(nodes)))
        wait_for_elections_done_on_given_nodes(
            looper, [slow_node], getRequiredInstances(len(nodes)),
            timeout=waits.expectedPoolElectionTimeout(len(nodes)))

    # Now slow node receives InstanceChanges but discards them because already
    # started propagate primary to the same view.

    # Finish request gracefully
    sdk_get_reply(looper, request)
def setup(request, looper, txnPoolNodeSet):
    """
    Parametrized fixture body: pick one non-primary node and delay its
    catch-up replies plus master-instance Prepares/Commits (and PrePrepares
    when ``request.param == 'all'``). Returns (slow_node, fast_nodes).
    """
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    # Delay catchup reply so that the test gets time to make the check,
    # this delay is reset after the check
    slow_node.nodeIbStasher.delay(cr_delay(100))
    slow_node.nodeIbStasher.delay(pDelay(100, 0))
    slow_node.nodeIbStasher.delay(cDelay(100, 0))
    if request.param == 'all':
        slow_node.nodeIbStasher.delay(ppDelay(100, 0))
    return slow_node, fast_nodes
def test_no_propagate_request_on_different_last_ordered_on_backup_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    Send random request and do view change then fast_nodes (1, 4 - without
    primary backup replicas) are already ordered transaction on master
    and some backup replica and slow_nodes are not on backup replica.
    Wait ordering on slow_nodes.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_instance = 1
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].replicas[slow_instance].last_ordered_3pc
    # Delay COMMITs only on the backup instance of the slow nodes.
    with delay_rules(nodes_stashers, cDelay(instId=slow_instance)):
        # send one request
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(
            eventually(check_last_ordered,
                       fast_nodes,
                       slow_instance,
                       (old_view_no, old_last_ordered[1] + 1)))
        # The slow nodes' backup replica has not advanced.
        check_last_ordered(slow_nodes, slow_instance, old_last_ordered)

        # trigger view change on all nodes
        ensure_view_change(looper, txnPoolNodeSet)

        # wait for view change done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)

    primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node
    non_primaries = [n for n in txnPoolNodeSet if n is not primary]

    check_last_ordered(non_primaries, slow_instance,
                       (old_view_no, old_last_ordered[1] + 1))

    # Backup primary replica must not advance last_ordered_3pc
    # up to the master's value
    check_last_ordered([primary], slow_instance,
                       (old_view_no, old_last_ordered[1]))

    check_last_ordered(txnPoolNodeSet, txnPoolNodeSet[0].master_replica.instId,
                       (old_last_ordered[0], old_last_ordered[1] + 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    # No PROPAGATE re-requests should have been needed anywhere.
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
def do_view_change_with_pending_request_and_one_fast_node(fast_node, nodes,
                                                          looper,
                                                          sdk_pool_handle,
                                                          sdk_wallet_client):
    """
    Perform view change while processing request, with one node receiving
    commits much sooner than others.

    With current implementation of view change this will result in corrupted
    state of fast node
    """
    fast_stasher = fast_node.nodeIbStasher

    slow_nodes = [n for n in nodes if n != fast_node]
    slow_stashers = [n.nodeIbStasher for n in slow_nodes]

    # Get last prepared certificate in pool
    lpc = last_prepared_certificate(nodes)
    # Get pool current view no
    view_no = lpc[0]

    # Delay all COMMITs
    with delay_rules(slow_stashers, cDelay()):
        with delay_rules(fast_stasher, cDelay()):
            # Send request
            request = sdk_send_random_request(looper, sdk_pool_handle,
                                              sdk_wallet_client)

            # Wait until this request is prepared on N-f nodes
            looper.run(eventually(check_last_prepared_certificate_on_quorum,
                                  nodes, (lpc[0], lpc[1] + 1)))

            # Trigger view change
            for n in nodes:
                n.view_changer.on_master_degradation()

        # Now commits are processed on fast node
        # (its delay is lifted by the inner `with` exit; slow nodes are
        # still delayed by the outer one)

        # Wait until view change is complete
        looper.run(eventually(check_view_change_done, nodes, view_no + 1,
                              timeout=60))

    # Finish request gracefully
    sdk_get_reply(looper, request)
def test_freeing_forwarded_not_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, tconf, tdir, allPluginsPath):
    """
    With all 3PC messages delayed, the node obtains the txns only via
    catch-up; the forwarded requests must end up marked executed.
    """
    behind_node = txnPoolNodeSet[-1]
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    with delay_rules(behind_node.nodeIbStasher,
                     ppDelay(delay=sys.maxsize),
                     pDelay(delay=sys.maxsize),
                     cDelay(delay=sys.maxsize)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    # We execute caughtup requests
    assert len(behind_node.requests) == req_num
    assert all(r.executed for r in behind_node.requests.values()
               if behind_node.seqNoDB.get(r.request.key)[1])
def do_test_replica_removing_with_backup_degraded(looper, txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client, tconf):
    """
    Node will change view even though it does not find the master to be
    degraded when a quorum of nodes agree that master performance degraded
    """
    start_replicas_count = txnPoolNodeSet[0].replicas.num_replicas
    view_no = txnPoolNodeSet[0].viewNo
    instance_to_remove = 1
    stashers = [node.nodeIbStasher for node in txnPoolNodeSet]
    # Degrade only the backup instance by dropping its COMMITs pool-wide.
    with delay_rules(stashers, cDelay(delay=sys.maxsize,
                                      instId=instance_to_remove)):
        sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_client,
                                             num_reqs=10, num_batches=5)

        # check that replicas were removed
        def check_replica_removed_on_all_nodes(inst_id=instance_to_remove):
            for n in txnPoolNodeSet:
                check_replica_removed(n, start_replicas_count, inst_id)
                assert not n.monitor.isMasterDegraded()

        looper.run(eventually(check_replica_removed_on_all_nodes,
                              timeout=120))

        # start View Change
        for node in txnPoolNodeSet:
            node.view_changer.on_master_degradation()
        waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=view_no + 1,
                          customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # check that all replicas were restored
    assert all(start_replicas_count == node.replicas.num_replicas
               for node in txnPoolNodeSet)
def test_apply_stashed_partially_ordered(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client):
    """
    A node that stashed Ordered messages while syncing must be able to apply
    them after catch-up delivered only *part* of the corresponding txns.
    """
    test_node = getNonPrimaryReplicas(txnPoolNodeSet)[0].node
    test_stasher = test_node.nodeIbStasher
    ledger_size = max(node.domainLedger.size for node in txnPoolNodeSet)

    def check_pool_ordered_some_requests():
        assert max(node.domainLedger.size
                   for node in txnPoolNodeSet) > ledger_size

    def check_test_node_has_stashed_ordered_requests():
        assert len(test_node.stashedOrderedReqs) > 0

    # Delay COMMITs so requests are not ordered on test node
    with delay_rules(test_stasher, cDelay()):
        reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, TOTAL_REQUESTS)
        looper.run(eventually(check_pool_ordered_some_requests))

        # Get some of txns that need to be ordered
        ledger_info = test_node.ledgerManager.getLedgerInfoByType(
            DOMAIN_LEDGER_ID)
        txns = ledger_info.ledger.uncommittedTxns
        txns = txns[:len(txns) // 2]
        assert len(txns) > 1

    # Emulate incomplete catchup simultaneous with generation of ORDERED message
    test_node.mode = Mode.syncing
    test_node.master_replica.revert_unordered_batches()

    looper.run(eventually(check_test_node_has_stashed_ordered_requests))
    # Manually add only half of the txns, as an unfinished catch-up would.
    for txn in txns:
        ledger_info.ledger.add(txn)
        ledger_info.postTxnAddedToLedgerClbk(DOMAIN_LEDGER_ID, txn)
    test_node.mode = Mode.participating
    test_node.processStashedOrderedReqs()

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_get_and_check_replies(looper, reqs)
def test_no_propagate_request_on_different_last_ordered_on_master_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    Send random request and do view change then fast_nodes (1, 4 - without
    primary after next view change) are already ordered transaction on master
    and slow_nodes are not. Check ordering on slow_nodes.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    master_instance = txnPoolNodeSet[0].master_replica.instId
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    # Delay COMMITs on slow nodes so only fast nodes order before the VC.
    with delay_rules(nodes_stashers, cDelay()):
        # send one request
        requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                            sdk_wallet_client, 1)
        last_ordered_for_slow = slow_nodes[0].master_replica.last_ordered_3pc
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(
            eventually(check_last_ordered,
                       fast_nodes,
                       master_instance,
                       (old_view_no, old_last_ordered[1] + 1)))

        # trigger view change on all nodes
        ensure_view_change(looper, txnPoolNodeSet)

        # wait for view change done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)

    replies = sdk_get_replies(looper, requests)
    for reply in replies:
        sdk_check_reply(reply)

    # Slow nodes caught up without PROPAGATE re-requests (asserted below).
    check_last_ordered(slow_nodes, master_instance,
                       (old_view_no, last_ordered_for_slow[1] + 1))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
def test_unordered_request_freed_on_replica_removal(looper, txnPoolNodeSet,
                                                    sdk_pool_handle,
                                                    sdk_wallet_client,
                                                    chkFreqPatched,
                                                    view_change):
    """
    Removing a replica while a request is still unordered must decrement that
    request's forwardedTo counter; the request is freed once a checkpoint
    stabilizes after ordering.
    """
    node = txnPoolNodeSet[0]
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    # Freeze ordering pool-wide so the request stays pending.
    with delay_rules(stashers, cDelay(delay=sys.maxsize)):
        req = sdk_send_random_requests(looper, sdk_pool_handle,
                                       sdk_wallet_client, 1)
        # Give the 3PC phases (minus COMMIT) time to run their course.
        looper.runFor(waits.expectedPropagateTime(len(txnPoolNodeSet)) +
                      waits.expectedPrePrepareTime(len(txnPoolNodeSet)) +
                      waits.expectedPrepareTime(len(txnPoolNodeSet)) +
                      waits.expectedCommittedTime(len(txnPoolNodeSet)))

        assert len(node.requests) == 1

        forwardedToBefore = next(iter(node.requests.values())).forwardedTo
        node.replicas.remove_replica(node.replicas.num_replicas - 1)

        assert len(node.requests) == 1
        forwardedToAfter = next(iter(node.requests.values())).forwardedTo
        assert forwardedToAfter == forwardedToBefore - 1
        chkChkpoints(txnPoolNodeSet, 0)

    sdk_get_replies(looper, req)
    chkChkpoints(txnPoolNodeSet, 1)

    # Send one more request to stabilize checkpoint
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0))
    assert len(node.requests) == 0
def test_clearing_forwarded_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward):
    # Case when backup ordered correctly, but primary had problems.
    # As a result, master will execute caughtup txns and will be removed
    # from requests queues
    behind_node = txnPoolNodeSet[-1]
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    # Delay only the master instance (instId=0); backups keep ordering.
    with delay_rules(behind_node.nodeIbStasher,
                     pDelay(delay=sys.maxsize, instId=0),
                     cDelay(delay=sys.maxsize, instId=0)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    # All request bookkeeping must be cleaned out after catch-up execution.
    assert len(behind_node.requests) == 0
    assert all([len(q) == 0 for r in behind_node.replicas.values()
                for q in r.requestQueues.values()])
    assert len(behind_node.clientAuthNr._verified_reqs) == 0
    assert len(behind_node.requestSender) == 0
def test_unordered_state_reverted_before_catchup(
        tconf, looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    Check that unordered state is reverted before starting catchup:
    - save the initial state on a node
    - slow down processing of COMMITs
    - send requests
    - wait until other nodes come to consensus
    - call start of catch-up
    - check that the state of the slow node is reverted and equal to
    the initial one.
    """
    # CONFIG

    ledger_id = DOMAIN_LEDGER_ID
    non_primary_node = getNonPrimaryReplicas(txnPoolNodeSet, instId=0)[0].node
    non_primary_ledger = non_primary_node.getLedger(ledger_id)
    non_primary_state = non_primary_node.getState(ledger_id)

    # send reqs and make sure we are at the same state

    reqs = sdk_signed_random_requests(looper, sdk_wallet_client, 10)
    sdk_send_and_check(reqs, looper, txnPoolNodeSet, sdk_pool_handle)
    checkNodesHaveSameRoots(txnPoolNodeSet)

    # the state of the node before
    committed_ledger_before = non_primary_ledger.tree.root_hash
    uncommitted_ledger_before = non_primary_ledger.uncommittedRootHash
    committed_state_before = non_primary_state.committedHeadHash
    uncommitted_state_before = non_primary_state.headHash

    # EXECUTE

    # Delay commit requests on the node
    delay_c = 60
    non_primary_node.nodeIbStasher.delay(cDelay(delay_c))

    # send requests
    reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                    sdk_wallet_client, tconf.Max3PCBatchSize)
    sdk_get_replies(looper, reqs, timeout=40)

    # Snapshot the applied-but-uncommitted state on the slow node.
    committed_ledger_during_3pc = non_primary_node.getLedger(
        ledger_id).tree.root_hash
    uncommitted_ledger_during_3pc = non_primary_node.getLedger(
        ledger_id).uncommittedRootHash
    committed_state_during_3pc = non_primary_node.getState(
        ledger_id).committedHeadHash
    uncommitted_state_during_3pc = non_primary_node.getState(
        ledger_id).headHash

    # start catchup
    non_primary_node.ledgerManager.preCatchupClbk(ledger_id)

    committed_ledger_reverted = non_primary_ledger.tree.root_hash
    uncommitted_ledger_reverted = non_primary_ledger.uncommittedRootHash
    committed_state_reverted = non_primary_state.committedHeadHash
    uncommitted_state_reverted = non_primary_state.headHash

    # CHECK

    # check that initial uncommitted state differs from the state during 3PC
    # but committed does not
    assert committed_ledger_before == committed_ledger_during_3pc
    assert uncommitted_ledger_before != uncommitted_ledger_during_3pc
    assert committed_state_before == committed_state_during_3pc
    assert uncommitted_state_before != uncommitted_state_during_3pc

    # pre-catchup must have reverted everything back to the initial state
    assert committed_ledger_before == committed_ledger_reverted
    assert uncommitted_ledger_before == uncommitted_ledger_reverted
    assert committed_state_before == committed_state_reverted
    assert uncommitted_state_before == uncommitted_state_reverted
def delaysCommitProcessing(node, delay: float = 30, instId: int = None):
    """Install a delay rule for COMMIT messages on *node*'s incoming stasher.

    :param node: the pool node whose nodeIbStasher receives the rule
    :param delay: delay in seconds applied to COMMIT messages
    :param instId: protocol instance id passed through to ``cDelay``
    """
    commit_delayer = cDelay(delay=delay, instId=instId)
    node.nodeIbStasher.delay(commit_delayer)
def test_stashed_checkpoint_processing(chkFreqPatched, looper, txnPoolNodeSet,
                                       sdk_wallet_client, sdk_pool_handle):
    """
    One node in a pool of 5 nodes lags to order the last 3PC-batch in a
    checkpoint. By the moment when it eventually orders the 3PC-batch it has
    already received and stashed Checkpoint message from two node, so it
    processes these stashed messages on completing the checkpoint. After this
    it receives Checkpoint messages from two other nodes, processes them and
    stabilizes the checkpoint.
    """
    epsilon = txnPoolNodeSet[-1]

    # Fill the checkpoint up to seqNo 4 (checkpoint range is (1, 5) with the
    # patched CHK_FREQ) while all nodes are healthy.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    # Make epsilon lag on ordering (delay COMMITs) and also delay Checkpoint
    # messages from two specific nodes so they arrive early and get stashed.
    epsilon.nodeIbStasher.delay(cDelay())
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Gamma'))
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Delta'))

    # The 5th request completes the checkpoint on the other nodes.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    # Epsilon ordered only 4 batches, so its own checkpoint (1, 5) is
    # incomplete (no digest) and 2 foreign Checkpoints are stashed.
    for inst_id, replica in epsilon.replicas.items():
        assert len(replica.checkpoints) == 1
        assert (1, 5) in replica.checkpoints
        assert replica.checkpoints[(1, 5)].seqNo == 4
        assert replica.checkpoints[(1, 5)].digest is None
        assert replica.checkpoints[(1, 5)].isStable is False

        assert len(replica.stashedRecvdCheckpoints) == 1
        assert 0 in replica.stashedRecvdCheckpoints
        assert len(replica.stashedRecvdCheckpoints[0]) == 1
        assert (1, 5) in replica.stashedRecvdCheckpoints[0]
        assert len(replica.stashedRecvdCheckpoints[0][(1, 5)]) == 2

    # Release the delayed COMMITs; epsilon completes the checkpoint and
    # consumes the stashed Checkpoint messages.
    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def check():
        # Checkpoint completed (digest set) but not yet stable: only 2 of the
        # required Checkpoint messages have been processed so far.
        for inst_id, replica in epsilon.replicas.items():
            assert len(replica.checkpoints) == 1
            assert (1, 5) in replica.checkpoints
            assert replica.checkpoints[(1, 5)].seqNo == 5
            assert replica.checkpoints[(1, 5)].digest is not None
            assert replica.checkpoints[(1, 5)].isStable is False
            assert len(replica.stashedRecvdCheckpoints) == 0

    looper.run(eventually(check, timeout=waits.expectedOrderingTime(
        len(txnPoolNodeSet))))

    # Release the delayed Checkpoint messages from Gamma and Delta so the
    # checkpoint can reach quorum and stabilize.
    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(CHECKPOINT)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for inst_id, replica in epsilon.replicas.items():
        assert len(replica.checkpoints) == 1
        assert (1, 5) in replica.checkpoints
        assert replica.checkpoints[(1, 5)].seqNo == 5
        assert replica.checkpoints[(1, 5)].digest is not None
        assert replica.checkpoints[(1, 5)].isStable is True
        assert len(replica.stashedRecvdCheckpoints) == 0
def test_backup_replica_resumes_ordering_on_lag_in_checkpoints(
        looper, chkFreqPatched, reqs_for_checkpoint,
        one_replica_and_others_in_backup_instance,
        sdk_pool_handle, sdk_wallet_client, view_change_done):
    """
    Verifies resumption of ordering 3PC-batches on a backup replica
    on detection of a lag in checkpoints
    """

    slow_replica, other_replicas = one_replica_and_others_in_backup_instance
    view_no = slow_replica.viewNo

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc == (view_no, 1)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Don't receive Commits from two replicas (not enough Commits remain for
    # a quorum on the backup instance, instId=1)
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[0].node.name))
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[1].node.name))

    # Send a request for which the replica will not be able to order the batch
    # due to an insufficient count of Commits
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Recover reception of Commits.  NOTE: drop_delayeds discards the stashed
    # Commits, so the second batch stays unordered on this replica.
    slow_replica.node.nodeIbStasher.drop_delayeds()
    slow_replica.node.nodeIbStasher.resetDelays()

    # Send requests but in a quantity insufficient
    # for catch-up number of checkpoints
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                             reqs_for_checkpoint - 2)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that the replica has not ordered any batches
    # after the very first one
    assert slow_replica.last_ordered_3pc == (view_no, 1)

    # Ensure that the watermarks have not been shifted since the view start
    assert slow_replica.h == 0
    assert slow_replica.H == LOG_SIZE

    # Ensure that the collections related to requests, batches and
    # own checkpoints are not empty.
    # (Note that a primary replica removes requests from requestQueues
    # when creating a batch with them.)
    if slow_replica.isPrimary:
        assert slow_replica.sentPrePrepares
    else:
        assert slow_replica.requestQueues[DOMAIN_LEDGER_ID]
        assert slow_replica.prePrepares
    assert slow_replica.prepares
    assert slow_replica.commits
    assert slow_replica.batches
    assert slow_replica.checkpoints

    # Ensure that there are some quorumed stashed checkpoints
    assert slow_replica.stashed_checkpoints_with_quorum()

    # Send more requests to reach catch-up number of checkpoints; this should
    # trigger the lag-in-checkpoints detection and the adjustment below.
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             reqs_for_checkpoint)

    # Ensure that the replica has adjusted last_ordered_3pc to the end
    # of the last checkpoint
    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc ==
                                     (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Ensure that the watermarks have been shifted so that the lower watermark
    # has the same value as last_ordered_3pc
    assert slow_replica.h == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
    assert slow_replica.H == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + LOG_SIZE

    # Ensure that the collections related to requests, batches and
    # own checkpoints have been cleared
    assert not slow_replica.requestQueues[DOMAIN_LEDGER_ID]
    assert not slow_replica.sentPrePrepares
    assert not slow_replica.prePrepares
    assert not slow_replica.prepares
    assert not slow_replica.commits
    assert not slow_replica.batches
    assert not slow_replica.checkpoints

    # Ensure that now there are no quorumed stashed checkpoints
    assert not slow_replica.stashed_checkpoints_with_quorum()

    # Send a request and ensure that the replica orders the batch for it,
    # i.e. ordering has resumed after the adjustment.
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc ==
                                     (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 1)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))
def test_reverted_unordered(txnPoolNodeSet, looper, sdk_pool_handle,
                            sdk_wallet_client):
    """
    Before starting catchup, revert any uncommitted changes to state and
    ledger. This is to avoid any re-application of requests that were ordered
    but stashed
    Example scenario
    prepared (1, 4)
    startViewChange
    start_catchup
    ...
    ....
    ...
    committed and send Ordered (1, 2)
    ...
    ....
    preLedgerCatchUp
    force_process_ordered, take out (1,2) and stash (1, 2)
    now process stashed Ordered(1,2), its requests will be applied again

    Simulation: Delay COMMITs to a node so that it can not order requests
    but has prepared them. Then trigger a view change and make sure the slow
    node has not ordered same number of requests as others but has prepared
    so it can order when it receives COMMITs while view change is in progress.
    The slow node should revert unordered batches and but it should eventually
    process the ordered requests, so delay LEDGER_STATUS too so catchup
    is delayed
    """
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    # Long COMMIT delay on master instance: the slow node prepares but
    # cannot order.
    slow_node.nodeIbStasher.delay(cDelay(120, 0))
    sent_batches = 5
    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         2 * sent_batches,
                                         sent_batches)

    # Fast nodes have same last ordered and same data
    last_ordered = [n.master_last_ordered_3PC for n in fast_nodes]
    assert check_if_all_equal_in_list(last_ordered)
    ensure_all_nodes_have_same_data(looper, fast_nodes)

    # Slow nodes have different last ordered than fast nodes
    assert last_ordered[0] != slow_node.master_last_ordered_3PC

    # Delay LEDGER_STATUS so catchup starts late
    slow_node.nodeIbStasher.delay(lsDelay(100))
    slow_node.nodeIbStasher.delay(msg_rep_delay(100))

    # slow_node has not reverted batches yet (no revert call returned
    # `sent_batches` so far)
    assert sent_batches not in getAllReturnVals(
        slow_node.master_replica,
        slow_node.master_replica.revert_unordered_batches)

    ensure_view_change(looper, txnPoolNodeSet)

    def chk1():
        # slow_node reverted all batches
        rv = getAllReturnVals(slow_node.master_replica,
                              slow_node.master_replica.revert_unordered_batches)
        assert sent_batches in rv

    looper.run(eventually(chk1, retryWait=1))

    # After the view change slow_node has prepared same requests as the fast
    # nodes have ordered
    assert last_ordered[0] == slow_node.master_replica.last_prepared_before_view_change

    # Deliver COMMITs
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # slow_node orders all requests as others have
        assert last_ordered[0] == slow_node.master_last_ordered_3PC

    looper.run(eventually(chk2, retryWait=1))

    # Deliver LEDGER_STATUS so catchup can complete
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(LEDGER_STATUS)
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(MESSAGE_RESPONSE)

    # Ensure all nodes have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    def chk3():
        # slow_node processed stashed Ordered requests successfully
        rv = getAllReturnVals(slow_node,
                              slow_node.processStashedOrderedReqs)
        assert sent_batches in rv

    looper.run(eventually(chk3, retryWait=1))

    # Ensure pool is functional
    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def make_master_replica_lag(node):
    """
    Make the master replica of *node* lag far behind by delaying all
    three 3PC message types (PRE-PREPARE, PREPARE, COMMIT) on instance 0
    for 1200 seconds.
    """
    stasher = node.nodeIbStasher
    for delay_rule_factory in (ppDelay, pDelay, cDelay):
        stasher.delay(delay_rule_factory(1200, 0))
def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client):
    """
    A node is slow to receive COMMIT messages so after a view change it
    starts catchup. But before it can start requesting txns, the COMMITs
    messages are received and are ordered. The node should not request any
    transactions.
    :return:
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * 3,
                                           3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    delay_cm = 30
    delat_cp = 100  # NOTE(review): 'delat_cp' is a typo for 'delay_cp'
    slow_node.nodeIbStasher.delay(cDelay(delay_cm))
    # The slow node receives consistency proofs after some delay, this delay
    # gives the opportunity to deliver all 3PC messages
    slow_node.nodeIbStasher.delay(cpDelay(delat_cp))

    # Count of `getCatchupReqs` which is called to construct the `CatchupReq`
    # to be sent
    def domain_cr_count(): return sum(1 for entry in
                                      slow_node.ledgerManager.spylog.getAll(
                                          slow_node.ledgerManager.getCatchupReqs)
                                      if entry.params['consProof'].ledgerId ==
                                      DOMAIN_LEDGER_ID)

    old_count = domain_cr_count()
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * sent_batches,
                                           sent_batches)
    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the `slow_node` is behind
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Unstash only COMMIT messages
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)

    # Give the node a moment to order from the released COMMITs before the
    # consistency proofs arrive.
    looper.runFor(2)

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        ConsistencyProof.typename)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # No `CatchupReq`s constructed, hence no `CatchupReq`s could have
    # been sent
    assert domain_cr_count() == old_count
    # Some stashed ordered requests have been processed
    rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
    assert sent_batches in rv

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def testReplicasRejectSamePrePrepareMsg(looper, txnPoolNodeSet,
                                        sdk_pool_handle, sdk_wallet_client):
    """
    Replicas should not accept PRE-PREPARE for view "v" and prepare sequence
    number "n" if it has already accepted a request with view number "v" and
    sequence number "n"
    """
    numOfNodes = 4
    fValue = getMaxFailures(numOfNodes)  # NOTE(review): currently unused
    primaryRepl = getPrimaryReplica(txnPoolNodeSet, 1)
    logger.debug("Primary Replica: {}".format(primaryRepl))
    nonPrimaryReplicas = getNonPrimaryReplicas(txnPoolNodeSet, 1)
    logger.debug("Non Primary Replicas: " + str(nonPrimaryReplicas))

    # Delay COMMITs so request is not ordered and checks can be made
    c_delay = 10
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(cDelay(delay=c_delay, instId=1))

    # First request: goes through PRE-PREPARE/PREPARE normally (ordering is
    # held back by the COMMIT delay above).
    req1 = sdk_send_random_requests(looper,
                                    sdk_pool_handle,
                                    sdk_wallet_client,
                                    1)[0]
    request1 = sdk_json_to_request_object(req1[0])
    for npr in nonPrimaryReplicas:
        looper.run(eventually(checkPrepareReqSent,
                              npr,
                              request1.key,
                              primaryRepl.viewNo,
                              retryWait=1))
    prePrepareReq = primaryRepl.sentPrePrepares[primaryRepl.viewNo,
                                                primaryRepl.lastPrePrepareSeqNo]
    looper.run(eventually(checkPrePrepareReqRecvd,
                          nonPrimaryReplicas,
                          prePrepareReq,
                          retryWait=1))

    # Make the primary malicious: the next PRE-PREPARE will reuse the previous
    # sequence number, which the replicas have already accepted.
    logger.debug(
        "Decrementing the primary replica's pre-prepare sequence number by "
        "one...")
    primaryRepl._lastPrePrepareSeqNo -= 1
    view_no = primaryRepl.viewNo
    request2 = sdk_json_to_request_object(
        sdk_send_random_requests(looper,
                                 sdk_pool_handle,
                                 sdk_wallet_client,
                                 1)[0][0])
    timeout = waits.expectedPrePrepareTime(len(txnPoolNodeSet))
    looper.run(eventually(checkPrePrepareReqSent, primaryRepl, request2,
                          retryWait=1, timeout=timeout))

    # Since the node is malicious, it will not be able to process requests due
    # to conflicts in PRE-PREPARE
    primaryRepl.node.stop()
    looper.removeProdable(primaryRepl.node)

    # Reconstruct the conflicting PRE-PREPARE the malicious primary sent, to
    # verify the non-primaries received it.
    reqIdr = [request2.digest]
    prePrepareReq = PrePrepare(
        primaryRepl.instId,
        view_no,
        primaryRepl.lastPrePrepareSeqNo,
        get_utc_epoch(),
        reqIdr,
        init_discarded(),
        primaryRepl.batchDigest([request2]),
        DOMAIN_LEDGER_ID,
        primaryRepl.stateRootHash(DOMAIN_LEDGER_ID),
        primaryRepl.txnRootHash(DOMAIN_LEDGER_ID),
        0,
        True
    )

    logger.debug("""Checking whether all the non primary replicas have received
                the pre-prepare request with same sequence number""")
    timeout = waits.expectedPrePrepareTime(len(txnPoolNodeSet))
    looper.run(eventually(checkPrePrepareReqRecvd,
                          nonPrimaryReplicas,
                          prePrepareReq,
                          retryWait=1,
                          timeout=timeout))
    logger.debug("""Check that none of the non primary replicas didn't send
    any prepare message "
                 in response to the pre-prepare message""")
    timeout = waits.expectedPrepareTime(len(txnPoolNodeSet))
    looper.runFor(timeout)  # expect prepare processing timeout

    # check if prepares have not been sent for the conflicting PRE-PREPARE
    for npr in nonPrimaryReplicas:
        with pytest.raises(AssertionError):
            looper.run(eventually(checkPrepareReqSent,
                                  npr,
                                  request2.key,
                                  view_no,
                                  retryWait=1,
                                  timeout=timeout))

    # The first request must still be ordered once the COMMIT delay elapses.
    timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + c_delay
    result1 = sdk_get_replies(looper, [req1])[0][1]
    logger.debug("request {} gives result {}".format(request1, result1))
def test_slow_node_reverts_unordered_state_during_catchup(looper,
                                                          txnPoolNodeSet,
                                                          sdk_pool_handle,
                                                          sdk_wallet_client):
    """
    Delay COMMITs to a node such that when it needs to catchup, it needs to
    revert some unordered state. Also till this time the node should have
    receive all COMMITs such that it will apply some of the COMMITs (
    for which it has not received txns from catchup).
    For this delay COMMITs by long, do catchup for a little older than the
    state received in LedgerStatus, once catchup completes, reset delays and
    try to process delayed COMMITs, some COMMITs will be rejected but some
    will be processed since catchup was done for older ledger.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150
    catchup_rep_delay = 25

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))
    # Delay LEDGER_STAUS on slow node, so that only
    # MESSAGE_REQUEST(LEDGER_STATUS) is sent, and the node catch-ups 2 times.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from slow
    # node, and return Consistency proof for all missing txns, so no stashed
    # ones are applied
    slow_node.nodeIbStasher.delay(lsDelay(1000))

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_less(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                             delay_batches * Max3PCBatchSize)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Baseline count of completed catchups, compared against in chk5.
    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    # start view change (and hence catchup)
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        lst_3pc = check_last_ordered_3pc(n1, n2)

    def chk1():
        # `slow_node` has prepared all 3PC messages which
        # `other_nodes` have ordered
        assertEquality(slow_master_replica.last_prepared_before_view_change,
                       lst_3pc)

    looper.run(eventually(chk1, retryWait=1))

    old_pc_count = slow_master_replica.spylog.count(
        slow_master_replica.can_process_since_view_change_in_progress)

    # Nothing stashed yet — COMMITs are still delayed.
    assert len(slow_node.stashedOrderedReqs) == 0

    # Repair the network so COMMITs are received, processed and stashed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica.spylog.count(
            slow_master_replica.can_process_since_view_change_in_progress) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # COMMITs are stashed
        assert len(slow_node.stashedOrderedReqs) == delay_batches * Max3PCBatchSize

    looper.run(eventually(chk3, retryWait=1, timeout=15))

    # fix catchup, so the node gets a chance to be caught-up
    repair_node_catchup_less(other_nodes)

    def chk4():
        # Some COMMITs were ordered but stashed and they were processed
        rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
        assert delay_batches in rv

    looper.run(eventually(chk4, retryWait=1, timeout=catchup_rep_delay + 5))

    def chk5():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(
            chk5,
            retryWait=1,
            timeout=waits.expectedPoolCatchupTime(
                len(txnPoolNodeSet))))

    # make sure that the pool is functional
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_stashed_messages_processed_on_backup_replica_ordering_resumption(
        looper, chkFreqPatched, reqs_for_checkpoint,
        one_replica_and_others_in_backup_instance,
        sdk_pool_handle, sdk_wallet_client, view_change_done):
    """
    Verifies resumption of ordering 3PC-batches on a backup replica
    on detection of a lag in checkpoints in case it is detected after
    some 3PC-messages related to the next checkpoint have already been
    stashed as laying outside of the watermarks.
    Please note that to verify this case the config is set up so that
    LOG_SIZE == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
    """

    slow_replica, other_replicas = one_replica_and_others_in_backup_instance
    view_no = slow_replica.viewNo

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc == (view_no, 1)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Don't receive Commits from two replicas (no quorum on backup
    # instance, instId=1)
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[0].node.name))
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[1].node.name))

    # Send a request for which the replica will not be able to order the batch
    # due to an insufficient count of Commits
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Receive further Commits from now on (delayed ones are dropped, so the
    # second batch stays unordered on this replica)
    slow_replica.node.nodeIbStasher.drop_delayeds()
    slow_replica.node.nodeIbStasher.resetDelays()

    # Send requests but in a quantity insufficient
    # for catch-up number of checkpoints
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                             reqs_for_checkpoint - 2)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Don't receive Checkpoints, so the lag detection is postponed until
    # after some 3PC-messages fall outside the watermarks
    slow_replica.node.nodeIbStasher.delay(chk_delay(instId=1))

    # Send more requests to reach catch-up number of checkpoints
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             reqs_for_checkpoint)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that there are no 3PC-messages stashed
    # as laying outside of the watermarks
    assert not slow_replica.stashingWhileOutsideWaterMarks

    # Send a request for which the batch will be outside of the watermarks
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that the replica has not ordered any batches
    # after the very first one
    assert slow_replica.last_ordered_3pc == (view_no, 1)

    # Ensure that the watermarks have not been shifted since the view start
    assert slow_replica.h == 0
    assert slow_replica.H == LOG_SIZE

    # Ensure that there are some quorumed stashed checkpoints
    assert slow_replica.stashed_checkpoints_with_quorum()

    # Ensure that now there are 3PC-messages stashed
    # as laying outside of the watermarks
    assert slow_replica.stashingWhileOutsideWaterMarks

    # Receive belated Checkpoints; the lag is detected and the stashed
    # out-of-watermarks messages can be processed
    slow_replica.node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Ensure that the replica has ordered the batch for the last sent request
    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc ==
                                     (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 1)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Ensure that the watermarks have been shifted so that the lower watermark
    # now equals to the end of the last stable checkpoint in the instance
    assert slow_replica.h == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
    assert slow_replica.H == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + LOG_SIZE

    # Ensure that now there are no quorumed stashed checkpoints
    assert not slow_replica.stashed_checkpoints_with_quorum()

    # Ensure that now there are no 3PC-messages stashed
    # as laying outside of the watermarks
    assert not slow_replica.stashingWhileOutsideWaterMarks

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc ==
                                     (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 2)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))