from typing import Iterable

from plenum.common.constants import AUDIT_LEDGER_ID, DOMAIN_LEDGER_ID
from plenum.common.messages.node_messages import Commit, Prepare
from plenum.common.util import compare_3PC_keys
from plenum.server.catchup.node_leecher_service import NodeLeecherService
from plenum.test.delayers import cDelay, cqDelay, cr_delay, delay_3pc, icDelay
from plenum.test.helper import (assert_eq, sdk_check_reply,
                                sdk_ensure_pool_functional,
                                sdk_get_and_check_replies, sdk_get_replies,
                                sdk_send_random_and_check,
                                sdk_send_random_requests)
from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data
from plenum.test.stasher import delay_rules, start_delaying, \
    stop_delaying_and_process
from plenum.test.test_node import ensureElectionsDone
from stp_core.loop.eventually import eventually


def test_catchup_uses_only_nodes_with_cons_proofs(looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    # Prevent lagging node from ordering and preload pool with transactions
    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 10)

    # Delay catchup requests to all other nodes
    catchup_reqs = {node.name: start_delaying(node.nodeIbStasher, cqDelay())
                    for node in other_nodes}
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[
        AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure cons proofs were gathered while all nodes were available
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests only for nodes that provided cons proofs
    for node_id in audit_catchup_service._nodes_ledger_sizes.keys():
        stop_delaying_and_process(catchup_reqs[node_id])

    # Check catchup finishes successfully
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)

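# The delaying helpers used throughout this module come from
# plenum.test.stasher. A toy model of the semantics these tests rely on
# (illustrative only, not the real implementation): messages matching an
# active rule are stashed instead of delivered, and
# stop_delaying_and_process() drops the rule and replays what it held back.
class ToyStasher:
    def __init__(self, deliver):
        self._deliver = deliver  # callback handing a message to the node
        self._rules = []         # list of (predicate, stashed_messages)

    def start_delaying(self, predicate):
        rule = (predicate, [])
        self._rules.append(rule)
        return rule

    def receive(self, msg):
        for predicate, stashed in self._rules:
            if predicate(msg):
                stashed.append(msg)
                return
        self._deliver(msg)

    def stop_delaying_and_process(self, rule):
        self._rules.remove(rule)
        for msg in rule[1]:
            self.receive(msg)  # re-check against the remaining rules
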
def test_catchup_with_all_nodes_sending_cons_proofs_dead(looper,
                                                         txnPoolNodeSet,
                                                         sdk_pool_handle,
                                                         sdk_wallet_client,
                                                         logsearch):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    # Prevent lagging node from ordering and preload pool with transactions
    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 10)

    log_re_ask, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    old_re_ask_count = len(log_re_ask)

    # Delay catchup requests to all other nodes
    catchup_reqs = {node.name: start_delaying(node.nodeIbStasher, cqDelay())
                    for node in other_nodes}
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[
        AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure cons proofs were gathered while all nodes were available
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests only from nodes that didn't respond first
    for node_id, node_reqs in catchup_reqs.items():
        if node_id not in audit_catchup_service._nodes_ledger_sizes:
            stop_delaying_and_process(node_reqs)

    # Check catchup finishes successfully, and that there were re-asks
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    assert len(log_re_ask) - old_re_ask_count > 0

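# logsearch is a fixture provided by the shared test conftest; it is assumed
# here to return a live list of captured log messages matching the given
# regexes, plus the patterns themselves (hence the `log_re_ask, _ = ...`
# unpacking above). A minimal sketch of such a fixture; the name and details
# below are assumptions, not the real implementation:
import logging
import re

import pytest


@pytest.fixture
def logsearch_sketch():
    root = logging.getLogger()
    handlers = []

    def search(msgs):
        patterns = [re.compile(m) for m in msgs]
        matches = []

        class Matcher(logging.Handler):
            def emit(self, record):
                text = record.getMessage()
                if any(p.search(text) for p in patterns):
                    matches.append(text)

        handler = Matcher()
        root.addHandler(handler)
        handlers.append(handler)
        return matches, patterns

    yield search
    for handler in handlers:
        root.removeHandler(handler)
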
def test_view_change_during_unstash(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client, tconf):
    slow_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    slow_stasher = slow_node.nodeIbStasher
    other_stashers = [n.nodeIbStasher for n in other_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Prevent ordering of some requests
    start_delaying(all_stashers, delay_3pc(after=7, msgs=(Prepare, Commit)))

    # Stop ordering on slow node and send requests
    slow_node_after_5 = start_delaying(slow_stasher,
                                       delay_3pc(after=5, msgs=Commit))
    slow_node_until_5 = start_delaying(slow_stasher, delay_3pc(after=0))
    reqs_view_0 = sdk_send_random_requests(looper, sdk_pool_handle,
                                           sdk_wallet_client, 8)

    # Make pool order first 2 batches and pause
    pool_after_3 = start_delaying(other_stashers, delay_3pc(after=3))
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))

    # Start catchup, continue ordering everywhere
    # (except the last two batches on the slow node)
    with delay_rules(slow_stasher, cr_delay()):
        slow_node._do_start_catchup(just_started=False)
        looper.run(eventually(check_catchup_is_started, slow_node))
        stop_delaying_and_process(pool_after_3)
        looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 7))

    # Finish catchup and continue processing on slow node
    looper.run(eventually(check_catchup_is_finished, slow_node))
    stop_delaying_and_process(slow_node_until_5)
    looper.run(eventually(check_nodes_ordered_till, [slow_node], 0, 5))

    # Start view change and allow slow node to get remaining commits
    with delay_rules(all_stashers, icDelay()):
        for node in txnPoolNodeSet:
            node.view_changer.on_master_degradation()
        looper.runFor(0.1)
    stop_delaying_and_process(slow_node_after_5)

    # Ensure that expected number of requests was ordered
    replies = sdk_get_replies(looper, reqs_view_0)
    for rep in replies[:6]:
        sdk_check_reply(rep)

    # Ensure that everything is ok
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

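# The check_* helpers used above are not defined in this excerpt. Plausible
# sketches, inferred from how other tests in this module inspect node state
# (the originals may differ in detail):
def check_catchup_is_started(node):
    assert node.ledgerManager._node_leecher._state != \
        NodeLeecherService.State.Idle


def check_catchup_is_finished(node):
    assert node.ledgerManager._node_leecher._state == \
        NodeLeecherService.State.Idle


def check_nodes_ordered_till(nodes: Iterable, view_no: int, pp_seq_no: int):
    # Every node has ordered at least up to (view_no, pp_seq_no)
    for node in nodes:
        assert compare_3PC_keys((view_no, pp_seq_no),
                                node.master_replica.last_ordered_3pc) >= 0
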
def test_catchup_with_skipped_commits(tdir, tconf, looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    lagging_stasher = lagging_node.nodeIbStasher
    other_nodes = txnPoolNodeSet[:-1]
    other_stashers = [node.nodeIbStasher for node in other_nodes]

    def lagging_node_state() -> NodeLeecherService.State:
        return lagging_node.ledgerManager._node_leecher._state

    def check_lagging_node_is_not_syncing_audit():
        assert lagging_node_state() != NodeLeecherService.State.SyncingAudit

    def check_lagging_node_done_catchup():
        assert lagging_node_state() == NodeLeecherService.State.Idle

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Delay some commits on lagging node
    some_commits = start_delaying(lagging_stasher,
                                  delay_some_commits(0, [2, 3]))

    # Order 4 more requests in pool
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 4)

    with delay_rules(lagging_stasher, delay_catchup(AUDIT_LEDGER_ID)):
        # Start catchup
        lagging_node.start_catchup()
        looper.runFor(0.5)
        assert lagging_node_state() == NodeLeecherService.State.SyncingAudit

        # Allow some missing commits to be finally received
        stop_delaying_and_process(some_commits)
        looper.runFor(0.5)

    # Ensure that audit ledger is caught up by lagging node
    looper.run(eventually(check_lagging_node_done_catchup))

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

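# delay_some_commits() and delay_catchup() are likewise not defined in this
# excerpt. Plausible reconstructions (the ledger_filter argument of cr_delay
# and the exact delayer shapes are assumptions): plenum delayers receive a
# (msg, frm) pair and return a delay in seconds to stash the message.
def delay_some_commits(view_no: int, pp_seq_nos):
    def delayer(msg_frm):
        msg, frm = msg_frm
        # Stash only COMMITs for the given 3PC keys
        if isinstance(msg, Commit) and msg.viewNo == view_no \
                and msg.ppSeqNo in pp_seq_nos:
            return 1000
    delayer.__name__ = "delay_some_commits({}, {})".format(view_no, pp_seq_nos)
    return delayer


def delay_catchup(ledger_id: int):
    # Stash CATCHUP_REPs for a single ledger, pinning the node leecher in the
    # corresponding syncing state
    delayer = cr_delay(ledger_filter=ledger_id)
    delayer.__name__ = "delay_catchup({})".format(ledger_id)
    return delayer
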
def test_max_3pc_batches_in_flight(tdir, tconf, looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_client):
    # Check pool initial state
    initial_3pc = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    for node in txnPoolNodeSet[1:]:
        assert node.master_replica.last_ordered_3pc == initial_3pc

    # Utility
    def check_ordered_till(pp_seq_no: int):
        for node in txnPoolNodeSet:
            last_ordered = node.master_replica.last_ordered_3pc
            assert last_ordered[0] == initial_3pc[0]
            assert last_ordered[1] == pp_seq_no

    # Delay some commits
    all_stashers = [node.nodeIbStasher for node in txnPoolNodeSet]
    delayers = []
    for num in range(BATCHES_TO_ORDER):
        pp_seq_no = initial_3pc[1] + num + 1
        delayer = start_delaying(all_stashers,
                                 delay_3pc(after=pp_seq_no - 1,
                                           before=pp_seq_no + 1))
        delayers.append((pp_seq_no, delayer))

    # Send a number of requests
    reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                    sdk_wallet_client, BATCHES_TO_ORDER)

    # Continuously check number of batches in flight
    for pp_seq_no, delayer in delayers:
        stop_delaying_and_process(delayer)
        looper.run(eventually(check_ordered_till, pp_seq_no))

        for node in txnPoolNodeSet:
            for replica in node.replicas.values():
                batches_in_flight = replica.lastPrePrepareSeqNo - \
                    replica.last_ordered_3pc[1]
                assert batches_in_flight <= MAX_BATCHES_IN_FLIGHT

    # Check all requests are ordered
    sdk_get_and_check_replies(looper, reqs)

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

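# BATCHES_TO_ORDER and MAX_BATCHES_IN_FLIGHT are module-level constants not
# shown in this excerpt; presumably the in-flight cap mirrors the pool config
# limit under test (pinned via a tconf fixture that is also not shown), and
# the number of batches to order just needs to exceed that cap.
# Illustrative values (assumptions):
BATCHES_TO_ORDER = 10
MAX_BATCHES_IN_FLIGHT = 4
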
def test_audit_multiple_uncommitted_node_regs(looper, tdir, tconf,
                                              allPluginsPath,
                                              txnPoolNodeSet,
                                              sdk_pool_handle,
                                              sdk_wallet_client,
                                              sdk_wallet_steward):
    '''
    - Delay COMMITs on 1 node
    - Add 2 more nodes (so that the slow node has multiple uncommitted
      node txns)
    - Make sure that all nodes have equal state eventually
    '''
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = [node for node in txnPoolNodeSet if node != slow_node]
    slow_stashers = [slow_node.nodeIbStasher]

    # Let's ignore view changes for simplicity here
    start_delaying([n.nodeIbStasher for n in txnPoolNodeSet], icDelay())

    with delay_rules(slow_stashers, cDelay()):
        # Add Node5
        new_node = add_new_node(looper, fast_nodes, sdk_pool_handle,
                                sdk_wallet_steward, tdir, tconf,
                                allPluginsPath, name='Psi',
                                wait_till_added=False)
        txnPoolNodeSet.append(new_node)
        start_delaying(new_node.nodeIbStasher, icDelay())

        # Add Node6
        new_node = add_new_node(looper, fast_nodes, sdk_pool_handle,
                                sdk_wallet_steward, tdir, tconf,
                                allPluginsPath, name='Eta',
                                wait_till_added=False)
        txnPoolNodeSet.append(new_node)
        start_delaying(new_node.nodeIbStasher, icDelay())

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=20)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

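# add_new_node() is not defined in this excerpt. A hypothetical sketch of its
# presumed behaviour in terms of plenum's sdk_add_new_node helper; the exact
# signature, import location and the wait_till_added pass-through below are
# assumptions, not a verified API:
def add_new_node(looper, nodes, sdk_pool_handle, sdk_wallet_steward,
                 tdir, tconf, all_plugins_path, name, wait_till_added=True):
    from plenum.test.pool_transactions.helper import sdk_add_new_node

    # Send the NODE txn from an existing steward and start the new node. With
    # wait_till_added=False the caller does not wait for the txn to be ordered
    # pool-wide, which is what lets the slow node accumulate multiple
    # uncommitted node registrations.
    return sdk_add_new_node(looper, sdk_pool_handle, sdk_wallet_steward,
                            name, tdir, tconf,
                            allPluginsPath=all_plugins_path,
                            wait_till_added=wait_till_added)
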
def test_catchup_with_skipped_commits(tdir, tconf, looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    lagging_stasher = lagging_node.nodeIbStasher
    other_nodes = txnPoolNodeSet[:-1]
    other_stashers = [node.nodeIbStasher for node in other_nodes]

    def lagging_node_state() -> NodeLeecherService.State:
        return lagging_node.ledgerManager._node_leecher._state

    def check_lagging_node_is_not_syncing_audit():
        assert lagging_node_state() != NodeLeecherService.State.SyncingAudit

    def check_lagging_node_done_catchup():
        assert lagging_node_state() == NodeLeecherService.State.Idle

    def check_nodes_ordered_till(nodes: Iterable, view_no: int,
                                 pp_seq_no: int):
        for node in nodes:
            assert compare_3PC_keys(
                (view_no, pp_seq_no),
                node.master_replica.last_ordered_3pc) >= 0

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Setup delayers
    lagging_mid_commits = start_delaying(
        lagging_stasher, delay_3pc(after=3, before=6, msgs=Commit))
    others_mid_commits = start_delaying(
        other_stashers, delay_3pc(after=3, before=6, msgs=Commit))
    start_delaying(lagging_stasher, delay_3pc(before=4, msgs=Commit))

    # Send more requests
    reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                    sdk_wallet_client, 6)

    # Wait until pool is ordered till (0, 3)
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))
    assert lagging_node.master_replica.last_ordered_3pc == (0, 1)

    with delay_rules(lagging_stasher, delay_catchup(DOMAIN_LEDGER_ID)):
        with delay_rules(lagging_stasher, delay_catchup(AUDIT_LEDGER_ID)):
            # Start catchup
            lagging_node.start_catchup()
            looper.runFor(0.5)
            assert lagging_node_state() == \
                NodeLeecherService.State.SyncingAudit

            # Process missing commits on lagging node
            stop_delaying_and_process(lagging_mid_commits)
            looper.runFor(0.5)

        # Allow audit ledger to be caught up
        looper.run(eventually(check_lagging_node_is_not_syncing_audit))
        stop_delaying_and_process(others_mid_commits)

    # Ensure that audit ledger is caught up by lagging node
    looper.run(eventually(check_lagging_node_done_catchup))

    # Ensure that all requests were ordered
    sdk_get_and_check_replies(looper, reqs)

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

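# compare_3PC_keys() comes from plenum.common.util. Its contract, as relied
# on by check_nodes_ordered_till() above: positive if key2 is greater than
# key1, zero if they are equal, negative otherwise. Roughly equivalent to:
def compare_3pc_keys_sketch(key1, key2) -> int:
    # Keys are (view_no, pp_seq_no) pairs: compare views first, then seq nos
    if key1[0] == key2[0]:
        return key2[1] - key1[1]
    return key2[0] - key1[0]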