def test_delayed_instance_changes_after_vcd_for_next_view(looper, txnPoolNodeSet):
    """
    A node is doing a view change to view=1 while the other nodes have already
    finished the view change to view=2.  The node receives a quorum of VCD
    messages for view=2 before a quorum of InstanceChange messages for view=2.
    Nevertheless, the node must not start a view change to view=2 without a
    quorum of InstanceChanges — that is, it must not go to propagate-primary
    mode since it is already in a view change state.  The node should
    eventually finish the view change to view=2 once it receives all VCD and
    IC messages for view=2.
    """
    nodes = txnPoolNodeSet
    slow_node = nodes[-1]
    fast_nodes = [n for n in nodes if n != slow_node]
    slow_stasher = slow_node.nodeIbStasher

    # 1. DO FIRST VIEW CHANGE

    # delay VCD (NewView) messages for the first view change on the slow node
    with delay_rules(slow_stasher, nv_delay()):
        # Trigger view change
        trigger_view_change(nodes)
        waitForViewChange(looper, nodes, expectedViewNo=1)

        # make sure the view change is finished on all nodes except the slow one
        ensureElectionsDone(looper, fast_nodes, instances_list=range(3), customTimeout=30)

        # drop all delayed VCD to view=1, so the slow node stays stuck in view change
        slow_stasher.drop_delayeds()

    # 2. DO SECOND VIEW CHANGE

    # delay Instance Changes so that the slow node receives VCD for view=2
    # before a quorum of InstanceChanges for that view, while it is still
    # doing the view change to view=1
    with delay_rules(slow_stasher, icDelay()):
        # Trigger view change
        trigger_view_change(nodes)
        waitForViewChange(looper, fast_nodes, expectedViewNo=2)

        # make sure the view change is finished on all nodes except the slow one
        ensureElectionsDone(looper, fast_nodes, instances_list=range(3))

        # slow node is still on view=1 and still in view change
        assert slow_node.viewNo == 1
        assert slow_node.view_change_in_progress

        # make sure that the slow node didn't receive IC msgs for view=2
        check_no_ic_msgs(slow_node, 2, fast_nodes)

    # 3. RESET DELAYS AND CHECK

    # once IC messages are delivered the slow node finishes view change to view=2
    waitForViewChange(looper, nodes, expectedViewNo=2)
    ensureElectionsDone(looper, nodes)
    assert not slow_node.view_change_in_progress
    ensure_all_nodes_have_same_data(looper, nodes=nodes)
def do_view_change_with_delayed_commits_on_all_but_one(nodes,
                                                       nodes_without_one_stashers,
                                                       except_node,
                                                       looper,
                                                       sdk_pool_handle,
                                                       sdk_wallet_client):
    """
    Delay COMMITs on every node except `except_node`, let that node order one
    request alone, then trigger a view change and verify the whole pool
    converges and stays functional.
    """
    new_view_no = except_node.viewNo + 1
    old_last_ordered = except_node.master_replica.last_ordered_3pc

    # delay commits for all nodes except the chosen node
    with delay_rules(nodes_without_one_stashers, cDelay(sys.maxsize)):
        # send a single request into the pool
        sent_requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                                 sdk_wallet_client, 1)

        def check_last_ordered(node: Node, expected_3pc):
            assert node.master_replica.last_ordered_3pc == expected_3pc

        # wait until except_node has ordered the txn on its own
        looper.run(eventually(check_last_ordered, except_node,
                              (except_node.viewNo, old_last_ordered[1] + 1)))

        # trigger view change on all nodes
        trigger_view_change(nodes)

        # wait until the view change is done everywhere
        looper.run(eventually(view_change_done, nodes, new_view_no))

    # delays are lifted: the request completes and the pool re-syncs
    sdk_get_replies(looper, sent_requests)
    ensure_all_nodes_have_same_data(looper, nodes)
    sdk_ensure_pool_functional(looper, nodes, sdk_wallet_client, sdk_pool_handle)
def test_view_change_with_different_prepare_certificate(looper, txnPoolNodeSet,
                                                        sdk_pool_handle,
                                                        sdk_wallet_client):
    """
    Check that a node without pre-prepare but with quorum of prepares
    wouldn't use this transaction as a last in prepare certificate
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_node = txnPoolNodeSet[-1]
    # delay preprepares and message response with preprepares.
    with delay_rules(slow_node.nodeIbStasher, ppDelay(delay=sys.maxsize)):
        with delay_rules(slow_node.nodeIbStasher,
                         msg_rep_delay(delay=sys.maxsize,
                                       types_to_delay=[PREPREPARE, ])):
            last_ordered = slow_node.master_replica.last_ordered_3pc
            sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
            # all nodes except the slow one collect a prepare certificate
            # for the new request
            looper.run(eventually(check_prepare_certificate,
                                  txnPoolNodeSet[0:-1],
                                  last_ordered[1] + 1))

            trigger_view_change(txnPoolNodeSet)
            # the slow node never saw the pre-prepare, so its last prepared
            # certificate must not include the new request
            assert slow_node.master_replica._ordering_service.l_last_prepared_certificate_in_view() == \
                (0, last_ordered[1])
    # delays lifted: pool finishes elections and the slow node catches up
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_lag_less_then_catchup(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_client):
    """
    One node misses Commits while a stable checkpoint is formed and a view
    change happens; after the view change it should re-order the missed txn
    and catch up to the pool's pp_seq_no without a full catchup.
    """
    delayed_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {delayed_node})
    # NOTE(review): the returned view number is not used below — the call is
    # kept for its consistency check across nodes
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    last_ordered_before = delayed_node.master_replica.last_ordered_3pc
    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        # Send txns for stable checkpoint
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, CHK_FREQ)
        # Check, that all of not slowed nodes has a stable checkpoint
        for n in other_nodes:
            assert n.master_replica._consensus_data.stable_checkpoint == CHK_FREQ

        # Send another txn. This txn will be reordered after view_change
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        trigger_view_change(txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)

        # the delayed node has not ordered anything yet
        assert delayed_node.master_replica.last_ordered_3pc == last_ordered_before

    # Send txns for stabilize checkpoint on other nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, CHK_FREQ - 1)

    pool_pp_seq_no = get_pp_seq_no(other_nodes)
    looper.run(eventually(lambda: assertExp(
        delayed_node.master_replica.last_ordered_3pc[1] == pool_pp_seq_no)))
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_watermarks_after_view_change(tdir, tconf, looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client):
    """
    Delay commit, checkpoint, InstanceChange and ViewChangeDone messages for
    lagging_node. Start ViewChange. Check that ViewChange finished.
    Reset delays. Check that lagging_node can order transactions and has the
    same data as other nodes.
    """
    lagging_node = txnPoolNodeSet[-1]
    # shrink the log size so the watermark window is actually exercised
    lagging_node.master_replica.config.LOG_SIZE = LOG_SIZE
    start_view_no = lagging_node.viewNo
    with delay_rules(lagging_node.nodeIbStasher,
                     cDelay(), chk_delay(), icDelay(), nv_delay()):
        trigger_view_change(txnPoolNodeSet)
        # all nodes except the lagging one change the view
        waitForViewChange(looper, txnPoolNodeSet[:-1],
                          expectedViewNo=start_view_no + 1,
                          customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                              len(txnPoolNodeSet)))
        ensure_all_nodes_have_same_data(looper, txnPoolNodeSet[:-1])
        # order enough txns to move past the lagging node's watermarks
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 6)
    # delays reset: lagging node must catch up and keep ordering
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_view_change_triggered(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_client):
    """Explicitly trigger a view change and verify the pool re-elects and stays functional."""
    view_no_before = checkViewNoForNodes(txnPoolNodeSet)
    trigger_view_change(txnPoolNodeSet, view_no_before + 1)
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_delay_commits_for_one_node(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client,
                                    slow_node_is_next_primary, vc_counts):
    """
    Delay COMMITs on a single non-primary node (optionally the next primary)
    during one or two consecutive view changes and verify the pool recovers.
    """
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    if vc_counts == 'once':
        expected_view_no = current_view_no + 1
    else:
        expected_view_no = current_view_no + 2
    next_primary = get_next_primary_name(txnPoolNodeSet, expected_view_no)

    # candidate nodes: non-primaries of the current view
    candidates = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet)
                  if not r.isPrimary]
    if slow_node_is_next_primary:
        delayed_node = [n for n in candidates if n.name == next_primary][0]
    else:
        delayed_node = [n for n in candidates if n.name != next_primary][0]

    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)
        trigger_view_change(txnPoolNodeSet)
        if vc_counts == 'twice':
            # start a second view change right on top of the first one
            for node in txnPoolNodeSet:
                node.view_changer.start_view_change(current_view_no + 2)

    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_catchup_to_next_view_during_view_change_0_to_2(txnPoolNodeSet, looper,
                                                        sdk_pool_handle,
                                                        sdk_wallet_steward):
    '''
    1) Lagging node is not a primary for new views
    2) All nodes except the lagging one go to view=1
    3) All nodes except the lagging one order txns on view=1
    4) All nodes except the lagging one go to view=2
    5) All nodes except the lagging one order txns on view=2
    6) Lagging node gets InstanceChanges for view=1 and view=2 => it changes
       to view=2, and catches up till txns from view=2
    7) Make sure that the lagging node is up to date, and can participate in
       consensus
    '''
    lagging_node = txnPoolNodeSet[0]
    other_nodes = txnPoolNodeSet[1:]
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    initial_last_ordered = lagging_node.master_last_ordered_3PC

    # delay all view-change-related traffic for views 0..2 on the lagging node
    with delay_rules(lagging_node.nodeIbStasher,
                     delay_for_view(viewNo=0),
                     delay_for_view(viewNo=1),
                     delay_for_view(viewNo=2)):
        # view change to viewNo=1
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, other_nodes, expectedViewNo=initial_view_no + 1)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes,
                                   instances=range(3))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # order some txns
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 5)

        # view change to viewNo=2
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, other_nodes, expectedViewNo=initial_view_no + 2)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes,
                                   instances=range(3))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # order some txns
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 5)

        # the lagging node saw nothing: still on the old view, nothing ordered
        assert initial_view_no == lagging_node.viewNo
        assert initial_last_ordered == lagging_node.master_last_ordered_3PC

    # make sure that the second View Change happened on the lagging node
    waitForViewChange(looper, [lagging_node],
                      expectedViewNo=initial_view_no + 2, customTimeout=20)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    # NOTE(review): this checks only other_nodes — presumably txnPoolNodeSet
    # was intended so the lagging node's data is compared too; verify
    ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
def test_catchup_to_next_view_during_view_change_by_primary(txnPoolNodeSet,
                                                            looper,
                                                            sdk_pool_handle,
                                                            sdk_wallet_steward):
    '''
    1) Lagging node is a primary for view=1
    2) All nodes except the lagging one start a view change (to view=1)
    3) The nodes can not finish it on time since the Primary for view=1 is
       lagging
    4) All nodes except the lagging one go to view=2 then
    5) All nodes except the lagging one order txns on view=2
    6) Lagging node gets InstanceChanges for view=1 => it changes to view=2,
       and catches up till txns from view=2
    7) Lagging node gets InstanceChanges for view=2 => it changes to view=2
    8) Make sure that the lagging node is up to date, and can participate in
       consensus
    '''
    lagging_node = txnPoolNodeSet[1]
    other_nodes = list(set(txnPoolNodeSet) - {lagging_node})
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    initial_last_ordered = lagging_node.master_last_ordered_3PC

    # outer rule keeps view=2 traffic delayed longer than views 0 and 1
    with delay_rules(lagging_node.nodeIbStasher, delay_for_view(viewNo=2)):
        with delay_rules(lagging_node.nodeIbStasher,
                         delay_for_view(viewNo=0), delay_for_view(viewNo=1)):
            # view change to viewNo=2 since a primary for viewNo=1 is a lagging node
            trigger_view_change(txnPoolNodeSet)
            waitForViewChange(looper, other_nodes,
                              expectedViewNo=initial_view_no + 2,
                              customTimeout=40)
            checkProtocolInstanceSetup(looper=looper, nodes=other_nodes,
                                       instances=range(3))
            ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

            # order some txns
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_steward, 5)

            # lagging node saw nothing yet, but has queued client requests
            assert initial_view_no == lagging_node.viewNo
            assert initial_last_ordered == lagging_node.master_last_ordered_3PC
            assert len(lagging_node.master_replica._ordering_service.
                       requestQueues[DOMAIN_LEDGER_ID]) > 0

        # make sure that the first View Change happened on lagging node
        waitForViewChange(looper, [lagging_node],
                          expectedViewNo=initial_view_no + 1, customTimeout=20)
        assert initial_view_no + 1 == lagging_node.viewNo

    # make sure that the second View Change happened on lagging node
    waitForViewChange(looper, [lagging_node],
                      expectedViewNo=initial_view_no + 2, customTimeout=20)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    # NOTE(review): compares only other_nodes — presumably txnPoolNodeSet was
    # intended so the lagging node's data is compared too; verify
    ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
def test_revert_for_all_after_view_change(looper, helpers,
                                          nodeSetWithIntegratedTokenPlugin,
                                          sdk_pool_handle, fees_set,
                                          mint_tokens, addresses, fees):
    """
    Delay Commits and Prepares on ALL nodes so two uncommitted fee batches
    build up, run a view change (which reverts the uncommitted batches), and
    verify the requests are re-ordered afterwards and all data stays in sync.
    """
    node_set = nodeSetWithIntegratedTokenPlugin
    current_amount = get_amount_from_token_txn(mint_tokens)
    seq_no = get_seq_no(mint_tokens)
    reverted_node = nodeSetWithIntegratedTokenPlugin[-1]

    current_amount, seq_no, _ = send_and_check_nym_with_fees(
        helpers, fees_set, seq_no, looper, addresses, current_amount)
    current_amount, seq_no, _ = send_and_check_transfer(
        helpers, addresses, fees, looper, current_amount, seq_no)

    ensure_all_nodes_have_same_data(looper, node_set)

    with delay_rules([n.nodeIbStasher for n in node_set], cDelay(), pDelay()):
        len_batches_before = len(
            reverted_node.master_replica._ordering_service.batches)
        # neither request can be ordered while Prepares/Commits are delayed
        current_amount, seq_no, resp1 = send_and_check_transfer(
            helpers, addresses, fees, looper, current_amount, seq_no,
            check_reply=False)
        current_amount, seq_no, resp2 = send_and_check_nym_with_fees(
            helpers, fees_set, seq_no, looper, addresses, current_amount,
            check_reply=False)
        looper.runFor(
            waits.expectedPrePrepareTime(
                len(nodeSetWithIntegratedTokenPlugin)))
        len_batches_after = len(
            reverted_node.master_replica._ordering_service.batches)

        """
        Checks, that we have a 2 new batches
        """
        assert len_batches_after - len_batches_before == 2
        trigger_view_change(node_set)
        ensure_view_change_complete(looper, nodeSetWithIntegratedTokenPlugin)

        looper.run(
            eventually(
                lambda: assertExp(reverted_node.mode == Mode.participating)))
    # delays lifted: the reverted requests get re-ordered
    ensure_all_nodes_have_same_data(looper, node_set)

    sdk_get_and_check_replies(looper, resp1)
    sdk_get_and_check_replies(looper, resp2)
    send_and_check_nym_with_fees(helpers, fees_set, seq_no, looper, addresses,
                                 current_amount)
    ensure_all_nodes_have_same_data(looper, node_set)
def test_view_change_triggered_after_ordering(looper, txnPoolNodeSet,
                                              sdk_pool_handle,
                                              sdk_wallet_client):
    """Order some requests first, then trigger a view change and verify the pool stays functional."""
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQ_COUNT)
    view_no_before = checkViewNoForNodes(txnPoolNodeSet)
    trigger_view_change(txnPoolNodeSet, view_no_before + 1)
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_view_change_during_alternating_unstash(looper, txnPoolNodeSet,
                                                sdk_pool_handle,
                                                sdk_wallet_trustee,
                                                sdk_wallet_new_steward, tconf):
    """
    Interleave catchup and 3PC unstashing on a slow node, then start a view
    change while some of its commits are still delayed; the pool must finish
    elections and converge.  The exact sequencing of start_delaying /
    stop_delaying_and_process calls below is order-critical.
    """
    slow_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]
    slow_stasher = slow_node.nodeIbStasher
    other_stashers = [n.nodeIbStasher for n in other_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # Ensure that pool has expected 3PC state
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Prevent ordering of some requests (everything after pp_seq_no 7)
    start_delaying(all_stashers, delay_3pc_after(0, 7, Prepare, Commit))

    # Stop ordering on slow node and send requests
    slow_node_after_5 = start_delaying(slow_stasher,
                                       delay_3pc_after(0, 5, Commit))
    slow_node_until_5 = start_delaying(slow_stasher, delay_3pc_after(0, 0))
    reqs_view_0 = sdk_send_alternating_ledgers_requests(
        looper, sdk_pool_handle, sdk_wallet_trustee, sdk_wallet_new_steward, 8)

    # Make pool order first 2 batches and pause
    pool_after_3 = start_delaying(other_stashers, delay_3pc_after(0, 3))
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))

    # Start catchup, continue ordering everywhere (except two last batches on
    # slow node)
    with delay_rules(slow_stasher, cr_delay()):
        slow_node._do_start_catchup(just_started=False)
        looper.run(eventually(check_catchup_is_started, slow_node))
        stop_delaying_and_process(pool_after_3)
        looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 7))

    # Finish catchup and continue processing on slow node
    looper.run(eventually(check_catchup_is_finished, slow_node))
    stop_delaying_and_process(slow_node_until_5)
    looper.run(eventually(check_nodes_ordered_till, [slow_node], 0, 5))

    # Start view change and allow slow node to get remaining commits
    with delay_rules(all_stashers, icDelay()):
        trigger_view_change(txnPoolNodeSet)
        looper.runFor(0.1)
    stop_delaying_and_process(slow_node_after_5)

    # Ensure that expected number of requests was ordered
    replies = sdk_get_replies(looper, reqs_view_0)
    for req in replies[:6]:
        sdk_check_reply(req)

    # Ensure that everything is ok
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_start_view_change_by_vc_msgs(looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    """
    A node whose InstanceChange messages are dropped must still join the view
    change driven by the other nodes' view-change messages.
    """
    lagging_node = txnPoolNodeSet[-1]
    healthy_nodes = txnPoolNodeSet[:-1]
    with delay_rules_without_processing(lagging_node.nodeIbStasher, icDelay()):
        view_no_before = checkViewNoForNodes(txnPoolNodeSet)
        trigger_view_change(txnPoolNodeSet)
        # the rest of the pool moves to the next view on its own
        looper.run(eventually(checkViewNoForNodes, healthy_nodes,
                              view_no_before + 1))
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_process_three_phase_msg_and_stashed_future_view(txnPoolNodeSet,
                                                         looper, tconf,
                                                         sdk_pool_handle,
                                                         sdk_wallet_steward):
    """
    1. Delay ViewChangeDone messages for the slow_node.
    2. Start view change on all nodes.
    3. Order a new request.
    4. Check that slow_node could not order this request and stashed all 3pc
       messages and other nodes ordered.
    6. Reset delays.
    7. Check that the last request is ordered on the slow_node and stashed
       messages were removed.
    """
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = txnPoolNodeSet[:-1]
    view_no = slow_node.viewNo
    # baseline stash sizes per replica before the experiment
    old_stashed = {inst_id: r.stasher.stash_size(STASH_VIEW_3PC)
                   for inst_id, r in slow_node.replicas.items()}
    with delay_rules([slow_node.nodeIbStasher, ],
                     msg_rep_delay(types_to_delay=[PREPREPARE, PREPARE, COMMIT])):
        with delay_rules([slow_node.nodeIbStasher, ], nv_delay()):
            trigger_view_change(txnPoolNodeSet)
            waitForViewChange(looper, fast_nodes, expectedViewNo=view_no + 1,
                              customTimeout=2 * tconf.NEW_VIEW_TIMEOUT)
            ensureElectionsDone(looper=looper, nodes=fast_nodes,
                                instances_list=range(
                                    fast_nodes[0].requiredNumberOfInstances))
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_steward, 1)
            # slow node is stuck in view change and must stash 3PC traffic
            assert slow_node.view_change_in_progress
            # 1 - pre-prepare msg
            # (len(txnPoolNodeSet) - 2) - prepare msgs
            # (len(txnPoolNodeSet) - 1) - commit msgs
            stashed_master_messages = 2 * (1 + (len(txnPoolNodeSet) - 2) +
                                           (len(txnPoolNodeSet) - 1))
            assert slow_node.master_replica.stasher.stash_size(
                STASH_VIEW_3PC) == old_stashed[0] + stashed_master_messages

    def chk():
        # once delays are reset the stash must be drained and the txn ordered
        for inst_id, r in slow_node.replicas.items():
            assert r.last_ordered_3pc[1] == 2
            assert r.stasher.stash_size(STASH_VIEW_3PC) == 0

    looper.run(eventually(chk))
    waitNodeDataEquality(looper, slow_node, *fast_nodes)
def test_delay_IC_for_next_primary(looper, txnPoolNodeSet, sdk_pool_handle,
                                   sdk_wallet_client):
    """
    Drop InstanceChange messages on the node that will become primary in the
    next view; it must still finish the view change and take primaryship.
    """
    view_no_before = checkViewNoForNodes(txnPoolNodeSet)
    next_primary_name = get_next_primary_name(txnPoolNodeSet,
                                              view_no_before + 1)
    next_primary = [n for n in txnPoolNodeSet
                    if n.name == next_primary_name][0]
    remaining_nodes = list(set(txnPoolNodeSet) - {next_primary})

    with delay_rules_without_processing(next_primary.nodeIbStasher, icDelay()):
        trigger_view_change(txnPoolNodeSet)
        looper.run(eventually(checkViewNoForNodes, remaining_nodes,
                              view_no_before + 1))
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    # the delayed node still became the master primary
    assert next_primary.master_replica.isPrimary
def test_select_primary_after_removed_backup(txnPoolNodeSet, looper,
                                             sdk_pool_handle,
                                             sdk_wallet_client):
    """
    Check correct order of primaries on backup replicas
    """
    node = txnPoolNodeSet[0]
    start_replicas_count = node.replicas.num_replicas
    instance_id = start_replicas_count - 1
    # drop the last backup replica on one node before the view change
    node.replicas.remove_replica(instance_id)

    trigger_view_change(txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    for n in txnPoolNodeSet:
        # removed replica is restored and primaries rotate in node order
        assert n.requiredNumberOfInstances == n.replicas.num_replicas
        for inst_id in range(n.requiredNumberOfInstances):
            assert n.replicas[inst_id].primaryName == \
                txnPoolNodeSet[inst_id + 1].name + ":" + str(inst_id)
def test_replica_removing_with_primary_disconnected(looper, txnPoolNodeSet,
                                                    sdk_pool_handle,
                                                    sdk_wallet_client, tconf,
                                                    tdir, allPluginsPath):
    """
    1. Remove backup primary node.
    2. Check that replicas with the disconnected primary were removed.
    3. Recover the removed node.
    4. Start View Change.
    5. Check that all replicas were restored.
    """
    start_replicas_count = txnPoolNodeSet[0].replicas.num_replicas
    instance_to_remove = 1
    node = txnPoolNodeSet[instance_to_remove]
    # remove backup primary node.
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node)
    txnPoolNodeSet.remove(node)
    looper.removeProdable(node)

    # check that replicas were removed
    def check_replica_removed_on_all_nodes():
        for node in txnPoolNodeSet:
            check_replica_removed(node, start_replicas_count,
                                  instance_to_remove)

    looper.run(
        eventually(check_replica_removed_on_all_nodes,
                   timeout=tconf.TolerateBackupPrimaryDisconnection * 4))
    # `node` here is still the disconnected node (the inner loop variable is
    # local to the checker function)
    assert not node.monitor.isMasterDegraded()
    assert len(node.requests) == 0

    # recover the removed node
    node = start_stopped_node(node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # start View Change
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=1,
                      customTimeout=2 * tconf.NEW_VIEW_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    # check that all replicas were restored
    assert start_replicas_count == node.replicas.num_replicas
def test_view_change_with_next_primary_stopped(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client):
    """
    Stop the node that would be primary in the next view, then trigger a view
    change; the pool must skip that view and settle on the one after it.
    """
    view_no_before = checkViewNoForNodes(txnPoolNodeSet)
    next_primary = get_next_primary_name(txnPoolNodeSet, view_no_before + 1)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            next_primary)
    remaining_nodes = [node for node in txnPoolNodeSet
                       if node.name != next_primary]

    trigger_view_change(remaining_nodes, view_no_before + 1)
    ensureElectionsDone(looper, remaining_nodes, instances_list=range(2),
                        customTimeout=15)
    sdk_ensure_pool_functional(looper, remaining_nodes, sdk_wallet_client,
                               sdk_pool_handle)

    # the unreachable primary forced one extra view bump
    view_no_after = checkViewNoForNodes(remaining_nodes)
    assert view_no_after == view_no_before + 2
def test_view_change_with_next_primary_stopped_and_one_node_lost_commit(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        limitTestRunningTime):
    """
    Drop Commits on one node, disconnect the upcoming primary, run a view
    change on the remaining nodes and verify they converge and stay
    functional.
    """
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary = get_next_primary_name(txnPoolNodeSet, current_view_no + 1)
    # a non-primary node that is not the upcoming primary loses its commits
    delayed_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet)
                    if r.node.name != next_primary][0]
    other_nodes = [n for n in txnPoolNodeSet if n.name != next_primary]

    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)
        disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                                next_primary)
        trigger_view_change(other_nodes)
        ensureElectionsDone(looper, other_nodes, instances_list=range(2),
                            customTimeout=15)

    # delayed commits are dropped on exit; the node recovers via view change
    ensure_all_nodes_have_same_data(looper, other_nodes)
    sdk_ensure_pool_functional(looper, other_nodes, sdk_wallet_client,
                               sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, other_nodes)
def test_view_change_timeout_reset_on_next_view(txnPoolNodeSet, looper, tconf):
    """
    Run two back-to-back view changes while NewView messages are delayed and
    check the view-change timeout is reset between them: no spurious third
    view change happens.
    """
    # Check that all nodes are in view 0
    assert all(n.viewNo == 0 for n in txnPoolNodeSet)

    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    with delay_rules(stashers, nv_delay()):
        # Two consecutive view changes, each waiting a bit more than half of
        # the NEW_VIEW timeout so that an unreset timer would fire
        for expected_view in (1, 2):
            trigger_view_change(txnPoolNodeSet)
            waitForViewChange(looper, txnPoolNodeSet,
                              expectedViewNo=expected_view)
            looper.runFor(0.6 * NEW_VIEW_TIMEOUT)

    # Ensure only 2 view changes happened
    ensureElectionsDone(looper, txnPoolNodeSet)
    assert all(n.viewNo == 2 for n in txnPoolNodeSet)
def test_view_change_on_performance_degraded(looper, txnPoolNodeSet, viewNo,
                                             sdk_pool_handle,
                                             sdk_wallet_steward):
    """
    Test that a view change is done when the performance of master goes down.
    All nodes agree that master performance degraded, a view change happens,
    and the master primary moves to a different node.
    """
    primary_before = get_master_primary_node(list(txnPoolNodeSet))

    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo + 1)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    primary_after = get_master_primary_node(list(txnPoolNodeSet))
    assert primary_before.name != primary_after.name

    waitNodeDataEquality(looper, *txnPoolNodeSet)
def test_view_change_with_lost_new_view(txnPoolNodeSet, looper,
                                        sdk_pool_handle, sdk_wallet_steward,
                                        tconf, tdir, lost_count):
    '''
    Skip processing of lost_count Message Responses with NewView in view
    change; test makes sure that the node eventually finishes view change.

    Fix: the callback previously declared ``global call_count`` while the
    counter was bound as a *local* of this test, so increments targeted a
    module-level name (NameError if it does not exist) and the local counter
    used for the election timeout stayed 0.  ``nonlocal`` binds both to the
    same enclosing-scope variable.
    '''
    node_to_disconnect = txnPoolNodeSet[-1]
    initial_view_no = txnPoolNodeSet[0].viewNo
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    call_count = 0

    def unpatch_after_call(msg, frm):
        # swallow NewView messages until lost_count of them were dropped
        nonlocal call_count
        call_count += 1
        if call_count >= lost_count:
            # unpatch sendToReplica for NewView after lost_count calls
            node_to_disconnect.nodeMsgRouter.add(
                (NewView, node_to_disconnect.sendToReplica))

    # patch sendToReplica for routing NewView
    node_to_disconnect.nodeMsgRouter.add((NewView, unpatch_after_call))

    # trigger view change on all nodes
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet,
                      expectedViewNo=initial_view_no + 1)
    # scale the timeout by the number of NewView messages actually dropped
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet,
                        customTimeout=tconf.NEW_VIEW_TIMEOUT * (call_count + 1) + 5)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
def test_revert_works_for_fees_after_view_change(looper, helpers,
                                                 nodeSetWithIntegratedTokenPlugin,
                                                 sdk_pool_handle, fees_set,
                                                 mint_tokens, addresses, fees):
    """
    Drop view=0 Commits on a single node so it accumulates two uncommitted
    fee batches, run a view change (which reverts them on that node), and
    verify the node rejoins with consistent token state.
    """
    node_set = nodeSetWithIntegratedTokenPlugin
    current_amount = get_amount_from_token_txn(mint_tokens)
    seq_no = get_seq_no(mint_tokens)
    reverted_node = nodeSetWithIntegratedTokenPlugin[-1]

    current_amount, seq_no, _ = send_and_check_nym_with_fees(
        helpers, fees_set, seq_no, looper, addresses, current_amount)
    current_amount, seq_no, _ = send_and_check_transfer(
        helpers, addresses, fees, looper, current_amount, seq_no)

    with delay_rules_without_processing(reverted_node.nodeIbStasher,
                                        delay_3pc(view_no=0, msgs=Commit)):
        len_batches_before = len(
            reverted_node.master_replica._ordering_service.batches)
        # the rest of the pool orders these; the reverted node only pre-pares
        current_amount, seq_no, _ = send_and_check_transfer(
            helpers, addresses, fees, looper, current_amount, seq_no)
        current_amount, seq_no, _ = send_and_check_nym_with_fees(
            helpers, fees_set, seq_no, looper, addresses, current_amount)
        looper.runFor(
            waits.expectedPrePrepareTime(
                len(nodeSetWithIntegratedTokenPlugin)))
        len_batches_after = len(
            reverted_node.master_replica._ordering_service.batches)

        """
        Checks, that we have a 2 new batches
        """
        assert len_batches_after - len_batches_before == 2
        trigger_view_change(node_set)
        ensure_view_change(looper, nodeSetWithIntegratedTokenPlugin)

        looper.run(
            eventually(
                lambda: assertExp(reverted_node.mode == Mode.participating)))
    # delayed commits are discarded on exit; node state recovered via VC
    ensure_all_nodes_have_same_data(looper, node_set)

    send_and_check_nym_with_fees(helpers, fees_set, seq_no, looper, addresses,
                                 current_amount)
    ensure_all_nodes_have_same_data(looper, node_set)
def do_view_change_with_unaligned_prepare_certificates(slow_nodes, nodes,
                                                       looper,
                                                       sdk_pool_handle,
                                                       sdk_wallet_client):
    """
    Perform view change with some nodes reaching lower last prepared
    certificate than others.
    With current implementation of view change this can result with view
    change taking a lot of time.
    """
    fast_nodes = [n for n in nodes if n not in slow_nodes]

    all_stashers = [n.nodeIbStasher for n in nodes]
    slow_stashers = [n.nodeIbStasher for n in slow_nodes]

    # Delay some PREPAREs and all COMMITs
    with delay_rules(slow_stashers, pDelay()):
        with delay_rules(all_stashers, cDelay()):
            # Send request
            request = sdk_send_random_request(looper, sdk_pool_handle,
                                              sdk_wallet_client)

            # Wait until this request is prepared on fast nodes
            looper.run(eventually(check_last_prepared_certificate,
                                  fast_nodes, (0, 1)))
            # Make sure its not prepared on slow nodes
            looper.run(eventually(check_last_prepared_certificate,
                                  slow_nodes, None))

            # Trigger view change
            trigger_view_change(nodes)

        # Now commits are processed

        # Wait until view change is complete
        looper.run(eventually(check_view_change_done, nodes, 1, timeout=60))

    # Finish request gracefully
    sdk_get_reply(looper, request)

    ensure_all_nodes_have_same_data(looper, nodes)
    sdk_ensure_pool_functional(looper, nodes, sdk_wallet_client,
                               sdk_pool_handle)
def do_test_replica_removing_with_backup_degraded(looper, txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client, tconf):
    """
    Node will change view even though it does not find the master to be
    degraded when a quorum of nodes agree that master performance degraded
    """
    start_replicas_count = txnPoolNodeSet[0].replicas.num_replicas
    view_no = txnPoolNodeSet[0].viewNo
    instance_to_remove = 1
    stashers = [node.nodeIbStasher for node in txnPoolNodeSet]
    # starve one backup instance of Commits so it degrades and gets removed
    with delay_rules(stashers, cDelay(delay=sys.maxsize,
                                      instId=instance_to_remove)):
        sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_client,
                                             num_reqs=30, num_batches=15)

        # check that replicas were removed
        def check_replica_removed_on_all_nodes(inst_id=instance_to_remove):
            for n in txnPoolNodeSet:
                check_replica_removed(n, start_replicas_count, inst_id)
                assert not n.monitor.isMasterDegraded()

        looper.run(eventually(check_replica_removed_on_all_nodes,
                              timeout=120))

    # start View Change
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=view_no + 1,
                      customTimeout=2 * tconf.NEW_VIEW_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # check that all replicas were restored
    assert all(start_replicas_count == node.replicas.num_replicas
               for node in txnPoolNodeSet)
def do_view_change_with_pending_request_and_one_fast_node(fast_node, nodes,
                                                          looper,
                                                          sdk_pool_handle,
                                                          sdk_wallet_client):
    """
    Perform view change while processing request, with one node receiving
    commits much sooner than others.
    With current implementation of view change this will result in corrupted
    state of fast node
    """
    fast_stasher = fast_node.nodeIbStasher

    slow_nodes = [n for n in nodes if n != fast_node]
    slow_stashers = [n.nodeIbStasher for n in slow_nodes]

    # Get last prepared certificate in pool
    lpc = last_prepared_certificate(nodes)
    # Get pool current view no
    view_no = lpc[0]

    # Delay all COMMITs
    with delay_rules(slow_stashers, cDelay()):
        with delay_rules(fast_stasher, cDelay()):
            # Send request
            request = sdk_send_random_request(looper, sdk_pool_handle,
                                              sdk_wallet_client)

            # Wait until this request is prepared on N-f nodes
            looper.run(eventually(check_last_prepared_certificate_on_quorum,
                                  nodes, (lpc[0], lpc[1] + 1)))

            # Trigger view change
            trigger_view_change(nodes)

        # Now commits are processed on fast node

        # Wait until view change is complete
        looper.run(eventually(check_view_change_done, nodes, view_no + 1,
                              timeout=60))

    # Finish request gracefully
    sdk_get_reply(looper, request)
def test_replica_clear_collections_after_view_change(looper,
                                                     txnPoolNodeSet,
                                                     sdk_pool_handle,
                                                     sdk_wallet_client,
                                                     tconf,
                                                     tdir,
                                                     allPluginsPath,
                                                     sdk_wallet_steward,
                                                     chkFreqPatched,
                                                     reqs_for_checkpoint):
    """
    1. Delay commits on one instance.
    2. Order a transaction on the master.
    3. Do View Change.
    4. Send 2 batches for finalize checkpoint and cleaning requests queues.
       (1 batch is sent automatically to propagate primaries)
    5. Check that requests from node contains all items from requestsQueue.
    """
    ib_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # Keep backup instance 1 from committing while a transaction is ordered
    # on the master and a view change is run.
    with delay_rules(ib_stashers, cDelay(delay=sys.maxsize, instId=1)):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 1)
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=1,
                          customTimeout=2 * tconf.NEW_VIEW_TIMEOUT)

    # + 1 because of lastPrePrepareSeqNo was not dropped after view_change
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=reqs_for_checkpoint + 1)

    def check_request_queues():
        # One request remains tracked on the node, and the backup replica's
        # domain-ledger request queue has been cleaned out.
        assert len(txnPoolNodeSet[0].requests) == 1
        for n in txnPoolNodeSet:
            backup_queues = n.replicas[1]._ordering_service.requestQueues
            assert len(backup_queues[DOMAIN_LEDGER_ID]) == 0

    looper.run(eventually(check_request_queues))
def test_instance_change_before_vc(looper, txnPoolNodeSet, tconf,
                                   sdk_pool_handle, sdk_wallet_steward):
    """
    An InstanceChange vote sent before a view change is reflected in every
    node's validator info (IC_queue) and is cleared once the view change
    completes.
    """
    master_node = get_master_primary_node(txnPoolNodeSet)
    expected_view_no = master_node.viewNo + 1

    # One node votes for a debug-forced view change.
    panic_node = txnPoolNodeSet[-1]
    send_test_instance_change(panic_node)

    def has_inst_chng_in_validator_info():
        # Every node must record the vote with the right voter and reason.
        for node in txnPoolNodeSet:
            latest_info = node._info_tool.info
            ic_queue = latest_info['Node_info']['View_change_status']['IC_queue']
            assert expected_view_no in ic_queue
            voter_entry = ic_queue[expected_view_no]["Voters"][panic_node.name]
            assert voter_entry['reason'] == Suspicions.DEBUG_FORCE_VIEW_CHANGE.code

    looper.run(eventually(has_inst_chng_in_validator_info))

    trigger_view_change(txnPoolNodeSet)
    looper.run(eventually(checkViewNoForNodes,
                          txnPoolNodeSet,
                          expected_view_no,
                          retryWait=1,
                          timeout=tconf.NEW_VIEW_TIMEOUT))
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet)

    def is_inst_chngs_cleared():
        # The IC queue must be emptied after the view change finishes.
        for node in txnPoolNodeSet:
            latest_info = node._info_tool.info
            assert latest_info['Node_info']['View_change_status']['IC_queue'] == {}

    looper.run(eventually(is_inst_chngs_cleared))
def test_primary_send_incorrect_pp(looper, txnPoolNodeSet, tconf,
                                   allPluginsPath, sdk_pool_handle,
                                   sdk_wallet_steward, monkeypatch):
    """
    Test steps:
    Delay message requests with PrePrepares on `slow_node`
    Patch sending for PrePrepare on the `malicious_primary` to send an invalid
    PrePrepare to slow_node
    Order a new request
    Start a view change
    Make sure it's finished on all nodes
    Make sure that the lagging node has same data with other nodes
    """
    start_view_no = txnPoolNodeSet[0].viewNo
    slow_node = txnPoolNodeSet[-1]
    malicious_primary = txnPoolNodeSet[0]
    honest_nodes = [n for n in txnPoolNodeSet
                    if n not in [slow_node, malicious_primary]]

    catchup_timeout = waits.expectedPoolCatchupTime(nodeCount=len(txnPoolNodeSet))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet,
                                    custom_timeout=catchup_timeout)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 1)

    old_sender = malicious_primary.master_replica._ordering_service._send

    def patched_sender(msg, dst=None, stat=None):
        # Forward the genuine PrePrepare to the honest nodes, but hand the
        # slow node a copy with a corrupted ppTime; then undo the patch so
        # only a single PrePrepare is ever tampered with.
        if isinstance(msg, PrePrepare) and msg:
            old_sender(msg, [n.name for n in honest_nodes], stat)
            pp_dict = msg._asdict()
            pp_dict["ppTime"] += 1
            old_sender(PrePrepare(**pp_dict), [slow_node.name], stat)
            monkeypatch.undo()

    monkeypatch.setattr(malicious_primary.master_replica._ordering_service,
                        '_send', patched_sender)
    # Disable applied-PrePrepare validation on the slow node so the
    # corrupted PrePrepare is accepted.
    monkeypatch.setattr(slow_node.master_replica._ordering_service,
                        '_validate_applied_pre_prepare',
                        lambda a, b, c: None)

    # Delay PREPREPARE message requests so the slow node cannot simply
    # re-request the correct PrePrepare.
    with delay_rules(slow_node.nodeIbStasher,
                     msg_rep_delay(types_to_delay=[PREPREPARE])):
        preprepare_process_num = slow_node.master_replica._ordering_service.spylog.count(
            OrderingService.process_preprepare)
        resp_task = sdk_send_random_request(looper, sdk_pool_handle,
                                            sdk_wallet_steward)

        def chk():
            # The slow node must have processed exactly one more PrePrepare.
            assert preprepare_process_num + 1 == slow_node.master_replica._ordering_service.spylog.count(
                OrderingService.process_preprepare)

        looper.run(eventually(chk))

        _, j_resp = sdk_get_and_check_replies(looper, [resp_task])[0]
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 1)

        # A view change must bring the lagging node back in sync.
        trigger_view_change(txnPoolNodeSet)
        ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet,
                          expectedViewNo=start_view_no + 1)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet,
                            instances_list=[0, 1])
        ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
        sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                                   sdk_wallet_steward, sdk_pool_handle)
def test_replica_removing_after_node_started(looper, txnPoolNodeSet,
                                             sdk_pool_handle, sdk_wallet_client,
                                             tconf, tdir, allPluginsPath,
                                             sdk_wallet_steward):
    """
    1. Remove backup primary node.
    2. Check that replicas with the disconnected primary were removed.
    3. Add new node.
    4. Check that in the new node the replica with the disconnected primary
       was removed.
    5. Recover the removed node.
    6. Start View Change.
    7. Check that all replicas were restored.
    """
    start_view_no = txnPoolNodeSet[0].viewNo
    start_replicas_count = txnPoolNodeSet[0].replicas.num_replicas
    instance_to_remove = txnPoolNodeSet[0].requiredNumberOfInstances - 1
    removed_primary_node = txnPoolNodeSet[instance_to_remove]

    # Take the backup primary offline.
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            removed_primary_node)
    txnPoolNodeSet.remove(removed_primary_node)
    looper.removeProdable(removed_primary_node)

    # Its replica must disappear on every remaining node, without the master
    # being reported as degraded and with no requests left tracked.
    def check_replica_removed_on_all_nodes(inst_id=instance_to_remove):
        for node in txnPoolNodeSet:
            check_replica_removed(node, start_replicas_count, inst_id)
            assert not node.monitor.isMasterDegraded()
            assert len(node.requests) == 0

    looper.run(eventually(check_replica_removed_on_all_nodes,
                          timeout=tconf.TolerateBackupPrimaryDisconnection * 2))

    # Grow the pool by one node; the new node must also lack the dead replica.
    new_steward_wallet, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        "test_steward", "test_node", tdir, tconf, allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    instance_to_remove -= 1
    waitForViewChange(looper, txnPoolNodeSet,
                      expectedViewNo=start_view_no + 1)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    looper.run(eventually(check_replica_removed,
                          new_node,
                          start_replicas_count,
                          instance_to_remove,
                          timeout=tconf.TolerateBackupPrimaryDisconnection * 2))

    # Bring the disconnected node back online.
    removed_primary_node = start_stopped_node(removed_primary_node, looper,
                                              tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(removed_primary_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # A view change should restore the full replica set everywhere.
    trigger_view_change(txnPoolNodeSet)
    ensureElectionsDone(
        looper=looper,
        nodes=txnPoolNodeSet,
        instances_list=range(txnPoolNodeSet[0].requiredNumberOfInstances),
        customTimeout=tconf.TolerateBackupPrimaryDisconnection * 2)
    assert start_replicas_count == removed_primary_node.replicas.num_replicas