def test_future_primaries_replicas_increase(looper, txnPoolNodeSet, sdk_pool_handle,
                                            sdk_wallet_stewards, tdir, tconf, allPluginsPath):
    # Don't delete NodeStates, so we could check them.
    global old_commit
    old_commit = txnPoolNodeSet[0].write_manager.future_primary_handler.commit_batch
    for node in txnPoolNodeSet:
        node.write_manager.future_primary_handler.commit_batch = \
            lambda three_pc_batch, prev_handler_result=None: 0

    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Increase replicas count
    add_new_node(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_stewards[0],
                 tdir, tconf, allPluginsPath)

    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1

    # "seq_no + 2" because 1 domain and 1 pool txn.
    node = txnPoolNodeSet[0]
    with delay_rules(node.nodeIbStasher, cDelay()):
        req = sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                        sdk_wallet_stewards[0], 1)[0][0]
        req = Request(**req)
        three_pc_batch = ThreePcBatch(DOMAIN_LEDGER_ID, 0, 0, 1, time.time(),
                                      randomString(), randomString(),
                                      ['a', 'b', 'c'], [req.digest], pp_digest='')
        primaries = node.write_manager.future_primary_handler.post_batch_applied(three_pc_batch)
        assert len(primaries) == len(initial_primaries) + 1
        assert len(primaries) == len(node.primaries)
def test_no_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # emulate catchup by setting non-synced status
    for node in txnPoolNodeSet:
        node.mode = mode

    check_instance_change_count(txnPoolNodeSet, 0)

    # start View Change
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    old_meths = do_view_change(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        node.view_changer.sendInstanceChange(old_view_no + 1)

    # make sure View Change is not started
    check_no_view_change(looper, txnPoolNodeSet)
    assert old_view_no == checkViewNoForNodes(txnPoolNodeSet)

    # emulate finishing of catchup by setting Participating status
    revert_do_view_change(txnPoolNodeSet, old_meths)
    for node in txnPoolNodeSet:
        node.mode = Mode.participating

    # make sure that View Change happened
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=old_view_no + 1)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
def test_future_primaries_replicas_decrease(looper, txnPoolNodeSet, sdk_pool_handle,
                                            sdk_wallet_stewards, tdir, tconf, allPluginsPath):
    assert len(txnPoolNodeSet) == 7
    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Decrease replicas count
    demote_node(looper, sdk_wallet_stewards[-1], sdk_pool_handle, txnPoolNodeSet[-2])
    txnPoolNodeSet.remove(txnPoolNodeSet[-2])
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1

    state = txnPoolNodeSet[0].write_manager.future_primary_handler.node_states[-1]
    assert len(state.primaries) + 1 == len(initial_primaries)
    assert len(state.primaries) == len(txnPoolNodeSet[0].primaries)

    for node in txnPoolNodeSet:
        node.write_manager.future_primary_handler.commit_batch = old_commit
def test_future_primaries_replicas_increase(looper, txnPoolNodeSet, sdk_pool_handle,
                                            sdk_wallet_stewards, tdir, tconf, allPluginsPath):
    # Don't delete NodeStates, so we could check them.
    global old_commit
    old_commit = txnPoolNodeSet[0].write_manager.future_primary_handler.commit_batch
    for node in txnPoolNodeSet:
        node.write_manager.future_primary_handler.commit_batch = \
            lambda three_pc_batch, prev_handler_result=None: 0

    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Increase replicas count
    add_new_node(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_stewards[0],
                 tdir, tconf, allPluginsPath)

    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1

    # "seq_no + 2" because 1 domain and 1 pool txn.
    state = txnPoolNodeSet[0].write_manager.future_primary_handler.node_states[-1]
    assert len(state.primaries) == len(initial_primaries) + 1
    assert len(state.primaries) == len(txnPoolNodeSet[0].primaries)
def test_lag_less_then_catchup(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    delayed_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {delayed_node})
    checkViewNoForNodes(txnPoolNodeSet)
    last_ordered_before = delayed_node.master_replica.last_ordered_3pc
    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        # Send txns for stable checkpoint
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, CHK_FREQ)
        # Check that all non-delayed nodes have a stable checkpoint
        for n in other_nodes:
            assert n.master_replica._consensus_data.stable_checkpoint == CHK_FREQ

        # Send another txn. This txn will be reordered after view_change
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        trigger_view_change(txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)

        assert delayed_node.master_replica.last_ordered_3pc == last_ordered_before

    # Send txns to stabilize checkpoint on other nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, CHK_FREQ - 1)
    pool_pp_seq_no = get_pp_seq_no(other_nodes)
    looper.run(eventually(lambda: assertExp(
        delayed_node.master_replica.last_ordered_3pc[1] == pool_pp_seq_no)))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_new_primary_lagging_behind(looper, txnPoolNodeSet, sdk_wallet_client,
                                    sdk_pool_handle, tconf):
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary_name = get_next_primary_name(txnPoolNodeSet, initial_view_no + 1)
    next_primary = [n for n in txnPoolNodeSet if n.name == next_primary_name][0]
    other_nodes = [n for n in txnPoolNodeSet if n != next_primary]
    expected_primary_name = get_next_primary_name(txnPoolNodeSet, initial_view_no + 2)

    # Next primary cannot stabilize 1 checkpoint
    with delay_rules(next_primary.nodeIbStasher, cDelay(), pDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, CHK_FREQ)
        ensure_view_change(looper, txnPoolNodeSet)
        looper.run(eventually(check_not_in_view_change, txnPoolNodeSet,
                              timeout=2 * tconf.NEW_VIEW_TIMEOUT))
        ensureElectionsDone(looper=looper, nodes=other_nodes,
                            customTimeout=2 * tconf.NEW_VIEW_TIMEOUT,
                            instances_list=[0, 1])

    assert next_primary_name != expected_primary_name
    assert checkViewNoForNodes(txnPoolNodeSet) == initial_view_no + 2

    # send CHK_FREQ reqs so that slow node will start catch-up
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, CHK_FREQ)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
def test_future_primaries_replicas_decrease(looper, txnPoolNodeSet, sdk_pool_handle,
                                            sdk_wallet_stewards, tdir, tconf, allPluginsPath):
    assert len(txnPoolNodeSet) == 7
    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Decrease replicas count
    demote_node(looper, sdk_wallet_stewards[-1], sdk_pool_handle, txnPoolNodeSet[-2])
    txnPoolNodeSet.remove(txnPoolNodeSet[-2])
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1

    node = txnPoolNodeSet[0]
    with delay_rules(node.nodeIbStasher, cDelay()):
        req = sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                        sdk_wallet_stewards[0], 1)[0][0]
        req = Request(**req)
        three_pc_batch = ThreePcBatch(DOMAIN_LEDGER_ID, 0, 0, 1, time.time(),
                                      randomString(), randomString(),
                                      ['a', 'b', 'c'], [req.digest], pp_digest='')
        primaries = node.write_manager.future_primary_handler.post_batch_applied(three_pc_batch)
        assert len(primaries) + 1 == len(initial_primaries)
        assert len(primaries) == len(txnPoolNodeSet[0].primaries)

    for node in txnPoolNodeSet:
        node.write_manager.future_primary_handler.commit_batch = old_commit
def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE CHANGE on the lagged node, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

    # emulate finishing of catchup by setting Participating status
    lagged_node.mode = Mode.participating

    # make sure that View Change happened on the lagging node
    waitForViewChange(looper, [lagged_node], expectedViewNo=old_view_no + 1,
                      customTimeout=10)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
def test_no_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # emulate catchup by setting non-synced status
    for node in txnPoolNodeSet:
        node.mode = mode

    check_stashed_instance_changes(txnPoolNodeSet, 0)

    # start View Change
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    old_meths = do_view_change(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        vct_service = node.master_replica._view_change_trigger_service
        vct_service._send_instance_change(old_view_no + 1, Suspicions.PRIMARY_DEGRADED)

    # make sure View Change is not started
    check_no_view_change(looper, txnPoolNodeSet)
    assert old_view_no == checkViewNoForNodes(txnPoolNodeSet)

    # emulate finishing of catchup by setting Participating status
    revert_do_view_change(txnPoolNodeSet, old_meths)
    for node in txnPoolNodeSet:
        node.mode = Mode.participating
        node.master_replica.stasher.process_all_stashed(STASH_CATCH_UP)

    # make sure that View Change happened
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=old_view_no + 1)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
def test_propagate_primary_after_primary_restart_view_0(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward, tdir,
        allPluginsPath):
    """
    Delay instance change msgs to prevent view change during primary restart
    to test propagate primary for primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores
    all indices correctly in the case viewNo == 0
    """
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (old_ppseqno > 0)

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, old_primary,
                                            stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary, looper, tconf, tdir,
                                       allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name][0]
    txnPoolNodeSet[idx] = restartedNode

    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert (new_viewNo == old_viewNo)

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert (new_primary.name == old_primary.name)

    # check ppSeqNo is the same
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (new_ppseqno > old_ppseqno)
def test_last_ordered_3pc_reset_if_more_than_new_view(txnPoolNodeSet, looper,
                                                       sdk_pool_handle, sdk_wallet_client):
    """
    Check that if last_ordered_3pc's viewNo on a Replica is greater than the new viewNo
    after view change, then last_ordered_3pc is reset to (0,0).
    It can be that last_ordered_3pc was set for the previous view, since it's set during catch-up.

    Example: a Node has last_ordered = (1, 300), and then the whole pool except this node
    restarted. The new viewNo is 0, but last_ordered is (1, 300), so all new requests will
    be discarded by this Node if we don't reset last_ordered_3pc.
    """
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        node.master_replica.last_ordered_3pc = (old_view_no + 2, 100)

    ensure_view_change_complete(looper, txnPoolNodeSet, customTimeout=60)

    view_no = checkViewNoForNodes(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        assert (view_no, 0) == node.master_replica.last_ordered_3pc

    # Make sure the pool is working
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_no_propagated_future_view_change_while_view_change(
        txnPoolNodeSet, looper):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate view change in progress
    lagged_node.view_changer.view_change_in_progress = True
    old_view_no = checkViewNoForNodes([lagged_node])

    initial_vhdc = lagged_node.view_changer.spylog.count(
        lagged_node.view_changer.process_future_view_vchd_msg.__name__)

    # delay INSTANCE CHANGE on the lagged node, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # check that the lagged node received 3 future VCD messages, but didn't start a new view change
        assert len(other_nodes) + initial_vhdc == \
            lagged_node.view_changer.spylog.count(
                lagged_node.view_changer.process_future_view_vchd_msg.__name__)
        assert old_view_no == checkViewNoForNodes([lagged_node])
def test_demote_promote_restart_after_promotion_7_nodes(txnPoolNodeSet, looper,
                                                         sdk_pool_handle,
                                                         sdk_wallet_steward,
                                                         tdir, tconf, allPluginsPath):
    demoted_node = txnPoolNodeSet[-1]
    rest_nodes = [n for n in txnPoolNodeSet if n != demoted_node]
    starting_view_no = checkViewNoForNodes(txnPoolNodeSet)

    demote_node(looper, sdk_wallet_steward, sdk_pool_handle, demoted_node)
    waitForViewChange(looper, rest_nodes, expectedViewNo=starting_view_no + 1)
    ensureElectionsDone(looper, rest_nodes)
    ensure_all_nodes_have_same_data(looper, rest_nodes)
    sdk_send_random_and_check(looper, rest_nodes, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    starting_view_no = checkViewNoForNodes(rest_nodes)
    promote_node(looper, sdk_wallet_steward, sdk_pool_handle, demoted_node)
    waitForViewChange(looper, rest_nodes, expectedViewNo=starting_view_no + 1)
    ensureElectionsDone(looper, rest_nodes, instances_list=[0, 1, 2])
    ensure_all_nodes_have_same_data(looper, rest_nodes)

    restart_node(looper, txnPoolNodeSet, demoted_node, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)
def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE CHANGE on the lagged node, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes,
                                   instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

    # emulate finishing of catchup by setting Participating status
    lagged_node.mode = Mode.participating

    # make sure that View Change happened on the lagging node
    waitForViewChange(looper, [lagged_node], expectedViewNo=old_view_no + 1,
                      customTimeout=10)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
def test_pool_reaches_quorum_after_f_plus_2_nodes_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf, txnPoolNodeSet, wallet1, client1,
        client1Connected):
    nodes = txnPoolNodeSet
    initial_view_no = nodes[0].viewNo

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])

    stop_node(nodes[0], looper, nodes)
    waitForViewChange(looper, nodes[1:], expectedViewNo=initial_view_no + 1)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])

    stop_node(nodes[1], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)
    checkViewNoForNodes(nodes[2:], initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request, looper, client1, nodes)

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)
    checkViewNoForNodes(nodes[3:], initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request, looper, client1, nodes)

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))

    request = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request, looper, client1, nodes)

    nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))
    waitForViewChange(looper, nodes[1:], expectedViewNo=initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])

    nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        numInstances=getRequiredInstances(nodeCount))
    waitForViewChange(looper, nodes, expectedViewNo=initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])
def test_primary_selection_after_primary_demotion_and_view_changes(
        looper, txnPoolNodeSet, stewardAndWalletForMasterNode, txnPoolMasterNodes):
    """
    Demote primary and do multiple view changes forcing primaries rotation.
    Demoted primary should be skipped without additional view changes.
    """
    viewNo0 = checkViewNoForNodes(txnPoolNodeSet)

    logger.info("1. turn off the node which has the primary replica for the master instance, "
                "this should trigger view change")
    master_node = txnPoolMasterNodes[0]
    client, wallet = stewardAndWalletForMasterNode
    node_data = {ALIAS: master_node.name, SERVICES: []}
    updateNodeData(looper, client, wallet, master_node, node_data)

    restNodes = [node for node in txnPoolNodeSet
                 if node.name != master_node.name]
    ensureElectionsDone(looper, restNodes)

    viewNo1 = checkViewNoForNodes(restNodes)

    assert viewNo1 == viewNo0 + 1
    assert master_node.viewNo == viewNo0
    assert len(restNodes[0].replicas) == 1  # only one instance left
    assert restNodes[0].replicas[0].primaryName != master_node.name

    # ensure pool is working properly
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    logger.info("2. force view change 2 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)

    viewNo2 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo2 == viewNo1 + 1

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    logger.info("3. force view change 3 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)

    viewNo3 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo3 == viewNo2 + 1

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    logger.info("4. force view change 4 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)

    viewNo4 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo4 == viewNo3 + 1

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)
def test_view_change_triggered(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    trigger_view_change(txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
    assert checkViewNoForNodes(txnPoolNodeSet) == current_view_no + 1
def test_primary_selection_after_primary_demotion_and_pool_restart(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward,
        txnPoolMasterNodes, tdir, tconf):
    """
    Demote primary and restart the pool.
    Pool should select new primary and have viewNo=0 after restart.
    """
    logger.info("1. turn off the node which has the primary replica for the master instance")
    master_node = txnPoolMasterNodes[0]
    node_dest = hexToFriendly(master_node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward, sdk_pool_handle,
                         node_dest, master_node.name,
                         None, None, None, None,
                         services=[])

    restNodes = [node for node in txnPoolNodeSet
                 if node.name != master_node.name]
    ensureElectionsDone(looper, restNodes)

    # ensure pool is working properly
    logger.info("2. restart pool")
    # Stopping existing nodes
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # Starting nodes again by creating `Node` objects since that simulates
    # what happens when starting the node with script
    restartedNodes = []
    for node in txnPoolNodeSet:
        config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
        restartedNode = TestNode(node.name,
                                 config_helper=config_helper,
                                 config=tconf,
                                 ha=node.nodestack.ha,
                                 cliha=node.clientstack.ha)
        looper.add(restartedNode)
        restartedNodes.append(restartedNode)

    restNodes = [node for node in restartedNodes
                 if node.name != master_node.name]

    looper.run(checkNodesConnected(restNodes))
    ensureElectionsDone(looper, restNodes)
    checkViewNoForNodes(restNodes, 0)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    primariesIdxs = getPrimaryNodesIdxs(restNodes)
    assert restNodes[primariesIdxs[0]].name != master_node.name
def test_view_not_changed_when_short_disconnection(txnPoolNodeSet, looper,
                                                   sdk_pool_handle, sdk_wallet_client,
                                                   tdir, tconf, allPluginsPath):
    """
    When primary is disconnected but not long enough to trigger the timeout,
    view change should not happen
    """
    pr_node = get_master_primary_node(txnPoolNodeSet)
    view_no = checkViewNoForNodes(txnPoolNodeSet)

    prp_inst_chg_calls = {
        node.name: node.spylog.count(node.propose_view_change.__name__)
        for node in txnPoolNodeSet if node != pr_node
    }

    recv_inst_chg_calls = {
        node.name: node.spylog.count(
            node.view_changer.process_instance_change_msg.__name__)
        for node in txnPoolNodeSet if node != pr_node
    }

    # Disconnect master's primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, pr_node,
                                            timeout=2)
    txnPoolNodeSet.remove(pr_node)
    looper.removeProdable(name=pr_node.name)

    timeout = min(tconf.ToleratePrimaryDisconnection - 1, 1)

    # Reconnect master's primary
    pr_node = start_stopped_node(pr_node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(pr_node)

    def chk2():
        # Schedule an instance change but do not send it
        # since primary joins again
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.propose_view_change.__name__) \
                    > prp_inst_chg_calls[node.name]
                assert node.view_changer.spylog.count(
                    node.view_changer.process_instance_change_msg.__name__) == \
                    recv_inst_chg_calls[node.name]

    looper.run(eventually(chk2, retryWait=.2, timeout=timeout + 1))

    assert checkViewNoForNodes(txnPoolNodeSet) == view_no

    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
def test_quorum_after_f_plus_2_nodes_including_primary_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf, txnPoolNodeSet, wallet1, client1):
    nodes = txnPoolNodeSet

    request1 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request1])

    stop_node(nodes[0], looper, nodes)
    waitForViewChange(looper, nodes[1:], expectedViewNo=1)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))

    request2 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request2])

    stop_node(nodes[1], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[2:], expectedViewNo=1)

    request3 = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request3, looper, client1, nodes)

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    request4 = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request4, looper, client1, nodes)

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    request5 = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request5, looper, client1, nodes)

    nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))
    checkViewNoForNodes(nodes[1:], expectedViewNo=1)

    request6 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request6])

    nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        numInstances=getRequiredInstances(nodeCount))
    checkViewNoForNodes(nodes, expectedViewNo=1)

    request7 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request7])
def test_primary_selection_after_primary_demotion_and_pool_restart(
        looper, txnPoolNodeSet, stewardAndWalletForMasterNode, txnPoolMasterNodes,
        tdir, tconf):
    """
    Demote primary and restart the pool.
    Pool should select new primary and have viewNo=0 after restart.
    """
    logger.info("1. turn off the node which has the primary replica for the master instance")
    master_node = txnPoolMasterNodes[0]
    client, wallet = stewardAndWalletForMasterNode

    node_data = {ALIAS: master_node.name, SERVICES: []}
    updateNodeData(looper, client, wallet, master_node, node_data)

    restNodes = [node for node in txnPoolNodeSet
                 if node.name != master_node.name]
    ensureElectionsDone(looper, restNodes)

    # ensure pool is working properly
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    logger.info("2. restart pool")
    # Stopping existing nodes
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # Starting nodes again by creating `Node` objects since that simulates
    # what happens when starting the node with script
    restartedNodes = []
    for node in txnPoolNodeSet:
        config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
        restartedNode = TestNode(node.name,
                                 config_helper=config_helper,
                                 config=tconf,
                                 ha=node.nodestack.ha,
                                 cliha=node.clientstack.ha)
        looper.add(restartedNode)
        restartedNodes.append(restartedNode)

    restNodes = [node for node in restartedNodes
                 if node.name != master_node.name]

    looper.run(checkNodesConnected(restNodes))
    ensureElectionsDone(looper, restNodes)
    checkViewNoForNodes(restNodes, 0)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    primariesIdxs = getPrimaryNodesIdxs(restNodes)
    assert restNodes[primariesIdxs[0]].name != master_node.name
def test_propagate_primary_after_primary_restart_view_1(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward, tdir,
        allPluginsPath):
    """
    Delay instance change msgs to prevent view change during primary restart
    to test propagate primary for primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores
    all indices correctly in the case viewNo > 0
    """
    ensure_view_change(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=1)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (old_ppseqno > 0)

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, old_primary,
                                            stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary, looper, tconf, tdir,
                                       allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name][0]
    txnPoolNodeSet[idx] = restartedNode

    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert (new_viewNo == old_viewNo)

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert (new_primary.name == old_primary.name)

    # check ppSeqNo is the same
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (new_ppseqno > old_ppseqno)
def test_view_change_triggered_after_ordering(looper, txnPoolNodeSet,
                                              sdk_pool_handle, sdk_wallet_client):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQ_COUNT)
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    trigger_view_change(txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
    assert checkViewNoForNodes(txnPoolNodeSet) == current_view_no + 1
def testInstChangeWithLowerRatioThanDelta(looper, step3, client1):
    sendReqsToNodesAndVerifySuffReplies(looper, client1, 5)

    # wait for every node to run another checkPerformance
    newPerfChecks = waitForNextPerfCheck(looper, step3.nodes, step3.perfChecks)

    # verify all nodes recognize P as degraded
    # for n in step3.nodes:
    #     assert newPerfChecks[n.name].result is False

    # verify all nodes have undergone an instance change
    checkViewNoForNodes(step3.nodes, 1)
def chkViewChange(newViewNo):
    if {n.viewNo for n in step3.nodes} != {newViewNo}:
        tr = []
        for n in step3.nodes:
            tr.append(n.monitor.isMasterThroughputTooLow())
        if all(tr):
            logger.debug('Throughput ratio gone down')
            checkViewNoForNodes(step3.nodes, newViewNo)
        else:
            logger.debug('Master instance has not degraded yet, '
                         'sending more requests')
            sendRandomRequests(wallet1, client1, 1)
            assert False
    else:
        assert True
def provoke_and_check_view_change(looper, nodes, newViewNo, sdk_pool_handle,
                                  sdk_wallet_client):
    if {n.viewNo for n in nodes} == {newViewNo}:
        return True

    # If throughput of every node has gone down then check that
    # view has changed
    tr = [n.monitor.isMasterThroughputTooLow() for n in nodes]
    if all(tr):
        logger.info('Throughput ratio gone down, its {}'.format(tr))
        checkViewNoForNodes(nodes, newViewNo)
    else:
        logger.info('Master instance has not degraded yet, '
                    'sending more requests')
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client)
        assert False
def test_pp_seq_no_starts_from_0_in_new_view(tconf, txnPoolNodeSet, looper,
                                             wallet1, client1, client1Connected):
    # This test fails since last ordered pre-prepare sequence number is
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)

    def chk(count):
        for node in txnPoolNodeSet:
            assert node.master_replica.last_ordered_3pc[1] == count

    chk(0)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
    chk(5)

    new_view_no = ensure_view_change(looper, txnPoolNodeSet)
    assert new_view_no > old_view_no
    chk(5)  # no new requests yet, so last ordered 3PC is (0,5)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1)
    chk(1)  # new request for new view => last ordered 3PC is (0,1)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
    chk(6)
def ensure_view_change(looper, nodes, exclude_from_check=None, custom_timeout=None):
    """
    This method patches the master performance check to return False and thus
    ensures that all given nodes do a view change
    """
    old_view_no = checkViewNoForNodes(nodes)

    old_meths = do_view_change(nodes)

    perf_check_freq = next(iter(nodes)).config.PerfCheckFreq
    timeout = custom_timeout or waits.expectedPoolViewChangeStartedTimeout(
        len(nodes)) + perf_check_freq
    nodes_to_check = nodes if exclude_from_check is None else [
        n for n in nodes if n not in exclude_from_check]
    logger.debug('Checking view no for nodes {}'.format(nodes_to_check))
    looper.run(eventually(checkViewNoForNodes, nodes_to_check, old_view_no + 1,
                          retryWait=1, timeout=timeout))

    revert_do_view_change(nodes, old_meths)
    return old_view_no + 1
def test_delay_commits_for_one_node(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client, slow_node_is_next_primary,
                                    vc_counts):
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    excepted_view_no = current_view_no + 1 if vc_counts == 'once' else current_view_no + 2
    next_primary = get_next_primary_name(txnPoolNodeSet, excepted_view_no)
    pretenders = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet)
                  if not r.isPrimary]
    if slow_node_is_next_primary:
        delayed_node = [n for n in pretenders if n.name == next_primary][0]
    else:
        delayed_node = [n for n in pretenders if n.name != next_primary][0]

    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)
        trigger_view_change(txnPoolNodeSet)
        if vc_counts == 'twice':
            for node in txnPoolNodeSet:
                node.view_changer.start_view_change(current_view_no + 2)

    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_pp_seq_not_starts_from_0_in_new_view(tconf, txnPoolNodeSet, looper,
                                              sdk_pool_handle, sdk_wallet_client):
    # This test fails since last ordered pre-prepare sequence number is
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)

    def chk(count):
        for node in txnPoolNodeSet:
            assert node.master_replica.last_ordered_3pc[1] == count

    batches_count = 0
    chk(batches_count)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    batches_count += 5
    chk(batches_count)

    new_view_no = ensure_view_change(looper, txnPoolNodeSet)
    assert new_view_no > old_view_no
    batches_count += 1
    chk(batches_count)  # After view_change, master primary must initiate 3pc batch

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    batches_count += 1
    chk(batches_count)  # new request for new view => last ordered 3PC is (0,2)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    batches_count += 5
    chk(batches_count)
def testViewNotChangedIfBackupPrimaryDisconnected(txnPoolNodeSet,
                                                  txnPoolNodesLooper, tconf):
    """
    View change does not occur when a backup's primary is disconnected
    """
    # Setup
    nodes = txnPoolNodeSet
    looper = txnPoolNodesLooper

    viewNoBefore = checkViewNoForNodes(nodes)
    primaryNodeForBackupInstance1Before = nodeByName(
        nodes, primaryNodeNameForInstance(nodes, 1))

    # Exercise
    stopNodes([primaryNodeForBackupInstance1Before], looper)

    # Verify
    remainingNodes = set(nodes) - {primaryNodeForBackupInstance1Before}
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    def assertNewPrimariesElected():
        with pytest.raises(AssertionError):
            assert checkViewNoForNodes(remainingNodes) == viewNoBefore + 1
        viewNoAfter = checkViewNoForNodes(remainingNodes, viewNoBefore)
        assert viewNoBefore == viewNoAfter

    looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=30))
def test_resend_instance_change_messages(looper, txnPoolNodeSet, tconf,
                                         sdk_wallet_steward, sdk_pool_handle):
    primary_node = txnPoolNodeSet[0]
    old_view_no = checkViewNoForNodes(txnPoolNodeSet, 0)
    assert primary_node.master_replica.isPrimary
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(3 * tconf.NEW_VIEW_TIMEOUT))
    check_sent_instance_changes_count(txnPoolNodeSet, 0)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, primary_node,
                                            stopNode=False)
    txnPoolNodeSet.remove(primary_node)
    looper.run(eventually(check_count_connected_node, txnPoolNodeSet, 4,
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    looper.run(eventually(check_sent_instance_changes_count, txnPoolNodeSet, 1,
                          timeout=2 * tconf.NEW_VIEW_TIMEOUT))

    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, old_view_no + 1,
                          timeout=3 * tconf.NEW_VIEW_TIMEOUT))
    ensureElectionsDone(looper, txnPoolNodeSet)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, looper, wallet1,
                                               client1, client1Connected, tconf):
    """
    View change occurs when master's primary is disconnected
    """
    # Setup
    nodes = txnPoolNodeSet

    viewNoBefore = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    stopNodes([old_pr_node], looper)
    looper.removeProdable(old_pr_node)
    remainingNodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remainingNodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remainingNodes, viewNoBefore + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remainingNodes)
    new_pr_node = get_master_primary_node(remainingNodes)
    assert old_pr_node != new_pr_node

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
def test_resend_instance_change_messages(looper, txnPoolNodeSet, tconf,
                                         sdk_wallet_steward, sdk_pool_handle):
    primary_node = txnPoolNodeSet[0]
    old_view_no = checkViewNoForNodes(txnPoolNodeSet, 0)
    assert primary_node.master_replica.isPrimary
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(3 * tconf.INSTANCE_CHANGE_TIMEOUT))
    assert set([n.view_changer.instance_change_rounds for n in txnPoolNodeSet]) == {0}

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, primary_node,
                                            stopNode=False)
    txnPoolNodeSet.remove(primary_node)
    looper.run(eventually(partial(check_count_connected_node, txnPoolNodeSet, 4),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    looper.runFor(2 * tconf.INSTANCE_CHANGE_TIMEOUT)
    assert set([n.view_changer.instance_change_rounds for n in txnPoolNodeSet]) == {1}

    looper.runFor(tconf.INSTANCE_CHANGE_TIMEOUT)
    looper.run(eventually(partial(checkViewNoForNodes, txnPoolNodeSet,
                                  expectedViewNo=old_view_no + 1),
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_view_not_changed(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Test that a view change is not done when the performance of master does
    not go down
    """
    """
    Send multiple requests to the client and delay some requests by all backup
    instances to ensure master instance is always faster than backup instances
    and there is no view change
    """

    # Delay PRE-PREPARE for all backup protocol instances so master performs
    # better
    for i in range(1, F + 1):
        nonPrimReps = getNonPrimaryReplicas(txnPoolNodeSet, i)  # type: Iterable[TestReplica]
        for r in nonPrimReps:
            r.node.nodeIbStasher.delay(ppDelay(10, i))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=0)
def testViewNotChanged(looper: Looper, nodeSet: TestNodeSet, up, client1):
    """
    Test that a view change is not done when the performance of master does
    not go down
    """
    """
    Send multiple requests to the client and delay some requests by all backup
    instances to ensure master instance is always faster than backup instances
    and there is no view change
    """

    # Delay PRE-PREPARE for all backup protocol instances so master performs
    # better
    for i in range(1, F + 1):
        nonPrimReps = getNonPrimaryReplicas(nodeSet, i)  # type: Iterable[TestReplica]
        for r in nonPrimReps:
            r.node.nodeIbStasher.delay(ppDelay(10, i))

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 5)
    checkViewNoForNodes(nodeSet, 0)
def test_no_propagated_future_view_change_while_view_change(txnPoolNodeSet, looper):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate view change in progress
    lagged_node.view_changer.view_change_in_progress = True
    old_view_no = checkViewNoForNodes([lagged_node])

    initial_vhdc = lagged_node.view_changer.spylog.count(
        lagged_node.view_changer.process_future_view_vchd_msg.__name__)

    # delay INSTANCE CHANGE on the lagged node, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes,
                                   instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # check that the lagged node received 3 future VCD messages, but didn't start a new view change
        assert len(other_nodes) + initial_vhdc == \
            lagged_node.view_changer.spylog.count(
                lagged_node.view_changer.process_future_view_vchd_msg.__name__)
        assert old_view_no == checkViewNoForNodes([lagged_node])
def test_master_primary_different_from_previous_view_for_itself(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    After a view change, primary must be different from previous primary for
    master instance, it does not matter for other instance. Break it into
    2 tests, one where the primary is malign and votes for itself but is still
    not made primary in the next view.
    """
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    pr = slow_primary(txnPoolNodeSet, 0, delay=10)
    old_pr_node = pr.node

    def _get_undecided_inst_id(self):
        undecideds = [i for i, r in self.replicas if r.isPrimary is None]
        # Try to nominate for the master instance
        return undecideds, 0

    # Patching old primary's elector's method to nominate itself
    # again for the new view
    old_pr_node.elector._get_undecided_inst_id = types.MethodType(
        _get_undecided_inst_id, old_pr_node.elector)

    # View change happens
    provoke_and_wait_for_view_change(looper, txnPoolNodeSet, old_view_no + 1,
                                     sdk_pool_handle, sdk_wallet_client)

    # Elections done
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    # New primary is not same as old primary
    assert getPrimaryReplica(txnPoolNodeSet, 0).node.name != old_pr_node.name
    # All other nodes discarded the nomination by the old primary
    for node in txnPoolNodeSet:
        if node != old_pr_node:
            assert countDiscarded(node.elector,
                                  'of master in previous view too') == 1
    # The new primary can still process requests
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
def test_view_changes_if_master_primary_disconnected(txnPoolNodeSet, looper,
                                                     sdk_pool_handle,
                                                     sdk_wallet_client, tdir,
                                                     tconf, allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """
    # Setup
    nodes = txnPoolNodeSet

    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_pr_node, stopNode=True)
    looper.removeProdable(old_pr_node)

    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf, tdir,
                                     allPluginsPath)
    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, old_view_no + 1,
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    assert len(getAllReturnVals(old_pr_node.view_changer,
                                old_pr_node.view_changer._start_view_change_if_possible,
                                compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert not old_pr_node.view_changer._next_view_indications
def test_pp_seq_no_starts_from_0_in_new_view(tconf, txnPoolNodeSet, looper,
                                             sdk_pool_handle, sdk_wallet_client):
    # This test fails since last ordered pre-prepare sequence number is
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)

    def chk(count):
        for node in txnPoolNodeSet:
            assert node.master_replica.last_ordered_3pc[1] == count

    chk(0)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    chk(5)

    new_view_no = ensure_view_change(looper, txnPoolNodeSet)
    assert new_view_no > old_view_no
    chk(5)  # no new requests yet, so last ordered 3PC is (0,5)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    chk(1)  # new request for new view => last ordered 3PC is (0,1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    chk(6)
def test_view_not_changed_when_short_disconnection(txnPoolNodeSet, looper,
                                                   sdk_pool_handle,
                                                   sdk_wallet_client, tconf):
    """
    When primary is disconnected but not long enough to trigger the timeout,
    view change should not happen
    """
    pr_node = get_master_primary_node(txnPoolNodeSet)
    view_no = checkViewNoForNodes(txnPoolNodeSet)

    lost_pr_calls = {node.name: node.spylog.count(
        node.lost_master_primary.__name__)
        for node in txnPoolNodeSet if node != pr_node}

    prp_inst_chg_calls = {node.name: node.spylog.count(
        node.propose_view_change.__name__)
        for node in txnPoolNodeSet if node != pr_node}

    recv_inst_chg_calls = {node.name: node.spylog.count(
        node.view_changer.process_instance_change_msg.__name__)
        for node in txnPoolNodeSet if node != pr_node}

    def chk1():
        # Check that non-primary nodes detect losing connection with
        # the primary
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.lost_master_primary.__name__) \
                    > lost_pr_calls[node.name]

    def chk2():
        # Schedule an instance change but do not send it
        # since primary joins again
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.propose_view_change.__name__) \
                    > prp_inst_chg_calls[node.name]
                assert node.view_changer.spylog.count(
                    node.view_changer.process_instance_change_msg.__name__) \
                    == recv_inst_chg_calls[node.name]

    # Disconnect master's primary
    for node in txnPoolNodeSet:
        if node != pr_node:
            node.nodestack.getRemote(pr_node.nodestack.name).disconnect()

    timeout = min(tconf.ToleratePrimaryDisconnection - 1, 1)
    looper.run(eventually(chk1, retryWait=.2, timeout=timeout))

    # Reconnect master's primary
    for node in txnPoolNodeSet:
        if node != pr_node:
            node.nodestack.retryDisconnected()

    looper.run(eventually(chk2, retryWait=.2, timeout=timeout + 1))

    def chk3():
        # Check the view does not change
        with pytest.raises(AssertionError):
            assert checkViewNoForNodes(txnPoolNodeSet) == view_no + 1

    looper.run(eventually(chk3, retryWait=1, timeout=10))

    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
def test_quorum_after_f_plus_2_nodes_including_primary_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf, txnPoolNodeSet, sdk_pool_handle,
        sdk_wallet_client):
    timeout = sdk_eval_timeout(1, len(txnPoolNodeSet))
    nodes = txnPoolNodeSet

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stop_node(nodes[0], looper, nodes)
    waitForViewChange(looper, nodes[1:], expectedViewNo=1)
    ensureElectionsDone(looper, nodes[1:],
                        instances_list=range(getRequiredInstances(nodeCount)))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stop_node(nodes[1], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[2:], expectedViewNo=1)

    sdk_reqs3 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs3, timeout=timeout)
        sdk_check_reply(req_res[0])

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    sdk_reqs4 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs4, timeout=timeout)
        sdk_check_reply(req_res[0])

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    sdk_reqs5 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs5, timeout=timeout)
        sdk_check_reply(req_res[0])

    nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes[1:],
                        instances_list=range(getRequiredInstances(nodeCount)),
                        customTimeout=60)
    checkViewNoForNodes(nodes[1:], expectedViewNo=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        instances_list=range(getRequiredInstances(nodeCount)),
                        customTimeout=60)
    checkViewNoForNodes(nodes, expectedViewNo=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
def test_disconnected_node_with_lagged_view_pulls_up_its_view_on_reconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    Verifies that a disconnected node with a lagged view accepts the current view
    from the other nodes on re-connection.
    Steps:
    1. Provoke view change to 1.
    2. Ensure that all the nodes complete view change to 1.
    3. Disconnect one node from the rest of the nodes in the pool.
    4. Provoke view change to 2.
    5. Ensure that all the nodes except for the disconnected one complete
       view change to 2 and the disconnected node remains in the view 1.
    6. Provoke view change to 3.
    7. Ensure that all the nodes except for the disconnected one complete
       view change to 3 and the disconnected node remains in the view 1.
    8. Connect the disconnected node to the rest of the nodes in the pool.
    9. Ensure that the re-connected node completes view change to 3.
    10. Ensure that all the nodes participate in consensus.
    """
    checkViewNoForNodes(txnPoolNodeSet, 0)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    checkViewNoForNodes(txnPoolNodeSet, 1)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    lagged_node = getNonPrimaryReplicas(txnPoolNodeSet)[-1].node
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, lagged_node,
                                            stopNode=False)
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper, other_nodes,
                        instances_list=range(getRequiredInstances(len(txnPoolNodeSet))))
    ensure_all_nodes_have_same_data(looper, other_nodes)

    checkViewNoForNodes(other_nodes, 2)
    checkViewNoForNodes([lagged_node], 1)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper, other_nodes,
                        instances_list=range(getRequiredInstances(len(txnPoolNodeSet))))
    ensure_all_nodes_have_same_data(looper, other_nodes)

    checkViewNoForNodes(other_nodes, 3)
    checkViewNoForNodes([lagged_node], 1)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, lagged_node)
    waitForViewChange(looper, [lagged_node], 3,
                      customTimeout=waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)))
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    checkViewNoForNodes(txnPoolNodeSet, 3)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_primary_selection_after_primary_demotion_and_view_changes(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward,
        txnPoolMasterNodes):
    """
    Demote primary and do multiple view changes forcing primaries rotation.
    Demoted primary should be skipped without additional view changes.
    """
    viewNo0 = checkViewNoForNodes(txnPoolNodeSet)

    logger.info("1. demote the node which has the primary replica of the "
                "master instance, this should trigger view change")
    master_node = txnPoolMasterNodes[0]
    node_dest = hexToFriendly(master_node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward, sdk_pool_handle,
                         node_dest, master_node.name,
                         None, None,
                         None, None,
                         services=[])

    restNodes = [node for node in txnPoolNodeSet
                 if node.name != master_node.name]
    ensureElectionsDone(looper, restNodes)

    viewNo1 = checkViewNoForNodes(restNodes)

    assert viewNo1 == viewNo0 + 1
    assert master_node.viewNo == viewNo0
    assert len(restNodes[0].replicas) == 1  # only one instance left
    assert restNodes[0].replicas[0].primaryName != master_node.name

    # ensure pool is working properly
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    logger.info("2. force view change 2 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)

    viewNo2 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo2 == viewNo1 + 1

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    logger.info("3. force view change 3 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)

    viewNo3 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo3 == viewNo2 + 1

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    logger.info("4. force view change 4 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)

    viewNo4 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo4 == viewNo3 + 1

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)
def test_view_change_after_back_to_quorum_with_disconnected_primary(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client, tdir,
        tconf, allPluginsPath):
    assert len(txnPoolNodeSet) == 4

    pr_node = get_master_primary_node(txnPoolNodeSet)
    assert pr_node.name == "Alpha"

    # 1. Initiate view change by primary (Alpha) restart
    nodes = ensure_view_change_by_primary_restart(looper,
                                                  txnPoolNodeSet,
                                                  tconf,
                                                  tdir,
                                                  allPluginsPath,
                                                  customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)

    # Now primary should be Beta
    pr_node = get_master_primary_node(nodes)
    assert pr_node.name == "Beta"

    # 2. Stop non-primary node Delta, no view changes are expected
    non_primary_to_stop = [n for n in nodes if n.name == "Delta"][0]
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, non_primary_to_stop)
    looper.removeProdable(non_primary_to_stop)
    remaining_nodes = list(set(nodes) - {non_primary_to_stop})

    # Primary is going to be stopped, remember the instance change message
    # counts to ensure that no view change happened while the number of
    # connected nodes is less than quorum.
    ic_cnt = {}
    for n in remaining_nodes:
        ic_cnt[n.name] = n.view_changer.spylog.count(
            ViewChanger.sendInstanceChange.__name__)

    # 3. Disconnect primary
    disconnect_node_and_ensure_disconnected(
        looper, remaining_nodes, pr_node)
    looper.removeProdable(pr_node)

    # Wait for more than the ToleratePrimaryDisconnection timeout and check
    # that no InstanceChange messages are present.
    looper.runFor(tconf.ToleratePrimaryDisconnection + 5)
    remaining_nodes = list(set(remaining_nodes) - {pr_node})
    for n in remaining_nodes:
        assert ic_cnt[n.name] == n.view_changer.spylog.count(
            ViewChanger.sendInstanceChange.__name__)

    view_no = checkViewNoForNodes(remaining_nodes)

    # 4. Start Delta (non-primary); now the primary (Beta) is disconnected
    # but there is a quorum to choose a new one.
    restartedNode = start_stopped_node(non_primary_to_stop, looper, tconf,
                                       tdir, allPluginsPath,
                                       delay_instance_change_msgs=False)
    remaining_nodes = remaining_nodes + [restartedNode]

    # 5. Check that view change happened.
    waitForViewChange(looper, remaining_nodes, expectedViewNo=(view_no + 1),
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)

    # ensure pool is working properly
    sdk_send_random_and_check(looper, remaining_nodes, sdk_pool_handle,
                              sdk_wallet_client, 3)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
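# ensure_view_change_by_primary_restart() (step 1 above) comes from the test
# helpers and is not shown in this section. The sketch below is an assumption
# about its behaviour, built only from helpers already used in these tests:
# stop the current master primary, wait for the remaining nodes to agree on
# the next view, then restart the old primary and wait for elections. The
# real helper may additionally delay InstanceChange messages for the
# restarted node, hence the _sketch suffix here.
def ensure_view_change_by_primary_restart_sketch(looper, nodes, tconf, tdir,
                                                 allPluginsPath,
                                                 customTimeout=None):
    view_no = checkViewNoForNodes(nodes)
    old_primary = get_master_primary_node(nodes)
    # stopping the master primary should trigger a view change on the rest
    disconnect_node_and_ensure_disconnected(looper, nodes, old_primary)
    looper.removeProdable(old_primary)
    rest = [n for n in nodes if n != old_primary]
    waitForViewChange(looper, rest, expectedViewNo=view_no + 1,
                      customTimeout=customTimeout)
    # bring the old primary back and let it join the new view
    restarted = start_stopped_node(old_primary, looper, tconf, tdir,
                                   allPluginsPath)
    new_nodes = rest + [restarted]
    ensureElectionsDone(looper, new_nodes)
    return new_nodes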
def test_view_not_changed_when_primary_disconnected_from_less_than_quorum(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Less than a quorum of nodes lose connection with the primary; this should
    not trigger a view change as the protocol can move ahead.
    """
    pr_node = get_master_primary_node(txnPoolNodeSet)
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    partitioned_rep = npr[0]
    partitioned_node = partitioned_rep.node

    lost_pr_calls = partitioned_node.spylog.count(
        partitioned_node.lost_master_primary.__name__)

    recv_inst_chg_calls = {node.name: node.spylog.count(
        node.view_changer.process_instance_change_msg.__name__)
        for node in txnPoolNodeSet
        if node != partitioned_node and node != pr_node}

    view_no = checkViewNoForNodes(txnPoolNodeSet)
    orig_retry_meth = partitioned_node.nodestack.retryDisconnected

    def wont_retry(self, exclude=None):
        # Do not attempt to retry connection
        pass

    # simulating a partition here
    # Disconnect a node from only the primary of the master and don't retry
    # to connect to it
    partitioned_node.nodestack.retryDisconnected = types.MethodType(
        wont_retry, partitioned_node.nodestack)

    r = partitioned_node.nodestack.getRemote(pr_node.nodestack.name)
    r.disconnect()

    def chk1():
        # Check that the partitioned node detects losing connection with the
        # primary and sends an instance change which is received by the other
        # nodes except the primary (since it is disconnected from the primary)
        assert partitioned_node.spylog.count(
            partitioned_node.lost_master_primary.__name__) > lost_pr_calls
        for node in txnPoolNodeSet:
            if node != partitioned_node and node != pr_node:
                assert node.view_changer.spylog.count(
                    node.view_changer.process_instance_change_msg.__name__) > \
                       recv_inst_chg_calls[node.name]

    looper.run(eventually(chk1, retryWait=1, timeout=10))

    def chk2():
        # Check the view does not change
        with pytest.raises(AssertionError):
            assert checkViewNoForNodes(txnPoolNodeSet) == view_no + 1

    looper.run(eventually(chk2, retryWait=1, timeout=10))

    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Repair the connection so the node is no longer partitioned
    partitioned_node.nodestack.retryDisconnected = types.MethodType(
        orig_retry_meth, partitioned_node.nodestack)

    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Partitioned node should eventually have the same ledger and state as
    # the others
    waitNodeDataEquality(looper, partitioned_node,
                         *[n for n in txnPoolNodeSet
                           if n != partitioned_node])
def assertNewPrimariesElected():
    # Note: remainingNodes and viewNoBefore are expected to be defined in the
    # enclosing test. Despite its name, this checker verifies that the view
    # has NOT advanced: the "view increased" assertion must fail, and the
    # view number afterwards must equal the one recorded before.
    with pytest.raises(AssertionError):
        assert checkViewNoForNodes(remainingNodes) == viewNoBefore + 1
    viewNoAfter = checkViewNoForNodes(remainingNodes, viewNoBefore)
    assert viewNoBefore == viewNoAfter
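# A nested checker like assertNewPrimariesElected() would typically be driven
# with the same eventually() pattern used by chk1/chk2 elsewhere in this
# section, for example (the timeout value is illustrative only):
# looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=30))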
def test_reconnect_primary_and_not_primary(looper, txnPoolNodeSet,
                                           sdk_wallet_steward,
                                           sdk_pool_handle, tconf):
    """
    Test steps:
    Pool of 7 nodes, so the count of instances must be 3.
    1. Choose a node that is not a primary on any replica (index 3).
    2. Disconnect it.
    3. Ensure that the number of replicas was decreased.
    4. Choose the current primary node (must be index 0).
    5. Disconnect the primary.
    6. Ensure that the view change completed and a new primary was selected.
    7. Add back the node from step 1.
    8. Add back the node from step 4.
    9. Check that the count of instances is f + 1 = 3.
    10. Send some requests and check that the pool works.
    """
    restNodes = set(txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    assert txnPoolNodeSet[0].master_replica.isPrimary
    node_after_all_primary = txnPoolNodeSet[3]
    # Disconnect node after all primaries (after all backup primaries)
    disconnect_node_and_ensure_disconnected(looper, restNodes,
                                            node_after_all_primary,
                                            stopNode=False)
    # -------------------------------------------------------
    restNodes.remove(node_after_all_primary)
    looper.run(eventually(partial(check_count_connected_node, restNodes, 6),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    # Get the primary node of the master replica
    primary_node = txnPoolNodeSet[0]
    assert primary_node.master_replica.isPrimary
    old_view_no = checkViewNoForNodes(restNodes, 0)
    # disconnect primary node
    disconnect_node_and_ensure_disconnected(looper, restNodes, primary_node,
                                            stopNode=False)
    # -------------------------------------------------------
    restNodes.remove(primary_node)
    looper.run(eventually(partial(check_count_connected_node, restNodes, 5),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    looper.run(eventually(partial(checkViewNoForNodes, restNodes,
                                  expectedViewNo=old_view_no + 1),
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    logger.debug("restNodes: {}".format(restNodes))
    restNodes.add(node_after_all_primary)
    # Return back the node after all primaries
    reconnect_node_and_ensure_connected(looper, restNodes,
                                        node_after_all_primary)
    looper.run(checkNodesConnected(restNodes,
                                   customTimeout=5 * tconf.RETRY_TIMEOUT_RESTRICTED))
    looper.run(eventually(partial(check_count_connected_node, restNodes, 6),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    assert len(set([len(n.replicas) for n in restNodes])) == 1
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    # Return back the primary node
    restNodes.add(primary_node)
    reconnect_node_and_ensure_connected(looper, restNodes, primary_node)
    looper.run(checkNodesConnected(restNodes,
                                   customTimeout=5 * tconf.RETRY_TIMEOUT_RESTRICTED))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle,
                              sdk_wallet_steward, 5)
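# check_count_connected_node() used above is a module-level helper that is
# not shown in this section. A minimal sketch, assuming plenum's Node exposes
# connectedNodeCount (the number of connected nodes, counting the node
# itself); the real helper may differ:
def check_count_connected_node(nodes, expected_connected_count):
    for node in nodes:
        # every remaining node should see the same sized pool
        assert node.connectedNodeCount == expected_connected_count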
def test_quorum_after_f_plus_2_nodes_but_not_primary_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client):
    nodes = txnPoolNodeSet

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stop_node(nodes[4], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:4], expectedViewNo=0)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stop_node(nodes[3], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:3], expectedViewNo=0)

    sdk_reqs3 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs3)
        sdk_check_reply(req_res[0])

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:2], expectedViewNo=0)

    sdk_reqs4 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs4)
        sdk_check_reply(req_res[0])

    nodes[4] = start_stopped_node(nodes[4], looper, tconf, tdir, allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:2] + nodes[4:], expectedViewNo=0)

    sdk_reqs5 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs5)
        sdk_check_reply(req_res[0])

    nodes[3] = start_stopped_node(nodes[3], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes[:2] + nodes[3:],
                        instances_list=range(getRequiredInstances(nodeCount)))
    checkViewNoForNodes(nodes[:2] + nodes[3:], expectedViewNo=0)

    sdk_reqs6 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    sdk_get_replies(looper, sdk_reqs6)

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        instances_list=range(getRequiredInstances(nodeCount)))
    checkViewNoForNodes(nodes, expectedViewNo=0)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
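# The quorum tests above reference a module-level nodeCount constant and a
# getRequiredInstances() helper, neither of which is shown in this section.
# A minimal sketch under the assumption that the pool has 7 nodes and that
# the number of protocol instances is f + 1 (one master plus f backups),
# with f taken from the usual n >= 3f + 1 bound:
from plenum.common.util import getMaxFailures

nodeCount = 7


def getRequiredInstances(node_count: int) -> int:
    # f + 1 instances, e.g. 3 instances for a 7-node pool (f = 2)
    return getMaxFailures(node_count) + 1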
def test_view_change_gc_in_between_3pc_all_nodes_delays(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Test that the garbage collector compares the whole 3PC key (viewNo,
    ppSeqNo) and does not remove messages from nodes' queues that have a
    higher viewNo than the last ordered one, even if their ppSeqNo is less
    or equal.
    """
    numNodes = len(txnPoolNodeSet)
    viewNo = checkViewNoForNodes(txnPoolNodeSet)

    # 1 send two messages one by one separately to make
    # the node pool work with two batches
    # -> last_ordered_3pc = (+0, 2) [+0 means from the initial state]
    # (last_ordered_3pc here and further is tracked
    # for master instances only because non-master ones have
    # specific logic of its management which we don't care about in
    # this test, see Replica::_setup_for_non_master)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    last_ordered_3pc = (viewNo, 2)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 2)

    # 2 do view change
    # -> GC should remove it from nodes' queues
    # -> viewNo = +1
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 0)

    # 3 slow down processing of 3PC messages for all nodes (all replica
    # instances) randomly and send one more message
    # -> not ordered (last_ordered_3pc is still equal to (+0, 2)) but
    # primaries should at least send PRE-PREPAREs
    # TODO could it be not enough for waiting that at least the primary
    # has sent PRE-PREPARE
    propagationTimeout = waits.expectedClientRequestPropagationTime(numNodes)
    delay_3pc_messages(txnPoolNodeSet, 0, delay=propagationTimeout * 2)
    delay_3pc_messages(txnPoolNodeSet, 1, delay=propagationTimeout * 2)
    requests = sdk_send_random_request(looper, sdk_pool_handle,
                                       sdk_wallet_client)

    def checkPrePrepareSentAtLeastByPrimary():
        for node in txnPoolNodeSet:
            for replica in node.replicas.values():
                if replica.isPrimary:
                    assert len(replica.sentPrePrepares)

    looper.run(eventually(checkPrePrepareSentAtLeastByPrimary,
                          retryWait=0.1,
                          timeout=propagationTimeout))

    # 4 do view change
    # -> GC shouldn't remove anything because
    # last_ordered_3pc (+0, 1) < last message's 3pc key (+1, 1)
    # -> viewNo = 2
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNoNew = checkViewNoForNodes(txnPoolNodeSet)
    # another view change could happen because of slow nodes
    assert viewNoNew - viewNo in (1, 2)
    viewNo = viewNoNew
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 5 reset delays and wait for replies
    # -> new primaries should send new 3pc for the last message
    # with 3pc key (+2, 1)
    # -> they should be ordered
    # -> last_ordered_3pc = (+2, 1)
    reset_delays_and_process_delayeds(txnPoolNodeSet)
    sdk_get_replies(looper, [requests])

    checkViewNoForNodes(txnPoolNodeSet, viewNo)
    last_ordered_3pc = (viewNo, 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 6 do view change
    # -> GC should remove them
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 0)
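# check_nodes_last_ordered_3pc() and check_nodes_requests_size() used by the
# GC test above are module-level helpers not shown in this section. A minimal
# sketch, assuming the master replica tracks last_ordered_3pc and the node
# keeps its requests queue in node.requests (the real helpers may differ):
def check_nodes_last_ordered_3pc(nodes, last_ordered_3pc):
    for node in nodes:
        # only the master instance is checked, see the comment in the test
        assert node.master_replica.last_ordered_3pc == last_ordered_3pc


def check_nodes_requests_size(nodes, size):
    for node in nodes:
        assert len(node.requests) == size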