def test_view_change_retry_by_timeout(txnPoolNodeSet, looper, tconf, setup,
                                      sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, nv_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)
        # First view change should fail, because of delayed NewView
        # messages. This then leads to the new view change that we need.
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper,
                                nodes=txnPoolNodeSet,
                                customTimeout=1.5 * NEW_VIEW_TIMEOUT)

    # Now that NewView messages are unblocked, view changes should finish successfully
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method was called one time
    check_watchdog_called_expected_times(txnPoolNodeSet, timeout_callback_stats, 1)

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)

def test_state_regenerated_from_ledger(looper, txnPoolNodeSet,
                                       client1, wallet1, client1Connected,
                                       tconf, tdirWithPoolTxns, allPluginsPath):
    """
    Node loses its state database but recreates it from ledger after start
    """
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1,
                                           5 * sent_batches, sent_batches)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    node_to_stop = txnPoolNodeSet[-1]
    node_state = node_to_stop.states[DOMAIN_LEDGER_ID]
    assert not node_state.isEmpty
    state_db_path = node_state._kv.db_path
    nodeHa, nodeCHa = HA(*node_to_stop.nodestack.ha), HA(*node_to_stop.clientstack.ha)
    node_to_stop.stop()
    looper.removeProdable(node_to_stop)

    shutil.rmtree(state_db_path)

    restarted_node = TestNode(node_to_stop.name,
                              basedirpath=tdirWithPoolTxns,
                              config=tconf,
                              ha=nodeHa,
                              cliha=nodeCHa,
                              pluginPaths=allPluginsPath)
    looper.add(restarted_node)
    txnPoolNodeSet[-1] = restarted_node

    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, restarted_node, *txnPoolNodeSet[:-1])

def test_view_change_on_empty_ledger(txnPoolNodeSet, looper):
    """
    Check that view change is done when there are no txns in the ledger
    """
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

def test_view_change_with_different_ic(looper, txnPoolNodeSet,
                                       sdk_pool_handle, sdk_wallet_client,
                                       tconf, tdir, allPluginsPath):
    """
    1. panic_node (Delta) sends InstanceChange for all nodes.
    2. Restart nodes_to_restart (Beta, Gamma).
    3. nodes_to_restart send InstanceChanges for all nodes.
    4. Ensure elections are done.
    """
    nodes_to_restart = txnPoolNodeSet[1:3]
    panic_node = txnPoolNodeSet[-1]
    view_no = txnPoolNodeSet[0].viewNo

    panic_node.view_changer.on_master_degradation()
    for n in nodes_to_restart:
        _restart_node(looper, txnPoolNodeSet, n, tconf, tdir, allPluginsPath)
    nodes_to_restart = txnPoolNodeSet[1:3]

    for n in nodes_to_restart:
        n.view_changer.on_master_degradation()

    def check():
        assert panic_node.view_change_in_progress

    looper.run(eventually(check))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    for node in txnPoolNodeSet:
        assert node.viewNo > view_no

def test_catchup_with_all_nodes_sending_cons_proofs_dead(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, logsearch):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 10)

    log_re_ask, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    old_re_ask_count = len(log_re_ask)

    catchup_reqs = {
        node.name: start_delaying(node.nodeIbStasher, cqDelay())
        for node in other_nodes
    }
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[
        AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(
        eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure consistency proofs (ledger sizes) were gathered from all other
    # nodes while they were still responsive
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests only from nodes that didn't respond first
    for node_id, node_reqs in catchup_reqs.items():
        if node_id not in audit_catchup_service._nodes_ledger_sizes:
            stop_delaying_and_process(node_reqs)

    # Check catchup finishes successfully, and there were reasks
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    assert len(log_re_ask) - old_re_ask_count > 0

def test_replay_recorded_msgs(txnPoolNodesLooper, txnPoolNodeSet,
                              some_txns_done, testNodeClass,
                              node_config_helper_class, tconf, tdir,
                              allPluginsPath, tmpdir_factory):
    # Run a pool of nodes with each having a recorder.
    # After some txns, record state, replay each node's recorder on a
    # clean node and check that state matches the initial state
    ensure_all_nodes_have_same_data(txnPoolNodesLooper, txnPoolNodeSet)

    for node in txnPoolNodeSet:
        txnPoolNodesLooper.removeProdable(node)

    for node in txnPoolNodeSet:
        node.stop()

    config = getConfigOnce()

    reload_modules_for_replay(tconf)

    replayable_node_class, basedirpath = get_replayable_node_class(
        tmpdir_factory, tdir, testNodeClass, config)

    print('-------------Replaying now---------------------')
    for node in txnPoolNodeSet:
        create_replayable_node_and_check(txnPoolNodesLooper, txnPoolNodeSet,
                                         node, replayable_node_class,
                                         node_config_helper_class, tconf,
                                         basedirpath, allPluginsPath)

def test_resend_instance_change_messages(looper,
                                         txnPoolNodeSet,
                                         tconf,
                                         sdk_wallet_steward,
                                         sdk_pool_handle):
    primary_node = txnPoolNodeSet[0]
    old_view_no = checkViewNoForNodes(txnPoolNodeSet, 0)
    assert primary_node.master_replica.isPrimary
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(3 * tconf.NEW_VIEW_TIMEOUT))
    check_sent_instance_changes_count(txnPoolNodeSet, 0)
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_node,
                                            stopNode=False)
    txnPoolNodeSet.remove(primary_node)
    looper.run(
        eventually(check_count_connected_node, txnPoolNodeSet, 4,
                   timeout=5, acceptableExceptions=[AssertionError]))
    looper.run(
        eventually(check_sent_instance_changes_count, txnPoolNodeSet, 1,
                   timeout=2 * tconf.NEW_VIEW_TIMEOUT))
    looper.run(
        eventually(checkViewNoForNodes, txnPoolNodeSet, old_view_no + 1,
                   timeout=3 * tconf.NEW_VIEW_TIMEOUT))
    ensureElectionsDone(looper, txnPoolNodeSet)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def sdk_change_bls_key(looper, txnPoolNodeSet,
                       node,
                       sdk_pool_handle,
                       sdk_wallet_steward,
                       add_wrong=False,
                       new_bls=None,
                       new_key_proof=None):
    if add_wrong:
        _, new_blspk, key_proof = create_default_bls_crypto_factory().generate_bls_keys()
    else:
        new_blspk, key_proof = init_bls_keys(node.keys_dir, node.name)
    key_in_txn = new_bls or new_blspk
    bls_key_proof = new_key_proof or key_proof
    node_dest = hexToFriendly(node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward, sdk_pool_handle,
                         node_dest, node.name,
                         None, None,
                         None, None,
                         bls_key=key_in_txn,
                         services=None,
                         key_proof=bls_key_proof)
    poolSetExceptOne = list(txnPoolNodeSet)
    poolSetExceptOne.remove(node)
    waitNodeDataEquality(looper, node, *poolSetExceptOne)
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_add_new_nym(looper, sdk_pool_handle, sdk_wallet_steward,
                    alias=randomString(5))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    return new_blspk

def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE CHANGE on lagged nodes, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

    # emulate finishing of catchup by setting Participating status
    lagged_node.mode = Mode.participating

    # make sure that View Change happened on lagging node
    waitForViewChange(looper, [lagged_node], expectedViewNo=old_view_no + 1,
                      customTimeout=10)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

def sdk_change_bls_key(looper, txnPoolNodeSet,
                       node,
                       sdk_pool_handle,
                       sdk_wallet_steward,
                       add_wrong=False,
                       new_bls=None):
    new_blspk = init_bls_keys(node.keys_dir, node.name)
    key_in_txn = new_bls or new_blspk \
        if not add_wrong \
        else base58.b58encode(randomString(128).encode())
    node_dest = hexToFriendly(node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward, sdk_pool_handle,
                         node_dest, node.name,
                         None, None,
                         None, None,
                         bls_key=key_in_txn,
                         services=None)
    poolSetExceptOne = list(txnPoolNodeSet)
    poolSetExceptOne.remove(node)
    waitNodeDataEquality(looper, node, *poolSetExceptOne)
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_add_new_nym(looper, sdk_pool_handle, sdk_wallet_steward,
                    alias=randomString(5))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    return new_blspk

def test_view_change_on_empty_ledger(nodeSet, up, looper):
    """
    Check that view change is done when there are no txns in the ledger
    """
    ensure_view_change(looper, nodeSet)
    ensureElectionsDone(looper=looper, nodes=nodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=nodeSet)

def test_multiple_view_change_retries_by_timeouts(
        txnPoolNodeSet, looper, tconf, setup,
        sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted each time
    when the previous one is timed out
    """
    _, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, vcd_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)

        # Wait until timeout callback is called 3 times
        looper.run(eventually(check_watchdog_called_expected_times,
                              txnPoolNodeSet, timeout_callback_stats, 3,
                              retryWait=1,
                              timeout=3 * VIEW_CHANGE_TIMEOUT + 2))

        # View changes should fail
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet, customTimeout=1)

    # This view change must be completed with no problems
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # 4 view changes must have been initiated (initial one + 3 retries)
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 4

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)

def test_different_ledger_request_interleave(tconf, looper, txnPoolNodeSet,
                                             client1, wallet1, one_node_added,
                                             client1Connected,
                                             tdir, client_tdir,
                                             tdirWithPoolTxns, steward1,
                                             stewardWallet, allPluginsPath):
    """
    Send pool and domain ledger requests such that they interleave, and do
    view change in between and verify the pool is functional
    """
    new_node = one_node_added
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Send domain ledger requests but don't wait for replies
    requests = sendRandomRequests(wallet1, client1, 2)
    # Add another node by sending pool ledger request
    _, _, new_theta = nodeThetaAdded(looper, txnPoolNodeSet, tdir, client_tdir,
                                     tconf, steward1, stewardWallet,
                                     allPluginsPath, name='new_theta')

    # Send more domain ledger requests but don't wait for replies
    requests.extend(sendRandomRequests(wallet1, client1, 3))

    # Do view change without waiting for replies
    ensure_view_change(looper, nodes=txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, client1, requests=requests)

    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)

    new_steward, new_steward_wallet = addNewSteward(looper, client_tdir,
                                                    steward1, stewardWallet,
                                                    'another_ste')
    # Send another pool ledger request (NODE) but don't wait for completion of
    # request
    next_node_name = 'next_node'
    r = sendAddNewNode(tdir, tconf, next_node_name, new_steward,
                       new_steward_wallet)
    node_req = r[0]

    # Send more domain ledger requests but don't wait for replies
    requests = [node_req,
                *sendRandomRequests(new_steward_wallet, new_steward, 5)]

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, new_steward, requests=requests)

    # Make sure pool is functional
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)

def test_state_regenerated_from_ledger(looper,
                                       nodeSet, tconf, tdir,
                                       sdk_pool_handle,
                                       sdk_wallet_trustee,
                                       allPluginsPath):
    """
    Node loses its state database but recreates it from ledger after start.
    Checking ATTRIB txns too since they store some data off ledger too
    """
    endorsers = []
    for i in range(5):
        endorsers.append(sdk_add_new_nym(looper, sdk_pool_handle,
                                         sdk_wallet_trustee,
                                         'TA' + str(i),
                                         ENDORSER_STRING))
        sdk_add_raw_attribute(looper, sdk_pool_handle, endorsers[-1],
                              randomString(6), randomString(10))

    for wh in endorsers:
        for i in range(3):
            sdk_add_new_nym(looper, sdk_pool_handle, wh, 'NP1' + str(i))

    ensure_all_nodes_have_same_data(looper, nodeSet)

    node_to_stop = nodeSet[-1]
    node_state = node_to_stop.states[DOMAIN_LEDGER_ID]
    assert not node_state.isEmpty
    state_db_path = node_state._kv.db_path
    node_to_stop.cleanupOnStopping = False
    node_to_stop.stop()
    looper.removeProdable(node_to_stop)
    ensure_node_disconnected(looper, node_to_stop, nodeSet[:-1])

    shutil.rmtree(state_db_path)

    config_helper = NodeConfigHelper(node_to_stop.name, tconf, chroot=tdir)
    restarted_node = TestNode(
        node_to_stop.name,
        config_helper=config_helper,
        config=tconf,
        pluginPaths=allPluginsPath,
        ha=node_to_stop.nodestack.ha,
        cliha=node_to_stop.clientstack.ha)
    looper.add(restarted_node)
    nodeSet[-1] = restarted_node

    looper.run(checkNodesConnected(nodeSet))
    # Need some time as `last_ordered_3PC` is compared too and that is
    # communicated through catchup
    waitNodeDataEquality(looper, restarted_node, *nodeSet[:-1])

    # Pool is still functional
    for wh in endorsers:
        sdk_add_new_nym(looper, sdk_pool_handle, wh, 'NP--' + randomString(5))

    ensure_all_nodes_have_same_data(looper, nodeSet)

def test_view_change_retry_by_timeout(
        txnPoolNodeSet, looper, tconf, setup, sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, vcd_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)
        # First view change should fail, because of delayed ViewChangeDone
        # messages. This then leads to new view change that we need.
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper,
                                nodes=txnPoolNodeSet,
                                customTimeout=1.5 * VIEW_CHANGE_TIMEOUT)

    # Now as ViewChangeDone messages are unblocked view changes should finish successfully
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method was called one time
    check_watchdog_called_expected_times(txnPoolNodeSet, timeout_callback_stats, 1)

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)

def test_state_regenerated_from_ledger(looper, tdirWithPoolTxns,
                                       tdirWithDomainTxnsUpdated, nodeSet,
                                       tconf, trustee, trusteeWallet,
                                       allPluginsPath):
    """
    Node loses its state database but recreates it from ledger after start.
    Checking ATTRIB txns too since they store some data off ledger too
    """
    trust_anchors = []
    for i in range(5):
        trust_anchors.append(getClientAddedWithRole(nodeSet,
                                                    tdirWithPoolTxns, looper,
                                                    trustee, trusteeWallet,
                                                    'TA' + str(i),
                                                    role=TRUST_ANCHOR))
        addRawAttribute(looper, *trust_anchors[-1], randomString(6),
                        randomString(10), dest=trust_anchors[-1][1].defaultId)

    for tc, tw in trust_anchors:
        for i in range(3):
            getClientAddedWithRole(nodeSet, tdirWithPoolTxns, looper,
                                   tc, tw, 'NP1' + str(i))

    ensure_all_nodes_have_same_data(looper, nodeSet)

    node_to_stop = nodeSet[-1]
    node_state = node_to_stop.states[DOMAIN_LEDGER_ID]
    assert not node_state.isEmpty
    state_db_path = node_state._kv._dbPath
    node_to_stop.cleanupOnStopping = False
    node_to_stop.stop()
    looper.removeProdable(node_to_stop)
    ensure_node_disconnected(looper, node_to_stop.name, nodeSet[:-1])

    shutil.rmtree(state_db_path)

    restarted_node = TestNode(node_to_stop.name,
                              basedirpath=tdirWithPoolTxns,
                              config=tconf,
                              pluginPaths=allPluginsPath,
                              ha=node_to_stop.nodestack.ha,
                              cliha=node_to_stop.clientstack.ha)
    looper.add(restarted_node)
    nodeSet[-1] = restarted_node

    looper.run(checkNodesConnected(nodeSet))
    # Need some time as `last_ordered_3PC` is compared too and that is
    # communicated through catchup
    waitNodeDataEquality(looper, restarted_node, *nodeSet[:-1])

    # Pool is still functional
    for tc, tw in trust_anchors:
        getClientAddedWithRole(nodeSet, tdirWithPoolTxns, looper,
                               tc, tw, 'NP--{}'.format(tc.name))

    ensure_all_nodes_have_same_data(looper, nodeSet)

def test_old_instance_change_discarding(txnPoolNodeSet, looper, tconf):
    view_no = txnPoolNodeSet[0].viewNo
    first_nodes = txnPoolNodeSet[:2]
    second_nodes = txnPoolNodeSet[2:]

    for node in first_nodes:
        node.view_changer.on_master_degradation()

    def chk_ic_discard():
        for n in txnPoolNodeSet:
            assert not n.view_changer.instanceChanges.has_view(view_no + 1)
            for frm in first_nodes:
                assert not n.view_changer.instanceChanges.has_inst_chng_from(view_no + 1,
                                                                             frm.name)

    looper.run(eventually(chk_ic_discard,
                          timeout=tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 10))

    for node in second_nodes:
        node.view_changer.on_master_degradation()

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert node.viewNo == view_no

def view_change_in_between_3pc(looper, nodes, slow_nodes,
                               sdk_pool_handle,
                               sdk_wallet_client,
                               slow_delay=1, wait=None):
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 4)
    delay_3pc_messages(slow_nodes, 0, delay=slow_delay)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 10)
    if wait:
        looper.runFor(wait)

    ensure_view_change(looper, nodes)
    looper.run(eventually(check_not_in_view_change, nodes))

    reset_delays_and_process_delayeds(slow_nodes)

    ensureElectionsDone(looper=looper, nodes=nodes)
    ensure_all_nodes_have_same_data(looper, nodes)

    sdk_send_random_and_check(looper, nodes, sdk_pool_handle,
                              sdk_wallet_client, 5, total_timeout=30)
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle,
                              sdk_wallet_client, 5, total_timeout=30)

def test_last_ordered_3pc_reset_if_more_than_new_view(txnPoolNodeSet, looper,
                                                      sdk_pool_handle, sdk_wallet_client):
    """
    Check that if last_ordered_3pc's viewNo on a Replica is greater than the new viewNo
    after view change, then last_ordered_3pc is reset to (0,0).
    It can be that last_ordered_3pc was set for the previous view, since it's set during catch-up

    Example: a Node has last_ordered = (1, 300), and then the whole pool except this node restarted.
    The new viewNo is 0, but last_ordered is (1, 300), so all new requests will be discarded by this
    Node if we don't reset last_ordered_3pc
    """
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        node.master_replica.last_ordered_3pc = (old_view_no + 2, 100)

    ensure_view_change_complete(looper, txnPoolNodeSet, customTimeout=60)

    view_no = checkViewNoForNodes(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        assert (view_no, 0) == node.master_replica.last_ordered_3pc

    # Make sure the pool is working
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def all_nodes_view_change(looper, txnPoolNodeSet, stewardWallet, steward1,
                          client1, wallet1, client1Connected):
    for _ in range(5):
        send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
        ensure_view_change(looper, txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)
        ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_send_valid_txn_athr_agrmt_succeeds(looper, txnPoolNodeSet,
                                            sdk_pool_handle, sdk_wallet_trustee):
    req = looper.loop.run_until_complete(
        prepare_txn_author_agreement(sdk_wallet_trustee[1]))
    rep = sdk_sign_and_send_prepared_request(looper, sdk_wallet_trustee,
                                             sdk_pool_handle, req)
    sdk_get_and_check_replies(looper, [rep])

    req = json.loads(req)
    version = req[OPERATION][TXN_AUTHOR_AGREEMENT_VERSION]
    text = req[OPERATION][TXN_AUTHOR_AGREEMENT_TEXT]
    digest = ConfigReqHandler._taa_digest(version, text)

    # TODO: Replace this with get transaction
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    for node in txnPoolNodeSet:
        config_req_handler = get_config_req_handler(node)
        assert config_req_handler.get_taa_digest() == digest.encode()
        assert config_req_handler.get_taa_digest(version) == digest.encode()

        taa = config_req_handler.state.get(
            ConfigReqHandler._state_path_taa_digest(digest))
        assert taa is not None

        taa = json.loads(taa.decode())
        assert taa[TXN_AUTHOR_AGREEMENT_VERSION] == version
        assert taa[TXN_AUTHOR_AGREEMENT_TEXT] == text

def ensure_view_change_complete_by_primary_restart(
        looper, nodes, tconf, tdirWithPoolTxns, allPluginsPath):
    nodes = ensure_view_change_by_primary_restart(
        looper, nodes, tconf, tdirWithPoolTxns, allPluginsPath)
    ensureElectionsDone(looper=looper, nodes=nodes)
    ensure_all_nodes_have_same_data(looper, nodes)
    return nodes

def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, looper, wallet1,
                                               client1, client1Connected, tconf):
    """
    View change occurs when master's primary is disconnected
    """
    # Setup
    nodes = txnPoolNodeSet

    viewNoBefore = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    stopNodes([old_pr_node], looper)
    looper.removeProdable(old_pr_node)
    remainingNodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remainingNodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remainingNodes, viewNoBefore + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remainingNodes)
    new_pr_node = get_master_primary_node(remainingNodes)
    assert old_pr_node != new_pr_node

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)

def test_delay_commits_for_one_node(looper,
                                    txnPoolNodeSet,
                                    sdk_pool_handle,
                                    sdk_wallet_client,
                                    slow_node_is_next_primary,
                                    vc_counts):
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    excepted_view_no = current_view_no + 1 if vc_counts == 'once' else current_view_no + 2
    next_primary = get_next_primary_name(txnPoolNodeSet, excepted_view_no)
    pretenders = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet)
                  if not r.isPrimary]
    if slow_node_is_next_primary:
        delayed_node = [n for n in pretenders if n.name == next_primary][0]
    else:
        delayed_node = [n for n in pretenders if n.name != next_primary][0]

    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)
        trigger_view_change(txnPoolNodeSet)
        if vc_counts == 'twice':
            for node in txnPoolNodeSet:
                node.view_changer.start_view_change(current_view_no + 2)

    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_view_change_after_some_txns(txnPoolNodesLooper, txnPoolNodeSet,
                                     some_txns_done, testNodeClass, viewNo,  # noqa
                                     sdk_pool_handle, sdk_wallet_client,
                                     node_config_helper_class, tconf, tdir,
                                     allPluginsPath, tmpdir_factory):
    """
    Check that view change is done after processing some of txns
    """
    ensure_view_change(txnPoolNodesLooper, txnPoolNodeSet)
    ensureElectionsDone(looper=txnPoolNodesLooper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(txnPoolNodesLooper, nodes=txnPoolNodeSet)

    sdk_send_random_and_check(txnPoolNodesLooper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 10)
    ensure_all_nodes_have_same_data(txnPoolNodesLooper, txnPoolNodeSet)

    for node in txnPoolNodeSet:
        txnPoolNodesLooper.removeProdable(node)
        node.stop()

    config = getConfigOnce()

    reload_modules_for_replay(tconf)

    replayable_node_class, basedirpath = get_replayable_node_class(
        tmpdir_factory, tdir, testNodeClass, config)

    print('-------------Replaying now---------------------')

    for node in txnPoolNodeSet:
        create_replayable_node_and_check(txnPoolNodesLooper, txnPoolNodeSet,
                                         node, replayable_node_class,
                                         node_config_helper_class, tconf,
                                         basedirpath, allPluginsPath)

def testPostingThroughput(postingStatsEnabled,
                          decreasedMonitoringTimeouts,
                          looper,
                          txnPoolNodeSet,
                          sdk_wallet_client, sdk_pool_handle):
    config = decreasedMonitoringTimeouts
    reqCount = 10
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              reqCount)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    looper.runFor(WIND_SIZE * MIN_CNT)

    for node in txnPoolNodeSet:
        assert node.monitor.highResThroughput > 0
        assert node.monitor.totalRequests == reqCount

    # TODO: Add implementation to actually call firebase plugin
    # and test if firebase plugin is sending total request count
    # if node is primary

    looper.runFor(config.DashboardUpdateFreq)

    for node in txnPoolNodeSet:
        assert node.monitor.spylog.count(Monitor.sendThroughput.__name__) > 0

def test_state_regenerated_from_ledger(looper,
                                       nodeSet, tconf, tdir,
                                       sdk_pool_handle,
                                       sdk_wallet_trustee,
                                       allPluginsPath):
    """
    Node loses its state database but recreates it from ledger after start.
    Checking ATTRIB txns too since they store some data off ledger too
    """
    trust_anchors = []
    for i in range(5):
        trust_anchors.append(sdk_add_new_nym(looper, sdk_pool_handle,
                                             sdk_wallet_trustee,
                                             'TA' + str(i),
                                             TRUST_ANCHOR_STRING))
        sdk_add_raw_attribute(looper, sdk_pool_handle, trust_anchors[-1],
                              randomString(6), randomString(10))

    for wh in trust_anchors:
        for i in range(3):
            sdk_add_new_nym(looper, sdk_pool_handle, wh, 'NP1' + str(i))

    ensure_all_nodes_have_same_data(looper, nodeSet)

    node_to_stop = nodeSet[-1]
    node_state = node_to_stop.states[DOMAIN_LEDGER_ID]
    assert not node_state.isEmpty
    state_db_path = node_state._kv.db_path
    node_to_stop.cleanupOnStopping = False
    node_to_stop.stop()
    looper.removeProdable(node_to_stop)
    ensure_node_disconnected(looper, node_to_stop, nodeSet[:-1])

    shutil.rmtree(state_db_path)

    config_helper = NodeConfigHelper(node_to_stop.name, tconf, chroot=tdir)
    restarted_node = TestNode(
        node_to_stop.name,
        config_helper=config_helper,
        config=tconf,
        pluginPaths=allPluginsPath,
        ha=node_to_stop.nodestack.ha,
        cliha=node_to_stop.clientstack.ha)
    looper.add(restarted_node)
    nodeSet[-1] = restarted_node

    looper.run(checkNodesConnected(nodeSet))
    # Need some time as `last_ordered_3PC` is compared too and that is
    # communicated through catchup
    waitNodeDataEquality(looper, restarted_node, *nodeSet[:-1])

    # Pool is still functional
    for wh in trust_anchors:
        sdk_add_new_nym(looper, sdk_pool_handle, wh, 'NP--' + randomString(5))

    ensure_all_nodes_have_same_data(looper, nodeSet)

def do_view_change_with_delayed_commits_on_all_but_one(nodes,
                                                       nodes_without_one_stashers,
                                                       except_node,
                                                       looper,
                                                       sdk_pool_handle,
                                                       sdk_wallet_client):
    new_view_no = except_node.viewNo + 1
    old_last_ordered = except_node.master_replica.last_ordered_3pc
    # delay commits for all nodes except node X
    with delay_rules(nodes_without_one_stashers, cDelay(sys.maxsize)):
        # send one request
        requests2 = sdk_send_random_requests(looper, sdk_pool_handle,
                                             sdk_wallet_client, 1)

        def last_ordered(node: Node, last_ordered):
            assert node.master_replica.last_ordered_3pc == last_ordered

        # wait until except_node ordered txn
        looper.run(
            eventually(last_ordered, except_node,
                       (except_node.viewNo, old_last_ordered[1] + 1)))

        # trigger view change on all nodes
        for node in nodes:
            node.view_changer.on_master_degradation()

        # wait for view change done on all nodes
        looper.run(eventually(view_change_done, nodes, new_view_no))

    sdk_get_replies(looper, requests2)

    ensure_all_nodes_have_same_data(looper, nodes)
    sdk_ensure_pool_functional(looper, nodes, sdk_wallet_client, sdk_pool_handle)

def test_pp_obsolescence_check_fail_for_delayed(tdir, tconf,
                                                looper,
                                                txnPoolNodeSet,
                                                sdk_pool_handle,
                                                sdk_wallet_client):
    delay = PATCHED_ACCEPTABLE_DEVIATION_PREPREPARE_SECS + 1
    lagging_node = txnPoolNodeSet[-1]

    # Prevent lagging node from ordering
    with delay_rules(lagging_node.nodeIbStasher, ppDelay(), pDelay(), cDelay()):
        # Order request on all nodes except lagging one
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        looper.run(asyncio.sleep(delay))

    # Now delayed 3PC messages reach lagging node, so any delayed transactions
    # can be processed (PrePrepare would be discarded but requested after that),
    # ensure that all nodes will have same data after that
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    pp_count = get_count(lagging_node.master_replica,
                         lagging_node.master_replica.processPrePrepare)

    assert pp_count > 0
    assert get_timestamp_suspicion_count(lagging_node) == pp_count

def test_resend_instance_change_messages(looper,
                                         txnPoolNodeSet,
                                         tconf,
                                         sdk_wallet_steward,
                                         sdk_pool_handle):
    primary_node = txnPoolNodeSet[0]
    old_view_no = checkViewNoForNodes(txnPoolNodeSet, 0)
    assert primary_node.master_replica.isPrimary
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(3 * tconf.INSTANCE_CHANGE_TIMEOUT))
    assert set([n.view_changer.instance_change_rounds for n in txnPoolNodeSet]) == {0}
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_node,
                                            stopNode=False)
    txnPoolNodeSet.remove(primary_node)
    looper.run(eventually(partial(check_count_connected_node, txnPoolNodeSet, 4),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    looper.runFor(2 * tconf.INSTANCE_CHANGE_TIMEOUT)
    assert set([n.view_changer.instance_change_rounds for n in txnPoolNodeSet]) == {1}

    looper.runFor(tconf.INSTANCE_CHANGE_TIMEOUT)
    looper.run(eventually(partial(checkViewNoForNodes, txnPoolNodeSet,
                                  expectedViewNo=old_view_no + 1),
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_old_instance_change_discarding(txnPoolNodeSet,
                                        looper,
                                        tconf):
    view_no = txnPoolNodeSet[0].viewNo
    first_nodes = txnPoolNodeSet[:2]
    second_nodes = txnPoolNodeSet[2:]

    for node in first_nodes:
        node.view_changer.on_master_degradation()

    def chk_ic_discard():
        for n in txnPoolNodeSet:
            instance_changes = n.master_replica._view_change_trigger_service._instance_changes
            assert not instance_changes.has_view(view_no + 1)
            for frm in first_nodes:
                assert not instance_changes.has_inst_chng_from(view_no + 1, frm.name)

    looper.run(
        eventually(chk_ic_discard,
                   timeout=tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 10))

    for node in second_nodes:
        node.view_changer.on_master_degradation()

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert node.viewNo == view_no

def test_backup_can_order_after_catchup(txnPoolNodeSet, looper,
                                        sdk_pool_handle, sdk_wallet_client):
    # We expect that after VC Gamma will be primary on backup
    delayed_node = txnPoolNodeSet[-2]
    with delay_rules_without_processing(delayed_node.nodeIbStasher,
                                        pDelay(instId=MASTER_REPLICA_INDEX),
                                        cDelay(instId=MASTER_REPLICA_INDEX),
                                        ppDelay(instId=MASTER_REPLICA_INDEX)):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, REQUEST_COUNT)
        with delay_rules_without_processing(
                [n.nodeIbStasher for n in txnPoolNodeSet],
                old_view_pp_request_delay()):
            ensure_view_change(looper, txnPoolNodeSet)
            ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

            assert delayed_node.replicas._replicas[BACKUP_INST_ID].isPrimary

            # Check, that backup cannot order
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, REQUEST_COUNT)
            for n in txnPoolNodeSet:
                assert n.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1] == 0

            # Forcing catchup
            delayed_node.start_catchup()
            ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

            # Check, that backup can order after catchup
            b_pp_seq_no_before = delayed_node.replicas._replicas[
                BACKUP_INST_ID].last_ordered_3pc[1]
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, REQUEST_COUNT)
            assert delayed_node.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1] == \
                b_pp_seq_no_before + REQUEST_COUNT

def test_lag_less_then_catchup(looper,
                               txnPoolNodeSet,
                               sdk_pool_handle,
                               sdk_wallet_client):
    delayed_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {delayed_node})
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    last_ordered_before = delayed_node.master_replica.last_ordered_3pc
    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        # Send txns for stable checkpoint
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, CHK_FREQ)
        # Check that all not slowed nodes have a stable checkpoint
        for n in other_nodes:
            assert n.master_replica._consensus_data.stable_checkpoint == CHK_FREQ

        # Send another txn. This txn will be reordered after view_change
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        trigger_view_change(txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)

        assert delayed_node.master_replica.last_ordered_3pc == last_ordered_before

    # Send txns to stabilize a checkpoint on the other nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, CHK_FREQ - 1)

    pool_pp_seq_no = get_pp_seq_no(other_nodes)
    looper.run(eventually(
        lambda: assertExp(delayed_node.master_replica.last_ordered_3pc[1] == pool_pp_seq_no)))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_new_primary_lagging_behind(looper, txnPoolNodeSet, sdk_wallet_client,
                                    sdk_pool_handle, tconf):
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary_name = get_next_primary_name(txnPoolNodeSet, initial_view_no + 1)
    next_primary = [n for n in txnPoolNodeSet if n.name == next_primary_name][0]
    other_nodes = [n for n in txnPoolNodeSet if n != next_primary]
    expected_primary_name = get_next_primary_name(txnPoolNodeSet, initial_view_no + 2)

    # Next primary cannot stabilize 1 checkpoint
    with delay_rules(next_primary.nodeIbStasher, cDelay(), pDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, CHK_FREQ)
        ensure_view_change(looper, txnPoolNodeSet)
        looper.run(
            eventually(check_not_in_view_change, txnPoolNodeSet,
                       timeout=2 * tconf.NEW_VIEW_TIMEOUT))
        ensureElectionsDone(looper=looper, nodes=other_nodes,
                            customTimeout=2 * tconf.NEW_VIEW_TIMEOUT,
                            instances_list=[0, 1])

        assert next_primary_name != expected_primary_name
        assert checkViewNoForNodes(txnPoolNodeSet) == initial_view_no + 2

    # send CHK_FREQ reqs so that slow node will start catch-up
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, CHK_FREQ)

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)

def test_demote_backup_primary(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_stewards,
                               tdir, tconf, allPluginsPath):
    assert len(txnPoolNodeSet) == 6
    node_to_restart = txnPoolNodeSet[-1]
    node_to_demote = steward_for_demote_node = demote_node_index = None
    for i, n in enumerate(txnPoolNodeSet):
        if n.name == txnPoolNodeSet[0].primaries[1]:
            node_to_demote = n
            steward_for_demote_node = sdk_wallet_stewards[i]
            demote_node_index = i
            break
    assert node_to_demote

    demote_node(looper, steward_for_demote_node, sdk_pool_handle, node_to_demote)
    del txnPoolNodeSet[demote_node_index]

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node_to_restart)
    looper.removeProdable(name=node_to_restart.name)
    node_to_restart = start_stopped_node(node_to_restart, looper, tconf,
                                         tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_restart

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_stewards[0], 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE CHANGE on lagged nodes, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

    # emulate finishing of catchup by setting Participating status
    lagged_node.mode = Mode.participating

    # make sure that View Change happened on lagging node
    waitForViewChange(looper, [lagged_node], expectedViewNo=old_view_no + 1,
                      customTimeout=10)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

def test_revert_xfer_with_fees_before_catchup(looper, helpers,
                                              nodeSetWithIntegratedTokenPlugin,
                                              sdk_pool_handle,
                                              fees,
                                              xfer_mint_tokens,
                                              xfer_addresses):
    nodes = nodeSetWithIntegratedTokenPlugin
    node_stashers = [n.nodeIbStasher for n in nodes]
    helpers.general.do_set_fees(fees)
    [address_giver, address_receiver] = xfer_addresses
    inputs = helpers.general.get_utxo_addresses([address_giver])[0]
    outputs = [{
        ADDRESS: address_receiver,
        AMOUNT: 1000 - fees[XFER_PUBLIC_FEES_ALIAS]
    }]
    request = helpers.request.transfer(inputs, outputs)

    with delay_rules_without_processing(node_stashers, cDelay(), pDelay()):
        helpers.sdk.send_request_objects([request])
        looper.runFor(waits.expectedPrePrepareTime(len(nodes)))
        for n in nodes:
            n.start_catchup()
        for n in nodes:
            looper.run(
                eventually(lambda: assertExp(n.mode == Mode.participating)))
        for n in nodes:
            looper.run(
                eventually(check_state, n, True, retryWait=0.2, timeout=15))
    ensure_all_nodes_have_same_data(looper, nodes)

def test_revert_works_for_fees_before_catch_up_on_all_nodes(
        looper, helpers, nodeSetWithIntegratedTokenPlugin, sdk_pool_handle,
        sdk_wallet_trustee, fees_set, address_main, mint_tokens):
    node_set = [n.nodeIbStasher for n in nodeSetWithIntegratedTokenPlugin]

    with delay_rules(node_set, cDelay()):
        request = helpers.request.nym()
        request = add_fees_request_with_address(helpers, fees_set, request,
                                                address_main)
        for n in nodeSetWithIntegratedTokenPlugin:
            looper.run(
                eventually(check_state, n, True, retryWait=0.2, timeout=15))

        sdk_send_signed_requests(sdk_pool_handle, [json.dumps(request.as_dict)])
        for n in nodeSetWithIntegratedTokenPlugin:
            looper.run(
                eventually(check_state, n, False, retryWait=0.2, timeout=15))

        for n in nodeSetWithIntegratedTokenPlugin:
            n.start_catchup()
        for n in nodeSetWithIntegratedTokenPlugin:
            looper.run(
                eventually(lambda: assertExp(n.mode == Mode.participating)))
        for n in nodeSetWithIntegratedTokenPlugin:
            looper.run(
                eventually(check_state, n, True, retryWait=0.2, timeout=15))
    ensure_all_nodes_have_same_data(looper, nodeSetWithIntegratedTokenPlugin)

def ensure_view_change_complete(looper, nodes, exclude_from_check=None,
                                customTimeout=None):
    ensure_view_change(looper, nodes)
    ensureElectionsDone(looper=looper, nodes=nodes, customTimeout=customTimeout)
    ensure_all_nodes_have_same_data(looper, nodes, customTimeout,
                                    exclude_from_check=exclude_from_check)

def test_delayed_instance_changes_after_vcd_for_next_view(looper, txnPoolNodeSet):
    '''
    A node is doing a view change to view=1, while the other nodes have already
    finished the view change to view=2.
    The node receives a quorum of VCD messages for view=2 before a quorum of
    InstanceChange messages for view=2.
    Nevertheless, the node should not start a view change to view=2 without a
    quorum of InstanceChanges, that is it should not go to propagate primary
    mode since it's already in view change state.
    The node should eventually finish the view change to view=2 once it receives
    all VCD and IC msgs for view=2.
    '''
    nodes = txnPoolNodeSet
    slow_node = nodes[-1]
    fast_nodes = [n for n in nodes if n != slow_node]
    slow_stasher = slow_node.nodeIbStasher

    # 1. DO FIRST VIEW CHANGE

    # delay VCD for the first ViewChange
    with delay_rules(slow_stasher, vcd_delay()):
        # Trigger view change
        for n in nodes:
            n.view_changer.on_master_degradation()
        waitForViewChange(looper, nodes, expectedViewNo=1)

        # make sure view change is finished on all nodes except the slow one
        ensureElectionsDone(looper, fast_nodes, instances_list=range(3))

        # drop all VCD to view=1
        slow_stasher.drop_delayeds()

    # 2. DO SECOND VIEW CHANGE

    # delay Instance Changes so that the slow node receives VCD for view=2 before
    # a quorum of InstanceChanges for that view while still doing view change to view=1
    with delay_rules(slow_stasher, icDelay()):
        # Trigger view change
        for n in nodes:
            n.view_changer.on_master_degradation()
        waitForViewChange(looper, fast_nodes, expectedViewNo=2)

        # make sure view change is finished on all nodes except the slow one
        ensureElectionsDone(looper, fast_nodes, instances_list=range(3))

        # slow node is still on view=1
        assert slow_node.viewNo == 1
        assert slow_node.view_change_in_progress

        # make sure that the slow node received VCD msgs for view=2
        # and didn't receive IC msgs for view=2
        check_vcd_msgs(slow_node, expected_view_no=2, expected_count=len(fast_nodes))
        check_no_ic_msgs(slow_node, expected_view_no=2)

    # 3. RESET DELAYS AND CHECK

    waitForViewChange(looper, nodes, expectedViewNo=2)
    ensureElectionsDone(looper, nodes)
    assert not slow_node.view_change_in_progress
    ensure_all_nodes_have_same_data(looper, nodes=nodes)

def testOrderingCase2(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Scenario -> A client sends requests, some nodes delay COMMITs to a few
    specific nodes such that some nodes achieve commit quorum later for those
    requests compared to other nodes. But all nodes `ORDER` requests in the
    same order of ppSeqNos
    https://www.pivotaltracker.com/n/projects/1889887/stories/133655009
    """
    pr, replicas = getPrimaryReplica(txnPoolNodeSet, instId=0), \
        getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    assert len(replicas) == 6

    rep0 = pr
    rep1 = replicas[0]
    rep2 = replicas[1]
    rep3 = replicas[2]
    rep4 = replicas[3]
    rep5 = replicas[4]
    rep6 = replicas[5]

    node0 = rep0.node
    node1 = rep1.node
    node2 = rep2.node
    node3 = rep3.node
    node4 = rep4.node
    node5 = rep5.node
    node6 = rep6.node

    ppSeqsToDelay = 5
    commitDelay = 3  # delay each COMMIT by this number of seconds
    delayedPpSeqNos = set()

    requestCount = 10

    def specificCommits(wrappedMsg):
        nonlocal node3, node4, node5
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare):
            if len(delayedPpSeqNos) < ppSeqsToDelay:
                delayedPpSeqNos.add(msg.ppSeqNo)
                logger.debug('ppSeqNo {} be delayed'.format(msg.ppSeqNo))
        if isinstance(msg, Commit) and msg.instId == 0 and \
                sender in (n.name for n in (node3, node4, node5)) and \
                msg.ppSeqNo in delayedPpSeqNos:
            return commitDelay

    for node in (node1, node2):
        logger.debug('{} would be delaying commits'.format(node))
        node.nodeIbStasher.delay(specificCommits)

    sdk_reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, requestCount)

    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=timeout)
    sdk_get_and_check_replies(looper, sdk_reqs)

def test_node_notified_about_primary_election_result(txnPoolNodeSet, looper):
    old_counts = {node.name: get_count(node, node.primary_selected)
                  for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert get_count(node, node.primary_selected) > old_counts[node.name]

def test_ledger_status_after_txn_ordered(looper, txnPoolNodeSet,
                                         sdk_wallet_client, sdk_pool_handle):
    # we expect last ordered 3PC is not None for Domain ledger only,
    # as there is a txn added to Domain ledger
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    check_ledger_statuses(txnPoolNodeSet,
                          pool_last_ordered_3pc=(None, None),
                          domain_last_ordered_3pc=txnPoolNodeSet[0].master_last_ordered_3PC,
                          config_last_ordered_3pc=(None, None))

def sdk_ensure_pool_functional(looper, nodes,
                               sdk_wallet,
                               sdk_pool,
                               num_reqs=10,
                               num_batches=2):
    sdk_send_batches_of_random_and_check(looper, nodes, sdk_pool, sdk_wallet,
                                         num_reqs, num_batches)
    ensure_all_nodes_have_same_data(looper, nodes)

def all_nodes_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    for _ in range(5):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)
        ensure_view_change(looper, txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)
        ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_view_change_with_delayed_commits(txnPoolNodeSet, looper,
                                          sdk_pool_handle,
                                          sdk_wallet_client,
                                          tconf):
    # Perform view change with Delta acting as fast node
    # With current view change implementation its state will become different from other nodes
    do_view_change_with_pending_request_and_one_fast_node(
        txnPoolNodeSet[3], txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_view_change_with_unaligned_prepare_certificates_on_half_nodes(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client, tconf):
    """
    Perform view change with half of the nodes reaching a lower last prepared
    certificate than the others.
    With the current implementation of view change this can result in the view
    change taking a lot of time.
    """
    do_view_change_with_unaligned_prepare_certificates(
        txnPoolNodeSet[2:], txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_view_change_done_delayed(txnPoolNodeSet, looper, sdk_pool_handle,
                                  sdk_wallet_client):
    """
    A node is slow so is behind other nodes; after view change it catches up,
    but it also gets the view change messages as delayed. A node should start
    participating only when caught up and a ViewChangeDone quorum is received.
    """
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    slow_node = nprs[-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 10
    delay_vcd = 25
    delay_3pc_messages([slow_node], 0, delay_3pc)
    slow_node.nodeIbStasher.delay(vcd_delay(delay_vcd))

    def chk(node):
        assert node.view_changer.has_acceptable_view_change_quorum
        assert node.view_changer._primary_verified
        assert node.isParticipating
        assert None not in {r.isPrimary for r in node.replicas.values()}

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         5 * 4, 4)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the slow node successfully completes catchup
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Other nodes complete view change, select primary and participate
    for node in other_nodes:
        looper.run(eventually(chk, node, retryWait=1))

    # Since `ViewChangeDone` is delayed, slow_node is not able to select primary
    # and participate
    assert not slow_node.view_changer.has_acceptable_view_change_quorum
    assert not slow_node.view_changer._primary_verified
    assert not slow_node.isParticipating
    assert {r.isPrimary for r in slow_node.replicas.values()} == {None}

    # Send requests to make sure pool is functional
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Repair network
    slow_node.reset_delays_and_process_delayeds()

    # `slow_node` selects primary and participates
    looper.run(eventually(chk, slow_node, retryWait=1))

    # Processes requests received during lack of primary
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Send more requests and compare data of all nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_view_change_with_delay_on_one_node(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client, tconf):
    """
    Perform view change on one slow node later than on the other nodes so that
    delayed Commits are processed by the slow node in the old view and by the
    other nodes in the new view.
    After that verify that all the nodes have the same ledgers and state.
    """
    do_view_change_with_delay_on_one_node(txnPoolNodeSet[-1], txnPoolNodeSet,
                                          looper, sdk_pool_handle, sdk_wallet_client)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_view_change_after_some_txns(looper, txnPoolNodeSet, viewNo,
                                     sdk_pool_handle, sdk_wallet_client):
    """
    Check that view change is done after processing some of txns
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3)
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

def test_send_more_after_view_change(looper, txnPoolNodeSet,
                                     sdk_pool_handle, sdk_wallet_client):
    """
    Check that we can send more requests after view change
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 10)

def test_view_change_min_catchup_timeout(txnPoolNodeSet, looper,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         tconf,
                                         viewNo):
    """
    One of the conditions to finish catch-up during view change is to have
    MAX_CATCHUPS_DONE_DURING_VIEW_CHANGE rounds of catch-up without any new
    transactions caught up.
    But this should not finish very quickly.
    So, we should try to catch-up until MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE.

    In the test:
    - Before starting view change, mock `has_ordered_till_last_prepared_certificate`
      so that it always returns False.
    - This means that the only condition on how we can finish catch-up is by
      MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE timeout and having more than
      MAX_CATCHUPS_DONE_DURING_VIEW_CHANGE rounds of catch-up without new txns caught up.
    - Check that view change is not finished until MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE
    - Check that view change is eventually finished after MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE
    """
    # 1. Send some txns
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    # 2. make the only condition to finish catch-up is
    # MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE
    patch_has_ordered_till_last_prepared_certificate(txnPoolNodeSet)

    # 3. start view change
    expected_view_no = viewNo + 1
    for node in txnPoolNodeSet:
        node.view_changer.startViewChange(expected_view_no)

    # 4. check that it's not finished till
    # MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE
    no_view_change_timeout = tconf.MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE - 1
    with pytest.raises(EventuallyTimeoutException):
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet,
                            customTimeout=no_view_change_timeout)

    # 5. make sure that view change is finished eventually
    # (it should be finished quite soon after we waited for MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet, customTimeout=2)
    waitForViewChange(looper=looper, txnPoolNodeSet=txnPoolNodeSet,
                      expectedViewNo=expected_view_no)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # 6. ensure that the pool is still functional.
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)

def test_selection_f_plus_one_quorum(looper, txnPoolNodeSet, allPluginsPath,
                                     tdir, tconf, sdk_pool_handle, sdk_wallet_client):
    """
    Check that quorum f + 1 is used for primary selection
    when initiated by CurrentState messages.

    Assumes that the view change quorum is n - f.
    Assumes that primaries are selected in round robin fashion.
    """
    # Ensure that we have 4 nodes in total
    all_nodes = list(txnPoolNodeSet)
    assert 4 == len(all_nodes)
    alpha, beta, delta, gamma = all_nodes
    initial_view_no = alpha.viewNo

    # Make one node lagging by switching it off for some time
    lagging_node = gamma
    non_lagging_nodes = [alpha, beta, delta]
    disconnect_node_and_ensure_disconnected(looper,
                                            all_nodes,
                                            lagging_node,
                                            stopNode=True)
    looper.removeProdable(lagging_node)

    # Make nodes to perform view change
    ensure_view_change(looper, non_lagging_nodes)
    ensureElectionsDone(looper=looper,
                        nodes=non_lagging_nodes,
                        instances_list=range(2))
    ensure_all_nodes_have_same_data(looper, nodes=non_lagging_nodes)

    # Stop two more of active nodes
    # (but not primary, which is Beta (because of round robin selection))
    stopped_nodes = [alpha]  # TODO: add one more here
    for stopped_node in stopped_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                txnPoolNodeSet,
                                                stopped_node,
                                                stopNode=True)
        looper.removeProdable(stopped_node)

    # Start lagging node back
    restarted_node = start_stopped_node(lagging_node, looper, tconf,
                                        tdir, allPluginsPath)
    active_nodes = [beta, delta, restarted_node]

    # Check that primary selected
    expected_view_no = initial_view_no + 1
    ensureElectionsDone(looper=looper,
                        nodes=active_nodes,
                        instances_list=range(2),
                        customTimeout=30)
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper,
                                              sdk_pool_handle, sdk_wallet_client):
    """
    The node should do only a fixed number of rounds of catchup. For this,
    delay Prepares and Commits for 2 non-primary nodes by a large amount which
    is equivalent to loss of Prepares and Commits. Make sure 2 nodes have a
    different last prepared certificate from the other two. Then do a view
    change, make sure the view change completes and the pool does not process
    the requests that were prepared by only a subset of the nodes
    """
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 2 * 3, 3)

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ledger_summary = txnPoolNodeSet[0].ledger_summary

    slow_nodes = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)[-2:]]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]

    # Make nodes slow to process Prepares and Commits
    for node in slow_nodes:
        node.nodeIbStasher.delay(pDelay(120, 0))
        node.nodeIbStasher.delay(cDelay(120, 0))

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)
    looper.runFor(3)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    def last_prepared(nodes):
        lst = [n.master_replica.last_prepared_certificate_in_view()
               for n in nodes]
        # All nodes have same last prepared
        assert check_if_all_equal_in_list(lst)
        return lst[0]

    last_prepared_slow = last_prepared(slow_nodes)
    last_prepared_fast = last_prepared(fast_nodes)

    # Check `slow_nodes` and `fast_nodes` set different last_prepared
    assert last_prepared_fast != last_prepared_slow

    # View change complete
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # The requests which were prepared by only a subset of the nodes were
    # not ordered
    assert txnPoolNodeSet[0].ledger_summary == ledger_summary

    for node in slow_nodes:
        node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Make sure pool is functional
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    last_prepared(txnPoolNodeSet)

def tear():
    # Repair any broken network
    for node in txnPoolNodeSet:
        node.reset_delays_and_process_delayeds()
    # Give a little time to process any delayed messages
    looper.runFor(3)

    # Check each node has same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Check each node has ordered all requests (no catchup)
    assert check_if_all_equal_in_list([n.master_replica.ordered
                                       for n in txnPoolNodeSet])

    # Check the network is functional since all nodes reply
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

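# `tear` above closes over test fixtures (looper, txnPoolNodeSet, sdk_pool_handle,
# sdk_wallet_client), so it is normally defined inside a pytest fixture and
# registered as a finalizer rather than called directly. A minimal sketch of such
# wiring follows; the fixture name `pool_teardown` is hypothetical and the real
# test module may register the teardown differently.
@pytest.fixture()
def pool_teardown(request, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    def tear():
        # Repair any broken network and verify pool consistency (as in the
        # teardown above)
        for node in txnPoolNodeSet:
            node.reset_delays_and_process_delayeds()
        looper.runFor(3)
        ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Run the checks after the test that requested this fixture finishes
    request.addfinalizer(tear)
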
def test_checkpoint_across_views(sent_batches, chkFreqPatched, looper,
                                 txnPoolNodeSet, sdk_pool_handle,
                                 sdk_wallet_client):
    """
    Test checkpointing across views.
    This test checks that checkpointing and garbage collection work correctly
    no matter if view change happened before a checkpoint or after a checkpoint
    """
    batch_size = chkFreqPatched.Max3PCBatchSize
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         batch_size * sent_batches,
                                         sent_batches)

    # Check that correct garbage collection happens
    non_gced_batch_count = (sent_batches - CHK_FREQ) if sent_batches >= CHK_FREQ else sent_batches
    looper.run(eventually(checkRequestCounts, txnPoolNodeSet,
                          batch_size * non_gced_batch_count,
                          non_gced_batch_count,
                          retryWait=1))

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # Check that after view change, proper clean up is done
    for node in txnPoolNodeSet:
        for r in node.replicas.values():
            assert not r.checkpoints
            # No stashed checkpoint for previous view
            assert not [view_no for view_no in r.stashedRecvdCheckpoints
                        if view_no < r.viewNo]
            assert r._h == 0
            assert r._lastPrePrepareSeqNo == 0
            assert r.h == 0
            assert r.H == r._h + chkFreqPatched.LOG_SIZE

    checkRequestCounts(txnPoolNodeSet, 0, 0)

    # Even after view change, checkpointing works
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         batch_size * sent_batches,
                                         sent_batches)

    looper.run(eventually(checkRequestCounts, txnPoolNodeSet,
                          batch_size * non_gced_batch_count,
                          non_gced_batch_count,
                          retryWait=1))

    # Send more batches so one more checkpoint happens. This is done so that
    # when this test finishes, all requests are garbage collected and the
    # next run of this test (with next param) has the calculations correct
    more = CHK_FREQ - non_gced_batch_count
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         batch_size * more, more)
    looper.run(eventually(checkRequestCounts, txnPoolNodeSet, 0, 0,
                          retryWait=1))