def sdk_change_bls_key(looper, txnPoolNodeSet,
                       node,
                       sdk_pool_handle,
                       sdk_wallet_steward,
                       add_wrong=False,
                       new_bls=None,
                       new_key_proof=None):
    if add_wrong:
        _, new_blspk, key_proof = create_default_bls_crypto_factory().generate_bls_keys()
    else:
        new_blspk, key_proof = init_bls_keys(node.keys_dir, node.name)
    key_in_txn = new_bls or new_blspk
    bls_key_proof = new_key_proof or key_proof
    node_dest = hexToFriendly(node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward, sdk_pool_handle,
                         node_dest, node.name,
                         None, None,
                         None, None,
                         bls_key=key_in_txn,
                         services=None,
                         key_proof=bls_key_proof)
    poolSetExceptOne = list(txnPoolNodeSet)
    poolSetExceptOne.remove(node)
    waitNodeDataEquality(looper, node, *poolSetExceptOne)
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)
    return new_blspk

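# A minimal usage sketch (hypothetical, not part of the suite): rotate the BLS
# key of a pool node and rely on the helper's own checks that the pool stays
# functional. It assumes `sdk_wallet_steward` is the steward wallet that
# controls the chosen node.
def example_rotate_bls_key(looper, txnPoolNodeSet, sdk_pool_handle,
                           sdk_wallet_steward):
    node = txnPoolNodeSet[0]
    # a freshly initialized, correct BLS key is generated and sent in a NODE txn
    new_blspk = sdk_change_bls_key(looper, txnPoolNodeSet, node,
                                   sdk_pool_handle, sdk_wallet_steward)
    assert new_blspk
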
def testAddInactiveNodeThenActivate(looper,
                                    txnPoolNodeSet,
                                    sdk_wallet_steward,
                                    sdk_pool_handle,
                                    tdir,
                                    tconf,
                                    allPluginsPath):
    new_steward_name = "testClientSteward" + randomString(3)
    new_node_name = "Kappa"

    # adding a new node without the SERVICES field
    # means the node is added in the inactive state
    new_steward_wallet, new_node = \
        sdk_add_new_steward_and_node(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward,
                                     new_steward_name,
                                     new_node_name,
                                     tdir,
                                     tconf,
                                     allPluginsPath,
                                     services=None)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    new_node = update_node_data_and_reconnect(looper, txnPoolNodeSet + [new_node],
                                              new_steward_wallet,
                                              sdk_pool_handle,
                                              new_node,
                                              None, None,
                                              None, None,
                                              tdir, tconf)
    txnPoolNodeSet.append(new_node)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet, sdk_pool_handle)

def test_restart_majority_to_same_view(looper, txnPoolNodeSet, tconf, tdir,
                                       allPluginsPath, sdk_pool_handle,
                                       sdk_wallet_client):
    # Add transaction to ledger
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    majority = txnPoolNodeSet[:3]
    minority = txnPoolNodeSet[3:]

    # Restart majority group
    tm = tconf.ToleratePrimaryDisconnection + \
        waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    majority_before_restart = majority.copy()
    restart_nodes(looper, txnPoolNodeSet, majority, tconf, tdir,
                  allPluginsPath, after_restart_timeout=tm,
                  start_one_by_one=False, wait_for_elections=False)
    ensureElectionsDone(looper, majority, instances_list=range(2))

    # Check that nodes in minority group are aware that they might have
    # inconsistent 3PC state
    for node in minority:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 1

    # Check that nodes in majority group didn't think they might have
    # inconsistent 3PC state
    for node in majority_before_restart:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 0

    # Check that nodes in majority group don't think they might have
    # inconsistent 3PC state
    for node in majority:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 0

    # Restart minority group
    restart_nodes(looper, txnPoolNodeSet, minority, tconf, tdir,
                  allPluginsPath, after_restart_timeout=tm,
                  start_one_by_one=False)

    # Check that all nodes are still functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_multiple_view_change_retries_by_timeouts(
        txnPoolNodeSet, looper, tconf, setup,
        sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted each time the previous one
    times out
    """
    _, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, vcd_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)

        # Wait until the timeout callback is called 3 times
        looper.run(eventually(check_watchdog_called_expected_times,
                              txnPoolNodeSet, timeout_callback_stats, 3,
                              retryWait=1,
                              timeout=3 * VIEW_CHANGE_TIMEOUT + 2))

        # View changes should fail
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper,
                                nodes=txnPoolNodeSet,
                                customTimeout=1)

    # This view change must complete with no problems
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # 4 view changes must have been initiated (the initial one + 3 retries)
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 4

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_view_change_retry_by_timeout(
        txnPoolNodeSet, looper, tconf, setup,
        sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, vcd_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)
        # The first view change should fail because of the delayed
        # ViewChangeDone messages. This then leads to the new view change
        # that we need.
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper,
                                nodes=txnPoolNodeSet,
                                customTimeout=1.5 * VIEW_CHANGE_TIMEOUT)

    # Now that the ViewChangeDone messages are unblocked, the view changes
    # should finish successfully
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method was called exactly once
    check_watchdog_called_expected_times(txnPoolNodeSet,
                                         timeout_callback_stats, 1)

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def testNodeKeysChanged(looper, txnPoolNodeSet, tdir, tconf,
                        sdk_node_theta_added,
                        sdk_pool_handle,
                        allPluginsPath=None):
    new_steward_wallet, new_node = sdk_node_theta_added

    new_node.stop()
    looper.removeProdable(name=new_node.name)
    nodeHa, nodeCHa = HA(*new_node.nodestack.ha), HA(*new_node.clientstack.ha)
    sigseed = randomString(32).encode()
    verkey = base58.b58encode(
        SimpleSigner(seed=sigseed).naclSigner.verraw).decode("utf-8")
    sdk_change_node_keys(looper, new_node, new_steward_wallet,
                         sdk_pool_handle, verkey)

    config_helper = PNodeConfigHelper(new_node.name, tconf, chroot=tdir)
    initNodeKeysForBothStacks(new_node.name, config_helper.keys_dir,
                              sigseed, override=True)

    logger.debug("{} starting with HAs {} {}".format(new_node, nodeHa, nodeCHa))

    node = TestNode(new_node.name,
                    config_helper=config_helper,
                    config=tconf,
                    ha=nodeHa, cliha=nodeCHa,
                    pluginPaths=allPluginsPath)
    looper.add(node)
    # The last element of `txnPoolNodeSet` is the node Theta that was just
    # stopped
    txnPoolNodeSet[-1] = node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1])
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet, sdk_pool_handle)

def test_req_drop_on_propagate_phase_on_non_primary_and_then_ordered(
        tconf, setup, looper, txnPoolNodeSet,
        sdk_wallet_client, sdk_pool_handle):
    global initial_ledger_size
    A, B, C, D = txnPoolNodeSet  # type: TestNode
    sent1 = sdk_json_to_request_object(setup[0][0])
    lagged_node = C

    def check_propagates_and_3pc_delayed():
        # Node should have received a request from the client
        assert len(recvdRequest(lagged_node)) == 1
        # Node should not have received a PROPAGATE
        assert len(recvdPropagate(lagged_node)) == 0
        # Node should have sent a PROPAGATE
        assert len(sentPropagate(lagged_node)) == 1
        # Node should not have received PrePrepares for the master instance
        assert len(recvdPrePrepareForInstId(lagged_node, 0)) == 0
        # Node should not have received Prepares for the master instance
        assert len(recvdPrepareForInstId(lagged_node, 0)) == 0
        # Node should not have received Commits for the master instance
        assert len(recvdCommitForInstId(lagged_node, 0)) == 0
        # Node should have 1 request in the requests queue
        assert len(lagged_node.requests) == 1

    timeout = howlong - 2
    looper.run(eventually(check_propagates_and_3pc_delayed,
                          retryWait=.5, timeout=timeout))

    def check_drop():
        assert len(lagged_node.requests) == 0

    timeout = tconf.PROPAGATES_PHASE_REQ_TIMEOUT + \
        tconf.OUTDATED_REQS_CHECK_INTERVAL + 1
    looper.run(eventually(check_drop, retryWait=.5, timeout=timeout))

    for n in txnPoolNodeSet:
        n.nodeIbStasher.resetDelays()

    def check_propagates_received():
        # Node should have received 3 PROPAGATEs
        assert len(recvdPropagate(lagged_node)) == 3
        # Node should have a total of 4 PROPAGATEs (3 from the other nodes
        # and 1 from itself)
        key = sent1.digest
        assert key in lagged_node.requests
        assert len(lagged_node.requests[key].propagates) == 4
        # Node should have sent two PROPAGATEs by now since the request
        # was dropped and then re-received over propagate
        assert len(sentPropagate(lagged_node)) == 2

    timeout = howlong + 2
    looper.run(eventually(check_propagates_received,
                          retryWait=.5, timeout=timeout))

    def check_ledger_size():
        # The request should be eventually ordered
        for n in txnPoolNodeSet:
            assert n.domainLedger.size - initial_ledger_size == 1

    looper.run(eventually(check_ledger_size, retryWait=.5, timeout=timeout))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def testChangeHaPersistsPostNodesRestart(looper,
                                         txnPoolNodeSet,
                                         tdir,
                                         tconf,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         sdk_wallet_steward):
    new_steward_wallet, new_node = \
        sdk_add_new_steward_and_node(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward,
                                     'AnotherSteward' + randomString(4),
                                     'AnotherNode' + randomString(4),
                                     tdir,
                                     tconf)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)

    node_new_ha, client_new_ha = genHa(2)
    logger.debug("{} changing HAs to {} {}".format(new_node, node_new_ha,
                                                   client_new_ha))

    # Sending the change HA txn and confirming it succeeded
    node_dest = hexToFriendly(new_node.nodestack.verhex)
    sdk_send_update_node(looper, new_steward_wallet, sdk_pool_handle,
                         node_dest, new_node.name,
                         node_new_ha.host, node_new_ha.port,
                         client_new_ha.host, client_new_ha.port)

    # Stopping existing nodes
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # Starting nodes again by creating `Node` objects since that simulates
    # what happens when starting the node with a script
    restartedNodes = []
    for node in txnPoolNodeSet[:-1]:
        config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
        restartedNode = TestNode(node.name,
                                 config_helper=config_helper,
                                 config=tconf,
                                 ha=node.nodestack.ha,
                                 cliha=node.clientstack.ha)
        looper.add(restartedNode)
        restartedNodes.append(restartedNode)

    # Starting the node whose HA was changed
    config_helper = PNodeConfigHelper(new_node.name, tconf, chroot=tdir)
    node = TestNode(new_node.name,
                    config_helper=config_helper,
                    config=tconf,
                    ha=node_new_ha,
                    cliha=client_new_ha)
    looper.add(node)
    restartedNodes.append(node)

    looper.run(checkNodesConnected(restartedNodes))
    waitNodeDataEquality(looper, node, *restartedNodes[:-1])
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, restartedNodes,
                               sdk_wallet_client, sdk_pool_handle)

def test_primary_selection_increase_f(
        two_more_nodes_added, looper, txnPoolNodeSet,
        sdk_wallet_steward, sdk_pool_handle):
    # for n in two_more_nodes_added:
    #     check_accepted_view_change_sent(n, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

def test_restart_groups_6_of_7_np_no_tm(looper, txnPoolNodeSet, tconf, tdir,
                                        sdk_pool_handle, sdk_wallet_client,
                                        allPluginsPath):
    tm = tconf.ToleratePrimaryDisconnection + \
        waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))

    restart_group = get_group(txnPoolNodeSet, 6, include_primary=False)

    restart_nodes(looper, txnPoolNodeSet, restart_group, tconf, tdir,
                  allPluginsPath, after_restart_timeout=tm,
                  start_one_by_one=False)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_node_request_propagates(looper, setup, txnPoolNodeSet,
                                 sdk_wallet_client, sdk_pool_handle):
    """
    One of the nodes lacks sufficient PROPAGATEs
    """
    faulty_node, recv_client_requests = setup

    old_count_recv_ppg = get_count(faulty_node, faulty_node.processPropagate)
    old_count_recv_req = get_count(faulty_node, faulty_node.processRequest)
    old_count_request_propagates = get_count(
        faulty_node, faulty_node.request_propagates)

    def sum_of_sent_batches():
        return faulty_node.replicas[0].lastPrePrepareSeqNo + \
            faulty_node.replicas[1].lastPrePrepareSeqNo

    old_sum_of_sent_batches = sum_of_sent_batches()

    sent_reqs = 5
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              sent_reqs)

    assert get_count(
        faulty_node, faulty_node.processPropagate) > old_count_recv_ppg
    if recv_client_requests:
        assert get_count(
            faulty_node, faulty_node.processRequest) > old_count_recv_req
    else:
        assert get_count(
            faulty_node, faulty_node.processRequest) == old_count_recv_req

    # PROPAGATEs were requested as many times as batches were sent by the
    # two replicas combined, since each replica requests PROPAGATEs
    # independently
    assert get_count(faulty_node, faulty_node.request_propagates) - \
        old_count_request_propagates == (sum_of_sent_batches() -
                                         old_sum_of_sent_batches)

    requested_propagate_counts = getAllReturnVals(
        faulty_node, faulty_node.request_propagates)

    # The last attempt to request PROPAGATEs was not successful
    assert requested_propagate_counts[0] == 0
    # The first attempt to request PROPAGATEs was successful as PROPAGATEs
    # were requested for all nodes
    assert requested_propagate_counts[1] == sent_reqs

    faulty_node.nodeIbStasher.reset_delays_and_process_delayeds()
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle,
                               num_reqs=4)

def test_genesis_nodes(looper, txnPoolNodeSet,
                       sdk_pool_handle,
                       sdk_wallet_client):
    assert len(txnPoolNodeSet) == nodeCount
    for node in txnPoolNodeSet:
        assertEquality(node.poolLedger.size, nodeCount)
        stw_count = sum(1 for _, txn in node.domainLedger.getAllTxn()
                        if (get_type(txn) == NYM) and
                        (get_payload_data(txn).get(ROLE) == STEWARD))
        assertEquality(stw_count, nodeCount)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_req_drop_on_commit_phase_on_master_primary_and_then_ordered(
        tconf, setup, looper, txnPoolNodeSet,
        sdk_wallet_client, sdk_pool_handle):
    global initial_ledger_size
    A, B, C, D = txnPoolNodeSet  # type: TestNode
    lagged_node = A

    def check_propagates():
        # Node should have received a request from the client
        assert len(recvdRequest(lagged_node)) == 1
        # Node should have received 3 PROPAGATEs
        assert len(recvdPropagate(lagged_node)) == 3
        # Node should have sent a PROPAGATE
        assert len(sentPropagate(lagged_node)) == 1
        # Node should have one request in the requests queue
        assert len(lagged_node.requests) == 1

    timeout = howlong - 2
    looper.run(eventually(check_propagates, retryWait=.5, timeout=timeout))

    def check_prepares_received():
        # Node should have received all Prepares for the master instance
        assert len(recvdPrepareForInstId(lagged_node, 0)) == 3
        assert len(lagged_node.requests) == 1

    looper.run(eventually(check_prepares_received,
                          retryWait=.5, timeout=timeout))

    def check_drop():
        # Node should not have received Commits for the master instance
        assert len(recvdCommitForInstId(lagged_node, 0)) == 0
        # Request object should be dropped by the timeout
        assert len(lagged_node.requests) == 0

    timeout = tconf.ORDERING_PHASE_REQ_TIMEOUT + \
        tconf.OUTDATED_REQS_CHECK_INTERVAL + 1
    looper.run(eventually(check_drop, retryWait=.5, timeout=timeout))

    for n in txnPoolNodeSet:
        n.nodeIbStasher.resetDelays()

    def check_commits_received():
        # Node should have received all the delayed Commits for the master
        # instance
        assert len(recvdCommitForInstId(lagged_node, 0)) == 3

    timeout = howlong * 2
    looper.run(eventually(check_commits_received,
                          retryWait=.5, timeout=timeout))

    def check_ledger_size():
        # The request should be eventually ordered
        for n in txnPoolNodeSet:
            assert n.domainLedger.size - initial_ledger_size == 1

    looper.run(eventually(check_ledger_size, retryWait=.5, timeout=timeout))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_propagate_primary_after_primary_restart_view_1(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle,
        sdk_wallet_steward, tdir, allPluginsPath):
    """
    Delay InstanceChange msgs to prevent a view change during the primary
    restart in order to test propagate primary for the primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary
    restores all indices correctly in the case viewNo > 0
    """
    ensure_view_change(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=1)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert old_ppseqno > 0

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_primary, stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary, looper, tconf, tdir,
                                       allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [i for i, n in enumerate(txnPoolNodeSet)
           if n.name == restartedNode.name][0]
    txnPoolNodeSet[idx] = restartedNode

    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert new_viewNo == old_viewNo

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert new_primary.name == old_primary.name

    # check that ppSeqNo is the same on all nodes
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert new_ppseqno > old_ppseqno

def test_view_change_min_catchup_timeout(txnPoolNodeSet, looper,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         tconf, viewNo):
    """
    One of the conditions to finish catch-up during view change is to have
    MAX_CATCHUPS_DONE_DURING_VIEW_CHANGE rounds of catch-up without any new
    transactions caught up. But this should not finish too quickly, so
    catch-up should keep being attempted until
    MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE.

    In the test:
    - Before starting the view change, mock
      `has_ordered_till_last_prepared_certificate` so that it always
      returns False.
    - This means that the only way catch-up can finish is by the
      MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE timeout together with
      more than MAX_CATCHUPS_DONE_DURING_VIEW_CHANGE rounds of catch-up
      without new txns caught up.
    - Check that the view change is not finished until
      MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE
    - Check that the view change is eventually finished after
      MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE
    """
    # 1. Send some txns
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    # 2. make MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE the only
    # condition to finish catch-up
    patch_has_ordered_till_last_prepared_certificate(txnPoolNodeSet)

    # 3. start view change
    expected_view_no = viewNo + 1
    for node in txnPoolNodeSet:
        node.view_changer.startViewChange(expected_view_no)

    # 4. check that it's not finished till
    # MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE
    no_view_change_timeout = \
        tconf.MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE - 1
    with pytest.raises(EventuallyTimeoutException):
        ensureElectionsDone(looper=looper,
                            nodes=txnPoolNodeSet,
                            customTimeout=no_view_change_timeout)

    # 5. make sure that the view change is finished eventually
    # (it should finish quite soon after having waited for
    # MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet, customTimeout=2)
    waitForViewChange(looper=looper, txnPoolNodeSet=txnPoolNodeSet,
                      expectedViewNo=expected_view_no)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # 6. ensure that the pool is still functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_add_node_to_pool_with_large_ppseqno_diff_views(do_view_change,
                                                        looper,
                                                        txnPoolNodeSet,
                                                        tconf,
                                                        sdk_pool_handle,
                                                        sdk_wallet_steward,
                                                        tdir,
                                                        allPluginsPath):
    """
    Adding a node to the pool while ppSeqNo is big used to make the new node
    stash all the requests because of incorrectly set watermark limits.
    The case of view_no == 0 is special. The test emulates a big ppSeqNo
    number, adds a node and checks that all the pool nodes are functional.
    The test is run with several starting view_no values, including 0.
    """
    ensure_several_view_change(looper, txnPoolNodeSet, do_view_change,
                               custom_timeout=tconf.VIEW_CHANGE_TIMEOUT)

    big_ppseqno = tconf.LOG_SIZE * 2 + 2345
    cur_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert big_ppseqno > cur_ppseqno

    # ensure pool is working properly
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)
    assert cur_ppseqno < _get_ppseqno(txnPoolNodeSet)

    _set_ppseqno(txnPoolNodeSet, big_ppseqno)
    cur_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert big_ppseqno == cur_ppseqno
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)
    assert cur_ppseqno < _get_ppseqno(txnPoolNodeSet)

    new_steward_name = "testClientSteward" + randomString(4)
    new_node_name = "TestTheta" + randomString(4)
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet_handle, sdk_pool_handle)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

def test_restart_to_same_view_with_killed_primary(looper, txnPoolNodeSet,
                                                  tconf, tdir, allPluginsPath,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client):
    restart_timeout = tconf.ToleratePrimaryDisconnection + \
        waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))

    primary = txnPoolNodeSet[0]
    alive_nodes = txnPoolNodeSet[1:]
    minority = alive_nodes[-1:]
    majority = alive_nodes[:-1]

    # Move to a higher view by killing the primary
    primary.cleanupOnStopping = True
    primary.stop()
    looper.removeProdable(primary)
    ensure_node_disconnected(looper, primary, txnPoolNodeSet)
    waitForViewChange(looper, alive_nodes, 1,
                      customTimeout=VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper, alive_nodes, instances_list=range(3))

    # Add transaction to ledger
    sdk_send_random_and_check(looper, alive_nodes, sdk_pool_handle,
                              sdk_wallet_client, 1)

    # Restart majority group
    majority_before_restart = majority.copy()
    restart_nodes(looper, alive_nodes, majority, tconf, tdir, allPluginsPath,
                  after_restart_timeout=restart_timeout,
                  start_one_by_one=False, wait_for_elections=False)
    waitForViewChange(looper, majority, 1,
                      customTimeout=2.1 * VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper, majority, instances_list=range(3))

    # Check that nodes in minority group are aware that they might have
    # inconsistent 3PC state
    for node in minority:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 1

    # Check that nodes in majority group didn't think they might have
    # inconsistent 3PC state
    for node in majority_before_restart:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 0

    # Check that nodes in majority group don't think they might have
    # inconsistent 3PC state
    for node in majority:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 0

    # Restart minority group
    restart_nodes(looper, alive_nodes, minority, tconf, tdir, allPluginsPath,
                  after_restart_timeout=restart_timeout,
                  start_one_by_one=False, wait_for_elections=False)
    ensureElectionsDone(looper, alive_nodes, instances_list=range(3))

    # Check that all nodes are still functional
    sdk_ensure_pool_functional(looper, alive_nodes,
                               sdk_wallet_client, sdk_pool_handle)

def nodeStashingOrderedRequests(txnPoolNodeSet,
                                sdk_node_created_after_some_txns):
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_created_after_some_txns
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(cqDelay(5))
    txnPoolNodeSet.append(new_node)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet_handle, sdk_pool_handle)
    sdk_send_random_requests(looper, sdk_pool_handle,
                             new_steward_wallet_handle, 10)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    def stashing():
        assert new_node.mode != Mode.participating
        assert len(new_node.stashedOrderedReqs) > 0
        # assert len(newNode.reqsFromCatchupReplies) > 0

    timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.run(eventually(stashing, retryWait=1, timeout=timeout))

def test_steward_suspends_node_and_promote_with_new_ha(
        looper, txnPoolNodeSet,
        tdir, tconf,
        sdk_pool_handle,
        sdk_wallet_steward,
        sdk_node_theta_added,
        poolTxnStewardData,
        allPluginsPath):
    new_steward_wallet, new_node = sdk_node_theta_added
    looper.run(checkNodesConnected(txnPoolNodeSet + [new_node]))
    demote_node(looper, new_steward_wallet, sdk_pool_handle, new_node)
    # Check that the suspended node does not exist in any nodeReg or in the
    # remotes of nodes or clients
    txnPoolNodeSet = txnPoolNodeSet[:-1]
    for node in txnPoolNodeSet:
        looper.run(eventually(checkNodeNotInNodeReg, node, new_node.name))
    # Check that a node does not connect to the suspended node
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet, sdk_pool_handle)
    with pytest.raises(RemoteNotFound):
        looper.loop.run_until_complete(
            sendMessageAndCheckDelivery(txnPoolNodeSet[0], new_node))

    new_node.stop()
    looper.removeProdable(new_node)

    # Check that a node whose suspension is revoked can reconnect to the
    # other nodes and that clients can also connect to that node
    node_ha, client_ha = genHa(2)
    node_nym = hexToFriendly(new_node.nodestack.verhex)
    sdk_send_update_node(looper, new_steward_wallet, sdk_pool_handle,
                         node_nym, new_node.name,
                         node_ha.host, node_ha.port,
                         client_ha.host, client_ha.port,
                         services=[VALIDATOR])
    new_node.nodestack.ha = node_ha
    new_node.clientstack.ha = client_ha
    nodeTheta = start_stopped_node(new_node, looper, tconf,
                                   tdir, allPluginsPath,
                                   delay_instance_change_msgs=False)
    assert all(node.nodestack.remotes[new_node.name].ha == node_ha
               for node in txnPoolNodeSet)
    txnPoolNodeSet.append(nodeTheta)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

def update_node_data_and_reconnect(looper, txnPoolNodeSet,
                                   steward_wallet,
                                   sdk_pool_handle,
                                   node,
                                   new_node_ip, new_node_port,
                                   new_client_ip, new_client_port,
                                   tdir, tconf):
    node_ha = node.nodestack.ha
    cli_ha = node.clientstack.ha
    node_dest = hexToFriendly(node.nodestack.verhex)
    sdk_send_update_node(looper, steward_wallet, sdk_pool_handle,
                         node_dest, node.name,
                         new_node_ip, new_node_port,
                         new_client_ip, new_client_port)
    # restart the Node with the new HA
    node.stop()
    looper.removeProdable(name=node.name)
    config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
    restartedNode = TestNode(node.name,
                             config_helper=config_helper,
                             config=tconf,
                             ha=HA(new_node_ip or node_ha.host,
                                   new_node_port or node_ha.port),
                             cliha=HA(new_client_ip or cli_ha.host,
                                      new_client_port or cli_ha.port))
    looper.add(restartedNode)

    # replace the node in txnPoolNodeSet
    try:
        idx = next(i for i, n in enumerate(txnPoolNodeSet)
                   if n.name == node.name)
    except StopIteration:
        raise Exception('{} is not in the pool'.format(node))
    txnPoolNodeSet[idx] = restartedNode

    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               steward_wallet, sdk_pool_handle)
    return restartedNode

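# Hypothetical sketch (not a test from the suite): since
# update_node_data_and_reconnect falls back to the old HA fields when an
# argument is None, changing only the client-side port looks like this. It
# assumes `sdk_wallet_steward` is the steward wallet that controls the node.
def example_change_client_port(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle, tdir, tconf):
    node = txnPoolNodeSet[-1]
    new_client_port = genHa(1).port
    # None arguments keep the current node IP/port and client IP
    update_node_data_and_reconnect(looper, txnPoolNodeSet,
                                   sdk_wallet_steward, sdk_pool_handle,
                                   node,
                                   None, None,
                                   None, new_client_port,
                                   tdir, tconf)
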
def testNodePortChanged(looper, txnPoolNodeSet,
                        sdk_wallet_steward,
                        sdk_pool_handle,
                        sdk_node_theta_added,
                        tdir, tconf):
    """
    A running node's port is changed
    """
    new_steward_wallet, new_node = sdk_node_theta_added

    node_new_ha = genHa(1)
    new_port = node_new_ha.port
    node_ha = txnPoolNodeSet[0].nodeReg[new_node.name]
    cli_ha = txnPoolNodeSet[0].cliNodeReg[new_node.name + CLIENT_STACK_SUFFIX]

    update_node_data_and_reconnect(looper, txnPoolNodeSet,
                                   new_steward_wallet,
                                   sdk_pool_handle,
                                   new_node,
                                   node_ha.host, new_port,
                                   cli_ha.host, cli_ha.port,
                                   tdir, tconf)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet, sdk_pool_handle)

def test_restart_half_to_lower_view(looper, txnPoolNodeSet, tconf, tdir,
                                    allPluginsPath, sdk_pool_handle,
                                    sdk_wallet_client):
    # Add transaction to ledger
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    # Move to a higher view
    ensure_view_change_complete(looper, txnPoolNodeSet)

    # Restart half of the nodes
    tm = tconf.ToleratePrimaryDisconnection + \
        waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    nodes_before_restart = txnPoolNodeSet.copy()
    restart_nodes(looper, txnPoolNodeSet, txnPoolNodeSet[2:], tconf, tdir,
                  allPluginsPath, after_restart_timeout=tm,
                  start_one_by_one=False)

    # Check that nodes didn't think they may have inconsistent 3PC state
    for node in nodes_before_restart:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 0

    # Check that nodes don't think they may have inconsistent 3PC state
    for node in txnPoolNodeSet:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 0

    # Check that all nodes are still functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle,
                               num_reqs=2, num_batches=1)

def add_started_node(looper,
                     new_node,
                     node_ha,
                     client_ha,
                     txnPoolNodeSet,
                     sdk_pool_handle,
                     sdk_wallet_steward,
                     bls_key,
                     key_proof):
    '''
    Adds an already created node to the pool, that is, sends a NODE txn.
    Makes sure that the node is actually added and connected to all other
    nodes.
    '''
    new_steward_wallet_handle = sdk_add_new_nym(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                "Steward" + new_node.name,
                                                role=STEWARD_STRING)
    node_name = new_node.name
    node_dest = hexToFriendly(new_node.nodestack.verhex)
    sdk_send_update_node(looper, new_steward_wallet_handle, sdk_pool_handle,
                         node_dest, node_name,
                         node_ha[0], node_ha[1],
                         client_ha[0], client_ha[1],
                         services=[VALIDATOR],
                         bls_key=bls_key,
                         key_proof=key_proof)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

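# Hypothetical usage sketch: add_started_node only handles the pool-side
# wiring, so it assumes `new_node` is a TestNode that was created and started
# out-of-band, together with a matching `bls_key` and its proof of possession
# produced when the node's keys were initialized.
def example_add_started_node(looper, new_node, txnPoolNodeSet,
                             sdk_pool_handle, sdk_wallet_steward,
                             bls_key, key_proof):
    add_started_node(looper,
                     new_node,
                     new_node.nodestack.ha,
                     new_node.clientstack.ha,
                     txnPoolNodeSet,
                     sdk_pool_handle,
                     sdk_wallet_steward,
                     bls_key,
                     key_proof)
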
def test_finish_view_change_with_incorrect_primaries_list(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward,
        tdir, tconf, allPluginsPath):
    """
    This test imitates the situation when one of the nodes is lagging.
    It missed the txn for adding a new node and the view_change after it.
    After that the lagged node starts the next view_change together with the
    other nodes, but it has a different committed node_reg and so selects
    different primaries.
    In this case we expect that the lagged node will complete the
    view_change with the other primaries and will start catchup by
    Checkpoints because it will not be able to order.
    """
    def complete_vc(node):
        assert not node.view_change_in_progress

    view_no = checkViewNoForNodes(txnPoolNodeSet)

    # Delta is lagging
    lagging_node = txnPoolNodeSet[3]
    fast_nodes = txnPoolNodeSet[:3] + txnPoolNodeSet[4:]

    # Force 5 view changes so that we have viewNo == 5 and Zeta is the
    # primary.
    for _ in range(5):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, view_no + 1)
        ensureElectionsDone(looper, txnPoolNodeSet)
        view_no = checkViewNoForNodes(txnPoolNodeSet)

    with delay_rules_without_processing(lagging_node.nodeIbStasher,
                                        msg_rep_delay(),
                                        icDelay(),
                                        vc_delay(),
                                        nv_delay(),
                                        cDelay(),
                                        ppDelay(),
                                        pDelay()):
        # Add a new node; this should start a view_change because of the
        # NODE txn being ordered
        _, theta = sdk_add_new_steward_and_node(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                'Theta_Steward',
                                                'Theta',
                                                tdir,
                                                tconf,
                                                allPluginsPath=allPluginsPath)
        txnPoolNodeSet.append(theta)
        fast_nodes.append(theta)

        looper.run(checkNodesConnected(fast_nodes))
        ensure_all_nodes_have_same_data(looper, fast_nodes)

        waitForViewChange(looper, fast_nodes, view_no + 1)
        ensureElectionsDone(looper, fast_nodes)

        assert lagging_node.viewNo != fast_nodes[0].viewNo
        assert fast_nodes[0].viewNo == view_no + 1

    current_view_no = checkViewNoForNodes(fast_nodes)
    expected_view_no = current_view_no + 1
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, expected_view_no)
    ensureElectionsDone(looper, fast_nodes)

    looper.run(eventually(complete_vc, lagging_node, timeout=60))
    assert lagging_node.viewNo == expected_view_no

    # We assume that after receiving 2 Checkpoints the lagged node will
    # start catchup and elect the right primaries
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2 * CHK_SIZE)
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

def test_catchup_to_next_view_during_view_change_0_to_1_then_1_to_2(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward):
    '''
    1) The lagging node is not a primary for the new views
    2) All nodes except the lagging one go to view=1
    3) All nodes except the lagging one order txns on view=1
    4) All nodes except the lagging one go to view=2
    5) All nodes except the lagging one order txns on view=2
    6) The lagging node gets InstanceChanges for view=1 => it changes to
       view=1 and catches up the txns from view=2
    7) The lagging node gets InstanceChanges for view=2 => it changes to
       view=2
    8) Make sure that the lagging node is up to date and can participate in
       consensus
    '''
    lagging_node = txnPoolNodeSet[0]
    other_nodes = txnPoolNodeSet[1:]
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    initial_last_ordered = lagging_node.master_last_ordered_3PC

    with delay_rules(lagging_node.nodeIbStasher,
                     icDelay(viewNo=2), vc_delay(view_no=2)):
        with delay_rules(lagging_node.nodeIbStasher,
                         delay_for_view(viewNo=0), delay_for_view(viewNo=1)):
            # view change to viewNo=1
            trigger_view_change(txnPoolNodeSet)
            waitForViewChange(looper, other_nodes,
                              expectedViewNo=initial_view_no + 1)
            checkProtocolInstanceSetup(looper=looper, nodes=other_nodes,
                                       instances=range(3))
            ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

            # order some txns
            sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_steward, 5)

            # view change to viewNo=2
            trigger_view_change(txnPoolNodeSet)
            waitForViewChange(looper, other_nodes,
                              expectedViewNo=initial_view_no + 2)
            checkProtocolInstanceSetup(looper=looper, nodes=other_nodes,
                                       instances=range(3))
            ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

            # order some txns
            sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_steward, 5)

            assert initial_view_no == lagging_node.viewNo
            assert initial_last_ordered == lagging_node.master_last_ordered_3PC

        # make sure that the first view change happened on the lagging node
        waitForViewChange(looper, [lagging_node],
                          expectedViewNo=initial_view_no + 1,
                          customTimeout=20)
        assert initial_view_no + 1 == lagging_node.viewNo

    # make sure that the second view change happened on the lagging node
    waitForViewChange(looper, [lagging_node],
                      expectedViewNo=initial_view_no + 2,
                      customTimeout=20)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

def test_primary_send_incorrect_pp(looper, txnPoolNodeSet, tconf,
                                   allPluginsPath, sdk_pool_handle,
                                   sdk_wallet_steward, monkeypatch):
    """
    Test steps:
    Delay message requests with PrePrepares on `slow_node`
    Patch sending for PrePrepare on the `malicious_primary` to send an
    invalid PrePrepare to `slow_node`
    Order a new request
    Start a view change
    Make sure it's finished on all nodes
    Make sure that the lagging node has the same data as the other nodes
    """
    start_view_no = txnPoolNodeSet[0].viewNo
    slow_node = txnPoolNodeSet[-1]
    malicious_primary = txnPoolNodeSet[0]
    other_nodes = [n for n in txnPoolNodeSet
                   if n not in [slow_node, malicious_primary]]
    timeout = waits.expectedPoolCatchupTime(nodeCount=len(txnPoolNodeSet))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet,
                                    custom_timeout=timeout)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 1)
    old_sender = malicious_primary.master_replica._ordering_service._send

    def patched_sender(msg, dst=None, stat=None):
        if isinstance(msg, PrePrepare) and msg:
            # send the correct PrePrepare to the other nodes and a
            # PrePrepare with a corrupted ppTime to the slow node
            old_sender(msg, [n.name for n in other_nodes], stat)

            pp_dict = msg._asdict()
            pp_dict["ppTime"] += 1
            pp = PrePrepare(**pp_dict)
            old_sender(pp, [slow_node.name], stat)
            monkeypatch.undo()

    monkeypatch.setattr(malicious_primary.master_replica._ordering_service,
                        '_send',
                        patched_sender)
    monkeypatch.setattr(slow_node.master_replica._ordering_service,
                        '_validate_applied_pre_prepare',
                        lambda a, b, c: None)
    with delay_rules(slow_node.nodeIbStasher,
                     msg_rep_delay(types_to_delay=[PREPREPARE])):
        preprepare_process_num = \
            slow_node.master_replica._ordering_service.spylog.count(
                OrderingService.process_preprepare)
        resp_task = sdk_send_random_request(looper, sdk_pool_handle,
                                            sdk_wallet_steward)

        def chk():
            assert preprepare_process_num + 1 == \
                slow_node.master_replica._ordering_service.spylog.count(
                    OrderingService.process_preprepare)

        looper.run(eventually(chk))

        _, j_resp = sdk_get_and_check_replies(looper, [resp_task])[0]

        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 1)

        for n in txnPoolNodeSet:
            n.view_changer.on_master_degradation()

        ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet,
                          expectedViewNo=start_view_no + 1)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet,
                            instances_list=[0, 1])
        ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
        sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                                   sdk_wallet_steward, sdk_pool_handle)

def test_view_change_by_order_stashed_on_3_nodes_and_catchup_on_1_node(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward):
    '''
    - COMMITs are delayed on all nodes
    - All nodes start a view change with a prepared certificate (for the
      delayed message)
    - COMMITs come during the view change for 3 nodes
    - So these 3 nodes finish the view change by processing Commits and
      Ordered msgs during the view change (in between rounds of catchup).
    - The lagging (4th) node receives the missing txns as part of catch-up
      (during the view change) and also finishes the view change.
    '''
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = txnPoolNodeSet[:-1]
    slow_stasher = slow_node.nodeIbStasher
    fast_stashers = [n.nodeIbStasher for n in fast_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    initial_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    txns_count = 4
    eventual_last_ordered = (initial_last_ordered[0],
                             initial_last_ordered[1] + txns_count)
    batches_count = initial_last_ordered[1]

    with delay_rules(all_stashers, vcd_delay()):
        # the lagging node is slow in receiving Commits and catchup msgs
        with delay_rules(slow_stasher, cDelay()):
            with delay_rules(slow_stasher, lsDelay(),
                             msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                # fast nodes will receive and order Commits for
                # last_prepared_cert during the view change
                with delay_rules(fast_stashers, cDelay()):
                    with delay_rules(fast_stashers, lsDelay(),
                                     msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                        sdk_send_random_requests(looper, sdk_pool_handle,
                                                 sdk_wallet_steward,
                                                 txns_count)
                        batches_count += txns_count

                        looper.run(
                            eventually(check_prepare_certificate,
                                       txnPoolNodeSet, batches_count))
                        check_last_ordered_3pc_on_master(txnPoolNodeSet,
                                                         initial_last_ordered)

                        # trigger view change on all nodes
                        ensure_view_change(looper, txnPoolNodeSet)
                        looper.run(
                            eventually(
                                check_last_prepared_certificate_after_view_change_start,
                                txnPoolNodeSet, eventual_last_ordered))

                # check that all txns are ordered till last prepared on
                # fast nodes
                looper.run(
                    eventually(check_last_ordered_3pc_on_master, fast_nodes,
                               eventual_last_ordered, timeout=30))

            # check that all txns are ordered till last prepared on the
            # slow node as a result of catchup
            looper.run(
                eventually(check_last_ordered_3pc_on_master, [slow_node],
                           eventual_last_ordered, timeout=30))

    # wait for view change done on all nodes
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

def test_unstash_three_phase_msg_after_catchup_in_view_change(
        txnPoolNodeSet, looper, tconf, sdk_pool_handle, sdk_wallet_steward):
    """
    1. Delay Commit on Node4
    2. Order 1 req
    3. Delay Commit on all nodes
    4. Order 1 req
    5. Delay CatchupRep on Node4
    6. Delay LedgerStatus and ViewChangeDones on Nodes 1-3
    7. Start view change on all nodes
    8. Wait until Node4 has 3 stashed CatchupReps
    9. Reset delaying of Commits on all nodes
    10. Reset LedgerStatus delays on Nodes 1-3
    11. Check that 3 nodes finished VC while Node4 is still syncing and not
        finished
    12. Reset CatchupRep delay on Node4
    13. Check that Node4 finished VC and that there was just 1 round of
        catch-up
    """
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = txnPoolNodeSet[:-1]
    view_no = txnPoolNodeSet[0].viewNo
    old_stashed = slow_node.master_replica.stasher.stash_size(STASH_VIEW_3PC)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    batches_count = last_ordered[1]

    with delay_rules(
            [n.nodeIbStasher for n in txnPoolNodeSet],
            msg_rep_delay(types_to_delay=[PREPREPARE, PREPARE, COMMIT])):

        # Delay Commit messages for slow_node.
        slow_node.nodeIbStasher.delay(cDelay(sys.maxsize))
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 1)
        batches_count += 1

        # Delay Commit messages for fast_nodes.
        for n in fast_nodes:
            n.nodeIbStasher.delay(cDelay(sys.maxsize))

        request2 = sdk_send_random_request(looper, sdk_pool_handle,
                                           sdk_wallet_steward)
        batches_count += 1

        def check_commits(commit_key):
            for n in fast_nodes:
                for r in n.replicas.values():
                    assert commit_key in r._ordering_service.commits
                    assert len(
                        r._ordering_service.commits[commit_key].voters) == 1

        looper.run(eventually(check_commits,
                              (view_no, last_ordered[1] + batches_count)))

        # Delay CatchupRep messages for the slow_node.
        with delay_rules([slow_node.nodeIbStasher], cr_delay()):
            with delay_rules([n.nodeIbStasher for n in fast_nodes],
                             vcd_delay()):
                with delay_rules(
                        [n.nodeIbStasher for n in fast_nodes],
                        msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                    for n in txnPoolNodeSet:
                        n.view_changer.on_master_degradation()
                    looper.run(eventually(
                        lambda: assertExp(slow_node.mode == Mode.discovering)))

                    # Reset delaying of Commit messages for all nodes.
                    for n in txnPoolNodeSet:
                        n.nodeIbStasher.reset_delays_and_process_delayeds(
                            COMMIT)

                    assert slow_node.view_change_in_progress
                    assert slow_node.mode == Mode.discovering
                    looper.run(eventually(_check_nodes_stashed,
                                          fast_nodes,
                                          old_stashed,
                                          len(txnPoolNodeSet) - 1))
                    looper.run(eventually(_check_nodes_stashed,
                                          [slow_node],
                                          old_stashed,
                                          (len(txnPoolNodeSet) - 1) * 2))

            waitForViewChange(looper, fast_nodes,
                              expectedViewNo=view_no + 1,
                              customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
            ensureElectionsDone(looper=looper,
                                nodes=fast_nodes,
                                instances_list=range(
                                    fast_nodes[0].requiredNumberOfInstances),
                                customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
            sdk_get_and_check_replies(looper, [request2])
            batches_count += 1

    waitForViewChange(looper, [slow_node],
                      expectedViewNo=view_no + 1,
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    _check_nodes_stashed(fast_nodes, old_stashed, 0)
    assert get_pp_seq_no(txnPoolNodeSet) == batches_count
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

def test_primary_selection_non_genesis_node(sdk_one_node_added,
                                            looper,
                                            txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_steward):
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

def test_view_change_add_one_node_uncommitted_by_next_primary(
        looper, tdir, tconf, allPluginsPath,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        sdk_wallet_steward):
    # 1. Pre-requisites: viewNo=2, Primary is Node3
    for viewNo in range(1, 3):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, viewNo)
        ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)

    # 2. Add a Steward for the new Node
    new_steward_wallet_handle = sdk_add_new_nym(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                alias="testClientSteward" + randomString(3),
                                                role=STEWARD_STRING)

    # 3. Send a txn to add Node5
    # It will not be proposed and ordered by the current Primary, but will
    # be proposed by the next one in the new view.
    # Make sure that the request is propagated by the next Primary.
    old_state_root_hash = txnPoolNodeSet[0].stateRootHash(
        ledgerId=POOL_LEDGER_ID, isCommitted=False)
    primary_node = getPrimaryReplica(txnPoolNodeSet).node
    next_primary = txnPoolNodeSet[-1]
    with delay_rules_without_processing(primary_node.nodeIbStasher,
                                        ppgDelay()):
        sdk_add_new_node(looper,
                         sdk_pool_handle,
                         new_steward_wallet_handle,
                         new_node_name="Psi",
                         tdir=tdir,
                         tconf=tconf,
                         allPluginsPath=allPluginsPath,
                         autoStart=True,
                         nodeClass=TestNode,
                         do_post_node_creation=None,
                         services=[VALIDATOR],
                         wait_till_added=False)
        looper.run(eventually(check_node_txn_propagated, [next_primary]))
        check_node_txn_not_applied(txnPoolNodeSet, old_state_root_hash)

    # 4. Trigger view change to view=3
    # Make sure that only the next Primary (Node4) finishes the View Change
    # to view=3
    slow_nodes = txnPoolNodeSet[:3]
    fast_nodes = [next_primary]
    slow_stashers = [slow_node.nodeIbStasher for slow_node in slow_nodes]
    with delay_rules_without_processing(
            slow_stashers, nv_delay(),
            msg_rep_delay(types_to_delay=[NEW_VIEW])):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, 3)

        # view change is finished on Node4 only
        looper.run(eventually(check_view_change_done, fast_nodes, 3))
        for n in slow_nodes:
            assert n.master_replica._consensus_data.waiting_for_new_view

        # wait till fast nodes apply the Node txn in the new View
        # (Node4 creates a new batch with it)
        looper.run(eventually(check_node_txn_applied, fast_nodes,
                              old_state_root_hash))
        check_node_txn_not_applied(slow_nodes, old_state_root_hash)

    # 5. Trigger view change to view=4, and make sure it finishes properly
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, 4)
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=35)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client):
    """
    A node is slow to receive COMMIT messages, so after a view change it
    starts catchup. But before it can start requesting txns, the COMMIT
    messages are received and ordered. The node should not request any
    transactions.
    :return:
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * 3,
                                           3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    delay_cm = 30
    delay_cp = 100
    slow_node.nodeIbStasher.delay(cDelay(delay_cm))
    # The slow node receives consistency proofs after some delay; this delay
    # gives the opportunity to deliver all 3PC messages
    slow_node.nodeIbStasher.delay(cpDelay(delay_cp))

    # Count of `getCatchupReqs`, which is called to construct the
    # `CatchupReq` to be sent
    def domain_cr_count():
        return sum(1 for entry in
                   slow_node.ledgerManager.spylog.getAll(
                       slow_node.ledgerManager.getCatchupReqs)
                   if entry.params['consProof'].ledgerId == DOMAIN_LEDGER_ID)

    old_count = domain_cr_count()
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * sent_batches,
                                           sent_batches)
    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After the view change, the `slow_node` is behind
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Unstash only COMMIT messages
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)

    looper.runFor(2)

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        ConsistencyProof.typename)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # No `CatchupReq`s were constructed, hence no `CatchupReq`s could have
    # been sent
    assert domain_cr_count() == old_count
    # Some stashed ordered requests have been processed
    rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
    assert sent_batches in rv

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_different_ledger_request_interleave(tconf, looper, txnPoolNodeSet,
                                             sdk_one_node_added,
                                             tdir,
                                             tdirWithPoolTxns,
                                             allPluginsPath,
                                             sdk_pool_handle,
                                             sdk_wallet_client,
                                             sdk_wallet_steward):
    """
    Send pool and domain ledger requests such that they interleave, do a
    view change in between, and verify that the pool is functional
    """
    new_node = sdk_one_node_added
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Send domain ledger requests but don't wait for replies
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, 2)

    # Add another node by sending a pool ledger request
    _, new_theta = sdk_node_theta_added(looper,
                                        txnPoolNodeSet,
                                        tdir,
                                        tconf,
                                        sdk_pool_handle,
                                        sdk_wallet_steward,
                                        allPluginsPath,
                                        name='new_theta')

    # Send more domain ledger requests but don't wait for replies
    requests.extend(sdk_send_random_requests(looper, sdk_pool_handle,
                                             sdk_wallet_client, 3))

    # Do view change without waiting for replies
    ensure_view_change(looper, nodes=txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    # Make sure all requests are completed
    total_timeout = sdk_eval_timeout(len(requests), len(txnPoolNodeSet))
    sdk_get_and_check_replies(looper, requests, timeout=total_timeout)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

    new_steward_wallet, steward_did = sdk_add_new_nym(looper,
                                                      sdk_pool_handle,
                                                      sdk_wallet_steward,
                                                      'another_ste',
                                                      role='STEWARD')

    # Send another pool ledger request (NODE) but don't wait for its
    # completion
    next_node_name = 'next_node'
    sigseed, verkey, bls_key, nodeIp, nodePort, clientIp, clientPort, key_proof = \
        prepare_new_node_data(tconf, tdir, next_node_name)
    node_req = looper.loop.run_until_complete(
        prepare_node_request(steward_did,
                             new_node_name=next_node_name,
                             clientIp=clientIp,
                             clientPort=clientPort,
                             nodeIp=nodeIp,
                             nodePort=nodePort,
                             bls_key=bls_key,
                             sigseed=sigseed,
                             key_proof=key_proof))
    sdk_wallet = (new_steward_wallet, steward_did)
    request_couple = sdk_sign_and_send_prepared_request(looper, sdk_wallet,
                                                        sdk_pool_handle,
                                                        node_req)

    # Send more domain ledger requests but don't wait for replies
    request_couples = [
        request_couple,
        *sdk_send_random_requests(looper, sdk_pool_handle,
                                  sdk_wallet_client, 5)
    ]

    # Make sure all requests are completed
    total_timeout = sdk_eval_timeout(len(request_couples),
                                     len(txnPoolNodeSet))
    sdk_get_and_check_replies(looper, request_couples, timeout=total_timeout)

    # Make sure the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def testNodesReceiveClientMsgs(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle):
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_re_order_pre_prepares_no_pre_prepares(looper, txnPoolNodeSet,
                                               sdk_wallet_client,
                                               sdk_pool_handle):
    # 1. Drop PrePrepares, Prepares and Commits on the 4th node
    # and order a couple of requests on Nodes 1-3
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]
    with delay_rules_without_processing(lagging_node.nodeIbStasher,
                                        delay_3pc()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 3)
        assert all(n.master_last_ordered_3PC == (0, 3) for n in other_nodes)

    with delay_rules_without_processing(
            lagging_node.nodeIbStasher,
            msg_rep_delay(types_to_delay=[PREPREPARE, PREPARE, COMMIT])):
        # 2. Simulate view change start so that
        # all PrePrepares/Prepares/Commits are cleared
        # and uncommitted txns are reverted
        for n in txnPoolNodeSet:
            n.replicas.send_to_internal_bus(ViewChangeStarted(view_no=1))
            master_ordering_service = n.master_replica._ordering_service
            assert not master_ordering_service.prePrepares
            assert not master_ordering_service.prepares
            assert not master_ordering_service.commits
            ledger = n.db_manager.ledgers[DOMAIN_LEDGER_ID]
            state = n.db_manager.states[DOMAIN_LEDGER_ID]
            assert len(ledger.uncommittedTxns) == 0
            assert ledger.uncommitted_root_hash == ledger.tree.root_hash
            assert state.committedHead == state.head

        # check that all nodes but the lagging one have old_view_pps stored
        for n in other_nodes:
            assert n.master_replica._ordering_service.old_view_preprepares
        assert not lagging_node.master_replica._ordering_service.old_view_preprepares

        # 3. Simulate view change finish to re-order the same PrePrepares
        assert lagging_node.master_last_ordered_3PC == (0, 0)
        new_master = txnPoolNodeSet[1]
        old_view_pps = \
            new_master.master_replica._ordering_service.old_view_preprepares
        batches = sorted([preprepare_to_batch_id(pp)
                          for _, pp in old_view_pps.items()])
        new_view_msg = NewViewCheckpointsApplied(view_no=0,
                                                 view_changes=[],
                                                 checkpoint=None,
                                                 batches=batches)
        for n in txnPoolNodeSet:
            n.master_replica._consensus_data.prev_view_prepare_cert = \
                batches[-1].pp_seq_no
            n.master_replica._ordering_service._bus.send(new_view_msg)

        # 4. Make sure that Nodes 1-3 (that already ordered the requests)
        # sent Prepares and Commits so that the requests are eventually
        # ordered on Node4 as well
        waitNodeDataEquality(looper, lagging_node, *other_nodes,
                             customTimeout=60)
        assert lagging_node.master_last_ordered_3PC == (0, 4)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

def test_req_drop_on_preprepare_phase_on_non_primary_and_then_ordered(
        tconf, setup, looper, txnPoolNodeSet, sdk_wallet_client,
        sdk_pool_handle):
    global initial_ledger_size
    A, B, C, D = txnPoolNodeSet  # type: TestNode
    lagged_node = C

    def check_preprepares_delayed():
        # Node should have received a request from the client
        assert len(recvdRequest(lagged_node)) == 1
        # Node should have received PROPAGATEs from the 3 other nodes
        assert len(recvdPropagate(lagged_node)) == 3
        # Node should have sent a PROPAGATE
        assert len(sentPropagate(lagged_node)) == 1
        # Node should not have received PrePrepares for the master instance
        assert len(recvdPrePrepareForInstId(lagged_node, 0)) == 0
        # Node should not have received Prepares for the master instance
        assert len(recvdPrepareForInstId(lagged_node, 0)) == 0
        # Node should not have received Commits for the master instance
        assert len(recvdCommitForInstId(lagged_node, 0)) == 0
        # Node should have 1 request in its requests queue
        assert len(lagged_node.requests) == 1

    timeout = howlong - 2
    looper.run(eventually(check_preprepares_delayed, retryWait=.5,
                          timeout=timeout))

    def check_drop():
        # Node should still not have received PrePrepares, Prepares or
        # Commits for the master instance
        assert len(recvdPrePrepareForInstId(lagged_node, 0)) == 0
        assert len(recvdPrepareForInstId(lagged_node, 0)) == 0
        assert len(recvdCommitForInstId(lagged_node, 0)) == 0
        # The request object should be dropped by timeout
        assert len(lagged_node.requests) == 0

    timeout = tconf.ORDERING_PHASE_REQ_TIMEOUT + \
        tconf.OUTDATED_REQS_CHECK_INTERVAL + 1
    looper.run(eventually(check_drop, retryWait=.5, timeout=timeout))

    for n in txnPoolNodeSet:
        n.nodeIbStasher.resetDelays()

    def check_propagates_requested():
        # Node should have received the delayed PrePrepare
        assert len(recvdPrePrepareForInstId(lagged_node, 0)) >= 1
        # Check that PROPAGATEs are requested by at least one replica, since
        # a PrePrepare has been received for a request that was dropped.
        # We can only check that the number of requested PROPAGATEs is >= 1:
        # the first replica that sees non-finalized requests sends a
        # MessageReq for PROPAGATEs and may receive them before the
        # PrePrepare for the next replica arrives, so for the second replica
        # all requests may already be finalized.
        tmp = getAllArgs(lagged_node, TestNode.request_propagates)
        assert len(tmp) >= 1

    timeout = howlong
    looper.run(eventually(check_propagates_requested, retryWait=.5,
                          timeout=timeout))

    def check_propagates_and_3pc_received():
        # Node should have received the requested PROPAGATEs
        assert len(recvdPropagate(lagged_node)) == 6
        # Node should have received the delayed Prepares and Commits
        # for the master instance
        assert len(recvdPrepareForInstId(lagged_node, 0)) == 2
        assert len(recvdCommitForInstId(lagged_node, 0)) == 3

    timeout = howlong + 2
    looper.run(eventually(check_propagates_and_3pc_received, retryWait=.5,
                          timeout=timeout))

    def check_ledger_size():
        # The request should eventually be ordered on all nodes
        for n in txnPoolNodeSet:
            assert n.domainLedger.size - initial_ledger_size == 1

    looper.run(eventually(check_ledger_size, retryWait=.5, timeout=timeout))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)
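# The `setup` fixture consumed above is defined elsewhere in this module.
# The sketch below is an assumption about its shape, using the generic 3PC
# delayers from plenum.test.delayers (ppDelay/pDelay/cDelay): it delays all
# master-instance 3PC traffic to the lagged node for `howlong` seconds (a
# module-level constant) and records the initial ledger size that the test
# compares against.
@pytest.fixture()
def setup_sketch(txnPoolNodeSet):
    global initial_ledger_size
    lagged_node = txnPoolNodeSet[2]  # node C in the test above
    initial_ledger_size = lagged_node.domainLedger.size
    lagged_node.nodeIbStasher.delay(ppDelay(howlong, 0))  # PrePrepares
    lagged_node.nodeIbStasher.delay(pDelay(howlong, 0))   # Prepares
    lagged_node.nodeIbStasher.delay(cDelay(howlong, 0))   # Commits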
def test_old_view_pre_prepare_reply_processing(looper, txnPoolNodeSet, tconf,
                                               allPluginsPath,
                                               sdk_pool_handle,
                                               sdk_wallet_steward,
                                               monkeypatch):
    """
    Test steps:
    Delay PrePrepares on `slow_node` (without processing)
    Delay receiving of OldViewPrePrepareRequest on all nodes but `malicious_node`
    Patch sending of OldViewPrePrepareReply on `malicious_node` so that it
    sends an invalid PrePrepare
    Start a view change
    Make sure it's finished on all nodes excluding `slow_node`
    Make sure that the lagging node received an OldViewPrePrepareReply from
    the malicious node
    Reset the delay for OldViewPrePrepareRequest on the other nodes
    Make sure the pool is functional and all nodes have the same data
    """
    start_view_no = txnPoolNodeSet[0].viewNo
    slow_node = txnPoolNodeSet[-2]
    malicious_node = txnPoolNodeSet[-1]
    other_nodes = [n for n in txnPoolNodeSet
                   if n not in [slow_node, malicious_node]]
    ensureElectionsDone(looper, txnPoolNodeSet,
                        customTimeout=tconf.NEW_VIEW_TIMEOUT)
    timeout = waits.expectedPoolCatchupTime(nodeCount=len(txnPoolNodeSet))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet,
                                    custom_timeout=timeout)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 1)

    with delay_rules_without_processing(
            slow_node.nodeIbStasher, ppDelay(),
            msg_rep_delay(types_to_delay=[PREPREPARE])):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 1)

        with delay_rules([n.nodeIbStasher for n in other_nodes],
                         old_view_pp_request_delay()):
            old_sender = malicious_node.master_replica._ordering_service._send

            def patched_sender(msg, dst=None, stat=None):
                if isinstance(msg, OldViewPrePrepareReply) and msg.preprepares:
                    # corrupt the digest of the first PrePrepare in the
                    # reply, then restore the original sender so that only
                    # this one reply is malformed
                    pp_dict = msg.preprepares[0]._asdict()
                    pp_dict["digest"] = "incorrect_digest"
                    pp = PrePrepare(**pp_dict)
                    msg.preprepares[0] = pp
                    monkeypatch.undo()
                old_sender(msg, dst, stat)

            monkeypatch.setattr(malicious_node.master_replica._ordering_service,
                                '_send', patched_sender)
            monkeypatch.setattr(slow_node.master_replica._ordering_service,
                                '_validate_applied_pre_prepare',
                                lambda a, b, c: None)
            process_old_pp_num = \
                slow_node.master_replica._ordering_service.spylog.count(
                    OrderingService.process_old_view_preprepare_reply)

            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()

            waitForViewChange(looper, other_nodes + [malicious_node],
                              expectedViewNo=start_view_no + 1)
            ensureElectionsDone(looper=looper,
                                nodes=other_nodes + [malicious_node],
                                instances_list=[0, 1, 2])
            ensure_all_nodes_have_same_data(looper,
                                            nodes=other_nodes + [malicious_node])

            def chk():
                assert process_old_pp_num + 1 == \
                    slow_node.master_replica._ordering_service.spylog.count(
                        OrderingService.process_old_view_preprepare_reply)

            looper.run(eventually(chk))

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)
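# Delayers such as `old_view_pp_request_delay` used above are predicates over
# incoming (message, sender) pairs that return a delay in seconds for
# matching messages. The sketch below is an assumption about its shape, based
# on the delayer helpers used throughout this suite, not the actual
# implementation.
def old_view_pp_request_delay_sketch(seconds=300):
    def delayer(item):
        msg, frm = item
        # delay only OldViewPrePrepareRequest messages; returning None
        # lets everything else through immediately
        if isinstance(msg, OldViewPrePrepareRequest):
            return seconds
    return delayer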