def testAddInactiveNodeThenActivate(looper, txnPoolNodeSet, sdk_wallet_steward,
                                    sdk_pool_handle, tdir, tconf,
                                    allPluginsPath):
    """Add a node with no SERVICES field (i.e. inactive), then activate it
    via a node-data update and verify the enlarged pool still processes
    requests.

    NOTE(review): `update_node_data_and_reconnect` is called with all-None
    update fields — presumably this (re)sets SERVICES so the node becomes
    active; confirm against the helper's definition.
    """
    new_steward_name = "testClientSteward" + randomString(3)
    new_node_name = "Kappa"
    # adding a new node without SERVICES field
    # it means the node is in the inactive state
    new_steward_wallet, new_node = \
        sdk_add_new_steward_and_node(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward,
                                     new_steward_name,
                                     new_node_name,
                                     tdir,
                                     tconf,
                                     allPluginsPath,
                                     services=None)
    # Only the pre-existing nodes are expected to be connected at this point;
    # the new node is not appended to txnPoolNodeSet yet.
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    new_node = update_node_data_and_reconnect(looper,
                                              txnPoolNodeSet + [new_node],
                                              new_steward_wallet,
                                              sdk_pool_handle,
                                              new_node,
                                              None, None,
                                              None, None,
                                              tdir, tconf)
    txnPoolNodeSet.append(new_node)
    # The pool, including the newly activated node, must order requests.
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, new_steward_wallet,
                               sdk_pool_handle)
def sdk_node_theta_added(looper, txnPoolNodeSet, tdir, tconf, sdk_pool_handle,
                         sdk_wallet_steward, allPluginsPath,
                         testNodeClass=TestNode, name=None):
    """Create a fresh steward and a new node (default name "Theta"), join it
    to the pool, wait for connectivity, refresh the local pool view and
    return ``(steward_wallet, node)``.
    """
    steward_alias = "testClientSteward" + randomString(3)
    node_alias = name if name else "Theta"
    steward_wallet, added_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        steward_alias, node_alias, tdir, tconf, allPluginsPath,
        nodeClass=testNodeClass)
    txnPoolNodeSet.append(added_node)
    # The whole (now larger) pool must be mutually connected.
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    return steward_wallet, added_node
def add_new_node(looper, nodes, sdk_pool_handle, sdk_wallet_steward, tdir,
                 client_tdir, tconf, all_plugins_path, name=None):
    """Add one node (default "Psi") through a freshly created steward,
    wait until it catches up with the rest of the pool and return it.
    """
    node_alias = name if name else "Psi"
    steward_alias = "testClientSteward" + randomString(3)
    _, joined_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward, steward_alias,
        node_alias, tdir, tconf, allPluginsPath=all_plugins_path)
    nodes.append(joined_node)
    looper.run(checkNodesConnected(nodes))
    # Give the newcomer enough time to catch up with the existing nodes.
    catchup_timeout = waits.expectedPoolCatchupTime(nodeCount=len(nodes))
    waitNodeDataEquality(looper, joined_node, *nodes[:-1],
                         customTimeout=catchup_timeout)
    sdk_pool_refresh(looper, sdk_pool_handle)
    return joined_node
def testAddInactiveNodeThenActivate(looper, txnPoolNodeSet, sdk_wallet_steward,
                                    sdk_pool_handle, tdir, tconf,
                                    allPluginsPath):
    """Add a node without SERVICES (inactive state), then activate it through
    a node-data update and check the pool remains functional with it.
    """
    new_steward_name = "testClientSteward" + randomString(3)
    new_node_name = "Kappa"
    # adding a new node without SERVICES field
    # it means the node is in the inactive state
    new_steward_wallet, new_node = \
        sdk_add_new_steward_and_node(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward,
                                     new_steward_name,
                                     new_node_name,
                                     tdir,
                                     tconf,
                                     allPluginsPath,
                                     services=None)
    # The new node is intentionally left out of this connectivity check.
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    # All-None update fields; presumably this activates the node — see
    # update_node_data_and_reconnect for the exact semantics.
    new_node = update_node_data_and_reconnect(looper,
                                              txnPoolNodeSet + [new_node],
                                              new_steward_wallet,
                                              sdk_pool_handle,
                                              new_node,
                                              None, None,
                                              None, None,
                                              tdir, tconf)
    txnPoolNodeSet.append(new_node)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, new_steward_wallet,
                               sdk_pool_handle)
def test_integration_setup_last_ordered_after_catchup(looper, txnPoolNodeSet,
                                                      sdk_wallet_steward,
                                                      sdk_wallet_client,
                                                      sdk_pool_handle, tdir,
                                                      tconf, allPluginsPath):
    """After a new node joins via catch-up and one more request is ordered,
    every replica in the pool must agree on last_ordered_3pc == (0, 4), and
    backup replicas must not have requested any 3-phase messages.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    _, new_node = sdk_add_new_steward_and_node(looper, sdk_pool_handle,
                                               sdk_wallet_steward,
                                               'EpsilonSteward', 'Epsilon',
                                               tdir, tconf,
                                               allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    # Wait until the new node's replicas are in sync with the others.
    looper.run(eventually(replicas_synced, new_node))
    for node in txnPoolNodeSet:
        for replica in node.replicas.values():
            # 4 = steward NYM + NODE txn + the two client requests above
            # (TODO confirm the exact batch accounting).
            assert replica.last_ordered_3pc == (0, 4)
            if not replica.isMaster:
                # Backups should never have had to re-request 3PC messages.
                assert get_count(
                    replica._ordering_service,
                    replica._ordering_service._request_three_phase_msg) == 0
def add_new_node(looper, nodes, sdk_pool_handle, sdk_wallet_steward, tdir,
                 tconf, all_plugins_path, name=None):
    """Join a new node (random 5-char name unless given) to the pool through
    a freshly created steward and wait for its data to equalize; backup
    last-ordered-3PC is excluded from the equality check.
    """
    node_alias = name if name else randomString(5)
    steward_alias = "testClientSteward" + randomString(3)
    _steward_wallet, joined_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward, steward_alias,
        node_alias, tdir, tconf, all_plugins_path)
    nodes.append(joined_node)
    looper.run(checkNodesConnected(nodes, customTimeout=60))
    catchup_timeout = waits.expectedPoolCatchupTime(nodeCount=len(nodes))
    waitNodeDataEquality(looper, joined_node, *nodes[:-1],
                         customTimeout=catchup_timeout,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    return joined_node
def test_add_node_to_pool_with_large_ppseqno_diff_views(
        do_view_change, looper, txnPoolNodeSet, tconf, sdk_pool_handle,
        sdk_wallet_steward, tdir, allPluginsPath):
    """
    Adding a node to the pool while ppSeqNo is big caused a node to stash
    all the requests because of incorrect watermarks limits set.
    The case of view_no == 0 is special.
    The test emulates big ppSeqNo number, adds a node and checks all the
    pool nodes are functional. The test is run with several starting
    view_no, including 0.
    """
    ensure_several_view_change(looper, txnPoolNodeSet, do_view_change,
                               custom_timeout=tconf.NEW_VIEW_TIMEOUT)

    # Fake a ppSeqNo far beyond the LOG_SIZE window.
    cur_ppseqno = get_pp_seq_no(txnPoolNodeSet)
    big_ppseqno = cur_ppseqno + tconf.LOG_SIZE * 2 + 2300
    assert (big_ppseqno > cur_ppseqno)

    # ensure pool is working properly
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    assert (cur_ppseqno < get_pp_seq_no(txnPoolNodeSet))

    _set_ppseqno(txnPoolNodeSet, big_ppseqno)
    cur_ppseqno = get_pp_seq_no(txnPoolNodeSet)
    assert (big_ppseqno == cur_ppseqno)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    assert (cur_ppseqno < get_pp_seq_no(txnPoolNodeSet))

    # Disable view change after adding new node as it will not be able to
    # finish due to fake ppSeqNo set
    for n in txnPoolNodeSet:
        n._on_node_count_changed_committed = lambda: None

    new_steward_name = "testClientSteward" + randomString(4)
    new_node_name = "TestTheta" + randomString(4)
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Pool must keep ordering both for the new steward and the old one,
    # and the new node's data must equalize in both cases.
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet_handle, sdk_pool_handle)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    waitNodeDataEquality(looper,
                         new_node, *txnPoolNodeSet[:-1])
def test_upper_bound_of_checkpoint_after_catchup_is_divisible_by_chk_freq(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle,
        sdk_wallet_steward, sdk_wallet_client, tdir, client_tdir, tconf,
        allPluginsPath):
    """A node that joined mid-checkpoint-interval via catch-up must open its
    first checkpoint on a boundary divisible by the checkpoint frequency —
    here the key (7, 10) asserted below.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)
    _, new_node = sdk_add_new_steward_and_node(looper, sdk_pool_handle,
                                               sdk_wallet_steward,
                                               'EpsilonSteward', 'Epsilon',
                                               tdir, tconf,
                                               allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    for replica in new_node.replicas:
        # Exactly one checkpoint, ending on a chk-freq boundary.
        assert len(replica.checkpoints) == 1
        assert next(iter(replica.checkpoints)) == (7, 10)
def test_upper_bound_of_checkpoint_after_catchup_is_divisible_by_chk_freq(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle,
        sdk_wallet_steward, sdk_wallet_client, tdir, tconf, allPluginsPath):
    """After a node joins via catch-up and one more request is ordered, the
    pool's stable checkpoint must sit on a chk-freq-divisible boundary (5).
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)
    _, new_node = sdk_add_new_steward_and_node(looper, sdk_pool_handle,
                                               sdk_wallet_steward,
                                               'EpsilonSteward', 'Epsilon',
                                               tdir, tconf,
                                               allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    # Check an original pool node (not the newcomer) — its replicas should
    # have stabilized the checkpoint at seq-no 5.
    for replica in txnPoolNodeSet[0].replicas.values():
        check_stable_checkpoint(replica, 5)
def add_new_node(looper, nodes, sdk_pool_handle, sdk_wallet_steward, tdir,
                 tconf, all_plugins_path, name=None, wait_till_added=True):
    """Add a node (default "Psi") through a new steward.

    When ``wait_till_added`` is True (the default) the node is appended to
    ``nodes``, connectivity and catch-up are awaited, and the pool view is
    refreshed before returning; otherwise the node is returned immediately.
    """
    node_alias = name if name else "Psi"
    steward_alias = "testClientSteward" + randomString(3)
    _, joined_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward, steward_alias,
        node_alias, tdir, tconf, allPluginsPath=all_plugins_path,
        wait_till_added=wait_till_added)
    if not wait_till_added:
        # Caller opted out of the synchronization steps below.
        return joined_node
    nodes.append(joined_node)
    looper.run(checkNodesConnected(nodes))
    catchup_timeout = waits.expectedPoolCatchupTime(nodeCount=len(nodes))
    waitNodeDataEquality(
        looper, joined_node, *nodes[:-1],
        customTimeout=catchup_timeout,
        exclude_from_check=['check_last_ordered_3pc_backup'])
    sdk_pool_refresh(looper, sdk_pool_handle)
    return joined_node
def sdk_node_theta_added(looper, txnPoolNodeSet, tdir, tconf, sdk_pool_handle,
                         sdk_wallet_steward, allPluginsPath,
                         testNodeClass=TestNode, name=None):
    """Spin up a new steward plus a node named ``name`` (or "Theta"), attach
    it to the running pool and return ``(steward_wallet, node)`` once the
    pool is connected and its local ledger view refreshed.
    """
    steward_label = "testClientSteward" + randomString(3)
    theta_name = name if name else "Theta"
    steward_wallet, theta_node = sdk_add_new_steward_and_node(
        looper,
        sdk_pool_handle,
        sdk_wallet_steward,
        steward_label,
        theta_name,
        tdir,
        tconf,
        allPluginsPath,
        nodeClass=testNodeClass)
    txnPoolNodeSet.append(theta_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    return steward_wallet, theta_node
def test_second_checkpoint_after_catchup_can_be_stabilized(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_wallet_steward,
        sdk_wallet_client, sdk_pool_handle, tdir, tconf, allPluginsPath):
    """A node that joined via catch-up starts with no checkpoints and
    watermarks (h=2, H=17); after enough requests, its second checkpoint
    (6, 10) must stabilize and the watermarks must advance to (10, 25).
    """
    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    # Fresh from catch-up: no checkpoints yet, watermarks offset by the
    # 2 already-ordered batches.
    for replica in new_node.replicas:
        assert len(replica.checkpoints) == 0
        assert replica.h == 2
        assert replica.H == 17

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 7)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    # 9 ordered in total: a partial checkpoint (3, 5) and an incomplete
    # (6, 10); neither can be stabilized, so watermarks stay put.
    for replica in new_node.replicas:
        assert len(replica.checkpoints) == 2
        keys_iter = iter(replica.checkpoints)

        assert next(keys_iter) == (3, 5)
        assert replica.checkpoints[3, 5].seqNo == 5
        assert replica.checkpoints[3, 5].digest is None
        assert replica.checkpoints[3, 5].isStable is False

        assert next(keys_iter) == (6, 10)
        assert replica.checkpoints[6, 10].seqNo == 9
        assert replica.checkpoints[6, 10].digest is None
        assert replica.checkpoints[6, 10].isStable is False

        assert replica.h == 2
        assert replica.H == 17

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    looper.runFor(stabilization_timeout)

    # The 10th ordered request completes (6, 10); it stabilizes (gets a
    # digest) and the watermarks advance accordingly.
    for replica in new_node.replicas:
        assert len(replica.checkpoints) == 1
        keys_iter = iter(replica.checkpoints)

        assert next(keys_iter) == (6, 10)
        assert replica.checkpoints[6, 10].seqNo == 10
        assert replica.checkpoints[6, 10].digest is not None
        assert replica.checkpoints[6, 10].isStable is True

        assert replica.h == 10
        assert replica.H == 25
def test_add_node_to_pool_with_large_ppseqno_diff_views( do_view_change, looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward, tdir, client_tdir, allPluginsPath): """ Adding a node to the pool while ppSeqNo is big caused a node to stash all the requests because of incorrect watermarks limits set. The case of view_no == 0 is special. The test emulates big ppSeqNo number, adds a node and checks all the pool nodes are functional. The test is run with several starting view_no, including 0 """ # TODO: for now this test will use old client api, after moving node txn to sdk it will be rewritten ensure_several_view_change(looper, txnPoolNodeSet, do_view_change, custom_timeout=tconf.VIEW_CHANGE_TIMEOUT) big_ppseqno = tconf.LOG_SIZE * 2 + 2345 cur_ppseqno = _get_ppseqno(txnPoolNodeSet) assert (big_ppseqno > cur_ppseqno) # ensure pool is working properly sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 3) assert (cur_ppseqno < _get_ppseqno(txnPoolNodeSet)) _set_ppseqno(txnPoolNodeSet, big_ppseqno) cur_ppseqno = _get_ppseqno(txnPoolNodeSet) assert (big_ppseqno == cur_ppseqno) sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 3) assert (cur_ppseqno < _get_ppseqno(txnPoolNodeSet)) new_steward_name = "testClientSteward" + randomString(4) new_node_name = "TestTheta" + randomString(4) new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node( looper, sdk_pool_handle, sdk_wallet_steward, new_steward_name, new_node_name, tdir, tconf, allPluginsPath=allPluginsPath) txnPoolNodeSet.append(new_node) looper.run(checkNodesConnected(txnPoolNodeSet)) sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle) sdk_ensure_pool_functional(looper, txnPoolNodeSet, new_steward_wallet_handle, sdk_pool_handle) waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1]) sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 3) 
waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def testChangeHaPersistsPostNodesRestart(looper, txnPoolNodeSet, tdir, tconf,
                                         sdk_pool_handle, sdk_wallet_client,
                                         sdk_wallet_steward):
    """Change a node's HA via a NODE txn, restart the whole pool, and verify
    the new HA is picked up from the ledger and the pool stays functional.
    """
    new_steward_wallet, new_node = \
        sdk_add_new_steward_and_node(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward,
                                     'AnotherSteward' + randomString(4),
                                     'AnotherNode' + randomString(4),
                                     tdir,
                                     tconf)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    node_new_ha, client_new_ha = genHa(2)
    logger.debug("{} changing HAs to {} {}".format(new_node, node_new_ha,
                                                   client_new_ha))

    # Making the change-HA txn and confirming it succeeded
    node_dest = hexToFriendly(new_node.nodestack.verhex)
    sdk_send_update_node(looper, new_steward_wallet, sdk_pool_handle,
                         node_dest, new_node.name,
                         node_new_ha.host, node_new_ha.port,
                         client_new_ha.host, client_new_ha.port)

    # Stopping existing nodes
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # Starting nodes again by creating `Node` objects since that simulates
    # what happens when starting the node with script
    restartedNodes = []
    for node in txnPoolNodeSet[:-1]:
        config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
        restartedNode = TestNode(node.name,
                                 config_helper=config_helper,
                                 config=tconf,
                                 ha=node.nodestack.ha,
                                 cliha=node.clientstack.ha)
        looper.add(restartedNode)
        restartedNodes.append(restartedNode)

    # Starting the node whose HA was changed, explicitly with the new HAs
    config_helper = PNodeConfigHelper(new_node.name, tconf, chroot=tdir)
    node = TestNode(new_node.name,
                    config_helper=config_helper,
                    config=tconf,
                    ha=node_new_ha,
                    cliha=client_new_ha)
    looper.add(node)
    restartedNodes.append(node)

    looper.run(checkNodesConnected(restartedNodes))
    waitNodeDataEquality(looper, node, *restartedNodes[:-1])
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, restartedNodes, sdk_wallet_client,
                               sdk_pool_handle)
def test_second_checkpoint_after_catchup_can_be_stabilized(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_wallet_steward,
        sdk_wallet_client, sdk_pool_handle, tdir, tconf, allPluginsPath):
    """After joining via catch-up the new node starts with an empty
    checkpoint state and watermarks (h=2, H=17); ordering 7 then 1 more
    requests must stabilize checkpoints at 5 and then 10, advancing the
    watermarks to (5, 20) and (10, 25).
    """
    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    master_replica = new_node.replicas._master_replica

    check_stable_checkpoint(master_replica, 0)
    check_num_received_checkpoints(master_replica, 0)

    assert master_replica.h == 2
    assert master_replica.H == 17

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    # One request ordered — watermarks unchanged on every replica.
    for replica in new_node.replicas.values():
        assert replica.h == 2
        assert replica.H == 17

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6)
    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for replica in new_node.replicas.values():
        check_stable_checkpoint(replica, 5)
        check_num_unstable_checkpoints(replica, 0)

        # nothing is stashed since it's ordered during catch-up
        check_num_received_checkpoints(replica, 0)

        assert replica.h == 5
        assert replica.H == 20

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    looper.runFor(stabilization_timeout)

    for replica in new_node.replicas.values():
        check_stable_checkpoint(replica, 10)
        check_num_unstable_checkpoints(replica, 0)

        # nothing is stashed since it's ordered during catch-up
        check_num_received_checkpoints(replica, 0)

        assert replica.h == 10
        assert replica.H == 25
def testChangeHaPersistsPostNodesRestart(looper, txnPoolNodeSet, tdir, tconf,
                                         sdk_pool_handle, sdk_wallet_client,
                                         sdk_wallet_steward):
    """Send a change-HA NODE txn for a freshly added node, restart every
    node, and check that the pool reconnects (new HA persisted in the
    ledger) and still orders requests.
    """
    new_steward_wallet, new_node = \
        sdk_add_new_steward_and_node(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward,
                                     'AnotherSteward' + randomString(4),
                                     'AnotherNode' + randomString(4),
                                     tdir,
                                     tconf)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    node_new_ha, client_new_ha = genHa(2)
    logger.debug("{} changing HAs to {} {}".format(new_node, node_new_ha,
                                                   client_new_ha))

    # Making the change-HA txn and confirming it succeeded
    node_dest = hexToFriendly(new_node.nodestack.verhex)
    sdk_send_update_node(looper, new_steward_wallet, sdk_pool_handle,
                         node_dest, new_node.name,
                         node_new_ha.host, node_new_ha.port,
                         client_new_ha.host, client_new_ha.port)

    # Stopping existing nodes
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # Starting nodes again by creating `Node` objects since that simulates
    # what happens when starting the node with script
    restartedNodes = []
    for node in txnPoolNodeSet[:-1]:
        config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
        restartedNode = TestNode(node.name,
                                 config_helper=config_helper,
                                 config=tconf,
                                 ha=node.nodestack.ha,
                                 cliha=node.clientstack.ha)
        looper.add(restartedNode)
        restartedNodes.append(restartedNode)

    # Starting the node whose HA was changed, with the new HAs passed in
    config_helper = PNodeConfigHelper(new_node.name, tconf, chroot=tdir)
    node = TestNode(new_node.name,
                    config_helper=config_helper,
                    config=tconf,
                    ha=node_new_ha,
                    cliha=client_new_ha)
    looper.add(node)
    restartedNodes.append(node)

    looper.run(checkNodesConnected(restartedNodes))
    waitNodeDataEquality(looper, node, *restartedNodes[:-1])
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, restartedNodes, sdk_wallet_client,
                               sdk_pool_handle)
def testNewNodeCatchupWhileIncomingRequests(looper, txnPoolNodeSet,
                                            testNodeClass, tdir, tconf,
                                            sdk_pool_handle,
                                            sdk_wallet_steward,
                                            allPluginsPath):
    """
    A new node joins while transactions are happening, its catchup requests
    include till where it has to catchup, which would be less than the other
    node's ledger size. In the meantime, the new node will stash all requests
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    def chkAfterCall(self, req, frm):
        # Wrap CatchupReq processing to assert the requested upper bound
        # never exceeds the responding node's ledger size.
        r = self.processCatchupReq(req, frm)
        typ = getattr(req, f.LEDGER_ID.nm)
        if typ == DOMAIN_LEDGER_ID:
            ledger = self.getLedgerForMsg(req)
            assert req.catchupTill <= ledger.size
        return r

    for node in txnPoolNodeSet:
        node.nodeMsgRouter.routes[CatchupReq] = \
            types.MethodType(chkAfterCall, node.ledgerManager)
        # Delay catchup replies so catch-up overlaps with new traffic.
        node.nodeIbStasher.delay(cqDelay(3))

    print('Sending 5 requests')
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, 5)
    looper.runFor(1)
    new_steward_name = randomString()
    new_node_name = "Epsilon"
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        nodeClass=testNodeClass, allPluginsPath=allPluginsPath,
        autoStart=True)
    sdk_pool_refresh(looper, sdk_pool_handle)
    txnPoolNodeSet.append(new_node)
    looper.runFor(2)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    # TODO select or create a timeout for this case in 'waits'
    looper.run(
        eventually(checkNodeDataForEquality, new_node, *txnPoolNodeSet[:-1],
                   retryWait=1, timeout=80))
    # The new node must have processed requests it stashed during catch-up.
    assert new_node.spylog.count(TestNode.processStashedOrderedReqs) > 0
def test_add_node_delay_commit_on_one(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, tdir, tconf, allPluginsPath): view_no = txnPoolNodeSet[-1].viewNo # Add a New node but don't allow Delta to be aware of it. We do not want it in Delta's node registry. with delay_rules(txnPoolNodeSet[-1].nodeIbStasher, cDelay()): _, new_node = sdk_add_new_steward_and_node(looper, sdk_pool_handle, sdk_wallet_steward, 'New_Steward', 'Epsilon', tdir, tconf, allPluginsPath=allPluginsPath) txnPoolNodeSet.append(new_node) looper.run(checkNodesConnected(txnPoolNodeSet[:-2] + [new_node])) waitForViewChange(looper, txnPoolNodeSet, view_no + 1) ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet) ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet,
                           looper, tdir, tconf, allPluginsPath,
                           sdk_pool_handle, sdk_wallet_steward,
                           sdk_wallet_client):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes but one of the nodes does not reply and the newly joined node
    cannot complete the process till the timeout and then requests the
    missing transactions.
    """

    def ignoreCatchupReq(self, req, frm):
        # Replacement handler: log and drop the CatchupReq entirely.
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the nodes does not process catchup requests.
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    badReplica = npr[0]
    badNode = badReplica.node
    badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        ignoreCatchupReq, badNode.ledgerManager)
    more_requests = 10
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, more_requests)
    _, new_node = sdk_add_new_steward_and_node(looper, sdk_pool_handle,
                                               sdk_wallet_steward,
                                               'EpsilonSteward', 'Epsilon',
                                               tdir, tconf,
                                               allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
        reduced_catchup_timeout_conf.CatchupTransactionsTimeout
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
def testNodeRejectingInvalidTxns(looper, sdk_pool_handle, sdk_wallet_client,
                                 tconf, tdir, txnPoolNodeSet, patched_node,
                                 request, sdk_wallet_steward, testNodeClass,
                                 allPluginsPath, do_post_node_creation):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes but one of the nodes replies with incorrect transactions. The
    newly joined node detects that and rejects the transactions and thus
    blacklists the node. It thus cannot complete the process till the
    timeout and then requests the missing transactions.
    """
    txnCount = getValueFromModule(request, "txnCount", 5)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, txnCount)
    new_steward_name = randomString()
    new_node_name = "Epsilon"
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        nodeClass=testNodeClass, allPluginsPath=allPluginsPath,
        autoStart=True, do_post_node_creation=do_post_node_creation)
    sdk_pool_refresh(looper, sdk_pool_handle)
    # `patched_node` is the fixture-provided node whose catchup replies are
    # corrupted (patched outside this function).
    bad_node = patched_node
    do_not_tell_clients_about_newly_joined_node(txnPoolNodeSet)
    logger.debug('Catchup request processor of {} patched'.format(bad_node))
    looper.run(checkNodesConnected(txnPoolNodeSet))
    # catchup #1 -> CatchupTransactionsTimeout -> catchup #2
    catchup_timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet) + 1)
    timeout = 2 * catchup_timeout + tconf.CatchupTransactionsTimeout
    # have to skip seqno_db check because the txns are not executed
    # on the new node
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)
    assert new_node.isNodeBlacklisted(bad_node.name)
def test_add_node_to_pool_with_large_ppseqno_diff_views(do_view_change, looper,
                                                        txnPoolNodeSet, tconf,
                                                        sdk_pool_handle,
                                                        sdk_wallet_steward,
                                                        tdir, allPluginsPath):
    """
    Adding a node to the pool while ppSeqNo is big caused a node to stash
    all the requests because of incorrect watermarks limits set.
    The case of view_no == 0 is special.
    The test emulates big ppSeqNo number, adds a node and checks all the
    pool nodes are functional. The test is run with several starting
    view_no, including 0.
    """
    ensure_several_view_change(looper, txnPoolNodeSet, do_view_change,
                               custom_timeout=tconf.VIEW_CHANGE_TIMEOUT)

    # Fake ppSeqNo beyond the LOG_SIZE watermark window.
    big_ppseqno = tconf.LOG_SIZE * 2 + 2345
    cur_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (big_ppseqno > cur_ppseqno)

    # ensure pool is working properly
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)
    assert (cur_ppseqno < _get_ppseqno(txnPoolNodeSet))

    _set_ppseqno(txnPoolNodeSet, big_ppseqno)
    cur_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (big_ppseqno == cur_ppseqno)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)
    assert (cur_ppseqno < _get_ppseqno(txnPoolNodeSet))

    new_steward_name = "testClientSteward" + randomString(4)
    new_node_name = "TestTheta" + randomString(4)
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Both stewards must be able to write after the node addition.
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet_handle, sdk_pool_handle)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def sdk_node_created_after_some_txns_not_started(looper, testNodeClass,
                                                 do_post_node_creation,
                                                 sdk_pool_handle,
                                                 sdk_wallet_client,
                                                 sdk_wallet_steward,
                                                 txnPoolNodeSet, tdir, tconf,
                                                 allPluginsPath, request):
    """Fixture: order some transactions, then create — but do not start —
    a new node "Epsilon" via a fresh steward.

    Yields ``(looper, node, pool_handle, steward_wallet_handle)``.
    """
    txn_total = getValueFromModule(request, "txnCount", 5)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, txn_total)
    steward_alias = randomString()
    node_alias = "Epsilon"
    steward_wallet_handle, created_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        steward_alias, node_alias, tdir, tconf,
        nodeClass=testNodeClass, allPluginsPath=allPluginsPath,
        autoStart=False,  # the node is created but intentionally not started
        do_post_node_creation=do_post_node_creation)
    sdk_pool_refresh(looper, sdk_pool_handle)
    yield looper, created_node, sdk_pool_handle, steward_wallet_handle
def sdk_node_created_after_some_txns(looper, testNodeClass,
                                     do_post_node_creation, sdk_pool_handle,
                                     sdk_wallet_client, sdk_wallet_steward,
                                     txnPoolNodeSet, tdir, tconf,
                                     allPluginsPath, request):
    """Fixture: order some transactions, then create and auto-start a new
    node "Epsilon" via a fresh steward.

    Yields ``(looper, node, pool_handle, steward_wallet_handle)``.
    """
    txn_total = getValueFromModule(request, "txnCount", 5)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, txn_total)
    steward_alias = randomString()
    node_alias = "Epsilon"
    steward_wallet_handle, created_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        steward_alias, node_alias, tdir, tconf,
        nodeClass=testNodeClass, allPluginsPath=allPluginsPath,
        autoStart=True,
        do_post_node_creation=do_post_node_creation)
    sdk_pool_refresh(looper, sdk_pool_handle)
    yield looper, created_node, sdk_pool_handle, steward_wallet_handle
def add_new_node(looper, pool_nodes, sdk_pool_handle, sdk_wallet_steward,
                 tdir, tconf, all_plugins_path):
    """Join a randomly named node (with a matching "Steward-<suffix>"
    steward) to the pool and wait until its data equals the others'.
    """
    suffix = randomString(6)
    _, joined_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        "Steward-" + suffix, "Node-" + suffix,
        tdir, tconf, allPluginsPath=all_plugins_path)
    pool_nodes.append(joined_node)
    looper.run(checkNodesConnected(pool_nodes))
    waitNodeDataEquality(looper, joined_node, *pool_nodes[:-1])
    # The new node did not participate in ordering of the batch with
    # the new steward NYM transaction and the batch with the new NODE
    # transaction. The new node got these transactions via catch-up.
    return joined_node
def testNodeRejectingInvalidTxns(looper, sdk_pool_handle, sdk_wallet_client,
                                 tconf, tdir, txnPoolNodeSet, patched_node,
                                 request, sdk_wallet_steward, testNodeClass,
                                 allPluginsPath, do_post_node_creation):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes but one of the nodes replies with incorrect transactions. The
    newly joined node detects that and rejects the transactions and thus
    blacklists the node. It thus cannot complete the process till the
    timeout and then requests the missing transactions.
    """
    txnCount = getValueFromModule(request, "txnCount", 5)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, txnCount)
    new_steward_name = randomString()
    new_node_name = "Epsilon"
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        nodeClass=testNodeClass, allPluginsPath=allPluginsPath,
        autoStart=True, do_post_node_creation=do_post_node_creation)
    sdk_pool_refresh(looper, sdk_pool_handle)
    # Node with corrupted catchup replies comes pre-patched via the fixture.
    bad_node = patched_node
    do_not_tell_clients_about_newly_joined_node(txnPoolNodeSet)
    logger.debug('Catchup request processor of {} patched'.format(bad_node))
    looper.run(checkNodesConnected(txnPoolNodeSet))
    # catchup #1 -> CatchupTransactionsTimeout -> catchup #2
    catchup_timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet) + 1)
    timeout = 2 * catchup_timeout + tconf.CatchupTransactionsTimeout
    # have to skip seqno_db check because the txns are not executed
    # on the new node
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)
    assert new_node.isNodeBlacklisted(bad_node.name)
def testNewNodeCatchupWhileIncomingRequests(looper, txnPoolNodeSet,
                                            testNodeClass, tdir, tconf,
                                            sdk_pool_handle,
                                            sdk_wallet_steward,
                                            allPluginsPath):
    """
    A new node joins while transactions are happening, its catchup requests
    include till where it has to catchup, which would be less than the other
    node's ledger size. In the meantime, the new node will stash all requests
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    def chkAfterCall(self, req, frm):
        # Wrap CatchupReq processing and assert the requested upper bound
        # never exceeds the responding node's ledger size.
        r = self.processCatchupReq(req, frm)
        typ = getattr(req, f.LEDGER_ID.nm)
        if typ == DOMAIN_LEDGER_ID:
            ledger = self.getLedgerForMsg(req)
            assert req.catchupTill <= ledger.size
        return r

    for node in txnPoolNodeSet:
        node.nodeMsgRouter.routes[CatchupReq] = \
            types.MethodType(chkAfterCall, node.ledgerManager)
        # Delay catchup replies so catch-up overlaps with incoming traffic.
        node.nodeIbStasher.delay(cqDelay(3))

    print('Sending 5 requests')
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, 5)
    looper.runFor(1)
    new_steward_name = randomString()
    new_node_name = "Epsilon"
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        nodeClass=testNodeClass, allPluginsPath=allPluginsPath,
        autoStart=True)
    sdk_pool_refresh(looper, sdk_pool_handle)
    txnPoolNodeSet.append(new_node)
    looper.runFor(2)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    # TODO select or create a timeout for this case in 'waits'
    looper.run(eventually(checkNodeDataForEquality, new_node,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=150))
    # The new node must have processed requests it stashed during catch-up.
    assert new_node.spylog.count(TestNode.processStashedOrderedReqs) > 0
def add_new_node(looper, nodes, sdk_pool_handle, sdk_wallet_steward,
                 tdir, tconf, all_plugins_path, name=None):
    """
    Add one fresh node (plus its own steward) to the running pool, wait for
    it to connect and reach data equality with the rest, and return it.

    `nodes` is mutated in place: the new node is appended to it.
    """
    steward_alias = "testClientSteward" + randomString(3)
    chosen_name = name if name else randomString(5)
    # The new steward's wallet is not needed after node creation.
    _, fresh_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward, steward_alias,
        chosen_name, tdir, tconf, all_plugins_path)
    nodes.append(fresh_node)
    looper.run(checkNodesConnected(nodes, customTimeout=60))
    catchup_timeout = waits.expectedPoolCatchupTime(nodeCount=len(nodes))
    waitNodeDataEquality(looper, fresh_node, *nodes[:-1],
                         customTimeout=catchup_timeout)
    return fresh_node
def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet,
                           looper, tdir, tconf, allPluginsPath,
                           sdk_pool_handle, sdk_wallet_steward,
                           sdk_wallet_client):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes but one of the nodes does not reply and the newly joined node
    cannot complete the process till the timeout and then requests the
    missing transactions.
    """
    def ignoreCatchupReq(self, req, frm):
        # Replacement handler: log and drop the request instead of serving
        # catchup data.
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the node does not process catchup request.
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    badReplica = npr[0]
    badNode = badReplica.node
    badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        ignoreCatchupReq, badNode.ledgerManager)
    more_requests = 10
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, more_requests)
    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
        reduced_catchup_timeout_conf.CatchupTransactionsTimeout
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)

    # Pool must still be able to order new requests with the new node in.
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)
def sdk_node_created_after_some_txns(looper, testNodeClass,
                                     do_post_node_creation,
                                     sdk_pool_handle, sdk_wallet_client,
                                     sdk_wallet_steward, txnPoolNodeSet,
                                     tdir, tconf, allPluginsPath, request,
                                     setup):
    """
    Fixture: order some transactions, then create a new steward and a new
    node ("Epsilon") whose client authenticator accepts extra config
    read/write txn types, and yield the objects a test needs to drive it.
    """
    def post_node_creation(node):
        # Register config read/write handlers on the freshly created node,
        # allow their txn types through the core authenticator, then chain
        # into the test-provided hook.
        write_rh = WriteConfHandler(node.db_manager)
        read_rh = ReadConfHandler(node.db_manager)
        node.write_manager.register_req_handler(write_rh)
        node.read_manager.register_req_handler(read_rh)
        ca = node.clientAuthNr.core_authenticator
        ca._write_types.add(write_rh.txn_type)
        ca._query_types.add(read_rh.txn_type)
        do_post_node_creation(node)
        return node

    # Pre-populate the ledger before the new node exists so it must
    # catch these txns up.
    txnCount = getValueFromModule(request, "txnCount", 5)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client,
                              txnCount)
    new_steward_name = randomString()
    new_node_name = "Epsilon"
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        nodeClass=testNodeClass,
        allPluginsPath=allPluginsPath, autoStart=True,
        do_post_node_creation=post_node_creation)
    sdk_pool_refresh(looper, sdk_pool_handle)
    yield looper, new_node, sdk_pool_handle, new_steward_wallet_handle
def test_upper_bound_of_checkpoint_after_catchup_is_divisible_by_chk_freq(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle,
        sdk_wallet_steward, sdk_wallet_client, tdir, tconf, allPluginsPath):
    """
    After a catch-up, the first checkpoint built by the new node must end
    on a checkpoint-frequency boundary: its key is expected to be (7, 10).
    """
    # Order a few txns before the new node exists.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    _, epsilon = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(epsilon)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, epsilon, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    # Every replica of the new node now holds exactly one checkpoint whose
    # bounds are aligned with the checkpoint frequency.
    for rep in epsilon.replicas.values():
        checkpoint_keys = list(rep.checkpoints)
        assert len(checkpoint_keys) == 1
        assert checkpoint_keys[0] == (7, 10)
def testCatchupDelayedNodes(txnPoolNodeSet,
                            sdk_node_set_with_node_added_after_some_txns,
                            sdk_wallet_steward, txnPoolCliNodeReg,
                            tdirWithPoolTxns, tconf, tdir, allPluginsPath):
    """
    Node sends catchup request to other nodes for only those sequence numbers
    that other nodes have. Have pool of connected nodes with some transactions
    made and then two more nodes say X and Y will join where Y node will start
    its catchup process after some time. The node starting late, i.e. Y should
    not receive any catchup requests

    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    stewardXName = "testClientStewardX"
    nodeXName = "Zeta"
    stewardYName = "testClientStewardY"
    nodeYName = "Eta"
    # X's consistency-proof messages are delayed far longer than Y's,
    # so Y catches up first.
    delayX = 45
    delayY = 2
    # The new steward wallets are not used afterwards, only the nodes are.
    _, nodeX = sdk_add_new_steward_and_node(looper,
                                            sdk_pool_handle,
                                            sdk_wallet_steward,
                                            stewardXName,
                                            nodeXName,
                                            tdir,
                                            tconf,
                                            autoStart=False,
                                            allPluginsPath=allPluginsPath)
    _, nodeY = sdk_add_new_steward_and_node(looper,
                                            sdk_pool_handle,
                                            sdk_wallet_steward,
                                            stewardYName,
                                            nodeYName,
                                            tdir,
                                            tconf,
                                            autoStart=False,
                                            allPluginsPath=allPluginsPath)
    nodeX.nodeIbStasher.delay(cpDelay(delayX))
    nodeY.nodeIbStasher.delay(cpDelay(delayY))
    looper.add(nodeX)
    looper.add(nodeY)
    txnPoolNodeSet.append(nodeX)
    txnPoolNodeSet.append(nodeY)

    timeout = waits.expectedPoolCatchupTime(
        len(txnPoolNodeSet)) + delayX + delayY
    looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=timeout))
    logger.debug("Stopping 2 newest nodes, {} and {}".format(nodeX.name,
                                                             nodeY.name))
    nodeX.stop()
    nodeY.stop()
    logger.debug("Sending requests")
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 50)
    logger.debug("Starting the 2 stopped nodes, {} and {}".format(nodeX.name,
                                                                  nodeY.name))
    nodeX.start(looper.loop)
    nodeY.start(looper.loop)
    # Compare only against the 5 original pool nodes; X and Y themselves sit
    # at the tail of txnPoolNodeSet.
    waitNodeDataEquality(looper, nodeX, *txnPoolNodeSet[:5])
    waitNodeDataEquality(looper, nodeY, *txnPoolNodeSet[:5])
def test_replica_removing_after_node_started(looper, txnPoolNodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_client, tconf,
                                             tdir, allPluginsPath,
                                             sdk_wallet_steward):
    """
    1. Remove backup primary node.
    2. Check that replicas with the disconnected primary were removed.
    3. Add new node.
    4. Check that in the new node the replica with the disconnected primary
       were removed.
    5. Recover the removed node.
    6. Start View Change.
    7. Check that all replicas were restored.
    """
    start_view_no = txnPoolNodeSet[0].viewNo
    start_replicas_count = txnPoolNodeSet[0].replicas.num_replicas
    # Pick the backup instance whose primary we are going to disconnect.
    instance_to_remove = txnPoolNodeSet[0].requiredNumberOfInstances - 1
    removed_primary_node = txnPoolNodeSet[instance_to_remove]
    # remove backup primary node.
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            removed_primary_node)
    txnPoolNodeSet.remove(removed_primary_node)
    looper.removeProdable(removed_primary_node)

    # check that replicas were removed
    def check_replica_removed_on_all_nodes(inst_id=instance_to_remove):
        for node in txnPoolNodeSet:
            check_replica_removed(node,
                                  start_replicas_count,
                                  inst_id)
            assert not node.monitor.isMasterDegraded()
            assert len(node.requests) == 0

    looper.run(
        eventually(check_replica_removed_on_all_nodes,
                   timeout=tconf.TolerateBackupPrimaryDisconnection * 2))

    new_steward_wallet, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        "test_steward", "test_node", tdir, tconf, allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    # NOTE(review): adding the node triggers a view change, which appears to
    # shift the removed replica's instance id down by one — TODO confirm.
    instance_to_remove -= 1
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=start_view_no + 1)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    # The new node must also have dropped the replica with the
    # disconnected primary.
    looper.run(
        eventually(check_replica_removed,
                   new_node,
                   start_replicas_count,
                   instance_to_remove,
                   timeout=tconf.TolerateBackupPrimaryDisconnection * 2))

    # recover the removed node
    removed_primary_node = start_stopped_node(removed_primary_node, looper,
                                              tconf, tdir,
                                              allPluginsPath)
    txnPoolNodeSet.append(removed_primary_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # start View Change
    for node in txnPoolNodeSet:
        node.view_changer.on_master_degradation()
    ensureElectionsDone(
        looper=looper, nodes=txnPoolNodeSet,
        instances_list=range(txnPoolNodeSet[0].requiredNumberOfInstances),
        customTimeout=tconf.TolerateBackupPrimaryDisconnection * 2)
    # All replicas must be restored on the recovered node.
    assert start_replicas_count == removed_primary_node.replicas.num_replicas
def testCatchupDelayedNodes(txnPoolNodeSet,
                            sdk_node_set_with_node_added_after_some_txns,
                            sdk_wallet_steward, txnPoolCliNodeReg,
                            tdirWithPoolTxns, tconf, tdir, allPluginsPath):
    """
    Node sends catchup request to other nodes for only those sequence numbers
    that other nodes have. Have pool of connected nodes with some transactions
    made and then two more nodes say X and Y will join where Y node will start
    its catchup process after some time. The node starting late, i.e. Y should
    not receive any catchup requests

    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    stewardXName = "testClientStewardX"
    nodeXName = "Zeta"
    stewardYName = "testClientStewardY"
    nodeYName = "Eta"
    stewardZName = "testClientStewardZ"
    nodeZName = "Theta"
    # X's consistency-proof messages are delayed far longer than Y's,
    # so Y catches up first.
    delayX = 45
    delayY = 2
    stewardX, nodeX = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward, stewardXName,
        nodeXName, tdir, tconf, autoStart=False,
        allPluginsPath=allPluginsPath)
    stewardY, nodeY = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward, stewardYName,
        nodeYName, tdir, tconf, autoStart=False,
        allPluginsPath=allPluginsPath)
    nodeX.nodeIbStasher.delay(cpDelay(delayX))
    nodeY.nodeIbStasher.delay(cpDelay(delayY))
    looper.add(nodeX)
    looper.add(nodeY)
    txnPoolNodeSet.append(nodeX)
    txnPoolNodeSet.append(nodeY)

    timeout = waits.expectedPoolCatchupTime(
        len(txnPoolNodeSet)) + delayX + delayY
    looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=timeout))
    logger.debug("Stopping 2 newest nodes, {} and {}".format(
        nodeX.name, nodeY.name))
    nodeX.stop()
    nodeY.stop()
    logger.debug("Sending requests")
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 50)
    logger.debug("Starting the 2 stopped nodes, {} and {}".format(
        nodeX.name, nodeY.name))
    nodeX.start(looper.loop)
    nodeY.start(looper.loop)
    # Compare only against the 5 original pool nodes; X and Y themselves sit
    # at the tail of txnPoolNodeSet.
    waitNodeDataEquality(looper, nodeX, *txnPoolNodeSet[:5])
    waitNodeDataEquality(looper, nodeY, *txnPoolNodeSet[:5])
def test_complete_short_checkpoint_not_included_in_lag_for_catchup(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, sdk_wallet_client,
        tdir, tconf, allPluginsPath):
    """
    Verifies that if the first stored own checkpoint has a not aligned lower
    bound (this means that it was started after a catch-up), is complete
    and there is a quorumed stashed checkpoint from other replicas with
    the same end then this stashed checkpoint is not included into the lag
    for a catch-up, i.e. in such a case the lag which makes the node
    perform catch-up is Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2
    quorumed stashed received checkpoints.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    # To complete the first checkpoint send requests for 1 checkpoint minus
    # 2 3PC-batches (since there are already 2 3PC-batches in the first
    # checkpoint : with EpsilonSteward NYM transaction and with Epsilon NODE
    # transaction). This checkpoint has a not aligned lower bound
    # on the new node replicas so it will not be stabilized on them.
    send_reqs_batches_and_get_suff_replies(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        reqs_for_checkpoint - 2 * max_batch_size)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    # The master replica of the new node stops to receive 3PC-messages
    new_node.master_replica.threePhaseRouter.extend(
        (
            (PrePrepare, lambda *x, **y: None),
            (Prepare, lambda *x, **y: None),
            (Commit, lambda *x, **y: None),
        )
    )

    completed_catchups_before_reqs = get_number_of_completed_catchups(new_node)

    # Send requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP * reqs_for_checkpoint)

    # Give time for the new node to catch up if it is going to do it
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    # The lagging master replica must have diverged from the pool by now.
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1],
                           exclude_from_check=['check_last_ordered_3pc_backup'])

    # Verify that the new node has not caught up
    assert get_number_of_completed_catchups(new_node) == \
        completed_catchups_before_reqs

    # Send more requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        reqs_for_checkpoint)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    # Verify that the new node has caught up
    assert get_number_of_completed_catchups(new_node) > \
        completed_catchups_before_reqs
def test_finish_view_change_with_incorrect_primaries_list(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward,
        tdir, tconf, allPluginsPath):
    """
    This test imitates situation when one of nodes is lagged.
    It missed txn for adding new node and view_change after this.
    After that lagged node started the next view_change with other nodes,
    but it has different committed node_reg and selected other primaries.
    In this case we expect, that lagged node will complete view_change with
    other primaries and will start catchup by Checkpoints because will not
    be able to ordering.
    """
    def complete_vc(node):
        # Succeeds only once the node has finished its view change.
        assert not node.view_change_in_progress

    view_no = checkViewNoForNodes(txnPoolNodeSet)

    # Delta is lagged
    lagging_node = txnPoolNodeSet[3]
    fast_nodes = txnPoolNodeSet[:3] + txnPoolNodeSet[4:]

    # Force 5 view changes so that we have viewNo == 5 and Zeta is the primary.
    for _ in range(5):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, view_no + 1)
        ensureElectionsDone(looper, txnPoolNodeSet)
        view_no = checkViewNoForNodes(txnPoolNodeSet)

    # Cut the lagging node off from view-change and 3PC traffic (messages
    # are dropped, not stashed) while the pool adds a new node.
    with delay_rules_without_processing(lagging_node.nodeIbStasher,
                                        msg_rep_delay(),
                                        icDelay(),
                                        vc_delay(),
                                        nv_delay(),
                                        cDelay(),
                                        ppDelay(),
                                        pDelay()):
        # Add new node and this action should starts view_change because of
        # NODE txn ordered
        _, theta = sdk_add_new_steward_and_node(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                'Theta_Steward',
                                                'Theta',
                                                tdir,
                                                tconf,
                                                allPluginsPath=allPluginsPath)
        txnPoolNodeSet.append(theta)
        fast_nodes.append(theta)

        looper.run(checkNodesConnected(fast_nodes))
        ensure_all_nodes_have_same_data(looper, fast_nodes)

        waitForViewChange(looper, fast_nodes, view_no + 1)
        ensureElectionsDone(looper, fast_nodes)

    # The lagging node missed the NODE txn and the view change.
    assert lagging_node.viewNo != fast_nodes[0].viewNo
    assert fast_nodes[0].viewNo == view_no + 1

    current_view_no = checkViewNoForNodes(fast_nodes)
    expected_view_no = current_view_no + 1
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, expected_view_no)
    ensureElectionsDone(looper, fast_nodes)

    looper.run(eventually(complete_vc, lagging_node, timeout=60))
    assert lagging_node.viewNo == expected_view_no

    # We assume that after 2 Checkpoints receiving lagged node will start
    # catchup and elect right primaries
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2 * CHK_SIZE)
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)
def test_second_checkpoint_after_catchup_can_be_stabilized(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_wallet_steward,
        sdk_wallet_client, sdk_pool_handle, tdir, tconf, allPluginsPath):
    """
    A new node joins after some txns were ordered, so its first checkpoint
    starts at a non-aligned lower bound and can never stabilize; verifies
    that the second (aligned) checkpoint does stabilize and slides the
    watermarks forward.
    """
    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    master_replica = new_node.replicas._master_replica

    # Right after catch-up: no own checkpoints, no stashed ones, and the
    # watermarks reflect the 2 caught-up batches (h == 2).
    assert len(master_replica.checkpoints) == 0
    assert len(master_replica.stashedRecvdCheckpoints) == 0
    assert master_replica.h == 2
    assert master_replica.H == 17

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    # One ordered batch starts the first (non-aligned) checkpoint on every
    # replica; watermarks unchanged.
    for replica in new_node.replicas.values():
        assert len(replica.checkpoints) == 1
        assert len(replica.stashedRecvdCheckpoints) == 0
        assert replica.h == 2
        assert replica.H == 17

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 6)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for replica in new_node.replicas.values():
        # First checkpoint (3, 5) is complete but cannot stabilize because
        # its lower bound is not aligned; second one (6, 10) is still open.
        assert len(replica.checkpoints) == 2
        keys_iter = iter(replica.checkpoints)

        assert next(keys_iter) == (3, 5)
        assert replica.checkpoints[3, 5].seqNo == 5
        assert replica.checkpoints[3, 5].digest is None
        assert replica.checkpoints[3, 5].isStable is False

        assert next(keys_iter) == (6, 10)
        assert replica.checkpoints[6, 10].seqNo == 9
        assert replica.checkpoints[6, 10].digest is None
        assert replica.checkpoints[6, 10].isStable is False

        # Other replicas built the aligned checkpoint (1, 5), which the new
        # node can only stash — it never ordered batches 1 and 2 itself.
        assert len(replica.stashedRecvdCheckpoints) == 1
        assert 0 in replica.stashedRecvdCheckpoints
        assert len(replica.stashedRecvdCheckpoints[0]) == 1
        assert (1, 5) in replica.stashedRecvdCheckpoints[0]
        assert len(replica.stashedRecvdCheckpoints[0][(1, 5)]) == 4

        assert replica.h == 2
        assert replica.H == 17

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(stabilization_timeout)

    for replica in new_node.replicas.values():
        # The second checkpoint (6, 10) completed and stabilized, dropping
        # the first one and the stashed (1, 5); watermarks advanced.
        assert len(replica.checkpoints) == 1
        keys_iter = iter(replica.checkpoints)

        assert next(keys_iter) == (6, 10)
        assert replica.checkpoints[6, 10].seqNo == 10
        assert replica.checkpoints[6, 10].digest is not None
        assert replica.checkpoints[6, 10].isStable is True

        assert len(replica.stashedRecvdCheckpoints) == 0

        assert replica.h == 10
        assert replica.H == 25
def test_complete_short_checkpoint_not_included_in_lag_for_catchup(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, sdk_wallet_client,
        tdir, tconf, allPluginsPath):
    """
    Verifies that if the first stored own checkpoint has a not aligned lower
    bound (this means that it was started after a catch-up), is complete
    and there is a quorumed stashed checkpoint from other replicas with
    the same end then this stashed checkpoint is not included into the lag
    for a catch-up, i.e. in such a case the lag which makes the node
    perform catch-up is Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2
    quorumed stashed received checkpoints.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    # To complete the first checkpoint send requests for 1 checkpoint minus
    # 2 3PC-batches (since there are already 2 3PC-batches in the first
    # checkpoint : with EpsilonSteward NYM transaction and with Epsilon NODE
    # transaction). This checkpoint has a not aligned lower bound
    # on the new node replicas so it will not be stabilized on them.
    send_reqs_batches_and_get_suff_replies(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        reqs_for_checkpoint - 2 * max_batch_size)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # The master replica of the new node stops to receive 3PC-messages
    new_node.master_replica.threePhaseRouter.extend(
        (
            (PrePrepare, lambda *x, **y: None),
            (Prepare, lambda *x, **y: None),
            (Commit, lambda *x, **y: None),
        )
    )

    completed_catchups_before_reqs = get_number_of_completed_catchups(new_node)

    # Send requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP * reqs_for_checkpoint)

    # Give time for the new node to catch up if it is going to do it
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    # The lagging master replica must have diverged from the pool by now.
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    # Verify that the new node has not caught up
    assert get_number_of_completed_catchups(new_node) == \
        completed_catchups_before_reqs

    # Send more requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        reqs_for_checkpoint)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # Verify that the new node has caught up
    assert get_number_of_completed_catchups(new_node) > \
        completed_catchups_before_reqs