def testCatchupDelayedNodes(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns,
                            txnPoolCliNodeReg, tdirWithPoolTxns, tconf,
                            allPluginsPath):
    """
    Node sends catchup request to other nodes for only those sequence numbers
    that other nodes have. Have pool of connected nodes with some transactions
    made and then two more nodes say X and Y will join where Y node will start
    its catchup process after some time. The node starting late, i.e. Y should
    not receive any catchup requests
    :return:
    """
    looper, _, _, _, client, wallet = nodeSetWithNodeAddedAfterSomeTxns
    stewardXName = "testClientStewardX"
    nodeXName = "Zeta"
    stewardYName = "testClientStewardY"
    nodeYName = "Eta"
    # X's consistency proofs are delayed much longer than Y's, so Y catches
    # up first and X should not end up asking Y for catchup.
    delayX = 45
    delayY = 2
    # The steward handles returned here are not needed by this test,
    # only the node objects are.
    _, nodeX = addNewStewardAndNode(looper, client, stewardXName, nodeXName,
                                    tdirWithPoolTxns, tconf, allPluginsPath,
                                    autoStart=False)
    _, nodeY = addNewStewardAndNode(looper, client, stewardYName, nodeYName,
                                    tdirWithPoolTxns, tconf, allPluginsPath,
                                    autoStart=False)
    # Delay incoming ConsistencyProof messages on both new nodes
    nodeX.nodeIbStasher.delay(cpDelay(delayX))
    nodeY.nodeIbStasher.delay(cpDelay(delayY))
    looper.add(nodeX)
    looper.add(nodeY)
    txnPoolNodeSet.append(nodeX)
    txnPoolNodeSet.append(nodeY)
    # Connection timeout must absorb both artificial delays
    timeout = waits.expectedPoolCatchupTime(
        len(txnPoolNodeSet)) + delayX + delayY
    looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=timeout))
    logger.debug("Stopping 2 newest nodes, {} and {}".format(
        nodeX.name, nodeY.name))
    nodeX.stop()
    nodeY.stop()
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 50)
    logger.debug("Starting the 2 stopped nodes, {} and {}".format(
        nodeX.name, nodeY.name))
    nodeX.start(looper.loop)
    nodeY.start(looper.loop)
    # Both restarted nodes must catch up with the 5 original pool nodes
    waitNodeDataEquality(looper, nodeX, *txnPoolNodeSet[:5])
    waitNodeDataEquality(looper, nodeY, *txnPoolNodeSet[:5])
def test_catchup_with_reask_cp(txnPoolNodeSet, looper, sdk_pool_handle,
                               sdk_wallet_steward, tconf, tdir,
                               allPluginsPath):
    '''
    Start a catchup
    Delay ConsistencyProofs twice
    Check that the catchup finished
    '''
    lagged_node = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    # Make the node miss some ordered txns: delay 3PC traffic and COMMIT
    # replies, then throw the stashed copies away.
    with delay_rules_without_processing(
            lagged_node.nodeIbStasher, delay_3pc(),
            msg_rep_delay(types_to_delay=[COMMIT])):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 2)
        lagged_node.nodeIbStasher.drop_delayeds()

    with delay_rules_without_processing(lagged_node.nodeIbStasher, cpDelay()):
        lagged_node.start_catchup()

        def chk():
            # At least two full rounds of ConsistencyProofs (initial ask
            # plus a re-ask) must have been stashed from every other node.
            stashed_cps = sum(
                1 for stashed in lagged_node.nodeIbStasher.delayeds
                if isinstance(stashed.item[0], ConsistencyProof))
            assert stashed_cps >= (len(txnPoolNodeSet) - 1) * 2
            lagged_node.nodeIbStasher.drop_delayeds()

        looper.run(eventually(chk))

    waitNodeDataEquality(looper, lagged_node, *txnPoolNodeSet,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client):
    """
    A node is slow to receive COMMIT messages so after a view change it
    starts catchup. But before it can start requesting txns, the COMMITs
    messages are received and are ordered. The node should not request any
    transactions.
    :return:
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay_cm = 30
    # NOTE: was misspelled `delat_cp`; renamed for consistency with delay_cm
    delay_cp = 100
    slow_node.nodeIbStasher.delay(cDelay(delay_cm))
    # The slow node receives consistency proofs after some delay, this delay
    # gives the opportunity to deliver all 3PC messages
    slow_node.nodeIbStasher.delay(cpDelay(delay_cp))

    # Count of `getCatchupReqs` which is called to construct the `CatchupReq`
    # to be sent
    def domain_cr_count():
        return sum(1 for entry in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.getCatchupReqs)
                   if entry.params['consProof'].ledgerId == DOMAIN_LEDGER_ID)

    old_count = domain_cr_count()
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * sent_batches, sent_batches)
    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the `slow_node` is behind
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Unstash only COMMIT messages
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)

    looper.runFor(2)

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        ConsistencyProof.typename)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # No `CatchupReq`s constructed, hence no `CatchupReq`s could have
    # been sent
    assert domain_cr_count() == old_count

    # Some stashed ordered requests have been processed
    rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
    assert sent_batches in rv

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_unordered_state_reverted_before_catchup(
        tconf, looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    Check that unordered state is reverted before starting catchup:
    - save the initial state on a node
    - slow down processing of COMMITs
    - send requests
    - wait until other nodes come to consensus
    - call start of catch-up
    - check that the state of the slow node is reverted and equal
      to the initial one.
    """
    # CONFIG
    ledger_id = DOMAIN_LEDGER_ID
    lagging_node = getNonPrimaryReplicas(txnPoolNodeSet, instId=0)[0].node
    lagging_ledger = lagging_node.getLedger(ledger_id)
    lagging_state = lagging_node.getState(ledger_id)

    # send reqs and make sure we are at the same state
    signed_reqs = sdk_signed_random_requests(looper, sdk_wallet_client, 10)
    sdk_send_and_check(signed_reqs, looper, txnPoolNodeSet, sdk_pool_handle)
    checkNodesHaveSameRoots(txnPoolNodeSet)

    # Snapshot the node's ledger and state roots before the delayed batch
    ledger_root_before = lagging_ledger.tree.root_hash
    ledger_uncommitted_before = lagging_ledger.uncommittedRootHash
    state_root_before = lagging_state.committedHeadHash
    state_uncommitted_before = lagging_state.headHash

    # EXECUTE
    # Delay commit requests on the node
    lagging_node.nodeIbStasher.delay(cDelay())
    # Delay Consistency proofs to not finish catchup
    lagging_node.nodeIbStasher.delay(cpDelay())
    # send requests
    in_flight = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client,
                                         tconf.Max3PCBatchSize)
    sdk_get_replies(looper, in_flight, timeout=40)

    # Snapshot again while 3PC is stalled on the node (COMMITs delayed):
    # uncommitted roots should have moved, committed ones should not have.
    ledger_root_during_3pc = lagging_node.getLedger(
        ledger_id).tree.root_hash
    ledger_uncommitted_during_3pc = lagging_node.getLedger(
        ledger_id).uncommittedRootHash
    state_root_during_3pc = lagging_node.getState(
        ledger_id).committedHeadHash
    state_uncommitted_during_3pc = lagging_node.getState(
        ledger_id).headHash

    # start catchup
    lagging_node.start_catchup()

    # Snapshot after the catchup start reverted unordered batches
    ledger_root_reverted = lagging_ledger.tree.root_hash
    ledger_uncommitted_reverted = lagging_ledger.uncommittedRootHash
    state_root_reverted = lagging_state.committedHeadHash
    state_uncommitted_reverted = lagging_state.headHash

    # CHECK
    # check that initial uncommitted state differs from the state during 3PC
    # but committed does not
    assert ledger_root_before == ledger_root_during_3pc
    assert ledger_uncommitted_before != ledger_uncommitted_during_3pc
    assert state_root_before == state_root_during_3pc
    assert state_uncommitted_before != state_uncommitted_during_3pc

    # the revert restored every root to its pre-3PC value
    assert ledger_root_before == ledger_root_reverted
    assert ledger_uncommitted_before == ledger_uncommitted_reverted
    assert state_root_before == state_root_reverted
    assert state_uncommitted_before == state_uncommitted_reverted
def testCatchupDelayedNodes(txnPoolNodeSet,
                            sdk_node_set_with_node_added_after_some_txns,
                            sdk_wallet_steward,
                            txnPoolCliNodeReg, tdirWithPoolTxns,
                            tconf, tdir, allPluginsPath):
    """
    Node sends catchup request to other nodes for only those sequence numbers
    that other nodes have. Have pool of connected nodes with some transactions
    made and then two more nodes say X and Y will join where Y node will start
    its catchup process after some time. The node starting late, i.e. Y should
    not receive any catchup requests
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    stewardXName = "testClientStewardX"
    nodeXName = "Zeta"
    stewardYName = "testClientStewardY"
    nodeYName = "Eta"
    # X's consistency proofs are delayed much longer than Y's, so Y catches
    # up first and X should not end up asking Y for catchup.
    delayX = 45
    delayY = 2
    # The steward wallet handles returned here are not needed by this test,
    # only the node objects are.
    _, nodeX = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        stewardXName, nodeXName, tdir, tconf, autoStart=False,
        allPluginsPath=allPluginsPath)
    _, nodeY = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        stewardYName, nodeYName, tdir, tconf, autoStart=False,
        allPluginsPath=allPluginsPath)
    # Delay incoming ConsistencyProof messages on both new nodes
    nodeX.nodeIbStasher.delay(cpDelay(delayX))
    nodeY.nodeIbStasher.delay(cpDelay(delayY))
    looper.add(nodeX)
    looper.add(nodeY)
    txnPoolNodeSet.append(nodeX)
    txnPoolNodeSet.append(nodeY)
    # Connection timeout must absorb both artificial delays
    timeout = waits.expectedPoolCatchupTime(
        len(txnPoolNodeSet)) + delayX + delayY
    looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=timeout))
    logger.debug("Stopping 2 newest nodes, {} and {}".format(
        nodeX.name, nodeY.name))
    nodeX.stop()
    nodeY.stop()
    logger.debug("Sending requests")
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 50)
    logger.debug("Starting the 2 stopped nodes, {} and {}".format(
        nodeX.name, nodeY.name))
    nodeX.start(looper.loop)
    nodeY.start(looper.loop)
    # Both restarted nodes must catch up with the 5 original pool nodes
    waitNodeDataEquality(looper, nodeX, *txnPoolNodeSet[:5])
    waitNodeDataEquality(looper, nodeY, *txnPoolNodeSet[:5])
def testCatchupDelayedNodes(txnPoolNodeSet,
                            sdk_node_set_with_node_added_after_some_txns,
                            sdk_wallet_steward,
                            txnPoolCliNodeReg,
                            tdirWithPoolTxns,
                            tconf, tdir, allPluginsPath):
    """
    Node sends catchup request to other nodes for only those sequence numbers
    that other nodes have. Have pool of connected nodes with some transactions
    made and then two more nodes say X and Y will join where Y node will start
    its catchup process after some time. The node starting late, i.e. Y should
    not receive any catchup requests
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    stewardXName = "testClientStewardX"
    nodeXName = "Zeta"
    stewardYName = "testClientStewardY"
    nodeYName = "Eta"
    # X's consistency proofs are delayed much longer than Y's, so Y catches
    # up first and X should not end up asking Y for catchup.
    delayX = 45
    delayY = 2
    # The steward wallet handles returned here are not needed by this test,
    # only the node objects are.
    _, nodeX = sdk_add_new_steward_and_node(looper,
                                            sdk_pool_handle,
                                            sdk_wallet_steward,
                                            stewardXName,
                                            nodeXName,
                                            tdir,
                                            tconf,
                                            autoStart=False,
                                            allPluginsPath=allPluginsPath)
    _, nodeY = sdk_add_new_steward_and_node(looper,
                                            sdk_pool_handle,
                                            sdk_wallet_steward,
                                            stewardYName,
                                            nodeYName,
                                            tdir,
                                            tconf,
                                            autoStart=False,
                                            allPluginsPath=allPluginsPath)
    # Delay incoming ConsistencyProof messages on both new nodes
    nodeX.nodeIbStasher.delay(cpDelay(delayX))
    nodeY.nodeIbStasher.delay(cpDelay(delayY))
    looper.add(nodeX)
    looper.add(nodeY)
    txnPoolNodeSet.append(nodeX)
    txnPoolNodeSet.append(nodeY)
    # Connection timeout must absorb both artificial delays
    timeout = waits.expectedPoolCatchupTime(
        len(txnPoolNodeSet)) + delayX + delayY
    looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=timeout))
    logger.debug("Stopping 2 newest nodes, {} and {}".format(nodeX.name,
                                                             nodeY.name))
    nodeX.stop()
    nodeY.stop()
    logger.debug("Sending requests")
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 50)
    logger.debug("Starting the 2 stopped nodes, {} and {}".format(nodeX.name,
                                                                  nodeY.name))
    nodeX.start(looper.loop)
    nodeY.start(looper.loop)
    # Both restarted nodes must catch up with the 5 original pool nodes
    waitNodeDataEquality(looper, nodeX, *txnPoolNodeSet[:5])
    waitNodeDataEquality(looper, nodeY, *txnPoolNodeSet[:5])
def test_catchup_with_one_slow_node(tdir, tconf, looper, txnPoolNodeSet,
                                    sdk_pool_handle, sdk_wallet_client,
                                    allPluginsPath, logsearch):
    '''
    1. Stop the node Delta
    2. Order 9 txns. In sending CatchupReq in a first round every node
    [Alpha, Beta, Gamma] will receive request for 3 txns.
    3. Start Delta
    4. Make sure Consistency Proof is received from all 3 nodes
    (to send CatchupReq to all 3 nodes)
    5. Check that all nodes have equality data.
    6. Check that Delta re-ask CatchupRep only once.
    In the second CatchupRep (first re-ask) Delta shouldn't request
    CatchupRep from Alpha because it didn't answer early.
    If the behavior is wrong and Delta re-ask txns form all nodes,
    every node will receive request for 1 txns, Alpha will not answer
    and Delta will need a new re-ask round.
    '''
    # Prepare nodes: the last pool node plays Delta, the rest stay active
    delta = txnPoolNodeSet[-1]
    active_nodes = txnPoolNodeSet[:-1]

    # Stop one node
    waitNodeDataEquality(looper, delta, *active_nodes)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, delta,
                                            stopNode=True)
    looper.removeProdable(delta)

    # Send more requests to active nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, len(active_nodes) * 3)
    waitNodeDataEquality(looper, *active_nodes)

    # Restart stopped node and wait for successful catch up
    delta = start_stopped_node(delta, looper, tconf, tdir, allPluginsPath,
                               start=False)

    # Remember how many re-ask log lines exist before the catchup
    log_re_ask, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    old_re_ask_count = len(log_re_ask)

    # Delay CatchupRep messages on Alpha
    with delay_rules(active_nodes[0].nodeIbStasher, cqDelay()):
        with delay_rules(delta.nodeIbStasher, cpDelay()):
            looper.add(delta)
            txnPoolNodeSet[-1] = delta
            looper.run(checkNodesConnected(txnPoolNodeSet))
            # wait till we got consistency proofs from all nodes
            looper.run(
                eventually(
                    lambda: assertExp(
                        delta.nodeIbStasher.num_of_stashed(
                            ConsistencyProof) >= 3),
                    retryWait=1, timeout=60))
        waitNodeDataEquality(
            looper, *txnPoolNodeSet, customTimeout=120,
            exclude_from_check=['check_last_ordered_3pc_backup'])

    assert len(log_re_ask) - old_re_ask_count == 2  # for audit and domain ledgers