def test_node_requests_missing_three_phase_messages(looper, txnPoolNodeSet,
                                                    wallet1, client1Connected):
    """
    2 of 4 nodes go down, so the pool cannot process any more incoming
    requests. A new request comes in. After a while those 2 nodes come back
    alive. Another request comes in. Check that the two previously
    disconnected nodes request the missing PREPAREs and PRE-PREPAREs and that
    the pool successfully handles both transactions after that.
    """
    INIT_REQS_CNT = 10
    MISSING_REQS_CNT = 1
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]

    send_reqs_to_nodes_and_verify_all_replies(looper,
                                              wallet1,
                                              client1Connected,
                                              INIT_REQS_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                txnPoolNodeSet,
                                                node,
                                                stopNode=False)

    sendRandomRequests(wallet1, client1Connected, MISSING_REQS_CNT)

    def check_pp_out_of_sync(alive_nodes, disconnected_nodes):
        def get_last_pp(node):
            return node.replicas._master_replica.lastPrePrepare

        last_3pc_key_alive = get_last_pp(alive_nodes[0])
        for node in alive_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_alive

        last_3pc_key_disconnected = get_last_pp(disconnected_nodes[0])
        assert last_3pc_key_disconnected != last_3pc_key_alive
        for node in disconnected_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_disconnected

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          disconnected_nodes,
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(
                              len(txnPoolNodeSet))))

    for node in disconnected_nodes:
        reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, node)

    send_reqs_to_nodes_and_verify_all_replies(looper,
                                              wallet1,
                                              client1Connected,
                                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)

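# Illustrative sketch only (not plenum's actual recovery code): the gist of
# what a reconnected node has to request is every 3PC key between its own
# last ordered (viewNo, ppSeqNo) and the pool's latest PRE-PREPARE, assuming
# both sides are still in the same view. The helper name is hypothetical.
def missing_3pc_keys(last_ordered_3pc, last_pre_prepare_3pc):
    view_no, ordered_pp_seq_no = last_ordered_3pc
    _, latest_pp_seq_no = last_pre_prepare_3pc
    return [(view_no, pp_seq_no)
            for pp_seq_no in range(ordered_pp_seq_no + 1, latest_pp_seq_no + 1)]


# Example: a node that last ordered (0, 10) and sees the pool at (0, 11)
# would request the single missing key (0, 11).
assert missing_3pc_keys((0, 10), (0, 11)) == [(0, 11)]
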
def test_replicas_prepare_time(looper, txnPoolNodeSet, client1, wallet1,
                               client1Connected):
    # Check that each replica's PREPARE time is the same as the PRE-PREPARE
    # time
    sent_batches = 5
    for i in range(sent_batches):
        send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
        looper.runFor(1)

    for node in txnPoolNodeSet:
        for r in node.replicas:
            rec_prps = defaultdict(list)
            for p in recvd_prepares(r):
                rec_prps[(p.viewNo, p.ppSeqNo)].append(p)
            pp_coll = r.sentPrePrepares if r.isPrimary else r.prePrepares
            for key, pp in pp_coll.items():
                for p in rec_prps[key]:
                    assert pp.ppTime == p.ppTime

            # `last_accepted_pre_prepare_time` is the time of the last
            # PRE-PREPARE
            assert r.last_accepted_pre_prepare_time == pp_coll.peekitem(-1)[
                1].ppTime

            # The ledger should store a time for each txn and it should be
            # the same as the time of the corresponding PRE-PREPARE
            if r.isMaster:
                for iv in node.txn_seq_range_to_3phase_key[DOMAIN_LEDGER_ID]:
                    three_pc_key = iv.data
                    for seq_no in range(iv.begin, iv.end):
                        assert node.domainLedger.getBySeqNo(
                            seq_no)[TXN_TIME] == pp_coll[three_pc_key].ppTime

def all_nodes_view_change(looper,
                          txnPoolNodeSet,
                          stewardWallet,
                          steward1,
                          client1,
                          wallet1,
                          client1Connected):
    for _ in range(5):
        send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_new_node_accepts_timestamp(tconf, looper, txnPoolNodeSet,
                                    nodeSetWithNodeAddedAfterSomeTxns,
                                    client1, wallet1, client1Connected):
    """
    A new node joins the pool and is able to function properly without
    raising any timestamp suspicions
    """
    _, new_node, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    old_susp_count = get_timestamp_suspicion_count(new_node)
    # Don't wait for the node to catch up, start sending requests
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 10)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    # All nodes should reply
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 3)
    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    suspicions = {node.name: get_timestamp_suspicion_count(node)
                  for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 3)
    for node in txnPoolNodeSet:
        assert suspicions[node.name] == get_timestamp_suspicion_count(node)

def view_change_in_between_3pc(looper, nodes, slow_nodes, wallet, client,
                               slow_delay=1, wait=None):
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4)
    delay_3pc_messages(slow_nodes, 0, delay=slow_delay)

    sendRandomRequests(wallet, client, 10)
    if wait:
        looper.runFor(wait)

    ensure_view_change_complete(looper, nodes, customTimeout=60)

    reset_delays_and_process_delayeds(slow_nodes)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5,
                                        total_timeout=30)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 5,
                                              total_timeout=30)

def view_change_in_between_3pc_random_delays(looper, nodes, slow_nodes,
                                             wallet, client, tconf,
                                             min_delay=0, max_delay=0):
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4)

    # The max delay should not be more than the catchup timeout
    max_delay = max_delay or tconf.MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE - 1
    delay_3pc_messages(slow_nodes, 0, min_delay=min_delay, max_delay=max_delay)

    sendRandomRequests(wallet, client, 10)

    ensure_view_change_complete(looper,
                                nodes,
                                customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT + max_delay,
                                exclude_from_check=['check_last_ordered_3pc'])

    reset_delays_and_process_delayeds(slow_nodes)

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 10)

def testQueueingReqFromFutureView(delayed_perf_chk, looper, nodeSet, up,
                                  wallet1, client1):
    """
    Test that every node queues 3-phase requests (PRE-PREPARE, PREPARE and
    COMMIT) that come from a view which is greater than the current view.
    - Delay reception and processing of view change messages by a non-primary
      for the master instance => it starts receiving 3-phase commit messages
      for the next view
    """
    lagging_node = get_last_master_non_primary_node(nodeSet)
    old_view_no = lagging_node.viewNo

    # Delay processing of InstanceChange and ViewChangeDone so the node
    # stashes 3PC messages
    delay_ic = 60
    lagging_node.nodeIbStasher.delay(icDelay(delay_ic))
    lagging_node.nodeIbStasher.delay(vcd_delay(delay_ic))
    logger.debug('{} will delay its view change'.format(lagging_node))

    def chk_fut_view(view_no, is_empty):
        length = len(lagging_node.msgsForFutureViews.get(view_no, ()))
        if is_empty:
            assert length == 0
        else:
            assert length > 0
        return length

    # No messages queued for the future view
    chk_fut_view(old_view_no + 1, is_empty=True)
    logger.debug('{} does not have any messages for future views'
                 .format(lagging_node))

    # Every node except the lagging node should do a view change
    ensure_view_change(looper,
                       [n for n in nodeSet if n != lagging_node],
                       [lagging_node])

    # Send more requests that will be queued by the lagging node
    # sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 3)
    reqs = sendRandomRequests(wallet1, client1, 5)
    l = looper.run(
        eventually(chk_fut_view, old_view_no + 1, False, retryWait=1))
    logger.debug('{} has {} messages for future views'
                 .format(lagging_node, l))
    waitForSufficientRepliesForRequests(looper, client1, requests=reqs)

    # Reset delays for the lagging node so that it finally makes the view
    # change
    lagging_node.reset_delays_and_process_delayeds()

    # Eventually no messages queued for the future view
    looper.run(
        eventually(chk_fut_view, old_view_no + 1, True,
                   retryWait=1, timeout=delay_ic + 10))
    logger.debug('{} exhausted pending messages for future views'
                 .format(lagging_node))

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)

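# Illustrative sketch only (not plenum's Node implementation): the
# "stash messages from a future view" behaviour exercised above, reduced to a
# self-contained class. All names here are hypothetical.
class FutureViewStasher:
    def __init__(self, view_no=0):
        self.view_no = view_no
        self.msgs_for_future_views = {}

    def receive(self, msg, msg_view_no, process):
        # Queue 3PC messages that belong to a view this node has not reached
        if msg_view_no > self.view_no:
            self.msgs_for_future_views.setdefault(msg_view_no, []).append(msg)
        else:
            process(msg)

    def view_change_done(self, new_view_no, process):
        # Once the view change completes, drain the queue for the new view
        self.view_no = new_view_no
        for msg in self.msgs_for_future_views.pop(new_view_no, []):
            process(msg)
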
def new_node_in_correct_view(all_nodes_view_change, looper, txnPoolNodeSet,
                             one_node_added, wallet1, client1):
    new_node = one_node_added
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet,
                          retryWait=1, timeout=10))
    assert len(getAllReturnVals(new_node,
                                new_node._start_view_change_if_possible,
                                compare_val_to=True)) > 0
    assert not new_node._next_view_indications
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)

def tear():
    # Repair any broken network
    for node in txnPoolNodeSet:
        node.reset_delays_and_process_delayeds()
    # Give a little time to process any delayed messages
    looper.runFor(3)
    # Check each node has the same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    # Check each node has ordered all requests (no catchup)
    assert check_if_all_equal_in_list([n.master_replica.ordered
                                       for n in txnPoolNodeSet])
    # Check the network is functional since all nodes reply
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 5)

def testZStackNodeReconnection(tconf, looper, txnPoolNodeSet, client1, wallet1,
                               tdir, client1Connected):
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1)

    npr = [n for n in txnPoolNodeSet if not n.hasPrimary]
    nodeToCrash = npr[0]
    idxToCrash = txnPoolNodeSet.index(nodeToCrash)
    otherNodes = [_ for _ in txnPoolNodeSet if _ != nodeToCrash]

    def checkFlakyConnected(conn=True):
        for node in otherNodes:
            if conn:
                assert nodeToCrash.nodestack.name in node.nodestack.connecteds
            else:
                assert nodeToCrash.nodestack.name not in node.nodestack.connecteds

    checkFlakyConnected(True)
    nodeToCrash.stop()
    logger.debug('Stopped node {}'.format(nodeToCrash))
    looper.removeProdable(nodeToCrash)
    looper.runFor(1)
    stopNodes([nodeToCrash], looper)
    # TODO Select or create the timeout from 'waits'. Don't use a constant.
    looper.run(eventually(checkFlakyConnected, False, retryWait=1, timeout=60))
    looper.runFor(1)

    config_helper = PNodeConfigHelper(nodeToCrash.name, tconf, chroot=tdir)
    node = TestNode(nodeToCrash.name,
                    ledger_dir=config_helper.ledger_dir,
                    keys_dir=config_helper.keys_dir,
                    genesis_dir=config_helper.genesis_dir,
                    plugins_dir=config_helper.plugins_dir,
                    config=tconf,
                    ha=nodeToCrash.nodestack.ha,
                    cliha=nodeToCrash.clientstack.ha)
    looper.add(node)
    txnPoolNodeSet[idxToCrash] = node
    # TODO Select or create the timeout from 'waits'. Don't use a constant.
    looper.run(eventually(checkFlakyConnected, True, retryWait=2, timeout=50))
    ensureElectionsDone(looper, txnPoolNodeSet, retryWait=2)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 10)

def test_primary_receives_delayed_prepares(looper, txnPoolNodeSet, client1,
                                           wallet1, client1Connected):
    """
    The primary gets all PREPAREs after COMMITs
    """
    delay = 50
    primary_node = get_master_primary_node(txnPoolNodeSet)
    other_nodes = [n for n in txnPoolNodeSet if n != primary_node]
    primary_node.nodeIbStasher.delay(pDelay(delay, 0))

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 10)

    for node in other_nodes:
        assert node.master_replica.prePrepares
        assert node.master_replica.prepares
        assert node.master_replica.commits

    assert primary_node.master_replica.sentPrePrepares
    assert not primary_node.master_replica.prepares
    assert primary_node.master_replica.commits

def view_change_in_between_3pc_random_delays(looper, nodes, slow_nodes,
                                             wallet, client,
                                             min_delay=0, max_delay=5):
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4)
    delay_3pc_messages(slow_nodes, 0, min_delay=min_delay, max_delay=max_delay)

    sendRandomRequests(wallet, client, 10)

    ensure_view_change(looper, nodes)
    ensureElectionsDone(looper=looper, nodes=nodes)
    ensure_all_nodes_have_same_data(looper, nodes=nodes)

    reset_delays_and_process_delayeds(slow_nodes)

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 10)

def test_node_request_propagates(looper, setup, txnPoolNodeSet, client1,
                                 wallet1, client1Connected, request):
    """
    One of the nodes lacks sufficient PROPAGATEs
    """
    faulty_node, recv_client_requests = setup

    old_count_recv_ppg = get_count(faulty_node, faulty_node.processPropagate)
    old_count_recv_req = get_count(faulty_node, faulty_node.processRequest)
    old_count_request_propagates = get_count(faulty_node,
                                             faulty_node.request_propagates)

    sent_reqs = 5
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              sent_reqs)

    assert get_count(faulty_node, faulty_node.processPropagate) > old_count_recv_ppg
    if recv_client_requests:
        assert get_count(faulty_node, faulty_node.processRequest) > old_count_recv_req
    else:
        assert get_count(faulty_node, faulty_node.processRequest) == old_count_recv_req

    # The attempt to request PROPAGATEs was made twice, since the faulty node
    # has 2 replicas
    assert get_count(faulty_node,
                     faulty_node.request_propagates) - old_count_request_propagates == 2

    requested_propagate_counts = getAllReturnVals(faulty_node,
                                                  faulty_node.request_propagates)

    # The last attempt to request PROPAGATEs was not successful
    assert requested_propagate_counts[0] == 0
    # The first attempt to request PROPAGATEs was successful: PROPAGATEs were
    # requested for all requests
    assert requested_propagate_counts[1] == sent_reqs

    faulty_node.nodeIbStasher.reset_delays_and_process_delayeds()
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1, 4)

def testPrePrepareProcessedInOrder(perf_chk_patched, looper, txnPoolNodeSet,
                                   wallet1, client):
    """
    A non-primary receives PRE-PREPAREs out of order: it receives the one with
    ppSeqNo 2 earlier than the one with ppSeqNo 1, but it stashes the one with
    ppSeqNo 2 and only unstashes it for processing once it has processed the
    PRE-PREPARE with ppSeqNo 1
    :return:
    """
    tconf = perf_chk_patched
    pr, otherR = getPrimaryReplica(txnPoolNodeSet, instId=0), \
        getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    otherNodes = [r.node for r in otherR]
    ppsToDelay = 2
    delayeds = 0
    expectedDelayeds = (len(txnPoolNodeSet) - 1) * ppsToDelay
    delayedPpSeqNos = set()

    def specificPrePrepares(wrappedMsg):
        nonlocal delayeds
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare) and delayeds < expectedDelayeds:
            delayeds += 1
            delayedPpSeqNos.add(msg.ppSeqNo)
            logger.debug('ppSeqNo {} would be delayed'.format(msg.ppSeqNo))
            return pp_delay

    for node in otherNodes:
        logger.debug('{} would be delaying reception of some pre-prepares'
                     .format(node))
        node.nodeIbStasher.delay(specificPrePrepares)

    send_reqs_to_nodes_and_verify_all_replies(
        looper, wallet1, client, (ppsToDelay + 1) * tconf.Max3PCBatchSize)

    checkNodesHaveSameRoots(txnPoolNodeSet)

    for r in otherR:
        seqNos = [a['pp'].ppSeqNo for a in getAllArgs(r, r.addToPrePrepares)]
        seqNos.reverse()
        assert sorted(seqNos) == seqNos

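# Illustrative sketch only (not Replica.addToPrePrepares): in-order processing
# of PRE-PREPAREs with stashing, which is the property asserted above. The
# names here are hypothetical.
def make_in_order_pre_prepare_processor(process, first_pp_seq_no=1):
    stashed = {}
    next_pp_seq_no = first_pp_seq_no

    def receive(pp_seq_no, pre_prepare):
        nonlocal next_pp_seq_no
        # Stash anything that arrives early; process as soon as the gap closes
        stashed[pp_seq_no] = pre_prepare
        while next_pp_seq_no in stashed:
            process(stashed.pop(next_pp_seq_no))
            next_pp_seq_no += 1

    return receive


# Example: ppSeqNo 2 arrives first and is held until ppSeqNo 1 is processed.
processed = []
receive = make_in_order_pre_prepare_processor(processed.append)
receive(2, 'pp2')
receive(1, 'pp1')
assert processed == ['pp1', 'pp2']
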
def test_non_primary_accepts_pre_prepare_time(looper, txnPoolNodeSet, client1,
                                              wallet1, client1Connected):
    """
    One of the non-primaries has an incorrect clock, so it thinks the
    PRE-PREPARE has an incorrect time
    """
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
    # The replica having the bad clock
    confused_npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1]

    make_clock_faulty(confused_npr.node)

    old_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)
    old_susp_count = get_timestamp_suspicion_count(confused_npr.node)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)

    assert get_timestamp_suspicion_count(confused_npr.node) > old_susp_count

    new_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)

    # `is_pre_prepare_time_acceptable` first returned False, then returned True
    assert [True, False, *old_acceptable_rvs] == new_acceptable_rvs

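# Illustrative sketch only: one plausible shape of the PRE-PREPARE timestamp
# check that `is_pre_prepare_time_acceptable` performs. The constant name
# mirrors tconf.ACCEPTABLE_DEVIATION_PREPREPARE_SECS used elsewhere in these
# tests, but the function body is an assumption, not plenum's code.
ACCEPTABLE_DEVIATION_PREPREPARE_SECS = 60


def pre_prepare_time_acceptable(pp_time, now):
    # Accept the PRE-PREPARE if its timestamp is within the allowed deviation
    # of this node's own clock
    return abs(now - pp_time) <= ACCEPTABLE_DEVIATION_PREPREPARE_SECS


# Example: a node whose clock is skewed far enough rejects an otherwise valid
# timestamp, which is what raises the suspicions counted in the test above.
assert pre_prepare_time_acceptable(pp_time=1000.0, now=1030.0)
assert not pre_prepare_time_acceptable(pp_time=1000.0, now=1200.0)
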
def test_view_change_on_start(tconf, txnPoolNodeSet, looper, wallet1, client1,
                              client1Connected):
    """
    Do a view change on a pool without any requests being ordered
    """
    old_view_no = txnPoolNodeSet[0].viewNo
    master_primary = get_master_primary_node(txnPoolNodeSet)
    other_nodes = [n for n in txnPoolNodeSet if n != master_primary]
    delay_3pc = 10
    delay_3pc_messages(txnPoolNodeSet, 0, delay_3pc)
    sent_batches = 2
    sendRandomRequests(wallet1, client1, sent_batches * tconf.Max3PCBatchSize)

    def chk1():
        t_root, s_root = check_uncommitteds_equal(other_nodes)
        assert master_primary.domainLedger.uncommittedRootHash != t_root
        assert master_primary.states[DOMAIN_LEDGER_ID].headHash != s_root

    looper.run(eventually(chk1, retryWait=1))

    timeout = tconf.PerfCheckFreq + \
        waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    waitForViewChange(looper, txnPoolNodeSet, old_view_no + 1,
                      customTimeout=timeout)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    check_uncommitteds_equal(txnPoolNodeSet)

    reset_delays_and_process_delayeds(txnPoolNodeSet)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              2 * Max3PCBatchSize,
                                              add_delay_to_timeout=delay_3pc)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

def test_view_change_gc_in_between_3pc_all_nodes_delays(
        looper, txnPoolNodeSet, wallet1, client):
    """
    Test that the garbage collector compares the whole 3PC key (viewNo,
    ppSeqNo) and does not remove messages from a node's queues that have a
    higher viewNo than the last ordered one, even if their ppSeqNo is less
    than or equal to it
    """
    numNodes = len(client.nodeReg)
    viewNo = checkViewNoForNodes(txnPoolNodeSet)

    # 1 Send two messages one by one separately to make the node pool
    #   work with two batches
    #     -> last_ordered_3pc = (+0, 2) [+0 means from the initial state]
    #        (last_ordered_3pc here and further is tracked for master
    #        instances only because non-master ones have specific logic for
    #        its management which we don't care about in this test,
    #        see Replica::_setup_for_non_master)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client, 1)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client, 1)

    last_ordered_3pc = (viewNo, 2)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 2)

    # 2 Do a view change
    #     -> GC should remove it from the nodes' queues
    #     -> viewNo = +1
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 0)

    # 3 Slow down the processing of 3PC messages for all nodes (all replica
    #   instances) randomly and send one more message
    #     -> not ordered (last_ordered_3pc still equals (+0, 2)), but the
    #        primaries should at least send PRE-PREPAREs
    # TODO could it be not enough to wait only until the primary
    # has sent a PRE-PREPARE
    propagationTimeout = waits.expectedClientRequestPropagationTime(numNodes)
    delay_3pc_messages(txnPoolNodeSet, 0, delay=propagationTimeout * 2)
    delay_3pc_messages(txnPoolNodeSet, 1, delay=propagationTimeout * 2)
    requests = sendRandomRequests(wallet1, client, 1)

    def checkPrePrepareSentAtLeastByPrimary():
        for node in txnPoolNodeSet:
            for replica in node.replicas:
                if replica.isPrimary:
                    assert len(replica.sentPrePrepares)

    looper.run(eventually(checkPrePrepareSentAtLeastByPrimary,
                          retryWait=0.1,
                          timeout=propagationTimeout))

    # 4 Do a view change
    #     -> GC shouldn't remove anything because
    #        last_ordered_3pc (+0, 2) < the last message's 3PC key (+1, 1)
    #     -> viewNo = 2
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNoNew = checkViewNoForNodes(txnPoolNodeSet)
    # Another view change could happen because of slow nodes
    assert viewNoNew - viewNo in (1, 2)
    viewNo = viewNoNew
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 5 Reset the delays and wait for replies
    #     -> the new primaries should send a new 3PC for the last message
    #        with 3PC key (+2, 1)
    #     -> it should be ordered
    #     -> last_ordered_3pc = (+2, 1)
    reset_delays_and_process_delayeds(txnPoolNodeSet)
    waitForSufficientRepliesForRequests(looper, client, requests=requests,
                                        fVal=numNodes - 1)

    checkViewNoForNodes(txnPoolNodeSet, viewNo)
    last_ordered_3pc = (viewNo, 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 6 Do a view change
    #     -> GC should remove them
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 0)

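# Illustrative sketch only (not the replica's actual GC): garbage collection
# that compares the whole (viewNo, ppSeqNo) key, which is the behaviour the
# test above verifies. Python's tuple comparison orders by viewNo first, so a
# message from a later view survives even if its ppSeqNo is <= the last
# ordered one.
def gc_3pc_queue(queue, last_ordered_3pc):
    return {key: msg for key, msg in queue.items() if key > last_ordered_3pc}


# Example: with last_ordered_3pc == (0, 2), the key (1, 1) is kept even though
# its ppSeqNo 1 is lower than the last ordered ppSeqNo 2.
assert gc_3pc_queue({(0, 2): 'old', (1, 1): 'new'}, (0, 2)) == {(1, 1): 'new'}
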
def test_new_primary_has_wrong_clock(tconf, looper, txnPoolNodeSet, client1,
                                     wallet1, client1Connected):
    """
    One of the non-primaries has a bad clock; it raises suspicions but still
    orders requests after getting PREPAREs. Then a view change happens and
    this non-primary with the bad clock becomes the new primary, but it is
    not able to get any of its PRE-PREPAREs ordered. Eventually another view
    change happens, a new primary is elected and the pool is functional again.
    :return:
    """
    # The node having the bad clock, this node will be primary after the view
    # change
    faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node
    make_clock_faulty(faulty_node)
    assert not faulty_node.master_replica.isPrimary
    # faulty_node replies too
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 3)

    ledger_sizes = {node.name: node.domainLedger.size
                    for node in txnPoolNodeSet}
    susp_counts = {node.name: get_timestamp_suspicion_count(node)
                   for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After the view change, faulty_node is the primary
    assert faulty_node.master_replica.isPrimary

    old_view_no = txnPoolNodeSet[0].viewNo

    # Requests are sent
    for _ in range(5):
        sendRandomRequests(wallet1, client1, 2)
        looper.runFor(.2)

    def chk():
        for node in [n for n in txnPoolNodeSet if n != faulty_node]:
            # Each non-faulty node raises suspicion
            assert get_timestamp_suspicion_count(node) > susp_counts[node.name]
            # The ledger does not change
            assert node.domainLedger.size == ledger_sizes[node.name]

        assert faulty_node.domainLedger.size == ledger_sizes[faulty_node.name]

    looper.run(eventually(chk, retryWait=1))

    # Eventually another view change happens
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet,
                          old_view_no + 1, retryWait=1,
                          timeout=2 * tconf.PerfCheckFreq))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After the view change, faulty_node is no longer the primary
    assert not faulty_node.master_replica.isPrimary

    # All nodes reply
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 2)

def test_node_requests_missing_three_phase_messages_after_long_disconnection(
        looper, txnPoolNodeSet, wallet1, client1Connected, tconf,
        tdirWithPoolTxns, allPluginsPath):
    """
    2 of 4 nodes go down, so the pool cannot process any more incoming
    requests. A new request comes in. The test then waits long enough for the
    PrePrepare to become old enough to be dropped by the time checker. The
    two stopped nodes come back alive. Another request comes in. Check that
    the two previously disconnected nodes request the missing PREPAREs and
    PRE-PREPAREs and that the pool successfully handles both transactions.
    """
    INIT_REQS_CNT = 10
    MISSING_REQS_CNT = 1
    REQS_AFTER_RECONNECT_CNT = 1
    alive_nodes = []
    disconnected_nodes = []

    # ensure_view_change_complete(looper, txnPoolNodeSet)

    for node in txnPoolNodeSet:
        if node.hasPrimary is not None:
            alive_nodes.append(node)
        else:
            disconnected_nodes.append(node)

    send_reqs_to_nodes_and_verify_all_replies(looper,
                                              wallet1,
                                              client1Connected,
                                              INIT_REQS_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                txnPoolNodeSet,
                                                node,
                                                stopNode=False)
        looper.removeProdable(node)

    sendRandomRequests(wallet1, client1Connected, MISSING_REQS_CNT)

    def check_pp_out_of_sync(alive_nodes, disconnected_nodes):
        def get_last_pp(node):
            return node.replicas._master_replica.lastPrePrepare

        last_3pc_key_alive = get_last_pp(alive_nodes[0])
        for node in alive_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_alive

        last_3pc_key_disconnected = get_last_pp(disconnected_nodes[0])
        assert last_3pc_key_disconnected != last_3pc_key_alive
        for node in disconnected_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_disconnected

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          disconnected_nodes,
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(
                              len(txnPoolNodeSet))))

    preprepare_deviation = 4
    tconf.ACCEPTABLE_DEVIATION_PREPREPARE_SECS = preprepare_deviation
    time.sleep(preprepare_deviation * 2)

    for node in disconnected_nodes:
        looper.add(node)
    for node in disconnected_nodes:
        reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, node)

    send_reqs_to_nodes_and_verify_all_replies(looper,
                                              wallet1,
                                              client1Connected,
                                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)