def view_change_in_between_3pc(looper, nodes, slow_nodes, wallet, client,
                               slow_delay=1, wait=None):
    """
    Delay 3PC messages to ``slow_nodes``, trigger a view change while
    requests are in flight, then verify the pool still orders requests.

    :param looper: event loop running the pool
    :param nodes: all pool nodes
    :param slow_nodes: nodes whose 3PC messages get delayed
    :param wallet: client wallet used to sign requests
    :param client: client used to submit requests
    :param slow_delay: seconds to delay 3PC messages on ``slow_nodes``
    :param wait: optional extra seconds to run before the view change
    """
    # Sanity check: the pool orders requests before any delays are applied.
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4)
    # Delay all 3PC messages on the master instance (inst_id=0).
    delay_3pc_messages(slow_nodes, 0, delay=slow_delay)

    # Fire-and-forget requests; they stay stuck in 3PC on the slow nodes.
    sendRandomRequests(wallet, client, 10)
    if wait:
        looper.runFor(wait)

    ensure_view_change_complete(looper, nodes, customTimeout=60)

    # Release the stashed 3PC messages on the slow nodes.
    reset_delays_and_process_delayeds(slow_nodes)

    # The pool must be able to order new requests in the new view.
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5,
                                        total_timeout=30)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 5,
                                              total_timeout=30)
def view_change_in_between_3pc_random_delays(looper, nodes, slow_nodes, wallet, client, tconf, min_delay=0, max_delay=0): send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4) # max delay should not be more than catchup timeout. max_delay = max_delay or tconf.MIN_TIMEOUT_CATCHUPS_DONE_DURING_VIEW_CHANGE - 1 delay_3pc_messages(slow_nodes, 0, min_delay=min_delay, max_delay=max_delay) sendRandomRequests(wallet, client, 10) ensure_view_change_complete(looper, nodes, customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT + max_delay, exclude_from_check=['check_last_ordered_3pc']) reset_delays_and_process_delayeds(slow_nodes) send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 10)
def view_change_in_between_3pc_random_delays(looper, nodes, slow_nodes, sdk_pool_handle, sdk_wallet_client, tconf, min_delay=0, max_delay=0): sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 4) # max delay should not be more than catchup timeout. max_delay = max_delay or tconf.NEW_VIEW_TIMEOUT - 1 delay_3pc_messages(slow_nodes, 0, min_delay=min_delay, max_delay=max_delay) sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 10) ensure_view_change_complete(looper, nodes, customTimeout=2 * tconf.NEW_VIEW_TIMEOUT + max_delay, exclude_from_check=['check_last_ordered_3pc']) reset_delays_and_process_delayeds(slow_nodes) sdk_send_random_and_check(looper, nodes, sdk_pool_handle, sdk_wallet_client, 10)
def view_change_in_between_3pc(looper, nodes, slow_nodes, sdk_pool_handle,
                               sdk_wallet_client, slow_delay=1, wait=None):
    """
    SDK variant: delay 3PC messages to ``slow_nodes``, trigger a view change
    while requests are in flight, then verify the pool still orders requests.

    :param slow_delay: seconds to delay 3PC messages on ``slow_nodes``
    :param wait: optional extra seconds to run before the view change
    """
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle,
                              sdk_wallet_client, 4)
    # Delay all 3PC messages on the master instance (inst_id=0).
    delay_3pc_messages(slow_nodes, 0, delay=slow_delay)

    # Fire-and-forget requests; they stay stuck in 3PC on the slow nodes.
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 10)
    if wait:
        looper.runFor(wait)

    ensure_view_change_complete(looper, nodes, customTimeout=60)

    # Release the stashed 3PC messages on the slow nodes.
    reset_delays_and_process_delayeds(slow_nodes)

    # The pool must be able to order new requests in the new view.
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle,
                              sdk_wallet_client, 5, total_timeout=30)
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle,
                              sdk_wallet_client, 5, total_timeout=30)
def view_change_in_between_3pc(looper, nodes, slow_nodes, sdk_pool_handle,
                               sdk_wallet_client, slow_delay=1, wait=None):
    """
    Delay 3PC messages to ``slow_nodes`` and trigger a view change while
    requests are in flight; verify the view change finishes, elections
    complete, node data is consistent and new requests get ordered.

    :param slow_delay: seconds to delay 3PC messages on ``slow_nodes``
    :param wait: optional extra seconds to run before the view change
    """
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle,
                              sdk_wallet_client, 4)
    # Delay all 3PC messages on the master instance (inst_id=0).
    delay_3pc_messages(slow_nodes, 0, delay=slow_delay)

    # Fire-and-forget requests; they stay stuck in 3PC on the slow nodes.
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 10)
    if wait:
        looper.runFor(wait)

    ensure_view_change(looper, nodes)
    # Wait until every node has left the view-change state.
    looper.run(eventually(check_not_in_view_change, nodes))

    # Release the stashed 3PC messages on the slow nodes.
    reset_delays_and_process_delayeds(slow_nodes)

    ensureElectionsDone(looper=looper, nodes=nodes)
    ensure_all_nodes_have_same_data(looper, nodes)

    # The pool must be able to order new requests in the new view.
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle,
                              sdk_wallet_client, 5, total_timeout=30)
    sdk_send_random_and_check(looper, nodes, sdk_pool_handle,
                              sdk_wallet_client, 5, total_timeout=30)
def test_all_replicas_hold_request_keys(
        perf_chk_patched,
        looper,
        txnPoolNodeSet,
        sdk_wallet_client,
        sdk_pool_handle):
    """
    All replicas whether primary or non primary hold request keys of forwarded
    requests. Once requests are ordered, their request keys are removed from
    replica.
    """
    tconf = perf_chk_patched
    delay_3pc = 2
    # Delay 3PC messages on both protocol instances so requests stay queued.
    delay_3pc_messages(txnPoolNodeSet, 0, delay_3pc)
    delay_3pc_messages(txnPoolNodeSet, 1, delay_3pc)

    def chk(count):
        # All replicas have same amount of forwarded request keys and all keys
        # are finalised.  Replicas with isPrimary of None (undecided) are
        # intentionally not checked.
        for node in txnPoolNodeSet:
            for r in node.replicas.values():
                if r.isPrimary is False:
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == count
                    for i in range(count):
                        k = r.requestQueues[DOMAIN_LEDGER_ID][i]
                        assert r.requests[k].finalised
                elif r.isPrimary is True:
                    # Primaries are expected to keep an empty queue here.
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == 0

    reqs = sdk_signed_random_requests(looper, sdk_wallet_client,
                                      tconf.Max3PCBatchSize - 1)
    req_resps = sdk_send_signed_requests(sdk_pool_handle, reqs)
    # Only non primary replicas should have all request keys with them
    looper.run(eventually(chk, tconf.Max3PCBatchSize - 1))
    sdk_get_replies(looper, req_resps, timeout=sdk_eval_timeout(
        tconf.Max3PCBatchSize - 1, len(txnPoolNodeSet),
        add_delay_to_timeout=delay_3pc))
    # Replicas should have no request keys with them since they are ordered
    looper.run(eventually(chk, 0))  # Need to wait since one node might not
    # have processed it.

    delay = 1
    # Delay nominations so elections in the new view take a while.
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(nom_delay(delay))

    ensure_view_change(looper, txnPoolNodeSet)

    reqs = sdk_signed_random_requests(looper, sdk_wallet_client,
                                      2 * tconf.Max3PCBatchSize)
    req_resps = sdk_send_signed_requests(sdk_pool_handle, reqs)
    looper.run(eventually(chk, 2 * tconf.Max3PCBatchSize))

    # Since each nomination is delayed and there will be multiple nominations
    # so adding some extra time
    timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + \
        len(txnPoolNodeSet) * delay
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    sdk_get_replies(looper, req_resps, timeout=timeout)
    looper.run(eventually(chk, 0))
def test_view_change_done_delayed(txnPoolNodeSet, looper, sdk_pool_handle,
                                  sdk_wallet_client):
    """
    A node is slow so is behind other nodes, after view change, it catches up
    but it also gets view change message as delayed, a node should start
    participating only when caught up and ViewChangeDone quorum received.
    """
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    slow_node = nprs[-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 10
    delay_vcd = 25
    delay_3pc_messages([slow_node], 0, delay_3pc)
    # Delay incoming VIEW_CHANGE_DONE messages to the slow node.
    slow_node.nodeIbStasher.delay(vcd_delay(delay_vcd))

    def chk(node):
        # The node is fully set up in the new view: quorum of ViewChangeDone,
        # primary verified, participating, and every replica knows whether
        # it is primary.
        assert node.view_changer.has_acceptable_view_change_quorum
        assert node.view_changer._primary_verified
        assert node.isParticipating
        assert None not in {r.isPrimary for r in node.replicas.values()}

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         5 * 4, 4)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the slow node successfully completes catchup
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Other nodes complete view change, select primary and participate
    for node in other_nodes:
        looper.run(eventually(chk, node, retryWait=1))

    # Since `ViewChangeDone` is delayed, slow_node is not able to select
    # primary and participate
    assert not slow_node.view_changer.has_acceptable_view_change_quorum
    assert not slow_node.view_changer._primary_verified
    assert not slow_node.isParticipating
    assert {r.isPrimary for r in slow_node.replicas.values()} == {None}

    # Send requests to make sure pool is functional
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 5)

    # Repair network
    slow_node.reset_delays_and_process_delayeds()

    # `slow_node` selects primary and participate
    looper.run(eventually(chk, slow_node, retryWait=1))

    # Processes requests received during lack of primary
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Send more requests and compare data of all nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf,
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including primary) so the node that does not receive enough 3PC messages
    is behind but learns of the view change quickly and starts catchup.
    Other nodes learn of the view change late and thus keep on processing
    requests
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    # Delay instance-change messages so these nodes learn of the view
    # change late.
    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        # Number of catchup starts recorded for the domain ledger on the
        # slow node.
        return sum([
            1 for e in slow_node.ledgerManager.spylog.getAll(
                slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID
        ])

    s = start_count()

    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client,
                                        10 * Max3PCBatchSize)
    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    # The slow node must have started catchup at least twice.
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
def test_discard_3PC_messages_for_already_ordered(looper, txnPoolNodeSet,
                                                  sdk_wallet_client,
                                                  sdk_pool_handle):
    """
    Nodes discard any 3PC messages for already ordered 3PC keys
    (view_no, pp_seq_no). Delay all 3PC messages to a node so it cannot
    respond to them unless the other nodes order them, now when the slow node
    will get them it will respond but other nodes will not process them and
    discard them
    """
    slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = 20
    # Delay 3PC traffic on both protocol instances of the slow node.
    delay_3pc_messages([slow_node], 0, delay)
    delay_3pc_messages([slow_node], 1, delay)

    sent_batches = 3
    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         num_reqs=2 * sent_batches,
                                         num_batches=sent_batches)
    # send_reqs_batches_and_get_suff_replies(looper, wallet1, client1,
    #                                        2 * sent_batches, sent_batches)

    def chk(node, inst_id, p_count, c_count):
        # A node will still record PREPRAREs even if more than n-f-1, till the
        # request is not ordered
        assert len(node.replicas[inst_id].prepares) >= p_count
        assert len(node.replicas[inst_id].commits) == c_count

    def count_discarded(inst_id, count):
        # Every other node must have discarded exactly `count` messages as
        # referring to already ordered 3PC keys.
        for node in other_nodes:
            assert countDiscarded(node.replicas[inst_id],
                                  'already ordered 3 phase message') == count

    # `slow_node` did not receive any PREPAREs or COMMITs
    chk(slow_node, 0, 0, 0)

    # `other_nodes` have not discarded any 3PC message
    count_discarded(0, 0)

    # `other_nodes` have not recorded any PREPAREs or COMMITs from `slow_node`
    chk_commits_prepares_recvd(0, other_nodes, slow_node)

    slow_node.reset_delays_and_process_delayeds()
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # `slow_node` did receive correct number of PREPAREs and COMMITs
    looper.run(eventually(chk, slow_node, 0, sent_batches - 1, sent_batches,
                          retryWait=1))

    # `other_nodes` have not recorded any PREPAREs or COMMITs from `slow_node`
    chk_commits_prepares_recvd(0, other_nodes, slow_node)

    # `other_nodes` have discarded PREPAREs and COMMITs for all batches
    count_discarded(0, 2 * sent_batches)
def test_discard_3PC_messages_for_already_ordered(looper, txnPoolNodeSet,
                                                  sdk_wallet_client,
                                                  sdk_pool_handle):
    """
    A node whose 3PC traffic is delayed can only reply to PREPAREs/COMMITs
    after the rest of the pool has ordered the corresponding batches; when its
    late 3PC messages finally arrive, the other nodes must discard them since
    their 3PC keys (view_no, pp_seq_no) are already ordered.
    """
    laggard = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    rest = [n for n in txnPoolNodeSet if n != laggard]

    # Stash 3PC traffic on both protocol instances of the lagging node.
    lag = 20
    for inst_id in (0, 1):
        delay_3pc_messages([laggard], inst_id, lag)

    batches = 3
    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         num_reqs=2 * batches,
                                         num_batches=batches)

    def assert_3pc_counts(node, inst_id, prepare_count, commit_count):
        # PREPAREs can exceed n-f-1 until the request is ordered, hence >=.
        replica = node.replicas[inst_id]
        assert len(replica.prepares) >= prepare_count
        assert len(replica.commits) == commit_count

    def assert_discarded(inst_id, expected):
        # Each of the fast nodes discarded exactly `expected` stale messages.
        for node in rest:
            discarded = countDiscarded(node.replicas[inst_id],
                                       'already ordered 3 phase message')
            assert discarded == expected

    # The lagging node saw no PREPAREs or COMMITs yet.
    assert_3pc_counts(laggard, 0, 0, 0)
    # Nothing has been discarded by the fast nodes so far.
    assert_discarded(0, 0)
    # The fast nodes received no PREPAREs/COMMITs from the lagging node.
    chk_commits_prepares_recvd(0, rest, laggard)

    # Repair the network and let the lagging node catch up.
    laggard.reset_delays_and_process_delayeds()
    waitNodeDataEquality(looper, laggard, *rest)

    # The lagging node eventually records the expected PREPAREs and COMMITs.
    looper.run(eventually(assert_3pc_counts, laggard, 0,
                          batches - 1, batches, retryWait=1))

    # Still nothing from the lagging node was recorded by the fast nodes...
    chk_commits_prepares_recvd(0, rest, laggard)
    # ...because they discarded its late PREPAREs and COMMITs for all batches.
    assert_discarded(0, 2 * batches)
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf,
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        one_node_added):
    """
    One node has its 3PC traffic delayed heavily, so it falls behind but still
    hears about the view change promptly and starts catchup; the remaining
    nodes receive their instance-change messages late and keep on ordering
    requests in the meantime.
    """
    new_node = one_node_added
    non_primaries = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    lagging_node = non_primaries[-1]
    late_ic_nodes = [n for n in txnPoolNodeSet if n != lagging_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([lagging_node], 0, delay_3pc)
    # These nodes learn of the view change late.
    for node in late_ic_nodes:
        node.nodeIbStasher.delay(icDelay(delay_ic))

    def domain_catchup_starts():
        # Count catchup starts recorded for the domain ledger.
        events = lagging_node.ledgerManager.spylog.getAll(
            lagging_node.ledgerManager.startCatchUpProcess.__name__)
        return sum(1 for e in events
                   if e.params['ledgerId'] == DOMAIN_LEDGER_ID)

    starts_before = domain_catchup_starts()

    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client,
                                        10 * Max3PCBatchSize)
    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=late_ic_nodes)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, lagging_node, *txnPoolNodeSet[:-1])

    starts_after = domain_catchup_starts()
    # The lagging node must have started catchup at least twice.
    assert starts_after - starts_before >= 2

    looper.run(eventually(checkViewNoForNodes, lagging_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    waitNodeDataEquality(looper, new_node, *late_ic_nodes)
def test_all_replicas_hold_request_keys(perf_chk_patched, looper,
                                        txnPoolNodeSet, sdk_wallet_client,
                                        sdk_pool_handle):
    """
    Every replica — primary or not — keeps the request keys of forwarded
    requests while they are pending; once the requests are ordered the keys
    are dropped from the replica again.
    """
    tconf = perf_chk_patched
    delay_3pc = 2
    # Slow down 3PC on both protocol instances so requests stay queued.
    for inst_id in (0, 1):
        delay_3pc_messages(txnPoolNodeSet, inst_id, delay_3pc)

    def assert_queued_keys(expected):
        # Non-primary replicas hold exactly `expected` finalised request keys
        # for the domain ledger; primaries hold none.  Replicas whose
        # isPrimary is still None are skipped.
        for node in txnPoolNodeSet:
            for replica in node.replicas.values():
                queue = replica.requestQueues[DOMAIN_LEDGER_ID]
                if replica.isPrimary is False:
                    assert len(queue) == expected
                    for idx in range(expected):
                        key = queue[idx]
                        assert replica.requests[key].finalised
                elif replica.isPrimary is True:
                    assert len(queue) == 0

    batch = tconf.Max3PCBatchSize - 1
    reqs = sdk_signed_random_requests(looper, sdk_wallet_client, batch)
    req_resps = sdk_send_signed_requests(sdk_pool_handle, reqs)
    # While ordering is delayed, only non-primaries accumulate the keys.
    looper.run(eventually(assert_queued_keys, batch))
    sdk_get_replies(looper, req_resps,
                    timeout=sdk_eval_timeout(batch, len(txnPoolNodeSet),
                                             add_delay_to_timeout=delay_3pc))
    # Once ordered, the keys are gone; a node may lag, hence `eventually`.
    looper.run(eventually(assert_queued_keys, 0))

    # Delay nominations so the election in the new view takes a while.
    nom_lag = 1
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(nom_delay(nom_lag))

    ensure_view_change(looper, txnPoolNodeSet)

    reqs = sdk_signed_random_requests(looper, sdk_wallet_client,
                                      2 * tconf.Max3PCBatchSize)
    req_resps = sdk_send_signed_requests(sdk_pool_handle, reqs)
    looper.run(eventually(assert_queued_keys, 2 * tconf.Max3PCBatchSize))

    # Several delayed nominations may occur, so pad the election timeout.
    timeout = (waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) +
               len(txnPoolNodeSet) * nom_lag)
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    sdk_get_replies(looper, req_resps, timeout=timeout)
    looper.run(eventually(assert_queued_keys, 0))
def test_all_replicas_hold_request_keys(looper, txnPoolNodeSet, client1,
                                        wallet1, client1Connected, tconf):
    """
    All replicas whether primary or non primary hold request keys of forwarded
    requests. Once requests are ordered, their request keys are removed from
    replica.
    """
    # Delay 3PC messages on both protocol instances so requests stay queued.
    delay_3pc_messages(txnPoolNodeSet, 0, 2)
    delay_3pc_messages(txnPoolNodeSet, 1, 2)

    def chk(count):
        # All replicas have same amount of forwarded request keys and all keys
        # are finalised.
        for node in txnPoolNodeSet:
            # NOTE(review): this variant iterates `node.replicas` directly —
            # presumably an older API where replicas is a sequence, unlike
            # the sdk variants which use `.values()`; confirm.
            for r in node.replicas:
                if r.isPrimary is False:
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == count
                    for i in range(count):
                        k = r.requestQueues[DOMAIN_LEDGER_ID][i]
                        assert r.requests[k].finalised
                elif r.isPrimary is True:
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == 0

    reqs = sendRandomRequests(wallet1, client1, tconf.Max3PCBatchSize - 1)
    # Only non primary replicas should have all request keys with them
    looper.run(eventually(chk, tconf.Max3PCBatchSize - 1))
    waitForSufficientRepliesForRequests(looper, client1, requests=reqs,
                                        add_delay_to_timeout=2)
    # Replicas should have no request keys with them since they are ordered
    looper.run(eventually(chk, 0))  # Need to wait since one node might not
    # have processed it.

    delay = 1
    # Delay nominations so elections in the new view take a while.
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(nom_delay(delay))

    ensure_view_change(looper, txnPoolNodeSet)

    reqs = sendRandomRequests(wallet1, client1, 2 * tconf.Max3PCBatchSize)
    looper.run(eventually(chk, 2 * tconf.Max3PCBatchSize))

    # Since each nomination is delayed and there will be multiple nominations
    # so adding some extra time
    timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + \
        len(txnPoolNodeSet) * delay
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    waitForSufficientRepliesForRequests(looper, client1, requests=reqs,
                                        add_delay_to_timeout=2)
    looper.run(eventually(chk, 0))
def slow_node_and_others(txnPoolNodeSet):
    """
    Pick the last non-primary node of the master instance, delay all of its
    incoming 3PC traffic for a very long time, and return it together with
    the remaining nodes.

    :param txnPoolNodeSet: all nodes of the pool
    :return: tuple ``(node, other)`` — the slowed node and the list of all
        other pool nodes
    """
    node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other = [n for n in txnPoolNodeSet if n != node]
    delay = 1000

    logger.info("Delay 3pc messages for {} on {} sec".format(node, delay))

    # BUG FIX: previously the return value of delay_3pc_messages() (None)
    # was passed into cDelay(), so the COMMIT stasher was configured with
    # None instead of the intended delay.  Apply the 3PC delays first, then
    # stash COMMITs explicitly with the real delay value.
    delay_3pc_messages([node, ], inst_id=None, delay=delay)
    node.nodeIbStasher.delay(cDelay(delay))
    return node, other
def view_change_in_between_3pc_random_delays(looper, nodes, slow_nodes, wallet,
                                             client, min_delay=0, max_delay=5):
    """
    Trigger a view change while 3PC messages to ``slow_nodes`` are delayed by
    random amounts in ``[min_delay, max_delay]``; verify elections complete,
    data is consistent and the pool still orders requests.
    """
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4)
    delay_3pc_messages(slow_nodes, 0, min_delay=min_delay,
                       max_delay=max_delay)
    # Fire-and-forget requests; they stay stuck in 3PC on the slow nodes.
    sendRandomRequests(wallet, client, 10)

    ensure_view_change(looper, nodes)
    ensureElectionsDone(looper=looper, nodes=nodes)
    ensure_all_nodes_have_same_data(looper, nodes=nodes)

    # Release the stashed 3PC messages on the slow nodes.
    reset_delays_and_process_delayeds(slow_nodes)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 10)
def test_all_replicas_hold_request_keys(perf_chk_patched, looper,
                                        txnPoolNodeSet, sdk_wallet_client,
                                        sdk_pool_handle):
    """
    All replicas whether primary or non primary hold request keys of forwarded
    requests. Once requests are ordered, their request keys are removed from
    replica.
    """
    tconf = perf_chk_patched
    delay_3pc = 2
    # Delay 3PC messages on both protocol instances so requests stay queued.
    delay_3pc_messages(txnPoolNodeSet, 0, delay_3pc)
    delay_3pc_messages(txnPoolNodeSet, 1, delay_3pc)

    def chk(count):
        # All replicas have same amount of forwarded request keys and all keys
        # are finalised.  Request queues live on the replica's ordering
        # service in this version of the code.
        for node in txnPoolNodeSet:
            for r in node.replicas.values():
                if r.isPrimary is False:
                    assert len(r._ordering_service.
                               requestQueues[DOMAIN_LEDGER_ID]) == count
                    for i in range(count):
                        k = r._ordering_service.requestQueues[
                            DOMAIN_LEDGER_ID][i]
                        assert r.requests[k].finalised
                elif r.isPrimary is True:
                    assert len(r._ordering_service.
                               requestQueues[DOMAIN_LEDGER_ID]) == 0

    reqs = sdk_signed_random_requests(looper, sdk_wallet_client,
                                      tconf.Max3PCBatchSize - 1)
    req_resps = sdk_send_signed_requests(sdk_pool_handle, reqs)
    # Only non primary replicas should have all request keys with them
    looper.run(eventually(chk, tconf.Max3PCBatchSize - 1))
    sdk_get_replies(looper, req_resps,
                    timeout=sdk_eval_timeout(tconf.Max3PCBatchSize - 1,
                                             len(txnPoolNodeSet),
                                             add_delay_to_timeout=delay_3pc))
    # Replicas should have no request keys with them since they are ordered
    looper.run(eventually(chk, 0))  # Need to wait since one node might not
    # have processed it.
def test_view_change_on_start(tconf, txnPoolNodeSet, looper, sdk_pool_handle,
                              sdk_wallet_client):
    """
    Do view change on a pool without any requests being ordered: with 3PC
    delayed on the master instance, nothing gets committed and a view change
    to `old_view_no + 1` is expected within the performance-check window.
    """
    old_view_no = txnPoolNodeSet[0].viewNo
    master_primary = get_master_primary_node(txnPoolNodeSet)
    other_nodes = [n for n in txnPoolNodeSet if n != master_primary]
    delay_3pc = 10
    # Delay master-instance 3PC messages on every node.
    delay_3pc_messages(txnPoolNodeSet, 0, delay_3pc)
    sent_batches = 2
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             sent_batches * tconf.Max3PCBatchSize)

    def chk1():
        # The master primary's uncommitted ledger/state roots must differ
        # from those of the other nodes (which agree among themselves).
        t_root, s_root = check_uncommitteds_equal(other_nodes)
        assert master_primary.domainLedger.uncommittedRootHash != t_root
        assert master_primary.states[DOMAIN_LEDGER_ID].headHash != s_root

    looper.run(eventually(chk1, retryWait=1))

    # A view change should be triggered once the performance check fires.
    timeout = tconf.PerfCheckFreq + \
        waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    waitForViewChange(looper, txnPoolNodeSet, old_view_no + 1,
                      customTimeout=timeout)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    check_uncommitteds_equal(txnPoolNodeSet)

    # Repair the network and verify the pool orders requests again.
    reset_delays_and_process_delayeds(txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize,
                              add_delay_to_timeout=delay_3pc)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_view_change_gc_in_between_3pc_all_nodes_delays(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Test that garbage collector compares the whole 3PC key (viewNo, ppSeqNo)
    and does not remove messages from node's queues that have higher viewNo
    than last ordered one even if their ppSeqNo are less or equal
    """
    numNodes = len(txnPoolNodeSet)
    viewNo = checkViewNoForNodes(txnPoolNodeSet)

    # 1 send two messages one by one separately to make
    # node pool working with two batches
    # -> last_ordered_3pc = (+0, 2) [+0 means from the initial state]
    # (last_ordered_3pc here and futher is tracked
    # for master instances only cause non-master ones have
    # specific logic of its management which we don't care in
    # the test, see Replica::_setup_for_non_master)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    last_ordered_3pc = (viewNo, 2)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 2)

    # 2 do view change
    # -> GC should remove it from nodes' queues
    # -> viewNo = +1
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 0)

    # 3 slow processing 3PC messages for all nodes (all replica instances)
    # randomly and send one more message
    # -> not ordered (last_ordered_3pc still equal (+0, 2)) but primaries
    # should at least send PRE-PREPAREs
    # TODO could it be not enough for wainting that at least primary
    # has sent PRE-PREPARE
    propagationTimeout = waits.expectedClientRequestPropagationTime(numNodes)
    delay_3pc_messages(txnPoolNodeSet, 0, delay=propagationTimeout * 2)
    delay_3pc_messages(txnPoolNodeSet, 1, delay=propagationTimeout * 2)
    requests = sdk_send_random_request(looper, sdk_pool_handle,
                                      sdk_wallet_client)

    def checkPrePrepareSentAtLeastByPrimary():
        # Every primary replica must have sent at least one PRE-PREPARE.
        for node in txnPoolNodeSet:
            for replica in node.replicas.values():
                if replica.isPrimary:
                    assert len(replica.sentPrePrepares)

    looper.run(eventually(checkPrePrepareSentAtLeastByPrimary,
                          retryWait=0.1, timeout=propagationTimeout))

    # 4 do view change
    # -> GC shouldn't remove anything because
    # last_ordered_3pc (+0, 2) < last message's 3pc key (+1, 1)
    # -> viewNo = 2
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNoNew = checkViewNoForNodes(txnPoolNodeSet)
    # another view change could happen because of slow nodes
    assert viewNoNew - viewNo in (1, 2)
    viewNo = viewNoNew
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 5 reset delays and wait for replies
    # -> new primaries should send new 3pc for last message
    # with 3pc key (+2, 1)
    # -> they should be ordered
    # -> last_ordered_3pc = (+2, 1)
    reset_delays_and_process_delayeds(txnPoolNodeSet)
    sdk_get_replies(looper, [requests])
    checkViewNoForNodes(txnPoolNodeSet, viewNo)
    last_ordered_3pc = (viewNo, 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 6 do view change
    # -> GC should remove them
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 0)
def test_view_change_gc_in_between_3pc_all_nodes_delays(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Test that garbage collector compares the whole 3PC key (viewNo, ppSeqNo)
    and does not remove messages from node's queues that have higher viewNo
    than last ordered one even if their ppSeqNo are less or equal
    """
    numNodes = len(txnPoolNodeSet)
    viewNo = checkViewNoForNodes(txnPoolNodeSet)

    # 1 send two messages one by one separately to make
    # node pool working with two batches
    # -> last_ordered_3pc = (+0, 2) [+0 means from the initial state]
    # (last_ordered_3pc here and futher is tracked
    # for master instances only cause non-master ones have
    # specific logic of its management which we don't care in
    # the test, see Replica::_setup_for_non_master)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    batches_count = get_pp_seq_no(txnPoolNodeSet)
    last_ordered_3pc = (viewNo, batches_count)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 2)

    # 2 do view change
    # -> GC should remove it from nodes' queues
    # -> viewNo = +1
    ensure_view_change_complete(looper, txnPoolNodeSet)

    # NOTE(review): one extra batch is counted per view change here —
    # presumably a batch is ordered as part of the view change itself;
    # confirm against the ordering logic.
    batches_count += 1

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    looper.run(
        eventually(check_nodes_last_ordered_3pc, txnPoolNodeSet,
                   (viewNo, batches_count)))
    check_nodes_requests_size(txnPoolNodeSet, 0)

    # 3 slow processing 3PC messages for all nodes (all replica instances)
    # randomly and send one more message
    # -> not ordered (last_ordered_3pc still equal (+0, 2)) but primaries
    # should at least send PRE-PREPAREs
    # TODO could it be not enough for wainting that at least primary
    # has sent PRE-PREPARE
    propagationTimeout = waits.expectedClientRequestPropagationTime(numNodes)
    delay_3pc_messages(txnPoolNodeSet, 0, delay=propagationTimeout * 2)
    delay_3pc_messages(txnPoolNodeSet, 1, delay=propagationTimeout * 2)
    requests = sdk_send_random_request(looper, sdk_pool_handle,
                                      sdk_wallet_client)

    def checkPrePrepareSentAtLeastByPrimary():
        # Every primary replica must have sent at least one PRE-PREPARE.
        for node in txnPoolNodeSet:
            for replica in node.replicas.values():
                if replica.isPrimary:
                    assert len(replica._ordering_service.sent_preprepares)

    looper.run(
        eventually(checkPrePrepareSentAtLeastByPrimary,
                   retryWait=0.1, timeout=propagationTimeout))

    # 4 do view change
    # -> GC shouldn't remove anything because
    # last_ordered_3pc < last message's 3pc key (from the new view)
    # -> viewNo = 2
    ensure_view_change_complete(looper, txnPoolNodeSet)
    batches_count += 1

    viewNoNew = checkViewNoForNodes(txnPoolNodeSet)
    # another view change could happen because of slow nodes
    assert viewNoNew - viewNo in (1, 2)
    viewNo = viewNoNew
    check_nodes_last_ordered_3pc(txnPoolNodeSet,
                                 (last_ordered_3pc[0] + 1, batches_count - 1))
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 5 reset delays and wait for replies
    # -> new primaries should send new 3pc for last message
    # -> they should be ordered
    reset_delays_and_process_delayeds(txnPoolNodeSet)
    sdk_get_replies(looper, [requests])
    batches_count += 1

    checkViewNoForNodes(txnPoolNodeSet, viewNo)
    last_ordered_3pc = (viewNo, batches_count)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 6 do view change
    # -> GC should remove them
    ensure_view_change_complete(looper, txnPoolNodeSet)
    batches_count += 1

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet,
                                 (last_ordered_3pc[0] + 1, batches_count))
    check_nodes_requests_size(txnPoolNodeSet, 0)
def test_node_detecting_lag_from_view_change_done_messages(
        txnPoolNodeSet, looper, wallet1, client1, client1Connected, tconf):
    """
    A node is slow and after view change starts, it marks it's `last_prepared`
    to less than others, after catchup it does not get any txns from others
    and finds it has already ordered it's `last_prepared`, but when it gets
    ViewChangeDone messages, it starts catchup again and this time gets the
    txns. To achieve this delay all 3PC messages to a node so before view
    change it has different last_prepared from others.
    Also delay processing of COMMITs and INSTANCE_CHANGEs by other nodes
    """
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica
    fast_master_replicas = [n.master_replica for n in fast_nodes]

    delay_3pc = 50
    delay_ic = tconf.PerfCheckFreq + 5
    delay_commit = delay_ic + 10
    delay_3pc_messages([slow_node], 0, delay_3pc)
    # Fast nodes get their INSTANCE_CHANGEs and COMMITs delayed.
    for n in fast_nodes:
        n.nodeIbStasher.delay(icDelay(delay_ic))
        n.nodeIbStasher.delay(cDelay(delay_commit))

    # Trickle in requests so 3PC progresses on the fast nodes while the
    # slow node falls behind.
    reqs = []
    for i in range(10):
        reqs = reqs + sendRandomRequests(wallet1, client1, 2)
        looper.runFor(.2)

    def chk1():
        # Slow node's last prepared certificate is behind each fast node's
        # while the last ordered 3PC key is still equal everywhere.
        for rep in fast_master_replicas:
            assert compare_3PC_keys(
                slow_master_replica.last_prepared_certificate_in_view(),
                rep.last_prepared_certificate_in_view()) > 0
            assert slow_master_replica.last_ordered_3pc == rep.last_ordered_3pc

    looper.run(eventually(chk1))

    no_more_catchup_count = get_count(slow_node,
                                      slow_node.no_more_catchups_needed)

    # Track last prepared for master replica of each node
    prepareds = {}
    orig_methods = {}
    for node in txnPoolNodeSet:
        orig_methods[node.name] = node.master_replica.on_view_change_start

        def patched_on_view_change_start(self):
            # Call the original handler, then record what this replica saw
            # as its last prepared certificate before the view change.
            orig_methods[self.node.name]()
            prepareds[self.node.name] = self.last_prepared_before_view_change

        node.master_replica.on_view_change_start = types.MethodType(
            patched_on_view_change_start, node.master_replica)

    ensure_view_change(looper, txnPoolNodeSet, exclude_from_check=fast_nodes)

    def chk2():
        # last_prepared of slow_node is less than fast_nodes
        for rep in fast_master_replicas:
            assert compare_3PC_keys(prepareds[slow_master_replica.node.name],
                                    prepareds[rep.node.name]) > 0

    looper.run(eventually(chk2, timeout=delay_ic + 5))

    last_start_catchup_call_at = None
    no_more_catchup_call_at = None

    def chk3():
        # no_more_catchups_needed was called since node found no need of
        # catchup; remember the spy timestamps for the later ordering checks.
        nonlocal last_start_catchup_call_at, no_more_catchup_call_at
        assert (get_count(slow_node, slow_node.no_more_catchups_needed)
                - no_more_catchup_count) > 0
        no_more_catchup_call_at = slow_node.spylog.getLast(
            slow_node.no_more_catchups_needed).starttime
        last_start_catchup_call_at = slow_node.spylog.getLast(
            slow_node.start_catchup).starttime

    looper.run(eventually(chk3, timeout=delay_commit))

    for n in fast_nodes:
        n.nodeIbStasher.reset_delays_and_process_delayeds()
        # NOTE(review): this second identical call looks redundant — confirm
        # whether a different stasher was meant to be reset here.
        n.nodeIbStasher.reset_delays_and_process_delayeds()

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Catchup was started again after the point where the node had decided
    # that no more catchups were needed.
    assert slow_node.spylog.getLast(
        slow_node.start_catchup).starttime > no_more_catchup_call_at
    assert slow_node.spylog.getLast(
        slow_node.start_catchup).starttime > last_start_catchup_call_at