Code Example #1
def test_view_change_by_order_stashed_on_3_nodes_and_catchup_on_1_node(txnPoolNodeSet, looper,
                                                                       sdk_pool_handle, sdk_wallet_steward):
    '''
    - COMMITS are delayed on all nodes
    - All nodes start a view change with a prepared certificate (for the delayed messages)
    - COMMITS come during view change for 3 nodes
    - So these 3 nodes finish view change by processing Commits and Ordered msgs during view change (in between rounds of catchup).
    - The lagging (4th) node receives missing txns as part of catch-up (during view change) and also finishes view change.
    '''
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = txnPoolNodeSet[:-1]
    slow_stasher = slow_node.nodeIbStasher
    fast_stashers = [n.nodeIbStasher for n in fast_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    initial_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    txns_count = 4
    eventual_last_ordered = initial_last_ordered[0], initial_last_ordered[1] + txns_count
    batches_count = initial_last_ordered[1]

    with delay_rules(all_stashers, vcd_delay()):
        # the lagging node is slow in receiving Commits and Catchup msgs
        with delay_rules(slow_stasher, cDelay()):
            with delay_rules(slow_stasher, lsDelay(), msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                # fast nodes will receive and order Commits for last_prepared_cert during view change
                with delay_rules(fast_stashers, cDelay()):
                    with delay_rules(fast_stashers, lsDelay(), msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                        sdk_send_random_requests(looper, sdk_pool_handle,
                                                 sdk_wallet_steward, txns_count)
                        batches_count += txns_count

                        looper.run(eventually(check_prepare_certificate, txnPoolNodeSet, batches_count))
                        check_last_ordered_3pc_on_master(txnPoolNodeSet, initial_last_ordered)

                        # trigger view change on all nodes
                        ensure_view_change(looper, txnPoolNodeSet)

                        looper.run(eventually(check_last_prepared_certificate_after_view_change_start,
                                              txnPoolNodeSet, eventual_last_ordered))

                # check that all txns are ordered till last prepared on fast nodes
                looper.run(eventually(check_last_ordered_3pc_on_master, fast_nodes, eventual_last_ordered, timeout=30))

            # check that all txns are ordered till last prepared on slow node as a result of catchup
            looper.run(eventually(check_last_ordered_3pc_on_master, [slow_node], eventual_last_ordered, timeout=30))

    # wait for view change done on all nodes
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)
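
The nested `with delay_rules(...)` blocks above are what stage this scenario: each context manager installs a delay rule on the listed stashers and removes it on exit, replaying the held messages, so exiting the blocks from the innermost outward releases the fast nodes' held messages before the slow node's, with the delayed view-change messages released last. The snippet below is a minimal, self-contained sketch of that pattern, not the plenum implementation; `ToyStasher` and `delay_all` are invented names used purely for illustration.

from contextlib import contextmanager


class ToyStasher:
    """Illustrative stand-in for a node's nodeIbStasher."""

    def __init__(self):
        self.held = []        # messages currently being delayed
        self.delivered = []   # messages that reached the node
        self.rules = set()    # message types to delay

    def receive(self, msg_type, payload):
        if msg_type in self.rules:
            self.held.append((msg_type, payload))
        else:
            self.delivered.append((msg_type, payload))

    def release(self, msg_type):
        kept = []
        for m_type, payload in self.held:
            (self.delivered if m_type == msg_type else kept).append((m_type, payload))
        self.held = kept


@contextmanager
def delay_all(stashers, msg_type):
    """Delay msg_type on every stasher; replay the held messages on exit."""
    for s in stashers:
        s.rules.add(msg_type)
    try:
        yield
    finally:
        for s in stashers:
            s.rules.discard(msg_type)
            s.release(msg_type)


# Nesting mirrors the test: COMMITs are held inside and released when the
# inner block exits, while the outer view-change delay is still in force.
stashers = [ToyStasher() for _ in range(4)]
with delay_all(stashers, "VIEW_CHANGE_DONE"):
    with delay_all(stashers, "COMMIT"):
        stashers[0].receive("COMMIT", 1)
        assert stashers[0].held == [("COMMIT", 1)]
    assert stashers[0].delivered == [("COMMIT", 1)]
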
Code Example #2
def test_node_reject_invalid_txn_during_catchup(looper, sdk_pool_handle,
                                                sdk_wallet_client, tconf, tdir,
                                                txnPoolNodeSet, bad_node,
                                                lagging_node):
    """
    Make sure that a catching-up node blacklists nodes which send incorrect catchup replies
    """
    normal_nodes = [
        node for node in txnPoolNodeSet
        if node not in [bad_node, lagging_node]
    ]
    normal_stashers = [node.nodeIbStasher for node in normal_nodes]

    with delay_rules_without_processing(lagging_node.nodeIbStasher,
                                        delay_3pc()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 5)

        # Perform catchup, while making sure that cons proof from bad node is received
        # before cons proofs from normal nodes, so bad node can participate in catchup
        with delay_rules(normal_stashers, lsDelay()):
            lagging_node.start_catchup()

            node_leecher = lagging_node.ledgerManager._node_leecher
            audit_cons_proof_service = node_leecher._leechers[
                AUDIT_LEDGER_ID]._cons_proof_service
            looper.run(
                eventually(lambda: assert_in(
                    bad_node.name, audit_cons_proof_service._cons_proofs)))

        waitNodeDataEquality(looper, lagging_node, *normal_nodes)
        assert lagging_node.isNodeBlacklisted(bad_node.name)
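
`looper.run(eventually(...))` keeps retrying a check until it passes or a timeout is hit, which is how these tests cope with messages that are delayed and arrive asynchronously. A rough, self-contained approximation of that retry loop is shown below; `poll_until` is an invented name, and the real helper in the plenum/stp test utilities is coroutine-based rather than blocking.

import time


def poll_until(check, timeout=15.0, retry_wait=0.5):
    """Re-run `check` until it stops raising AssertionError or the timeout expires."""
    deadline = time.monotonic() + timeout
    while True:
        try:
            return check()
        except AssertionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(retry_wait)


# Usage analogous to the test above: wait until the bad node shows up
# in the collected cons proofs (a plain dict here, purely for illustration).
cons_proofs = {"BadNode": object()}

def bad_node_proof_arrived():
    assert "BadNode" in cons_proofs

poll_until(bad_node_proof_arrived, timeout=5)
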
Code Example #3
def test_view_change_by_order_stashed_on_all(txnPoolNodeSet, looper,
                                             sdk_pool_handle,
                                             sdk_wallet_steward):
    '''
    - COMMITS are delayed on all nodes
    - All nodes start a view change with a prepared certificate (for the delayed messages)
    - COMMITS come during view change for all nodes,
    - So all nodes finish view change by processing Commits and Ordered msgs during view change (in between rounds of catchup).
    '''
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    initial_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    txns_count = 4
    eventual_last_ordered = initial_last_ordered[
        0], initial_last_ordered[1] + txns_count

    with delay_rules(all_stashers, vcd_delay()):
        with delay_rules(all_stashers, cDelay()):
            with delay_rules(all_stashers, lsDelay(),
                             msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
                sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_steward, txns_count)

                looper.run(
                    eventually(check_prepare_certificate, txnPoolNodeSet,
                               txns_count))
                check_last_ordered_3pc_on_master(txnPoolNodeSet,
                                                 initial_last_ordered)

                # trigger view change on all nodes
                ensure_view_change(looper, txnPoolNodeSet)

                looper.run(
                    eventually(
                        check_last_prepared_certificate_after_view_change_start,
                        txnPoolNodeSet, eventual_last_ordered))

        # check that all txns are ordered till last prepared
        looper.run(
            eventually(check_last_ordered_3pc_on_master,
                       txnPoolNodeSet,
                       eventual_last_ordered,
                       timeout=30))

    # wait for view change done on all nodes
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
Code Example #4
def test_catchup_from_unequal_nodes_without_reasking(looper, txnPoolNodeSet,
                                                     sdk_pool_handle,
                                                     sdk_wallet_client):
    lagged_node_1 = txnPoolNodeSet[-1]
    lagged_node_2 = txnPoolNodeSet[-2]
    normal_nodes = [
        node for node in txnPoolNodeSet
        if node not in [lagged_node_1, lagged_node_2]
    ]
    normal_stashers = [node.nodeIbStasher for node in normal_nodes]

    with delay_rules_without_processing(lagged_node_1.nodeIbStasher,
                                        delay_3pc()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)

        with delay_rules_without_processing(lagged_node_2.nodeIbStasher,
                                            delay_3pc()):
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, 7)
            ensure_all_nodes_have_same_data(looper,
                                            normal_nodes,
                                            custom_timeout=30)

            # Perform catchup, while making sure that cons proof from lagging node is received
            # before cons proofs from normal nodes, so lagging node can participate in catchup
            with delay_rules(normal_stashers, lsDelay()):
                lagged_node_1.start_catchup()

                node_leecher = lagged_node_1.ledgerManager._node_leecher
                audit_cons_proof_service = node_leecher._leechers[
                    AUDIT_LEDGER_ID]._cons_proof_service
                looper.run(
                    eventually(lambda: assertExp(lagged_node_2.name in
                                                 audit_cons_proof_service.
                                                 _cons_proofs)))

            # Make sure catchup finishes successfully
            ensure_all_nodes_have_same_data(looper,
                                            set(txnPoolNodeSet) -
                                            {lagged_node_2},
                                            custom_timeout=30)
            assert lagged_node_1.ledgerManager._node_leecher._state == NodeLeecherService.State.Idle
Code Example #5
def test_catchup_with_reask_ls(txnPoolNodeSet, looper, sdk_pool_handle,
                               sdk_wallet_steward, tconf, tdir,
                               allPluginsPath):
    '''
    Start a catchup
    Delay MessageRep with LedgerStatuses twice
    Check that the catchup finishes
    '''
    lagged_node = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    lagged_node.nodeIbStasher.delay(msg_rep_delay(types_to_delay=[COMMIT]))

    with delay_rules_without_processing(
            lagged_node.nodeIbStasher, delay_3pc(),
            msg_rep_delay(types_to_delay=[COMMIT])):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 2)
        lagged_node.nodeIbStasher.drop_delayeds()
    with delay_rules_without_processing(
            lagged_node.nodeIbStasher, lsDelay(),
            msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
        lagged_node.start_catchup()

        def chk():
            resp_ls_count = 0
            for msg in lagged_node.nodeIbStasher.delayeds:
                if isinstance(
                        msg.item[0],
                        MessageRep) and msg.item[0].msg_type == LEDGER_STATUS:
                    resp_ls_count += 1
            assert resp_ls_count >= (len(txnPoolNodeSet) - 1) * 2
            lagged_node.nodeIbStasher.drop_delayeds()

        looper.run(eventually(chk))
    waitNodeDataEquality(looper,
                         lagged_node,
                         *txnPoolNodeSet,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
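
The threshold in `chk` reflects how many delayed LedgerStatus replies the re-asking logic is expected to produce: with its own LEDGER_STATUS handling stalled, the lagging node re-requests the status from every other node, and the test waits until at least two delayed MessageRep(LEDGER_STATUS) replies per peer have accumulated. Under the usual 4-node `txnPoolNodeSet` fixture (an assumption here) that works out as:

pool_size = 4                             # assumed size of txnPoolNodeSet
min_resp_ls = (pool_size - 1) * 2         # two MessageRep(LEDGER_STATUS) from each peer
assert min_resp_ls == 6
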
Code Example #6
def test_reverted_unordered(txnPoolNodeSet, looper, sdk_pool_handle,
                            sdk_wallet_client):
    """
    Before starting catchup, revert any uncommitted changes to state and
    ledger. This is to avoid any re-application of requests that were
    ordered but stashed
    Example scenario
    prepared (1, 4)
    start_view_change
    start_catchup
    ...
    ....
    ...
    committed and send Ordered (1, 2)
    ...
    ....
    preLedgerCatchUp
    force_process_ordered, take out (1,2) and stash (1, 2)
    now process stashed Ordered(1,2), its requests will be applied again

    Simulation: Delay COMMITs to a node so that it cannot order requests
    but has prepared them. Then trigger a view change and make sure the slow
    node has not ordered the same number of requests as the others but has
    prepared them, so it can order when it receives COMMITs while the view
    change is in progress. The slow node should revert unordered batches but
    eventually process the ordered requests, so delay LEDGER_STATUS too so
    that catchup is delayed
    """
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_node.nodeIbStasher.delay(cDelay(120, 0))
    sent_batches = 5
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         2 * sent_batches, sent_batches)

    # Fast nodes have same last ordered and same data
    last_ordered = [n.master_last_ordered_3PC for n in fast_nodes]
    assert check_if_all_equal_in_list(last_ordered)
    ensure_all_nodes_have_same_data(looper, fast_nodes)

    # Slow nodes have different last ordered than fast nodes
    assert last_ordered[0] != slow_node.master_last_ordered_3PC

    # Delay LEDGER_STATUS so catchup starts late
    slow_node.nodeIbStasher.delay(lsDelay(100))
    slow_node.nodeIbStasher.delay(msg_rep_delay(100))
    slow_node.nodeIbStasher.delay(cr_delay(100))

    # slow_node has not reverted batches
    assert sent_batches not in getAllReturnVals(
        slow_node.master_replica,
        slow_node.master_replica.revert_unordered_batches)

    ensure_view_change(looper, txnPoolNodeSet)

    def chk1():
        # slow_node reverted all batches
        rv = getAllReturnVals(
            slow_node.master_replica,
            slow_node.master_replica.revert_unordered_batches)
        assert sent_batches in rv

    looper.run(eventually(chk1, retryWait=1))

    # After the view change slow_node has prepared same requests as the fast
    # nodes have ordered
    assert last_ordered[
        0] == slow_node.master_replica.last_prepared_before_view_change

    # Deliver COMMITs
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # slow_node stashed commits
        assert slow_node.master_replica.stasher.num_stashed_catchup == \
               sent_batches * (len(txnPoolNodeSet) - 1)

    looper.run(eventually(chk2, retryWait=1))

    # Deliver LEDGER_STATUS so catchup can complete
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(LEDGER_STATUS)
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(MESSAGE_RESPONSE)
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(CATCHUP_REP)

    # Ensure all nodes have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    def chk3():
        # slow_node processed stashed messages successfully
        assert slow_node.master_replica.stasher.num_stashed_catchup == 0

    looper.run(eventually(chk3, retryWait=1))

    # Ensure pool is functional
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
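
The count asserted in `chk2` above is one stashed COMMIT per delayed batch from every other node: with `sent_batches = 5` and the usual 4-node pool (assumed here), the slow node stashes 5 * (4 - 1) = 15 COMMITs while catch-up is pending. A quick sanity check of that arithmetic:

sent_batches = 5
pool_size = 4                                  # assumed size of txnPoolNodeSet
expected_stashed = sent_batches * (pool_size - 1)
assert expected_stashed == 15                  # one COMMIT per batch from each peer
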
Code Example #7
def test_3pc_while_catchup_with_chkpoints_only(
        tdir, tconf, looper, chkFreqPatched, reqs_for_checkpoint,
        testNodeClass, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        allPluginsPath):
    '''
    Check that catch-up is not started again even if a quorum of stashed checkpoints
    is received during catch-up.
    Assume that only checkpoints and no 3PC messages are received.
    '''

    # Prepare nodes
    lagging_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]

    # Check that requests executed well
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    # Stop one node
    waitNodeDataEquality(looper, lagging_node, *rest_nodes)
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagging_node,
                                            stopNode=True)
    looper.removeProdable(lagging_node)

    # Send more requests to active nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    waitNodeDataEquality(looper, *rest_nodes)

    # Restart stopped node and wait for successful catch up
    lagging_node = start_stopped_node(
        lagging_node,
        looper,
        tconf,
        tdir,
        allPluginsPath,
        start=False,
    )

    initial_all_ledgers_caught_up = lagging_node.spylog.count(
        Node.allLedgersCaughtUp)

    # delay all 3PC messages on the lagged node so that it
    # receives only Checkpoints and catch-up messages

    lagging_node.nodeIbStasher.delay(ppDelay())
    lagging_node.nodeIbStasher.delay(pDelay())
    lagging_node.nodeIbStasher.delay(cDelay())

    # delay CurrentState to avoid Primary Propagation (since it will lead to more catch-ups not needed in this test).
    with delay_rules(lagging_node.nodeIbStasher, cs_delay()):
        with delay_rules(lagging_node.nodeIbStasher, lsDelay(),
                         msg_rep_delay()):
            looper.add(lagging_node)
            txnPoolNodeSet[-1] = lagging_node
            looper.run(checkNodesConnected(txnPoolNodeSet))

            # wait till we have received ledger statuses for the messages missed while the node was offline,
            # so that we can now order more messages; they will not be caught up but stashed
            looper.run(
                eventually(lambda: assertExp(
                    lagging_node.nodeIbStasher.num_of_stashed(LedgerStatus) >=
                    3),
                           retryWait=1,
                           timeout=60))

            assert lagging_node.mode != Mode.participating

            # make sure that more requests are being ordered while catch-up is in progress
            # stash enough stable checkpoints for starting a catch-up
            num_checkpoints = Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1
            num_reqs = reqs_for_checkpoint * num_checkpoints + 1
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, num_reqs)
            looper.run(
                eventually(check_last_ordered_3pc_on_all_replicas, rest_nodes,
                           (0, num_reqs + 2)))

            # all good nodes stabilized checkpoint
            looper.run(eventually(chkChkpoints, rest_nodes, 2, 0))

            assert lagging_node.mode != Mode.participating
            # lagging node is catching up and stashing all checkpoints
            looper.run(
                eventually(lambda: assertExp(
                    get_stashed_checkpoints(lagging_node) == num_checkpoints *
                    len(rest_nodes)),
                           timeout=waits.expectedPoolCatchupTime(
                               len(txnPoolNodeSet))))

        # check that last_ordered is set
        looper.run(
            eventually(check_last_ordered_3pc_on_master, [lagging_node],
                       (0, num_reqs + 2)))

        # check that the catch-up is finished
        looper.run(
            eventually(
                lambda: assertExp(lagging_node.mode == Mode.participating),
                retryWait=1,
                timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))

        # check that catch-up was started only once
        assert lagging_node.spylog.count(
            Node.allLedgersCaughtUp) == initial_all_ledgers_caught_up + 1
        looper.run(
            eventually(lambda: assertExp(
                lagging_node.spylog.count(Node.allLedgersCaughtUp) ==
                initial_all_ledgers_caught_up + 1)))
        looper.run(
            eventually(lambda: assertExp(
                lagging_node.spylog.count(Node.start_catchup) == 1)))

        # do not check the audit ledger since we didn't catch up the audit ledger while txns were being ordered during catch-up
        waitNodeDataEquality(looper,
                             *txnPoolNodeSet,
                             exclude_from_check='check_audit',
                             customTimeout=5)
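
The request count in this test is sized so that the lagging node stashes a full quorum of stable checkpoints while it is still catching up: `num_checkpoints` is one more than the stash threshold, and one extra request pushes ordering past the last checkpoint boundary. The figures below plug in illustrative values only (the real ones come from `chkFreqPatched`, `reqs_for_checkpoint` and `Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP`), and the `+ 2` in the expected last_ordered accounts for the two single requests sent earlier in the test:

# Assumed values purely for illustration; the fixtures control the real ones.
STASHED_CHECKPOINTS_BEFORE_CATCHUP = 4
reqs_for_checkpoint = 5          # requests needed to fill one checkpoint window

num_checkpoints = STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1
num_reqs = reqs_for_checkpoint * num_checkpoints + 1
assert (num_checkpoints, num_reqs) == (5, 26)

# The rest of the pool should then sit at last_ordered (0, num_reqs + 2):
# one request before the node was stopped, one while it was down, plus num_reqs.
expected_last_ordered = (0, num_reqs + 2)
assert expected_last_ordered == (0, 28)
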
Code Example #8
def test_slow_node_reverts_unordered_state_during_catchup(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Delay COMMITs to a node such that when it needs to catch up, it needs to
    revert some unordered state. Also, by that time the node should have
    received all COMMITs, so that it will apply some of them (those for
    which it has not received txns from catchup).
    To do this, delay COMMITs for a long time, run catchup for a state a
    little older than the one received in LedgerStatus, and once catchup
    completes, reset delays and try to process the delayed COMMITs; some
    COMMITs will be rejected but some will be processed since catchup was
    done for the older ledger.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150
    catchup_rep_delay = 25

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))
    # Delay LEDGER_STATUS on the slow node so that only MESSAGE_REQUEST(LEDGER_STATUS) is sent and the
    # node catches up twice.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from the slow node and return consistency proofs for all
    # missing txns, so no stashed ones are applied
    slow_node.nodeIbStasher.delay(lsDelay(1000))

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catch up
    delay_batches = 2
    make_a_node_catchup_less(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                             delay_batches * Max3PCBatchSize)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc

    # start view change (and hence catchup)
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        check_last_ordered_3pc(n1, n2)

    assert slow_master_replica.last_prepared_before_view_change == old_last_ordered

    old_pc_count = slow_master_replica._ordering_service.spylog.count(
        slow_master_replica._ordering_service._validate)

    assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == 0

    # Repair the network so COMMITs are received, processed and stashed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica._ordering_service.spylog.count(
            slow_master_replica._ordering_service._validate) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # (delay_batches * Max3PCBatchSize * commits_count_in_phase) COMMITs are stashed
        assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == \
               delay_batches * Max3PCBatchSize * (len(txnPoolNodeSet) - 1)

    looper.run(eventually(chk3, retryWait=1, timeout=15))

    # fix catchup, so the node gets a chance to be caught-up
    repair_node_catchup_less(other_nodes)

    def chk4():
        # COMMITs that were received and stashed have been
        # processed after catchup
        assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == 0

    looper.run(eventually(chk4, retryWait=1, timeout=catchup_rep_delay + 50))

    def chk5():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(chk5,
                   retryWait=1,
                   timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))

    # make sure that the pool is functional
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
Code Example #9
def slow_catch_up(nodes, timeout):
    for node in nodes:
        node.nodeIbStasher.delay(lsDelay(timeout))
        node.clientIbStasher.delay(lsDelay(timeout))
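
This helper simply installs a long LEDGER_STATUS delay on each node's node-facing and client-facing stashers, pausing catch-up pool-wide. A hypothetical usage sketch is shown below; the wrapper name is invented, while `reset_delays_and_process_delayeds` and `LEDGER_STATUS` are the same names used in the tests above:

def run_while_catch_up_paused(nodes, timeout, work):
    """Pause catch-up on every node, run `work`, then let LEDGER_STATUS through."""
    slow_catch_up(nodes, timeout)
    try:
        work()
    finally:
        for node in nodes:
            # Replaying the held LedgerStatus messages lets catch-up resume
            # without waiting out the full delay.
            node.nodeIbStasher.reset_delays_and_process_delayeds(LEDGER_STATUS)
            node.clientIbStasher.reset_delays_and_process_delayeds(LEDGER_STATUS)
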
Code Example #10
def test_reverted_unordered(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Before starting catchup, revert any uncommitted changes to state and
    ledger. This is to avoid any re-application of requests that were
    ordered but stashed
    Example scenario
    prepared (1, 4)
    startViewChange
    start_catchup
    ...
    ....
    ...
    committed and send Ordered (1, 2)
    ...
    ....
    preLedgerCatchUp
    force_process_ordered, take out (1,2) and stash (1, 2)
    now process stashed Ordered(1,2), its requests will be applied again

    Simulation: Delay COMMITs to a node so that it cannot order requests
    but has prepared them. Then trigger a view change and make sure the slow
    node has not ordered the same number of requests as the others but has
    prepared them, so it can order when it receives COMMITs while the view
    change is in progress. The slow node should revert unordered batches but
    eventually process the ordered requests, so delay LEDGER_STATUS too so
    that catchup is delayed
    """
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_node.nodeIbStasher.delay(cDelay(120, 0))
    sent_batches = 5
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 2 * sent_batches, sent_batches)

    # Fast nodes have same last ordered and same data
    last_ordered = [n.master_last_ordered_3PC for n in fast_nodes]
    assert check_if_all_equal_in_list(last_ordered)
    ensure_all_nodes_have_same_data(looper, fast_nodes)

    # Slow nodes have different last ordered than fast nodes
    assert last_ordered[0] != slow_node.master_last_ordered_3PC

    # Delay LEDGER_STATUS so catchup starts late
    slow_node.nodeIbStasher.delay(lsDelay(100))
    slow_node.nodeIbStasher.delay(msg_rep_delay(100))

    # slow_node has not reverted batches
    assert sent_batches not in getAllReturnVals(
        slow_node.master_replica,
        slow_node.master_replica.revert_unordered_batches)

    ensure_view_change(looper, txnPoolNodeSet)

    def chk1():
        # slow_node reverted all batches
        rv = getAllReturnVals(slow_node.master_replica,
                              slow_node.master_replica.revert_unordered_batches)
        assert sent_batches in rv

    looper.run(eventually(chk1, retryWait=1))

    # After the view change slow_node has prepared same requests as the fast
    # nodes have ordered
    assert last_ordered[0] == slow_node.master_replica.last_prepared_before_view_change

    # Deliver COMMITs
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # slow_node orders all requests as others have
        assert last_ordered[0] == slow_node.master_last_ordered_3PC

    looper.run(eventually(chk2, retryWait=1))

    # Deliver LEDGER_STATUS so catchup can complete
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(LEDGER_STATUS)
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(MESSAGE_RESPONSE)

    # Ensure all nodes have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    def chk3():
        # slow_node processed stashed Ordered requests successfully
        rv = getAllReturnVals(slow_node,
                              slow_node.processStashedOrderedReqs)
        assert sent_batches in rv

    looper.run(eventually(chk3, retryWait=1))

    # Ensure pool is functional
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
Code Example #11
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    One of the nodes experiences a poor network and loses 3PC messages. It has
    to do multiple rounds of catchup to be fully caught up
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]

    # Bad node does not process any COMMIT messages, equivalent to those messages being lost
    with delay_rules(bad_node.nodeIbStasher, cDelay()):

        # Delay LEDGER_STATUS on the slow node so that only MESSAGE_REQUEST(LEDGER_STATUS) is sent and the
        # node catches up twice.
        # Otherwise other nodes may receive multiple LEDGER_STATUSes from the slow node and return consistency proofs for all
        # missing txns, so no stashed ones are applied

        bad_node.nodeIbStasher.delay(lsDelay(1000))

        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 6 * Max3PCBatchSize)
        waitNodeDataInequality(looper, bad_node, *other_nodes)

        # Patch all nodes to return ConsistencyProof of a smaller ledger to the
        # bad node but only once, so that the bad_node needs to do catchup again.

        make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                                  Max3PCBatchSize)

        def is_catchup_needed_count():
            return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                        compare_val_to=True))

        def is_catchup_not_needed_count():
            return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                        compare_val_to=False))

        def has_ordered_till_last_prepared_certificate_count():
            return len(getAllReturnVals(bad_node,
                                        bad_node.has_ordered_till_last_prepared_certificate,
                                        compare_val_to=True))

        old_is_catchup_needed_count = is_catchup_needed_count()
        old_is_catchup_not_needed_count = is_catchup_not_needed_count()
        old_has_ordered_till_last_prepared_certificate_count = has_ordered_till_last_prepared_certificate_count()
        ensure_view_change(looper, txnPoolNodeSet)
        checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
        ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet,
                                        exclude_from_check=['check_last_ordered_3pc',
                                                            'check_audit',
                                                            'check_last_ordered_3pc_backup'])

        assert is_catchup_needed_count() == old_is_catchup_needed_count
        assert is_catchup_not_needed_count() > old_is_catchup_not_needed_count
        # The bad_node caught up due to ordering till last prepared certificate
        assert has_ordered_till_last_prepared_certificate_count() > old_has_ordered_till_last_prepared_certificate_count

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet,
                                    exclude_from_check=['check_last_ordered_3pc_backup'])
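
The `*_count` helpers above rely on `getAllReturnVals(..., compare_val_to=...)`, i.e. on the spy log recording every return value of a watched method so the calls can be filtered later. A minimal, self-contained version of that return-value-spy idea, with invented names (`spy_returns`, `count_returns`), might look like this:

import functools


def spy_returns(obj, method_name):
    """Wrap obj.method_name so that every return value is recorded on the wrapper."""
    original = getattr(obj, method_name)
    recorded = []

    @functools.wraps(original)
    def wrapper(*args, **kwargs):
        result = original(*args, **kwargs)
        recorded.append(result)
        return result

    wrapper.returns = recorded
    setattr(obj, method_name, wrapper)
    return wrapper


def count_returns(wrapper, compare_val_to):
    """Analogue of len(getAllReturnVals(..., compare_val_to=...))."""
    return sum(1 for r in wrapper.returns if r == compare_val_to)
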
Code Example #12
def test_checkpoints_after_view_change(tconf, looper, chkFreqPatched,
                                       reqs_for_checkpoint, txnPoolNodeSet,
                                       sdk_pool_handle, sdk_wallet_client):
    '''
    Tests that there is no infinite catchups if there is
    a quorum of stashed checkpoints received during the view change
    '''

    # Prepare nodes
    lagging_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]

    initial_all_ledgers_caught_up = lagging_node.spylog.count(
        Node.allLedgersCaughtUp)
    initial_start_catchup = lagging_node.spylog.count(Node.start_catchup)

    with delay_rules(lagging_node.nodeIbStasher, lsDelay()):
        with delay_rules(lagging_node.nodeIbStasher, vcd_delay()):
            ensure_view_change(looper, txnPoolNodeSet)
            looper.run(
                eventually(lambda: assertExp(lagging_node.
                                             view_change_in_progress is True),
                           timeout=waits.expectedPoolCatchupTime(
                               len(txnPoolNodeSet))))
            ensureElectionsDone(looper=looper,
                                nodes=rest_nodes,
                                instances_list=range(2))

            assert all(n.view_change_in_progress is False for n in rest_nodes)
            assert lagging_node.view_change_in_progress is True

            # make sure that more requests are being ordered while catch-up is in progress on the lagging node
            # stash enough stable checkpoints for starting a catch-up
            num_checkpoints = Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1
            num_reqs = reqs_for_checkpoint * num_checkpoints + 1
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, num_reqs)
            looper.run(
                eventually(check_last_ordered_3pc_on_master, rest_nodes,
                           (1, num_reqs + 1)))
            looper.run(
                eventually(check_last_ordered_3pc_on_backup, rest_nodes,
                           (1, num_reqs + 1)))

            # all good nodes stabilized checkpoint
            looper.run(eventually(chkChkpoints, rest_nodes, 2, 0))

            assert get_stashed_checkpoints(
                lagging_node) == num_checkpoints * len(rest_nodes)
            # lagging node is doing the view change and stashing all checkpoints
            assert lagging_node.view_change_in_progress is True
            looper.run(
                eventually(lambda: assertExp(
                    get_stashed_checkpoints(lagging_node) == 2 * len(rest_nodes
                                                                     )),
                           timeout=waits.expectedPoolCatchupTime(
                               len(txnPoolNodeSet))))

    # check that view change is finished
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    assert lagging_node.view_change_in_progress is False

    # check that last_ordered is set
    looper.run(
        eventually(check_last_ordered_3pc_on_master, [lagging_node],
                   (1, num_reqs + 1)))
    looper.run(
        eventually(check_last_ordered_3pc_on_backup, [lagging_node],
                   (1, num_reqs + 1)))

    # check that checkpoint is stabilized for master
    looper.run(eventually(chk_chkpoints_for_instance, [lagging_node], 0, 2, 0))

    # check that the catch-up is finished
    assert lagging_node.mode == Mode.participating
    assert lagging_node.spylog.count(
        Node.allLedgersCaughtUp) == initial_all_ledgers_caught_up + 1
    assert lagging_node.spylog.count(
        Node.start_catchup) == initial_start_catchup + 1

    waitNodeDataEquality(looper, *txnPoolNodeSet, customTimeout=5)
Code Example #13
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    One of the nodes experiences a poor network and loses 3PC messages. It has
    to do multiple rounds of catchup to be fully caught up
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]
    orig_method = bad_node.master_replica.dispatchThreePhaseMsg

    # Bad node does not process any 3 phase messages, equivalent to messages
    # being lost
    def bad_method(self, m, s):
        pass

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        bad_method, bad_node.master_replica)

    # Delay LEDGER_STATUS on the slow node so that only MESSAGE_REQUEST(LEDGER_STATUS) is sent and the
    # node catches up twice.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from the slow node and return consistency proofs for all
    # missing txns, so no stashed ones are applied
    bad_node.nodeIbStasher.delay(lsDelay(1000))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, bad_node, *other_nodes)

    # Patch all nodes to return ConsistencyProof of a smaller ledger to the
    # bad node but only once, so that the bad_node needs to do catchup again.

    make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                              Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                    compare_val_to=True))

    def is_catchup_not_needed_count():
        return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                    compare_val_to=False))

    def has_ordered_till_last_prepared_certificate_count():
        return len(getAllReturnVals(bad_node,
                                    bad_node.has_ordered_till_last_prepared_certificate,
                                    compare_val_to=True))

    old_count_1 = is_catchup_needed_count()
    old_count_2 = has_ordered_till_last_prepared_certificate_count()
    old_count_3 = is_catchup_not_needed_count()
    ensure_view_change(looper, txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert is_catchup_needed_count() > old_count_1
    assert is_catchup_not_needed_count() > old_count_3
    # The bad_node caught up due to ordering till last prepared certificate
    assert has_ordered_till_last_prepared_certificate_count() > old_count_2

    # orig_method is already a bound method, so restore it directly instead of re-binding it
    bad_node.master_replica.dispatchThreePhaseMsg = orig_method
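
The patch above swaps a bound method for a no-op and restores the original when the test is done; `types.MethodType` is what binds the replacement function to the instance so it keeps behaving like a method. A self-contained illustration of the same patch-then-restore pattern:

import types


class ToyReplica:
    def dispatch(self, msg, sender):
        return ("dispatched", msg, sender)


replica = ToyReplica()
orig = replica.dispatch                      # keep the original bound method

def dropped(self, msg, sender):              # stand-in that silently drops messages
    return None

replica.dispatch = types.MethodType(dropped, replica)
assert replica.dispatch("m", "s") is None

replica.dispatch = orig                      # restore the saved bound method
assert replica.dispatch("m", "s") == ("dispatched", "m", "s")
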
Code Example #14
def test_slow_node_reverts_unordered_state_during_catchup(looper,
                                                          txnPoolNodeSet,
                                                          sdk_pool_handle,
                                                          sdk_wallet_client):
    """
    Delay COMMITs to a node such that when it needs to catch up, it needs to
    revert some unordered state. Also, by that time the node should have
    received all COMMITs, so that it will apply some of them (those for
    which it has not received txns from catchup).
    To do this, delay COMMITs for a long time, run catchup for a state a
    little older than the one received in LedgerStatus, and once catchup
    completes, reset delays and try to process the delayed COMMITs; some
    COMMITs will be rejected but some will be processed since catchup was
    done for the older ledger.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150
    catchup_rep_delay = 25

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))
    # Delay LEDGER_STATUS on the slow node so that only MESSAGE_REQUEST(LEDGER_STATUS) is sent and the
    # node catches up twice.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from the slow node and return consistency proofs for all
    # missing txns, so no stashed ones are applied
    slow_node.nodeIbStasher.delay(lsDelay(1000))

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catch up
    delay_batches = 2
    make_a_node_catchup_less(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                             delay_batches * Max3PCBatchSize)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    # start view change (and hence catchup)
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        lst_3pc = check_last_ordered_3pc(n1, n2)

    def chk1():
        # `slow_node` has prepared all 3PC messages which
        # `other_nodes` have ordered
        assertEquality(slow_master_replica.last_prepared_before_view_change, lst_3pc)

    looper.run(eventually(chk1, retryWait=1))

    old_pc_count = slow_master_replica.spylog.count(
        slow_master_replica.can_process_since_view_change_in_progress)

    assert len(slow_node.stashedOrderedReqs) == 0

    # Repair the network so COMMITs are received, processed and stashed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica.spylog.count(
            slow_master_replica.can_process_since_view_change_in_progress) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # COMMITs are stashed
        assert len(slow_node.stashedOrderedReqs) == delay_batches * Max3PCBatchSize

    looper.run(eventually(chk3, retryWait=1, timeout=15))

    # fix catchup, so the node gets a chance to be caught-up
    repair_node_catchup_less(other_nodes)

    def chk4():
        # Some COMMITs were ordered but stashed and they were processed
        rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
        assert delay_batches in rv

    looper.run(eventually(chk4, retryWait=1, timeout=catchup_rep_delay + 5))

    def chk5():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(
            chk5,
            retryWait=1,
            timeout=waits.expectedPoolCatchupTime(
                len(txnPoolNodeSet))))

    # make sure that the pool is functional
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)