def test_restarted_node_complete_vc_by_current_state(looper, txnPoolNodeSet,
                                                     tconf, tdir,
                                                     allPluginsPath):
    node_to_restart = txnPoolNodeSet[-1]
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_restart,
                                            stopNode=True)
    looper.removeProdable(node_to_restart)
    old_completed_view_no = get_last_completed_view_no(txnPoolNodeSet[:-1])
    print(old_completed_view_no)
    ensure_view_change(looper, txnPoolNodeSet[:-1])
    ensureElectionsDone(looper,
                        txnPoolNodeSet[:-1],
                        customTimeout=tconf.NEW_VIEW_TIMEOUT)
    current_completed_view_no = get_last_completed_view_no(txnPoolNodeSet[:-1])
    assert current_completed_view_no > old_completed_view_no
    print(current_completed_view_no)

    # Delay NEW_VIEW messages for all nodes
    for node in txnPoolNodeSet[:-1]:
        node.nodeIbStasher.delay(nv_delay(1000))
    ensure_view_change(looper, txnPoolNodeSet[:-1])

    # Start the stopped node while the other nodes are doing a view change
    node_to_restart = start_stopped_node(node_to_restart, looper, tconf, tdir,
                                         allPluginsPath)
    node_to_restart.nodeIbStasher.delay(nv_delay(1000))
    # Check that the restarted node uses the last completed view_no from the pool instead of the proposed one
    looper.run(
        eventually(complete_propagate_primary,
                   node_to_restart,
                   current_completed_view_no,
                   timeout=tconf.NEW_VIEW_TIMEOUT))
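

# Hedged sketch (hypothetical, not the helper imported by the test above):
# complete_propagate_primary is expected to verify that the restarted node adopted
# the pool's last completed view_no rather than the view_no proposed before the restart.
def _complete_propagate_primary_sketch(node, expected_view_no):
    # The restarted node should have reached the pool's completed view ...
    assert node.viewNo == expected_view_no
    # ... and should not be stuck in an unfinished view change.
    assert not node.view_change_in_progress
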
# Example 2
def test_view_change_retry_by_timeout(txnPoolNodeSet, looper, tconf, setup,
                                      sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, nv_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)

        # The first view change should fail because of the delayed NEW_VIEW
        # messages. This then leads to the new view change that we need.
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper,
                                nodes=txnPoolNodeSet,
                                customTimeout=1.5 * NEW_VIEW_TIMEOUT)

    # Now that NEW_VIEW messages are unblocked, the view change should finish successfully
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout callback must have been called exactly once
    check_watchdog_called_expected_times(txnPoolNodeSet,
                                         timeout_callback_stats, 1)

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
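

# Hedged sketch (hypothetical stand-in, not necessarily the start_view_change used above):
# the helper is assumed to trigger a view change on every node, in the same way other
# examples in this file do it via on_master_degradation.
def _start_view_change_sketch(nodes, proposed_view_no):
    # proposed_view_no is the view the pool is expected to reach; waiting for it
    # is done separately (e.g. via waitForViewChange), so it is unused here.
    for n in nodes:
        n.view_changer.on_master_degradation()
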
def test_watermarks_after_view_change(tdir, tconf,
                                      looper,
                                      txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_client):
    """
    Delay Commit, Checkpoint, InstanceChange and NewView messages for lagging_node.
    Start ViewChange.
    Check that ViewChange finished.
    Reset delays.
    Check that lagging_node can order transactions and has the same data as the other nodes.
    """
    lagging_node = txnPoolNodeSet[-1]
    lagging_node.master_replica.config.LOG_SIZE = LOG_SIZE
    start_view_no = lagging_node.viewNo
    with delay_rules(lagging_node.nodeIbStasher, cDelay(), chk_delay(), icDelay(), nv_delay()):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper,
                          txnPoolNodeSet[:-1],
                          expectedViewNo=start_view_no + 1,
                          customTimeout=waits.expectedPoolViewChangeStartedTimeout(len(txnPoolNodeSet)))
        ensure_all_nodes_have_same_data(looper, txnPoolNodeSet[:-1])
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 6)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
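

# Note on watermarks (a sketch assuming plenum's usual convention that the high
# watermark H equals the low watermark h plus LOG_SIZE): shrinking LOG_SIZE above
# narrows the [h, H] ordering window that the lagging node must respect after the
# view change. A hypothetical check of that relation:
def _check_watermark_window_sketch(replica, expected_log_size):
    assert replica.H - replica.h == expected_log_size
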
# Example 4
def test_multiple_view_change_retries_by_timeouts(
        txnPoolNodeSet, looper, tconf, setup,
        sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted each time
    the previous one times out
    """
    _, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, nv_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)

        # Wait until timeout callback is called 3 times
        looper.run(eventually(check_watchdog_called_expected_times,
                              txnPoolNodeSet, timeout_callback_stats, 3,
                              retryWait=1,
                              timeout=3 * VIEW_CHANGE_TIMEOUT + 2))

        # View changes should fail
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet, customTimeout=1)

    # This view change must be completed with no problems
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # 4 view changes must have been initiated (initial one + 3 retries)
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 4

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)
def test_delayed_instance_changes_after_vcd_for_next_view(
        looper, txnPoolNodeSet):
    '''
    A node is doing a view change to view=1, while the other nodes have already finished the view change to view=2.
    The node receives a quorum of VCD messages for view=2 before a quorum of InstanceChange messages for view=2.
    Nevertheless, the node should not start a view change to view=2 without a quorum of InstanceChanges,
    that is, it should not go into propagate-primary mode since it is already in a view change state.
    The node should eventually finish the view change to view=2 once it receives all VCD and IC msgs for view=2.
    '''
    nodes = txnPoolNodeSet
    slow_node = nodes[-1]
    fast_nodes = [n for n in nodes if n != slow_node]
    slow_stasher = slow_node.nodeIbStasher

    # 1. DO FIRST VIEW CHANGE

    # delay VCD for the first ViewChange
    with delay_rules(slow_stasher, nv_delay()):
        # Trigger view change
        trigger_view_change(nodes)
        waitForViewChange(looper, nodes, expectedViewNo=1)

        # make sure view change is finished on all nodes except the slow one
        ensureElectionsDone(looper,
                            fast_nodes,
                            instances_list=range(3),
                            customTimeout=30)

        # drop all delayed VCD messages for view=1
        slow_stasher.drop_delayeds()

    # 2. DO SECOND VIEW CHANGE

    # delay InstanceChanges
    # so that the slow node receives VCD for view=2 before
    # a quorum of InstanceChanges for that view, while still doing the view change to view=1
    with delay_rules(slow_stasher, icDelay()):

        # Trigger view change
        trigger_view_change(nodes)
        waitForViewChange(looper, fast_nodes, expectedViewNo=2)

        # make sure view change is finished on all nodes except the slow one
        ensureElectionsDone(looper, fast_nodes, instances_list=range(3))

        # slow node is still on view=1
        assert slow_node.viewNo == 1
        assert slow_node.view_change_in_progress

        # make sure that the slow node received VCD msgs for view=2
        # but didn't receive IC msgs for view=2
        # check_vcd_msgs(slow_node, expected_view_no=2, expected_count=len(fast_nodes), )
        check_no_ic_msgs(slow_node, 2, fast_nodes)

    # 3. RESET DELAYS AND CHECK

    waitForViewChange(looper, nodes, expectedViewNo=2)
    ensureElectionsDone(looper, nodes)
    assert not slow_node.view_change_in_progress
    ensure_all_nodes_have_same_data(looper, nodes=nodes)
def test_prepare_in_queue_before_vc(looper, txnPoolNodeSet, sdk_wallet_steward,
                                    sdk_pool_handle):
    """
    Test steps:
    1. Sent N random requests.
    2. Patching processNodeInBox method for node Delta.
       This method will process only not Prepare messages and store in nodeInBox queue Prepare messages
    3. Sent one request and check, that all Prepares are stored in nodeInBox queue and there is quorum of it
    4. Compare last_ordered_3pc_key and last_prepared_certificate. Last_prepared_certificate must be greater then last ordered
    5. ppSeqNo in last_prepared_certificate must be at least as ppSeqNo for queued Prepares msgs in nodeInBox queue
    """
    def chk_quorumed_prepares_count(prepares, count):
        pp_quorum = slow_node.quorums.prepare.value
        assert len([
            pp for key, pp in prepares.items()
            if prepares.hasQuorum(pp.msg, pp_quorum)
        ]) == count
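    # For example, in a 4-node pool (f == 1) the Prepare quorum above is assumed to be
    # n - f - 1 == 2, i.e. two matching Prepares per batch (the usual plenum quorum arithmetic).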

    def patched_start_view_change(self, *args, **kwargs):
        self.node.processNodeInBox = functools.partial(
            TestNode.processNodeInBox, self.node)
        ViewChanger.start_view_change(self, *args, **kwargs)
        while stashed_msgs:
            self.node.nodestack.rxMsgs.append(stashed_msgs.popleft())

    """Send REQ_COUNT txns"""
    slow_node = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, REQ_COUNT)
    """Check that there is REQ_COUNT prepares with quorum in queue"""
    chk_quorumed_prepares_count(
        slow_node.master_replica._ordering_service.prepares, REQ_COUNT)
    """Patch processNodeInBox method for saving Prepares in nodeInBox queue"""
    not_processing_prepare(slow_node)
    """Send 1 txn"""
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, REQ_COUNT_AFTER_SLOW)

    chk_quorumed_prepares_count(
        slow_node.master_replica._ordering_service.prepares, REQ_COUNT)
    """Get last ordered 3pc key (should be (0, REQ_COUNT))"""
    ordered_lpc = slow_node.master_replica.last_ordered_3pc
    """Delay view_change_done messages"""
    slow_node.nodeIbStasher.delay(nv_delay(100))
    """Patch on_view_change_start method for reverting processNodeInBox method"""
    slow_node.view_changer.start_view_change = functools.partial(
        patched_start_view_change, slow_node.view_changer)
    """Initiate view change"""
    ensure_view_change(looper, txnPoolNodeSet)
    """Last prepared certificate should take into account Prepares in nodeInBox queue too"""
    expected_lpc = slow_node.master_replica.last_prepared_before_view_change
    assert expected_lpc == (0, REQ_COUNT)
    """Last ordered key should be equal to last_prepared_before_view_change because we reorder reqs"""
    assert compare_3PC_keys(ordered_lpc, expected_lpc) == 0
# Example 7
def test_view_change_done_delayed(txnPoolNodeSet, looper, sdk_pool_handle,
                                  sdk_wallet_client):
    """
    A node is slow and so lags behind the other nodes. After a view change it catches up,
    but its view change messages are also delayed; the node should start
    participating only once it has caught up and received a quorum of ViewChangeDone messages.
    """
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    slow_node = nprs[-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 10
    delay_vcd = 25
    delay_3pc_messages([slow_node], 0, delay_3pc)
    slow_node.nodeIbStasher.delay(nv_delay(delay_vcd))

    def chk(node):
        assert node.isParticipating
        assert None not in {r.isPrimary for r in node.replicas.values()}
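        # i.e. every replica (master and backups) has decided on its primary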

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         5 * 4, 4)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the slow node successfully completes catchup
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Other nodes complete view change, select primary and participate
    for node in other_nodes:
        looper.run(eventually(chk, node, retryWait=1))

    # Since the slow node catches up successfully, it learns about the latest primary
    assert slow_node.isParticipating
    assert {r.isPrimary for r in slow_node.replicas.values()} != {None}
    assert all(slow_node.viewNo == node.viewNo for node in other_nodes)

    # Send requests to make sure pool is functional
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Repair network
    slow_node.reset_delays_and_process_delayeds()

    # `slow_node` selects the primary and participates
    looper.run(eventually(chk, slow_node, retryWait=1))

    # The slow node processes the requests received while there was no primary
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Send more requests and compare data of all nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_process_three_phase_msg_and_stashed_future_view(
        txnPoolNodeSet, looper, tconf, sdk_pool_handle, sdk_wallet_steward):
    """
    1. Delay ViewChangeDone messages for the slow_node.
    2. Start view change on all nodes.
    3. Order a new request.
    4. Check that slow_node could not order this request and stashed all 3pc messages
    and other nodes ordered.
    6. Reset delays.
    7. Check that the last request is ordered on the slow_node and stashed messages were removed.
    """
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = txnPoolNodeSet[:-1]
    view_no = slow_node.viewNo
    old_stashed = {
        inst_id: r.stasher.stash_size(STASH_VIEW_3PC)
        for inst_id, r in slow_node.replicas.items()
    }
    with delay_rules([
            slow_node.nodeIbStasher,
    ], msg_rep_delay(types_to_delay=[PREPREPARE, PREPARE, COMMIT])):
        with delay_rules([
                slow_node.nodeIbStasher,
        ], nv_delay()):
            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()
            waitForViewChange(looper,
                              fast_nodes,
                              expectedViewNo=view_no + 1,
                              customTimeout=2 * tconf.NEW_VIEW_TIMEOUT)
            ensureElectionsDone(looper=looper,
                                nodes=fast_nodes,
                                instances_list=range(
                                    fast_nodes[0].requiredNumberOfInstances))
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_steward, 1)
            assert slow_node.view_change_in_progress
            # 1 - pre-prepare msg
            # (len(txnPoolNodeSet) - 2) - prepare msgs
            # (len(txnPoolNodeSet) - 1) - commit msgs
            stashed_master_messages = 2 * (1 + (len(txnPoolNodeSet) - 2) +
                                           (len(txnPoolNodeSet) - 1))
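            # Worked example (sketch): with a 4-node pool this is
            # 2 * (1 + (4 - 2) + (4 - 1)) == 12 stashed messages on the master replica;
            # the factor of 2 presumably also counts the copies delayed via msg_rep_delay above.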
            assert slow_node.master_replica.stasher.stash_size(
                STASH_VIEW_3PC) == old_stashed[0] + stashed_master_messages

        def chk():
            for inst_id, r in slow_node.replicas.items():
                assert r.last_ordered_3pc[1] == 2
                assert r.stasher.stash_size(STASH_VIEW_3PC) == 0

        looper.run(eventually(chk))
        waitNodeDataEquality(looper, slow_node, *fast_nodes)
def test_view_change_timeout_reset_on_next_view(txnPoolNodeSet, looper, tconf):
    # Check that all nodes are in view 0
    assert all(n.viewNo == 0 for n in txnPoolNodeSet)

    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    with delay_rules(stashers, nv_delay()):
        # Start first view change
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=1)
        looper.runFor(0.6 * NEW_VIEW_TIMEOUT)

        # Start second view change
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=2)
        looper.runFor(0.6 * NEW_VIEW_TIMEOUT)

    # Ensure only 2 view changes happened
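    # (Sketch of the reasoning: the two waits of 0.6 * NEW_VIEW_TIMEOUT above add up to
    # more than NEW_VIEW_TIMEOUT, so if the timeout were not reset when the second view
    # change started, a retry would have fired and viewNo would end up above 2.)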
    ensureElectionsDone(looper, txnPoolNodeSet)
    for n in txnPoolNodeSet:
        assert n.viewNo == 2
# Example 10
def do_view_change_with_propagate_primary_on_one_delayed_node(
        slow_node, nodes, looper, sdk_pool_handle, sdk_wallet_client):

    slow_stasher = slow_node.nodeIbStasher

    fast_nodes = [n for n in nodes if n != slow_node]

    stashers = [n.nodeIbStasher for n in nodes]

    # Get last prepared certificate in pool
    lpc = last_prepared_certificate(nodes)
    # Get pool current view no
    view_no = lpc[0]

    with delay_rules(slow_stasher, icDelay()):
        with delay_rules(slow_stasher, nv_delay()):
            with delay_rules(stashers, cDelay()):
                # Send request
                request = sdk_send_random_request(looper, sdk_pool_handle,
                                                  sdk_wallet_client)

                # Wait until this request is prepared on N-f nodes
                looper.run(
                    eventually(check_last_prepared_certificate_on_quorum,
                               nodes, (lpc[0], lpc[1] + 1)))

                # Trigger view change
                for n in nodes:
                    n.view_changer.on_master_degradation()

                # Wait until the view change is completed on all nodes except the slow one
                waitForViewChange(
                    looper,
                    fast_nodes,
                    expectedViewNo=view_no + 1,
                    customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                        len(nodes)))
                wait_for_elections_done_on_given_nodes(
                    looper,
                    fast_nodes,
                    getRequiredInstances(len(nodes)),
                    timeout=waits.expectedPoolElectionTimeout(len(nodes)))

            # Now all the nodes receive Commits
            # The slow node will accept Commits and order the 3PC-batch in the old view
            looper.runFor(
                waits.expectedOrderingTime(getNoInstances(len(nodes))))

    # Now slow node receives NewView
    waitForViewChange(looper, [slow_node],
                      expectedViewNo=view_no + 1,
                      customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                          len(nodes)))
    wait_for_elections_done_on_given_nodes(
        looper, [slow_node],
        getRequiredInstances(len(nodes)),
        timeout=waits.expectedPoolElectionTimeout(len(nodes)))

    # Now the slow node receives InstanceChanges but discards them because it has
    # already started propagating the primary for the same view.

    # Finish request gracefully
    sdk_get_reply(looper, request)
# Example 11
def check_view_change_adding_new_node(looper, tdir, tconf, allPluginsPath,
                                      txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_client,
                                      sdk_wallet_steward,
                                      slow_nodes=[],
                                      delay_commit=False,
                                      delay_pre_prepare=False,
                                      trigger_view_change_manually=False):
    # Pre-requisites: viewNo=3, Primary is Node4
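    # (Assuming round-robin primary selection over the node registry: after three view
    #  changes viewNo == 3 and the primary index is 3 % 4, i.e. the fourth node, Node4.)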
    for viewNo in range(1, 4):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, viewNo)
        ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)

    # Delay 3PC messages on slow nodes
    fast_nodes = [node for node in txnPoolNodeSet if node not in slow_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    slow_stashers = [slow_node.nodeIbStasher for slow_node in slow_nodes]
    delayers = []
    if delay_pre_prepare:
        delayers.append(ppDelay())
        delayers.append(msg_rep_delay(types_to_delay=[PREPREPARE]))
    if delay_commit:
        delayers.append(cDelay())

    # Add a new Steward before delaying; otherwise the slow node may reject the NODE client request
    # because it cannot authenticate it, as the Steward txn has not yet been applied on it
    new_steward_wallet_handle = sdk_add_new_nym(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                alias='New_Steward',
                                                role=STEWARD_STRING)

    # Delay NewView messages to make sure that all old nodes start the view change,
    # but finish the view change only when no Commits are delayed (otherwise the slow node will not be able to select backup primaries)
    with delay_rules(all_stashers, nv_delay()):
        with delay_rules_without_processing(slow_stashers, *delayers):
            # Add Node5
            new_node = sdk_add_new_node(
                looper,
                sdk_pool_handle,
                new_steward_wallet_handle,
                'Epsilon',
                tdir,
                tconf,
                allPluginsPath,
                autoStart=True,
                nodeClass=TestNode,
                do_post_node_creation=None,
                services=[VALIDATOR],
                wait_till_added=True)
            looper.run(checkNodesConnected(fast_nodes + [new_node]))
            old_set = list(txnPoolNodeSet)
            txnPoolNodeSet.append(new_node)

            if trigger_view_change_manually:
                trigger_view_change(txnPoolNodeSet)

            # make sure view change is started and finished eventually
            waitForViewChange(looper, old_set, 4)
    ensureElectionsDone(looper, old_set)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
def test_finish_view_change_with_incorrect_primaries_list(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, tdir,
        tconf, allPluginsPath):
    """
    This test imitates situation when one of nodes is lagged.
    It missed txn for adding new node and view_change after this.
    After that lagged node started the next view_change with other nodes,
    but it has different committed node_reg and selected other primaries.
    In this case we expect, that lagged node will complete view_change with other primaries
    and will start catchup by Checkpoints because will not be able to ordering.

    """
    def complete_vc(node):
        assert not node.view_change_in_progress

    view_no = checkViewNoForNodes(txnPoolNodeSet)

    # Delta is lagged
    lagging_node = txnPoolNodeSet[3]
    fast_nodes = txnPoolNodeSet[:3] + txnPoolNodeSet[4:]

    # Force 5 view changes so that we have viewNo == 5 and Zeta is the primary.
    for _ in range(5):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, view_no + 1)
        ensureElectionsDone(looper, txnPoolNodeSet)
        view_no = checkViewNoForNodes(txnPoolNodeSet)

    with delay_rules_without_processing(lagging_node.nodeIbStasher,
                                        msg_rep_delay(), icDelay(), vc_delay(),
                                        nv_delay(), cDelay(), ppDelay(),
                                        pDelay()):

        # Add a new node; this should start a view_change because a NODE txn is ordered
        _, theta = sdk_add_new_steward_and_node(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                'Theta_Steward',
                                                'Theta',
                                                tdir,
                                                tconf,
                                                allPluginsPath=allPluginsPath)
        txnPoolNodeSet.append(theta)
        fast_nodes.append(theta)

        looper.run(checkNodesConnected(fast_nodes))
        ensure_all_nodes_have_same_data(looper, fast_nodes)

        waitForViewChange(looper, fast_nodes, view_no + 1)
        ensureElectionsDone(looper, fast_nodes)

    assert lagging_node.viewNo != fast_nodes[0].viewNo
    assert fast_nodes[0].viewNo == view_no + 1

    current_view_no = checkViewNoForNodes(fast_nodes)
    expected_view_no = current_view_no + 1
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, expected_view_no)
    ensureElectionsDone(looper, fast_nodes)

    looper.run(eventually(complete_vc, lagging_node, timeout=60))
    assert lagging_node.viewNo == expected_view_no

    # We assume that after receiving 2 Checkpoints the lagging node will start catchup and elect the right primaries

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2 * CHK_SIZE)
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
# Example 13
def test_checkpoints_after_view_change(tconf, looper, chkFreqPatched,
                                       reqs_for_checkpoint, txnPoolNodeSet,
                                       sdk_pool_handle, sdk_wallet_client):
    '''
    Tests that there are no infinite catchups if a quorum of stashed
    checkpoints is received during the view change
    '''

    # Prepare nodes
    lagging_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]

    initial_all_ledgers_caught_up = lagging_node.spylog.count(
        Node.allLedgersCaughtUp)
    initial_start_catchup = lagging_node.spylog.count(Node.start_catchup)

    with delay_rules(lagging_node.nodeIbStasher, lsDelay()):
        with delay_rules(lagging_node.nodeIbStasher, nv_delay()):
            ensure_view_change(looper, txnPoolNodeSet)
            looper.run(
                eventually(lambda: assertExp(lagging_node.
                                             view_change_in_progress is True),
                           timeout=waits.expectedPoolCatchupTime(
                               len(txnPoolNodeSet))))
            ensureElectionsDone(looper=looper,
                                nodes=rest_nodes,
                                instances_list=range(2))

            assert all(n.view_change_in_progress is False for n in rest_nodes)
            assert lagging_node.view_change_in_progress is True

            # make sure that more requests are ordered while the view change is in progress on the lagging node;
            # stash enough stable checkpoints for starting a catch-up
            num_checkpoints = Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1
            num_reqs = reqs_for_checkpoint * num_checkpoints + 1
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, num_reqs)
            looper.run(
                eventually(check_last_ordered_3pc_on_master, rest_nodes,
                           (1, num_reqs + 1)))
            looper.run(
                eventually(check_last_ordered_3pc_on_backup, rest_nodes,
                           (1, num_reqs + 1)))

            # all good nodes stabilized checkpoint
            looper.run(
                eventually(check_for_nodes, rest_nodes,
                           check_stable_checkpoint, 10))

            assert get_stashed_checkpoints(
                lagging_node) == num_checkpoints * len(rest_nodes)
            # lagging node is doing the view change and stashing all checkpoints
            assert lagging_node.view_change_in_progress is True
            looper.run(
                eventually(lambda: assertExp(
                    get_stashed_checkpoints(lagging_node) == 2 * len(rest_nodes
                                                                     )),
                           timeout=waits.expectedPoolCatchupTime(
                               len(txnPoolNodeSet))))

    # check that view change is finished
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    assert lagging_node.view_change_in_progress is False

    # check that last_ordered is set
    looper.run(
        eventually(check_last_ordered_3pc_on_master, [lagging_node],
                   (1, num_reqs + 1)))
    looper.run(
        eventually(check_last_ordered_3pc_on_backup, [lagging_node],
                   (1, num_reqs + 1)))

    # check that checkpoint is stabilized for master
    looper.run(
        eventually(check_for_instance, [lagging_node], 0,
                   check_stable_checkpoint, 10))

    # check that the catch-up didn't happen
    assert lagging_node.mode == Mode.participating
    assert lagging_node.spylog.count(
        Node.allLedgersCaughtUp) == initial_all_ledgers_caught_up
    assert lagging_node.spylog.count(
        Node.start_catchup) == initial_start_catchup

    waitNodeDataEquality(looper, *txnPoolNodeSet, customTimeout=5)
def test_view_change_add_one_node_uncommitted_by_next_primary(
        looper, tdir, tconf, allPluginsPath, txnPoolNodeSet, sdk_pool_handle,
        sdk_wallet_client, sdk_wallet_steward):
    # 1. Pre-requisites: viewNo=2, Primary is Node3
    for viewNo in range(1, 3):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, viewNo)
        ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)

    # 2. Add Steward for new Node
    new_steward_wallet_handle = sdk_add_new_nym(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                alias="testClientSteward" +
                                                randomString(3),
                                                role=STEWARD_STRING)

    # 3. Send txn to add Node5
    # It will not be proposed and ordered by the current Primary, but will be proposed by the next one in the new view
    # Make sure that the request is propagated by the next Primary
    old_state_root_hash = txnPoolNodeSet[0].stateRootHash(
        ledgerId=POOL_LEDGER_ID, isCommitted=False)
    primary_node = getPrimaryReplica(txnPoolNodeSet).node
    next_primary = txnPoolNodeSet[-1]
    with delay_rules_without_processing(primary_node.nodeIbStasher,
                                        ppgDelay()):
        sdk_add_new_node(looper,
                         sdk_pool_handle,
                         new_steward_wallet_handle,
                         new_node_name="Psi",
                         tdir=tdir,
                         tconf=tconf,
                         allPluginsPath=allPluginsPath,
                         autoStart=True,
                         nodeClass=TestNode,
                         do_post_node_creation=None,
                         services=[VALIDATOR],
                         wait_till_added=False)
        looper.run(eventually(check_node_txn_propagated, [next_primary]))
        check_node_txn_not_applied(txnPoolNodeSet, old_state_root_hash)

    # 4. Trigger a view change to view=3
    # Make sure that only the next Primary (Node4) finishes View Change to view=3
    slow_nodes = txnPoolNodeSet[:3]
    fast_nodes = [next_primary]
    slow_stashers = [slow_node.nodeIbStasher for slow_node in slow_nodes]
    with delay_rules_without_processing(
            slow_stashers, nv_delay(),
            msg_rep_delay(types_to_delay=[NEW_VIEW])):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, 3)

        # view change is finished on Node4 only
        looper.run(eventually(check_view_change_done, fast_nodes, 3))
        for n in slow_nodes:
            assert n.master_replica._consensus_data.waiting_for_new_view

        # wait till fast nodes apply the Node txn in the new View (Node4 creates a new batch with it)
        looper.run(
            eventually(check_node_txn_applied, fast_nodes,
                       old_state_root_hash))
        check_node_txn_not_applied(slow_nodes, old_state_root_hash)

    # 5. Trigger view change to view=4, and make sure it's finished properly
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, 4)
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=35)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)