def test_watermarks_after_view_change(tdir, tconf,
                                      looper,
                                      txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_client):
    """
    Delay Commit, Checkpoint, InstanceChange and NewView messages for lagging_node.
    Start a ViewChange.
    Check that the ViewChange finished.
    Reset delays.
    Check that lagging_node can order transactions and has the same data as the other nodes.
    """
    lagging_node = txnPoolNodeSet[-1]
    lagging_node.master_replica.config.LOG_SIZE = LOG_SIZE
    start_view_no = lagging_node.viewNo
    with delay_rules(lagging_node.nodeIbStasher, cDelay(), chk_delay(), icDelay(), nv_delay()):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper,
                          txnPoolNodeSet[:-1],
                          expectedViewNo=start_view_no + 1,
                          customTimeout=waits.expectedPoolViewChangeStartedTimeout(len(txnPoolNodeSet)))
        ensure_all_nodes_have_same_data(looper, txnPoolNodeSet[:-1])
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 6)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
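# trigger_view_change is an external test helper not shown in these examples. A minimal
# sketch of the assumed behavior, based on how other examples in this collection trigger
# a view change (n.view_changer.on_master_degradation()); the *_sketch name marks it as
# an illustration, not the real helper:
def trigger_view_change_sketch(nodes):
    # every node votes for a view change as if it detected master degradation
    for n in nodes:
        n.view_changer.on_master_degradation()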
def test_delayed_instance_changes_after_vcd_for_next_view(
        looper, txnPoolNodeSet):
    '''
    A node is doing a view change to view=1, while the other nodes have already finished the view change to view=2.
    The node receives a quorum of VCD messages for view=2 before a quorum of InstanceChange messages for view=2.
    Nevertheless, the node should not start a view change to view=2 without a quorum of InstanceChanges,
    that is, it should not go into propagate-primary mode since it's already in a view change state.
    The node should eventually finish the view change to view=2 once it receives all VCD and InstanceChange msgs for view=2.
    '''
    nodes = txnPoolNodeSet
    slow_node = nodes[-1]
    fast_nodes = [n for n in nodes if n != slow_node]
    slow_stasher = slow_node.nodeIbStasher

    # 1. DO FIRST VIEW CHANGE

    # delay VCD for the first ViewChange
    with delay_rules(slow_stasher, nv_delay()):
        # Trigger view change
        trigger_view_change(nodes)
        waitForViewChange(looper, nodes, expectedViewNo=1)

        # make sure view change is finished on all nodes except the slow one
        ensureElectionsDone(looper,
                            fast_nodes,
                            instances_list=range(3),
                            customTimeout=30)

        # drop all VCD to view=1
        slow_stasher.drop_delayeds()

    # 2. DO SECOND VIEW CHANGE

    # delay InstanceChanges so that the slow node receives VCD for view=2 before
    # a quorum of InstanceChanges for that view, while still doing the view change to view=1
    with delay_rules(slow_stasher, icDelay()):

        # Trigger view change
        trigger_view_change(nodes)
        waitForViewChange(looper, fast_nodes, expectedViewNo=2)

        # make sure view change is finished on all nodes except the slow one
        ensureElectionsDone(looper, fast_nodes, instances_list=range(3))

        # slow node is still on view=1
        assert slow_node.viewNo == 1
        assert slow_node.view_change_in_progress

        # make sure that the slow node received VCD msgs for view=2
        # and didn't receive InstanceChange msgs for view=2
        # check_vcd_msgs(slow_node, expected_view_no=2, expected_count=len(fast_nodes), )
        check_no_ic_msgs(slow_node, 2, fast_nodes)

    # 3. RESET DELAYS AND CHECK

    waitForViewChange(looper, nodes, expectedViewNo=2)
    ensureElectionsDone(looper, nodes)
    assert not slow_node.view_change_in_progress
    ensure_all_nodes_have_same_data(looper, nodes=nodes)
def test_resend_instance_change_messages(looper,
                                         txnPoolNodeSet,
                                         tconf,
                                         sdk_wallet_steward,
                                         sdk_pool_handle):
    primary_node = txnPoolNodeSet[0]
    old_view_no = checkViewNoForNodes(txnPoolNodeSet, 0)
    assert primary_node.master_replica.isPrimary
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(3 * tconf.INSTANCE_CHANGE_TIMEOUT))
    assert set([n.view_changer.instance_change_rounds for n in txnPoolNodeSet]) == {0}
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_node,
                                            stopNode=False)
    txnPoolNodeSet.remove(primary_node)
    looper.run(eventually(partial(check_count_connected_node, txnPoolNodeSet, 4),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    looper.runFor(2*tconf.INSTANCE_CHANGE_TIMEOUT)
    assert set([n.view_changer.instance_change_rounds for n in txnPoolNodeSet]) == {1}

    looper.runFor(tconf.INSTANCE_CHANGE_TIMEOUT)
    looper.run(eventually(partial(checkViewNoForNodes, txnPoolNodeSet, expectedViewNo=old_view_no + 1),
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
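# check_count_connected_node is an external helper; a hedged sketch of the assumed check.
# The connectedNodeCount attribute is an assumption about the node API, not confirmed by
# these examples:
def check_count_connected_node_sketch(nodes, expected_count):
    for node in nodes:
        assert node.connectedNodeCount == expected_count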
def test_no_propagated_future_view_change_while_view_change(
        txnPoolNodeSet, looper):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate view change in progress
    lagged_node.view_changer.view_change_in_progress = True
    old_view_no = checkViewNoForNodes([lagged_node])

    initial_vhdc = \
        lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)

    # delay INSTANCE CHANGE on the lagged node, so all nodes except the lagging one finish the View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper,
                                   nodes=other_nodes,
                                   numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # check that the lagged node received 3 future VCD messages but didn't start a new view change
        assert len(other_nodes) + initial_vhdc ==\
               lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)
        assert old_view_no == checkViewNoForNodes([lagged_node])
def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper,
                                                       mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE CHANGE on the lagged node, so all nodes except the lagging one finish the View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper,
                                   nodes=other_nodes,
                                   numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

        # emulate finishing of catchup by setting Participating status
        lagged_node.mode = Mode.participating

        # make sure that View Change happened on lagging node
        waitForViewChange(looper, [lagged_node],
                          expectedViewNo=old_view_no + 1,
                          customTimeout=10)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
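# check_no_view_change is an external helper; a minimal sketch of the behavior assumed
# above (the wait window and the attribute used here are assumptions, not the real code):
def check_no_view_change_sketch(looper, node, wait=3):
    # give the node a chance to (incorrectly) start a view change, then verify it didn't
    looper.runFor(wait)
    assert not node.view_changer.view_change_in_progress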
def test_resend_inst_ch_in_progress_v_ch(txnPoolNodeSet, looper,
                                         sdk_pool_handle, sdk_wallet_client,
                                         tdir, tconf, allPluginsPath):
    old_view = viewNoForNodes(txnPoolNodeSet)

    # disconnect two nodes; one of them would be the next master primary in case of a view change.
    for node in [txnPoolNodeSet[1], txnPoolNodeSet[-1]]:
        disconnect_node_and_ensure_disconnected(looper,
                                                txnPoolNodeSet,
                                                node,
                                                stopNode=True)
        looper.removeProdable(node)
        txnPoolNodeSet.remove(node)

    # delay I_CH on every node except last one and initiate view change
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet[:-1]]
    with delay_rules_without_processing(stashers, icDelay(viewNo=2)):
        ensure_view_change(looper, txnPoolNodeSet)
        looper.runFor(tconf.NEW_VIEW_TIMEOUT + 1)

    # checks
    def checks():
        assert all(not node.view_change_in_progress for node in txnPoolNodeSet)
        assert all(node.viewNo == old_view + 2 for node in txnPoolNodeSet)

    looper.run(
        eventually(checks, timeout=tconf.NEW_VIEW_TIMEOUT * 2.5, retryWait=1))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_propagate_primary_after_primary_restart_view_0(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward,
        tdir, allPluginsPath):
    """
    Delay instance change msgs to prevent view change during primary restart
    to test propagate primary for primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores all
    indexes correctly
    case viewNo == 0
    """
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (old_ppseqno > 0)

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            old_primary,
                                            stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary,
                                       looper,
                                       tconf,
                                       tdir,
                                       allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [
        i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name
    ][0]
    txnPoolNodeSet[idx] = restartedNode

    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert (new_viewNo == old_viewNo)

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert (new_primary.name == old_primary.name)

    # check that ppSeqNo is the same across nodes
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (new_ppseqno > old_ppseqno)
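# _get_ppseqno is not shown here; a hedged sketch of what it appears to do given the
# assertions above: return the single ppSeqNo that all nodes have ordered up to, using
# the same last_ordered_3pc attribute other examples in this collection read:
def _get_ppseqno_sketch(nodes):
    seq_nos = {node.master_replica.last_ordered_3pc[1] for node in nodes}
    assert len(seq_nos) == 1, "all nodes are expected to be at the same ppSeqNo"
    return seq_nos.pop()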
def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE CHANGE on the lagged node, so all nodes except the lagging one finish the View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

        # emulate finishing of catchup by setting Participating status
        lagged_node.mode = Mode.participating

        # make sure that View Change happened on lagging node
        waitForViewChange(looper, [lagged_node], expectedViewNo=old_view_no + 1,
                          customTimeout=10)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
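# The `mode` fixture is defined elsewhere; a plausible sketch (an assumption) is a pytest
# fixture parametrized over the not-yet-participating catchup states of plenum's Mode enum,
# so the test runs once per non-synced mode (assumes `import pytest` and the Mode enum):
@pytest.fixture(params=[Mode.starting, Mode.discovering, Mode.discovered,
                        Mode.syncing, Mode.synced])
def mode_sketch(request):
    return request.param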
def test_resend_instance_change_messages(looper, txnPoolNodeSet, tconf,
                                         sdk_wallet_steward, sdk_pool_handle):
    primary_node = txnPoolNodeSet[0]
    old_view_no = checkViewNoForNodes(txnPoolNodeSet, 0)
    assert primary_node.master_replica.isPrimary
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(3 * tconf.NEW_VIEW_TIMEOUT))
    check_sent_instance_changes_count(txnPoolNodeSet, 0)
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_node,
                                            stopNode=False)
    txnPoolNodeSet.remove(primary_node)
    looper.run(
        eventually(check_count_connected_node,
                   txnPoolNodeSet,
                   4,
                   timeout=5,
                   acceptableExceptions=[AssertionError]))
    looper.run(
        eventually(check_sent_instance_changes_count,
                   txnPoolNodeSet,
                   1,
                   timeout=2 * tconf.NEW_VIEW_TIMEOUT))

    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet,
                   old_view_no + 1,
                   timeout=3 * tconf.NEW_VIEW_TIMEOUT))
    ensureElectionsDone(looper, txnPoolNodeSet)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
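# check_sent_instance_changes_count is an external helper; a hedged sketch based on the
# spylog-based counting used in other examples in this collection
# (node.view_changer.spylog.getAll('sendInstanceChange')):
def check_sent_instance_changes_count_sketch(nodes, expected_count):
    for node in nodes:
        assert node.view_changer.spylog.count('sendInstanceChange') == expected_count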
def testQueueingReqFromFutureView(delayed_perf_chk, looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client):
    """
    Test if every node queues 3-phase requests (PRE-PREPARE, PREPARE and COMMIT)
    that come from a view which is greater than the current view.
    - Delay reception and processing of view change messages by a non-primary for the master instance
       => it starts receiving 3-phase commit messages for the next view
    """

    lagging_node = get_last_master_non_primary_node(txnPoolNodeSet)
    old_view_no = lagging_node.viewNo

    # Delay processing of InstanceChange and ViewChangeDone so node stashes
    # 3PC messages
    delay_ic = 60
    lagging_node.nodeIbStasher.delay(icDelay(delay_ic))
    lagging_node.nodeIbStasher.delay(vcd_delay(delay_ic))
    logger.debug('{} will delay its view change'.format(lagging_node))

    def chk_fut_view(view_no, is_empty):
        length = len(lagging_node.msgsForFutureViews.get(view_no, ()))
        if is_empty:
            assert length == 0
        else:
            assert length > 0
        return length

    # No messages queued for future view
    chk_fut_view(old_view_no + 1, is_empty=True)
    logger.debug(
        '{} does not have any messages for future views'.format(lagging_node))

    # Every node except the lagging node should do a view change
    ensure_view_change(looper,
                       [n for n in txnPoolNodeSet if n != lagging_node],
                       [lagging_node])

    # send more requests that will be queued for the lagged node
    reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                                    5)
    queued = looper.run(
        eventually(chk_fut_view, old_view_no + 1, False, retryWait=1))
    logger.debug('{} has {} messages for future views'.format(lagging_node, queued))
    sdk_get_replies(looper, reqs)
    # reset delays for the lagging node so that it finally makes the view
    # change
    lagging_node.reset_delays_and_process_delayeds()

    # Eventually no messages queued for future view
    looper.run(
        eventually(chk_fut_view,
                   old_view_no + 1,
                   True,
                   retryWait=1,
                   timeout=delay_ic + 10))
    logger.debug(
        '{} exhausted pending messages for future views'.format(lagging_node))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2)
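# get_last_master_non_primary_node is an external helper; a sketch of the assumed
# behavior, reusing getNonPrimaryReplicas the same way other examples here do:
def get_last_master_non_primary_node_sketch(nodes):
    # the node hosting the last non-primary replica of the master instance (instId 0)
    return getNonPrimaryReplicas(nodes, 0)[-1].node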
def test_audit_multiple_uncommitted_node_regs(looper, tdir, tconf,
                                              allPluginsPath, txnPoolNodeSet,
                                              sdk_pool_handle,
                                              sdk_wallet_client,
                                              sdk_wallet_steward):
    '''
    - Delay COMMITs on 1 node
    - Add 2 more nodes (so that the slow node has multiple uncommitted node txns)
    - Make sure that all nodes have equal state eventually
    '''
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = [node for node in txnPoolNodeSet if node != slow_node]
    slow_stashers = [slow_node.nodeIbStasher]

    # let's ignore view changes for simplicity here
    start_delaying([n.nodeIbStasher for n in txnPoolNodeSet], icDelay())

    with delay_rules(slow_stashers, cDelay()):
        # Add Node5
        new_node = add_new_node(looper,
                                fast_nodes,
                                sdk_pool_handle,
                                sdk_wallet_steward,
                                tdir,
                                tconf,
                                allPluginsPath,
                                name='Psi',
                                wait_till_added=False)
        txnPoolNodeSet.append(new_node)
        start_delaying(new_node.nodeIbStasher, icDelay())

        # Add Node6
        new_node = add_new_node(looper,
                                fast_nodes,
                                sdk_pool_handle,
                                sdk_wallet_steward,
                                tdir,
                                tconf,
                                allPluginsPath,
                                name='Eta',
                                wait_till_added=False)
        txnPoolNodeSet.append(new_node)
        start_delaying(new_node.nodeIbStasher, icDelay())

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=20)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_delayed_instance_changes_after_vcd_for_next_view(looper, txnPoolNodeSet):
    '''
    A node is doing a view change to view=1, while the other nodes have already finished the view change to view=2.
    The node receives a quorum of VCD messages for view=2 before a quorum of InstanceChange messages for view=2.
    Nevertheless, the node should not start a view change to view=2 without a quorum of InstanceChanges,
    that is, it should not go into propagate-primary mode since it's already in a view change state.
    The node should eventually finish the view change to view=2 once it receives all VCD and InstanceChange msgs for view=2.
    '''
    nodes = txnPoolNodeSet
    slow_node = nodes[-1]
    fast_nodes = [n for n in nodes if n != slow_node]
    slow_stasher = slow_node.nodeIbStasher

    # 1. DO FIRST VIEW CHANGE

    # delay VCD for the first ViewChange
    with delay_rules(slow_stasher, vcd_delay()):
        # Trigger view change
        for n in nodes:
            n.view_changer.on_master_degradation()
        waitForViewChange(looper, nodes, expectedViewNo=1)

        # make sure view change is finished on all nodes except the slow one
        ensureElectionsDone(looper, fast_nodes, instances_list=range(3))

        # drop all VCD to view=1
        slow_stasher.drop_delayeds()

    # 2. DO SECOND VIEW CHANGE

    # delay InstanceChanges so that the slow node receives VCD for view=2 before
    # a quorum of InstanceChanges for that view, while still doing the view change to view=1
    with delay_rules(slow_stasher, icDelay()):

        # Trigger view change
        for n in nodes:
            n.view_changer.on_master_degradation()
        waitForViewChange(looper, fast_nodes, expectedViewNo=2)

        # make sure view change is finished on all nodes except the slow one
        ensureElectionsDone(looper, fast_nodes, instances_list=range(3))

        # slow node is still on view=1
        assert slow_node.viewNo == 1
        assert slow_node.view_change_in_progress

        # make sure that the slow node received VCD msgs for view=2
        # and didn't receive InstanceChange msgs for view=2
        check_vcd_msgs(slow_node, expected_view_no=2, expected_count=len(fast_nodes))
        check_no_ic_msgs(slow_node, expected_view_no=2)

    # 3. RESET DELAYS AND CHECK

    waitForViewChange(looper, nodes, expectedViewNo=2)
    ensureElectionsDone(looper, nodes)
    assert not slow_node.view_change_in_progress
    ensure_all_nodes_have_same_data(looper, nodes=nodes)
def test_view_change_during_unstash(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client, tconf):
    slow_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    slow_stasher = slow_node.nodeIbStasher
    other_stashers = [n.nodeIbStasher for n in other_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Prevent ordering of some requests
    start_delaying(all_stashers, delay_3pc(after=7, msgs=(Prepare, Commit)))

    # Stop ordering on slow node and send requests
    slow_node_after_5 = start_delaying(slow_stasher,
                                       delay_3pc(after=5, msgs=Commit))
    slow_node_until_5 = start_delaying(slow_stasher, delay_3pc(after=0))
    reqs_view_0 = sdk_send_random_requests(looper, sdk_pool_handle,
                                           sdk_wallet_client, 8)

    # Make pool order first 2 batches and pause
    pool_after_3 = start_delaying(other_stashers, delay_3pc(after=3))
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))

    # Start catchup, continue ordering everywhere (except two last batches on slow node)
    with delay_rules(slow_stasher, cr_delay()):
        slow_node._do_start_catchup(just_started=False)
        looper.run(eventually(check_catchup_is_started, slow_node))
        stop_delaying_and_process(pool_after_3)
        looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 7))

    # Finish catchup and continue processing on slow node
    looper.run(eventually(check_catchup_is_finished, slow_node))
    stop_delaying_and_process(slow_node_until_5)
    looper.run(eventually(check_nodes_ordered_till, [slow_node], 0, 5))

    # Start view change and allow slow node to get remaining commits
    with delay_rules(all_stashers, icDelay()):
        for node in txnPoolNodeSet:
            node.view_changer.on_master_degradation()
        looper.runFor(0.1)
    stop_delaying_and_process(slow_node_after_5)

    # Ensure that expected number of requests was ordered
    replies = sdk_get_replies(looper, reqs_view_0)
    for rep in replies[:6]:
        sdk_check_reply(rep)

    # Ensure that everything is ok
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
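# check_nodes_ordered_till is not shown; a hedged sketch of the assumed check, reading
# the same last_ordered_3pc attribute the test inspects directly above:
def check_nodes_ordered_till_sketch(nodes, view_no, pp_seq_no):
    for node in nodes:
        assert node.master_replica.last_ordered_3pc >= (view_no, pp_seq_no)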
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including primary) so the node that does not receive enough 3PC messages is
    behind but learns of the view change quickly and starts catchup.
    Other nodes learn of the view change late and thus keep on processing
    requests
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([
            1 for e in slow_node.ledgerManager.spylog.getAll(
                slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID
        ])

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client,
                                        10 * Max3PCBatchSize)

    ensure_view_change(looper,
                       nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
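# delay_3pc_messages([slow_node], 0, delay_3pc) is an external helper; a hedged sketch of
# what it amounts to, assuming the ppDelay/pDelay/cDelay delayers used in other examples
# in this collection:
def delay_3pc_messages_sketch(nodes, inst_id, delay):
    for node in nodes:
        node.nodeIbStasher.delay(ppDelay(delay, inst_id))
        node.nodeIbStasher.delay(pDelay(delay, inst_id))
        node.nodeIbStasher.delay(cDelay(delay, inst_id))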
def do_view_change_with_propagate_primary_on_one_delayed_node(
        slow_node, nodes, looper, sdk_pool_handle, sdk_wallet_client):

    slow_stasher = slow_node.nodeIbStasher

    fast_nodes = [n for n in nodes if n != slow_node]

    stashers = [n.nodeIbStasher for n in nodes]

    # Get last prepared certificate in pool
    lpc = last_prepared_certificate(nodes)
    # Get pool current view no
    view_no = lpc[0]

    with delay_rules(slow_stasher, icDelay()):
        with delay_rules(slow_stasher, vcd_delay()):
            with delay_rules(stashers, cDelay()):
                # Send request
                request = sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)

                # Wait until this request is prepared on N-f nodes
                looper.run(eventually(check_last_prepared_certificate_on_quorum, nodes, (lpc[0], lpc[1] + 1)))

                # Trigger view change
                for n in nodes:
                    n.view_changer.on_master_degradation()

                # Wait until view change is completed on all nodes except slow one
                waitForViewChange(looper,
                                  fast_nodes,
                                  expectedViewNo=view_no + 1,
                                  customTimeout=waits.expectedPoolViewChangeStartedTimeout(len(nodes)))
                wait_for_elections_done_on_given_nodes(looper,
                                                       fast_nodes,
                                                       getRequiredInstances(len(nodes)),
                                                       timeout=waits.expectedPoolElectionTimeout(len(nodes)))

            # Now all the nodes receive Commits
            # The slow node will accept Commits and order the 3PC-batch in the old view
            looper.runFor(waits.expectedOrderingTime(getNoInstances(len(nodes))))

        # Now slow node receives ViewChangeDones
        waitForViewChange(looper,
                          [slow_node],
                          expectedViewNo=view_no + 1,
                          customTimeout=waits.expectedPoolViewChangeStartedTimeout(len(nodes)))
        wait_for_elections_done_on_given_nodes(looper,
                                               [slow_node],
                                               getRequiredInstances(len(nodes)),
                                               timeout=waits.expectedPoolElectionTimeout(len(nodes)))

    # Now the slow node receives InstanceChanges but discards them because it has
    # already started propagating a primary for the same view.

    # Finish request gracefully
    sdk_get_reply(looper, request)
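# check_last_prepared_certificate_on_quorum is an external helper; a hedged sketch of the
# assumed check. The quorum size n - f and the replica method used here are assumptions,
# not confirmed by these examples:
def check_last_prepared_certificate_on_quorum_sketch(nodes, expected_lpc):
    n = len(nodes)
    f = (n - 1) // 3
    prepared = [node for node in nodes
                if node.master_replica.last_prepared_certificate_in_view() == expected_lpc]
    assert len(prepared) >= n - f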
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf,
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including primary) so the node that does not receive enough 3PC messages is
    behind but learns of the view change quickly and starts catchup.
    Other nodes learn of the view change late and thus keep on processing
    requests
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([
            1 for e in slow_node.ledgerManager.spylog.getAll(
                slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID
        ])

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, 10 * Max3PCBatchSize)

    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
def test_propagate_primary_after_primary_restart_view_1(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward, tdir, allPluginsPath):
    """
    Delay instance change msgs to prevent view change during primary restart
    to test propagate primary for primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores all
    indices correctly
    case viewNo > 0
    """

    ensure_view_change(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=1)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (old_ppseqno > 0)

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, old_primary, stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary, looper, tconf, tdir, allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name][0]
    txnPoolNodeSet[idx] = restartedNode

    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert (new_viewNo == old_viewNo)

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert (new_primary.name == old_primary.name)

    # check that ppSeqNo is the same across nodes
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (new_ppseqno > old_ppseqno)
def test_start_view_change_by_vc_msgs(looper,
                                      txnPoolNodeSet,
                                      sdk_wallet_client,
                                      sdk_pool_handle):

    delayed_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]
    with delay_rules_without_processing(delayed_node.nodeIbStasher, icDelay()):
        current_view_no = checkViewNoForNodes(txnPoolNodeSet)
        trigger_view_change(txnPoolNodeSet)
        looper.run(eventually(checkViewNoForNodes, rest_nodes, current_view_no + 1))
        ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
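# Note on the stasher helpers used above (my understanding, not a quote from plenum):
# delay_rules stashes matching messages while the `with` block is active and lets them be
# processed once the block exits, whereas delay_rules_without_processing drops the stashed
# messages on exit, so the delayed node never processes them. That is why the delayed node
# here has to reach view_no + 1 purely from the view-change messages of the other nodes.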
def test_delay_IC_for_next_primary(looper,
                                   txnPoolNodeSet,
                                   sdk_pool_handle,
                                   sdk_wallet_client):
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary_name = get_next_primary_name(txnPoolNodeSet, current_view_no + 1)
    next_primary = [n for n in txnPoolNodeSet if n.name == next_primary_name][0]
    rest_nodes = list(set(txnPoolNodeSet) - {next_primary})
    with delay_rules_without_processing(next_primary.nodeIbStasher, icDelay()):
        trigger_view_change(txnPoolNodeSet)
        looper.run(eventually(checkViewNoForNodes, rest_nodes, current_view_no + 1))
        ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
    assert next_primary.master_replica.isPrimary
def start_stopped_node(stopped_node, looper, tconf, tdirWithPoolTxns,
                       allPluginsPath):
    nodeHa, nodeCHa = HA(*stopped_node.nodestack.ha), HA(
        *stopped_node.clientstack.ha)
    restarted_node = TestNode(stopped_node.name,
                              basedirpath=tdirWithPoolTxns,
                              config=tconf,
                              ha=nodeHa,
                              cliha=nodeCHa,
                              pluginPaths=allPluginsPath)
    looper.add(restarted_node)

    # Even after reconnection INSTANCE_CHANGE messages are received,
    # so delay them enough to simulate a real disconnection. This needs to be fixed
    # soon so that simulating a disconnection drains the transport queues
    restarted_node.nodeIbStasher.delay(icDelay(200))
    return restarted_node
def testPrimaryRecvs3PhaseMessageOutsideWatermarks(perf_chk_patched,
                                                   chkFreqPatched, looper,
                                                   txnPoolNodeSet, client1,
                                                   wallet1, client1Connected,
                                                   reqs_for_logsize):
    """
    One of the primaries starts getting a lot of requests, more than its log size,
    and queues up requests since they would go beyond its watermarks. This
    happens because other nodes are slow in processing its PRE-PREPAREs.
    Eventually this primary will send PRE-PREPAREs for all requests and those
    requests will complete
    """
    tconf = perf_chk_patched
    delay = 5
    instId = 1
    reqs_to_send = 2 * reqs_for_logsize + 1
    logger.debug('Will send {} requests'.format(reqs_to_send))

    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    pr = getPrimaryReplica(txnPoolNodeSet, instId)
    from plenum.server.replica import TPCStat
    orderedCount = pr.stats.get(TPCStat.OrderSent)

    for r in npr:
        r.node.nodeIbStasher.delay(ppDelay(delay, instId))
        r.node.nodeIbStasher.delay(pDelay(delay, instId))

    # do not do any view changes since we're dealing with a non-master instance and
    # might not order all requests if the view is changed;
    # delay for all nodes (both primary and non-primary), since this is a delay
    # on receiving, not sending.
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(icDelay(300))

    tm_exec_1_batch = waits.expectedTransactionExecutionTime(
        len(txnPoolNodeSet))
    batch_count = math.ceil(reqs_to_send / tconf.Max3PCBatchSize)
    total_timeout = (tm_exec_1_batch + delay) * batch_count

    def chk():
        assert orderedCount + batch_count == pr.stats.get(TPCStat.OrderSent)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqs_to_send)
    looper.run(eventually(chk, retryWait=1, timeout=total_timeout))
def start_stopped_node(stopped_node, looper, tconf,
                       tdir, allPluginsPath,
                       delay_instance_change_msgs=True):
    nodeHa, nodeCHa = HA(*stopped_node.nodestack.ha), HA(
        *stopped_node.clientstack.ha)
    config_helper = PNodeConfigHelper(stopped_node.name, tconf, chroot=tdir)
    restarted_node = TestNode(stopped_node.name,
                              config_helper=config_helper,
                              config=tconf,
                              ha=nodeHa, cliha=nodeCHa,
                              pluginPaths=allPluginsPath)
    looper.add(restarted_node)

    # Even after reconnection INSTANCE_CHANGE messages are received,
    # so delay them enough to simulate a real disconnection. This needs to be fixed
    # soon so that simulating a disconnection drains the transport queues
    # TODO: is this still relevant?
    if delay_instance_change_msgs:
        restarted_node.nodeIbStasher.delay(icDelay(200))
    return restarted_node
def test_add_vc_start_msg_during_start_view_change(txnPoolNodeSet,
                                                   looper):
    delayed_node = txnPoolNodeSet[-1]
    current_view_no = delayed_node.viewNo
    proposed_view_no = current_view_no + 1
    delayed_node.nodeIbStasher.delay(vcd_delay(1000))
    delayed_node.nodeIbStasher.delay(icDelay(1000))
    assert delayed_node.view_changer
    ensure_view_change(looper, txnPoolNodeSet[:-1])
    """
    If the view number was incremented, that means that an InstanceChange quorum was achieved
    """
    assert txnPoolNodeSet[0].viewNo == proposed_view_no
    looper.removeProdable(delayed_node)
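    # InstanceChange(view_no, reason): a vote for view 1 with a numeric suspicion code
    # (25 here); three distinct senders give the node an InstanceChange quorum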
    delayed_node.nodeInBox.append((InstanceChange(1, 25), 'Alpha'))
    delayed_node.nodeInBox.append((InstanceChange(1, 25), 'Beta'))
    delayed_node.nodeInBox.append((InstanceChange(1, 25), 'Gamma'))
    delayed_node.processNodeInBox = functools.partial(Node.processNodeInBox, delayed_node)
    looper.run(delayed_node.processNodeInBox())
    looper.run(delayed_node.serviceViewChanger(None))
    assert len(delayed_node.nodeInBox) == 1
    m = delayed_node.nodeInBox.popleft()
    assert isinstance(m[0], ViewChangeStartMessage)
def test_freshness_instance_changes_are_sent_continuosly(
        looper, tconf, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    current_view_no = txnPoolNodeSet[0].viewNo
    for node in txnPoolNodeSet:
        assert node.viewNo == current_view_no

    def check_instance_change_messages(count=1):
        for node in txnPoolNodeSet:
            assert has_freshness_instance_change(node, count)

    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]
    with delay_rules(stashers, ppDelay(), icDelay()):
        looper.run(
            eventually(check_instance_change_messages,
                       3,
                       timeout=FRESHNESS_TIMEOUT * 5))

        for node in txnPoolNodeSet:
            all_instance_changes = node.view_changer.spylog.getAll(
                'sendInstanceChange')
            freshness_instance_changes = [
                ic for ic in all_instance_changes
                if ic.params['suspicion'].code == 43
            ]

            # Ensure that all instance change messages were due to freshness
            assert len(all_instance_changes) == len(freshness_instance_changes)

            # Ensure that all instance change messages are for same view
            for ic in freshness_instance_changes:
                assert ic.params['view_no'] == current_view_no + 1

            # Ensure instance change messages had sensible interval
            for ic1, ic2 in zip(freshness_instance_changes[1:],
                                freshness_instance_changes):
                interval = ic2.starttime - ic1.starttime
                assert 0.9 * FRESHNESS_TIMEOUT < interval < 2.1 * FRESHNESS_TIMEOUT
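# has_freshness_instance_change is an external helper; a hedged sketch derived from the
# spylog filtering done in the body above (43 being the freshness suspicion code used there):
def has_freshness_instance_change_sketch(node, count=1):
    freshness_ics = [ic for ic in node.view_changer.spylog.getAll('sendInstanceChange')
                     if ic.params['suspicion'].code == 43]
    return len(freshness_ics) >= count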
def test_no_propagated_future_view_change_while_view_change(txnPoolNodeSet, looper):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate view change in progress
    lagged_node.view_changer.view_change_in_progress = True
    old_view_no = checkViewNoForNodes([lagged_node])

    initial_vhdc = \
        lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)

    # delay INSTANCE CHANGE on the lagged node, so all nodes except the lagging one finish the View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # check that the lagged node received 3 future VCD messages but didn't start a new view change
        assert len(other_nodes) + initial_vhdc ==\
               lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)
        assert old_view_no == checkViewNoForNodes([lagged_node])
def test_new_primary_has_wrong_clock(tconf, looper, txnPoolNodeSet,
                                     sdk_wallet_client, sdk_pool_handle):
    """
    One of the non-primaries has a bad clock; it raises suspicions but orders
    requests after getting PREPAREs. Then a view change happens and this
    non-primary with the bad clock becomes the new primary, but it is not able to
    get any of its PRE-PREPAREs ordered. Eventually another view change
    happens, a new primary is elected, and the pool is functional again
    :return:
    """
    # The node with the bad clock; it will become primary after the view
    # change
    faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node
    make_clock_faulty(faulty_node)

    assert not faulty_node.master_replica.isPrimary
    # faulty_node replies too
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 3)

    ledger_sizes = {
        node.name: node.domainLedger.size for node in txnPoolNodeSet}
    susp_counts = {node.name: get_timestamp_suspicion_count(
        node) for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After view change, faulty_node is primary
    assert faulty_node.master_replica.isPrimary

    old_view_no = txnPoolNodeSet[0].viewNo

    # Delay instance change so view change doesn't happen in the middle of this test
    stashers = (n.nodeIbStasher for n in txnPoolNodeSet)
    with delay_rules(stashers, icDelay()):
        # Requests are sent
        for _ in range(5):
            sdk_send_random_requests(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_client,
                                     count=2)
            looper.runFor(2)

        def chk():
            for node in txnPoolNodeSet:
                assert node.viewNo == old_view_no

            for node in [n for n in txnPoolNodeSet if n != faulty_node]:
                # Each non faulty node raises suspicion
                assert get_timestamp_suspicion_count(node) > susp_counts[node.name]
                # Ledger does not change
                assert node.domainLedger.size == ledger_sizes[node.name]

            assert faulty_node.domainLedger.size == ledger_sizes[faulty_node.name]

        looper.run(eventually(chk, retryWait=1))


    # Eventually another view change happens
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, old_view_no + 1,
                          retryWait=1, timeout=2 * tconf.PerfCheckFreq))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After view change, faulty_node is no more the primary
    assert not faulty_node.master_replica.isPrimary

    # All nodes reply
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 2)
def test_audit_ledger_view_change(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client, sdk_wallet_steward,
                                  initial_domain_size, initial_pool_size, initial_config_size,
                                  tdir,
                                  tconf,
                                  allPluginsPath,
                                  view_no, pp_seq_no,
                                  initial_seq_no,
                                  monkeypatch):
    '''
    1. Send a NODE transaction to add a 7th node (adding a new instance),
    but delay Ordered messages on one node.
    2. Send a NYM txn.
    3. Reset delays and re-process the stashed Ordered messages via force_process_ordered.
    4. Check that the audit txn for the NYM txn uses the primaries list from the uncommitted
    audit ledger with the new list of primaries.
    '''
    other_nodes = txnPoolNodeSet[:-1]
    slow_node = txnPoolNodeSet[-1]
    # Add a new steward for creating a new node
    new_steward_wallet_handle = sdk_add_new_nym(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                alias="newSteward",
                                                role=STEWARD_STRING)

    audit_size_initial = [node.auditLedger.size for node in txnPoolNodeSet]

    ordereds = []
    monkeypatch.setattr(slow_node, 'try_processing_ordered', lambda msg: ordereds.append(msg))

    with delay_rules([n.nodeIbStasher for n in txnPoolNodeSet], icDelay()):

        # Send NODE txn for the 7th node
        new_node = sdk_add_new_node(looper,
                                    sdk_pool_handle,
                                    new_steward_wallet_handle,
                                    "Theta",
                                    tdir,
                                    tconf,
                                    allPluginsPath)

        txnPoolNodeSet.append(new_node)
        looper.run(checkNodesConnected(other_nodes + [new_node]))

        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)

        check_audit_ledger_updated(audit_size_initial, [slow_node],
                                   audit_txns_added=0)
        looper.run(eventually(check_audit_ledger_uncommitted_updated,
                              audit_size_initial, [slow_node], 2))

        def patch_force_process_ordered():
            for msg in list(ordereds):
                slow_node.replicas[msg.instId].outBox.append(msg)
                ordereds.remove(msg)
            monkeypatch.undo()
            slow_node.force_process_ordered()

        assert ordereds
        monkeypatch.setattr(slow_node, 'force_process_ordered', patch_force_process_ordered)

    looper.run(eventually(lambda: assertExp(all(n.viewNo == 1 for n in txnPoolNodeSet))))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    looper.run(eventually(lambda: assertExp(not ordereds)))

    for node in txnPoolNodeSet:
        last_txn = node.auditLedger.get_last_txn()
        last_txn['txn']['data']['primaries'] = node._get_last_audited_primaries()
        check_audit_txn(txn=last_txn,
                        view_no=view_no + 1, pp_seq_no=1,
                        seq_no=initial_seq_no + 4,
                        txn_time=node.master_replica._ordering_service.last_accepted_pre_prepare_time,
                        txn_roots={DOMAIN_LEDGER_ID: node.getLedger(DOMAIN_LEDGER_ID).tree.root_hash},
                        state_roots={DOMAIN_LEDGER_ID: node.getState(DOMAIN_LEDGER_ID).committedHeadHash},
                        pool_size=initial_pool_size + 1, domain_size=initial_domain_size + 2,
                        config_size=initial_config_size,
                        last_pool_seqno=2,
                        last_domain_seqno=1,
                        last_config_seqno=None,
                        primaries=node.write_manager.future_primary_handler.get_last_primaries() or node.primaries)
def delay_instance_change(txnPoolNodeSet, val):
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(val))
def test_finish_view_change_with_incorrect_primaries_list(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, tdir,
        tconf, allPluginsPath):
    """
    This test imitates a situation where one of the nodes is lagging.
    It missed the txn for adding a new node and the view_change after it.
    After that the lagged node starts the next view_change together with the other nodes,
    but it has a different committed node_reg and therefore selects other primaries.
    In this case we expect that the lagged node will complete the view_change with the other primaries
    and will start catchup by Checkpoints because it will not be able to order.

    """
    def complete_vc(node):
        assert not node.view_change_in_progress

    view_no = checkViewNoForNodes(txnPoolNodeSet)

    # Delta is lagged
    lagging_node = txnPoolNodeSet[3]
    fast_nodes = txnPoolNodeSet[:3] + txnPoolNodeSet[4:]

    # Force 5 view changes so that we have viewNo == 5 and Zeta is the primary.
    for _ in range(5):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, view_no + 1)
        ensureElectionsDone(looper, txnPoolNodeSet)
        view_no = checkViewNoForNodes(txnPoolNodeSet)

    with delay_rules_without_processing(lagging_node.nodeIbStasher,
                                        msg_rep_delay(), icDelay(), vc_delay(),
                                        nv_delay(), cDelay(), ppDelay(),
                                        pDelay()):

        # Add a new node; this should start a view_change because a NODE txn is ordered
        _, theta = sdk_add_new_steward_and_node(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                'Theta_Steward',
                                                'Theta',
                                                tdir,
                                                tconf,
                                                allPluginsPath=allPluginsPath)
        txnPoolNodeSet.append(theta)
        fast_nodes.append(theta)

        looper.run(checkNodesConnected(fast_nodes))
        ensure_all_nodes_have_same_data(looper, fast_nodes)

        waitForViewChange(looper, fast_nodes, view_no + 1)
        ensureElectionsDone(looper, fast_nodes)

    assert lagging_node.viewNo != fast_nodes[0].viewNo
    assert fast_nodes[0].viewNo == view_no + 1

    current_view_no = checkViewNoForNodes(fast_nodes)
    expected_view_no = current_view_no + 1
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, expected_view_no)
    ensureElectionsDone(looper, fast_nodes)

    looper.run(eventually(complete_vc, lagging_node, timeout=60))
    assert lagging_node.viewNo == expected_view_no

    # We assume that after receiving 2 Checkpoints the lagged node will start catchup and elect the right primaries

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2 * CHK_SIZE)
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
def delay_instance_change(txnPoolNodeSet, val):
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(val))
def test_new_primary_has_wrong_clock(tconf, looper, txnPoolNodeSet,
                                     sdk_wallet_client, sdk_pool_handle):
    """
    One of non-primary has a bad clock, it raises suspicions but orders
    requests after getting PREPAREs. Then a view change happens this
    non-primary with the bad clock becomes the new primary but is not able to
    get any of it's PRE-PREPAREs ordered. Eventually another view change
    happens and a new primary is elected the pool is functional again
    :return:
    """
    # The node with the bad clock; it will become the primary after the view change
    faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node
    make_clock_faulty(faulty_node)
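    # make_clock_faulty presumably skews the node's time source, so the
    # timestamps it produces and validates differ from those of its peers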

    assert not faulty_node.master_replica.isPrimary
    # faulty_node replies too

    ledger_sizes = {
        node.name: node.domainLedger.size for node in txnPoolNodeSet}
    susp_counts = {node.name: get_timestamp_suspicion_count(
        node) for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After view change, faulty_node is primary
    assert faulty_node.master_replica.isPrimary

    old_view_no = txnPoolNodeSet[0].viewNo

    # Delay instance change so view change doesn't happen in the middle of this test
    stashers = (n.nodeIbStasher for n in txnPoolNodeSet)
    with delay_rules(stashers, icDelay()):
        # Requests are sent
        for _ in range(5):
            sdk_send_random_requests(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_client,
                                     count=2)
            looper.runFor(2)

        def chk():
            for node in txnPoolNodeSet:
                assert node.viewNo == old_view_no

            for node in [n for n in txnPoolNodeSet if n != faulty_node]:
                # Each non faulty node raises suspicion
                assert get_timestamp_suspicion_count(node) > susp_counts[node.name]
                # Ledger does not change
                assert node.domainLedger.size == ledger_sizes[node.name]

            assert faulty_node.domainLedger.size == ledger_sizes[faulty_node.name]

        looper.run(eventually(chk, retryWait=1))

    # Eventually another view change happens
    ensure_view_change(looper, txnPoolNodeSet)
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, old_view_no + 1,
                          retryWait=1, timeout=2 * tconf.PerfCheckFreq))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # After view change, faulty_node is no more the primary
    assert not faulty_node.master_replica.isPrimary

    # All nodes reply
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=Max3PCBatchSize * 2)
def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up,
                                  wallet1, client1):
    """
    Test that every node queues 3-phase requests (PRE-PREPARE, PREPARE and COMMIT)
    that come from a view which is greater than the current view
    """

    f = getMaxFailures(nodeCount)

    # Delay processing of instance change on a node
    nodeA = nodeSet.Alpha
    nodeA.nodeIbStasher.delay(icDelay(60))

    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's throughput falls and view changes
    ppDelayer = ppDelay(5, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelayer)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4,
                                        timeoutPerReq=5 * nodeCount)

    # Every node except Node A should have a view change
    for node in nodeSet:
        if node.name != nodeA.name:
            looper.run(eventually(
                partial(checkViewChangeInitiatedForNode, node, 1),
                retryWait=1,
                timeout=20))

    # Node A's view should not have changed yet
    with pytest.raises(AssertionError):
        looper.run(eventually(partial(
            checkViewChangeInitiatedForNode, nodeA, 1),
            retryWait=1,
            timeout=20))

    # NodeA should not have any pending 3 phase request for a later view
    for r in nodeA.replicas:  # type: TestReplica
        assert len(r.threePhaseMsgsForLaterView) == 0

    # Reset delays on incoming messages from all nodes
    for node in nodeSet:
        node.nodeIbStasher.nodelay(ppDelayer)

    # Send one more request
    sendRandomRequest(wallet1, client1)

    def checkPending3PhaseReqs():
        # Get all replicas that have their primary status decided
        reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None]
        # At least one replica should have its primary status decided
        assert len(reps) > 0
        for r in reps:  # type: TestReplica
            logger.debug("primary status for replica {} is {}"
                          .format(r, r.primaryNames))
            assert len(r.threePhaseMsgsForLaterView) > 0

    # NodeA should now have pending 3 phase request for a later view
    looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=30))
def test_successive_batch_do_no_change_state(looper, tconf, nodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_trustee, monkeypatch):
    """
    Send 2 NYM txns in different batches such that the second batch does not
    change the state, so the state root remains the same, but keep the identifier
    and reqId different. Make sure the first request is not ordered by the
    primary before the PRE-PREPARE for the second is sent.
    Also check reject and commit.
    :return:
    """

    # Disable view change during this test
    for n in nodeSet:
        n.nodeIbStasher.delay(icDelay())

    # Duration (in seconds) for which COMMITs on the master instance are delayed
    delay_cm_duration = 10

    def delay_commits(wrappedMsg):
        msg, sender = wrappedMsg
        if isinstance(msg, Commit) and msg.instId == 0:
            return delay_cm_duration
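    # Note: the node stasher calls such a predicate for every incoming message;
    # as used throughout these tests, a numeric return value is taken as the
    # delay in seconds, while returning None lets the message pass untouched.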

    def check_verkey(i, vk):
        for node in nodeSet:
            data = node.idrCache.getNym(i, isCommitted=True)
            assert data[VERKEY] == vk

    def check_uncommitted(count):
        for node in nodeSet:
            assert len(node.idrCache.un_committed) == count

    for node in nodeSet:
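        # Replace the replicas' "request missing 3PC messages" hook with a
        # no-op so the deliberately delayed messages are not re-fetched
        # out of band, which would defeat the delays set up in this test.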
        for rpl in node.replicas:
            monkeypatch.setattr(rpl, '_request_missing_three_phase_messages',
                                lambda *x, **y: None)

    wh, did = sdk_wallet_trustee
    seed = randomString(32)
    (new_did, verkey) = looper.loop.run_until_complete(
        create_and_store_my_did(wh, json.dumps({'seed': seed})))

    sdk_add_new_nym(looper,
                    sdk_pool_handle,
                    sdk_wallet_trustee,
                    dest=new_did,
                    verkey=verkey)

    for node in nodeSet:
        node.nodeIbStasher.delay(delay_commits)

    # Setting the same verkey thrice but in different batches with different
    # request ids
    for _ in range(3):
        verkey = sdk_rotate_verkey(looper, sdk_pool_handle, wh, new_did,
                                   new_did, seed)
        logger.debug('{} rotates his key to {}'.format(new_did, verkey))

    # Number of uncommitted entries is 0
    looper.run(eventually(check_uncommitted, 0))

    check_verkey(new_did, verkey)

    # Setting the verkey to `x`, then `y` and then back to `x` but in different
    # batches with different request ids. The idea is to change
    # state root to `t` then `t'` and then back to `t` and observe that no
    # errors are encountered

    seed = randomString(32)
    (new_client_did, verkey) = looper.loop.run_until_complete(
        create_and_store_my_did(wh, json.dumps({'seed': seed})))
    sdk_add_new_nym(looper,
                    sdk_pool_handle,
                    sdk_wallet_trustee,
                    dest=new_client_did,
                    verkey=verkey)

    x_seed = randomString(32)
    verkey = sdk_rotate_verkey(looper, sdk_pool_handle, wh, new_client_did,
                               new_client_did, x_seed)
    logger.debug('{} rotates his key to {}'.format(new_client_did, verkey))

    y_seed = randomString(32)
    sdk_rotate_verkey(looper, sdk_pool_handle, wh, new_client_did,
                      new_client_did, y_seed)
    logger.debug('{} rotates his key to {}'.format(new_client_did, verkey))

    sdk_rotate_verkey(looper, sdk_pool_handle, wh, new_client_did,
                      new_client_did, x_seed)
    logger.debug('{} rotates his key to {}'.format(new_client_did, verkey))

    # Number of uncommitted entries is 0
    looper.run(eventually(check_uncommitted, 0))

    check_verkey(new_client_did, verkey)
    monkeypatch.undo()

    # Delay COMMITs so that IdrCache can be checked for correct
    # number of entries

    uncommitteds = {}
    methods = {}
    for node in nodeSet:
        cache = node.idrCache  # type: IdrCache
        uncommitteds[cache._name] = []
        # Since the post batch creation handler is registered (added to a list),
        # find it and patch it
        dh = node.get_req_handler(DOMAIN_LEDGER_ID)
        for i, handler in enumerate(dh.post_batch_creation_handlers):
            # Find the cache's post-create handler without hardcoding class or
            # function names, as they can change with refactoring.
            if handler.__func__.__qualname__ == '{}.{}'.format(
                    cache.__class__.__name__,
                    cache.currentBatchCreated.__name__):
                cre = dh.post_batch_creation_handlers[i]
                break
        com = cache.onBatchCommitted
        methods[cache._name] = (cre, com)

        # Patch methods to record and check roots after commit

        def patched_cre(self, stateRoot):
            uncommitteds[self._name].append(stateRoot)
            return methods[self._name][0](stateRoot)

        def patched_com(self, stateRoot):
            assert uncommitteds[self._name][0] == stateRoot
            rv = methods[self._name][1](stateRoot)
            uncommitteds[self._name] = uncommitteds[self._name][1:]
            return rv

        dh.post_batch_creation_handlers[i] = types.MethodType(
            patched_cre, cache)
        cache.onBatchCommitted = types.MethodType(patched_com, cache)
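        # From here on, every batch creation records its uncommitted state root
        # and every commit asserts that the roots are consumed in FIFO order,
        # i.e. the committed root always matches the oldest recorded one.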

    # Set verkey of multiple identities
    more = 5
    keys = {}
    reqs = []
    for _ in range(more):
        seed = randomString(32)
        nym_request, new_did = looper.loop.run_until_complete(
            prepare_nym_request(sdk_wallet_trustee, seed, None, None))
        keys[new_did] = json.loads(nym_request)['operation']['verkey']
        reqs.append(
            sdk_sign_and_send_prepared_request(looper, sdk_wallet_trustee,
                                               sdk_pool_handle, nym_request))
        looper.runFor(.01)

    # Correct number of uncommitted entries
    looper.run(eventually(check_uncommitted, more, retryWait=1))

    sdk_get_and_check_replies(looper, reqs)

    # Number of uncommitted entries is 0
    looper.run(eventually(check_uncommitted, 0))

    # The verkeys are correct
    for i, v in keys.items():
        check_verkey(i, v)

    waitNodeDataEquality(looper, nodeSet[0], *nodeSet[1:])

    for _ in range(3):
        seed = randomString(32)
        nym_request, new_did = looper.loop.run_until_complete(
            prepare_nym_request(sdk_wallet_trustee, seed, None, None))
        reqs.append(
            sdk_sign_and_send_prepared_request(looper, sdk_wallet_trustee,
                                               sdk_pool_handle, nym_request))
        looper.runFor(.01)

    # Correct number of uncommitted entries
    looper.run(eventually(check_uncommitted, 3, retryWait=1))

    # Check batch reject
    for node in nodeSet:
        cache = node.idrCache
        initial = cache.un_committed
        cache.batchRejected()
        # After reject, last entry is removed
        assert cache.un_committed == initial[:-1]
        root = cache.un_committed[0][0]
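        # Assumption: un_committed entries are (state_root, ...) tuples, so
        # [0][0] is the state root of the oldest batch still pending commit.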
        cache.onBatchCommitted(root)
        # Calling commit with the same root results in an AssertionError
        with pytest.raises(AssertionError):
            cache.onBatchCommitted(root)
def test_successive_batch_do_no_change_state(looper, tdirWithDomainTxnsUpdated,
                                             tdirWithClientPoolTxns, tconf,
                                             nodeSet, trustee, trusteeWallet,
                                             monkeypatch):
    """
    Send 2 NYM txns in different batches such that the second batch does not
    change the state, so the state root remains the same, but keep the identifier
    and reqId different. Make sure the first request is not ordered by the
    primary before the PRE-PREPARE for the second is sent.
    Also check reject and commit.
    :return:
    """
    all_reqs = []

    # Disable view change during this test
    for n in nodeSet:
        n.nodeIbStasher.delay(icDelay())

    # Duration (in seconds) for which COMMITs on the master instance are delayed
    delay_cm_duration = 10

    def delay_commits(wrappedMsg):
        msg, sender = wrappedMsg
        if isinstance(msg, Commit) and msg.instId == 0:
            return delay_cm_duration

    def new_identity():
        wallet = Wallet(randomString(5))
        signer = DidSigner()
        new_idr, _ = wallet.addIdentifier(signer=signer)
        verkey = wallet.getVerkey(new_idr)
        idy = Identity(identifier=new_idr, verkey=verkey, role=None)
        return idy, wallet

    def submit_id_req(idy, wallet=None, client=None):
        nonlocal all_reqs
        wallet = wallet if wallet is not None else trusteeWallet
        client = client if client is not None else trustee
        wallet.updateTrustAnchoredIdentity(idy)
        reqs = wallet.preparePending()
        all_reqs.extend(reqs)
        client.submitReqs(*reqs)
        return reqs

    def submit_id_req_and_wait(idy, wallet=None, client=None):
        reqs = submit_id_req(idy, wallet=wallet, client=client)
        looper.runFor(.2)
        return reqs

    def check_verkey(i, vk):
        for node in nodeSet:
            data = node.idrCache.getNym(i, isCommitted=True)
            assert data[VERKEY] == vk

    def check_uncommitted(count):
        for node in nodeSet:
            assert len(node.idrCache.un_committed) == count

    for node in nodeSet:
        for rpl in node.replicas:
            monkeypatch.setattr(rpl, '_request_missing_three_phase_messages',
                                lambda *x, **y: None)

    idy, new_wallet = new_identity()
    new_idr = idy.identifier
    verkey = idy.verkey

    submit_id_req(idy)
    waitForSufficientRepliesForRequests(looper,
                                        trustee,
                                        requests=all_reqs[-1:],
                                        add_delay_to_timeout=delay_cm_duration)

    for node in nodeSet:
        node.nodeIbStasher.delay(delay_commits)

    new_client, _ = genTestClient(nodeSet,
                                  tmpdir=tdirWithClientPoolTxns,
                                  usePoolLedger=True)
    looper.add(new_client)
    looper.run(new_client.ensureConnectedToNodes(count=len(nodeSet)))
    new_client.registerObserver(new_wallet.handleIncomingReply, name='temp')
    idy.seqNo = None

    # Setting the same verkey thrice but in different batches with different
    #  request ids
    for _ in range(3):
        req, = submit_id_req_and_wait(idy,
                                      wallet=new_wallet,
                                      client=new_client)
        logger.debug('{} sent request {} to change verkey'.format(
            new_client, req))

    waitForSufficientRepliesForRequests(looper,
                                        new_client,
                                        requests=all_reqs[-3:],
                                        add_delay_to_timeout=delay_cm_duration)

    # Number of uncommitted entries is 0
    looper.run(eventually(check_uncommitted, 0))

    check_verkey(new_idr, verkey)

    new_client.deregisterObserver(name='temp')

    # Setting the verkey to `x`, then `y` and then back to `x` but in different
    # batches with different request ids. The idea is to change
    # state root to `t` then `t'` and then back to `t` and observe that no
    # errors are encountered

    idy, new_wallet = new_identity()
    submit_id_req(idy)
    waitForSufficientRepliesForRequests(looper,
                                        trustee,
                                        requests=all_reqs[-1:],
                                        add_delay_to_timeout=delay_cm_duration)

    new_client.registerObserver(new_wallet.handleIncomingReply)

    idy.seqNo = None
    x_signer = SimpleSigner(identifier=idy.identifier)
    idy.verkey = x_signer.verkey
    req, = submit_id_req_and_wait(idy, wallet=new_wallet, client=new_client)
    new_wallet.updateSigner(idy.identifier, x_signer)
    logger.debug('{} sent request {} to change verkey'.format(new_client, req))

    y_signer = SimpleSigner(identifier=idy.identifier)
    idy.verkey = y_signer.verkey
    req, = submit_id_req_and_wait(idy, wallet=new_wallet, client=new_client)
    new_wallet.updateSigner(idy.identifier, y_signer)
    logger.debug('{} sent request {} to change verkey'.format(new_client, req))

    idy.verkey = x_signer.verkey
    req, = submit_id_req_and_wait(idy, wallet=new_wallet, client=new_client)
    new_wallet.updateSigner(idy.identifier, x_signer)
    logger.debug('{} sent request {} to change verkey'.format(new_client, req))

    waitForSufficientRepliesForRequests(looper,
                                        new_client,
                                        requests=all_reqs[-3:],
                                        add_delay_to_timeout=delay_cm_duration)

    # Number of uncommitted entries is 0
    looper.run(eventually(check_uncommitted, 0))

    check_verkey(new_idr, verkey)
    monkeypatch.undo()

    # Delay COMMITs so that IdrCache can be checked for correct
    # number of entries

    uncommitteds = {}
    methods = {}
    for node in nodeSet:
        cache = node.idrCache  # type: IdrCache
        uncommitteds[cache._name] = []
        # Since the post batch creation handler is registered (added to a list),
        # find it and patch it
        dh = node.get_req_handler(DOMAIN_LEDGER_ID)
        for i, handler in enumerate(dh.post_batch_creation_handlers):
            # Find the cache's post-create handler without hardcoding class or
            # function names, as they can change with refactoring.
            if handler.__func__.__qualname__ == '{}.{}'.format(
                    cache.__class__.__name__,
                    cache.currentBatchCreated.__name__):
                cre = dh.post_batch_creation_handlers[i]
                break
        com = cache.onBatchCommitted
        methods[cache._name] = (cre, com)

        # Patch methods to record and check roots after commit

        def patched_cre(self, stateRoot):
            uncommitteds[self._name].append(stateRoot)
            return methods[self._name][0](stateRoot)

        def patched_com(self, stateRoot):
            assert uncommitteds[self._name][0] == stateRoot
            rv = methods[self._name][1](stateRoot)
            uncommitteds[self._name] = uncommitteds[self._name][1:]
            return rv

        dh.post_batch_creation_handlers[i] = types.MethodType(
            patched_cre, cache)
        cache.onBatchCommitted = types.MethodType(patched_com, cache)

    # Set verkey of multiple identities
    more = 5
    keys = {}
    for _ in range(more):
        idy, _ = new_identity()
        keys[idy.identifier] = idy.verkey
        submit_id_req(idy)
        looper.runFor(.01)

    # Correct number of uncommitted entries
    looper.run(eventually(check_uncommitted, more, retryWait=1))

    waitForSufficientRepliesForRequests(looper,
                                        trustee,
                                        requests=all_reqs[-more:],
                                        add_delay_to_timeout=delay_cm_duration)

    # Number of uncommitted entries is 0
    looper.run(eventually(check_uncommitted, 0))

    # The verkeys are correct
    for i, v in keys.items():
        check_verkey(i, v)

    waitNodeDataEquality(looper, nodeSet[0], *nodeSet[1:])

    keys = {}
    for _ in range(3):
        idy, _ = new_identity()
        keys[idy.identifier] = idy.verkey
        submit_id_req(idy)
        looper.runFor(.01)

    # Correct number of uncommitted entries
    looper.run(eventually(check_uncommitted, 3, retryWait=1))

    # Check batch reject
    for node in nodeSet:
        cache = node.idrCache
        initial = cache.un_committed
        cache.batchRejected()
        # After reject, last entry is removed
        assert cache.un_committed == initial[:-1]
        root = cache.un_committed[0][0]
        cache.onBatchCommitted(root)
        # Calling commit with the same root results in an AssertionError
        with pytest.raises(AssertionError):
            cache.onBatchCommitted(root)
def test_catchup_to_next_view_during_view_change_0_to_1_then_1_to_2(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward):
    '''
    1) Lagging node is not a primary for new views
    2) All nodes except the lagging one go to view=1
    3) All nodes except the lagging one order txns on view=1
    4) All nodes except the lagging one go to view=2
    5) All nodes except the lagging one order txns on view=2
    6) Lagging node gets InstanceChanges for view=1 => it changes to view=1 and catches up, including the txns ordered in view=2
    7) Lagging node gets InstanceChanges for view=2 => it changes to view=2
    8) Make sure that the lagging node is up to date and can participate in consensus
    '''
    lagging_node = txnPoolNodeSet[0]
    other_nodes = txnPoolNodeSet[1:]
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    initial_last_ordered = lagging_node.master_last_ordered_3PC

    with delay_rules(lagging_node.nodeIbStasher, icDelay(viewNo=2),
                     vc_delay(view_no=2)):
        with delay_rules(lagging_node.nodeIbStasher, delay_for_view(viewNo=0),
                         delay_for_view(viewNo=1)):
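            # Nested contexts: the inner one holds back all view-change traffic
            # for views 0 and 1, while the outer one holds back InstanceChange
            # and ViewChange messages for view 2, so the lagging node advances
            # one view at a time as each context exits.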
            # view change to viewNo=1
            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()
            waitForViewChange(looper,
                              other_nodes,
                              expectedViewNo=initial_view_no + 1)
            checkProtocolInstanceSetup(looper=looper,
                                       nodes=other_nodes,
                                       instances=range(3))
            ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

            # order some txns
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_steward, 5)

            # view change to viewNo=2
            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()
            waitForViewChange(looper,
                              other_nodes,
                              expectedViewNo=initial_view_no + 2)
            checkProtocolInstanceSetup(looper=looper,
                                       nodes=other_nodes,
                                       instances=range(3))
            ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

            # order some txns
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_steward, 5)

            assert initial_view_no == lagging_node.viewNo
            assert initial_last_ordered == lagging_node.master_last_ordered_3PC

        # make sure that the first View Change happened on the lagging node
        waitForViewChange(looper, [lagging_node],
                          expectedViewNo=initial_view_no + 1,
                          customTimeout=20)
        assert initial_view_no + 1 == lagging_node.viewNo

    # make sure that the second View Change happened on the lagging node
    waitForViewChange(looper, [lagging_node],
                      expectedViewNo=initial_view_no + 2,
                      customTimeout=20)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
def test_node_detecting_lag_from_view_change_done_messages(
        txnPoolNodeSet, looper, wallet1, client1, client1Connected, tconf):
    """
    A node is slow and after view change starts, it marks it's `last_prepared`
    to less than others, after catchup it does not get any txns from others
    and finds it has already ordered it's `last_prepared`, but when
    it gets ViewChangeDone messages, it starts catchup again and this
    time gets the txns. To achieve this delay all 3PC messages to a node so
    before view change it has different last_prepared from others.
    Also delay processing of COMMITs and INSTANCE_CHANGEs by other nodes
    """
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica
    fast_master_replicas = [n.master_replica for n in fast_nodes]

    delay_3pc = 50
    delay_ic = tconf.PerfCheckFreq + 5
    delay_commit = delay_ic + 10
    delay_3pc_messages([slow_node], 0, delay_3pc)
    for n in fast_nodes:
        n.nodeIbStasher.delay(icDelay(delay_ic))
        n.nodeIbStasher.delay(cDelay(delay_commit))
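    # The slow node's 3PC traffic is delayed the longest; on the fast nodes
    # InstanceChanges are released before COMMITs (delay_ic < delay_commit),
    # so the view change kicks in while the commits are still held back.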

    reqs = []
    for i in range(10):
        reqs = reqs + sendRandomRequests(wallet1, client1, 2)
        looper.runFor(.2)

    def chk1():
        for rep in fast_master_replicas:
            assert compare_3PC_keys(
                slow_master_replica.last_prepared_certificate_in_view(),
                rep.last_prepared_certificate_in_view()) > 0
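            # A positive comparison means the slow replica's last prepared
            # certificate is behind the fast replica's (cf. chk2 below).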
            assert slow_master_replica.last_ordered_3pc == rep.last_ordered_3pc

    looper.run(eventually(chk1))

    no_more_catchup_count = get_count(slow_node,
                                      slow_node.no_more_catchups_needed)

    # Track last prepared for master replica of each node
    prepareds = {}
    orig_methods = {}
    for node in txnPoolNodeSet:
        orig_methods[node.name] = node.master_replica.on_view_change_start

        def patched_on_view_change_start(self):
            orig_methods[self.node.name]()
            prepareds[self.node.name] = self.last_prepared_before_view_change

        node.master_replica.on_view_change_start = types.MethodType(
            patched_on_view_change_start, node.master_replica)
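        # Each master replica now snapshots its last_prepared_before_view_change
        # when the view change starts, so chk2 below can compare the snapshots.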

    ensure_view_change(looper, txnPoolNodeSet, exclude_from_check=fast_nodes)

    def chk2():
        # last_prepared of slow_node is less than fast_nodes
        for rep in fast_master_replicas:
            assert compare_3PC_keys(prepareds[slow_master_replica.node.name],
                                    prepareds[rep.node.name]) > 0

    looper.run(eventually(chk2, timeout=delay_ic + 5))

    last_start_catchup_call_at = None
    no_more_catchup_call_at = None

    def chk3():
        # no_more_catchups_needed was called since the node found no need for catchup
        nonlocal last_start_catchup_call_at, no_more_catchup_call_at
        assert (get_count(slow_node, slow_node.no_more_catchups_needed) -
                no_more_catchup_count) > 0

        no_more_catchup_call_at = slow_node.spylog.getLast(
            slow_node.no_more_catchups_needed).starttime
        last_start_catchup_call_at = slow_node.spylog.getLast(
            slow_node.start_catchup).starttime

    looper.run(eventually(chk3, timeout=delay_commit))

    for n in fast_nodes:
        n.nodeIbStasher.reset_delays_and_process_delayeds()
        n.nodeIbStasher.reset_delays_and_process_delayeds()

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    assert slow_node.spylog.getLast(
        slow_node.start_catchup).starttime > no_more_catchup_call_at
    assert slow_node.spylog.getLast(
        slow_node.start_catchup).starttime > last_start_catchup_call_at
def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up, wallet1,
                                  client1):
    """
    Test that every node queues 3-phase requests (PRE-PREPARE, PREPARE and COMMIT)
    that come from a view which is greater than the current view
    """

    f = getMaxFailures(nodeCount)

    # Delay processing of instance change on a node
    nodeA = nodeSet.Alpha
    nodeA.nodeIbStasher.delay(icDelay(60))

    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's throughput falls and view changes
    ppDelayer = ppDelay(5, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelayer)

    sendReqsToNodesAndVerifySuffReplies(looper,
                                        wallet1,
                                        client1,
                                        4,
                                        timeoutPerReq=5 * nodeCount)

    # Every node except Node A should have a view change
    for node in nodeSet:
        if node.name != nodeA.name:
            looper.run(
                eventually(partial(checkViewChangeInitiatedForNode, node, 1),
                           retryWait=1,
                           timeout=20))

    # Node A's view should not have changed yet
    with pytest.raises(AssertionError):
        looper.run(
            eventually(partial(checkViewChangeInitiatedForNode, nodeA, 1),
                       retryWait=1,
                       timeout=20))

    # NodeA should not have any pending 3 phase request for a later view
    for r in nodeA.replicas:  # type: TestReplica
        assert len(r.threePhaseMsgsForLaterView) == 0

    # Reset delays on incoming messages from all nodes
    for node in nodeSet:
        node.nodeIbStasher.nodelay(ppDelayer)

    # Send one more request
    sendRandomRequest(wallet1, client1)

    def checkPending3PhaseReqs():
        # Get all replicas that have their primary status decided
        reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None]
        # At least one replica should have its primary status decided
        assert len(reps) > 0
        for r in reps:  # type: TestReplica
            logger.debug("primary status for replica {} is {}".format(
                r, r.primaryNames))
            assert len(r.threePhaseMsgsForLaterView) > 0

    # NodeA should now have pending 3 phase request for a later view
    looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=30))