def test_future_primaries_replicas_increase(looper, txnPoolNodeSet, sdk_pool_handle,
                                            sdk_wallet_stewards, tdir, tconf, allPluginsPath):
    # Don't delete NodeStates, so we can check them later.
    global old_commit
    old_commit = txnPoolNodeSet[0].write_manager.future_primary_handler.commit_batch
    for node in txnPoolNodeSet:
        node.write_manager.future_primary_handler.commit_batch = lambda three_pc_batch, prev_handler_result=None: 0

    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Increase replicas count
    add_new_node(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_stewards[0], tdir, tconf, allPluginsPath)

    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1
    # "seq_no + 2" because 1 domain and 1 pool txn.

    node = txnPoolNodeSet[0]
    with delay_rules(node.nodeIbStasher, cDelay()):
        req = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                        sdk_pool_handle,
                                        sdk_wallet_stewards[0], 1)[0][0]
        req = Request(**req)
        three_pc_batch = ThreePcBatch(DOMAIN_LEDGER_ID, 0, 0, 1, time.time(),
                                      randomString(),
                                      randomString(),
                                      ['a', 'b', 'c'], [req.digest], pp_digest='')
        primaries = node.write_manager.future_primary_handler.post_batch_applied(three_pc_batch)
        assert len(primaries) == len(initial_primaries) + 1
        assert len(primaries) == len(node.primaries)
Example #3
def test_no_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # emulate catchup by setting non-synced status
    for node in txnPoolNodeSet:
        node.mode = mode

    check_instance_change_count(txnPoolNodeSet, 0)

    # start View Change
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    old_meths = do_view_change(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        node.view_changer.sendInstanceChange(old_view_no + 1)

    # make sure View Change is not started
    check_no_view_change(looper, txnPoolNodeSet)
    assert old_view_no == checkViewNoForNodes(txnPoolNodeSet)

    # emulate finishing of catchup by setting Participating status
    revert_do_view_change(txnPoolNodeSet, old_meths)
    for node in txnPoolNodeSet:
        node.mode = Mode.participating

    # make sure that View Change happened
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=old_view_no + 1)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
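The mode fixture consumed by the test above is not part of this listing. A minimal sketch, assuming plenum's Mode enum from plenum.common.startable and an arbitrary choice of pre-participating states to parametrize over:

import pytest
from plenum.common.startable import Mode


# Hypothetical fixture sketch: run the test once per catch-up state that is not
# yet "participating" (the exact set of modes is an assumption).
@pytest.fixture(params=[Mode.starting, Mode.discovering, Mode.discovered,
                        Mode.syncing, Mode.synced])
def mode(request):
    return request.param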
def test_future_primaries_replicas_decrease(looper, txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_stewards, tdir, tconf,
                                            allPluginsPath):
    assert len(txnPoolNodeSet) == 7

    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Decrease replicas count
    demote_node(looper, sdk_wallet_stewards[-1], sdk_pool_handle,
                txnPoolNodeSet[-2])
    txnPoolNodeSet.remove(txnPoolNodeSet[-2])
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1
    state = txnPoolNodeSet[0].write_manager.future_primary_handler.node_states[
        -1]
    assert len(state.primaries) + 1 == len(initial_primaries)
    assert len(state.primaries) == len(txnPoolNodeSet[0].primaries)

    for node in txnPoolNodeSet:
        node.write_manager.future_primary_handler.commit_batch = old_commit
def test_future_primaries_replicas_increase(looper, txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_stewards, tdir, tconf,
                                            allPluginsPath):
    # Don't delete NodeStates, so we can check them later.
    global old_commit
    old_commit = txnPoolNodeSet[
        0].write_manager.future_primary_handler.commit_batch
    for node in txnPoolNodeSet:
        node.write_manager.future_primary_handler.commit_batch = lambda three_pc_batch, prev_handler_result=None: 0

    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Increase replicas count
    add_new_node(looper, txnPoolNodeSet, sdk_pool_handle,
                 sdk_wallet_stewards[0], tdir, tconf, allPluginsPath)

    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1
    # "seq_no + 2" because 1 domain and 1 pool txn.
    state = txnPoolNodeSet[0].write_manager.future_primary_handler.node_states[
        -1]
    assert len(state.primaries) == len(initial_primaries) + 1
    assert len(state.primaries) == len(txnPoolNodeSet[0].primaries)
Example #6
def test_lag_less_then_catchup(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_client):
    delayed_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {delayed_node})
    checkViewNoForNodes(txnPoolNodeSet)
    last_ordered_before = delayed_node.master_replica.last_ordered_3pc
    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        # Send txns for stable checkpoint
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, CHK_FREQ)
        # Check that all non-delayed nodes have a stable checkpoint
        for n in other_nodes:
            assert n.master_replica._consensus_data.stable_checkpoint == CHK_FREQ

        # Send another txn. This txn will be reordered after view_change
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        trigger_view_change(txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)

        assert delayed_node.master_replica.last_ordered_3pc == last_ordered_before

    # Send txns to stabilize the checkpoint on the other nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, CHK_FREQ - 1)

    pool_pp_seq_no = get_pp_seq_no(other_nodes)
    looper.run(
        eventually(lambda: assertExp(delayed_node.master_replica.
                                     last_ordered_3pc[1] == pool_pp_seq_no)))
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
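get_pp_seq_no is a helper from the test module above and is not shown here. A plausible sketch, under the assumption that all non-delayed nodes agree on the master replica's last ordered ppSeqNo:

def get_pp_seq_no(nodes):
    # Sketch only: read the ppSeqNo part of last_ordered_3pc from each node's
    # master replica and make sure all nodes agree before returning it.
    pp_seq_nos = {n.master_replica.last_ordered_3pc[1] for n in nodes}
    assert len(pp_seq_nos) == 1, "nodes disagree on ppSeqNo: {}".format(pp_seq_nos)
    return pp_seq_nos.pop()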
def test_new_primary_lagging_behind(looper, txnPoolNodeSet, sdk_wallet_client,
                                    sdk_pool_handle, tconf):
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary_name = get_next_primary_name(txnPoolNodeSet,
                                              initial_view_no + 1)
    next_primary = [n for n in txnPoolNodeSet
                    if n.name == next_primary_name][0]
    other_nodes = [n for n in txnPoolNodeSet if n != next_primary]
    expected_primary_name = get_next_primary_name(txnPoolNodeSet,
                                                  initial_view_no + 2)
    # Next primary cannot stabilize 1 checkpoint
    with delay_rules(next_primary.nodeIbStasher, cDelay(), pDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, CHK_FREQ)
        ensure_view_change(looper, txnPoolNodeSet)
        looper.run(
            eventually(check_not_in_view_change,
                       txnPoolNodeSet,
                       timeout=2 * tconf.NEW_VIEW_TIMEOUT))
        ensureElectionsDone(looper=looper,
                            nodes=other_nodes,
                            customTimeout=2 * tconf.NEW_VIEW_TIMEOUT,
                            instances_list=[0, 1])

    assert next_primary_name != expected_primary_name
    assert checkViewNoForNodes(txnPoolNodeSet) == initial_view_no + 2

    # send CHK_FREQ reqs so that slow node will start catch-up
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, CHK_FREQ)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
def test_future_primaries_replicas_decrease(looper, txnPoolNodeSet, sdk_pool_handle,
                                            sdk_wallet_stewards, tdir, tconf, allPluginsPath):
    assert len(txnPoolNodeSet) == 7

    initial_primaries = copy.copy(txnPoolNodeSet[0].primaries)
    last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    starting_view_number = checkViewNoForNodes(txnPoolNodeSet)

    # Decrease replicas count
    demote_node(looper, sdk_wallet_stewards[-1], sdk_pool_handle, txnPoolNodeSet[-2])
    txnPoolNodeSet.remove(txnPoolNodeSet[-2])
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_view_no = checkViewNoForNodes(txnPoolNodeSet)
    assert new_view_no == starting_view_number + 1
    node = txnPoolNodeSet[0]
    with delay_rules(node.nodeIbStasher, cDelay()):
        req = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                        sdk_pool_handle,
                                        sdk_wallet_stewards[0], 1)[0][0]
        req = Request(**req)
        three_pc_batch = ThreePcBatch(DOMAIN_LEDGER_ID, 0, 0, 1, time.time(),
                                      randomString(),
                                      randomString(),
                                      ['a', 'b', 'c'], [req.digest], pp_digest='')
        primaries = node.write_manager.future_primary_handler.post_batch_applied(three_pc_batch)
        assert len(primaries) + 1 == len(initial_primaries)
        assert len(primaries) == len(txnPoolNodeSet[0].primaries)

    for node in txnPoolNodeSet:
        node.write_manager.future_primary_handler.commit_batch = old_commit
def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper,
                                                       mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE_CHANGE on the lagged node, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper,
                                   nodes=other_nodes,
                                   numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

        # emulate finishing of catchup by setting Participating status
        lagged_node.mode = Mode.participating

        # make sure that View Change happened on lagging node
        waitForViewChange(looper, [lagged_node],
                          expectedViewNo=old_view_no + 1,
                          customTimeout=10)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
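check_future_vcd_count used above is not defined in this listing. A hedged sketch, borrowing the spylog-based counting of process_future_view_vchd_msg that the test_no_propagated_future_view_change_while_view_change example below uses:

def check_future_vcd_count(node, expected):
    # Sketch: count how many ViewChangeDone messages for a future view this node
    # has processed so far (spylog counting is an assumption, not the actual helper).
    vc = node.view_changer
    assert vc.spylog.count(vc.process_future_view_vchd_msg.__name__) == expected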
def test_no_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # emulate catchup by setting non-synced status
    for node in txnPoolNodeSet:
        node.mode = mode

    check_stashed_instance_changes(txnPoolNodeSet, 0)

    # start View Change
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    old_meths = do_view_change(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        vct_service = node.master_replica._view_change_trigger_service
        vct_service._send_instance_change(old_view_no + 1, Suspicions.PRIMARY_DEGRADED)

    # make sure View Change is not started
    check_no_view_change(looper, txnPoolNodeSet)
    assert old_view_no == checkViewNoForNodes(txnPoolNodeSet)

    # emulate finishing of catchup by setting Participating status
    revert_do_view_change(txnPoolNodeSet, old_meths)
    for node in txnPoolNodeSet:
        node.mode = Mode.participating
        node.master_replica.stasher.process_all_stashed(STASH_CATCH_UP)

    # make sure that View Change happened
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=old_view_no + 1)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
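check_no_view_change appears in several of these examples without its definition. A minimal sketch of what it might do, with the wait time being an assumption:

def check_no_view_change(looper, nodes, wait=3):
    # Sketch: remember the current view number, give the pool a few seconds to
    # (not) react, and assert that the view number did not move.
    nodes = nodes if isinstance(nodes, (list, tuple, set)) else [nodes]
    view_no_before = checkViewNoForNodes(nodes)
    looper.runFor(wait)
    assert checkViewNoForNodes(nodes) == view_no_before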
Example #11
def test_propagate_primary_after_primary_restart_view_0(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward,
        tdir, allPluginsPath):
    """
    Delay instance change msgs to prevent view change during primary restart
    to test propagate primary for primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores all
    indexes correctly
    case viewNo == 0
    """
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (old_ppseqno > 0)

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            old_primary,
                                            stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary,
                                       looper,
                                       tconf,
                                       tdir,
                                       allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [
        i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name
    ][0]
    txnPoolNodeSet[idx] = restartedNode

    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert (new_viewNo == old_viewNo)

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert (new_primary.name == old_primary.name)

    # check ppSeqNo the same
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (new_ppseqno > old_ppseqno)
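delay_instance_change is referenced above but not defined in this listing. A sketch consistent with how the restarted node is treated a few lines earlier, i.e. installing an icDelay rule on each node's incoming stasher:

def delay_instance_change(nodes, sec):
    # Sketch: delay incoming INSTANCE_CHANGE messages on every node for `sec`
    # seconds so no view change can complete while the primary restarts.
    for node in nodes:
        node.nodeIbStasher.delay(icDelay(sec))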
Example #12
def test_last_ordered_3pc_reset_if_more_than_new_view(txnPoolNodeSet, looper,
                                                      sdk_pool_handle,
                                                      sdk_wallet_client):
    """
    Check that if last_ordered_3pc's viewNo on a Replica is greater than the new viewNo after view change,
    then last_ordered_3pc is reset to (0,0).
    It can be that last_ordered_3pc was set for the previous view, since it's set during catch-up

    Example: a Node has last_ordered = (1, 300), and then the whole pool except this node restarted.
    The new viewNo is 0, but last_ordered is (1, 300), so all new requests will be discarded by this Node
    if we don't reset last_ordered_3pc
    """
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        node.master_replica.last_ordered_3pc = (old_view_no + 2, 100)

    ensure_view_change_complete(looper, txnPoolNodeSet, customTimeout=60)
    view_no = checkViewNoForNodes(txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert (view_no, 0) == node.master_replica.last_ordered_3pc

    # Make sure the pool is working
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
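The docstring above describes the reset rule being verified. As a worked illustration only (not the production code path), the check a replica would need amounts to:

def reset_last_ordered_if_stale(replica, new_view_no):
    # Illustration: if the view recorded in last_ordered_3pc is ahead of the view
    # being entered, drop it so that new 3PC batches are not discarded.
    ordered_view_no, _ = replica.last_ordered_3pc
    if ordered_view_no > new_view_no:
        replica.last_ordered_3pc = (new_view_no, 0)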
Example #13
def test_no_propagated_future_view_change_while_view_change(
        txnPoolNodeSet, looper):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate view change in progress
    lagged_node.view_changer.view_change_in_progress = True
    old_view_no = checkViewNoForNodes([lagged_node])

    initial_vhdc = \
        lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)

    # delay INSTANCE_CHANGE on the lagged node, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper,
                                   nodes=other_nodes,
                                   numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # check that the lagged node received 3 Future VCD messages, but didn't start a new view change
        assert len(other_nodes) + initial_vhdc ==\
               lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)
        assert old_view_no == checkViewNoForNodes([lagged_node])
def test_demote_promote_restart_after_promotion_7_nodes(txnPoolNodeSet,
                                                        looper,
                                                        sdk_pool_handle,
                                                        sdk_wallet_steward,
                                                        tdir,
                                                        tconf,
                                                        allPluginsPath):
    demoted_node = txnPoolNodeSet[-1]
    rest_nodes = [n for n in txnPoolNodeSet if n != demoted_node]

    starting_view_no = checkViewNoForNodes(txnPoolNodeSet)

    demote_node(looper, sdk_wallet_steward, sdk_pool_handle, demoted_node)

    waitForViewChange(looper, rest_nodes, expectedViewNo=starting_view_no + 1)
    ensureElectionsDone(looper, rest_nodes)
    ensure_all_nodes_have_same_data(looper, rest_nodes)

    sdk_send_random_and_check(looper, rest_nodes, sdk_pool_handle, sdk_wallet_steward, 5)

    starting_view_no = checkViewNoForNodes(rest_nodes)
    promote_node(looper, sdk_wallet_steward, sdk_pool_handle, demoted_node)

    waitForViewChange(looper, rest_nodes, expectedViewNo=starting_view_no + 1)
    ensureElectionsDone(looper, rest_nodes, instances_list=[0, 1, 2])
    ensure_all_nodes_have_same_data(looper, rest_nodes)

    restart_node(looper, txnPoolNodeSet, demoted_node, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)
def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE_CHANGE on the lagged node, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

        # emulate finishing of catchup by setting Participating status
        lagged_node.mode = Mode.participating

        # make sure that View Change happened on lagging node
        waitForViewChange(looper, [lagged_node], expectedViewNo=old_view_no + 1,
                          customTimeout=10)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
Example #16
def test_pool_reaches_quorum_after_f_plus_2_nodes_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf, txnPoolNodeSet, wallet1, client1,
        client1Connected):

    nodes = txnPoolNodeSet
    initial_view_no = nodes[0].viewNo

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])

    stop_node(nodes[0], looper, nodes)
    waitForViewChange(looper, nodes[1:], expectedViewNo=initial_view_no + 1)
    ensureElectionsDone(looper,
                        nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])

    stop_node(nodes[1], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)
    checkViewNoForNodes(nodes[2:], initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request, looper, client1, nodes)

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)
    checkViewNoForNodes(nodes[3:], initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request, looper, client1, nodes)

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir,
                                  allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))

    request = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request, looper, client1, nodes)

    nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir,
                                  allPluginsPath)
    ensureElectionsDone(looper,
                        nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))
    waitForViewChange(looper, nodes[1:], expectedViewNo=initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])

    nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir,
                                  allPluginsPath)
    ensureElectionsDone(looper,
                        nodes,
                        numInstances=getRequiredInstances(nodeCount))
    waitForViewChange(looper, nodes, expectedViewNo=initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])
Example #17
def test_primary_selection_after_primary_demotion_and_view_changes(
        looper, txnPoolNodeSet, stewardAndWalletForMasterNode,
        txnPoolMasterNodes):
    """
    Demote primary and do multiple view changes forcing primaries rotation.
    Demoted primary should be skipped without additional view changes.
    """

    viewNo0 = checkViewNoForNodes(txnPoolNodeSet)

    logger.info(
        "1. turn off the node which has primary replica for master instanse, "
        " this should trigger view change")
    master_node = txnPoolMasterNodes[0]
    client, wallet = stewardAndWalletForMasterNode
    node_data = {ALIAS: master_node.name, SERVICES: []}
    updateNodeData(looper, client, wallet, master_node, node_data)

    restNodes = [node for node in txnPoolNodeSet \
                    if node.name != master_node.name]
    ensureElectionsDone(looper, restNodes)

    viewNo1 = checkViewNoForNodes(restNodes)

    assert viewNo1 == viewNo0 + 1
    assert master_node.viewNo == viewNo0
    assert len(restNodes[0].replicas) == 1  # only one instance left
    assert restNodes[0].replicas[0].primaryName != master_node.name

    # ensure pool is working properly
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    logger.info("2. force view change 2 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)

    viewNo2 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo2 == viewNo1 + 1

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    logger.info("3. force view change 3 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)
    viewNo3 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo3 == viewNo2 + 1

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    logger.info("4. force view change 4 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)
    viewNo4 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo4 == viewNo3 + 1

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)
Example #18
def test_view_change_triggered(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_client):
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)

    trigger_view_change(txnPoolNodeSet)

    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    assert checkViewNoForNodes(txnPoolNodeSet) == current_view_no + 1
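trigger_view_change is a test helper used by several examples on this page. A hedged sketch, assuming each node's view changer can simply be told that the master instance degraded:

def trigger_view_change(nodes):
    # Sketch: report master degradation on every node so that all of them send
    # INSTANCE_CHANGE and a view change starts (the exact hook is an assumption).
    for node in nodes:
        node.view_changer.on_master_degradation()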
def test_primary_selection_after_primary_demotion_and_pool_restart(looper,
                                                                   txnPoolNodeSet,
                                                                   sdk_pool_handle,
                                                                   sdk_wallet_steward,
                                                                   txnPoolMasterNodes,
                                                                   tdir, tconf):
    """
    Demote primary and restart the pool.
    Pool should select new primary and have viewNo=0 after restart.
    """

    logger.info("1. turn off the node which has primary replica for master instanse")
    master_node = txnPoolMasterNodes[0]
    node_dest = hexToFriendly(master_node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward,
                         sdk_pool_handle,
                         node_dest, master_node.name,
                         None, None,
                         None, None,
                         services=[])

    restNodes = [node for node in txnPoolNodeSet if node.name != master_node.name]
    ensureElectionsDone(looper, restNodes)

    # ensure pool is working properly
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    logger.info("2. restart pool")
    # Stopping existing nodes
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # Starting nodes again by creating `Node` objects since that simulates
    # what happens when starting the node with script
    restartedNodes = []
    for node in txnPoolNodeSet:
        config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
        restartedNode = TestNode(node.name,
                                 config_helper=config_helper,
                                 config=tconf, ha=node.nodestack.ha,
                                 cliha=node.clientstack.ha)
        looper.add(restartedNode)
        restartedNodes.append(restartedNode)

    restNodes = [node for node in restartedNodes if node.name != master_node.name]

    looper.run(checkNodesConnected(restNodes))
    ensureElectionsDone(looper, restNodes)
    checkViewNoForNodes(restNodes, 0)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    primariesIdxs = getPrimaryNodesIdxs(restNodes)
    assert restNodes[primariesIdxs[0]].name != master_node.name
def test_view_not_changed_when_short_disconnection(txnPoolNodeSet, looper,
                                                   sdk_pool_handle,
                                                   sdk_wallet_client, tdir,
                                                   tconf, allPluginsPath):
    """
    When primary is disconnected but not long enough to trigger the timeout,
    view change should not happen
    """

    pr_node = get_master_primary_node(txnPoolNodeSet)
    view_no = checkViewNoForNodes(txnPoolNodeSet)

    prp_inst_chg_calls = {
        node.name: node.spylog.count(node.propose_view_change.__name__)
        for node in txnPoolNodeSet if node != pr_node
    }

    recv_inst_chg_calls = {
        node.name: node.spylog.count(
            node.view_changer.process_instance_change_msg.__name__)
        for node in txnPoolNodeSet if node != pr_node
    }

    # Disconnect master's primary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            pr_node,
                                            timeout=2)
    txnPoolNodeSet.remove(pr_node)
    looper.removeProdable(name=pr_node.name)

    timeout = min(tconf.ToleratePrimaryDisconnection - 1, 1)

    # Reconnect master's primary
    pr_node = start_stopped_node(pr_node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(pr_node)

    def chk2():
        # Schedule an instance change but do not send it
        # since primary joins again
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.propose_view_change.__name__
                                         ) > prp_inst_chg_calls[node.name]
                assert node.view_changer.spylog.count(node.view_changer.process_instance_change_msg.__name__) == \
                       recv_inst_chg_calls[node.name]

    looper.run(eventually(chk2, retryWait=.2, timeout=timeout + 1))

    assert checkViewNoForNodes(txnPoolNodeSet) == view_no

    # Send some requests and make sure they get executed
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
Example #21
def test_quorum_after_f_plus_2_nodes_including_primary_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf,
        txnPoolNodeSet, wallet1, client1):
    nodes = txnPoolNodeSet

    request1 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request1])

    stop_node(nodes[0], looper, nodes)
    waitForViewChange(looper, nodes[1:], expectedViewNo=1)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))

    request2 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request2])

    stop_node(nodes[1], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[2:], expectedViewNo=1)

    request3 = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request3, looper, client1, nodes)

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    request4 = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request4, looper, client1, nodes)

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    request5 = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request5, looper, client1, nodes)

    nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))
    checkViewNoForNodes(nodes[1:], expectedViewNo=1)

    request6 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request6])

    nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        numInstances=getRequiredInstances(nodeCount))
    checkViewNoForNodes(nodes, expectedViewNo=1)

    request7 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request7])
def test_primary_selection_after_primary_demotion_and_pool_restart(
        looper, txnPoolNodeSet, stewardAndWalletForMasterNode,
        txnPoolMasterNodes, tdir, tconf):
    """
    Demote primary and restart the pool.
    Pool should select new primary and have viewNo=0 after restart.
    """

    logger.info(
        "1. turn off the node which has primary replica for master instanse")
    master_node = txnPoolMasterNodes[0]
    client, wallet = stewardAndWalletForMasterNode

    node_data = {ALIAS: master_node.name, SERVICES: []}
    updateNodeData(looper, client, wallet, master_node, node_data)

    restNodes = [
        node for node in txnPoolNodeSet if node.name != master_node.name
    ]
    ensureElectionsDone(looper, restNodes)

    # ensure pool is working properly
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    logger.info("2. restart pool")
    # Stopping existing nodes
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # Starting nodes again by creating `Node` objects since that simulates
    # what happens when starting the node with script
    restartedNodes = []
    for node in txnPoolNodeSet:
        config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
        restartedNode = TestNode(node.name,
                                 config_helper=config_helper,
                                 config=tconf,
                                 ha=node.nodestack.ha,
                                 cliha=node.clientstack.ha)
        looper.add(restartedNode)
        restartedNodes.append(restartedNode)

    restNodes = [
        node for node in restartedNodes if node.name != master_node.name
    ]

    looper.run(checkNodesConnected(restNodes))
    ensureElectionsDone(looper, restNodes)
    checkViewNoForNodes(restNodes, 0)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    primariesIdxs = getPrimaryNodesIdxs(restNodes)
    assert restNodes[primariesIdxs[0]].name != master_node.name
def test_propagate_primary_after_primary_restart_view_1(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward, tdir, allPluginsPath):
    """
    Delay instance change msgs to prevent view change during primary restart
    to test propagate primary for primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores all
    indices correctly
    case viewNo > 0
    """

    ensure_view_change(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=1)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (old_ppseqno > 0)

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, old_primary, stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary, looper, tconf, tdir, allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name][0]
    txnPoolNodeSet[idx] = restartedNode

    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert (new_viewNo == old_viewNo)

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert (new_primary.name == old_primary.name)

    # check ppSeqNo the same
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (new_ppseqno > old_ppseqno)
Example #24
def test_view_change_triggered_after_ordering(looper, txnPoolNodeSet,
                                              sdk_pool_handle,
                                              sdk_wallet_client):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQ_COUNT)
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)

    trigger_view_change(txnPoolNodeSet)

    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    assert checkViewNoForNodes(txnPoolNodeSet) == current_view_no + 1
Example #26
def testInstChangeWithLowerRatioThanDelta(looper, step3, client1):

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 5)

    # wait for every node to run another checkPerformance
    newPerfChecks = waitForNextPerfCheck(looper, step3.nodes, step3.perfChecks)

    # verify all nodes recognize P as degraded
    # for n in step3.nodes:
    #     assert newPerfChecks[n.name].result is False

    # verify all nodes have undergone an instance change
    checkViewNoForNodes(step3.nodes, 1)
def chkViewChange(newViewNo):
    if {n.viewNo for n in step3.nodes} != {newViewNo}:
        tr = []
        for n in step3.nodes:
            tr.append(n.monitor.isMasterThroughputTooLow())
        if all(tr):
            logger.debug('Throughput ratio gone down')
            checkViewNoForNodes(step3.nodes, newViewNo)
        else:
            logger.debug('Master instance has not degraded yet, '
                         'sending more requests')
            sendRandomRequests(wallet1, client1, 1)
            assert False
    else:
        assert True
Example #28
def provoke_and_check_view_change(looper, nodes, newViewNo, sdk_pool_handle, sdk_wallet_client):
    if {n.viewNo for n in nodes} == {newViewNo}:
        return True

    # If throughput of every node has gone down then check that
    # view has changed
    tr = [n.monitor.isMasterThroughputTooLow() for n in nodes]
    if all(tr):
        logger.info('Throughput ratio gone down, its {}'.format(tr))
        checkViewNoForNodes(nodes, newViewNo)
    else:
        logger.info('Master instance has not degraded yet, '
                    'sending more requests')
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client)
        assert False
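provoke_and_check_view_change is meant to be retried until it stops raising. A usage sketch of the wrapper assumed by the example further down this page; provoke_and_wait_for_view_change is treated here as a local helper, not a library API:

def provoke_and_wait_for_view_change(looper, nodes, expected_view_no,
                                     sdk_pool_handle, sdk_wallet_client,
                                     timeout=60):
    # Keep provoking (sending requests) until every node reports the new view.
    return looper.run(
        eventually(provoke_and_check_view_change,
                   looper, nodes, expected_view_no,
                   sdk_pool_handle, sdk_wallet_client,
                   retryWait=1, timeout=timeout))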
def test_pp_seq_no_starts_from_0_in_new_view(
        tconf,
        txnPoolNodeSet,
        looper,
        wallet1,
        client1,
        client1Connected):
    # This test fails since last ordered pre-prepare sequence number is
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)

    def chk(count):
        for node in txnPoolNodeSet:
            assert node.master_replica.last_ordered_3pc[1] == count

    chk(0)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
    chk(5)

    new_view_no = ensure_view_change(looper, txnPoolNodeSet)
    assert new_view_no > old_view_no
    chk(5)  # no new requests yet, so last ordered 3PC is (0,5)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1)
    chk(1)  # new request for new view => last ordered 3PC is (0,1)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
    chk(6)
Example #30
def ensure_view_change(looper,
                       nodes,
                       exclude_from_check=None,
                       custom_timeout=None):
    """
    This method patches the master performance check to return False and thus
    ensures that all given nodes do a view change
    """
    old_view_no = checkViewNoForNodes(nodes)

    old_meths = do_view_change(nodes)

    perf_check_freq = next(iter(nodes)).config.PerfCheckFreq
    timeout = custom_timeout or waits.expectedPoolViewChangeStartedTimeout(
        len(nodes)) + perf_check_freq
    nodes_to_check = nodes if exclude_from_check is None else [
        n for n in nodes if n not in exclude_from_check
    ]
    logger.debug('Checking view no for nodes {}'.format(nodes_to_check))
    looper.run(
        eventually(checkViewNoForNodes,
                   nodes_to_check,
                   old_view_no + 1,
                   retryWait=1,
                   timeout=timeout))

    revert_do_view_change(nodes, old_meths)

    return old_view_no + 1
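ensure_view_change relies on do_view_change / revert_do_view_change to patch the master performance check, as its docstring says. A minimal sketch of that pair, assuming the node's monitor exposes isMasterDegraded:

def do_view_change(nodes):
    # Sketch: make every node believe its master instance has degraded.
    old_meths = {}
    for node in nodes:
        old_meths[node.name] = node.monitor.isMasterDegraded
        node.monitor.isMasterDegraded = lambda: True
    return old_meths


def revert_do_view_change(nodes, old_meths):
    # Restore the original performance check on every node.
    for node in nodes:
        node.monitor.isMasterDegraded = old_meths[node.name]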
Example #31
def test_delay_commits_for_one_node(looper,
                                    txnPoolNodeSet,
                                    sdk_pool_handle,
                                    sdk_wallet_client,
                                    slow_node_is_next_primary,
                                    vc_counts):
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    expected_view_no = current_view_no + 1 if vc_counts == 'once' else current_view_no + 2
    next_primary = get_next_primary_name(txnPoolNodeSet, expected_view_no)
    pretenders = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet) if not r.isPrimary]
    if slow_node_is_next_primary:
        delayed_node = [n for n in pretenders if n.name == next_primary][0]
    else:
        delayed_node = [n for n in pretenders if n.name != next_primary][0]

    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2)

        trigger_view_change(txnPoolNodeSet)
        if vc_counts == 'twice':
            for node in txnPoolNodeSet:
                node.view_changer.start_view_change(current_view_no + 2)

    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
Example #32
def test_pp_seq_not_starts_from_0_in_new_view(tconf, txnPoolNodeSet, looper,
                                              sdk_pool_handle,
                                              sdk_wallet_client):
    # This test fails since last ordered pre-prepare sequence number is
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)

    def chk(count):
        for node in txnPoolNodeSet:
            assert node.master_replica.last_ordered_3pc[1] == count

    batches_count = 0
    chk(batches_count)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    batches_count += 5
    chk(batches_count)

    new_view_no = ensure_view_change(looper, txnPoolNodeSet)
    assert new_view_no > old_view_no
    batches_count += 1
    chk(batches_count)  # After view_change, master primary must initiate 3pc batch

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    batches_count += 1
    chk(batches_count)  # new request for new view => last ordered 3PC is (0,2)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    batches_count += 5
    chk(batches_count)
def testViewNotChangedIfBackupPrimaryDisconnected(txnPoolNodeSet,
                                                  txnPoolNodesLooper, tconf):
    """
    View change does not occur when a backup's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet
    looper = txnPoolNodesLooper

    viewNoBefore = checkViewNoForNodes(nodes)
    primaryNodeForBackupInstance1Before = nodeByName(
        nodes, primaryNodeNameForInstance(nodes, 1))

    # Exercise
    stopNodes([primaryNodeForBackupInstance1Before], looper)

    # Verify
    remainingNodes = set(nodes) - {primaryNodeForBackupInstance1Before}

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    def assertNewPrimariesElected():
        with pytest.raises(AssertionError):
            assert checkViewNoForNodes(remainingNodes) == viewNoBefore + 1
        viewNoAfter = checkViewNoForNodes(remainingNodes, viewNoBefore)
        assert viewNoBefore == viewNoAfter

    looper.run(eventually(assertNewPrimariesElected, retryWait=1, timeout=30))
Example #34
def test_resend_instance_change_messages(looper, txnPoolNodeSet, tconf,
                                         sdk_wallet_steward, sdk_pool_handle):
    primary_node = txnPoolNodeSet[0]
    old_view_no = checkViewNoForNodes(txnPoolNodeSet, 0)
    assert primary_node.master_replica.isPrimary
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(3 * tconf.NEW_VIEW_TIMEOUT))
    check_sent_instance_changes_count(txnPoolNodeSet, 0)
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_node,
                                            stopNode=False)
    txnPoolNodeSet.remove(primary_node)
    looper.run(
        eventually(check_count_connected_node,
                   txnPoolNodeSet,
                   4,
                   timeout=5,
                   acceptableExceptions=[AssertionError]))
    looper.run(
        eventually(check_sent_instance_changes_count,
                   txnPoolNodeSet,
                   1,
                   timeout=2 * tconf.NEW_VIEW_TIMEOUT))

    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet,
                   old_view_no + 1,
                   timeout=3 * tconf.NEW_VIEW_TIMEOUT))
    ensureElectionsDone(looper, txnPoolNodeSet)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, looper, wallet1,
                                               client1, client1Connected,
                                               tconf):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    viewNoBefore = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    stopNodes([old_pr_node], looper)
    looper.removeProdable(old_pr_node)
    remainingNodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remainingNodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remainingNodes, viewNoBefore + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remainingNodes)
    new_pr_node = get_master_primary_node(remainingNodes)
    assert old_pr_node != new_pr_node

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
def test_resend_instance_change_messages(looper,
                                         txnPoolNodeSet,
                                         tconf,
                                         sdk_wallet_steward,
                                         sdk_pool_handle):
    primary_node = txnPoolNodeSet[0]
    old_view_no = checkViewNoForNodes(txnPoolNodeSet, 0)
    assert primary_node.master_replica.isPrimary
    for n in txnPoolNodeSet:
        n.nodeIbStasher.delay(icDelay(3 * tconf.INSTANCE_CHANGE_TIMEOUT))
    assert set([n.view_changer.instance_change_rounds for n in txnPoolNodeSet]) == {0}
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_node,
                                            stopNode=False)
    txnPoolNodeSet.remove(primary_node)
    looper.run(eventually(partial(check_count_connected_node, txnPoolNodeSet, 4),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    looper.runFor(2*tconf.INSTANCE_CHANGE_TIMEOUT)
    assert set([n.view_changer.instance_change_rounds for n in txnPoolNodeSet]) == {1}

    looper.runFor(tconf.INSTANCE_CHANGE_TIMEOUT)
    looper.run(eventually(partial(checkViewNoForNodes, txnPoolNodeSet, expectedViewNo=old_view_no + 1),
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_view_not_changed(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Test that a view change is not done when the performance of master does
    not go down
    """
    """
    Send multiple requests to the client and delay some requests by all
    backup instances to ensure master instance
    is always faster than backup instances and there is no view change
    """

    # Delay PRE-PREPARE for all backup protocol instances so master performs
    # better
    for i in range(1, F + 1):
        nonPrimReps = getNonPrimaryReplicas(txnPoolNodeSet, i)
        # type: Iterable[TestReplica]
        for r in nonPrimReps:
            r.node.nodeIbStasher.delay(ppDelay(10, i))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)

    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=0)
Example #38
def testViewNotChanged(looper: Looper, nodeSet: TestNodeSet, up, client1):
    """
    Test that a view change is not done when the performance of master does
    not go down
    """
    """
    Send multiple requests to the client and delay some requests by all
    backup instances to ensure master instance
    is always faster than backup instances and there is no view change
    """

    # Delay PRE-PREPARE for all backup protocol instances so master performs
    # better
    for i in range(1, F + 1):
        nonPrimReps = getNonPrimaryReplicas(nodeSet, i)
        # type: Iterable[TestReplica]
        for r in nonPrimReps:
            r.node.nodeIbStasher.delay(ppDelay(10, i))

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 5)

    checkViewNoForNodes(nodeSet, 0)
def test_last_ordered_3pc_reset_if_more_than_new_view(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Check that if last_ordered_3pc's viewNo on a Replica is greater than the new viewNo after view change,
    then last_ordered_3pc is reset to (0,0).
    It can be that last_ordered_3pc was set for the previous view, since it's set during catch-up

    Example: a Node has last_ordered = (1, 300), and then the whole pool except this node restarted.
    The new viewNo is 0, but last_ordered is (1, 300), so all new requests will be discarded by this Node
    if we don't reset last_ordered_3pc
    """
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        node.master_replica.last_ordered_3pc = (old_view_no + 2, 100)

    ensure_view_change_complete(looper, txnPoolNodeSet, customTimeout=60)
    view_no = checkViewNoForNodes(txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert (view_no, 0) == node.master_replica.last_ordered_3pc

    # Make sure the pool is working
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_no_propagated_future_view_change_while_view_change(txnPoolNodeSet, looper):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate view change in progress
    lagged_node.view_changer.view_change_in_progress = True
    old_view_no = checkViewNoForNodes([lagged_node])

    initial_vhdc = \
        lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)

    # delay INSTANCE_CHANGE on the lagged node, so all nodes except the lagging one finish View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # check that the lagged node received 3 Future VCD messages, but didn't start a new view change
        assert len(other_nodes) + initial_vhdc ==\
               lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)
        assert old_view_no == checkViewNoForNodes([lagged_node])
def test_master_primary_different_from_previous_view_for_itself(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    After a view change, the primary must be different from the previous
    primary for the master instance; it does not matter for other instances.
    This is broken into 2 tests; in this one the old primary is malign and
    votes for itself but is still not made primary in the next view.
    """
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    pr = slow_primary(txnPoolNodeSet, 0, delay=10)
    old_pr_node = pr.node

    def _get_undecided_inst_id(self):
        undecideds = [i for i, r in self.replicas
                      if r.isPrimary is None]
        # Try to nominate for the master instance
        return undecideds, 0

    # Patching old primary's elector's method to nominate itself
    # again for the new view
    old_pr_node.elector._get_undecided_inst_id = types.MethodType(
        _get_undecided_inst_id, old_pr_node.elector)

    # View change happens
    provoke_and_wait_for_view_change(looper,
                                     txnPoolNodeSet,
                                     old_view_no + 1,
                                     sdk_pool_handle,
                                     sdk_wallet_client)

    # Elections done
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    # New primary is not same as old primary
    assert getPrimaryReplica(txnPoolNodeSet, 0).node.name != old_pr_node.name

    # All other nodes discarded the nomination by the old primary
    for node in txnPoolNodeSet:
        if node != old_pr_node:
            assert countDiscarded(node.elector,
                                  'of master in previous view too') == 1

    # The new primary can still process requests
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              5)
def test_view_changes_if_master_primary_disconnected(txnPoolNodeSet, looper, sdk_pool_handle,
                                                     sdk_wallet_client, tdir, tconf, allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_pr_node, stopNode=True)
    looper.removeProdable(old_pr_node)

    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)

    # Check that the old primary can rejoin the pool and still function
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf,
                                     tdir, allPluginsPath)

    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(eventually(checkViewNoForNodes,
                          txnPoolNodeSet, old_view_no + 1, timeout=tconf.VIEW_CHANGE_TIMEOUT))
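    # The restarted old primary should have caught up to the current view,
    # i.e. _start_view_change_if_possible returned True at least once after
    # enough next-view indications were collected.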
    assert len(getAllReturnVals(old_pr_node.view_changer,
                                old_pr_node.view_changer._start_view_change_if_possible,
                                compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert not old_pr_node.view_changer._next_view_indications
def test_pp_seq_no_starts_from_0_in_new_view(tconf, txnPoolNodeSet, looper,
                                             sdk_pool_handle, sdk_wallet_client):
    # Check that the ppSeqNo part of last_ordered_3pc restarts in a new view:
    # the first batch ordered after the view change gets ppSeqNo 1.
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)

    def chk(count):
        for node in txnPoolNodeSet:
            assert node.master_replica.last_ordered_3pc[1] == count

    chk(0)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)
    chk(5)

    new_view_no = ensure_view_change(looper, txnPoolNodeSet)
    assert new_view_no > old_view_no
    chk(5)  # nothing ordered in the new view yet, so ppSeqNo is still 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)
    chk(1)  # first request ordered in the new view => ppSeqNo restarts from 1

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)
    chk(6)
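# Illustrative sketch (not part of the original tests): ppSeqNo is counted per
# view, so the batches ordered above get 3PC keys roughly like (v, 1)..(v, 5)
# before the view change and (v + 1, 1) for the first batch after it, which is
# what chk(5) / chk(1) / chk(6) observe via last_ordered_3pc[1].
def _next_3pc_key(last_ordered_3pc, current_view_no):
    view_no, pp_seq_no = last_ordered_3pc
    if current_view_no != view_no:
        # a batch ordered in a fresh view restarts the sequence from 1
        return (current_view_no, 1)
    return (view_no, pp_seq_no + 1)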
def test_view_not_changed_when_short_disconnection(txnPoolNodeSet, looper,
                                                   sdk_pool_handle, sdk_wallet_client, tconf):
    """
    When the primary is disconnected, but not for long enough to trigger the
    timeout, a view change should not happen
    """
    pr_node = get_master_primary_node(txnPoolNodeSet)
    view_no = checkViewNoForNodes(txnPoolNodeSet)

    lost_pr_calls = {node.name: node.spylog.count(
        node.lost_master_primary.__name__) for node in txnPoolNodeSet
        if node != pr_node}

    prp_inst_chg_calls = {node.name: node.spylog.count(
        node.propose_view_change.__name__) for node in txnPoolNodeSet
        if node != pr_node}

    recv_inst_chg_calls = {node.name: node.spylog.count(
        node.view_changer.process_instance_change_msg.__name__) for node in txnPoolNodeSet
        if node != pr_node}

    def chk1():
        # Check that non-primary nodes detect losing connection with the
        # primary
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.lost_master_primary.__name__) \
                       > lost_pr_calls[node.name]

    def chk2():
        # An instance change is scheduled (proposed) but not actually sent,
        # since the primary joins again
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.propose_view_change.__name__) \
                       > prp_inst_chg_calls[node.name]
                assert node.view_changer.spylog.count(node.view_changer.process_instance_change_msg.__name__) \
                       == recv_inst_chg_calls[node.name]

    # Disconnect master's primary
    for node in txnPoolNodeSet:
        if node != pr_node:
            node.nodestack.getRemote(pr_node.nodestack.name).disconnect()

    timeout = min(tconf.ToleratePrimaryDisconnection - 1, 1)
    looper.run(eventually(chk1, retryWait=.2, timeout=timeout))

    # Reconnect master's primary
    for node in txnPoolNodeSet:
        if node != pr_node:
            node.nodestack.retryDisconnected()

    looper.run(eventually(chk2, retryWait=.2, timeout=timeout + 1))

    def chk3():
        # Check the view does not change
        with pytest.raises(AssertionError):
            assert checkViewNoForNodes(txnPoolNodeSet) == view_no + 1

    looper.run(eventually(chk3, retryWait=1, timeout=10))

    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)
def test_quorum_after_f_plus_2_nodes_including_primary_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client):
    timeout = sdk_eval_timeout(1, len(txnPoolNodeSet))
    nodes = txnPoolNodeSet

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              1)

    stop_node(nodes[0], looper, nodes)
    waitForViewChange(looper, nodes[1:], expectedViewNo=1)
    ensureElectionsDone(looper, nodes[1:],
                        instances_list=range(getRequiredInstances(nodeCount)))

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              1)

    stop_node(nodes[1], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[2:], expectedViewNo=1)

    sdk_reqs3 = sdk_send_random_requests(looper,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs3, timeout=timeout)
        sdk_check_reply(req_res[0])

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    sdk_reqs4 = sdk_send_random_requests(looper,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs4, timeout=timeout)
        sdk_check_reply(req_res[0])

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    sdk_reqs5 = sdk_send_random_requests(looper,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs5, timeout=timeout)
        sdk_check_reply(req_res[0])

    nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes[1:],
                        instances_list=range(getRequiredInstances(nodeCount)),
                        customTimeout=60)
    checkViewNoForNodes(nodes[1:], expectedViewNo=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              1)

    nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        instances_list=range(getRequiredInstances(nodeCount)),
                        customTimeout=60)
    checkViewNoForNodes(nodes, expectedViewNo=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              1)
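# Illustrative sketch (not part of the original tests), assuming plenum's
# usual quorum sizes: with n nodes the pool tolerates f = (n - 1) // 3 faulty
# nodes and needs n - f of them alive to order requests. Turning off f + 2
# nodes leaves the pool below that quorum, which is why the requests above
# time out until enough nodes are started again.
def _can_order(n_total, n_alive):
    f = (n_total - 1) // 3
    return n_alive >= n_total - f
# e.g. for a 4-node pool: _can_order(4, 3) -> True, _can_order(4, 2) -> False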
def test_disconnected_node_with_lagged_view_pulls_up_its_view_on_reconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    Verifies that a disconnected node with a lagged view accepts
    the current view from the other nodes on re-connection.
    Steps:
    1. Provoke view change to 1.
    2. Ensure that all the nodes complete view change to 1.
    3. Disconnect one node from the rest of the nodes in the pool.
    4. Provoke view change to 2.
    5. Ensure that all the nodes except for the disconnected one complete
    view change to 2 and the disconnected node remains in the view 1.
    6. Provoke view change to 3.
    7. Ensure that all the nodes except for the disconnected one complete
    view change to 3 and the disconnected node remains in the view 1.
    8. Connect the disconnected node to the rest of the nodes in the pool.
    9. Ensure that the re-connected node completes view change to 3.
    10. Ensure that all the nodes participate in consensus.
    """
    checkViewNoForNodes(txnPoolNodeSet, 0)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    lagged_node = getNonPrimaryReplicas(txnPoolNodeSet)[-1].node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagged_node,
                                            stopNode=False)
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper, other_nodes,
                        instances_list=range(getRequiredInstances(len(txnPoolNodeSet))))
    ensure_all_nodes_have_same_data(looper, other_nodes)
    checkViewNoForNodes(other_nodes, 2)
    checkViewNoForNodes([lagged_node], 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper, other_nodes,
                        instances_list=range(getRequiredInstances(len(txnPoolNodeSet))))
    ensure_all_nodes_have_same_data(looper, other_nodes)
    checkViewNoForNodes(other_nodes, 3)
    checkViewNoForNodes([lagged_node], 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, lagged_node)
    waitForViewChange(looper, [lagged_node], 3,
                      customTimeout=waits.expectedPoolElectionTimeout(
                          len(txnPoolNodeSet)))
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, 3)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_primary_selection_after_primary_demotion_and_view_changes(looper, txnPoolNodeSet,
                                                                   sdk_pool_handle,
                                                                   sdk_wallet_steward,
                                                                   txnPoolMasterNodes):
    """
    Demote primary and do multiple view changes forcing primaries rotation.
    Demoted primary should be skipped without additional view changes.
    """

    viewNo0 = checkViewNoForNodes(txnPoolNodeSet)

    logger.info("1. turn off the node which has primary replica for master instanse, "
                " this should trigger view change")
    master_node = txnPoolMasterNodes[0]
    node_dest = hexToFriendly(master_node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward,
                         sdk_pool_handle,
                         node_dest, master_node.name,
                         None, None,
                         None, None,
                         services=[])

    restNodes = [node for node in txnPoolNodeSet
                 if node.name != master_node.name]
    ensureElectionsDone(looper, restNodes)

    viewNo1 = checkViewNoForNodes(restNodes)

    assert viewNo1 == viewNo0 + 1
    assert master_node.viewNo == viewNo0
    assert len(restNodes[0].replicas) == 1  # only one instance left
    assert restNodes[0].replicas[0].primaryName != master_node.name

    # ensure pool is working properly
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    logger.info("2. force view change 2 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)

    viewNo2 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo2 == viewNo1 + 1

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    logger.info("3. force view change 3 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)
    viewNo3 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo3 == viewNo2 + 1

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    logger.info("4. force view change 4 and check final viewNo")
    ensure_view_change_complete(looper, restNodes)
    viewNo4 = checkViewNoForNodes(restNodes)
    assert restNodes[0].replicas[0].primaryName != master_node.name
    assert viewNo4 == viewNo3 + 1

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)
def test_view_change_after_back_to_quorum_with_disconnected_primary(txnPoolNodeSet, looper,
                                                                    sdk_pool_handle,
                                                                    sdk_wallet_client,
                                                                    tdir, tconf, allPluginsPath):
    assert len(txnPoolNodeSet) == 4

    pr_node = get_master_primary_node(txnPoolNodeSet)
    assert pr_node.name == "Alpha"

    # 1. Initiate a view change by restarting the primary (Alpha)
    nodes = ensure_view_change_by_primary_restart(looper,
                                                  txnPoolNodeSet,
                                                  tconf,
                                                  tdir,
                                                  allPluginsPath,
                                                  customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)

    # Now primary should be Beta
    pr_node = get_master_primary_node(nodes)
    assert pr_node.name == "Beta"

    # 2. Stop non-primary node Delta; no view changes are expected
    non_primary_to_stop = [n for n in nodes if n.name == "Delta"][0]
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, non_primary_to_stop)
    looper.removeProdable(non_primary_to_stop)

    remaining_nodes = list(set(nodes) - {non_primary_to_stop})
    # The primary is going to be stopped; remember the instance change message
    # count to ensure that no view change happens while the number of connected
    # nodes is less than the quorum.
    ic_cnt = {}
    for n in remaining_nodes:
        ic_cnt[n.name] = n.view_changer.spylog.count(ViewChanger.sendInstanceChange.__name__)

    # 3. Disconnect primary
    disconnect_node_and_ensure_disconnected(
        looper, remaining_nodes, pr_node)
    looper.removeProdable(pr_node)

    # Wait for more than the ToleratePrimaryDisconnection timeout and check
    # that no new INSTANCE_CHANGE messages were sent.
    looper.runFor(tconf.ToleratePrimaryDisconnection + 5)
    remaining_nodes = list(set(remaining_nodes) - {pr_node})
    for n in remaining_nodes:
        assert ic_cnt[n.name] == n.view_changer.spylog.count(ViewChanger.sendInstanceChange.__name__)

    view_no = checkViewNoForNodes(remaining_nodes)

    # 4. Start Delta (non-primary), now primary (Beta) is disconnected but there is a quorum
    # to choose a new one.
    restartedNode = start_stopped_node(non_primary_to_stop, looper, tconf,
                                       tdir, allPluginsPath,
                                       delay_instance_change_msgs=False)
    remaining_nodes = remaining_nodes + [restartedNode]

    # 5. Check that view change happened.
    waitForViewChange(looper, remaining_nodes, expectedViewNo=(view_no + 1),
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)

    # ensure pool is working properly
    sdk_send_random_and_check(looper, remaining_nodes, sdk_pool_handle,
                              sdk_wallet_client, 3)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
def test_view_not_changed_when_primary_disconnected_from_less_than_quorum(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Fewer than a quorum of nodes lose connection with the primary; this should
    not trigger a view change as the protocol can move ahead
    """
    pr_node = get_master_primary_node(txnPoolNodeSet)
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    partitioned_rep = npr[0]
    partitioned_node = partitioned_rep.node

    lost_pr_calls = partitioned_node.spylog.count(
        partitioned_node.lost_master_primary.__name__)

    recv_inst_chg_calls = {node.name: node.spylog.count(
        node.view_changer.process_instance_change_msg.__name__) for node in txnPoolNodeSet
        if node != partitioned_node and node != pr_node}

    view_no = checkViewNoForNodes(txnPoolNodeSet)
    orig_retry_meth = partitioned_node.nodestack.retryDisconnected

    def wont_retry(self, exclude=None):
        # Do not attempt to retry connection
        pass

    # Simulate a partition: disconnect the node from only the master's primary
    # and don't retry connecting to it
    partitioned_node.nodestack.retryDisconnected = types.MethodType(
        wont_retry, partitioned_node.nodestack)
    r = partitioned_node.nodestack.getRemote(pr_node.nodestack.name)
    r.disconnect()

    def chk1():
        # Check that the partitioned node detects losing connection with the
        # primary and sends an instance change, which is received by the other
        # nodes except the primary (since it's disconnected from the primary)
        assert partitioned_node.spylog.count(
            partitioned_node.lost_master_primary.__name__) > lost_pr_calls
        for node in txnPoolNodeSet:
            if node != partitioned_node and node != pr_node:
                assert node.view_changer.spylog.count(
                    node.view_changer.process_instance_change_msg.__name__) > recv_inst_chg_calls[node.name]

    looper.run(eventually(chk1, retryWait=1, timeout=10))

    def chk2():
        # Check the view does not change
        with pytest.raises(AssertionError):
            assert checkViewNoForNodes(txnPoolNodeSet) == view_no + 1

    looper.run(eventually(chk2, retryWait=1, timeout=10))
    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)

    # Repair the connection so the node is no longer partitioned
    partitioned_node.nodestack.retryDisconnected = types.MethodType(
        orig_retry_meth, partitioned_node.nodestack)

    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)

    # Partitioned node should have the same ledger and state as others
    # eventually
    waitNodeDataEquality(looper, partitioned_node,
                         *[n for n in txnPoolNodeSet if n != partitioned_node])
def test_reconnect_primary_and_not_primary(looper,
                                        txnPoolNodeSet,
                                        sdk_wallet_steward,
                                        sdk_pool_handle,
                                        tconf):
    """
    Test steps:
    Pool of 7 nodes.
    The number of instances must be 3.
    1. Choose a node that is not a primary on any replica (index 3).
    2. Disconnect it.
    3. Ensure that the number of replicas has decreased.
    4. Choose the current primary node (must be index 0).
    5. Disconnect the primary.
    6. Ensure that the view change completes and a new primary is selected.
    7. Add the node from step 1 back.
    8. Add the node from step 4 back.
    9. Check that the instance count is f + 1 = 3.
    10. Send some requests and check that the pool works.
    """
    restNodes = set(txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)
    assert txnPoolNodeSet[0].master_replica.isPrimary
    node_after_all_primary = txnPoolNodeSet[3]
    # Disconnect the node that follows all primaries (i.e. is not a primary on any replica)
    disconnect_node_and_ensure_disconnected(looper,
                                            restNodes,
                                            node_after_all_primary,
                                            stopNode=False)
    # -------------------------------------------------------
    restNodes.remove(node_after_all_primary)
    looper.run(eventually(partial(check_count_connected_node, restNodes, 6),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle, sdk_wallet_steward, 5)
    # Get the node that is primary for the master replica
    primary_node = txnPoolNodeSet[0]
    assert primary_node.master_replica.isPrimary
    old_view_no = checkViewNoForNodes(restNodes, 0)
    # disconnect primary node
    disconnect_node_and_ensure_disconnected(looper,
                                            restNodes,
                                            primary_node,
                                            stopNode=False)
    # -------------------------------------------------------
    restNodes.remove(primary_node)
    looper.run(eventually(partial(check_count_connected_node, restNodes, 5),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    looper.run(eventually(partial(checkViewNoForNodes, restNodes, expectedViewNo=old_view_no + 1),
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle, sdk_wallet_steward, 5)
    logger.debug("restNodes: {}".format(restNodes))
    restNodes.add(node_after_all_primary)
    # Bring back the node that follows all primaries
    reconnect_node_and_ensure_connected(looper, restNodes, node_after_all_primary)
    looper.run(checkNodesConnected(restNodes,
                                   customTimeout=5*tconf.RETRY_TIMEOUT_RESTRICTED))
    looper.run(eventually(partial(check_count_connected_node, restNodes, 6),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    assert len(set([len(n.replicas) for n in restNodes])) == 1
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle, sdk_wallet_steward, 5)
    # Bring back the primary node
    restNodes.add(primary_node)
    reconnect_node_and_ensure_connected(looper, restNodes, primary_node)
    looper.run(checkNodesConnected(restNodes,
                                   customTimeout=5*tconf.RETRY_TIMEOUT_RESTRICTED))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle, sdk_wallet_steward, 5)
def test_quorum_after_f_plus_2_nodes_but_not_primary_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf,
        txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    nodes = txnPoolNodeSet

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              1)

    stop_node(nodes[4], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:4], expectedViewNo=0)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              1)

    stop_node(nodes[3], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:3], expectedViewNo=0)
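    # Fewer than a consensus quorum of nodes are now connected, so the requests
    # below are expected to time out until nodes are started again.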

    sdk_reqs3 = sdk_send_random_requests(looper,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs3)
        sdk_check_reply(req_res[0])

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:2], expectedViewNo=0)

    sdk_reqs4 = sdk_send_random_requests(looper,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs4)
        sdk_check_reply(req_res[0])

    nodes[4] = start_stopped_node(nodes[4], looper, tconf, tdir, allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:2] + nodes[4:], expectedViewNo=0)

    sdk_reqs5 = sdk_send_random_requests(looper,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs5)
        sdk_check_reply(req_res[0])

    nodes[3] = start_stopped_node(nodes[3], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes[:2] + nodes[3:],
                        instances_list=range(getRequiredInstances(nodeCount)))
    checkViewNoForNodes(nodes[:2] + nodes[3:], expectedViewNo=0)

    sdk_reqs6 = sdk_send_random_requests(looper,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         1)
    sdk_get_replies(looper, sdk_reqs6)

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        instances_list=range(getRequiredInstances(nodeCount)))
    checkViewNoForNodes(nodes, expectedViewNo=0)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              1)
def test_view_change_gc_in_between_3pc_all_nodes_delays(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Test that the garbage collector compares the whole 3PC key (viewNo, ppSeqNo)
    and does not remove messages from the nodes' queues that have a higher
    viewNo than the last ordered one, even if their ppSeqNo is less or equal
    """

    numNodes = len(txnPoolNodeSet)
    viewNo = checkViewNoForNodes(txnPoolNodeSet)

    # 1 send two messages one by one separately to make the
    #   node pool work with two batches
    #    -> last_ordered_3pc = (+0, 2) [+0 means relative to the initial state]
    #       (last_ordered_3pc here and further is tracked
    #       for the master instance only, because non-master ones have
    #       specific management logic that we don't care about in
    #       this test, see Replica::_setup_for_non_master)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    last_ordered_3pc = (viewNo, 2)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 2)

    # 2 do view change
    #    -> GC should remove it from nodes' queues
    #    -> viewNo = +1
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 0)

    # 3 slow down processing of 3PC messages for all nodes (all replica
    #   instances) randomly and send one more message
    #    -> not ordered (last_ordered_3pc still equals (+0, 2)), but the primaries
    #       should at least send PRE-PREPAREs
    # TODO: is waiting only until the primaries have sent a PRE-PREPARE enough?
    propagationTimeout = waits.expectedClientRequestPropagationTime(numNodes)
    delay_3pc_messages(txnPoolNodeSet,
                       0,
                       delay=propagationTimeout * 2)
    delay_3pc_messages(txnPoolNodeSet,
                       1,
                       delay=propagationTimeout * 2)
    requests = sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)

    def checkPrePrepareSentAtLeastByPrimary():
        for node in txnPoolNodeSet:
            for replica in node.replicas.values():
                if replica.isPrimary:
                    assert len(replica.sentPrePrepares)

    looper.run(eventually(checkPrePrepareSentAtLeastByPrimary,
                          retryWait=0.1,
                          timeout=propagationTimeout))
    # 4 do view change
    #    -> GC shouldn't remove anything because
    #       last_ordered_3pc (+0, 2) < last message's 3pc key (+1, 1)
    #    -> viewNo = +2
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNoNew = checkViewNoForNodes(txnPoolNodeSet)
    # another view change could happen because of slow nodes
    assert viewNoNew - viewNo in (1, 2)
    viewNo = viewNoNew
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 5 reset delays and wait for replies
    #    -> new primaries should send new 3pc for last message
    #       with 3pc key (+2, 1)
    #    -> they should be ordered
    #    -> last_ordered_3pc = (+2, 1)
    reset_delays_and_process_delayeds(txnPoolNodeSet)
    sdk_get_replies(looper, [requests])

    checkViewNoForNodes(txnPoolNodeSet, viewNo)
    last_ordered_3pc = (viewNo, 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 1)

    # 6 do view change
    #    -> GC should remove them
    ensure_view_change_complete(looper, txnPoolNodeSet)

    viewNo = checkViewNoForNodes(txnPoolNodeSet, viewNo + 1)
    check_nodes_last_ordered_3pc(txnPoolNodeSet, last_ordered_3pc)
    check_nodes_requests_size(txnPoolNodeSet, 0)
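# Illustrative sketch (not part of the original tests): the property checked
# above reduces to comparing the whole (viewNo, ppSeqNo) key, not ppSeqNo
# alone. With lexicographic tuple comparison a message keyed (+1, 1) is newer
# than last_ordered_3pc == (+0, 2) even though its ppSeqNo is smaller, so GC
# must keep it.
def _safe_to_collect(msg_3pc_key, last_ordered_3pc):
    # safe to garbage-collect only if the full key is not newer than last ordered
    return msg_3pc_key <= last_ordered_3pc
# e.g. _safe_to_collect((0, 1), (0, 2)) -> True   (removed by GC in step 2)
#      _safe_to_collect((1, 1), (0, 2)) -> False  (kept across step 4)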