Example #1
def restart_nodes(looper, nodeSet, restart_set, tconf, tdir, allPluginsPath,
                  after_restart_timeout=None, start_one_by_one=True, wait_for_elections=True):
    for node_to_stop in restart_set:
        node_to_stop.cleanupOnStopping = True
        node_to_stop.stop()
        looper.removeProdable(node_to_stop)

    rest_nodes = [n for n in nodeSet if n not in restart_set]
    for node_to_stop in restart_set:
        ensure_node_disconnected(looper, node_to_stop, nodeSet, timeout=2)

    if after_restart_timeout:
        looper.runFor(after_restart_timeout)

    for node_to_restart in restart_set.copy():
        config_helper = PNodeConfigHelper(node_to_restart.name, tconf, chroot=tdir)
        restarted_node = TestNode(node_to_restart.name, config_helper=config_helper, config=tconf,
                                  pluginPaths=allPluginsPath, ha=node_to_restart.nodestack.ha,
                                  cliha=node_to_restart.clientstack.ha)
        looper.add(restarted_node)

        idx = nodeSet.index(node_to_restart)
        nodeSet[idx] = restarted_node
        idx = restart_set.index(node_to_restart)
        restart_set[idx] = restarted_node

        rest_nodes += [restarted_node]
        if start_one_by_one:
            looper.run(checkNodesConnected(rest_nodes))

    if not start_one_by_one:
        looper.run(checkNodesConnected(nodeSet))

    if wait_for_elections:
        ensureElectionsDone(looper=looper, nodes=nodeSet)
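
A minimal usage sketch for the restart_nodes helper above, reusing only fixtures and helpers that already appear on this page; the test name and the choice of restart_set are hypothetical, not taken from the original suite.

def test_restart_two_nodes_sketch(looper, txnPoolNodeSet, tconf, tdir, allPluginsPath,
                                  sdk_pool_handle, sdk_wallet_client):
    # restart the last two nodes one by one and wait for elections
    restart_set = txnPoolNodeSet[-2:]
    restart_nodes(looper, txnPoolNodeSet, restart_set, tconf, tdir, allPluginsPath,
                  start_one_by_one=True, wait_for_elections=True)
    # both txnPoolNodeSet and restart_set now reference the restarted TestNode objects
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
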
def test_restart_majority_to_same_view(looper, txnPoolNodeSet, tconf, tdir, allPluginsPath,
                                        sdk_pool_handle, sdk_wallet_client):
    # Add transaction to ledger
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

    majority = txnPoolNodeSet[:3]
    minority = txnPoolNodeSet[3:]

    # Restart majority group
    tm = tconf.ToleratePrimaryDisconnection + waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    majority_before_restart = majority.copy()
    restart_nodes(looper, txnPoolNodeSet, majority, tconf, tdir, allPluginsPath,
                  after_restart_timeout=tm, start_one_by_one=False, wait_for_elections=False)
    ensureElectionsDone(looper, majority, instances_list=range(2))

    # Check that nodes in minority group are aware that they might have inconsistent 3PC state
    for node in minority:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 1

    # Check that nodes in majority group didn't think they might have inconsistent 3PC state
    for node in majority_before_restart:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 0

    # Check that nodes in majority group don't think they might have inconsistent 3PC state
    for node in majority:
        assert node.spylog.count(node.on_inconsistent_3pc_state) == 0

    # Restart minority group
    restart_nodes(looper, txnPoolNodeSet, minority, tconf, tdir, allPluginsPath,
                  after_restart_timeout=tm, start_one_by_one=False)

    # Check that all nodes are still functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
def test_view_change_with_different_prepare_certificate(looper, txnPoolNodeSet,
                                                        sdk_pool_handle,
                                                        sdk_wallet_client):
    """
    Check that a node without pre-prepare but with quorum of prepares wouldn't
    use this transaction as a last in prepare certificate
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_node = txnPoolNodeSet[-1]
    # Delay PrePrepares and message responses containing PrePrepares.
    with delay_rules(slow_node.nodeIbStasher, ppDelay(delay=sys.maxsize)):
        with delay_rules(slow_node.nodeIbStasher,
                         msg_rep_delay(delay=sys.maxsize,
                                       types_to_delay=[PREPREPARE, ])):
            last_ordered = slow_node.master_replica.last_ordered_3pc
            sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
            looper.run(eventually(check_prepare_certificate,
                                  txnPoolNodeSet[0:-1],
                                  last_ordered[1] + 1))

            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()
            assert slow_node.master_replica.last_prepared_certificate_in_view() == \
                   (0, last_ordered[1])
            ensureElectionsDone(looper, txnPoolNodeSet)
def testPrimarySelectionAfterViewChange(  # noqa
        looper,
        txnPoolNodeSet,
        primaryReplicas,
        catchup_complete_count):
    """
    Test that primary replica of a protocol instance shifts to a new node after
    a view change.
    """
    # TODO: This test can fail due to view change.

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    for n in txnPoolNodeSet:
        assert n.spylog.count(
            n.allLedgersCaughtUp) > catchup_complete_count[n.name]

    # Primary replicas before view change
    prBeforeVC = primaryReplicas

    # Primary replicas after view change
    instanceCount = getNoInstances(nodeCount)
    prAfterVC = [getPrimaryReplica(txnPoolNodeSet, i) for i in range(instanceCount)]

    # Primary replicas have moved to the next node
    for br, ar in zip(prBeforeVC, prAfterVC):
        assert ar.node.rank - br.node.rank == 1

    check_rank_consistent_across_each_node(txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
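
The rank assertion above suggests that primaries rotate round-robin by node rank on each view change. A rough sketch of that relationship under this assumption; the helper is hypothetical and not part of the test suite.

def expected_primary_rank(view_no, inst_id, node_count):
    # assumed round-robin rule: the primary of instance inst_id in view view_no
    # is the node whose rank equals (view_no + inst_id) % node_count
    return (view_no + inst_id) % node_count

# e.g. after a view change from view 0 to view 1 the master primary (inst_id=0)
# is expected to move from the node with rank 0 to the node with rank 1
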
def test_not_set_H_as_maxsize_for_backup_if_is_primary(looper,
                                                       txnPoolNodeSet,
                                                       sdk_pool_handle,
                                                       sdk_wallet_steward,
                                                       tconf,
                                                       tdir,
                                                       allPluginsPath):
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    primary_on_backup = txnPoolNodeSet[2]
    assert primary_on_backup.replicas._replicas[1].isPrimary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_on_backup,
                                            stopNode=True)
    looper.removeProdable(primary_on_backup)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward,
                              LOG_SIZE)
    restarted_node = start_stopped_node(primary_on_backup,
                                        looper,
                                        tconf,
                                        tdir,
                                        allPluginsPath)
    txnPoolNodeSet[2] = restarted_node
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    assert restarted_node.replicas._replicas[1].isPrimary
    assert restarted_node.replicas._replicas[1].h == 0
    assert restarted_node.replicas._replicas[1].H == LOG_SIZE
def testPrimaryElectionCase4(case4Setup, looper):
    """
    Case 4 - A node making multiple primary declarations for a particular node.
    Consider 4 nodes A, B, C and D. Lets say node B is malicious and is
    repeatedly declaring Node D as primary
    """
    allNodes = case4Setup
    A, B, C, D = allNodes

    looper.run(checkNodesConnected(allNodes))

    # Node B sends multiple declarations of node D as primary for the 0th
    # protocol instance to all nodes
    for i in range(5):
        B.send(Primary(D.name, 0, B.viewNo))

    # None of nodes A, C and D (node B is malicious, so it is not considered)
    # should have more than one primary declaration for node D, since node D
    # is slow. The single primary declaration for node D that nodes A, C and D
    # might have would come from node B.
    def x():
        primDecs = list(node.elector.primaryDeclarations[0].values())
        assert primDecs.count(D.name) <= 1

    for node in (A, C, D):
        looper.run(eventually(x, retryWait=.5, timeout=2))

    ensureElectionsDone(looper=looper, nodes=allNodes,
                        retryWait=1, timeout=45)

    # Node D should not have any primary replica
    assert not D.hasPrimary
def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet,
                                            nodes_slow_to_process_catchup_reqs,
                                            sdk_node_created_after_some_txns):
    """
    A new node that joins after some transactions should stash new transactions
    until it has caught up
    :return:
    """

    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_created_after_some_txns
    txnPoolNodeSet.append(new_node)
    old_nodes = txnPoolNodeSet[:-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 5)
    chk_commits_prepares_recvd(0, old_nodes, new_node)

    for node in old_nodes:
        node.reset_delays_and_process_delayeds()

    timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet)) + \
              catchup_delay + \
              waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    waitNodeDataEquality(looper, new_node, *old_nodes)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 2)

    # Commits and Prepares are now received by the new node
    with pytest.raises(AssertionError):
        # Nodes discard 3PC messages for already ordered requests.
        chk_commits_prepares_recvd(0, old_nodes, new_node)
    waitNodeDataEquality(looper, new_node, *old_nodes)
def test_no_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # emulate catchup by setting non-synced status
    for node in txnPoolNodeSet:
        node.mode = mode

    check_instance_change_count(txnPoolNodeSet, 0)

    # start View Change
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    old_meths = do_view_change(txnPoolNodeSet)
    for node in txnPoolNodeSet:
        node.view_changer.sendInstanceChange(old_view_no + 1)

    # make sure View Change is not started
    check_no_view_change(looper, txnPoolNodeSet)
    assert old_view_no == checkViewNoForNodes(txnPoolNodeSet)

    # emulate finishing of catchup by setting Participating status
    revert_do_view_change(txnPoolNodeSet, old_meths)
    for node in txnPoolNodeSet:
        node.mode = Mode.participating

    # make sure that View Change happened
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=old_view_no + 1)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
def test_multiple_view_change_retries_by_timeouts(
        txnPoolNodeSet, looper, tconf, setup,
        sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted each time
    when the previous one is timed out
    """
    _, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, vcd_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)

        # Wait until timeout callback is called 3 times
        looper.run(eventually(check_watchdog_called_expected_times,
                              txnPoolNodeSet, timeout_callback_stats, 3,
                              retryWait=1,
                              timeout=3 * VIEW_CHANGE_TIMEOUT + 2))

        # View changes should fail
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet, customTimeout=1)

    # This view change must be completed with no problems
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # 4 view changes must have been initiated (initial one + 3 retries)
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 4

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)
def test_old_instance_change_discarding(txnPoolNodeSet,
                                        looper,
                                        tconf):
    view_no = txnPoolNodeSet[0].viewNo
    first_nodes = txnPoolNodeSet[:2]
    second_nodes = txnPoolNodeSet[2:]

    for node in first_nodes:
        node.view_changer.on_master_degradation()

    def chk_ic_discard():
        for n in txnPoolNodeSet:
            assert not n.view_changer.instanceChanges.has_view(view_no + 1)
            for frm in first_nodes:
                assert not n.view_changer.instanceChanges.has_inst_chng_from(view_no + 1, frm.name)

    looper.run(eventually(chk_ic_discard,
                          timeout=tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 10))

    for node in second_nodes:
        node.view_changer.on_master_degradation()

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    for node in txnPoolNodeSet:
        assert node.viewNo == view_no
def test_removed_replica_restored_on_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched, view_change):
    """
    1. Remove replica on some node which is not master primary
    2. Reconnect the node which was master primary so far
    3. Check that nodes and replicas correctly added
    """
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    node = get_last_master_non_primary_node(txnPoolNodeSet)
    start_replicas_count = node.replicas.num_replicas
    instance_id = start_replicas_count - 1

    node.replicas.remove_replica(instance_id)
    check_replica_removed(node, start_replicas_count, instance_id)

    # trigger view change on all nodes
    master_primary = get_master_primary_node(txnPoolNodeSet)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, master_primary)
    txnPoolNodeSet.remove(master_primary)
    looper.removeProdable(master_primary)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    restarted_node = start_stopped_node(master_primary, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(restarted_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=1,
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    assert start_replicas_count == node.replicas.num_replicas
def test_view_change_retry_by_timeout(
        txnPoolNodeSet, looper, tconf, setup, sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, vcd_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)

        # The first view change should fail because of delayed ViewChangeDone
        # messages. This then leads to the new view change that we need.
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper,
                                nodes=txnPoolNodeSet,
                                customTimeout=1.5 * VIEW_CHANGE_TIMEOUT)

    # Now that ViewChangeDone messages are unblocked, the view change should finish successfully
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout callback was called exactly once
    check_watchdog_called_expected_times(txnPoolNodeSet, timeout_callback_stats, 1)

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)
def test_view_change_after_some_txns(txnPoolNodesLooper, txnPoolNodeSet,
                                     some_txns_done, testNodeClass, viewNo,  # noqa
                                     sdk_pool_handle, sdk_wallet_client,
                                     node_config_helper_class, tconf, tdir,
                                     allPluginsPath, tmpdir_factory):
    """
    Check that view change is done after processing some of txns
    """
    ensure_view_change(txnPoolNodesLooper, txnPoolNodeSet)
    ensureElectionsDone(looper=txnPoolNodesLooper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(txnPoolNodesLooper, nodes=txnPoolNodeSet)

    sdk_send_random_and_check(txnPoolNodesLooper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 10)
    ensure_all_nodes_have_same_data(txnPoolNodesLooper, txnPoolNodeSet)

    for node in txnPoolNodeSet:
        txnPoolNodesLooper.removeProdable(node)
        node.stop()

    config = getConfigOnce()

    reload_modules_for_replay(tconf)

    replayable_node_class, basedirpath = get_replayable_node_class(
        tmpdir_factory, tdir, testNodeClass, config)

    print('-------------Replaying now---------------------')

    for node in txnPoolNodeSet:
        create_replayable_node_and_check(txnPoolNodesLooper, txnPoolNodeSet,
                                         node, replayable_node_class,
                                         node_config_helper_class, tconf,
                                         basedirpath, allPluginsPath)
def test_restarted_node_complete_vc_by_current_state(looper,
                                                     txnPoolNodeSet,
                                                     tconf,
                                                     tdir,
                                                     allPluginsPath):
    node_to_restart = txnPoolNodeSet[-1]
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_restart,
                                            stopNode=True)
    looper.removeProdable(node_to_restart)
    old_completed_view_no = get_last_completed_view_no(txnPoolNodeSet[:-1])
    ensure_view_change(looper,
                       txnPoolNodeSet[:-1])
    ensureElectionsDone(looper, txnPoolNodeSet[:-1], customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    current_completed_view_no = get_last_completed_view_no(txnPoolNodeSet[:-1])
    assert current_completed_view_no > old_completed_view_no

    # Delay VIEW_CHANGE_DONE messages for all nodes
    for node in txnPoolNodeSet[:-1]:
        node.nodeIbStasher.delay(vcd_delay(1000))
    ensure_view_change(looper, txnPoolNodeSet[:-1])

    # Start the stopped node while the other nodes are doing the view change
    node_to_restart = start_stopped_node(node_to_restart,
                                         looper,
                                         tconf,
                                         tdir,
                                         allPluginsPath)
    node_to_restart.nodeIbStasher.delay(vcd_delay(1000))
    # check that the restarted node uses the last completed view no from the pool instead of the proposed one
    looper.run(eventually(complete_propagate_primary,
                          node_to_restart,
                          current_completed_view_no,
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
def testPrimaryElectionCase2(case2Setup, looper, keySharedNodes):
    """
    Case 2 - A node making nominations for a multiple other nodes. Consider 4
    nodes A, B, C, and D. Lets say node B is malicious and nominates node C
    to all nodes. Again node B nominates node D to all nodes.
    """
    nodeSet = keySharedNodes
    A, B, C, D = nodeSet.nodes.values()

    looper.run(checkNodesConnected(nodeSet))

    # Node B sends multiple NOMINATE msgs but only after A has nominated itself
    looper.run(eventually(checkNomination, A, A.name, retryWait=.25, timeout=1))

    instId = getSelfNominationByNode(A)

    BRep = Replica.generateName(B.name, instId)
    CRep = Replica.generateName(C.name, instId)
    DRep = Replica.generateName(D.name, instId)

    # Node B first sends NOMINATE msgs for Node C to all nodes
    B.send(Nomination(CRep, instId, B.viewNo))
    # Node B sends NOMINATE msgs for Node D to all nodes
    B.send(Nomination(DRep, instId, B.viewNo))

    # Ensure elections are done
    ensureElectionsDone(looper=looper, nodes=nodeSet, retryWait=1, timeout=45)

    # Each of nodes A, C and D (node B is malicious, so it is not considered)
    # should have a nomination for node C from node B, since node B nominated
    # node C first
    for node in [A, C, D]:
        assert node.elector.nominations[instId][BRep] == CRep
def testPrimaryElectionCase5(case5Setup, looper, keySharedNodes):
    """
    Case 5 - A node making primary declarations for a multiple other nodes.
    Consider 4 nodes A, B, C, and D. Lets say node B is malicious and
    declares node C as primary to all nodes.
    Again node B declares node D as primary to all nodes.
    """
    nodeSet = keySharedNodes
    A, B, C, D = nodeSet.nodes.values()

    looper.run(checkNodesConnected(nodeSet))

    BRep = Replica.generateName(B.name, 0)
    CRep = Replica.generateName(C.name, 0)
    DRep = Replica.generateName(D.name, 0)

    # Node B first sends PRIMARY msgs for Node C to all nodes
    B.send(Primary(CRep, 0, B.viewNo))
    # Node B sends PRIMARY msgs for Node D to all nodes
    B.send(Primary(DRep, 0, B.viewNo))

    # Ensure elections are done
    ensureElectionsDone(looper=looper, nodes=nodeSet, retryWait=1, timeout=45)

    # Each of nodes A, C and D (node B is malicious, so it is not considered)
    # should have a primary declaration for node C from node B, since node B
    # declared node C first
    for node in [A, C, D]:
        logger.debug("node {} should have primary declaration for C from node B".format(node))
        assert node.elector.primaryDeclarations[0][BRep] == CRep
def test_order_after_demote_and_restart(looper, txnPoolNodeSet,
                                        sdk_pool_handle, sdk_wallet_client, tdir, tconf, allPluginsPath,
                                        sdk_wallet_stewards):
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client, 3, 3)

    primary_node = txnPoolNodeSet[0]
    node_to_stop = txnPoolNodeSet[1]
    node_to_demote = txnPoolNodeSet[2]
    txnPoolNodeSet.remove(node_to_demote)

    node_to_stop.cleanupOnStopping = True
    node_to_stop.stop()
    looper.removeProdable(node_to_stop)
    ensure_node_disconnected(looper, node_to_stop, txnPoolNodeSet, timeout=2)

    demote_node(looper, sdk_wallet_stewards[2], sdk_pool_handle, node_to_demote)

    config_helper = PNodeConfigHelper(node_to_stop.name, tconf, chroot=tdir)
    restarted_node = TestNode(node_to_stop.name, config_helper=config_helper, config=tconf,
                              pluginPaths=allPluginsPath, ha=node_to_stop.nodestack.ha,
                              cliha=node_to_stop.clientstack.ha)
    looper.add(restarted_node)
    txnPoolNodeSet[1] = restarted_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client, 1, 1)

    def get_current_bls_keys(node):
        return node.master_replica._bls_bft_replica._bls_bft.bls_key_register._current_bls_keys

    assert get_current_bls_keys(restarted_node) == get_current_bls_keys(primary_node)
def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay InstanceChange messages on the lagging node, so all nodes except it finish the View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

        # emulate finishing of catchup by setting Participating status
        lagged_node.mode = Mode.participating

        # make sure that View Change happened on lagging node
        waitForViewChange(looper, [lagged_node], expectedViewNo=old_view_no + 1,
                          customTimeout=10)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
def test_view_change_on_empty_ledger(txnPoolNodeSet, looper):
    """
    Check that view change is done when no txns in the ldegr
    """
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_vc_by_current_state(txnPoolNodeSet,
                             looper,
                             tdir,
                             tconf,
                             allPluginsPath):
    node_to_stop = txnPoolNodeSet[-1]
    old_view_no = node_to_stop.view_changer.last_completed_view_no
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_stop,
                                            stopNode=True)
    looper.removeProdable(node_to_stop)
    ensure_view_change(looper, txnPoolNodeSet[:-1])
    ensureElectionsDone(looper,
                        txnPoolNodeSet[:-1],
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    new_view_no = txnPoolNodeSet[0].view_changer.last_completed_view_no
    assert new_view_no > old_view_no
    node_to_stop = start_stopped_node(node_to_stop,
                                      looper,
                                      tconf,
                                      tdir,
                                      allPluginsPath)
    txnPoolNodeSet[-1] = node_to_stop
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    assert node_to_stop.view_changer.last_completed_view_no == new_view_no
def test_view_change_with_different_ic(looper, txnPoolNodeSet,
                                       sdk_pool_handle,
                                       sdk_wallet_client,
                                       tconf, tdir, allPluginsPath):
    """
    1. panic_node (Delta) send InstanceChange for all nodes.
    2. Restart nodes_to_restart (Beta, Gamma).
    3. nodes_to_restart send InstanceChanges for all nodes.
    4. Ensure elections done.
    """
    nodes_to_restart = txnPoolNodeSet[1:3]
    panic_node = txnPoolNodeSet[-1]
    view_no = txnPoolNodeSet[0].viewNo

    panic_node.view_changer.on_master_degradation()
    for n in nodes_to_restart:
        _restart_node(looper, txnPoolNodeSet, n, tconf, tdir, allPluginsPath)
    nodes_to_restart = txnPoolNodeSet[1:3]

    for n in nodes_to_restart:
        n.view_changer.on_master_degradation()

    def check():
        assert panic_node.view_change_in_progress

    looper.run(eventually(check))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert node.viewNo > view_no
def test_all_replicas_hold_request_keys(
        perf_chk_patched,
        looper,
        txnPoolNodeSet,
        sdk_wallet_client,
        sdk_pool_handle):
    """
    All replicas whether primary or non primary hold request keys of forwarded
    requests. Once requests are ordered, they request keys are removed from replica.
    """
    tconf = perf_chk_patched
    delay_3pc = 2
    delay_3pc_messages(txnPoolNodeSet, 0, delay_3pc)
    delay_3pc_messages(txnPoolNodeSet, 1, delay_3pc)

    def chk(count):
        # All non-primary replicas have the same number of forwarded request
        # keys and all keys are finalised; primary replicas hold none.
        for node in txnPoolNodeSet:
            for r in node.replicas.values():
                if r.isPrimary is False:
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == count
                    for i in range(count):
                        k = r.requestQueues[DOMAIN_LEDGER_ID][i]
                        assert r.requests[k].finalised
                elif r.isPrimary is True:
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == 0

    reqs = sdk_signed_random_requests(looper,
                                      sdk_wallet_client,
                                      tconf.Max3PCBatchSize - 1)
    req_resps = sdk_send_signed_requests(sdk_pool_handle, reqs)
    # Only non-primary replicas should have all request keys with them
    looper.run(eventually(chk, tconf.Max3PCBatchSize - 1))
    sdk_get_replies(looper, req_resps, timeout=sdk_eval_timeout(
        tconf.Max3PCBatchSize - 1, len(txnPoolNodeSet),
        add_delay_to_timeout=delay_3pc))
    # Replicas should have no request keys with them since they are ordered
    looper.run(eventually(chk, 0))  # Need to wait since one node might not
    # have processed it.

    delay = 1
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(nom_delay(delay))

    ensure_view_change(looper, txnPoolNodeSet)
    reqs = sdk_signed_random_requests(looper,
                                      sdk_wallet_client,
                                      2 * tconf.Max3PCBatchSize)
    req_resps = sdk_send_signed_requests(sdk_pool_handle, reqs)
    looper.run(eventually(chk, 2 * tconf.Max3PCBatchSize))

    # Since each nomination is delayed and there will be multiple nominations,
    # add some extra time
    timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + \
              len(txnPoolNodeSet) * delay
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    sdk_get_replies(looper, req_resps, timeout=timeout)
    looper.run(eventually(chk, 0))
def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    The node should do only a fixed rounds of catchup. For this delay Prepares
    and Commits for 2 non-primary nodes by a large amount which is equivalent
    to loss of Prepares and Commits. Make sure 2 nodes have a different last
    prepared certificate from other two. Then do a view change, make sure view
    change completes and the pool does not process the request that were
    prepared by only a subset of the nodes
    """
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ledger_summary = txnPoolNodeSet[0].ledger_summary

    slow_nodes = [r.node for r in getNonPrimaryReplicas(
        txnPoolNodeSet, 0)[-2:]]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]

    # Make node slow to process Prepares and Commits
    for node in slow_nodes:
        node.nodeIbStasher.delay(pDelay(120, 0))
        node.nodeIbStasher.delay(cDelay(120, 0))

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)
    looper.runFor(3)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    def last_prepared(nodes):
        lst = [n.master_replica.last_prepared_certificate_in_view()
               for n in nodes]
        # All nodes have same last prepared
        assert check_if_all_equal_in_list(lst)
        return lst[0]

    last_prepared_slow = last_prepared(slow_nodes)
    last_prepared_fast = last_prepared(fast_nodes)

    # Check `slow_nodes` and `fast_nodes` set different last_prepared
    assert last_prepared_fast != last_prepared_slow

    # View change complete
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # The requests which were prepared by only a subset of the nodes were
    # not ordered
    assert txnPoolNodeSet[0].ledger_summary == ledger_summary

    for node in slow_nodes:
        node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Make sure pool is functional
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    last_prepared(txnPoolNodeSet)
def test_no_propagate_request_on_different_prepares_on_backup_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''Send a random request and do a view change; fast_nodes (2, 3 - with the
    backup primary replica) will then have a Prepare or have sent a PrePrepare
    on the backup replicas while slow_nodes will not, and the transaction will
    be ordered on all master replicas. Check last_ordered after the view change
    and after one more request.'''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_instance = 1
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    with delay_rules(nodes_stashers, pDelay(instId=slow_instance)):
        with delay_rules(nodes_stashers, ppDelay(instId=slow_instance)):
            # send one request
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, 1)
            old_view_no = txnPoolNodeSet[0].viewNo
            looper.run(
                eventually(is_prepared,
                           fast_nodes,
                           2,
                           slow_instance))

            # trigger view change on all nodes
            ensure_view_change(looper, txnPoolNodeSet)
            # wait for view change done on all nodes
            ensureElectionsDone(looper, txnPoolNodeSet)

    primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node
    non_primaries = [n for n in txnPoolNodeSet if n is not primary]

    check_last_ordered(non_primaries,
                       slow_instance,
                       (old_view_no, old_last_ordered[1] + 1))

    # Backup primary replica must not advance last_ordered_3pc
    # up to the master's value
    check_last_ordered([primary],
                       slow_instance,
                       (old_view_no, old_last_ordered[1]))

    check_last_ordered(txnPoolNodeSet,
                       txnPoolNodeSet[0].master_replica.instId,
                       (old_last_ordered[0], old_last_ordered[1] + 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    looper.run(
        eventually(check_last_ordered,
                   txnPoolNodeSet,
                   slow_instance,
                   (txnPoolNodeSet[0].viewNo, 1)))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
def test_node_notified_about_primary_election_result(txnPoolNodeSet, looper):
    old_counts = {node.name: get_count(
        node, node.primary_selected) for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert get_count(node, node.primary_selected) > old_counts[node.name]
Example #26
def changeNodeHa(looper, txnPoolNodeSet,
                 tconf, shouldBePrimary, tdir,
                 sdk_pool_handle, sdk_wallet_stewards,
                 sdk_wallet_client):
    # prepare new HA for the node and client stacks
    subjectedNode = None
    node_index = None

    for nodeIndex, n in enumerate(txnPoolNodeSet):
        if shouldBePrimary == n.has_master_primary:
            subjectedNode = n
            node_index = nodeIndex
            break

    nodeStackNewHA, clientStackNewHA = genHa(2)
    logger.debug("change HA for node: {} to {}".format(
        subjectedNode.name, (nodeStackNewHA, clientStackNewHA)))

    # change HA
    sdk_wallet_steward = sdk_wallet_stewards[node_index]
    node_dest = hexToFriendly(subjectedNode.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward,
                         sdk_pool_handle,
                         node_dest, subjectedNode.name,
                         nodeStackNewHA[0], nodeStackNewHA[1],
                         clientStackNewHA[0], clientStackNewHA[1],
                         services=[VALIDATOR])

    # stop node for which HA will be changed
    subjectedNode.stop()
    looper.removeProdable(subjectedNode)

    # start node with new HA
    config_helper = PNodeConfigHelper(subjectedNode.name, tconf, chroot=tdir)
    restartedNode = TestNode(subjectedNode.name,
                             config_helper=config_helper,
                             config=tconf, ha=nodeStackNewHA,
                             cliha=clientStackNewHA)
    looper.add(restartedNode)
    txnPoolNodeSet[node_index] = restartedNode
    looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=70))


    electionTimeout = waits.expectedPoolElectionTimeout(
        nodeCount=len(txnPoolNodeSet),
        numOfReelections=3)
    ensureElectionsDone(looper,
                        txnPoolNodeSet,
                        retryWait=1,
                        customTimeout=electionTimeout)

    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              8)
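
A hedged usage sketch for the changeNodeHa helper above, using the fixtures seen throughout this page; the test name is hypothetical.

def test_change_non_primary_node_ha_sketch(looper, txnPoolNodeSet, tconf, tdir,
                                           sdk_pool_handle, sdk_wallet_stewards,
                                           sdk_wallet_client):
    # change the HA of a node that does not host the master primary
    changeNodeHa(looper, txnPoolNodeSet, tconf,
                 shouldBePrimary=False, tdir=tdir,
                 sdk_pool_handle=sdk_pool_handle,
                 sdk_wallet_stewards=sdk_wallet_stewards,
                 sdk_wallet_client=sdk_wallet_client)
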
def all_nodes_view_change(
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client):
    for _ in range(5):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2)
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_primary_selection_after_primary_demotion_and_pool_restart(looper,
                                                                   txnPoolNodeSet,
                                                                   sdk_pool_handle,
                                                                   sdk_wallet_steward,
                                                                   txnPoolMasterNodes,
                                                                   tdir, tconf):
    """
    Demote primary and restart the pool.
    Pool should select new primary and have viewNo=0 after restart.
    """

    logger.info("1. turn off the node which has primary replica for master instanse")
    master_node = txnPoolMasterNodes[0]
    node_dest = hexToFriendly(master_node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward,
                         sdk_pool_handle,
                         node_dest, master_node.name,
                         None, None,
                         None, None,
                         services=[])

    restNodes = [node for node in txnPoolNodeSet if node.name != master_node.name]
    ensureElectionsDone(looper, restNodes)

    # ensure pool is working properly


    logger.info("2. restart pool")
    # Stopping existing nodes
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # Starting nodes again by creating `Node` objects since that simulates
    # what happens when starting the node with script
    restartedNodes = []
    for node in txnPoolNodeSet:
        config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
        restartedNode = TestNode(node.name,
                                 config_helper=config_helper,
                                 config=tconf, ha=node.nodestack.ha,
                                 cliha=node.clientstack.ha)
        looper.add(restartedNode)
        restartedNodes.append(restartedNode)

    restNodes = [node for node in restartedNodes if node.name != master_node.name]

    looper.run(checkNodesConnected(restNodes))
    ensureElectionsDone(looper, restNodes)
    checkViewNoForNodes(restNodes, 0)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    primariesIdxs = getPrimaryNodesIdxs(restNodes)
    assert restNodes[primariesIdxs[0]].name != master_node.name
def test_primary_selection_after_demoted_primary_node_promotion(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward,
        txnPoolMasterNodes):
    """
    Demote primary of master instance, wait for view change and promote it back.
    Check primaries for instances.
    """
    assert len(txnPoolNodeSet) == 4

    # Check primaries after test setup.
    primariesIdxs = getPrimaryNodesIdxs(txnPoolNodeSet)
    assert len(primariesIdxs) == 2
    assert primariesIdxs[0] == 0
    assert primariesIdxs[1] == 1

    master_node = txnPoolMasterNodes[0]

    # Demote primary of master instance.
    node_dest = hexToFriendly(master_node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward,
                         sdk_pool_handle,
                         node_dest, master_node.name,
                         None, None,
                         None, None,
                         services=[])

    restNodes = [node for node in txnPoolNodeSet if node.name != master_node.name]
    ensureElectionsDone(looper, restNodes)

    # Check that there is only one instance now, check it's primary.
    primariesIdxs = getPrimaryNodesIdxs(restNodes)
    assert len(primariesIdxs) == 1
    assert primariesIdxs[0] == 1

    # Ensure pool is working properly.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    # Promote demoted node back.
    sdk_send_update_node(looper, sdk_wallet_steward,
                         sdk_pool_handle,
                         node_dest, master_node.name,
                         None, None,
                         None, None,
                         services=[VALIDATOR])

    # Ensure pool is working properly.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 3)

    # Check that there are two instances again, check their primaries.
    primariesIdxs = getPrimaryNodesIdxs(txnPoolNodeSet)
    assert len(primariesIdxs) == 2
    assert primariesIdxs[0] == 2
    assert primariesIdxs[1] == 3
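
Both the demotion and the promotion above go through sdk_send_update_node with only the services field changed. A small hedged wrapper capturing that pattern; the helper name is hypothetical.

def set_node_services_sketch(looper, sdk_pool_handle, sdk_wallet_steward, node, services):
    # services=[] demotes the node, services=[VALIDATOR] promotes it back
    node_dest = hexToFriendly(node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward,
                         sdk_pool_handle,
                         node_dest, node.name,
                         None, None,
                         None, None,
                         services=services)
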
Example #30
def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns,
                 poolTxnData, poolTxnStewardNames, tconf, shouldBePrimary):

    # prepare new HA for the node and client stacks
    subjectedNode = None
    stewardName = None
    stewardsSeed = None

    for nodeIndex, n in enumerate(txnPoolNodeSet):
        if (shouldBePrimary and n.primaryReplicaNo == 0) or \
                (not shouldBePrimary and n.primaryReplicaNo != 0):
            subjectedNode = n
            stewardName = poolTxnStewardNames[nodeIndex]
            stewardsSeed = poolTxnData["seeds"][stewardName].encode()
            break

    nodeStackNewHA, clientStackNewHA = genHa(2)
    logger.debug("change HA for node: {} to {}".
                 format(subjectedNode.name, (nodeStackNewHA, clientStackNewHA)))

    nodeSeed = poolTxnData["seeds"][subjectedNode.name].encode()

    # change HA
    stewardClient, req = changeHA(looper, tconf, subjectedNode.name, nodeSeed,
                                  nodeStackNewHA, stewardName, stewardsSeed)
    f = getMaxFailures(len(stewardClient.nodeReg))
    looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox,
                          req.reqId, f, retryWait=1, timeout=20))

    # stop node for which HA will be changed
    subjectedNode.stop()
    looper.removeProdable(subjectedNode)

    # start node with new HA
    restartedNode = TestNode(subjectedNode.name, basedirpath=tdirWithPoolTxns,
                             config=tconf, ha=nodeStackNewHA,
                             cliha=clientStackNewHA)
    looper.add(restartedNode)

    txnPoolNodeSet[nodeIndex] = restartedNode
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=70))
    ensureElectionsDone(looper, txnPoolNodeSet, retryWait=1, timeout=10)

    # start client and check the node HA
    anotherClient, _ = genTestClient(tmpdir=tdirWithPoolTxns,
                                     usePoolLedger=True)
    looper.add(anotherClient)
    looper.run(eventually(anotherClient.ensureConnectedToNodes))
    stewardWallet = Wallet(stewardName)
    stewardWallet.addIdentifier(signer=SimpleSigner(seed=stewardsSeed))
    sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, stewardClient, 8)
    looper.run(eventually(checkIfGenesisPoolTxnFileUpdated, *txnPoolNodeSet,
                          stewardClient, anotherClient, retryWait=1,
                          timeout=10))
    looper.removeProdable(stewardClient)
def testZStackNodeReconnection(tconf, looper, txnPoolNodeSet, client1, wallet1,
                               tdir, client1Connected):
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1)

    npr = [n for n in txnPoolNodeSet if not n.hasPrimary]
    nodeToCrash = npr[0]
    idxToCrash = txnPoolNodeSet.index(nodeToCrash)
    otherNodes = [_ for _ in txnPoolNodeSet if _ != nodeToCrash]

    def checkFlakyConnected(conn=True):
        for node in otherNodes:
            if conn:
                assert nodeToCrash.nodestack.name in node.nodestack.connecteds
            else:
                assert nodeToCrash.nodestack.name not in node.nodestack.connecteds

    checkFlakyConnected(True)
    nodeToCrash.stop()
    logger.debug('Stopped node {}'.format(nodeToCrash))
    looper.removeProdable(nodeToCrash)
    looper.runFor(1)
    stopNodes([nodeToCrash], looper)
    # TODO Select or create the timeout from 'waits'. Don't use constant.
    looper.run(eventually(checkFlakyConnected, False, retryWait=1, timeout=60))

    looper.runFor(1)
    config_helper = PNodeConfigHelper(nodeToCrash.name, tconf, chroot=tdir)
    node = TestNode(nodeToCrash.name,
                    ledger_dir=config_helper.ledger_dir,
                    keys_dir=config_helper.keys_dir,
                    genesis_dir=config_helper.genesis_dir,
                    plugins_dir=config_helper.plugins_dir,
                    config=tconf,
                    ha=nodeToCrash.nodestack.ha,
                    cliha=nodeToCrash.clientstack.ha)
    looper.add(node)
    txnPoolNodeSet[idxToCrash] = node

    # TODO Select or create the timeout from 'waits'. Don't use constant.
    looper.run(eventually(checkFlakyConnected, True, retryWait=2, timeout=50))
    ensureElectionsDone(looper, txnPoolNodeSet, retryWait=2)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 10)
Example #32
def testClientConnectToRestartedNodes(looper, txnPoolNodeSet, tdirWithPoolTxns,
                                      poolTxnClientNames, poolTxnData, tconf,
                                      poolTxnNodeNames, allPluginsPath):
    name = poolTxnClientNames[-1]
    newClient, w = genTestClient(tmpdir=tdirWithPoolTxns,
                                 nodes=txnPoolNodeSet,
                                 name=name,
                                 usePoolLedger=True)
    looper.add(newClient)
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, newClient,
                                                  *txnPoolNodeSet)
    sendReqsToNodesAndVerifySuffReplies(looper, w, newClient, 1, 1)
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # looper.run(newClient.ensureDisconnectedToNodes(timeout=60))
    txnPoolNodeSet = []
    for nm in poolTxnNodeNames:
        node = TestNode(nm,
                        basedirpath=tdirWithPoolTxns,
                        base_data_dir=tdirWithPoolTxns,
                        config=tconf,
                        pluginPaths=allPluginsPath)
        looper.add(node)
        txnPoolNodeSet.append(node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    def chk():
        for node in txnPoolNodeSet:
            assert node.isParticipating

    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))
    looper.run(eventually(chk, retryWait=1, timeout=timeout))

    bootstrapClientKeys(w.defaultId, w.getVerkey(), txnPoolNodeSet)

    req = sendRandomRequest(w, newClient)
    waitForSufficientRepliesForRequests(looper, newClient, requests=[req])
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, newClient,
                                                  *txnPoolNodeSet)

    sendReqsToNodesAndVerifySuffReplies(looper, w, newClient, 3, 1)
def testNodesConnectsWhenOneNodeIsLate(allPluginsPath, tdir_for_func, tconf_for_func,
                                       looper_without_nodeset_for_func,
                                       nodeReg):
    looper = looper_without_nodeset_for_func
    initLocalKeys(tdir_for_func, tconf_for_func, nodeReg)

    nodes = []
    names = list(nodeReg.keys())
    logger.debug("Node names: {}".format(names))

    def create(name):
        config_helper = PNodeConfigHelper(name, tconf_for_func, chroot=tdir_for_func)
        node = TestNode(name, nodeReg,
                        config_helper=config_helper,
                        config=tconf_for_func,
                        pluginPaths=allPluginsPath)
        nodes.append(node)
        return node

    for name in names:
        create(name)

    logger.debug("Creating keys")

    for node in nodes:
        tellKeysToOthers(node, nodes)

    for node in nodes[:3]:
        looper.add(node)

    looper.run(checkNodesConnected(nodes[:3]))

    # wait for the election to complete with the first three nodes
    ensureElectionsDone(looper, nodes[:3], instances_list=range(2))

    # start the fourth and see that it learns who the primaries are
    # from the other nodes
    looper.add(nodes[3])

    # ensure election is done for updated pool
    ensureElectionsDone(looper, nodes)
    stopNodes(nodes, looper)
    for node in nodes:
        looper.removeProdable(node)
def test_order_after_demote_and_restart(looper, txnPoolNodeSet,
                                        sdk_pool_handle, sdk_wallet_client,
                                        tdir, tconf, allPluginsPath,
                                        sdk_wallet_stewards):
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client, 3,
                                         3)

    primary_node = txnPoolNodeSet[0]
    node_to_stop = txnPoolNodeSet[1]
    node_to_demote = txnPoolNodeSet[2]
    txnPoolNodeSet.remove(node_to_demote)

    node_to_stop.cleanupOnStopping = True
    node_to_stop.stop()
    looper.removeProdable(node_to_stop)
    ensure_node_disconnected(looper, node_to_stop, txnPoolNodeSet, timeout=2)

    demote_node(looper, sdk_wallet_stewards[2], sdk_pool_handle,
                node_to_demote)

    config_helper = PNodeConfigHelper(node_to_stop.name, tconf, chroot=tdir)
    restarted_node = TestNode(node_to_stop.name,
                              config_helper=config_helper,
                              config=tconf,
                              pluginPaths=allPluginsPath,
                              ha=node_to_stop.nodestack.ha,
                              cliha=node_to_stop.clientstack.ha)
    looper.add(restarted_node)
    txnPoolNodeSet[1] = restarted_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper,
                        nodes=txnPoolNodeSet,
                        check_primaries=False)

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client, 1,
                                         1)

    def get_current_bls_keys(node):
        return node.master_replica._bls_bft_replica._bls_bft.bls_key_register._current_bls_keys

    assert get_current_bls_keys(restarted_node) == get_current_bls_keys(
        primary_node)
Example #35
def check_view_change_adding_new_node(looper,
                                      tdir,
                                      tconf,
                                      allPluginsPath,
                                      txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_client,
                                      sdk_wallet_steward,
                                      slow_nodes=[],
                                      delay_commit=False,
                                      delay_pre_prepare=False):
    # Pre-requisites: viewNo=3, Primary is Node4
    for viewNo in range(1, 4):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, viewNo)
        ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)

    # Delay 3PC messages on slow nodes
    fast_nodes = [node for node in txnPoolNodeSet if node not in slow_nodes]
    slow_stashers = [slow_node.nodeIbStasher for slow_node in slow_nodes]
    delayers = []
    if delay_pre_prepare:
        delayers.append(ppDelay())
        delayers.append(msg_rep_delay(types_to_delay=[PREPREPARE]))
    if delay_commit:
        delayers.append(cDelay())

    with delay_rules_without_processing(slow_stashers, *delayers):
        # Add Node5
        new_node = add_new_node(looper, fast_nodes, sdk_pool_handle,
                                sdk_wallet_steward, tdir, tconf,
                                allPluginsPath)
        old_set = list(txnPoolNodeSet)
        txnPoolNodeSet.append(new_node)

        # Trigger view change
        trigger_view_change(txnPoolNodeSet)

        # make sure view change is finished eventually
        waitForViewChange(looper, old_set, 4)
        ensureElectionsDone(looper, old_set)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
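
A hedged driver sketch for the check_view_change_adding_new_node helper above; the test name and the choice of slow node are hypothetical.

def test_view_change_while_adding_node_sketch(looper, tdir, tconf, allPluginsPath,
                                              txnPoolNodeSet, sdk_pool_handle,
                                              sdk_wallet_client, sdk_wallet_steward):
    # delay Commits on the last node so it lags while the new node is being added
    check_view_change_adding_new_node(looper, tdir, tconf, allPluginsPath,
                                      txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, sdk_wallet_steward,
                                      slow_nodes=txnPoolNodeSet[-1:],
                                      delay_commit=True)
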
def test_no_propagate_request_on_different_last_ordered_on_master_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''Send a random request and do a view change; fast_nodes (1, 4 - without
    a primary after the next view change) have already ordered the transaction
    on master while slow_nodes have not. Check ordering on slow_nodes.'''
    global batches_count
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    batches_count += 1
    master_instance = txnPoolNodeSet[0].master_replica.instId
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    assert batches_count == old_last_ordered[1]
    with delay_rules(nodes_stashers, cDelay()):
        # send one request
        requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                            sdk_wallet_client, 1)
        batches_count += 1
        last_ordered_for_slow = slow_nodes[0].master_replica.last_ordered_3pc
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(
            eventually(check_last_ordered, fast_nodes, master_instance,
                       (old_view_no, batches_count)))

        # trigger view change on all nodes
        ensure_view_change(looper, txnPoolNodeSet)
        # wait for view change done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=60)

        batches_count += 1

    replies = sdk_get_replies(looper, requests)
    for reply in replies:
        sdk_check_reply(reply)

    # a new primary will send a PrePrepare for the new view
    looper.run(
        eventually(check_last_ordered, txnPoolNodeSet, master_instance,
                   (old_view_no + 1, batches_count)))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
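
The assertion above relies on the test node's spylog, which records calls to spied methods such as request_propagates. As a rough, hedged illustration of that call-recording idea (not this project's actual implementation), a minimal spy could look like this:

import functools

class MiniSpyLog:
    """Toy call recorder: stores (method_name, args, result) tuples."""
    def __init__(self):
        self._events = []

    def record(self, name, args, result):
        self._events.append((name, args, result))

    def count(self, method):
        name = getattr(method, '__name__', method)
        return sum(1 for n, _, _ in self._events if n == name)

def spied(spylog):
    """Decorator that logs every call of the wrapped function into spylog."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            spylog.record(func.__name__, args, result)
            return result
        return wrapper
    return decorator

log = MiniSpyLog()

@spied(log)
def request_propagates():
    return None

request_propagates()
assert log.count(request_propagates) == 1
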
Exemple #37
0
def test_view_change_not_happen_if_ic_is_discarded(looper, txnPoolNodeSet,
                                                   sdk_pool_handle,
                                                   sdk_wallet_client,
                                                   tconf, tdir, allPluginsPath):
    """
    1. panic_node (Delta) send InstanceChange for all nodes.
    2. Restart nodes_to_restart (Beta, Gamma).
    3. Wait OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL sec.
    4. nodes_to_restart send InstanceChanges for all nodes.
    5. View change doesn't happen since old InstanceChange from panic_node was discarded due to timeout.
    5. Ensure elections done
    """
    nodes_to_restart = txnPoolNodeSet[1:3]
    panic_node = txnPoolNodeSet[-1]
    view_no = txnPoolNodeSet[0].viewNo

    panic_node.view_changer.on_master_degradation()
    for n in nodes_to_restart:
        restart_node(looper, txnPoolNodeSet, n, tconf, tdir, allPluginsPath)
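    # restart_node swaps the restarted node objects into txnPoolNodeSet, so re-fetch them here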
    nodes_to_restart = txnPoolNodeSet[1:3]

    # wait until the old InstanceChange is discarded
    def check_old_ic_discarded():
        vct_services = [n.master_replica._view_change_trigger_service for n in txnPoolNodeSet]
        assert all(not vct_service._instance_changes.has_inst_chng_from(view_no + 1, panic_node.name)
                   for vct_service in vct_services)

    looper.run(eventually(check_old_ic_discarded, timeout=tconf.OUTDATED_INSTANCE_CHANGES_CHECK_INTERVAL + 1))

    for n in nodes_to_restart:
        n.view_changer.on_master_degradation()

    def check_ic():
        for node in txnPoolNodeSet:
            vct_service = node.master_replica._view_change_trigger_service
            assert all(vct_service._instance_changes.has_inst_chng_from(view_no + 1, n.name)
                       for n in nodes_to_restart)

    looper.run(eventually(check_ic))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert node.viewNo == view_no
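
Both checks above are run through the eventually helper, which keeps re-running an assertion-raising callable until it passes or a timeout expires. A minimal, synchronous sketch of that retry idea (the real eventually used with looper.run is asynchronous and richer) could be:

import time

def eventually_sync(check, retry_wait=0.25, timeout=5.0):
    """Call `check` repeatedly until it stops raising or the timeout expires."""
    deadline = time.monotonic() + timeout
    while True:
        try:
            return check()
        except Exception:
            if time.monotonic() >= deadline:
                raise
            time.sleep(retry_wait)

# Example: wait until a flag is set (here it already is, so this returns at once)
flag = {'done': True}

def check_done():
    assert flag['done']

eventually_sync(check_done)
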
Exemple #38
0
def test_replica_removing_with_primary_disconnected(looper, txnPoolNodeSet,
                                                    sdk_pool_handle,
                                                    sdk_wallet_client, tconf,
                                                    tdir, allPluginsPath):
    """
    1. Remove backup primary node.
    2. Check that replicas with the disconnected primary were removed.
    3. Recover the removed node.
    4. Start View Change.
    5. Check that all replicas were restored.
    """
    start_replicas_count = txnPoolNodeSet[0].replicas.num_replicas
    instance_to_remove = 1
    node = txnPoolNodeSet[instance_to_remove]
    # remove backup primary node.
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node)
    txnPoolNodeSet.remove(node)
    looper.removeProdable(node)

    # check that replicas were removed
    def check_replica_removed_on_all_nodes():
        for node in txnPoolNodeSet:
            check_replica_removed(node, start_replicas_count,
                                  instance_to_remove)

    looper.run(
        eventually(check_replica_removed_on_all_nodes,
                   timeout=tconf.TolerateBackupPrimaryDisconnection * 4))
    assert not node.monitor.isMasterDegraded()
    assert len(node.requests) == 0

    # recover the removed node
    node = start_stopped_node(node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    # start View Change
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper,
                      txnPoolNodeSet,
                      expectedViewNo=1,
                      customTimeout=2 * tconf.NEW_VIEW_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    # check that all replicas were restored
    assert start_replicas_count == node.replicas.num_replicas
Exemple #39
0
def test_view_change_with_instance_change_lost_due_to_restarts(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, tconf,
        tdir, allPluginsPath):
    """
    1. some_nodes (Beta and Gamma) send InstanceChange for all nodes.
    2. Restart other_nodes (Gamma and Delta)
    3. last_node (Delta) send InstanceChange for all nodes.
    4. Ensure elections done and pool is functional
    """
    current_view_no = txnPoolNodeSet[0].viewNo
    some_nodes = txnPoolNodeSet[1:3]
    other_nodes = txnPoolNodeSet[2:4]

    for n in some_nodes:
        send_test_instance_change(n)

    def check_ic_delivery():
        for node in txnPoolNodeSet:
            vct_service = node.master_replica._view_change_trigger_service
            assert all(
                vct_service._instance_changes.has_inst_chng_from(
                    current_view_no + 1, sender.name) for sender in some_nodes)

    looper.run(eventually(check_ic_delivery))

    restart_nodes(looper,
                  txnPoolNodeSet,
                  other_nodes,
                  tconf,
                  tdir,
                  allPluginsPath,
                  start_one_by_one=False)

    last_node = txnPoolNodeSet[-1]
    send_test_instance_change(last_node)
    waitForViewChange(looper,
                      txnPoolNodeSet,
                      current_view_no + 1,
                      customTimeout=3 * FRESHNESS_TIMEOUT)

    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
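
The view change above starts once enough distinct nodes have sent an InstanceChange for the same next view, even though the restarted nodes lost the votes they had received before restarting. As a hedged illustration of the usual BFT arithmetic behind that quorum (the exact value used by this codebase is an assumption here): with N nodes the pool tolerates f = (N - 1) // 3 faults, and a view change typically needs N - f matching votes.

def bft_f(n):
    # Maximum number of tolerated faulty nodes in a BFT pool of n nodes
    return (n - 1) // 3

def view_change_quorum(n):
    # Assumed quorum of InstanceChange votes needed to start a view change
    return n - bft_f(n)

assert bft_f(4) == 1
assert view_change_quorum(4) == 3
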
Exemple #40
0
def txnPoolNodeSet(patchPluginManager,
                   txnPoolNodesLooper,
                   tdirWithPoolTxns,
                   tdirWithDomainTxns,
                   tconf,
                   poolTxnNodeNames,
                   allPluginsPath,
                   tdirWithNodeKeepInited,
                   testNodeClass):
    nodes = []
    for nm in poolTxnNodeNames:
        node = testNodeClass(nm, basedirpath=tdirWithPoolTxns,
                             config=tconf, pluginPaths=allPluginsPath)
        txnPoolNodesLooper.add(node)
        nodes.append(node)
    txnPoolNodesLooper.run(checkNodesConnected(nodes))
    ensureElectionsDone(looper=txnPoolNodesLooper, nodes=nodes, retryWait=1,
                        timeout=20)
    return nodes
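
This fixture builds, connects and elects the whole pool once, so the tests in these examples can simply declare txnPoolNodeSet as a parameter. A minimal, generic pytest sketch of the same fixture-injection pattern (hypothetical names, no relation to the real harness):

import pytest

@pytest.fixture
def node_names():
    # Built for each test that requests it, then injected by parameter name
    return ["Alpha", "Beta", "Gamma", "Delta"]

def test_pool_size(node_names):
    assert len(node_names) == 4
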
Exemple #41
0
def test_master_primary_different_from_previous_view_for_itself(
        txnPoolNodeSet, looper, client1, wallet1):
    """
    After a view change, primary must be different from previous primary for
    master instance, it does not matter for other instance. Break it into
    2 tests, one where the primary is malign and votes for itself but is still
    not made primary in the next view.
    """
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    pr = slow_primary(txnPoolNodeSet, 0, delay=10)
    old_pr_node = pr.node

    def _get_undecided_inst_id(self):
        undecideds = [i for i, r in enumerate(self.replicas)
                      if r.isPrimary is None]
        # Try to nominate for the master instance
        return undecideds, 0

    # Patching the old primary's elector's method to nominate itself
    # again for the new view
    old_pr_node.elector._get_undecided_inst_id = types.MethodType(
        _get_undecided_inst_id, old_pr_node.elector)

    # View change happens
    provoke_and_wait_for_view_change(looper,
                                     txnPoolNodeSet,
                                     old_view_no + 1,
                                     wallet1,
                                     client1)

    # Elections done
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    # New primary is not same as old primary
    assert getPrimaryReplica(txnPoolNodeSet, 0).node.name != old_pr_node.name

    # All other nodes discarded the nomination by the old primary
    for node in txnPoolNodeSet:
        if node != old_pr_node:
            assert countDiscarded(node.elector,
                                  'of master in previous view too') == 1

    # The new primary can still process requests
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
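
The interesting trick above is types.MethodType: it binds a plain function as a method of one specific instance, so only the old primary's elector is patched while every other node keeps the stock behaviour. A small runnable illustration of the pattern:

import types

class Elector:
    def nominate(self):
        return "someone else"

patched = Elector()

def nominate_self(self):
    # Patched behaviour bound to this single instance only
    return "myself"

patched.nominate = types.MethodType(nominate_self, patched)
assert patched.nominate() == "myself"
assert Elector().nominate() == "someone else"   # other instances are unaffected
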
Exemple #42
0
def testSelfNominationDelay(tdir_for_func):
    nodeNames = ["testA", "testB", "testC", "testD"]
    with TestNodeSet(names=nodeNames, tmpdir=tdir_for_func) as nodeSet:
        with Looper(nodeSet) as looper:
            prepareNodeSet(looper, nodeSet)

            delay = 30
            # Add node A
            nodeA = addNodeBack(nodeSet, looper, nodeNames[0])
            nodeA.delaySelfNomination(delay)

            nodesBCD = []
            for name in nodeNames[1:]:
                # nodesBCD.append(nodeSet.addNode(name, i+1, AutoMode.never))
                nodesBCD.append(addNodeBack(nodeSet, looper, name))

            # Ensuring that NodeA is started before any other node to demonstrate
            # that it is delaying self nomination
            looper.run(
                eventually(lambda: assertExp(nodeA.isReady()),
                           retryWait=1,
                           timeout=5))

            # Elections should be done
            ensureElectionsDone(looper=looper,
                                nodes=nodeSet,
                                retryWait=1,
                                timeout=10)

            # node A should not have any primary replica
            looper.run(
                eventually(lambda: assertExp(not nodeA.hasPrimary),
                           retryWait=1,
                           timeout=10))

            # Make sure that after at most 30 seconds, nodeA's
            # `decidePrimaries` is called
            looper.run(
                eventually(lambda: assertExp(
                    len(nodeA.spylog.getAll(Node.decidePrimaries.__name__)) > 0
                ),
                           retryWait=1,
                           timeout=30))
Exemple #43
0
def test_view_change_with_next_primary_stopped(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client):
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary = get_next_primary_name(txnPoolNodeSet, old_view_no + 1)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            next_primary)
    remaining_nodes = [
        node for node in txnPoolNodeSet if node.name != next_primary
    ]
    trigger_view_change(remaining_nodes, old_view_no + 1)
    ensureElectionsDone(looper,
                        remaining_nodes,
                        instances_list=range(2),
                        customTimeout=15)
    sdk_ensure_pool_functional(looper, remaining_nodes, sdk_wallet_client,
                               sdk_pool_handle)
    current_view_no = checkViewNoForNodes(remaining_nodes)
    assert current_view_no == old_view_no + 2
def test_new_primary_lagging_behind(looper,
                                    txnPoolNodeSet,
                                    sdk_wallet_client,
                                    sdk_pool_handle,
                                    tconf):
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary_name = get_next_primary_name(txnPoolNodeSet, initial_view_no + 1)
    next_primary = [n for n in txnPoolNodeSet if n.name == next_primary_name][0]
    expected_primary_name = get_next_primary_name(txnPoolNodeSet, initial_view_no + 2)
    # Next primary cannot stabilize 1 checkpoint
    with delay_rules_without_processing(next_primary.nodeIbStasher, cDelay(), pDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, CHK_FREQ)
        ensure_view_change(looper, txnPoolNodeSet)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet,
                            customTimeout=2 * tconf.NEW_VIEW_TIMEOUT)

    assert next_primary_name != expected_primary_name
    assert checkViewNoForNodes(txnPoolNodeSet) == initial_view_no + 2
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
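
The CHK_FREQ batches sent above are just enough to fill one checkpoint, which the delayed next primary cannot stabilize because its Prepares and Commits are dropped. A hedged sketch of that bookkeeping (the value of CHK_FREQ is assumed here for illustration only):

CHK_FREQ = 100  # assumed checkpoint frequency, for illustration only

def completes_checkpoint(pp_seq_no, chk_freq=CHK_FREQ):
    # A checkpoint ends on every chk_freq-th ordered batch
    return pp_seq_no % chk_freq == 0

assert completes_checkpoint(CHK_FREQ)
assert not completes_checkpoint(CHK_FREQ - 1)
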
def test_nodes_removes_request_keys_for_ordered(setup, looper, txnPoolNodeSet,
                                                sdk_pool_handle,
                                                sdk_wallet_client):
    """
    A node does not order requests since it is missing some 3PC messages,
    gets them from catchup. It then clears them from its request queues
    """
    slow_node, fast_nodes = setup

    reqs = sdk_json_couples_to_request_list(
        send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client, 10, 5))
    ensure_all_nodes_have_same_data(looper, fast_nodes)
    assert slow_node.master_replica.last_ordered_3pc != \
           fast_nodes[0].master_replica.last_ordered_3pc

    def chk(key, nodes, present):
        for node in nodes:
            assert (key in node.master_replica._ordering_service.
                    requestQueues[DOMAIN_LEDGER_ID]) == present

    for req in reqs:
        chk(req.digest, fast_nodes, False)
        chk(req.digest, [slow_node], True)

    # Reset the catchup reply delay so that catchup can complete
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        CatchupRep.typename)

    old_last_ordered = fast_nodes[0].master_replica.last_ordered_3pc

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    ensure_all_nodes_have_same_data(looper, fast_nodes)
    assert slow_node.master_replica.last_ordered_3pc == old_last_ordered

    for req in reqs:
        chk(req.digest, txnPoolNodeSet, False)

    # Needed for the next run due to the parametrised fixture
    slow_node.reset_delays_and_process_delayeds()
Exemple #46
0
def newNodeAdded(looper, nodeSet, tdir, tconf, sdk_pool_handle,
                 sdk_wallet_trustee, allPluginsPath):
    view_no = nodeSet[0].viewNo
    new_steward_wallet, new_node = sdk_node_theta_added(
        looper,
        nodeSet,
        tdir,
        tconf,
        sdk_pool_handle,
        sdk_wallet_trustee,
        allPluginsPath,
        node_config_helper_class=NodeConfigHelper,
        testNodeClass=TestNode,
        name='')
    waitForViewChange(looper=looper,
                      txnPoolNodeSet=nodeSet,
                      expectedViewNo=view_no + 1)
    ensureElectionsDone(looper=looper, nodes=nodeSet)
    return new_steward_wallet, new_node
def test_view_change_restarted_by_timeout_if_next_primary_disconnected(
        txnPoolNodeSet, looper, tconf, setup):
    """
    Verifies that a view change is restarted by timeout
    if the next primary has been disconnected
    """
    _, initial_view_no, timeout_callback_stats = setup

    start_view_change(txnPoolNodeSet, initial_view_no + 1)
    alive_nodes = stop_master_primary(txnPoolNodeSet, initial_view_no + 1)

    ensureElectionsDone(looper=looper, nodes=alive_nodes, instances_list=range(3))

    # There were 2 view changes
    for node in alive_nodes:
        assert (node.viewNo - initial_view_no) == 2

    # The timeout method was called 1 time
    check_watchdog_called_expected_times(txnPoolNodeSet, timeout_callback_stats, 1)
Exemple #48
0
def test_select_primary_after_removed_backup(txnPoolNodeSet, looper,
                                             sdk_pool_handle,
                                             sdk_wallet_client):
    """
    Check the correct order of primaries on backup replicas
    """

    node = txnPoolNodeSet[0]
    start_replicas_count = node.replicas.num_replicas
    instance_id = start_replicas_count - 1
    node.replicas.remove_replica(instance_id)
    for node in txnPoolNodeSet:
        node.view_changer.on_master_degradation()
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    for n in txnPoolNodeSet:
        assert n.requiredNumberOfInstances == n.replicas.num_replicas
        for inst_id in range(n.requiredNumberOfInstances):
            assert n.replicas[inst_id].primaryName == \
                   txnPoolNodeSet[inst_id + 1].name + ":" + str(inst_id)
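
The final assertion encodes the round-robin primary naming scheme, where a primary is identified as "NodeName:instId". A hedged sketch of that scheme (assuming the primary of instance i in view v is the node at index (v + i) modulo the pool size, which matches the inst_id + 1 offsets asserted above for view 1):

def expected_primary(node_names, view_no, inst_id):
    # Round-robin over the node list: one extra step per backup instance
    name = node_names[(view_no + inst_id) % len(node_names)]
    return "{}:{}".format(name, inst_id)

names = ["Alpha", "Beta", "Gamma", "Delta"]
assert expected_primary(names, view_no=1, inst_id=0) == "Beta:0"
assert expected_primary(names, view_no=1, inst_id=1) == "Gamma:1"
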
Exemple #49
0
def test_set_H_greater_then_last_ppseqno(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         tdir, tconf, allPluginsPath):
    # send LOG_SIZE requests; the watermarks on all replicas start out
    # unchanged at the default (0, LOG_SIZE)
    """Send random requests to move the watermarks"""
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, LOG_SIZE)
    # check that every node has moved its watermarks above the default, so a
    # ppSeqNo of LOG_SIZE + 1 would fall outside the default watermark window
    assert txnPoolNodeSet[0].replicas[1].last_ordered_3pc[1] == LOG_SIZE
    for n in txnPoolNodeSet:
        for r in n.replicas._replicas.values():
            assert r.h >= LOG_SIZE
            assert r.H >= LOG_SIZE + LOG_SIZE
    """Adding new node, for scheduling propagate primary procedure"""
    new_node = add_new_node(looper, txnPoolNodeSet, sdk_pool_handle,
                            sdk_wallet_steward, tdir, tconf, allPluginsPath)
    ensure_all_nodes_have_same_data(
        looper,
        txnPoolNodeSet,
        exclude_from_check=['check_last_ordered_3pc_backup'])
    """Check, that backup replicas set watermark as (0, maxInt)"""
    # Check, replica.h is set from last_ordered_3PC and replica.H is set to maxsize
    for r in new_node.replicas.values():
        assert r.h == r.last_ordered_3pc[1]
        if r.isMaster:
            assert r.H == r.last_ordered_3pc[1] + LOG_SIZE
        else:
            assert r.H == sys.maxsize
    """Send requests and check. that backup replicas does not stashing it by outside watermarks reason"""
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 1)
    # check that there are no stashed "outside watermarks" messages.
    for r in new_node.replicas.values():
        assert r.stasher.stash_size(STASH_WATERMARKS) == 0
    """Force view change and check, that all backup replicas will reset watermarks"""
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    for r in new_node.replicas.values():
        if not r.isMaster:
            assert r.h == 0
            assert r.H == LOG_SIZE
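
The h/H pairs checked throughout this test are the replica watermarks: a 3PC batch whose ppSeqNo falls outside (h, H] is stashed with the STASH_WATERMARKS reason. A minimal sketch of that window logic (the LOG_SIZE value is assumed here for illustration):

LOG_SIZE = 300  # assumed default window size, for illustration only

class WatermarkWindow:
    def __init__(self, h=0, log_size=LOG_SIZE):
        self._log_size = log_size
        self.h = h
        self.H = h + log_size

    def accepts(self, pp_seq_no):
        # Batches outside (h, H] would be stashed rather than processed
        return self.h < pp_seq_no <= self.H

    def shift(self, new_h):
        # Moving the low watermark drags the high watermark along
        self.h = new_h
        self.H = new_h + self._log_size

w = WatermarkWindow()
assert w.accepts(1)
assert not w.accepts(LOG_SIZE + 1)
w.shift(LOG_SIZE)
assert w.accepts(LOG_SIZE + 1)
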
def test_old_non_primary_restart_after_view_change(new_node_in_correct_view,
                                                   looper, txnPoolNodeSet,
                                                   tdir, allPluginsPath, tconf,
                                                   wallet1, client1):
    """
    An existing non-primary node crashes and then view change happens,
    the crashed node comes back up after view change
    """
    node_to_stop = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    old_view_no = node_to_stop.viewNo

    # Stop non-primary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_stop,
                                            stopNode=True)
    looper.removeProdable(node_to_stop)
    remaining_nodes = list(set(txnPoolNodeSet) - {node_to_stop})

    # Send some requests before view change
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
    ensure_view_change(looper, remaining_nodes)
    ensureElectionsDone(looper, remaining_nodes)
    # Send some requests after view change
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)

    restarted_node = start_stopped_node(node_to_stop, looper, tconf, tdir,
                                        allPluginsPath)
    txnPoolNodeSet = remaining_nodes + [restarted_node]
    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet,
                   old_view_no + 1,
                   timeout=10))
    assert len(
        getAllReturnVals(
            restarted_node.view_changer,
            restarted_node.view_changer._start_view_change_if_possible,
            compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    assert not restarted_node.view_changer._next_view_indications
Exemple #51
0
def test_first_audit_catchup_during_ordering(monkeypatch,
                                             looper, tconf, tdir, allPluginsPath, txnPoolNodeSet,
                                             sdk_pool_handle, sdk_wallet_client):
    # 1. patch primaries in audit ledger
    for n in txnPoolNodeSet:
        patch_primaries_in_audit(n, monkeypatch)

    # 2. order a txn
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

    # 3. restart Nodes 3 and 4
    restart_nodes(looper, txnPoolNodeSet, txnPoolNodeSet[2:], tconf, tdir, allPluginsPath, start_one_by_one=False)
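    # 4. patch primaries in the audit ledger on the restarted nodes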
    for n in txnPoolNodeSet[2:]:
        patch_primaries_in_audit(n, monkeypatch)

    # 5. make sure that all nodes have equal primaries and can order
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=20)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
def test_set_H_as_maxsize_for_backup_if_is_primary(looper, txnPoolNodeSet,
                                                   sdk_pool_handle,
                                                   sdk_wallet_steward, tconf,
                                                   tdir, allPluginsPath):
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    primary_on_backup = txnPoolNodeSet[2]
    assert primary_on_backup.replicas._replicas[1].isPrimary

    # Stop Node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_on_backup,
                                            stopNode=True)
    txnPoolNodeSet.remove(primary_on_backup)
    looper.removeProdable(primary_on_backup)

    # Start stopped Node
    primary_on_backup = start_stopped_node(primary_on_backup, looper, tconf,
                                           tdir, allPluginsPath)

    # Delay 3PC messages so that the restarted node does not have them ordered
    with delay_rules(primary_on_backup.nodeIbStasher, delay_3pc()):
        txnPoolNodeSet.append(primary_on_backup)

        ensureElectionsDone(looper,
                            txnPoolNodeSet,
                            customTimeout=tconf.NEW_VIEW_TIMEOUT)

        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, LOG_SIZE)

        # Check restored state
        assert primary_on_backup.replicas._replicas[1].isPrimary
        assert primary_on_backup.replicas._replicas[1].h == 1
        assert primary_on_backup.replicas._replicas[1].H == 1 + LOG_SIZE

    def chk():
        assert primary_on_backup.replicas._replicas[1].h == LOG_SIZE
        assert primary_on_backup.replicas._replicas[1].H == LOG_SIZE + LOG_SIZE

    # Check caught-up state
    looper.run(eventually(chk, retryWait=.2, timeout=tconf.NEW_VIEW_TIMEOUT))
Exemple #53
0
def test_view_change_with_next_primary_stopped_and_one_node_lost_commit(looper, txnPoolNodeSet,
                                                                        sdk_pool_handle, sdk_wallet_client,
                                                                        limitTestRunningTime):
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary = get_next_primary_name(txnPoolNodeSet, current_view_no + 1)
    delayed_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet) if r.node.name != next_primary][0]
    other_nodes = [n for n in txnPoolNodeSet if n.name != next_primary]

    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2)

        disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, next_primary)
        trigger_view_change(other_nodes)

    ensureElectionsDone(looper, other_nodes,
                        instances_list=range(2), customTimeout=15)
    ensure_all_nodes_have_same_data(looper, other_nodes)
    sdk_ensure_pool_functional(looper, other_nodes, sdk_wallet_client, sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, other_nodes)
Exemple #54
0
def test_view_change_on_performance_degraded(looper, txnPoolNodeSet, viewNo,
                                             sdk_pool_handle,
                                             sdk_wallet_steward):
    """
    Test that a view change is done when the performance of master goes down
    Send multiple requests from the client and delay some requests by master
    instance so that there is a view change. All nodes will agree that master
    performance degraded
    """
    old_primary_node = get_master_primary_node(list(txnPoolNodeSet))

    simulate_slow_master(looper, txnPoolNodeSet, sdk_pool_handle,
                         sdk_wallet_steward)
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo + 1)

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert old_primary_node.name != new_primary_node.name
def test_view_change_retry_by_timeout(
        txnPoolNodeSet, looper, setup, sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup

    delay_view_change_done_msg(txnPoolNodeSet)

    start_view_change(txnPoolNodeSet, initial_view_no + 1)
    # The first view change should fail because of the delayed ViewChangeDone
    # messages. This then leads to the new view change that we need.
    with pytest.raises(AssertionError):
        ensureElectionsDone(looper=looper,
                            nodes=txnPoolNodeSet,
                            customTimeout=view_change_timeout + 2)

    # Resetting delays to let second view change go well
    reset_delays_and_process_delayeds(txnPoolNodeSet)

    # This view change should be completed with no problems
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method was called one time
    for node in txnPoolNodeSet:
        assert get_count(node,
                         node._check_view_change_completed) - \
               timeout_callback_stats[node.name]['called'] == 1
        assert len(getAllReturnVals(node,
                                    node._check_view_change_completed,
                                    compare_val_to=True)) - \
               timeout_callback_stats[node.name]['returned_true'] == 1

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)
def test_recover_stop_primaries_no_view_change(looper, checkpoint_size, txnPoolNodeSet,
                                               allPluginsPath, tdir, tconf, sdk_pool_handle,
                                               sdk_wallet_steward):
    """
    Test that we can recover after having more than f nodes disconnected:
    - send txns
    - stop current master primary
    - restart current master primary
    - send txns
    """

    active_nodes = list(txnPoolNodeSet)
    assert 4 == len(active_nodes)
    initial_view_no = active_nodes[0].viewNo

    logger.info("send at least one checkpoint")
    assert nodes_do_not_have_checkpoints(*active_nodes)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2 * checkpoint_size)
    assert nodes_have_checkpoints(*active_nodes)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Stop first node (current Primary)")
    stopped_node, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Restart the primary node")
    restarted_node = start_stopped_node(stopped_node, looper, tconf, tdir, allPluginsPath)
    assert nodes_do_not_have_checkpoints(restarted_node)
    assert nodes_have_checkpoints(*active_nodes)
    active_nodes = active_nodes + [restarted_node]

    logger.info("Check that primary selected")
    ensureElectionsDone(looper=looper, nodes=active_nodes,
                        instances_list=range(2), customTimeout=30)
    waitForViewChange(looper, active_nodes, expectedViewNo=0)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Check if the pool is able to process requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 10 * checkpoint_size)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)
    assert nodes_have_checkpoints(*active_nodes)
Exemple #57
0
def ensure_several_view_change(looper, nodes, vc_count=1,
                               exclude_from_check=None, custom_timeout=None):
    """
    This method patches the master performance check to return False and thus
    ensures that all given nodes do a view change
    Also, this method can do several view change.
    If you try do several view_change by calling ensure_view_change,
    than monkeypatching method isMasterDegraded would work unexpectedly.
    Therefore, we return isMasterDegraded only after doing view_change needed count
    """
    old_meths = {}
    view_changes = {}
    expected_view_no = None
    for node in nodes:
        old_meths[node.name] = node.monitor.isMasterDegraded

    for __ in range(vc_count):
        old_view_no = checkViewNoForNodes(nodes)
        expected_view_no = old_view_no + 1

        for node in nodes:
            view_changes[node.name] = node.monitor.totalViewChanges

            def slow_master(self):
                # Only allow one view change
                rv = self.totalViewChanges == view_changes[self.name]
                if rv:
                    logger.info('{} making master look slow'.format(self))
                return rv

            node.monitor.isMasterDegraded = types.MethodType(slow_master, node.monitor)
            node.checkPerformance()

        perf_check_freq = next(iter(nodes)).config.PerfCheckFreq
        timeout = custom_timeout or waits.expectedPoolViewChangeStartedTimeout(len(nodes)) + perf_check_freq
        nodes_to_check = nodes if exclude_from_check is None else [n for n in nodes if n not in exclude_from_check]
        logger.debug('Checking view no for nodes {}'.format(nodes_to_check))
        looper.run(eventually(checkViewNoForNodes, nodes_to_check, expected_view_no, retryWait=1, timeout=timeout))
        ensureElectionsDone(looper=looper, nodes=nodes, customTimeout=timeout)
        ensure_all_nodes_have_same_data(looper, nodes, custom_timeout=timeout, exclude_from_check=exclude_from_check)

    return expected_view_no
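
The slow_master closure above is the core of the trick: it keeps reporting the master as degraded only until that node's totalViewChanges counter moves past the value captured before the view change. A tiny runnable illustration of that flip:

view_changes = {"Alpha": 0}

class FakeMonitor:
    name = "Alpha"
    totalViewChanges = 0

def slow_master(self):
    # Degraded only until this node completes its next view change
    return self.totalViewChanges == view_changes[self.name]

m = FakeMonitor()
assert slow_master(m) is True    # looks degraded, so a view change is triggered
m.totalViewChanges += 1
assert slow_master(m) is False   # back to normal after the view change
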
Exemple #58
0
def scenario_txns_during_view_change(looper,
                                     nodes,
                                     curr_utxo,
                                     send_txns,
                                     send_txns_invalid=None):
    lagging_node = nodes[-1]
    rest_nodes = nodes[:-1]

    def send_txns_invalid_default():
        curr_utxo['amount'] += 1
        with pytest.raises(RequestRejectedException,
                           match='Insufficient funds'):
            send_txns()
        curr_utxo['amount'] -= 1

    # Send transactions
    send_txns()
    ensure_all_nodes_have_same_data(looper, nodes)

    # Lag one node (delay Prepare and Commit messages for lagging_node)
    with delay_rules(lagging_node.nodeIbStasher, pDelay(), cDelay()):
        # Send more transactions
        send_txns()
        ensure_all_nodes_have_same_data(looper, rest_nodes)

        # Send invalid transactions
        (send_txns_invalid or send_txns_invalid_default)()
        ensure_all_nodes_have_same_data(looper, rest_nodes)

        # Initiate view change
        # Wait until view change is finished and check that needed transactions are written.
        ensure_view_change(looper, nodes)
        ensureElectionsDone(looper, nodes)

    # Reset delays
    # Make sure that all nodes have equal state
    # (expecting that lagging_node caught up missed ones)
    ensure_all_nodes_have_same_data(looper, nodes)

    # make sure the pool is functional
    send_txns()
    ensure_all_nodes_have_same_data(looper, nodes)
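
The delay_rules block above is a context manager: it installs the Prepare/Commit delayers on the lagging node's stasher on entry and removes them on exit (the real helper also replays the delayed messages at that point; delay_rules_without_processing, seen in other examples, is the variant that skips the replay). A toy sketch of just the install/remove shape, with a fake stasher standing in for nodeIbStasher:

from contextlib import contextmanager

class FakeStasher:
    """Stand-in for a node stasher: only records which delay rules are active."""
    def __init__(self):
        self.rules = []

@contextmanager
def delay_rules_sketch(stashers, *delayers):
    for s in stashers:
        s.rules.extend(delayers)
    try:
        yield
    finally:
        # On exit the rules are removed (and, in the real helper, the delayed
        # messages are processed)
        for s in stashers:
            for d in delayers:
                s.rules.remove(d)

stasher = FakeStasher()
with delay_rules_sketch([stasher], "PREPARE", "COMMIT"):
    assert stasher.rules == ["PREPARE", "COMMIT"]
assert stasher.rules == []
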
Exemple #59
0
def test_view_change_after_some_txns(
        txnPoolNodesLooper,
        txnPoolNodeSet,
        some_txns_done,
        testNodeClass,
        viewNo,  # noqa
        sdk_pool_handle,
        sdk_wallet_client,
        node_config_helper_class,
        tconf,
        tdir,
        allPluginsPath,
        tmpdir_factory):
    """
    Check that view change is done after processing some of txns
    """
    ensure_view_change(txnPoolNodesLooper, txnPoolNodeSet)
    ensureElectionsDone(looper=txnPoolNodesLooper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(txnPoolNodesLooper, nodes=txnPoolNodeSet)

    sdk_send_random_and_check(txnPoolNodesLooper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 10)
    ensure_all_nodes_have_same_data(txnPoolNodesLooper, txnPoolNodeSet)

    for node in txnPoolNodeSet:
        txnPoolNodesLooper.removeProdable(node)
        node.stop()

    config = getConfigOnce()

    reload_modules_for_replay(tconf)

    replayable_node_class, basedirpath = get_replayable_node_class(
        tmpdir_factory, tdir, testNodeClass, config)

    print('-------------Replaying now---------------------')

    for node in txnPoolNodeSet:
        create_replayable_node_and_check(txnPoolNodesLooper, txnPoolNodeSet,
                                         node, replayable_node_class,
                                         node_config_helper_class, tconf,
                                         basedirpath, allPluginsPath)
def test_demote_backup_primary(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_stewards, tdir, tconf, allPluginsPath):
    assert len(txnPoolNodeSet) == 6
    view_no = txnPoolNodeSet[-1].viewNo

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_stewards[0], 1)

    node_to_restart = txnPoolNodeSet[-1]
    node_to_demote = steward_for_demote_node = demote_node_index = None
    for i, n in enumerate(txnPoolNodeSet):
        if n.name == txnPoolNodeSet[0].primaries[1]:
            node_to_demote = n
            steward_for_demote_node = sdk_wallet_stewards[i]
            demote_node_index = i
            break

    assert node_to_demote

    demote_node(looper, steward_for_demote_node, sdk_pool_handle,
                node_to_demote)
    del txnPoolNodeSet[demote_node_index]

    # we are expecting 2 view changes here since Beta is selected as the master Primary in view=1
    # (the node registry at the beginning of view 0 is used to select it), but it's not available (demoted),
    # so we do a view change to view=2 by timeout
    waitForViewChange(looper, txnPoolNodeSet, view_no + 2)
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node_to_restart)
    looper.removeProdable(name=node_to_restart.name)
    node_to_restart = start_stopped_node(node_to_restart, looper, tconf,
                                         tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_restart
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_stewards[0], 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=20)