def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, looper, wallet1,
                                               client1, client1Connected,
                                               tconf):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    viewNoBefore = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    stopNodes([old_pr_node], looper)
    looper.removeProdable(old_pr_node)
    remainingNodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remainingNodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remainingNodes, viewNoBefore + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remainingNodes)
    new_pr_node = get_master_primary_node(remainingNodes)
    assert old_pr_node != new_pr_node

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
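
# stopNodes is not defined in this section; a minimal sketch of what it is
# assumed to do (stop every node instance; the looper argument is kept only to
# match the call above, the real helper may also drive the event loop):
def stopNodes(nodes, looper):
    for node in nodes:
        node.stop()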
def test_propagate_primary_after_primary_restart_view_0(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward,
        tdir, allPluginsPath):
    """
    Delay instance change msgs to prevent a view change during primary restart,
    to test propagate primary for the primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores
    all indices correctly.
    Case: viewNo == 0
    """
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (old_ppseqno > 0)

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            old_primary,
                                            stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary,
                                       looper,
                                       tconf,
                                       tdir,
                                       allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [
        i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name
    ][0]
    txnPoolNodeSet[idx] = restartedNode

    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert (new_viewNo == old_viewNo)

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert (new_primary.name == old_primary.name)

    # check that ppSeqNo is still the same across the pool
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (new_ppseqno > old_ppseqno)
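
# _get_ppseqno is not shown here. Judging from its use above (its return value
# is compared before and after traffic, and calling it alone serves as a
# consistency check), it is assumed to return a ppSeqNo that is identical on
# every node; a minimal sketch under that assumption:
def _get_ppseqno(nodes):
    ppseqnos = set()
    for node in nodes:
        # last_ordered_3pc is a (viewNo, ppSeqNo) pair on each replica
        _, pp_seq_no = node.master_replica.last_ordered_3pc
        ppseqnos.add(pp_seq_no)
    assert len(ppseqnos) == 1, "ppSeqNo differs across nodes: {}".format(ppseqnos)
    return ppseqnos.pop()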
def test_view_change_timeout(nodeSet, looper, up, wallet1, client1):
    """
    Check that a view change is restarted if it is not completed in time
    """

    m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    initial_view_no = waitForViewChange(looper, nodeSet)
    # Setting view change timeout to low value to make test pass quicker
    for node in nodeSet:
        node._view_change_timeout = 5

    # Delaying view change messages to make first view change fail
    # due to timeout
    for node in nodeSet:
        node.nodeIbStasher.delay(vcd_delay(delay=50))

    # Delaying PRE-PREPARE messages from nodes and
    # sending a request to force view change
    #for i in range(3):
    #    delayNonPrimaries(nodeSet, instId=i, delay=10)
    #sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    times = {}
    for node in nodeSet:
        times[node.name] = {
            'called': get_count(node, node._check_view_change_completed),
            'returned_true': len(getAllReturnVals(
                node, node._check_view_change_completed, compare_val_to=True))
        }

    for node in nodeSet:
        node.startViewChange(initial_view_no + 1)

    # First view change should fail, because of delayed ViewChangeDone
    # messages. This then leads to the new view change that we need.
    with pytest.raises(AssertionError):
        ensureElectionsDone(looper=looper, nodes=nodeSet, customTimeout=10)

    # Resetting delays to let second view change go well
    reset_delays_and_process_delayeds(nodeSet)

    # This view change should be completed with no problems
    ensureElectionsDone(looper=looper, nodes=nodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=nodeSet)
    new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method has been called at least once
    for node in nodeSet:
        assert get_count(node, node._check_view_change_completed) > times[node.name]['called']
        assert len(getAllReturnVals(node,
                                    node._check_view_change_completed,
                                    compare_val_to=True)) > times[node.name]['returned_true']

    # Multiple view changes have been initiated
    for node in nodeSet:
        assert (node.viewNo - initial_view_no) > 1

    ensure_pool_functional(looper, nodeSet, wallet1, client1)
def test_view_changes_if_master_primary_disconnected(txnPoolNodeSet, looper,
                                                     sdk_pool_handle,
                                                     sdk_wallet_client, tdir,
                                                     tconf, allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            old_pr_node,
                                            stopNode=True)
    looper.removeProdable(old_pr_node)

    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf, tdir,
                                     allPluginsPath)

    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet,
                   old_view_no + 1,
                   timeout=tconf.VIEW_CHANGE_TIMEOUT))

    # After the node catches up, it sets view_no from the audit ledger and
    # does not need to do a view change
    assert len(
        getAllReturnVals(old_pr_node.view_changer,
                         old_pr_node.view_changer.start_view_change,
                         compare_val_to=True)) == 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert not old_pr_node.view_changer._next_view_indications
def test_view_change_on_quorum_of_master_degraded(txnPoolNodeSet, looper,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward, viewNo):
    """
    A node will change its view even though it does not itself find the master
    to be degraded, when a quorum of nodes agrees that master performance degraded
    """

    m_primary_node = get_master_primary_node(list(txnPoolNodeSet))

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(txnPoolNodeSet, 0, 10)

    pr = getPrimaryReplica(txnPoolNodeSet, 0)
    reluctantNode = pr.node

    # Count sent instance changes of all nodes
    sentInstChanges = {}
    for n in txnPoolNodeSet:
        sentInstChanges[n.name] = node_sent_instance_changes_count(n)

    # Node reluctant to change view: it never says the master is degraded
    reluctantNode.monitor.isMasterDegraded = types.MethodType(
        lambda x: False, reluctantNode.monitor)

    backup_replica = txnPoolNodeSet[0].replicas[1]
    backup_last_ordered_before = backup_replica.last_ordered_3pc
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 4)
    # make sure that backups also ordered at least 1 batch to be able to track performance degradation
    looper.run(
        eventually(lambda: assertExp(backup_replica.last_ordered_3pc >
                                     backup_last_ordered_before)))

    for n in txnPoolNodeSet:
        n.checkPerformance()

    # Check that view change happened for all nodes
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo + 1)

    # All nodes except the reluctant node should have voted for a view change
    # and thus must have called `sendInstanceChange`
    for n in txnPoolNodeSet:
        if n.name != reluctantNode.name:
            assert node_sent_instance_changes_count(n) > sentInstChanges.get(
                n.name, 0)
        else:
            assert node_sent_instance_changes_count(n) == sentInstChanges.get(
                n.name, 0)

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
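
# node_sent_instance_changes_count is not defined here. Other variants of this
# test below count the same thing directly via the spy log, so it is assumed to
# be a thin wrapper over that counter:
def node_sent_instance_changes_count(node):
    # Number of times this node's view changer has sent an InstanceChange message
    return node.view_changer.spylog.count(ViewChanger.sendInstanceChange.__name__)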
def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, looper, wallet1,
                                               client1, client1Connected,
                                               tconf, tdirWithPoolTxns,
                                               allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            old_pr_node,
                                            stopNode=True)
    looper.removeProdable(old_pr_node)

    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf,
                                     tdirWithPoolTxns, allPluginsPath)

    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet,
                   old_view_no + 1,
                   timeout=10))
    assert len(
        getAllReturnVals(old_pr_node,
                         old_pr_node._start_view_change_if_possible,
                         compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert not old_pr_node._next_view_indications
def test_propagate_primary_after_primary_restart_view_1(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward, tdir, allPluginsPath):
    """
    Delay instance change msgs to prevent a view change during primary restart,
    to test propagate primary for the primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores
    all indices correctly.
    Case: viewNo > 0
    """

    ensure_view_change(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=1)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (old_ppseqno > 0)

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, old_primary, stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary, looper, tconf, tdir, allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [i for i, n in enumerate(txnPoolNodeSet) if n.name == restartedNode.name][0]
    txnPoolNodeSet[idx] = restartedNode

    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert (new_viewNo == old_viewNo)

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert (new_primary.name == old_primary.name)

    # check that ppSeqNo is still the same across the pool
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward, sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert (new_ppseqno > old_ppseqno)
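
# delay_instance_change is not shown; it is assumed to install the same icDelay
# stasher rule on every node of the pool, mirroring the per-node call made on
# the restarted node above:
def delay_instance_change(nodes, delay_sec):
    for node in nodes:
        # Delay incoming INSTANCE_CHANGE messages so no view change can start
        node.nodeIbStasher.delay(icDelay(delay_sec))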
def test_view_change_on_quorum_of_master_degraded(txnPoolNodeSet, looper,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward, viewNo):
    """
    A node will change its view even though it does not itself find the master
    to be degraded, when a quorum of nodes agrees that master performance degraded
    """

    m_primary_node = get_master_primary_node(list(txnPoolNodeSet))

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(txnPoolNodeSet, 0, 10)

    pr = getPrimaryReplica(txnPoolNodeSet, 0)
    reluctantNode = pr.node

    # Count sent instance changes of all nodes
    sentInstChanges = {}
    instChngMethodName = ViewChanger.sendInstanceChange.__name__
    for n in txnPoolNodeSet:
        sentInstChanges[n.name] = n.view_changer.spylog.count(
            instChngMethodName)

    # Node reluctant to change view: it never says the master is degraded
    reluctantNode.monitor.isMasterDegraded = types.MethodType(
        lambda x: False, reluctantNode.monitor)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 4)

    for n in txnPoolNodeSet:
        n.checkPerformance()

    # Check that view change happened for all nodes
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo + 1)

    # All nodes except the reluctant node should have voted for a view change
    # and thus must have called `sendInstanceChange`
    for n in txnPoolNodeSet:
        if n.name != reluctantNode.name:
            assert n.view_changer.spylog.count(instChngMethodName) > \
                   sentInstChanges.get(n.name, 0)
        else:
            assert n.view_changer.spylog.count(instChngMethodName) == \
                   sentInstChanges.get(n.name, 0)

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_removed_replica_restored_on_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched, view_change):
    """
    1. Remove replica on some node which is not master primary
    2. Reconnect the node which was master primary so far
    3. Check that nodes and replicas are correctly added
    """
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    node = get_last_master_non_primary_node(txnPoolNodeSet)
    start_replicas_count = node.replicas.num_replicas
    instance_id = start_replicas_count - 1

    node.replicas.remove_replica(instance_id)
    check_replica_removed(node, start_replicas_count, instance_id)

    # trigger view change on all nodes
    master_primary = get_master_primary_node(txnPoolNodeSet)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, master_primary)
    txnPoolNodeSet.remove(master_primary)
    looper.removeProdable(master_primary)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    restarted_node = start_stopped_node(master_primary, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(restarted_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=1,
                      customTimeout=2 * tconf.NEW_VIEW_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    assert start_replicas_count == node.replicas.num_replicas
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
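
# check_replica_removed is not shown; a plausible sketch based on how it is
# called above (the real helper may also verify that the specific instance_id
# is gone from the node's replica registry):
def check_replica_removed(node, start_replicas_count, instance_id):
    # After remove_replica(instance_id) the replica count should drop by one
    assert node.replicas.num_replicas == start_replicas_count - 1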
def test_instance_change_before_vc(looper,
                                   txnPoolNodeSet,
                                   tconf,
                                   sdk_pool_handle,
                                   sdk_wallet_steward):
    master_node = get_master_primary_node(txnPoolNodeSet)
    old_view = master_node.viewNo
    expected_view_no = old_view + 1
    panic_node = txnPoolNodeSet[-1]
    panic_node.view_changer.on_master_degradation()

    def has_inst_chng_in_validator_info():
        for node in txnPoolNodeSet:
            latest_info = node._info_tool.info
            ic_queue = latest_info['Node_info']['View_change_status']['IC_queue']
            assert expected_view_no in ic_queue
            assert ic_queue[expected_view_no]["Voters"][panic_node.name]['reason'] == Suspicions.PRIMARY_DEGRADED.code

    looper.run(eventually(has_inst_chng_in_validator_info))

    for node in txnPoolNodeSet:
        node.view_changer.on_master_degradation()

    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, expected_view_no, retryWait=1,
                          timeout=tconf.NEW_VIEW_TIMEOUT))
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet)

    def is_inst_chngs_cleared():
        for node in txnPoolNodeSet:
            latest_info = node._info_tool.info
            assert latest_info['Node_info']['View_change_status']['IC_queue'] == {}

    looper.run(eventually(is_inst_chngs_cleared))
def split_nodes(nodes):
    primary_node = get_master_primary_node(nodes)
    slow_node = getNonPrimaryReplicas(nodes, 0)[-1].node
    other_nodes = [n for n in nodes if n != slow_node]
    other_non_primary_nodes = [n for n in nodes if n not in
                               (slow_node, primary_node)]
    return slow_node, other_nodes, primary_node, other_non_primary_nodes
def test_view_change_retry_by_timeout(
        txnPoolNodeSet, looper, tconf, setup, sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, vcd_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)

        # First view change should fail, because of delayed ViewChangeDone
        # messages. This then leads to new view change that we need.
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper,
                                nodes=txnPoolNodeSet,
                                customTimeout=1.5 * VIEW_CHANGE_TIMEOUT)

    # Now as ViewChangeDone messages are unblocked view changes should finish successfully
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method was called one time
    check_watchdog_called_expected_times(txnPoolNodeSet, timeout_callback_stats, 1)

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)
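
# start_view_change is not shown; given that older variants of these tests call
# startViewChange / view_changer.start_view_change directly on each node, it is
# assumed to be a simple loop like this:
def start_view_change(nodes, next_view_no):
    for node in nodes:
        node.view_changer.start_view_change(next_view_no)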
def test_number_txns_in_catchup_and_vc_queue_valid(looper, txnPoolNodeSet,
                                                   tconf, sdk_pool_handle,
                                                   sdk_wallet_steward):
    num_txns = 5
    master_node = get_master_primary_node(txnPoolNodeSet)
    old_view = master_node.viewNo
    expected_view_no = old_view + 1
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            master_node,
                                            stopNode=False)
    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet[1:],
                   expected_view_no,
                   retryWait=1,
                   timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, num_txns)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, master_node)
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet[-1:])
    latest_info = master_node._info_tool.info
    assert latest_info['Node_info']['Catchup_status'][
        'Number_txns_in_catchup'][1] == num_txns
    assert latest_info['Node_info']['View_change_status'][
        'View_No'] == expected_view_no
    node_names = [n.name for n in txnPoolNodeSet[1:]]
    for node_name in node_names:
        assert latest_info['Node_info']['View_change_status']['VCDone_queue'][
            node_name][0] == master_node.master_primary_name
        assert latest_info['Node_info']['View_change_status']['VCDone_queue'][
            node_name][1]
        assert latest_info['Node_info']['View_change_status'][
            'Last_complete_view_no'] == expected_view_no
def test_removed_replica_restored_on_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched, view_change):
    """
    1. Remove replica on some node which is not master primary
    2. Reconnect the node which was master primary so far
    3. Check that nodes and replicas are correctly added
    """
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    node = get_last_master_non_primary_node(txnPoolNodeSet)
    start_replicas_count = node.replicas.num_replicas
    instance_id = start_replicas_count - 1

    node.replicas.remove_replica(instance_id)
    check_replica_removed(node, start_replicas_count, instance_id)

    # trigger view change on all nodes
    master_primary = get_master_primary_node(txnPoolNodeSet)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, master_primary)
    txnPoolNodeSet.remove(master_primary)
    looper.removeProdable(master_primary)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    restarted_node = start_stopped_node(master_primary, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(restarted_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=1,
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    assert start_replicas_count == node.replicas.num_replicas
def test_view_change_retry_by_timeout(txnPoolNodeSet, looper, tconf, setup,
                                      sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(stashers, nv_delay()):
        start_view_change(txnPoolNodeSet, initial_view_no + 1)

        # First view change should fail, because of delayed ViewChangeDone
        # messages. This then leads to new view change that we need.
        with pytest.raises(AssertionError):
            ensureElectionsDone(looper=looper,
                                nodes=txnPoolNodeSet,
                                customTimeout=1.5 * NEW_VIEW_TIMEOUT)

    # Now as ViewChangeDone messages are unblocked view changes should finish successfully
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method was called one time
    check_watchdog_called_expected_times(txnPoolNodeSet,
                                         timeout_callback_stats, 1)

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
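
# check_watchdog_called_expected_times is not shown. The older
# test_view_change_retry_by_timeout variant further down performs the
# equivalent check inline, so a sketch under that assumption:
def check_watchdog_called_expected_times(nodes, stats, expected_times):
    for node in nodes:
        called = get_count(node, node._check_view_change_completed) - \
            stats[node.name]['called']
        returned_true = len(getAllReturnVals(
            node, node._check_view_change_completed,
            compare_val_to=True)) - stats[node.name]['returned_true']
        assert called == expected_times
        assert returned_true == expected_times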
def test_number_txns_in_catchup_and_vc_queue_valid(looper,
                                                   txnPoolNodeSet,
                                                   tconf,
                                                   sdk_pool_handle,
                                                   sdk_wallet_steward,
                                                   tdir,
                                                   allPluginsPath):
    num_txns = 5
    master_node = get_master_primary_node(txnPoolNodeSet)
    master_node_index = txnPoolNodeSet.index(master_node)
    other_nodes = txnPoolNodeSet.copy()
    other_nodes.remove(master_node)
    old_view = master_node.viewNo
    expected_view_no = old_view + 1
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, master_node, stopNode=True)
    looper.removeProdable(master_node)
    looper.run(eventually(checkViewNoForNodes, other_nodes, expected_view_no, retryWait=1,
                          timeout=tconf.NEW_VIEW_TIMEOUT))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, other_nodes, sdk_pool_handle, sdk_wallet_steward, num_txns)
    master_node = start_stopped_node(master_node, looper, tconf,
                                     tdir, allPluginsPath)
    txnPoolNodeSet[master_node_index] = master_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet[-1:],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    latest_info = master_node._info_tool.info
    assert latest_info['Node_info']['Catchup_status']['Number_txns_in_catchup'][1] == num_txns
    assert latest_info['Node_info']['View_change_status']['View_No'] == expected_view_no
    for n in other_nodes:
        assert n._info_tool.info['Node_info']['View_change_status']['Last_complete_view_no'] == expected_view_no
def test_primary_receives_delayed_prepares(looper, txnPoolNodeSet,
                                           sdk_wallet_client,
                                           sdk_pool_handle):
    """
    Primary gets all PREPAREs after COMMITs
    """
    delay = 50
    primary_node = get_master_primary_node(txnPoolNodeSet)
    other_nodes = [n for n in txnPoolNodeSet if n != primary_node]
    primary_node.nodeIbStasher.delay(pDelay(delay, 0))

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=10)

    for node in other_nodes:
        assert node.master_replica.prePrepares
        assert node.master_replica.prepares
        assert node.master_replica.commits

    assert primary_node.master_replica.sentPrePrepares
    assert not primary_node.master_replica.prepares
    assert primary_node.master_replica.commits
def test_view_change_on_quorum_of_master_degraded(txnPoolNodeSet, looper,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward,
                                                  viewNo):
    """
    A node will change its view even though it does not itself find the master
    to be degraded, when a quorum of nodes agrees that master performance degraded
    """

    m_primary_node = get_master_primary_node(list(txnPoolNodeSet))

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(txnPoolNodeSet, 0, 10)

    pr = getPrimaryReplica(txnPoolNodeSet, 0)
    reluctantNode = pr.node

    # Count sent instance changes of all nodes
    sentInstChanges = {}
    instChngMethodName = ViewChanger.sendInstanceChange.__name__
    for n in txnPoolNodeSet:
        sentInstChanges[n.name] = n.view_changer.spylog.count(instChngMethodName)

    # Node reluctant to change view: it never says the master is degraded
    reluctantNode.monitor.isMasterDegraded = types.MethodType(
        lambda x: False, reluctantNode.monitor)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 4)
    # Check that view change happened for all nodes
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo + 1)

    # All nodes except the reluctant node should have voted for a view change
    # and thus must have called `sendInstanceChange`
    for n in txnPoolNodeSet:
        if n.name != reluctantNode.name:
            assert n.view_changer.spylog.count(instChngMethodName) > \
                   sentInstChanges.get(n.name, 0)
        else:
            assert n.view_changer.spylog.count(instChngMethodName) == \
                   sentInstChanges.get(n.name, 0)

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_view_change_on_performance_degraded(looper, nodeSet, up, viewNo,
                                             wallet1, client1):
    """
    Test that a view change is done when the performance of master goes down
    Send multiple requests from the client and delay some requests by master
    instance so that there is a view change. All nodes will agree that master
    performance degraded
    """
    old_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))

    simulate_slow_master(looper, nodeSet, wallet1, client1)
    waitForViewChange(looper, nodeSet, expectedViewNo=viewNo + 1)

    ensureElectionsDone(looper=looper, nodes=nodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=nodeSet)
    new_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    assert old_primary_node.name != new_primary_node.name
def test_view_changes_if_master_primary_disconnected(txnPoolNodeSet, looper, sdk_pool_handle,
                                                     sdk_wallet_client, tdir, tconf, allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_pr_node, stopNode=True)
    looper.removeProdable(old_pr_node)

    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf,
                                     tdir, allPluginsPath)

    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(eventually(checkViewNoForNodes,
                          txnPoolNodeSet, old_view_no + 1, timeout=tconf.VIEW_CHANGE_TIMEOUT))
    assert len(getAllReturnVals(old_pr_node.view_changer,
                                old_pr_node.view_changer._start_view_change_if_possible,
                                compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert not old_pr_node.view_changer._next_view_indications
def simulate_slow_master(looper, nodeSet, wallet,
                         client, delay=10, num_reqs=4):
    m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(nodeSet, 0, delay)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, num_reqs)
    return m_primary_node
def simulate_slow_master(looper, txnPoolNodeSet, sdk_pool_handle,
                         sdk_wallet_steward, delay=10, num_reqs=4):
    m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(txnPoolNodeSet, 0, delay)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, num_reqs)
    return m_primary_node
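
# delayNonPrimaries is described by the comments above as delaying PRE-PREPARE
# processing on the non-primary replicas of the given instance. A rough sketch
# of that behaviour (the real helper's signature and return value may differ):
def delayNonPrimaries(nodes, inst_id, delay):
    non_primary_replicas = getNonPrimaryReplicas(nodes, inst_id)
    for replica in non_primary_replicas:
        # ppDelay stalls incoming PRE-PREPAREs for the given instance
        replica.node.nodeIbStasher.delay(ppDelay(delay, inst_id))
    return non_primary_replicas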
def test_view_change_on_performance_degraded(looper, txnPoolNodeSet, viewNo,
                                             sdk_pool_handle,
                                             sdk_wallet_steward):
    """
    Test that a view change is done when the performance of master goes down
    Send multiple requests from the client and delay some requests by master
    instance so that there is a view change. All nodes will agree that master
    performance degraded
    """
    old_primary_node = get_master_primary_node(list(txnPoolNodeSet))

    simulate_slow_master(looper, txnPoolNodeSet, sdk_pool_handle,
                         sdk_wallet_steward)
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo + 1)

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert old_primary_node.name != new_primary_node.name
def test_view_change_on_performance_degraded(looper, txnPoolNodeSet, viewNo,
                                             sdk_pool_handle,
                                             sdk_wallet_steward):
    """
    Test that a view change is done when the performance of master goes down
    Send multiple requests from the client and delay some requests by master
    instance so that there is a view change. All nodes will agree that master
    performance degraded
    """
    old_primary_node = get_master_primary_node(list(txnPoolNodeSet))

    trigger_view_change(txnPoolNodeSet)

    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo + 1)

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert old_primary_node.name != new_primary_node.name
    waitNodeDataEquality(looper, *txnPoolNodeSet)
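
# trigger_view_change is not shown; test_instance_change_before_vc above makes
# every node vote for a view change by reporting master degradation, so the
# helper is assumed to do the same:
def trigger_view_change(nodes):
    for node in nodes:
        node.view_changer.on_master_degradation()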
def test_view_not_changed_when_short_disconnection(txnPoolNodeSet, looper,
                                                   sdk_pool_handle,
                                                   sdk_wallet_client, tdir,
                                                   tconf, allPluginsPath):
    """
    When primary is disconnected but not long enough to trigger the timeout,
    view change should not happen
    """

    pr_node = get_master_primary_node(txnPoolNodeSet)
    view_no = checkViewNoForNodes(txnPoolNodeSet)

    prp_inst_chg_calls = {
        node.name: node.spylog.count(node.propose_view_change.__name__)
        for node in txnPoolNodeSet if node != pr_node
    }

    recv_inst_chg_calls = {
        node.name: node.spylog.count(
            node.view_changer.process_instance_change_msg.__name__)
        for node in txnPoolNodeSet if node != pr_node
    }

    # Disconnect master's primary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            pr_node,
                                            timeout=2)
    txnPoolNodeSet.remove(pr_node)
    looper.removeProdable(name=pr_node.name)

    timeout = min(tconf.ToleratePrimaryDisconnection - 1, 1)

    # Reconnect master's primary
    pr_node = start_stopped_node(pr_node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(pr_node)

    def chk2():
        # Schedule an instance change but do not send it
        # since primary joins again
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.propose_view_change.__name__
                                         ) > prp_inst_chg_calls[node.name]
                assert node.view_changer.spylog.count(node.view_changer.process_instance_change_msg.__name__) == \
                       recv_inst_chg_calls[node.name]

    looper.run(eventually(chk2, retryWait=.2, timeout=timeout + 1))

    assert checkViewNoForNodes(txnPoolNodeSet) == view_no

    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
def test_choose_ts_from_state(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 1)
    primary_node = get_master_primary_node(txnPoolNodeSet)
    expected_ts = get_utc_epoch() + 30
    req_handler = primary_node.get_req_handler(DOMAIN_LEDGER_ID)
    req_handler.ts_store.set(expected_ts, req_handler.state.headHash)
    primary_node.master_replica.last_accepted_pre_prepare_time = None
    reply = sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_steward, 1)[0][1]
    assert abs(expected_ts - int(get_txn_time(reply['result']))) < 3
def setup(nodeSet, looper):
    m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    initial_view_no = waitForViewChange(looper, nodeSet)
    # Setting view change timeout to low value to make test pass quicker
    for node in nodeSet:
        node._view_change_timeout = view_change_timeout

    times = {}
    for node in nodeSet:
        times[node.name] = {
            'called': get_count(node, node._check_view_change_completed),
            'returned_true': len(getAllReturnVals(
                node, node._check_view_change_completed, compare_val_to=True))
        }
    return m_primary_node, initial_view_no, times
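
# _check_view_change_completed_stats (used by the setup fixtures further down)
# presumably collects the same per-node counters that this setup builds inline;
# a sketch under that assumption:
def _check_view_change_completed_stats(nodes):
    return {
        node.name: {
            'called': get_count(node, node._check_view_change_completed),
            'returned_true': len(getAllReturnVals(
                node, node._check_view_change_completed, compare_val_to=True))
        }
        for node in nodes
    }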
def malicious_setup(request, txnPoolNodeSet):
    primary_node = get_master_primary_node(txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    bad_node = [n for n in other_nodes if n != primary_node][0]
    good_non_primary_node = [
        n for n in other_nodes
        if n != slow_node and n != bad_node and n != primary_node
    ][0]

    if request.param == 'do_not_send':
        orig_method = bad_node.nodeMsgRouter.routes[MessageReq]

        def do_not_send(self, msg, frm):
            if msg.msg_type == PREPREPARE:
                return
            else:
                return orig_method(msg, frm)

        bad_node.nodeMsgRouter.routes[MessageReq] = types.MethodType(
            do_not_send, bad_node)
        return primary_node, bad_node, good_non_primary_node, slow_node, \
               other_nodes, do_not_send, orig_method

    if request.param == 'send_bad':
        orig_method = bad_node.nodeMsgRouter.routes[MessageReq]

        def send_bad(self, msg, frm):
            if msg.msg_type == PREPREPARE:
                resp = self.replicas[msg.params['instId']].getPrePrepare(
                    msg.params['viewNo'], msg.params['ppSeqNo'])
                resp = updateNamedTuple(resp, digest='11908ffq')
                self.sendToNodes(MessageRep(
                    **{
                        f.MSG_TYPE.nm: msg.msg_type,
                        f.PARAMS.nm: msg.params,
                        f.MSG.nm: resp
                    }),
                                 names=[
                                     frm,
                                 ])
            else:
                return orig_method(msg, frm)

        bad_node.nodeMsgRouter.routes[MessageReq] = types.MethodType(
            send_bad, bad_node)
        return primary_node, bad_node, good_non_primary_node, slow_node, \
               other_nodes, send_bad, orig_method
def test_view_change_retry_by_timeout(
        txnPoolNodeSet, looper, setup, sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup

    delay_view_change_done_msg(txnPoolNodeSet)

    start_view_change(txnPoolNodeSet, initial_view_no + 1)
    # First view change should fail, because of delayed ViewChangeDone
    # messages. This then leads to new view change that we need.
    with pytest.raises(AssertionError):
        ensureElectionsDone(looper=looper,
                            nodes=txnPoolNodeSet,
                            customTimeout=view_change_timeout + 2)

    # Resetting delays to let second view change go well
    reset_delays_and_process_delayeds(txnPoolNodeSet)

    # This view change should be completed with no problems
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method was called one time
    for node in txnPoolNodeSet:
        assert get_count(node,
                         node._check_view_change_completed) - \
               timeout_callback_stats[node.name]['called'] == 1
        assert len(getAllReturnVals(node,
                                    node._check_view_change_completed,
                                    compare_val_to=True)) - \
               timeout_callback_stats[node.name]['returned_true'] == 1

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)
def test_primary_receives_delayed_prepares(looper, txnPoolNodeSet, client1,
                                           wallet1, client1Connected):
    """
    Primary gets all PREPAREs after COMMITs
    """
    delay = 50
    primary_node = get_master_primary_node(txnPoolNodeSet)
    other_nodes = [n for n in txnPoolNodeSet if n != primary_node]
    primary_node.nodeIbStasher.delay(pDelay(delay, 0))

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 10)

    for node in other_nodes:
        assert node.master_replica.prePrepares
        assert node.master_replica.prepares
        assert node.master_replica.commits

    assert primary_node.master_replica.sentPrePrepares
    assert not primary_node.master_replica.prepares
    assert primary_node.master_replica.commits
def test_choose_ts_from_state(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward):
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward,
                              1)
    primary_node = get_master_primary_node(txnPoolNodeSet)
    expected_ts = get_utc_epoch() + 30
    req_handler = primary_node.get_req_handler(DOMAIN_LEDGER_ID)
    req_handler.ts_store.set(expected_ts,
                             req_handler.state.headHash)
    primary_node.master_replica.last_accepted_pre_prepare_time = None
    reply = sdk_send_random_and_check(looper,
                                      txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_steward,
                                      1)[0][1]
    assert abs(expected_ts - int(get_txn_time(reply['result']))) < 3
def test_choose_ts_from_state(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward):
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward,
                              1)
    primary_node = get_master_primary_node(txnPoolNodeSet)
    expected_ts = get_utc_epoch() + 30
    req_handler = primary_node.write_manager.request_handlers[NYM][0]
    req_handler.database_manager.ts_store.set(expected_ts,
                                              req_handler.state.headHash)
    primary_node.master_replica._ordering_service.last_accepted_pre_prepare_time = None
    reply = sdk_send_random_and_check(looper,
                                      txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_steward,
                                      1)[0][1]
    assert abs(expected_ts - int(get_txn_time(reply['result']))) < 3
def test_view_change_timeout(nodeSet, looper, up, setup, wallet1, client1):
    """
    Check that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, times = setup

    delay_view_change_msg(nodeSet)

    start_view_change(nodeSet, initial_view_no + 1)
    # First view change should fail, because of delayed view change
    # messages. This then leads to the new view change that we need.
    with pytest.raises(AssertionError):
        ensureElectionsDone(looper=looper, nodes=nodeSet, customTimeout=10)

    # Resetting delays to let second view change go well
    reset_delays_and_process_delayeds(nodeSet)

    # This view change should be completed with no problems
    ensureElectionsDone(looper=looper, nodes=nodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=nodeSet)
    new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method has been called at least once
    for node in nodeSet:
        assert get_count(
            node,
            node._check_view_change_completed) > times[node.name]['called']
        assert len(
            getAllReturnVals(
                node, node._check_view_change_completed,
                compare_val_to=True)) > times[node.name]['returned_true']

    # Multiple view changes have been initiated
    for node in nodeSet:
        assert (node.viewNo - initial_view_no) > 1

    ensure_pool_functional(looper, nodeSet, wallet1, client1)
def test_view_change_on_start(tconf, txnPoolNodeSet, looper, sdk_pool_handle,
                              sdk_wallet_client):
    """
    Do a view change without sending any requests first
    """
    old_view_no = txnPoolNodeSet[0].viewNo
    master_primary = get_master_primary_node(txnPoolNodeSet)
    other_nodes = [n for n in txnPoolNodeSet if n != master_primary]
    delay_3pc = 10
    delay_3pc_messages(txnPoolNodeSet, 0, delay_3pc)
    sent_batches = 2
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             sent_batches * tconf.Max3PCBatchSize)

    def chk1():
        t_root, s_root = check_uncommitteds_equal(other_nodes)
        assert master_primary.domainLedger.uncommittedRootHash != t_root
        assert master_primary.states[DOMAIN_LEDGER_ID].headHash != s_root

    looper.run(eventually(chk1, retryWait=1))
    timeout = tconf.PerfCheckFreq + \
              waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    waitForViewChange(looper,
                      txnPoolNodeSet,
                      old_view_no + 1,
                      customTimeout=timeout)

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    check_uncommitteds_equal(txnPoolNodeSet)

    reset_delays_and_process_delayeds(txnPoolNodeSet)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              2 * Max3PCBatchSize,
                              add_delay_to_timeout=delay_3pc)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
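
# check_uncommitteds_equal is not shown; from the way its result is used in
# chk1 above, it is assumed to assert that all given nodes share the same
# uncommitted txn root and state head and to return that common pair:
def check_uncommitteds_equal(nodes):
    txn_roots = {n.domainLedger.uncommittedRootHash for n in nodes}
    state_roots = {n.states[DOMAIN_LEDGER_ID].headHash for n in nodes}
    assert len(txn_roots) == 1
    assert len(state_roots) == 1
    return txn_roots.pop(), state_roots.pop()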
def test_number_txns_in_catchup_and_vc_queue_valid(looper,
                                                   txnPoolNodeSet,
                                                   tconf,
                                                   sdk_pool_handle,
                                                   sdk_wallet_steward):
    num_txns = 5
    master_node = get_master_primary_node(txnPoolNodeSet)
    old_view = master_node.viewNo
    expected_view_no = old_view + 1
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, master_node, stopNode=False)
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet[1:], expected_view_no, retryWait=1,
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, num_txns)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, master_node)
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet[-1:])
    latest_info = master_node._info_tool.info
    assert latest_info['Node_info']['Catchup_status']['Number_txns_in_catchup'][1] == num_txns
    assert latest_info['Node_info']['View_change_status']['View_No'] == expected_view_no
    node_names = [n.name for n in txnPoolNodeSet[1:]]
    for node_name in node_names:
        assert latest_info['Node_info']['View_change_status']['VCDone_queue'][node_name][0] == master_node.master_primary_name
        assert latest_info['Node_info']['View_change_status']['VCDone_queue'][node_name][1]
        assert latest_info['Node_info']['View_change_status']['Last_complete_view_no'] == expected_view_no
def test_primary_receives_delayed_prepares(looper, txnPoolNodeSet,
                                           sdk_wallet_client, sdk_pool_handle):
    """
    Primary gets all PREPAREs after COMMITs
    """
    delay = 50
    primary_node = get_master_primary_node(txnPoolNodeSet)
    other_nodes = [n for n in txnPoolNodeSet if n != primary_node]
    primary_node.nodeIbStasher.delay(pDelay(delay, 0))

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=10)

    for node in other_nodes:
        assert node.master_replica.prePrepares
        assert node.master_replica.prepares
        assert node.master_replica.commits

    assert primary_node.master_replica.sentPrePrepares
    assert not primary_node.master_replica.prepares
    assert primary_node.master_replica.commits
def setup(txnPoolNodeSet, looper):
    m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    initial_view_no = waitForViewChange(looper, txnPoolNodeSet)
    timeout_callback_stats = _check_view_change_completed_stats(txnPoolNodeSet)
    return m_primary_node, initial_view_no, timeout_callback_stats
def disconnect_master_primary(nodes):
    pr_node = get_master_primary_node(nodes)
    for node in nodes:
        if node != pr_node:
            node.nodestack.getRemote(pr_node.nodestack.name).disconnect()
    return pr_node
def slow_nodes(node_set):
    return [get_master_primary_node(node_set),
            get_first_master_non_primary_node(node_set)]
def test_view_not_changed_when_short_disconnection(txnPoolNodeSet, looper,
                                                   sdk_pool_handle, sdk_wallet_client, tconf):
    """
    When primary is disconnected but not long enough to trigger the timeout,
    view change should not happen
    """
    pr_node = get_master_primary_node(txnPoolNodeSet)
    view_no = checkViewNoForNodes(txnPoolNodeSet)

    lost_pr_calls = {node.name: node.spylog.count(
        node.lost_master_primary.__name__) for node in txnPoolNodeSet
        if node != pr_node}

    prp_inst_chg_calls = {node.name: node.spylog.count(
        node.propose_view_change.__name__) for node in txnPoolNodeSet
        if node != pr_node}

    recv_inst_chg_calls = {node.name: node.spylog.count(
        node.view_changer.process_instance_change_msg.__name__) for node in txnPoolNodeSet
        if node != pr_node}

    def chk1():
        # Check that non-primary nodes detects losing connection with
        # primary
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.lost_master_primary.__name__) \
                       > lost_pr_calls[node.name]

    def chk2():
        # Schedule an instance change but do not send it
        # since primary joins again
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.propose_view_change.__name__) \
                       > prp_inst_chg_calls[node.name]
                assert node.view_changer.spylog.count(node.view_changer.process_instance_change_msg.__name__) \
                       == recv_inst_chg_calls[node.name]

    # Disconnect master's primary
    for node in txnPoolNodeSet:
        if node != pr_node:
            node.nodestack.getRemote(pr_node.nodestack.name).disconnect()

    timeout = min(tconf.ToleratePrimaryDisconnection - 1, 1)
    looper.run(eventually(chk1, retryWait=.2, timeout=timeout))

    # Reconnect master's primary
    for node in txnPoolNodeSet:
        if node != pr_node:
            node.nodestack.retryDisconnected()

    looper.run(eventually(chk2, retryWait=.2, timeout=timeout + 1))

    def chk3():
        # Check the view does not change
        with pytest.raises(AssertionError):
            assert checkViewNoForNodes(txnPoolNodeSet) == view_no + 1

    looper.run(eventually(chk3, retryWait=1, timeout=10))

    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)
def test_view_change_after_back_to_quorum_with_disconnected_primary(txnPoolNodeSet, looper,
                                                                    sdk_pool_handle,
                                                                    sdk_wallet_client,
                                                                    tdir, tconf, allPluginsPath):
    assert len(txnPoolNodeSet) == 4

    pr_node = get_master_primary_node(txnPoolNodeSet)
    assert pr_node.name == "Alpha"

    # 1. Initiate a view change by restarting the primary (Alpha)
    nodes = ensure_view_change_by_primary_restart(looper,
                                                  txnPoolNodeSet,
                                                  tconf,
                                                  tdir,
                                                  allPluginsPath,
                                                  customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)

    # Now primary should be Beta
    pr_node = get_master_primary_node(nodes)
    assert pr_node.name == "Beta"

    # 2. Stop non-primary node Delta; no view changes are expected
    non_primary_to_stop = [n for n in nodes if n.name == "Delta"][0]
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, non_primary_to_stop)
    looper.removeProdable(non_primary_to_stop)

    remaining_nodes = list(set(nodes) - {non_primary_to_stop})
    # The primary is going to be stopped; remember the instance change message
    # counts to ensure that no view change happens while the number of connected
    # nodes is less than a quorum.
    ic_cnt = {}
    for n in remaining_nodes:
        ic_cnt[n.name] = n.view_changer.spylog.count(ViewChanger.sendInstanceChange.__name__)

    # 3. Disconnect primary
    disconnect_node_and_ensure_disconnected(
        looper, remaining_nodes, pr_node)
    looper.removeProdable(pr_node)

    # Wait for more than the ToleratePrimaryDisconnection timeout and check that no IC messages were sent.
    looper.runFor(tconf.ToleratePrimaryDisconnection + 5)
    remaining_nodes = list(set(remaining_nodes) - {pr_node})
    for n in remaining_nodes:
        assert ic_cnt[n.name] == n.view_changer.spylog.count(ViewChanger.sendInstanceChange.__name__)

    view_no = checkViewNoForNodes(remaining_nodes)

    # 4. Start Delta (non-primary), now primary (Beta) is disconnected but there is a quorum
    # to choose a new one.
    restartedNode = start_stopped_node(non_primary_to_stop, looper, tconf,
                                       tdir, allPluginsPath,
                                       delay_instance_change_msgs=False)
    remaining_nodes = remaining_nodes + [restartedNode]

    # 5. Check that view change happened.
    waitForViewChange(looper, remaining_nodes, expectedViewNo=(view_no + 1),
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)

    # ensure pool is working properly
    sdk_send_random_and_check(looper, remaining_nodes, sdk_pool_handle,
                              sdk_wallet_client, 3)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
def test_view_not_changed_when_primary_disconnected_from_less_than_quorum(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Fewer than a quorum of nodes lose connection with the primary; this should
    not trigger a view change as the protocol can move ahead
    """
    pr_node = get_master_primary_node(txnPoolNodeSet)
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    partitioned_rep = npr[0]
    partitioned_node = partitioned_rep.node

    lost_pr_calls = partitioned_node.spylog.count(
        partitioned_node.lost_master_primary.__name__)

    recv_inst_chg_calls = {node.name: node.spylog.count(
        node.view_changer.process_instance_change_msg.__name__) for node in txnPoolNodeSet
        if node != partitioned_node and node != pr_node}

    view_no = checkViewNoForNodes(txnPoolNodeSet)
    orig_retry_meth = partitioned_node.nodestack.retryDisconnected

    def wont_retry(self, exclude=None):
        # Do not attempt to retry connection
        pass

    # Simulating a partition here:
    # disconnect a node from only the master's primary and don't retry to
    # connect to it
    partitioned_node.nodestack.retryDisconnected = types.MethodType(
        wont_retry, partitioned_node.nodestack)
    r = partitioned_node.nodestack.getRemote(pr_node.nodestack.name)
    r.disconnect()

    def chk1():
        # Check that the partitioned node detects losing connection with
        # primary and sends an instance change which is received by other
        # nodes except the primary (since its disconnected from primary)
        assert partitioned_node.spylog.count(
            partitioned_node.lost_master_primary.__name__) > lost_pr_calls
        for node in txnPoolNodeSet:
            if node != partitioned_node and node != pr_node:
                assert node.view_changer.spylog.count(
                    node.view_changer.process_instance_change_msg.__name__) > recv_inst_chg_calls[node.name]

    looper.run(eventually(chk1, retryWait=1, timeout=10))

    def chk2():
        # Check the view does not change
        with pytest.raises(AssertionError):
            assert checkViewNoForNodes(txnPoolNodeSet) == view_no + 1

    looper.run(eventually(chk2, retryWait=1, timeout=10))
    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)

    # Repair the connection so the node is no longer partitioned
    partitioned_node.nodestack.retryDisconnected = types.MethodType(
        orig_retry_meth, partitioned_node.nodestack)

    # Send some requests and make sure the requests execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)

    # Partitioned node should have the same ledger and state as others
    # eventually
    waitNodeDataEquality(looper, partitioned_node,
                         *[n for n in txnPoolNodeSet if n != partitioned_node])