def test_non_primary_accepts_pre_prepare_time(looper, txnPoolNodeSet,
                                              sdk_wallet_client, sdk_pool_handle):
    """
    One of the non-primary replicas has an incorrect clock, so it thinks the
    PRE-PREPARE has an incorrect time
    """
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=2)
    # send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
    # The replica having the bad clock
    confused_npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1]

    make_clock_faulty(confused_npr.node)

    old_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)
    old_susp_count = get_timestamp_suspicion_count(confused_npr.node)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=2)

    assert get_timestamp_suspicion_count(confused_npr.node) > old_susp_count

    new_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)

    # `is_pre_prepare_time_acceptable` first returned False then returned True
    assert [True, False, *old_acceptable_rvs] == new_acceptable_rvs
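
All of the examples on this page lean on plenum's spy instrumentation. A minimal sketch of what `getAllReturnVals` is assumed to do (an illustration only, not the real implementation; the `result` attribute on spy-log entries is an assumption, while `spylog.getAll` and entry `params` appear in the examples below):

def get_all_return_vals_sketch(spyable_obj, spyable_method, compare_val_to=None):
    # Assumed behaviour: collect the recorded return values of every spied
    # call to `spyable_method`, most recent call first.
    entries = spyable_obj.spylog.getAll(spyable_method)
    rvs = [entry.result for entry in entries]  # `result` attribute is assumed
    if compare_val_to is not None:
        # With a comparison value, keep only the calls that returned it,
        # which is how these tests count "successful" calls via len(...).
        rvs = [rv for rv in rvs if rv == compare_val_to]
    return rvs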
Example #3
def test_view_change_timeout(nodeSet, looper, up, wallet1, client1):
    """
    Check that the view change is restarted if it is not completed in time
    """

    m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    initial_view_no = waitForViewChange(looper, nodeSet)
    # Setting view change timeout to low value to make test pass quicker
    for node in nodeSet:
        node._view_change_timeout = 5

    # Delaying view change messages to make first view change fail
    # due to timeout
    for node in nodeSet:
        node.nodeIbStasher.delay(vcd_delay(delay=50))

    # Delaying PRE-PREPARE messages from nodes and
    # sending requests to force view change
    #for i in range(3):
    #    delayNonPrimaries(nodeSet, instId=i, delay=10)
    #sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    times = {}
    for node in nodeSet:
        times[node.name] = {
            'called': get_count(node, node._check_view_change_completed),
            'returned_true': len(getAllReturnVals(
                node, node._check_view_change_completed, compare_val_to=True))
        }

    for node in nodeSet:
        node.startViewChange(initial_view_no + 1)

    # First view change should fail because of delayed VIEW_CHANGE_DONE
    # messages. This then leads to the new view change that we need.
    with pytest.raises(AssertionError):
        ensureElectionsDone(looper=looper, nodes=nodeSet, customTimeout=10)

    # Resetting delays to let second view change go well
    reset_delays_and_process_delayeds(nodeSet)

    # This view change should be completed with no problems
    ensureElectionsDone(looper=looper, nodes=nodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=nodeSet)
    new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method has been called at least once
    for node in nodeSet:
        assert get_count(node, node._check_view_change_completed) > times[node.name]['called']
        assert len(getAllReturnVals(node,
                                    node._check_view_change_completed,
                                    compare_val_to=True)) > times[node.name]['returned_true']

    # Multiple view changes have been initiated
    for node in nodeSet:
        assert (node.viewNo - initial_view_no) > 1

    ensure_pool_functional(looper, nodeSet, wallet1, client1)
def count_requested_preprepare_resp(node):
    # Returns the number of times PRE-PREPARE was requested
    sr = node.master_replica
    return len(
        getAllReturnVals(sr,
                         sr._request_pre_prepare_if_possible,
                         compare_val_to=True))
Example #5
def count_requested_preprepare_resp(node):
    # Returns the number of times PRE-PREPARE was requested
    sr = node.master_replica
    return len(
        getAllReturnVals(sr._ordering_service,
                         sr._ordering_service._request_pre_prepare_for_prepare,
                         compare_val_to=True))
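
Counters like the two variants above are normally sampled before the scenario and then polled afterwards; a hedged usage sketch of that pattern (the helper name and the polling shape are illustrations, assuming `looper` and `eventually` are available as in the other examples on this page):

def check_preprepare_was_requested(node, old_count):
    # The count of successful PRE-PREPARE requests should have grown once
    # the replica was forced to ask its peers for a missing PRE-PREPARE.
    assert count_requested_preprepare_resp(node) > old_count

# old = count_requested_preprepare_resp(slow_node)
# ... delay PRE-PREPAREs to slow_node and send requests ...
# looper.run(eventually(check_preprepare_was_requested, slow_node, old, retryWait=1))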
Example #6
def newNodeCaughtUp(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns):
    looper, newNode, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:4])
    check_last_3pc_master(newNode, txnPoolNodeSet[:4])

    # Check if catchup done once
    catchup_done_once = True
    for li in newNode.ledgerManager.ledgerRegistry.values():
        catchup_done_once = catchup_done_once and (li.num_txns_caught_up > 0)

    if not catchup_done_once:
        # It might be the case that the node has to do catchup again; in that
        # case, check that the return value of
        # `num_txns_caught_up_in_last_catchup` is greater than 0

        assert max(
            getAllReturnVals(
                newNode,
                newNode.num_txns_caught_up_in_last_catchup)) > 0

    for li in newNode.ledgerManager.ledgerRegistry.values():
        assert not li.receivedCatchUpReplies
        assert not li.recvdCatchupRepliesFrm

    return newNode
Example #7
def test_view_change_restarted_by_timeout_if_next_primary_disconnected(
        txnPoolNodeSet, looper, setup, wallet1, client1):
    """
    Verifies that a view change is restarted by timeout
    if the next primary has been disconnected
    """
    _, initial_view_no, timeout_callback_stats = setup

    start_view_change(txnPoolNodeSet, initial_view_no + 1)

    alive_nodes = stop_next_primary(txnPoolNodeSet)

    ensureElectionsDone(looper=looper, nodes=alive_nodes, numInstances=3)

    # There were 2 view changes
    for node in alive_nodes:
        assert (node.viewNo - initial_view_no) == 2

    # The timeout method was called exactly once
    for node in alive_nodes:
        assert get_count(node,
                         node._check_view_change_completed) - \
               timeout_callback_stats[node.name]['called'] == 1
        assert len(getAllReturnVals(node,
                                    node._check_view_change_completed,
                                    compare_val_to=True)) - \
               timeout_callback_stats[node.name]['returned_true'] == 1
def sdk_new_node_caught_up(txnPoolNodeSet,
                           sdk_node_set_with_node_added_after_some_txns):
    looper, new_node, _, _ = sdk_node_set_with_node_added_after_some_txns
    waitNodeDataEquality(looper,
                         new_node,
                         *txnPoolNodeSet[:4],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    check_last_3pc_master(new_node, txnPoolNodeSet[:4])

    # Check if catchup done once
    catchup_done_once = True
    for leecher in new_node.ledgerManager._node_leecher._leechers.values():
        catchup_done_once = catchup_done_once and (leecher.num_txns_caught_up >
                                                   0)

    if not catchup_done_once:
        # It might be the case that the node has to do catchup again; in that
        # case, check that the return value of
        # `num_txns_caught_up_in_last_catchup` is greater than 0

        assert max(
            getAllReturnVals(new_node,
                             new_node.num_txns_caught_up_in_last_catchup)) > 0

    return new_node
Example #9
def sdk_new_node_caught_up(txnPoolNodeSet,
                           sdk_node_set_with_node_added_after_some_txns):
    looper, new_node, _, _ = sdk_node_set_with_node_added_after_some_txns
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4])
    check_last_3pc_master(new_node, txnPoolNodeSet[:4])

    # Check if catchup done once
    catchup_done_once = True
    for li in new_node.ledgerManager.ledgerRegistry.values():
        catchup_done_once = catchup_done_once and (li.num_txns_caught_up > 0)

    if not catchup_done_once:
        # It might be the case that the node has to do catchup again; in that
        # case, check that the return value of
        # `num_txns_caught_up_in_last_catchup` is greater than 0

        assert max(
            getAllReturnVals(
                new_node,
                new_node.num_txns_caught_up_in_last_catchup)) > 0

    for li in new_node.ledgerManager.ledgerRegistry.values():
        assert not li.receivedCatchUpReplies
        assert not li.recvdCatchupRepliesFrm

    return new_node
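
The catchup-done check above (and its variants earlier on this page) can be read as a single predicate; a hedged sketch that condenses it (the helper name is an illustration, the attributes are the ones used in the example above):

def catchup_happened(node):
    # Either every ledger in the registry caught up at least one txn in a
    # single pass, or the node needed another round and the spied
    # `num_txns_caught_up_in_last_catchup` reported progress at some point.
    done_once = all(li.num_txns_caught_up > 0
                    for li in node.ledgerManager.ledgerRegistry.values())
    if done_once:
        return True
    return max(getAllReturnVals(
        node, node.num_txns_caught_up_in_last_catchup)) > 0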
def test_view_change_timeout_next_primary_is_disconnected(
        nodeSet, looper, up, setup, wallet1, client1):
    """
    Check that the view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, times = setup

    start_view_change(nodeSet, initial_view_no + 1)

    alive_nodes = stop_next_primary(nodeSet)

    ensureElectionsDone(looper=looper, nodes=alive_nodes, numInstances=3)

    # there were 2 view changes
    for node in alive_nodes:
        assert (node.viewNo - initial_view_no) == 2

    # The timeout method has been called at least once
    for node in alive_nodes:
        assert get_count(
            node,
            node._check_view_change_completed) > times[node.name]['called']
        assert len(
            getAllReturnVals(
                node, node._check_view_change_completed,
                compare_val_to=True)) > times[node.name]['returned_true']
 def get_last_returned_val():
     rvs = []
     for node in txnPoolNodeSet:
         rv = getAllReturnVals(node, node.getReplyFromLedger)
         rvs.append(rv[0])
     # All items in the list are the same
     assert rvs.count(rvs[0]) == len(txnPoolNodeSet)
     return rvs[0]
 def get_last_returned_val():
     rvs = []
     for node in txnPoolNodeSet:
         rv = getAllReturnVals(node, node.getReplyFromLedgerForRequest)
         rvs.append(rv[0])
     # All items in the list are the same
     assert rvs.count(rvs[0]) == len(txnPoolNodeSet)
     return rvs[0]
Example #13
def checkPrepareReqSent(replica: TestReplica, identifier: str, reqId: int):
    paramsList = getAllArgs(replica, replica.canSendPrepare)
    rv = getAllReturnVals(replica, replica.canSendPrepare)
    for params in paramsList:
        req = params['request']
        assert req.identifier == identifier
        assert req.reqId == reqId
    assert all(rv)
def testInstChangeWithMoreReqLat(looper, setup):
    nodes = setup.nodes
    for node in nodes:
        node.checkPerformance()
        assert any(getAllReturnVals(node.monitor,
                                    node.monitor.isMasterReqLatencyTooHigh))
    looper.run(eventually(partial(checkViewNoForNodes, nodes, 1),
                          retryWait=1, timeout=20))
Example #15
def testInstChangeWithMoreReqLat(looper, setup):
    nodes = setup.nodes
    for node in nodes:
        node.checkPerformance()
        assert any(getAllReturnVals(node.monitor,
                                    node.monitor.isMasterReqLatencyTooHigh))

    waitForViewChange(looper, nodes)
Example #16
def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client):
    """
    A node is slow to receive COMMIT messages, so after a view change it
    starts catchup. But before it can start requesting txns, the COMMIT
    messages are received and ordered. The node should not request any
    transactions.
    :return:
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle, sdk_wallet_client,
                                           2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    delay_cm = 30
    delay_cp = 100
    slow_node.nodeIbStasher.delay(cDelay(delay_cm))
    # The slow node receives consistency proofs after some delay; this delay
    # gives the opportunity to deliver all 3PC messages
    slow_node.nodeIbStasher.delay(cpDelay(delay_cp))

    # Count of `getCatchupReqs` which is called to construct the `CatchupReq`
    # to be sent
    def domain_cr_count():
        return sum(1 for entry in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.getCatchupReqs)
                   if entry.params['consProof'].ledgerId == DOMAIN_LEDGER_ID)

    old_count = domain_cr_count()
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle, sdk_wallet_client,
                                           2 * sent_batches, sent_batches)
    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the `slow_node` is behind
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Unstash only COMMIT messages
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)

    looper.runFor(2)

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        ConsistencyProof.typename)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # No `CatchupReq`s constructed, hence no `CatchupReq`s could have
    # been sent
    assert domain_cr_count() == old_count
    # Some stashed ordered requests have been processed
    rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
    assert sent_batches in rv

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
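
The delay/unstash sequence is the heart of this simulation; a compact hedged sketch of the same pattern as two helpers (the names are illustrations, the stasher calls mirror the ones used in the test above):

def hold_back_commits_and_proofs(node, commit_delay=30, cons_proof_delay=100):
    # Queue delay rules on the node's inbound stasher: COMMITs are held a
    # little, consistency proofs much longer so 3PC can finish first.
    node.nodeIbStasher.delay(cDelay(commit_delay))
    node.nodeIbStasher.delay(cpDelay(cons_proof_delay))

def release_commits_then_proofs(looper, node, gap=2):
    # Release COMMITs first so the node can order from stashed 3PC messages,
    # give it a moment, then release consistency proofs so catchup (if the
    # node still thinks it needs one) has nothing left to fetch.
    node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)
    looper.runFor(gap)
    node.nodeIbStasher.reset_delays_and_process_delayeds(ConsistencyProof.typename)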
def test_view_changes_if_master_primary_disconnected(txnPoolNodeSet, looper,
                                                     sdk_pool_handle,
                                                     sdk_wallet_client, tdir,
                                                     tconf, allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            old_pr_node,
                                            stopNode=True)
    looper.removeProdable(old_pr_node)

    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf, tdir,
                                     allPluginsPath)

    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet,
                   old_view_no + 1,
                   timeout=tconf.VIEW_CHANGE_TIMEOUT))

    # After the node catches up it sets view_no from the audit ledger and does not need to do a view change
    assert len(
        getAllReturnVals(old_pr_node.view_changer,
                         old_pr_node.view_changer.start_view_change,
                         compare_val_to=True)) == 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert not old_pr_node.view_changer._next_view_indications
Example #18
def checkPrepareReqSent(replica: TestReplica, key: str,
                        view_no: int):
    paramsList = getAllArgs(replica, replica.canPrepare)
    rv = getAllReturnVals(replica,
                          replica.canPrepare)
    args = [p["ppReq"].reqIdr for p in paramsList if p["ppReq"].viewNo == view_no]
    assert [key] in args
    idx = args.index([key])
    assert rv[idx]
Example #19
 def check_timeout_callback_called(times):
     for node in txnPoolNodeSet:
         assert get_count(node,
                          node._check_view_change_completed) - \
                timeout_callback_stats[node.name]['called'] == times
         assert len(getAllReturnVals(node,
                                     node._check_view_change_completed,
                                     compare_val_to=True)) - \
                timeout_callback_stats[node.name]['returned_true'] == times
Example #20
def checkPrepareReqSent(replica: TestReplica, key: str, view_no: int):
    paramsList = getAllArgs(replica, replica.canPrepare)
    rv = getAllReturnVals(replica, replica.canPrepare)
    args = [
        p["ppReq"].reqIdr for p in paramsList if p["ppReq"].viewNo == view_no
    ]
    assert [key] in args
    idx = args.index([key])
    assert rv[idx]
def test_node_request_propagates(looper, setup, txnPoolNodeSet,
                                 sdk_wallet_client, sdk_pool_handle):
    """
    One of the nodes lacks sufficient PROPAGATEs
    """
    faulty_node, recv_client_requests = setup

    old_count_recv_ppg = get_count(faulty_node, faulty_node.processPropagate)
    old_count_recv_req = get_count(faulty_node, faulty_node.processRequest)
    old_count_request_propagates = get_count(
        faulty_node, faulty_node.request_propagates)

    def sum_of_sent_batches():
        return faulty_node.replicas[0].lastPrePrepareSeqNo + \
               faulty_node.replicas[1].lastPrePrepareSeqNo

    old_sum_of_sent_batches = sum_of_sent_batches()

    sent_reqs = 5
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              sent_reqs)

    assert get_count(
        faulty_node, faulty_node.processPropagate) > old_count_recv_ppg
    if recv_client_requests:
        assert get_count(
            faulty_node, faulty_node.processRequest) > old_count_recv_req
    else:
        assert get_count(
            faulty_node, faulty_node.processRequest) == old_count_recv_req

    # The attempt to request PROPAGATEs was made as many times as the
    # number of sent batches across both replicas, since both replicas
    # independently request PROPAGATEs
    assert get_count(faulty_node, faulty_node.request_propagates) - \
           old_count_request_propagates == (sum_of_sent_batches() -
                                            old_sum_of_sent_batches)

    requested_propagate_counts = getAllReturnVals(
        faulty_node, faulty_node.request_propagates)

    # The last attempt to request PROPAGATEs was not successful
    assert requested_propagate_counts[0] == 0
    # The first attempt to request PROPAGATEs was successful as PROPAGATEs
    # were requested for all nodes
    assert requested_propagate_counts[1] == sent_reqs

    faulty_node.nodeIbStasher.reset_delays_and_process_delayeds()
    sdk_ensure_pool_functional(looper,
                               txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle,
                               num_reqs=4)
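
The assertions above treat the spied return values of `request_propagates` as most-recent-first (index 0 is the latest attempt); a hedged helper sketch built on that assumption (the name is an illustration):

def successful_propagate_requests(node):
    # Count the request_propagates calls that actually asked for something,
    # i.e. whose recorded return value was a non-zero number of requests.
    return sum(1 for rv in getAllReturnVals(node, node.request_propagates) if rv)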
def new_node_in_correct_view(all_nodes_view_change, looper, txnPoolNodeSet,
                             one_node_added, wallet1, client1):
    new_node = one_node_added
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, retryWait=1,
                          timeout=10))
    assert len(getAllReturnVals(new_node,
                                new_node._start_view_change_if_possible,
                                compare_val_to=True)) > 0
    assert not new_node._next_view_indications
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
def testInstChangeWithMoreReqLat(looper, setup):
    nodes = setup.nodes
    old_view_nos = set([n.viewNo for n in nodes])
    assert len(old_view_nos) == 1
    old_view_no = old_view_nos.pop()
    for node in nodes:
        node.checkPerformance()
        assert any(getAllReturnVals(node.monitor,
                                    node.monitor.isMasterReqLatencyTooHigh))

    waitForViewChange(looper, nodes, expectedViewNo=old_view_no + 1)
def new_node_in_correct_view(all_nodes_view_change, looper, txnPoolNodeSet,
                             sdk_one_node_added, sdk_pool_handle, sdk_wallet_client):
    new_node = sdk_one_node_added
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, retryWait=1,
                          timeout=10))
    assert len(getAllReturnVals(new_node.view_changer,
                                new_node.view_changer._start_view_change_if_possible,
                                compare_val_to=True)) > 0
    assert not new_node.view_changer._next_view_indications
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2)
def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, looper, wallet1,
                                               client1, client1Connected,
                                               tconf, tdirWithPoolTxns,
                                               allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            old_pr_node,
                                            stopNode=True)
    looper.removeProdable(old_pr_node)

    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf,
                                     tdirWithPoolTxns, allPluginsPath)

    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet,
                   old_view_no + 1,
                   timeout=10))
    assert len(
        getAllReturnVals(old_pr_node,
                         old_pr_node._start_view_change_if_possible,
                         compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert not old_pr_node._next_view_indications
def new_node_in_correct_view(all_nodes_view_change, looper, txnPoolNodeSet,
                             one_node_added, sdk_pool_handle, sdk_wallet_client):
    new_node = one_node_added
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet, retryWait=1,
                          timeout=10))
    assert len(getAllReturnVals(new_node.view_changer,
                                new_node.view_changer._start_view_change_if_possible,
                                compare_val_to=True)) > 0
    assert not new_node.view_changer._next_view_indications
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2)
def testInstChangeWithMoreReqLat(looper, setup):
    nodes = setup.nodes
    for node in nodes:
        node.checkPerformance()
        assert any(
            getAllReturnVals(node.monitor,
                             node.monitor.isMasterReqLatencyTooHigh))
    looper.run(
        eventually(partial(checkViewNoForNodes, nodes, 1),
                   retryWait=1,
                   timeout=20))
def test_node_request_propagates(looper, setup, txnPoolNodeSet,
                                 sdk_wallet_client, sdk_pool_handle):
    """
    One of the nodes lacks sufficient PROPAGATEs
    """
    faulty_node, recv_client_requests = setup

    old_count_recv_ppg = get_count(faulty_node, faulty_node.processPropagate)
    old_count_recv_req = get_count(faulty_node, faulty_node.processRequest)
    old_count_request_propagates = get_count(faulty_node,
                                             faulty_node.request_propagates)

    def sum_of_sent_batches():
        return faulty_node.replicas[0].lastPrePrepareSeqNo + \
               faulty_node.replicas[1].lastPrePrepareSeqNo

    old_sum_of_sent_batches = sum_of_sent_batches()

    sent_reqs = 5
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, sent_reqs)

    assert get_count(faulty_node,
                     faulty_node.processPropagate) > old_count_recv_ppg
    if recv_client_requests:
        assert get_count(faulty_node,
                         faulty_node.processRequest) > old_count_recv_req
    else:
        assert get_count(faulty_node,
                         faulty_node.processRequest) == old_count_recv_req

    # The attempt to request PROPAGATEs was made as many times as the
    # number of sent batches across both replicas, since both replicas
    # independently request PROPAGATEs
    assert get_count(faulty_node, faulty_node.request_propagates) - \
        old_count_request_propagates == (sum_of_sent_batches() -
                                         old_sum_of_sent_batches)

    requested_propagate_counts = getAllReturnVals(
        faulty_node, faulty_node.request_propagates)

    # The last attempt to request PROPAGATEs was not successful
    assert requested_propagate_counts[0] == 0
    # The first attempt to request PROPAGATEs was successful as PROPAGATEs
    # were requested for all nodes
    assert requested_propagate_counts[1] == sent_reqs

    faulty_node.nodeIbStasher.reset_delays_and_process_delayeds()
    sdk_ensure_pool_functional(looper,
                               txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle,
                               num_reqs=4)
def testInstChangeWithMoreReqLat(looper, setup):
    # TODO: for now, the view_change procedure can take more than 15 minutes
    # (5 minutes for catchup and 10 minutes for the primary's answer).
    # Therefore, triggering a view_change by max latency is currently not indicative.
    nodes = setup.nodes
    old_view_no = setup.old_view_no
    for node in nodes:
        node.checkPerformance()
        assert any(getAllReturnVals(node.monitor,
                                    node.monitor.isMasterReqLatencyTooHigh))

    waitForViewChange(looper, nodes, expectedViewNo=old_view_no + 1)
Example #30
def testInstChangeWithMoreReqLat(looper, setup):
    # TODO: for now, the view_change procedure can take more than 15 minutes
    # (5 minutes for catchup and 10 minutes for the primary's answer).
    # Therefore, triggering a view_change by max latency is currently not indicative.
    nodes = setup.nodes
    old_view_no = setup.old_view_no
    for node in nodes:
        node.checkPerformance()
        assert any(
            getAllReturnVals(node.monitor,
                             node.monitor.isMasterReqLatencyTooHigh))

    waitForViewChange(looper, nodes, expectedViewNo=old_view_no + 1)
Example #31
def setup(nodeSet, looper):
    m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    initial_view_no = waitForViewChange(looper, nodeSet)
    # Setting view change timeout to low value to make test pass quicker
    for node in nodeSet:
        node._view_change_timeout = view_change_timeout

    times = {}
    for node in nodeSet:
        times[node.name] = {
            'called': get_count(node, node._check_view_change_completed),
            'returned_true': len(getAllReturnVals(
                node, node._check_view_change_completed, compare_val_to=True))
        }
    return m_primary_node, initial_view_no, times
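
The `times` dict built by this fixture is later diffed against fresh spy counts (see the view-change timeout tests earlier on this page); a hedged sketch of that capture/compare pattern as standalone helpers (the names are illustrations):

def capture_timeout_callback_stats(nodes):
    # Snapshot how often _check_view_change_completed has been called so far
    # and how often it has returned True.
    return {
        node.name: {
            'called': get_count(node, node._check_view_change_completed),
            'returned_true': len(getAllReturnVals(
                node, node._check_view_change_completed, compare_val_to=True))
        }
        for node in nodes
    }

def assert_timeout_callback_fired(nodes, baseline, times=1):
    # The callback should have fired exactly `times` more often than in the
    # baseline snapshot, and returned True that many more times.
    for node in nodes:
        called = get_count(node, node._check_view_change_completed)
        returned_true = len(getAllReturnVals(
            node, node._check_view_change_completed, compare_val_to=True))
        assert called - baseline[node.name]['called'] == times
        assert returned_true - baseline[node.name]['returned_true'] == times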
Example #32
def test_old_non_primary_restart_after_view_change(new_node_in_correct_view,
                                                   looper, txnPoolNodeSet,
                                                   tdir, allPluginsPath, tconf,
                                                   sdk_pool_handle,
                                                   sdk_wallet_client):
    """
    An existing non-primary node crashes and then view change happens,
    the crashed node comes back up after view change
    """
    node_to_stop = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    old_view_no = node_to_stop.viewNo

    # Stop non-primary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_stop,
                                            stopNode=True)
    looper.removeProdable(node_to_stop)
    remaining_nodes = list(set(txnPoolNodeSet) - {node_to_stop})

    # Send some requests before view change
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_view_change(looper,
                       remaining_nodes,
                       custom_timeout=tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper, remaining_nodes)
    # Send some requests after view change
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    restarted_node = start_stopped_node(node_to_stop, looper, tconf, tdir,
                                        allPluginsPath)
    txnPoolNodeSet = remaining_nodes + [restarted_node]
    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet,
                   old_view_no + 1,
                   timeout=30))
    assert len(
        getAllReturnVals(
            restarted_node.view_changer,
            restarted_node.view_changer._start_view_change_if_possible,
            compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    assert not restarted_node.view_changer._next_view_indications
def test_view_changes_if_master_primary_disconnected(txnPoolNodeSet, looper, sdk_pool_handle,
                                                     sdk_wallet_client, tdir, tconf, allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_pr_node, stopNode=True)
    looper.removeProdable(old_pr_node)

    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf,
                                     tdir, allPluginsPath)

    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(eventually(checkViewNoForNodes,
                          txnPoolNodeSet, old_view_no + 1, timeout=tconf.VIEW_CHANGE_TIMEOUT))
    assert len(getAllReturnVals(old_pr_node.view_changer,
                                old_pr_node.view_changer._start_view_change_if_possible,
                                compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert not old_pr_node.view_changer._next_view_indications
def test_view_change_retry_by_timeout(
        txnPoolNodeSet, looper, setup, sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that a view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, timeout_callback_stats = setup

    delay_view_change_done_msg(txnPoolNodeSet)

    start_view_change(txnPoolNodeSet, initial_view_no + 1)
    # First view change should fail because of delayed ViewChangeDone
    # messages. This then leads to a new view change that we need.
    with pytest.raises(AssertionError):
        ensureElectionsDone(looper=looper,
                            nodes=txnPoolNodeSet,
                            customTimeout=view_change_timeout + 2)

    # Resetting delays to let second view change go well
    reset_delays_and_process_delayeds(txnPoolNodeSet)

    # This view change should be completed with no problems
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method was called exactly once
    for node in txnPoolNodeSet:
        assert get_count(node,
                         node._check_view_change_completed) - \
               timeout_callback_stats[node.name]['called'] == 1
        assert len(getAllReturnVals(node,
                                    node._check_view_change_completed,
                                    compare_val_to=True)) - \
               timeout_callback_stats[node.name]['returned_true'] == 1

    # 2 view changes have been initiated
    for node in txnPoolNodeSet:
        assert node.viewNo - initial_view_no == 2

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)
Example #35
def test_node_request_propagates(looper, setup, txnPoolNodeSet, client1,
                                 wallet1, client1Connected, request):
    """
    One of the nodes lacks sufficient PROPAGATEs
    """
    faulty_node, recv_client_requests = setup

    old_count_recv_ppg = get_count(faulty_node, faulty_node.processPropagate)
    old_count_recv_req = get_count(faulty_node, faulty_node.processRequest)
    old_count_request_propagates = get_count(faulty_node,
                                             faulty_node.request_propagates)

    sent_reqs = 5
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              sent_reqs)

    assert get_count(faulty_node,
                     faulty_node.processPropagate) > old_count_recv_ppg
    if recv_client_requests:
        assert get_count(faulty_node,
                         faulty_node.processRequest) > old_count_recv_req
    else:
        assert get_count(faulty_node,
                         faulty_node.processRequest) == old_count_recv_req

    # Attempt to request PROPAGATEs was made twice, since the faulty node has 2 replicas
    assert get_count(
        faulty_node,
        faulty_node.request_propagates) - old_count_request_propagates == 2

    requested_propagate_counts = getAllReturnVals(
        faulty_node, faulty_node.request_propagates)

    # The last attempt to request PROPAGATEs was not successful
    assert requested_propagate_counts[0] == 0
    # The first attempt to request PROPAGATEs was successful as PROPAGATEs
    # were requested for all nodes
    assert requested_propagate_counts[1] == sent_reqs

    faulty_node.nodeIbStasher.reset_delays_and_process_delayeds()
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1, 4)
def test_old_non_primary_restart_after_view_change(new_node_in_correct_view,
                                                   looper, txnPoolNodeSet,
                                                   tdir,
                                                   allPluginsPath, tconf,
                                                   sdk_pool_handle,
                                                   sdk_wallet_client):
    """
    An existing non-primary node crashes and then view change happens,
    the crashed node comes back up after view change
    """
    node_to_stop = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    old_view_no = node_to_stop.viewNo

    # Stop non-primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_stop, stopNode=True)
    looper.removeProdable(node_to_stop)
    remaining_nodes = list(set(txnPoolNodeSet) - {node_to_stop})

    # Send some requests before view change
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_view_change(looper, remaining_nodes, custom_timeout=tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper, remaining_nodes)
    # Send some requests after view change
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    restarted_node = start_stopped_node(node_to_stop, looper, tconf,
                                        tdir, allPluginsPath)
    txnPoolNodeSet = remaining_nodes + [restarted_node]
    looper.run(eventually(checkViewNoForNodes,
                          txnPoolNodeSet, old_view_no + 1, timeout=30))
    assert len(getAllReturnVals(restarted_node.view_changer,
                                restarted_node.view_changer._start_view_change_if_possible,
                                compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    assert not restarted_node.view_changer._next_view_indications
def test_view_change_timeout(nodeSet, looper, up, setup, wallet1, client1):
    """
    Check that the view change is restarted if it is not completed in time
    """
    m_primary_node, initial_view_no, times = setup

    delay_view_change_msg(nodeSet)

    start_view_change(nodeSet, initial_view_no + 1)
    # First view change should fail because of delayed view change
    # messages. This then leads to the new view change that we need.
    with pytest.raises(AssertionError):
        ensureElectionsDone(looper=looper, nodes=nodeSet, customTimeout=10)

    # Resetting delays to let second view change go well
    reset_delays_and_process_delayeds(nodeSet)

    # This view change should be completed with no problems
    ensureElectionsDone(looper=looper, nodes=nodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=nodeSet)
    new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method has been called at least once
    for node in nodeSet:
        assert get_count(
            node,
            node._check_view_change_completed) > times[node.name]['called']
        assert len(
            getAllReturnVals(
                node, node._check_view_change_completed,
                compare_val_to=True)) > times[node.name]['returned_true']

    # Multiple view changes have been initiated
    for node in nodeSet:
        assert (node.viewNo - initial_view_no) > 1

    ensure_pool_functional(looper, nodeSet, wallet1, client1)
 def chk4():
     # Some COMMITs were ordered but stashed and they were processed
     rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
     assert delay_batches in rv
def test_reverted_unordered(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Before starting catchup, revert any uncommitted changes to state and
    ledger. This is to avoid any re-application of requests that were
    ordered but stashed
    Example scenario
    prepared (1, 4)
    startViewChange
    start_catchup
    ...
    ....
    ...
    committed and send Ordered (1, 2)
    ...
    ....
    preLedgerCatchUp
    force_process_ordered, take out (1,2) and stash (1, 2)
    now process stashed Ordered(1,2), its requests will be applied again

    Simulation: Delay COMMITs to a node so that it cannot order requests
    but has prepared them. Then trigger a view change and make sure the slow
    node has not ordered the same number of requests as the others but has
    prepared them, so it can order when it receives COMMITs while the view
    change is in progress. The slow node should revert unordered batches,
    but it should eventually process the ordered requests, so LEDGER_STATUS
    is also delayed so that catchup is delayed.
    """
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_node.nodeIbStasher.delay(cDelay(120, 0))
    sent_batches = 5
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 2 * sent_batches, sent_batches)

    # Fast nodes have same last ordered and same data
    last_ordered = [n.master_last_ordered_3PC for n in fast_nodes]
    assert check_if_all_equal_in_list(last_ordered)
    ensure_all_nodes_have_same_data(looper, fast_nodes)

    # Slow nodes have different last ordered than fast nodes
    assert last_ordered[0] != slow_node.master_last_ordered_3PC

    # Delay LEDGER_STATUS so catchup starts late
    slow_node.nodeIbStasher.delay(lsDelay(100))
    slow_node.nodeIbStasher.delay(msg_rep_delay(100))

    # slow_node has not reverted batches
    assert sent_batches not in getAllReturnVals(
        slow_node.master_replica,
        slow_node.master_replica.revert_unordered_batches)

    ensure_view_change(looper, txnPoolNodeSet)

    def chk1():
        # slow_node reverted all batches
        rv = getAllReturnVals(slow_node.master_replica,
                              slow_node.master_replica.revert_unordered_batches)
        assert sent_batches in rv

    looper.run(eventually(chk1, retryWait=1))

    # After the view change slow_node has prepared same requests as the fast
    # nodes have ordered
    assert last_ordered[0] == slow_node.master_replica.last_prepared_before_view_change

    # Deliver COMMITs
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # slow_node orders all requests as others have
        assert last_ordered[0] == slow_node.master_last_ordered_3PC

    looper.run(eventually(chk2, retryWait=1))

    # Deliver LEDGER_STATUS so catchup can complete
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(LEDGER_STATUS)
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(MESSAGE_RESPONSE)

    # Ensure all nodes have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    def chk3():
        # slow_node processed stashed Ordered requests successfully
        rv = getAllReturnVals(slow_node,
                              slow_node.processStashedOrderedReqs)
        assert sent_batches in rv

    looper.run(eventually(chk3, retryWait=1))

    # Ensure pool is functional
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
Example #40
 def chk1():
     # slow_node reverted all batches
     rv = getAllReturnVals(
         slow_node.master_replica,
         slow_node.master_replica.revert_unordered_batches)
     assert sent_batches in rv
def count_requested_preprepare_resp(node):
    # Returns the number of times PRE-PREPARE was requested
    sr = node.master_replica
    return len(getAllReturnVals(sr, sr._request_pre_prepare_for_prepare,
                                compare_val_to=True))
 def chk3():
     # slow_node processed stashed Ordered requests successfully
     rv = getAllReturnVals(slow_node,
                           slow_node.processStashedOrderedReqs)
     assert sent_batches in rv
 def caught_up_for_current_view_count():
     return len(
         getAllReturnVals(bad_node,
                          bad_node.caught_up_for_current_view,
                          compare_val_to=True))
 def is_catchup_not_needed_count():
     return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                 compare_val_to=False))
def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet,
                                    sdk_pool_handle,
                                    sdk_wallet_client):
    """
    A node is slow to receive COMMIT messages, so after a view change it
    starts catchup. But before it can start requesting txns, the COMMIT
    messages are received and ordered. The node should not request any
    transactions.
    :return:
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * 3,
                                           3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    delay_cm = 30
    delay_cp = 100
    slow_node.nodeIbStasher.delay(cDelay(delay_cm))
    # The slow node receives consistency proofs after some delay; this delay
    # gives the opportunity to deliver all 3PC messages
    slow_node.nodeIbStasher.delay(cpDelay(delay_cp))

    # Count of `getCatchupReqs` which is called to construct the `CatchupReq`
    # to be sent
    def domain_cr_count():
        return sum(1 for entry in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.getCatchupReqs)
                   if entry.params['consProof'].ledgerId == DOMAIN_LEDGER_ID)

    old_count = domain_cr_count()
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * sent_batches,
                                           sent_batches)
    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the `slow_node` is behind
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Unstash only COMMIT messages
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)

    looper.runFor(2)

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        ConsistencyProof.typename)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # No `CatchupReq`s constructed, hence no `CatchupReq`s could have
    # been sent
    assert domain_cr_count() == old_count
    # Some stashed ordered requests have been processed
    rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
    assert sent_batches in rv

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)
 def is_catchup_needed_count():
     return len(
         getAllReturnVals(bad_node,
                          bad_node.is_catchup_needed,
                          compare_val_to=True))
 def has_ordered_till_last_prepared_certificate_count():
     return len(getAllReturnVals(bad_node,
                                 bad_node.has_ordered_till_last_prepared_certificate,
                                 compare_val_to=True))
def _check_view_change_completed_true(node):
    return len(getAllReturnVals(node, node._check_view_change_completed, compare_val_to=True))
 def chk1():
     # slow_node reverted all batches
     rv = getAllReturnVals(slow_node.master_replica,
                           slow_node.master_replica.revert_unordered_batches)
     assert sent_batches in rv