def txnPoolNodeSet(txnPoolNodeSet, looper, client1, wallet1, client1Connected,
                   tconf, tdirWithPoolTxns, allPluginsPath):
    logger.debug("Do several view changes to round the list of primaries")

    assert txnPoolNodeSet[0].viewNo == len(txnPoolNodeSet) - 1

    logger.debug(
        "Do view change to reach viewNo {}".format(txnPoolNodeSet[0].viewNo +
                                                   1))
    ensure_view_change_complete(looper, txnPoolNodeSet)
    logger.debug("Send requests to ensure that pool is working properly, "
                 "viewNo: {}".format(txnPoolNodeSet[0].viewNo))
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, numReqs=3)

    logger.debug("Pool is ready, current viewNo: {}".format(
        txnPoolNodeSet[0].viewNo))

    # TODO find out and fix why an additional view change could happen
    # because of a degraded master. It's critical for the current test that
    # the view change is completed by the time the new node is joining.
    # Thus, disable the master degradation check as it won't impact the case
    # and guarantees the necessary state.
    for node in txnPoolNodeSet:
        node.monitor.isMasterDegraded = lambda: False

    return txnPoolNodeSet
def testZStackNodeReconnection(tconf, looper, txnPoolNodeSet, client1, wallet1,
                               tdirWithPoolTxns, client1Connected):
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1)

    npr = [n for n in txnPoolNodeSet if not n.hasPrimary]
    nodeToCrash = npr[0]
    idxToCrash = txnPoolNodeSet.index(nodeToCrash)
    otherNodes = [_ for _ in txnPoolNodeSet if _ != nodeToCrash]

    def checkFlakyConnected(conn=True):
        for node in otherNodes:
            if conn:
                assert nodeToCrash.nodestack.name in node.nodestack.connecteds
            else:
                assert nodeToCrash.nodestack.name not in node.nodestack.connecteds

    checkFlakyConnected(True)
    nodeToCrash.stop()
    looper.removeProdable(nodeToCrash)
    looper.runFor(1)
    looper.run(eventually(checkFlakyConnected, False, retryWait=1, timeout=35))
    looper.runFor(1)
    node = TestNode(nodeToCrash.name,
                    basedirpath=tdirWithPoolTxns,
                    config=tconf,
                    ha=nodeToCrash.nodestack.ha,
                    cliha=nodeToCrash.clientstack.ha)
    looper.add(node)
    txnPoolNodeSet[idxToCrash] = node
    looper.run(eventually(checkFlakyConnected, True, retryWait=2, timeout=50))
    ensureElectionsDone(looper, txnPoolNodeSet, retryWait=2, timeout=50)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1)
    checkNodesSendingCommits(txnPoolNodeSet)
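
# The test above leans on `eventually(...)` to re-run `checkFlakyConnected`
# until it stops raising AssertionError or a timeout expires. Below is a
# minimal, synchronous sketch of that retry-until-timeout pattern; it is an
# illustration only, not plenum's actual coroutine-based `eventually` helper.
import time


def retry_until(check, *args, retry_wait=1.0, timeout=35.0):
    # Call `check(*args)` repeatedly until it passes or the deadline is hit.
    deadline = time.monotonic() + timeout
    while True:
        try:
            return check(*args)
        except AssertionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(retry_wait)
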
def test_node_load_after_add(newNodeCaughtUp, txnPoolNodeSet, tconf,
                             tdirWithPoolTxns, allPluginsPath,
                             poolTxnStewardData, looper, client1, wallet1,
                             client1Connected, capsys):
    """
    A node that restarts after some transactions should eventually get the
    transactions which happened while it was down
    :return:
    """
    new_node = newNodeCaughtUp
    logger.debug("Sending requests")

    # Here's where we apply some load
    client_batches = 300
    txns_per_batch = 25
    for i in range(client_batches):
        s = perf_counter()
        sendReqsToNodesAndVerifySuffReplies(looper,
                                            wallet1,
                                            client1,
                                            txns_per_batch,
                                            override_timeout_limit=True)
        with capsys.disabled():
            print('{} executed {} client txns in {:.2f} seconds'.format(
                i + 1, txns_per_batch,
                perf_counter() - s))

    logger.debug("Starting the stopped node, {}".format(new_node))
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4])
def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet,
                                nodeSetWithNodeAddedAfterSomeTxns):
    """
    A node that restarts after some transactions should eventually get the
    transactions which happened while it was down
    :return:
    """

    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    logger.debug("Stopping node {} with pool ledger size {}".format(
        newNode, newNode.poolManager.txnSeqNo))
    ensureNodeDisconnectedFromPool(looper, txnPoolNodeSet, newNode)
    # for n in txnPoolNodeSet[:4]:
    #     for r in n.nodestack.remotes.values():
    #         if r.name == newNode.name:
    #             r.removeStaleCorrespondents()
    # looper.run(eventually(checkNodeDisconnectedFrom, newNode.name,
    #                       txnPoolNodeSet[:4], retryWait=1, timeout=5))
    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    logger.debug("Starting the stopped node, {}".format(newNode))
    newNode.start(looper.loop)
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=30))
    looper.run(
        eventually(checkNodeLedgersForEquality,
                   newNode,
                   *txnPoolNodeSet[:4],
                   retryWait=1,
                   timeout=75))
def test_not_check_if_no_new_requests(looper: Looper,
                                      nodeSet: TestNodeSet,
                                      wallet1, client1):
    """
    Checks that a node does not run the performance check if there were no new
    requests since the previous check
    """
    
    # Ensure that nodes are participating, because otherwise they do not
    # do the check
    for node in list(nodeSet):
        assert node.isParticipating

    # Check that the first performance check passes, but further ones do not
    for node in list(nodeSet):
        assert node.checkPerformance()
        assert not node.checkPerformance()
        assert not node.checkPerformance()
        assert not node.checkPerformance()

    # Send a new request and check that after it nodes can do the
    # performance check again
    num_requests = 1
    sendReqsToNodesAndVerifySuffReplies(looper,
                                        wallet1,
                                        client1,
                                        num_requests,
                                        nodeSet.f)

    for node in list(nodeSet):
        assert node.checkPerformance()
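
# test_not_check_if_no_new_requests expects checkPerformance() to be truthy
# only when new requests were ordered since the previous check. The class
# below is a hypothetical, simplified monitor that shows just that gating
# idea; it is not plenum's Monitor API.
class SimplePerfMonitor:
    def __init__(self):
        self.ordered_count = 0
        self._count_at_last_check = 0

    def order_request(self):
        # Called whenever a request gets ordered.
        self.ordered_count += 1

    def checkPerformance(self):
        # Run the check only if new requests arrived since the last call.
        if self.ordered_count == self._count_at_last_check:
            return False
        self._count_at_last_check = self.ordered_count
        return True


_monitor = SimplePerfMonitor()
_monitor.order_request()
assert _monitor.checkPerformance()      # first check after a request passes
assert not _monitor.checkPerformance()  # no new requests, so no check
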
def test_new_node_accepts_chosen_primary(
        txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns):
    looper, new_node, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns

    logger.debug("Ensure nodes data equality".format(txnPoolNodeSet[0].viewNo))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # here we must have view_no = 4
    #  - current primary is Alpha (based on node registry before new node joined)
    #  - but new node expects itself as primary basing
    #    on updated node registry
    # -> new node doesn't verify current primary
    assert not new_node.view_changer._primary_verified
    # -> new node hasn't received ViewChangeDone from the expected primary
    #    (self VCHD message is registered when node sends it, not the case
    #    for primary propagate logic)
    assert not new_node.view_changer.has_view_change_from_primary
    # -> BUT new node understands that no view change actually happens
    assert new_node.view_changer._is_propagated_view_change_completed

    logger.debug("Send requests to ensure that pool is working properly, "
                 "viewNo: {}".format(txnPoolNodeSet[0].viewNo))
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=3)

    logger.debug("Ensure nodes data equality".format(txnPoolNodeSet[0].viewNo))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def testNodeCatchupAfterLostConnection(newNodeCaughtUp, txnPoolNodeSet,
                                       nodeSetWithNodeAddedAfterSomeTxns):
    """
    A node that has a poor network connection and got out of sync after some
    transactions should eventually get the transactions which happened while
    it was not accessible
    :return:
    """
    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    logger.debug("Disconnecting node {}, ledger size {}".
                 format(newNode, newNode.domainLedger.size))
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, newNode,
                                            stopNode=False)
    looper.removeProdable(newNode)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, newNode, *txnPoolNodeSet[:-1])

    # logger.debug("Ensure node {} gets disconnected".format(newNode))
    ensure_node_disconnected(looper, newNode, txnPoolNodeSet[:-1])

    logger.debug("Connecting the node {} back, ledger size {}".
                 format(newNode, newNode.domainLedger.size))
    looper.add(newNode)

    logger.debug("Waiting for the node to catch up, {}".format(newNode))
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10)
    checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1])
def view_change_in_between_3pc(looper,
                               nodes,
                               slow_nodes,
                               wallet,
                               client,
                               slow_delay=1,
                               wait=None):
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet, client, 4)
    delay_3pc_messages(slow_nodes, 0, delay=slow_delay)

    sendRandomRequests(wallet, client, 10)
    if wait:
        looper.runFor(wait)

    ensure_view_change_complete(looper, nodes, customTimeout=60)

    reset_delays_and_process_delayeds(slow_nodes)

    sendReqsToNodesAndVerifySuffReplies(looper,
                                        wallet,
                                        client,
                                        5,
                                        total_timeout=30)
    send_reqs_to_nodes_and_verify_all_replies(looper,
                                              wallet,
                                              client,
                                              5,
                                              total_timeout=30)
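
# view_change_in_between_3pc delays 3PC messages on the slow nodes and later
# calls reset_delays_and_process_delayeds. The class below sketches the
# general "stash now, release later" idea behind such delays; it makes no
# assumptions about plenum's real stasher implementation.
from collections import deque


class DelayingStasher:
    def __init__(self):
        self.delayed = deque()
        self.delaying = False

    def receive(self, msg, deliver):
        # Either deliver the message immediately or stash it for later.
        if self.delaying:
            self.delayed.append(msg)
        else:
            deliver(msg)

    def reset_and_process(self, deliver):
        # Stop delaying and flush everything that was stashed meanwhile.
        self.delaying = False
        while self.delayed:
            deliver(self.delayed.popleft())


_stasher = DelayingStasher()
_stasher.delaying = True
_stasher.receive("COMMIT-1", print)   # stashed, nothing delivered yet
_stasher.reset_and_process(print)     # now "COMMIT-1" is delivered
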
def nodeCreatedAfterSomeTxns(looper, testNodeClass, do_post_node_creation,
                             txnPoolNodeSet, tdir, tdirWithClientPoolTxns,
                             poolTxnStewardData, tconf, allPluginsPath,
                             request):
    client, wallet = buildPoolClientAndWallet(poolTxnStewardData,
                                              tdirWithClientPoolTxns,
                                              clientClass=TestClient)
    looper.add(client)
    looper.run(client.ensureConnectedToNodes())
    txnCount = getValueFromModule(request, "txnCount", 5)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, txnCount)
    newStewardName = randomString()
    newNodeName = "Epsilon"
    newStewardClient, newStewardWallet, newNode = addNewStewardAndNode(
        looper,
        client,
        wallet,
        newStewardName,
        newNodeName,
        tdir,
        tdirWithClientPoolTxns,
        tconf,
        nodeClass=testNodeClass,
        allPluginsPath=allPluginsPath,
        autoStart=True,
        do_post_node_creation=do_post_node_creation)
    yield looper, newNode, client, wallet, newStewardClient, \
        newStewardWallet
def testRequestsSize(txnPoolNodesLooper, txnPoolNodeSet, poolTxnClientNames,
                     tdirWithPoolTxns, poolTxnData, noRetryReq):
    """
    Clients should not be using the node registry but the pool transaction file
    :return:
    """
    clients = []
    for name in poolTxnClientNames:
        seed = poolTxnData["seeds"][name].encode()
        client, wallet = buildPoolClientAndWallet((name, seed),
                                                  tdirWithPoolTxns)
        txnPoolNodesLooper.add(client)
        ensureClientConnectedToNodesAndPoolLedgerSame(txnPoolNodesLooper, client,
                                                      *txnPoolNodeSet)
        clients.append((client, wallet))

    n = 250
    timeOutPerReq = 3
    for (client, wallet) in clients:
        logger.debug("{} sending {} requests".format(client, n))
        sendReqsToNodesAndVerifySuffReplies(txnPoolNodesLooper, wallet, client,
                                            n, 1, timeOutPerReq)
        logger.debug("{} sent {} requests".format(client, n))
    for node in txnPoolNodeSet:
        logger.debug("{} has requests {} with size {}".
                     format(node, len(node.requests), get_size(node.requests)))
        for replica in node.replicas:
            logger.debug("{} has prepares {} with size {}".
                         format(replica, len(replica.prepares),
                                get_size(replica.prepares)))
            logger.debug("{} has commits {} with size {}".
                         format(replica, len(replica.commits),
                                get_size(replica.commits)))
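
# The logging above reports the memory footprint of the request, prepare and
# commit collections via a get_size helper. The function below is only a
# rough, self-contained approximation of such a deep-sizeof helper (an
# assumption about what get_size does, not its actual implementation).
import sys


def deep_size(obj, seen=None):
    # Recursively sum sys.getsizeof over an object graph, skipping repeats.
    seen = seen if seen is not None else set()
    if id(obj) in seen:
        return 0
    seen.add(id(obj))
    size = sys.getsizeof(obj)
    if isinstance(obj, dict):
        size += sum(deep_size(k, seen) + deep_size(v, seen)
                    for k, v in obj.items())
    elif isinstance(obj, (list, tuple, set, frozenset)):
        size += sum(deep_size(item, seen) for item in obj)
    return size


print(deep_size({"reqs": [b"x" * 100 for _ in range(3)]}))
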
def test_different_ledger_request_interleave(tconf, looper, txnPoolNodeSet,
                                             client1, wallet1, one_node_added,
                                             client1Connected, tdir,
                                             client_tdir, tdirWithPoolTxns,
                                             steward1, stewardWallet,
                                             allPluginsPath):
    """
    Send pool and domain ledger requests such that they interleave, and do
    view change in between and verify the pool is functional
    """
    new_node = one_node_added
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Send domain ledger requests but don't wait for replies
    requests = sendRandomRequests(wallet1, client1, 2)
    # Add another node by sending pool ledger request
    _, _, new_theta = nodeThetaAdded(looper,
                                     txnPoolNodeSet,
                                     tdir,
                                     client_tdir,
                                     tconf,
                                     steward1,
                                     stewardWallet,
                                     allPluginsPath,
                                     name='new_theta')

    # Send more domain ledger requests but don't wait for replies
    requests.extend(sendRandomRequests(wallet1, client1, 3))

    # Do view change without waiting for replies
    ensure_view_change(looper, nodes=txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, client1, requests=requests)

    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)

    new_steward, new_steward_wallet = addNewSteward(looper, client_tdir,
                                                    steward1, stewardWallet,
                                                    'another_ste')

    # Send another pool ledger request (NODE) but don't wait for completion of
    # request
    next_node_name = 'next_node'
    r = sendAddNewNode(tdir, tconf, next_node_name, new_steward,
                       new_steward_wallet)
    node_req = r[0]

    # Send more domain ledger requests but don't wait for replies
    requests = [
        node_req, *sendRandomRequests(new_steward_wallet, new_steward, 5)
    ]

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, new_steward, requests=requests)

    # Make sure pool is functional
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)
def testReplyWhenRequestSentToMoreThanFPlusOneNodes(looper, nodeSet, fClient,
                                                    replied1, wallet1):
    """
    Alpha will not be sent the request but other nodes will be, so Alpha will
    just rely on propagates from other nodes
    """
    alpha = nodeSet.Alpha
    other_nodes = [n for n in nodeSet if n != alpha]

    def chk(req_count=1):
        for node in nodeSet:
            prc_req = node.processRequest.__name__
            prc_ppg = node.processPropagate.__name__
            if node != alpha:
                # All nodes except alpha will receive requests from client
                assert node.spylog.count(prc_req) == req_count
            else:
                # Alpha will not receive requests from client
                assert node.spylog.count(prc_req) == 0

            # All nodes will get propagates from others
            assert node.spylog.count(prc_ppg) == req_count * (nodeCount - 1)

    # Ledger is same for all nodes
    waitNodeDataEquality(looper, alpha, *other_nodes)
    chk(1)

    more_reqs_count = 5
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, fClient,
                                        more_reqs_count, 1)
    # Ledger is same for all nodes
    waitNodeDataEquality(looper, alpha, *other_nodes)
    chk(6)  # Since one request is already sent as part of `replied1`
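
# The f+1 reasoning above follows from the BFT bound: a pool of n nodes
# tolerates at most f = (n - 1) // 3 faulty ones, which is what helpers like
# getMaxFailures compute. A one-line sketch of that arithmetic:
def max_failures(node_count: int) -> int:
    # Largest f such that node_count >= 3 * f + 1 still holds.
    return (node_count - 1) // 3


assert max_failures(4) == 1   # the usual 4-node test pool tolerates one fault
assert max_failures(7) == 2
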
def test_new_node_accepts_timestamp(tconf, looper, txnPoolNodeSet,
                                    nodeSetWithNodeAddedAfterSomeTxns, client1,
                                    wallet1, client1Connected):
    """
    A new node joins the pool and is able to function properly without
    raising timestamp-related suspicions
    """
    _, new_node, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    old_susp_count = get_timestamp_suspicion_count(new_node)
    # Don't wait for node to catchup, start sending requests
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 10)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    # All nodes should reply
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 3)
    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    suspicions = {
        node.name: get_timestamp_suspicion_count(node)
        for node in txnPoolNodeSet
    }
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 3)
    for node in txnPoolNodeSet:
        assert suspicions[node.name] == get_timestamp_suspicion_count(node)
def test_node_load_after_one_node_drops_all_msgs(looper, txnPoolNodeSet, tconf,
                                                 tdirWithPoolTxns,
                                                 allPluginsPath,
                                                 poolTxnStewardData, capsys):
    client, wallet = buildPoolClientAndWallet(poolTxnStewardData,
                                              tdirWithPoolTxns,
                                              clientClass=TestClient)
    looper.add(client)
    looper.run(client.ensureConnectedToNodes())

    nodes = txnPoolNodeSet
    x = nodes[-1]

    with capsys.disabled():
        print("Patching node {}".format(x))

    def handleOneNodeMsg(self, wrappedMsg):
        # do nothing with an incoming node message
        pass

    x.handleOneNodeMsg = MethodType(handleOneNodeMsg, x)

    client_batches = 120
    txns_per_batch = 25
    for i in range(client_batches):
        s = perf_counter()
        sendReqsToNodesAndVerifySuffReplies(looper,
                                            wallet,
                                            client,
                                            txns_per_batch,
                                            override_timeout_limit=True)
        with capsys.disabled():
            print('{} executed {} client txns in {:.2f} seconds'.format(
                i + 1, txns_per_batch,
                perf_counter() - s))
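
# The test above silences one node by rebinding its handleOneNodeMsg with
# types.MethodType. Here is a standalone illustration of that patching
# technique on a plain object, unrelated to plenum's TestNode:
from types import MethodType


class Receiver:
    def handle(self, msg):
        print("handled", msg)


def drop_everything(self, msg):
    # Replacement bound method: silently discard every incoming message.
    pass


_r = Receiver()
_r.handle("ping")                        # prints "handled ping"
_r.handle = MethodType(drop_everything, _r)
_r.handle("ping")                        # dropped, nothing printed
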
def testNodeCatchupAfterDisconnect(newNodeCaughtUp, txnPoolNodeSet,
                                   nodeSetWithNodeAddedAfterSomeTxns):
    """
    A node that disconnects after some transactions should eventually get the
    transactions which happened while it was disconnected
    :return:
    """
    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns

    logger.debug("Stopping node {} with pool ledger size {}".format(
        newNode, newNode.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            newNode,
                                            stopNode=False)
    looper.removeProdable(newNode)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Starting the stopped node, {}".format(newNode))
    looper.add(newNode)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, newNode)

    logger.debug("Waiting for the node to catch up, {}".format(newNode))
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10)
    checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1])
def nodeCreatedAfterSomeTxns(txnPoolNodeSet, tdirWithPoolTxns,
                             poolTxnStewardData, tconf, allPluginsPath,
                             request):
    with Looper(debug=True) as looper:
        client, wallet = buildPoolClientAndWallet(poolTxnStewardData,
                                                  tdirWithPoolTxns,
                                                  clientClass=TestClient)
        looper.add(client)
        looper.run(client.ensureConnectedToNodes())
        txnCount = getValueFromModule(request, "txnCount", 5)
        sendReqsToNodesAndVerifySuffReplies(looper,
                                            wallet,
                                            client,
                                            txnCount,
                                            timeoutPerReq=25)

        newStewardName = randomString()
        newNodeName = "Epsilon"
        newStewardClient, newStewardWallet, newNode = addNewStewardAndNode(
            looper,
            client,
            wallet,
            newStewardName,
            newNodeName,
            tdirWithPoolTxns,
            tconf,
            allPluginsPath=allPluginsPath,
            autoStart=True)
        yield looper, newNode, client, wallet, newStewardClient, \
              newStewardWallet
def test_observer_execution(looper, txnPoolNodeSet, client1, wallet1):
    resp1 = []
    resp2 = []

    def callable1(name, reqId, frm, result, numReplies):
        resp1.append(reqId)
        return reqId

    def callable2(name, reqId, frm, result, numReplies):
        resp2.append(reqId)
        return reqId

    client1.registerObserver(callable1, name='first')
    client1.registerObserver(callable2)

    # Send 1 request
    req, = sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1)
    # Each observer is called only once
    assert len(resp1) == 1
    assert len(resp2) == 1
    assert resp1[0] == req.reqId
    assert resp2[0] == req.reqId

    client1.deregisterObserver('first')
    # Send another request
    req1, = sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1)
    # Only 1 observer is called
    assert len(resp1) == 1
    assert len(resp2) == 2
    assert resp1[-1] == req.reqId
    assert resp2[-1] == req1.reqId
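
# test_observer_execution registers callbacks that fire once per completed
# request and can be removed again by name. The registry below is a
# hypothetical minimal version of that register/deregister/notify behaviour,
# not TestClient's real observer API.
class ObserverRegistry:
    def __init__(self):
        self._observers = {}

    def register(self, callback, name=None):
        # Fall back to an auto-generated name when none is supplied.
        name = name or "observer-{}".format(len(self._observers))
        self._observers[name] = callback

    def deregister(self, name):
        self._observers.pop(name, None)

    def notify(self, req_id):
        for callback in list(self._observers.values()):
            callback(req_id)


_seen = []
_registry = ObserverRegistry()
_registry.register(_seen.append, name='first')
_registry.notify(1)
_registry.deregister('first')
_registry.notify(2)
assert _seen == [1]   # only the first notification reached the observer
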
def testNodeCatchupAfterRestart(newNodeCaughtUp, txnPoolNodeSet,
                                nodeSetWithNodeAddedAfterSomeTxns):
    """
    A node that restarts after some transactions should eventually get the
    transactions which happened while it was down
    :return:
    """

    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    logger.debug("Stopping node {} with pool ledger size {}".
                 format(newNode, newNode.poolManager.txnSeqNo))
    ensureNodeDisconnectedFromPool(looper, txnPoolNodeSet, newNode)
    # for n in txnPoolNodeSet[:4]:
    #     for r in n.nodestack.remotes.values():
    #         if r.name == newNode.name:
    #             r.removeStaleCorrespondents()
    # looper.run(eventually(checkNodeDisconnectedFrom, newNode.name,
    #                       txnPoolNodeSet[:4], retryWait=1, timeout=5))
    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    logger.debug("Starting the stopped node, {}".format(newNode))
    newNode.start(looper.loop)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    looper.run(eventually(checkNodeLedgersForEquality, newNode,
                          *txnPoolNodeSet[:4], retryWait=1, timeout=15))
def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up, client1):
    """
    Test if every node queues 3-phase requests (PRE-PREPARE, PREPARE and COMMIT)
    that come from a view which is greater than the current view
    """

    f = getMaxFailures(nodeCount)

    # Delay processing of instance change on a node
    nodeA = nodeSet.Alpha
    nodeA.nodeIbStasher.delay(icDelay(60))

    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's throughput falls and view changes
    ppDelayer = ppDelay(5, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelayer)

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 4,
                                        timeout=5 * nodeCount)

    # Every node except Node A should have a view change
    for node in nodeSet:
        if node.name != nodeA.name:
            looper.run(eventually(
                partial(checkViewChangeInitiatedForNode, node, 0),
                retryWait=1,
                timeout=20))

    # Node A's view should not have changed yet
    with pytest.raises(AssertionError):
        looper.run(eventually(partial(
            checkViewChangeInitiatedForNode, nodeA, 0),
            retryWait=1,
            timeout=20))

    # NodeA should not have any pending 3 phase request for a later view
    for r in nodeA.replicas:  # type: TestReplica
        assert len(r.threePhaseMsgsForLaterView) == 0

    # Reset delays on incoming messages from all nodes
    for node in nodeSet:
        node.nodeIbStasher.nodelay(ppDelayer)

    # Send one more request
    sendRandomRequest(client1)

    def checkPending3PhaseReqs():
        # Get all replicas that have their primary status decided
        reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None]
        # At least one replica should have its primary status decided
        assert len(reps) > 0
        for r in reps:  # type: TestReplica
            logging.debug("primary status for replica {} is {}"
                          .format(r, r.primaryNames))
            assert len(r.threePhaseMsgsForLaterView) > 0

    # NodeA should now have pending 3 phase request for a later view
    looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=30))
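
# testQueueingReqFromFutureView expects replicas to stash 3-phase messages
# whose viewNo is ahead of their current view and to replay them once the
# view change completes. The class below is a schematic version of that rule;
# the attribute and field names are assumptions, not plenum's Replica API.
from collections import namedtuple

ThreePhaseMsg = namedtuple("ThreePhaseMsg", ["viewNo", "payload"])


class MiniReplica:
    def __init__(self, view_no=0):
        self.viewNo = view_no
        self.threePhaseMsgsForLaterView = []
        self.processed = []

    def receive(self, msg):
        # Stash messages from future views instead of processing them now.
        if msg.viewNo > self.viewNo:
            self.threePhaseMsgsForLaterView.append(msg)
        else:
            self.processed.append(msg)

    def enter_view(self, view_no):
        # On view change, replay whatever was stashed for the new view.
        self.viewNo = view_no
        stashed, self.threePhaseMsgsForLaterView = \
            self.threePhaseMsgsForLaterView, []
        for msg in stashed:
            self.receive(msg)


_replica = MiniReplica(view_no=0)
_replica.receive(ThreePhaseMsg(viewNo=1, payload="PRE-PREPARE"))
assert len(_replica.threePhaseMsgsForLaterView) == 1
_replica.enter_view(1)
assert _replica.processed and not _replica.threePhaseMsgsForLaterView
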
def test_master_primary_different_from_previous(txnPoolNodeSet, looper,
                                                client1, wallet1,
                                                client1Connected):
    """
    After a view change, the primary of the master instance must be different
    from the previous primary; it does not matter for other instances. The
    primary is benign and does not vote for itself.
    """
    pr = slow_primary(txnPoolNodeSet, 0, delay=10)
    old_pr_node_name = pr.node.name

    # View change happens
    ensure_view_change(looper, txnPoolNodeSet)
    logger.debug("VIEW HAS BEEN CHANGED!")

    # Elections done
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # New primary is not same as old primary
    assert getPrimaryReplica(txnPoolNodeSet, 0).node.name != old_pr_node_name

    pr.outBoxTestStasher.resetDelays()

    # The new primary can still process requests
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet,
                                            nodes_slow_to_process_catchup_reqs,
                                            nodeCreatedAfterSomeTxns):
    """
    A new node that joins after some transactions should stash new transactions
    until it has caught up
    :return:
    """

    looper, new_node, client, wallet, newStewardClient, newStewardWallet = \
        nodeCreatedAfterSomeTxns
    txnPoolNodeSet.append(new_node)
    old_nodes = txnPoolNodeSet[:-1]
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    chk_commits_prepares_recvd(0, old_nodes, new_node)

    for node in old_nodes:
        node.reset_delays_and_process_delayeds()

    timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet)) + \
              catchup_delay + \
              waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    waitNodeDataEquality(looper, new_node, *old_nodes)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2)

    # Commits and Prepares are received by all old nodes
    with pytest.raises(AssertionError):
        # Since nodes discard 3PC messages for already ordered requests.
        chk_commits_prepares_recvd(0, old_nodes, new_node)
    waitNodeDataEquality(looper, new_node, *old_nodes)
def testPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper,
                                                   txnPoolNodeSet, client1,
                                                   wallet1, client1Connected):
    """
    One of the primaries starts getting a lot of requests, more than its log
    size, and queues up requests since they will go beyond its watermarks.
    This happens because other nodes are slow in processing its PRE-PREPARE.
    Eventually this primary will send PRE-PREPARE for all requests and those
    requests will complete
    """
    instId = 1
    reqsToSend = 2*chkFreqPatched.LOG_SIZE + 1
    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    pr = getPrimaryReplica(txnPoolNodeSet, instId)
    from plenum.server.replica import TPCStat
    orderedCount = pr.stats.get(TPCStat.OrderSent)

    for r in npr:
        r.node.nodeIbStasher.delay(ppDelay(10, instId))

    def chk():
        assert orderedCount + reqsToSend == pr.stats.get(TPCStat.OrderSent)

    print('Sending {} requests'.format(reqsToSend))
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, 1)
    looper.run(eventually(chk, retryWait=1, timeout=80))
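
# The watermark behaviour exercised above boils down to: a primary may only
# assign 3PC sequence numbers inside the window (h, h + LOG_SIZE]; anything
# beyond the high watermark has to wait until the window slides forward.
# A tiny sketch of that bound check (names are illustrative, not replica
# attributes):
def within_watermarks(seq_no: int, low: int, log_size: int) -> bool:
    # The window is (low, low + log_size]; seq_no outside it must be queued.
    return low < seq_no <= low + log_size


LOG_SIZE_EXAMPLE = 30
assert within_watermarks(1, 0, LOG_SIZE_EXAMPLE)
assert not within_watermarks(2 * LOG_SIZE_EXAMPLE + 1, 0, LOG_SIZE_EXAMPLE)
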
def test_catch_up_after_demoted(txnPoolNodeSet,
                                nodeSetWithNodeAddedAfterSomeTxns):
    logger.info(
        "1. add a new node after sending some txns and check that catch-up "
        "is done (the new node is up to date)")
    looper, newNode, client, wallet, newStewardClient, \
    newStewardWallet = nodeSetWithNodeAddedAfterSomeTxns
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:4])

    logger.info("2. turn the new node off (demote)")
    node_data = {ALIAS: newNode.name, SERVICES: []}
    updateNodeData(looper, newStewardClient, newStewardWallet, newNode,
                   node_data)

    logger.info("3. send more requests, "
                "so that the new node's state is outdated")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    checkNodeDataForInequality(newNode, *txnPoolNodeSet[:-1])

    logger.info("4. turn the new node on")
    node_data = {ALIAS: newNode.name, SERVICES: [VALIDATOR]}
    updateNodeData(looper, newStewardClient, newStewardWallet, newNode,
                   node_data)

    logger.info("5. make sure catch-up is done "
                "(the new node is up to date again)")
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.info("6. send more requests and make sure "
                "that the new node participates in processing them")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10)
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1])
def testCheckpointCreated(chkFreqPatched, looper, txnPoolNodeSet, client1,
                          wallet1, client1Connected, reqs_for_checkpoint):
    """
    After fewer requests than `CHK_FREQ`, there should be one checkpoint
    on each replica. After `CHK_FREQ` requests, one checkpoint should become
    stable
    """
    # Send one batch less so checkpoint is not created
    sendReqsToNodesAndVerifySuffReplies(
        looper, wallet1, client1,
        reqs_for_checkpoint - (chkFreqPatched.Max3PCBatchSize), 1)
    # Deliberately waiting so as to verify that not more than 1 checkpoint is
    # created
    looper.runFor(2)
    chkChkpoints(txnPoolNodeSet, 1)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        chkFreqPatched.Max3PCBatchSize, 1)

    timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.run(
        eventually(chkChkpoints,
                   txnPoolNodeSet,
                   1,
                   0,
                   retryWait=1,
                   timeout=timeout))
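
# testCheckpointCreated distinguishes a checkpoint that merely exists (built
# up while batches are ordered) from one that is stable (reached once CHK_FREQ
# batches are ordered and, in the real protocol, a quorum of Checkpoint
# messages agrees). The toy bookkeeping below illustrates only that local
# counting; names and behaviour are assumptions, not plenum's checkpoint code.
class ToyCheckpointState:
    def __init__(self, chk_freq):
        self.chk_freq = chk_freq
        self.ordered = 0
        self.stable = 0

    def order_batch(self):
        self.ordered += 1
        if self.ordered % self.chk_freq == 0:
            # Boundary reached: the current checkpoint can become stable.
            self.stable += 1

    @property
    def in_progress(self):
        return self.ordered % self.chk_freq != 0


_state = ToyCheckpointState(chk_freq=100)
for _ in range(99):
    _state.order_batch()
assert _state.in_progress and _state.stable == 0   # one batch short of stable
_state.order_batch()
assert _state.stable == 1                           # boundary makes it stable
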
def testElectionsAfterViewChange(delayedPerf, looper: Looper,
                                 nodeSet: TestNodeSet, up, wallet1, client1):
    """
    Test that a primary election does happen after a view change
    """

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's throughput falls
    # and view changes
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelay(10, 0))

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    # Ensure view change happened for both node and its primary elector
    for node in nodeSet:
        looper.run(
            eventually(partial(checkViewChangeInitiatedForNode, node, 1),
                       retryWait=1,
                       timeout=20))

    # Ensure elections are done again and pool is setup again with appropriate
    # protocol instances and each protocol instance is setup properly too
    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=30)
def setup(looper, tconf, startedNodes, up, wallet1, client1):
    # Get the master replica of the master protocol instance
    P = getPrimaryReplica(startedNodes)

    # Make `Delta` small enough so throughput check passes.
    for node in startedNodes:
        node.monitor.Delta = .001

    # set LAMBDA to a value not as huge as it is set in the production config
    testLambda = 30
    for node in startedNodes:
        node.monitor.Lambda = testLambda

    slowed_request = False

    # make P (primary replica on master) faulty, i.e., slow to send
    # PRE-PREPARE for a specific client request only
    def specificPrePrepare(msg):
        nonlocal slowed_request
        if isinstance(msg, PrePrepare) and slowed_request is False:
            slowed_request = True
            return testLambda + 5  # just more than LAMBDA

    P.outBoxTestStasher.delay(specificPrePrepare)
    # TODO select or create a timeout for this case in 'waits'
    sendReqsToNodesAndVerifySuffReplies(looper,
                                        wallet1,
                                        client1,
                                        numReqs=5,
                                        customTimeoutPerReq=tconf.TestRunningTimeLimitSec)

    return adict(nodes=startedNodes)
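
# Both setup fixtures tune Monitor.Delta (master throughput ratio) and
# Monitor.Lambda (per-request latency limit) so that only the deliberately
# slowed request trips a check. The function below is a rough sketch of how a
# Delta-style threshold could flag a degraded master; it is illustrative only
# and does not mirror plenum's Monitor internals.
def master_degraded(master_throughput: float,
                    avg_backup_throughput: float,
                    delta: float) -> bool:
    # Master counts as degraded when its throughput drops below
    # delta times the average throughput of the backup instances.
    return master_throughput < delta * avg_backup_throughput


assert not master_degraded(10.0, 10.0, delta=0.001)   # tiny Delta: check passes
assert master_degraded(0.5, 10.0, delta=0.8)          # larger Delta flags the gap
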
def setup(looper, startedNodes, up, wallet1, client1):
    # Get the master replica of the master protocol instance
    P = getPrimaryReplica(startedNodes)

    # Make `Delta` small enough so throughput check passes.
    for node in startedNodes:
        node.monitor.Delta = .001

    slowRequest = None

    # make P (primary replica on master) faulty, i.e., slow to send
    # PRE-PREPARE for a specific client request only
    def by65SpecificPrePrepare(msg):
        nonlocal slowRequest
        if isinstance(msg, PrePrepare) and slowRequest is None:
            slowRequest = getattr(msg, f.REQ_ID.nm)
            return 65

    P.outBoxTestStasher.delay(by65SpecificPrePrepare)

    sendReqsToNodesAndVerifySuffReplies(looper,
                                        wallet1,
                                        client1,
                                        numReqs=5,
                                        timeoutPerReq=80)

    return adict(nodes=startedNodes)
def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, looper, wallet1,
                                               client1, client1Connected,
                                               tconf):
    """
    View change occurs when master's primary is disconnected
    """

    # Setup
    nodes = txnPoolNodeSet

    viewNoBefore = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    stopNodes([old_pr_node], looper)
    looper.removeProdable(old_pr_node)
    remainingNodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remainingNodes, timeout=20)

    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remainingNodes, viewNoBefore + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remainingNodes)
    new_pr_node = get_master_primary_node(remainingNodes)
    assert old_pr_node != new_pr_node

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
def testOldCheckpointDeleted(chkFreqPatched, looper, txnPoolNodeSet, client1,
                             wallet1, client1Connected, reqs_for_checkpoint):
    """
    Send more requests than twice `CHK_FREQ`; there should be one new stable
    checkpoint on each replica. The old stable checkpoint should be removed
    """
    sendReqsToNodesAndVerifySuffReplies(looper,
                                        wallet1,
                                        client1,
                                        numReqs=2 * reqs_for_checkpoint,
                                        fVal=1)

    sendReqsToNodesAndVerifySuffReplies(looper,
                                        wallet1,
                                        client1,
                                        numReqs=1,
                                        fVal=1)

    timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.run(
        eventually(chkChkpoints,
                   txnPoolNodeSet,
                   2,
                   0,
                   retryWait=1,
                   timeout=timeout))
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet, client1,
                                          wallet1, client1Connected):
    """
    One of the nodes experiences a poor network connection and loses 3PC
    messages. It has to do multiple rounds of catchup to get caught up
    """

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]
    orig_method = bad_node.master_replica.dispatchThreePhaseMsg

    # Bad node does not process any 3 phase messages, equivalent to messages
    # being lost
    def bad_method(self, m, s):
        pass

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        bad_method, bad_node.master_replica)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, bad_node, *other_nodes)

    # Patch all nodes to return ConsistencyProof of a smaller ledger to the
    # bad node but only once, so that the bad_node needs to do catchup again.

    make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                              Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(
            getAllReturnVals(bad_node,
                             bad_node.is_catchup_needed,
                             compare_val_to=True))

    def caught_up_for_current_view_count():
        return len(
            getAllReturnVals(bad_node,
                             bad_node.caught_up_for_current_view,
                             compare_val_to=True))

    old_count_1 = is_catchup_needed_count()
    old_count_2 = caught_up_for_current_view_count()
    ensure_view_change(looper, txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert is_catchup_needed_count() > old_count_1
    # The bad_node caught up due to receiving sufficient ViewChangeDone
    # messages
    assert caught_up_for_current_view_count() > old_count_2

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        orig_method, bad_node.master_replica)
def changeNodeHa(looper, txnPoolNodeSet, tdirWithClientPoolTxns,
                 poolTxnData, poolTxnStewardNames, tconf, shouldBePrimary, tdir):
    # prepare new ha for node and client stack
    subjectedNode = None
    stewardName = None
    stewardsSeed = None

    for nodeIndex, n in enumerate(txnPoolNodeSet):
        if shouldBePrimary == n.has_master_primary:
            subjectedNode = n
            stewardName = poolTxnStewardNames[nodeIndex]
            stewardsSeed = poolTxnData["seeds"][stewardName].encode()
            break

    nodeStackNewHA, clientStackNewHA = genHa(2)
    logger.debug("change HA for node: {} to {}".format(
        subjectedNode.name, (nodeStackNewHA, clientStackNewHA)))

    nodeSeed = poolTxnData["seeds"][subjectedNode.name].encode()

    # change HA
    stewardClient, req = changeHA(looper, tconf, subjectedNode.name, nodeSeed,
                                  nodeStackNewHA, stewardName, stewardsSeed,
                                  basedir=tdirWithClientPoolTxns)

    waitForSufficientRepliesForRequests(looper, stewardClient,
                                        requests=[req])

    # stop node for which HA will be changed
    subjectedNode.stop()
    looper.removeProdable(subjectedNode)

    # start node with new HA
    config_helper = PNodeConfigHelper(subjectedNode.name, tconf, chroot=tdir)
    restartedNode = TestNode(subjectedNode.name,
                             config_helper=config_helper,
                             config=tconf, ha=nodeStackNewHA,
                             cliha=clientStackNewHA)
    looper.add(restartedNode)
    txnPoolNodeSet[nodeIndex] = restartedNode
    looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=70))

    electionTimeout = waits.expectedPoolElectionTimeout(
        nodeCount=len(txnPoolNodeSet),
        numOfReelections=3)
    ensureElectionsDone(looper,
                        txnPoolNodeSet,
                        retryWait=1,
                        customTimeout=electionTimeout)

    # start client and check the node HA
    anotherClient, _ = genTestClient(tmpdir=tdirWithClientPoolTxns,
                                     usePoolLedger=True)
    looper.add(anotherClient)
    looper.run(eventually(anotherClient.ensureConnectedToNodes))
    stewardWallet = Wallet(stewardName)
    stewardWallet.addIdentifier(signer=DidSigner(seed=stewardsSeed))
    sendReqsToNodesAndVerifySuffReplies(
        looper, stewardWallet, stewardClient, 8)
def test_recover_stop_primaries(looper, checkpoint_size, txnPoolNodeSet,
                                allPluginsPath, tdir, tconf, client1, wallet1,
                                client1Connected):
    """
    Test that we can recover after having more than f nodes disconnected:
    - stop current master primary (Alpha)
    - send txns
    - restart current master primary (Beta)
    - send txns
    """

    active_nodes = list(txnPoolNodeSet)
    assert 4 == len(active_nodes)
    initial_view_no = active_nodes[0].viewNo

    logger.info("Stop first node (current Primary)")
    _, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Make sure view changed")
    expected_view_no = initial_view_no + 1
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)
    ensureElectionsDone(looper=looper, nodes=active_nodes, numInstances=2)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("send at least one checkpoint")
    assert nodes_do_not_have_checkpoints(*active_nodes)
    sendReqsToNodesAndVerifySuffReplies(looper,
                                        wallet1,
                                        client1,
                                        numReqs=2 * checkpoint_size)
    assert nodes_have_checkpoints(*active_nodes)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info(
        "Stop second node (current Primary) so the primary looses his state")
    stopped_node, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Restart the primary node")
    restarted_node = start_stopped_node(stopped_node, looper, tconf, tdir,
                                        allPluginsPath)
    assert nodes_do_not_have_checkpoints(restarted_node)
    assert nodes_have_checkpoints(*active_nodes)
    active_nodes = active_nodes + [restarted_node]

    logger.info("Check that primary selected")
    ensureElectionsDone(looper=looper,
                        nodes=active_nodes,
                        numInstances=2,
                        customTimeout=30)
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Check if the pool is able to process requests")
    sendReqsToNodesAndVerifySuffReplies(looper,
                                        wallet1,
                                        client1,
                                        numReqs=10 * checkpoint_size)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)
    assert nodes_have_checkpoints(*active_nodes)
def testInstChangeWithLowerRatioThanDelta(looper, step3, wallet1, client1):
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 10)

    # wait for every node to run another checkPerformance
    waitForNextPerfCheck(looper, step3.nodes, step3.perfChecks)

    # verify all nodes have undergone an instance change
    looper.run(eventually(checkViewNoForNodes, step3.nodes, 1, timeout=10))
def viewChangeDone(nodeSet, looper, up, wallet1, client1, viewNo):
    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(nodeSet, 0, 10)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    looper.run(eventually(partial(checkViewNoForNodes, nodeSet, viewNo+1),
                          retryWait=1, timeout=20))
def testReplyReceivedOnlyByClientWhoSentRequest(looper, nodeSet, tdir,
                                                client1, wallet1):
    newClient, _ = genTestClient(nodeSet, tmpdir=tdir)
    looper.add(newClient)
    looper.run(newClient.ensureConnectedToNodes())
    client1InboxSize = len(client1.inBox)
    newClientInboxSize = len(newClient.inBox)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, newClient, 1)
    assert len(client1.inBox) == client1InboxSize
    assert len(newClient.inBox) > newClientInboxSize
def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns,
                 poolTxnData, poolTxnStewardNames, tconf, shouldBePrimary):

    # prepare new ha for node and client stack
    subjectedNode = None
    stewardName = None
    stewardsSeed = None

    for nodeIndex, n in enumerate(txnPoolNodeSet):
        if (shouldBePrimary and n.primaryReplicaNo == 0) or \
                (not shouldBePrimary and n.primaryReplicaNo != 0):
            subjectedNode = n
            stewardName = poolTxnStewardNames[nodeIndex]
            stewardsSeed = poolTxnData["seeds"][stewardName].encode()
            break

    nodeStackNewHA, clientStackNewHA = genHa(2)
    logger.debug("change HA for node: {} to {}".
                 format(subjectedNode.name, (nodeStackNewHA, clientStackNewHA)))

    nodeSeed = poolTxnData["seeds"][subjectedNode.name].encode()

    # change HA
    stewardClient, req = changeHA(looper, tconf, subjectedNode.name, nodeSeed,
                                  nodeStackNewHA, stewardName, stewardsSeed)
    f = getMaxFailures(len(stewardClient.nodeReg))
    looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox,
                          req.reqId, f, retryWait=1, timeout=20))

    # stop node for which HA will be changed
    subjectedNode.stop()
    looper.removeProdable(subjectedNode)

    # start node with new HA
    restartedNode = TestNode(subjectedNode.name, basedirpath=tdirWithPoolTxns,
                             config=tconf, ha=nodeStackNewHA,
                             cliha=clientStackNewHA)
    looper.add(restartedNode)

    txnPoolNodeSet[nodeIndex] = restartedNode
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=70))
    ensureElectionsDone(looper, txnPoolNodeSet, retryWait=1, timeout=10)

    # start client and check the node HA
    anotherClient, _ = genTestClient(tmpdir=tdirWithPoolTxns,
                                     usePoolLedger=True)
    looper.add(anotherClient)
    looper.run(eventually(anotherClient.ensureConnectedToNodes))
    stewardWallet = Wallet(stewardName)
    stewardWallet.addIdentifier(signer=SimpleSigner(seed=stewardsSeed))
    sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, stewardClient, 8)
    looper.run(eventually(checkIfGenesisPoolTxnFileUpdated, *txnPoolNodeSet,
                          stewardClient, anotherClient, retryWait=1,
                          timeout=10))
    looper.removeProdable(stewardClient)
def testNodesReceiveClientMsgs(txnPoolNodeSet, tdirWithPoolTxns,
                               poolTxnClientData, txnPoolCliNodeReg):

    with Looper(debug=True) as looper:
        name, pkseed, sigseed = poolTxnClientData
        signer = SimpleSigner(seed=sigseed)
        client = TestClient(name=name, nodeReg=txnPoolCliNodeReg, ha=genHa(),
                            signer=signer, basedirpath=tdirWithPoolTxns)
        looper.add(client)
        looper.run(client.ensureConnectedToNodes())
        sendReqsToNodesAndVerifySuffReplies(looper, client, 1)
def testOldCheckpointDeleted(chkFreqPatched, looper, txnPoolNodeSet, client1,
                             wallet1, client1Connected):
    """
    Send more requests than twice `CHK_FREQ`; there should be one new stable
    checkpoint on each replica. The old stable checkpoint should be removed
    """
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2*CHK_FREQ,
                                        1)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1)

    looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1))
def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet,
                                          nodeSetWithNodeAddedAfterSomeTxns,
                                          newNodeCaughtUp, tdirWithPoolTxns,
                                          tconf, allPluginsPath):
    """
    Node discards 3-phase and election messages from view nos that it does not
    know of (view nos before it joined the pool)
    :return:
    """
    looper, nodeX, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    viewNo = nodeX.viewNo

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(txnPoolNodeSet, 0, 10)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 4)
    looper.run(eventually(partial(checkViewNoForNodes, txnPoolNodeSet,
                                  viewNo + 1), retryWait=1, timeout=20))

    newStewardName = "testClientSteward" + randomString(3)
    nodeName = "Theta"
    _, _, nodeTheta = addNewStewardAndNode(looper, client,
                                           wallet,
                                           newStewardName,
                                           nodeName,
                                           tdirWithPoolTxns, tconf,
                                           allPluginsPath)
    txnPoolNodeSet.append(nodeTheta)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    looper.run(client.ensureConnectedToNodes())
    looper.run(eventually(checkNodeLedgersForEquality, nodeTheta,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=5))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1,
                               timeout=10)
    electMsg = Nomination(nodeX.name, 0, viewNo)
    threePMsg = PrePrepare(
            0,
            viewNo,
            10,
            wallet.defaultId,
            wallet._getIdData().lastReqId+1,
            "random digest",
            time.time()
            )
    ridTheta = nodeX.nodestack.getRemote(nodeTheta.name).uid
    nodeX.send(electMsg, ridTheta)
    nodeX.send(threePMsg, ridTheta)
    nodeX.send(electMsg, ridTheta)
    looper.run(eventually(checkDiscardMsg, [nodeTheta, ], electMsg,
                          'un-acceptable viewNo', retryWait=1, timeout=5))
    nodeX.send(threePMsg, ridTheta)
    looper.run(eventually(checkDiscardMsg, [nodeTheta, ], threePMsg,
                          'un-acceptable viewNo', retryWait=1, timeout=5))
def testRequestOlderThanStableCheckpointRemoved(
    chkFreqPatched, looper, txnPoolNodeSet, client1, wallet1, client1Connected
):
    reqs = sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, CHK_FREQ - 1, 1)
    looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, retryWait=1))
    checkRequestCounts(txnPoolNodeSet, len(reqs))
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1)
    looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1))
    checkRequestCounts(txnPoolNodeSet, 0)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 3 * CHK_FREQ + 1, 1)
    looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1))
    checkRequestCounts(txnPoolNodeSet, 1)
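# The request counts asserted above follow from simple arithmetic: only the
# requests ordered after the latest stable checkpoint stay tracked, i.e.
# total_ordered % CHK_FREQ of them. With an assumed CHK_FREQ of 5 (a local
# name is used here to avoid shadowing the imported CHK_FREQ):
_CHK_FREQ = 5
assert (_CHK_FREQ - 1) % _CHK_FREQ == _CHK_FREQ - 1   # first batch: len(reqs) tracked
assert _CHK_FREQ % _CHK_FREQ == 0                     # one more request: all cleaned up
assert (4 * _CHK_FREQ + 1) % _CHK_FREQ == 1           # after 3*CHK_FREQ + 1 more: one left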
def testInstChangeWithLowerRatioThanDelta(looper, step3, client1):
    sendReqsToNodesAndVerifySuffReplies(looper, client1, 5)

    # wait for every node to run another checkPerformance
    newPerfChecks = waitForNextPerfCheck(looper, step3.nodes, step3.perfChecks)

    # verify all nodes recognize P as degraded
    # for n in step3.nodes:
    #     assert newPerfChecks[n.name].result is False

    # verify all nodes have undergone an instance change
    checkViewNoForNodes(step3.nodes, 1)
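# A hedged sketch of the check that `Delta` tunes (illustrative only; the real
# Monitor has more inputs): the master instance counts as degraded by
# throughput when its throughput falls below Delta times the average
# throughput of the backup instances.
def master_degraded_by_throughput(master_tp, backup_tps, delta):
    if not backup_tps:
        return False
    avg_backup = sum(backup_tps) / len(backup_tps)
    if avg_backup == 0:
        return False
    return (master_tp / avg_backup) < delta


# Example: with Delta = 0.8 a master pushing 4 req/s against backups averaging
# 10 req/s is flagged, while 9 req/s is not.
assert master_degraded_by_throughput(4, [10, 10], 0.8)
assert not master_degraded_by_throughput(9, [10, 10], 0.8)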
def viewChangeDone(nodeSet, looper, up, client1):
    """
    Test that a view change is done when the performance of master goes down
    """

    # Delay processing of PRE-PREPARE from all non-primary replicas of master
    # so that the master's performance falls and a view change happens
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelay(10, 0))

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 4)

    looper.run(eventually(partial(checkViewNoForNodes, nodeSet, 1),
                          retryWait=1, timeout=20))
def testCheckpointCreated(chkFreqPatched, looper, txnPoolNodeSet, client1,
                          wallet1, client1Connected):
    """
    After fewer than `CHK_FREQ` requests there should be one checkpoint on
    each replica. After `CHK_FREQ` requests that checkpoint should become stable
    """
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, CHK_FREQ-1, 1)
    # Deliberately wait to verify that no more than one checkpoint is
    # created
    looper.runFor(2)
    chkChkpoints(txnPoolNodeSet, 1)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1, 1)

    looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1))
def testPostingThroughput(postingStatsEnabled, looper: Looper,
                          nodeSet: TestNodeSet,
                          wallet1, client1):
    """
    The throughput after `DashboardUpdateFreq` seconds and before sending any
    requests should be zero.
    Send `n` requests within `ThroughputWindowSize` seconds; the throughput
    measured up to `ThroughputWindowSize` should reflect those `n` requests.
    After `ThroughputWindowSize` seconds the throughput should be zero again.
    Also check `totalRequests`.
    """

    # Sleep for the window size first so that any values recorded within the
    # window by earlier tests have expired
    looper.runFor(config.ThroughputWindowSize)

    reqCount = 10
    for node in nodeSet:
        assert node.monitor.highResThroughput == 0
        assert node.monitor.totalRequests == 0

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqCount, nodeSet.f,
                                        timeoutPerReq=20)

    for node in nodeSet:
        assert len(node.monitor.orderedRequestsInLast) == reqCount
        assert node.monitor.highResThroughput > 0
        assert node.monitor.totalRequests == reqCount
        # TODO: Add implementation to actually call firebase plugin
        # and test if firebase plugin is sending total request count
        # if node is primary

    looper.runFor(config.DashboardUpdateFreq)

    for node in nodeSet:
        assert node.monitor.spylog.count(Monitor.sendThroughput.__name__) > 0

    # Run for the throughput window duration so that `orderedRequestsInLast`
    # becomes empty
    looper.runFor(config.ThroughputWindowSize)

    def chk():
        for node in nodeSet:
            assert len(node.monitor.orderedRequestsInLast) == 0
            assert node.monitor.highResThroughput == 0
            assert node.monitor.totalRequests == reqCount

    looper.run(eventually(chk, retryWait=1, timeout=10))
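# A self-contained sketch of the sliding-window bookkeeping these monitor
# assertions rely on (hypothetical; not the Monitor class itself): ordered
# requests are timestamped, entries older than the window are evicted, and
# throughput is the surviving count divided by the window size. That is why
# highResThroughput returns to zero once ThroughputWindowSize seconds pass
# without new requests, while totalRequests keeps its cumulative value.
from collections import deque


class WindowedThroughput:
    def __init__(self, window_size):
        self.window_size = window_size
        self.ordered = deque()   # timestamps of ordered requests
        self.total_requests = 0  # cumulative, never pruned

    def request_ordered(self, now):
        self.ordered.append(now)
        self.total_requests += 1

    def throughput(self, now):
        while self.ordered and now - self.ordered[0] > self.window_size:
            self.ordered.popleft()
        return len(self.ordered) / self.window_size


monitor_sketch = WindowedThroughput(window_size=5.0)
monitor_sketch.request_ordered(now=0.0)
assert monitor_sketch.throughput(now=1.0) > 0
assert monitor_sketch.throughput(now=10.0) == 0  # window passed, nothing new
assert monitor_sketch.total_requests == 1        # cumulative count untouched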
def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet,
                                            nodeSetWithNodeAddedAfterSomeTxns):
    """
    A new node that joins after some transactions should stash new transactions
    until it has caught up
    :return:
    """
    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)

    for node in txnPoolNodeSet[:4]:
        for replica in node.replicas:
            for commit in replica.commits.values():
                assert newNode.name not in commit.voters
            for prepare in replica.prepares.values():
                assert newNode.name not in prepare.voters
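# A hypothetical sketch of the gating behaviour the voter checks above rely
# on: until catch-up finishes, a new node stashes incoming 3-phase messages
# instead of voting on them, so its name never shows up in other replicas'
# Prepare/Commit voter sets.
class CatchupGate:
    def __init__(self):
        self.is_participating = False
        self.stashed = []

    def on_three_pc_msg(self, msg):
        if not self.is_participating:
            self.stashed.append(msg)  # no vote until caught up
            return None
        return 'vote'

    def on_catchup_complete(self):
        self.is_participating = True
        replay, self.stashed = self.stashed, []
        return replay                 # stashed messages are processed now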
def setup(looper, startedNodes, up, client1):
    # Get the master replica of the master protocol instance
    P = getPrimaryReplica(startedNodes)

    # Make `Delta` small enough so throughput check passes.
    for node in startedNodes:
        node.monitor.Delta = .001

    # make P (primary replica on master) faulty, i.e., slow to send
    # PRE-PREPARE for a specific client request only
    def by65SpecificPrePrepare(msg):
        if isinstance(msg, PrePrepare) and getattr(msg, f.REQ_ID.nm) == 2:
            return 65

    P.outBoxTestStasher.delay(by65SpecificPrePrepare)

    sendReqsToNodesAndVerifySuffReplies(looper, client1, numReqs=5, timeout=80)

    return adict(nodes=startedNodes)
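# The stasher convention assumed by `by65SpecificPrePrepare` above: a delay
# predicate receives each message and returns the number of seconds to hold
# it back, or None to let it pass. A hypothetical generalisation that delays
# PRE-PREPAREs for a chosen set of request ids looks like this:
def delay_pre_prepares_for_req_ids(req_ids, seconds):
    def predicate(msg):
        if isinstance(msg, PrePrepare) and getattr(msg, f.REQ_ID.nm, None) in req_ids:
            return seconds
    return predicate


# e.g. P.outBoxTestStasher.delay(delay_pre_prepares_for_req_ids({2, 3}, 65))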
def testClientConnectToRestartedNodes(looper, txnPoolNodeSet, tdirWithPoolTxns,
                                      poolTxnClientNames, poolTxnData, tconf,
                                      poolTxnNodeNames,
                                      allPluginsPath):
    name = poolTxnClientNames[-1]
    seed = poolTxnData["seeds"][name]
    newClient, w = genTestClient(tmpdir=tdirWithPoolTxns, nodes=txnPoolNodeSet,
                                 name=name, usePoolLedger=True)
    looper.add(newClient)
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, newClient,
                                                  *txnPoolNodeSet)
    sendReqsToNodesAndVerifySuffReplies(looper, w, newClient, 1, 1)
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # looper.run(newClient.ensureDisconnectedToNodes(timeout=60))
    txnPoolNodeSet = []
    for nm in poolTxnNodeNames:
        node = TestNode(nm, basedirpath=tdirWithPoolTxns,
                        config=tconf, pluginPaths=allPluginsPath)
        looper.add(node)
        txnPoolNodeSet.append(node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet, retryWait=1,
                        timeout=10)

    def chk():
        for node in txnPoolNodeSet:
            assert node.isParticipating

    looper.run(eventually(chk, retryWait=1, timeout=10))

    bootstrapClientKeys(w.defaultId, w.getVerkey(), txnPoolNodeSet)

    req = sendRandomRequest(w, newClient)
    checkSufficientRepliesForRequests(looper, newClient, [req, ],
                                      timeoutPerReq=10)
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, newClient,
                                                  *txnPoolNodeSet)

    sendReqsToNodesAndVerifySuffReplies(looper, w, newClient, 1, 1)
def testPostingLatency(postingStatsEnabled, looper: Looper,
                       nodeSet: TestNodeSet,
                       wallet1, client1):
    """
    The latencies (master's as well as the average of the backups') after
    `DashboardUpdateFreq` seconds and before sending any requests should be zero.
    Send `n` requests within `LatencyWindowSize` seconds; the latencies measured
    up to `LatencyWindowSize` should reflect those `n` requests.
    After `LatencyWindowSize` seconds the latencies should be zero again.
    """
    # Run for latency window duration so that `latenciesByMasterInLast` and
    # `latenciesByBackupsInLast` become empty
    looper.runFor(config.LatencyWindowSize)
    reqCount = 10
    for node in nodeSet:
        assert node.monitor.masterLatency == 0
        assert node.monitor.avgBackupLatency == 0

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqCount,
                                        nodeSet.f,
                                        timeoutPerReq=20)

    for node in nodeSet:
        assert node.monitor.masterLatency > 0
        assert node.monitor.avgBackupLatency > 0

    looper.runFor(config.DashboardUpdateFreq)

    for node in nodeSet:
        assert node.monitor.spylog.count(Monitor.sendLatencies.__name__) > 0

    # Run for latency window duration so that `latenciesByMasterInLast` and
    # `latenciesByBackupsInLast` become empty
    looper.runFor(config.LatencyWindowSize)

    def chk():
        for node in nodeSet:
            assert node.monitor.masterLatency == 0
            assert node.monitor.avgBackupLatency == 0

    looper.run(eventually(chk, retryWait=1, timeout=10))
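# The latency counterpart of the throughput sketch further above (again a
# hypothetical model, not the Monitor class): per-request latencies are kept
# with the time they were recorded, entries older than LatencyWindowSize are
# evicted, and the reported latency is the average of whatever survives, so
# it drops back to zero once the window passes without new requests.
class WindowedLatency:
    def __init__(self, window_size):
        self.window_size = window_size
        self.samples = []  # (recorded_at, latency) pairs

    def request_ordered(self, recorded_at, latency):
        self.samples.append((recorded_at, latency))

    def average_latency(self, now):
        self.samples = [(t, l) for t, l in self.samples
                        if now - t <= self.window_size]
        return (sum(l for _, l in self.samples) / len(self.samples)
                if self.samples else 0)


lat_sketch = WindowedLatency(window_size=5.0)
lat_sketch.request_ordered(recorded_at=0.0, latency=0.3)
assert lat_sketch.average_latency(now=1.0) > 0
assert lat_sketch.average_latency(now=10.0) == 0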
def testElectionsAfterViewChange(delayedPerf, looper: Looper,
                                 nodeSet: TestNodeSet, up, client1):
    """
    Test that a primary election does happen after a view change
    """

    # Delay processing of PRE-PREPARE from all non-primary replicas of master
    # so that the master's throughput falls and a view change happens
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelay(10, 0))

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 4)

    # Ensure a view change happened for every node and its primary elector
    for node in nodeSet:
        looper.run(eventually(partial(checkViewChangeInitiatedForNode, node, 0),
                              retryWait=1, timeout=20))

    # Ensure elections are done again and pool is setup again with appropriate
    # protocol instances and each protocol instance is setup properly too
    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=30)
def step1(looper, startedNodes, up, client1):
    """
    Stand up a pool of nodes and send 5 requests via the client
    """
    # the master instance has a primary replica, call it P
    P = getPrimaryReplica(startedNodes)

    requests = sendReqsToNodesAndVerifySuffReplies(looper, client1, 5)
    # profile_this(sendReqsToNodesAndVerifySuffReplies, looper, client1, 5)

    return adict(P=P,
                 nodes=startedNodes,
                 requests=requests)
def testViewNotChanged(looper: Looper, nodeSet: TestNodeSet, up, client1):
    """
    Test that a view change is not done when the performance of master does
    not go down
    """
    """
    Send multiple requests to the client and delay some requests by all
    backup instances to ensure master instance
    is always faster than backup instances and there is no view change
    """

    # Delay PRE-PREPARE for all backup protocol instances so master performs
    # better
    for i in range(1, F + 1):
        nonPrimReps = getNonPrimaryReplicas(nodeSet, i)  # type: Iterable[TestReplica]
        for r in nonPrimReps:
            r.node.nodeIbStasher.delay(ppDelay(10, i))

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 5)

    checkViewNoForNodes(nodeSet, 0)
def testViewChangeCase1(nodeSet, looper, up, wallet1, client1, viewNo):
    """
    A node will change view, even though it does not itself find the master to
    be degraded, when a quorum of nodes agree that the master's performance
    has degraded
    """

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(nodeSet, 0, 10)

    pr = getPrimaryReplica(nodeSet, 0)
    reluctantNode = pr.node

    # Count sent instance changes of all nodes
    sentInstChanges = {}
    instChngMethodName = Node.sendInstanceChange.__name__
    for n in nodeSet:
        sentInstChanges[n.name] = n.spylog.count(instChngMethodName)

    # The reluctant node never says the master is degraded on its own
    reluctantNode.monitor.isMasterDegraded = types.MethodType(
        lambda x: False, reluctantNode.monitor)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    # Check that view change happened for all nodes
    looper.run(eventually(partial(checkViewNoForNodes, nodeSet, viewNo + 1),
                          retryWait=1, timeout=20))

    # All nodes except the reluctant node should have voted for a view change
    # and thus must have called `sendInstanceChange`
    for n in nodeSet:
        if n.name != reluctantNode.name:
            assert n.spylog.count(instChngMethodName) > \
                   sentInstChanges.get(n.name, 0)
        else:
            assert n.spylog.count(instChngMethodName) == \
                   sentInstChanges.get(n.name, 0)
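# A minimal sketch of the quorum rule this test exercises (hypothetical, not
# plenum's actual view-change machinery): a node only *proposes* an instance
# change when it sees the master as degraded itself, but it still *joins* the
# view change once instance-change votes for the next view arrive from a
# quorum of nodes. The exact quorum size is a protocol detail and is taken as
# a parameter here.
class InstanceChangeVotes:
    def __init__(self, quorum_size):
        self.quorum_size = quorum_size
        self.voters = set()

    def add_vote(self, node_name):
        """Record a vote for the next view; True once the quorum is reached."""
        self.voters.add(node_name)
        return len(self.voters) >= self.quorum_size


# With 4 nodes, the three non-reluctant nodes voting is enough to make even
# the reluctant node switch to viewNo + 1:
votes = InstanceChangeVotes(quorum_size=3)
assert not votes.add_vote('Alpha')
assert not votes.add_vote('Beta')
assert votes.add_vote('Gamma')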
def nodeCreatedAfterSomeTxns(
    txnPoolNodesLooper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnStewardData, tconf, allPluginsPath, request
):
    # with Looper(debug=True) as looper:
    client, wallet = buildPoolClientAndWallet(poolTxnStewardData, tdirWithPoolTxns, clientClass=TestClient)
    txnPoolNodesLooper.add(client)
    txnPoolNodesLooper.run(client.ensureConnectedToNodes())
    txnCount = getValueFromModule(request, "txnCount", 5)
    sendReqsToNodesAndVerifySuffReplies(txnPoolNodesLooper, wallet, client, txnCount, timeoutPerReq=25)

    newStewardName = randomString()
    newNodeName = "Epsilon"
    newStewardClient, newStewardWallet, newNode = addNewStewardAndNode(
        txnPoolNodesLooper,
        client,
        wallet,
        newStewardName,
        newNodeName,
        tdirWithPoolTxns,
        tconf,
        allPluginsPath=allPluginsPath,
        autoStart=True,
    )
    yield txnPoolNodesLooper, newNode, client, wallet, newStewardClient, newStewardWallet
def testNodesReceiveClientMsgs(looper, txnPoolNodeSet, wallet1, client1,
                               client1Connected):
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, client1,
                                                  *txnPoolNodeSet)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 1)