def testNonPrimarySendsAPrePrepare(looper, txnPoolNodeSet, setup, propagated1):
    nonPrimaryReplicas = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    firstNpr = nonPrimaryReplicas[0]
    remainingNpr = nonPrimaryReplicas[1:]

    def sendPrePrepareFromNonPrimary():
        firstNpr.requestQueues[DOMAIN_LEDGER_ID].add(propagated1.key)
        ppReq = firstNpr.create3PCBatch(DOMAIN_LEDGER_ID)
        firstNpr.sendPrePrepare(ppReq)
        return ppReq

    ppr = sendPrePrepareFromNonPrimary()

    def chk():
        for r in remainingNpr:
            recvdPps = recvd_pre_prepares(r)
            assert len(recvdPps) == 1
            assert compareNamedTuple(recvdPps[0], ppr,
                                     f.DIGEST.nm, f.STATE_ROOT.nm,
                                     f.TXN_ROOT.nm)
            nodeSuspicions = len(getNodeSuspicions(
                r.node, Suspicions.PPR_FRM_NON_PRIMARY.code))
            assert nodeSuspicions == 1

    timeout = waits.expectedClientRequestPropagationTime(len(txnPoolNodeSet))
    looper.run(eventually(chk,
                          retryWait=.5, timeout=timeout))
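
# Note: compareNamedTuple above is assumed to compare only the listed fields of
# the two messages. A minimal, hypothetical equivalent (not the helper from the
# plenum test utilities) is sketched below.
def fields_equal(msg_a, msg_b, *field_names):
    # Compare only the named fields of two namedtuple-like messages.
    return all(getattr(msg_a, f) == getattr(msg_b, f) for f in field_names)

# e.g. fields_equal(recvdPps[0], ppr, f.DIGEST.nm, f.STATE_ROOT.nm, f.TXN_ROOT.nm)
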
def testPrimarySendsAPrepareAndMarkedSuspicious(looper, txnPoolNodeSet,
                                                delay_commits, preprepared1):
    def sendPrepareFromPrimary(instId):
        primary = getPrimaryReplica(txnPoolNodeSet, instId)
        viewNo, ppSeqNo = next(
            iter(primary._ordering_service.sent_preprepares.keys()))
        ppReq = primary._ordering_service.sent_preprepares[viewNo, ppSeqNo]
        primary._ordering_service._do_prepare(ppReq)

        def chk():
            for r in getNonPrimaryReplicas(txnPoolNodeSet, instId):
                l = len([
                    param for param in getAllArgs(
                        r._ordering_service,
                        r._ordering_service.process_prepare)
                    if param['sender'] == primary.name
                ])
                assert l == 1

        looper.run(eventually(chk))

    sendPrepareFromPrimary(0)

    for node in txnPoolNodeSet:
        if node in getNonPrimaryReplicas(txnPoolNodeSet, 0):
            frm, reason, code = getAllArgs(node, TestNode.reportSuspiciousNode)
            assert frm == getPrimaryReplica(txnPoolNodeSet, 0).node.name
            assert isinstance(reason, SuspiciousNode)
            assert len(getNodeSuspicions(node,
                                         Suspicions.PR_FRM_PRIMARY.code)) == 10
def test_primary_recvs_3phase_message_outside_watermarks(perf_chk_patched, chkFreqPatched, looper, txnPoolNodeSet,
                                                         sdk_pool_handle, sdk_wallet_client, reqs_for_logsize):
    """
    One of the primaries starts getting a lot of requests, more than its log
    size, and queues them up since they would go beyond its watermarks. This
    happens because the other nodes are slow in processing its PRE-PREPAREs.
    Eventually this primary sends PRE-PREPAREs for all requests and those
    requests complete.
    """
    tconf = perf_chk_patched
    delay = 5
    instId = 0
    reqs_to_send = 2 * reqs_for_logsize + 1
    logger.debug('Will send {} requests'.format(reqs_to_send))

    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    pr = getPrimaryReplica(txnPoolNodeSet, instId)
    from plenum.server.replica import TPCStat
    orderedCount = pr.stats.get(TPCStat.OrderSent)

    for r in npr:
        r.node.nodeIbStasher.delay(ppDelay(delay, instId))
        r.node.nodeIbStasher.delay(pDelay(delay, instId))

    tm_exec_1_batch = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    batch_count = math.ceil(reqs_to_send / tconf.Max3PCBatchSize)
    total_timeout = (tm_exec_1_batch + delay) * batch_count

    def chk():
        assert orderedCount + batch_count == pr.stats.get(TPCStat.OrderSent)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_to_send)
    looper.run(eventually(chk, retryWait=1, timeout=total_timeout))
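
# The timeout arithmetic above can be checked with concrete numbers; the values
# below (Max3PCBatchSize=3, reqs_for_logsize=6, 15s per batch) are illustrative
# assumptions, not the real fixture values.
import math

max_batch_size = 3            # assumed tconf.Max3PCBatchSize
reqs_for_logsize = 6          # assumed fixture value
tm_exec_1_batch = 15          # assumed seconds to execute one batch
delay = 5                     # the stasher delay used in the test above

reqs_to_send = 2 * reqs_for_logsize + 1                   # 13 requests
batch_count = math.ceil(reqs_to_send / max_batch_size)    # 5 batches
total_timeout = (tm_exec_1_batch + delay) * batch_count   # 100 seconds
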
def testOrderingWhenPrePrepareNotReceived(looper, nodeSet, up, client1,
                                          wallet1):
    """
    Send COMMITs and PREPAREs but delay the PRE-PREPARE so that enough
    PREPAREs and COMMITs are received. The request should not be ordered until
    the PRE-PREPARE is received, and ordering should happen exactly once.
    """
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    slowRep = nonPrimReps[0]
    slowNode = slowRep.node
    slowNode.nodeIbStasher.delay(ppDelay(10, 0))
    sendRandomRequest(wallet1, client1)

    stash = []
    origMethod = slowRep.processReqDigest

    def patched(self, msg):
        stash.append(msg)

    patchedMethod = types.MethodType(patched, slowRep)
    slowRep.processReqDigest = patchedMethod

    def chk1():
        assert len(slowRep.commitsWaitingForPrepare) > 0

    looper.run(eventually(chk1, timeout=4))

    for item in stash:
        origMethod(item)

    def chk2():
        assert len(slowRep.commitsWaitingForPrepare) == 0
        assert slowRep.spylog.count(slowRep.doOrder.__name__) == 1

    looper.run(eventually(chk2, timeout=12))
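
# The patching trick above (and in several examples below) relies on
# types.MethodType, which binds a plain function to a single object so that
# only that replica's behaviour changes. A minimal self-contained sketch,
# independent of the plenum test utilities:
import types


class FakeReplica:
    def processReqDigest(self, msg):
        print('processing', msg)


fake_replica = FakeReplica()
stash = []


def patched(self, msg):
    # Stash the message instead of processing it.
    stash.append(msg)


# Rebind only this instance's method; other FakeReplica objects are unaffected.
fake_replica.processReqDigest = types.MethodType(patched, fake_replica)
fake_replica.processReqDigest('req-1')
assert stash == ['req-1']
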
def testOrderingCase1(looper, nodeSet, up, client1, wallet1):
    """
    Scenario -> The replica has not received the PRE-PREPARE and has not
    received the request for ordering, but has received enough COMMITs to
    start ordering. It queues up the request so that ordering can be
    triggered once a PRE-PREPARE is received or the request is received
    for ordering.
    https://www.pivotaltracker.com/story/show/125239401

    Reproduced by - Pick a node whose replica is not primary, make the
    replica ignore the forwarded request, and delay reception of the
    PRE-PREPARE long enough that sufficient COMMITs arrive to trigger
    ordering.
    """
    delay = 10
    replica = getNonPrimaryReplicas(nodeSet, instId=0)[0]
    delaysPrePrepareProcessing(replica.node, delay=delay, instId=0)

    def doNotProcessReqDigest(self, rd: ReqDigest):
        pass

    patchedMethod = types.MethodType(doNotProcessReqDigest, replica)
    replica.processRequest = patchedMethod

    def chk(n):
        assert replica.spylog.count(replica.doOrder.__name__) == n

    sendRandomRequest(wallet1, client1)
    timeout = delay - 5
    looper.run(eventually(chk, 0, retryWait=1, timeout=timeout))
    timeout = delay + 5
    looper.run(eventually(chk, 1, retryWait=1, timeout=timeout))
def test_checkpoints_removed_on_master_non_primary_replica_after_catchup(
        chkFreqPatched, txnPoolNodeSet, view_setup, clear_checkpoints):

    replica = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1]
    others = set(getAllReplicas(txnPoolNodeSet, 0)) - {replica}
    node = replica.node

    node.master_replica.last_ordered_3pc = (2, 12)

    replica.checkpoints[(6, 10)] = CheckpointState(seqNo=10,
                                                   digests=[],
                                                   digest='digest-6-10',
                                                   receivedDigests={r.name: 'digest-6-10' for r in others},
                                                   isStable=True)

    replica.checkpoints[(11, 15)] = CheckpointState(seqNo=12,
                                                    digests=['digest-11', 'digest-12'],
                                                    digest=None,
                                                    receivedDigests={},
                                                    isStable=False)

    replica.stashedRecvdCheckpoints[2] = {}

    replica.stashedRecvdCheckpoints[2][(11, 15)] = {}
    for r in others:
        replica.stashedRecvdCheckpoints[2][(11, 15)][r.name] = \
            Checkpoint(instId=0,
                       viewNo=2,
                       seqNoStart=11,
                       seqNoEnd=15,
                       digest='digest-11-15')

    replica.stashedRecvdCheckpoints[2][(16, 20)] = {}
    for r in others:
        replica.stashedRecvdCheckpoints[2][(16, 20)][r.name] = \
            Checkpoint(instId=0,
                       viewNo=2,
                       seqNoStart=16,
                       seqNoEnd=20,
                       digest='digest-16-20')

    replica.stashedRecvdCheckpoints[2][(21, 25)] = {}
    replica.stashedRecvdCheckpoints[2][(21, 25)][next(iter(others)).name] = \
        Checkpoint(instId=0,
                   viewNo=2,
                   seqNoStart=21,
                   seqNoEnd=25,
                   digest='digest-21-25')

    # Simulate catch-up completion
    node.ledgerManager.last_caught_up_3PC = (2, 20)
    node.allLedgersCaughtUp()

    assert len(replica.checkpoints) == 0

    assert len(replica.stashedRecvdCheckpoints) == 1
    assert 2 in replica.stashedRecvdCheckpoints
    assert len(replica.stashedRecvdCheckpoints[2]) == 1
    assert (21, 25) in replica.stashedRecvdCheckpoints[2]
    assert len(replica.stashedRecvdCheckpoints[2][(21, 25)]) == 1
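
# The surviving stash above can be reasoned about directly: catch-up completed
# at (view 2, seq 20), so every stashed checkpoint whose range ends at or below
# 20 is discarded. A small illustrative check of that rule (not part of the
# original test):
caught_up_seq_no = 20
stashed_ranges = [(11, 15), (16, 20), (21, 25)]
surviving = [(start, end) for (start, end) in stashed_ranges
             if end > caught_up_seq_no]
assert surviving == [(21, 25)]
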
def test_slow_node_has_warn_unordered_log_msg(looper, txnPoolNodeSet, wallet1,
                                              client1, patch_monitors):
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0]
    slow_node = npr.node

    monitor = txnPoolNodeSet[0].monitor
    delay = monitor.WARN_NOT_PARTICIPATING_MIN_DIFF_SEC * \
            monitor.WARN_NOT_PARTICIPATING_UNORDERED_NUM + 10
    delaysCommitProcessing(slow_node, delay=delay)

    assert no_any_warn(*txnPoolNodeSet), \
        'no node should have warnings before the test'

    for i in range(monitor.WARN_NOT_PARTICIPATING_UNORDERED_NUM):
        req = sendRandomRequest(wallet1, client1)
        waitForSufficientRepliesForRequests(looper, client1, requests=[req])
        looper.runFor(monitor.WARN_NOT_PARTICIPATING_MIN_DIFF_SEC)

    others = [node for node in txnPoolNodeSet if node.name != slow_node.name]
    assert no_any_warn(*others), \
        'other nodes should not have warnings after the test'
    assert has_some_warn(slow_node), \
        'the slow node should have the warning'

    ordered_requests_keys_len_before = len(monitor.ordered_requests_keys)
    # wait at least the warning window time
    looper.runFor(monitor.WARN_NOT_PARTICIPATING_WINDOW_MINS * 60)
    req = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[req])
    assert no_any_warn(*others), 'other nodes should not have warnings'
    assert no_last_warn(slow_node), \
        'the last call of warn_has_lot_unordered_requests returned False, ' \
        'so the slow node has no warning for now'
    assert len(monitor.ordered_requests_keys) < ordered_requests_keys_len_before, \
        "ordered_requests_keys was cleaned up"
def test_commits_recvd_first(looper, txnPoolNodeSet,
                             sdk_wallet_client, sdk_pool_handle):
    slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = 50
    slow_node.nodeIbStasher.delay(ppDelay(delay, 0))
    slow_node.nodeIbStasher.delay(pDelay(delay, 0))

    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         num_reqs=20,
                                         num_batches=4)

    assert not slow_node.master_replica.prePrepares
    assert not slow_node.master_replica.prepares
    assert not slow_node.master_replica.commits
    assert len(slow_node.master_replica.commitsWaitingForPrepare) > 0

    slow_node.reset_delays_and_process_delayeds()
    waitNodeDataEquality(looper, slow_node, *other_nodes)
    assert check_if_all_equal_in_list([n.master_replica.ordered
                                       for n in txnPoolNodeSet])

    assert slow_node.master_replica.prePrepares
    assert slow_node.master_replica.prepares
    assert slow_node.master_replica.commits
    assert not slow_node.master_replica.commitsWaitingForPrepare
def test_forced_upgrade_handled_once_if_ordered_and_then_request_received(
        looper, nodeSet, sdk_pool_handle, sdk_wallet_trustee,
        validUpgradeExpForceTrue):
    """
    Verifies that a POOL_UPGRADE force=true request is handled exactly once
    when the node commits the transaction to the ledger and only after that
    receives the request directly from the client
    """
    slow_node = getNonPrimaryReplicas(nodeSet, instId=0)[-1].node
    slow_node.clientIbStasher.delay(req_delay())

    sdk_ensure_upgrade_sent(looper, sdk_pool_handle, sdk_wallet_trustee,
                            validUpgradeExpForceTrue)

    looper.run(
        eventually(checkUpgradeScheduled, [slow_node],
                   validUpgradeExpForceTrue[VERSION],
                   retryWait=1,
                   timeout=waits.expectedUpgradeScheduled()))

    slow_node.clientIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(waits.expectedUpgradeScheduled())

    checkUpgradeScheduled([slow_node], validUpgradeExpForceTrue[VERSION])
    assert len(list(slow_node.upgrader._actionLog)) == 1
    assert slow_node.upgrader._actionLog.lastEvent[1] == \
           UpgradeLog.SCHEDULED
def testOrderingCase1(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    Scenario -> The replica has not received the PRE-PREPARE and has not
    received the request for ordering, but has received enough COMMITs to
    start ordering. It queues up the request so that ordering can be
    triggered once a PRE-PREPARE is received or the request is received
    for ordering.
    https://www.pivotaltracker.com/story/show/125239401

    Reproduced by - Pick a node whose replica is not primary, make the
    replica ignore the forwarded request, and delay reception of the
    PRE-PREPARE long enough that sufficient COMMITs arrive to trigger
    ordering.
    """
    delay = 10
    replica = getNonPrimaryReplicas(txnPoolNodeSet, instId=0)[0]
    delaysPrePrepareProcessing(replica.node, delay=delay, instId=0)

    def doNotProcessReqDigest(self, _):
        pass

    patchedMethod = types.MethodType(doNotProcessReqDigest, replica)
    replica.processRequest = patchedMethod

    def chk(n):
        assert replica.spylog.count(replica.doOrder.__name__) == n

    sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
    timeout = delay - 5
    looper.run(eventually(chk, 0, retryWait=1, timeout=timeout))
    timeout = delay + 5
    looper.run(eventually(chk, 1, retryWait=1, timeout=timeout))
def split_nodes(nodes):
    primary_node = get_master_primary_node(nodes)
    slow_node = getNonPrimaryReplicas(nodes, 0)[-1].node
    other_nodes = [n for n in nodes if n != slow_node]
    other_non_primary_nodes = [n for n in nodes if n not in
                               (slow_node, primary_node)]
    return slow_node, other_nodes, primary_node, other_non_primary_nodes
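
# Hedged usage sketch for split_nodes; the txnPoolNodeSet fixture name is an
# assumption taken from the surrounding examples.
slow_node, other_nodes, primary_node, other_non_primary_nodes = \
    split_nodes(txnPoolNodeSet)
# The slow node is picked from the non-primary replicas, so the primary node
# always ends up in other_nodes and never in other_non_primary_nodes.
assert primary_node in other_nodes
assert slow_node not in other_non_primary_nodes
assert primary_node not in other_non_primary_nodes
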
def test_node_handles_forced_upgrade_on_propagate(looper, nodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_trustee,
                                                  validUpgradeExpForceTrue):
    """
    Verifies that a POOL_UPGRADE force=true request is handled immediately
    when the node receives it in a PROPAGATE from any other node
    """
    slow_node = getNonPrimaryReplicas(nodeSet, instId=0)[-1].node

    # Stash all except PROPAGATEs from Gamma
    slow_node.clientIbStasher.delay(req_delay())
    slow_node.nodeIbStasher.delay(ppgDelay(sender_filter='Alpha'))
    slow_node.nodeIbStasher.delay(ppgDelay(sender_filter='Beta'))
    slow_node.nodeIbStasher.delay(ppDelay())
    slow_node.nodeIbStasher.delay(pDelay())
    slow_node.nodeIbStasher.delay(cDelay())

    sdk_send_upgrade(looper, sdk_pool_handle, sdk_wallet_trustee,
                     validUpgradeExpForceTrue)

    looper.run(
        eventually(checkUpgradeScheduled, [slow_node],
                   validUpgradeExpForceTrue[VERSION],
                   retryWait=1,
                   timeout=waits.expectedUpgradeScheduled()))
def test_commits_recvd_first(looper, txnPoolNodeSet, sdk_wallet_client,
                             sdk_pool_handle):
    slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = 50
    slow_node.nodeIbStasher.delay(ppDelay(delay, 0))
    slow_node.nodeIbStasher.delay(pDelay(delay, 0))

    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         num_reqs=20,
                                         num_batches=4)

    assert not slow_node.master_replica._ordering_service.prePrepares
    assert not slow_node.master_replica._ordering_service.prepares
    assert not slow_node.master_replica._ordering_service.commits
    assert len(slow_node.master_replica._ordering_service.
               commitsWaitingForPrepare) > 0

    slow_node.reset_delays_and_process_delayeds()
    waitNodeDataEquality(looper, slow_node, *other_nodes)
    assert check_if_all_equal_in_list(
        [n.master_replica._ordering_service.ordered for n in txnPoolNodeSet])

    assert slow_node.master_replica._ordering_service.prePrepares
    assert slow_node.master_replica._ordering_service.prepares
    assert slow_node.master_replica._ordering_service.commits
    assert not slow_node.master_replica._ordering_service.commitsWaitingForPrepare
def test_setup_last_ordered_for_non_master_without_catchup(txnPoolNodeSet,
                                                           sdk_wallet_client):
    inst_id = 1
    last_ordered_3pc = (0, 12)
    timestamp = time.time()
    ppSeqNo = 16
    replica = getNonPrimaryReplicas(txnPoolNodeSet, inst_id)[-1]
    replica.last_ordered_3pc = last_ordered_3pc
    replica.preparesWaitingForPrePrepare.clear()
    replica.prePreparesPendingPrevPP.clear()
    preprepare, prepare = \
        _create_prepare_and_preprepare(inst_id,
                                       replica.viewNo,
                                       ppSeqNo,
                                       timestamp,
                                       sdk_wallet_client)
    replica.prePreparesPendingPrevPP[replica.viewNo, ppSeqNo] = deque()
    replica.prePreparesPendingPrevPP[replica.viewNo, ppSeqNo] \
        .append((preprepare, replica.primaryName))
    replica.preparesWaitingForPrePrepare[replica.viewNo, ppSeqNo] = deque()
    for node in txnPoolNodeSet:
        replica.preparesWaitingForPrePrepare[replica.viewNo, ppSeqNo] \
            .append((prepare, node.name))

    replica._setup_last_ordered_for_non_master()
    assert replica.last_ordered_3pc == last_ordered_3pc
def test_delay_commits_for_one_node(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client,
                                    slow_node_is_next_primary, vc_counts):
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    expected_view_no = current_view_no + 1 if vc_counts == 'once' else current_view_no + 2
    next_primary = get_next_primary_name(txnPoolNodeSet, expected_view_no)
    pretenders = [
        r.node for r in getNonPrimaryReplicas(txnPoolNodeSet)
        if not r.isPrimary
    ]
    if slow_node_is_next_primary:
        delayed_node = [n for n in pretenders if n.name == next_primary][0]
    else:
        delayed_node = [n for n in pretenders if n.name != next_primary][0]

    with delay_rules_without_processing(delayed_node.nodeIbStasher, cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)

        trigger_view_change(txnPoolNodeSet, expected_view_no)
        if vc_counts == 'twice':
            trigger_view_change(txnPoolNodeSet, expected_view_no)

    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_primary_recvs_3phase_message_outside_watermarks(perf_chk_patched, chkFreqPatched, looper, txnPoolNodeSet,
                                                         sdk_pool_handle, sdk_wallet_client, reqs_for_logsize):
    """
    One of the primaries starts getting a lot of requests, more than its log
    size, and queues them up since they would go beyond its watermarks. This
    happens because the other nodes are slow in processing its PRE-PREPAREs.
    Eventually this primary sends PRE-PREPAREs for all requests and those
    requests complete.
    """
    tconf = perf_chk_patched
    delay = 2
    instId = 0
    reqs_to_send = 2 * reqs_for_logsize + 1
    logger.debug('Will send {} requests'.format(reqs_to_send))

    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    pr = getPrimaryReplica(txnPoolNodeSet, instId)
    orderedCount = pr.stats.get(TPCStat.OrderSent)

    for r in npr:
        r.node.nodeIbStasher.delay(ppDelay(delay, instId))
        r.node.nodeIbStasher.delay(pDelay(delay, instId))

    tm_exec_1_batch = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    batch_count = math.ceil(reqs_to_send / tconf.Max3PCBatchSize)
    total_timeout = (tm_exec_1_batch + delay) * batch_count

    def chk():
        assert orderedCount + batch_count == pr.stats.get(TPCStat.OrderSent)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_to_send)
    looper.run(eventually(chk, retryWait=1, timeout=total_timeout))
def testNonPrimarySendsAPrePrepare(looper, nodeSet, setup, propagated1):
    primaryReplica = getPrimaryReplica(nodeSet, instId)
    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, instId)
    firstNpr = nonPrimaryReplicas[0]
    remainingNpr = nonPrimaryReplicas[1:]

    def sendPrePrepareFromNonPrimary(replica):
        firstNpr.doPrePrepare(propagated1.reqDigest)

        return PrePrepare(
                replica.instId,
                firstNpr.viewNo,
                firstNpr.lastPrePrepareSeqNo,
                propagated1.identifier,
                propagated1.reqId,
                propagated1.digest,
                time.time())

    ppr = sendPrePrepareFromNonPrimary(firstNpr)

    def chk():
        for r in (primaryReplica, *remainingNpr):
            recvdPps = recvdPrePrepare(r)
            assert len(recvdPps) == 1
            assert recvdPps[0]['pp'][:-1] == ppr[:-1]
            nodeSuspicions = len(getNodeSuspicions(
                r.node, Suspicions.PPR_FRM_NON_PRIMARY.code))
            assert nodeSuspicions == 1

    looper.run(eventually(chk,
                          retryWait=.5, timeout=5))
def setup(tconf, looper, txnPoolNodeSet, client, wallet1):
    # Patch the 3-phase batch creation method so that it sends an incorrect
    # digest once
    pr, otherR = getPrimaryReplica(txnPoolNodeSet, instId=0), \
                 getNonPrimaryReplicas(txnPoolNodeSet, instId=0)

    reqs = sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize)
    waitForSufficientRepliesForRequests(
        looper,
        client,
        requests=reqs,
        customTimeoutPerReq=tconf.Max3PCBatchWait)
    stateRoot = pr.stateRootHash(DOMAIN_LEDGER_ID, to_str=False)

    origMethod = pr.create3PCBatch
    malignedOnce = None

    def badMethod(self, ledgerId):
        nonlocal malignedOnce
        pp = origMethod(ledgerId)
        if not malignedOnce:
            pp = updateNamedTuple(pp, digest=pp.digest + '123')
            malignedOnce = True
        return pp

    pr.create3PCBatch = types.MethodType(badMethod, pr)
    sendRandomRequests(wallet1, client, tconf.Max3PCBatchSize)
    return pr, otherR, stateRoot
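
# The digest-corruption idea in badMethod can be sketched with the standard
# library alone; updateNamedTuple is assumed to behave like namedtuple._replace,
# and PrePrepareStub is a hypothetical stand-in for the real PrePrepare type.
from collections import namedtuple

PrePrepareStub = namedtuple('PrePrepareStub', ['viewNo', 'ppSeqNo', 'digest'])

pp = PrePrepareStub(viewNo=0, ppSeqNo=1, digest='abc')
bad_pp = pp._replace(digest=pp.digest + '123')   # corrupt the digest once
assert bad_pp.digest == 'abc123' and pp.digest == 'abc'
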
def testElectionsAfterViewChange(delayedPerf, looper: Looper,
                                 nodeSet: TestNodeSet, up, wallet1, client1):
    """
    Test that a primary election does happen after a view change
    """

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's throughput falls
    # and view changes
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelay(10, 0))

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    # Ensure view change happened for both node and its primary elector
    for node in nodeSet:
        looper.run(
            eventually(partial(checkViewChangeInitiatedForNode, node, 1),
                       retryWait=1,
                       timeout=20))

    # Ensure elections are done again and pool is setup again with appropriate
    # protocol instances and each protocol instance is setup properly too
    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=30)
def test_setup_last_ordered_for_non_master_without_quorum_of_prepares(
        txnPoolNodeSet, sdk_wallet_client):
    inst_id = 1
    replica = getNonPrimaryReplicas(txnPoolNodeSet, inst_id)[-1]
    replica._ordering_service.preparesWaitingForPrePrepare.clear()
    replica._ordering_service.prePreparesPendingPrevPP.clear()
    replica.last_ordered_3pc = (0, 0)
    timestamp = time.time()
    ppSeqNo = 5
    preprepare, prepare = \
        _create_prepare_and_preprepare(inst_id,
                                       replica.viewNo,
                                       ppSeqNo,
                                       timestamp,
                                       sdk_wallet_client)
    replica._ordering_service.prePreparesPendingPrevPP[replica.viewNo,
                                                       ppSeqNo] = deque()
    replica._ordering_service.prePreparesPendingPrevPP[replica.viewNo, ppSeqNo] \
        .append((preprepare, replica.primaryName))
    replica._ordering_service.preparesWaitingForPrePrepare[replica.viewNo,
                                                           ppSeqNo] = deque()
    replica._ordering_service.preparesWaitingForPrePrepare[replica.viewNo, ppSeqNo] \
        .append((prepare, txnPoolNodeSet[-1].name))
    replica._ordering_service.l_setup_last_ordered_for_non_master()
    assert replica.last_ordered_3pc == (0, 0)
def testNonPrimarySendsAPrePrepare(looper, nodeSet, setup, propagated1):
    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, instId)
    firstNpr = nonPrimaryReplicas[0]
    remainingNpr = nonPrimaryReplicas[1:]

    def sendPrePrepareFromNonPrimary():
        firstNpr.requestQueues[DOMAIN_LEDGER_ID].add(propagated1.key)
        ppReq = firstNpr.create3PCBatch(DOMAIN_LEDGER_ID)
        firstNpr.sendPrePrepare(ppReq)
        return ppReq

    ppr = sendPrePrepareFromNonPrimary()

    def chk():
        for r in remainingNpr:
            recvdPps = recvd_pre_prepares(r)
            assert len(recvdPps) == 1
            assert compareNamedTuple(recvdPps[0], ppr, f.DIGEST.nm,
                                     f.STATE_ROOT.nm, f.TXN_ROOT.nm)
            nodeSuspicions = len(
                getNodeSuspicions(r.node, Suspicions.PPR_FRM_NON_PRIMARY.code))
            assert nodeSuspicions == 1

    timeout = waits.expectedClientRequestPropagationTime(len(nodeSet))
    looper.run(eventually(chk, retryWait=.5, timeout=timeout))
def testPrimarySendsAPrepareAndMarkedSuspicious(looper, nodeSet, delay_commits,
                                                preprepared1):
    def sendPrepareFromPrimary(instId):
        primary = getPrimaryReplica(nodeSet, instId)
        viewNo, ppSeqNo = next(iter(primary.sentPrePrepares.keys()))
        ppReq = primary.sentPrePrepares[viewNo, ppSeqNo]
        prepare = Prepare(instId, viewNo, ppSeqNo, ppReq.digest,
                          ppReq.stateRootHash, ppReq.txnRootHash)
        primary.doPrepare(prepare)

        def chk():
            for r in getNonPrimaryReplicas(nodeSet, instId):
                l = len([
                    param for param in getAllArgs(r, r.processPrepare)
                    if param['sender'] == primary.name
                ])
                assert l == 1

        looper.run(eventually(chk))

    sendPrepareFromPrimary(0)

    for node in nodeSet:
        if node in getNonPrimaryReplicas(nodeSet, 0):
            frm, reason, code = getAllArgs(node, TestNode.reportSuspiciousNode)
            assert frm == getPrimaryReplica(nodeSet, 0).node.name
            assert isinstance(reason, SuspiciousNode)
            assert len(getNodeSuspicions(node,
                                         Suspicions.PR_FRM_PRIMARY.code)) == 10
def setup(txnPoolNodeSet):
    primaryRep, nonPrimaryReps = getPrimaryReplica(txnPoolNodeSet, 0), \
                                 getNonPrimaryReplicas(txnPoolNodeSet, 0)

    faultyRep = nonPrimaryReps[0]
    makeNodeFaulty(
        faultyRep.node,
        partial(sendDuplicate3PhaseMsg, msgType=Commit, count=3, instId=0))

    # The node of the faulty replica above should not be blacklisted by any
    # other node since we are simulating multiple COMMIT messages and
    # want to check for a particular suspicion

    whitelistNode(faultyRep.node.name,
                  [node for node in txnPoolNodeSet if node != faultyRep.node],
                  Suspicions.DUPLICATE_CM_SENT.code)

    # If the request is ordered then COMMIT will be rejected much earlier
    for r in [primaryRep, *nonPrimaryReps]:

        def do_nothing(self, commit):
            pass

        r.doOrder = types.MethodType(do_nothing, r)

    return adict(primaryRep=primaryRep,
                 nonPrimaryReps=nonPrimaryReps,
                 faultyRep=faultyRep)
def test_setup_last_ordered_for_non_master_without_catchup(
        txnPoolNodeSet, sdk_wallet_client):
    inst_id = 1
    last_ordered_3pc = (0, 12)
    timestamp = time.time()
    ppSeqNo = 16
    replica = getNonPrimaryReplicas(txnPoolNodeSet, inst_id)[-1]
    replica.last_ordered_3pc = last_ordered_3pc
    replica.preparesWaitingForPrePrepare.clear()
    replica.prePreparesPendingPrevPP.clear()
    preprepare, prepare = \
        _create_prepare_and_preprepare(inst_id,
                                       replica.viewNo,
                                       ppSeqNo,
                                       timestamp,
                                       sdk_wallet_client)
    replica.prePreparesPendingPrevPP[replica.viewNo, ppSeqNo] = deque()
    replica.prePreparesPendingPrevPP[replica.viewNo, ppSeqNo] \
        .append((preprepare, replica.primaryName))
    replica.preparesWaitingForPrePrepare[replica.viewNo, ppSeqNo] = deque()
    for node in txnPoolNodeSet:
        replica.preparesWaitingForPrePrepare[replica.viewNo, ppSeqNo] \
            .append((prepare, node.name))

    replica._setup_last_ordered_for_non_master()
    assert replica.last_ordered_3pc == last_ordered_3pc
def testNonPrimarySendsAPrePrepare(looper, nodeSet, setup, propagated1):
    primaryReplica = getPrimaryReplica(nodeSet, instId)
    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, instId)
    firstNpr = nonPrimaryReplicas[0]
    remainingNpr = nonPrimaryReplicas[1:]

    def sendPrePrepareFromNonPrimary(replica):
        firstNpr.doPrePrepare(propagated1.reqDigest)

        return PrePrepare(
                replica.instId,
                firstNpr.viewNo,
                firstNpr.lastPrePrepareSeqNo,
                propagated1.identifier,
                propagated1.reqId,
                propagated1.digest,
                time.time())

    ppr = sendPrePrepareFromNonPrimary(firstNpr)

    def chk():
        for r in (primaryReplica, *remainingNpr):
            recvdPps = recvdPrePrepare(r)
            assert len(recvdPps) == 1
            assert recvdPps[0]['pp'][:-1] == ppr[:-1]
            nodeSuspicions = len(getNodeSuspicions(
                r.node, Suspicions.PPR_FRM_NON_PRIMARY.code))
            assert nodeSuspicions == 1

    looper.run(eventually(chk,
                          retryWait=.5, timeout=5))
def setup(txnPoolNodeSet):
    primaryRep, nonPrimaryReps = getPrimaryReplica(txnPoolNodeSet, 0), \
                                 getNonPrimaryReplicas(txnPoolNodeSet, 0)

    faultyRep = nonPrimaryReps[0]
    makeNodeFaulty(faultyRep.node, partial(sendDuplicate3PhaseMsg,
                                           msgType=Commit, count=3,
                                           instId=0))

    # The node of the faulty replica above should not be blacklisted by any
    # other node since we are simulating multiple COMMIT messages and
    # want to check for a particular suspicion

    whitelistNode(faultyRep.node.name,
                  [node for node in txnPoolNodeSet if node != faultyRep.node],
                  Suspicions.DUPLICATE_CM_SENT.code)

    # If the request is ordered then COMMIT will be rejected much earlier
    for r in [primaryRep, *nonPrimaryReps]:
        def do_nothing(self, commit):
            pass

        r.doOrder = types.MethodType(do_nothing, r)

    return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps,
                 faultyRep=faultyRep)
def testPrePrepareWithHighSeqNo(looper, txnPoolNodeSet, propagated1):
    def chk():
        for r in getNonPrimaryReplicas(txnPoolNodeSet, instId):
            nodeSuspicions = len(getNodeSuspicions(
                r.node, Suspicions.WRONG_PPSEQ_NO.code))
            assert nodeSuspicions == 1

    def checkPreprepare(replica, viewNo, ppSeqNo, req, numOfPrePrepares):
        assert (replica.prePrepares[viewNo, ppSeqNo][0]) == \
               (req.identifier, req.reqId, req.digest)

    primary = getPrimaryReplica(txnPoolNodeSet, instId)
    nonPrimaryReplicas = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    req = propagated1.reqDigest
    primary.doPrePrepare(req)
    timeout = waits.expectedPrePrepareTime(len(txnPoolNodeSet))
    for np in nonPrimaryReplicas:
        looper.run(
            eventually(checkPreprepare, np, primary.viewNo,
                       primary.lastPrePrepareSeqNo - 1, req, 1,
                       retryWait=.5, timeout=timeout))

    newReqDigest = (req.identifier, req.reqId + 1, req.digest)
    incorrectPrePrepareReq = PrePrepare(instId,
                                        primary.viewNo,
                                        primary.lastPrePrepareSeqNo + 2,
                                        *newReqDigest,
                                        get_utc_epoch())
    primary.send(incorrectPrePrepareReq, TPCStat.PrePrepareSent)

    timeout = waits.expectedPrePrepareTime(len(txnPoolNodeSet))
    looper.run(eventually(chk, retryWait=1, timeout=timeout))
def testPrePrepareWithHighSeqNo(looper, nodeSet, propagated1):
    def chk():
        for r in getNonPrimaryReplicas(nodeSet, instId):
            nodeSuspicions = len(getNodeSuspicions(r.node, Suspicions.WRONG_PPSEQ_NO.code))
            assert nodeSuspicions == 1

    def checkPreprepare(replica, viewNo, ppSeqNo, req, numOfPrePrepares):
        assert (replica.prePrepares[viewNo, ppSeqNo][0]) == (req.identifier, req.reqId, req.digest)

    primary = getPrimaryReplica(nodeSet, instId)
    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, instId)
    req = propagated1.reqDigest
    primary.doPrePrepare(req)
    for np in nonPrimaryReplicas:
        looper.run(
            eventually(
                checkPreprepare, np, primary.viewNo, primary.lastPrePrepareSeqNo - 1, req, 1, retryWait=0.5, timeout=10
            )
        )

    newReqDigest = ReqDigest(req.identifier, req.reqId + 1, req.digest)
    incorrectPrePrepareReq = PrePrepare(
        instId, primary.viewNo, primary.lastPrePrepareSeqNo + 2, *newReqDigest, time.time()
    )
    primary.send(incorrectPrePrepareReq, TPCStat.PrePrepareSent)
    looper.run(eventually(chk, retryWait=1, timeout=50))
def test_non_primary_accepts_pre_prepare_time(looper, txnPoolNodeSet,
                                              sdk_wallet_client,
                                              sdk_pool_handle):
    """
    One of the non-primary replicas has an incorrect clock, so it thinks the
    PRE-PREPARE has an incorrect time
    """
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=2)
    # send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
    # The replica having the bad clock
    confused_npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1]

    make_clock_faulty(confused_npr.node)

    old_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)
    old_susp_count = get_timestamp_suspicion_count(confused_npr.node)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=2)

    assert get_timestamp_suspicion_count(confused_npr.node) > old_susp_count

    new_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)

    # `is_pre_prepare_time_acceptable` first returned False then returned True
    assert [True, False, *old_acceptable_rvs] == new_acceptable_rvs
def setup(tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    # Patch the 3-phase batch creation method so that it sends an incorrect
    # digest once
    pr, otherR = getPrimaryReplica(txnPoolNodeSet, instId=0), \
                 getNonPrimaryReplicas(txnPoolNodeSet, instId=0)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, tconf.Max3PCBatchSize)
    stateRoot = pr._ordering_service.get_state_root_hash(DOMAIN_LEDGER_ID,
                                                         to_str=False)

    origMethod = pr._ordering_service.create_3pc_batch
    malignedOnce = None

    def badMethod(self, ledgerId):
        nonlocal malignedOnce
        pp = origMethod(ledgerId)
        if not malignedOnce:
            pp = updateNamedTuple(pp, digest=pp.digest + '123')
            malignedOnce = True
        return pp

    pr._ordering_service.create_3pc_batch = types.MethodType(
        badMethod, pr._ordering_service)
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             tconf.Max3PCBatchSize)
    return pr, otherR, stateRoot
def test_non_primary_accepts_pre_prepare_time(looper, txnPoolNodeSet,
                                              sdk_wallet_client, sdk_pool_handle):
    """
    One of the non-primary replicas has an incorrect clock, so it thinks the
    PRE-PREPARE has an incorrect time
    """
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=2)
    # send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
    # The replica having the bad clock
    confused_npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1]

    make_clock_faulty(confused_npr.node)

    old_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)
    old_susp_count = get_timestamp_suspicion_count(confused_npr.node)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              count=2)

    assert get_timestamp_suspicion_count(confused_npr.node) > old_susp_count

    new_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)

    # `is_pre_prepare_time_acceptable` first returned False then returned True
    assert [True, False, *old_acceptable_rvs] == new_acceptable_rvs
def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1):
    """
    Replicas should not accept a PRE-PREPARE for view "v" and prepare sequence
    number "n" if they have already accepted a request with view number "v"
    and sequence number "n"

    """
    numOfNodes = 4
    fValue = getMaxFailures(numOfNodes)
    request1 = sendRandomRequest(wallet1, client1)
    result1 = looper.run(
        eventually(checkSufficientRepliesRecvd,
                   client1.inBox,
                   request1.reqId,
                   fValue,
                   retryWait=1,
                   timeout=5))
    logger.debug("request {} gives result {}".format(request1, result1))
    primaryRepl = getPrimaryReplica(nodeSet)
    logger.debug("Primary Replica: {}".format(primaryRepl))
    logger.debug(
        "Decrementing the primary replica's pre-prepare sequence number by "
        "one...")
    primaryRepl.lastPrePrepareSeqNo -= 1
    request2 = sendRandomRequest(wallet1, client1)
    looper.run(
        eventually(checkPrePrepareReqSent,
                   primaryRepl,
                   request2,
                   retryWait=1,
                   timeout=10))

    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet)
    logger.debug("Non Primary Replicas: " + str(nonPrimaryReplicas))
    prePrepareReq = PrePrepare(primaryRepl.instId, primaryRepl.viewNo,
                               primaryRepl.lastPrePrepareSeqNo,
                               wallet1.defaultId, request2.reqId,
                               request2.digest, time.time())

    logger.debug("""Checking whether all the non primary replicas have received
                the pre-prepare request with same sequence number""")
    looper.run(
        eventually(checkPrePrepareReqRecvd,
                   nonPrimaryReplicas,
                   prePrepareReq,
                   retryWait=1,
                   timeout=10))
    logger.debug("""Check that none of the non primary replicas didn't send
    any prepare message "
                             in response to the pre-prepare message""")
    for npr in nonPrimaryReplicas:
        with pytest.raises(AssertionError):
            looper.run(
                eventually(checkPrepareReqSent,
                           npr,
                           wallet1.defaultId,
                           request2.reqId,
                           retryWait=1,
                           timeout=10))
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet, client1,
                                          wallet1, client1Connected):
    """
    One of the nodes experiences poor network conditions and loses 3PC
    messages. It has to do multiple rounds of catchup to get caught up
    """

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]
    orig_method = bad_node.master_replica.dispatchThreePhaseMsg

    # Bad node does not process any 3 phase messages, equivalent to messages
    # being lost
    def bad_method(self, m, s):
        pass

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        bad_method, bad_node.master_replica)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, bad_node, *other_nodes)

    # Patch all nodes to return ConsistencyProof of a smaller ledger to the
    # bad node but only once, so that the bad_node needs to do catchup again.

    make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                              Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(
            getAllReturnVals(bad_node,
                             bad_node.is_catchup_needed,
                             compare_val_to=True))

    def caught_up_for_current_view_count():
        return len(
            getAllReturnVals(bad_node,
                             bad_node.caught_up_for_current_view,
                             compare_val_to=True))

    old_count_1 = is_catchup_needed_count()
    old_count_2 = caught_up_for_current_view_count()
    ensure_view_change(looper, txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert is_catchup_needed_count() > old_count_1
    # The bad_node caught up due to receiving sufficient ViewChangeDone
    # messages
    assert caught_up_for_current_view_count() > old_count_2

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        orig_method, bad_node.master_replica)
def setup(request, looper, txnPoolNodeSet, client1, wallet1, client1Connected):
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_node.nodeIbStasher.delay(pDelay(100, 0))
    slow_node.nodeIbStasher.delay(cDelay(100, 0))
    if request.param == 'all':
        slow_node.nodeIbStasher.delay(ppDelay(100, 0))
    return slow_node, fast_nodes
def split_nodes(nodes):
    primary_node = get_master_primary_node(nodes)
    slow_node = getNonPrimaryReplicas(nodes, 0)[-1].node
    other_nodes = [n for n in nodes if n != slow_node]
    other_non_primary_nodes = [
        n for n in nodes if n not in (slow_node, primary_node)
    ]
    return slow_node, other_nodes, primary_node, other_non_primary_nodes
def testOrderingCase2(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Scenario -> A client sends requests, and some nodes delay COMMITs to a few
    specific nodes so that those nodes achieve commit quorum later for those
    requests than the other nodes. Still, all nodes ORDER the requests in the
    same order of ppSeqNos
    https://www.pivotaltracker.com/n/projects/1889887/stories/133655009
    """
    pr, replicas = getPrimaryReplica(txnPoolNodeSet, instId=0), \
                   getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    assert len(replicas) == 6

    rep0 = pr
    rep1 = replicas[0]
    rep2 = replicas[1]
    rep3 = replicas[2]
    rep4 = replicas[3]
    rep5 = replicas[4]
    rep6 = replicas[5]

    node0 = rep0.node
    node1 = rep1.node
    node2 = rep2.node
    node3 = rep3.node
    node4 = rep4.node
    node5 = rep5.node
    node6 = rep6.node

    ppSeqsToDelay = 5
    commitDelay = 3  # delay each COMMIT by this number of seconds
    delayedPpSeqNos = set()

    requestCount = 10

    def specificCommits(wrappedMsg):
        nonlocal node3, node4, node5
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare):
            if len(delayedPpSeqNos) < ppSeqsToDelay:
                delayedPpSeqNos.add(msg.ppSeqNo)
                logger.debug('ppSeqNo {} be delayed'.format(msg.ppSeqNo))
        if isinstance(msg, Commit) and msg.instId == 0 and \
                sender in (n.name for n in (node3, node4, node5)) and \
                msg.ppSeqNo in delayedPpSeqNos:
            return commitDelay

    for node in (node1, node2):
        logger.debug('{} would be delaying commits'.format(node))
        node.nodeIbStasher.delay(specificCommits)

    sdk_reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, requestCount)

    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=timeout)

    sdk_get_and_check_replies(looper, sdk_reqs)
def test_checkpoints_removed_on_backup_non_primary_replica_after_catchup(
        chkFreqPatched, txnPoolNodeSet, view_setup, clear_checkpoints):

    replica = getNonPrimaryReplicas(txnPoolNodeSet, 1)[-1]
    others = set(getAllReplicas(txnPoolNodeSet, 1)) - {replica}
    node = replica.node

    node.master_replica.last_ordered_3pc = (2, 12)

    replica.checkpoints[(6, 10)] = CheckpointState(
        seqNo=10,
        digests=[],
        digest='digest-6-10',
        receivedDigests={r.name: 'digest-6-10'
                         for r in others},
        isStable=True)

    replica.checkpoints[(11, 15)] = CheckpointState(
        seqNo=13,
        digests=['digest-11', 'digest-12', 'digest-13'],
        digest=None,
        receivedDigests={},
        isStable=False)

    replica.stashedRecvdCheckpoints[2] = {}

    replica.stashedRecvdCheckpoints[2][(11, 15)] = {}
    for r in others:
        replica.stashedRecvdCheckpoints[2][(11, 15)][r.name] = \
            Checkpoint(instId=1,
                       viewNo=2,
                       seqNoStart=11,
                       seqNoEnd=15,
                       digest='digest-11-15')

    replica.stashedRecvdCheckpoints[2][(16, 20)] = {}
    for r in others:
        replica.stashedRecvdCheckpoints[2][(16, 20)][r.name] = \
            Checkpoint(instId=1,
                       viewNo=2,
                       seqNoStart=16,
                       seqNoEnd=20,
                       digest='digest-16-20')

    replica.stashedRecvdCheckpoints[2][(21, 25)] = {}
    replica.stashedRecvdCheckpoints[2][(21, 25)][next(iter(others)).name] = \
        Checkpoint(instId=1,
                   viewNo=2,
                   seqNoStart=21,
                   seqNoEnd=25,
                   digest='digest-21-25')

    # Simulate catch-up completion
    node.ledgerManager.last_caught_up_3PC = (2, 20)
    node.allLedgersCaughtUp()

    assert len(replica.checkpoints) == 0
    assert len(replica.stashedRecvdCheckpoints) == 0
def test_view_change_done_delayed(txnPoolNodeSet, looper, sdk_pool_handle,
                                  sdk_wallet_client):
    """
    A node is slow and so falls behind the other nodes. After a view change it
    catches up, but its view change messages are also delayed; the node should
    start participating only once it is caught up and has received a quorum of
    ViewChangeDone messages.
    """
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    slow_node = nprs[-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 10
    delay_vcd = 25
    delay_3pc_messages([slow_node], 0, delay_3pc)
    slow_node.nodeIbStasher.delay(vcd_delay(delay_vcd))

    def chk(node):
        assert node.view_changer.has_acceptable_view_change_quorum
        assert node.view_changer._primary_verified
        assert node.isParticipating
        assert None not in {r.isPrimary for r in node.replicas.values()}

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         5 * 4, 4)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the slow node successfully completes catchup
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Other nodes complete view change, select primary and participate
    for node in other_nodes:
        looper.run(eventually(chk, node, retryWait=1))

    # Since `ViewChangeDone` is delayed, slow_node is not able to select a
    # primary and participate
    assert not slow_node.view_changer.has_acceptable_view_change_quorum
    assert not slow_node.view_changer._primary_verified
    assert not slow_node.isParticipating
    assert {r.isPrimary for r in slow_node.replicas.values()} == {None}

    # Send requests to make sure pool is functional
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Repair network
    slow_node.reset_delays_and_process_delayeds()

    # `slow_node` selects primary and participate
    looper.run(eventually(chk, slow_node, retryWait=1))

    # Processes requests received during lack of primary
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Send more requests and compare data of all nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client):
    """
    A node is slow to receive COMMIT messages, so after a view change it
    starts catchup. But before it can start requesting txns, the COMMIT
    messages are received and ordered. The node should not request any
    transactions.
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle, sdk_wallet_client,
                                           2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    delay_cm = 30
    delay_cp = 100
    slow_node.nodeIbStasher.delay(cDelay(delay_cm))
    # The slow node receives consistency proofs after some delay, this delay
    # gives the opportunity to deliver all 3PC messages
    slow_node.nodeIbStasher.delay(cpDelay(delay_cp))

    # Count of `getCatchupReqs` which is called to construct the `CatchupReq`
    # to be sent
    def domain_cr_count():
        return sum(1 for entry in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.getCatchupReqs)
                   if entry.params['consProof'].ledgerId == DOMAIN_LEDGER_ID)

    old_count = domain_cr_count()
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle, sdk_wallet_client,
                                           2 * sent_batches, sent_batches)
    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the `slow_node` is behind
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Unstash only COMMIT messages
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)

    looper.runFor(2)

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        ConsistencyProof.typename)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # No `CatchupReq`s constructed, hence no `CatchupReq`s could have
    # been sent
    assert domain_cr_count() == old_count
    # Some stashed ordered requests have been processed
    rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
    assert sent_batches in rv

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper, wallet1,
                                              client1, client1Connected):
    """
    The node should do only a fixed number of catchup rounds. To test this,
    delay Prepares and Commits for 2 non-primary nodes by a large amount,
    which is equivalent to losing those Prepares and Commits. Make sure the
    2 nodes have a different last prepared certificate from the other two.
    Then do a view change, make sure the view change completes, and check that
    the pool does not process the requests that were prepared by only a subset
    of the nodes
    """
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ledger_summary = txnPoolNodeSet[0].elector.ledger_summary

    slow_nodes = [
        r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)[-2:]
    ]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]

    # Make node slow to process Prepares and Commits
    for node in slow_nodes:
        node.nodeIbStasher.delay(pDelay(120, 0))
        node.nodeIbStasher.delay(cDelay(120, 0))

    sendRandomRequests(wallet1, client1, 5)
    looper.runFor(3)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    def last_prepared(nodes):
        lst = [
            n.master_replica.last_prepared_certificate_in_view() for n in nodes
        ]
        # All nodes have same last prepared
        assert check_if_all_equal_in_list(lst)
        return lst[0]

    last_prepared_slow = last_prepared(slow_nodes)
    last_prepared_fast = last_prepared(fast_nodes)

    # Check `slow_nodes` and `fast_nodes` set different last_prepared
    assert last_prepared_fast != last_prepared_slow

    # View change complete
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # The requests which were prepared by only a subset of the nodes were
    # not ordered
    assert txnPoolNodeSet[0].elector.ledger_summary == ledger_summary

    for node in slow_nodes:
        node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Make sure pool is functional
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    last_prepared(txnPoolNodeSet)
def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    The node should do only a fixed number of catchup rounds. For this, delay
    Prepares and Commits for 2 non-primary nodes by an amount large enough to
    be equivalent to losing them. Make sure these 2 nodes have a different last
    prepared certificate from the other two. Then do a view change; make sure
    the view change completes and the pool does not process the requests that
    were prepared by only a subset of the nodes.
    """
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ledger_summary = txnPoolNodeSet[0].ledger_summary

    slow_nodes = [r.node for r in getNonPrimaryReplicas(
        txnPoolNodeSet, 0)[-2:]]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]

    # Make node slow to process Prepares and Commits
    for node in slow_nodes:
        node.nodeIbStasher.delay(pDelay(120, 0))
        node.nodeIbStasher.delay(cDelay(120, 0))

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)
    looper.runFor(3)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    def last_prepared(nodes):
        lst = [n.master_replica.last_prepared_certificate_in_view()
               for n in nodes]
        # All nodes have same last prepared
        assert check_if_all_equal_in_list(lst)
        return lst[0]

    last_prepared_slow = last_prepared(slow_nodes)
    last_prepared_fast = last_prepared(fast_nodes)

    # Check `slow_nodes` and `fast_nodes` set different last_prepared
    assert last_prepared_fast != last_prepared_slow

    # View change complete
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # The requests which were prepared by only a subset of the nodes were
    # not ordered
    assert txnPoolNodeSet[0].ledger_summary == ledger_summary

    for node in slow_nodes:
        node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Make sure pool is functional
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    last_prepared(txnPoolNodeSet)
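
# A small self-contained sketch (assumed shapes, not plenum code) of what the
# last_prepared() helper above checks: every node in a group must report the same
# last-prepared certificate, expressed here as a (view_no, pp_seq_no) pair, and the
# slow group is expected to lag behind the fast group.
def common_last_prepared(certs):
    """certs: list of (view_no, pp_seq_no) tuples, one per node in the group."""
    assert len(set(certs)) == 1, "nodes disagree on the last prepared certificate"
    return certs[0]

slow = [(1, 6), (1, 6)]   # the two nodes whose Prepares/Commits were delayed
fast = [(1, 7), (1, 7)]   # the two nodes that prepared one more batch
assert common_last_prepared(slow) != common_last_prepared(fast)
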
def setup(nodeSet, up):
    primaryRep, nonPrimaryReps = getPrimaryReplica(nodeSet, 0), \
                                 getNonPrimaryReplicas(nodeSet, 0)

    # The primary replica would send PRE-PREPARE messages with incorrect digest
    makeNodeFaulty(primaryRep.node, partial(send3PhaseMsgWithIncorrectDigest,
                                            msgType=PrePrepare))

    return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps)
def testNonPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper,
                                                      txnPoolNodeSet, client1,
                                                      wallet1,
                                                      client1Connected,
                                                      reqs_for_logsize):
    """
    A node is slow in processing PRE-PREPAREs and PREPAREs, so a lot of
    requests are handled while the slow node starts getting 3 phase messages
    outside of its watermarks. Check that it queues up requests outside the
    watermarks and, once it has received a stable checkpoint, processes more
    requests. It sends other nodes 3 phase messages older than their stable
    checkpoint, so they should discard them.
    """
    delay = 15
    instId = 1
    reqsToSend = reqs_for_logsize + 2
    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    slowReplica = npr[0]
    slowNode = slowReplica.node
    slowNode.nodeIbStasher.delay(ppDelay(delay, instId))
    slowNode.nodeIbStasher.delay(pDelay(delay, instId))

    def discardCounts(replicas, pat):
        counts = {}
        for r in replicas:
            counts[r.name] = countDiscarded(r, pat)
        return counts

    oldStashCount = slowReplica.spylog.count(
        TestReplica.stashOutsideWatermarks.__name__)
    oldDiscardCounts = discardCounts(
        [n.replicas[instId] for n in txnPoolNodeSet if n != slowNode],
        'achieved stable checkpoint')

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend,
                                        1)
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))
    looper.run(
        eventually(checkNodeDataForEquality,
                   slowNode,
                   *[_ for _ in txnPoolNodeSet if _ != slowNode],
                   retryWait=1,
                   timeout=timeout))
    newStashCount = slowReplica.spylog.count(
        TestReplica.stashOutsideWatermarks.__name__)
    assert newStashCount > oldStashCount

    def chk():
        counts = discardCounts(
            [n.replicas[instId] for n in txnPoolNodeSet if n != slowNode],
            'achieved stable checkpoint')
        for nm, count in counts.items():
            assert count > oldDiscardCounts[nm]

    timeout = waits.expectedNodeToNodeMessageDeliveryTime() * \
        len(txnPoolNodeSet) + delay
    looper.run(eventually(chk, retryWait=1, timeout=timeout))
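
# Illustrative sketch only (names are hypothetical, not TestReplica's API) of the
# watermark behaviour this test exercises: a replica accepts a 3PC message only if
# its pp_seq_no lies inside the window (h, h + LOG_SIZE]; anything above is stashed,
# and when a stable checkpoint moves the window forward the stash is released.
class WatermarkWindow:
    def __init__(self, log_size):
        self.h = 0
        self.log_size = log_size
        self.stashed = []

    def in_window(self, pp_seq_no):
        return self.h < pp_seq_no <= self.h + self.log_size

    def receive(self, pp_seq_no):
        if self.in_window(pp_seq_no):
            return 'process'
        self.stashed.append(pp_seq_no)
        return 'stash'

    def stable_checkpoint(self, pp_seq_no):
        self.h = pp_seq_no                                    # window slides forward
        ready = [s for s in self.stashed if self.in_window(s)]
        self.stashed = [s for s in self.stashed if not self.in_window(s)]
        return ready                                          # stashed messages now processable

w = WatermarkWindow(log_size=5)
assert w.receive(6) == 'stash'          # outside (0, 5]
assert w.stable_checkpoint(5) == [6]    # after the checkpoint, 6 falls inside (5, 10]
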
def test_discard_3PC_messages_for_already_ordered(looper, txnPoolNodeSet,
                                                  sdk_wallet_client, sdk_pool_handle):
    """
    Nodes discard any 3PC messages for already ordered 3PC keys
    (view_no, pp_seq_no). Delay all 3PC messages to a node so it cannot respond
    to them until the other nodes have ordered them; when the slow node finally
    receives and responds to them, the other nodes do not process its messages
    and discard them.
    """
    slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = 20
    delay_3pc_messages([slow_node], 0, delay)
    delay_3pc_messages([slow_node], 1, delay)

    sent_batches = 3
    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         num_reqs=2 * sent_batches,
                                         num_batches=sent_batches)

    def chk(node, inst_id, p_count, c_count):
        # A node will keep recording PREPAREs even once it has more than
        # n-f-1 of them, until the request is ordered
        assert len(node.replicas[inst_id].prepares) >= p_count
        assert len(node.replicas[inst_id].commits) == c_count

    def count_discarded(inst_id, count):
        for node in other_nodes:
            assert countDiscarded(node.replicas[inst_id],
                                  'already ordered 3 phase message') == count

    # `slow_node` did not receive any PREPAREs or COMMITs
    chk(slow_node, 0, 0, 0)

    # `other_nodes` have not discarded any 3PC message
    count_discarded(0, 0)

    # `other_nodes` have not recorded any PREPAREs or COMMITs from `slow_node`
    chk_commits_prepares_recvd(0, other_nodes, slow_node)

    slow_node.reset_delays_and_process_delayeds()
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # `slow_node` did receive correct number of PREPAREs and COMMITs
    looper.run(eventually(chk, slow_node, 0, sent_batches - 1, sent_batches,
                          retryWait=1))

    # `other_nodes` have not recorded any PREPAREs or COMMITs from `slow_node`
    chk_commits_prepares_recvd(0, other_nodes, slow_node)

    # `other_nodes` have discarded PREPAREs and COMMITs for all batches
    count_discarded(0, 2 * sent_batches)
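
# Minimal sketch (not the real replica) of the discard rule asserted above: a replica
# remembers the (view_no, pp_seq_no) keys it has already ordered and discards any
# late PREPARE/COMMIT for those keys instead of recording it.
class OrderedFilter:
    def __init__(self):
        self.ordered = set()
        self.discarded = 0

    def mark_ordered(self, view_no, pp_seq_no):
        self.ordered.add((view_no, pp_seq_no))

    def on_3pc_msg(self, view_no, pp_seq_no):
        if (view_no, pp_seq_no) in self.ordered:
            self.discarded += 1            # "already ordered 3 phase message"
            return False
        return True

f = OrderedFilter()
f.mark_ordered(0, 1)
assert f.on_3pc_msg(0, 1) is False and f.discarded == 1
assert f.on_3pc_msg(0, 2) is True
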
Example #46
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including primary) so the node that does not receive enough 3PC messages is
    behind but learns of the view change quickly and starts catchup.
    Other nodes learn of the view change late and thus keep on processing
    requests
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([
            1 for e in slow_node.ledgerManager.spylog.getAll(
                slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID
        ])

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client,
                                        10 * Max3PCBatchSize)

    ensure_view_change(looper,
                       nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf,
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including primary) so the node that does not receive enough 3PC messages is
    behind but learns of the view change quickly and starts catchup.
    Other nodes learn of the view change late and thus keep on processing
    requests
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([
            1 for e in slow_node.ledgerManager.spylog.getAll(
                slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID
        ])

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, 10 * Max3PCBatchSize)

    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
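
# A rough, self-contained sketch of why start_count() above is expected to grow by
# at least 2 (plain Python; the counters are illustrative, not ledgerManager's
# spylog): the slow node starts one catchup round when it learns of the view change,
# and needs at least one more because the other nodes, still unaware of the view
# change, keep ordering new batches while it catches up.
def catchup_rounds_needed(local_size, remote_sizes_over_time):
    rounds = 0
    for remote_size in remote_sizes_over_time:   # the pool's ledger keeps growing
        if remote_size > local_size:
            rounds += 1
            local_size = remote_size
    return rounds

assert catchup_rounds_needed(10, [25, 40]) >= 2
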
Example #48
def setup(nodeSet, up):
    primaryRep = getPrimaryReplica(nodeSet, 0)
    nonPrimaryReps = getNonPrimaryReplicas(nodeSet, 0)

    faultyRep = nonPrimaryReps[0]
    makeNodeFaulty(faultyRep.node, partial(send3PhaseMsgWithIncorrectDigest,
                                           msgType=Commit, instId=0))

    return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps,
                 faultyRep=faultyRep)
def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1):
    """
    Replicas should not accept a PRE-PREPARE for view "v" and prepare sequence
    number "n" if they have already accepted a request with view number "v"
    and sequence number "n".
    """
    numOfNodes = 4
    fValue = getMaxFailures(numOfNodes)
    request1 = sendRandomRequest(wallet1, client1)
    result1 = looper.run(
        eventually(checkSufficientRepliesRecvd, client1.inBox,
                   request1.reqId, fValue,
                   retryWait=1, timeout=5))
    logger.debug("request {} gives result {}".format(request1, result1))
    primaryRepl = getPrimaryReplica(nodeSet)
    logger.debug("Primary Replica: {}".format(primaryRepl))
    logger.debug(
        "Decrementing the primary replica's pre-prepare sequence number by "
        "one...")
    primaryRepl.lastPrePrepareSeqNo -= 1
    request2 = sendRandomRequest(wallet1, client1)
    looper.run(eventually(checkPrePrepareReqSent, primaryRepl, request2,
                          retryWait=1, timeout=10))

    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet)
    logger.debug("Non Primary Replicas: " + str(nonPrimaryReplicas))
    prePrepareReq = PrePrepare(
        primaryRepl.instId,
        primaryRepl.viewNo,
        primaryRepl.lastPrePrepareSeqNo,
        wallet1.defaultId,
        request2.reqId,
        request2.digest,
        time.time()
    )

    logger.debug("""Checking whether all the non primary replicas have received
                the pre-prepare request with same sequence number""")
    looper.run(eventually(checkPrePrepareReqRecvd,
                          nonPrimaryReplicas,
                          prePrepareReq,
                          retryWait=1,
                          timeout=10))
    logger.debug("""Check that none of the non primary replicas didn't send
    any prepare message "
                             in response to the pre-prepare message""")
    for npr in nonPrimaryReplicas:
        with pytest.raises(AssertionError):
            looper.run(eventually(checkPrepareReqSent,
                                  npr,
                                  wallet1.defaultId,
                                  request2.reqId,
                                  retryWait=1,
                                  timeout=10))
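
# A bare-bones sketch (not the real Replica) of the rule this test exercises: a
# replica keeps the PRE-PREPAREs it has accepted keyed by (view_no, pp_seq_no) and
# rejects any later PRE-PREPARE that reuses a key, so no PREPARE is sent for it.
class PrePrepareStore:
    def __init__(self):
        self.accepted = {}                 # (view_no, pp_seq_no) -> request digest

    def on_pre_prepare(self, view_no, pp_seq_no, digest):
        key = (view_no, pp_seq_no)
        if key in self.accepted:
            return 'reject'                # same (v, n) already bound to a request
        self.accepted[key] = digest
        return 'accept'

s = PrePrepareStore()
assert s.on_pre_prepare(0, 1, 'digest-a') == 'accept'
assert s.on_pre_prepare(0, 1, 'digest-b') == 'reject'
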
def test_cannot_restore_last_sent_pp_seq_no_if_replica_not_primary(
        txnPoolNodeSet, view_no_set, setup):

    replica = getNonPrimaryReplicas(txnPoolNodeSet, instId=1)[0]
    node = replica.node
    assert node.viewNo == 2

    can = node.last_sent_pp_store_helper._can_restore_last_sent_pp_seq_no(
        PrePrepareKey(inst_id=1, view_no=2, pp_seq_no=5))

    assert can is False
def test_view_change_done_delayed(txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    A node is slow and so falls behind the other nodes; after a view change it
    catches up, but its ViewChangeDone messages are also delayed. A node should
    start participating only once it has caught up and received a
    ViewChangeDone quorum.
    """
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    slow_node = nprs[-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 10
    delay_vcd = 25
    delay_3pc_messages([slow_node], 0, delay_3pc)
    slow_node.nodeIbStasher.delay(vcd_delay(delay_vcd))

    def chk(node):
        assert node.view_changer.has_acceptable_view_change_quorum
        assert node.view_changer._primary_verified
        assert node.isParticipating
        assert None not in {r.isPrimary for r in node.replicas.values()}

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                         sdk_wallet_client, 5 * 4, 4)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the slow node successfully completes catchup
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Other nodes complete view change, select primary and participate
    for node in other_nodes:
        looper.run(eventually(chk, node, retryWait=1))

    # Since `ViewChangeDone` is delayed, slow_node is not able to select a
    # primary and participate
    assert not slow_node.view_changer.has_acceptable_view_change_quorum
    assert not slow_node.view_changer._primary_verified
    assert not slow_node.isParticipating
    assert {r.isPrimary for r in slow_node.replicas.values()} == {None}

    # Send requests to make sure pool is functional
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)

    # Repair network
    slow_node.reset_delays_and_process_delayeds()

    # `slow_node` selects primary and participate
    looper.run(eventually(chk, slow_node, retryWait=1))

    # Processes requests received during lack of primary
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Send more requests and compare data of all nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
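
# Sketch of the participation rule this test relies on (the condition names and the
# n - f quorum value are illustrative, not TestNode attributes): a node starts
# participating only once it has both finished catchup and collected enough
# ViewChangeDone messages to verify the new primary.
def can_participate(caught_up, view_change_done_votes, n, f):
    quorum = n - f                        # e.g. 3 of 4 nodes
    return caught_up and len(view_change_done_votes) >= quorum

assert can_participate(True, {'Alpha', 'Beta', 'Gamma'}, n=4, f=1) is True
assert can_participate(True, {'Alpha', 'Beta'}, n=4, f=1) is False   # ViewChangeDone delayed
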
def setup(request, looper, txnPoolNodeSet):
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    # Delay catchup reply so that the test gets time to make the check,
    # this delay is reset after the check
    slow_node.nodeIbStasher.delay(cr_delay(100))
    slow_node.nodeIbStasher.delay(pDelay(100, 0))
    slow_node.nodeIbStasher.delay(cDelay(100, 0))
    if request.param == 'all':
        slow_node.nodeIbStasher.delay(ppDelay(100, 0))
    return slow_node, fast_nodes
def setup(txnPoolNodeSet):
    primaryRep, nonPrimaryReps = getPrimaryReplica(txnPoolNodeSet, 0), \
                                 getNonPrimaryReplicas(txnPoolNodeSet, 0)

    # A non primary replica sends PREPARE messages with incorrect digest

    faultyRep = nonPrimaryReps[0]
    makeNodeFaulty(faultyRep.node, partial(send3PhaseMsgWithIncorrectDigest,
                                           msgType=Prepare, instId=0))

    return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps,
                 faultyRep=faultyRep)
def test_lag_size_for_catchup(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that, if the node's own stored checkpoints have aligned bounds,
    the master replica lag which makes the node perform catch-up is
    Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed received
    checkpoints.
    """
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    # The master replica of the slow node stops receiving 3PC messages
    slow_node.master_replica.threePhaseRouter.extend(
        (
            (PrePrepare, lambda *x, **y: None),
            (Prepare, lambda *x, **y: None),
            (Commit, lambda *x, **y: None),
        )
    )

    completed_catchups_before_reqs = get_number_of_completed_catchups(slow_node)

    # Send requests for the slow node's master replica to get
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP quorumed stashed checkpoints
    # from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                                           reqs_for_checkpoint)

    # Give time for the slow node to catch up if it is going to do it
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    checkNodeDataForInequality(slow_node, *other_nodes)

    # Verify that the slow node has not caught up
    assert get_number_of_completed_catchups(slow_node) == completed_catchups_before_reqs

    # Send more requests for the slow node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Verify that the slow node has caught up
    assert get_number_of_completed_catchups(slow_node) > completed_catchups_before_reqs
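
# Self-contained sketch of the lag rule checked above (the threshold value here is
# just an example, not Replica's actual setting): the master replica stashes
# checkpoints it cannot verify locally, and only when it holds quorums for more than
# STASHED_CHECKPOINTS_BEFORE_CATCHUP of them does it conclude it lags and catch up.
STASHED_CHECKPOINTS_BEFORE_CATCHUP = 1

def should_start_catchup(quorumed_stashed_checkpoints):
    return quorumed_stashed_checkpoints > STASHED_CHECKPOINTS_BEFORE_CATCHUP

assert should_start_catchup(STASHED_CHECKPOINTS_BEFORE_CATCHUP) is False      # first batch of requests
assert should_start_catchup(STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) is True   # after the extra requests
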
Example #55
def one_replica_and_others_in_backup_instance(
        request, txnPoolNodeSet, view_change_done):

    # NOTICE: This parametrized fixture triggers view change as pre-condition

    backup_inst_id = 1

    primary = getPrimaryReplica(txnPoolNodeSet, backup_inst_id)
    non_primaries = getNonPrimaryReplicas(txnPoolNodeSet, backup_inst_id)

    if request.param == 'primary':
        return primary, non_primaries
    else:
        return non_primaries[0], [primary] + non_primaries[1:]
def setup(txnPoolNodeSet):
    primaryRep, nonPrimaryReps = getPrimaryReplica(txnPoolNodeSet, 0), \
                                 getNonPrimaryReplicas(txnPoolNodeSet, 0)

    # The primary replica would send 3 duplicate PRE-PREPARE requests to
    # non primary replicas
    makeNodeFaulty(primaryRep.node, partial(sendDuplicate3PhaseMsg,
                                            msgType=PrePrepare, count=3))

    # The node of the primary replica above should not be blacklisted by any
    # other node since we are simulating multiple PRE-PREPARE messages and
    # want to check for a particular suspicion

    return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps)
def testOrderingWhenPrePrepareNotReceived(looper, txnPoolNodeSet,
                                          sdk_wallet_client, sdk_pool_handle):
    """
    Send commits but delay pre-prepares and prepares such that enough
    commits are received; the request should not be ordered until the
    pre-prepare is received, and ordering should happen just once.
    """
    delay = 10
    non_prim_reps = getNonPrimaryReplicas(txnPoolNodeSet, 0)

    slow_rep = non_prim_reps[0]
    slow_node = slow_rep.node
    slow_node.nodeIbStasher.delay(ppDelay(delay, 0))
    slow_node.nodeIbStasher.delay(pDelay(delay, 0))

    stash_pp = []
    stash_p = []
    orig_pp_method = slow_rep.processPrePrepare
    orig_p_method = slow_rep.processPrepare

    def patched_pp(self, msg, sender):
        stash_pp.append((msg, sender))

    def patched_p(self, msg, sender):
        stash_p.append((msg, sender))

    slow_rep.processPrePrepare = \
        types.MethodType(patched_pp, slow_rep)
    slow_rep.processPrepare = \
        types.MethodType(patched_p, slow_rep)

    def chk1():
        assert len(slow_rep.commitsWaitingForPrepare) > 0

    sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
    timeout = waits.expectedPrePrepareTime(len(txnPoolNodeSet)) + delay
    looper.run(eventually(chk1, retryWait=1, timeout=timeout))

    for m, s in stash_pp:
        orig_pp_method(m, s)

    for m, s in stash_p:
        orig_p_method(m, s)

    def chk2():
        assert len(slow_rep.commitsWaitingForPrepare) == 0
        assert slow_rep.spylog.count(slow_rep.doOrder.__name__) == 1

    timeout = waits.expectedOrderingTime(len(non_prim_reps) + 1) + 2 * delay
    looper.run(eventually(chk2, retryWait=1, timeout=timeout))
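
# Minimal sketch (not Replica's real bookkeeping) of the ordering rule checked above:
# COMMITs that arrive before the matching PRE-PREPARE/PREPARE are parked, in the
# spirit of commitsWaitingForPrepare, and the batch is ordered exactly once, only
# after the PRE-PREPARE finally shows up.
class CommitGate:
    def __init__(self, commit_quorum):
        self.commit_quorum = commit_quorum
        self.waiting_commits = 0
        self.has_pre_prepare = False
        self.ordered_times = 0

    def on_commit(self):
        self.waiting_commits += 1
        self._try_order()

    def on_pre_prepare(self):
        self.has_pre_prepare = True
        self._try_order()

    def _try_order(self):
        if self.has_pre_prepare and self.waiting_commits >= self.commit_quorum \
                and self.ordered_times == 0:
            self.ordered_times += 1

g = CommitGate(commit_quorum=3)
for _ in range(3):
    g.on_commit()
assert g.ordered_times == 0    # enough commits, but no pre-prepare yet
g.on_pre_prepare()
assert g.ordered_times == 1    # ordered exactly once
g.on_commit()
assert g.ordered_times == 1
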
def setup(request, txnPoolNodeSet):
    # Test once when client request is received and once when not received

    # Choosing a faulty node which is primary in neither instance; this helps
    # ensure that the same PROPAGATEs are not requested again by the node
    faulty_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[1].node
    if request.param == 'client_requests':
        # Long delay in PROPAGATEs
        faulty_node.nodeIbStasher.delay(ppgDelay(90))
        return faulty_node, True
    if request.param == 'no_client_requests':
        # Long delay in PROPAGATEs
        faulty_node.nodeIbStasher.delay(ppgDelay(90))
        # Long delay in Client Requests
        faulty_node.clientIbStasher.delay(req_delay(90))
        return faulty_node, False
def break_backup_replica(txnPoolNodeSet):
    node = getNonPrimaryReplicas(txnPoolNodeSet, inst_id)[-1].node
    broken_replica = node.replicas[inst_id]
    non_broken_replica = node.replicas[0]

    def fakeProcessPrePrepare(pre_prepare, sender):
        logger.warning(
            "{} is broken. 'processPrePrepare' does nothing".format(broken_replica.name))

    broken_replica.threePhaseRouter.extend(
        (
            (PrePrepare, fakeProcessPrePrepare),
        )
    )

    return broken_replica, non_broken_replica
Example #60
def broken_node_and_others(txnPoolNodeSet):
    node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other = [n for n in txnPoolNodeSet if n != node]

    def brokenSendToReplica(msg, frm):
        logger.warning(
            "{} is broken. 'sendToReplica' does nothing".format(node.name))

    node.nodeMsgRouter.extend(
        (
            (PrePrepare, brokenSendToReplica),
            (Prepare, brokenSendToReplica),
            (Commit, brokenSendToReplica),
            (Checkpoint, brokenSendToReplica),
        )
    )

    return node, other
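
# An illustrative sketch of the trick the fixture above uses (the router here is a
# plain dict, not plenum's nodeMsgRouter): pointing the 3PC message types at a no-op
# handler makes the node silently drop them, so its replicas never see
# PRE-PREPARE/PREPARE/COMMIT and fall behind the rest of the pool.
def route(handlers, msg_type, msg, frm):
    handlers[msg_type](msg, frm)

received = []

def normal_handler(msg, frm):
    received.append((msg, frm))

def broken_handler(msg, frm):
    pass   # silently drop the message, like brokenSendToReplica above

handlers = {'PrePrepare': broken_handler, 'Propagate': normal_handler}
route(handlers, 'PrePrepare', {'ppSeqNo': 1}, 'Alpha')
route(handlers, 'Propagate', {'req': 'r1'}, 'Alpha')
assert received == [({'req': 'r1'}, 'Alpha')]   # the 3PC message was dropped
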