Example #1
def test_stashed_checkpoint_processing(chkFreqPatched, looper, txnPoolNodeSet,
                                       sdk_wallet_client, sdk_pool_handle):
    """
    One node in a pool of 5 nodes lags in ordering the last 3PC-batch of a
    checkpoint. By the moment it eventually orders that 3PC-batch, it has
    already received and stashed Checkpoint messages from two nodes, so it
    processes these stashed messages upon completing the checkpoint. After
    that it receives Checkpoint messages from the two other nodes, processes
    them and stabilizes the checkpoint.
    """
    epsilon = txnPoolNodeSet[-1]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

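    # Delay all Commits to epsilon as well as the Checkpoints from Gamma
    # and Delta so that epsilon lags in ordering the last batch of the
    # checkpoint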
    epsilon.nodeIbStasher.delay(cDelay())
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Gamma'))
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Delta'))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

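    # Epsilon has ordered only 4 of the 5 batches, so its checkpoint is not
    # complete yet; the Checkpoint messages from the two non-delayed nodes
    # are kept as received votes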
    for inst_id, replica in epsilon.replicas.items():
        check_stable_checkpoint(replica, 0)
        check_num_unstable_checkpoints(replica, 0)
        check_num_received_checkpoints(replica, 1)
        check_received_checkpoint_votes(replica, pp_seq_no=5, num_votes=2)

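    # Release the delayed Commits so that epsilon orders the fifth batch
    # and processes the stashed Checkpoint messages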
    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def check():
        for inst_id, replica in epsilon.replicas.items():
            check_stable_checkpoint(replica, 0)
            check_num_unstable_checkpoints(replica, 1)
            check_last_checkpoint(replica, 5)

            check_num_received_checkpoints(replica, 1)
            check_last_received_checkpoint(replica, 5)

    looper.run(
        eventually(check,
                   timeout=waits.expectedOrderingTime(len(txnPoolNodeSet))))

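    # Release the belated Checkpoints from Gamma and Delta; with them the
    # checkpoint reaches quorum and becomes stable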
    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(CHECKPOINT)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for inst_id, replica in epsilon.replicas.items():
        check_stable_checkpoint(replica, 5)
        check_num_unstable_checkpoints(replica, 0)
        check_num_received_checkpoints(replica, 0)
Example #2
def test_watermarks_after_view_change(tdir, tconf,
                                      looper,
                                      txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_client):
    """
    Delay Commit, Checkpoint, InstanceChange and NewView messages for the lagging_node.
    Start a view change.
    Check that the view change finishes.
    Reset the delays.
    Check that the lagging_node can order transactions and has the same data as the other nodes.
    """
    lagging_node = txnPoolNodeSet[-1]
    lagging_node.master_replica.config.LOG_SIZE = LOG_SIZE
    start_view_no = lagging_node.viewNo
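    # While the lagging node cannot receive Commits, Checkpoints,
    # InstanceChanges or NewViews, run a view change on the rest of the pool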
    with delay_rules(lagging_node.nodeIbStasher, cDelay(), chk_delay(), icDelay(), nv_delay()):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper,
                          txnPoolNodeSet[:-1],
                          expectedViewNo=start_view_no + 1,
                          customTimeout=waits.expectedPoolViewChangeStartedTimeout(len(txnPoolNodeSet)))
        ensure_all_nodes_have_same_data(looper, txnPoolNodeSet[:-1])
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 6)
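    # Once the delays are lifted, the lagging node must catch up and reach
    # the same data as the other nodes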
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
Example #3
def test_freeing_forwarded_not_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, tconf, tdir, allPluginsPath):
    behind_node = txnPoolNodeSet[-1]
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
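    # Keep Checkpoints for the last (backup) instance delayed for the whole
    # test so that the checkpoint cannot stabilize on behind_node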
    with delay_rules(
            behind_node.nodeIbStasher,
            chk_delay(delay=sys.maxsize,
                      instId=behind_node.replicas.values()[-1].instId)):
        with delay_rules(behind_node.nodeIbStasher, ppDelay(delay=sys.maxsize),
                         pDelay(delay=sys.maxsize), cDelay(delay=sys.maxsize)):
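            # Remember the number of completed catch-ups; with all 3PC
            # messages delayed, behind_node will have to catch up to receive
            # the new batches (req_num is a constant defined at module level)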
            count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
            sdk_send_batches_of_random(looper, txnPoolNodeSet, sdk_pool_handle,
                                       sdk_wallet_steward, req_num, req_num)
            looper.run(
                eventually(node_caughtup, behind_node, count, retryWait=1))
            looper.run(
                eventually(
                    lambda: assertExp(len(behind_node.requests) == req_num)))

    # The requests obtained via catch-up are executed
    looper.run(
        eventually(lambda: assertExp(len(behind_node.requests) == req_num)))
    assert all(r.executed for r in behind_node.requests.values()
               if behind_node.seqNoDB.get(r.request.key)[1])
Example #4
def test_backup_replica_resumes_ordering_on_lag_if_checkpoints_belate(
        looper, chkFreqPatched, reqs_for_checkpoint,
        one_replica_and_others_in_backup_instance, sdk_pool_handle,
        sdk_wallet_client, view_change_done):
    """
    Verifies that a backup replica resumes ordering 3PC-batches upon
    detecting a lag in checkpoints when the lag is detected after some
    batch in the next checkpoint has already been committed but cannot
    be ordered out of turn.
    """

    slow_replica, other_replicas = one_replica_and_others_in_backup_instance
    view_no = slow_replica.viewNo

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc ==
                                           (view_no, 2)),
                   slow_replica,
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Don't receive Commits from two replicas
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[0].node.name))
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[1].node.name))

    # Send a request for which the replica will not be able to order the batch
    # due to an insufficient count of Commits
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Receive further Commits from now on
    slow_replica.node.nodeIbStasher.drop_delayeds()
    slow_replica.node.nodeIbStasher.resetDelays()
    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc ==
                                           (view_no, 2)),
                   slow_replica,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Send requests, but not enough to reach the catch-up
    # number of checkpoints
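    # ("- 2" accounts for the two requests already sent above)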
    sdk_send_random_requests(
        looper, sdk_pool_handle, sdk_wallet_client,
        Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP * reqs_for_checkpoint - 2)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Don't receive Checkpoints
    slow_replica.node.nodeIbStasher.delay(chk_delay(instId=1))

    # Send more requests to reach catch-up number of checkpoints
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             reqs_for_checkpoint)
    # Send a request that starts a new checkpoint
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that the replica has not ordered any batches
    # after the very first one
    assert slow_replica.last_ordered_3pc == (view_no, 2)

    # Ensure that the watermarks have not been shifted since the view start
    assert slow_replica.h == 0
    assert slow_replica.H == LOG_SIZE

    # Ensure that there are some quorumed stashed checkpoints
    assert slow_replica.stashed_checkpoints_with_quorum()

    # Receive belated Checkpoints
    slow_replica.node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Ensure that the replica has ordered the batch for the last sent request
    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc ==
                                           (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 2)),
                   slow_replica,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Ensure that the watermarks have been shifted so that the lower watermark
    # now equals the end of the last stable checkpoint in the instance
    assert slow_replica.h == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP +
                              1) * CHK_FREQ
    assert slow_replica.H == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP +
                              1) * CHK_FREQ + LOG_SIZE

    # Ensure that now there are no quorumed stashed checkpoints
    assert not slow_replica.stashed_checkpoints_with_quorum()

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc == (
            view_no,
            (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 3)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))
Example #5
def test_stashed_checkpoint_processing(chkFreqPatched, looper, txnPoolNodeSet,
                                       sdk_wallet_client, sdk_pool_handle):
    """
    One node in a pool of 5 nodes lags in ordering the last 3PC-batch of a
    checkpoint. By the moment it eventually orders that 3PC-batch, it has
    already received and stashed Checkpoint messages from two nodes, so it
    processes these stashed messages upon completing the checkpoint. After
    that it receives Checkpoint messages from the two other nodes, processes
    them and stabilizes the checkpoint.
    """
    epsilon = txnPoolNodeSet[-1]

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 4)

    epsilon.nodeIbStasher.delay(cDelay())
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Gamma'))
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Delta'))

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

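    # Epsilon tracks a single incomplete checkpoint covering pp_seq_nos
    # 1..5: only 4 batches are ordered, and the two Checkpoint messages
    # received so far stay stashed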
    for inst_id, replica in epsilon.replicas.items():
        assert len(replica._checkpointer._checkpoint_state) == 1
        assert (1, 5) in replica._checkpointer._checkpoint_state
        assert replica._checkpointer._checkpoint_state[(1, 5)].seqNo == 4
        assert replica._checkpointer._checkpoint_state[(1, 5)].digest is None
        assert replica._checkpointer._checkpoint_state[(1, 5)].isStable is False

        assert len(replica._checkpointer._stashed_recvd_checkpoints) == 1
        assert 0 in replica._checkpointer._stashed_recvd_checkpoints
        assert len(replica._checkpointer._stashed_recvd_checkpoints[0]) == 1
        assert (1, 5) in replica._checkpointer._stashed_recvd_checkpoints[0]
        assert len(replica._checkpointer._stashed_recvd_checkpoints[0][(1, 5)]) == 2

    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def check():
        for inst_id, replica in epsilon.replicas.items():
            assert len(replica._checkpointer._checkpoint_state) == 1
            assert (1, 5) in replica._checkpointer._checkpoint_state
            assert replica._checkpointer._checkpoint_state[(1, 5)].seqNo == 5
            assert replica._checkpointer._checkpoint_state[(1, 5)].digest is not None
            assert replica._checkpointer._checkpoint_state[(1, 5)].isStable is False

            assert len(replica._checkpointer._stashed_recvd_checkpoints) == 0

    looper.run(eventually(check, timeout=waits.expectedOrderingTime(
        len(txnPoolNodeSet))))

    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(CHECKPOINT)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for inst_id, replica in epsilon.replicas.items():
        assert len(replica._checkpointer._checkpoint_state) == 1
        assert (1, 5) in replica._checkpointer._checkpoint_state
        assert replica._checkpointer._checkpoint_state[(1, 5)].seqNo == 5
        assert replica._checkpointer._checkpoint_state[(1, 5)].digest is not None
        assert replica._checkpointer._checkpoint_state[(1, 5)].isStable is True

        assert len(replica._checkpointer._stashed_recvd_checkpoints) == 0
Example #6
def test_stashed_messages_processed_on_backup_replica_ordering_resumption(
        looper, chkFreqPatched, reqs_for_checkpoint,
        one_replica_and_others_in_backup_instance,
        sdk_pool_handle, sdk_wallet_client, view_change_done,
        txnPoolNodeSet):
    """
    Verifies that a backup replica resumes ordering 3PC-batches upon
    detecting a lag in checkpoints when the lag is detected after some
    3PC-messages related to the next checkpoint have already been stashed
    as lying outside of the watermarks.
    Please note that to verify this case the config is set up so that
    LOG_SIZE == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
    """
    global first_run
    batches_count = get_pp_seq_no(txnPoolNodeSet)

    slow_replica, other_replicas = one_replica_and_others_in_backup_instance
    view_no = slow_replica.viewNo
    low_watermark = slow_replica.h

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    batches_count += 1

    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc == (view_no, batches_count)),
                   slow_replica,
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Don't receive Commits from two replicas
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[0].node.name))
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[1].node.name))
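    # Also delay replies to requested Commits (MessageRep) so that the slow
    # replica cannot fetch the missing Commits via the message request
    # mechanism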
    slow_replica.node.nodeIbStasher.delay(
        msg_rep_delay(types_to_delay=[COMMIT])
    )

    # Send a request for which the replica will not be able to order the batch
    # due to an insufficient count of Commits
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Receive further Commits from now on
    slow_replica.node.nodeIbStasher.drop_delayeds()
    slow_replica.node.nodeIbStasher.resetDelays()

    # Send requests, but not enough to reach the catch-up
    # number of checkpoints
    reqs_until_checkpoints = reqs_for_checkpoint - get_pp_seq_no([r.node for r in other_replicas]) % reqs_for_checkpoint
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                             reqs_until_checkpoints)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Don't receive Checkpoints
    slow_replica.node.nodeIbStasher.delay(chk_delay(instId=1))

    # Send more requests to reach catch-up number of checkpoints
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             reqs_for_checkpoint)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that there are no 3PC-messages stashed
    # as lying outside of the watermarks
    assert slow_replica.stasher.stash_size(STASH_WATERMARKS) == 0

    # Send a request for which the batch will be outside of the watermarks
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that the replica has not ordered any batches
    # after the very first one
    assert slow_replica.last_ordered_3pc == (view_no, batches_count)

    # Ensure that the watermarks have not been shifted since the view start
    assert slow_replica.h == low_watermark
    assert slow_replica.H == (sys.maxsize if first_run else low_watermark + LOG_SIZE)

    # Ensure that there are some quorumed stashed checkpoints
    check_num_quorumed_received_checkpoints(slow_replica, 1)

    # Ensure that now there are 3PC-messages stashed
    # as lying outside of the watermarks
    if not first_run:
        assert slow_replica.stasher.stash_size(STASH_WATERMARKS) == incoming_3pc_msgs_count(len(txnPoolNodeSet))

    # Receive belated Checkpoints
    slow_replica.node.nodeIbStasher.reset_delays_and_process_delayeds()
    batches_count = get_pp_seq_no([r.node for r in other_replicas])

    # Ensure that the replica has ordered the batch for the last sent request
    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc ==
                                     (view_no, batches_count)),
                   slow_replica,
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Ensure that the watermarks have been shifted so that the lower watermark
    # now equals the end of the last stable checkpoint in the instance
    assert slow_replica.h == low_watermark + (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
    assert slow_replica.H == low_watermark + (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + LOG_SIZE

    # Ensure that now there are no quorumed stashed checkpoints
    check_num_quorumed_received_checkpoints(slow_replica, 0)

    # Ensure that now there are no 3PC-messages stashed
    # as lying outside of the watermarks
    assert slow_replica.stasher.stash_size(STASH_WATERMARKS) == 0

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    batches_count += 1

    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc ==
                                           (view_no, batches_count)),
                   slow_replica,
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))
    first_run = False
Example #7
def test_non_primary_recvs_3phase_message_outside_watermarks(
        chkFreqPatched, reqs_for_logsize, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    A node is slow in receiving PRE-PREPAREs and PREPAREs. A lot of requests
    are sent and the slow node has started receiving COMMITs outside of its
    watermarks and so stashes them. Also this node is slow in receiving
    CHECKPOINTs. So a catch-up does not occur on it.

    Then the slow node eventually receives the sent PRE-PREPAREs and PREPAREs
    and so orders the 3PC-batches between its watermarks. The other nodes
    discard the COMMITs from the slow node since they have already achieved
    stable checkpoints for these COMMITs.

    After that the slow node eventually receives the sent CHECKPOINTs from
    the other nodes and so stabilizes its own completed checkpoints and updates its
    watermarks. A catch-up is not triggered because no received checkpoints are
    stashed. Since now the watermarks have been updated, the slow node
    processes 3PC-messages stashed earlier and its ledger becomes equal to the
    ledgers of the other nodes.
    """
    backupInstId = 1
    npr = getNonPrimaryReplicas(txnPoolNodeSet, backupInstId)

    slowReplica = npr[0]
    slowNode = slowReplica.node

    slowNode.nodeIbStasher.delay(ppDelay(300, backupInstId))
    slowNode.nodeIbStasher.delay(pDelay(300, backupInstId))
    slowNode.nodeIbStasher.delay(chk_delay(300))

    initialDomainLedgerSize = slowNode.domainLedger.size
    oldStashCount = slowReplica.stasher.num_stashed_watermarks
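    # Pin the high watermark so that batches beyond LOG_SIZE fall outside
    # the watermark window on the slow replica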
    slowReplica.H = LOG_SIZE
    # 1. Send requests more than fit between the watermarks on the slow node
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_logsize + 2)

    # Verify that the slow node stashes the batches outside of its watermarks
    newStashCount = slowReplica.stasher.num_stashed_watermarks
    assert newStashCount > oldStashCount

    oldDiscardCounts = discardCounts([n.replicas[backupInstId] for n in txnPoolNodeSet if n != slowNode],
                                     'achieved stable checkpoint')

    # 2. Deliver the sent PREPREPAREs and PREPAREs to the slow node
    slowNode.nodeIbStasher.reset_delays_and_process_delayeds(PREPREPARE, PREPARE)

    # Verify that the slow node orders the 3PC-batches between its watermarks
    # but no more.
    looper.runFor(waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)))

    checkNodeDataForInequality(slowNode, *[n for n in txnPoolNodeSet if n != slowNode])
    assert slowNode.domainLedger.size - initialDomainLedgerSize == reqs_for_logsize

    # Also verify that the other nodes discard the COMMITs from the slow node
    # since they have already achieved stable checkpoints for these COMMITs.
    counts = discardCounts(
        [n.replicas[backupInstId] for n in txnPoolNodeSet if n != slowNode],
        'achieved stable checkpoint')
    for nm, count in counts.items():
        assert count > oldDiscardCounts[nm]

    oldCatchupTimes = slowNode.spylog.count(Node.start_catchup)

    # 3. Deliver the sent CHECKPOINTs to the slow node
    slowNode.nodeIbStasher.reset_delays_and_process_delayeds(CHECKPOINT)

    # Verify that the slow node processes 3PC-messages stashed earlier and its
    # ledger becomes equal to the ledgers of the other nodes while a catch-up
    # is not made.
    waitNodeDataEquality(looper, slowNode, *[n for n in txnPoolNodeSet if n != slowNode])
    assert slowNode.domainLedger.size - initialDomainLedgerSize == reqs_for_logsize + 2
    newCatchupTimes = slowNode.spylog.count(Node.start_catchup)
    assert newCatchupTimes == oldCatchupTimes
Example #8
def test_stashed_messages_processed_on_backup_replica_ordering_resumption(
        looper, chkFreqPatched, reqs_for_checkpoint,
        one_replica_and_others_in_backup_instance,
        sdk_pool_handle, sdk_wallet_client, view_change_done):
    """
    Verifies that a backup replica resumes ordering 3PC-batches upon
    detecting a lag in checkpoints when the lag is detected after some
    3PC-messages related to the next checkpoint have already been stashed
    as lying outside of the watermarks.
    Please note that to verify this case the config is set up so that
    LOG_SIZE == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
    """

    slow_replica, other_replicas = one_replica_and_others_in_backup_instance
    view_no = slow_replica.viewNo

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc == (view_no, 1)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Don't receive Commits from two replicas
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[0].node.name))
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[1].node.name))

    # Send a request for which the replica will not be able to order the batch
    # due to an insufficient count of Commits
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Receive further Commits from now on
    slow_replica.node.nodeIbStasher.drop_delayeds()
    slow_replica.node.nodeIbStasher.resetDelays()

    # Send requests, but not enough to reach the catch-up
    # number of checkpoints
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                             reqs_for_checkpoint - 2)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Don't receive Checkpoints
    slow_replica.node.nodeIbStasher.delay(chk_delay(instId=1))

    # Send more requests to reach catch-up number of checkpoints
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             reqs_for_checkpoint)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that there are no 3PC-messages stashed
    # as lying outside of the watermarks
    assert not slow_replica.stashingWhileOutsideWaterMarks

    # Send a request for which the batch will be outside of the watermarks
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that the replica has not ordered any batches
    # after the very first one
    assert slow_replica.last_ordered_3pc == (view_no, 1)

    # Ensure that the watermarks have not been shifted since the view start
    assert slow_replica.h == 0
    assert slow_replica.H == LOG_SIZE

    # Ensure that there are some quorumed stashed checkpoints
    assert slow_replica.stashed_checkpoints_with_quorum()

    # Ensure that now there are 3PC-messages stashed
    # as lying outside of the watermarks
    assert slow_replica.stashingWhileOutsideWaterMarks

    # Receive belated Checkpoints
    slow_replica.node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Ensure that the replica has ordered the batch for the last sent request
    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc ==
                                     (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 1)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Ensure that the watermarks have been shifted so that the lower watermark
    # now equals the end of the last stable checkpoint in the instance
    assert slow_replica.h == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
    assert slow_replica.H == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + LOG_SIZE

    # Ensure that now there are no quorumed stashed checkpoints
    assert not slow_replica.stashed_checkpoints_with_quorum()

    # Ensure that now there are no 3PC-messages stashed
    # as lying outside of the watermarks
    assert not slow_replica.stashingWhileOutsideWaterMarks

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda: assertExp(slow_replica.last_ordered_3pc ==
                                     (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 2)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))
Example #9
def test_non_primary_recvs_3phase_message_outside_watermarks(
        chkFreqPatched, reqs_for_logsize, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    A node is slow in receiving PRE-PREPAREs and PREPAREs. A lot of requests
    are sent and the slow node has started receiving COMMITs outside of its
    watermarks and so stashes them. Also this node is slow in receiving
    CHECKPOINTs. So a catch-up does not occur on it.

    Then the slow node eventually receives the sent PRE-PREPAREs and PREPAREs
    and so orders the 3PC-batches between its watermarks. The other nodes
    discard the COMMITs from the slow node since they have already achieved
    stable checkpoints for these COMMITs.

    After that the slow node eventually receives the sent CHECKPOINTs from
    the other nodes and so stabilizes its own completed checkpoints and updates its
    watermarks. A catch-up is not triggered because no received checkpoints are
    stashed. Since now the watermarks have been updated, the slow node
    processes 3PC-messages stashed earlier and its ledger becomes equal to the
    ledgers of the other nodes.
    """
    backupInstId = 1
    npr = getNonPrimaryReplicas(txnPoolNodeSet, backupInstId)

    slowReplica = npr[0]
    slowNode = slowReplica.node

    slowNode.nodeIbStasher.delay(ppDelay(300, backupInstId))
    slowNode.nodeIbStasher.delay(pDelay(300, backupInstId))
    slowNode.nodeIbStasher.delay(chk_delay(300))

    initialDomainLedgerSize = slowNode.domainLedger.size
    oldStashCount = slowReplica.spylog.count(TestReplica.stashOutsideWatermarks.__name__)

    # 1. Send requests more than fit between the watermarks on the slow node
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_logsize + 2)

    # Verify that the slow node stashes the batches outside of its watermarks
    newStashCount = slowReplica.spylog.count(TestReplica.stashOutsideWatermarks.__name__)
    assert newStashCount > oldStashCount

    oldDiscardCounts = discardCounts([n.replicas[backupInstId] for n in txnPoolNodeSet if n != slowNode],
                                     'achieved stable checkpoint')

    # 2. Deliver the sent PREPREPAREs and PREPAREs to the slow node
    slowNode.nodeIbStasher.reset_delays_and_process_delayeds(PREPREPARE, PREPARE)

    # Verify that the slow node orders the 3PC-batches between its watermarks
    # but no more.
    looper.runFor(waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)))

    checkNodeDataForInequality(slowNode, *[n for n in txnPoolNodeSet if n != slowNode])
    assert slowNode.domainLedger.size - initialDomainLedgerSize == reqs_for_logsize

    # Also verify that the other nodes discard the COMMITs from the slow node
    # since they have already achieved stable checkpoints for these COMMITs.
    counts = discardCounts(
        [n.replicas[backupInstId] for n in txnPoolNodeSet if n != slowNode],
        'achieved stable checkpoint')
    for nm, count in counts.items():
        assert count > oldDiscardCounts[nm]

    oldCatchupTimes = slowNode.spylog.count(Node.start_catchup)

    # 3. Deliver the sent CHECKPOINTs to the slow node
    slowNode.nodeIbStasher.reset_delays_and_process_delayeds(CHECKPOINT)

    # Verify that the slow node processes 3PC-messages stashed earlier and its
    # ledger becomes equal to the ledgers of the other nodes while a catch-up
    # is not made.
    waitNodeDataEquality(looper, slowNode, *[n for n in txnPoolNodeSet if n != slowNode])
    assert slowNode.domainLedger.size - initialDomainLedgerSize == reqs_for_logsize + 2
    newCatchupTimes = slowNode.spylog.count(Node.start_catchup)
    assert newCatchupTimes == oldCatchupTimes
Example #10
def test_process_three_phase_msg_and_stashed_for_next_checkpoint(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client,
        chkFreqPatched):
    """
    1. Delay checkpoint processing on the slow_node so that a checkpoint
    on this node can't be finalized.
    2. Order enough requests to finalize a checkpoint.
    3. Check that the checkpoint is finalized on all nodes except the slow_node.
    4. Order a new request.
    5. Check that the slow_node cannot order this request and stashes all 3PC messages.
    6. Reset the delays.
    7. Check that the last request is ordered on the slow_node, the checkpoint
    is finalized and the stashed messages are removed.
    """

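    # Align each replica's watermarks with its last ordered 3PC position
    # before starting the test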
    for n in txnPoolNodeSet:
        for r in n.replicas.values():
            r.update_watermark_from_3pc()

    slow_node = txnPoolNodeSet[-1]
    fast_nodes = txnPoolNodeSet[:-1]

    old_stashed = {
        inst_id: r.stasher.num_stashed_watermarks
        for inst_id, r in slow_node.replicas.items()
    }
    last_ordered = {
        inst_id: r.last_ordered_3pc
        for inst_id, r in slow_node.replicas.items()
    }
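    # Delay Checkpoints and replies to requested 3PC messages (MessageRep)
    # on the slow_node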
    with delay_rules([
            slow_node.nodeIbStasher,
    ], msg_rep_delay(types_to_delay=[PREPREPARE, PREPARE, COMMIT])):
        with delay_rules([
                slow_node.nodeIbStasher,
        ], chk_delay()):
            sdk_send_batches_of_random_and_check(looper,
                                                 txnPoolNodeSet,
                                                 sdk_pool_handle,
                                                 sdk_wallet_client,
                                                 num_reqs=1 * CHK_FREQ,
                                                 num_batches=CHK_FREQ)
            ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
            looper.run(
                eventually(_check_checkpoint_finalize, fast_nodes, 1,
                           CHK_FREQ))
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, 1)

            stashed_messages = incoming_3pc_msgs_count(len(txnPoolNodeSet))
            assert all(
                r.stasher.num_stashed_watermarks == old_stashed[inst_id] +
                stashed_messages for inst_id, r in slow_node.replicas.items())

            _check_batches_ordered(slow_node, last_ordered, CHK_FREQ)
            for n in fast_nodes:
                _check_batches_ordered(n, last_ordered, CHK_FREQ + 1)

        looper.run(
            eventually(_check_checkpoint_finalize, [
                slow_node,
            ], 1, CHK_FREQ))
        looper.run(
            eventually(_check_batches_ordered, slow_node, last_ordered,
                       CHK_FREQ + 1))
        assert all(r.stasher.num_stashed_watermarks == old_stashed[inst_id]
                   for inst_id, r in slow_node.replicas.items())
def test_stashed_checkpoint_processing(chkFreqPatched, looper, txnPoolNodeSet,
                                       sdk_wallet_client, sdk_pool_handle):
    """
    One node in a pool of 5 nodes lags in ordering the last 3PC-batch of a
    checkpoint. By the moment it eventually orders that 3PC-batch, it has
    already received and stashed Checkpoint messages from two nodes, so it
    processes these stashed messages upon completing the checkpoint. After
    that it receives Checkpoint messages from the two other nodes, processes
    them and stabilizes the checkpoint.
    """
    epsilon = txnPoolNodeSet[-1]

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 4)

    epsilon.nodeIbStasher.delay(cDelay())
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Gamma'))
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Delta'))

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for inst_id, replica in epsilon.replicas.items():
        assert len(replica.checkpoints) == 1
        assert (1, 5) in replica.checkpoints
        assert replica.checkpoints[(1, 5)].seqNo == 4
        assert replica.checkpoints[(1, 5)].digest is None
        assert replica.checkpoints[(1, 5)].isStable is False

        assert len(replica.stashedRecvdCheckpoints) == 1
        assert 0 in replica.stashedRecvdCheckpoints
        assert len(replica.stashedRecvdCheckpoints[0]) == 1
        assert (1, 5) in replica.stashedRecvdCheckpoints[0]
        assert len(replica.stashedRecvdCheckpoints[0][(1, 5)]) == 2

    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def check():
        for inst_id, replica in epsilon.replicas.items():
            assert len(replica.checkpoints) == 1
            assert (1, 5) in replica.checkpoints
            assert replica.checkpoints[(1, 5)].seqNo == 5
            assert replica.checkpoints[(1, 5)].digest is not None
            assert replica.checkpoints[(1, 5)].isStable is False

            assert len(replica.stashedRecvdCheckpoints) == 0

    looper.run(eventually(check, timeout=waits.expectedOrderingTime(
        len(txnPoolNodeSet))))

    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(CHECKPOINT)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for inst_id, replica in epsilon.replicas.items():
        assert len(replica.checkpoints) == 1
        assert (1, 5) in replica.checkpoints
        assert replica.checkpoints[(1, 5)].seqNo == 5
        assert replica.checkpoints[(1, 5)].digest is not None
        assert replica.checkpoints[(1, 5)].isStable is True

        assert len(replica.stashedRecvdCheckpoints) == 0