def test_catchup_from_unequal_nodes_without_waiting(looper,
                                                    txnPoolNodeSet,
                                                    sdk_pool_handle,
                                                    sdk_wallet_client):
    normal_node = txnPoolNodeSet[0]
    lagging_node_1 = txnPoolNodeSet[1]
    lagging_node_2 = txnPoolNodeSet[2]
    stopped_node = txnPoolNodeSet[3]

    # Make sure every node has ordered one batch
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

    # Wait until all nodes have the same data and store the last 3PC number of the node that's going to be "stopped"
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
    last_3pc = stopped_node.master_last_ordered_3PC

    with delay_rules_without_processing(stopped_node.nodeIbStasher, delay_3pc()):
        # Create one more batch on all nodes except "stopped" node
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

        with delay_rules(lagging_node_1.nodeIbStasher, delay_3pc(msgs=Commit)):
            # Create one more batch on all nodes except "stopped" and first lagging node
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

            with delay_rules(lagging_node_2.nodeIbStasher, delay_3pc(msgs=Commit)):
                # Create one more batch on all nodes except "stopped" and both lagging nodes
                # This time we can't wait for replies because there will be only one
                reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

                # Wait until normal node orders txn
                looper.run(eventually(lambda: assert_eq(normal_node.master_last_ordered_3PC[1],
                                                        last_3pc[1] + 3)))

                # Now all nodes have a different number of txns, so if we try to start a catch-up
                # it is guaranteed that we'll need to ask for equal consistency proofs, and
                # the disabled timeout ensures that the node can do so without relying on a timeout
                stopped_node.start_catchup()

                # Wait until catchup ends
                looper.run(eventually(lambda: assert_eq(stopped_node.ledgerManager._node_leecher._state,
                                                        NodeLeecherService.State.Idle)))

                # Ensure stopped node caught up at least one batch
                assert stopped_node.master_last_ordered_3PC[1] > last_3pc[1]

                # And there was no view change
                assert stopped_node.master_last_ordered_3PC[0] == last_3pc[0]

            # Make sure replies from last request are eventually received
            sdk_get_and_check_replies(looper, reqs)
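
# The wait-until-ordered checks above all follow one pattern: poll a node's
# master_last_ordered_3PC with `eventually` until it reaches the expected
# value. A minimal reusable sketch of that pattern, built from the helpers
# used above (the helper name `wait_for_ordered` is hypothetical and not
# part of the test suite):
def wait_for_ordered(looper, node, expected_3pc, timeout=30):
    # Re-run the assertion until it passes or the timeout expires
    looper.run(eventually(lambda: assert_eq(node.master_last_ordered_3PC,
                                            expected_3pc),
                          retryWait=1,
                          timeout=timeout))
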
def test_catchup_with_all_nodes_sending_cons_proofs_dead(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, logsearch):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 10)

    log_re_ask, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    old_re_ask_count = len(log_re_ask)

    catchup_reqs = {
        node.name: start_delaying(node.nodeIbStasher, cqDelay())
        for node in other_nodes
    }
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[
        AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(
        eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure cons proofs were gathered from all 3 other nodes while they were still responding
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests to reach only the nodes whose cons proofs were not gathered
    for node_id, node_reqs in catchup_reqs.items():
        if node_id not in audit_catchup_service._nodes_ledger_sizes:
            stop_delaying_and_process(node_reqs)

    # Check catchup finishes successfully, and there were reasks
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    assert len(log_re_ask) - old_re_ask_count > 0
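
# `logsearch` above returns a live list of matching log records, so comparing
# its length before and after an action tells whether the lagging node had to
# re-ask for missing transactions. A minimal sketch of that pattern (the
# helper name `count_new_reasks` and the `action` callback are hypothetical):
def count_new_reasks(logsearch, action):
    # Capture matches seen so far, run the scenario, then count new ones
    matches, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    before = len(matches)
    action()
    return len(matches) - before
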
def test_catchup_uses_only_nodes_with_cons_proofs(looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 10)

    catchup_reqs = {node.name: start_delaying(node.nodeIbStasher, cqDelay())
                    for node in other_nodes}
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure cons proofs were gathered from all 3 other nodes
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests to reach only the nodes whose cons proofs were gathered
    for node_id in audit_catchup_service._nodes_ledger_sizes.keys():
        stop_delaying_and_process(catchup_reqs[node_id])

    # Check catchup finishes successfully
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
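
# Both catch-up tests above gate on the state of the audit ledger's catchup
# services. A sketch of those two waits as standalone helpers, using the same
# attribute paths as the tests above (the helper names are hypothetical):
def wait_catchup_started(looper, node):
    # The audit ledger's CatchupRepService reports _is_working once it starts
    service = node.ledgerManager._node_leecher._leechers[
        AUDIT_LEDGER_ID]._catchup_rep_service
    looper.run(eventually(lambda: assert_eq(service._is_working, True)))


def wait_catchup_finished(looper, node):
    # The node leecher returns to Idle when the whole catch-up is complete
    looper.run(eventually(lambda: assert_eq(node.ledgerManager._node_leecher._state,
                                            NodeLeecherService.State.Idle)))
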
def test_re_order_pre_prepares(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle):
    # 0. Use the new 3PC validator
    for n in txnPoolNodeSet:
        ordering_service = n.master_replica._ordering_service
        ordering_service._validator = OrderingServiceMsgValidator(
            ordering_service._data)

    # 1. Drop Prepares and Commits on the 4th node
    # and order a couple of requests on Nodes 1-3
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]
    with delay_rules_without_processing(lagging_node.nodeIbStasher, cDelay(),
                                        pDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        assert all(n.master_last_ordered_3PC == (0, 1) for n in other_nodes)

    # 2. simulate view change start so that
    # all PrePrepares/Prepares/Commits are cleared
    # and uncommitted txns are reverted
    for n in txnPoolNodeSet:
        n.internal_bus.send(ViewChangeStarted(view_no=1))
        master_ordering_service = n.master_replica._ordering_service
        assert not master_ordering_service.prePrepares
        assert not master_ordering_service.prepares
        assert not master_ordering_service.commits
        assert master_ordering_service.old_view_preprepares
        ledger = n.db_manager.ledgers[DOMAIN_LEDGER_ID]
        state = n.db_manager.states[DOMAIN_LEDGER_ID]
        assert len(ledger.uncommittedTxns) == 0
        assert ledger.uncommitted_root_hash == ledger.tree.root_hash
        assert state.committedHead == state.head

    # 3. Simulate View Change finish to re-order the same PrePrepare
    assert lagging_node.master_last_ordered_3PC == (0, 0)
    new_master = txnPoolNodeSet[1]
    batches = [
        preprepare_to_batch_id(pp) for _, pp in new_master.master_replica.
        _ordering_service.old_view_preprepares.items()
    ]
    new_view_msg = NewViewCheckpointsApplied(view_no=0,
                                             view_changes=[],
                                             checkpoint=None,
                                             batches=batches)
    for n in txnPoolNodeSet:
        n.master_replica._ordering_service._bus.send(new_view_msg)

    # 4. Make sure that the nodes 1-3 (that already ordered the requests) sent Prepares and Commits so that
    # the request was eventually ordered on Node4 as well
    looper.run(
        eventually(lambda: assert_eq(lagging_node.master_last_ordered_3PC,
                                     (0, 1))))
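
# Step 2 above (simulating a view change start and checking that the 3PC
# collections are cleared while old-view PrePrepares are kept) could be
# factored into a reusable helper. A sketch under those assumptions (the
# helper name `simulate_view_change_started` is hypothetical):
def simulate_view_change_started(node, view_no):
    node.internal_bus.send(ViewChangeStarted(view_no=view_no))
    ordering_service = node.master_replica._ordering_service
    # 3PC collections are cleared ...
    assert not ordering_service.prePrepares
    assert not ordering_service.prepares
    assert not ordering_service.commits
    # ... but old-view PrePrepares are kept for re-ordering after the view change
    assert ordering_service.old_view_preprepares
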
def test_catchup_during_3pc(tconf, looper, txnPoolNodeSet, sdk_wallet_client,
                            sdk_pool_handle):
    '''
    1) Send 1 3PC batch + 2 reqs
    2) Delay commits on one node
    3) Make sure the batch is ordered on all nodes except the lagged one
    4) Start catchup on the lagging node
    5) Make sure that all nodes are equal
    6) Send more requests so that we have 3 batches in total
    7) Make sure that all nodes are equal
    '''

    lagging_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]

    with delay_rules(lagging_node.nodeIbStasher, cDelay()):
        sdk_reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                            sdk_wallet_client,
                                            tconf.Max3PCBatchSize + 2)

        looper.run(
            eventually(check_last_ordered_3pc_on_master, rest_nodes, (0, 1)))

        lagging_node.start_catchup()

        looper.run(
            eventually(
                lambda: assert_eq(lagging_node.mode, Mode.participating),
                retryWait=1,
                timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))

        waitNodeDataEquality(looper, *txnPoolNodeSet, customTimeout=5)

    sdk_get_replies(looper, sdk_reqs)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * tconf.Max3PCBatchSize - 2)

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
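
# `check_last_ordered_3pc_on_master` is imported from the shared test helpers;
# a sketch consistent with how it is called above, assuming it simply compares
# master_last_ordered_3PC on every given node (the real helper may differ):
def check_last_ordered_3pc_on_master_sketch(nodes, last_3pc):
    for node in nodes:
        assert node.master_last_ordered_3PC == last_3pc
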
def test_backup_replica_resumes_ordering_on_lag_in_checkpoints(
        looper, chkFreqPatched, reqs_for_checkpoint,
        one_replica_and_others_in_backup_instance, sdk_pool_handle,
        sdk_wallet_client, view_change_done, txnPoolNodeSet):
    """
    Verifies resumption of ordering 3PC-batches on a backup replica
    on detection of a lag in checkpoints
    """

    slow_replica, other_replicas = one_replica_and_others_in_backup_instance
    view_no = slow_replica.viewNo

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda: assert_eq(slow_replica.last_ordered_3pc,
                                     (view_no, 2)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Don't receive Commits from two replicas
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[0].node.name))
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[1].node.name))

    # Send a request for which the replica will not be able to order the batch
    # due to an insufficient count of Commits
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Recover reception of Commits
    slow_replica.node.nodeIbStasher.drop_delayeds()
    slow_replica.node.nodeIbStasher.resetDelays()

    # Send requests, but in a quantity insufficient to reach
    # the catch-up number of checkpoints
    sdk_send_random_requests(
        looper, sdk_pool_handle, sdk_wallet_client,
        Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP * reqs_for_checkpoint - 3)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that the replica has not ordered any batches
    # after the very first one
    assert slow_replica.last_ordered_3pc == (view_no, 2)

    # Ensure that the watermarks have not been shifted since the view start
    assert slow_replica.h == 0
    assert slow_replica.H == LOG_SIZE

    # Ensure that the collections related to requests, batches and
    # own checkpoints are not empty.
    # (Note that a primary replica removes requests from requestQueues
    # when creating a batch with them.)
    if slow_replica.isPrimary:
        assert slow_replica._ordering_service.sentPrePrepares
    else:
        assert slow_replica._ordering_service.requestQueues[DOMAIN_LEDGER_ID]
        assert slow_replica._ordering_service.prePrepares
    assert slow_replica._ordering_service.prepares
    assert slow_replica._ordering_service.commits
    assert slow_replica._ordering_service.batches
    assert slow_replica._checkpointer._checkpoint_state

    # Ensure that there are some quorumed stashed checkpoints
    assert slow_replica._checkpointer._stashed_checkpoints_with_quorum()

    # Send more requests to reach catch-up number of checkpoints
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_for_checkpoint)

    # Ensure that the replica has adjusted last_ordered_3pc to the end
    # of the last checkpoint
    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc == \
                        (view_no, (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ)),
                   slow_replica,
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Ensure that the watermarks have been shifted so that the lower watermark
    # has the same value as last_ordered_3pc
    assert slow_replica.h == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP +
                              1) * CHK_FREQ
    assert slow_replica.H == (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP +
                              1) * CHK_FREQ + LOG_SIZE

    # Ensure that the collections related to requests, batches and
    # own checkpoints have been cleared
    assert not slow_replica._ordering_service.requestQueues[DOMAIN_LEDGER_ID]
    assert not slow_replica._ordering_service.sentPrePrepares
    assert not slow_replica._ordering_service.prePrepares
    assert not slow_replica._ordering_service.prepares
    assert not slow_replica._ordering_service.commits
    assert not slow_replica._ordering_service.batches
    assert not slow_replica._checkpointer._checkpoint_state

    # Ensure that now there are no quorumed stashed checkpoints
    assert not slow_replica._checkpointer._stashed_checkpoints_with_quorum()

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc == (
            view_no,
            (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + 1)),
                   slow_replica,
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))
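
# The expected positions checked above follow directly from the checkpoint
# constants. A worked example with illustrative values (the real values come
# from chkFreqPatched, Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP and LOG_SIZE):
#     CHK_FREQ = 5, LOG_SIZE = 15, STASHED_CHECKPOINTS_BEFORE_CATCHUP = 4
#     last_ordered_3pc == (view_no, (4 + 1) * 5)  == (view_no, 25)
#     h                == (4 + 1) * 5             == 25
#     H                == (4 + 1) * 5 + 15        == 40
# i.e. the low watermark jumps to the end of the checkpoint the replica caught
# up to, and a fresh window of LOG_SIZE batches opens on top of it.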