Example #1
def test_checkpoints_removed_on_backup_replica_after_catchup_during_view_change(
        chkFreqPatched, txnPoolNodeSet, view_change_in_progress,
        clear_checkpoints):

    backup_replicas = getAllReplicas(txnPoolNodeSet, 1)
    replica = backup_replicas[-1]
    others = backup_replicas[:-1]
    node = replica.node

    node.master_replica.last_ordered_3pc = (2, 12)

    replica._checkpointer._mark_checkpoint_stable(10)
    replica._checkpointer._received_checkpoints[cp_key(2, 15)] = [r.name for r in others]
    replica._checkpointer._received_checkpoints[cp_key(2, 20)] = [r.name for r in others]
    replica._checkpointer._received_checkpoints[cp_key(2, 25)] = [others[0].name]

    # Simulate catch-up completion
    node.ledgerManager.last_caught_up_3PC = (2, 20)
    audit_ledger = node.getLedger(AUDIT_LEDGER_ID)
    txn_with_last_seq_no = {
        'txn': {
            'data': {
                AUDIT_TXN_VIEW_NO: 2,
                AUDIT_TXN_PP_SEQ_NO: 20,
                AUDIT_TXN_PRIMARIES: ['Gamma', 'Delta']
            }
        }
    }
    audit_ledger.get_last_committed_txn = lambda *args: txn_with_last_seq_no
    node.allLedgersCaughtUp()

    check_num_received_checkpoints(replica, 0)
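This example and the two that follow (#2 and #3) repeat the same three steps to simulate catch-up completion: set last_caught_up_3PC, patch the audit ledger's last committed transaction, and call allLedgersCaughtUp(). A hypothetical helper collecting those steps could look like the sketch below; the helper name is invented, while the constants and node attributes are the ones the tests themselves use.

# Hypothetical helper consolidating the "simulate catch-up completion" steps.
# Assumes AUDIT_LEDGER_ID, AUDIT_TXN_VIEW_NO, AUDIT_TXN_PP_SEQ_NO and
# AUDIT_TXN_PRIMARIES are imported at module level, as in the tests above.
def simulate_catchup_completion(node, view_no, pp_seq_no, primaries):
    node.ledgerManager.last_caught_up_3PC = (view_no, pp_seq_no)
    audit_ledger = node.getLedger(AUDIT_LEDGER_ID)
    last_txn = {'txn': {'data': {AUDIT_TXN_VIEW_NO: view_no,
                                 AUDIT_TXN_PP_SEQ_NO: pp_seq_no,
                                 AUDIT_TXN_PRIMARIES: primaries}}}
    audit_ledger.get_last_committed_txn = lambda *args: last_txn
    node.allLedgersCaughtUp()

# Example #1 would then reduce to:
# simulate_catchup_completion(node, 2, 20, ['Gamma', 'Delta'])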
Example #2
def test_received_checkpoints_removed_on_backup_primary_replica_after_catchup(
        chkFreqPatched, txnPoolNodeSet, view_setup, clear_checkpoints):

    replica = getPrimaryReplica(txnPoolNodeSet, 1)
    others = set(getAllReplicas(txnPoolNodeSet, 1)) - {replica}
    node = replica.node

    node.master_replica.last_ordered_3pc = (2, 12)

    replica._consensus_data.stable_checkpoint = 15
    replica._checkpointer._received_checkpoints[cp_key(2, 20)] = [next(iter(others)).name]

    # Simulate catch-up completion
    node.ledgerManager.last_caught_up_3PC = (2, 20)
    audit_ledger = node.getLedger(AUDIT_LEDGER_ID)
    txn_with_last_seq_no = {
        'txn': {
            'data': {
                AUDIT_TXN_VIEW_NO: 2,
                AUDIT_TXN_PP_SEQ_NO: 20,
                AUDIT_TXN_PRIMARIES: ['Gamma', 'Delta']
            }
        }
    }
    audit_ledger.get_last_committed_txn = lambda *args: txn_with_last_seq_no
    node.write_manager.node_reg_handler.on_catchup_finished = lambda *args: None
    node.allLedgersCaughtUp()

    check_num_received_checkpoints(replica, 1)
    check_last_received_checkpoint(replica, 20, view_no=2)
Example #3
def test_checkpoints_removed_on_master_non_primary_replica_after_catchup(
        chkFreqPatched, txnPoolNodeSet, view_setup, clear_checkpoints):

    replica = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1]
    others = set(getAllReplicas(txnPoolNodeSet, 0)) - {replica}
    node = replica.node

    node.master_replica.last_ordered_3pc = (2, 12)

    replica._checkpointer._mark_checkpoint_stable(10)
    replica._checkpointer._received_checkpoints[cp_key(2, 15)] = [r.name for r in others]
    replica._checkpointer._received_checkpoints[cp_key(2, 20)] = [r.name for r in others]
    replica._checkpointer._received_checkpoints[cp_key(2, 25)] = [next(iter(others)).name]

    # Simulate catch-up completion
    node.ledgerManager.last_caught_up_3PC = (2, 20)
    audit_ledger = node.getLedger(AUDIT_LEDGER_ID)
    txn_with_last_seq_no = {'txn': {'data': {AUDIT_TXN_VIEW_NO: 2,
                                             AUDIT_TXN_PP_SEQ_NO: 20,
                                             AUDIT_TXN_PRIMARIES: ['Gamma', 'Delta']}}}
    audit_ledger.get_last_committed_txn = lambda *args: txn_with_last_seq_no
    node.allLedgersCaughtUp()

    check_num_received_checkpoints(replica, 1)
    check_last_received_checkpoint(replica, 25, view_no=2)
Example #4
    def check():
        for replica in slow_node.replicas.values():
            check_stable_checkpoint(replica, 0)
            check_num_unstable_checkpoints(replica, 0)
            check_num_received_checkpoints(replica, 1)
            check_received_checkpoint_votes(replica,
                                            pp_seq_no=5,
                                            num_votes=len(txnPoolNodeSet) - 1)
Example #5
    def check():
        for inst_id, replica in epsilon.replicas.items():
            check_stable_checkpoint(replica, 0)
            check_num_unstable_checkpoints(replica, 1)
            check_last_checkpoint(replica, 5)

            check_num_received_checkpoints(replica, 1)
            check_last_received_checkpoint(replica, 5)
Example #6
def test_stashed_checkpoint_processing(chkFreqPatched, looper, txnPoolNodeSet,
                                       sdk_wallet_client, sdk_pool_handle):
    """
    One node in a pool of 5 nodes lags to order the last 3PC-batch in a
    checkpoint. By the moment when it eventually orders the 3PC-batch it has
    already received and stashed Checkpoint message from two node, so it
    processes these stashed messages on completing the checkpoint. After this
    it receives Checkpoint messages from two other nodes, processes them and
    stabilizes the checkpoint.
    """
    epsilon = txnPoolNodeSet[-1]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    epsilon.nodeIbStasher.delay(cDelay())
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Gamma'))
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Delta'))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for inst_id, replica in epsilon.replicas.items():
        check_stable_checkpoint(replica, 0)
        check_num_unstable_checkpoints(replica, 0)
        check_num_received_checkpoints(replica, 1)
        check_received_checkpoint_votes(replica, pp_seq_no=5, num_votes=2)

    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def check():
        for inst_id, replica in epsilon.replicas.items():
            check_stable_checkpoint(replica, 0)
            check_num_unstable_checkpoints(replica, 1)
            check_last_checkpoint(replica, 5)

            check_num_received_checkpoints(replica, 1)
            check_last_received_checkpoint(replica, 5)

    looper.run(
        eventually(check,
                   timeout=waits.expectedOrderingTime(len(txnPoolNodeSet))))

    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(CHECKPOINT)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for inst_id, replica in epsilon.replicas.items():
        check_stable_checkpoint(replica, 5)
        check_num_unstable_checkpoints(replica, 0)
        check_num_received_checkpoints(replica, 0)
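The manual delay/reset calls above give the test staged control: Commits are released first, and the Checkpoints from Gamma and Delta only later. When everything can be released at once, the same stasher API is usually wrapped in the delay_rules context manager that Example #9 below uses in its _without_processing form. A minimal sketch, with module paths and on-exit behaviour assumed from the plenum test suite:

from plenum.test.delayers import cDelay, chk_delay
from plenum.test.stasher import delay_rules

# Delay Commits plus Checkpoints from Gamma and Delta only while the block runs.
# On exit, delay_rules removes the rules and processes the delayed messages
# (delay_rules_without_processing resets without processing them).
with delay_rules(epsilon.nodeIbStasher, cDelay(),
                 chk_delay(sender_filter='Gamma'),
                 chk_delay(sender_filter='Delta')):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)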
Example #7
def test_lagged_checkpoint_completion(chkFreqPatched, looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    """
    One node in a pool lags to order the last 3PC-batch in a checkpoint so that
    when it eventually orders this 3PC-batch and thus completes the checkpoint
    it has already received and stashed the corresponding checkpoint messages
    from all the other nodes. The test verifies that the node successfully
    processes the stashed checkpoint messages and stabilizes the checkpoint.
    """
    slow_node = txnPoolNodeSet[-1]

    # All the nodes in the pool normally order all the 3PC-batches in the
    # checkpoint except the last one. The last 3PC-batch in the checkpoint is
    # ordered by all the nodes except the slow node, which is delayed in
    # receiving Commits.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    slow_node.nodeIbStasher.delay(cDelay())

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    # All the other nodes complete the checkpoint and send Checkpoint messages
    # to others. The slow node receives and stashes these messages because it
    # has not completed the checkpoint.
    def check():
        for replica in slow_node.replicas.values():
            check_stable_checkpoint(replica, 0)
            check_num_unstable_checkpoints(replica, 0)
            check_num_received_checkpoints(replica, 1)
            check_received_checkpoint_votes(replica,
                                            pp_seq_no=5,
                                            num_votes=len(txnPoolNodeSet) - 1)

    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.run(eventually(check, timeout=stabilization_timeout))

    # Eventually the slow node receives Commits, orders the last 3PC-batch in
    # the checkpoint and thus completes it, processes the stashed checkpoint
    # messages and stabilizes the checkpoint.
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds()

    looper.runFor(waits.expectedOrderingTime(len(txnPoolNodeSet)))

    for replica in slow_node.replicas.values():
        check_stable_checkpoint(replica, 5)
        check_num_unstable_checkpoints(replica, 0)
        check_num_received_checkpoints(replica, 0)
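For context on the numbers asserted above, a back-of-the-envelope sketch in plain Python (not plenum code): a 4-node pool tolerates f = 1 fault, and since every node except the slow one completes the checkpoint at pp_seq_no 5 and broadcasts a Checkpoint message, the slow node ends up stashing votes from all n - 1 peers.

# Numbers behind the check() assertions in the test above.
n = 4                  # len(txnPoolNodeSet) in this test
f = (n - 1) // 3       # standard BFT fault tolerance
assert f == 1
# Every peer except the slow node sends a Checkpoint for pp_seq_no 5:
assert n - 1 == 3      # matches num_votes=len(txnPoolNodeSet) - 1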
Example #8
def test_second_checkpoint_after_catchup_can_be_stabilized(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_wallet_steward,
        sdk_wallet_client, sdk_pool_handle, tdir, tconf,
        allPluginsPath):
    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering the batch with the EpsilonSteward
    # NYM transaction or the batch with the Epsilon NODE transaction.
    # Epsilon received these transactions via catch-up.

    master_replica = new_node.replicas._master_replica

    check_stable_checkpoint(master_replica, 0)
    check_num_received_checkpoints(master_replica, 0)

    assert master_replica.h == 2
    assert master_replica.H == 17

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    for replica in new_node.replicas.values():
        assert replica.h == 2
        assert replica.H == 17

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 6)
    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for replica in new_node.replicas.values():
        check_stable_checkpoint(replica, 5)
        check_num_unstable_checkpoints(replica, 0)

        # nothing is stashed since it's ordered during catch-up
        check_num_received_checkpoints(replica, 0)

        assert replica.h == 5
        assert replica.H == 20

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(stabilization_timeout)

    for replica in new_node.replicas.values():
        check_stable_checkpoint(replica, 10)
        check_num_unstable_checkpoints(replica, 0)

        # nothing is stashed since it's ordered during catch-up
        check_num_received_checkpoints(replica, 0)

        assert replica.h == 10
        assert replica.H == 25
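The h/H assertions above keep a constant gap of 15 between the high and low watermarks, which matches a log size of 3 * CHK_FREQ with the CHK_FREQ = 5 that the chkFreqPatched fixture presumably configures here. A minimal sketch of that arithmetic in plain Python (not plenum code):

# Watermark arithmetic implied by the assertions above.
# Assumption: chkFreqPatched sets CHK_FREQ = 5 and the log size is 3 * CHK_FREQ.
CHK_FREQ = 5
LOG_SIZE = 3 * CHK_FREQ   # 15, the constant H - h gap seen in the assertions

def watermarks_after_stabilization(stable_checkpoint):
    # The low watermark h moves to the stable checkpoint; H stays LOG_SIZE ahead.
    h = stable_checkpoint
    return h, h + LOG_SIZE

assert watermarks_after_stabilization(5) == (5, 20)    # first stable checkpoint
assert watermarks_after_stabilization(10) == (10, 25)  # second stable checkpoint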
Example #9
def test_second_checkpoint_after_catchup_can_be_stabilized(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_wallet_steward,
        sdk_wallet_client, sdk_pool_handle, tdir, tconf, allPluginsPath):
    lagging_node = txnPoolNodeSet[-1]
    with delay_rules_without_processing(lagging_node.nodeIbStasher, cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client,
                                  tconf.Max3PCBatchSize * CHK_FREQ * 2)
    waitNodeDataEquality(looper, lagging_node, *txnPoolNodeSet[:-1])
    # The lagging node received the missed transactions via catch-up.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2)

    master_replica = lagging_node.master_replica

    check_stable_checkpoint(master_replica, 10)
    check_num_received_checkpoints(master_replica, 0)

    assert master_replica.h == 10
    assert master_replica.H == 25

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    for replica in lagging_node.replicas.values():
        assert replica.h == 10
        assert replica.H == 25

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6)
    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    for replica in lagging_node.replicas.values():
        check_stable_checkpoint(replica, 15)
        check_num_unstable_checkpoints(replica, 0)

        # nothing is stashed since it's ordered during catch-up
        check_num_received_checkpoints(replica, 0)

        assert replica.h == 15
        assert replica.H == 30

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    looper.runFor(stabilization_timeout)

    for replica in lagging_node.replicas.values():
        check_stable_checkpoint(replica, 20)
        check_num_unstable_checkpoints(replica, 0)

        # nothing is stashed since it's ordered during catch-up
        check_num_received_checkpoints(replica, 0)

        assert replica.h == 20
        assert replica.H == 35
Example #10
def test_2_nodes_get_only_preprepare(looper,
                                     txnPoolNodeSet,
                                     sdk_pool_handle,
                                     sdk_wallet_client,
                                     tconf,
                                     chkFreqPatched):
    # CHK_FREQ = 2 in this test
    # The number of stashed checkpoints needed to start catch-up is 2 (i.e. 4 batches)

    master_node = txnPoolNodeSet[0]
    behind_nodes = txnPoolNodeSet[-2:]

    # Nodes order batches
    sdk_send_batches_of_random_and_check(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1, 1)
    nodes_last_ordered_equal(*txnPoolNodeSet)

    # Emulate connection problems: the 1st behind_node receives only pre-prepares
    dont_send_prepare_and_commit_to(txnPoolNodeSet[:-2], behind_nodes[0].name)

    # Send some txns; the 1st behind_node can't order them while the pool keeps working
    sdk_send_batches_of_random_and_check(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1, 1)
    assert behind_nodes[0].master_last_ordered_3PC[1] + 1 == \
           master_node.master_last_ordered_3PC[1]

    # The 1st behind_node has stashed 1 checkpoint from each other node
    check_num_received_checkpoints(behind_nodes[0].master_replica, 1)
    check_received_checkpoint_votes(behind_nodes[0].master_replica, 2, 3)

    # Remove connection problems
    reset_sending(txnPoolNodeSet[:-2])

    # Send txns
    sdk_send_batches_of_random_and_check(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1, 1)

    # The 1st behind_node is getting new Prepares but still can't order,
    # because it can't reach a Prepare quorum for the previous batch
    assert behind_nodes[0].master_last_ordered_3PC[1] + 1 * 2 == \
           master_node.master_last_ordered_3PC[1]

    # Emulate connection problems: the 2nd behind_node receives only pre-prepares
    dont_send_prepare_and_commit_to(txnPoolNodeSet[:-2], behind_nodes[1].name)

    # Send some txns; the 2nd behind_node can't order them while the pool keeps working
    sdk_send_batches_of_random_and_check(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1, 1)
    assert behind_nodes[1].master_last_ordered_3PC[1] + 1 == \
           master_node.master_last_ordered_3PC[1]

    # The 2nd behind_node has stashed 1 checkpoint from every node except the 1st behind_node
    check_num_received_checkpoints(behind_nodes[1].master_replica, 1)
    check_received_checkpoint_votes(behind_nodes[1].master_replica, 4, 2)

    # The 1st behind_node has stashed another checkpoint, so it should start catch-up now
    waitNodeDataEquality(looper, master_node, behind_nodes[0], customTimeout=60,
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    # Remove connection problems
    reset_sending(txnPoolNodeSet[:-2])

    # Send txns
    sdk_send_batches_of_random_and_check(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1, 1)

    # The 2nd behind_node is getting new Prepares but still can't order,
    # because it can't reach a Prepare quorum for the previous batch
    assert behind_nodes[1].master_last_ordered_3PC[1] + 1 * 2 == \
           master_node.master_last_ordered_3PC[1]

    # After reaching a stable checkpoint, the behind_node starts ordering
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
                                         1, 1)
    # The 2nd behind_node has stashed another checkpoint, so it should start catch-up now
    waitNodeDataEquality(looper, master_node, behind_nodes[1], customTimeout=60,
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    # Pool is working
    waitNodeDataEquality(looper, master_node, *behind_nodes, customTimeout=5,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
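The "+ 1" and "+ 1 * 2" assertions in this test encode how far each behind node trails the master: once a node misses the Prepares and Commits for a batch, it cannot order that batch or any later one until the gap is closed, so every further batch widens the lag by one. A tiny sketch of that arithmetic:

# Lag arithmetic behind the master_last_ordered_3PC assertions above.
missed_batches = 1            # the batch ordered while the node saw only pre-prepares
batches_sent_after_miss = 1   # sent after connectivity was restored
lag = missed_batches + batches_sent_after_miss
assert lag == 1 * 2           # matches "behind_nodes[0].master_last_ordered_3PC[1] + 1 * 2"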