def test_view_change_during_unstash(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client, tconf):
    slow_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    slow_stasher = slow_node.nodeIbStasher
    other_stashers = [n.nodeIbStasher for n in other_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Prevent ordering of some requests
    start_delaying(all_stashers, delay_3pc(after=7, msgs=(Prepare, Commit)))

    # Stop ordering on slow node and send requests
    slow_node_after_5 = start_delaying(slow_stasher,
                                       delay_3pc(after=5, msgs=Commit))
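    # slow_node_after_5 keeps the slow node from ordering past batch 5,
    # while slow_node_until_5 below holds all of its 3PC traffic until released after catchup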
    slow_node_until_5 = start_delaying(slow_stasher, delay_3pc(after=0))
    reqs_view_0 = sdk_send_random_requests(looper, sdk_pool_handle,
                                           sdk_wallet_client, 8)

    # Make pool order first 2 batches and pause
    pool_after_3 = start_delaying(other_stashers, delay_3pc(after=3))
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))

    # Start catchup, continue ordering everywhere (except the last two batches on the slow node)
    with delay_rules(slow_stasher, cr_delay()):
        slow_node._do_start_catchup(just_started=False)
        looper.run(eventually(check_catchup_is_started, slow_node))
        stop_delaying_and_process(pool_after_3)
        looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 7))

    # Finish catchup and continue processing on slow node
    looper.run(eventually(check_catchup_is_finished, slow_node))
    stop_delaying_and_process(slow_node_until_5)
    looper.run(eventually(check_nodes_ordered_till, [slow_node], 0, 5))

    # Start view change and allow slow node to get remaining commits
    with delay_rules(all_stashers, icDelay()):
        for node in txnPoolNodeSet:
            node.view_changer.on_master_degradation()
        looper.runFor(0.1)
    stop_delaying_and_process(slow_node_after_5)

    # Ensure that the expected number of requests was ordered
    replies = sdk_get_replies(looper, reqs_view_0)
    for rep in replies[:6]:
        sdk_check_reply(rep)

    # Ensure that everything is ok
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_catchup_from_unequal_nodes_without_waiting(looper,
                                                    txnPoolNodeSet,
                                                    sdk_pool_handle,
                                                    sdk_wallet_client):
    normal_node = txnPoolNodeSet[0]
    lagging_node_1 = txnPoolNodeSet[1]
    lagging_node_2 = txnPoolNodeSet[2]
    stopped_node = txnPoolNodeSet[3]

    # Make sure everyone has one batch
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

    # Wait until all nodes have same data and store last 3PC number of node that's going to be "stopped"
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
    last_3pc = stopped_node.master_last_ordered_3PC

    with delay_rules_without_processing(stopped_node.nodeIbStasher, delay_3pc()):
        # Create one more batch on all nodes except "stopped" node
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

        with delay_rules(lagging_node_1.nodeIbStasher, delay_3pc(msgs=Commit)):
            # Create one more batch on all nodes except "stopped" and first lagging node
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)

            with delay_rules(lagging_node_2.nodeIbStasher, delay_3pc(msgs=Commit)):
                # Create one more batch on all nodes except "stopped" and both lagging nodes
                # This time we can't wait for replies because there will be only one
                reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)

                # Wait until normal node orders txn
                looper.run(eventually(lambda: assert_eq(normal_node.master_last_ordered_3PC[1],
                                                        last_3pc[1] + 3)))

                # Now all nodes have a different number of txns, so if we try to start a catchup
                # it is guaranteed that we'll need to ask for equal consistency proofs, and
                # the disabled timeout ensures that the node can do so without relying on a timeout
                stopped_node.start_catchup()

                # Wait until catchup ends
                looper.run(eventually(lambda: assert_eq(stopped_node.ledgerManager._node_leecher._state,
                                                        NodeLeecherService.State.Idle)))

                # Ensure stopped node caught up at least one batch
                assert stopped_node.master_last_ordered_3PC[1] > last_3pc[1]

                # And there was no view change
                assert stopped_node.master_last_ordered_3PC[0] == last_3pc[0]

            # Make sure replies from last request are eventually received
            sdk_get_and_check_replies(looper, reqs)
def test_node_reject_invalid_txn_during_catchup(looper, sdk_pool_handle,
                                                sdk_wallet_client, tconf, tdir,
                                                txnPoolNodeSet, bad_node,
                                                lagging_node):
    """
    Make sure that the catching-up node blacklists nodes which send incorrect catchup replies
    """
    normal_nodes = [
        node for node in txnPoolNodeSet
        if node not in [bad_node, lagging_node]
    ]
    normal_stashers = [node.nodeIbStasher for node in normal_nodes]

    with delay_rules_without_processing(lagging_node.nodeIbStasher,
                                        delay_3pc()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 5)

        # Perform catchup, while making sure that the cons proof from the bad node is received
        # before cons proofs from normal nodes, so the bad node can participate in catchup
        with delay_rules(normal_stashers, lsDelay()):
            lagging_node.start_catchup()

            node_leecher = lagging_node.ledgerManager._node_leecher
            audit_cons_proof_service = node_leecher._leechers[
                AUDIT_LEDGER_ID]._cons_proof_service
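            # Wait until the cons proof from the bad node reaches the audit ledger's cons proof service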
            looper.run(
                eventually(lambda: assert_in(
                    bad_node.name, audit_cons_proof_service._cons_proofs)))

        waitNodeDataEquality(looper, lagging_node, *normal_nodes)
        assert lagging_node.isNodeBlacklisted(bad_node.name)
def test_catchup_with_reask_cp(txnPoolNodeSet, looper, sdk_pool_handle,
                               sdk_wallet_steward, tconf, tdir,
                               allPluginsPath):
    '''
    Start a catchup
    Delay ConsistencyProofs twice
    Check that the catchup finished
    '''
    lagged_node = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    with delay_rules_without_processing(
            lagged_node.nodeIbStasher, delay_3pc(),
            msg_rep_delay(types_to_delay=[COMMIT])):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 2)
        lagged_node.nodeIbStasher.drop_delayeds()

    with delay_rules_without_processing(lagged_node.nodeIbStasher, cpDelay()):
        lagged_node.start_catchup()

        def chk():
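            # Each of the other nodes should have sent its ConsistencyProof at least twice (initial reply plus a re-ask)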
            cp_count = 0
            for msg in lagged_node.nodeIbStasher.delayeds:
                if isinstance(msg.item[0], ConsistencyProof):
                    cp_count += 1
            assert cp_count >= (len(txnPoolNodeSet) - 1) * 2
            lagged_node.nodeIbStasher.drop_delayeds()

        looper.run(eventually(chk))
    waitNodeDataEquality(looper,
                         lagged_node,
                         *txnPoolNodeSet,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
def test_catchup_with_all_nodes_sending_cons_proofs_dead(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, logsearch):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
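    # The lagging node now cannot order anything while the rest of the pool orders 10 requests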
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 10)

    log_re_ask, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    old_re_ask_count = len(log_re_ask)

    catchup_reqs = {
        node.name: start_delaying(node.nodeIbStasher, cqDelay())
        for node in other_nodes
    }
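    # With CatchupReq delayed on the other nodes, the catchup below will stall after cons proofs are gathered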
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[
        AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(
        eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure cons proofs were gathered from all other nodes
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests only from nodes that didn't respond first
    for node_id, node_reqs in catchup_reqs.items():
        if node_id not in audit_catchup_service._nodes_ledger_sizes:
            stop_delaying_and_process(node_reqs)

    # Check catchup finishes successfully, and there were reasks
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    assert len(log_re_ask) - old_re_ask_count > 0
def test_missing_pp_before_starting_vc(tconf, txnPoolNodeSet, looper,
                                       sdk_pool_handle, sdk_wallet_steward):
    '''
    - all nodes delay PrePrepares for viewNo=1 with ppSeqNo<4
    - all nodes go to view=1
    - 10 batches are sent (they will not be ordered because of the missing (delayed) PrePrepares)
    - all nodes start a view change to view=2
    - all nodes finish the view change successfully and can continue ordering
    '''
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # 1. delay PrePrepares with ppSeqNo<4
    with delay_rules(all_stashers,
                     delay_3pc(view_no=1, before=4, msgs=PrePrepare)):
        # 2. do view change for view=1
        ensure_view_change(looper, txnPoolNodeSet)
        looper.run(eventually(check_not_in_view_change, txnPoolNodeSet))

        # 3. send requests
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward,
                                 10)

        # 4. do view change for view=2
        ensure_view_change(looper, txnPoolNodeSet)

    # 5. ensure everything is fine
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
def test_catchup_uses_only_nodes_with_cons_proofs(looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 10)

    catchup_reqs = {node.name: start_delaying(node.nodeIbStasher, cqDelay())
                    for node in other_nodes}
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure cons proofs were gathered from all other nodes
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests only for nodes that provided cons proofs
    for node_id in audit_catchup_service._nodes_ledger_sizes.keys():
        stop_delaying_and_process(catchup_reqs[node_id])

    # Check catchup finishes successfully
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
def test_re_order_pre_prepares_no_pre_prepares(looper, txnPoolNodeSet,
                                               sdk_wallet_client, sdk_pool_handle):
    # 1. Drop PrePrepares, Prepares and Commits on the 4th node
    # Order a couple of requests on Nodes 1-3
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]
    with delay_rules_without_processing(lagging_node.nodeIbStasher, delay_3pc()):
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client, 3)
        assert all(n.master_last_ordered_3PC == (0, 3) for n in other_nodes)

    with delay_rules_without_processing(lagging_node.nodeIbStasher,
                                        msg_rep_delay(types_to_delay=[PREPREPARE, PREPARE, COMMIT])):
        # 2. simulate view change start so that
        # all PrePrepares/Prepares/Commits are cleared
        # and uncommitted txns are reverted
        for n in txnPoolNodeSet:
            n.replicas.send_to_internal_bus(ViewChangeStarted(view_no=1))
            master_ordering_service = n.master_replica._ordering_service
            assert not master_ordering_service.prePrepares
            assert not master_ordering_service.prepares
            assert not master_ordering_service.commits
            ledger = n.db_manager.ledgers[DOMAIN_LEDGER_ID]
            state = n.db_manager.states[DOMAIN_LEDGER_ID]
            assert len(ledger.uncommittedTxns) == 0
            assert ledger.uncommitted_root_hash == ledger.tree.root_hash
            assert state.committedHead == state.head

        # check that all nodes but the lagging one have old_view_pps stored
        for n in other_nodes:
            assert n.master_replica._ordering_service.old_view_preprepares
        assert not lagging_node.master_replica._ordering_service.old_view_preprepares

        # 3. Simulate View Change finish to re-order the same PrePrepare
        assert lagging_node.master_last_ordered_3PC == (0, 0)
        new_master = txnPoolNodeSet[1]
        batches = sorted([preprepare_to_batch_id(pp) for _, pp in
                         new_master.master_replica._ordering_service.old_view_preprepares.items()])
        new_view_msg = NewView(viewNo=0,
                               viewChanges=[],
                               checkpoint=None,
                               batches=batches)
        new_view_chk_applied_msg = NewViewCheckpointsApplied(view_no=0,
                                                             view_changes=[],
                                                             checkpoint=None,
                                                             batches=batches)
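        # Push the simulated NewView into every replica and trigger re-ordering of the old view's PrePrepares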
        for n in txnPoolNodeSet:
            n.master_replica._consensus_data.new_view = new_view_msg
            n.master_replica._consensus_data.prev_view_prepare_cert = batches[-1].pp_seq_no
            n.master_replica._ordering_service._bus.send(new_view_chk_applied_msg)

        # 4. Make sure that nodes 1-3 (which already ordered the requests) sent Prepares and Commits so that
        # the requests were eventually ordered on Node4 as well
        waitNodeDataEquality(looper, lagging_node, *other_nodes, customTimeout=60)
        assert lagging_node.master_last_ordered_3PC == (0, 4)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle)
def test_catchup_from_unequal_nodes_without_reasking(looper, txnPoolNodeSet,
                                                     sdk_pool_handle,
                                                     sdk_wallet_client):
    lagged_node_1 = txnPoolNodeSet[-1]
    lagged_node_2 = txnPoolNodeSet[-2]
    normal_nodes = [
        node for node in txnPoolNodeSet
        if node not in [lagged_node_1, lagged_node_2]
    ]
    normal_stashers = [node.nodeIbStasher for node in normal_nodes]

    with delay_rules_without_processing(lagged_node_1.nodeIbStasher,
                                        delay_3pc()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)

        with delay_rules_without_processing(lagged_node_2.nodeIbStasher,
                                            delay_3pc()):
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, 7)
            ensure_all_nodes_have_same_data(looper,
                                            normal_nodes,
                                            custom_timeout=30)

            # Perform catchup, while making sure that the cons proof from lagged_node_2 is received
            # before cons proofs from normal nodes, so lagged_node_2 can participate in catchup
            with delay_rules(normal_stashers, lsDelay()):
                lagged_node_1.start_catchup()

                node_leecher = lagged_node_1.ledgerManager._node_leecher
                audit_cons_proof_service = node_leecher._leechers[
                    AUDIT_LEDGER_ID]._cons_proof_service
                looper.run(
                    eventually(lambda: assertExp(lagged_node_2.name in
                                                 audit_cons_proof_service.
                                                 _cons_proofs)))

            # Make sure catchup finishes successfully
            ensure_all_nodes_have_same_data(looper,
                                            set(txnPoolNodeSet) -
                                            {lagged_node_2},
                                            custom_timeout=30)
            assert lagged_node_1.ledgerManager._node_leecher._state == NodeLeecherService.State.Idle
def test_catchup_with_old_txn_metadata_digest_format(tdir, tconf, looper,
                                                     txnPoolNodeSet,
                                                     sdk_pool_handle,
                                                     sdk_wallet_client,
                                                     monkeypatch):
    lagging_node = txnPoolNodeSet[-1]
    lagging_stasher = lagging_node.nodeIbStasher
    other_nodes = txnPoolNodeSet[:-1]

    # Utility
    def check_nodes_domain_ledger(nodes: Iterable, txn_count: int):
        for node in nodes:
            assert node.domainLedger.size >= txn_count

    # Patch payload metadata; note that this will prevent the pool from sending adequate replies to clients
    def append_old_payload_metadata(txn,
                                    frm=None,
                                    req_id=None,
                                    digest=None,
                                    payload_digest=None,
                                    taa_acceptance=None,
                                    endorser=None):
        txn = append_payload_metadata(txn, frm, req_id, digest, payload_digest,
                                      taa_acceptance, endorser)
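        # Emulate the old format: drop the separate payload digest field and store the payload digest under the digest key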
        metadata = txn[TXN_PAYLOAD][TXN_PAYLOAD_METADATA]
        del metadata[TXN_PAYLOAD_METADATA_PAYLOAD_DIGEST]
        metadata[TXN_PAYLOAD_METADATA_DIGEST] = payload_digest
        return txn

    monkeypatch.setattr(txn_util, 'append_payload_metadata',
                        append_old_payload_metadata)

    # Check pool initial state
    initial_size = txnPoolNodeSet[0].domainLedger.size
    for node in txnPoolNodeSet:
        assert node.domainLedger.size == initial_size

    # Order some transactions, with one node discarding messages
    with delay_rules_without_processing(lagging_stasher, delay_3pc()):
        reps = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, 10)
        looper.run(
            eventually(check_nodes_domain_ledger, other_nodes,
                       initial_size + 10))
        assert lagging_node.domainLedger.size == initial_size

    # Catchup transactions and ensure that all nodes will eventually have same data
    lagging_node.start_catchup()
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Catch replies
    sdk_get_replies(looper, reps)
def test_max_3pc_batches_in_flight(tdir, tconf,
                                   looper,
                                   txnPoolNodeSet,
                                   sdk_pool_handle,
                                   sdk_wallet_client):
    # Check pool initial state
    initial_3pc = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    for node in txnPoolNodeSet[1:]:
        assert node.master_replica.last_ordered_3pc == initial_3pc

    # Utility
    def check_ordered_till(pp_seq_no: int):
        for node in txnPoolNodeSet:
            last_ordered = node.master_replica.last_ordered_3pc
            assert last_ordered[0] == initial_3pc[0]
            assert last_ordered[1] == pp_seq_no

    # Delay some commits
    all_stashers = [node.nodeIbStasher for node in txnPoolNodeSet]
    delayers = []
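    # Each delayer holds the 3PC messages for exactly one pp_seq_no, so batches can be released one at a time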
    for num in range(BATCHES_TO_ORDER):
        pp_seq_no = initial_3pc[1] + num + 1
        delayer = start_delaying(all_stashers, delay_3pc(after=pp_seq_no - 1,
                                                         before=pp_seq_no + 1))
        delayers.append((pp_seq_no, delayer))

    # Send a number of requests
    reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, BATCHES_TO_ORDER)

    # Continuously check number of batches in flight
    for pp_seq_no, delayer in delayers:
        stop_delaying_and_process(delayer)
        looper.run(eventually(check_ordered_till, pp_seq_no))

        for node in txnPoolNodeSet:
            for replica in node.replicas.values():
                batches_in_flight = replica.lastPrePrepareSeqNo - replica.last_ordered_3pc[1]
                assert batches_in_flight <= MAX_BATCHES_IN_FLIGHT

    # Check all requests are ordered
    sdk_get_and_check_replies(looper, reqs)

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_set_H_as_maxsize_for_backup_if_is_primary(looper, txnPoolNodeSet,
                                                   sdk_pool_handle,
                                                   sdk_wallet_steward, tconf,
                                                   tdir, allPluginsPath):
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    primary_on_backup = txnPoolNodeSet[2]
    assert primary_on_backup.replicas._replicas[1].isPrimary

    # Stop Node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_on_backup,
                                            stopNode=True)
    txnPoolNodeSet.remove(primary_on_backup)
    looper.removeProdable(primary_on_backup)

    # Start stopped Node
    primary_on_backup = start_stopped_node(primary_on_backup, looper, tconf,
                                           tdir, allPluginsPath)

    # Delay 3PC messages so that the restarted node does not order them
    with delay_rules(primary_on_backup.nodeIbStasher, delay_3pc()):
        txnPoolNodeSet.append(primary_on_backup)

        ensureElectionsDone(looper,
                            txnPoolNodeSet,
                            customTimeout=tconf.NEW_VIEW_TIMEOUT)

        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, LOG_SIZE)

        # Check restored state
        assert primary_on_backup.replicas._replicas[1].isPrimary
        assert primary_on_backup.replicas._replicas[1].h == 1
        assert primary_on_backup.replicas._replicas[1].H == 1 + LOG_SIZE

    def chk():
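        # The backup replica's watermarks should eventually advance to [LOG_SIZE, 2 * LOG_SIZE]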
        assert primary_on_backup.replicas._replicas[1].h == LOG_SIZE
        assert primary_on_backup.replicas._replicas[1].H == LOG_SIZE + LOG_SIZE

    # Check caught-up state
    looper.run(eventually(chk, retryWait=.2, timeout=tconf.NEW_VIEW_TIMEOUT))
def test_catchup_with_reask_ls(txnPoolNodeSet, looper, sdk_pool_handle,
                               sdk_wallet_steward, tconf, tdir,
                               allPluginsPath):
    '''
    Start a catchup
    Delay MessageRep responses with LedgerStatuses twice
    Check that the catchup finished
    '''
    lagged_node = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    lagged_node.nodeIbStasher.delay(msg_rep_delay(types_to_delay=[COMMIT]))

    with delay_rules_without_processing(
            lagged_node.nodeIbStasher, delay_3pc(),
            msg_rep_delay(types_to_delay=[COMMIT])):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 2)
        lagged_node.nodeIbStasher.drop_delayeds()
    with delay_rules_without_processing(
            lagged_node.nodeIbStasher, lsDelay(),
            msg_rep_delay(types_to_delay=[LEDGER_STATUS])):
        lagged_node.start_catchup()

        def chk():
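            # Each of the other nodes should have answered the LedgerStatus requests at least twice (initial reply plus a re-ask)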
            resp_ls_count = 0
            for msg in lagged_node.nodeIbStasher.delayeds:
                if isinstance(
                        msg.item[0],
                        MessageRep) and msg.item[0].msg_type == LEDGER_STATUS:
                    resp_ls_count += 1
            assert resp_ls_count >= (len(txnPoolNodeSet) - 1) * 2
            lagged_node.nodeIbStasher.drop_delayeds()

        looper.run(eventually(chk))
    waitNodeDataEquality(looper,
                         lagged_node,
                         *txnPoolNodeSet,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
def test_node_requests_missing_preprepares_and_prepares(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle,
        tconf, tdir, allPluginsPath):
    """
    2 of 4 nodes go down (simulated by dropping requests), so the pool cannot process any more incoming requests.
    A new request comes in. After a while those 2 nodes come back alive.
    Another request comes in. Check that the two previously disconnected nodes
    request missing PREPREPARES and PREPARES and the pool successfully handles
    both transactions after that.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1

    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]
    disconnected_nodes_stashers = [n.nodeIbStasher for n in disconnected_nodes]

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    with delay_rules_without_processing(disconnected_nodes_stashers, delay_3pc()):
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, MISSING_REQS_CNT)
        last_ordered_key = txnPoolNodeSet[0].master_replica.last_ordered_3pc
        looper.run(eventually(check_pp_out_of_sync,
                              alive_nodes,
                              disconnected_nodes,
                              last_ordered_key,
                              retryWait=1,
                              timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == init_ledger_size

    for node in disconnected_nodes:
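        # The dropped 3PC messages should not have triggered any message requests yet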
        assert node.master_replica._ordering_service.spylog.count(OrderingService._request_pre_prepare) == 0
        assert node.master_replica._ordering_service.spylog.count(OrderingService._request_prepare) == 0
        assert node.master_replica._message_req_service.spylog.count(MessageReqService.process_message_rep) == 0

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in disconnected_nodes:
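        # After the new request, the node should have requested the missing PrePrepares and Prepares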
        assert node.master_replica._ordering_service.spylog.count(OrderingService._request_pre_prepare) > 0
        assert node.master_replica._ordering_service.spylog.count(OrderingService._request_prepare) > 0
        assert node.master_replica._message_req_service.spylog.count(MessageReqService.process_message_rep) > 0

    def check_all_ordered():
        for node in txnPoolNodeSet:
            assert node.domainLedger.size == (init_ledger_size +
                                              MISSING_REQS_CNT +
                                              REQS_AFTER_RECONNECT_CNT)

    looper.run(eventually(check_all_ordered, timeout=20))
def test_catchup_with_skipped_commits(tdir, tconf, looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    lagging_stasher = lagging_node.nodeIbStasher
    other_nodes = txnPoolNodeSet[:-1]
    other_stashers = [node.nodeIbStasher for node in other_nodes]

    def lagging_node_state() -> NodeLeecherService.State:
        return lagging_node.ledgerManager._node_leecher._state

    def check_lagging_node_is_not_syncing_audit():
        assert lagging_node_state() != NodeLeecherService.State.SyncingAudit

    def check_lagging_node_done_catchup():
        assert lagging_node_state() == NodeLeecherService.State.Idle

    def check_nodes_ordered_till(nodes: Iterable, view_no: int,
                                 pp_seq_no: int):
        for node in nodes:
            assert compare_3PC_keys((view_no, pp_seq_no),
                                    node.master_replica.last_ordered_3pc) >= 0

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Setup delayers
    lagging_mid_commits = start_delaying(
        lagging_stasher, delay_3pc(after=3, before=6, msgs=Commit))
    others_mid_commits = start_delaying(
        other_stashers, delay_3pc(after=3, before=6, msgs=Commit))
    start_delaying(lagging_stasher, delay_3pc(before=4, msgs=Commit))
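    # Commits below 4 stay delayed on the lagging node, so it cannot order past (0, 1) until catchup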

    # Send more requests
    reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                                    6)

    # Wait until pool ordered till (0, 3)
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))
    assert lagging_node.master_replica.last_ordered_3pc == (0, 1)

    with delay_rules(lagging_stasher, delay_catchup(DOMAIN_LEDGER_ID)):
        with delay_rules(lagging_stasher, delay_catchup(AUDIT_LEDGER_ID)):
            # Start catchup
            lagging_node.start_catchup()
            looper.runFor(0.5)
            assert lagging_node_state() == NodeLeecherService.State.SyncingAudit

            # Process missing commits on lagging node
            stop_delaying_and_process(lagging_mid_commits)
            looper.runFor(0.5)

        # Allow the audit ledger to be caught up
        looper.run(eventually(check_lagging_node_is_not_syncing_audit))
        stop_delaying_and_process(others_mid_commits)

    # Ensure that audit ledger is caught up by lagging node
    looper.run(eventually(check_lagging_node_done_catchup))

    # Ensure that all requests were ordered
    sdk_get_and_check_replies(looper, reqs)

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_node_requests_missing_preprepares_prepares_and_commits(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tdir,
        allPluginsPath):
    """
    1 of 4 nodes goes down (simulated by dropping requests). A new request comes in and is ordered by
    the 3 remaining nodes. After a while the previously disconnected node
    comes back alive. Another request comes in. Check that the previously
    disconnected node requests missing PREPREPARES, PREPARES and COMMITS,
    orders the previous request, and all the nodes successfully handle
    the last request.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_node = txnPoolNodeSet[3]
    alive_nodes = txnPoolNodeSet[:3]
    disconnected_node_stashers = disconnected_node.nodeIbStasher

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    with delay_rules_without_processing(disconnected_node_stashers,
                                        delay_3pc()):
        last_ordered_key = txnPoolNodeSet[0].master_replica.last_ordered_3pc
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, MISSING_REQS_CNT)
        looper.run(
            eventually(check_pp_out_of_sync,
                       alive_nodes, [disconnected_node],
                       last_ordered_key,
                       retryWait=1,
                       timeout=expectedPoolGetReadyTimeout(
                           len(txnPoolNodeSet))))

    for node in alive_nodes:
        assert node.domainLedger.size == init_ledger_size + MISSING_REQS_CNT
    # Ensure that the reconnected node has not caught up though
    assert disconnected_node.domainLedger.size == init_ledger_size

    ordering_service = disconnected_node.master_replica._ordering_service
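    # No missing 3PC messages should have been requested or processed yet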
    assert ordering_service.spylog.count(
        OrderingService._request_pre_prepare) == 0
    assert ordering_service.spylog.count(OrderingService._request_prepare) == 0
    assert ordering_service.spylog.count(OrderingService._request_commit) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_commit) == 0
    doOrderTimesBefore = ordering_service.spylog.count(
        OrderingService._do_order)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_node, *alive_nodes)

    assert ordering_service.spylog.count(
        OrderingService._request_pre_prepare) > 0
    assert ordering_service.spylog.count(OrderingService._request_prepare) > 0
    assert ordering_service.spylog.count(OrderingService._request_commit) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_commit) > 0
    doOrderTimesAfter = ordering_service.spylog.count(
        OrderingService._do_order)
    # Ensure that the reconnected node has ordered both the missed 3PC-batch and the new 3PC-batch
    assert doOrderTimesAfter - doOrderTimesBefore == 2

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)

    def check_all_ordered():
        for node in txnPoolNodeSet:
            assert node.domainLedger.size == (init_ledger_size +
                                              MISSING_REQS_CNT +
                                              REQS_AFTER_RECONNECT_CNT)

    looper.run(eventually(check_all_ordered, timeout=20))
def check_catchup_with_skipped_commits_received_before_catchup(
        catchup_state, looper, txnPoolNodeSet, sdk_pool_handle,
        sdk_wallet_new_steward):
    lagging_node = txnPoolNodeSet[-1]
    lagging_stasher = lagging_node.nodeIbStasher
    other_nodes = txnPoolNodeSet[:-1]

    def delay_catchup(catchup_state: NodeLeecherService.State):
        ledger_id = NodeLeecherService.state_to_ledger[catchup_state]
        _delayer = cr_delay(ledger_filter=ledger_id)
        _delayer.__name__ = "delay_catchup({})".format(ledger_id)
        return _delayer

    def check_lagging_node_catchup_state(state: NodeLeecherService.State):
        assert lagging_node.ledgerManager._node_leecher._state == state

    def check_nodes_ordered_till(nodes: Iterable, view_no: int,
                                 pp_seq_no: int):
        for node in nodes:
            assert compare_3PC_keys((view_no, pp_seq_no),
                                    node.master_replica.last_ordered_3pc) >= 0

    # Make sure pool is in expected state
    init_pp_seq_no = txnPoolNodeSet[0].master_replica.last_ordered_3pc[1]
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, init_pp_seq_no)

    # Order pool requests while delaying first two commits on lagging node
    with delay_rules(lagging_stasher,
                     delay_3pc(before=init_pp_seq_no + 3, msgs=Commit)):
        # Send some pool requests
        reqs = sdk_send_random_pool_requests(looper, sdk_pool_handle,
                                             sdk_wallet_new_steward, 4)
        sdk_get_and_check_replies(looper, reqs)

    # Make sure pool is in expected state
    for node in other_nodes:
        assert node.master_replica.last_ordered_3pc == (0, init_pp_seq_no + 4)
    assert lagging_node.master_replica.last_ordered_3pc == (0, init_pp_seq_no)

    # Wait until two batches with delayed commits are ordered, but not more
    looper.run(
        eventually(check_nodes_ordered_till, [lagging_node], 0,
                   init_pp_seq_no + 2))
    assert lagging_node.master_replica.last_ordered_3pc == (0,
                                                            init_pp_seq_no + 2)

    with delay_rules(lagging_stasher, delay_catchup(catchup_state)):
        # Start catchup
        lagging_node.start_catchup()

        # Wait until catchup reaches desired state
        looper.run(eventually(check_lagging_node_catchup_state, catchup_state))

        # Emulate the scheduled action that processes commits stashed out of order
        lagging_node.master_replica._ordering_service._process_stashed_out_of_order_commits()

    # Ensure that audit ledger is caught up by lagging node
    looper.run(
        eventually(check_lagging_node_catchup_state,
                   NodeLeecherService.State.Idle))

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_node_requests_missing_preprepares_and_prepares_after_long_disconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tconf,
        tdirWithPoolTxns, tdir, allPluginsPath):
    """
    2 of 4 nodes go down (simulated by dropping requests), so the pool cannot process any more incoming requests.
    A new request comes in.
    The test then waits for some time to ensure that the PrePrepare was created
    long enough ago to be dropped by the time checker.
    The two stopped nodes come back alive.
    Another request comes in.
    Check that the two previously disconnected nodes request missing PREPREPARES
    and PREPARES and the pool successfully handles both transactions.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1

    alive_nodes = []
    disconnected_nodes = []

    for node in txnPoolNodeSet:
        if node.hasPrimary:
            alive_nodes.append(node)
        else:
            disconnected_nodes.append(node)
    disconnected_nodes_stashers = [n.nodeIbStasher for n in disconnected_nodes]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    with delay_rules_without_processing(disconnected_nodes_stashers,
                                        delay_3pc()):
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                                 MISSING_REQS_CNT)
        last_ordered_key = txnPoolNodeSet[0].master_replica.last_ordered_3pc
        looper.run(
            eventually(check_pp_out_of_sync,
                       alive_nodes,
                       disconnected_nodes,
                       last_ordered_key,
                       retryWait=1,
                       timeout=expectedPoolGetReadyTimeout(
                           len(txnPoolNodeSet))))

    preprepare_deviation = 4
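    # Limit the acceptable PrePrepare age and wait long enough for the dropped PrePrepares to exceed it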
    tconf.ACCEPTABLE_DEVIATION_PREPREPARE_SECS = preprepare_deviation
    time.sleep(preprepare_deviation * 2)

    for node in disconnected_nodes:
        assert node.domainLedger.size == init_ledger_size

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_pre_prepare) == 0
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_prepare) == 0
        assert node.master_replica._message_req_service.spylog.count(
            MessageReqService.process_message_rep) == 0

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_pre_prepare) > 0
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_prepare) > 0
        assert node.master_replica._message_req_service.spylog.count(
            MessageReqService.process_message_rep) > 0

    def check_all_ordered():
        for node in txnPoolNodeSet:
            assert node.domainLedger.size == (init_ledger_size +
                                              MISSING_REQS_CNT +
                                              REQS_AFTER_RECONNECT_CNT)

    looper.run(eventually(check_all_ordered, timeout=20))