コード例 #1
0
def nodes_slow_to_process_catchup_reqs(txnPoolNodeSet):
    """
    Delay CatchupReq handling on every node in the pool so that a
    catching-up node cannot complete catchup quickly, and therefore sends
    no 3PC messages until catchup is done.
    """
    for pool_node in txnPoolNodeSet:
        pool_node.nodeIbStasher.delay(cqDelay(catchup_delay))
コード例 #2
0
def nodes_slow_to_process_catchup_reqs(txnPoolNodeSet):
    """
    Make every pool node slow to answer CatchupReq messages, so a new
    node's catchup stays incomplete and it withholds 3PC messages in the
    meantime.
    """
    for each_node in txnPoolNodeSet:
        stasher = each_node.nodeIbStasher
        stasher.delay(cqDelay(catchup_delay))
コード例 #3
0
def test_catchup_with_all_nodes_sending_cons_proofs_dead(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, logsearch):
    """
    Check that when CatchupRep is delayed on the nodes whose ledger sizes
    were gathered first, the lagging node re-asks (after a timeout) the
    remaining nodes and still completes catchup successfully.
    """
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    # Hold back 3PC messages on the lagging node so it falls behind while
    # the rest of the pool orders 10 transactions.
    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 10)

    # Remember how many re-ask log lines exist before catchup starts, so
    # the final assertion counts only re-asks caused by this test.
    log_re_ask, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    old_re_ask_count = len(log_re_ask)

    # Delay CatchupReq on every other node; keep the delay rules keyed by
    # node name so they can be lifted selectively below.
    catchup_reqs = {
        node.name: start_delaying(node.nodeIbStasher, cqDelay())
        for node in other_nodes
    }
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[
        AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(
        eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure ledger sizes (cons proofs) were gathered from all 3 other nodes
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests only from nodes that didn't respond first
    for node_id, node_reqs in catchup_reqs.items():
        if node_id not in audit_catchup_service._nodes_ledger_sizes:
            stop_delaying_and_process(node_reqs)

    # Check catchup finishes successfully, and there were reasks
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    assert len(log_re_ask) - old_re_ask_count > 0
def test_catchup_uses_only_nodes_with_cons_proofs(looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client):
    """
    Check that catchup can complete using only the nodes whose ledger sizes
    (cons proofs) were gathered, without needing replies from the rest.
    """
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    # Hold back 3PC messages on the lagging node so it falls behind while
    # the rest of the pool orders 10 transactions.
    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 10)

    # Delay CatchupReq on every other node, keeping the rules keyed by node
    # name so they can be lifted selectively below.
    catchup_reqs = {node.name: start_delaying(node.nodeIbStasher, cqDelay())
                    for node in other_nodes}
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure ledger sizes (cons proofs) were gathered from all 3 other nodes
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests only for interesting nodes
    for node_id in audit_catchup_service._nodes_ledger_sizes.keys():
        stop_delaying_and_process(catchup_reqs[node_id])

    # Check catchup finishes successfully
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
コード例 #5
0
def test_catchup_with_one_slow_node(tdir, tconf, looper, txnPoolNodeSet,
                                    sdk_pool_handle, sdk_wallet_client,
                                    allPluginsPath, logsearch):
    '''
    1. Stop the node Delta
    2. Order 9 txns. In sending CatchupReq in a first round every
    node [Alpha, Beta, Gamma] will receive request for 3 txns.
    3. Delay CatchupReq messages on Alpha
    4. Start Delta
    5. Check that all nodes have equality data.
    6. Check that Delta re-ask CatchupRep only once.
    In the second CatchupRep (first re-ask) Delta shouldn't request
    CatchupRep from Alpha because it didn't answer early.
    If the behavior is wrong and Delta re-ask txns form all nodes,
    every node will receive request for 1 txns, Alpha will not answer
    and Delta will need a new re-ask round.
    '''
    # Prepare nodes
    lagging_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]

    # Stop one node
    waitNodeDataEquality(looper, lagging_node, *rest_nodes)
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagging_node,
                                            stopNode=True)
    looper.removeProdable(lagging_node)

    # Send more requests to active nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client,
                              len(rest_nodes) * 3)
    waitNodeDataEquality(looper, *rest_nodes)

    # Restart stopped node and wait for successful catch up
    # start=False: the node is only added to the looper below, once the
    # delay rules are in place.
    lagging_node = start_stopped_node(
        lagging_node,
        looper,
        tconf,
        tdir,
        allPluginsPath,
        start=False,
    )
    # Count existing re-ask log lines before catchup begins, so the final
    # assertion counts only re-asks caused by this restart.
    log_re_ask, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    old_re_ask_count = len(log_re_ask)

    # Delay CatchupRep messages on Alpha
    with delay_rules(rest_nodes[0].nodeIbStasher, cqDelay()):
        # NOTE(review): cs_delay presumably delays consistency-proof
        # messages on the restarting node — confirm against its definition.
        with delay_rules(lagging_node.nodeIbStasher, cs_delay()):
            looper.add(lagging_node)
            txnPoolNodeSet[-1] = lagging_node
            looper.run(checkNodesConnected(txnPoolNodeSet))

            waitNodeDataEquality(looper, *txnPoolNodeSet, customTimeout=120)
            assert len(
                log_re_ask
            ) - old_re_ask_count == 2  # for audit and domain ledgers
def testNewNodeCatchupWhileIncomingRequests(looper, txnPoolNodeSet,
                                            testNodeClass, tdir, tconf,
                                            sdk_pool_handle,
                                            sdk_wallet_steward,
                                            allPluginsPath):
    """
    A new node joins while transactions are happening, its catchup requests
    include till where it has to catchup, which would be less than the other
    node's ledger size. In the meantime, the new node will stash all requests
    """

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    def chkAfterCall(self, req, frm):
        # Wrapper around processCatchupReq that additionally asserts, for
        # the domain ledger, that the requested range never exceeds the
        # responder's current ledger size.
        r = self.processCatchupReq(req, frm)
        typ = getattr(req, f.LEDGER_ID.nm)
        if typ == DOMAIN_LEDGER_ID:
            ledger = self.getLedgerForMsg(req)
            assert req.catchupTill <= ledger.size
        return r

    # Route CatchupReq on every node through the checking wrapper, and slow
    # down CatchupReq processing so catchup overlaps with incoming requests.
    for node in txnPoolNodeSet:
        node.nodeMsgRouter.routes[CatchupReq] = \
            types.MethodType(chkAfterCall, node.ledgerManager)
        node.nodeIbStasher.delay(cqDelay(3))

    print('Sending 5 requests')
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, 5)
    looper.runFor(1)
    # Add a brand-new steward and node while the requests above are in flight
    new_steward_name = randomString()
    new_node_name = "Epsilon"
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper,
        sdk_pool_handle,
        sdk_wallet_steward,
        new_steward_name,
        new_node_name,
        tdir,
        tconf,
        nodeClass=testNodeClass,
        allPluginsPath=allPluginsPath,
        autoStart=True)
    sdk_pool_refresh(looper, sdk_pool_handle)
    txnPoolNodeSet.append(new_node)
    looper.runFor(2)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    # TODO select or create a timeout for this case in 'waits'
    looper.run(
        eventually(checkNodeDataForEquality,
                   new_node,
                   *txnPoolNodeSet[:-1],
                   retryWait=1,
                   timeout=80))
    # The new node must have processed requests it stashed during catchup
    assert new_node.spylog.count(TestNode.processStashedOrderedReqs) > 0
コード例 #7
0
def nodeStashingOrderedRequests(txnPoolNodeSet, nodeCreatedAfterSomeTxns):
    """
    Fixture: slow down CatchupReq handling on all nodes, register the new
    node in the pool, send requests, and wait until the new node (still
    catching up) has stashed some ordered requests.
    """
    looper, new_node, client, wallet, _, _ = nodeCreatedAfterSomeTxns
    for pool_node in txnPoolNodeSet:
        pool_node.nodeIbStasher.delay(cqDelay(5))
    txnPoolNodeSet.append(new_node)
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, client,
                                                  *txnPoolNodeSet[:-1])
    sendRandomRequests(wallet, client, 10)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    def check_stashing():
        # While it is not yet participating, the new node must be stashing
        # ordered requests.
        assert new_node.mode != Mode.participating
        assert len(new_node.stashedOrderedReqs) > 0

    wait_timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.run(eventually(check_stashing, retryWait=1, timeout=wait_timeout))
コード例 #8
0
def nodeStashingOrderedRequests(txnPoolNodeSet, sdk_node_created_after_some_txns):
    """
    Fixture (SDK variant): delay CatchupReq handling across the pool,
    register the newly created node, issue requests, and wait for that node
    to stash ordered requests while it is still catching up.
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = sdk_node_created_after_some_txns
    for existing_node in txnPoolNodeSet:
        existing_node.nodeIbStasher.delay(cqDelay(5))
    txnPoolNodeSet.append(new_node)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, new_steward_wallet_handle, sdk_pool_handle)
    sdk_send_random_requests(looper, sdk_pool_handle, new_steward_wallet_handle, 10)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    def assert_stashing():
        # The new node should still be catching up and stashing ordered
        # requests at this point.
        assert new_node.mode != Mode.participating
        assert len(new_node.stashedOrderedReqs) > 0

    max_wait = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.run(eventually(assert_stashing, retryWait=1, timeout=max_wait))
def testNewNodeCatchupWhileIncomingRequests(looper, txnPoolNodeSet,
                                            testNodeClass, tdir,
                                            tconf,
                                            sdk_pool_handle,
                                            sdk_wallet_steward,
                                            allPluginsPath):
    """
    A new node joins while transactions are happening, its catchup requests
    include till where it has to catchup, which would be less than the other
    node's ledger size. In the meantime, the new node will stash all requests
    """

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)

    def chkAfterCall(self, req, frm):
        # Wrapper around processCatchupReq that additionally asserts, for
        # the domain ledger, that the requested range never exceeds the
        # responder's current ledger size.
        r = self.processCatchupReq(req, frm)
        typ = getattr(req, f.LEDGER_ID.nm)
        if typ == DOMAIN_LEDGER_ID:
            ledger = self.getLedgerForMsg(req)
            assert req.catchupTill <= ledger.size
        return r

    # Route CatchupReq on every node through the checking wrapper, and slow
    # down CatchupReq processing so catchup overlaps with incoming requests.
    for node in txnPoolNodeSet:
        node.nodeMsgRouter.routes[CatchupReq] = \
            types.MethodType(chkAfterCall, node.ledgerManager)
        node.nodeIbStasher.delay(cqDelay(3))

    print('Sending 5 requests')
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_steward, 5)
    looper.runFor(1)
    # Add a brand-new steward and node while the requests above are in flight
    new_steward_name = randomString()
    new_node_name = "Epsilon"
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf, nodeClass=testNodeClass,
        allPluginsPath=allPluginsPath, autoStart=True)
    sdk_pool_refresh(looper, sdk_pool_handle)
    txnPoolNodeSet.append(new_node)
    looper.runFor(2)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)
    # TODO select or create a timeout for this case in 'waits'
    looper.run(eventually(checkNodeDataForEquality, new_node,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=150))
    # The new node must have processed requests it stashed during catchup
    assert new_node.spylog.count(TestNode.processStashedOrderedReqs) > 0
コード例 #10
0
def test_slow_node_reverts_unordered_state_during_catchup(
        looper, txnPoolNodeSet, client1, wallet1, client1Connected):
    """
    Delay COMMITs to a node such that when it needs to catchup, it needs to
    revert some unordered state. Also till this time the node should have
    receive all COMMITs such that it will apply some of the COMMITs (
    for which it has not received txns from catchup).
    For this delay COMMITs by long, do catchup for a little older than the
    state received in LedgerStatus, once catchup completes, reset delays and
    try to process delayed COMMITs, some COMMITs will be rejected but some will
    be processed since catchup was done for older ledger.
    """
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    # COMMITs are held far longer than CatchupReqs, so the slow node still
    # holds unordered 3PC state when catchup starts.
    commit_delay = 150
    catchup_req_delay = 15

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_twice(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                              delay_batches * Max3PCBatchSize)

    def is_catchup_needed_count():
        # Number of times is_catchup_needed returned True so far.
        return len(
            getAllReturnVals(slow_node,
                             slow_node.is_catchup_needed,
                             compare_val_to=True))

    # Baselines, so later assertions count only events caused by this test.
    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)
    old_cn_count = is_catchup_needed_count()

    # Other nodes are slow to respond to CatchupReq, so that `slow_node`
    # gets a chance to order COMMITs
    for n in other_nodes:
        n.nodeIbStasher.delay(cqDelay(catchup_req_delay))

    # NOTE(review): the view change is what drives the slow node into
    # catchup here — confirm against ensure_view_change's behavior.
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        lst_3pc = check_last_ordered_3pc(n1, n2)

    def chk1():
        # `slow_node` has prepared all 3PC messages which
        # `other_nodes` have ordered
        assert slow_master_replica.last_prepared_before_view_change == lst_3pc

    looper.run(eventually(chk1, retryWait=1))

    old_pc_count = slow_master_replica.spylog.count(
        slow_master_replica.can_process_since_view_change_in_progress)

    # Repair the network so the delayed COMMITs get processed
    slow_node.resetDelays()
    slow_node.force_process_delayeds()

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica.spylog.count(
            slow_master_replica.can_process_since_view_change_in_progress
        ) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # Some COMMITs were ordered but stashed and they were processed
        rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
        assert rv[0] == delay_batches

    looper.run(eventually(chk3, retryWait=1, timeout=catchup_req_delay + 5))

    def chk4():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(chk4,
                   retryWait=1,
                   timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))

    def chk5():
        # Once catchup was done, need of other catchup was not found
        assert is_catchup_needed_count() == old_cn_count

    looper.run(eventually(chk5, retryWait=1, timeout=5))

    # Pool must converge again and keep processing new requests correctly.
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)