def test_non_primary_recvs_3phase_message_outside_watermarks( chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_logsize): """ A node is slow in processing PRE-PREPAREs and PREPAREs such that lot of requests happen and the slow node has started getting 3 phase messages outside of it watermarks. Check that it queues up requests outside watermarks and once it has received stable checkpoint it processes more requests. It sends other nodes 3 phase messages older than their stable checkpoint so they should discard them. """ delay = 15 instId = 1 reqsToSend = reqs_for_logsize + 2 npr = getNonPrimaryReplicas(txnPoolNodeSet, instId) slowReplica = npr[0] slowNode = slowReplica.node slowNode.nodeIbStasher.delay(ppDelay(delay, instId)) slowNode.nodeIbStasher.delay(pDelay(delay, instId)) def discardCounts(replicas, pat): counts = {} for r in replicas: counts[r.name] = countDiscarded(r, pat) return counts oldStashCount = slowReplica.spylog.count( TestReplica.stashOutsideWatermarks.__name__) oldDiscardCounts = discardCounts( [n.replicas[instId] for n in txnPoolNodeSet if n != slowNode], 'achieved stable checkpoint') send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqsToSend) timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) looper.run( eventually(checkNodeDataForEquality, slowNode, *[_ for _ in txnPoolNodeSet if _ != slowNode], retryWait=1, timeout=timeout)) newStashCount = slowReplica.spylog.count( TestReplica.stashOutsideWatermarks.__name__) assert newStashCount > oldStashCount def chk(): counts = discardCounts( [n.replicas[instId] for n in txnPoolNodeSet if n != slowNode], 'achieved stable checkpoint') for nm, count in counts.items(): assert count > oldDiscardCounts[nm] timeout = waits.expectedNodeToNodeMessageDeliveryTime() * len( txnPoolNodeSet) + delay looper.run(eventually(chk, retryWait=1, timeout=timeout))
def test_discard_checkpoint_msg_for_stable_checkpoint(chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint): send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1)) node1 = txnPoolNodeSet[0] rep1 = node1.replicas[0] _, stableChk = rep1.firstCheckPoint oldChkpointMsg = Checkpoint(rep1.instId, rep1.viewNo, *_, stableChk.digest) rep1.send(oldChkpointMsg) recvReplicas = [n.replicas[0] for n in txnPoolNodeSet[1:]] looper.run(eventually(checkDiscardMsg, recvReplicas, oldChkpointMsg, "Checkpoint already stable", retryWait=1))
def test_old_checkpoint_deleted(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint): """ Send requests more than twice of `CHK_FREQ`, there should be one new stable checkpoint on each replica. The old stable checkpoint should be removed """ send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2 * reqs_for_checkpoint) send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 2, 0, retryWait=1, timeout=timeout))
def test_request_older_than_stable_checkpoint_removed(chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint): timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) max_batch_size = chkFreqPatched.Max3PCBatchSize # Send some requests (insufficient for checkpoint), # wait replies and check that current checkpoint is not stable reqs = send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint - max_batch_size) total_checkpoints = 1 looper.run( eventually(chkChkpoints, txnPoolNodeSet, total_checkpoints, retryWait=1, timeout=timeout)) chk_freq = chkFreqPatched.CHK_FREQ checkRequestCounts(txnPoolNodeSet, len(reqs), chk_freq - 1, 1) # Send some more requests to cause checkpoint stabilization send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, max_batch_size) # Check that checkpoint is stable now # and verify that requests for it were removed stable_checkpoint_id = 0 looper.run( eventually(chkChkpoints, txnPoolNodeSet, total_checkpoints, stable_checkpoint_id, retryWait=1, timeout=timeout)) checkRequestCounts(txnPoolNodeSet, 0, 0, 0) # Send some more requests to cause new checkpoint send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint + 1) total_checkpoints = 2 looper.run( eventually(chkChkpoints, txnPoolNodeSet, total_checkpoints, stable_checkpoint_id, retryWait=1, timeout=timeout)) checkRequestCounts(txnPoolNodeSet, 1, 1, 1)
def testLoggingTxnStateForValidRequest(looper, logsearch, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client): logsPropagate, _ = logsearch(levels=['INFO'], files=['propagator.py'], funcs=['propagate'], msgs=['propagating.*request.*from client']) logsOrdered, _ = logsearch(levels=['INFO'], files=['replica.py'], funcs=['order_3pc_key'], msgs=['ordered batch request']) logsCommited, _ = logsearch(levels=['INFO'], files=['node.py'], funcs=['executeBatch'], msgs=['committed batch request']) reqs = send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) req, _ = reqs[0] reqId = str(req['reqId']) assert any(reqId in record.getMessage() for record in logsPropagate) assert any(reqId in record.getMessage() for record in logsOrdered) assert any(reqId in record.getMessage() for record in logsCommited)
def test_checkpoint_created(chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint): """ After requests less than `CHK_FREQ`, there should be one checkpoint on each replica. After `CHK_FREQ`, one checkpoint should become stable """ # Send one batch less so checkpoint is not created send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint - (chkFreqPatched.Max3PCBatchSize)) # Deliberately waiting so as to verify that not more than 1 checkpoint is # created looper.runFor(2) chkChkpoints(txnPoolNodeSet, 1) send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, chkFreqPatched.Max3PCBatchSize) timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout))
def test_stable_checkpoint_when_one_instance_slow(chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint): delay = 5 pr = getPrimaryReplica(txnPoolNodeSet, 1) slowNode = pr.node otherNodes = [n for n in txnPoolNodeSet if n != slowNode] for n in otherNodes: n.nodeIbStasher.delay(ppDelay(delay, 1)) send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_checkpoint) timeout = waits.expectedTransactionExecutionTime( len(txnPoolNodeSet)) + delay looper.run( eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1, timeout=timeout))
def test_primary_recvs_3phase_message_outside_watermarks( perf_chk_patched, chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_for_logsize): """ One of the primary starts getting lot of requests, more than his log size and queues up requests since they will go beyond its watermarks. This happens since other nodes are slow in processing its PRE-PREPARE. Eventually this primary will send PRE-PREPARE for all requests and those requests will complete """ tconf = perf_chk_patched delay = 5 instId = 0 reqs_to_send = 2 * reqs_for_logsize + 1 logger.debug('Will send {} requests'.format(reqs_to_send)) npr = getNonPrimaryReplicas(txnPoolNodeSet, instId) pr = getPrimaryReplica(txnPoolNodeSet, instId) from plenum.server.replica import TPCStat orderedCount = pr.stats.get(TPCStat.OrderSent) for r in npr: r.node.nodeIbStasher.delay(ppDelay(delay, instId)) r.node.nodeIbStasher.delay(pDelay(delay, instId)) tm_exec_1_batch = waits.expectedTransactionExecutionTime( len(txnPoolNodeSet)) batch_count = math.ceil(reqs_to_send / tconf.Max3PCBatchSize) total_timeout = (tm_exec_1_batch + delay) * batch_count def chk(): assert orderedCount + batch_count == pr.stats.get(TPCStat.OrderSent) send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, reqs_to_send) looper.run(eventually(chk, retryWait=1, timeout=total_timeout))