def test_freeing_forwarded_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward):
    # Case when both backup and primary had problems
    behind_node = txnPoolNodeSet[-1]

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    with delay_rules(behind_node.nodeIbStasher,
                     pDelay(delay=sys.maxsize),
                     cDelay(delay=sys.maxsize)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    looper.run(
        eventually(lambda: assertExp(len(behind_node.requests) == req_num)))
    assert all(r.executed for r in behind_node.requests.values()
               if behind_node.seqNoDB.get(r.request.key)[1])

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)

    # Master and backup replicas do not stash new requests
    # and successfully order them
    assert len(behind_node.requests) == req_num

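# The node_caughtup and assertExp helpers are referenced above but not defined
# in this section. Minimal sketches of what they are assumed to do:

def node_caughtup(node, old_count):
    # Passes once the node has completed at least one more catch-up
    # (allLedgersCaughtUp call) than before the delay rules were applied
    assert node.spylog.count(node.allLedgersCaughtUp) > old_count


def assertExp(condition):
    # Trivial assertion wrapper so that a condition can be retried
    # inside eventually()
    assert condition
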
def test_deletion_non_forwarded_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, tconf, tdir, allPluginsPath):
    behind_node = txnPoolNodeSet[-1]
    for key in behind_node.requests:
        behind_node.replicas.values()[1].discard_req_key(1, key)
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    behind_node.quorums.propagate = Quorum(len(txnPoolNodeSet) + 1)

    with delay_rules(behind_node.nodeIbStasher,
                     ppDelay(delay=sys.maxsize),
                     pDelay(delay=sys.maxsize),
                     cDelay(delay=sys.maxsize)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    # We clear caughtup requests
    looper.run(eventually(lambda: assertExp(len(behind_node.requests) == 0)))
    assert all([len(q) == 0 for r in behind_node.replicas.values()
                for q in r._ordering_service.requestQueues.values()])
    assert len(behind_node.clientAuthNr._verified_reqs) == 0
    assert len(behind_node.requestSender) == 0

def test_freeing_forwarded_not_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, tconf, tdir, allPluginsPath):
    behind_node = txnPoolNodeSet[-1]
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    # Delay checkpoints on the last backup instance (pass the instance id,
    # not the replica object, so that the delayer actually matches)
    with delay_rules(
            behind_node.nodeIbStasher,
            chk_delay(delay=sys.maxsize,
                      instId=behind_node.replicas.values()[-1].instId)):
        with delay_rules(behind_node.nodeIbStasher,
                         ppDelay(delay=sys.maxsize),
                         pDelay(delay=sys.maxsize),
                         cDelay(delay=sys.maxsize)):
            count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
            sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                       sdk_pool_handle, sdk_wallet_steward,
                                       req_num, req_num)
            looper.run(
                eventually(node_caughtup, behind_node, count, retryWait=1))

        # We execute caughtup requests
        looper.run(
            eventually(
                lambda: assertExp(len(behind_node.requests) == req_num)))
        assert all(r.executed for r in behind_node.requests.values()
                   if behind_node.seqNoDB.get(r.request.key)[1])

def test_clearing_forwarded_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward):
    # Case when the backup ordered correctly but the primary had problems.
    # As a result, the master will execute the caughtup txns, and they will
    # be removed from the request queues
    behind_node = txnPoolNodeSet[-1]

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    with delay_rules(behind_node.nodeIbStasher,
                     pDelay(delay=sys.maxsize, instId=0),
                     cDelay(delay=sys.maxsize, instId=0)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    assert len(behind_node.requests) == 0
    assert all([len(q) == 0 for r in behind_node.replicas.values()
                for q in r.requestQueues.values()])
    assert len(behind_node.clientAuthNr._verified_reqs) == 0
    assert len(behind_node.requestSender) == 0

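# Assumed imports for the stashing and delay helpers exercised in the tests
# above (standard locations in the plenum test utilities; a best guess for
# this module):
#
#     import sys
#     from plenum.test.delayers import cDelay, chk_delay, pDelay, ppDelay
#     from plenum.test.stasher import delay_rules
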
def test_node_erases_last_sent_pp_key_on_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, tconf):
    # Get a node with a backup primary replica
    replica = getPrimaryReplica(txnPoolNodeSet, instId=backup_inst_id)
    node = replica.node

    # Send some 3PC-batches and wait until the replica orders the 3PC-batches
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=3, num_batches=num_batches_before,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (0, 3)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Ensure that there is a stored last sent PrePrepare key on the node
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB

    # Make the pool perform view change
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    # Verify that the node has updated the stored last sent PrePrepare key
    # for the new view
    for value in node.last_sent_pp_store_helper._load_last_sent_pp_key().values():
        # + 1 because the key is advanced after the view change
        assert value == [node.viewNo, num_batches_before + 1]

    # Send a 3PC-batch and ensure that the replica orders it
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=1, num_batches=num_batches_after,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(
            lambda: assertExp(
                replica.last_ordered_3pc ==
                (1, num_batches_before + num_batches_after + 1)),
            retryWait=1,
            timeout=waits.expectedTransactionExecutionTime(nodeCount)))

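# Hypothetical reader for the stored key, mirroring the deserialization used
# in test_backup_primary_restores_pp_seq_no_if_view_is_same below: the value
# stored under LAST_SENT_PRE_PREPARE is assumed to map a stringified instance
# id to a [view_no, pp_seq_no] pair.
def read_last_sent_pp_keys(node):
    raw = node.nodeStatusDB.get(LAST_SENT_PRE_PREPARE)
    return node_status_db_serializer.deserialize(raw)  # e.g. {'1': [0, 3]}
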
def test_backup_primary_restores_pp_seq_no_if_view_is_same(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched, view_no):
    # Get a node with a backup primary replica
    replica = getPrimaryReplica(txnPoolNodeSet, instId=backup_inst_id)
    batches_count = 0 if view_no == 0 else 1
    node = replica.node

    # Send some 3PC-batches and wait until the replica orders the 3PC-batches
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=7, num_batches=num_batches,
                               timeout=tconf.Max3PCBatchWait)
    batches_count += num_batches
    looper.run(
        eventually(lambda r: assertExp(r.last_ordered_3pc ==
                                       (view_no, batches_count)),
                   replica,
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Check view no of the node and lastPrePrepareSeqNo of the replica
    assert node.viewNo == view_no
    assert replica.lastPrePrepareSeqNo == batches_count

    # Ensure that the node has stored the last sent PrePrepare key
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB
    last_sent_pre_prepare_key = \
        node_status_db_serializer.deserialize(
            node.nodeStatusDB.get(LAST_SENT_PRE_PREPARE))
    assert last_sent_pre_prepare_key == {
        str(backup_inst_id): [view_no, batches_count]
    }

    # Restart the node containing the replica
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node.name, stopNode=True)
    looper.removeProdable(node)
    txnPoolNodeSet.remove(node)

    node = start_stopped_node(node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(node)

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet)

    replica = node.replicas[backup_inst_id]

    # Verify that after the successful propagate primary procedure the replica
    # (which must still be the primary in its instance) has restored
    # lastPrePrepareSeqNo and adjusted last_ordered_3pc and shifted
    # the watermarks correspondingly
    assert node.viewNo == view_no
    assert replica.isPrimary
    assert replica.lastPrePrepareSeqNo == batches_count
    assert replica.last_ordered_3pc == (view_no, batches_count)
    assert replica.h == batches_count
    assert replica.H == batches_count + LOG_SIZE

    # Verify also that the stored last sent PrePrepare key has not been erased
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB

    # Send a 3PC-batch and ensure that the replica orders it
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=1, num_batches=1,
                               timeout=tconf.Max3PCBatchWait)
    batches_count += 1
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc ==
                                     (view_no, batches_count)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

def test_2_nodes_get_only_preprepare(looper, txnPoolNodeSet,
                                     sdk_pool_handle, sdk_wallet_client,
                                     tconf, chkFreqPatched):
    master_node = txnPoolNodeSet[0]
    behind_nodes = txnPoolNodeSet[-2:]
    delta = tconf.CHK_FREQ * 3
    num_of_batches = 1

    # Nodes order batches
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_of_batches, num_of_batches)
    nodes_last_ordered_equal(*txnPoolNodeSet)

    # Emulate connection problems: the 1st behind_node receives only pre-prepares
    dont_send_prepare_and_commit_to(txnPoolNodeSet[:-2], behind_nodes[0].name)

    # Send some txns; the 1st behind_node can't order them while the pool is working
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_of_batches, num_of_batches)
    assert behind_nodes[0].master_last_ordered_3PC[1] + num_of_batches == \
        master_node.master_last_ordered_3PC[1]

    # Remove connection problems
    reset_sending(txnPoolNodeSet[:-2])

    # Send txns
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_of_batches, num_of_batches)

    # The 1st behind_node is getting new prepares but still can't order,
    # because it can't get a prepare quorum for the previous batch
    assert behind_nodes[0].master_last_ordered_3PC[1] + num_of_batches * 2 == \
        master_node.master_last_ordered_3PC[1]

    # Emulate connection problems: the 2nd behind_node receives only pre-prepares
    dont_send_prepare_and_commit_to(txnPoolNodeSet[:-2], behind_nodes[1].name)

    # Send some txns; the 2nd behind_node can't order them while the pool is working
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_of_batches, num_of_batches)
    assert behind_nodes[1].master_last_ordered_3PC[1] + num_of_batches == \
        master_node.master_last_ordered_3PC[1]

    # Remove connection problems
    reset_sending(txnPoolNodeSet[:-2])

    # Send txns
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_of_batches, num_of_batches)

    # The 2nd behind_node is getting new prepares but still can't order,
    # because it can't get a prepare quorum for the previous batch
    assert behind_nodes[1].master_last_ordered_3PC[1] + num_of_batches * 2 == \
        master_node.master_last_ordered_3PC[1]

    # After reaching a stable checkpoint, the behind nodes start ordering
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               delta, delta)

    # Pool is working
    looper.run(eventually(nodes_last_ordered_equal, *behind_nodes, master_node))

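# Hedged sketches of the helpers used above; the real implementations live
# elsewhere in the test suite. nodes_last_ordered_equal simply compares
# master_last_ordered_3PC across nodes; dont_send_prepare_and_commit_to is
# shown here monkeypatching Node.sendToNodes to drop Prepare and Commit
# messages addressed to one node. Attribute names beyond those already used
# in the tests are assumptions for illustration.
from plenum.common.messages.node_messages import Commit, Prepare


def nodes_last_ordered_equal(*nodes):
    last_ordered = nodes[0].master_last_ordered_3PC
    for node in nodes[1:]:
        assert node.master_last_ordered_3PC == last_ordered


def dont_send_prepare_and_commit_to(nodes, behind_node_name):
    for node in nodes:
        if not hasattr(node, '_orig_send_to_nodes'):
            node._orig_send_to_nodes = node.sendToNodes

        def patched_send(msg, names=None, _node=node):
            # Drop Prepares and Commits destined for the behind node only
            if isinstance(msg, (Prepare, Commit)) and names:
                names = [n for n in names if n != behind_node_name]
            _node._orig_send_to_nodes(msg, names)

        node.sendToNodes = patched_send


def reset_sending(nodes):
    for node in nodes:
        if hasattr(node, '_orig_send_to_nodes'):
            node.sendToNodes = node._orig_send_to_nodes
            del node._orig_send_to_nodes
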
def test_2_node_got_no_preprepare(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client,
                                  tconf, chkFreqPatched):
    master_node = txnPoolNodeSet[0]
    behind_nodes = txnPoolNodeSet[-2:]
    delta = tconf.CHK_FREQ * 3
    num_of_batches = 1

    # Nodes order batches
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_of_batches, num_of_batches)
    nodes_last_ordered_equal(*behind_nodes, master_node)

    # Emulate connection problems: behind_node doesn't receive pre-prepares
    router_dont_accept_messages_from(behind_nodes[0], master_node.name)

    # Send some txns; behind_node can't order them while the pool is working
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_of_batches, num_of_batches)
    with pytest.raises(AssertionError):
        nodes_last_ordered_equal(behind_nodes[0], master_node)

    # behind_node has requested the missing pre-prepare and won't request it
    # again; it will catch up at the closest stable checkpoint

    # Remove connection problems
    reset_router_accepting(behind_nodes[0])

    # Send txns
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_of_batches, num_of_batches)

    # behind_node stashes the new 3PC messages; it is not ordering
    # and not participating in consensus
    assert len(behind_nodes[0].master_replica._ordering_service.
               prePreparesPendingPrevPP) == 1
    with pytest.raises(AssertionError):
        nodes_last_ordered_equal(behind_nodes[0], master_node)

    # Emulate connection problems: behind_node doesn't receive pre-prepares
    router_dont_accept_messages_from(behind_nodes[1], master_node.name)

    # Send some txns; behind_node can't order them while the pool is working
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_of_batches, num_of_batches)

    # Remove connection problems
    reset_router_accepting(behind_nodes[1])

    # Pool stayed alive
    looper.run(
        eventually(nodes_last_ordered_equal, behind_nodes[1], master_node))

    # Send txns
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_of_batches, num_of_batches)

    # After reaching a stable checkpoint, behind_node starts ordering
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               delta, delta)

    # Pool is working
    looper.run(eventually(nodes_last_ordered_equal, *behind_nodes, master_node))

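# Hedged sketch of the router helpers used above (an assumption for
# illustration): incoming messages from the named node are filtered out in
# Node.handleOneNodeMsg, so the behind node never sees the master primary's
# pre-prepares.
def router_dont_accept_messages_from(node, frm):
    if not hasattr(node, '_orig_handle_one_node_msg'):
        node._orig_handle_one_node_msg = node.handleOneNodeMsg

    def patched_handle(wrapped_msg):
        # wrapped_msg is a (msg, sender) pair; drop everything from frm
        msg, sender = wrapped_msg
        if sender != frm:
            node._orig_handle_one_node_msg(wrapped_msg)

    node.handleOneNodeMsg = patched_handle


def reset_router_accepting(node):
    if hasattr(node, '_orig_handle_one_node_msg'):
        node.handleOneNodeMsg = node._orig_handle_one_node_msg
        del node._orig_handle_one_node_msg
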
def test_node_not_erases_last_sent_pp_key_on_pool_restart(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched):
    # Get a node with a backup primary replica
    replica = getPrimaryReplica(txnPoolNodeSet, instId=backup_inst_id)
    node = replica.node

    # Send some 3PC-batches and wait until the replica orders the 3PC-batches
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=7, num_batches=7,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (0, 7)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Check view no of the node and lastPrePrepareSeqNo of the replica
    assert node.viewNo == 0
    assert replica.lastPrePrepareSeqNo == 7
    assert replica.h == 6
    assert replica.H == 6 + LOG_SIZE

    # Ensure that there is a stored last sent PrePrepare key on the node
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB

    # Save old state
    old_view_no = node.viewNo
    old_is_primary = replica.isPrimary
    old_last_pp = replica.lastPrePrepareSeqNo
    old_last_ordered = replica.last_ordered_3pc

    # Restart all the nodes in the pool and wait for primary elections done
    all_nodes = copy(txnPoolNodeSet)
    for n in all_nodes:
        disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                                n.name, timeout=nodeCount,
                                                stopNode=True)
        looper.removeProdable(n)
        txnPoolNodeSet.remove(n)
    for n in all_nodes:
        txnPoolNodeSet.append(
            start_stopped_node(n, looper, tconf, tdir, allPluginsPath))
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet)

    node = nodeByName(txnPoolNodeSet, node.name)
    replica = node.replicas[backup_inst_id]

    # Verify that the node has not erased the stored last sent PrePrepare key
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB

    # Verify correspondingly that after the pool restart the replica
    # (which must again be the primary in its instance) has restored
    # lastPrePrepareSeqNo, adjusted last_ordered_3pc and shifted
    # the watermarks
    assert node.viewNo == old_view_no
    assert replica.isPrimary == old_is_primary
    assert replica.lastPrePrepareSeqNo == old_last_pp
    assert replica.last_ordered_3pc == old_last_ordered
    assert replica.h == replica.last_ordered_3pc[1]
    assert replica.H == replica.last_ordered_3pc[1] + LOG_SIZE

    # Send a 3PC-batch and ensure that the replica orders it
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=1, num_batches=1,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(
            lambda: assertExp(replica.last_ordered_3pc ==
                              (old_last_ordered[0], old_last_ordered[1] + 1)),
            retryWait=1,
            timeout=waits.expectedTransactionExecutionTime(nodeCount)))

def test_node_erases_last_sent_pp_key_on_pool_restart(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched):
    # Get a node with a backup primary replica
    replica = getPrimaryReplica(txnPoolNodeSet, instId=backup_inst_id)
    node = replica.node

    # Send some 3PC-batches and wait until the replica orders the 3PC-batches
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=7, num_batches=7,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (0, 7)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Check view no of the node and lastPrePrepareSeqNo of the replica
    assert node.viewNo == 0
    assert replica.lastPrePrepareSeqNo == 7
    assert replica.h == 6
    assert replica.H == 6 + LOG_SIZE

    # Ensure that there is a stored last sent PrePrepare key on the node
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB

    # Restart all the nodes in the pool and wait for primary elections done
    all_nodes = copy(txnPoolNodeSet)
    for n in all_nodes:
        disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                                n.name, timeout=nodeCount,
                                                stopNode=True)
        looper.removeProdable(n)
        txnPoolNodeSet.remove(n)
    for n in all_nodes:
        txnPoolNodeSet.append(
            start_stopped_node(n, looper, tconf, tdir, allPluginsPath))
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet)

    node = nodeByName(txnPoolNodeSet, node.name)
    replica = node.replicas[backup_inst_id]

    # Verify that the node has erased the stored last sent PrePrepare key
    assert LAST_SENT_PRE_PREPARE not in node.nodeStatusDB

    # Verify correspondingly that after the pool restart the replica
    # (which must again be the primary in its instance) has not restored
    # lastPrePrepareSeqNo, has not adjusted last_ordered_3pc and has not
    # shifted the watermarks
    assert node.viewNo == 0
    assert replica.isPrimary
    assert replica.lastPrePrepareSeqNo == 0
    assert replica.last_ordered_3pc == (0, 0)
    assert replica.h == 0
    assert replica.H == 0 + LOG_SIZE

    # Send a 3PC-batch and ensure that the replica orders it
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=1, num_batches=1,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (0, 1)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

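# Hypothetical minimal version of the nodeByName lookup used in the restart
# tests above, assuming node names are unique within the pool.
def nodeByName(nodes, name):
    for node in nodes:
        if node.name == name:
            return node
    raise ValueError("Node {} not found".format(name))
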