def test_commits_recvd_first(looper, txnPoolNodeSet, sdk_wallet_client,
                             sdk_pool_handle):
    slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = 50
    slow_node.nodeIbStasher.delay(ppDelay(delay, 0))
    slow_node.nodeIbStasher.delay(pDelay(delay, 0))

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=20, num_batches=4)

    assert not slow_node.master_replica.prePrepares
    assert not slow_node.master_replica.prepares
    assert not slow_node.master_replica.commits
    assert len(slow_node.master_replica.commitsWaitingForPrepare) > 0

    slow_node.reset_delays_and_process_delayeds()
    waitNodeDataEquality(looper, slow_node, *other_nodes)
    assert check_if_all_equal_in_list([n.master_replica.ordered
                                       for n in txnPoolNodeSet])

    assert slow_node.master_replica.prePrepares
    assert slow_node.master_replica.prepares
    assert slow_node.master_replica.commits
    assert not slow_node.master_replica.commitsWaitingForPrepare
def test_no_preprepare_requested(looper, txnPoolNodeSet,
                                 sdk_wallet_client, sdk_pool_handle,
                                 teardown):
    """
    Node missing Propagates hence request not finalised, hence stashes
    PRE-PREPARE but does not request PRE-PREPARE on receiving PREPARE
    """
    slow_node, other_nodes, _, _ = split_nodes(txnPoolNodeSet)
    slow_node.nodeIbStasher.delay(ppgDelay(20))
    slow_node.nodeIbStasher.delay(msg_rep_delay(20, [PROPAGATE, ]))

    old_count_resp = count_requested_preprepare_resp(slow_node)
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=4, num_batches=2)

    # The slow node is behind
    checkNodeDataForInequality(slow_node, *other_nodes)

    # PRE-PREPARE were not requested
    assert count_requested_preprepare_resp(slow_node) == old_count_resp

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds()

    # The slow node has processed all requests
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # PRE-PREPARE were not requested
    assert count_requested_preprepare_resp(slow_node) == old_count_resp
def testNodeKeysChanged(looper, txnPoolNodeSet, tdir, tconf,
                        sdk_node_theta_added,
                        sdk_pool_handle,
                        allPluginsPath=None):
    new_steward_wallet, new_node = sdk_node_theta_added

    new_node.stop()
    looper.removeProdable(name=new_node.name)
    nodeHa, nodeCHa = HA(*new_node.nodestack.ha), HA(*new_node.clientstack.ha)
    sigseed = randomString(32).encode()
    verkey = base58.b58encode(
        SimpleSigner(seed=sigseed).naclSigner.verraw).decode("utf-8")
    sdk_change_node_keys(looper, new_node, new_steward_wallet,
                         sdk_pool_handle, verkey)

    config_helper = PNodeConfigHelper(new_node.name, tconf, chroot=tdir)
    initNodeKeysForBothStacks(new_node.name, config_helper.keys_dir,
                              sigseed, override=True)

    logger.debug("{} starting with HAs {} {}".format(new_node, nodeHa, nodeCHa))

    node = TestNode(new_node.name,
                    config_helper=config_helper,
                    config=tconf,
                    ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath)
    looper.add(node)
    # The last element of `txnPoolNodeSet` is the node Theta that was just
    # stopped
    txnPoolNodeSet[-1] = node

    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1])

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet, sdk_pool_handle)
def test_state_regenerated_from_ledger(looper, nodeSet, tconf, tdir,
                                       sdk_pool_handle, sdk_wallet_trustee,
                                       allPluginsPath):
    """
    Node loses its state database but recreates it from ledger after start.
    Checking ATTRIB txns too since they store some data off ledger too
    """
    trust_anchors = []
    for i in range(5):
        trust_anchors.append(sdk_add_new_nym(looper, sdk_pool_handle,
                                             sdk_wallet_trustee,
                                             'TA' + str(i),
                                             TRUST_ANCHOR_STRING))
        sdk_add_raw_attribute(looper, sdk_pool_handle, trust_anchors[-1],
                              randomString(6), randomString(10))
    for wh in trust_anchors:
        for i in range(3):
            sdk_add_new_nym(looper, sdk_pool_handle, wh, 'NP1' + str(i))

    ensure_all_nodes_have_same_data(looper, nodeSet)

    node_to_stop = nodeSet[-1]
    node_state = node_to_stop.states[DOMAIN_LEDGER_ID]
    assert not node_state.isEmpty
    state_db_path = node_state._kv.db_path
    node_to_stop.cleanupOnStopping = False
    node_to_stop.stop()
    looper.removeProdable(node_to_stop)
    ensure_node_disconnected(looper, node_to_stop, nodeSet[:-1])

    shutil.rmtree(state_db_path)

    config_helper = NodeConfigHelper(node_to_stop.name, tconf, chroot=tdir)
    restarted_node = TestNode(
        node_to_stop.name,
        config_helper=config_helper,
        config=tconf,
        pluginPaths=allPluginsPath,
        ha=node_to_stop.nodestack.ha,
        cliha=node_to_stop.clientstack.ha)
    looper.add(restarted_node)
    nodeSet[-1] = restarted_node

    looper.run(checkNodesConnected(nodeSet))
    # Need some time as `last_ordered_3PC` is compared too and that is
    # communicated through catchup
    waitNodeDataEquality(looper, restarted_node, *nodeSet[:-1])

    # Pool is still functional
    for wh in trust_anchors:
        sdk_add_new_nym(looper, sdk_pool_handle, wh,
                        'NP--' + randomString(5))

    ensure_all_nodes_have_same_data(looper, nodeSet)
def test_requests_post_multiple_new_nodes(
        looper, nodeSet, tconf, tdir,
        sdk_pool_handle, sdk_wallet_trustee, allPluginsPath,
        some_transactions_done):
    new_nodes = []
    for node_name in ('Zeta', 'Eta'):
        new_steward_wallet, new_node = sdk_node_theta_added(
            looper, nodeSet, tdir, tconf,
            sdk_pool_handle, sdk_wallet_trustee, allPluginsPath,
            node_config_helper_class=NodeConfigHelper,
            testNodeClass=TestNode,
            name=node_name)
        new_nodes.append(new_node)

    for _ in range(5):
        sdk_add_new_nym(looper, sdk_pool_handle, sdk_wallet_trustee)

    for new_node in new_nodes:
        waitNodeDataEquality(looper, new_node, *nodeSet[:-2])

    for _ in range(5):
        sdk_add_new_nym(looper, sdk_pool_handle, sdk_wallet_trustee)
def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet,
                                            nodes_slow_to_process_catchup_reqs,
                                            sdk_node_created_after_some_txns):
    """
    A new node that joins after some transactions should stash new
    transactions until it has caught up
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_created_after_some_txns
    txnPoolNodeSet.append(new_node)
    old_nodes = txnPoolNodeSet[:-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 5)
    chk_commits_prepares_recvd(0, old_nodes, new_node)

    for node in old_nodes:
        node.reset_delays_and_process_delayeds()

    timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet)) + \
        catchup_delay + \
        waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    waitNodeDataEquality(looper, new_node, *old_nodes)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 2)

    # Commits and Prepares are received by all old nodes
    with pytest.raises(AssertionError):
        # Since nodes discard 3PC messages for already ordered requests.
        chk_commits_prepares_recvd(0, old_nodes, new_node)
    waitNodeDataEquality(looper, new_node, *old_nodes)
def sdk_change_bls_key(looper, txnPoolNodeSet,
                       node,
                       sdk_pool_handle,
                       sdk_wallet_steward,
                       add_wrong=False,
                       new_bls=None,
                       new_key_proof=None):
    if add_wrong:
        _, new_blspk, key_proof = create_default_bls_crypto_factory().generate_bls_keys()
    else:
        new_blspk, key_proof = init_bls_keys(node.keys_dir, node.name)
    key_in_txn = new_bls or new_blspk
    bls_key_proof = new_key_proof or key_proof
    node_dest = hexToFriendly(node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward, sdk_pool_handle,
                         node_dest, node.name,
                         None, None,
                         None, None,
                         bls_key=key_in_txn,
                         services=None,
                         key_proof=bls_key_proof)
    poolSetExceptOne = list(txnPoolNodeSet)
    poolSetExceptOne.remove(node)
    waitNodeDataEquality(looper, node, *poolSetExceptOne)
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward,
                               sdk_pool_handle)
    return new_blspk
def test_node_load_after_add(newNodeCaughtUp, txnPoolNodeSet, tconf,
                             tdirWithPoolTxns, allPluginsPath,
                             poolTxnStewardData, looper,
                             client1, wallet1, client1Connected, capsys):
    """
    A node that restarts after some transactions should eventually get the
    transactions which happened while it was down
    :return:
    """
    new_node = newNodeCaughtUp
    logger.debug("Sending requests")

    # Here's where we apply some load
    client_batches = 300
    txns_per_batch = 25
    for i in range(client_batches):
        s = perf_counter()
        sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                            txns_per_batch,
                                            override_timeout_limit=True)
        with capsys.disabled():
            print('{} executed {} client txns in {:.2f} seconds'.format(
                i + 1, txns_per_batch, perf_counter() - s))

    logger.debug("Starting the stopped node, {}".format(new_node))
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4])
def test_restarted_node_catches_up_config_ledger_txns(looper,
                                                      some_config_txns_done,
                                                      txnPoolNodeSet,
                                                      sdk_wallet_client,
                                                      sdk_pool_handle,
                                                      sdk_new_node_caught_up,
                                                      keys,
                                                      tconf, tdir,
                                                      allPluginsPath):
    """
    A node is stopped, a few config ledger txns happen,
    the stopped node is started and catches up the config ledger
    """
    new_node = sdk_new_node_caught_up
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, new_node, stopNode=True)
    looper.removeProdable(new_node)

    # Do some config txns; using a fixture as a method, passing some arguments
    # as None as they only make sense for the fixture (pre-requisites)
    send_some_config_txns(looper, sdk_pool_handle, sdk_wallet_client, keys)

    # Make sure new node got out of sync
    for node in txnPoolNodeSet[:-1]:
        assert new_node.configLedger.size < node.configLedger.size

    restarted_node = start_stopped_node(new_node, looper, tconf, tdir,
                                        allPluginsPath)
    txnPoolNodeSet[-1] = restarted_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, restarted_node, *txnPoolNodeSet[:-1])
def test_fill_ts_store_after_catchup(txnPoolNodeSet,
                                     looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward,
                                     tconf,
                                     tdir,
                                     allPluginsPath):
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)
    node_to_disconnect = txnPoolNodeSet[-1]

    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_replies = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_steward, 2)

    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)

    req_handler = node_to_disconnect.getDomainReqHandler()
    for reply in sdk_replies:
        key = req_handler.prepare_buy_key(reply[1]['result']['identifier'],
                                          reply[1]['result']['reqId'])
        root_hash = req_handler.ts_store.get_equal_or_prev(
            reply[1]['result']['txnTime'])
        assert root_hash
        from_state = req_handler.state.get_for_root_hash(root_hash=root_hash,
                                                         key=key)
        assert req_handler.stateSerializer.deserialize(from_state)['amount'] == \
            reply[1]['result']['amount']
def test_idr_cache_update_after_catchup(txnPoolNodeSet,
                                        looper,
                                        sdk_pool_handle,
                                        sdk_wallet_steward,
                                        tconf,
                                        tdir,
                                        allPluginsPath):
    wallet_handle, identifier = sdk_wallet_steward
    node_to_disconnect = txnPoolNodeSet[-1]
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect.name,
                                            stopNode=True)
    looper.removeProdable(node_to_disconnect)

    idr, verkey = createHalfKeyIdentifierAndAbbrevVerkey()
    request = looper.loop.run_until_complete(
        build_nym_request(identifier, idr, verkey, None, None))
    req_signed = looper.loop.run_until_complete(
        sign_request(wallet_handle, identifier, request))
    result = json.loads(looper.loop.run_until_complete(
        submit_request(sdk_pool_handle, req_signed)))

    restarted_node = start_stopped_node(node_to_disconnect, looper,
                                        tconf, tdir, allPluginsPath)
    txnPoolNodeSet[-1] = restarted_node
    waitNodeDataEquality(looper, restarted_node, *txnPoolNodeSet[:-1])

    req_handler = restarted_node.getDomainReqHandler()
    root_hash = req_handler.ts_store.get_equal_or_prev(
        get_txn_time(result['result']))
    key = domain.make_state_path_for_nym(idr)
    from_state = req_handler.state.get_for_root_hash(root_hash=root_hash,
                                                     key=key)
    assert from_state
    deserialized = req_handler.stateSerializer.deserialize(from_state)
    assert deserialized
    items_after = req_handler.idrCache.get(idr)
    assert items_after
def testNodeCatchupAfterDisconnect(sdk_new_node_caught_up, txnPoolNodeSet,
                                   sdk_node_set_with_node_added_after_some_txns):
    """
    A node that disconnects after some transactions should eventually get the
    transactions which happened while it was disconnected
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns

    logger.debug("Disconnecting node {} with pool ledger size {}".
                 format(new_node, new_node.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, new_node, stopNode=False)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Connecting the stopped node, {}".format(new_node))
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)

    logger.debug("Waiting for the node to catch up, {}".format(new_node))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 10)
    checkNodeDataForEquality(new_node, *txnPoolNodeSet[:-1])
def test_catchup_with_ledger_statuses_in_old_format_from_one_node(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward,
        tconf, tdir, allPluginsPath):
    """
    A node is restarted and during a catch-up receives ledger statuses
    in an old format (without `protocolVersion`) from one of nodes in the pool.
    The test verifies that the node successfully completes the catch-up and
    participates in ordering of further transactions.
    """
    node_to_restart = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]
    old_node = txnPoolNodeSet[0]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    original_get_ledger_status = old_node.getLedgerStatus

    # Patch the method getLedgerStatus to
    # get_ledger_status_without_protocol_version for sending ledger status
    # in old format (without `protocolVersion`)
    def get_ledger_status_without_protocol_version(ledgerId: int):
        original_ledger_status = original_get_ledger_status(ledgerId)
        return LedgerStatusInOldFormat(original_ledger_status.ledgerId,
                                       original_ledger_status.txnSeqNo,
                                       original_ledger_status.viewNo,
                                       original_ledger_status.ppSeqNo,
                                       original_ledger_status.merkleRoot)

    old_node.getLedgerStatus = get_ledger_status_without_protocol_version

    # restart node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_restart)
    looper.removeProdable(name=node_to_restart.name)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2)

    # add `node_to_restart` to pool
    node_to_restart = start_stopped_node(node_to_restart, looper, tconf,
                                         tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_restart
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Verify that `node_to_restart` successfully completes catch-up
    waitNodeDataEquality(looper, node_to_restart, *other_nodes)

    # check discarding ledger statuses from `old_node` for all ledgers
    assert countDiscarded(node_to_restart,
                          'replied message has invalid structure') >= 3

    # Verify that `node_to_restart` participates in ordering
    # of further transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    waitNodeDataEquality(looper, node_to_restart, *other_nodes)
def test_instance_change_before_vc(looper,
                                   txnPoolNodeSet,
                                   tconf,
                                   sdk_pool_handle,
                                   sdk_wallet_steward):
    master_node = get_master_primary_node(txnPoolNodeSet)
    old_view = master_node.viewNo
    expected_view_no = old_view + 1
    panic_node = txnPoolNodeSet[-1]

    panic_node.view_changer.on_master_degradation()

    def has_inst_chng_in_validator_info():
        for node in txnPoolNodeSet:
            latest_info = node._info_tool.info
            ic_queue = latest_info['Node_info']['View_change_status']['IC_queue']
            assert expected_view_no in ic_queue
            assert ic_queue[expected_view_no]["Voters"][panic_node.name]['reason'] == \
                Suspicions.PRIMARY_DEGRADED.code

    looper.run(eventually(has_inst_chng_in_validator_info))

    for node in txnPoolNodeSet:
        node.view_changer.on_master_degradation()

    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet,
                          expected_view_no, retryWait=1,
                          timeout=tconf.NEW_VIEW_TIMEOUT))
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet)

    def is_inst_chngs_cleared():
        for node in txnPoolNodeSet:
            latest_info = node._info_tool.info
            assert latest_info['Node_info']['View_change_status']['IC_queue'] == {}

    looper.run(eventually(is_inst_chngs_cleared))
def testNodeKeysChanged(looper, txnPoolNodeSet, tdirWithPoolTxns,
                        tconf, steward1, nodeThetaAdded,
                        allPluginsPath=None):
    newSteward, newStewardWallet, newNode = nodeThetaAdded

    newNode.stop()
    looper.removeProdable(name=newNode.name)
    nodeHa, nodeCHa = HA(*newNode.nodestack.ha), HA(*newNode.clientstack.ha)
    sigseed = randomString(32).encode()
    verkey = base58.b58encode(SimpleSigner(seed=sigseed).naclSigner.verraw)
    changeNodeKeys(looper, newSteward, newStewardWallet, newNode, verkey)
    initNodeKeysForBothStacks(newNode.name, tdirWithPoolTxns, sigseed,
                              override=True)

    logger.debug("{} starting with HAs {} {}".format(newNode, nodeHa, nodeCHa))
    node = TestNode(newNode.name, basedirpath=tdirWithPoolTxns, config=tconf,
                    ha=nodeHa, cliha=nodeCHa, pluginPaths=allPluginsPath)
    looper.add(node)
    # The last element of `txnPoolNodeSet` is the node Theta that was just
    # stopped
    txnPoolNodeSet[-1] = node

    looper.run(checkNodesConnected(stacks=txnPoolNodeSet))
    waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1])

    ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1,
                                                  *txnPoolNodeSet)
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward,
                                                  *txnPoolNodeSet)
def test_restarted_node_catches_up_config_ledger_txns(
        looper, some_config_txns_done, txnPoolNodeSet, sdk_wallet_client,
        sdk_pool_handle, sdk_new_node_caught_up, keys, tconf, tdir,
        allPluginsPath):
    """
    A node is stopped, a few config ledger txns happen,
    the stopped node is started and catches up the config ledger
    """
    new_node = sdk_new_node_caught_up
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, new_node,
                                            stopNode=True)
    looper.removeProdable(new_node)

    # Do some config txns; using a fixture as a method, passing some arguments
    # as None as they only make sense for the fixture (pre-requisites)
    send_some_config_txns(looper, sdk_pool_handle, sdk_wallet_client, keys)

    # Make sure new node got out of sync
    for node in txnPoolNodeSet[:-1]:
        assert new_node.configLedger.size < node.configLedger.size

    restarted_node = start_stopped_node(new_node, looper, tconf, tdir,
                                        allPluginsPath)
    txnPoolNodeSet[-1] = restarted_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, restarted_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
def test_state_regenerated_from_ledger(looper, tdirWithPoolTxns,
                                       tdirWithDomainTxnsUpdated, nodeSet,
                                       tconf, trustee, trusteeWallet,
                                       allPluginsPath):
    """
    Node loses its state database but recreates it from ledger after start.
    Checking ATTRIB txns too since they store some data off ledger too
    """
    trust_anchors = []
    for i in range(5):
        trust_anchors.append(getClientAddedWithRole(nodeSet,
                                                    tdirWithPoolTxns, looper,
                                                    trustee, trusteeWallet,
                                                    'TA' + str(i),
                                                    role=TRUST_ANCHOR))
        addRawAttribute(looper, *trust_anchors[-1], randomString(6),
                        randomString(10), dest=trust_anchors[-1][1].defaultId)
    for tc, tw in trust_anchors:
        for i in range(3):
            getClientAddedWithRole(nodeSet, tdirWithPoolTxns, looper,
                                   tc, tw, 'NP1' + str(i))

    ensure_all_nodes_have_same_data(looper, nodeSet)

    node_to_stop = nodeSet[-1]
    node_state = node_to_stop.states[DOMAIN_LEDGER_ID]
    assert not node_state.isEmpty
    state_db_path = node_state._kv._dbPath
    node_to_stop.cleanupOnStopping = False
    node_to_stop.stop()
    looper.removeProdable(node_to_stop)
    ensure_node_disconnected(looper, node_to_stop.name, nodeSet[:-1])

    shutil.rmtree(state_db_path)

    restarted_node = TestNode(node_to_stop.name,
                              basedirpath=tdirWithPoolTxns,
                              config=tconf,
                              pluginPaths=allPluginsPath,
                              ha=node_to_stop.nodestack.ha,
                              cliha=node_to_stop.clientstack.ha)
    looper.add(restarted_node)
    nodeSet[-1] = restarted_node

    looper.run(checkNodesConnected(nodeSet))
    # Need some time as `last_ordered_3PC` is compared too and that is
    # communicated through catchup
    waitNodeDataEquality(looper, restarted_node, *nodeSet[:-1])

    # Pool is still functional
    for tc, tw in trust_anchors:
        getClientAddedWithRole(nodeSet, tdirWithPoolTxns, looper,
                               tc, tw, 'NP--{}'.format(tc.name))

    ensure_all_nodes_have_same_data(looper, nodeSet)
def add_started_node(looper,
                     new_node,
                     node_ha,
                     client_ha,
                     txnPoolNodeSet,
                     sdk_pool_handle,
                     sdk_wallet_steward,
                     bls_key):
    '''
    Adds an already created node to the pool, that is, sends the NODE txn.
    Makes sure that the node is actually added and connected to all other nodes.
    '''
    new_steward_wallet_handle = sdk_add_new_nym(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                "Steward" + new_node.name,
                                                role=STEWARD_STRING)
    node_name = new_node.name
    node_dest = hexToFriendly(new_node.nodestack.verhex)
    sdk_send_update_node(looper, new_steward_wallet_handle, sdk_pool_handle,
                         node_dest, node_name,
                         node_ha[0], node_ha[1],
                         client_ha[0], client_ha[1],
                         services=[VALIDATOR],
                         bls_key=bls_key)

    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def newNodeCaughtUp(txnPoolNodeSet, nodeSetWithNodeAddedAfterSomeTxns):
    looper, newNode, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:4])
    check_last_3pc_master(newNode, txnPoolNodeSet[:4])

    # Check if catchup done once
    catchup_done_once = True
    for li in newNode.ledgerManager.ledgerRegistry.values():
        catchup_done_once = catchup_done_once and (li.num_txns_caught_up > 0)

    if not catchup_done_once:
        # It might be the case that node has to do catchup again, in that case
        # check the return value of `num_txns_caught_up_in_last_catchup` to be
        # greater than 0
        assert max(
            getAllReturnVals(
                newNode,
                newNode.num_txns_caught_up_in_last_catchup)) > 0

    for li in newNode.ledgerManager.ledgerRegistry.values():
        assert not li.receivedCatchUpReplies
        assert not li.recvdCatchupRepliesFrm

    return newNode
def test_new_node_accepts_chosen_primary(
        txnPoolNodeSet, sdk_node_set_with_node_added_after_some_txns):
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns

    logger.debug("Ensure nodes data equality, viewNo: {}".format(
        txnPoolNodeSet[0].viewNo))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # here we must have view_no = 4
    #  - current primary is Alpha (based on node registry before new node joined)
    #  - but new node expects itself as primary basing
    #    on updated node registry
    #  -> new node doesn't verify current primary
    assert not new_node.view_changer._primary_verified
    #  -> new node hasn't received ViewChangeDone from the expected primary
    #     (self VCHD message is registered when node sends it, not the case
    #     for primary propagate logic)
    assert not new_node.view_changer.has_view_change_from_primary
    #  -> BUT new node understands that no view change actually happens
    assert new_node.view_changer._is_propagated_view_change_completed

    logger.debug("Send requests to ensure that pool is working properly, "
                 "viewNo: {}".format(txnPoolNodeSet[0].viewNo))
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, new_steward_wallet_handle, 3)

    logger.debug("Ensure nodes data equality, viewNo: {}".format(
        txnPoolNodeSet[0].viewNo))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def test_upper_bound_of_checkpoint_after_catchup_is_divisible_by_chk_freq(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_pool_handle,
        sdk_wallet_steward, sdk_wallet_client, tdir, client_tdir, tconf,
        allPluginsPath):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    for replica in new_node.replicas:
        assert len(replica.checkpoints) == 1
        assert next(iter(replica.checkpoints)) == (7, 10)
def test_idr_cache_update_after_catchup(txnPoolNodeSet,
                                        looper,
                                        sdk_pool_handle,
                                        sdk_wallet_steward,
                                        tconf,
                                        tdir,
                                        allPluginsPath):
    wallet_handle, identifier = sdk_wallet_steward
    node_to_disconnect = txnPoolNodeSet[-1]
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect.name,
                                            stopNode=True)
    looper.removeProdable(node_to_disconnect)

    idr, verkey = createHalfKeyIdentifierAndAbbrevVerkey()
    request = looper.loop.run_until_complete(
        build_nym_request(identifier, idr, verkey, None, None))
    req_signed = looper.loop.run_until_complete(
        sign_request(wallet_handle, identifier, request))
    result = json.loads(
        looper.loop.run_until_complete(
            submit_request(sdk_pool_handle, req_signed)))

    restarted_node = start_stopped_node(node_to_disconnect, looper,
                                        tconf, tdir, allPluginsPath)
    txnPoolNodeSet[-1] = restarted_node
    waitNodeDataEquality(looper, restarted_node, *txnPoolNodeSet[:-1])

    req_handler = restarted_node.get_req_handler(DOMAIN_LEDGER_ID)
    root_hash = req_handler.ts_store.get_equal_or_prev(
        get_txn_time(result['result']))
    key = domain.make_state_path_for_nym(idr)
    from_state = req_handler.state.get_for_root_hash(root_hash=root_hash,
                                                     key=key)
    assert from_state
    deserialized = req_handler.stateSerializer.deserialize(from_state)
    assert deserialized
    items_after = req_handler.idrCache.get(idr)
    assert items_after
def test_add_node_to_pool_with_large_ppseqno_diff_views(
        do_view_change, looper, txnPoolNodeSet, tconf, sdk_pool_handle,
        sdk_wallet_steward, tdir, allPluginsPath):
    """
    Adding a node to the pool while ppSeqNo is big caused a node to stash
    all the requests because of incorrect watermarks limits set.
    The case of view_no == 0 is special.
    The test emulates big ppSeqNo number, adds a node and checks all the pool
    nodes are functional. The test is run with several starting view_no,
    including 0
    """
    ensure_several_view_change(looper, txnPoolNodeSet, do_view_change,
                               custom_timeout=tconf.NEW_VIEW_TIMEOUT)

    cur_ppseqno = get_pp_seq_no(txnPoolNodeSet)
    big_ppseqno = cur_ppseqno + tconf.LOG_SIZE * 2 + 2300
    assert (big_ppseqno > cur_ppseqno)

    # ensure pool is working properly
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    assert (cur_ppseqno < get_pp_seq_no(txnPoolNodeSet))

    _set_ppseqno(txnPoolNodeSet, big_ppseqno)
    cur_ppseqno = get_pp_seq_no(txnPoolNodeSet)
    assert (big_ppseqno == cur_ppseqno)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    assert (cur_ppseqno < get_pp_seq_no(txnPoolNodeSet))

    # Disable view change after adding new node as it will not be able to
    # finish due to fake ppSeqNo set
    for n in txnPoolNodeSet:
        n._on_node_count_changed_committed = lambda: None

    new_steward_name = "testClientSteward" + randomString(4)
    new_node_name = "TestTheta" + randomString(4)
    new_steward_wallet_handle, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        new_steward_name, new_node_name, tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               new_steward_wallet_handle, sdk_pool_handle)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def testNodeCatchupAfterDisconnect(newNodeCaughtUp, txnPoolNodeSet,
                                   nodeSetWithNodeAddedAfterSomeTxns):
    """
    A node that disconnects after some transactions should eventually get the
    transactions which happened while it was disconnected
    :return:
    """
    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    logger.debug("Stopping node {} with pool ledger size {}".format(
        newNode, newNode.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, newNode,
                                            stopNode=False)
    looper.removeProdable(newNode)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Starting the stopped node, {}".format(newNode))
    looper.add(newNode)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, newNode)

    logger.debug("Waiting for the node to catch up, {}".format(newNode))
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10)
    checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1])
def test_handle_delayed_preprepares(looper, txnPoolNodeSet,
                                    sdk_wallet_client, sdk_pool_handle,
                                    teardown):
    """
    Make a node send PREPREPARE again after the slow node has ordered
    """
    slow_node, other_nodes, primary_node, other_non_primary_nodes = \
        split_nodes(txnPoolNodeSet)
    # This node will send PRE-PREPARE again
    orig_method = primary_node.handlers[PREPREPARE].serve

    last_pp = None

    def patched_method(self, msg):
        nonlocal last_pp
        last_pp = orig_method(msg)
        return last_pp

    primary_node.handlers[PREPREPARE].serve = types.MethodType(
        patched_method, primary_node.handlers[PREPREPARE])
    # Delay PRE-PREPAREs by large amount simulating loss
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=10, num_batches=5)
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    slow_master_replica = slow_node.master_replica
    count_pr_req = get_count(slow_master_replica,
                             slow_master_replica.process_requested_pre_prepare)

    count_pr_tpc = get_count(slow_master_replica,
                             slow_master_replica.processThreePhaseMsg)

    primary_node.sendToNodes(MessageRep(**{
        f.MSG_TYPE.nm: PREPREPARE,
        f.PARAMS.nm: {
            f.INST_ID.nm: last_pp.instId,
            f.VIEW_NO.nm: last_pp.viewNo,
            f.PP_SEQ_NO.nm: last_pp.ppSeqNo
        },
        f.MSG.nm: last_pp
    }), names=[slow_node.name, ])

    def chk():
        # `process_requested_pre_prepare` is called but
        # `processThreePhaseMsg` is not called
        assert get_count(
            slow_master_replica,
            slow_master_replica.process_requested_pre_prepare) > count_pr_req
        assert get_count(
            slow_master_replica,
            slow_master_replica.processThreePhaseMsg) == count_pr_tpc

    looper.run(eventually(chk, retryWait=1))
def test_get_last_ordered_timestamp_after_catchup(looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward,
                                                  tconf,
                                                  tdir,
                                                  allPluginsPath):
    node_to_disconnect = txnPoolNodeSet[-1]
    reply_before = sdk_send_random_and_check(looper,
                                             txnPoolNodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_steward,
                                             1)[0][1]
    looper.runFor(2)
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    reply = sdk_send_random_and_check(looper,
                                      txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_steward,
                                      1)[0][1]
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet[:-1])
    ts_from_state = node_to_disconnect.master_replica._get_last_timestamp_from_state(
        DOMAIN_LEDGER_ID)
    assert ts_from_state == get_txn_time(reply['result'])
    assert ts_from_state != get_txn_time(reply_before['result'])
def add_new_node(looper, nodes, sdk_pool_handle, sdk_wallet_steward,
                 tdir, tconf, all_plugins_path, name=None,
                 wait_till_added=True):
    node_name = name or "Psi"
    new_steward_name = "testClientSteward" + randomString(3)
    _, new_node = sdk_add_new_steward_and_node(looper,
                                               sdk_pool_handle,
                                               sdk_wallet_steward,
                                               new_steward_name,
                                               node_name,
                                               tdir,
                                               tconf,
                                               allPluginsPath=all_plugins_path,
                                               wait_till_added=wait_till_added)
    if wait_till_added:
        nodes.append(new_node)
        looper.run(checkNodesConnected(nodes))
        timeout = waits.expectedPoolCatchupTime(nodeCount=len(nodes))
        waitNodeDataEquality(
            looper, new_node, *nodes[:-1],
            customTimeout=timeout,
            exclude_from_check=['check_last_ordered_3pc_backup'])
        sdk_pool_refresh(looper, sdk_pool_handle)
    return new_node
def test_state_regenerated_from_ledger(looper, txnPoolNodeSet,
                                       client1, wallet1, client1Connected,
                                       tconf, tdirWithPoolTxns,
                                       allPluginsPath):
    """
    Node loses its state database but recreates it from ledger after start
    """
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1,
                                           5 * sent_batches, sent_batches)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    node_to_stop = txnPoolNodeSet[-1]
    node_state = node_to_stop.states[DOMAIN_LEDGER_ID]
    assert not node_state.isEmpty
    state_db_path = node_state._kv.db_path
    nodeHa, nodeCHa = HA(*node_to_stop.nodestack.ha), HA(
        *node_to_stop.clientstack.ha)
    node_to_stop.stop()
    looper.removeProdable(node_to_stop)

    shutil.rmtree(state_db_path)

    restarted_node = TestNode(node_to_stop.name,
                              basedirpath=tdirWithPoolTxns,
                              config=tconf,
                              ha=nodeHa,
                              cliha=nodeCHa,
                              pluginPaths=allPluginsPath)
    looper.add(restarted_node)
    txnPoolNodeSet[-1] = restarted_node

    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, restarted_node, *txnPoolNodeSet[:-1])
def test_no_preprepare_requested(looper, txnPoolNodeSet, client1,
                                 wallet1, client1Connected, teardown):
    """
    Node missing Propagates hence request not finalised, hence stashes
    PRE-PREPARE but does not request PRE-PREPARE on receiving PREPARE
    """
    slow_node, other_nodes, _, _ = split_nodes(txnPoolNodeSet)
    slow_node.nodeIbStasher.delay(ppgDelay(20))
    slow_node.nodeIbStasher.delay(msg_rep_delay(20, [PROPAGATE, ]))

    old_count_resp = count_requested_preprepare_resp(slow_node)
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 4, 2)

    # The slow node is behind
    checkNodeDataForInequality(slow_node, *other_nodes)

    # PRE-PREPARE were not requested
    assert count_requested_preprepare_resp(slow_node) == old_count_resp

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds()

    # The slow node has processed all requests
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # PRE-PREPARE were not requested
    assert count_requested_preprepare_resp(slow_node) == old_count_resp
def sdk_change_bls_key(looper, txnPoolNodeSet,
                       node,
                       sdk_pool_handle,
                       sdk_wallet_steward,
                       add_wrong=False,
                       new_bls=None):
    new_blspk = init_bls_keys(node.keys_dir, node.name)
    key_in_txn = new_bls or new_blspk \
        if not add_wrong \
        else base58.b58encode(randomString(128).encode())
    node_dest = hexToFriendly(node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward, sdk_pool_handle,
                         node_dest, node.name,
                         None, None,
                         None, None,
                         bls_key=key_in_txn,
                         services=None)
    poolSetExceptOne = list(txnPoolNodeSet)
    poolSetExceptOne.remove(node)
    waitNodeDataEquality(looper, node, *poolSetExceptOne)
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_add_new_nym(looper, sdk_pool_handle, sdk_wallet_steward,
                    alias=randomString(5))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    return new_blspk
def test_number_txns_in_catchup_and_vc_queue_valid(looper,
                                                   txnPoolNodeSet,
                                                   tconf,
                                                   sdk_pool_handle,
                                                   sdk_wallet_steward,
                                                   tdir,
                                                   allPluginsPath):
    num_txns = 5
    master_node = get_master_primary_node(txnPoolNodeSet)
    master_node_index = txnPoolNodeSet.index(master_node)
    other_nodes = txnPoolNodeSet.copy()
    other_nodes.remove(master_node)
    old_view = master_node.viewNo
    expected_view_no = old_view + 1
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            master_node, stopNode=True)
    looper.removeProdable(master_node)
    looper.run(eventually(checkViewNoForNodes, other_nodes, expected_view_no,
                          retryWait=1, timeout=tconf.NEW_VIEW_TIMEOUT))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, other_nodes, sdk_pool_handle,
                              sdk_wallet_steward, num_txns)
    master_node = start_stopped_node(master_node, looper, tconf, tdir,
                                     allPluginsPath)
    txnPoolNodeSet[master_node_index] = master_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet[-1:],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    latest_info = master_node._info_tool.info

    assert latest_info['Node_info']['Catchup_status'][
               'Number_txns_in_catchup'][1] == num_txns
    assert latest_info['Node_info']['View_change_status'][
               'View_No'] == expected_view_no
    for n in other_nodes:
        assert n._info_tool.info['Node_info']['View_change_status'][
                   'Last_complete_view_no'] == expected_view_no
def testNodePortChanged(looper, txnPoolNodeSet, tdirWithPoolTxns,
                        tconf, steward1, stewardWallet, nodeThetaAdded):
    """
    A running node's port is changed
    """
    newSteward, newStewardWallet, newNode = nodeThetaAdded
    nodeNewHa = genHa(1)
    new_port = nodeNewHa.port

    node_ha = txnPoolNodeSet[0].nodeReg[newNode.name]
    cli_ha = txnPoolNodeSet[0].cliNodeReg[newNode.name + CLIENT_STACK_SUFFIX]
    node_data = {
        ALIAS: newNode.name,
        NODE_PORT: new_port,
        NODE_IP: node_ha.host,
        CLIENT_PORT: cli_ha.port,
        CLIENT_IP: cli_ha.host,
    }

    node = updateNodeDataAndReconnect(looper, newSteward,
                                      newStewardWallet, newNode,
                                      node_data,
                                      tdirWithPoolTxns, tconf,
                                      txnPoolNodeSet)

    waitNodeDataEquality(looper, node, *txnPoolNodeSet[:-1])

    ensureClientConnectedToNodesAndPoolLedgerSame(looper, steward1,
                                                  *txnPoolNodeSet)
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward,
                                                  *txnPoolNodeSet)
def test_integration_setup_last_ordered_after_catchup(
        looper, txnPoolNodeSet, sdk_wallet_steward, sdk_wallet_client,
        sdk_pool_handle, tdir, tconf, allPluginsPath):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    looper.run(eventually(replicas_synced, new_node))
    for node in txnPoolNodeSet:
        for replica in node.replicas.values():
            assert replica.last_ordered_3pc == (0, 4)
            if not replica.isMaster:
                assert get_count(
                    replica._ordering_service,
                    replica._ordering_service._request_three_phase_msg) == 0
def test_catchup_with_reask_cp(txnPoolNodeSet,
                               looper,
                               sdk_pool_handle,
                               sdk_wallet_steward,
                               tconf,
                               tdir,
                               allPluginsPath):
    '''
    Start a catchup
    Delay ConsistencyProofs twice
    Check that the catchup finished
    '''
    lagged_node = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)
    with delay_rules_without_processing(
            lagged_node.nodeIbStasher,
            delay_3pc(),
            msg_rep_delay(types_to_delay=[COMMIT])):
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_steward, 2)
        lagged_node.nodeIbStasher.drop_delayeds()

    with delay_rules_without_processing(lagged_node.nodeIbStasher, cpDelay()):
        lagged_node.start_catchup()

        def chk():
            cp_count = 0
            for msg in lagged_node.nodeIbStasher.delayeds:
                if isinstance(msg.item[0], ConsistencyProof):
                    cp_count += 1
            assert cp_count >= (len(txnPoolNodeSet) - 1) * 2
            lagged_node.nodeIbStasher.drop_delayeds()

        looper.run(eventually(chk))

    waitNodeDataEquality(looper, lagged_node, *txnPoolNodeSet,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
def testNodeRejectingInvalidTxns(tconf, txnPoolNodeSet, patched_node,
                                 nodeCreatedAfterSomeTxns):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes but one of the nodes replies with incorrect transactions. The newly
    joined node detects that and rejects the transactions and thus blacklists
    the node. It thus cannot complete the process till the timeout and then
    requests the missing transactions.
    """
    looper, newNode, client, wallet, _, _ = nodeCreatedAfterSomeTxns
    bad_node = patched_node

    do_not_tell_clients_about_newly_joined_node(txnPoolNodeSet)

    logger.debug('Catchup request processor of {} patched'.format(bad_node))

    looper.run(checkNodesConnected(txnPoolNodeSet))

    # catchup #1 -> CatchupTransactionsTimeout -> catchup #2
    catchup_timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet) + 1)
    timeout = 2 * catchup_timeout + tconf.CatchupTransactionsTimeout

    # have to skip seqno_db check because the txns are not executed
    # on the new node
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)

    assert newNode.isNodeBlacklisted(bad_node.name)
def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet,
                                            nodes_slow_to_process_catchup_reqs,
                                            nodeCreatedAfterSomeTxns):
    """
    A new node that joins after some transactions should stash new
    transactions until it has caught up
    :return:
    """
    looper, new_node, client, wallet, newStewardClient, newStewardWallet = \
        nodeCreatedAfterSomeTxns
    txnPoolNodeSet.append(new_node)
    old_nodes = txnPoolNodeSet[:-1]
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    chk_commits_prepares_recvd(0, old_nodes, new_node)

    for node in old_nodes:
        node.reset_delays_and_process_delayeds()

    timeout = waits.expectedPoolCatchupTime(len(txnPoolNodeSet)) + \
        catchup_delay + \
        waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    waitNodeDataEquality(looper, new_node, *old_nodes)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2)

    # Commits and Prepares are received by all old nodes
    with pytest.raises(AssertionError):
        # Since nodes discard 3PC messages for already ordered requests.
        chk_commits_prepares_recvd(0, old_nodes, new_node)
    waitNodeDataEquality(looper, new_node, *old_nodes)
def add_started_node(looper,
                     new_node,
                     node_ha,
                     client_ha,
                     txnPoolNodeSet,
                     client_tdir,
                     stewardClient, stewardWallet,
                     sigseed,
                     bls_key):
    '''
    Adds an already created node to the pool, that is, sends the NODE txn.
    Makes sure that the node is actually added and connected to all other nodes.
    '''
    newSteward, newStewardWallet = addNewSteward(looper, client_tdir,
                                                 stewardClient, stewardWallet,
                                                 "Steward" + new_node.name,
                                                 clientClass=TestClient)
    node_name = new_node.name
    send_new_node_txn(sigseed,
                      node_ha[0], node_ha[1],
                      client_ha[0], client_ha[1],
                      bls_key,
                      node_name,
                      newSteward, newStewardWallet)

    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, newSteward,
                                                  *txnPoolNodeSet)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def testNodeRequestingConsProof(tconf, txnPoolNodeSet,
                                sdk_node_created_after_some_txns_not_started):
    """
    All of the 4 old nodes delay the processing of LEDGER_STATUS from the
    newly joined node while they are processing requests which results in
    them sending consistency proofs which are not same so that the newly
    joined node cannot conclude about the state of transactions in the system.
    So the new node requests consistency proof for a particular range from
    all nodes.
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_created_after_some_txns_not_started

    # So nodes won't tell the clients about the newly joined node so they
    # don't send any request to the newly joined node
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)

    # The new node sends different ledger statuses to every node so it
    # does not get enough similar consistency proofs
    next_size = 0
    origMethod = new_node.build_ledger_status

    def build_broken_ledger_status(self, ledger_id):
        nonlocal next_size
        if ledger_id != DOMAIN_LEDGER_ID:
            return origMethod(ledger_id)

        size = self.domainLedger.size
        next_size = next_size + 1 if next_size < size else 1
        print("new size {}".format(next_size))

        newRootHash = Ledger.hashToStr(
            self.domainLedger.tree.merkle_tree_hash(0, next_size))
        three_pc_key = self.three_phase_key_for_txn_seq_no(ledger_id,
                                                           next_size)
        v, p = three_pc_key if three_pc_key else None, None
        ledgerStatus = LedgerStatus(1, next_size, v, p, newRootHash,
                                    CURRENT_PROTOCOL_VERSION)
        print("dl status {}".format(ledgerStatus))
        return ledgerStatus

    new_node.build_ledger_status = types.MethodType(
        build_broken_ledger_status, new_node)
    logger.debug(
        'Domain Ledger status sender of {} patched'.format(new_node))

    looper.add(new_node)
    txnPoolNodeSet.append(new_node)

    sdk_send_random_requests(looper, sdk_pool_handle,
                             new_steward_wallet_handle, 10)

    # wait more than `ConsistencyProofsTimeout`
    # TODO: apply configurable timeout here
    # `ConsistencyProofsTimeout` is set to 60 sec, so need to wait more than
    # 60 sec, hence large timeout. Don't reduce it.
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=75)

    # Other nodes should have received a request for `CONSISTENCY_PROOF` and
    # processed it.
    for node in txnPoolNodeSet[:-1]:
        assert count_msg_reqs_of_type(node, CONSISTENCY_PROOF) > 0, node
def test_fill_ts_store_after_catchup(txnPoolNodeSet,
                                     looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward,
                                     tconf,
                                     tdir,
                                     allPluginsPath):
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)
    node_to_disconnect = txnPoolNodeSet[-1]

    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_replies = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_steward, 2)

    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet,
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    req_handler = node_to_disconnect.read_manager.request_handlers[GET_BUY]
    for reply in sdk_replies:
        key = BuyHandler.prepare_buy_key(get_from(reply[1]['result']),
                                         get_req_id(reply[1]['result']))
        root_hash = req_handler.database_manager.ts_store.get_equal_or_prev(
            get_txn_time(reply[1]['result']))
        assert root_hash
        from_state = req_handler.state.get_for_root_hash(root_hash=root_hash,
                                                         key=key)
        assert domain_state_serializer.deserialize(from_state)['amount'] == \
            get_payload_data(reply[1]['result'])['amount']
def test_node_load_after_add(sdk_new_node_caught_up, txnPoolNodeSet,
                             looper, sdk_pool_handle, sdk_wallet_client,
                             capsys):
    """
    A node that restarts after some transactions should eventually get the
    transactions which happened while it was down
    :return:
    """
    new_node = sdk_new_node_caught_up
    logger.debug("Sending requests")

    # Here's where we apply some load
    client_batches = 300
    txns_per_batch = 25
    for i in range(client_batches):
        s = perf_counter()
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, txns_per_batch)
        with capsys.disabled():
            print('{} executed {} client txns in {:.2f} seconds'.
                  format(i + 1, txns_per_batch, perf_counter() - s))

    logger.debug("Starting the stopped node, {}".format(new_node))
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4])
def add_new_node(looper, nodes, sdk_pool_handle, sdk_wallet_steward,
                 tdir, client_tdir, tconf, all_plugins_path, name=None):
    node_name = name or "Psi"
    new_steward_name = "testClientSteward" + randomString(3)
    _, new_node = sdk_add_new_steward_and_node(looper,
                                               sdk_pool_handle,
                                               sdk_wallet_steward,
                                               new_steward_name,
                                               node_name,
                                               tdir,
                                               tconf,
                                               allPluginsPath=all_plugins_path)
    nodes.append(new_node)
    looper.run(checkNodesConnected(nodes))
    timeout = waits.expectedPoolCatchupTime(nodeCount=len(nodes))
    waitNodeDataEquality(looper, new_node, *nodes[:-1],
                         customTimeout=timeout)
    sdk_pool_refresh(looper, sdk_pool_handle)
    return new_node
def test_fill_ts_store_after_catchup(txnPoolNodeSet,
                                     looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward):
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)
    node_to_disconnect = txnPoolNodeSet[-1]

    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect,
                                            stopNode=False)
    looper.runFor(2)
    sdk_replies = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_steward, 2)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet,
                                        node_to_disconnect)
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)

    req_handler = node_to_disconnect.getDomainReqHandler()
    for reply in sdk_replies:
        key = req_handler.prepare_buy_key(reply[1]['result']['identifier'],
                                          reply[1]['result']['reqId'])
        root_hash = req_handler.ts_store.get_equal_or_prev(
            reply[1]['result']['txnTime'])
        assert root_hash
        from_state = req_handler.state.get_for_root_hash(root_hash=root_hash,
                                                         key=key)
        assert req_handler.stateSerializer.deserialize(from_state)['amount'] == \
            reply[1]['result']['amount']
def test_number_txns_in_catchup_and_vc_queue_valid(looper,
                                                   txnPoolNodeSet,
                                                   tconf,
                                                   sdk_pool_handle,
                                                   sdk_wallet_steward):
    num_txns = 5
    master_node = get_master_primary_node(txnPoolNodeSet)
    old_view = master_node.viewNo
    expected_view_no = old_view + 1
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            master_node, stopNode=False)
    looper.run(
        eventually(checkViewNoForNodes, txnPoolNodeSet[1:], expected_view_no,
                   retryWait=1, timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, num_txns)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, master_node)
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet[-1:])
    latest_info = master_node._info_tool.info

    assert latest_info['Node_info']['Catchup_status'][
               'Number_txns_in_catchup'][1] == num_txns
    assert latest_info['Node_info']['View_change_status'][
               'View_No'] == expected_view_no
    node_names = [n.name for n in txnPoolNodeSet[1:]]
    for node_name in node_names:
        assert latest_info['Node_info']['View_change_status']['VCDone_queue'][
                   node_name][0] == master_node.master_primary_name
        assert latest_info['Node_info']['View_change_status']['VCDone_queue'][
                   node_name][1]
    assert latest_info['Node_info']['View_change_status'][
               'Last_complete_view_no'] == expected_view_no
def sdk_change_bls_key(looper, txnPoolNodeSet,
                       node,
                       sdk_pool_handle,
                       sdk_wallet_steward,
                       add_wrong=False,
                       new_bls=None):
    new_blspk = init_bls_keys(node.keys_dir, node.name)
    key_in_txn = new_bls or new_blspk \
        if not add_wrong \
        else base58.b58encode(randomString(128).encode()).decode("utf-8")
    node_dest = hexToFriendly(node.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward, sdk_pool_handle,
                         node_dest, node.name,
                         None, None,
                         None, None,
                         bls_key=key_in_txn,
                         services=None)
    poolSetExceptOne = list(txnPoolNodeSet)
    poolSetExceptOne.remove(node)
    waitNodeDataEquality(looper, node, *poolSetExceptOne)
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward,
                               sdk_pool_handle)
    return new_blspk
def sdk_new_node_caught_up(txnPoolNodeSet,
                           sdk_node_set_with_node_added_after_some_txns):
    looper, new_node, _, _ = sdk_node_set_with_node_added_after_some_txns
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4])
    check_last_3pc_master(new_node, txnPoolNodeSet[:4])

    # Check if catchup done once
    catchup_done_once = True
    for li in new_node.ledgerManager.ledgerRegistry.values():
        catchup_done_once = catchup_done_once and (li.num_txns_caught_up > 0)

    if not catchup_done_once:
        # It might be the case that node has to do catchup again, in that case
        # check the return value of `num_txns_caught_up_in_last_catchup` to be
        # greater than 0
        assert max(
            getAllReturnVals(
                new_node,
                new_node.num_txns_caught_up_in_last_catchup)) > 0

    for li in new_node.ledgerManager.ledgerRegistry.values():
        assert not li.receivedCatchUpReplies
        assert not li.recvdCatchupRepliesFrm

    return new_node
def sdk_one_node_added(looper, txnPoolNodeSet, sdk_node_theta_added):
    # The new node knows the same primary as the others and has a rank
    # greater than theirs
    _, new_node = sdk_node_theta_added
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    check_newly_added_nodes(looper, txnPoolNodeSet, [new_node])
    return new_node
def testChangeHaPersistsPostNodesRestart(looper, txnPoolNodeSet,
                                         tdir, tconf,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         sdk_wallet_steward):
    new_steward_wallet, new_node = \
        sdk_add_new_steward_and_node(looper,
                                     sdk_pool_handle,
                                     sdk_wallet_steward,
                                     'AnotherSteward' + randomString(4),
                                     'AnotherNode' + randomString(4),
                                     tdir,
                                     tconf)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)

    node_new_ha, client_new_ha = genHa(2)
    logger.debug("{} changing HAs to {} {}".format(new_node, node_new_ha,
                                                   client_new_ha))

    # Sending the change-HA txn and confirming it succeeded
    node_dest = hexToFriendly(new_node.nodestack.verhex)
    sdk_send_update_node(looper, new_steward_wallet, sdk_pool_handle,
                         node_dest, new_node.name,
                         node_new_ha.host, node_new_ha.port,
                         client_new_ha.host, client_new_ha.port)

    # Stopping existing nodes
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # Starting nodes again by creating `Node` objects since that simulates
    # what happens when starting the node with script
    restartedNodes = []
    for node in txnPoolNodeSet[:-1]:
        config_helper = PNodeConfigHelper(node.name, tconf, chroot=tdir)
        restartedNode = TestNode(node.name,
                                 config_helper=config_helper,
                                 config=tconf,
                                 ha=node.nodestack.ha,
                                 cliha=node.clientstack.ha)
        looper.add(restartedNode)
        restartedNodes.append(restartedNode)

    # Starting the node whose HA was changed
    config_helper = PNodeConfigHelper(new_node.name, tconf, chroot=tdir)
    node = TestNode(new_node.name,
                    config_helper=config_helper,
                    config=tconf,
                    ha=node_new_ha, cliha=client_new_ha)
    looper.add(node)
    restartedNodes.append(node)

    looper.run(checkNodesConnected(restartedNodes))
    waitNodeDataEquality(looper, node, *restartedNodes[:-1])
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, restartedNodes,
                               sdk_wallet_client,
                               sdk_pool_handle)
def test_discard_3PC_messages_for_already_ordered(looper, txnPoolNodeSet,
                                                  sdk_wallet_client,
                                                  sdk_pool_handle):
    """
    Nodes discard any 3PC messages for already ordered 3PC keys
    (view_no, pp_seq_no). Delay all 3PC messages to a node so it cannot
    respond to them until the other nodes have ordered them; when the slow
    node finally gets them it will respond, but the other nodes will not
    process its messages and will discard them.
    """
    slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = 20
    delay_3pc_messages([slow_node], 0, delay)
    delay_3pc_messages([slow_node], 1, delay)

    sent_batches = 3
    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         num_reqs=2 * sent_batches,
                                         num_batches=sent_batches)

    def chk(node, inst_id, p_count, c_count):
        # A node will still record PREPAREs even if it has more than n-f-1,
        # until the request is ordered
        assert len(node.replicas[inst_id].prepares) >= p_count
        assert len(node.replicas[inst_id].commits) == c_count

    def count_discarded(inst_id, count):
        for node in other_nodes:
            assert countDiscarded(node.replicas[inst_id],
                                  'already ordered 3 phase message') == count

    # `slow_node` did not receive any PREPAREs or COMMITs
    chk(slow_node, 0, 0, 0)

    # `other_nodes` have not discarded any 3PC message
    count_discarded(0, 0)

    # `other_nodes` have not recorded any PREPAREs or COMMITs from `slow_node`
    chk_commits_prepares_recvd(0, other_nodes, slow_node)

    slow_node.reset_delays_and_process_delayeds()
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # `slow_node` did receive the correct number of PREPAREs and COMMITs
    looper.run(eventually(chk, slow_node, 0, sent_batches - 1, sent_batches,
                          retryWait=1))

    # `other_nodes` have not recorded any PREPAREs or COMMITs from `slow_node`
    chk_commits_prepares_recvd(0, other_nodes, slow_node)

    # `other_nodes` have discarded PREPAREs and COMMITs for all batches
    count_discarded(0, 2 * sent_batches)
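
# The test above relies on a `chk_commits_prepares_recvd` helper that is not
# defined in this file. Below is a minimal sketch of what such a helper could
# look like, assuming the values stored in a replica's `prepares` and
# `commits` collections expose the names of the voting replicas through a
# `voters` attribute; that attribute name is an assumption about the replica
# internals, not a confirmed API.
def chk_commits_prepares_recvd_sketch(count, receivers, sender):
    sender_replica_names = {r.instId: r.name for r in sender.replicas.values()}
    for node in receivers:
        for replica in node.replicas.values():
            received = 0
            sender_name = sender_replica_names[replica.instId]
            for prepare in replica.prepares.values():
                received += int(sender_name in prepare.voters)
            for commit in replica.commits.values():
                received += int(sender_name in commit.voters)
            # Every receiving replica saw exactly `count` 3PC votes from the
            # sender's corresponding replica
            assert received == count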
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf,
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including the primary) so the node that does not receive enough 3PC
    messages is behind but learns of the view change quickly and starts
    catchup. Other nodes learn of the view change late and thus keep on
    processing requests.
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([1 for e in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID])

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client,
                                        10 * Max3PCBatchSize)
    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
def test_node_catchup_after_checkpoints(
        looper,
        chkFreqPatched,
        reqs_for_checkpoint,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        broken_node_and_others):
    """
    A node misses 3PC messages and checkpoints during some period, but later
    it stashes a number of received checkpoints and decides to catch up.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize
    broken_node, other_nodes = broken_node_and_others

    logger.info("Step 1: The node misses quite a lot of requests")
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint + max_batch_size)
    waitNodeDataInequality(looper, broken_node, *other_nodes)

    logger.info(
        "Step 2: The node receives requests but cannot process them because "
        "of the missed ones. Eventually it stashes enough checkpoints and "
        "starts catch-up")
    repaired_node = repair_broken_node(broken_node)
    completed_catchups_before = get_number_of_completed_catchups(broken_node)
    send_reqs_batches_and_get_suff_replies(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * reqs_for_checkpoint -
        max_batch_size)
    waitNodeDataEquality(looper, repaired_node, *other_nodes)
    # Note that the repaired node might not fill the gap of missed
    # 3PC-messages by requesting them from other nodes because these
    # 3PC-batches start from an already stabilized checkpoint, so a part of
    # these 3PC-messages may already be unavailable

    # Verify that a catch-up was done
    completed_catchups_after = get_number_of_completed_catchups(repaired_node)
    assert completed_catchups_after > completed_catchups_before

    logger.info("Step 3: Check that the node is able to process requests")
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint + max_batch_size)
    waitNodeDataEquality(looper, repaired_node, *other_nodes)
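
# The test above takes a `broken_node_and_others` fixture and calls
# `repair_broken_node`, neither of which is defined in this file. Below is a
# minimal sketch of what they might look like, reusing the router-override
# pattern that test_lag_size_for_catchup below applies at the replica level.
# The node-level `nodeMsgRouter`/`sendToReplica` names, the routed message
# types and the `pytest` import are assumptions for illustration only.
@pytest.fixture(scope="function")
def broken_node_and_others_sketch(txnPoolNodeSet):
    node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != node]

    def broken_send_to_replica(msg, frm):
        # Drop 3PC and checkpoint messages instead of forwarding them
        logger.warning("{} is broken, dropping {}".format(node.name, msg))

    node.nodeMsgRouter.extend(
        (
            (PrePrepare, broken_send_to_replica),
            (Prepare, broken_send_to_replica),
            (Commit, broken_send_to_replica),
            (Checkpoint, broken_send_to_replica),
        )
    )
    return node, other_nodes


def repair_broken_node_sketch(node):
    # Reinstall the node's real handler so 3PC traffic flows again
    node.nodeMsgRouter.extend(
        (
            (PrePrepare, node.sendToReplica),
            (Prepare, node.sendToReplica),
            (Commit, node.sendToReplica),
            (Checkpoint, node.sendToReplica),
        )
    )
    return node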
def test_view_change_done_delayed(txnPoolNodeSet, looper, sdk_pool_handle,
                                  sdk_wallet_client):
    """
    A node is slow and thus behind the other nodes. After a view change it
    catches up, but its view change messages are also delayed, so the node
    should start participating only once it has caught up and has received a
    ViewChangeDone quorum.
    """
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    slow_node = nprs[-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 10
    delay_vcd = 25
    delay_3pc_messages([slow_node], 0, delay_3pc)
    slow_node.nodeIbStasher.delay(vcd_delay(delay_vcd))

    def chk(node):
        assert node.view_changer.has_acceptable_view_change_quorum
        assert node.view_changer._primary_verified
        assert node.isParticipating
        assert None not in {r.isPrimary for r in node.replicas.values()}

    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         5 * 4, 4)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the slow node successfully completes catchup
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Other nodes complete view change, select primary and participate
    for node in other_nodes:
        looper.run(eventually(chk, node, retryWait=1))

    # Since `ViewChangeDone` messages are delayed, `slow_node` is not able to
    # select a primary and participate
    assert not slow_node.view_changer.has_acceptable_view_change_quorum
    assert not slow_node.view_changer._primary_verified
    assert not slow_node.isParticipating
    assert {r.isPrimary for r in slow_node.replicas.values()} == {None}

    # Send requests to make sure the pool is functional
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 5)

    # Repair the network
    slow_node.reset_delays_and_process_delayeds()

    # `slow_node` selects a primary and participates
    looper.run(eventually(chk, slow_node, retryWait=1))

    # Processes requests received during the lack of a primary
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Send more requests and compare data of all nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_catchup_with_lost_first_consistency_proofs(txnPoolNodeSet,
                                                    looper,
                                                    sdk_pool_handle,
                                                    sdk_wallet_steward,
                                                    tconf,
                                                    tdir,
                                                    allPluginsPath,
                                                    monkeypatch,
                                                    lost_count):
    """
    Skip processing of the first `lost_count` CONSISTENCY_PROOFs in catchup.
    In this case the catching-up node has no f+1 quorum of CONSISTENCY_PROOFs
    for the longer transaction list, so it needs to request CONSISTENCY_PROOFs
    again before it can finish catchup. The test makes sure that the node
    eventually finishes catchup.
    """
    node_to_disconnect = txnPoolNodeSet[-1]

    def unpatch_after_call(proof, frm):
        global call_count
        call_count += 1
        if call_count >= lost_count:
            # unpatch processConsistencyProof after lost_count calls
            monkeypatch.undo()
            call_count = 0

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)

    # restart the node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 2)

    nodeHa, nodeCHa = HA(*node_to_disconnect.nodestack.ha), HA(
        *node_to_disconnect.clientstack.ha)
    config_helper = PNodeConfigHelper(node_to_disconnect.name, tconf,
                                      chroot=tdir)
    node_to_disconnect = TestNode(node_to_disconnect.name,
                                  config_helper=config_helper,
                                  config=tconf,
                                  ha=nodeHa, cliha=nodeCHa,
                                  pluginPaths=allPluginsPath)
    # patch processConsistencyProof
    monkeypatch.setattr(node_to_disconnect.ledgerManager,
                        'processConsistencyProof',
                        unpatch_after_call)
    # add node_to_disconnect back to the pool
    looper.add(node_to_disconnect)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
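
# Both the test above and test_catchup_with_lost_ledger_status below keep
# their patch counter in a module-level `call_count` and take a parametrized
# `lost_count` fixture; neither is defined in this file. A minimal sketch of
# those assumed definitions follows: the fixture parameters are illustrative,
# and a module-level `import pytest` is assumed.
call_count = 0


@pytest.fixture(params=[1, 2, 3])
def lost_count(request):
    # Number of messages whose processing is skipped before unpatching
    return request.param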
def testNodeCatchupFPlusOne(looper, txnPoolNodeSet,
                            sdk_pool_handle, sdk_wallet_steward,
                            tconf, tdir, tdirWithPoolTxns,
                            allPluginsPath, testNodeClass):
    """
    Check that f+1 nodes are enough for catchup
    """
    assert len(txnPoolNodeSet) == 4

    node1 = txnPoolNodeSet[-1]
    node0 = txnPoolNodeSet[-2]

    logger.debug("Stopping node0 with pool ledger size {}".
                 format(node0.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, node0, stopNode=True)
    looper.removeProdable(node0)

    logger.debug("Sending requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    logger.debug("Stopping node1 with pool ledger size {}".
                 format(node1.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, node1, stopNode=True)
    looper.removeProdable(node1)

    # Make sure the stopped node got out of sync.
    # Excluding the state check since the node is stopped and hence its state
    # db is closed
    waitNodeDataInequality(looper, node0, *txnPoolNodeSet[:-2],
                           exclude_from_check=['check_state'])

    # TODO: Check if the node has really stopped processing requests?

    logger.debug("Starting the stopped node0")
    nodeHa, nodeCHa = HA(*node0.nodestack.ha), HA(*node0.clientstack.ha)
    config_helper = PNodeConfigHelper(node0.name, tconf, chroot=tdir)
    node0 = testNodeClass(node0.name,
                          config_helper=config_helper,
                          ha=nodeHa, cliha=nodeCHa,
                          config=tconf, pluginPaths=allPluginsPath)
    looper.add(node0)

    logger.debug("Waiting for node0 to catch up")
    waitNodeDataEquality(looper, node0, *txnPoolNodeSet[:-2])

    logger.debug("Sending more requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2)
    checkNodeDataForEquality(node0, *txnPoolNodeSet[:-2])
def test_lag_size_for_catchup(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that if the stored own checkpoints have aligned bounds then the
    master replica lag that makes the node perform catch-up is
    Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed received
    checkpoints.
    """
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    # The master replica of the slow node stops receiving 3PC-messages
    slow_node.master_replica.threePhaseRouter.extend(
        (
            (PrePrepare, lambda *x, **y: None),
            (Prepare, lambda *x, **y: None),
            (Commit, lambda *x, **y: None),
        )
    )

    completed_catchups_before_reqs = get_number_of_completed_catchups(slow_node)

    # Send requests for the slow node's master replica to get
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP quorumed stashed checkpoints
    # from others
    send_reqs_batches_and_get_suff_replies(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP * reqs_for_checkpoint)

    # Give the slow node time to catch up if it is going to do so
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    checkNodeDataForInequality(slow_node, *other_nodes)

    # Verify that the slow node has not caught up
    assert get_number_of_completed_catchups(slow_node) == \
        completed_catchups_before_reqs

    # Send more requests for the slow node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle, sdk_wallet_client,
                                           reqs_for_checkpoint)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Verify that the slow node has caught up
    assert get_number_of_completed_catchups(slow_node) > \
        completed_catchups_before_reqs
def test_catchup_with_lost_ledger_status(txnPoolNodeSet,
                                         looper,
                                         sdk_pool_handle,
                                         sdk_wallet_steward,
                                         tconf,
                                         tdir,
                                         allPluginsPath,
                                         monkeypatch,
                                         lost_count):
    """
    Skip processing of `lost_count` message responses with LEDGER_STATUS in
    catchup; the test makes sure that the node eventually finishes catchup.
    """
    node_to_disconnect = txnPoolNodeSet[-1]

    def unpatch_after_call(status, frm):
        global call_count
        call_count += 1
        if call_count >= lost_count:
            # unpatch processLedgerStatus after lost_count calls
            monkeypatch.undo()
            call_count = 0

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)

    # restart the node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 2)

    nodeHa, nodeCHa = HA(*node_to_disconnect.nodestack.ha), HA(
        *node_to_disconnect.clientstack.ha)
    config_helper = PNodeConfigHelper(node_to_disconnect.name, tconf,
                                      chroot=tdir)
    node_to_disconnect = TestNode(node_to_disconnect.name,
                                  config_helper=config_helper,
                                  config=tconf,
                                  ha=nodeHa, cliha=nodeCHa,
                                  pluginPaths=allPluginsPath)
    # patch processLedgerStatus
    monkeypatch.setattr(node_to_disconnect.ledgerManager,
                        'processLedgerStatus',
                        unpatch_after_call)
    # add node_to_disconnect back to the pool
    looper.add(node_to_disconnect)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
def test_nodes_maintain_master_txn_3PC_map(looper, txnPoolNodeSet, pre_check,
                                           sdk_node_created_after_some_txns):
    _, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_created_after_some_txns
    txnPoolNodeSet.append(new_node)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4])
    # Check the new node has set the same `last_3pc_ordered` for master as
    # the others
    check_last_3pc_master(new_node, txnPoolNodeSet[:4])
    chk_if_equal_txn_to_3pc(txnPoolNodeSet[:4])
    # Requests are still processed
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              new_steward_wallet_handle, 2)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4])
def _restart_node(looper, txnPoolNodeSet, node_to_disconnect, tconf, tdir,
                  allPluginsPath):
    idx = txnPoolNodeSet.index(node_to_disconnect)
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)

    # add node_to_disconnect back to the pool
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[idx] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
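
# Hypothetical usage of the `_restart_node` helper above: restart the last
# node of the pool and check that the pool still orders requests afterwards.
# The test name and the request count are illustrative only.
def test_restart_last_node_sketch(looper, txnPoolNodeSet, tconf, tdir,
                                  allPluginsPath, sdk_pool_handle,
                                  sdk_wallet_client):
    _restart_node(looper, txnPoolNodeSet, txnPoolNodeSet[-1],
                  tconf, tdir, allPluginsPath)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)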