def do_view_change_with_propagate_primary_on_one_delayed_node(
        slow_node, nodes, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Drive a view change while one node (`slow_node`) has its InstanceChange
    and ViewChangeDone messages delayed, so it completes the view change via
    propagate-primary after ordering a 3PC-batch in the old view.

    :param slow_node: the node whose view-change traffic is delayed
    :param nodes: all pool nodes (including `slow_node`)
    :param looper: event loop driving the test
    :param sdk_pool_handle: SDK pool handle for sending requests
    :param sdk_wallet_client: SDK client wallet for signing requests
    """
    slow_stasher = slow_node.nodeIbStasher
    fast_nodes = [n for n in nodes if n != slow_node]
    stashers = [n.nodeIbStasher for n in nodes]

    # Get last prepared certificate in pool
    lpc = last_prepared_certificate(nodes)
    # Get pool current view no
    view_no = lpc[0]

    # Nested delays: InstanceChange (outermost) and ViewChangeDone are held
    # back only on the slow node; Commits are held back on ALL nodes so the
    # request gets prepared but not ordered before the view change starts.
    with delay_rules(slow_stasher, icDelay()):
        with delay_rules(slow_stasher, vcd_delay()):
            with delay_rules(stashers, cDelay()):
                # Send request
                request = sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)

                # Wait until this request is prepared on N-f nodes
                looper.run(eventually(check_last_prepared_certificate_on_quorum,
                                      nodes, (lpc[0], lpc[1] + 1)))

                # Trigger view change
                for n in nodes:
                    n.view_changer.on_master_degradation()

                # Wait until view change is completed on all nodes except slow one
                waitForViewChange(looper, fast_nodes,
                                  expectedViewNo=view_no + 1,
                                  customTimeout=waits.expectedPoolViewChangeStartedTimeout(len(nodes)))
                wait_for_elections_done_on_given_nodes(looper,
                                                       fast_nodes,
                                                       getRequiredInstances(len(nodes)),
                                                       timeout=waits.expectedPoolElectionTimeout(len(nodes)))

            # Now all the nodes receive Commits
            # The slow node will accept Commits and order the 3PC-batch in the old view
            looper.runFor(waits.expectedOrderingTime(getNoInstances(len(nodes))))

        # Now slow node receives ViewChangeDones
        waitForViewChange(looper, [slow_node],
                          expectedViewNo=view_no + 1,
                          customTimeout=waits.expectedPoolViewChangeStartedTimeout(len(nodes)))
        wait_for_elections_done_on_given_nodes(looper,
                                               [slow_node],
                                               getRequiredInstances(len(nodes)),
                                               timeout=waits.expectedPoolElectionTimeout(len(nodes)))

    # Now slow node receives InstanceChanges but discards them because already
    # started propagate primary to the same view.

    # Finish request gracefully
    sdk_get_reply(looper, request)
def testMultipleInstanceChangeMsgsMarkNodeAsSuspicious(looper, txnPoolNodeSet):
    """
    A node that floods the pool with InstanceChange messages must be
    reported as suspicious (FREQUENT_INST_CHNG) by the other nodes.
    """
    maliciousNode = txnPoolNodeSet[0]
    # Fire five InstanceChange messages (one per proposed view 0..4)
    for i in range(0, 5):
        maliciousNode.send(maliciousNode.view_changer._create_instance_change_msg(i, 0))

    def chk(instId):
        # NOTE(review): `instId` is unused — the check always expects all
        # five messages, whatever index `eventually` passes in.
        for node in txnPoolNodeSet:
            if node.name != maliciousNode.name:
                args = getAllArgs(node, ViewChanger.process_instance_change_msg)
                assert len(args) == 5
                for arg in args:
                    assert arg['frm'] == maliciousNode.name

    numOfNodes = len(txnPoolNodeSet)
    instanceChangeTimeout = waits.expectedPoolViewChangeStartedTimeout(
        numOfNodes)
    for i in range(0, 5):
        looper.run(eventually(chk, i, retryWait=1, timeout=instanceChangeTimeout))

    def g():
        for node in txnPoolNodeSet:
            if node.name != maliciousNode.name:
                # NOTE(review): unpacking getAllArgs(...) into three names
                # presumes it yields exactly three values here — verify
                # against the spy helper's contract.
                frm, reason, code = getAllArgs(node, Node.reportSuspiciousNode)
                assert frm == maliciousNode.name
                assert isinstance(reason, SuspiciousNode)
                suspectingNodes = \
                    getNodeSuspicions(node, Suspicions.FREQUENT_INST_CHNG.code)
                # NOTE(review): 13 is the suspicion count observed for this
                # scenario — confirm it still matches the pool configuration.
                assert len(suspectingNodes) == 13

    timeout = waits.expectedTransactionExecutionTime(numOfNodes)
    looper.run(eventually(g, retryWait=1, timeout=timeout))
def testDiscardInstChngMsgFrmPastView(txnPoolNodeSet, looper, ensureView):
    """
    After a view change completes, an INSTANCE_CHANGE message carrying the
    already-established (not newer) view number must be discarded by nodes.
    """
    completed_view = ensureView
    sender = txnPoolNodeSet[0]

    # Broadcast an instance-change message for the current (old) view
    stale_ic_msg = sender.view_changer._create_instance_change_msg(completed_view, 0)
    sender.send(stale_ic_msg)

    # Every node except the sender should drop the stale message
    discard_timeout = waits.expectedPoolViewChangeStartedTimeout(len(txnPoolNodeSet))
    looper.run(eventually(checkDiscardMsg, txnPoolNodeSet, stale_ic_msg,
                          'which is not more than its view no',
                          sender, timeout=discard_timeout))
    waitForViewChange(looper, txnPoolNodeSet)
def testDiscardInstChngMsgFrmPastView(nodeSet, looper, ensureView):
    """
    Once a view change has finished, a further INSTANCE_CHANGE message for
    that same view must be discarded by every receiving node.
    """
    completed_view = ensureView
    alpha = nodeSet.Alpha

    # Alpha broadcasts an instance-change message for the current (old) view
    stale_ic_msg = alpha.view_changer._create_instance_change_msg(completed_view, 0)
    alpha.send(stale_ic_msg)

    # All nodes but Alpha should discard the invalid instance-change request
    discard_timeout = waits.expectedPoolViewChangeStartedTimeout(len(nodeSet))
    looper.run(eventually(checkDiscardMsg, nodeSet, stale_ic_msg,
                          'which is not more than its view no',
                          alpha, timeout=discard_timeout))
    waitForViewChange(looper, nodeSet)
def ensure_view_change(looper, nodes, exclude_from_check=None, custom_timeout=None):
    """
    Patch the master performance check so it reports degradation, forcing
    all given nodes through one view change; return the new view number.
    """
    starting_view = checkViewNoForNodes(nodes)
    saved_methods = do_view_change(nodes)

    check_freq = next(iter(nodes)).config.PerfCheckFreq
    wait_limit = custom_timeout or waits.expectedPoolViewChangeStartedTimeout(
        len(nodes)) + check_freq

    if exclude_from_check is None:
        observed = nodes
    else:
        observed = [n for n in nodes if n not in exclude_from_check]
    logger.debug('Checking view no for nodes {}'.format(observed))
    looper.run(eventually(checkViewNoForNodes, observed, starting_view + 1,
                          retryWait=1, timeout=wait_limit))

    # Restore the original performance-check methods
    revert_do_view_change(nodes, saved_methods)
    return starting_view + 1
def ensure_view_change_by_primary_restart(
        looper, nodes, tconf, tdirWithPoolTxns, allPluginsPath,
        customTimeout=None, exclude_from_check=None):
    """
    This method stops current primary for a while to force a view change
    Returns new set of nodes

    :param looper: event loop driving the test
    :param nodes: current pool nodes
    :param tconf: node configuration used for the restart
    :param tdirWithPoolTxns: base directory with pool transactions
    :param allPluginsPath: plugin path passed to the restarted node
    :param customTimeout: optional override for the view-change wait
    :param exclude_from_check: ledgers excluded from the equality check
    :return: the new list of nodes (with the restarted ex-primary)
    """
    old_view_no = checkViewNoForNodes(nodes)
    # The master primary is disconnected (not just slowed) to trigger the change
    primaryNode = [node for node in nodes if node.has_master_primary][0]
    logger.debug("Disconnect current primary node {} from others, "
                 "current viewNo {}".format(primaryNode, old_view_no))
    disconnect_node_and_ensure_disconnected(looper, nodes, primaryNode, stopNode=True)
    looper.removeProdable(primaryNode)
    remainingNodes = list(set(nodes) - {primaryNode})
    logger.debug("Waiting for viewNo {} for nodes {}"
                 "".format(old_view_no + 1, remainingNodes))
    # Allow extra time for the pool to tolerate the primary's disconnection
    timeout = customTimeout or waits.expectedPoolViewChangeStartedTimeout(
        len(remainingNodes)) + nodes[0].config.ToleratePrimaryDisconnection
    looper.run(eventually(checkViewNoForNodes, remainingNodes,
                          old_view_no + 1, retryWait=1, timeout=timeout))
    logger.debug("Starting stopped ex-primary {}".format(primaryNode))
    restartedNode = start_stopped_node(primaryNode, looper, tconf,
                                       tdirWithPoolTxns, allPluginsPath,
                                       delay_instance_change_msgs=False)
    nodes = remainingNodes + [restartedNode]
    logger.debug("Ensure all nodes are connected")
    looper.run(checkNodesConnected(nodes))
    logger.debug("Ensure all nodes have the same data")
    ensure_all_nodes_have_same_data(looper, nodes=nodes,
                                    exclude_from_check=exclude_from_check)
    return nodes
def ensure_view_change(looper, nodes, exclude_from_check=None, custom_timeout=None):
    """
    This method patches the master performance check to return False and
    thus ensures that all given nodes do a view change

    :param looper: event loop driving the test
    :param nodes: nodes whose monitors are patched
    :param exclude_from_check: nodes not required to reach the new view
    :param custom_timeout: optional override for the wait timeout
    :return: the new (incremented) view number
    """
    old_view_no = checkViewNoForNodes(nodes)

    old_meths = {}
    view_changes = {}
    for node in nodes:
        # Remember the original check so it can be restored afterwards
        old_meths[node.name] = node.monitor.isMasterDegraded
        view_changes[node.name] = node.monitor.totalViewChanges

        def slow_master(self):
            # Only allow one view change
            rv = self.totalViewChanges == view_changes[self.name]
            if rv:
                logger.info('{} making master look slow'.format(self))
            return rv

        # Bind the patched check to this node's monitor; slow_master reads
        # only `self`, so late binding of the loop variable is not an issue.
        node.monitor.isMasterDegraded = types.MethodType(
            slow_master, node.monitor)

    perf_check_freq = next(iter(nodes)).config.PerfCheckFreq
    timeout = custom_timeout or waits.expectedPoolViewChangeStartedTimeout(
        len(nodes)) + perf_check_freq
    nodes_to_check = nodes if exclude_from_check is None else [
        n for n in nodes if n not in exclude_from_check]
    logger.debug('Checking view no for nodes {}'.format(nodes_to_check))
    looper.run(eventually(checkViewNoForNodes, nodes_to_check, old_view_no + 1,
                          retryWait=1, timeout=timeout))

    logger.debug('Patching back perf check for all nodes')
    for node in nodes:
        node.monitor.isMasterDegraded = old_meths[node.name]
    return old_view_no + 1
def testMultipleInstanceChangeMsgsMarkNodeAsSuspicious(looper, nodeSet, up):
    """
    Alpha floods the pool with InstanceChange messages; the other nodes
    must record it as suspicious (FREQUENT_INST_CHNG).
    """
    maliciousNode = nodeSet.Alpha
    # Fire five InstanceChange messages (one per proposed view 0..4)
    for i in range(0, 5):
        maliciousNode.send(
            maliciousNode.view_changer._create_instance_change_msg(i, 0))

    def chk(instId):
        # NOTE(review): `instId` is unused — the check always expects all
        # five messages regardless of the index passed by `eventually`.
        for node in nodeSet:
            if node.name != maliciousNode.name:
                args = getAllArgs(node, ViewChanger.process_instance_change_msg)
                assert len(args) == 5
                for arg in args:
                    assert arg['frm'] == maliciousNode.name

    numOfNodes = len(nodeSet)
    instanceChangeTimeout = waits.expectedPoolViewChangeStartedTimeout(
        numOfNodes)
    for i in range(0, 5):
        looper.run(
            eventually(chk, i, retryWait=1, timeout=instanceChangeTimeout))

    def g():
        for node in nodeSet:
            if node.name != maliciousNode.name:
                # NOTE(review): unpacking getAllArgs(...) into three names
                # presumes it yields exactly three values — verify against
                # the spy helper's contract.
                frm, reason, code = getAllArgs(node, Node.reportSuspiciousNode)
                assert frm == maliciousNode.name
                assert isinstance(reason, SuspiciousNode)
                suspectingNodes = \
                    getNodeSuspicions(node, Suspicions.FREQUENT_INST_CHNG.code)
                # NOTE(review): 13 is the observed suspicion count for this
                # scenario — confirm it still matches the pool configuration.
                assert len(suspectingNodes) == 13

    timeout = waits.expectedTransactionExecutionTime(numOfNodes)
    looper.run(eventually(g, retryWait=1, timeout=timeout))
def test_restart_node_with_view_changes(tdir, tconf, looper, txnPoolNodeSet,
                                        sdk_pool_handle, sdk_wallet_client,
                                        allPluginsPath):
    '''
    1. Stop the node Delta
    2. Patch methods for processing VCStartMsgStrategy messages
    3. Delay CurrentState messages on Delta
    4. Start Delta
    5. Start view change with a maser degradation reason (from view 0 to 1)
    6. Check that Delta start VCStartMsgStrategy after quorum of InstanceChanges
    7. Reset delay for CurrentStates
    8. Check that propagate primary happened.
    9. Unpatch VCStartMsgStrategy methods and process catching messages.
    10. Start view change with a maser degradation reason (from view 1 to 2)
    11. Check that all nodes has viewNo = 2 and can order transactions.
    '''
    # Prepare nodes
    lagging_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]
    start_view_no = lagging_node.viewNo

    # Stop Delta
    waitNodeDataEquality(looper, lagging_node, *rest_nodes)
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagging_node,
                                            stopNode=True)
    looper.removeProdable(lagging_node)

    # Send more requests to active nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, len(rest_nodes) * 3)
    waitNodeDataEquality(looper, *rest_nodes)

    # Restart stopped node
    lagging_node = start_stopped_node(
        lagging_node,
        looper,
        tconf,
        tdir,
        allPluginsPath,
        start=False,
    )

    # Add to lagging_node node route a patched method for processing
    # ViewChangeStartMessage to delay processing.
    # NOTE(review): a module-level `global` is used so the patched processor
    # (a plain function, not a closure over test state) can record messages.
    global view_change_started_messages
    view_change_started_messages = []

    def patch_on_view_change_started(node, msg, frm):
        view_change_started_messages.append((node, msg, frm))

    processor = partial(patch_on_view_change_started, lagging_node)
    lagging_node.nodeMsgRouter.add((ViewChangeStartMessage, processor))

    # Delay CurrentState messages on lagging_node to delay propagate primary
    with delay_rules(lagging_node.nodeIbStasher, msg_rep_delay()):
        # Add lagging_node to pool
        looper.add(lagging_node)
        txnPoolNodeSet[-1] = lagging_node
        looper.run(checkNodesConnected(txnPoolNodeSet))
        looper.run(
            eventually(lambda: assertExp(
                len(lagging_node.nodeIbStasher.delayeds) >= 3)))

        # Start ViewChange (0 -> 1)
        for n in rest_nodes:
            n.view_changer.on_master_degradation()

        # Lagging node still did not catchup, so it can't participate and process I_CH
        looper.run(
            eventually(
                lambda: assertExp(len(view_change_started_messages) == 0)))

    # Lagging node catches up till old view
    looper.run(
        eventually(lambda: assertExp(lagging_node.viewNo == start_view_no)))

    # Unpatch ViewChangeStartMessages processing and process delayed messages
    processor = partial(VCStartMsgStrategy.on_view_change_started,
                        lagging_node)
    lagging_node.nodeMsgRouter.add((ViewChangeStartMessage, processor))

    # Re-inject the messages captured while the processor was patched
    for msg in view_change_started_messages:
        lagging_node.view_changer.node.nodeInBox.append(
            (msg[1], lagging_node.view_changer.node.name))

    waitForViewChange(looper, txnPoolNodeSet,
                      expectedViewNo=start_view_no + 1,
                      customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                          len(txnPoolNodeSet)))

    # Start ViewChange (1 -> 2)
    for n in rest_nodes:
        n.view_changer.on_master_degradation()
    waitForViewChange(looper, txnPoolNodeSet,
                      expectedViewNo=start_view_no + 2,
                      customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                          len(txnPoolNodeSet)))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet,
                        instances_list=range(
                            txnPoolNodeSet[0].requiredNumberOfInstances))

    # Pool must still be able to order after the double view change
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    waitNodeDataEquality(looper, *txnPoolNodeSet)
def do_view_change_with_delayed_commits_and_node_restarts(fast_nodes,
                                                          slow_nodes,
                                                          nodes_to_restart,
                                                          old_view_no,
                                                          old_last_ordered,
                                                          looper,
                                                          sdk_pool_handle,
                                                          sdk_wallet_client,
                                                          tconf,
                                                          tdir,
                                                          all_plugins_path,
                                                          wait_for_catchup=False):
    """
    Delays commits without processing on `slow_nodes`, restarts
    `nodes_to_restart`, triggers view change, and confirms that view changed
    completed successfully and that the ledgers are consistent and in sync.

    :param fast_nodes: Nodes that will order the requests
    :param slow_nodes: Nodes whose commits will be delay, and that will not
        order the requests
    :param nodes_to_restart: Nodes that will be restarted
    :param old_view_no: View that we started from
    :param old_last_ordered: Last ordered 3pc txn before we did any requests
    :param wait_for_catchup: Should we wait for restarted nodes to finish
        catchup
    """
    nodes = fast_nodes + slow_nodes
    slow_stashers = [slow_node.nodeIbStasher for slow_node in slow_nodes]

    # Delay commits on `slow_nodes`
    with delay_rules_without_processing(slow_stashers, cDelay()):
        request = sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)

        # Check that all of the nodes except the slows one ordered the request
        looper.run(eventually(check_last_ordered, fast_nodes,
                              (old_view_no, old_last_ordered[1] + 1)))
        looper.run(eventually(check_last_ordered, slow_nodes, old_last_ordered))

        # Restart nodes
        for node in nodes_to_restart:
            # NOTE(review): `timeout=len(nodes_to_restart)` passes a node
            # count as a timeout value — confirm this is intentional.
            disconnect_node_and_ensure_disconnected(looper, nodes, node,
                                                    timeout=len(nodes_to_restart),
                                                    stopNode=True)
            looper.removeProdable(node)
            nodes.remove(node)

            restarted_node = start_stopped_node(node, looper, tconf, tdir,
                                                all_plugins_path)
            nodes.append(restarted_node)
            looper.runFor(waits.expectedNodeStartUpTimeout())

        looper.run(checkNodesConnected(nodes))

        if wait_for_catchup:
            ensure_all_nodes_have_same_data(looper, nodes)

        # Trigger view change on all nodes
        for node in nodes:
            node.view_changer.on_master_degradation()

    # Sanity: restarts replaced nodes one-for-one, none were lost
    assert len(nodes) == len(slow_nodes) + len(fast_nodes)

    # Assert that view change was successful and that ledger data is consistent
    waitForViewChange(looper, nodes,
                      expectedViewNo=(old_view_no + 1),
                      customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                          len(nodes)))
    ensureElectionsDone(looper=looper, nodes=nodes)
    ensure_all_nodes_have_same_data(looper, nodes)

    sdk_get_reply(looper, request)
    sdk_ensure_pool_functional(looper, nodes, sdk_wallet_client, sdk_pool_handle)
def do_view_change_with_propagate_primary_on_one_delayed_node(
        slow_node, nodes, looper, sdk_pool_handle, sdk_wallet_client):
    """
    Drive a view change while one node (`slow_node`) has its InstanceChange
    and NewView messages delayed, so it completes the view change via
    propagate-primary after ordering a 3PC-batch in the old view.

    :param slow_node: the node whose view-change traffic is delayed
    :param nodes: all pool nodes (including `slow_node`)
    :param looper: event loop driving the test
    :param sdk_pool_handle: SDK pool handle for sending requests
    :param sdk_wallet_client: SDK client wallet for signing requests
    """
    slow_stasher = slow_node.nodeIbStasher
    fast_nodes = [n for n in nodes if n != slow_node]
    stashers = [n.nodeIbStasher for n in nodes]

    # Get last prepared certificate in pool
    lpc = last_prepared_certificate(nodes)
    # Get pool current view no
    view_no = lpc[0]

    # Nested delays: InstanceChange (outermost) and NewView are held back
    # only on the slow node; Commits are held back on ALL nodes so the
    # request gets prepared but not ordered before the view change starts.
    with delay_rules(slow_stasher, icDelay()):
        with delay_rules(slow_stasher, nv_delay()):
            with delay_rules(stashers, cDelay()):
                # Send request
                request = sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)

                # Wait until this request is prepared on N-f nodes
                looper.run(
                    eventually(check_last_prepared_certificate_on_quorum,
                               nodes, (lpc[0], lpc[1] + 1)))

                # Trigger view change
                for n in nodes:
                    n.view_changer.on_master_degradation()

                # Wait until view change is completed on all nodes except slow one
                waitForViewChange(
                    looper, fast_nodes,
                    expectedViewNo=view_no + 1,
                    customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                        len(nodes)))
                wait_for_elections_done_on_given_nodes(
                    looper, fast_nodes,
                    getRequiredInstances(len(nodes)),
                    timeout=waits.expectedPoolElectionTimeout(len(nodes)))

            # Now all the nodes receive Commits
            # The slow node will accept Commits and order the 3PC-batch in the old view
            looper.runFor(
                waits.expectedOrderingTime(getNoInstances(len(nodes))))

        # Now slow node receives NewView
        waitForViewChange(looper, [slow_node],
                          expectedViewNo=view_no + 1,
                          customTimeout=waits.expectedPoolViewChangeStartedTimeout(
                              len(nodes)))
        wait_for_elections_done_on_given_nodes(
            looper, [slow_node],
            getRequiredInstances(len(nodes)),
            timeout=waits.expectedPoolElectionTimeout(len(nodes)))

    # Now slow node receives InstanceChanges but discards them because already
    # started propagate primary to the same view.

    # Finish request gracefully
    sdk_get_reply(looper, request)
def check_no_view_change(looper, node):
    """
    Verify the node accumulates three future ViewChangeDone messages and
    that attempting a view change does not alter that count.
    """
    wait_limit = expectedPoolViewChangeStartedTimeout(4)
    looper.run(eventually(check_future_vcd_count, node, 3,
                          timeout=wait_limit))
    try_view_change(looper, node)
    # The count must be unchanged after the attempted view change
    check_future_vcd_count(node, 3)
def check_no_view_change(looper, nodes):
    """
    Wait until the given nodes have stashed exactly three instance-change
    messages, confirming no view change has been triggered.
    """
    wait_limit = expectedPoolViewChangeStartedTimeout(len(nodes))
    looper.run(eventually(check_stashed_instance_changes, nodes, 3,
                          timeout=wait_limit))
def check_no_view_change(looper, node):
    """
    Ensure the node holds three future ViewChangeDone messages, then
    confirm a view-change attempt leaves that count untouched.
    """
    deadline = expectedPoolViewChangeStartedTimeout(4)
    looper.run(eventually(check_future_vcd_count, node, 3,
                          timeout=deadline))
    try_view_change(looper, node)
    # Still exactly three — the view change must not have happened
    check_future_vcd_count(node, 3)
def check_no_view_change(looper, nodes):
    """
    Verify the nodes have recorded three instance changes and that an
    attempted view change leaves that count unchanged.
    """
    deadline = expectedPoolViewChangeStartedTimeout(len(nodes))
    looper.run(eventually(check_instance_change_count, nodes, 3,
                          timeout=deadline))
    try_view_change(looper, nodes)
    # Count must be stable — no view change should have been started
    check_instance_change_count(nodes, 3)