async def test_f_isolated_with_primary(self, bft_network, tracker):
    '''
    Isolate f replicas, the primary among them, and make sure the system
    is still able to make progress.

    The isolated set is replica 0 (taken to be the initial primary) plus
    f - 1 randomly chosen replicas, never including replica 1. Inside the
    adversary scope the test issues tracked operations, waits via
    wait_for_view on replica 1 for view 1, issues more tracked operations,
    and asserts that the last block id read through a client has grown.
    '''
    f = bft_network.config.f

    # NOTE(review): the primary is assumed to be replica 0 rather than
    # queried from the network - confirm this holds for every configuration.
    initial_primary = 0
    expected_next_primary = 1

    isolated_replicas = bft_network.random_set_of_replicas(
        f - 1, without={initial_primary, expected_next_primary})
    isolated_replicas.add(initial_primary)

    bft_network.start_all_replicas()
    await trio.sleep(SKVBC_INIT_GRACE_TIME)
    await bft_network.init_preexec_count()

    with net.ReplicaSubsetIsolatingAdversary(
            bft_network, isolated_replicas) as adversary:
        read_client = bft_network.random_client()

        # the baseline block id is read before the adversary interferes
        start_block = await tracker.get_last_block_id(read_client)

        adversary.interfere()

        await self.issue_tracked_ops_to_the_system(bft_network, tracker)

        await bft_network.wait_for_view(
            replica_id=expected_next_primary,
            expected=lambda v: v == expected_next_primary,
            err_msg="Make sure view change has been triggered.")

        await self.issue_tracked_ops_to_the_system(bft_network, tracker)

        last_block = await tracker.get_last_block_id(read_client)
        assert last_block > start_block, (
            f"No progress with the primary isolated: last block "
            f"{last_block} is not greater than start block {start_block}")
async def test_isolate_f_non_primaries_state_transfer(self, bft_network):
    """
    In this test we isolate f replicas long enough for the unaffected
    replicas to trigger a checkpoint. Then, once the adversary is not
    active anymore, we make sure the isolated replicas catch up via
    state transfer.

    The isolated replicas are chosen at random and never include the
    current primary.
    """
    bft_network.start_all_replicas()
    skvbc = kvbc.SimpleKVBCProtocol(bft_network)

    f = bft_network.config.f
    curr_primary = await bft_network.get_current_primary()

    isolated_replicas = bft_network.random_set_of_replicas(
        f, without={curr_primary})
    live_replicas = set(
        bft_network.all_replicas()) - set(isolated_replicas)

    # reach a checkpoint, despite the presence of an adversary
    with net.ReplicaSubsetIsolatingAdversary(
            bft_network, isolated_replicas) as adversary:
        adversary.interfere()

        await skvbc.fill_and_wait_for_checkpoint(
            initial_nodes=list(live_replicas),
            num_of_checkpoints_to_add=3,
            verify_checkpoint_persistency=False)

    # at this point the adversary is inactive, so the isolated replicas
    # should be able to catch up via state transfer
    await bft_network.wait_for_state_transfer_to_start()

    # State transfer should complete on all isolated replicas.
    # The current primary is used as the reference replica because it was
    # explicitly excluded from isolation; a hard-coded replica id could
    # itself have been one of the isolated (stale) replicas.
    # NOTE(review): the first argument is presumably the up-to-date
    # reference replica - confirm against wait_for_state_transfer_to_stop.
    for ir in isolated_replicas:
        await bft_network.wait_for_state_transfer_to_stop(curr_primary, ir)
async def send_concurrent_ops_with_isolated_replica(
        self, isolated_replica, run_duration):
    """
    Send concurrent operations while the isolated replica is kept blocked.

    For roughly `run_duration` seconds, every client returned by
    `get_all_clients()` repeatedly sends the same `send_kv_set` request
    (built from the full key set) in parallel, with
    `ReplicaSubsetIsolatingAdversary` set up for `isolated_replica`. The
    method then waits for the remaining replicas to checkpoint. The whole
    sequence is bounded by `trio.move_on_after(run_duration + 30)`.

    :param isolated_replica: passed both as the `without` argument of
        `all_replicas` and as the replica subset of the adversary.
        NOTE(review): presumably a set of replica ids - confirm with callers.
    :param run_duration: number of seconds to keep sending operations.
    """
    clients = self.bft_network.get_all_clients()
    max_read_set_size = len(self.keys)
    read_version = 0
    readset = self.readset(0, max_read_set_size)
    writeset = self.writeset(0, readset)
    # despite its name this is an absolute wall-clock deadline, not a duration
    total_run_time = time.time() + run_duration
    initial_nodes = self.bft_network.all_replicas(without=isolated_replica)
    with log.start_action(
            action_type="send_concurrent_ops_with_isolated_replica"):
        # cancels the enclosed scope once run_duration + 30 seconds elapse,
        # so neither the send loop nor the checkpoint wait can hang forever
        with trio.move_on_after(run_duration + 30):
            with net.ReplicaSubsetIsolatingAdversary(
                    self.bft_network, isolated_replica) as adversary:
                adversary.interfere()

                while (time.time() < total_run_time):
                    # each round starts one send_kv_set task per client; the
                    # nursery exits only after all of them have finished
                    async with trio.open_nursery() as nursery:
                        for client in clients:
                            nursery.start_soon(self.send_kv_set, client,
                                               readset, writeset,
                                               read_version, False, False,
                                               False)
                # NOTE(review): confirm this wait is meant to run once, after
                # the send loop, while the adversary is still active.
                await self.bft_network.wait_for_replicas_to_checkpoint(
                    initial_nodes)
async def test_isolate_non_primaries_subset_with_view_change(
        self, bft_network, tracker):
    """
    Isolate f-1 non-primary replicas, stop the primary, and check recovery.

    While the f-1 replicas are cut off from the rest of the network, the
    primary is stopped and random writes are sent, which leaves f replicas
    unavailable in total. A randomly picked replica that is neither stopped
    nor isolated is then awaited until it reports the next view.

    After the adversary is gone, every previously isolated replica must
    reach that same view and, following a round of concurrent operations,
    the same last executed sequence number as one of the other replicas.
    """
    bft_network.start_all_replicas()

    f = bft_network.config.f
    initial_primary = await bft_network.get_current_primary()
    expected_next_primary = initial_primary + 1

    def is_next_view(view):
        return view == expected_next_primary

    isolated_replicas = bft_network.random_set_of_replicas(
        f - 1, without={initial_primary, expected_next_primary})
    # the stopped primary together with the isolated replicas
    unavailable = {initial_primary}.union(isolated_replicas)

    log.log_message(
        message_type=
        f'Isolating network traffic to/from replicas {isolated_replicas}.')

    with net.ReplicaSubsetIsolatingAdversary(
            bft_network, isolated_replicas) as adversary:
        adversary.interfere()

        bft_network.stop_replica(initial_primary)
        await self._send_random_writes(tracker)

        view_witness = random.choice(
            bft_network.all_replicas(without=unavailable))
        await bft_network.wait_for_view(
            replica_id=view_witness,
            expected=is_next_view,
            err_msg="Make sure view change has been triggered.")

        # give the active window time to be rebuilt after the view change
        await trio.sleep(seconds=5)

    # from here on the adversary is no longer active:
    # the isolated replicas have to activate the new view
    for ir in isolated_replicas:
        await bft_network.wait_for_view(
            replica_id=ir,
            expected=is_next_view,
            err_msg=
            f"Make sure isolated replica #{ir} works in new view {expected_next_primary}."
        )

    # ...and take part in consensus & request execution again
    await tracker.run_concurrent_ops(num_ops=50)

    seq_num_witness = random.choice(
        bft_network.all_replicas(without=unavailable))
    expected_last_executed_seq_num = \
        await bft_network.wait_for_last_executed_seq_num(
            replica_id=seq_num_witness)

    for ir in isolated_replicas:
        await bft_network.wait_for_last_executed_seq_num(
            replica_id=ir, expected=expected_last_executed_seq_num)
async def test_alternate_f_isolated(self, bft_network, tracker):
    '''
    Isolate f replicas and make sure the system is able to make progress.
    Then, isolate a different set of f replicas and make sure the system
    is able to make progress.

    Neither set contains replica 0 (taken to be the initial primary), and
    the second set is disjoint from the first one.
    '''
    f = bft_network.config.f

    # NOTE(review): the primary is assumed to be replica 0 rather than
    # queried from the network - confirm.
    initial_primary = 0

    isolated_replicas_take_1 = bft_network.random_set_of_replicas(
        f, without={initial_primary})

    bft_network.start_all_replicas()
    await trio.sleep(SKVBC_INIT_GRACE_TIME)
    await bft_network.init_preexec_count()

    read_client = bft_network.random_client()

    async def assert_progress_while_isolated(isolated_replicas):
        # Isolate the given replicas, issue tracked operations and check
        # that the last block id has advanced while they are cut off.
        with net.ReplicaSubsetIsolatingAdversary(
                bft_network, isolated_replicas) as adversary:
            start_block = await tracker.get_last_block_id(read_client)

            adversary.interfere()
            await self.issue_tracked_ops_to_the_system(bft_network, tracker)

            last_block = await tracker.get_last_block_id(read_client)
            assert last_block > start_block, (
                f"No progress while isolating {isolated_replicas}: last "
                f"block {last_block} is not greater than start block "
                f"{start_block}")

    await assert_progress_while_isolated(isolated_replicas_take_1)

    # The second set must avoid both the primary and the first set. Build
    # the exclusion set separately instead of mutating the first set.
    excluded = set(isolated_replicas_take_1) | {initial_primary}
    isolated_replicas_take_2 = bft_network.random_set_of_replicas(
        f, without=excluded)

    await assert_progress_while_isolated(isolated_replicas_take_2)
async def test_checkpoint_propagation_after_f_non_primaries_isolated(
        self, bft_network):
    """
    Isolate f non-primary replicas, trigger a checkpoint, and verify that
    the checkpoint is created by the reachable replicas and later reaches
    the isolated ones once the adversary is gone.

    Steps:
    1) Start the BFT network and make sure all nodes are up
    2) Isolate f non-primary replicas
    3) Send enough client requests to trigger the checkpoint protocol
    4) Check that every node ends up at the new checkpoint
    """
    bft_network.start_all_replicas()
    skvbc = kvbc.SimpleKVBCProtocol(bft_network)

    replica_count = bft_network.config.n
    fault_budget = bft_network.config.f

    self.assertEqual(len(bft_network.procs), replica_count,
                     "Make sure all replicas are up initially.")

    current_primary = await bft_network.get_current_primary()
    checkpoint_before = await bft_network.wait_for_checkpoint(
        replica_id=current_primary)

    isolated_replicas = bft_network.random_set_of_replicas(
        fault_budget, without={current_primary})

    with net.ReplicaSubsetIsolatingAdversary(
            bft_network, isolated_replicas) as adversary:
        adversary.interfere()

        reachable = bft_network.all_replicas(without=isolated_replicas)
        await skvbc.fill_and_wait_for_checkpoint(
            initial_nodes=reachable,
            checkpoint_num=1,
            verify_checkpoint_persistency=False)

        # every replica that was not isolated must have moved on by
        # exactly one checkpoint
        for replica in bft_network.all_replicas(without=isolated_replicas):
            reached = await bft_network.wait_for_checkpoint(
                replica_id=replica)
            self.assertEqual(reached, checkpoint_before + 1)

    # with the adversary gone, the isolated replicas are expected to
    # arrive at the same checkpoint
    for isolated_replica in isolated_replicas:
        reached = await bft_network.wait_for_checkpoint(
            replica_id=isolated_replica,
            expected_checkpoint_num=checkpoint_before + 1)
        self.assertEqual(reached, checkpoint_before + 1)
async def test_state_transfer_isolated(self, bft_network):
    """
    Check that a replica keeps working after it was isolated and had to
    catch up via state transfer.

    One randomly chosen non-primary node is isolated while the rest of the
    cluster is filled with data. The isolation then ends and the test
    waits for state transfer to start and to stop on that node. Finally a
    quorum that includes the node is forced and a put/get round-trip is
    executed, which can only succeed if the node operates correctly.
    """
    bft_network.start_all_replicas()
    skvbc = kvbc.SimpleKVBCProtocol(bft_network)

    replica_count = bft_network.config.n
    f = bft_network.config.f

    self.assertEqual(len(bft_network.procs), replica_count,
                     "Make sure all replicas are up initially.")

    current_primary = await bft_network.get_current_primary()

    candidates = bft_network.all_replicas(without={current_primary})
    isolated_node = random.choice(candidates)
    isolated_replicas = {isolated_node}

    with net.ReplicaSubsetIsolatingAdversary(
            bft_network, isolated_replicas) as adversary:
        adversary.interfere()

        # enough client requests to trigger the checkpoint protocol on
        # every replica except the isolated one
        reachable = bft_network.all_replicas(without=isolated_replicas)
        await skvbc.fill_and_wait_for_checkpoint(
            initial_nodes=reachable,
            num_of_checkpoints_to_add=3,
            verify_checkpoint_persistency=False)

    await bft_network.wait_for_state_transfer_to_start()
    await bft_network.wait_for_state_transfer_to_stop(
        current_primary, isolated_node)

    await skvbc.assert_successful_put_get(self)

    await bft_network.force_quorum_including_replica(isolated_node)

    # With f other replicas stopped, consensus needs the previously
    # isolated node: if it cannot process this request for any reason,
    # the put/get fails, so success shows that it recovered correctly.
    await skvbc.assert_successful_put_get(self)
async def test_isolate_f_non_primaries_slow_path(self, bft_network, tracker):
    """
    Make sure a BFT network keeps making progress (albeit on the slow
    path) while an adversary isolates f non-primary replicas, and that
    those replicas catch up and take part in consensus again once the
    adversary disappears.

    Note: no state transfer is involved in this scenario, because the
    adversary is not active long enough for the unaffected replicas to
    trigger a checkpoint.
    """
    bft_network.start_all_replicas()

    f = bft_network.config.f
    curr_primary = await bft_network.get_current_primary()

    isolated_replicas = bft_network.random_set_of_replicas(
        f, without={curr_primary})

    # the same workload is used with and without the adversary
    workload = dict(num_ops=100, write_weight=0.5)

    # with f replicas unable to participate in consensus, requests are
    # expected to go through the slow path
    with net.ReplicaSubsetIsolatingAdversary(
            bft_network, isolated_replicas) as adversary:
        adversary.interfere()

        await tracker.run_concurrent_ops(**workload)

        await bft_network.wait_for_slow_path_to_be_prevalent(
            as_of_seq_num=1)

    # the adversary is gone: the disconnected replicas should resume
    # participating in consensus & request execution
    await tracker.run_concurrent_ops(**workload)

    last_executed_seq_num = \
        await bft_network.wait_for_last_executed_seq_num()

    for ir in isolated_replicas:
        await bft_network.wait_for_last_executed_seq_num(
            replica_id=ir, expected=last_executed_seq_num)
async def test_f_isolated_non_primaries(self, bft_network, tracker):
    '''
    Isolate f non-primary replicas and make sure the system is able to
    make progress.

    The isolated replicas are chosen at random and never include
    replica 0. Progress is checked by comparing the last block id read
    through a client before the adversary interferes with the one read
    after tracked operations have been issued.
    '''
    f = bft_network.config.f

    bft_network.start_all_replicas()

    # NOTE(review): replica 0 is assumed to be the primary - confirm.
    isolated_replicas = bft_network.random_set_of_replicas(f, without={0})

    with net.ReplicaSubsetIsolatingAdversary(
            bft_network, isolated_replicas) as adversary:
        read_client = bft_network.random_client()

        start_block = await tracker.get_last_block_id(read_client)

        adversary.interfere()

        # NOTE(review): other callers visible alongside this test invoke
        # this helper as (bft_network, tracker) - confirm which signature
        # the helper actually has.
        await self.issue_tracked_ops_to_the_system(tracker)

        last_block = await tracker.get_last_block_id(read_client)
        assert last_block > start_block, (
            f"No progress with f non-primaries isolated: last block "
            f"{last_block} is not greater than start block {start_block}")
async def test_checkpoint_propagation_after_f_nodes_including_primary_isolated(
        self, bft_network):
    """
    Isolate f replicas including the primary, trigger a view change and
    then a checkpoint, and verify that the new view and the checkpoint
    reach the isolated replicas after the adversary is gone.

    Steps:
    1) Start the BFT network and make sure all nodes are up
    2) Isolate f replicas, the primary among them
    3) Send a batch of write requests to trigger a view change
    4) Send enough client requests to trigger the checkpoint protocol
    5) Check that the isolated nodes enter the new view and reach the
       checkpoint
    """
    bft_network.start_all_replicas()
    skvbc = kvbc.SimpleKVBCProtocol(bft_network)

    replica_count = bft_network.config.n
    f = bft_network.config.f

    self.assertEqual(len(bft_network.procs), replica_count,
                     "Make sure all replicas are up initially.")

    initial_primary = await bft_network.get_current_primary()
    expected_next_primary = initial_primary + 1

    def is_next_view(view):
        return view == expected_next_primary

    checkpoint_before = await bft_network.wait_for_checkpoint(
        replica_id=initial_primary)

    # f - 1 random replicas plus the primary itself; the expected next
    # primary is never isolated
    isolated_replicas = bft_network.random_set_of_replicas(
        f - 1, without={initial_primary, expected_next_primary})
    isolated_replicas.add(initial_primary)

    with net.ReplicaSubsetIsolatingAdversary(
            bft_network, isolated_replicas) as adversary:
        adversary.interfere()

        # a batch of write requests to trigger the view change
        await self._send_random_writes(skvbc)

        # every replica that is not isolated must have moved to the new view
        for replica in bft_network.all_replicas(without=isolated_replicas):
            view_now = await bft_network.wait_for_view(
                replica_id=replica,
                expected=is_next_view,
                err_msg="Make sure view change has been triggered.")
            self.assertEqual(view_now, expected_next_primary)

        # enough client requests to trigger the checkpoint protocol on
        # every replica except the isolated ones
        reachable = bft_network.all_replicas(without=isolated_replicas)
        await skvbc.fill_and_wait_for_checkpoint(
            initial_nodes=reachable,
            num_of_checkpoints_to_add=1,
            verify_checkpoint_persistency=False)

    # with the adversary gone, the isolated replicas should enter the
    # new view
    for isolated_replica in isolated_replicas:
        view_now = await bft_network.wait_for_view(
            replica_id=isolated_replica,
            expected=is_next_view,
            err_msg="Make sure view change has been triggered.")
        self.assertEqual(view_now, expected_next_primary)

    # ...and reach the checkpoint
    def is_next_checkpoint(ecn):
        return ecn == checkpoint_before + 1

    await bft_network.wait_for_replicas_to_checkpoint(
        isolated_replicas,
        expected_checkpoint_num=is_next_checkpoint)