async def verify_replicas_are_in_wedged_checkpoint(self, bft_network, previous_checkpoint, replicas):
    with log.start_action(action_type="verify_replicas_are_in_wedged_checkpoint",
                          previous_checkpoint=previous_checkpoint):
        for replica_id in replicas:
            with log.start_action(action_type="verify_replica", replica=replica_id):
                with trio.fail_after(seconds=60):
                    while True:
                        with trio.move_on_after(seconds=1):
                            checkpoint_after = await bft_network.wait_for_checkpoint(replica_id=replica_id)
                            if checkpoint_after == previous_checkpoint + 2:
                                break
                            else:
                                await trio.sleep(1)
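# Usage sketch (hypothetical, for illustration only): wedge the cluster via an
# operator wrapper exposing wedge() (see further down), then verify that every
# replica stopped at the expected checkpoint. Assumes a started BftTestNetwork
# `bft_network` and an Operator instance `op`.
#
#   checkpoint_before = await bft_network.wait_for_checkpoint(replica_id=0)
#   await op.wedge()
#   await self.verify_replicas_are_in_wedged_checkpoint(
#       bft_network, checkpoint_before, replicas=bft_network.all_replicas())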
async def send_kv_set(self, client, readset, writeset, read_version,
                      long_exec=False, reply_assert=True, raise_slowErrorIfAny=True):
    with log.start_action(action_type="send_kv_set"):
        msg = self.write_req(readset, writeset, read_version, long_exec)
        seq_num = client.req_seq_num.next()
        client_id = client.client_id
        if self.tracker is not None:
            self.tracker.send_write(client_id, seq_num, readset, dict(writeset), read_version)
        try:
            serialized_reply = await client.write(msg, seq_num, pre_process=self.pre_exec_all)
            reply = self.parse_reply(serialized_reply)
            if reply_assert:
                assert reply.success
            if self.tracker is not None:
                self.tracker.status.record_client_reply(client_id)
                self.tracker.handle_write_reply(client_id, seq_num, reply)
            return reply
        except trio.TooSlowError:
            if self.tracker is not None:
                self.tracker.status.record_client_timeout(client_id)
            if raise_slowErrorIfAny:
                raise  # re-raise the original trio.TooSlowError with its traceback
            return
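# Usage sketch (hypothetical): issue a single conditional write through a
# random client. The readset/writeset helpers and the sizes below are
# illustrative; they come from the surrounding protocol class.
#
#   client = self.bft_network.random_client()
#   readset = self.readset(0, 2)
#   writeset = self.writeset(2)
#   reply = await self.send_kv_set(client, readset, writeset, read_version=0)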
async def source_replica(self, replica_id):
    """ Return the id of the source replica currently set for state transfer on the given replica """
    with log.start_action(action_type="source_replica"):
        key = ['bc_state_transfer', 'Gauges', 'current_source_replica']
        source_replica_id = await self.metrics.get(replica_id, *key)
        return source_replica_id
async def _count_replicas_in_view(self, view):
    """
    Count the number of replicas that have activated a given view
    """
    with log.start_action(action_type="_count_replicas_in_view"):
        nb_replicas_in_view = 0

        async def count_if_replica_in_view(r, expected_view):
            """
            A closure that allows concurrent counting of replicas
            that have activated a given view.
            """
            with log.start_action(action_type="count_if_replica_in_view"):
                nonlocal nb_replicas_in_view
                key = ['replica', 'Gauges', 'currentActiveView']
                with trio.move_on_after(seconds=5):
                    while True:
                        with trio.move_on_after(seconds=1):
                            try:
                                replica_view = await self.metrics.get(r, *key)
                                if replica_view == expected_view:
                                    nb_replicas_in_view += 1
                            except KeyError:
                                # metrics not yet available, continue looping
                                continue
                            else:
                                break

        async with trio.open_nursery() as nursery:
            for r in self.get_live_replicas():
                nursery.start_soon(count_if_replica_in_view, r, view)
        return nb_replicas_in_view
async def wait_for_view(self, replica_id, expected=None,
                        err_msg="Expected view not reached"):
    """
    Waits for a view that matches the "expected" predicate,
    and returns the corresponding view number.

    If the "expected" predicate is not provided,
    returns the current view number.

    In case of a timeout, fails with the provided err_msg.
    """
    with log.start_action(action_type="wait_for_view"):
        if expected is None:
            expected = lambda _: True

        matching_view = None
        nb_replicas_in_matching_view = 0
        try:
            matching_view = await self._wait_for_matching_agreed_view(replica_id, expected)
            print(f'Matching view #{matching_view} has been agreed among replicas.')

            nb_replicas_in_matching_view = await self._wait_for_active_view(matching_view)
            print(f'View #{matching_view} is active on '
                  f'{nb_replicas_in_matching_view} replicas '
                  f'({nb_replicas_in_matching_view} >= n-f = {self.config.n - self.config.f}).')

            return matching_view
        except trio.TooSlowError:
            assert False, err_msg + \
                          f'(matchingView={matching_view} ' \
                          f'replicasInMatchingView={nb_replicas_in_matching_view})'
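# Usage sketch (hypothetical): after crashing the primary, wait until the
# cluster agrees on and activates view 1. Assumes a started BftTestNetwork
# `bft_network`.
#
#   await bft_network.wait_for_view(
#       replica_id=random.choice(bft_network.all_replicas(without={0})),
#       expected=lambda v: v == 1,
#       err_msg="Make sure a view change has occurred.")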
async def get_current_primary(self):
    """
    Returns the current primary replica id
    """
    with log.start_action(action_type="get_current_primary"):
        current_primary = await self.get_current_view()
        return current_primary % self.config.n
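# Worked example: the primary is the active view number modulo the cluster
# size, so with n = 7 replicas, view 9 maps to primary 9 % 7 = 2.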
async def send_write_kv_set_batch(self, client, max_set_size, batch_size,
                                  read_version=None, long_exec=False):
    msg_batch = []
    batch_seq_nums = []
    client_id = client.client_id
    if read_version is None:
        read_version = 0
        if self.tracker is not None:
            read_version = self.tracker.read_block_id()
    for i in range(batch_size):
        max_read_set_size = max_set_size
        if self.tracker is not None:
            max_read_set_size = 0 if self.tracker.no_conflicts else max_set_size
        readset = self.readset(0, max_read_set_size)
        writeset = self.writeset(max_set_size)
        msg_batch.append(self.write_req(readset, writeset, read_version, long_exec))
        seq_num = client.req_seq_num.next()
        batch_seq_nums.append(seq_num)
        if self.tracker is not None:
            self.tracker.send_write(client_id, seq_num, readset, dict(writeset), read_version)

    with log.start_action(action_type="send_tracked_kv_set_batch"):
        try:
            replies = await client.write_batch(msg_batch, batch_seq_nums)
            if self.tracker is not None:
                self.tracker.status.record_client_reply(client_id)
            for seq_num, reply_msg in replies.items():
                reply = self.parse_reply(reply_msg.get_common_data())
                if self.tracker is not None:
                    self.tracker.handle_write_reply(client_id, seq_num, reply)
        except trio.TooSlowError:
            if self.tracker is not None:
                self.tracker.status.record_client_timeout(client_id)
            return
async def test_delay_with_soft_limit_reached_counter(self, bft_network):
    """
    1. Start all replicas except a non-primary
    2. Expected result: the cluster should work in slow path
    3. Add a delay of 500ms to the network
    4. Start the stopped non-primary replica
    5. Expected result: soft_limit_reached_counter increments
    6. Remove the delay from the network
    """
    initial_primary = 0
    skvbc = kvbc.SimpleKVBCProtocol(bft_network)
    non_primary_replica = random.choice(
        bft_network.all_replicas(without={initial_primary}))
    bft_network.start_replicas(bft_network.all_replicas(without={non_primary_replica}))
    await bft_network.wait_for_slow_path_to_be_prevalent(
        run_ops=lambda: skvbc.run_concurrent_ops(num_ops=20, write_weight=1), threshold=20)

    with ntc.NetworkTrafficControl() as tc:
        # Add a 500ms delay to the loopback interface
        tc.put_loop_back_interface_delay(500)
        bft_network.start_replica(non_primary_replica)
        await skvbc.run_concurrent_ops(100, 1)
        with log.start_action(action_type="get_soft_limit_reached_counter") as action:
            soft_limit_cnt = await bft_network.get_metric(non_primary_replica, bft_network,
                                                          "Counters", "soft_limit_reached_counter",
                                                          "time_service")
            action.log(message_type=f'soft_limit_reached_counter #{soft_limit_cnt}')
        self.assertGreater(soft_limit_cnt, 0)
async def test_pruning_command(self, bft_network):
    with log.start_action(action_type="test_pruning_command"):
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)
        client = bft_network.random_client()
        # Create 100 blocks in total; including the genesis block, we have 101 blocks
        k, v = await skvbc.write_known_kv()
        for i in range(99):
            v = skvbc.random_value()
            await client.write(skvbc.write_req([], [(k, v)], 0))

        # Get the minimal latest pruneable block among all replicas
        reconf_msg = self._construct_reconfiguration_latest_prunebale_block_coammand()
        await client.read(reconf_msg.serialize(),
                          m_of_n_quorum=bft_client.MofNQuorum.All(
                              client.config, [r for r in range(bft_network.config.n)]),
                          reconfiguration=True)
        latest_pruneable_blocks = []
        rsi_rep = client.get_rsi_replies()
        for r in rsi_rep.values():
            lpab = cmf_msgs.ReconfigurationResponse.deserialize(r)[0]
            latest_pruneable_blocks += [lpab.response]

        reconf_msg = self._construct_reconfiguration_prune_request(latest_pruneable_blocks)
        await client.write(reconf_msg.serialize(), reconfiguration=True)
        rsi_rep = client.get_rsi_replies()
        # We expect to have at least 2f + 1 replies
        for rep in rsi_rep:
            r = rsi_rep[rep]
            data = cmf_msgs.ReconfigurationResponse.deserialize(r)[0]
            pruned_block = int(data.additional_data.decode('utf-8'))
            assert pruned_block <= 90
async def test_pruning_command(self, bft_network):
    with log.start_action(action_type="test_pruning_command"):
        bft_network.start_all_replicas()
        skvbc = kvbc.SimpleKVBCProtocol(bft_network)
        client = bft_network.random_client()
        # Create 100 blocks in total; including the genesis block, we have 101 blocks
        k, v = await skvbc.write_known_kv()
        for i in range(99):
            v = skvbc.random_value()
            await client.write(skvbc.write_req([], [(k, v)], 0))

        # Get the minimal latest pruneable block among all replicas
        op = operator.Operator(bft_network.config, client, bft_network.builddir)
        await op.latest_pruneable_block()
        latest_pruneable_blocks = []
        rsi_rep = client.get_rsi_replies()
        for r in rsi_rep.values():
            lpab = cmf_msgs.ReconfigurationResponse.deserialize(r)[0]
            latest_pruneable_blocks += [lpab.response]

        await op.prune(latest_pruneable_blocks)
        rsi_rep = client.get_rsi_replies()
        # We expect to have at least 2f + 1 replies
        for rep in rsi_rep:
            r = rsi_rep[rep]
            data = cmf_msgs.ReconfigurationResponse.deserialize(r)[0]
            pruned_block = int(data.additional_data.decode('utf-8'))
            assert pruned_block <= 90
async def send_concurrent_ops_with_isolated_replica(self, isolated_replica, run_duration):
    """
    Send concurrent operations while the isolated replica is kept blocked
    """
    clients = self.bft_network.get_all_clients()
    max_read_set_size = len(self.keys)
    read_version = 0
    readset = self.readset(0, max_read_set_size)
    writeset = self.writeset(0, readset)
    total_run_time = time.time() + run_duration
    initial_nodes = self.bft_network.all_replicas(without=isolated_replica)
    with log.start_action(action_type="send_concurrent_ops_with_isolated_replica"):
        with trio.move_on_after(run_duration + 30):
            with net.ReplicaSubsetIsolatingAdversary(self.bft_network, isolated_replica) as adversary:
                adversary.interfere()
                while time.time() < total_run_time:
                    async with trio.open_nursery() as nursery:
                        for client in clients:
                            nursery.start_soon(self.send_kv_set, client, readset, writeset,
                                               read_version, False, False, False)
                await self.bft_network.wait_for_replicas_to_checkpoint(initial_nodes)
def start_replica(self, replica_id):
    """
    Start a replica if it isn't already started.
    Otherwise raise an AlreadyRunningError.
    """
    with log.start_action(action_type="start_replica"):
        stdout_file = None
        stderr_file = None
        if os.environ.get('KEEP_APOLLO_LOGS', "").lower() in ["true", "on"]:
            try:
                os.mkdir(f"/tmp/apollo/{self.current_test}/")
            except FileExistsError:
                pass
            stdout_file = open("/tmp/apollo/{}/stdout_{}.log".format(self.current_test, replica_id), 'a+')
            stderr_file = open("/tmp/apollo/{}/stderr_{}.log".format(self.current_test, replica_id), 'a+')

            stdout_file.write("############################################\n")
            stdout_file.flush()
            stderr_file.write("############################################\n")
            stderr_file.flush()

            self.open_fds[replica_id] = (stdout_file, stderr_file)

        if replica_id in self.procs:
            raise AlreadyRunningError(replica_id)

        if self.is_existing and self.config.stop_replica_cmd is not None:
            self.procs[replica_id] = self._start_external_replica(replica_id)
        else:
            self.procs[replica_id] = subprocess.Popen(
                self.start_replica_cmd(replica_id),
                stdout=stdout_file,
                stderr=stderr_file,
                close_fds=True)
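# Usage sketch (hypothetical): keep per-replica stdout/stderr logs under
# /tmp/apollo/<test name>/ by exporting KEEP_APOLLO_LOGS before launching the
# suite. The exact invocation below is illustrative, not prescribed:
#
#   KEEP_APOLLO_LOGS=true python3 -m unittest test_skvbc -v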
async def network_wait_for_checkpoint(
        self, initial_nodes,
        expected_checkpoint_num=lambda ecn: ecn == 2,
        verify_checkpoint_persistency=True,
        assert_state_transfer_not_started=True):
    with log.start_action(action_type="network_wait_for_checkpoint"):
        if assert_state_transfer_not_started:
            await self.bft_network.assert_state_transfer_not_started_all_up_nodes(
                up_replica_ids=initial_nodes)

        # Wait for initial replicas to take checkpoints (exhausting
        # the full window)
        await self.bft_network.wait_for_replicas_to_checkpoint(
            initial_nodes, expected_checkpoint_num)

        if verify_checkpoint_persistency:
            # Stop the initial replicas to ensure the checkpoints get persisted
            self.bft_network.stop_replicas(initial_nodes)

            # Restart the initial replicas and ensure that they still have
            # the checkpoint data
            for i in initial_nodes:
                self.bft_network.start_replica(i)
            await self.bft_network.wait_for_replicas_to_checkpoint(
                initial_nodes, expected_checkpoint_num)
def _create_keys(self):
    """
    Create a sequence of KV store keys with length = 2*num_clients.
    The last character in each key becomes the previous value + 1.
    When the value reaches 'Z', a new character is appended and the
    sequence starts over again.

    Since all keys must be KV_LEN bytes long, they are extended with
    '.' characters.
    """
    with log.start_action(action_type="_create_keys"):
        num_clients = self.bft_network.config.num_clients
        if num_clients == 0:
            return []
        cur = bytearray("A", 'utf-8')
        keys = [b"A...................."]
        for i in range(1, 2 * num_clients):
            end = cur[-1]
            if chr(end) == 'Z':  # extend the key
                cur.append(self.alpha[0])
            else:
                cur[-1] = end + 1
            key = copy.deepcopy(cur)
            # Extend the key to be KV_LEN bytes
            key.extend([ord('.') for _ in range(self.KV_LEN - len(cur))])
            keys.append(bytes(key))
        return keys
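# Worked example: with num_clients = 2 this yields four keys beginning
# "A", "B", "C", "D", each padded with '.' up to KV_LEN bytes. Once the
# last character reaches 'Z', the next keys continue as "ZA", "ZB", and
# so on, still padded to KV_LEN.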
async def assert_successful_put_get(self, testcase):
    """ Assert that we can get a valid put """
    with log.start_action(action_type="assert_successful_put_get"):
        client = self.bft_network.random_client()
        read_reply = await client.read(self.get_last_block_req())
        last_block = self.parse_reply(read_reply)

        # Perform an unconditional KV put.
        # Ensure that the block number increments.
        key = self.random_key()
        val = self.random_value()

        reply = await client.write(self.write_req([], [(key, val)], 0))
        reply = self.parse_reply(reply)
        testcase.assertTrue(reply.success)
        testcase.assertEqual(last_block + 1, reply.last_block_id)

        # Retrieve the last block and ensure that it matches what's expected
        read_reply = await client.read(self.get_last_block_req())
        newest_block = self.parse_reply(read_reply)
        testcase.assertEqual(last_block + 1, newest_block)

        # Get the previous put value, and ensure it's correct
        read_req = self.read_req([key], newest_block)
        kvpairs = self.parse_reply(await client.read(read_req))
        testcase.assertDictEqual({key: val}, kvpairs)
async def send_concurrent_ops(self, num_ops, max_concurrency, max_size,
                              write_weight, create_conflicts=False):
    max_read_set_size = 0 if self.tracker.no_conflicts else max_size
    sent = 0
    write_count = 0
    read_count = 0
    clients = self.bft_network.random_clients(max_concurrency)
    with log.start_action(action_type="send_concurrent_ops"):
        while sent < num_ops:
            readset = self.tracker.readset(0, max_read_set_size)
            writeset = self.tracker.writeset(0, readset)
            read_version = self.tracker.read_block_id()
            async with trio.open_nursery() as nursery:
                for client in clients:
                    if random.random() < write_weight:
                        if create_conflicts is False:
                            readset = self.tracker.readset(0, max_read_set_size)
                            writeset = self.tracker.writeset(max_size)
                            read_version = self.tracker.read_block_id()
                        nursery.start_soon(self.send_tracked_kv_set, client, readset,
                                           writeset, read_version)
                        write_count += 1
                    else:
                        nursery.start_soon(self.send_tracked_read, client, max_size)
                        read_count += 1
            sent += len(clients)
    return read_count, write_count
async def send_read_kv_set(self, client, key, max_set_size=0):
    with log.start_action(action_type="send_read_kv_set"):
        if key is None:
            readData = self.readset(1, max_set_size)
        else:
            readData = [key]
        msg = self.read_req(readData)
        if client is None:
            client = self.bft_network.random_client()
        seq_num = client.req_seq_num.next()
        client_id = client.client_id
        if self.tracker is not None:
            self.tracker.send_read(client_id, seq_num, readData)
        try:
            serialized_reply = await client.read(msg, seq_num)
            reply = self.parse_reply(serialized_reply)
            if self.tracker is not None:
                self.tracker.status.record_client_reply(client_id)
                self.tracker.handle_read_reply(client_id, seq_num, reply)
            return reply
        except trio.TooSlowError:
            if self.tracker is not None:
                self.tracker.status.record_client_timeout(client_id)
            return
async def read_your_writes(self, test_class):
    with log.start_action(action_type="read_your_writes") as action:
        action.log(message_type="[READ-YOUR-WRITES] Starting 'read-your-writes' check...")
        client = self.bft_network.random_client()
        # Verify by "read your write":
        # perform a write with the new primary
        last_block = self.parse_reply(await client.read(self.get_last_block_req()))
        action.log(message_type=f'[READ-YOUR-WRITES] Last block ID: #{last_block}')
        kv = [(self.keys[0], self.random_value()),
              (self.keys[1], self.random_value())]

        reply = await client.write(self.write_req([], kv, 0))
        reply = self.parse_reply(reply)
        test_class.assertTrue(reply.success)
        test_class.assertEqual(last_block + 1, reply.last_block_id)
        last_block = reply.last_block_id

        # Read the last write and check if equal:
        # get the kv pairs in the last written block
        action.log(message_type=f'[READ-YOUR-WRITES] Checking if the {kv} entry is readable...')
        data = await client.read(self.get_block_data_req(last_block))
        kv2 = self.parse_reply(data)
        test_class.assertDictEqual(kv2, dict(kv))
        action.log(message_type='[READ-YOUR-WRITES] OK.')
async def send_kv_set(self, client, readset, writeset, read_version,
                      long_exec=False, reply_assert=True, raise_slowErrorIfAny=True,
                      description='send_kv_set'):
    seq_num = client.req_seq_num.next()
    with log.start_action(action_type=description, seq=seq_num, client=client.client_id,
                          readset_size=len(readset), writeset_size=len(writeset)) as action:
        msg = self.write_req(readset, writeset, read_version, long_exec)
        client_id = client.client_id
        if self.tracker is not None:
            self.tracker.send_write(client_id, seq_num, readset, dict(writeset), read_version)
        try:
            serialized_reply = await client.write(msg, seq_num, pre_process=self.pre_exec_all)
            action.log(message_type="received reply", client=client.client_id,
                       seq=client.req_seq_num.val())
            reply = self.parse_reply(serialized_reply)
            if reply_assert:
                assert reply.success
            if self.tracker is not None:
                self.tracker.status.record_client_reply(client_id)
                self.tracker.handle_write_reply(client_id, seq_num, reply)
            return reply
        except trio.TooSlowError:
            if self.tracker is not None:
                self.tracker.status.record_client_timeout(client_id)
            if raise_slowErrorIfAny:
                raise  # re-raise the original trio.TooSlowError with its traceback
            return
async def fill_and_wait_for_checkpoint(
        self, initial_nodes,
        num_of_checkpoints_to_add=2,
        verify_checkpoint_persistency=True,
        assert_state_transfer_not_started=True):
    """
    A helper function used by tests to fill a window with data and then
    checkpoint it.

    The nodes are then stopped and restarted to ensure the checkpoint data
    was persisted.

    TODO: Make filling concurrent to speed up tests
    """
    with log.start_action(action_type="fill_and_wait_for_checkpoint"):
        client = SkvbcClient(self.bft_network.random_client())
        checkpoint_before = await self.bft_network.wait_for_checkpoint(
            replica_id=random.choice(initial_nodes))
        # Write enough data to checkpoint and create a need for state transfer
        for i in range(1 + num_of_checkpoints_to_add * 150):
            key = self.random_key()
            val = self.random_value()
            reply = await client.write([], [(key, val)])
            assert reply.success
        await self.network_wait_for_checkpoint(
            initial_nodes,
            expected_checkpoint_num=lambda ecn: ecn == checkpoint_before + num_of_checkpoints_to_add,
            verify_checkpoint_persistency=verify_checkpoint_persistency,
            assert_state_transfer_not_started=assert_state_transfer_not_started)
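# Usage sketch (hypothetical): advance all replicas by one checkpoint without
# the stop/restart persistence check. Assumes a SimpleKVBCProtocol instance
# `skvbc` wrapping a started BftTestNetwork `bft_network`.
#
#   await skvbc.fill_and_wait_for_checkpoint(
#       initial_nodes=bft_network.all_replicas(),
#       num_of_checkpoints_to_add=1,
#       verify_checkpoint_persistency=False)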
def verify_snapshot_is_available(self, bft_network, replica_id, snapshot_id):
    with log.start_action(action_type="verify snapshot db files"):
        snapshot_db_dir = os.path.join(
            bft_network.testdir, DB_SNAPSHOT_PREFIX + str(replica_id) + "/" + str(snapshot_id))
        assert os.path.exists(snapshot_db_dir)
        # Make sure that the snapshot folder is not empty
        size = 0
        for ele in os.scandir(snapshot_db_dir):
            size += os.path.getsize(ele)
        assert size > 0
def initial_state(self):
    """
    Return a dict with empty byte strings (which is the value SKVBC will
    report for any key that has never been written to) for all keys
    """
    with log.start_action(action_type="initial_state"):
        empty_byte_string = b''
        return {k: empty_byte_string for k in self.keys}
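# Worked example: for keys b"A..." and b"B...", initial_state() returns
# {b"A...": b'', b"B...": b''}; every key maps to the empty byte string
# until a write to it lands in a block.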
def _start_external_replica(self, replica_id):
    with log.start_action(action_type="_start_external_replica"):
        subprocess.run(
            self.start_replica_cmd(replica_id),
            check=True)
        return self.replicas[replica_id]
def restore_form_older_snapshot(self, bft_network, replica, snapshot_id):
    with log.start_action(action_type="restore with older snapshot"):
        snapshot_db_dir = os.path.join(
            bft_network.testdir, DB_SNAPSHOT_PREFIX + str(replica) + "/" + str(snapshot_id))
        dest_db_dir = os.path.join(bft_network.testdir, DB_FILE_PREFIX + str(replica))
        if os.path.exists(dest_db_dir):
            shutil.rmtree(dest_db_dir)
        ret = shutil.copytree(snapshot_db_dir, dest_db_dir)
        log.log_message(message_type=f"copy db files from {snapshot_db_dir} to {dest_db_dir}, result is {ret}")
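# Usage sketch (hypothetical): roll a replica back to an earlier DB snapshot
# and restart it. The replica and snapshot ids below are illustrative.
#
#   bft_network.stop_replica(3)
#   self.restore_form_older_snapshot(bft_network, replica=3, snapshot_id=150)
#   bft_network.start_replica(3)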
async def send_tracked_write(self, client, max_set_size, long_exec=False):
    with log.start_action(action_type="send_tracked_write") as action:
        max_read_set_size = 0 if self.tracker.no_conflicts else max_set_size
        readset = self.tracker.readset(0, max_read_set_size)
        writeset = self.tracker.writeset(max_set_size)
        read_version = self.tracker.read_block_id()
        await self.send_tracked_kv_set(client, readset, writeset, read_version, long_exec)
        action.log(message_type="[send_tracked_write] OK")
async def run_concurrent_ops(self, num_ops, write_weight=.70):
    with log.start_action(action_type="run_concurrent_ops"):
        max_concurrency = len(self.bft_network.clients) // 2
        max_size = len(self.keys) // 2
        return await self.send_concurrent_ops(num_ops, max_concurrency, max_size,
                                              write_weight, create_conflicts=True)
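# Usage sketch (hypothetical): drive 50 mixed operations, 70% of them writes,
# and inspect how many of each kind were sent.
#
#   read_count, write_count = await skvbc.run_concurrent_ops(
#       num_ops=50, write_weight=0.7)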
async def run_concurrent_batch_ops(self, num_ops, batch_size):
    with log.start_action(action_type="run_concurrent_batch_ops"):
        max_concurrency = len(self.bft_network.clients) // 2
        max_size = len(self.keys) // 2
        sent = 0
        write_count = 0
        clients = self.bft_network.random_clients(max_concurrency)
        with log.start_action(action_type="send_concurrent_ops"):
            while sent < num_ops:
                async with trio.open_nursery() as nursery:
                    for client in clients:
                        client.config = client.config._replace(retry_timeout_milli=500)
                        nursery.start_soon(self.send_write_kv_set_batch, client,
                                           max_size, batch_size)
                        write_count += 1
                sent += len(clients)
        return write_count
def transfer_dbcheckpoint_files(self, bft_network, source_replica, snapshot_id, dest_replicas):
    with log.start_action(action_type="transfer snapshot db files"):
        snapshot_db_dir = os.path.join(
            bft_network.testdir, DB_SNAPSHOT_PREFIX + str(source_replica) + "/" + str(snapshot_id))
        for r in dest_replicas:
            dest_db_dir = os.path.join(bft_network.testdir, DB_FILE_PREFIX + str(r))
            if os.path.exists(dest_db_dir):
                shutil.rmtree(dest_db_dir)
            ret = shutil.copytree(snapshot_db_dir, dest_db_dir)
            log.log_message(message_type=f"copy db files from {snapshot_db_dir} to {dest_db_dir}, result is {ret}")
async def wedge(self):
    seq_num = self.client.req_seq_num.next()
    with log.start_action(action_type="wedge", client=self.client.client_id, seq_num=seq_num):
        reconf_msg = self._construct_reconfiguration_wedge_command()
        return await self.client.write(reconf_msg.serialize(), reconfiguration=True, seq_num=seq_num)
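# Usage sketch (hypothetical): wedge the cluster through the operator wrapper,
# constructed the same way as in test_pruning_command above.
#
#   op = operator.Operator(bft_network.config, client, bft_network.builddir)
#   await op.wedge()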
async def assert_kv_write_executed(self, key, val):
    with log.start_action(action_type="assert_kv_write_executed"):
        config = self.bft_network.config
        client = self.bft_network.random_client()
        reply = await client.read(self.read_req([key]))
        kv_reply = self.parse_reply(reply)
        assert {key: val} == kv_reply, \
            f'Could not read original key-value in the case of n={config.n}, f={config.f}, c={config.c}.'
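# Usage sketch (hypothetical): pair this with a write to check end-to-end
# execution of a single kv pair.
#
#   key, val = skvbc.random_key(), skvbc.random_value()
#   await skvbc.send_kv_set(client, [], [(key, val)], read_version=0)
#   await skvbc.assert_kv_write_executed(key, val)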