def run(self, bench_parameters_dict, node_parameters_dict, debug=False):
    """Deploy and run the remote benchmark over every (nodes, rate) pair."""
    assert isinstance(debug, bool)
    Print.heading('Starting remote benchmark')

    # Validate the raw benchmark and node configuration dictionaries.
    try:
        bench_params = BenchParameters(bench_parameters_dict)
        node_params = NodeParameters(node_parameters_dict)
    except ConfigError as e:
        raise BenchError('Invalid nodes or bench parameters', e)

    # Pick the machines to run on; bail out if the testbed is too small.
    candidates = self._select_hosts(bench_params)
    if not candidates:
        Print.warn('There are not enough instances available')
        return

    # Bring every selected machine up to date.
    try:
        self._update(candidates)
    except (GroupException, ExecutionError) as e:
        if isinstance(e, GroupException):
            e = FabricError(e)
        raise BenchError('Failed to update nodes', e)

    # Sweep over every committee size and input rate.
    for n in bench_params.nodes:
        for r in bench_params.rate:
            Print.heading(f'\nRunning {n} nodes (input rate: {r:,} tx/s)')
            hosts = candidates[:n]

            # Push the configuration files; skip this setting on failure.
            try:
                self._config(hosts, node_params)
            except (subprocess.SubprocessError, GroupException) as e:
                if isinstance(e, GroupException):
                    e = FabricError(e)
                Print.error(BenchError('Failed to configure nodes', e))
                continue

            # Repeat the measurement the requested number of times.
            for i in range(bench_params.runs):
                Print.heading(f'Run {i+1}/{bench_params.runs}')
                try:
                    self._run_single(
                        hosts, r, bench_params, node_params, debug
                    )
                    self._logs(hosts).print(
                        PathMaker.result_file(n, r, bench_params.tx_size)
                    )
                except (subprocess.SubprocessError, GroupException, ParseError) as e:
                    self.kill(hosts=hosts)
                    if isinstance(e, GroupException):
                        e = FabricError(e)
                    Print.error(BenchError('Benchmark failed', e))
                    continue
def create_instances(self, instances):
    """Create `instances` fresh AWS instances in every configured region."""
    assert isinstance(instances, int) and instances > 0

    # Make sure the security group exists in each region; a duplicate
    # group is fine, any other AWS failure is fatal.
    for cli in self.clients.values():
        try:
            self._create_security_group(cli)
        except ClientError as e:
            error = AWSError(e)
            if error.code != 'InvalidGroup.Duplicate':
                raise BenchError('Failed to create security group', error)

    try:
        # Launch the instances region by region.
        size = instances * len(self.clients)
        progress = progress_bar(
            self.clients.values(), prefix=f'Creating {size} instances'
        )
        for cli in progress:
            cli.run_instances(
                ImageId=self._get_ami(cli),
                InstanceType=self.settings.instance_type,
                KeyName=self.settings.key_name,
                MaxCount=instances,
                MinCount=instances,
                SecurityGroups=[self.SECURITY_GROUP_NAME],
                TagSpecifications=[{
                    'ResourceType': 'instance',
                    'Tags': [{'Key': 'Name', 'Value': self.INSTANCE_NAME}]
                }],
                EbsOptimized=True,
                BlockDeviceMappings=[{
                    'DeviceName': '/dev/sda1',
                    'Ebs': {
                        'VolumeType': 'gp2',
                        'VolumeSize': 200,
                        'DeleteOnTermination': True
                    }
                }],
            )

        # Block until every instance has left the 'pending' state.
        Print.info('Waiting for all instances to boot...')
        self._wait(['pending'])
        Print.heading(f'Successfully created {size} new instances')
    except ClientError as e:
        raise BenchError('Failed to create AWS instances', AWSError(e))
def install(self):
    """Provision every testbed machine: build toolchain, rust, and the repo."""
    Print.info('Installing rust and cloning the repo...')
    setup = [
        'sudo apt-get update',
        'sudo apt-get -y upgrade',
        'sudo apt-get -y autoremove',

        # The following dependencies prevent the error: [error: linker `cc` not found].
        'sudo apt-get -y install build-essential',
        'sudo apt-get -y install cmake',

        # Install rust (non-interactive).
        'curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y',
        'source $HOME/.cargo/env',
        'rustup default stable',

        # This is missing from the Rocksdb installer (needed for Rocksdb).
        'sudo apt-get install -y clang',

        # Clone the repo.
        f'(git clone {self.settings.repo_url} || (cd {self.settings.repo_name} ; git pull))'
    ]
    machines = self.manager.hosts(flat=True)
    try:
        group = Group(*machines, user='******', connect_kwargs=self.connect)
        # Run everything as a single shell pipeline so a failure aborts early.
        group.run(' && '.join(setup), hide=True)
        Print.heading(f'Initialized testbed of {len(machines)} nodes')
    except (GroupException, ExecutionError) as e:
        if isinstance(e, GroupException):
            e = FabricError(e)
        raise BenchError('Failed to install repo on testbed', e)
def __init__(self, ctx):
    """Load the instance manager and attach the SSH private key to `ctx`."""
    self.manager = InstanceManager.make()
    self.settings = self.manager.settings
    try:
        # Fabric connections authenticate with the configured private key.
        ctx.connect_kwargs.pkey = RSAKey.from_private_key_file(
            self.manager.settings.key_path
        )
        self.connect = ctx.connect_kwargs
    except (IOError, PasswordRequiredException, SSHException) as e:
        raise BenchError('Failed to load SSH key', e)
def stop_instances(self):
    """Stop (but do not terminate) every pending or running instance.

    Raises:
        BenchError: if any AWS API call fails.
    """
    try:
        ids, _ = self._get(['pending', 'running'])
        for region, client in self.clients.items():
            if ids[region]:
                client.stop_instances(InstanceIds=ids[region])
        size = sum(len(x) for x in ids.values())
        Print.heading(f'Stopping {size} instances')
    except ClientError as e:
        # Include a message, matching the BenchError('msg', err) style
        # used by every other instance operation in this file.
        raise BenchError('Failed to stop instances', AWSError(e))
def kill(self, hosts=None, delete_logs=False):
    """Kill all node processes on `hosts` (default: every testbed host).

    Args:
        hosts: list of host addresses; an empty or missing list means
            the whole testbed.
        delete_logs: also wipe the remote log files when True.

    Raises:
        BenchError: if the remote command fails on any host.
    """
    # Use None instead of a mutable default argument; normalize before
    # the type checks so existing callers are unaffected.
    hosts = [] if hosts is None else hosts
    assert isinstance(hosts, list)
    assert isinstance(delete_logs, bool)
    hosts = hosts if hosts else self.manager.hosts(flat=True)
    delete_logs = CommandMaker.clean_logs() if delete_logs else 'true'
    # `|| true` makes the kill best-effort: no processes is not an error.
    cmd = [delete_logs, f'({CommandMaker.kill()} || true)']
    try:
        g = Group(*hosts, user='******', connect_kwargs=self.connect)
        g.run(' && '.join(cmd), hide=True)
    except GroupException as e:
        raise BenchError('Failed to kill nodes', FabricError(e))
def terminate_instances(self):
    """Terminate every instance and tear down the security groups."""
    try:
        ids, _ = self._get(['pending', 'running', 'stopping', 'stopped'])
        size = sum(len(x) for x in ids.values())
        if size == 0:
            Print.heading('All instances are shut down')
            return

        # Ask every region to terminate its instances.
        for region, client in self.clients.items():
            if ids[region]:
                client.terminate_instances(InstanceIds=ids[region])

        # Wait for all instances to properly shut down.
        Print.info('Waiting for all instances to shut down...')
        self._wait(['shutting-down'])

        # Only once everything is gone can the security groups be removed.
        for client in self.clients.values():
            client.delete_security_group(
                GroupName=self.SECURITY_GROUP_NAME
            )
        Print.heading(f'Testbed of {size} instances destroyed')
    except ClientError as e:
        raise BenchError('Failed to terminate instances', AWSError(e))
def make(cls, settings_file='settings.json'):
    """Alternate constructor: build an instance from a settings file."""
    try:
        settings = Settings.load(settings_file)
        return cls(settings)
    except SettingsError as e:
        raise BenchError('Failed to load settings', e)
def hosts(self, flat=False):
    """Return instance IPs, grouped by region or as one flat list."""
    try:
        _, ips = self._get(['pending', 'running'])
    except ClientError as e:
        raise BenchError('Failed to gather instances IPs', AWSError(e))
    if not flat:
        return ips
    # Flatten the per-region lists into a single list of addresses.
    return [addr for region_ips in ips.values() for addr in region_ips]
def run(self, debug=False):
    """Set up a local testbed, run one benchmark, and return the log parser."""
    assert isinstance(debug, bool)
    Print.heading('Starting local benchmark')

    # Make sure no stale testbed is still running.
    self._kill_nodes()

    try:
        Print.info('Setting up testbed...')
        nodes, rate = self.nodes[0], self.rate[0]

        # Wipe logs and any previous store/config files.
        cleanup = f'{CommandMaker.clean_logs()} ; {CommandMaker.cleanup()}'
        subprocess.run([cleanup], shell=True, stderr=subprocess.DEVNULL)
        sleep(0.5)  # Removing the store may take time.

        # Build the latest code.
        subprocess.run(
            CommandMaker.compile().split(),
            check=True, cwd=PathMaker.node_crate_path()
        )

        # Symlink the node and client binaries.
        alias = CommandMaker.alias_binaries(PathMaker.binary_path())
        subprocess.run([alias], shell=True)

        # Generate one key file per node and load the resulting keys.
        key_files = [PathMaker.key_file(i) for i in range(nodes)]
        keys = []
        for filename in key_files:
            subprocess.run(CommandMaker.generate_key(filename).split(), check=True)
            keys.append(Key.from_file(filename))

        committee = LocalCommittee([k.name for k in keys], self.BASE_PORT)
        committee.print(PathMaker.committee_file())
        self.node_parameters.print(PathMaker.parameters_file())

        # Start the clients first; they wait until the nodes are ready.
        addresses = committee.front_addresses()
        rate_share = ceil(rate / nodes)
        timeout = self.node_parameters.timeout_delay
        client_logs = [PathMaker.client_log_file(i) for i in range(nodes)]
        for addr, log_file in zip(addresses, client_logs):
            command = CommandMaker.run_client(addr, self.tx_size, rate_share, timeout)
            self._background_run(command, log_file)

        # Start the nodes.
        dbs = [PathMaker.db_path(i) for i in range(nodes)]
        node_logs = [PathMaker.node_log_file(i) for i in range(nodes)]
        for key_file, db, log_file in zip(key_files, dbs, node_logs):
            command = CommandMaker.run_node(
                key_file,
                PathMaker.committee_file(),
                db,
                PathMaker.parameters_file(),
                debug=debug
            )
            self._background_run(command, log_file)

        # Give the nodes time to synchronize before measuring.
        Print.info('Waiting for the nodes to synchronize...')
        sleep(2 * self.node_parameters.timeout_delay / 1000)

        # Let the benchmark run for the configured duration, then stop it.
        Print.info(f'Running benchmark ({self.duration} sec)...')
        sleep(self.duration)
        self._kill_nodes()

        # Parse the logs and hand the parser back to the caller.
        Print.info('Parsing logs...')
        return LogParser.process('./logs')

    except (subprocess.SubprocessError, ParseError) as e:
        self._kill_nodes()
        raise BenchError('Failed to run benchmark', e)
def _kill_nodes(self):
    """Kill every locally running node/client process (best effort)."""
    try:
        kill_cmd = CommandMaker.kill().split()
        # stderr is silenced: there may simply be nothing to kill.
        subprocess.run(kill_cmd, stderr=subprocess.DEVNULL)
    except subprocess.SubprocessError as e:
        raise BenchError('Failed to kill testbed', e)
def __init__(self, bench_parameters_dict, node_parameters_dict):
    """Parse and store the benchmark/node configuration dictionaries."""
    try:
        bench = BenchParameters(bench_parameters_dict)
        node = NodeParameters(node_parameters_dict)
    except ConfigError as e:
        raise BenchError('Invalid nodes or bench parameters', e)
    self.bench_parameters = bench
    self.node_parameters = node
def run(self, bench_parameters_dict, node_parameters_dict, debug=False):
    """Run the remote benchmark over every (nodes, rate) combination.

    Args:
        bench_parameters_dict: raw benchmark settings (validated here).
        node_parameters_dict: raw node settings (validated here).
        debug: run the nodes with debug logging when True.

    Raises:
        BenchError: on invalid parameters or when updating nodes fails.
    """
    assert isinstance(debug, bool)
    Print.heading('Starting remote benchmark')
    try:
        bench_parameters = BenchParameters(bench_parameters_dict)
        node_parameters = NodeParameters(node_parameters_dict)
    except ConfigError as e:
        raise BenchError('Invalid nodes or bench parameters', e)

    # Select which hosts to use.
    selected_hosts = self._select_hosts(bench_parameters)
    if not selected_hosts:
        Print.warn('There are not enough instances available')
        return

    # Update nodes.
    try:
        self._update(selected_hosts)
    except (GroupException, ExecutionError) as e:
        e = FabricError(e) if isinstance(e, GroupException) else e
        raise BenchError('Failed to update nodes', e)

    # Announce the protocol under test; abort on an unknown protocol id.
    if node_parameters.protocol == 0:
        Print.info('Running HotStuff')
    elif node_parameters.protocol == 1:
        Print.info('Running AsyncHotStuff')
    elif node_parameters.protocol == 2:
        Print.info('Running TwoChainVABA')
    else:
        Print.info('Wrong protocol type!')
        return

    Print.info(f'{bench_parameters.faults} faults')
    Print.info(
        f'Timeout {node_parameters.timeout_delay} ms, Network delay {node_parameters.network_delay} ms'
    )
    Print.info(f'DDOS attack {node_parameters.ddos}')

    # Upload all configuration files once, sized for the first committee.
    hosts = selected_hosts[:bench_parameters.nodes[0]]
    try:
        self._config(hosts, node_parameters)
    except (subprocess.SubprocessError, GroupException) as e:
        e = FabricError(e) if isinstance(e, GroupException) else e
        Print.error(BenchError('Failed to configure nodes', e))
        # Without valid configuration files every subsequent run would
        # fail, so abort instead of continuing with a broken testbed.
        return

    # Run benchmarks.
    for n in bench_parameters.nodes:
        for r in bench_parameters.rate:
            Print.heading(f'\nRunning {n} nodes (input rate: {r:,} tx/s)')
            hosts = selected_hosts[:n]

            # Do not boot faulty nodes.
            faults = bench_parameters.faults
            hosts = hosts[:n - faults]
            protocol = node_parameters.protocol
            ddos = node_parameters.ddos

            # Run the benchmark.
            for i in range(bench_parameters.runs):
                Print.heading(f'Run {i+1}/{bench_parameters.runs}')
                try:
                    self._run_single(
                        hosts, r, bench_parameters, node_parameters, debug
                    )
                    self._logs(hosts, faults, protocol, ddos).print(
                        PathMaker.result_file(n, r, bench_parameters.tx_size, faults)
                    )
                except (subprocess.SubprocessError, GroupException, ParseError) as e:
                    self.kill(hosts=hosts)
                    if isinstance(e, GroupException):
                        e = FabricError(e)
                    Print.error(BenchError('Benchmark failed', e))
                    continue
def run(self, debug=False):
    """Set up and run a local benchmark; return the parsed log results.

    Args:
        debug: run the nodes with debug logging when True.

    Returns:
        The LogParser produced from the run's logs, or None when the
        configured protocol id is invalid.

    Raises:
        BenchError: when compilation, configuration, or the run fails.
    """
    assert isinstance(debug, bool)
    Print.heading('Starting local benchmark')

    # Kill any previous testbed.
    self._kill_nodes()

    try:
        Print.info('Setting up testbed...')
        nodes, rate = self.nodes[0], self.rate[0]

        # Validate the protocol id before launching any background
        # process, so an invalid id cannot leak running clients on the
        # early return below.
        if self.node_parameters.protocol == 0:
            Print.info('Running HotStuff')
        elif self.node_parameters.protocol == 1:
            Print.info('Running Async HotStuff')
        elif self.node_parameters.protocol == 2:
            Print.info('Running TwoChainVABA')
        else:
            Print.info('Wrong protocol type!')
            return
        Print.info(f'{self.faults} faults')
        Print.info(
            f'Timeout {self.node_parameters.timeout_delay} ms, Network delay {self.node_parameters.network_delay} ms'
        )
        Print.info(f'DDOS attack {self.node_parameters.ddos}')

        # Cleanup all files.
        cmd = f'{CommandMaker.clean_logs()} ; {CommandMaker.cleanup()}'
        subprocess.run([cmd], shell=True, stderr=subprocess.DEVNULL)
        sleep(0.5)  # Removing the store may take time.

        # Recompile the latest code.
        cmd = CommandMaker.compile().split()
        subprocess.run(cmd, check=True, cwd=PathMaker.node_crate_path())

        # Create alias for the client and nodes binary.
        cmd = CommandMaker.alias_binaries(PathMaker.binary_path())
        subprocess.run([cmd], shell=True)

        # Generate configuration files.
        keys = []
        key_files = [PathMaker.key_file(i) for i in range(nodes)]
        for filename in key_files:
            cmd = CommandMaker.generate_key(filename).split()
            subprocess.run(cmd, check=True)
            keys += [Key.from_file(filename)]

        # Generate threshold signature files (one invocation for all nodes).
        cmd = './node threshold_keys'
        for i in range(nodes):
            cmd += ' --filename ' + PathMaker.threshold_key_file(i)
        cmd = cmd.split()
        subprocess.run(cmd, capture_output=True, check=True)

        names = [x.name for x in keys]
        tss_keys = []
        for i in range(nodes):
            tss_keys += [TSSKey.from_file(PathMaker.threshold_key_file(i))]
        ids = [x.id for x in tss_keys]
        committee = LocalCommittee(names, ids, self.BASE_PORT)
        committee.print(PathMaker.committee_file())

        self.node_parameters.print(PathMaker.parameters_file())

        # Do not boot faulty nodes.
        nodes = nodes - self.faults

        # Run the clients (they will wait for the nodes to be ready).
        addresses = committee.front
        rate_share = ceil(rate / nodes)
        timeout = self.node_parameters.timeout_delay
        client_logs = [PathMaker.client_log_file(i) for i in range(nodes)]
        for addr, log_file in zip(addresses, client_logs):
            cmd = CommandMaker.run_client(addr, self.tx_size, rate_share, timeout)
            self._background_run(cmd, log_file)

        # Run the nodes.
        dbs = [PathMaker.db_path(i) for i in range(nodes)]
        node_logs = [PathMaker.node_log_file(i) for i in range(nodes)]
        threshold_key_files = [
            PathMaker.threshold_key_file(i) for i in range(nodes)
        ]
        for key_file, threshold_key_file, db, log_file in zip(
                key_files, threshold_key_files, dbs, node_logs):
            cmd = CommandMaker.run_node(
                key_file,
                threshold_key_file,
                PathMaker.committee_file(),
                db,
                PathMaker.parameters_file(),
                debug=debug
            )
            self._background_run(cmd, log_file)

        # Wait for the nodes to synchronize.
        Print.info('Waiting for the nodes to synchronize...')
        sleep(2 * self.node_parameters.timeout_delay / 1000)

        # Wait for all transactions to be processed.
        Print.info(f'Running benchmark ({self.duration} sec)...')
        sleep(self.duration)
        self._kill_nodes()

        # Parse logs and return the parser.
        Print.info('Parsing logs...')
        return LogParser.process('./logs', self.faults,
                                 self.node_parameters.protocol,
                                 self.node_parameters.ddos)

    except (subprocess.SubprocessError, ParseError) as e:
        self._kill_nodes()
        raise BenchError('Failed to run benchmark', e)