예제 #1
0
    def run(self, bench_parameters_dict, node_parameters_dict, debug=False):
        assert isinstance(debug, bool)
        Print.heading('Starting remote benchmark')
        try:
            bench_parameters = BenchParameters(bench_parameters_dict)
            node_parameters = NodeParameters(node_parameters_dict)
        except ConfigError as e:
            raise BenchError('Invalid nodes or bench parameters', e)

        # Select which hosts to use.
        selected_hosts = self._select_hosts(bench_parameters)
        if not selected_hosts:
            Print.warn('There are not enough instances available')
            return

        # Update nodes.
        try:
            self._update(selected_hosts)
        except (GroupException, ExecutionError) as e:
            e = FabricError(e) if isinstance(e, GroupException) else e
            raise BenchError('Failed to update nodes', e)

        # Run benchmarks.
        for n in bench_parameters.nodes:
            for r in bench_parameters.rate:
                Print.heading(f'\nRunning {n} nodes (input rate: {r:,} tx/s)')
                hosts = selected_hosts[:n]

                # Upload all configuration files.
                try:
                    self._config(hosts, node_parameters)
                except (subprocess.SubprocessError, GroupException) as e:
                    e = FabricError(e) if isinstance(e, GroupException) else e
                    Print.error(BenchError('Failed to configure nodes', e))
                    continue

                # Run the benchmark.
                for i in range(bench_parameters.runs):
                    Print.heading(f'Run {i+1}/{bench_parameters.runs}')
                    try:
                        self._run_single(hosts, r, bench_parameters,
                                         node_parameters, debug)
                        self._logs(hosts).print(
                            PathMaker.result_file(n, r,
                                                  bench_parameters.tx_size))
                    except (subprocess.SubprocessError, GroupException,
                            ParseError) as e:
                        self.kill(hosts=hosts)
                        if isinstance(e, GroupException):
                            e = FabricError(e)
                        Print.error(BenchError('Benchmark failed', e))
                        continue
예제 #2
0
    def create_instances(self, instances):
        assert isinstance(instances, int) and instances > 0

        # Create the security group in every region.
        for client in self.clients.values():
            try:
                self._create_security_group(client)
            except ClientError as e:
                error = AWSError(e)
                if error.code != 'InvalidGroup.Duplicate':
                    raise BenchError('Failed to create security group', error)

        try:
            # Create all instances.
            size = instances * len(self.clients)
            progress = progress_bar(
                self.clients.values(), prefix=f'Creating {size} instances'
            )
            for client in progress:
                client.run_instances(
                    ImageId=self._get_ami(client),
                    InstanceType=self.settings.instance_type,
                    KeyName=self.settings.key_name,
                    MaxCount=instances,
                    MinCount=instances,
                    SecurityGroups=[self.SECURITY_GROUP_NAME],
                    TagSpecifications=[{
                        'ResourceType': 'instance',
                        'Tags': [{
                            'Key': 'Name',
                            'Value': self.INSTANCE_NAME
                        }]
                    }],
                    EbsOptimized=True,
                    BlockDeviceMappings=[{
                        'DeviceName': '/dev/sda1',
                        'Ebs': {
                            'VolumeType': 'gp2',
                            'VolumeSize': 200,
                            'DeleteOnTermination': True
                        }
                    }],
                )

            # Wait for the instances to boot.
            Print.info('Waiting for all instances to boot...')
            self._wait(['pending'])
            Print.heading(f'Successfully created {size} new instances')
        except ClientError as e:
            raise BenchError('Failed to create AWS instances', AWSError(e))
예제 #3
0
    def install(self):
        Print.info('Installing rust and cloning the repo...')
        cmd = [
            'sudo apt-get update',
            'sudo apt-get -y upgrade',
            'sudo apt-get -y autoremove',

            # The following dependencies prevent the error: [error: linker `cc` not found].
            'sudo apt-get -y install build-essential',
            'sudo apt-get -y install cmake',

            # Install rust (non-interactive).
            'curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y',
            'source $HOME/.cargo/env',
            'rustup default stable',

            # This is missing from the Rocksdb installer (needed for Rocksdb).
            'sudo apt-get install -y clang',

            # Clone the repo.
            f'(git clone {self.settings.repo_url} || (cd {self.settings.repo_name} ; git pull))'
        ]
        hosts = self.manager.hosts(flat=True)
        try:
            g = Group(*hosts, user='******', connect_kwargs=self.connect)
            g.run(' && '.join(cmd), hide=True)
            Print.heading(f'Initialized testbed of {len(hosts)} nodes')
        except (GroupException, ExecutionError) as e:
            e = FabricError(e) if isinstance(e, GroupException) else e
            raise BenchError('Failed to install repo on testbed', e)
예제 #4
0
 def __init__(self, ctx):
     self.manager = InstanceManager.make()
     self.settings = self.manager.settings
     try:
         ctx.connect_kwargs.pkey = RSAKey.from_private_key_file(
             self.manager.settings.key_path)
         self.connect = ctx.connect_kwargs
     except (IOError, PasswordRequiredException, SSHException) as e:
         raise BenchError('Failed to load SSH key', e)
예제 #5
0
 def stop_instances(self):
     try:
         ids, _ = self._get(['pending', 'running'])
         for region, client in self.clients.items():
             if ids[region]:
                 client.stop_instances(InstanceIds=ids[region])
         size = sum(len(x) for x in ids.values())
         Print.heading(f'Stopping {size} instances')
     except ClientError as e:
         raise BenchError(AWSError(e))
예제 #6
0
 def kill(self, hosts=[], delete_logs=False):
     assert isinstance(hosts, list)
     assert isinstance(delete_logs, bool)
     hosts = hosts if hosts else self.manager.hosts(flat=True)
     delete_logs = CommandMaker.clean_logs() if delete_logs else 'true'
     cmd = [delete_logs, f'({CommandMaker.kill()} || true)']
     try:
         g = Group(*hosts, user='******', connect_kwargs=self.connect)
         g.run(' && '.join(cmd), hide=True)
     except GroupException as e:
         raise BenchError('Failed to kill nodes', FabricError(e))
예제 #7
0
    def terminate_instances(self):
        try:
            ids, _ = self._get(['pending', 'running', 'stopping', 'stopped'])
            size = sum(len(x) for x in ids.values())
            if size == 0:
                Print.heading(f'All instances are shut down')
                return

            # Terminate instances.
            for region, client in self.clients.items():
                if ids[region]:
                    client.terminate_instances(InstanceIds=ids[region])

            # Wait for all instances to properly shut down.
            Print.info('Waiting for all instances to shut down...')
            self._wait(['shutting-down'])
            for client in self.clients.values():
                client.delete_security_group(
                    GroupName=self.SECURITY_GROUP_NAME
                )

            Print.heading(f'Testbed of {size} instances destroyed')
        except ClientError as e:
            raise BenchError('Failed to terminate instances', AWSError(e))
예제 #8
0
 def make(cls, settings_file='settings.json'):
     try:
         return cls(Settings.load(settings_file))
     except SettingsError as e:
         raise BenchError('Failed to load settings', e)
예제 #9
0
 def hosts(self, flat=False):
     try:
         _, ips = self._get(['pending', 'running'])
         return [x for y in ips.values() for x in y] if flat else ips
     except ClientError as e:
         raise BenchError('Failed to gather instances IPs', AWSError(e))
예제 #10
0
    def run(self, debug=False):
        assert isinstance(debug, bool)
        Print.heading('Starting local benchmark')

        # Kill any previous testbed.
        self._kill_nodes()

        try:
            Print.info('Setting up testbed...')
            nodes, rate = self.nodes[0], self.rate[0]

            # Cleanup all files.
            cmd = f'{CommandMaker.clean_logs()} ; {CommandMaker.cleanup()}'
            subprocess.run([cmd], shell=True, stderr=subprocess.DEVNULL)
            sleep(0.5)  # Removing the store may take time.

            # Recompile the latest code.
            cmd = CommandMaker.compile().split()
            subprocess.run(cmd, check=True, cwd=PathMaker.node_crate_path())

            # Create alias for the client and nodes binary.
            cmd = CommandMaker.alias_binaries(PathMaker.binary_path())
            subprocess.run([cmd], shell=True)

            # Generate configuration files.
            keys = []
            key_files = [PathMaker.key_file(i) for i in range(nodes)]
            for filename in key_files:
                cmd = CommandMaker.generate_key(filename).split()
                subprocess.run(cmd, check=True)
                keys += [Key.from_file(filename)]

            names = [x.name for x in keys]
            committee = LocalCommittee(names, self.BASE_PORT)
            committee.print(PathMaker.committee_file())

            self.node_parameters.print(PathMaker.parameters_file())

            # Run the clients (they will wait for the nodes to be ready).
            addresses = committee.front_addresses()
            rate_share = ceil(rate / nodes)
            timeout = self.node_parameters.timeout_delay
            client_logs = [PathMaker.client_log_file(i) for i in range(nodes)]
            for addr, log_file in zip(addresses, client_logs):
                cmd = CommandMaker.run_client(addr, self.tx_size, rate_share,
                                              timeout)
                self._background_run(cmd, log_file)

            # Run the nodes.
            dbs = [PathMaker.db_path(i) for i in range(nodes)]
            node_logs = [PathMaker.node_log_file(i) for i in range(nodes)]
            for key_file, db, log_file in zip(key_files, dbs, node_logs):
                cmd = CommandMaker.run_node(key_file,
                                            PathMaker.committee_file(),
                                            db,
                                            PathMaker.parameters_file(),
                                            debug=debug)
                self._background_run(cmd, log_file)

            # Wait for the nodes to synchronize
            Print.info('Waiting for the nodes to synchronize...')
            sleep(2 * self.node_parameters.timeout_delay / 1000)

            # Wait for all transactions to be processed.
            Print.info(f'Running benchmark ({self.duration} sec)...')
            sleep(self.duration)
            self._kill_nodes()

            # Parse logs and return the parser.
            Print.info('Parsing logs...')
            return LogParser.process('./logs')

        except (subprocess.SubprocessError, ParseError) as e:
            self._kill_nodes()
            raise BenchError('Failed to run benchmark', e)
예제 #11
0
 def _kill_nodes(self):
     try:
         cmd = CommandMaker.kill().split()
         subprocess.run(cmd, stderr=subprocess.DEVNULL)
     except subprocess.SubprocessError as e:
         raise BenchError('Failed to kill testbed', e)
예제 #12
0
 def __init__(self, bench_parameters_dict, node_parameters_dict):
     try:
         self.bench_parameters = BenchParameters(bench_parameters_dict)
         self.node_parameters = NodeParameters(node_parameters_dict)
     except ConfigError as e:
         raise BenchError('Invalid nodes or bench parameters', e)
예제 #13
0
    def run(self, bench_parameters_dict, node_parameters_dict, debug=False):
        assert isinstance(debug, bool)
        Print.heading('Starting remote benchmark')
        try:
            bench_parameters = BenchParameters(bench_parameters_dict)
            node_parameters = NodeParameters(node_parameters_dict)
        except ConfigError as e:
            raise BenchError('Invalid nodes or bench parameters', e)

        # Select which hosts to use.
        selected_hosts = self._select_hosts(bench_parameters)
        if not selected_hosts:
            Print.warn('There are not enough instances available')
            return

        # Update nodes.
        try:
            self._update(selected_hosts)
        except (GroupException, ExecutionError) as e:
            e = FabricError(e) if isinstance(e, GroupException) else e
            raise BenchError('Failed to update nodes', e)

        if node_parameters.protocol == 0:
            Print.info('Running HotStuff')
        elif node_parameters.protocol == 1:
            Print.info('Running AsyncHotStuff')
        elif node_parameters.protocol == 2:
            Print.info('Running TwoChainVABA')
        else:
            Print.info('Wrong protocol type!')
            return

        Print.info(f'{bench_parameters.faults} faults')
        Print.info(
            f'Timeout {node_parameters.timeout_delay} ms, Network delay {node_parameters.network_delay} ms'
        )
        Print.info(f'DDOS attack {node_parameters.ddos}')

        hosts = selected_hosts[:bench_parameters.nodes[0]]
        # Upload all configuration files.
        try:
            self._config(hosts, node_parameters)
        except (subprocess.SubprocessError, GroupException) as e:
            e = FabricError(e) if isinstance(e, GroupException) else e
            Print.error(BenchError('Failed to configure nodes', e))

        # Run benchmarks.
        for n in bench_parameters.nodes:
            for r in bench_parameters.rate:
                Print.heading(f'\nRunning {n} nodes (input rate: {r:,} tx/s)')
                hosts = selected_hosts[:n]

                # # Upload all configuration files.
                # try:
                #     self._config(hosts, node_parameters)
                # except (subprocess.SubprocessError, GroupException) as e:
                #     e = FabricError(e) if isinstance(e, GroupException) else e
                #     Print.error(BenchError('Failed to configure nodes', e))
                #     continue

                # Do not boot faulty nodes.
                faults = bench_parameters.faults
                hosts = hosts[:n - faults]

                protocol = node_parameters.protocol
                ddos = node_parameters.ddos

                # Run the benchmark.
                for i in range(bench_parameters.runs):
                    Print.heading(f'Run {i+1}/{bench_parameters.runs}')
                    try:
                        self._run_single(hosts, r, bench_parameters,
                                         node_parameters, debug)
                        self._logs(hosts, faults, protocol, ddos).print(
                            PathMaker.result_file(n, r,
                                                  bench_parameters.tx_size,
                                                  faults))
                    except (subprocess.SubprocessError, GroupException,
                            ParseError) as e:
                        self.kill(hosts=hosts)
                        if isinstance(e, GroupException):
                            e = FabricError(e)
                        Print.error(BenchError('Benchmark failed', e))
                        continue
예제 #14
0
    def run(self, debug=False):
        assert isinstance(debug, bool)
        Print.heading('Starting local benchmark')

        # Kill any previous testbed.
        self._kill_nodes()

        try:
            Print.info('Setting up testbed...')
            nodes, rate = self.nodes[0], self.rate[0]

            # Cleanup all files.
            cmd = f'{CommandMaker.clean_logs()} ; {CommandMaker.cleanup()}'
            subprocess.run([cmd], shell=True, stderr=subprocess.DEVNULL)
            sleep(0.5)  # Removing the store may take time.

            # Recompile the latest code.
            cmd = CommandMaker.compile().split()
            subprocess.run(cmd, check=True, cwd=PathMaker.node_crate_path())

            # Create alias for the client and nodes binary.
            cmd = CommandMaker.alias_binaries(PathMaker.binary_path())
            subprocess.run([cmd], shell=True)

            # Generate configuration files.
            keys = []
            key_files = [PathMaker.key_file(i) for i in range(nodes)]
            for filename in key_files:
                cmd = CommandMaker.generate_key(filename).split()
                subprocess.run(cmd, check=True)
                keys += [Key.from_file(filename)]

            # Generate threshold signature files.
            cmd = './node threshold_keys'
            for i in range(nodes):
                cmd += ' --filename ' + PathMaker.threshold_key_file(i)
            # print(cmd)
            cmd = cmd.split()
            subprocess.run(cmd, capture_output=True, check=True)

            names = [x.name for x in keys]
            tss_keys = []
            for i in range(nodes):
                tss_keys += [TSSKey.from_file(PathMaker.threshold_key_file(i))]
            ids = [x.id for x in tss_keys]
            committee = LocalCommittee(names, ids, self.BASE_PORT)
            committee.print(PathMaker.committee_file())

            self.node_parameters.print(PathMaker.parameters_file())

            # Do not boot faulty nodes.
            nodes = nodes - self.faults

            # Run the clients (they will wait for the nodes to be ready).
            addresses = committee.front
            rate_share = ceil(rate / nodes)
            timeout = self.node_parameters.timeout_delay
            client_logs = [PathMaker.client_log_file(i) for i in range(nodes)]
            for addr, log_file in zip(addresses, client_logs):
                cmd = CommandMaker.run_client(addr, self.tx_size, rate_share,
                                              timeout)
                self._background_run(cmd, log_file)

            if self.node_parameters.protocol == 0:
                Print.info('Running HotStuff')
            elif self.node_parameters.protocol == 1:
                Print.info('Running Async HotStuff')
            elif self.node_parameters.protocol == 2:
                Print.info('Running TwoChainVABA')
            else:
                Print.info('Wrong protocol type!')
                return

            Print.info(f'{self.faults} faults')
            Print.info(
                f'Timeout {self.node_parameters.timeout_delay} ms, Network delay {self.node_parameters.network_delay} ms'
            )
            Print.info(f'DDOS attack {self.node_parameters.ddos}')

            # Run the nodes.
            dbs = [PathMaker.db_path(i) for i in range(nodes)]
            node_logs = [PathMaker.node_log_file(i) for i in range(nodes)]
            threshold_key_files = [
                PathMaker.threshold_key_file(i) for i in range(nodes)
            ]
            for key_file, threshold_key_file, db, log_file in zip(
                    key_files, threshold_key_files, dbs, node_logs):
                cmd = CommandMaker.run_node(key_file,
                                            threshold_key_file,
                                            PathMaker.committee_file(),
                                            db,
                                            PathMaker.parameters_file(),
                                            debug=debug)
                self._background_run(cmd, log_file)

            # Wait for the nodes to synchronize
            Print.info('Waiting for the nodes to synchronize...')
            sleep(2 * self.node_parameters.timeout_delay / 1000)

            # Wait for all transactions to be processed.
            Print.info(f'Running benchmark ({self.duration} sec)...')
            sleep(self.duration)
            self._kill_nodes()

            # Parse logs and return the parser.
            Print.info('Parsing logs...')
            return LogParser.process('./logs', self.faults,
                                     self.node_parameters.protocol,
                                     self.node_parameters.ddos)

        except (subprocess.SubprocessError, ParseError) as e:
            self._kill_nodes()
            raise BenchError('Failed to run benchmark', e)