def cleanup_cluster(client, timeout=None): """ Delete all containers and datasets in the given cluster. :param FlockerClient client: The API client instance for the cluster. :param timeout: A timeout in seconds for waiting until the deletions take effect if not ``None``, otherwise there is no waiting. :type timeout: int or None :returns: Deferred that fires when the clean up is complete if :param:`timeout` is not None, otherwise the Deferred fires when the deletion requests are aknowledged. """ containers_configuration = yield client.list_containers_configuration() results = [] for container in containers_configuration: print "deleting container", container.name results.append(client.delete_container(container.name)) yield gather_deferreds(results) datasets_configuration = yield client.list_datasets_configuration() results = [] for dataset in datasets_configuration: print "deleting dataset with id", dataset.dataset_id results.append(client.delete_dataset(dataset.dataset_id)) yield gather_deferreds(results) if timeout is not None: print "waiting for all containers to get deleted" yield loop_until( client._reactor, lambda: client.list_containers_state(). addCallback(lambda containers: not containers), repeat(1, timeout)) print "waiting for all datasets to get deleted" yield loop_until( client._reactor, lambda: client.list_datasets_state().addCallback( lambda datasets: not datasets), repeat(1, timeout))
def driver(reactor, cluster, scenario_factory, operation_factory,
           metric_factory, num_samples, result, output):
    """
    Record control service characteristics in ``result``, run the
    benchmark, add its samples to ``result`` and pass ``result`` to
    ``output``.

    :param reactor: Reactor to use.
    :param BenchmarkCluster cluster: Benchmark cluster.
    :param callable scenario_factory: A load scenario factory.
    :param callable operation_factory: An operation factory.
    :param callable metric_factory: A metric factory.
    :param int num_samples: Number of samples to take.
    :param result: A dictionary which will be updated with values to
        create a JSON result.
    :param output: A callable to receive the JSON structure, for
        printing or storage.
    :return: The Deferred chain, ending with the call to ``output``.
    """
    control_service = cluster.get_control_service(reactor)

    def record_control_service(characteristics):
        # The gathered results arrive in the order the requests were
        # listed below.
        version, nodes, containers, datasets_config = characteristics
        node_count = len(nodes)
        container_count = len(containers)
        dataset_count = len(datasets_config.datasets)
        result['control_service'] = dict(
            host=cluster.control_node_address().compressed,
            flocker_version=version[u"flocker"],
            node_count=node_count,
            container_count=container_count,
            dataset_count=dataset_count,
        )

    def run_benchmark(ignored):
        return benchmark(
            scenario_factory(reactor, cluster),
            operation_factory(reactor, cluster),
            metric_factory(reactor, cluster),
            num_samples,
        )

    def record_samples(outputs):
        samples, scenario_metrics = outputs
        result['samples'] = samples
        # Scenario metrics are only recorded when there are any.
        if scenario_metrics:
            result['scenario']['metrics'] = scenario_metrics
        return result

    d = gather_deferreds([
        control_service.version(),
        control_service.list_nodes(),
        control_service.list_containers_configuration(),
        control_service.list_datasets_configuration(),
    ])
    d.addCallback(record_control_service)
    d.addCallback(run_benchmark)
    d.addCallback(record_samples)
    d.addCallback(output)
    return d
def start_containers(nodes):
    """
    Create ``per_node`` stateful containers on each of the given nodes,
    logging progress while the creations are outstanding.

    ``self`` and ``per_node`` are taken from the enclosing scope, which is
    not visible here.

    :param nodes: The nodes on which to create containers.
    :return: The gathered Deferred for all per-node creation chains. The
        progress logging loop is stopped when it fires, whether with
        success or failure.
    """
    Message.log(
        message_type='flocker.benchmark.container_setup:start',
        containers_per_node=per_node,
        total_nodes=len(nodes),
    )
    total = per_node * len(nodes)

    def log_progress():
        Message.log(
            message_type='flocker.benchmark.container_setup:progress',
            container_count=self.container_count,
            error_count=self.error_count,
            total_containers=total,
        )

    progress_loop = LoopingCall(log_progress)
    progress_loop.start(10, now=False)

    def chain_for(node):
        # One callback chain per node: each creation is only started once
        # the previous callback in the same chain has produced its result.
        chain = succeed(None)
        for index in range(per_node):
            chain.addCallback(
                lambda _ignore, n=node, i=index:
                self.create_stateful_container(n, i)
            )
        return chain

    def stop_progress_loop(outcome):
        progress_loop.stop()
        return outcome

    gathered = gather_deferreds([chain_for(node) for node in nodes])
    gathered.addBoth(stop_progress_loop)
    return gathered
def _cleanup_compose(self):
    """
    Run docker-compose ``stop`` and then ``rm -f`` for both demo templates
    to stop and remove all the containers that were created during the
    test.

    For each template the ``rm -f`` is only issued after the ``stop`` has
    succeeded, because docker-compose + swarm sometimes fail when commands
    are run in parallel.

    NOTE(review): the ``stop`` commands for the two templates are both
    issued up front, so the two templates are not serialized with respect
    to each other -- confirm this is intended.

    :return: The result of ``gather_deferreds`` over the two per-template
        Deferreds.
    """
    def stop_then_remove(compose_template):
        stopping = remote_docker_compose(
            self.client_node_ip,
            self.docker_host,
            compose_template.path,
            'stop',
        )

        def remove(ignored):
            removing = remote_docker_compose(
                self.client_node_ip,
                self.docker_host,
                compose_template.path,
                'rm', '-f'
            )
            # This sometimes fails with exit code 255
            # and a message ValueError: No JSON object could be decoded
            removing.addErrback(
                lambda failure: failure.trap(ProcessTerminated)
            )
            return removing

        stopping.addCallback(remove)
        return stopping

    return gather_deferreds([
        stop_then_remove(self.compose_node1),
        stop_then_remove(self.compose_node2),
    ])
def cleanup_cluster(client, timeout=None): """ Delete all containers and datasets in the given cluster. :param FlockerClient client: The API client instance for the cluster. :param timeout: A timeout in seconds for waiting until the deletions take effect if not ``None``, otherwise there is no waiting. :type timeout: int or None :returns: Deferred that fires when the clean up is complete if :param:`timeout` is not None, otherwise the Deferred fires when the deletion requests are aknowledged. """ containers_configuration = yield client.list_containers_configuration() results = [] for container in containers_configuration: print "deleting container", container.name results.append(client.delete_container(container.name)) yield gather_deferreds(results) datasets_configuration = yield client.list_datasets_configuration() results = [] for dataset in datasets_configuration: print "deleting dataset with id", dataset.dataset_id results.append(client.delete_dataset(dataset.dataset_id)) yield gather_deferreds(results) if timeout is not None: print "waiting for all containers to get deleted" yield loop_until( client._reactor, lambda: client.list_containers_state().addCallback( lambda containers: not containers ), repeat(1, timeout) ) print "waiting for all datasets to get deleted" yield loop_until( client._reactor, lambda: client.list_datasets_state().addCallback( lambda datasets: not datasets ), repeat(1, timeout) )
def get_cluster_init_process_names(runner, nodes):
    """
    Get the names of process 1 running on each node.

    :param runner: A method of running a command on a node.
    :param nodes: A list of Node to run the command on.
    :return: Deferred firing with a list of process names.
    """
    per_node_names = [
        get_node_init_process_name(runner, node) for node in nodes
    ]
    return gather_deferreds(per_node_names)
def get_cluster_cpu_times(reactor, runner, nodes, processes):
    """
    Get the CPU times for processes running on every node of a cluster.

    NOTE(review): the same ``processes`` object is passed to every
    per-node call, so a one-shot iterator would be exhausted by the first
    call -- presumably callers pass a re-iterable collection; confirm.

    :param reactor: Twisted Reactor.
    :param runner: A method of running a command on a node.
    :param nodes: The nodes to run the command on.
    :param processes: An iterator of process names to monitor. The process
        names must not contain spaces.
    :return: Deferred firing with a list holding one result per node, in
        the order of ``nodes``. Each result is whatever
        ``get_node_cpu_times`` fires with -- per the original
        documentation, a dictionary mapping process names to elapsed cpu
        time (process names may be truncated), or None if an error
        occurred (after logging the error).
    """
    return gather_deferreds([
        get_node_cpu_times(reactor, runner, node, processes)
        for node in nodes
    ])
def get_cluster_cpu_times(reactor, runner, nodes, processes):
    """
    Get the CPU times for processes running on every node of a cluster.

    NOTE(review): the same ``processes`` object is passed to every
    per-node call, so a one-shot iterator would be exhausted by the first
    call -- presumably callers pass a re-iterable collection; confirm.

    :param reactor: Twisted Reactor.
    :param runner: A method of running a command on a node.
    :param nodes: The nodes to run the command on.
    :param processes: An iterator of process names to monitor. The process
        names must not contain spaces.
    :return: Deferred firing with a list holding one result per node, in
        the order of ``nodes``. Each result is whatever
        ``get_node_cpu_times`` fires with -- per the original
        documentation, a dictionary mapping process names to elapsed cpu
        time (process names may be truncated), or None if an error
        occurred (after logging the error).
    """
    return gather_deferreds([
        get_node_cpu_times(reactor, runner, node, processes)
        for node in nodes
    ])
def _cleanup_compose(self):
    """
    Run docker-compose ``stop`` and then ``rm -f`` for both demo templates
    to stop and remove all the containers that were created during the
    test.

    For each template the ``rm -f`` is only issued after the ``stop`` has
    succeeded, because docker-compose + swarm sometimes fail when commands
    are run in parallel.

    NOTE(review): the ``stop`` commands for the two templates are both
    issued up front, so the two templates are not serialized with respect
    to each other -- confirm this is intended.

    :return: The result of ``gather_deferreds`` over the two per-template
        Deferreds.
    """
    def stop_then_remove(compose_template):
        stopping = remote_docker_compose(
            self.client_node_ip,
            self.docker_host,
            compose_template.path,
            'stop',
        )

        def remove(ignored):
            removing = remote_docker_compose(
                self.client_node_ip,
                self.docker_host,
                compose_template.path,
                'rm', '-f'
            )
            # This sometimes fails with exit code 255
            # and a message ValueError: No JSON object could be decoded
            removing.addErrback(
                lambda failure: failure.trap(ProcessTerminated)
            )
            return removing

        stopping.addCallback(remove)
        return stopping

    return gather_deferreds([
        stop_then_remove(self.compose_node1),
        stop_then_remove(self.compose_node2),
    ])
def parallel_setup(node):
    """
    Prepare one node: create and then delete a container, and create a
    dataset.

    ``reactor``, ``control_service``, ``image`` and ``volume_size`` are
    taken from the enclosing scope, which is not visible here.

    :param node: The node to set up; only its ``uuid`` is used.
    :return: Deferred firing with the second gathered result, i.e. the
        result of ``create_dataset``.
    """
    # Ensure the Docker image is cached by starting and stopping a
    # container.
    container_name = unicode(uuid4())
    caching = create_container(
        reactor, control_service, node.uuid, container_name, image
    )

    def remove_container(created):
        return delete_container(reactor, control_service, created)

    caching.addCallback(remove_container)

    # Create the dataset
    new_dataset_id = uuid4()
    creating_dataset = create_dataset(
        reactor, control_service, node.uuid, new_dataset_id, volume_size
    )

    def dataset_state_only(gathered):
        # Return only the dataset state
        return gathered[1]

    both = gather_deferreds((caching, creating_dataset))
    both.addCallback(dataset_state_only)
    return both
def create_datasets_and_containers(self):
    """
    Create ``per_node`` containers and datasets in each node of the
    cluster.

    Each dataset creation result is passed on to ``self.create_container``
    for the same node.

    :return Deferred: once all the requests to create the datasets and
        containers are made.
    """
    pending = []
    for node in self.nodes:
        container_for_dataset = partial(self.create_container, node=node)
        for dataset_number in range(1, self.per_node + 1):
            Message.log(
                action="Creating dataset {num_dataset} in node {node_uuid}"
                .format(num_dataset=dataset_number, node_uuid=node.uuid)
            )
            creating = self.client.create_dataset(
                node.uuid, maximum_size=self.max_size
            )
            creating.addCallback(container_for_dataset)
            pending.append(creating)
    return gather_deferreds(pending)
def capture_upstart(reactor, host, output_file):
    """
    SSH into given machine and capture relevant logs, writing them to
    output file.

    :param reactor: The reactor.
    :param bytes host: Machine to SSH into.
    :param file output_file: File to write to.
    :return deferred: that will run the tail command
    """
    # (directory below /var/log, service name) for every log to follow.
    logged_services = (
        (b"flocker", b"flocker-control"),
        (b"flocker", b"flocker-dataset-agent"),
        (b"flocker", b"flocker-container-agent"),
        (b"flocker", b"flocker-docker-plugin"),
        (b"upstart", b"docker"),
    )

    def tail_service_log(directory, service):
        log_path = FilePath(b'/var/log/').child(directory).child(
            service + b'.log')
        formatter = TailFormatter(output_file, host, service)
        # note that we are using tail -F to keep retrying and not to exit
        # when we reach the end of the file, as we expect the logs to keep
        # being generated
        tailing = run_ssh(
            reactor=reactor,
            host=host,
            username='******',
            command=[b'tail', b'-F', log_path.path],
            handle_stdout=formatter.handle_output_line,
        )
        tailing.addErrback(write_failure, logger=None)
        # Deliver a final empty line to process the last message
        tailing.addCallback(
            lambda ignored: formatter.handle_output_line(b""))
        return tailing

    return gather_deferreds([
        tail_service_log(directory, service)
        for (directory, service) in logged_services
    ])
def parallel_setup(node):
    """
    Prepare one node: create and then delete a container, and create a
    dataset.

    ``reactor``, ``control_service``, ``image`` and ``volume_size`` are
    taken from the enclosing scope, which is not visible here.

    :param node: The node to set up; only its ``uuid`` is used.
    :return: Deferred firing with the second gathered result, i.e. the
        result of ``create_dataset``.
    """
    # Ensure the Docker image is cached by starting and stopping a
    # container.
    container_name = unicode(uuid4())
    caching = create_container(
        reactor, control_service, node.uuid, container_name, image
    )

    def remove_container(created):
        return delete_container(reactor, control_service, created)

    caching.addCallback(remove_container)

    # Create the dataset
    new_dataset_id = uuid4()
    creating_dataset = create_dataset(
        reactor, control_service, node.uuid, new_dataset_id, volume_size
    )

    def dataset_state_only(gathered):
        # Return only the dataset state
        return gathered[1]

    both = gather_deferreds((caching, creating_dataset))
    both.addCallback(dataset_state_only)
    return both
def start_containers(nodes):
    """
    Create ``per_node`` stateful containers on each of the given nodes,
    logging progress while the creations are outstanding.

    ``self`` and ``per_node`` are taken from the enclosing scope, which is
    not visible here.

    :param nodes: The nodes on which to create containers.
    :return: The gathered Deferred for all per-node creation chains. The
        progress logging loop is stopped when it fires, whether with
        success or failure.
    """
    Message.log(
        message_type='flocker.benchmark.container_setup:start',
        containers_per_node=per_node,
        total_nodes=len(nodes),
    )
    total = per_node * len(nodes)

    def log_progress():
        Message.log(
            message_type='flocker.benchmark.container_setup:progress',
            container_count=self.container_count,
            error_count=self.error_count,
            total_containers=total,
        )

    progress_loop = LoopingCall(log_progress)
    progress_loop.start(10, now=False)

    def chain_for(node):
        # One callback chain per node: each creation is only started once
        # the previous callback in the same chain has produced its result.
        chain = succeed(None)
        for index in range(per_node):
            chain.addCallback(
                lambda _ignore, n=node, i=index:
                self.create_stateful_container(n, i)
            )
        return chain

    def stop_progress_loop(outcome):
        progress_loop.stop()
        return outcome

    gathered = gather_deferreds([chain_for(node) for node in nodes])
    gathered.addBoth(stop_progress_loop)
    return gathered
def driver(reactor, cluster, scenario_factory, operation_factory,
           metric_factory, num_samples, result, output):
    """
    Record control service characteristics in ``result``, run the
    benchmark, add its samples to ``result`` and pass ``result`` to
    ``output``.

    :param reactor: Reactor to use.
    :param BenchmarkCluster cluster: Benchmark cluster.
    :param callable scenario_factory: A load scenario factory.
    :param callable operation_factory: An operation factory.
    :param callable metric_factory: A metric factory.
    :param int num_samples: Number of samples to take.
    :param result: A dictionary which will be updated with values to
        create a JSON result.
    :param output: A callable to receive the JSON structure, for
        printing or storage.
    :return: The Deferred chain, ending with the call to ``output``.
    """
    control_service = cluster.get_control_service(reactor)

    def record_control_service(characteristics):
        # The gathered results arrive in the order the requests were
        # listed below.
        version, nodes, containers, datasets_config = characteristics
        node_count = len(nodes)
        container_count = len(containers)
        dataset_count = len(datasets_config.datasets)
        result['control_service'] = dict(
            host=cluster.control_node_address().compressed,
            flocker_version=version[u"flocker"],
            node_count=node_count,
            container_count=container_count,
            dataset_count=dataset_count,
        )

    def run_benchmark(ignored):
        return benchmark(
            scenario_factory(reactor, cluster),
            operation_factory(reactor, cluster),
            metric_factory(reactor, cluster),
            num_samples,
        )

    def record_samples(outputs):
        samples, scenario_metrics = outputs
        result['samples'] = samples
        # Scenario metrics are only recorded when there are any.
        if scenario_metrics:
            result['scenario']['metrics'] = scenario_metrics
        return result

    d = gather_deferreds([
        control_service.version(),
        control_service.list_nodes(),
        control_service.list_containers_configuration(),
        control_service.list_datasets_configuration(),
    ])
    d.addCallback(record_control_service)
    d.addCallback(run_benchmark)
    d.addCallback(record_samples)
    d.addCallback(output)
    return d
def main(reactor, args, base_path, top_level): """ :param reactor: Reactor to use. :param list args: The arguments passed to the script. :param FilePath base_path: The executable being run. :param FilePath top_level: The top-level of the flocker repository. """ options = RunOptions(top_level=top_level) add_destination(eliot_output) try: options.parseOptions(args) except UsageError as e: sys.stderr.write("%s: %s\n" % (base_path.basename(), e)) raise SystemExit(1) runner = options.runner from flocker.common.script import eliot_logging_service log_writer = eliot_logging_service( destination=FileDestination( file=open("%s.log" % (base_path.basename(),), "a") ), reactor=reactor, capture_stdout=False) log_writer.startService() reactor.addSystemEventTrigger( 'before', 'shutdown', log_writer.stopService) cluster = None results = [] setup_succeeded = False reached_finally = False def cluster_cleanup(): if not reached_finally: print "interrupted..." print "stopping cluster" return runner.stop_cluster(reactor) cleanup_trigger_id = reactor.addSystemEventTrigger('before', 'shutdown', cluster_cleanup) try: yield runner.ensure_keys(reactor) cluster = yield runner.start_cluster(reactor) if options['distribution'] in ('centos-7',): remote_logs_file = open("remote_logs.log", "a") for node in cluster.all_nodes: results.append(capture_journal(reactor, node.address, remote_logs_file) ) elif options['distribution'] in ('ubuntu-14.04',): remote_logs_file = open("remote_logs.log", "a") for node in cluster.all_nodes: results.append(capture_upstart(reactor, node.address, remote_logs_file) ) gather_deferreds(results) if not options["no-pull"]: yield perform( make_dispatcher(reactor), parallel([ run_remotely( username='******', address=node.address, commands=task_pull_docker_images() ) for node in cluster.agent_nodes ]), ) setup_succeeded = True result = yield run_tests( reactor=reactor, cluster=cluster, trial_args=options['trial-args']) finally: reached_finally = True # We delete the nodes 
if the user hasn't asked to keep them # or if we failed to provision the cluster. if not setup_succeeded: print "cluster provisioning failed" elif not options['keep']: print "not keeping cluster" else: print "--keep specified, not destroying nodes." print ("To run acceptance tests against these nodes, " "set the following environment variables: ") environment_variables = get_trial_environment(cluster) for environment_variable in environment_variables: print "export {name}={value};".format( name=environment_variable, value=shell_quote( environment_variables[environment_variable]), ) reactor.removeSystemEventTrigger(cleanup_trigger_id) raise SystemExit(result)
def main(reactor, args, base_path, top_level):
    """
    Start a cluster, capture its remote logs and optionally configure
    applications on it.

    This function is a generator that yields Deferreds.
    NOTE(review): presumably it is wrapped with ``inlineCallbacks`` where
    it is defined; no decorator is visible here -- confirm.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the Flocker repository.
    :raises SystemExit: With status 1 on a usage error, otherwise with
        status 0 once setup has completed. Any other failure is re-raised
        after the cluster has been asked to stop.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    from flocker.common.script import eliot_logging_service
    log_file = open("%s.log" % (base_path.basename(),), "a")
    log_writer = eliot_logging_service(
        destination=FileDestination(file=log_file),
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    cluster = None
    results = []
    try:
        yield runner.ensure_keys(reactor)
        cluster = yield runner.start_cluster(reactor)

        # Pick the log capturing function matching the distribution.
        distribution = options['distribution']
        if distribution in ('centos-7',):
            capture_logs = capture_journal
        elif distribution in ('ubuntu-14.04', 'ubuntu-15.10'):
            capture_logs = capture_upstart
        else:
            capture_logs = None

        if capture_logs is not None:
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(
                    capture_logs(reactor, node.address, remote_logs_file))
        # The gathered Deferred is deliberately not yielded here.
        gather_deferreds(results)

        apps_per_node = options['apps-per-node']
        if apps_per_node > 0:
            config = _build_config(
                cluster, options['template'], apps_per_node)
            yield _configure(reactor, cluster, config)

        result = 0
    except BaseException:
        result = 1
        raise
    finally:
        if options['no-keep'] or result == 1:
            runner.stop_cluster(reactor)
        elif cluster is None:
            print("Didn't finish creating the cluster.")
            runner.stop_cluster(reactor)
        else:
            print("The following variables describe the cluster:")
            environment_variables = get_trial_environment(cluster)
            for variable_name in environment_variables:
                print("export {name}={value};".format(
                    name=variable_name,
                    value=shell_quote(environment_variables[variable_name]),
                ))
            print("Be sure to preserve the required files.")

    raise SystemExit(result)
def main(reactor, args, base_path, top_level):
    """
    Start a cluster, capture its remote logs and optionally configure
    applications on it.

    This function is a generator that yields Deferreds.
    NOTE(review): presumably it is wrapped with ``inlineCallbacks`` where
    it is defined; no decorator is visible here -- confirm.

    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the Flocker repository.
    :raises SystemExit: With status 1 on a usage error, otherwise with
        status 0 once setup has completed. Any other failure is re-raised
        after the cluster has been asked to stop.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    from flocker.common.script import eliot_logging_service
    log_file = open("%s.log" % (base_path.basename(), ), "a")
    log_writer = eliot_logging_service(
        destination=FileDestination(file=log_file),
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    cluster = None
    results = []
    try:
        yield runner.ensure_keys(reactor)
        cluster = yield runner.start_cluster(reactor)

        # Pick the log capturing function matching the distribution.
        distribution = options['distribution']
        if distribution in ('centos-7', ):
            capture_logs = capture_journal
        elif distribution in ('ubuntu-14.04', 'ubuntu-15.10'):
            capture_logs = capture_upstart
        else:
            capture_logs = None

        if capture_logs is not None:
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(
                    capture_logs(reactor, node.address, remote_logs_file))
        # The gathered Deferred is deliberately not yielded here.
        gather_deferreds(results)

        apps_per_node = options['apps-per-node']
        if apps_per_node > 0:
            config = _build_config(
                cluster, options['template'], apps_per_node)
            yield _configure(reactor, cluster, config)

        result = 0
    except BaseException:
        result = 1
        raise
    finally:
        if options['no-keep'] or result == 1:
            runner.stop_cluster(reactor)
        elif cluster is None:
            print("Didn't finish creating the cluster.")
            runner.stop_cluster(reactor)
        else:
            print("The following variables describe the cluster:")
            environment_variables = get_trial_environment(cluster)
            for variable_name in environment_variables:
                print("export {name}={value};".format(
                    name=variable_name,
                    value=shell_quote(environment_variables[variable_name]),
                ))
            print("Be sure to preserve the required files.")

    raise SystemExit(result)