Example #1
def cleanup_cluster(client, timeout=None):
    """
    Delete all containers and datasets in the given cluster.

    :param FlockerClient client: The API client instance for the cluster.
    :param timeout: A timeout in seconds for waiting until the deletions
        take effect if not ``None``, otherwise there is no waiting.
    :type timeout: int or None
    :returns: Deferred that fires when the clean up is complete if
        ``timeout`` is not ``None``, otherwise the Deferred fires
        when the deletion requests are acknowledged.
    """
    containers_configuration = yield client.list_containers_configuration()
    results = []
    for container in containers_configuration:
        print "deleting container", container.name
        results.append(client.delete_container(container.name))
    yield gather_deferreds(results)

    datasets_configuration = yield client.list_datasets_configuration()
    results = []
    for dataset in datasets_configuration:
        print "deleting dataset with id", dataset.dataset_id
        results.append(client.delete_dataset(dataset.dataset_id))
    yield gather_deferreds(results)

    if timeout is not None:
        print "waiting for all containers to get deleted"
        yield loop_until(
            client._reactor,
            lambda: client.list_containers_state().addCallback(
                lambda containers: not containers),
            repeat(1, timeout))
        print "waiting for all datasets to get deleted"
        yield loop_until(
            client._reactor,
            lambda: client.list_datasets_state().addCallback(
                lambda datasets: not datasets),
            repeat(1, timeout))
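
The yield-based style above suggests cleanup_cluster runs under Twisted's inlineCallbacks. The collect-then-gather pattern it relies on can be sketched with stock Twisted alone; gather_deferreds from flocker.common is assumed here to behave like gatherResults, firing once every deferred in the list has fired. All names below are illustrative, not part of the Flocker API.

from twisted.internet import task
from twisted.internet.defer import gatherResults, succeed


def delete_all(names):
    # Issue one deferred per deletion request, then wait for all of them,
    # mirroring the collect-then-gather loops in cleanup_cluster above.
    results = []
    for name in names:
        d = succeed(name)  # stand-in for client.delete_container(name)
        d.addCallback(lambda n: "deleted %s" % (n,))
        results.append(d)
    return gatherResults(results)  # fires with a list of all results


def main(reactor):
    d = delete_all([u"app-1", u"app-2"])

    def report(all_results):
        for line in all_results:
            print line
    d.addCallback(report)
    return d


if __name__ == "__main__":
    task.react(main)
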
Example #2
def driver(reactor, cluster, scenario_factory, operation_factory,
           metric_factory, num_samples, result, output):
    """
    :param reactor: Reactor to use.
    :param BenchmarkCluster cluster: Benchmark cluster.
    :param callable scenario_factory: A load scenario factory.
    :param callable operation_factory: An operation factory.
    :param callable metric_factory: A metric factory.
    :param int num_samples: Number of samples to take.
    :param result: A dictionary which will be updated with values to
        create a JSON result.
    :param output: A callable to receive the JSON structure, for
        printing or storage.
    """

    control_service = cluster.get_control_service(reactor)
    d = gather_deferreds([
        control_service.version(),
        control_service.list_nodes(),
        control_service.list_containers_configuration(),
        control_service.list_datasets_configuration(),
    ])

    def add_control_service(characteristics, result):
        version = characteristics[0]
        node_count = len(characteristics[1])
        container_count = len(characteristics[2])
        dataset_count = len(characteristics[3].datasets)
        result['control_service'] = dict(
            host=cluster.control_node_address().compressed,
            flocker_version=version[u"flocker"],
            node_count=node_count,
            container_count=container_count,
            dataset_count=dataset_count,
        )

    d.addCallback(add_control_service, result)

    def run_benchmark(ignored):
        return benchmark(
            scenario_factory(reactor, cluster),
            operation_factory(reactor, cluster),
            metric_factory(reactor, cluster),
            num_samples,
        )

    d.addCallback(run_benchmark)

    def add_samples(outputs, result):
        samples, scenario_metrics = outputs
        result['samples'] = samples
        if scenario_metrics:
            result['scenario']['metrics'] = scenario_metrics
        return result

    d.addCallback(add_samples, result)

    d.addCallback(output)

    return d
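
The result dictionary and the output callable are supplied by the caller of driver. A plausible wiring, using hypothetical values, seeds result (including the 'scenario' entry that add_samples writes metrics into) and serialises the finished structure as JSON:

import json
import sys

# driver() fills in 'control_service' and 'samples'; add_samples stores
# scenario metrics under result['scenario'], so that key is seeded here.
result = {'scenario': {'name': 'no-load'}}


def output(populated_result):
    # Dump the finished structure to stdout; writing to a file would also do.
    json.dump(populated_result, sys.stdout, indent=2, sort_keys=True)


# d = driver(reactor, cluster, scenario_factory, operation_factory,
#            metric_factory, num_samples=3, result=result, output=output)
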
Example #3
        def start_containers(nodes):

            Message.log(message_type='flocker.benchmark.container_setup:start',
                        containers_per_node=per_node,
                        total_nodes=len(nodes))
            total = per_node * len(nodes)

            def log_progress():
                Message.log(
                    message_type='flocker.benchmark.container_setup:progress',
                    container_count=self.container_count,
                    error_count=self.error_count,
                    total_containers=total)

            loop = LoopingCall(log_progress)
            loop.start(10, now=False)

            deferred_list = []
            for node in nodes:
                d = succeed(None)
                for count in range(per_node):
                    d.addCallback(
                        lambda _ignore, n=node, i=count:
                            self.create_stateful_container(n, i))
                deferred_list.append(d)

            d = gather_deferreds(deferred_list)

            def stop_loop(result):
                loop.stop()
                return result

            d.addBoth(stop_loop)

            return d
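
Two details of start_containers are easy to miss: each node gets a single deferred whose callbacks run serially, and the lambda binds the loop variables through default arguments (n=node, i=count) so every callback keeps its own values rather than the loop's final ones. A minimal sketch of the same chaining, with a hypothetical create() standing in for create_stateful_container:

from twisted.internet.defer import gatherResults, succeed


def create(node, index):
    # Stand-in for self.create_stateful_container(node, index).
    return succeed((node, index))


def start_all(nodes, per_node):
    deferred_list = []
    for node in nodes:
        d = succeed(None)
        for count in range(per_node):
            # Default arguments capture the current node and count; without
            # them every callback would see the loop's final values.
            d.addCallback(lambda _ignore, n=node, i=count: create(n, i))
        deferred_list.append(d)
    # Each per-node chain runs serially; the chains run in parallel.
    return gatherResults(deferred_list)
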
Example #4
 def _cleanup_compose(self):
     """
     Run docker-compose stop and rm -f for both demo templates to stop and
     remove all the containers that were created during the test.
     Run serially because docker-compose + swarm sometimes fail when
     commands are run in parallel.
     """
     d_node1_compose = remote_docker_compose(self.client_node_ip,
                                             self.docker_host,
                                             self.compose_node1.path,
                                             'stop')
     d_node1_compose.addCallback(lambda ignored: remote_docker_compose(
         self.client_node_ip, self.docker_host, self.compose_node1.path,
         'rm', '-f'
     ).addErrback(
         # This sometimes fails with exit code 255
         # and a message ValueError: No JSON object could be decoded
         lambda failure: failure.trap(ProcessTerminated)))
     d_node2_compose = remote_docker_compose(
         self.client_node_ip,
         self.docker_host,
         self.compose_node2.path,
         'stop',
     )
     d_node2_compose.addCallback(lambda ignored: remote_docker_compose(
         self.client_node_ip, self.docker_host, self.compose_node2.path,
         'rm', '-f'
     ).addErrback(
         # This sometimes fails with exit code 255
         # and a message ValueError: No JSON object could be decoded
         lambda failure: failure.trap(ProcessTerminated)))
     return gather_deferreds([d_node1_compose, d_node2_compose])
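
The addErrback(lambda failure: failure.trap(ProcessTerminated)) idiom absorbs only that one failure type; any other error is re-raised and still reaches gather_deferreds. A small sketch with stock Twisted and a hypothetical flaky_command in place of remote_docker_compose:

from twisted.internet.defer import fail, succeed
from twisted.internet.error import ProcessTerminated


def flaky_command(ok):
    # Stand-in for remote_docker_compose(..., 'rm', '-f').
    if ok:
        return succeed("removed")
    return fail(ProcessTerminated(exitCode=255))


d = flaky_command(ok=False)
# trap() only absorbs ProcessTerminated; any other failure type is re-raised
# by the errback and keeps propagating to whoever gathers this deferred.
d.addErrback(lambda failure: failure.trap(ProcessTerminated))
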
Example #5
def cleanup_cluster(client, timeout=None):
    """
    Delete all containers and datasets in the given cluster.

    :param FlockerClient client: The API client instance for the cluster.
    :param timeout: A timeout in seconds for waiting until the deletions
        take effect if not ``None``, otherwise there is no waiting.
    :type timeout: int or None
    :returns: Deferred that fires when the clean up is complete if
        ``timeout`` is not ``None``, otherwise the Deferred fires
        when the deletion requests are acknowledged.
    """
    containers_configuration = yield client.list_containers_configuration()
    results = []
    for container in containers_configuration:
        print "deleting container", container.name
        results.append(client.delete_container(container.name))
    yield gather_deferreds(results)

    datasets_configuration = yield client.list_datasets_configuration()
    results = []
    for dataset in datasets_configuration:
        print "deleting dataset with id", dataset.dataset_id
        results.append(client.delete_dataset(dataset.dataset_id))
    yield gather_deferreds(results)

    if timeout is not None:
        print "waiting for all containers to get deleted"
        yield loop_until(
            client._reactor,
            lambda: client.list_containers_state().addCallback(
                lambda containers: not containers
            ),
            repeat(1, timeout)
        )
        print "waiting for all datasets to get deleted"
        yield loop_until(
            client._reactor,
            lambda: client.list_datasets_state().addCallback(
                lambda datasets: not datasets
            ),
            repeat(1, timeout)
        )
Example #6
def get_cluster_init_process_names(runner, nodes):
    """
    Get the names of process 1 running on each node.

    :param runner: A method of running a command on a node.
    :param nodes: A list of Node to run the command on.
    :return: Deferred firing with a list of process names.
    """
    return gather_deferreds(list(
        get_node_init_process_name(runner, node)
        for node in nodes
    ))
Example #7
def get_cluster_cpu_times(reactor, runner, nodes, processes):
    """
    Get the CPU times for processes running on a cluster.

    :param reactor: Twisted Reactor.
    :param runner: A method of running a command on a node.
    :param nodes: A list of nodes to run the command on.
    :param processes: An iterator of process names to monitor. The process
        names must not contain spaces.
    :return: Deferred firing with a list of dictionaries, one per node,
        mapping process names to elapsed CPU time.  Process names may be
        truncated in the dictionaries.  If an error occurs for a node, its
        result is None (after the error is logged).
    """
    return gather_deferreds(
        list(
            get_node_cpu_times(reactor, runner, node, processes)
            for node in nodes))
Example #8
def get_cluster_cpu_times(reactor, runner, nodes, processes):
    """
    Get the CPU times for processes running on a cluster.

    :param reactor: Twisted Reactor.
    :param runner: A method of running a command on a node.
    :param nodes: A list of nodes to run the command on.
    :param processes: An iterator of process names to monitor. The process
        names must not contain spaces.
    :return: Deferred firing with a list of dictionaries, one per node,
        mapping process names to elapsed CPU time.  Process names may be
        truncated in the dictionaries.  If an error occurs for a node, its
        result is None (after the error is logged).
    """
    return gather_deferreds(list(
        get_node_cpu_times(reactor, runner, node, processes)
        for node in nodes
    ))
Example #9
 def _cleanup_compose(self):
     """
     Run docker-compose stop and rm -f for both demo templates to stop and
     remove all the containers that were created during the test.
     Run serially because docker-compose + swarm sometimes fail when
     commands are run in parallel.
     """
     d_node1_compose = remote_docker_compose(
         self.client_node_ip,
         self.docker_host,
         self.compose_node1.path, 'stop'
     )
     d_node1_compose.addCallback(
         lambda ignored: remote_docker_compose(
             self.client_node_ip,
             self.docker_host,
             self.compose_node1.path, 'rm', '-f'
         ).addErrback(
             # This sometimes fails with exit code 255
             # and a message ValueError: No JSON object could be decoded
             lambda failure: failure.trap(ProcessTerminated)
         )
     )
     d_node2_compose = remote_docker_compose(
         self.client_node_ip,
         self.docker_host,
         self.compose_node2.path,
         'stop',
     )
     d_node2_compose.addCallback(
         lambda ignored: remote_docker_compose(
             self.client_node_ip,
             self.docker_host,
             self.compose_node2.path,
             'rm', '-f'
         ).addErrback(
             # This sometimes fails with exit code 255
             # and a message ValueError: No JSON object could be decoded
             lambda failure: failure.trap(ProcessTerminated)
         )
     )
     return gather_deferreds([d_node1_compose, d_node2_compose])
Example #10
        def parallel_setup(node):
            # Ensure the Docker image is cached by starting and stopping a
            # container.
            name = unicode(uuid4())
            container_setup = create_container(reactor, control_service,
                                               node.uuid, name, image)
            container_setup.addCallback(
                partial(delete_container, reactor, control_service))

            # Create the dataset
            dataset_id = uuid4()
            dataset_setup = create_dataset(reactor, control_service, node.uuid,
                                           dataset_id, volume_size)

            d = gather_deferreds((container_setup, dataset_setup))

            # Return only the dataset state
            d.addCallback(lambda results: results[1])

            return d
Example #11
    def create_datasets_and_containers(self):
        """
        Create ``per_node`` containers and datasets in each node of the
        cluster.

        :return Deferred: fires once all the requests to create the datasets
            and containers have been made.
        """
        deferred_list = []
        for node in self.nodes:
            create_container_in_node = partial(self.create_container, node=node)
            for i in range(self.per_node):
                msg = ("Creating dataset {num_dataset} in node {node_uuid}").format(
                    num_dataset=i + 1, node_uuid=node.uuid
                )
                Message.log(action=msg)

                d = self.client.create_dataset(node.uuid, maximum_size=self.max_size)
                d.addCallback(create_container_in_node)
                deferred_list.append(d)

        return gather_deferreds(deferred_list)
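
Here partial(self.create_container, node=node) turns a two-argument method into a one-argument callback, so the dataset produced by create_dataset arrives as its first positional argument. The same pattern with hypothetical, self-contained names:

from functools import partial

from twisted.internet.defer import succeed


def create_container(dataset, node):
    # Stand-in for self.create_container; 'dataset' is the callback result.
    return succeed({"node": node, "dataset": dataset})


create_container_on_node_a = partial(create_container, node="node-a")

d = succeed("dataset-1")  # stand-in for client.create_dataset(...)
# Invoked as create_container("dataset-1", node="node-a").
d.addCallback(create_container_on_node_a)
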
Example #12
def capture_upstart(reactor, host, output_file):
    """
    SSH into the given machine and capture relevant logs, writing them to
    the output file.

    :param reactor: The reactor.
    :param bytes host: Machine to SSH into.
    :param file output_file: File to write to.
    :return: Deferred that fires when the tail commands complete.
    """
    # note that we are using tail -F to keep retrying and not to exit when we
    # reach the end of the file, as we expect the logs to keep being generated
    results = []
    for (directory, service) in [
            (b"flocker", b"flocker-control"),
            (b"flocker", b"flocker-dataset-agent"),
            (b"flocker", b"flocker-container-agent"),
            (b"flocker", b"flocker-docker-plugin"),
            (b"upstart", b"docker")]:
        path = FilePath(b'/var/log/').child(directory).child(service + b'.log')
        formatter = TailFormatter(output_file, host, service)
        ran = run_ssh(
            reactor=reactor,
            host=host,
            username='******',
            command=[
                b'tail',
                b'-F',
                path.path
            ],
            handle_stdout=formatter.handle_output_line,
        )
        ran.addErrback(write_failure, logger=None)
        # Deliver a final empty line to process the last message
        ran.addCallback(lambda ignored, formatter=formatter:
                        formatter.handle_output_line(b""))
        results.append(ran)
    return gather_deferreds(results)
Example #13
        def parallel_setup(node):
            # Ensure the Docker image is cached by starting and stopping a
            # container.
            name = unicode(uuid4())
            container_setup = create_container(
                reactor, control_service, node.uuid, name, image
            )
            container_setup.addCallback(
                partial(delete_container, reactor, control_service)
            )

            # Create the dataset
            dataset_id = uuid4()
            dataset_setup = create_dataset(
                reactor, control_service, node.uuid, dataset_id, volume_size
            )

            d = gather_deferreds((container_setup, dataset_setup))

            # Return only the dataset state
            d.addCallback(lambda results: results[1])

            return d
Example #14
        def start_containers(nodes):

            Message.log(
                message_type='flocker.benchmark.container_setup:start',
                containers_per_node=per_node,
                total_nodes=len(nodes)
            )
            total = per_node * len(nodes)

            def log_progress():
                Message.log(
                    message_type='flocker.benchmark.container_setup:progress',
                    container_count=self.container_count,
                    error_count=self.error_count,
                    total_containers=total
                )
            loop = LoopingCall(log_progress)
            loop.start(10, now=False)

            deferred_list = []
            for node in nodes:
                d = succeed(None)
                for count in range(per_node):
                    d.addCallback(
                        lambda _ignore, n=node, i=count:
                            self.create_stateful_container(n, i)
                    )
                deferred_list.append(d)

            d = gather_deferreds(deferred_list)

            def stop_loop(result):
                loop.stop()
                return result
            d.addBoth(stop_loop)

            return d
Example #15
def driver(
    reactor, cluster, scenario_factory, operation_factory, metric_factory,
    num_samples, result, output
):
    """
    :param reactor: Reactor to use.
    :param BenchmarkCluster cluster: Benchmark cluster.
    :param callable scenario_factory: A load scenario factory.
    :param callable operation_factory: An operation factory.
    :param callable metric_factory: A metric factory.
    :param int num_samples: Number of samples to take.
    :param result: A dictionary which will be updated with values to
        create a JSON result.
    :param output: A callable to receive the JSON structure, for
        printing or storage.
    """

    control_service = cluster.get_control_service(reactor)
    d = gather_deferreds([
        control_service.version(),
        control_service.list_nodes(),
        control_service.list_containers_configuration(),
        control_service.list_datasets_configuration(),
    ])

    def add_control_service(characteristics, result):
        version = characteristics[0]
        node_count = len(characteristics[1])
        container_count = len(characteristics[2])
        dataset_count = len(characteristics[3].datasets)
        result['control_service'] = dict(
            host=cluster.control_node_address().compressed,
            flocker_version=version[u"flocker"],
            node_count=node_count,
            container_count=container_count,
            dataset_count=dataset_count,
        )

    d.addCallback(add_control_service, result)

    def run_benchmark(ignored):
        return benchmark(
            scenario_factory(reactor, cluster),
            operation_factory(reactor, cluster),
            metric_factory(reactor, cluster),
            num_samples,
        )

    d.addCallback(run_benchmark)

    def add_samples(outputs, result):
        samples, scenario_metrics = outputs
        result['samples'] = samples
        if scenario_metrics:
            result['scenario']['metrics'] = scenario_metrics
        return result

    d.addCallback(add_samples, result)

    d.addCallback(output)

    return d
Example #16
def main(reactor, args, base_path, top_level):
    """
    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    from flocker.common.script import eliot_logging_service
    log_writer = eliot_logging_service(
        destination=FileDestination(
            file=open("%s.log" % (base_path.basename(),), "a")
        ),
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    cluster = None
    results = []

    setup_succeeded = False
    reached_finally = False

    def cluster_cleanup():
        if not reached_finally:
            print "interrupted..."
        print "stopping cluster"
        return runner.stop_cluster(reactor)

    cleanup_trigger_id = reactor.addSystemEventTrigger('before', 'shutdown',
                                                       cluster_cleanup)

    try:
        yield runner.ensure_keys(reactor)
        cluster = yield runner.start_cluster(reactor)
        if options['distribution'] in ('centos-7',):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(capture_journal(reactor,
                                               node.address,
                                               remote_logs_file)
                               )
        elif options['distribution'] in ('ubuntu-14.04',):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(capture_upstart(reactor,
                                               node.address,
                                               remote_logs_file)
                               )
        gather_deferreds(results)

        if not options["no-pull"]:
            yield perform(
                make_dispatcher(reactor),
                parallel([
                    run_remotely(
                        username='******',
                        address=node.address,
                        commands=task_pull_docker_images()
                    ) for node in cluster.agent_nodes
                ]),
            )

        setup_succeeded = True
        result = yield run_tests(
            reactor=reactor,
            cluster=cluster,
            trial_args=options['trial-args'])

    finally:
        reached_finally = True
        # We delete the nodes if the user hasn't asked to keep them
        # or if we failed to provision the cluster.
        if not setup_succeeded:
            print "cluster provisioning failed"
        elif not options['keep']:
            print "not keeping cluster"
        else:
            print "--keep specified, not destroying nodes."
            print ("To run acceptance tests against these nodes, "
                   "set the following environment variables: ")

            environment_variables = get_trial_environment(cluster)

            for environment_variable in environment_variables:
                print "export {name}={value};".format(
                    name=environment_variable,
                    value=shell_quote(
                        environment_variables[environment_variable]),
                )
            reactor.removeSystemEventTrigger(cleanup_trigger_id)

    raise SystemExit(result)
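
The cleanup here hinges on a 'before shutdown' system event trigger: cluster_cleanup runs even if the run is interrupted, and the trigger is removed only on the --keep path so the nodes survive. A stripped-down sketch of that trigger pattern, with a hypothetical cleanup in place of runner.stop_cluster:

from twisted.internet import task
from twisted.internet.defer import succeed


def main(reactor, keep=False):
    def cluster_cleanup():
        # 'before shutdown' triggers run even when the reactor is interrupted.
        print "stopping cluster"
        return succeed(None)

    trigger_id = reactor.addSystemEventTrigger(
        'before', 'shutdown', cluster_cleanup)

    if keep:
        # The --keep path: drop the trigger so the cluster is left running.
        reactor.removeSystemEventTrigger(trigger_id)
    return succeed(None)


if __name__ == "__main__":
    task.react(main)
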
Example #17
def main(reactor, args, base_path, top_level):
    """
    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the Flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    from flocker.common.script import eliot_logging_service
    log_writer = eliot_logging_service(
        destination=FileDestination(
            file=open("%s.log" % (base_path.basename(),), "a")
        ),
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    cluster = None
    results = []
    try:
        yield runner.ensure_keys(reactor)
        cluster = yield runner.start_cluster(reactor)
        if options['distribution'] in ('centos-7',):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(capture_journal(reactor,
                                               node.address,
                                               remote_logs_file)
                               )
        elif options['distribution'] in ('ubuntu-14.04', 'ubuntu-15.10'):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(capture_upstart(reactor,
                                               node.address,
                                               remote_logs_file)
                               )
        gather_deferreds(results)

        if options['apps-per-node'] > 0:
            config = _build_config(cluster, options['template'],
                                   options['apps-per-node'])
            yield _configure(reactor, cluster, config)

        result = 0

    except BaseException:
        result = 1
        raise
    finally:
        if options['no-keep'] or result == 1:
            runner.stop_cluster(reactor)
        else:
            if cluster is None:
                print("Didn't finish creating the cluster.")
                runner.stop_cluster(reactor)
            else:
                print("The following variables describe the cluster:")
                environment_variables = get_trial_environment(cluster)
                for environment_variable in environment_variables:
                    print("export {name}={value};".format(
                        name=environment_variable,
                        value=shell_quote(
                            environment_variables[environment_variable]),
                    ))
                print("Be sure to preserve the required files.")

    raise SystemExit(result)
Example #18
def main(reactor, args, base_path, top_level):
    """
    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the Flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    from flocker.common.script import eliot_logging_service
    log_writer = eliot_logging_service(
        destination=FileDestination(
            file=open("%s.log" % (base_path.basename(),), "a")
        ),
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    cluster = None
    results = []
    try:
        yield runner.ensure_keys(reactor)
        cluster = yield runner.start_cluster(reactor)
        if options['distribution'] in ('centos-7', ):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(
                    capture_journal(reactor, node.address, remote_logs_file))
        elif options['distribution'] in ('ubuntu-14.04', 'ubuntu-15.10'):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(
                    capture_upstart(reactor, node.address, remote_logs_file))
        gather_deferreds(results)

        if options['apps-per-node'] > 0:
            config = _build_config(cluster, options['template'],
                                   options['apps-per-node'])
            yield _configure(reactor, cluster, config)

        result = 0

    except BaseException:
        result = 1
        raise
    finally:
        if options['no-keep'] or result == 1:
            runner.stop_cluster(reactor)
        else:
            if cluster is None:
                print("Didn't finish creating the cluster.")
                runner.stop_cluster(reactor)
            else:
                print("The following variables describe the cluster:")
                environment_variables = get_trial_environment(cluster)
                for environment_variable in environment_variables:
                    print("export {name}={value};".format(
                        name=environment_variable,
                        value=shell_quote(
                            environment_variables[environment_variable]),
                    ))
                print("Be sure to preserve the required files.")

    raise SystemExit(result)