Example no. 1
def race(cfg,
         sources=False,
         build=False,
         distribution=False,
         external=False,
         docker=False):
    logger = logging.getLogger(__name__)
    # at this point an actor system has to run and we should only join
    actor_system = actor.bootstrap_actor_system(try_join=True)
    benchmark_actor = actor_system.createActor(
        BenchmarkActor, targetActorRequirements={"coordinator": True})
    try:
        result = actor_system.ask(
            benchmark_actor,
            Setup(cfg, sources, build, distribution, external, docker))
        if isinstance(result, Success):
            logger.info("Benchmark has finished successfully.")
        # may happen if one of the load generators has detected that the user has cancelled the benchmark.
        elif isinstance(result, actor.BenchmarkCancelled):
            logger.info(
                "User has cancelled the benchmark (detected by actor).")
        elif isinstance(result, actor.BenchmarkFailure):
            logger.error("A benchmark failure has occurred")
            raise exceptions.RallyError(result.message, result.cause)
        else:
            raise exceptions.RallyError(
                "Got an unexpected result during benchmarking: [%s]." %
                str(result))
    except KeyboardInterrupt:
        logger.info(
            "User has cancelled the benchmark (detected by race control).")
        # notify the coordinator so it can properly handle this state. Do it blocking so we don't have a race between this message
        # and the actor exit request.
        actor_system.ask(benchmark_actor, actor.BenchmarkCancelled())
    finally:
        logger.info("Telling benchmark actor to exit.")
        actor_system.tell(benchmark_actor, thespian.actors.ActorExitRequest())
Example no. 2
def create(cfg, sources, distribution, build, challenge_root_path, plugins):
    revisions = _extract_revisions(cfg.opts("mechanic", "source.revision"))
    java9_home = _java9_home(cfg)
    distribution_version = cfg.opts("mechanic", "distribution.version", mandatory=False)
    supply_requirements = _supply_requirements(sources, distribution, build, plugins, revisions, distribution_version)
    build_needed = any([build for _, _, build in supply_requirements.values()])
    src_config = cfg.all_opts("source")
    suppliers = []

    if build_needed:
        gradle = cfg.opts("build", "gradle.bin")
        es_src_dir = os.path.join(_src_dir(cfg), _config_value(src_config, "elasticsearch.src.subdir"))
        builder = Builder(es_src_dir, gradle, java9_home, challenge_root_path)
    else:
        builder = None

    es_supplier_type, es_version, es_build = supply_requirements["elasticsearch"]
    if es_supplier_type == "source":
        es_src_dir = os.path.join(_src_dir(cfg), _config_value(src_config, "elasticsearch.src.subdir"))
        suppliers.append(ElasticsearchSourceSupplier(es_version, es_src_dir, remote_url=cfg.opts("source", "remote.repo.url"), builder=builder))
        repo = None
    else:
        es_src_dir = None
        distributions_root = os.path.join(cfg.opts("node", "root.dir"), cfg.opts("source", "distribution.dir"))
        repo = DistributionRepository(name=cfg.opts("mechanic", "distribution.repository"),
                                      distribution_config=cfg.all_opts("distributions"),
                                      version=es_version)
        suppliers.append(ElasticsearchDistributionSupplier(repo, distributions_root))

    for plugin in plugins:
        supplier_type, plugin_version, build_plugin = supply_requirements[plugin.name]

        if supplier_type == "source":
            if CorePluginSourceSupplier.can_handle(plugin):
                logger.info("Adding core plugin source supplier for [%s]." % plugin.name)
                assert es_src_dir is not None, "Cannot build core plugin %s when Elasticsearch is not built from source." % plugin.name
                suppliers.append(CorePluginSourceSupplier(plugin, es_src_dir, builder))
            elif ExternalPluginSourceSupplier.can_handle(plugin):
                logger.info("Adding external plugin source supplier for [%s]." % plugin.name)
                suppliers.append(ExternalPluginSourceSupplier(plugin, plugin_version, _src_dir(cfg, mandatory=False), src_config, builder))
            else:
                raise exceptions.RallyError("Plugin %s can neither be treated as core nor as external plugin. Requirements: %s" %
                                            (plugin.name, supply_requirements[plugin.name]))
        else:
            logger.info("Adding plugin distribution supplier for [%s]." % plugin.name)
            assert repo is not None, "Cannot benchmark plugin %s from a distribution version but Elasticsearch from sources" % plugin.name
            suppliers.append(PluginDistributionSupplier(repo, plugin))

    return CompositeSupplier(suppliers)
Example no. 3
def main():
    check_python_version()
    log.remove_obsolete_default_log_config()
    log.install_default_log_config()
    log.configure_logging()
    console.init()

    parser = argparse.ArgumentParser(
        prog=PROGRAM_NAME,
        description=BANNER + "\n\n Rally daemon to support remote benchmarks",
        epilog="Find out more about Rally at %s" %
        console.format.link(DOC_LINK),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--version',
                        action='version',
                        version="%(prog)s " + version.version())

    subparsers = parser.add_subparsers(title="subcommands",
                                       dest="subcommand",
                                       help="")
    subparsers.required = True

    start_command = subparsers.add_parser("start",
                                          help="Starts the Rally daemon")
    restart_command = subparsers.add_parser("restart",
                                            help="Restarts the Rally daemon")
    for p in [start_command, restart_command]:
        p.add_argument("--node-ip", required=True, help="The IP of this node.")
        p.add_argument("--coordinator-ip",
                       required=True,
                       help="The IP of the coordinator node.")
    subparsers.add_parser("stop", help="Stops the Rally daemon")
    subparsers.add_parser(
        "status", help="Shows the current status of the local Rally daemon")

    args = parser.parse_args()

    if args.subcommand == "start":
        start(args)
    elif args.subcommand == "stop":
        stop()
    elif args.subcommand == "status":
        status()
    elif args.subcommand == "restart":
        stop(raise_errors=False)
        start(args)
    else:
        raise exceptions.RallyError("Unknown subcommand [%s]" %
                                    args.subcommand)
Example no. 4
    def run(self, lap):
        self.metrics_store.lap = lap
        main_driver = self.actor_system.createActor(driver.Driver)
        self.cluster.on_benchmark_start()
        result = self.actor_system.ask(
            main_driver,
            driver.StartBenchmark(self.cfg, self.track,
                                  self.metrics_store.meta_info,
                                  self.metrics_store.lap))
        if isinstance(result, driver.BenchmarkComplete):
            logger.info("Benchmark is complete.")
            logger.info("Notifying cluster.")
            self.cluster.on_benchmark_stop()
            logger.info("Bulk adding data to metrics store.")
            self.metrics_store.bulk_add(result.metrics)
            logger.info("Flushing metrics data...")
            self.metrics_store.flush()
            logger.info("Flushing done")
        elif isinstance(result, driver.BenchmarkFailure):
            raise exceptions.RallyError(result.message, result.cause)
        else:
            raise exceptions.RallyError(
                "Driver has returned no metrics but instead [%s]. Terminating race without result."
                % str(result))
Example no. 5
def start(args):
    if actor.actor_system_already_running():
        raise exceptions.RallyError(
            "An actor system appears to be already running.")
    # TheSpian writes the following warning upon start (at least) on Mac OS X:
    #
    # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
    # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
    #
    # Therefore, we will not show warnings but only errors.
    logging.basicConfig(level=logging.ERROR)
    actor.bootstrap_actor_system(local_ip=args.node_ip,
                                 coordinator_ip=args.coordinator_ip)
    console.info(
        "Successfully started actor system on node [%s] with coordinator node IP [%s]."
        % (args.node_ip, args.coordinator_ip))
Example no. 6
def run(cfg):
    logger = logging.getLogger(__name__)
    name = cfg.opts("race", "pipeline")
    race_id = cfg.opts("system", "race.id")
    console.info(f"Race id is [{race_id}]", logger=logger)
    if len(name) == 0:
        # assume from-distribution pipeline if distribution.version has been specified and --pipeline cli arg not set
        if cfg.exists("mechanic", "distribution.version"):
            name = "from-distribution"
        else:
            name = "from-sources"
        logger.info(
            "User specified no pipeline. Automatically derived pipeline [%s].",
            name)
        cfg.add(config.Scope.applicationOverride, "race", "pipeline", name)
    else:
        logger.info("User specified pipeline [%s].", name)

    if os.environ.get("RALLY_RUNNING_IN_DOCKER", "").upper() == "TRUE":
        # in this case only benchmarking remote Elasticsearch clusters makes sense
        if name != "benchmark-only":
            raise exceptions.SystemSetupError(
                "Only the [benchmark-only] pipeline is supported by the Rally Docker image.\n"
                "Add --pipeline=benchmark-only in your Rally arguments and try again.\n"
                "For more details read the docs for the benchmark-only pipeline in {}\n"
                .format(doc_link("pipelines.html#benchmark-only")))

    try:
        pipeline = pipelines[name]
    except KeyError:
        raise exceptions.SystemSetupError(
            "Unknown pipeline [%s]. List the available pipelines with %s list pipelines."
            % (name, PROGRAM_NAME))
    try:
        pipeline(cfg)
    except exceptions.RallyError as e:
        # just pass on our own errors. It should be treated differently on top-level
        raise e
    except KeyboardInterrupt:
        logger.info("User has cancelled the benchmark.")
        raise exceptions.UserInterrupted(
            "User has cancelled the benchmark (detected by race control)."
        ) from None
    except BaseException:
        tb = sys.exc_info()[2]
        raise exceptions.RallyError(
            "This race ended with a fatal crash.").with_traceback(tb)
Example no. 7
def run(cfg):
    logger = logging.getLogger(__name__)
    name = cfg.opts("race", "pipeline")

    if len(name) == 0:
        # assume from-distribution pipeline if distribution.version has been specified and --pipeline cli arg not set
        if cfg.exists("mechanic", "distribution.version"):
            name = "from-distribution"
        else:
            name = "from-sources-complete"
        logger.info(
            "User specified no pipeline. Automatically derived pipeline [%s].",
            name)
        cfg.add(config.Scope.applicationOverride, "race", "pipeline", name)
    else:
        if (cfg.exists("mechanic", "distribution.version") and name in [
                "from-sources-complete", "from-sources-skip-build",
                "benchmark-only"
        ]):
            raise exceptions.SystemSetupError(
                "--distribution-version can only be used together with pipeline from-distribution, "
                ""
                "but you specified {}.\n"
                "If you intend to benchmark an externally provisioned cluster, don't specify --distribution-version otherwise\n"
                "please read the docs for from-distribution pipeline at "
                "{}/pipelines.html#from-distribution".format(name, DOC_LINK))
        logger.info("User specified pipeline [%s].", name)

    try:
        pipeline = pipelines[name]
    except KeyError:
        raise exceptions.SystemSetupError(
            "Unknown pipeline [%s]. List the available pipelines with %s list pipelines."
            % (name, PROGRAM_NAME))
    try:
        pipeline(cfg)
    except exceptions.RallyError as e:
        # just pass on our own errors. It should be treated differently on top-level
        raise e
    except KeyboardInterrupt:
        logger.info("User has cancelled the benchmark.")
    except BaseException:
        tb = sys.exc_info()[2]
        raise exceptions.RallyError(
            "This race ended with a fatal crash.").with_traceback(tb)
Example no. 8
def run(cfg):
    name = cfg.opts("system", "pipeline")
    try:
        pipeline = pipelines[name](RacingContext(cfg))
    except KeyError:
        raise exceptions.ImproperlyConfigured(
            "Unknown pipeline [%s]. You can list the available pipelines with %s list pipelines."
            % (name, PROGRAM_NAME))
    try:
        pipeline()
    except exceptions.RallyError as e:
        # just pass on our own errors. It should be treated differently on top-level
        raise e
    except BaseException:
        tb = sys.exc_info()[2]
        raise exceptions.RallyError(
            "This race ended early with a fatal crash. For details please see the logs."
        ).with_traceback(tb)
Example no. 9
    def install(self, es_home_path, plugin_url=None):
        installer_binary_path = os.path.join(es_home_path, "bin", "elasticsearch-plugin")
        if plugin_url:
            logger.info("Installing [%s] into [%s] from [%s]" % (self.plugin_name, es_home_path, plugin_url))
            install_cmd = '%s install --batch "%s"' % (installer_binary_path, plugin_url)
        else:
            logger.info("Installing [%s] into [%s]" % (self.plugin_name, es_home_path))
            install_cmd = '%s install --batch "%s"' % (installer_binary_path, self.plugin_name)

        return_code = process.run_subprocess_with_logging(install_cmd)
        # see: https://www.elastic.co/guide/en/elasticsearch/plugins/current/_other_command_line_parameters.html
        if return_code == 0:
            logger.info("Successfully installed [%s]." % self.plugin_name)
        elif return_code == 64:
            # most likely this is an unknown plugin
            raise exceptions.SystemSetupError("Unknown plugin [%s]" % self.plugin_name)
        elif return_code == 74:
            raise exceptions.SupplyError("I/O error while trying to install [%s]" % self.plugin_name)
        else:
            raise exceptions.RallyError("Unknown error while trying to install [%s] (installer return code [%s]). Please check the logs." %
                                        (self.plugin_name, str(return_code)))
Example no. 10
def benchmark_external(ctx):
    # TODO dm module refactoring: we can just inline prepare_benchmark_external and simplify this code a bit
    track_name = ctx.config.opts("system", "track")
    challenge_name = ctx.config.opts("benchmarks", "challenge")
    print("Racing on track [%s] and challenge [%s]" %
          (track_name, challenge_name))
    actors = thespian.actors.ActorSystem()
    main_driver = actors.createActor(driver.Driver)
    # TODO dm: Retrieving the metrics store here is *dirty*...
    metrics_store = ctx.mechanic._metrics_store

    ctx.cluster.on_benchmark_start()
    completed = actors.ask(
        main_driver,
        driver.StartBenchmark(ctx.config, ctx.track, metrics_store.meta_info))
    ctx.cluster.on_benchmark_stop()
    if not hasattr(completed, "metrics"):
        raise exceptions.RallyError(
            "Driver has returned no metrics but instead [%s]. Terminating race without result."
            % str(completed))
    metrics_store.bulk_add(completed.metrics)
    ctx.mechanic.stop_metrics()
Example no. 11
def list_facts(cfg):
    console.info("This is an experimental command and subject to change.")
    # provide a custom error message
    target_hosts = cfg.opts("facts", "hosts", mandatory=False)
    if not target_hosts:
        raise exceptions.SystemSetupError(
            "Please define a target host with --target-hosts")
    if len(target_hosts) > 1:
        raise exceptions.SystemSetupError(
            "Only one target host is supported at the moment but you provided %s"
            % target_hosts)

    # at this point an actor system has to run and we should only join
    actor_system = actor.bootstrap_actor_system(try_join=True)
    facts_actor = actor_system.createActor(
        FactsActor, targetActorRequirements={"ip": target_hosts[0]})
    result = actor_system.ask(facts_actor, GatherFacts())
    if isinstance(result, Facts):
        console.println(json.dumps(result.facts, indent="  "))
    else:
        raise exceptions.RallyError("Could not gather facts: [%s]." %
                                    str(result))
Example no. 12
def run_async(cfg):
    console.warn("The race-async command is experimental.")
    logger = logging.getLogger(__name__)
    # We'll use a special car name for external benchmarks.
    cfg.add(config.Scope.benchmark, "mechanic", "car.names", ["external"])
    coordinator = BenchmarkCoordinator(cfg)

    try:
        coordinator.setup()
        race_driver = driver.AsyncDriver(cfg, coordinator.current_track,
                                         coordinator.current_challenge)
        distribution_flavor, distribution_version, revision = race_driver.setup()
        coordinator.on_preparation_complete(distribution_flavor,
                                            distribution_version, revision)

        new_metrics = race_driver.run()
        coordinator.on_benchmark_complete(new_metrics)
    except KeyboardInterrupt:
        logger.info("User has cancelled the benchmark.")
    except BaseException as e:
        tb = sys.exc_info()[2]
        raise exceptions.RallyError(str(e)).with_traceback(tb)
Example no. 13
def create(cfg, metrics_store, all_node_ips, cluster_settings=None, sources=False, build=False, distribution=False, external=False,
           docker=False):
    races_root = paths.races_root(cfg)
    challenge_root_path = paths.race_root(cfg)
    node_ids = cfg.opts("provisioning", "node.ids", mandatory=False)
    repo = team.team_repo(cfg)
    # externally provisioned clusters do not support cars / plugins
    if external:
        car = None
        plugins = []
    else:
        car = team.load_car(repo, cfg.opts("mechanic", "car.names"))
        plugins = team.load_plugins(repo, cfg.opts("mechanic", "car.plugins"))

    if sources:
        try:
            src_dir = cfg.opts("source", "local.src.dir")
        except config.ConfigError:
            logger.exception("Cannot determine source directory")
            raise exceptions.SystemSetupError("You cannot benchmark Elasticsearch from sources. Did you install Gradle? Please install"
                                              " all prerequisites and reconfigure Rally with %s configure" % PROGRAM_NAME)

        remote_url = cfg.opts("source", "remote.repo.url")
        revision = cfg.opts("mechanic", "source.revision")
        gradle = cfg.opts("build", "gradle.bin")
        java_home = cfg.opts("runtime", "java.home")

        if len(plugins) > 0:
            raise exceptions.RallyError("Source builds of plugins are not supported yet. For more details, please "
                                        "check https://github.com/elastic/rally/issues/309 and upgrade Rally in case support has been "
                                        "added in the meantime.")
        s = lambda: supplier.from_sources(remote_url, src_dir, revision, gradle, java_home, challenge_root_path, build)
        p = []
        for node_id in node_ids:
            p.append(provisioner.local_provisioner(cfg, car, plugins, cluster_settings, all_node_ips, challenge_root_path, node_id))
        l = launcher.InProcessLauncher(cfg, metrics_store, races_root)
    elif distribution:
        version = cfg.opts("mechanic", "distribution.version")
        repo_name = cfg.opts("mechanic", "distribution.repository")
        distributions_root = "%s/%s" % (cfg.opts("node", "root.dir"), cfg.opts("source", "distribution.dir"))
        distribution_cfg = cfg.all_opts("distributions")

        s = lambda: supplier.from_distribution(version=version, repo_name=repo_name, distribution_config=distribution_cfg,
                                               distributions_root=distributions_root, plugins=plugins)
        p = []
        for node_id in node_ids:
            p.append(provisioner.local_provisioner(cfg, car, plugins, cluster_settings, all_node_ips, challenge_root_path, node_id))
        l = launcher.InProcessLauncher(cfg, metrics_store, races_root)
    elif external:
        if cluster_settings:
            logger.warning("Cannot apply challenge-specific cluster settings [%s] for an externally provisioned cluster. Please ensure "
                           "that the cluster settings are present or the benchmark may fail or behave unexpectedly." % cluster_settings)
        if len(plugins) > 0:
            raise exceptions.SystemSetupError("You cannot specify any plugins for externally provisioned clusters. Please remove "
                                              "\"--elasticsearch-plugins\" and try again.")

        s = lambda: None
        p = [provisioner.no_op_provisioner()]
        l = launcher.ExternalLauncher(cfg, metrics_store)
    elif docker:
        if len(plugins) > 0:
            raise exceptions.SystemSetupError("You cannot specify any plugins for Docker clusters. Please remove "
                                              "\"--elasticsearch-plugins\" and try again.")
        s = lambda: None
        p = []
        for node_id in node_ids:
            p.append(provisioner.docker_provisioner(cfg, car, cluster_settings, challenge_root_path, node_id))
        l = launcher.DockerLauncher(cfg, metrics_store)
    else:
        # It is a programmer error (and not a user error) if this function is called with wrong parameters
        raise RuntimeError("One of sources, distribution, docker or external must be True")

    return Mechanic(s, p, l)
Example no. 14
 def receiveMessage(self, msg, sender):
     try:
         logger.info(
             "MechanicActor#receiveMessage(msg = [%s] sender = [%s])" %
             (str(type(msg)), str(sender)))
         if isinstance(msg, StartEngine):
             self.on_start_engine(msg, sender)
         elif isinstance(msg, NodesStarted):
             self.metrics_store.merge_meta_info(msg.system_meta_info)
             self.transition_when_all_children_responded(
                 sender, msg, "starting", "nodes_started",
                 self.on_all_nodes_started)
         elif isinstance(msg, MetricsMetaInfoApplied):
             self.transition_when_all_children_responded(
                 sender, msg, "apply_meta_info", "cluster_started",
                 self.on_cluster_started)
         elif isinstance(msg, OnBenchmarkStart):
             self.metrics_store.lap = msg.lap
             self.cluster.on_benchmark_start()
             # in the first lap, we are in state "cluster_started", after that in "benchmark_stopped"
             self.send_to_children_and_transition(
                 sender, msg, ["cluster_started", "benchmark_stopped"],
                 "benchmark_starting")
         elif isinstance(msg, BenchmarkStarted):
             self.transition_when_all_children_responded(
                 sender, msg, "benchmark_starting", "benchmark_started",
                 self.on_benchmark_started)
         elif isinstance(msg, ResetRelativeTime):
             if msg.reset_in_seconds > 0:
                 self.wakeupAfter(msg.reset_in_seconds)
             else:
                 self.reset_relative_time()
         elif isinstance(msg, thespian.actors.WakeupMessage):
             self.reset_relative_time()
         elif isinstance(msg, actor.BenchmarkFailure):
             self.send(self.race_control, msg)
         elif isinstance(msg, OnBenchmarkStop):
             self.send_to_children_and_transition(sender, msg,
                                                  "benchmark_started",
                                                  "benchmark_stopping")
         elif isinstance(msg, BenchmarkStopped):
             self.metrics_store.bulk_add(msg.system_metrics)
             self.transition_when_all_children_responded(
                 sender, msg, "benchmark_stopping", "benchmark_stopped",
                 self.on_benchmark_stopped)
         elif isinstance(msg, StopEngine):
             # detach from cluster and gather all system metrics
             self.cluster_launcher.stop(self.cluster)
             # we might have experienced a launch error or the user has cancelled the benchmark. Hence we need to allow to stop the
             # cluster from various states and we don't check here for a specific one.
             self.send_to_children_and_transition(sender, StopNodes(), [],
                                                  "cluster_stopping")
         elif isinstance(msg, NodesStopped):
             self.metrics_store.bulk_add(msg.system_metrics)
             self.transition_when_all_children_responded(
                 sender, msg, "cluster_stopping", "cluster_stopped",
                 self.on_all_nodes_stopped)
         elif isinstance(msg, thespian.actors.ActorExitRequest):
             # due to early termination by race control. If it's self-initiated we already took care of the rest.
             if sender != self.myAddress:
                 self.send_to_children_and_transition(
                     self.myAddress,
                     msg,
                     expected_status=None,
                     new_status="cluster_stopping")
         elif isinstance(msg, thespian.actors.ChildActorExited):
             if self.is_current_status_expected("cluster_stopping"):
                 logger.info(
                     "Child actor exited while engine is stopping: [%s]" %
                     msg)
             else:
                 raise exceptions.RallyError(
                     "Child actor exited with [%s] while in status [%s]." %
                     (msg, self.status))
         elif isinstance(msg, thespian.actors.PoisonMessage):
             # something went wrong with a child actor
             if isinstance(msg.poisonMessage, StartEngine):
                 raise exceptions.LaunchError(
                     "Could not start benchmark candidate. Are Rally daemons on all targeted machines running?"
                 )
             else:
                 logger.error(
                     "[%s] sent to a child actor has resulted in PoisonMessage"
                     % str(msg.poisonMessage))
                 raise exceptions.RallyError(
                     "Could not communicate with benchmark candidate (unknown reason)"
                 )
     except BaseException:
         # usually, we'll notify the sender but in case a child sent something that caused an exception we'd rather
         # have it bubble up to race control. Otherwise, we could play ping-pong with our child actor.
         recipient = self.race_control if sender in self.children else sender
         logger.exception("Cannot process message [%s]. Notifying [%s]." %
                          (msg, recipient))
         ex_type, ex_value, ex_traceback = sys.exc_info()
         # avoid "can't pickle traceback objects"
         import traceback
         self.send(
             recipient,
             actor.BenchmarkFailure(
                 "Could not execute command (%s)" % ex_value,
                 traceback.format_exc()))
Example no. 15
def create(cfg, sources, distribution, build, car, plugins=None):
    logger = logging.getLogger(__name__)
    if plugins is None:
        plugins = []
    revisions = _extract_revisions(cfg.opts("mechanic", "source.revision"))
    distribution_version = cfg.opts("mechanic",
                                    "distribution.version",
                                    mandatory=False)
    supply_requirements = _supply_requirements(sources, distribution, build,
                                               plugins, revisions,
                                               distribution_version)
    build_needed = any([build for _, _, build in supply_requirements.values()])
    src_config = cfg.all_opts("source")
    suppliers = []

    if build_needed:
        java_home = _java_home(car)
        es_src_dir = os.path.join(
            _src_dir(cfg), _config_value(src_config,
                                         "elasticsearch.src.subdir"))
        builder = Builder(es_src_dir, java_home, paths.logs())
    else:
        builder = None

    es_supplier_type, es_version, es_build = supply_requirements[
        "elasticsearch"]
    if es_supplier_type == "source":
        es_src_dir = os.path.join(
            _src_dir(cfg), _config_value(src_config,
                                         "elasticsearch.src.subdir"))
        suppliers.append(
            ElasticsearchSourceSupplier(es_version,
                                        es_src_dir,
                                        remote_url=cfg.opts(
                                            "source", "remote.repo.url"),
                                        car=car,
                                        builder=builder))
        repo = None
    else:
        es_src_dir = None
        distributions_root = os.path.join(
            cfg.opts("node", "root.dir"), cfg.opts("source",
                                                   "distribution.dir"))

        dist_cfg = {}
        # car / plugin defines defaults...
        dist_cfg.update(car.variables)
        for plugin in plugins:
            for k, v in plugin.variables.items():
                dist_cfg["plugin_{}_{}".format(plugin.name, k)] = v
        # ... but the user can override it in rally.ini
        dist_cfg.update(cfg.all_opts("distributions"))
        repo = DistributionRepository(name=cfg.opts("mechanic",
                                                    "distribution.repository"),
                                      distribution_config=dist_cfg,
                                      version=es_version)
        suppliers.append(
            ElasticsearchDistributionSupplier(repo, distributions_root))

    for plugin in plugins:
        supplier_type, plugin_version, build_plugin = supply_requirements[
            plugin.name]

        if supplier_type == "source":
            if CorePluginSourceSupplier.can_handle(plugin):
                logger.info("Adding core plugin source supplier for [%s].",
                            plugin.name)
                assert es_src_dir is not None, "Cannot build core plugin %s when Elasticsearch is not built from source." % plugin.name
                suppliers.append(
                    CorePluginSourceSupplier(plugin, es_src_dir, builder))
            elif ExternalPluginSourceSupplier.can_handle(plugin):
                logger.info("Adding external plugin source supplier for [%s].",
                            plugin.name)
                suppliers.append(
                    ExternalPluginSourceSupplier(
                        plugin, plugin_version, _src_dir(cfg, mandatory=False),
                        src_config, builder))
            else:
                raise exceptions.RallyError(
                    "Plugin %s can neither be treated as core nor as external plugin. Requirements: %s"
                    % (plugin.name, supply_requirements[plugin.name]))
        else:
            logger.info("Adding plugin distribution supplier for [%s].",
                        plugin.name)
            assert repo is not None, "Cannot benchmark plugin %s from a distribution version but Elasticsearch from sources" % plugin.name
            suppliers.append(PluginDistributionSupplier(repo, plugin))

    return CompositeSupplier(suppliers)
Example no. 16
 def receiveMessage(self, msg, sender):
     try:
         logger.info(
             "MechanicActor#receiveMessage(msg = [%s] sender = [%s])" %
             (str(type(msg)), str(sender)))
         if isinstance(msg, StartEngine):
             self.on_start_engine(msg, sender)
         elif isinstance(msg, NodesStarted):
             self.metrics_store.merge_meta_info(msg.system_meta_info)
             self.transition_when_all_children_responded(
                 sender, msg, "starting", "nodes_started",
                 self.on_all_nodes_started)
         elif isinstance(msg, MetricsMetaInfoApplied):
             self.transition_when_all_children_responded(
                 sender, msg, "apply_meta_info", "cluster_started",
                 self.on_cluster_started)
         elif isinstance(msg, OnBenchmarkStart):
             self.metrics_store.lap = msg.lap
             # in the first lap, we are in state "cluster_started", after that in "benchmark_stopped"
             self.send_to_children_and_transition(
                 sender, msg, ["cluster_started", "benchmark_stopped"],
                 "benchmark_starting")
         elif isinstance(msg, BenchmarkStarted):
             self.transition_when_all_children_responded(
                 sender, msg, "benchmark_starting", "benchmark_started",
                 self.on_benchmark_started)
         elif isinstance(msg, ResetRelativeTime):
             if msg.reset_in_seconds > 0:
                 self.wakeupAfter(msg.reset_in_seconds)
             else:
                 self.reset_relative_time()
         elif isinstance(msg, thespian.actors.WakeupMessage):
             self.reset_relative_time()
         elif isinstance(msg, actor.BenchmarkFailure):
             self.send(self.race_control, msg)
         elif isinstance(msg, OnBenchmarkStop):
             self.send_to_children_and_transition(sender, msg,
                                                  "benchmark_started",
                                                  "benchmark_stopping")
         elif isinstance(msg, BenchmarkStopped):
             self.metrics_store.bulk_add(msg.system_metrics)
             self.transition_when_all_children_responded(
                 sender, msg, "benchmark_stopping", "benchmark_stopped",
                 self.on_benchmark_stopped)
         elif isinstance(msg, StopEngine):
             # detach from cluster and gather all system metrics
             self.cluster_launcher.stop(self.cluster)
             # we might have experienced a launch error or the user has cancelled the benchmark. Hence we need to allow to stop the
             # cluster from various states and we don't check here for a specific one.
             self.send_to_children_and_transition(sender, StopNodes(), [],
                                                  "cluster_stopping")
         elif isinstance(msg, NodesStopped):
             self.metrics_store.bulk_add(msg.system_metrics)
             self.transition_when_all_children_responded(
                 sender, msg, "cluster_stopping", "cluster_stopped",
                 self.on_all_nodes_stopped)
         elif isinstance(msg, thespian.actors.ActorExitRequest):
             # due to early termination by race control. If it's self-initiated we already took care of the rest.
             if sender != self.myAddress:
                 self.send_to_children_and_transition(
                     self.myAddress,
                     msg,
                     expected_status=None,
                     new_status="cluster_stopping")
         elif isinstance(msg, thespian.actors.ChildActorExited):
             if self.is_current_status_expected(
                 ["cluster_stopping", "cluster_stopped"]):
                 logger.info(
                     "Child actor exited while engine is stopping or stopped: [%s]"
                     % msg)
             else:
                 raise exceptions.RallyError(
                     "Child actor exited with [%s] while in status [%s]." %
                     (msg, self.status))
         elif isinstance(msg, thespian.actors.PoisonMessage):
             # something went wrong with a child actor
             if isinstance(msg.poisonMessage, StartEngine):
                 raise exceptions.LaunchError(
                     "Could not start benchmark candidate. Are Rally daemons on all targeted machines running?"
                 )
             else:
                 logger.error(
                     "[%s] sent to a child actor has resulted in PoisonMessage"
                     % str(msg.poisonMessage))
                 raise exceptions.RallyError(
                     "Could not communicate with benchmark candidate (unknown reason)"
                 )
         else:
             logger.info(
                 "MechanicActor received unknown message [%s] (ignoring)." %
                 (str(msg)))
     except BaseException as e:
         logger.exception("Cannot process message")
         logger.error("Failed message details: [%s]. Notifying [%s]." %
                      (msg, self.race_control))
         self.send(
             self.race_control,
             actor.BenchmarkFailure(
                 "Error in Elasticsearch cluster coordinator", e))
Example no. 17
def create(cfg, sources, distribution, car, plugins=None):
    logger = logging.getLogger(__name__)
    if plugins is None:
        plugins = []
    caching_enabled = cfg.opts("source",
                               "cache",
                               mandatory=False,
                               default_value=True)
    revisions = _extract_revisions(
        cfg.opts("mechanic", "source.revision", mandatory=sources))
    distribution_version = cfg.opts("mechanic",
                                    "distribution.version",
                                    mandatory=False)
    supply_requirements = _supply_requirements(sources, distribution, plugins,
                                               revisions, distribution_version)
    build_needed = any([build for _, _, build in supply_requirements.values()])
    es_supplier_type, es_version, _ = supply_requirements["elasticsearch"]
    src_config = cfg.all_opts("source")
    suppliers = []

    target_os = cfg.opts("mechanic", "target.os", mandatory=False)
    target_arch = cfg.opts("mechanic", "target.arch", mandatory=False)
    template_renderer = TemplateRenderer(version=es_version,
                                         os_name=target_os,
                                         arch=target_arch)

    if build_needed:
        raw_build_jdk = car.mandatory_var("build.jdk")
        try:
            build_jdk = int(raw_build_jdk)
        except ValueError:
            raise exceptions.SystemSetupError(
                f"Car config key [build.jdk] is invalid: [{raw_build_jdk}] (must be int)"
            )

        es_src_dir = os.path.join(
            _src_dir(cfg), _config_value(src_config,
                                         "elasticsearch.src.subdir"))
        builder = Builder(es_src_dir, build_jdk, paths.logs())
    else:
        builder = None

    distributions_root = os.path.join(cfg.opts("node", "root.dir"),
                                      cfg.opts("source", "distribution.dir"))
    dist_cfg = {}
    # car / plugin defines defaults...
    dist_cfg.update(car.variables)
    for plugin in plugins:
        for k, v in plugin.variables.items():
            dist_cfg["plugin_{}_{}".format(plugin.name, k)] = v
    # ... but the user can override it in rally.ini
    dist_cfg.update(cfg.all_opts("distributions"))

    if caching_enabled:
        logger.info("Enabling source artifact caching.")
        max_age_days = int(
            cfg.opts("source", "cache.days", mandatory=False, default_value=7))
        if max_age_days <= 0:
            raise exceptions.SystemSetupError(
                f"cache.days must be a positive number but is {max_age_days}")

        source_distributions_root = os.path.join(distributions_root, "src")
        _prune(source_distributions_root, max_age_days)
    else:
        logger.info("Disabling source artifact caching.")
        source_distributions_root = None

    if es_supplier_type == "source":
        es_src_dir = os.path.join(
            _src_dir(cfg), _config_value(src_config,
                                         "elasticsearch.src.subdir"))

        source_supplier = ElasticsearchSourceSupplier(
            es_version,
            es_src_dir,
            remote_url=cfg.opts("source", "remote.repo.url"),
            car=car,
            builder=builder,
            template_renderer=template_renderer)

        if caching_enabled:
            es_file_resolver = ElasticsearchFileNameResolver(
                dist_cfg, template_renderer)
            source_supplier = CachedSourceSupplier(source_distributions_root,
                                                   source_supplier,
                                                   es_file_resolver)

        suppliers.append(source_supplier)
        repo = None
    else:
        es_src_dir = None
        repo = DistributionRepository(name=cfg.opts("mechanic",
                                                    "distribution.repository"),
                                      distribution_config=dist_cfg,
                                      template_renderer=template_renderer)
        suppliers.append(
            ElasticsearchDistributionSupplier(repo, es_version,
                                              distributions_root))

    for plugin in plugins:
        supplier_type, plugin_version, _ = supply_requirements[plugin.name]

        if supplier_type == "source":
            if CorePluginSourceSupplier.can_handle(plugin):
                logger.info("Adding core plugin source supplier for [%s].",
                            plugin.name)
                assert es_src_dir is not None, f"Cannot build core plugin {plugin.name} when Elasticsearch is not built from source."
                plugin_supplier = CorePluginSourceSupplier(
                    plugin, es_src_dir, builder)
            elif ExternalPluginSourceSupplier.can_handle(plugin):
                logger.info("Adding external plugin source supplier for [%s].",
                            plugin.name)
                plugin_supplier = ExternalPluginSourceSupplier(
                    plugin, plugin_version, _src_dir(cfg, mandatory=False),
                    src_config, builder)
            else:
                raise exceptions.RallyError(
                    "Plugin %s can neither be treated as core nor as external plugin. Requirements: %s"
                    % (plugin.name, supply_requirements[plugin.name]))

            if caching_enabled:
                plugin_file_resolver = PluginFileNameResolver(
                    plugin.name, plugin_version)
                plugin_supplier = CachedSourceSupplier(
                    source_distributions_root, plugin_supplier,
                    plugin_file_resolver)
            suppliers.append(plugin_supplier)
        else:
            logger.info("Adding plugin distribution supplier for [%s].",
                        plugin.name)
            assert repo is not None, "Cannot benchmark plugin %s from a distribution version but Elasticsearch from sources" % plugin.name
            suppliers.append(PluginDistributionSupplier(repo, plugin))

    return CompositeSupplier(suppliers)
Example no. 18
    def run(self, lap):
        """
        Runs the provided lap of a benchmark.

        :param lap: The current lap number.
        :return: True iff the benchmark may go on. False iff the user has cancelled the benchmark.
        """
        self.metrics_store.lap = lap
        logger.info("Notifying mechanic of benchmark start.")
        # we could use #tell() here but then the ask call to driver below will fail because it returns the response that mechanic
        # sends (see http://godaddy.github.io/Thespian/doc/using.html#sec-6-6-1).
        self.actor_system.ask(self.mechanic, mechanic.OnBenchmarkStart(lap))
        logger.info("Asking driver to start benchmark.")
        main_driver = self.actor_system.createActor(
            driver.DriverActor,
            targetActorRequirements={"coordinator": True},
            globalName="/rally/driver/coordinator")
        try:
            result = self.actor_system.ask(
                main_driver,
                driver.StartBenchmark(self.cfg, self.race.track,
                                      self.metrics_store.meta_info, lap))
        except KeyboardInterrupt:
            logger.info("User has cancelled the benchmark.")
            self.actor_system.send(main_driver, driver.BenchmarkCancelled())
            return False
        finally:
            logger.info(
                "Race control has received a benchmark result message. Terminating main driver actor."
            )
            import thespian.actors
            self.actor_system.tell(main_driver,
                                   thespian.actors.ActorExitRequest())

        if isinstance(result, driver.BenchmarkComplete):
            logger.info("Benchmark is complete.")
            logger.info("Bulk adding request metrics to metrics store.")
            self.metrics_store.bulk_add(result.metrics)
            stop_result = self.actor_system.ask(self.mechanic,
                                                mechanic.OnBenchmarkStop())
            if isinstance(stop_result, mechanic.BenchmarkStopped):
                logger.info("Bulk adding system metrics to metrics store.")
                self.metrics_store.bulk_add(stop_result.system_metrics)
            else:
                raise exceptions.RallyError(
                    "Mechanic has returned no metrics but instead [%s]. Terminating race without result."
                    % str(stop_result))

            logger.info("Flushing metrics data...")
            self.metrics_store.flush()
            logger.info("Flushing done")
        # may happen if one of the load generators has detected that the user has cancelled the benchmark.
        elif isinstance(result, driver.BenchmarkCancelled):
            logger.info("User has cancelled the benchmark.")
            return False
        elif isinstance(result, driver.BenchmarkFailure):
            logger.info("Driver has reported a benchmark failure.")
            raise exceptions.RallyError(result.message, result.cause)
        else:
            raise exceptions.RallyError(
                "Driver has returned no metrics but instead [%s]. Terminating race without result."
                % str(result))
        return True
Example no. 19
    def size(self):
        raise exceptions.RallyError(
            "Do not use a BulkIndexParamSource without partitioning")
Example no. 20
def runner_for(operation_type):
    try:
        return __RUNNERS[operation_type]
    except KeyError:
        raise exceptions.RallyError("No runner available for operation type [%s]" % operation_type)
Example no. 21
def start(args):
    if actor.actor_system_already_running():
        raise exceptions.RallyError("An actor system appears to be already running.")
    actor.bootstrap_actor_system(local_ip=args.node_ip, coordinator_ip=args.coordinator_ip)
    console.info("Successfully started actor system on node [%s] with coordinator node IP [%s]." % (args.node_ip, args.coordinator_ip))
Example no. 22
    def receiveMessage(self, msg, sender):
        try:
            logger.debug(
                "MechanicActor#receiveMessage(msg = [%s] sender = [%s])" %
                (str(type(msg)), str(sender)))
            if isinstance(msg, StartEngine):
                logger.info(
                    "Received signal from race control to start engine.")
                self.race_control = sender
                # In our startup procedure we first create all mechanics. Only if this succeeds do we send them their start messages.
                mechanics_and_start_message = []

                if msg.external:
                    logger.info(
                        "Target node(s) will not be provisioned by Rally.")
                    # just create one actor for this special case and run it on the coordinator node (i.e. here)
                    m = self.createActor(
                        LocalNodeMechanicActor,
                        globalName="/rally/mechanic/worker/external",
                        targetActorRequirements={"coordinator": True})
                    self.mechanics.append(m)
                    # we can use the original message in this case
                    mechanics_and_start_message.append((m, msg))
                else:
                    hosts = msg.cfg.opts("client", "hosts")
                    logger.info(
                        "Target node(s) %s will be provisioned by Rally." %
                        hosts)
                    if len(hosts) == 0:
                        raise exceptions.LaunchError(
                            "No target hosts are configured.")
                    for host in hosts:
                        ip = host["host"]
                        port = int(host["port"])
                        # user may specify "localhost" on the command line but the problem is that we auto-register the actor system
                        # with "ip": "127.0.0.1" so we convert this special case automatically. In all other cases the user needs to
                        # start the actor system on the other host and is aware that the parameter for the actor system and the
                        # --target-hosts parameter need to match.
                        if ip == "localhost" or ip == "127.0.0.1":
                            m = self.createActor(
                                LocalNodeMechanicActor,
                                globalName="/rally/mechanic/worker/localhost",
                                targetActorRequirements={"coordinator": True})
                            self.mechanics.append(m)
                            mechanics_and_start_message.append(
                                (m, msg.with_port(port)))
                        else:
                            if msg.cfg.opts("system",
                                            "remote.benchmarking.supported"):
                                logger.info(
                                    "Benchmarking against %s with external Rally daemon."
                                    % hosts)
                            else:
                                logger.error(
                                    "User tried to benchmark against %s but no external Rally daemon has been started."
                                    % hosts)
                                raise exceptions.SystemSetupError(
                                    "To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                    "on each machine including this one." % ip)
                            already_running = actor.actor_system_already_running(
                                ip=ip)
                            logger.info(
                                "Actor system on [%s] already running? [%s]" %
                                (ip, str(already_running)))
                            if not already_running:
                                console.println(
                                    "Waiting for Rally daemon on [%s] " % ip,
                                    end="",
                                    flush=True)
                            while not actor.actor_system_already_running(
                                    ip=ip):
                                console.println(".", end="", flush=True)
                                time.sleep(3)
                            if not already_running:
                                console.println(" [OK]")
                            m = self.createActor(
                                RemoteNodeMechanicActor,
                                globalName="/rally/mechanic/worker/%s" % ip,
                                targetActorRequirements={"ip": ip})
                            mechanics_and_start_message.append(
                                (m, msg.with_port(port)))
                            self.mechanics.append(m)
                for mechanic_actor, start_message in mechanics_and_start_message:
                    self.send(mechanic_actor, start_message)
            elif isinstance(msg, EngineStarted):
                self.send(self.race_control, msg)
            elif isinstance(msg, OnBenchmarkStart):
                for m in self.mechanics:
                    self.send(m, msg)
            elif isinstance(msg, Success):
                self.send(self.race_control, msg)
            elif isinstance(msg, Failure):
                self.send(self.race_control, msg)
            elif isinstance(msg, OnBenchmarkStop):
                for m in self.mechanics:
                    self.send(m, msg)
            elif isinstance(msg, BenchmarkStopped):
                # TODO dm: Actually we need to wait for all BenchmarkStopped messages from all our mechanic actors
                # TODO dm: We will actually duplicate cluster level metrics if each of our mechanic actors gathers these...
                self.send(self.race_control, msg)
            elif isinstance(msg, StopEngine):
                for m in self.mechanics:
                    self.send(m, msg)
            elif isinstance(msg, EngineStopped):
                self.send(self.race_control, msg)
                # clear all state as the mechanic might get reused later
                for m in self.mechanics:
                    self.send(m, thespian.actors.ActorExitRequest())
                self.mechanics = []
                # self terminate + slave nodes
                self.send(self.myAddress, thespian.actors.ActorExitRequest())
            elif isinstance(msg, thespian.actors.ChildActorExited):
                # TODO dm: Depending on our state model this can be fine (e.g. when it exited due to our ActorExitRequest message)
                # or it could be problematic and mean that an exception has occurred.
                pass
            elif isinstance(msg, thespian.actors.PoisonMessage):
                # something went wrong with a child actor
                if isinstance(msg.poisonMessage, StartEngine):
                    raise exceptions.LaunchError(
                        "Could not start benchmark candidate. Are Rally daemons on all targeted machines running?"
                    )
                else:
                    logger.error(
                        "[%s] sent to a child actor has resulted in PoisonMessage"
                        % str(msg.poisonMessage))
                    raise exceptions.RallyError(
                        "Could not communicate with benchmark candidate (unknown reason)"
                    )
        except BaseException:
            logger.exception("Cannot process message [%s]" % msg)
            # usually, we'll notify the sender but in case a child sent something that caused an exception we'd rather
            # have it bubble up to race control. Otherwise, we could play ping-pong with our child actor.
            recipient = self.race_control if sender in self.mechanics else sender
            ex_type, ex_value, ex_traceback = sys.exc_info()
            # avoid "can't pickle traceback objects"
            import traceback
            self.send(
                recipient,
                Failure("Could not execute command (%s)" % ex_value,
                        traceback.format_exc()))
Example no. 23
    def partition(self, partition_index, total_partitions):
        raise exceptions.RallyError(
            "Cannot partition a PartitionBulkIndexParamSource further")
Example no. 24
 def receiveMessage(self, msg, sender):
     try:
         logger.debug(
             "MechanicActor#receiveMessage(msg = [%s] sender = [%s])" %
             (str(type(msg)), str(sender)))
         if isinstance(msg, StartEngine):
             self.on_start_engine(msg, sender)
         elif isinstance(msg, NodesStarted):
             self.metrics_store.merge_meta_info(msg.system_meta_info)
             self.transition_when_all_children_responded(
                 sender, msg, "starting", "nodes_started",
                 self.on_all_nodes_started)
         elif isinstance(msg, MetricsMetaInfoApplied):
             self.transition_when_all_children_responded(
                 sender, msg, "apply_meta_info", "cluster_started",
                 self.on_cluster_started)
         elif isinstance(msg, OnBenchmarkStart):
             self.metrics_store.lap = msg.lap
             self.cluster.on_benchmark_start()
             # in the first lap, we are in state "cluster_started", after that in "benchmark_stopped"
             self.send_to_children_and_transition(
                 sender, msg, ["cluster_started", "benchmark_stopped"],
                 "benchmark_starting")
         elif isinstance(msg, BenchmarkStarted):
             self.transition_when_all_children_responded(
                 sender, msg, "benchmark_starting", "benchmark_started",
                 self.on_benchmark_started)
         elif isinstance(msg, Failure):
             self.send(self.race_control, msg)
         elif isinstance(msg, OnBenchmarkStop):
             self.send_to_children_and_transition(sender, msg,
                                                  "benchmark_started",
                                                  "benchmark_stopping")
         elif isinstance(msg, BenchmarkStopped):
             self.metrics_store.bulk_add(msg.system_metrics)
             self.transition_when_all_children_responded(
                 sender, msg, "benchmark_stopping", "benchmark_stopped",
                 self.on_benchmark_stopped)
         elif isinstance(msg, StopEngine):
             # detach from cluster and gather all system metrics
             self.cluster_launcher.stop(self.cluster)
             # we might have experienced a launch error, hence we need to allow to stop the cluster also after a launch
             self.send_to_children_and_transition(
                 sender, StopNodes(),
                 ["nodes_started", "benchmark_stopped"], "cluster_stopping")
         elif isinstance(msg, NodesStopped):
             self.metrics_store.bulk_add(msg.system_metrics)
             self.transition_when_all_children_responded(
                 sender, msg, "cluster_stopping", "cluster_stopped",
                 self.on_all_nodes_stopped)
         elif isinstance(msg, thespian.actors.ChildActorExited):
             if self.is_current_status_expected("cluster_stopping"):
                 logger.info(
                     "Child actor exited while engine is stopping: [%s]" %
                     msg)
             else:
                 raise exceptions.RallyError(
                     "Child actor exited with [%s] while in status [%s]." %
                     (msg, self.status))
         elif isinstance(msg, thespian.actors.PoisonMessage):
             # something went wrong with a child actor
             if isinstance(msg.poisonMessage, StartEngine):
                 raise exceptions.LaunchError(
                     "Could not start benchmark candidate. Are Rally daemons on all targeted machines running?"
                 )
             else:
                 logger.error(
                     "[%s] sent to a child actor has resulted in PoisonMessage"
                     % str(msg.poisonMessage))
                 raise exceptions.RallyError(
                     "Could not communicate with benchmark candidate (unknown reason)"
                 )
     except BaseException:
         logger.exception("Cannot process message [%s]" % msg)
         # usually, we'll notify the sender but in case a child sent something that caused an exception we'd rather
         # have it bubble up to race control. Otherwise, we could play ping-pong with our child actor.
         recipient = self.race_control if sender in self.mechanics else sender
         ex_type, ex_value, ex_traceback = sys.exc_info()
         # avoid "can't pickle traceback objects"
         import traceback
         self.send(
             recipient,
             Failure("Could not execute command (%s)" % ex_value,
                     traceback.format_exc()))