Ejemplo n.º 1
0
def version():
    """
    :return: The release version string (``__version__``). If ``git.is_working_copy`` reports a working copy for the
             parent directory of ``paths.rally_root()``, the stripped head revision is appended as a suffix.
    """
    release = __version__
    # noinspection PyBroadException
    try:
        if git.is_working_copy(io.normalize_path("%s/.." % paths.rally_root())):
            head_revision = git.head_revision(paths.rally_root())
            return "%s (git revision: %s)" % (release, head_revision.strip())
    except Exception:
        # Best effort only: any error while probing git falls back to the plain release string. We deliberately
        # catch ``Exception`` instead of ``BaseException`` so that KeyboardInterrupt and SystemExit still propagate.
        pass
    # cannot determine head revision so user has probably installed Rally via pip instead of git clone
    return release
Ejemplo n.º 2
0
def revision():
    """
    :return: The current git revision if Rally is installed in development mode or ``None``.
    """
    # noinspection PyBroadException
    try:
        if git.is_working_copy(io.normalize_path("%s/.." % paths.rally_root())):
            return git.head_revision(paths.rally_root()).strip()
    except Exception:
        # Best effort only: any error while probing git is treated as "no revision available". We deliberately catch
        # ``Exception`` instead of ``BaseException`` so that KeyboardInterrupt and SystemExit still propagate.
        pass
    return None
Ejemplo n.º 3
0
def version():
    """
    :return: The release version string and an optional suffix for the current git revision if Rally is installed in development mode.
    """
    release = __version__
    # noinspection PyBroadException
    try:
        if git.is_working_copy(io.normalize_path("%s/.." % paths.rally_root())):
            # named ``head_revision`` so the local does not reuse the name ``revision``
            head_revision = git.head_revision(paths.rally_root())
            return "%s (git revision: %s)" % (release, head_revision.strip())
    except Exception:
        # Best effort only: any error while probing git falls back to the plain release string. We deliberately
        # catch ``Exception`` instead of ``BaseException`` so that KeyboardInterrupt and SystemExit still propagate.
        pass
    # cannot determine head revision so user has probably installed Rally via pip instead of git clone
    return release
Ejemplo n.º 4
0
    def receiveMsg_StartNodes(self, msg, sender):
        """
        Handles a ``StartNodes`` message.

        Loads the node-local configuration, opens a metrics store, creates the mechanic and starts the engine. On
        success a ``NodesStarted`` message is sent to ``msg.reply_to`` (or to ``sender`` if the message has no such
        attribute); on any ``Exception`` an ``actor.BenchmarkFailure`` is sent to the same recipient instead.

        :param msg: The message to process.
        :param sender: The sender of the message; used as reply address if ``msg`` has no ``reply_to`` attribute.
        """
        try:
            self.host = msg.ip
            if not msg.external:
                self.logger.info("Starting node(s) %s on [%s].", msg.node_ids, msg.ip)
            else:
                self.logger.info("Connecting to externally provisioned nodes on [%s].", msg.ip)

            # Load node-specific configuration
            self.config = config.auto_load_local_config(
                msg.cfg,
                additional_sections=[
                    # only copy the relevant bits
                    "track",
                    "mechanic",
                    "client",
                    # allow metrics store to extract race meta-data
                    "race",
                    "source",
                ],
            )
            # set root path (normally done by the main entry point)
            self.config.add(config.Scope.application, "node", "rally.root", paths.rally_root())
            if not msg.external:
                self.config.add(config.Scope.benchmark, "provisioning", "node.ip", msg.ip)
                # we need to override the port with the value that the user has specified instead of using the default value (39200)
                self.config.add(config.Scope.benchmark, "provisioning", "node.http.port", msg.port)
                self.config.add(config.Scope.benchmark, "provisioning", "node.ids", msg.node_ids)

            store_class = metrics.metrics_store_class(self.config)
            self.metrics_store = store_class(self.config)
            self.metrics_store.open(ctx=msg.open_metrics_context)
            # avoid follow-up errors in case we receive an unexpected ActorExitRequest due to an early failure in a parent actor.
            self.metrics_store.lap = 0

            self.mechanic = create(
                self.config,
                self.metrics_store,
                msg.all_node_ips,
                msg.cluster_settings,
                msg.sources,
                msg.build,
                msg.distribution,
                msg.external,
                msg.docker,
            )
            started_nodes = self.mechanic.start_engine()
            self.running = True
            recipient = getattr(msg, "reply_to", sender)
            node_infos = [NodeMetaInfo(started) for started in started_nodes]
            self.send(recipient, NodesStarted(node_infos, self.metrics_store.meta_info))
        except Exception as cause:
            self.logger.exception("Cannot process message [%s]", msg)
            # avoid "can't pickle traceback objects": send the formatted traceback text instead of the traceback object
            import traceback
            self.send(getattr(msg, "reply_to", sender), actor.BenchmarkFailure(cause, traceback.format_exc()))
Ejemplo n.º 5
0
def main():
    """
    Command line entry point.

    Sets up logging and console output, parses the command line, loads (and if necessary installs) the configuration,
    checks for an Internet connection unless ``args.offline`` is set and dispatches the sub-command. Afterwards a
    summary line with the elapsed time is printed. The process exits with status 130 if the result is
    ``ExitStatus.INTERRUPTED`` and with status 64 if it is ``ExitStatus.ERROR``.
    """
    check_python_version()
    log.install_default_log_config()
    log.configure_logging()
    logger = logging.getLogger(__name__)
    started_at = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)

    parser = create_arg_parser()
    args = parser.parse_args()

    # re-initialize now that we know whether the user asked for quiet output
    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    if not cfg.config_present():
        cfg.install_default_config()
    cfg.load_config(auto_upgrade=True)
    cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    logger.info("OS [%s]", str(platform.uname()))
    logger.info("Python [%s]", str(sys.implementation))
    logger.info("Rally version [%s]", version.version())
    logger.debug("Command line arguments: %s", args)
    # Configure networking
    net.init()
    if not args.offline:
        url_to_probe = cfg.opts("system", "probing.url", default_value="https://github.com", mandatory=False)
        if net.has_internet_connection(url_to_probe):
            logger.info("Detected a working Internet connection.")
        else:
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)

    status = dispatch_sub_command(parser, args, cfg)

    elapsed = time.time() - started_at
    if status == ExitStatus.SUCCESSFUL:
        summary, exit_code = "SUCCESS (took %d seconds)", None
    elif status == ExitStatus.INTERRUPTED:
        summary, exit_code = "ABORTED (took %d seconds)", 130
    elif status == ExitStatus.ERROR:
        summary, exit_code = "FAILURE (took %d seconds)", 64
    else:
        # any other status is neither reported nor turned into an exit code
        return

    console.println("")
    console.info(summary % elapsed, overline="-", underline="-")
    if exit_code is not None:
        sys.exit(exit_code)
Ejemplo n.º 6
0
    def receiveMsg_StartNodes(self, msg, sender):
        """
        Handles a ``StartNodes`` message.

        Loads the node-local configuration, opens a metrics store, creates the mechanic, starts the engine and
        schedules a wakeup after ``METRIC_FLUSH_INTERVAL_SECONDS``. On success a ``NodesStarted`` message is sent to
        ``msg.reply_to`` (or to ``sender`` if the message has no such attribute); on any ``Exception`` an
        ``actor.BenchmarkFailure`` is sent to the same recipient instead.

        :param msg: The message to process.
        :param sender: The sender of the message; used as reply address if ``msg`` has no ``reply_to`` attribute.
        """
        try:
            self.host = msg.ip
            if not msg.external:
                self.logger.info("Starting node(s) %s on [%s].", msg.node_ids, msg.ip)
            else:
                self.logger.info("Connecting to externally provisioned nodes on [%s].", msg.ip)

            # Load node-specific configuration
            node_cfg = config.auto_load_local_config(
                msg.cfg,
                additional_sections=[
                    # only copy the relevant bits
                    "track",
                    "mechanic",
                    "client",
                    "telemetry",
                    # allow metrics store to extract race meta-data
                    "race",
                    "source",
                ],
            )
            # set root path (normally done by the main entry point)
            node_cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
            if not msg.external:
                node_cfg.add(config.Scope.benchmark, "provisioning", "node.ids", msg.node_ids)

            store_class = metrics.metrics_store_class(node_cfg)
            store = store_class(node_cfg)
            store.open(ctx=msg.open_metrics_context)

            self.mechanic = create(
                node_cfg,
                store,
                msg.ip,
                msg.port,
                msg.all_node_ips,
                msg.all_node_ids,
                msg.sources,
                msg.distribution,
                msg.external,
                msg.docker,
            )
            self.mechanic.start_engine()
            self.wakeupAfter(METRIC_FLUSH_INTERVAL_SECONDS)
            self.send(getattr(msg, "reply_to", sender), NodesStarted())
        except Exception as cause:
            self.logger.exception("Cannot process message [%s]", msg)
            # avoid "can't pickle traceback objects": send the formatted traceback text instead of the traceback object
            self.send(getattr(msg, "reply_to", sender), actor.BenchmarkFailure(cause, traceback.format_exc()))
Ejemplo n.º 7
0
def main():
    """
    Command line entry point.

    Sets up logging and console output, parses the command line and transfers the parsed arguments into the
    configuration object. Unless ``args.offline`` is set, it checks for an Internet connection and switches to offline
    mode if there is none. Finally the sub-command is dispatched and a SUCCESS or FAILURE summary including the elapsed
    time is printed.

    The process exits with status 1 on inconsistent command line arguments and with status 64 if the sub-command
    did not succeed.
    """
    check_python_version()
    log.install_default_log_config()
    log.configure_logging()
    logger = logging.getLogger(__name__)
    start = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)

    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    # re-initialize now that we know whether the user asked for quiet output
    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    if args.effective_start_date:
        cfg.add(config.Scope.application, "system", "time.start", args.effective_start_date)
        cfg.add(config.Scope.application, "system", "time.start.user_provided", True)
    else:
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
        cfg.add(config.Scope.application, "system", "time.start.user_provided", False)

    cfg.add(config.Scope.applicationOverride, "system", "trial.id", str(uuid.uuid4()))
    cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)
    cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)

    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    cfg.add(config.Scope.applicationOverride, "mechanic", "source.revision", args.revision)
    if args.distribution_version:
        cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.version", args.distribution_version)
    cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.repository", args.distribution_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.names", opts.csv_to_list(args.car))
    if args.team_path:
        cfg.add(config.Scope.applicationOverride, "mechanic", "team.path", os.path.abspath(io.normalize_path(args.team_path)))
        cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", None)
    else:
        cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", args.team_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.plugins", opts.csv_to_list(args.elasticsearch_plugins))
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.params", opts.to_dict(args.car_params))
    cfg.add(config.Scope.applicationOverride, "mechanic", "plugin.params", opts.to_dict(args.plugin_params))
    if args.keep_cluster_running:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", True)
        # force-preserve the cluster nodes.
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", True)
    else:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", False)
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", convert.to_bool(args.preserve_install))
    cfg.add(config.Scope.applicationOverride, "mechanic", "runtime.jdk", args.runtime_jdk)
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.devices", opts.csv_to_list(args.telemetry))
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.params", opts.to_dict(args.telemetry_params))

    cfg.add(config.Scope.applicationOverride, "race", "pipeline", args.pipeline)
    cfg.add(config.Scope.applicationOverride, "race", "laps", args.laps)
    cfg.add(config.Scope.applicationOverride, "race", "user.tag", args.user_tag)

    # We can assume here that if a track-path is given, the user did not specify a repository either (although argparse sets it to
    # its default value)
    if args.track_path:
        cfg.add(config.Scope.applicationOverride, "track", "track.path", os.path.abspath(io.normalize_path(args.track_path)))
        cfg.add(config.Scope.applicationOverride, "track", "repository.name", None)
        if args.track:
            # stay as close as possible to argparse errors although we have a custom validation.
            arg_parser.error("argument --track not allowed with argument --track-path")
    else:
        cfg.add(config.Scope.applicationOverride, "track", "repository.name", args.track_repository)
        # set the default programmatically because we need to determine whether the user has provided a value
        chosen_track = args.track if args.track else "geonames"
        cfg.add(config.Scope.applicationOverride, "track", "track.name", chosen_track)

    cfg.add(config.Scope.applicationOverride, "track", "params", opts.to_dict(args.track_params))
    cfg.add(config.Scope.applicationOverride, "track", "challenge.name", args.challenge)
    cfg.add(config.Scope.applicationOverride, "track", "include.tasks", opts.csv_to_list(args.include_tasks))
    cfg.add(config.Scope.applicationOverride, "track", "test.mode.enabled", args.test_mode)

    cfg.add(config.Scope.applicationOverride, "reporting", "format", args.report_format)
    cfg.add(config.Scope.applicationOverride, "reporting", "values", args.show_in_report)
    cfg.add(config.Scope.applicationOverride, "reporting", "output.path", args.report_file)
    if sub_command == "compare":
        cfg.add(config.Scope.applicationOverride, "reporting", "baseline.timestamp", args.baseline)
        cfg.add(config.Scope.applicationOverride, "reporting", "contender.timestamp", args.contender)
    if sub_command == "generate":
        cfg.add(config.Scope.applicationOverride, "generator", "chart.type", args.chart_type)
        cfg.add(config.Scope.applicationOverride, "generator", "output.path", args.output_path)

        if args.chart_spec_path and (args.track or args.challenge or args.car or args.node_count):
            console.println(
                "You need to specify either --chart-spec-path or --track, --challenge, --car and "
                "--node-count but not both.")
            # sys.exit instead of the ``exit`` helper: the latter is injected by the ``site`` module and is meant
            # for interactive use only.
            sys.exit(1)
        if args.chart_spec_path:
            cfg.add(config.Scope.applicationOverride, "generator", "chart.spec.path", args.chart_spec_path)
        else:
            # other options are stored elsewhere already
            cfg.add(config.Scope.applicationOverride, "generator", "node.count", args.node_count)

    cfg.add(config.Scope.applicationOverride, "driver", "profiling", args.enable_driver_profiling)
    cfg.add(config.Scope.applicationOverride, "driver", "on.error", args.on_error)
    cfg.add(config.Scope.applicationOverride, "driver", "load_driver_hosts", opts.csv_to_list(args.load_driver_hosts))
    if sub_command != "list":
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        target_hosts = opts.TargetHosts(args.target_hosts)
        cfg.add(config.Scope.applicationOverride, "client", "hosts", target_hosts)
        client_options = opts.ClientOptions(args.client_options, target_hosts=target_hosts)
        cfg.add(config.Scope.applicationOverride, "client", "options", client_options)
        if "timeout" not in client_options.default:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")
        if list(target_hosts.all_hosts) != list(client_options.all_client_options):
            console.println("--target-hosts and --client-options must define the same keys for multi cluster setups.")
            # see above: prefer sys.exit over the interactive ``exit`` helper
            sys.exit(1)
    # split by component?
    if sub_command == "list":
        cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration)
        cfg.add(config.Scope.applicationOverride, "system", "list.races.max_results", args.limit)

    logger.info("OS [%s]", str(os.uname()))
    logger.info("Python [%s]", str(sys.implementation))
    logger.info("Rally version [%s]", version.version())
    logger.info("Command line arguments: %s", args)
    # Configure networking
    net.init()
    if not args.offline:
        if not net.has_internet_connection():
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    success = dispatch_sub_command(cfg, sub_command)

    end = time.time()
    if success:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    else:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        sys.exit(64)
Ejemplo n.º 8
0
    def receiveMessage(self, msg, sender):
        """
        Dispatches on the type of ``msg`` and processes it synchronously.

        Handles ``StartNodes``, ``ApplyMetricsMetaInfo``, ``ResetRelativeTime``, ``OnBenchmarkStart``,
        ``OnBenchmarkStop``, ``StopNodes`` and ``thespian.actors.ActorExitRequest``; any other message type is ignored
        apart from the debug log line. If processing raises, ``self.running`` is reset and an
        ``actor.BenchmarkFailure`` carrying the exception and the formatted traceback is sent to ``sender``.

        :param msg: The message to process.
        :param sender: The sender of the message; replies are sent to this address.
        """
        # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
        # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
        # noinspection PyBroadException
        try:
            # Log arguments are passed lazily (not pre-formatted with ``%``) so that a tuple-valued argument cannot
            # break the formatting and no formatting work is done for disabled log levels.
            logger.debug("NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])", type(msg), sender)
            if isinstance(msg, StartNodes):
                self.host = msg.ip
                if msg.external:
                    logger.info("Connecting to externally provisioned nodes on [%s].", msg.ip)
                else:
                    logger.info("Starting node(s) %s on [%s].", msg.node_ids, msg.ip)

                # Load node-specific configuration
                self.config = config.auto_load_local_config(
                    msg.cfg,
                    additional_sections=[
                        # only copy the relevant bits
                        "track",
                        "mechanic",
                        "client",
                        # allow metrics store to extract race meta-data
                        "race",
                        "source"
                    ])
                # set root path (normally done by the main entry point)
                self.config.add(config.Scope.application, "node", "rally.root", paths.rally_root())
                if not msg.external:
                    self.config.add(config.Scope.benchmark, "provisioning", "node.ip", msg.ip)
                    # we need to override the port with the value that the user has specified instead of using the default value (39200)
                    self.config.add(config.Scope.benchmark, "provisioning", "node.http.port", msg.port)
                    self.config.add(config.Scope.benchmark, "provisioning", "node.ids", msg.node_ids)

                self.metrics_store = metrics.InMemoryMetricsStore(self.config)
                self.metrics_store.open(ctx=msg.open_metrics_context)
                # avoid follow-up errors in case we receive an unexpected ActorExitRequest due to an early failure in a parent actor.
                self.metrics_store.lap = 0

                self.mechanic = create(self.config, self.metrics_store,
                                       msg.all_node_ips, msg.cluster_settings,
                                       msg.sources, msg.build,
                                       msg.distribution, msg.external,
                                       msg.docker)
                nodes = self.mechanic.start_engine()
                self.running = True
                self.send(sender, NodesStarted([NodeMetaInfo(node) for node in nodes], self.metrics_store.meta_info))
            elif isinstance(msg, ApplyMetricsMetaInfo):
                self.metrics_store.merge_meta_info(msg.meta_info)
                self.send(sender, MetricsMetaInfoApplied())
            elif isinstance(msg, ResetRelativeTime):
                logger.info("Resetting relative time of system metrics store on host [%s].", self.host)
                self.metrics_store.reset_relative_time()
            elif isinstance(msg, OnBenchmarkStart):
                self.metrics_store.lap = msg.lap
                self.mechanic.on_benchmark_start()
                self.send(sender, BenchmarkStarted())
            elif isinstance(msg, OnBenchmarkStop):
                self.mechanic.on_benchmark_stop()
                # clear metrics store data to not send duplicate system metrics data
                self.send(sender, BenchmarkStopped(self.metrics_store.to_externalizable(clear=True)))
            elif isinstance(msg, StopNodes):
                logger.info("Stopping nodes %s.", self.mechanic.nodes)
                self.mechanic.stop_engine()
                self.send(sender, NodesStopped(self.metrics_store.to_externalizable()))
                # clear all state as the mechanic might get reused later
                self.running = False
                self.config = None
                self.mechanic = None
                self.metrics_store = None
            elif isinstance(msg, thespian.actors.ActorExitRequest):
                if self.running:
                    logger.info("Stopping nodes %s (due to ActorExitRequest)", self.mechanic.nodes)
                    self.mechanic.stop_engine()
                    self.running = False
        except BaseException as cause:
            self.running = False
            # lazy argument: formatting "..." % msg here would itself raise if msg happened to be a tuple
            logger.exception("Cannot process message [%s]", msg)
            # avoid "can't pickle traceback objects": send the formatted traceback text instead of the traceback object
            import traceback
            self.send(sender, actor.BenchmarkFailure(cause, traceback.format_exc()))
Ejemplo n.º 9
0
def main():
    """
    Command line entry point.

    Initializes console output, registers a SIGQUIT handler that dumps tracebacks to stderr, parses the command line and
    transfers the parsed arguments into the configuration object. Unless ``args.offline`` is set, it checks for an
    Internet connection and switches to offline mode if there is none. After trying to terminate running Rally
    instances, the sub-command is dispatched and a SUCCESS or FAILURE summary including the elapsed time is printed.

    The process exits with status 64 if the sub-command did not succeed.
    """
    check_python_version()

    start = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)
    # allow to see a thread-dump on SIGQUIT
    faulthandler.register(signal.SIGQUIT, file=sys.stderr)

    pre_configure_logging()
    args = parse_args()

    # re-initialize now that we know whether the user asked for quiet output
    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    if args.effective_start_date:
        cfg.add(config.Scope.application, "system", "time.start", args.effective_start_date)
        cfg.add(config.Scope.application, "system", "time.start.user_provided", True)
    else:
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
        cfg.add(config.Scope.application, "system", "time.start.user_provided", False)

    cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)

    # per node?
    cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)
    cfg.add(config.Scope.applicationOverride, "system", "logging.output", args.logging)
    # only temporary to ignore unknown actor messages
    cfg.add(config.Scope.applicationOverride, "system", "ignore.unknown.return", args.ignore_unknown_return_values)

    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    cfg.add(config.Scope.applicationOverride, "mechanic", "source.revision", args.revision)
    # TODO dm: Consider renaming this one. It's used by different modules
    if args.distribution_version:
        cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.version", args.distribution_version)
    cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.repository", args.distribution_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", args.team_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.name", args.car)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.plugins", csv_to_list(args.elasticsearch_plugins))
    cfg.add(config.Scope.applicationOverride, "mechanic", "node.datapaths", csv_to_list(args.data_paths))
    cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", convert.to_bool(args.preserve_install))
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.devices", csv_to_list(args.telemetry))
    if args.override_src_dir is not None:
        cfg.add(config.Scope.applicationOverride, "source", "local.src.dir", args.override_src_dir)

    cfg.add(config.Scope.applicationOverride, "race", "pipeline", args.pipeline)
    cfg.add(config.Scope.applicationOverride, "race", "laps", args.laps)
    cfg.add(config.Scope.applicationOverride, "race", "user.tag", args.user_tag)

    cfg.add(config.Scope.applicationOverride, "track", "repository.name", args.track_repository)
    cfg.add(config.Scope.applicationOverride, "track", "track.name", args.track)
    cfg.add(config.Scope.applicationOverride, "track", "challenge.name", args.challenge)
    cfg.add(config.Scope.applicationOverride, "track", "test.mode.enabled", args.test_mode)
    # NOTE(review): this uses the bare ``to_bool`` whereas "preserve.install" above uses ``convert.to_bool`` -
    # presumably two different helpers; confirm before unifying.
    cfg.add(config.Scope.applicationOverride, "track", "auto_manage_indices", to_bool(args.auto_manage_indices))

    cfg.add(config.Scope.applicationOverride, "reporting", "format", args.report_format)
    cfg.add(config.Scope.applicationOverride, "reporting", "output.path", args.report_file)
    if sub_command == "compare":
        cfg.add(config.Scope.applicationOverride, "reporting", "baseline.timestamp", args.baseline)
        cfg.add(config.Scope.applicationOverride, "reporting", "contender.timestamp", args.contender)

    ################################
    # new section name: driver
    ################################
    cfg.add(config.Scope.applicationOverride, "benchmarks", "cluster.health", args.cluster_health)
    cfg.add(config.Scope.applicationOverride, "driver", "profiling", args.enable_driver_profiling)
    if sub_command != "list":
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        cfg.add(config.Scope.applicationOverride, "client", "hosts", _normalize_hosts(csv_to_list(args.target_hosts)))
        client_options = kv_to_map(csv_to_list(args.client_options))
        cfg.add(config.Scope.applicationOverride, "client", "options", client_options)
        if "timeout" not in client_options:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")

    # split by component?
    if sub_command == "list":
        cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration)
        cfg.add(config.Scope.applicationOverride, "system", "list.races.max_results", args.limit)

    configure_logging(cfg)
    # Log arguments are passed lazily instead of pre-formatting the message with ``%``.
    logger.info("OS [%s]", str(os.uname()))
    logger.info("Python [%s]", str(sys.implementation))
    logger.info("Rally version [%s]", version.version())
    logger.info("Command line arguments: %s", args)
    # Configure networking
    net.init()
    if not args.offline:
        if not net.has_internet_connection():
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
    # noinspection PyBroadException
    try:
        process.kill_running_rally_instances()
    except Exception:
        # Best effort only. ``Exception`` (not ``BaseException``) so that KeyboardInterrupt and SystemExit are not
        # swallowed here.
        logger.exception("Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")

    success = dispatch_sub_command(cfg, sub_command)

    end = time.time()
    if success:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    else:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        sys.exit(64)
Ejemplo n.º 10
0
    def receiveMessage(self, msg, sender):
        """
        Processes a single message sent to this actor and replies to ``sender``.

        The following message types are handled:

        * ``StartEngine``: creates a node-local configuration and an in-memory metrics store, starts the engine and replies with
          ``EngineStarted``.
        * ``OnBenchmarkStart``: stores the current lap in the metrics store, notifies the mechanic and replies with ``Success``.
        * ``OnBenchmarkStop``: notifies the mechanic and replies with ``BenchmarkStopped`` (the metrics store is cleared afterwards).
        * ``StopEngine``: stops the engine, replies with ``EngineStopped`` and resets the state of this actor.

        Messages of any other type are only logged on debug level. If processing fails, the error is logged and ``sender`` receives
        a ``Failure`` that contains the exception and the formatted stack trace.

        :param msg: The message to process.
        :param sender: The address of the sender. All replies are sent to this address.
        """
        # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
        # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
        # noinspection PyBroadException
        try:
            # let the logger format lazily; this is also robust if an argument happens to be a tuple
            logger.debug("NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])", type(msg), sender)
            if isinstance(msg, StartEngine):
                logger.info("Starting engine")
                # Load node-specific configuration
                self.config = config.Config(config_name=msg.cfg.name)
                self.config.load_config()
                self.config.add(config.Scope.application, "node", "rally.root",
                                paths.rally_root())
                # copy only the necessary configuration sections
                self.config.add_all(msg.cfg, "system")
                self.config.add_all(msg.cfg, "client")
                self.config.add_all(msg.cfg, "track")
                self.config.add_all(msg.cfg, "mechanic")
                if msg.port is not None:
                    # we need to override the port with the value that the user has specified instead of using the default value (39200)
                    self.config.add(config.Scope.benchmark, "provisioning",
                                    "node.http.port", msg.port)

                self.metrics_store = metrics.InMemoryMetricsStore(self.config)
                self.metrics_store.open(ctx=msg.open_metrics_context)

                self.mechanic = create(self.config, self.metrics_store,
                                       self.single_machine, msg.sources,
                                       msg.build, msg.distribution,
                                       msg.external, msg.docker)
                cluster = self.mechanic.start_engine()
                self.send(
                    sender,
                    EngineStarted(
                        ClusterMetaInfo(cluster.hosts, cluster.source_revision,
                                        cluster.distribution_version),
                        self.metrics_store.meta_info))
            elif isinstance(msg, OnBenchmarkStart):
                self.metrics_store.lap = msg.lap
                self.mechanic.on_benchmark_start()
                self.send(sender, Success())
            elif isinstance(msg, OnBenchmarkStop):
                self.mechanic.on_benchmark_stop()
                # clear metrics store data to not send duplicate system metrics data
                self.send(
                    sender,
                    BenchmarkStopped(
                        self.metrics_store.to_externalizable(clear=True)))
            elif isinstance(msg, StopEngine):
                logger.info("Stopping engine")
                self.mechanic.stop_engine()
                self.send(
                    sender,
                    EngineStopped(self.metrics_store.to_externalizable()))
                # clear all state as the mechanic might get reused later
                self.config = None
                self.mechanic = None
                self.metrics_store = None
        except BaseException as e:
            # Pass the message as a logging argument instead of formatting it eagerly: eager formatting raises for tuple messages
            # (or messages with a failing __str__) and the sender would then never be notified about the failure.
            logger.exception("Cannot process message [%s]", msg)
            # avoid "can't pickle traceback objects": send the formatted stack trace instead of the traceback object
            import traceback
            self.send(sender, Failure(e, traceback.format_exc()))
Ejemplo n.º 11
0
    def receiveMessage(self, msg, sender):
        """
        Processes a single message sent to this actor.

        The following message types are handled:

        * ``StartNodes``: creates a node-local configuration and an in-memory metrics store, starts the engine and replies with
          ``NodesStarted``.
        * ``ApplyMetricsMetaInfo``: merges the provided meta-info into the metrics store and replies with ``MetricsMetaInfoApplied``.
        * ``ResetRelativeTime``: resets the relative time of the metrics store (no reply is sent).
        * ``OnBenchmarkStart``: stores the current lap in the metrics store, notifies the mechanic and replies with ``BenchmarkStarted``.
        * ``OnBenchmarkStop``: notifies the mechanic and replies with ``BenchmarkStopped`` (the metrics store is cleared afterwards).
        * ``StopNodes``: stops the engine, replies with ``NodesStopped`` and resets the state of this actor.
        * ``thespian.actors.ActorExitRequest``: stops the engine if it is still marked as running (no reply is sent).

        Messages of any other type are only logged on debug level. If processing fails, this actor is marked as not running, the
        error is logged and ``sender`` receives an ``actor.BenchmarkFailure`` with the exception and the formatted stack trace.

        :param msg: The message to process.
        :param sender: The address of the sender. All replies are sent to this address.
        """
        # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
        # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
        # noinspection PyBroadException
        try:
            # let the logger format lazily; this is also robust if an argument happens to be a tuple
            logger.debug("NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])", type(msg), sender)
            if isinstance(msg, StartNodes):
                self.host = msg.ip
                if msg.external:
                    logger.info("Connecting to externally provisioned nodes on [%s].", msg.ip)
                else:
                    logger.info("Starting node(s) %s on [%s].", msg.node_ids, msg.ip)

                # Load node-specific configuration
                self.config = config.auto_load_local_config(msg.cfg, additional_sections=[
                    # only copy the relevant bits
                    "track", "mechanic", "client",
                    # allow metrics store to extract race meta-data
                    "race",
                    "source"
                ])
                # set root path (normally done by the main entry point)
                self.config.add(config.Scope.application, "node", "rally.root", paths.rally_root())
                if not msg.external:
                    self.config.add(config.Scope.benchmark, "provisioning", "node.ip", msg.ip)
                    # we need to override the port with the value that the user has specified instead of using the default value (39200)
                    self.config.add(config.Scope.benchmark, "provisioning", "node.http.port", msg.port)
                    self.config.add(config.Scope.benchmark, "provisioning", "node.ids", msg.node_ids)

                self.metrics_store = metrics.InMemoryMetricsStore(self.config)
                self.metrics_store.open(ctx=msg.open_metrics_context)
                # avoid follow-up errors in case we receive an unexpected ActorExitRequest due to an early failure in a parent actor.
                self.metrics_store.lap = 0

                self.mechanic = create(self.config, self.metrics_store, msg.all_node_ips, msg.cluster_settings, msg.sources, msg.build,
                                       msg.distribution, msg.external, msg.docker)
                nodes = self.mechanic.start_engine()
                self.running = True
                self.send(sender, NodesStarted([NodeMetaInfo(node) for node in nodes], self.metrics_store.meta_info))
            elif isinstance(msg, ApplyMetricsMetaInfo):
                self.metrics_store.merge_meta_info(msg.meta_info)
                self.send(sender, MetricsMetaInfoApplied())
            elif isinstance(msg, ResetRelativeTime):
                logger.info("Resetting relative time of system metrics store on host [%s].", self.host)
                self.metrics_store.reset_relative_time()
            elif isinstance(msg, OnBenchmarkStart):
                self.metrics_store.lap = msg.lap
                self.mechanic.on_benchmark_start()
                self.send(sender, BenchmarkStarted())
            elif isinstance(msg, OnBenchmarkStop):
                self.mechanic.on_benchmark_stop()
                # clear metrics store data to not send duplicate system metrics data
                self.send(sender, BenchmarkStopped(self.metrics_store.to_externalizable(clear=True)))
            elif isinstance(msg, StopNodes):
                logger.info("Stopping nodes %s.", self.mechanic.nodes)
                self.mechanic.stop_engine()
                self.send(sender, NodesStopped(self.metrics_store.to_externalizable()))
                # clear all state as the mechanic might get reused later
                self.running = False
                self.config = None
                self.mechanic = None
                self.metrics_store = None
            elif isinstance(msg, thespian.actors.ActorExitRequest):
                if self.running:
                    logger.info("Stopping nodes %s (due to ActorExitRequest)", self.mechanic.nodes)
                    self.mechanic.stop_engine()
                    self.running = False
        except BaseException as e:
            self.running = False
            # Pass the message as a logging argument instead of formatting it eagerly: eager formatting raises for tuple messages
            # (or messages with a failing __str__) and the sender would then never be notified about the failure.
            logger.exception("Cannot process message [%s]", msg)
            # avoid "can't pickle traceback objects": send the formatted stack trace instead of the traceback object
            import traceback
            self.send(sender, actor.BenchmarkFailure(e, traceback.format_exc()))
Ejemplo n.º 12
0
def main():
    """
    Command line entry point.

    Parses the command line arguments, transfers them into the configuration, prepares logging and networking and finally
    dispatches the chosen sub-command. The process exits with exit code 64 if the sub-command reports a failure.
    """
    check_python_version()

    started_at = time.time()

    # Initialize console output early so everything is shown consistently right from the start.
    console.init(quiet=False)
    # a thread-dump is written to stderr on SIGQUIT
    faulthandler.register(signal.SIGQUIT, file=sys.stderr)

    pre_configure_logging()
    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    # shorthands for the two configuration scopes that are used below
    app_scope = config.Scope.application
    override_scope = config.Scope.applicationOverride

    # fall back to the current time (UTC) unless the user has provided an explicit start date
    start_date_given = bool(args.effective_start_date)
    effective_start = args.effective_start_date if start_date_given else datetime.datetime.utcnow()
    cfg.add(app_scope, "system", "time.start", effective_start)
    cfg.add(app_scope, "system", "time.start.user_provided", start_date_given)

    cfg.add(override_scope, "system", "quiet.mode", args.quiet)

    # per node?
    cfg.add(override_scope, "system", "offline.mode", args.offline)
    cfg.add(override_scope, "system", "logging.output", args.logging)

    # Local config per node
    cfg.add(app_scope, "node", "rally.root", paths.rally_root())
    cfg.add(app_scope, "node", "rally.cwd", os.getcwd())

    # --- mechanic ---
    cfg.add(override_scope, "mechanic", "source.revision", args.revision)
    if args.distribution_version:
        cfg.add(override_scope, "mechanic", "distribution.version", args.distribution_version)
    cfg.add(override_scope, "mechanic", "distribution.repository", args.distribution_repository)
    cfg.add(override_scope, "mechanic", "repository.name", args.team_repository)
    cfg.add(override_scope, "mechanic", "car.names", csv_to_list(args.car))
    cfg.add(override_scope, "mechanic", "car.plugins", csv_to_list(args.elasticsearch_plugins))
    cfg.add(override_scope, "mechanic", "node.datapaths", csv_to_list(args.data_paths))
    keep_running = bool(args.keep_cluster_running)
    cfg.add(override_scope, "mechanic", "keep.running", keep_running)
    # a cluster that is kept running force-preserves its nodes; otherwise the user's choice is honored
    cfg.add(override_scope, "mechanic", "preserve.install", keep_running or convert.to_bool(args.preserve_install))
    cfg.add(override_scope, "mechanic", "telemetry.devices", csv_to_list(args.telemetry))

    # --- race ---
    cfg.add(override_scope, "race", "pipeline", args.pipeline)
    cfg.add(override_scope, "race", "laps", args.laps)
    cfg.add(override_scope, "race", "user.tag", args.user_tag)

    # --- track ---
    # We can assume here that if a track-path is given, the user did not specify a repository either (although argparse sets it to
    # its default value)
    # NOTE: "track.name" is only set when no track path is given and "track.path" only when one is given.
    if args.track_path:
        absolute_track_path = os.path.abspath(io.normalize_path(args.track_path))
        cfg.add(override_scope, "track", "track.path", absolute_track_path)
        cfg.add(override_scope, "track", "repository.name", None)
        if args.track:
            # stay as close as possible to argparse errors although we have a custom validation.
            arg_parser.error("argument --track not allowed with argument --track-path")
    else:
        cfg.add(override_scope, "track", "repository.name", args.track_repository)
        # set the default programmatically because we need to determine whether the user has provided a value
        cfg.add(override_scope, "track", "track.name", args.track or "geonames")

    cfg.add(override_scope, "track", "params", kv_to_map(csv_to_list(args.track_params)))
    cfg.add(override_scope, "track", "challenge.name", args.challenge)
    cfg.add(override_scope, "track", "include.tasks", csv_to_list(args.include_tasks))
    cfg.add(override_scope, "track", "test.mode.enabled", args.test_mode)
    cfg.add(override_scope, "track", "auto_manage_indices", to_bool(args.auto_manage_indices))

    # --- reporting ---
    cfg.add(override_scope, "reporting", "format", args.report_format)
    cfg.add(override_scope, "reporting", "values", args.show_in_report)
    cfg.add(override_scope, "reporting", "output.path", args.report_file)
    if sub_command == "compare":
        cfg.add(override_scope, "reporting", "baseline.timestamp", args.baseline)
        cfg.add(override_scope, "reporting", "contender.timestamp", args.contender)

    # --- driver ---
    cfg.add(override_scope, "driver", "cluster.health", args.cluster_health)
    if args.cluster_health != "green":
        console.warn("--cluster-health is deprecated and will be removed in a future version of Rally.")
    cfg.add(override_scope, "driver", "profiling", args.enable_driver_profiling)
    cfg.add(override_scope, "driver", "on.error", args.on_error)
    cfg.add(override_scope, "driver", "load_driver_hosts", csv_to_list(args.load_driver_hosts))

    # split by component?
    if sub_command == "list":
        cfg.add(override_scope, "system", "list.config.option", args.configuration)
        cfg.add(override_scope, "system", "list.races.max_results", args.limit)
    else:
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        cfg.add(override_scope, "client", "hosts", _normalize_hosts(csv_to_list(args.target_hosts)))
        client_options = kv_to_map(csv_to_list(args.client_options))
        cfg.add(override_scope, "client", "options", client_options)
        if "timeout" not in client_options:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")

    configure_logging(cfg)
    logger.info("OS [%s]" % str(os.uname()))
    logger.info("Python [%s]" % str(sys.implementation))
    logger.info("Rally version [%s]" % version.version())
    logger.info("Command line arguments: %s" % args)
    # Configure networking
    net.init()
    if not args.offline:
        if net.has_internet_connection():
            logger.info("Detected a working Internet connection.")
        else:
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(override_scope, "system", "offline.mode", True)

    # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
    # noinspection PyBroadException
    try:
        process.kill_running_rally_instances()
    except BaseException:
        logger.exception("Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")

    success = dispatch_sub_command(cfg, sub_command)

    elapsed = time.time() - started_at
    console.println("")
    outcome_template = "SUCCESS (took %d seconds)" if success else "FAILURE (took %d seconds)"
    console.info(outcome_template % elapsed, overline="-", underline="-")
    if not success:
        sys.exit(64)
Ejemplo n.º 13
0
def main():
    """
    Command line entry point.

    Parses the command line arguments, transfers them into the configuration, prepares logging and networking and finally
    dispatches the chosen sub-command. The process exits with exit code 64 if the sub-command reports a failure.
    """
    check_python_version()

    begin = time.time()

    # Initialize console output early so everything is shown consistently right from the start.
    console.init(quiet=False)
    # a thread-dump is written to stderr on SIGQUIT
    faulthandler.register(signal.SIGQUIT, file=sys.stderr)

    pre_configure_logging()
    args = parse_args()

    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    def application(section, key, value):
        # stores a setting in application scope
        cfg.add(config.Scope.application, section, key, value)

    def override(section, key, value):
        # stores a setting in application override scope
        cfg.add(config.Scope.applicationOverride, section, key, value)

    # fall back to the current time (UTC) unless the user has provided an explicit start date
    user_provided_start = bool(args.effective_start_date)
    application("system", "time.start", args.effective_start_date if user_provided_start else datetime.datetime.utcnow())
    application("system", "time.start.user_provided", user_provided_start)

    override("system", "quiet.mode", args.quiet)

    # per node?
    override("system", "offline.mode", args.offline)
    override("system", "logging.output", args.logging)

    # Local config per node
    application("node", "rally.root", paths.rally_root())
    application("node", "rally.cwd", os.getcwd())

    override("mechanic", "source.revision", args.revision)
    #TODO dm: Consider renaming this one. It's used by different modules
    if args.distribution_version:
        override("mechanic", "distribution.version", args.distribution_version)
    override("mechanic", "distribution.repository", args.distribution_repository)
    override("mechanic", "repository.name", args.team_repository)
    override("mechanic", "car.names", csv_to_list(args.car))
    override("mechanic", "car.plugins", csv_to_list(args.elasticsearch_plugins))
    override("mechanic", "node.datapaths", csv_to_list(args.data_paths))
    keep_running = bool(args.keep_cluster_running)
    override("mechanic", "keep.running", keep_running)
    # a cluster that is kept running force-preserves its nodes; otherwise the user's choice is honored
    override("mechanic", "preserve.install", keep_running or convert.to_bool(args.preserve_install))
    override("mechanic", "telemetry.devices", csv_to_list(args.telemetry))

    override("race", "pipeline", args.pipeline)
    override("race", "laps", args.laps)
    override("race", "user.tag", args.user_tag)

    override("track", "repository.name", args.track_repository)
    override("track", "track.name", args.track)
    override("track", "challenge.name", args.challenge)
    override("track", "include.tasks", csv_to_list(args.include_tasks))
    override("track", "test.mode.enabled", args.test_mode)
    override("track", "auto_manage_indices", to_bool(args.auto_manage_indices))

    override("reporting", "format", args.report_format)
    override("reporting", "output.path", args.report_file)
    if sub_command == "compare":
        override("reporting", "baseline.timestamp", args.baseline)
        override("reporting", "contender.timestamp", args.contender)

    ################################
    # new section name: driver
    ################################
    override("driver", "cluster.health", args.cluster_health)
    override("driver", "profiling", args.enable_driver_profiling)
    override("driver", "load_driver_hosts", csv_to_list(args.load_driver_hosts))

    # split by component?
    if sub_command == "list":
        override("system", "list.config.option", args.configuration)
        override("system", "list.races.max_results", args.limit)
    else:
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        override("client", "hosts", _normalize_hosts(csv_to_list(args.target_hosts)))
        client_options = kv_to_map(csv_to_list(args.client_options))
        override("client", "options", client_options)
        if "timeout" not in client_options:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")

    configure_logging(cfg)
    logger.info("OS [%s]" % str(os.uname()))
    logger.info("Python [%s]" % str(sys.implementation))
    logger.info("Rally version [%s]" % version.version())
    logger.info("Command line arguments: %s" % args)
    # Configure networking
    net.init()
    if not args.offline:
        if net.has_internet_connection():
            logger.info("Detected a working Internet connection.")
        else:
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.",
                         logger=logger)
            override("system", "offline.mode", True)

    # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
    # noinspection PyBroadException
    try:
        process.kill_running_rally_instances()
    except BaseException:
        logger.exception("Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")

    success = dispatch_sub_command(cfg, sub_command)

    duration = time.time() - begin
    console.println("")
    summary = "SUCCESS (took %d seconds)" if success else "FAILURE (took %d seconds)"
    console.info(summary % duration, overline="-", underline="-")
    if not success:
        sys.exit(64)