def version():
    release = __version__
    # noinspection PyBroadException
    try:
        if git.is_working_copy(io.normalize_path("%s/.." % paths.rally_root())):
            revision = git.head_revision(paths.rally_root())
            return "%s (git revision: %s)" % (release, revision.strip())
    except BaseException:
        pass
    # cannot determine head revision so user has probably installed Rally via pip instead of git clone
    return release
def revision():
    """
    :return: The current git revision if Rally is installed in development mode or ``None``.
    """
    # noinspection PyBroadException
    try:
        if git.is_working_copy(io.normalize_path("%s/.." % paths.rally_root())):
            raw_revision = git.head_revision(paths.rally_root())
            return raw_revision.strip()
    except BaseException:
        pass
    return None
def version():
    """
    :return: The release version string and an optional suffix for the current git revision if Rally is installed in development mode.
    """
    release = __version__
    # noinspection PyBroadException
    try:
        if git.is_working_copy(io.normalize_path("%s/.." % paths.rally_root())):
            revision = git.head_revision(paths.rally_root())
            return "%s (git revision: %s)" % (release, revision.strip())
    except BaseException:
        pass
    # cannot determine head revision so user has probably installed Rally via pip instead of git clone
    return release
def receiveMsg_StartNodes(self, msg, sender):
    try:
        self.host = msg.ip
        if msg.external:
            self.logger.info("Connecting to externally provisioned nodes on [%s].", msg.ip)
        else:
            self.logger.info("Starting node(s) %s on [%s].", msg.node_ids, msg.ip)

        # Load node-specific configuration
        self.config = config.auto_load_local_config(msg.cfg, additional_sections=[
            # only copy the relevant bits
            "track", "mechanic", "client",
            # allow metrics store to extract race meta-data
            "race", "source"
        ])
        # set root path (normally done by the main entry point)
        self.config.add(config.Scope.application, "node", "rally.root", paths.rally_root())

        if not msg.external:
            self.config.add(config.Scope.benchmark, "provisioning", "node.ip", msg.ip)
            # we need to override the port with the value that the user has specified instead of using the default value (39200)
            self.config.add(config.Scope.benchmark, "provisioning", "node.http.port", msg.port)
            self.config.add(config.Scope.benchmark, "provisioning", "node.ids", msg.node_ids)

        cls = metrics.metrics_store_class(self.config)
        self.metrics_store = cls(self.config)
        self.metrics_store.open(ctx=msg.open_metrics_context)
        # avoid follow-up errors in case we receive an unexpected ActorExitRequest due to an early failure in a parent actor.
        self.metrics_store.lap = 0

        self.mechanic = create(self.config, self.metrics_store, msg.all_node_ips, msg.cluster_settings, msg.sources,
                               msg.build, msg.distribution, msg.external, msg.docker)
        nodes = self.mechanic.start_engine()
        self.running = True
        self.send(getattr(msg, "reply_to", sender),
                  NodesStarted([NodeMetaInfo(node) for node in nodes], self.metrics_store.meta_info))
    except Exception:
        self.logger.exception("Cannot process message [%s]", msg)
        # avoid "can't pickle traceback objects"
        import traceback
        ex_type, ex_value, ex_traceback = sys.exc_info()
        self.send(getattr(msg, "reply_to", sender), actor.BenchmarkFailure(ex_value, traceback.format_exc()))
def main():
    check_python_version()
    log.install_default_log_config()
    log.configure_logging()
    logger = logging.getLogger(__name__)
    start = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)

    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    if not cfg.config_present():
        cfg.install_default_config()
    cfg.load_config(auto_upgrade=True)
    cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    logger.info("OS [%s]", str(platform.uname()))
    logger.info("Python [%s]", str(sys.implementation))
    logger.info("Rally version [%s]", version.version())
    logger.debug("Command line arguments: %s", args)
    # Configure networking
    net.init()
    if not args.offline:
        probing_url = cfg.opts("system", "probing.url", default_value="https://github.com", mandatory=False)
        if not net.has_internet_connection(probing_url):
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    result = dispatch_sub_command(arg_parser, args, cfg)

    end = time.time()
    if result == ExitStatus.SUCCESSFUL:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    elif result == ExitStatus.INTERRUPTED:
        console.println("")
        console.info("ABORTED (took %d seconds)" % (end - start), overline="-", underline="-")
        sys.exit(130)
    elif result == ExitStatus.ERROR:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        sys.exit(64)
def receiveMsg_StartNodes(self, msg, sender):
    try:
        self.host = msg.ip
        if msg.external:
            self.logger.info("Connecting to externally provisioned nodes on [%s].", msg.ip)
        else:
            self.logger.info("Starting node(s) %s on [%s].", msg.node_ids, msg.ip)

        # Load node-specific configuration
        cfg = config.auto_load_local_config(
            msg.cfg,
            additional_sections=[
                # only copy the relevant bits
                "track",
                "mechanic",
                "client",
                "telemetry",
                # allow metrics store to extract race meta-data
                "race",
                "source",
            ],
        )
        # set root path (normally done by the main entry point)
        cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())

        if not msg.external:
            cfg.add(config.Scope.benchmark, "provisioning", "node.ids", msg.node_ids)

        cls = metrics.metrics_store_class(cfg)
        metrics_store = cls(cfg)
        metrics_store.open(ctx=msg.open_metrics_context)
        # avoid follow-up errors in case we receive an unexpected ActorExitRequest due to an early failure in a parent actor.

        self.mechanic = create(
            cfg,
            metrics_store,
            msg.ip,
            msg.port,
            msg.all_node_ips,
            msg.all_node_ids,
            msg.sources,
            msg.distribution,
            msg.external,
            msg.docker,
        )
        self.mechanic.start_engine()
        self.wakeupAfter(METRIC_FLUSH_INTERVAL_SECONDS)
        self.send(getattr(msg, "reply_to", sender), NodesStarted())
    except Exception:
        self.logger.exception("Cannot process message [%s]", msg)
        # avoid "can't pickle traceback objects"
        _, ex_value, _ = sys.exc_info()
        self.send(getattr(msg, "reply_to", sender), actor.BenchmarkFailure(ex_value, traceback.format_exc()))
def main():
    check_python_version()
    log.install_default_log_config()
    log.configure_logging()
    logger = logging.getLogger(__name__)
    start = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)

    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    if args.effective_start_date:
        cfg.add(config.Scope.application, "system", "time.start", args.effective_start_date)
        cfg.add(config.Scope.application, "system", "time.start.user_provided", True)
    else:
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
        cfg.add(config.Scope.application, "system", "time.start.user_provided", False)

    cfg.add(config.Scope.applicationOverride, "system", "trial.id", str(uuid.uuid4()))
    cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)
    cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)

    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    cfg.add(config.Scope.applicationOverride, "mechanic", "source.revision", args.revision)
    if args.distribution_version:
        cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.version", args.distribution_version)
    cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.repository", args.distribution_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.names", opts.csv_to_list(args.car))
    if args.team_path:
        cfg.add(config.Scope.applicationOverride, "mechanic", "team.path", os.path.abspath(io.normalize_path(args.team_path)))
        cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", None)
    else:
        cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", args.team_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.plugins", opts.csv_to_list(args.elasticsearch_plugins))
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.params", opts.to_dict(args.car_params))
    cfg.add(config.Scope.applicationOverride, "mechanic", "plugin.params", opts.to_dict(args.plugin_params))
    if args.keep_cluster_running:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", True)
        # force-preserve the cluster nodes.
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", True)
    else:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", False)
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", convert.to_bool(args.preserve_install))
    cfg.add(config.Scope.applicationOverride, "mechanic", "runtime.jdk", args.runtime_jdk)
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.devices", opts.csv_to_list(args.telemetry))
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.params", opts.to_dict(args.telemetry_params))

    cfg.add(config.Scope.applicationOverride, "race", "pipeline", args.pipeline)
    cfg.add(config.Scope.applicationOverride, "race", "laps", args.laps)
    cfg.add(config.Scope.applicationOverride, "race", "user.tag", args.user_tag)

    # We can assume here that if a track-path is given, the user did not specify a repository either (although argparse sets it to
    # its default value)
    if args.track_path:
        cfg.add(config.Scope.applicationOverride, "track", "track.path", os.path.abspath(io.normalize_path(args.track_path)))
        cfg.add(config.Scope.applicationOverride, "track", "repository.name", None)
        if args.track:
            # stay as close as possible to argparse errors although we have a custom validation.
            arg_parser.error("argument --track not allowed with argument --track-path")
        # cfg.add(config.Scope.applicationOverride, "track", "track.name", None)
    else:
        # cfg.add(config.Scope.applicationOverride, "track", "track.path", None)
        cfg.add(config.Scope.applicationOverride, "track", "repository.name", args.track_repository)
        # set the default programmatically because we need to determine whether the user has provided a value
        chosen_track = args.track if args.track else "geonames"
        cfg.add(config.Scope.applicationOverride, "track", "track.name", chosen_track)

    cfg.add(config.Scope.applicationOverride, "track", "params", opts.to_dict(args.track_params))
    cfg.add(config.Scope.applicationOverride, "track", "challenge.name", args.challenge)
    cfg.add(config.Scope.applicationOverride, "track", "include.tasks", opts.csv_to_list(args.include_tasks))
    cfg.add(config.Scope.applicationOverride, "track", "test.mode.enabled", args.test_mode)

    cfg.add(config.Scope.applicationOverride, "reporting", "format", args.report_format)
    cfg.add(config.Scope.applicationOverride, "reporting", "values", args.show_in_report)
    cfg.add(config.Scope.applicationOverride, "reporting", "output.path", args.report_file)
    if sub_command == "compare":
        cfg.add(config.Scope.applicationOverride, "reporting", "baseline.timestamp", args.baseline)
        cfg.add(config.Scope.applicationOverride, "reporting", "contender.timestamp", args.contender)
    if sub_command == "generate":
        cfg.add(config.Scope.applicationOverride, "generator", "chart.type", args.chart_type)
        cfg.add(config.Scope.applicationOverride, "generator", "output.path", args.output_path)
        if args.chart_spec_path and (args.track or args.challenge or args.car or args.node_count):
            console.println("You need to specify either --chart-spec-path or --track, --challenge, --car and "
                            "--node-count but not both.")
            exit(1)
        if args.chart_spec_path:
            cfg.add(config.Scope.applicationOverride, "generator", "chart.spec.path", args.chart_spec_path)
        else:
            # other options are stored elsewhere already
            cfg.add(config.Scope.applicationOverride, "generator", "node.count", args.node_count)

    cfg.add(config.Scope.applicationOverride, "driver", "profiling", args.enable_driver_profiling)
    cfg.add(config.Scope.applicationOverride, "driver", "on.error", args.on_error)
    cfg.add(config.Scope.applicationOverride, "driver", "load_driver_hosts", opts.csv_to_list(args.load_driver_hosts))
    if sub_command != "list":
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        target_hosts = opts.TargetHosts(args.target_hosts)
        cfg.add(config.Scope.applicationOverride, "client", "hosts", target_hosts)
        client_options = opts.ClientOptions(args.client_options, target_hosts=target_hosts)
        cfg.add(config.Scope.applicationOverride, "client", "options", client_options)
        if "timeout" not in client_options.default:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")
        if list(target_hosts.all_hosts) != list(client_options.all_client_options):
            console.println("--target-hosts and --client-options must define the same keys for multi cluster setups.")
            exit(1)
    # split by component?
    if sub_command == "list":
        cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration)
        cfg.add(config.Scope.applicationOverride, "system", "list.races.max_results", args.limit)

    logger.info("OS [%s]", str(os.uname()))
    logger.info("Python [%s]", str(sys.implementation))
    logger.info("Rally version [%s]", version.version())
    logger.info("Command line arguments: %s", args)
    # Configure networking
    net.init()
    if not args.offline:
        if not net.has_internet_connection():
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    success = dispatch_sub_command(cfg, sub_command)

    end = time.time()
    if success:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    else:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        sys.exit(64)
def receiveMessage(self, msg, sender):
    # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
    # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
    # noinspection PyBroadException
    try:
        logger.debug("NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])" % (str(type(msg)), str(sender)))
        if isinstance(msg, StartNodes):
            self.host = msg.ip
            if msg.external:
                logger.info("Connecting to externally provisioned nodes on [%s]." % msg.ip)
            else:
                logger.info("Starting node(s) %s on [%s]." % (msg.node_ids, msg.ip))

            # Load node-specific configuration
            self.config = config.auto_load_local_config(msg.cfg, additional_sections=[
                # only copy the relevant bits
                "track", "mechanic", "client",
                # allow metrics store to extract race meta-data
                "race", "source"
            ])
            # set root path (normally done by the main entry point)
            self.config.add(config.Scope.application, "node", "rally.root", paths.rally_root())

            if not msg.external:
                self.config.add(config.Scope.benchmark, "provisioning", "node.ip", msg.ip)
                # we need to override the port with the value that the user has specified instead of using the default value (39200)
                self.config.add(config.Scope.benchmark, "provisioning", "node.http.port", msg.port)
                self.config.add(config.Scope.benchmark, "provisioning", "node.ids", msg.node_ids)

            self.metrics_store = metrics.InMemoryMetricsStore(self.config)
            self.metrics_store.open(ctx=msg.open_metrics_context)
            # avoid follow-up errors in case we receive an unexpected ActorExitRequest due to an early failure in a parent actor.
            self.metrics_store.lap = 0

            self.mechanic = create(self.config, self.metrics_store, msg.all_node_ips, msg.cluster_settings, msg.sources,
                                   msg.build, msg.distribution, msg.external, msg.docker)
            nodes = self.mechanic.start_engine()
            self.running = True
            self.send(sender, NodesStarted([NodeMetaInfo(node) for node in nodes], self.metrics_store.meta_info))
        elif isinstance(msg, ApplyMetricsMetaInfo):
            self.metrics_store.merge_meta_info(msg.meta_info)
            self.send(sender, MetricsMetaInfoApplied())
        elif isinstance(msg, ResetRelativeTime):
            logger.info("Resetting relative time of system metrics store on host [%s]." % self.host)
            self.metrics_store.reset_relative_time()
        elif isinstance(msg, OnBenchmarkStart):
            self.metrics_store.lap = msg.lap
            self.mechanic.on_benchmark_start()
            self.send(sender, BenchmarkStarted())
        elif isinstance(msg, OnBenchmarkStop):
            self.mechanic.on_benchmark_stop()
            # clear metrics store data to not send duplicate system metrics data
            self.send(sender, BenchmarkStopped(self.metrics_store.to_externalizable(clear=True)))
        elif isinstance(msg, StopNodes):
            logger.info("Stopping nodes %s." % self.mechanic.nodes)
            self.mechanic.stop_engine()
            self.send(sender, NodesStopped(self.metrics_store.to_externalizable()))
            # clear all state as the mechanic might get reused later
            self.running = False
            self.config = None
            self.mechanic = None
            self.metrics_store = None
        elif isinstance(msg, thespian.actors.ActorExitRequest):
            if self.running:
                logger.info("Stopping nodes %s (due to ActorExitRequest)" % self.mechanic.nodes)
                self.mechanic.stop_engine()
                self.running = False
    except BaseException:
        self.running = False
        logger.exception("Cannot process message [%s]" % msg)
        # avoid "can't pickle traceback objects"
        import traceback
        ex_type, ex_value, ex_traceback = sys.exc_info()
        self.send(sender, actor.BenchmarkFailure(ex_value, traceback.format_exc()))
def main():
    check_python_version()
    start = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)
    # allow to see a thread-dump on SIGQUIT
    faulthandler.register(signal.SIGQUIT, file=sys.stderr)

    pre_configure_logging()
    args = parse_args()

    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    if args.effective_start_date:
        cfg.add(config.Scope.application, "system", "time.start", args.effective_start_date)
        cfg.add(config.Scope.application, "system", "time.start.user_provided", True)
    else:
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
        cfg.add(config.Scope.application, "system", "time.start.user_provided", False)

    cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)
    # per node?
    cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)
    cfg.add(config.Scope.applicationOverride, "system", "logging.output", args.logging)
    # only temporary to ignore unknown actor messages
    cfg.add(config.Scope.applicationOverride, "system", "ignore.unknown.return", args.ignore_unknown_return_values)

    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    # TODO dm: Consider renaming this one. It's used by different modules
    cfg.add(config.Scope.applicationOverride, "mechanic", "source.revision", args.revision)
    if args.distribution_version:
        cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.version", args.distribution_version)
    cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.repository", args.distribution_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", args.team_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.name", args.car)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.plugins", csv_to_list(args.elasticsearch_plugins))
    cfg.add(config.Scope.applicationOverride, "mechanic", "node.datapaths", csv_to_list(args.data_paths))
    cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", convert.to_bool(args.preserve_install))
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.devices", csv_to_list(args.telemetry))
    if args.override_src_dir is not None:
        cfg.add(config.Scope.applicationOverride, "source", "local.src.dir", args.override_src_dir)

    cfg.add(config.Scope.applicationOverride, "race", "pipeline", args.pipeline)
    cfg.add(config.Scope.applicationOverride, "race", "laps", args.laps)
    cfg.add(config.Scope.applicationOverride, "race", "user.tag", args.user_tag)

    cfg.add(config.Scope.applicationOverride, "track", "repository.name", args.track_repository)
    cfg.add(config.Scope.applicationOverride, "track", "track.name", args.track)
    cfg.add(config.Scope.applicationOverride, "track", "challenge.name", args.challenge)
    cfg.add(config.Scope.applicationOverride, "track", "test.mode.enabled", args.test_mode)
    cfg.add(config.Scope.applicationOverride, "track", "auto_manage_indices", to_bool(args.auto_manage_indices))

    cfg.add(config.Scope.applicationOverride, "reporting", "format", args.report_format)
    cfg.add(config.Scope.applicationOverride, "reporting", "output.path", args.report_file)
    if sub_command == "compare":
        cfg.add(config.Scope.applicationOverride, "reporting", "baseline.timestamp", args.baseline)
        cfg.add(config.Scope.applicationOverride, "reporting", "contender.timestamp", args.contender)

    ################################
    # new section name: driver
    ################################
    cfg.add(config.Scope.applicationOverride, "benchmarks", "cluster.health", args.cluster_health)
    cfg.add(config.Scope.applicationOverride, "driver", "profiling", args.enable_driver_profiling)
    if sub_command != "list":
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        cfg.add(config.Scope.applicationOverride, "client", "hosts", _normalize_hosts(csv_to_list(args.target_hosts)))
        client_options = kv_to_map(csv_to_list(args.client_options))
        cfg.add(config.Scope.applicationOverride, "client", "options", client_options)
        if "timeout" not in client_options:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")
    # split by component?
    if sub_command == "list":
        cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration)
        cfg.add(config.Scope.applicationOverride, "system", "list.races.max_results", args.limit)

    configure_logging(cfg)
    logger.info("OS [%s]" % str(os.uname()))
    logger.info("Python [%s]" % str(sys.implementation))
    logger.info("Rally version [%s]" % version.version())
    logger.info("Command line arguments: %s" % args)
    # Configure networking
    net.init()
    if not args.offline:
        if not net.has_internet_connection():
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
    # noinspection PyBroadException
    try:
        process.kill_running_rally_instances()
    except BaseException:
        logger.exception("Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")

    success = dispatch_sub_command(cfg, sub_command)

    end = time.time()
    if success:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    else:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        sys.exit(64)
def receiveMessage(self, msg, sender):
    # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
    # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
    # noinspection PyBroadException
    try:
        logger.debug("NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])" % (str(type(msg)), str(sender)))
        if isinstance(msg, StartEngine):
            logger.info("Starting engine")
            # Load node-specific configuration
            self.config = config.Config(config_name=msg.cfg.name)
            self.config.load_config()
            self.config.add(config.Scope.application, "node", "rally.root", paths.rally_root())
            # copy only the necessary configuration sections
            self.config.add_all(msg.cfg, "system")
            self.config.add_all(msg.cfg, "client")
            self.config.add_all(msg.cfg, "track")
            self.config.add_all(msg.cfg, "mechanic")
            if msg.port is not None:
                # we need to override the port with the value that the user has specified instead of using the default value (39200)
                self.config.add(config.Scope.benchmark, "provisioning", "node.http.port", msg.port)

            self.metrics_store = metrics.InMemoryMetricsStore(self.config)
            self.metrics_store.open(ctx=msg.open_metrics_context)

            self.mechanic = create(self.config, self.metrics_store, self.single_machine, msg.sources, msg.build,
                                   msg.distribution, msg.external, msg.docker)
            cluster = self.mechanic.start_engine()
            self.send(sender, EngineStarted(
                ClusterMetaInfo(cluster.hosts, cluster.source_revision, cluster.distribution_version),
                self.metrics_store.meta_info))
        elif isinstance(msg, OnBenchmarkStart):
            self.metrics_store.lap = msg.lap
            self.mechanic.on_benchmark_start()
            self.send(sender, Success())
        elif isinstance(msg, OnBenchmarkStop):
            self.mechanic.on_benchmark_stop()
            # clear metrics store data to not send duplicate system metrics data
            self.send(sender, BenchmarkStopped(self.metrics_store.to_externalizable(clear=True)))
        elif isinstance(msg, StopEngine):
            logger.info("Stopping engine")
            self.mechanic.stop_engine()
            self.send(sender, EngineStopped(self.metrics_store.to_externalizable()))
            # clear all state as the mechanic might get reused later
            self.config = None
            self.mechanic = None
            self.metrics_store = None
    except BaseException:
        logger.exception("Cannot process message [%s]" % msg)
        # avoid "can't pickle traceback objects"
        import traceback
        ex_type, ex_value, ex_traceback = sys.exc_info()
        self.send(sender, Failure(ex_value, traceback.format_exc()))
def receiveMessage(self, msg, sender):
    # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
    # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
    # noinspection PyBroadException
    try:
        logger.debug("NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])" % (str(type(msg)), str(sender)))
        if isinstance(msg, StartNodes):
            self.host = msg.ip
            if msg.external:
                logger.info("Connecting to externally provisioned nodes on [%s]." % msg.ip)
            else:
                logger.info("Starting node(s) %s on [%s]." % (msg.node_ids, msg.ip))

            # Load node-specific configuration
            self.config = config.auto_load_local_config(msg.cfg, additional_sections=[
                # only copy the relevant bits
                "track", "mechanic", "client",
                # allow metrics store to extract race meta-data
                "race", "source"
            ])
            # set root path (normally done by the main entry point)
            self.config.add(config.Scope.application, "node", "rally.root", paths.rally_root())

            if not msg.external:
                self.config.add(config.Scope.benchmark, "provisioning", "node.ip", msg.ip)
                # we need to override the port with the value that the user has specified instead of using the default value (39200)
                self.config.add(config.Scope.benchmark, "provisioning", "node.http.port", msg.port)
                self.config.add(config.Scope.benchmark, "provisioning", "node.ids", msg.node_ids)

            self.metrics_store = metrics.InMemoryMetricsStore(self.config)
            self.metrics_store.open(ctx=msg.open_metrics_context)
            # avoid follow-up errors in case we receive an unexpected ActorExitRequest due to an early failure in a parent actor.
            self.metrics_store.lap = 0

            self.mechanic = create(self.config, self.metrics_store, msg.all_node_ips, msg.cluster_settings, msg.sources,
                                   msg.build, msg.distribution, msg.external, msg.docker)
            nodes = self.mechanic.start_engine()
            self.running = True
            self.send(sender, NodesStarted([NodeMetaInfo(node) for node in nodes], self.metrics_store.meta_info))
        elif isinstance(msg, ApplyMetricsMetaInfo):
            self.metrics_store.merge_meta_info(msg.meta_info)
            self.send(sender, MetricsMetaInfoApplied())
        elif isinstance(msg, ResetRelativeTime):
            logger.info("Resetting relative time of system metrics store on host [%s]." % self.host)
            self.metrics_store.reset_relative_time()
        elif isinstance(msg, OnBenchmarkStart):
            self.metrics_store.lap = msg.lap
            self.mechanic.on_benchmark_start()
            self.send(sender, BenchmarkStarted())
        elif isinstance(msg, OnBenchmarkStop):
            self.mechanic.on_benchmark_stop()
            # clear metrics store data to not send duplicate system metrics data
            self.send(sender, BenchmarkStopped(self.metrics_store.to_externalizable(clear=True)))
        elif isinstance(msg, StopNodes):
            logger.info("Stopping nodes %s." % self.mechanic.nodes)
            self.mechanic.stop_engine()
            self.send(sender, NodesStopped(self.metrics_store.to_externalizable()))
            # clear all state as the mechanic might get reused later
            self.running = False
            self.config = None
            self.mechanic = None
            self.metrics_store = None
        elif isinstance(msg, thespian.actors.ActorExitRequest):
            if self.running:
                logger.info("Stopping nodes %s (due to ActorExitRequest)" % self.mechanic.nodes)
                self.mechanic.stop_engine()
                self.running = False
    except BaseException:
        self.running = False
        logger.exception("Cannot process message [%s]" % msg)
        # avoid "can't pickle traceback objects"
        import traceback
        ex_type, ex_value, ex_traceback = sys.exc_info()
        self.send(sender, actor.BenchmarkFailure(ex_value, traceback.format_exc()))
def main():
    check_python_version()
    start = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)
    # allow to see a thread-dump on SIGQUIT
    faulthandler.register(signal.SIGQUIT, file=sys.stderr)

    pre_configure_logging()
    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    if args.effective_start_date:
        cfg.add(config.Scope.application, "system", "time.start", args.effective_start_date)
        cfg.add(config.Scope.application, "system", "time.start.user_provided", True)
    else:
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
        cfg.add(config.Scope.application, "system", "time.start.user_provided", False)

    cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)
    # per node?
    cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)
    cfg.add(config.Scope.applicationOverride, "system", "logging.output", args.logging)

    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    cfg.add(config.Scope.applicationOverride, "mechanic", "source.revision", args.revision)
    if args.distribution_version:
        cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.version", args.distribution_version)
    cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.repository", args.distribution_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", args.team_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.names", csv_to_list(args.car))
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.plugins", csv_to_list(args.elasticsearch_plugins))
    cfg.add(config.Scope.applicationOverride, "mechanic", "node.datapaths", csv_to_list(args.data_paths))
    if args.keep_cluster_running:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", True)
        # force-preserve the cluster nodes.
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", True)
    else:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", False)
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", convert.to_bool(args.preserve_install))
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.devices", csv_to_list(args.telemetry))

    cfg.add(config.Scope.applicationOverride, "race", "pipeline", args.pipeline)
    cfg.add(config.Scope.applicationOverride, "race", "laps", args.laps)
    cfg.add(config.Scope.applicationOverride, "race", "user.tag", args.user_tag)

    # We can assume here that if a track-path is given, the user did not specify a repository either (although argparse sets it to
    # its default value)
    if args.track_path:
        cfg.add(config.Scope.applicationOverride, "track", "track.path", os.path.abspath(io.normalize_path(args.track_path)))
        cfg.add(config.Scope.applicationOverride, "track", "repository.name", None)
        if args.track:
            # stay as close as possible to argparse errors although we have a custom validation.
            arg_parser.error("argument --track not allowed with argument --track-path")
        # cfg.add(config.Scope.applicationOverride, "track", "track.name", None)
    else:
        # cfg.add(config.Scope.applicationOverride, "track", "track.path", None)
        cfg.add(config.Scope.applicationOverride, "track", "repository.name", args.track_repository)
        # set the default programmatically because we need to determine whether the user has provided a value
        chosen_track = args.track if args.track else "geonames"
        cfg.add(config.Scope.applicationOverride, "track", "track.name", chosen_track)

    cfg.add(config.Scope.applicationOverride, "track", "params", kv_to_map(csv_to_list(args.track_params)))
    cfg.add(config.Scope.applicationOverride, "track", "challenge.name", args.challenge)
    cfg.add(config.Scope.applicationOverride, "track", "include.tasks", csv_to_list(args.include_tasks))
    cfg.add(config.Scope.applicationOverride, "track", "test.mode.enabled", args.test_mode)
    cfg.add(config.Scope.applicationOverride, "track", "auto_manage_indices", to_bool(args.auto_manage_indices))

    cfg.add(config.Scope.applicationOverride, "reporting", "format", args.report_format)
    cfg.add(config.Scope.applicationOverride, "reporting", "values", args.show_in_report)
    cfg.add(config.Scope.applicationOverride, "reporting", "output.path", args.report_file)
    if sub_command == "compare":
        cfg.add(config.Scope.applicationOverride, "reporting", "baseline.timestamp", args.baseline)
        cfg.add(config.Scope.applicationOverride, "reporting", "contender.timestamp", args.contender)

    cfg.add(config.Scope.applicationOverride, "driver", "cluster.health", args.cluster_health)
    if args.cluster_health != "green":
        console.warn("--cluster-health is deprecated and will be removed in a future version of Rally.")
    cfg.add(config.Scope.applicationOverride, "driver", "profiling", args.enable_driver_profiling)
    cfg.add(config.Scope.applicationOverride, "driver", "on.error", args.on_error)
    cfg.add(config.Scope.applicationOverride, "driver", "load_driver_hosts", csv_to_list(args.load_driver_hosts))
    if sub_command != "list":
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        cfg.add(config.Scope.applicationOverride, "client", "hosts", _normalize_hosts(csv_to_list(args.target_hosts)))
        client_options = kv_to_map(csv_to_list(args.client_options))
        cfg.add(config.Scope.applicationOverride, "client", "options", client_options)
        if "timeout" not in client_options:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")
    # split by component?
    if sub_command == "list":
        cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration)
        cfg.add(config.Scope.applicationOverride, "system", "list.races.max_results", args.limit)

    configure_logging(cfg)
    logger.info("OS [%s]" % str(os.uname()))
    logger.info("Python [%s]" % str(sys.implementation))
    logger.info("Rally version [%s]" % version.version())
    logger.info("Command line arguments: %s" % args)
    # Configure networking
    net.init()
    if not args.offline:
        if not net.has_internet_connection():
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
    # noinspection PyBroadException
    try:
        process.kill_running_rally_instances()
    except BaseException:
        logger.exception("Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")

    success = dispatch_sub_command(cfg, sub_command)

    end = time.time()
    if success:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    else:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        sys.exit(64)
def main():
    check_python_version()
    start = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)
    # allow to see a thread-dump on SIGQUIT
    faulthandler.register(signal.SIGQUIT, file=sys.stderr)

    pre_configure_logging()
    args = parse_args()

    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    if args.effective_start_date:
        cfg.add(config.Scope.application, "system", "time.start", args.effective_start_date)
        cfg.add(config.Scope.application, "system", "time.start.user_provided", True)
    else:
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
        cfg.add(config.Scope.application, "system", "time.start.user_provided", False)

    cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)
    # per node?
    cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)
    cfg.add(config.Scope.applicationOverride, "system", "logging.output", args.logging)

    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    # TODO dm: Consider renaming this one. It's used by different modules
    cfg.add(config.Scope.applicationOverride, "mechanic", "source.revision", args.revision)
    if args.distribution_version:
        cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.version", args.distribution_version)
    cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.repository", args.distribution_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", args.team_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.names", csv_to_list(args.car))
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.plugins", csv_to_list(args.elasticsearch_plugins))
    cfg.add(config.Scope.applicationOverride, "mechanic", "node.datapaths", csv_to_list(args.data_paths))
    if args.keep_cluster_running:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", True)
        # force-preserve the cluster nodes.
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", True)
    else:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", False)
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", convert.to_bool(args.preserve_install))
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.devices", csv_to_list(args.telemetry))

    cfg.add(config.Scope.applicationOverride, "race", "pipeline", args.pipeline)
    cfg.add(config.Scope.applicationOverride, "race", "laps", args.laps)
    cfg.add(config.Scope.applicationOverride, "race", "user.tag", args.user_tag)

    cfg.add(config.Scope.applicationOverride, "track", "repository.name", args.track_repository)
    cfg.add(config.Scope.applicationOverride, "track", "track.name", args.track)
    cfg.add(config.Scope.applicationOverride, "track", "challenge.name", args.challenge)
    cfg.add(config.Scope.applicationOverride, "track", "include.tasks", csv_to_list(args.include_tasks))
    cfg.add(config.Scope.applicationOverride, "track", "test.mode.enabled", args.test_mode)
    cfg.add(config.Scope.applicationOverride, "track", "auto_manage_indices", to_bool(args.auto_manage_indices))

    cfg.add(config.Scope.applicationOverride, "reporting", "format", args.report_format)
    cfg.add(config.Scope.applicationOverride, "reporting", "output.path", args.report_file)
    if sub_command == "compare":
        cfg.add(config.Scope.applicationOverride, "reporting", "baseline.timestamp", args.baseline)
        cfg.add(config.Scope.applicationOverride, "reporting", "contender.timestamp", args.contender)

    ################################
    # new section name: driver
    ################################
    cfg.add(config.Scope.applicationOverride, "driver", "cluster.health", args.cluster_health)
    cfg.add(config.Scope.applicationOverride, "driver", "profiling", args.enable_driver_profiling)
    cfg.add(config.Scope.applicationOverride, "driver", "load_driver_hosts", csv_to_list(args.load_driver_hosts))
    if sub_command != "list":
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        cfg.add(config.Scope.applicationOverride, "client", "hosts", _normalize_hosts(csv_to_list(args.target_hosts)))
        client_options = kv_to_map(csv_to_list(args.client_options))
        cfg.add(config.Scope.applicationOverride, "client", "options", client_options)
        if "timeout" not in client_options:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")
    # split by component?
    if sub_command == "list":
        cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration)
        cfg.add(config.Scope.applicationOverride, "system", "list.races.max_results", args.limit)

    configure_logging(cfg)
    logger.info("OS [%s]" % str(os.uname()))
    logger.info("Python [%s]" % str(sys.implementation))
    logger.info("Rally version [%s]" % version.version())
    logger.info("Command line arguments: %s" % args)
    # Configure networking
    net.init()
    if not args.offline:
        if not net.has_internet_connection():
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
    # noinspection PyBroadException
    try:
        process.kill_running_rally_instances()
    except BaseException:
        logger.exception("Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")

    success = dispatch_sub_command(cfg, sub_command)

    end = time.time()
    if success:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    else:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        sys.exit(64)