def test_pass_java_opts(self):
    """Variables listed in the "passenv" setting (here ES_JAVA_OPTS) must reach the node env unmodified."""
    os.environ["ES_JAVA_OPTS"] = "-XX:-someJunk"

    cfg = config.Config()
    for section, key, value in [
        ("mechanic", "keep.running", False),
        ("system", "env.name", "test"),
        ("system", "passenv", "ES_JAVA_OPTS"),
    ]:
        cfg.add(config.Scope.application, section, key, value)

    proc_launcher = launcher.ProcessLauncher(cfg)
    # no JAVA_HOME -> use the bundled JDK
    env = proc_launcher._prepare_env(node_name="node0", java_home=None, t=telemetry.Telemetry())

    # the launcher must pass the options through untouched
    self.assertEqual(os.environ["ES_JAVA_OPTS"], env["ES_JAVA_OPTS"])
def test_bundled_jdk_not_in_path(self):
    """With the bundled JDK (java_home=None), the host's JAVA_HOME must not leak into the node env and PATH stays as-is."""
    os.environ["JAVA_HOME"] = "/path/to/java"

    cfg = config.Config()
    cfg.add(config.Scope.application, "mechanic", "keep.running", False)
    cfg.add(config.Scope.application, "system", "env.name", "test")

    proc_launcher = launcher.ProcessLauncher(cfg)
    node_env = proc_launcher._prepare_env(node_name="node0", java_home=None, t=telemetry.Telemetry())

    # PATH is inherited unmodified ...
    self.assertEqual(os.environ["PATH"], node_env["PATH"])
    # ... and JAVA_HOME is dropped entirely so the bundled JDK is used
    self.assertIsNone(node_env.get("JAVA_HOME"))
def test_stops_container_when_no_metrics_store_is_provided(self, run_subprocess_with_logging, add_metadata_for_node):
    """Without a metrics store, stop() must skip metadata recording but still shut the container down."""
    docker = launcher.DockerLauncher(config.Config())
    node = cluster.Node(0, "/bin", "127.0.0.1", "testnode", telemetry.Telemetry())

    docker.stop([node], metrics_store=None)

    # no metrics store -> nothing to record ...
    self.assertEqual(0, add_metadata_for_node.call_count)
    # ... but the container is torn down regardless
    run_subprocess_with_logging.assert_called_once_with("docker-compose -f /bin/docker-compose.yml down")
def test_env_options_order(self, sleep):
    """The JDK bin directory is prepended to PATH and the JVM options are emitted in a fixed order."""
    cfg = config.Config()
    cfg.add(config.Scope.application, "mechanic", "keep.running", False)
    cfg.add(config.Scope.application, "system", "env.name", "test")
    proc_launcher = launcher.ProcessLauncher(cfg)

    jfr = telemetry.FlightRecorder(telemetry_params={}, log_root="/tmp/telemetry", java_major_version=8)
    t = telemetry.Telemetry(["jfr"], devices=[jfr])
    env = proc_launcher._prepare_env(car_env={}, node_name="node0", java_home="/java_home", t=t)

    # the explicitly provided JDK's bin directory comes first on PATH
    self.assertEqual("/java_home/bin" + os.pathsep + os.environ["PATH"], env["PATH"])
    # mandatory options first, then the flight recorder's options
    self.assertEqual("-XX:+ExitOnOutOfMemoryError -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints "
                     "-XX:+UnlockCommercialFeatures -XX:+FlightRecorder "
                     "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=/tmp/telemetry/profile.jfr "  # pylint: disable=line-too-long
                     "-XX:StartFlightRecording=defaultrecording=true",
                     env["ES_JAVA_OPTS"])
def start(self, node_configurations):
    """Start one process per node configuration and return the resulting telemetry-attached nodes."""
    started = []
    for node_config in node_configurations:
        name = node_config.node_name
        host = node_config.ip
        self.binary_paths[name] = node_config.binary_path
        self._start_process(node_config.binary_path)
        # Don't attach any telemetry devices for now but keep the infrastructure in place
        node_telemetry = telemetry.Telemetry(devices=[])
        telemetry.add_metadata_for_node(self.metrics_store, name, host)
        node = cluster.Node(0, host, name, node_telemetry)
        node_telemetry.attach_to_node(node)
        started.append(node)
    return started
def _start_node(self, node_configuration, node_count_on_host):
    """Start a single node process: resolve the JDK, wire up per-node telemetry, launch and attach.

    :param node_configuration: All launch-relevant settings for this node.
    :param node_count_on_host: Number of nodes co-located on this host (needed by DiskIo).
    :return: The started, telemetry-attached node.
    """
    host = node_configuration.ip
    name = node_configuration.node_name
    telemetry_root = os.path.join(node_configuration.node_root_path, "telemetry")

    java_major_version, java_home = java_resolver.java_home(
        node_configuration.car_runtime_jdks,
        self.cfg.opts("mechanic", "runtime.jdk"),
        node_configuration.car_provides_bundled_jdk)

    self.logger.info("Starting node [%s].", name)

    active_devices = self.cfg.opts("telemetry", "devices")
    device_params = self.cfg.opts("telemetry", "params")
    node_telemetry = telemetry.Telemetry(active_devices, devices=[
        telemetry.FlightRecorder(device_params, telemetry_root, java_major_version),
        telemetry.JitCompiler(telemetry_root),
        telemetry.Gc(device_params, telemetry_root, java_major_version),
        telemetry.Heapdump(telemetry_root),
        telemetry.DiskIo(node_count_on_host),
        telemetry.IndexSize(node_configuration.data_paths),
        telemetry.StartupTime(),
    ])

    # TODO #822: Remove reference to car's environment
    env = self._prepare_env(node_configuration.car_env, name, java_home, node_telemetry)
    node_telemetry.on_pre_node_start(name)
    pid = self._start_process(node_configuration.binary_path, env)
    self.logger.info("Successfully started node [%s] with PID [%s].", name, pid)

    node = cluster.Node(pid, node_configuration.binary_path, host, name, node_telemetry)
    self.logger.info("Attaching telemetry devices to node [%s].", name)
    node_telemetry.attach_to_node(node)
    return node
def start(self, metrics_store):
    """Connect to an externally provisioned cluster, attach telemetry and return the cluster handle.

    :param metrics_store: The metrics store that telemetry devices report into.
    :return: The attached cluster instance.
    :raises exceptions.SystemSetupError: if the configured target hosts cannot be parsed.
    """
    configured_host_list = self.cfg.opts("launcher", "external.target.hosts")
    hosts = []
    try:
        for authority in configured_host_list:
            host, port = authority.split(":")
            hosts.append({"host": host, "port": port})
    except ValueError:
        msg = "Could not initialize external cluster. Invalid format for %s. Expected a comma-separated list of host:port pairs, " \
              "e.g. host1:9200,host2:9200." % configured_host_list
        logger.exception(msg)
        raise exceptions.SystemSetupError(msg)
    t = telemetry.Telemetry(self.cfg, metrics_store, devices=[
        telemetry.ExternalEnvironmentInfo(self.cfg, metrics_store),
        telemetry.NodeStats(self.cfg, metrics_store),
        telemetry.IndexStats(self.cfg, metrics_store)
    ])
    c = self.cluster_factory.create(hosts, [], self.cfg.opts("launcher", "client.options"), metrics_store, t)
    user_defined_version = self.cfg.opts("source", "distribution.version", mandatory=False)
    distribution_version = c.info()["version"]["number"]
    if not user_defined_version or user_defined_version.strip() == "":
        # lazy %-style args: only interpolated if the log record is actually emitted
        logger.info("Distribution version was not specified by user. Rally-determined version is [%s]", distribution_version)
        # remember the detected version so downstream components can rely on it
        self.cfg.add(config.Scope.benchmark, "source", "distribution.version", distribution_version)
    elif user_defined_version != distribution_version:
        # fix: Logger.warn is a deprecated alias of Logger.warning
        logger.warning("User specified version [%s] but cluster reports version [%s].",
                       user_defined_version, distribution_version)
        print("Warning: Specified distribution version '%s' on the command line differs from version '%s' reported by the cluster." %
              (user_defined_version, distribution_version))
    t.attach_to_cluster(c)
    return c
def start(self, track, setup, metrics_store):
    """Start a locally provisioned cluster for the given track setup and attach cluster telemetry.

    :param track: The benchmarked track (unused here, passed through to node startup context).
    :param setup: The track setup; ``setup.candidate.nodes`` determines how many nodes to start.
    :param metrics_store: The metrics store that telemetry devices report into.
    :return: The attached cluster instance.
    """
    if self._servers:
        # fix: Logger.warn is a deprecated alias of Logger.warning
        logger.warning("There are still referenced servers on startup. Did the previous shutdown succeed?")
    num_nodes = setup.candidate.nodes
    first_http_port = self._config.opts("provisioning", "node.http.port")
    t = telemetry.Telemetry(self._config, metrics_store)
    c = cluster.Cluster([{"host": "localhost", "port": first_http_port}],
                        [self._start_node(node, setup, metrics_store) for node in range(num_nodes)],
                        metrics_store)
    t.attach_to_cluster(c)
    return c
def start(self, car, metrics_store):
    """Start a cluster based on the given car and attach cluster telemetry.

    :param car: The car (node blueprint); ``car.nodes`` determines how many nodes to start.
    :param metrics_store: The metrics store that telemetry devices report into.
    :return: The attached cluster instance.
    """
    if self._servers:
        # fix: Logger.warn is a deprecated alias of Logger.warning
        logger.warning("There are still referenced servers on startup. Did the previous shutdown succeed?")
    # lazy %-style args: only interpolated if the log record is actually emitted
    logger.info("Starting a cluster based on car [%s] with [%d] nodes.", car, car.nodes)
    first_http_port = self.cfg.opts("provisioning", "node.http.port")
    t = telemetry.Telemetry(self.cfg, metrics_store)
    c = self.cluster_factory.create([{"host": "localhost", "port": first_http_port}],
                                    [self._start_node(node, car, metrics_store) for node in range(car.nodes)],
                                    self.cfg.opts("launcher", "client.options"), metrics_store, t)
    t.attach_to_cluster(c)
    return c
def _start_node(self, node_configuration, node_count_on_host):
    """Start a single car-based node: resolve the JDK, record node metadata, wire telemetry, launch and attach.

    :param node_configuration: All launch-relevant settings for this node.
    :param node_count_on_host: Number of nodes co-located on this host (needed by DiskIo).
    :return: The started, telemetry-attached node.
    """
    host = node_configuration.ip
    name = node_configuration.node_name
    car = node_configuration.car
    telemetry_root = os.path.join(node_configuration.node_root_path, "telemetry")

    java_major_version, java_home = java_resolver.java_home(car, self.cfg)
    telemetry.add_metadata_for_node(self.metrics_store, name, host)

    self.logger.info("Starting node [%s] based on car [%s].", name, car)

    active_devices = self.cfg.opts("telemetry", "devices")
    device_params = self.cfg.opts("telemetry", "params")
    node_telemetry = telemetry.Telemetry(active_devices, devices=[
        telemetry.FlightRecorder(device_params, telemetry_root, java_major_version),
        telemetry.JitCompiler(telemetry_root),
        telemetry.Gc(telemetry_root, java_major_version),
        telemetry.Heapdump(telemetry_root),
        telemetry.DiskIo(self.metrics_store, node_count_on_host, telemetry_root, name),
        telemetry.IndexSize(node_configuration.data_paths, self.metrics_store),
        telemetry.StartupTime(self.metrics_store),
    ])

    env = self._prepare_env(car, name, java_home, node_telemetry)
    node_telemetry.on_pre_node_start(name)
    pid = self._start_process(node_configuration.binary_path, env)
    node = cluster.Node(pid, host, name, node_telemetry)

    self.logger.info("Attaching telemetry devices to node [%s].", name)
    node_telemetry.attach_to_node(node)
    return node
def test_pass_env_vars(self):
    """Only the variables named in "passenv" are copied from the runner's environment into the node env."""
    os.environ["JAVA_HOME"] = "/path/to/java"
    os.environ["FOO1"] = "BAR1"

    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "test")
    cfg.add(config.Scope.application, "system", "passenv", "JAVA_HOME,FOO1")

    proc_launcher = launcher.ProcessLauncher(cfg)
    # no JAVA_HOME -> use the bundled JDK
    env = proc_launcher._prepare_env(node_name="node0", java_home=None, t=telemetry.Telemetry())

    # pass-through variables arrive unmodified ...
    self.assertEqual(os.environ["JAVA_HOME"], env["JAVA_HOME"])
    self.assertEqual(os.environ["FOO1"], env["FOO1"])
    # ... while the launcher still injects its mandatory JVM options
    self.assertEqual(env["ES_JAVA_OPTS"], "-XX:+ExitOnOutOfMemoryError")
def test_stops_container_successfully_with_metrics_store(self, run_subprocess_with_logging, add_metadata_for_node):
    """With a metrics store present, stop() records node metadata and shuts the container down."""
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "test")
    metrics_store = get_metrics_store(cfg)

    docker = launcher.DockerLauncher(cfg)
    node = cluster.Node(0, "/bin", "127.0.0.1", "testnode", telemetry.Telemetry())

    docker.stop([node], metrics_store=metrics_store)

    add_metadata_for_node.assert_called_once_with(metrics_store, "testnode", "127.0.0.1")
    run_subprocess_with_logging.assert_called_once_with("docker-compose -f /bin/docker-compose.yml down")
def _list(self, ctx):
    """Print the entity selected via the "list.config.option" setting (telemetry, tracks or pipelines)."""
    what = ctx.config.opts("system", "list.config.option")
    if what == "telemetry":
        # telemetry prints its own listing
        telemetry.Telemetry(ctx.config).list()
    elif what == "tracks":
        print("Available tracks:\n")
        for current_track in track.tracks.values():
            print("* %s: %s" % (current_track.name, current_track.description))
            print("\tTrack setups for this track:")
            for setup in current_track.track_setups:
                print("\t* %s" % setup.name)
            print("")
    elif what == "pipelines":
        print("Available pipelines:\n")
        for pipeline_class in pipelines.values():
            instance = pipeline_class(ctx)
            print("* %s: %s" % (instance.name, instance.description))
    else:
        raise exceptions.ImproperlyConfigured("Cannot list unknown configuration option [%s]" % what)
def test_stores_cluster_level_metrics_on_attach(self, nodes_info, cluster_info, metrics_store_add_meta_info):
    """On attach, the cluster's source revision and each node's JVM metadata must be stored."""
    nodes_info.return_value = {
        "nodes": {
            "FCFjozkeTiOpN-SI88YEcg": {
                "name": "rally0",
                "host": "127.0.0.1",
                "os": {
                    "name": "Mac OS X",
                    "version": "10.11.4",
                    "available_processors": 8
                },
                "jvm": {
                    "version": "1.8.0_74",
                    "vm_vendor": "Oracle Corporation"
                }
            }
        }
    }
    cluster_info.return_value = {"version": {"build_hash": "abc123"}}

    cfg = self.create_config()
    metrics_store = metrics.EsMetricsStore(cfg)
    t = telemetry.Telemetry(cfg, metrics_store, devices=[telemetry.EnvironmentInfo(cfg, metrics_store)])
    t.attach_to_cluster(cluster.Cluster([{"host": "::1:9200"}], [], {}, metrics_store, t,
                                        client_factory_class=MockClientFactory))

    metrics_store_add_meta_info.assert_has_calls([
        mock.call(metrics.MetaInfoScope.cluster, None, "source_revision", "abc123"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor", "Oracle Corporation"),
        mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version", "1.8.0_74")
    ])
def test_daemon_stop_with_already_terminated_process(self):
    """Stopping a node whose process has already terminated must report no stopped nodes."""
    cfg = config.Config()
    for section, key, value in [
        ("node", "root.dir", "test"),
        ("telemetry", "devices", []),
        ("telemetry", "params", None),
        ("system", "env.name", "test"),
    ]:
        cfg.add(config.Scope.application, section, key, value)
    ms = get_metrics_store(cfg)

    proc_launcher = launcher.ProcessLauncher(cfg)
    # pid -1 does not refer to any live process
    dead_node = cluster.Node(pid=-1, binary_path="/bin", host_name="localhost", node_name="rally-0",
                             telemetry=telemetry.Telemetry())

    stopped_nodes = proc_launcher.stop([dead_node], ms)

    # no nodes should have been stopped (they were already stopped)
    self.assertEqual([], stopped_nodes)
def start(self, node_configurations):
    """Start one Docker-based process per node configuration and return the resulting nodes.

    Only a subset of telemetry is supported for Docker hosts (specifically, we do not
    allow users to enable any devices).
    """
    started = []
    for node_config in node_configurations:
        name = node_config.node_name
        host = node_config.ip
        telemetry_root = os.path.join(node_config.node_root_path, "telemetry")

        self.binary_paths[name] = node_config.binary_path
        self._start_process(node_config.binary_path)

        node_telemetry = telemetry.Telemetry(devices=[
            telemetry.DiskIo(self.metrics_store, len(node_configurations), telemetry_root, name),
        ])
        telemetry.add_metadata_for_node(self.metrics_store, name, host)
        started.append(cluster.Node(0, host, name, node_telemetry))
    return started
def prepare_telemetry(self):
    """Instantiate all cluster-level telemetry devices against the benchmark's Elasticsearch clients."""
    active_devices = self.config.opts("telemetry", "devices")
    device_params = self.config.opts("telemetry", "params")
    all_clients = self.es_clients
    default_client = self.es_clients["default"]
    self.telemetry = telemetry.Telemetry(active_devices, devices=[
        telemetry.NodeStats(device_params, all_clients, self.metrics_store),
        telemetry.ExternalEnvironmentInfo(default_client, self.metrics_store),
        telemetry.ClusterEnvironmentInfo(default_client, self.metrics_store),
        telemetry.JvmStatsSummary(default_client, self.metrics_store),
        telemetry.IndexStats(default_client, self.metrics_store),
        telemetry.MlBucketProcessingTime(default_client, self.metrics_store),
        telemetry.CcrStats(device_params, all_clients, self.metrics_store),
        telemetry.RecoveryStats(device_params, all_clients, self.metrics_store)
    ])
def _list(self, ctx):
    """Print a table for the entity selected via "list.config.option" (telemetry, tracks, pipelines or races)."""
    what = ctx.config.opts("system", "list.config.option")
    if what == "telemetry":
        print("Available telemetry devices:\n")
        print(tabulate.tabulate(telemetry.Telemetry(ctx.config).list(), ["Command", "Name", "Description"]))
        print("\nKeep in mind that each telemetry device may incur a runtime overhead which can skew results.")
    elif what == "tracks":
        print("Available tracks:\n")
        rows = []
        for current_track in track.tracks.values():
            setups = ",".join(map(str, current_track.track_setups))
            rows.append([current_track.name, current_track.short_description, setups])
        print(tabulate.tabulate(rows, headers=["Name", "Description", "Track setups"]))
    elif what == "pipelines":
        print("Available pipelines:\n")
        rows = [[p(ctx).name, p(ctx).description] for p in pipelines.values()]
        print(tabulate.tabulate(rows, headers=["Name", "Description"]))
    elif what == "races":
        print("Recent races:\n")
        rows = []
        for race in metrics.RaceStore(ctx.config).list():
            rows.append([race["trial-timestamp"], race["track"], ",".join(race["track-setups"]), race["user-tag"]])
        print(tabulate.tabulate(rows, headers=["Trial Timestamp", "Track", "Track setups", "User Tag"]))
    else:
        raise exceptions.ImproperlyConfigured("Cannot list unknown configuration option [%s]" % what)