Example #1
    def test_pass_java_opts(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "mechanic", "keep.running", False)
        cfg.add(config.Scope.application, "system", "env.name", "test")
        cfg.add(config.Scope.application, "system", "passenv", "ES_JAVA_OPTS")
        os.environ["ES_JAVA_OPTS"] = "-XX:-someJunk"

        proc_launcher = launcher.ProcessLauncher(cfg)

        t = telemetry.Telemetry()
        # no JAVA_HOME -> use the bundled JDK
        env = proc_launcher._prepare_env(node_name="node0",
                                         java_home=None,
                                         t=t)

        # unmodified
        self.assertEqual(os.environ["ES_JAVA_OPTS"], env["ES_JAVA_OPTS"])
Example #2
    def test_bundled_jdk_not_in_path(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "mechanic", "keep.running", False)
        cfg.add(config.Scope.application, "system", "env.name", "test")
        os.environ["JAVA_HOME"] = "/path/to/java"

        proc_launcher = launcher.ProcessLauncher(cfg)

        t = telemetry.Telemetry()
        # no JAVA_HOME -> use the bundled JDK
        env = proc_launcher._prepare_env(node_name="node0",
                                         java_home=None,
                                         t=t)

        # unmodified
        self.assertEqual(os.environ["PATH"], env["PATH"])
        self.assertIsNone(env.get("JAVA_HOME"))
Example #3
    def test_stops_container_when_no_metrics_store_is_provided(
            self, run_subprocess_with_logging, add_metadata_for_node):
        cfg = config.Config()
        metrics_store = None
        docker = launcher.DockerLauncher(cfg)

        nodes = [
            cluster.Node(0, "/bin", "127.0.0.1", "testnode",
                         telemetry.Telemetry())
        ]

        docker.stop(nodes, metrics_store=metrics_store)

        self.assertEqual(0, add_metadata_for_node.call_count)

        run_subprocess_with_logging.assert_called_once_with(
            "docker-compose -f /bin/docker-compose.yml down")
Example #4
    def test_env_options_order(self, sleep):
        cfg = config.Config()
        cfg.add(config.Scope.application, "mechanic", "keep.running", False)
        cfg.add(config.Scope.application, "system", "env.name", "test")

        proc_launcher = launcher.ProcessLauncher(cfg)

        node_telemetry = [
            telemetry.FlightRecorder(telemetry_params={}, log_root="/tmp/telemetry", java_major_version=8)
        ]
        t = telemetry.Telemetry(["jfr"], devices=node_telemetry)
        env = proc_launcher._prepare_env(car_env={}, node_name="node0", java_home="/java_home", t=t)

        self.assertEqual("/java_home/bin" + os.pathsep + os.environ["PATH"], env["PATH"])
        self.assertEqual("-XX:+ExitOnOutOfMemoryError -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints "
                         "-XX:+UnlockCommercialFeatures -XX:+FlightRecorder "
                         "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=/tmp/telemetry/profile.jfr " # pylint: disable=line-too-long
                         "-XX:StartFlightRecording=defaultrecording=true", env["ES_JAVA_OPTS"])
Example #5
    def start(self, node_configurations):
        nodes = []
        for node_configuration in node_configurations:
            node_name = node_configuration.node_name
            host_name = node_configuration.ip
            binary_path = node_configuration.binary_path
            self.binary_paths[node_name] = binary_path
            self._start_process(binary_path)
            node_telemetry = [
                # Don't attach any telemetry devices for now but keep the infrastructure in place
            ]
            t = telemetry.Telemetry(devices=node_telemetry)
            telemetry.add_metadata_for_node(self.metrics_store, node_name,
                                            host_name)
            node = cluster.Node(0, host_name, node_name, t)
            t.attach_to_node(node)
            nodes.append(node)
        return nodes
Example #6
    def _start_node(self, node_configuration, node_count_on_host):
        host_name = node_configuration.ip
        node_name = node_configuration.node_name
        binary_path = node_configuration.binary_path
        data_paths = node_configuration.data_paths
        node_telemetry_dir = os.path.join(node_configuration.node_root_path,
                                          "telemetry")

        java_major_version, java_home = java_resolver.java_home(
            node_configuration.car_runtime_jdks,
            self.cfg.opts("mechanic", "runtime.jdk"),
            node_configuration.car_provides_bundled_jdk)

        self.logger.info("Starting node [%s].", node_name)

        enabled_devices = self.cfg.opts("telemetry", "devices")
        telemetry_params = self.cfg.opts("telemetry", "params")
        node_telemetry = [
            telemetry.FlightRecorder(telemetry_params, node_telemetry_dir,
                                     java_major_version),
            telemetry.JitCompiler(node_telemetry_dir),
            telemetry.Gc(telemetry_params, node_telemetry_dir,
                         java_major_version),
            telemetry.Heapdump(node_telemetry_dir),
            telemetry.DiskIo(node_count_on_host),
            telemetry.IndexSize(data_paths),
            telemetry.StartupTime(),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)
        # TODO #822: Remove reference to car's environment
        env = self._prepare_env(node_configuration.car_env, node_name,
                                java_home, t)
        t.on_pre_node_start(node_name)
        node_pid = self._start_process(binary_path, env)
        self.logger.info("Successfully started node [%s] with PID [%s].",
                         node_name, node_pid)
        node = cluster.Node(node_pid, binary_path, host_name, node_name, t)

        self.logger.info("Attaching telemetry devices to node [%s].",
                         node_name)
        t.attach_to_node(node)

        return node
Example #7
    def start(self, metrics_store):
        configured_host_list = self.cfg.opts("launcher",
                                             "external.target.hosts")
        hosts = []
        try:
            for authority in configured_host_list:
                host, port = authority.split(":")
                hosts.append({"host": host, "port": port})
        except ValueError:
            msg = "Could not initialize external cluster. Invalid format for %s. Expected a comma-separated list of host:port pairs, " \
                  "e.g. host1:9200,host2:9200." % configured_host_list
            logger.exception(msg)
            raise exceptions.SystemSetupError(msg)

        t = telemetry.Telemetry(
            self.cfg,
            metrics_store,
            devices=[
                telemetry.ExternalEnvironmentInfo(self.cfg, metrics_store),
                telemetry.NodeStats(self.cfg, metrics_store),
                telemetry.IndexStats(self.cfg, metrics_store)
            ])
        c = self.cluster_factory.create(
            hosts, [], self.cfg.opts("launcher", "client.options"),
            metrics_store, t)
        user_defined_version = self.cfg.opts("source",
                                             "distribution.version",
                                             mandatory=False)
        distribution_version = c.info()["version"]["number"]
        if not user_defined_version or user_defined_version.strip() == "":
            logger.info(
                "Distribution version was not specified by user. Rally-determined version is [%s]"
                % distribution_version)
            self.cfg.add(config.Scope.benchmark, "source",
                         "distribution.version", distribution_version)
        elif user_defined_version != distribution_version:
            logger.warn(
                "User specified version [%s] but cluster reports version [%s]."
                % (user_defined_version, distribution_version))
            print(
                "Warning: Specified distribution version '%s' on the command line differs from version '%s' reported by the cluster."
                % (user_defined_version, distribution_version))
        t.attach_to_cluster(c)
        return c
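
For reference, the parsing loop above expects external.target.hosts to look like "host1:9200,host2:9200"; any entry without exactly one colon triggers the SystemSetupError. A quick illustration of the resulting structure (note that the port stays a string at this point):

configured_host_list = "host1:9200,host2:9200".split(",")
hosts = [{"host": host, "port": port}
         for host, port in (authority.split(":") for authority in configured_host_list)]
# hosts == [{"host": "host1", "port": "9200"}, {"host": "host2", "port": "9200"}]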
Example #8
    def start(self, track, setup, metrics_store):
        if self._servers:
            logger.warn(
                "There are still referenced servers on startup. Did the previous shutdown succeed?"
            )
        num_nodes = setup.candidate.nodes
        first_http_port = self._config.opts("provisioning", "node.http.port")

        t = telemetry.Telemetry(self._config, metrics_store)
        c = cluster.Cluster([{
            "host": "localhost",
            "port": first_http_port
        }], [
            self._start_node(node, setup, metrics_store)
            for node in range(num_nodes)
        ], metrics_store)
        t.attach_to_cluster(c)

        return c
Example #9
    def start(self, car, metrics_store):
        if self._servers:
            logger.warn(
                "There are still referenced servers on startup. Did the previous shutdown succeed?"
            )
        logger.info("Starting a cluster based on car [%s] with [%d] nodes." %
                    (car, car.nodes))
        first_http_port = self.cfg.opts("provisioning", "node.http.port")

        t = telemetry.Telemetry(self.cfg, metrics_store)
        c = self.cluster_factory.create([{
            "host": "localhost",
            "port": first_http_port
        }], [
            self._start_node(node, car, metrics_store)
            for node in range(car.nodes)
        ], self.cfg.opts("launcher", "client.options"), metrics_store, t)
        t.attach_to_cluster(c)
        return c
Example #10
    def _start_node(self, node_configuration, node_count_on_host):
        host_name = node_configuration.ip
        node_name = node_configuration.node_name
        car = node_configuration.car
        binary_path = node_configuration.binary_path
        data_paths = node_configuration.data_paths
        node_telemetry_dir = os.path.join(node_configuration.node_root_path,
                                          "telemetry")

        java_major_version, java_home = java_resolver.java_home(car, self.cfg)

        telemetry.add_metadata_for_node(self.metrics_store, node_name,
                                        host_name)

        self.logger.info("Starting node [%s] based on car [%s].", node_name,
                         car)

        enabled_devices = self.cfg.opts("telemetry", "devices")
        telemetry_params = self.cfg.opts("telemetry", "params")
        node_telemetry = [
            telemetry.FlightRecorder(telemetry_params, node_telemetry_dir,
                                     java_major_version),
            telemetry.JitCompiler(node_telemetry_dir),
            telemetry.Gc(node_telemetry_dir, java_major_version),
            telemetry.Heapdump(node_telemetry_dir),
            telemetry.DiskIo(self.metrics_store, node_count_on_host,
                             node_telemetry_dir, node_name),
            telemetry.IndexSize(data_paths, self.metrics_store),
            telemetry.StartupTime(self.metrics_store),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)
        env = self._prepare_env(car, node_name, java_home, t)
        t.on_pre_node_start(node_name)
        node_pid = self._start_process(binary_path, env)
        node = cluster.Node(node_pid, host_name, node_name, t)

        self.logger.info("Attaching telemetry devices to node [%s].",
                         node_name)
        t.attach_to_node(node)

        return node
Example #11
    def test_pass_env_vars(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "system", "env.name", "test")
        cfg.add(config.Scope.application, "system", "passenv",
                "JAVA_HOME,FOO1")
        os.environ["JAVA_HOME"] = "/path/to/java"
        os.environ["FOO1"] = "BAR1"

        proc_launcher = launcher.ProcessLauncher(cfg)

        t = telemetry.Telemetry()
        # no JAVA_HOME -> use the bundled JDK
        env = proc_launcher._prepare_env(node_name="node0",
                                         java_home=None,
                                         t=t)

        # unmodified
        self.assertEqual(os.environ["JAVA_HOME"], env["JAVA_HOME"])
        self.assertEqual(os.environ["FOO1"], env["FOO1"])
        self.assertEqual(env["ES_JAVA_OPTS"], "-XX:+ExitOnOutOfMemoryError")
Example #12
    def test_stops_container_successfully_with_metrics_store(
            self, run_subprocess_with_logging, add_metadata_for_node):
        cfg = config.Config()
        cfg.add(config.Scope.application, "system", "env.name", "test")

        metrics_store = get_metrics_store(cfg)
        docker = launcher.DockerLauncher(cfg)

        nodes = [
            cluster.Node(0, "/bin", "127.0.0.1", "testnode",
                         telemetry.Telemetry())
        ]

        docker.stop(nodes, metrics_store=metrics_store)

        add_metadata_for_node.assert_called_once_with(metrics_store,
                                                      "testnode", "127.0.0.1")

        run_subprocess_with_logging.assert_called_once_with(
            "docker-compose -f /bin/docker-compose.yml down")
Example #13
    def _list(self, ctx):
        what = ctx.config.opts("system", "list.config.option")
        if what == "telemetry":
            telemetry.Telemetry(ctx.config).list()
        elif what == "tracks":
            print("Available tracks:\n")
            for t in track.tracks.values():
                print("* %s: %s" % (t.name, t.description))
                print("\tTrack setups for this track:")
                for track_setup in t.track_setups:
                    print("\t* %s" % track_setup.name)
                print("")
        elif what == "pipelines":
            print("Available pipelines:\n")
            for p in pipelines.values():
                pipeline = p(ctx)
                print("* %s: %s" % (pipeline.name, pipeline.description))
        else:
            raise exceptions.ImproperlyConfigured(
                "Cannot list unknown configuration option [%s]" % what)
Example #14
    def test_stores_cluster_level_metrics_on_attach(
            self, nodes_info, cluster_info, metrics_store_add_meta_info):
        nodes_info.return_value = {
            "nodes": {
                "FCFjozkeTiOpN-SI88YEcg": {
                    "name": "rally0",
                    "host": "127.0.0.1",
                    "os": {
                        "name": "Mac OS X",
                        "version": "10.11.4",
                        "available_processors": 8
                    },
                    "jvm": {
                        "version": "1.8.0_74",
                        "vm_vendor": "Oracle Corporation"
                    }
                }
            }
        }
        cluster_info.return_value = {"version": {"build_hash": "abc123"}}
        cfg = self.create_config()
        metrics_store = metrics.EsMetricsStore(cfg)
        env_device = telemetry.EnvironmentInfo(cfg, metrics_store)
        t = telemetry.Telemetry(cfg, metrics_store, devices=[env_device])
        t.attach_to_cluster(
            cluster.Cluster([{"host": "::1:9200"}], [], {}, metrics_store, t,
                            client_factory_class=MockClientFactory))

        calls = [
            mock.call(metrics.MetaInfoScope.cluster, None, "source_revision",
                      "abc123"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor",
                      "Oracle Corporation"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version",
                      "1.8.0_74")
        ]
        metrics_store_add_meta_info.assert_has_calls(calls)
Example #15
    def test_daemon_stop_with_already_terminated_process(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "node", "root.dir", "test")
        cfg.add(config.Scope.application, "telemetry", "devices", [])
        cfg.add(config.Scope.application, "telemetry", "params", None)
        cfg.add(config.Scope.application, "system", "env.name", "test")

        ms = get_metrics_store(cfg)
        proc_launcher = launcher.ProcessLauncher(cfg)

        nodes = [
            cluster.Node(pid=-1,
                         binary_path="/bin",
                         host_name="localhost",
                         node_name="rally-0",
                         telemetry=telemetry.Telemetry())
        ]

        stopped_nodes = proc_launcher.stop(nodes, ms)
        # no nodes should have been stopped (they were already stopped)
        self.assertEqual([], stopped_nodes)
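
The assertion above relies on stop() noticing that the process behind pid -1 is already gone and returning an empty list. A plausible way to implement that check is to probe process liveness before attempting to terminate, for example with psutil; the snippet below is a sketch under that assumption, not Rally's actual ProcessLauncher.stop.

import psutil

def stop_nodes_sketch(nodes):
    stopped = []
    for node in nodes:
        if not psutil.pid_exists(node.pid):
            # already terminated (or an invalid pid such as -1): nothing to stop
            continue
        process = psutil.Process(node.pid)
        process.terminate()
        process.wait(timeout=10)  # raises psutil.TimeoutExpired if the node hangs
        stopped.append(node)
    return stopped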
Example #16
    def start(self, node_configurations):
        nodes = []
        for node_configuration in node_configurations:
            node_name = node_configuration.node_name
            host_name = node_configuration.ip
            binary_path = node_configuration.binary_path
            node_telemetry_dir = os.path.join(
                node_configuration.node_root_path, "telemetry")
            self.binary_paths[node_name] = binary_path
            self._start_process(binary_path)
            # only support a subset of telemetry for Docker hosts
            # (specifically, we do not allow users to enable any devices)
            node_telemetry = [
                telemetry.DiskIo(self.metrics_store, len(node_configurations),
                                 node_telemetry_dir, node_name),
            ]
            t = telemetry.Telemetry(devices=node_telemetry)
            telemetry.add_metadata_for_node(self.metrics_store, node_name,
                                            host_name)
            nodes.append(cluster.Node(0, host_name, node_name, t))
        return nodes
Example #17
    def prepare_telemetry(self):
        enabled_devices = self.config.opts("telemetry", "devices")
        telemetry_params = self.config.opts("telemetry", "params")

        es = self.es_clients
        es_default = self.es_clients["default"]
        self.telemetry = telemetry.Telemetry(
            enabled_devices,
            devices=[
                telemetry.NodeStats(telemetry_params, es, self.metrics_store),
                telemetry.ExternalEnvironmentInfo(es_default,
                                                  self.metrics_store),
                telemetry.ClusterEnvironmentInfo(es_default,
                                                 self.metrics_store),
                telemetry.JvmStatsSummary(es_default, self.metrics_store),
                telemetry.IndexStats(es_default, self.metrics_store),
                telemetry.MlBucketProcessingTime(es_default,
                                                 self.metrics_store),
                telemetry.CcrStats(telemetry_params, es, self.metrics_store),
                telemetry.RecoveryStats(telemetry_params, es,
                                        self.metrics_store)
            ])
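
Across these examples, telemetry.Telemetry acts as a dispatcher: it holds a list of device objects, optionally filtered by the user-enabled device names, and forwards lifecycle calls such as attach_to_node or attach_to_cluster to each active device. The class below is only a minimal sketch of that dispatcher pattern; the internal/command attributes and the exact filtering rule are assumptions for illustration, not Rally's implementation.

class TelemetrySketch:
    def __init__(self, enabled_devices=None, devices=None):
        self.enabled_devices = enabled_devices or []
        self.devices = devices or []

    def _active(self, device):
        # opt-in devices (e.g. "jfr") only run when the user enabled them;
        # always-on devices are assumed to be marked as internal
        return getattr(device, "internal", True) or getattr(device, "command", None) in self.enabled_devices

    def attach_to_node(self, node):
        for device in self.devices:
            if self._active(device) and hasattr(device, "attach_to_node"):
                device.attach_to_node(node)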
Example #18
    def _list(self, ctx):
        what = ctx.config.opts("system", "list.config.option")
        if what == "telemetry":
            print("Available telemetry devices:\n")
            print(tabulate.tabulate(telemetry.Telemetry(ctx.config).list(), ["Command", "Name", "Description"]))
            print("\nKeep in mind that each telemetry device may incur a runtime overhead which can skew results.")
        elif what == "tracks":
            print("Available tracks:\n")
            print(tabulate.tabulate([[t.name, t.short_description, ",".join(map(str, t.track_setups))] for t in track.tracks.values()],
                                    headers=["Name", "Description", "Track setups"]))

        elif what == "pipelines":
            print("Available pipelines:\n")
            print(tabulate.tabulate([[pipeline(ctx).name, pipeline(ctx).description] for pipeline in pipelines.values()],
                                    headers=["Name", "Description"]))
        elif what == "races":
            print("Recent races:\n")
            races = []
            for race in metrics.RaceStore(ctx.config).list():
                races.append([race["trial-timestamp"], race["track"], ",".join(race["track-setups"]), race["user-tag"]])

            print(tabulate.tabulate(races, headers=["Trial Timestamp", "Track", "Track setups", "User Tag"]))
        else:
            raise exceptions.ImproperlyConfigured("Cannot list unknown configuration option [%s]" % what)
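
As a small usage note, tabulate.tabulate takes a list of rows plus a headers list and returns an aligned plain-text table with a dashed separator under the header row; the device rows below are made up purely to show the shape of the call.

import tabulate

rows = [["jfr", "Flight Recorder", "Java Flight Recorder"],
        ["gc", "GC log", "Garbage collector logs"]]
print(tabulate.tabulate(rows, headers=["Command", "Name", "Description"]))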