Example #1
    def _start_node(self, node_configuration, node_count_on_host):
        host_name = node_configuration.ip
        node_name = node_configuration.node_name
        car = node_configuration.car
        binary_path = node_configuration.binary_path
        data_paths = node_configuration.data_paths
        node_telemetry_dir = "%s/telemetry" % node_configuration.node_root_path
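        # resolve the Java major version and JAVA_HOME to use for this car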
        java_major_version, java_home = java_resolver.java_home(car, self.cfg)

        self.logger.info("Starting node [%s] based on car [%s].", node_name,
                         car)

        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
        telemetry_params = self.cfg.opts("mechanic", "telemetry.params")
        node_telemetry = [
            telemetry.DiskIo(self.metrics_store, node_count_on_host,
                             node_telemetry_dir, node_name),
            telemetry.NodeEnvironmentInfo(self.metrics_store),
            telemetry.IndexSize(data_paths, self.metrics_store),
            telemetry.MergeParts(self.metrics_store,
                                 node_configuration.log_path),
            telemetry.StartupTime(self.metrics_store),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)
        env = self._prepare_env(car, node_name, java_home, t)
        t.on_pre_node_start(node_name)
        node_pid = self._start_process(binary_path, env)
        node = cluster.Node(node_pid, host_name, node_name, t)
        self.logger.info("Attaching telemetry devices to node [%s].",
                         node_name)
        t.attach_to_node(node)

        return node
Example #2
    def _start_node(self, node_configuration, node_count_on_host, java_major_version):
        host_name = node_configuration.ip
        node_name = node_configuration.node_name
        car = node_configuration.car
        binary_path = node_configuration.binary_path
        data_paths = node_configuration.data_paths
        node_telemetry_dir = "%s/telemetry" % node_configuration.node_root_path

        self.logger.info("Starting node [%s] based on car [%s].", node_name, car)

        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
        telemetry_params = self.cfg.opts("mechanic", "telemetry.params")
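        # per-node telemetry devices: some record into the node's telemetry directory, others report to the metrics store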
        node_telemetry = [
            telemetry.FlightRecorder(telemetry_params, node_telemetry_dir, java_major_version),
            telemetry.JitCompiler(node_telemetry_dir),
            telemetry.Gc(node_telemetry_dir, java_major_version),
            telemetry.PerfStat(node_telemetry_dir),
            telemetry.DiskIo(self.metrics_store, node_count_on_host),
            telemetry.CpuUsage(self.metrics_store),
            telemetry.NodeEnvironmentInfo(self.metrics_store),
            telemetry.IndexSize(data_paths, self.metrics_store),
            telemetry.MergeParts(self.metrics_store, node_configuration.log_path),
            telemetry.StartupTime(self.metrics_store),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)
        env = self._prepare_env(car, node_name, t)
        t.on_pre_node_start(node_name)
        node_process = self._start_process(env, node_name, binary_path)
        node = cluster.Node(node_process, host_name, node_name, t)
        self.logger.info("Node [%s] has successfully started. Attaching telemetry devices.", node_name)
        t.attach_to_node(node)
        self.logger.info("Telemetry devices are now attached to node [%s].", node_name)

        return node
Example #3
    def start(self, car):
        port = self.cfg.opts("provisioning", "node.http.port")
        hosts = [{"host": "localhost", "port": port}]
        client_options = self.cfg.opts("launcher", "client.options")
        # unified client config
        self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
        self.cfg.add(config.Scope.benchmark, "client", "options",
                     client_options)

        es = client.EsClientFactory(hosts, client_options).create()

        # we're very specific which nodes we kill as there is potentially also an Elasticsearch based metrics store running on this machine
        node_prefix = self.cfg.opts("provisioning", "node.name.prefix")
        process.kill_running_es_instances(node_prefix)

        logger.info("Starting a cluster based on car [%s] with [%d] nodes." %
                    (car, car.nodes))

        cluster_telemetry = [
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.MergeParts(self.cfg, self.metrics_store),
            telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
            telemetry.NodeStats(self.cfg, es, self.metrics_store),
            telemetry.IndexStats(self.cfg, es, self.metrics_store),
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.IndexSize(self.cfg, self.metrics_store)
        ]

        t = telemetry.Telemetry(self.cfg, devices=cluster_telemetry)
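        # start one node per configured node count, then attach the cluster-level telemetry devices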
        c = cluster.Cluster(
            [self._start_node(node, car, es) for node in range(car.nodes)], t)
        t.attach_to_cluster(c)
        return c
Example #4
    def test_stores_index_size_for_data_paths(self, run_subprocess,
                                              metrics_store_node_count,
                                              get_size):
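        # the two data paths report 2048 and 16384 bytes; the device should store their sum (18432)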
        get_size.side_effect = [2048, 16384]

        cfg = create_config()
        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexSize(
            ["/var/elasticsearch/data/1", "/var/elasticsearch/data/2"],
            metrics_store)
        t = telemetry.Telemetry(enabled_devices=[], devices=[device])
        node = cluster.Node(process=None,
                            host_name="localhost",
                            node_name="rally-node-0",
                            telemetry=t)
        t.attach_to_node(node)
        t.on_benchmark_start()
        t.on_benchmark_stop()
        t.detach_from_node(node, running=True)
        t.detach_from_node(node, running=False)

        metrics_store_node_count.assert_has_calls([
            mock.call("rally-node-0", "final_index_size_bytes", 18432, "byte")
        ])

        run_subprocess.assert_has_calls([
            mock.call("find /var/elasticsearch/data/1 -ls",
                      header="index files:"),
            mock.call("find /var/elasticsearch/data/2 -ls",
                      header="index files:")
        ])
Example #5
    def start(self, car, binary, data_paths):
        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = client.EsClientFactory(hosts, client_options).create()

        # we're very specific which nodes we kill as there is potentially also an Elasticsearch based metrics store running on this machine
        node_prefix = self.cfg.opts("provisioning", "node.name.prefix")
        process.kill_running_es_instances(node_prefix)

        logger.info("Starting a cluster based on car [%s] with [%d] nodes." % (car, car.nodes))

        # TODO dm: Get rid of these...
        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")

        cluster_telemetry = [
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.MergeParts(self.metrics_store, self.node_log_dir),
            telemetry.EnvironmentInfo(es, self.metrics_store),
            telemetry.NodeStats(es, self.metrics_store),
            telemetry.IndexStats(es, self.metrics_store),
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.IndexSize(data_paths, self.metrics_store)
        ]
        t = telemetry.Telemetry(enabled_devices, devices=cluster_telemetry)
        c = cluster.Cluster(hosts, [self._start_node(node, car, es, binary) for node in range(car.nodes)], t)
        logger.info("All cluster nodes have successfully started. Checking if REST API is available.")
        if wait_for_rest_layer(es):
            logger.info("REST API is available. Attaching telemetry devices to cluster.")
            t.attach_to_cluster(c)
            logger.info("Telemetry devices are now attached to the cluster.")
        else:
            logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
            self.stop(c)
            raise exceptions.LaunchError("Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
        return c
Example #6
    def test_stores_nothing_if_no_data_path(self, run_subprocess, metrics_store_cluster_count, get_size):
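        # with no data paths configured, the device must not run any subprocess or store any metrics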
        get_size.return_value = 2048

        cfg = create_config()

        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexSize(data_paths=[], metrics_store=metrics_store)
        t = telemetry.Telemetry(devices=[device])
        t.attach_to_cluster(None)
        t.on_benchmark_start()
        t.on_benchmark_stop()
        t.detach_from_cluster(None)

        run_subprocess.assert_not_called()
        metrics_store_cluster_count.assert_not_called()
        get_size.assert_not_called()
Example #7
    def test_stores_nothing_if_no_data_path(self, run_subprocess, metrics_store_cluster_count, get_size):
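        # node-level counterpart of the cluster test above: without data paths nothing is measured or stored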
        get_size.return_value = 2048

        cfg = create_config()

        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexSize(data_paths=[], metrics_store=metrics_store)
        t = telemetry.Telemetry(devices=[device])
        node = cluster.Node(process=None, host_name="localhost", node_name="rally-node-0", telemetry=t)
        t.attach_to_node(node)
        t.on_benchmark_start()
        t.on_benchmark_stop()
        t.detach_from_node(node, running=True)
        t.detach_from_node(node, running=False)

        run_subprocess.assert_not_called()
        metrics_store_cluster_count.assert_not_called()
        get_size.assert_not_called()
Example #8
    def test_stores_index_size_for_data_path(self, run_subprocess, metrics_store_cluster_count, get_size):
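        # a single 2048-byte data path should be stored as "final_index_size_bytes" at cluster level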
        get_size.return_value = 2048

        cfg = create_config()
        metrics_store = metrics.EsMetricsStore(cfg)
        device = telemetry.IndexSize(["/var/elasticsearch/data"], metrics_store)
        t = telemetry.Telemetry(enabled_devices=[], devices=[device])
        t.attach_to_cluster(None)
        t.on_benchmark_start()
        t.on_benchmark_stop()
        t.detach_from_cluster(None)

        metrics_store_cluster_count.assert_has_calls([
            mock.call("final_index_size_bytes", 2048, "byte")
        ])

        run_subprocess.assert_has_calls([
            mock.call("find /var/elasticsearch/data -ls", header="index files:")
        ])
Example #9
    def _start_node(self, node_configuration, node_count_on_host):
        host_name = node_configuration.ip
        node_name = node_configuration.node_name
        car = node_configuration.car
        binary_path = node_configuration.binary_path
        data_paths = node_configuration.data_paths
        node_telemetry_dir = os.path.join(node_configuration.node_root_path,
                                          "telemetry")

        java_major_version, java_home = java_resolver.java_home(car, self.cfg)

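        # register the node's name and host as metadata in the metrics store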
        telemetry.add_metadata_for_node(self.metrics_store, node_name,
                                        host_name)

        self.logger.info("Starting node [%s] based on car [%s].", node_name,
                         car)

        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
        telemetry_params = self.cfg.opts("mechanic", "telemetry.params")
        node_telemetry = [
            telemetry.FlightRecorder(telemetry_params, node_telemetry_dir,
                                     java_major_version),
            telemetry.JitCompiler(node_telemetry_dir),
            telemetry.Gc(node_telemetry_dir, java_major_version),
            telemetry.DiskIo(self.metrics_store, node_count_on_host,
                             node_telemetry_dir, node_name),
            telemetry.IndexSize(data_paths, self.metrics_store),
            telemetry.StartupTime(self.metrics_store),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)
        env = self._prepare_env(car, node_name, java_home, t)
        t.on_pre_node_start(node_name)
        node_pid = self._start_process(binary_path, env)
        node = cluster.Node(node_pid, host_name, node_name, t)

        self.logger.info("Attaching telemetry devices to node [%s].",
                         node_name)
        t.attach_to_node(node)

        return node