Example #1
    def _start_node(self, node_configuration, node_count_on_host, java_major_version):
        host_name = node_configuration.ip
        node_name = node_configuration.node_name
        car = node_configuration.car
        binary_path = node_configuration.binary_path
        data_paths = node_configuration.data_paths
        node_telemetry_dir = "%s/telemetry" % node_configuration.node_root_path

        self.logger.info("Starting node [%s] based on car [%s].", node_name, car)

        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
        telemetry_params = self.cfg.opts("mechanic", "telemetry.params")
        node_telemetry = [
            telemetry.FlightRecorder(telemetry_params, node_telemetry_dir, java_major_version),
            telemetry.JitCompiler(node_telemetry_dir),
            telemetry.Gc(node_telemetry_dir, java_major_version),
            telemetry.PerfStat(node_telemetry_dir),
            telemetry.DiskIo(self.metrics_store, node_count_on_host),
            telemetry.CpuUsage(self.metrics_store),
            telemetry.NodeEnvironmentInfo(self.metrics_store),
            telemetry.IndexSize(data_paths, self.metrics_store),
            telemetry.MergeParts(self.metrics_store, node_configuration.log_path),
            telemetry.StartupTime(self.metrics_store),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)
        env = self._prepare_env(car, node_name, t)
        t.on_pre_node_start(node_name)
        node_process = self._start_process(env, node_name, binary_path)
        node = cluster.Node(node_process, host_name, node_name, t)
        self.logger.info("Node [%s] has successfully started. Attaching telemetry devices.", node_name)
        t.attach_to_node(node)
        self.logger.info("Telemetry devices are now attached to node [%s].", node_name)

        return node
Example #2
    def _start_node(self, node_configuration, node_count_on_host):
        host_name = node_configuration.ip
        node_name = node_configuration.node_name
        car = node_configuration.car
        binary_path = node_configuration.binary_path
        data_paths = node_configuration.data_paths
        node_telemetry_dir = "%s/telemetry" % node_configuration.node_root_path
        java_major_version, java_home = java_resolver.java_home(car, self.cfg)

        self.logger.info("Starting node [%s] based on car [%s].", node_name,
                         car)

        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
        telemetry_params = self.cfg.opts("mechanic", "telemetry.params")
        node_telemetry = [
            telemetry.DiskIo(self.metrics_store, node_count_on_host,
                             node_telemetry_dir, node_name),
            telemetry.NodeEnvironmentInfo(self.metrics_store),
            telemetry.IndexSize(data_paths, self.metrics_store),
            telemetry.MergeParts(self.metrics_store,
                                 node_configuration.log_path),
            telemetry.StartupTime(self.metrics_store),
        ]

        t = telemetry.Telemetry(enabled_devices, devices=node_telemetry)
        env = self._prepare_env(car, node_name, java_home, t)
        t.on_pre_node_start(node_name)
        node_pid = self._start_process(binary_path, env)
        node = cluster.Node(node_pid, host_name, node_name, t)
        self.logger.info("Attaching telemetry devices to node [%s].",
                         node_name)
        t.attach_to_node(node)

        return node
Example #3
    def test_store_calculated_metrics(self, listdir_mock, open_mock,
                                      metrics_store_put_value,
                                      metrics_store_put_count):
        log_file = '''
        INFO: System starting up
        INFO: 100 msec to merge doc values [500 docs]
        INFO: Something unrelated
        INFO: 250 msec to merge doc values [1350 docs]
        INFO: System shutting down
        '''
        listdir_mock.return_value = [open_mock]
        open_mock.side_effect = [
            mock.mock_open(read_data=log_file).return_value
        ]
        metrics_store = metrics.EsMetricsStore(self.cfg)
        node = cluster.Node(None, "io", "rally0", None)
        merge_parts_device = telemetry.MergeParts(metrics_store,
                                                  node_log_dir="/var/log")
        merge_parts_device.attach_to_node(node)
        merge_parts_device.on_benchmark_stop()

        metrics_store_put_value.assert_called_with(
            "rally0", "merge_parts_total_time_doc_values", 350, "ms")
        metrics_store_put_count.assert_called_with(
            "rally0", "merge_parts_total_docs_doc_values", 1850)
Example #4
    def start(self, car):
        port = self.cfg.opts("provisioning", "node.http.port")
        hosts = [{"host": "localhost", "port": port}]
        client_options = self.cfg.opts("launcher", "client.options")
        # unified client config
        self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
        self.cfg.add(config.Scope.benchmark, "client", "options",
                     client_options)

        es = client.EsClientFactory(hosts, client_options).create()

        # we are very specific about which nodes we kill, as an Elasticsearch-based metrics store may also be running on this machine
        node_prefix = self.cfg.opts("provisioning", "node.name.prefix")
        process.kill_running_es_instances(node_prefix)

        logger.info("Starting a cluster based on car [%s] with [%d] nodes." %
                    (car, car.nodes))

        cluster_telemetry = [
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.MergeParts(self.cfg, self.metrics_store),
            telemetry.EnvironmentInfo(self.cfg, es, self.metrics_store),
            telemetry.NodeStats(self.cfg, es, self.metrics_store),
            telemetry.IndexStats(self.cfg, es, self.metrics_store),
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.IndexSize(self.cfg, self.metrics_store)
        ]

        t = telemetry.Telemetry(self.cfg, devices=cluster_telemetry)
        c = cluster.Cluster(
            [self._start_node(node, car, es) for node in range(car.nodes)], t)
        t.attach_to_cluster(c)
        return c
Example #5
    def start(self, car, binary, data_paths):
        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = client.EsClientFactory(hosts, client_options).create()

        # we are very specific about which nodes we kill, as an Elasticsearch-based metrics store may also be running on this machine
        node_prefix = self.cfg.opts("provisioning", "node.name.prefix")
        process.kill_running_es_instances(node_prefix)

        logger.info("Starting a cluster based on car [%s] with [%d] nodes." % (car, car.nodes))

        # TODO dm: Get rid of these...
        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")

        cluster_telemetry = [
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.MergeParts(self.metrics_store, self.node_log_dir),
            telemetry.EnvironmentInfo(es, self.metrics_store),
            telemetry.NodeStats(es, self.metrics_store),
            telemetry.IndexStats(es, self.metrics_store),
            # TODO dm: Once we do distributed launching, this needs to be done per node not per cluster
            telemetry.IndexSize(data_paths, self.metrics_store)
        ]
        t = telemetry.Telemetry(enabled_devices, devices=cluster_telemetry)
        c = cluster.Cluster(hosts, [self._start_node(node, car, es, binary) for node in range(car.nodes)], t)
        logger.info("All cluster nodes have successfully started. Checking if REST API is available.")
        if wait_for_rest_layer(es):
            logger.info("REST API is available. Attaching telemetry devices to cluster.")
            t.attach_to_cluster(c)
            logger.info("Telemetry devices are now attached to the cluster.")
        else:
            logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
            self.stop(c)
            raise exceptions.LaunchError("Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
        return c
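
wait_for_rest_layer is not shown in this example. A minimal sketch of the idea, assuming it simply retries a cheap REST call until the cluster answers or the attempts are exhausted (not Rally's actual implementation):

    import time

    def wait_for_rest_layer(es, max_attempts=20):
        # Poll until Elasticsearch answers the REST call or we give up.
        for _ in range(max_attempts):
            try:
                es.info()
                return True
            except Exception:
                time.sleep(1)
        return False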
Example #6
    def test_store_nothing_if_no_metrics_present(self, listdir_mock, open_mock, metrics_store_put_value, metrics_store_put_count):
        listdir_mock.return_value = [open_mock]
        open_mock.side_effect = [
            mock.mock_open(read_data="no data to parse").return_value
        ]
        metrics_store = metrics.EsMetricsStore(self.cfg)
        merge_parts_device = telemetry.MergeParts(self.cfg, metrics_store)
        merge_parts_device.on_benchmark_stop()

        metrics_store_put_value.assert_not_called()
        metrics_store_put_count.assert_not_called()
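
For reference, the aggregation that both MergeParts tests exercise can be illustrated with a small standalone sketch. It is a simplification, assuming only the "N msec to merge <part> [M docs]" line format seen in the fixture of Example #3:

    import re

    MERGE_LINE = re.compile(r"(\d+) msec to merge ([a-z ]+) \[(\d+) docs\]")

    def aggregate_merge_parts(log_lines):
        # Sum merge durations (ms) and document counts per merge part.
        times, docs = {}, {}
        for line in log_lines:
            match = MERGE_LINE.search(line)
            if match:
                duration_ms, part, doc_count = match.groups()
                key = part.strip().replace(" ", "_")
                times[key] = times.get(key, 0) + int(duration_ms)
                docs[key] = docs.get(key, 0) + int(doc_count)
        return times, docs

Running the fixture from Example #3 through this sketch yields times == {"doc_values": 350} and docs == {"doc_values": 1850}, matching the values the test asserts.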