Example #1
0
    def start(self):
        hosts = self.cfg.opts("client", "hosts")
        client_options = self.cfg.opts("client", "options")
        es = self.client_factory(hosts, client_options).create()

        t = telemetry.Telemetry(devices=[
            telemetry.ClusterMetaDataInfo(es),
            telemetry.ClusterEnvironmentInfo(es, self.metrics_store),
            telemetry.NodeStats(es, self.metrics_store),
            telemetry.IndexStats(es, self.metrics_store)
        ])

        # The list of nodes will be populated by ClusterMetaDataInfo, so no need to do it here
        c = cluster.Cluster(hosts, [], t)
        logger.info(
            "All cluster nodes have successfully started. Checking if REST API is available."
        )
        if wait_for_rest_layer(es, max_attempts=20):
            logger.info(
                "REST API is available. Attaching telemetry devices to cluster."
            )
            t.attach_to_cluster(c)
            logger.info("Telemetry devices are now attached to the cluster.")
        else:
            # Just stop the cluster here and raise. The caller is responsible for terminating individual nodes.
            logger.error(
                "REST API layer is not yet available. Forcefully terminating cluster."
            )
            self.stop(c)
            raise exceptions.LaunchError(
                "Elasticsearch REST API layer is not available. Forcefully terminated cluster."
            )

        return c
Example #2
0
    def start(self):
        """
        Performs final startup tasks.

        Precondition: All cluster nodes have been started.
        Postcondition: The cluster is ready to receive HTTP requests or a ``LaunchError`` is raised.

        :return: A representation of the launched cluster.
        """
        enabled_devices = self.cfg.opts("mechanic", "telemetry.devices")
        telemetry_params = self.cfg.opts("mechanic", "telemetry.params")
        all_hosts = self.cfg.opts("client", "hosts").all_hosts
        default_hosts = self.cfg.opts("client", "hosts").default
        preserve = self.cfg.opts("mechanic", "preserve.install")
        skip_rest_api_check = self.cfg.opts("mechanic", "skip.rest.api.check")

        es = {}
        for cluster_name, cluster_hosts in all_hosts.items():
            all_client_options = self.cfg.opts("client", "options").all_client_options
            cluster_client_options = dict(all_client_options[cluster_name])
            # Use retries to avoid aborts on long living connections for telemetry devices
            cluster_client_options["retry-on-timeout"] = True
            es[cluster_name] = self.client_factory(cluster_hosts, cluster_client_options).create()

        es_default = es["default"]

        t = telemetry.Telemetry(enabled_devices, devices=[
            telemetry.NodeStats(telemetry_params, es, self.metrics_store),
            telemetry.ClusterMetaDataInfo(es_default),
            telemetry.ClusterEnvironmentInfo(es_default, self.metrics_store),
            telemetry.JvmStatsSummary(es_default, self.metrics_store),
            telemetry.IndexStats(es_default, self.metrics_store),
            telemetry.MlBucketProcessingTime(es_default, self.metrics_store),
            telemetry.CcrStats(telemetry_params, es, self.metrics_store),
            telemetry.RecoveryStats(telemetry_params, es, self.metrics_store)
        ])

        # The list of nodes will be populated by ClusterMetaDataInfo, so no need to do it here
        c = cluster.Cluster(default_hosts, [], t, preserve)

        if skip_rest_api_check:
            self.logger.info("Skipping REST API check and attaching telemetry devices to cluster.")
            t.attach_to_cluster(c)
            self.logger.info("Telemetry devices are now attached to the cluster.")
        else:
            self.logger.info("All cluster nodes have successfully started. Checking if REST API is available.")
            if wait_for_rest_layer(es_default, max_attempts=40):
                self.logger.info("REST API is available. Attaching telemetry devices to cluster.")
                t.attach_to_cluster(c)
                self.logger.info("Telemetry devices are now attached to the cluster.")
            else:
                # Just stop the cluster here and raise. The caller is responsible for terminating individual nodes.
                self.logger.error("REST API layer is not yet available. Forcefully terminating cluster.")
                self.stop(c)
                raise exceptions.LaunchError(
                    "Elasticsearch REST API layer is not available. Forcefully terminated cluster.")
        return c
Example #3
0
    def test_stores_cluster_level_metrics_on_attach(
            self, metrics_store_add_meta_info):
        nodes_info = {"nodes": collections.OrderedDict()}
        nodes_info["nodes"]["FCFjozkeTiOpN-SI88YEcg"] = {
            "name":
            "rally0",
            "host":
            "127.0.0.1",
            "attributes": {
                "group": "cold_nodes"
            },
            "os": {
                "name": "Mac OS X",
                "version": "10.11.4",
                "available_processors": 8
            },
            "jvm": {
                "version": "1.8.0_74",
                "vm_vendor": "Oracle Corporation"
            },
            "plugins": [{
                "name": "ingest-geoip",
                "version": "5.0.0",
                "description":
                "Ingest processor that uses looksup geo data ...",
                "classname":
                "org.elasticsearch.ingest.geoip.IngestGeoIpPlugin",
                "has_native_controller": False
            }]
        }
        nodes_info["nodes"]["EEEjozkeTiOpN-SI88YEcg"] = {
            "name":
            "rally1",
            "host":
            "127.0.0.1",
            "attributes": {
                "group": "hot_nodes"
            },
            "os": {
                "name": "Mac OS X",
                "version": "10.11.5",
                "available_processors": 8
            },
            "jvm": {
                "version": "1.8.0_102",
                "vm_vendor": "Oracle Corporation"
            },
            "plugins": [{
                "name": "ingest-geoip",
                "version": "5.0.0",
                "description":
                "Ingest processor that uses looksup geo data ...",
                "classname":
                "org.elasticsearch.ingest.geoip.IngestGeoIpPlugin",
                "has_native_controller": False
            }]
        }

        cluster_info = {
            "version": {
                "build_hash": "abc123",
                "number": "6.0.0-alpha1"
            }
        }

        cfg = create_config()
        client = Client(nodes=SubClient(info=nodes_info), info=cluster_info)
        metrics_store = metrics.EsMetricsStore(cfg)
        env_device = telemetry.ClusterEnvironmentInfo(client, metrics_store)
        t = telemetry.Telemetry(cfg, devices=[env_device])
        t.attach_to_cluster(cluster.Cluster([], [], t))
        calls = [
            mock.call(metrics.MetaInfoScope.cluster, None, "source_revision",
                      "abc123"),
            mock.call(metrics.MetaInfoScope.cluster, None,
                      "distribution_version", "6.0.0-alpha1"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_vendor",
                      "Oracle Corporation"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "jvm_version",
                      "1.8.0_74"),
            mock.call(metrics.MetaInfoScope.node, "rally1", "jvm_vendor",
                      "Oracle Corporation"),
            mock.call(metrics.MetaInfoScope.node, "rally1", "jvm_version",
                      "1.8.0_102"),
            mock.call(metrics.MetaInfoScope.node, "rally0", "plugins",
                      ["ingest-geoip"]),
            mock.call(metrics.MetaInfoScope.node, "rally1", "plugins",
                      ["ingest-geoip"]),
            # can push up to cluster level as all nodes have the same plugins installed
            mock.call(metrics.MetaInfoScope.cluster, None, "plugins",
                      ["ingest-geoip"]),
            mock.call(metrics.MetaInfoScope.node, "rally0", "attribute_group",
                      "cold_nodes"),
            mock.call(metrics.MetaInfoScope.node, "rally1", "attribute_group",
                      "hot_nodes"),
        ]

        metrics_store_add_meta_info.assert_has_calls(calls)