Example #1
0
 def _setup_grafana(self):
     """Enable the grafana-server service and bring it up from a stopped state.

     The values returned by ``enable()`` and ``start()`` are treated as
     errors when truthy: they are logged and raised as ``RuntimeError``.
     """

     def _abort_on(problem):
         # Log and raise only when the service call reported something.
         if problem:
             log.error(problem)
             raise RuntimeError(problem)

     # grafana needs a daemon-reload before it can be enabled and started
     ServiceControlEL7.daemon_reload()

     grafana = ServiceControl.create("grafana-server")
     _abort_on(grafana.enable())

     # Stop an already-running instance first so start() below acts as a restart.
     if grafana.running:
         grafana.stop()

     _abort_on(grafana.start())
Example #2
0
    def _setup_rabbitmq_service(self):
        """Enable rabbitmq-server, then stop it and start it again.

        The value returned by each step is treated as an error when truthy:
        it is logged and raised as ``RuntimeError``.
        """
        log.info("Starting RabbitMQ...")

        # Special case: this service requires the legacy service control.
        rabbitmq = ServiceControlEL7("rabbitmq-server")

        # FIXME: HYD_640: the earlier shell-based restart
        #        ("service rabbitmq-server restart") only worked with stderr
        #        and stdout set to None, because subprocess.PIPE ended up
        #        blocking subprocess.communicate(). We still need to figure
        #        out why.
        # FIXME: this should also be converted to use the common Shell
        #        utility class.
        # ServiceControlEL7 really needs a _restart() method; until it has
        # one, the restart is spelled out as _stop() followed by _start().
        for step in ("enable", "_stop", "_start"):
            failure = getattr(rabbitmq, step)()
            if failure:
                log.error(failure)
                raise RuntimeError(failure)
    def setUp(self):
        """Start every service named in ``self.SERVICES``.

        If anything is raised part-way through, ``tearDown()`` is run and the
        exception is then re-raised unchanged.
        """
        try:
            for name in self.SERVICES:
                log.info("Starting service '%s'" % name)
                unit = ServiceControlEL7(name)
                unit.start(retry_time=0.1, validate_time=0)
        except:
            # Catch-all is deliberate (the exception is re-raised below):
            # make sure we don't leave a systemd unit up.
            self.tearDown()
            raise
Example #4
0
    def _setup_rabbitmq_service(self):
        """Enable rabbitmq-server and restart it through systemctl.

        Raises:
            RuntimeError: if ``enable()`` returns a truthy error value.
            CommandError: propagated from ``self.try_shell`` when the
                ``systemctl restart`` invocation raises it; the exception is
                logged first and keeps its original traceback.
        """
        log.info("Starting RabbitMQ...")
        # special case where service requires legacy service control
        rabbit_service = ServiceControlEL7("rabbitmq-server")

        error = rabbit_service.enable()
        if error:
            log.error(error)
            raise RuntimeError(error)

        try:
            self.try_shell(["systemctl", "restart", "rabbitmq-server"])
        except CommandError as exc:
            log.error(exc)
            # A bare ``raise`` re-raises the active exception as-is. The
            # previous ``raise error`` replaced the original traceback on
            # Python 2 and added a redundant frame on Python 3.
            raise
    def tearDown(self):
        """Shut down RPC threads if they were started, then stop every service in ``self.SERVICES``."""
        # Imported locally on purpose: per the original author's note, a
        # global import does not work because DJANGO_SETTINGS_MODULE is not
        # initialized yet at that point, but it may be by the time execution
        # reaches this method.
        from chroma_core.services.rpc import RpcClientFactory

        # Insider knowledge of RpcClientFactory: when it is not in lightweight
        # mode, shut its threads down and reset its private state by hand.
        if RpcClientFactory._lightweight is False:
            RpcClientFactory.shutdown_threads()
            RpcClientFactory._lightweight = True
            RpcClientFactory._available = True
            RpcClientFactory._instances = {}

        for name in self.SERVICES:
            log.info("Stopping service '%s'" % name)
            unit = ServiceControlEL7(name)
            unit.stop(retry_time=0.1, validate_time=0)
Example #6
0
    def _setup_rabbitmq_service(self):
        """Enable rabbitmq-server, then restart it as a stop followed by a start.

        The value returned by each service call is treated as an error when
        truthy: it is logged and raised as ``RuntimeError``.
        """

        def _abort_on(problem):
            # Log and raise only when the service call reported something.
            if problem:
                log.error(problem)
                raise RuntimeError(problem)

        log.info("Starting RabbitMQ...")

        # Special case: this service requires the legacy service control.
        rabbitmq = ServiceControlEL7("rabbitmq-server")

        _abort_on(rabbitmq.enable())
        _abort_on(rabbitmq._stop())
        _abort_on(rabbitmq._start())
Example #7
0
    def _setup_influxdb(self):
        """Configure, restart and provision InfluxDB.

        Steps, in order:
          1. write the environment overrides to /etc/default/influxdb;
          2. enable the influxdb service, then stop and start it;
          3. poll the ``influx`` CLI until it runs without ``CommandError``;
          4. create the databases, retention policies and continuous queries.

        Truthy values returned by the service calls are logged and raised as
        ``RuntimeError``.
        """

        def _abort_on(problem):
            # Log and raise only when the service call reported something.
            if problem:
                log.error(problem)
                raise RuntimeError(problem)

        def _execute(statement):
            # Run one statement through the influx CLI with no database selected.
            return self.try_shell(["influx", "-execute", statement])

        def _execute_on(database, statement):
            # Run one statement through the influx CLI against ``database``.
            return self.try_shell(
                ["influx", "-database", database, "-execute", statement]
            )

        influxdb = ServiceControlEL7("influxdb")

        # Disable reporting, and disable influx http/query logging
        # (of every write and every query).
        with open("/etc/default/influxdb", "w") as env_file:
            env_file.writelines(
                (
                    "INFLUXDB_DATA_QUERY_LOG_ENABLED=false\n",
                    "INFLUXDB_REPORTING_DISABLED=true\n",
                    "INFLUXDB_HTTP_LOG_ENABLED=false\n",
                )
            )

        log.info("Starting InfluxDB...")
        _abort_on(influxdb.enable())
        _abort_on(influxdb._stop())
        _abort_on(influxdb._start())

        # Wait for influx to finish starting
        wait_for_result(
            lambda: _execute("exit"),
            logger=log,
            timeout=60,
            expected_exception_classes=[CommandError],
        )

        # When changing any of the following also change: docker/influxdb/setup-influxdb.sh
        log.info("Creating InfluxDB database...")
        _execute("CREATE DATABASE {}".format(settings.INFLUXDB_IML_DB))

        scan_db = settings.INFLUXDB_STRATAGEM_SCAN_DB
        _execute("CREATE DATABASE {}".format(scan_db))
        _execute_on(
            scan_db,
            'ALTER RETENTION POLICY "autogen" ON "{}" DURATION 90d SHARD DURATION 9d'
            .format(scan_db),
        )

        stats_db = settings.INFLUXDB_IML_STATS_DB
        _execute("CREATE DATABASE {}".format(stats_db))

        # Create the "long_term" policy; when the CREATE raises CommandError,
        # fall back to altering the policy instead.
        long_duration = settings.INFLUXDB_IML_STATS_LONG_DURATION
        try:
            _execute_on(
                stats_db,
                'CREATE RETENTION POLICY "long_term" ON "{}" DURATION {} REPLICATION 1 SHARD DURATION 5d'
                .format(stats_db, long_duration),
            )
        except CommandError:
            _execute_on(
                stats_db,
                'ALTER RETENTION POLICY "long_term" ON "{}" DURATION {} REPLICATION 1 SHARD DURATION 5d'
                .format(stats_db, long_duration),
            )

        # Drop and re-create the four downsampling continuous queries in a
        # single influx invocation (statements separated by "; ").
        continuous_query_statements = [
            'DROP CONTINUOUS QUERY "downsample_means" ON "{}"'.format(stats_db),
            'DROP CONTINUOUS QUERY "downsample_lnet" ON "{}"'.format(stats_db),
            'DROP CONTINUOUS QUERY "downsample_samples" ON "{}"'.format(stats_db),
            'DROP CONTINUOUS QUERY "downsample_sums" ON "{}"'.format(stats_db),
            'CREATE CONTINUOUS QUERY "downsample_means" ON "{}" BEGIN SELECT mean(*) INTO "{}"."long_term".:MEASUREMENT FROM "{}"."autogen"."target","{}"."autogen"."host","{}"."autogen"."node" GROUP BY time(30m),* END'
            .format(stats_db, stats_db, stats_db, stats_db, stats_db),
            'CREATE CONTINUOUS QUERY "downsample_lnet" ON "{}" BEGIN SELECT (last("send_count") - first("send_count")) / count("send_count") AS "mean_diff_send", (last("recv_count") - first("recv_count")) / count("recv_count") AS "mean_diff_recv" INTO "{}"."long_term"."lnet" FROM "lnet" WHERE "nid" != \'"0@lo"\' GROUP BY time(30m),"host","nid" END'
            .format(stats_db, stats_db),
            'CREATE CONTINUOUS QUERY "downsample_samples" ON "{}" BEGIN SELECT (last("samples") - first("samples")) / count("samples") AS "mean_diff_samples" INTO "{}"."long_term"."target" FROM "target" GROUP BY time(30m),* END'
            .format(stats_db, stats_db),
            'CREATE CONTINUOUS QUERY "downsample_sums" ON "{}" BEGIN SELECT (last("sum") - first("sum")) / count("sum") AS "mean_diff_sum" INTO "{}"."long_term"."target" FROM "target" WHERE "units"=\'"bytes"\' GROUP BY time(30m),* END'
            .format(stats_db, stats_db),
        ]
        _execute_on(
            stats_db,
            "{}; {}; {}; {}; {}; {}; {}; {}".format(*continuous_query_statements),
        )

        _execute_on(
            stats_db,
            'ALTER RETENTION POLICY "autogen" ON "{}" DURATION 1d  REPLICATION 1 SHARD DURATION 2h DEFAULT'
            .format(stats_db),
        )
 def restart(self, program):
     """Restart the service named by ``program`` via ServiceControlEL7."""
     unit = ServiceControlEL7(program)
     unit.restart(retry_time=0.1, validate_time=0)
 def stop(self, program):
     """Stop the service named by ``program`` via ServiceControlEL7."""
     unit = ServiceControlEL7(program)
     unit.stop(retry_time=0.1, validate_time=0)
 def start(self, program):
     """Start the service named by ``program``, then wait on its ports.

     The ports come from ``self.PORTS[program]``; a program with no entry
     there has nothing to wait for.
     """
     unit = ServiceControlEL7(program)
     unit.start(retry_time=0.1, validate_time=0)

     ports = self.PORTS.get(program, [])
     for number in ports:
         self._wait_for_port(number)