Ejemplo n.º 1
0
    def run(self, command=None, checks_list=None, reporter=None, redirect_std_streams=False):
        """
        Launch JMXFetch with the current configuration.

        command: JMXFetch command to execute; a falsy value selects JMX_COLLECT_COMMAND.
        checks_list: when truthy, the JMXFetch parameters are (re)configured from it first.
        reporter: forwarded unchanged to `_start`.
        redirect_std_streams: when False, JMXFetch's stdout and stderr are streamed directly
        to the environment's stdout and stderr and cannot be retrieved via python's
        sys.stdout and sys.stderr. When True, they are redirected to python's sys.stdout
        and sys.stderr.

        Returns whatever `_start` returns, or None when there is nothing to run.
        Any exception raised while starting is logged and re-raised.
        """
        if not command:
            command = JMX_COLLECT_COMMAND

        # An explicit `checks_list`, or the absence of any configuration so far,
        # both call for a (re)configuration of the JMXFetch parameters.
        if checks_list or self.jmx_checks is None:
            self.configure(checks_list)

        try:
            if len(self.invalid_checks) > 0:
                # Failing to report the invalid checks must not prevent the start.
                try:
                    JMXFiles.write_status_file(self.invalid_checks)
                except Exception:
                    log.exception("Error while writing JMX status file")

            if len(self.jmx_checks) == 0 and not self.service_discovery:
                # We're exiting purposefully, so exit with zero (supervisor's expected
                # code). HACK: Sleep a little bit so supervisor thinks we've started
                # cleanly and thus can exit cleanly.
                time.sleep(4)
                log.info("No valid JMX integration was found. Exiting ...")
                return None

            return self._start(
                self.java_bin_path,
                self.java_options,
                self.jmx_checks,
                command,
                reporter,
                self.tools_jar_path,
                self.custom_jar_paths,
                redirect_std_streams,
            )
        except Exception:
            log.exception("Error while initiating JMXFetch")
            raise
Ejemplo n.º 2
0
    def _add_jmxinfo_tar(self):
        """
        Add JMX troubleshooting information to the tarball, under `jmxinfo/`.

        Nothing is added unless the third value returned by
        `_capture_output(self._should_run_jmx)` is truthy. Otherwise the readable JMX
        status files, the output of the bean-listing commands and the output of
        `java -version` are added, in that order.
        """
        _, _, should_run_jmx = self._capture_output(self._should_run_jmx)
        if not should_run_jmx:
            return

        # Status files go first: executing jmxfetch to list the beans overwrites them.
        status_files = (
            (JMXFiles._STATUS_FILE, JMXFiles.get_status_file_path()),
            (JMXFiles._PYTHON_STATUS_FILE, JMXFiles.get_python_status_file_path()),
        )
        for status_name, status_path in status_files:
            if not self._can_read(status_path, warn=False):
                continue
            self._add_file_tar(status_path, os.path.join('jmxinfo', status_name))

        # Bean listings: one log file per jmx command.
        for command in ('list_matching_attributes', 'list_everything'):
            log.info("  * datadog-agent jmx {0} output".format(command))
            listing_name = os.path.join('jmxinfo', '{0}.log'.format(command))
            self._add_command_output_tar(listing_name, partial(self._jmx_command_call, command))

        # Java version, using the configured java binary or plain `java` as a fallback.
        log.info("  * java -version output")
        _, _, java_bin_path = self._capture_output(
            lambda: JMXFetch.get_configuration(get_confd_path())[2] or 'java')
        version_name = os.path.join('jmxinfo', 'java_version.log')
        self._add_command_output_tar(
            version_name,
            lambda: self._java_version(java_bin_path),
            command_desc="{0} -version".format(java_bin_path)
        )
Ejemplo n.º 3
0
 def run(self):
     """
     Set up the `jmxfetch` logging, then run the JMXFetch daemon if it is enabled.

     When `self.is_enabled` is falsy, only an informational message is logged.
     """
     from config import initialize_logging
     initialize_logging('jmxfetch')

     if not self.is_enabled:
         log.info("Windows Service - Not starting JMXFetch: no valid configuration found")
         return

     log.debug("Windows Service - Starting JMXFetch")
     # The exit file is cleaned before the daemon runs
     # (presumably a leftover from a previous stop request -- TODO confirm).
     JMXFiles.clean_exit_file()
     self.jmx_daemon.run()
Ejemplo n.º 4
0
    def configure(self, checks_list=None, clean_status_file=True):
        """
        Load the JMXFetch parameters and store them on the instance.

        checks_list: forwarded to `get_configuration`.
        clean_status_file: when truthy, `JMXFiles.clean_status_file()` is called first to
        remove potential leftovers of a previous run.
        """
        if clean_status_file:
            JMXFiles.clean_status_file()

        configuration = self.get_configuration(self.confd_path, checks_list=checks_list)
        (self.jmx_checks,
         self.invalid_checks,
         self.java_bin_path,
         self.java_options,
         self.tools_jar_path) = configuration
Ejemplo n.º 5
0
    def stop(self):
        """
        Override `stop` so that JMXFetch gets a chance to exit on its own.

        When the process is running, the exit file is written and the process is waited
        on for up to `_JMX_STOP_TIMEOUT` seconds. The parent `stop` is then always called.
        """
        jmx_proc = self._proc
        is_alive = jmx_proc is not None and jmx_proc.is_running()

        if is_alive:
            JMXFiles.write_exit_file()
            try:
                jmx_proc.wait(timeout=self._JMX_STOP_TIMEOUT)
            except psutil.TimeoutExpired:
                # Not fatal: the parent `stop` below still runs.
                log.debug("JMXFetch process didn't stop after %ss, killing it", self._JMX_STOP_TIMEOUT)

        super(JMXFetchProcess, self).stop()
Ejemplo n.º 6
0
    def stop(self):
        """
        Override `stop` to request a clean JMXFetch exit before stopping the process.

        A running process is signalled through the exit file and given
        `_JMX_STOP_TIMEOUT` seconds to terminate; the parent `stop` runs in every case.
        """
        process = self._proc
        if process is not None and process.is_running():
            stop_timeout = self._JMX_STOP_TIMEOUT
            JMXFiles.write_exit_file()
            try:
                process.wait(timeout=stop_timeout)
            except psutil.TimeoutExpired:
                # The timeout is only reported; the parent `stop` below still runs.
                log.debug("JMXFetch process didn't stop after %ss, killing it",
                          self._JMX_STOP_TIMEOUT)

        super(JMXFetchProcess, self).stop()
Ejemplo n.º 7
0
    def run(self, command=None, checks_list=None, reporter=None,
            redirect_std_streams=False):
        """
        Launch JMXFetch with the current configuration.

        command: JMXFetch command to execute; a falsy value selects JMX_COLLECT_COMMAND.
        checks_list: when truthy, the JMXFetch parameters are (re)configured from it first.
        reporter: forwarded unchanged to `_start`.
        redirect_std_streams: when False, JMXFetch's stdout and stderr are streamed directly
        to the environment's stdout and stderr and cannot be retrieved via python's
        sys.stdout and sys.stderr. When True, they are redirected to python's sys.stdout
        and sys.stderr.

        Returns whatever `_start` returns, or None when no valid check was found.
        Any exception raised while starting is logged and re-raised.
        """
        # (Re)set/(re)configure the parameters when `checks_list` is specified or when
        # no configuration was found yet.
        if checks_list or self.jmx_checks is None:
            self.configure(checks_list)

        try:
            if not command:
                command = JMX_COLLECT_COMMAND

            if len(self.invalid_checks) > 0:
                # Failing to report the invalid checks must not prevent the start.
                try:
                    JMXFiles.write_status_file(self.invalid_checks)
                except Exception:
                    log.exception("Error while writing JMX status file")

            if len(self.jmx_checks) == 0:
                # We're exiting purposefully, so exit with zero (supervisor's expected
                # code). HACK: Sleep a little bit so supervisor thinks we've started
                # cleanly and thus can exit cleanly.
                time.sleep(4)
                log.info("No valid JMX integration was found. Exiting ...")
                return None

            return self._start(
                self.java_bin_path,
                self.java_options,
                self.jmx_checks,
                command,
                reporter,
                self.tools_jar_path,
                self.custom_jar_paths,
                redirect_std_streams,
            )
        except Exception:
            log.exception("Error while initiating JMXFetch")
            raise
Ejemplo n.º 8
0
    def configure(self, command=None, checks_list=None, clean_status_file=True):
        """
        Load the JMXFetch parameters and store them on the instance.

        command: when equal to JMX_LIST_JVMS, a `tools_jar_path` is mandatory and is kept
        regardless of the configured attach-API flag.
        checks_list: forwarded to `get_configuration`.
        clean_status_file: when truthy, `JMXFiles.clean_status_file()` is called first to
        remove potential leftovers of a previous run.

        Raises InvalidJMXConfiguration when `command` is JMX_LIST_JVMS and no
        `tools_jar_path` is configured.
        """
        if clean_status_file:
            JMXFiles.clean_status_file()

        configuration = self.get_configuration(self.confd_path, checks_list=checks_list)
        (self.jmx_checks, self.invalid_checks, self.java_bin_path, self.java_options,
         tools_jar_path, self.custom_jar_paths, use_attach_api) = configuration

        # Set up the JDK `tools.jar`: listing the JVMs cannot work without it.
        listing_jvms = command == JMX_LIST_JVMS
        if listing_jvms and not tools_jar_path:
            raise InvalidJMXConfiguration(
                u"Command `{}` requires access to the JDK `tools.jar` file. "
                u"See `tools_jar_path` parameter in JMX YAML configuration files.".format(
                    JMX_LIST_JVMS
                )
            )

        # `tools.jar` is only kept when it is actually needed.
        if listing_jvms or use_attach_api:
            self.tools_jar_path = tools_jar_path
        else:
            self.tools_jar_path = None
Ejemplo n.º 9
0
    def configure(self, command=None, checks_list=None, clean_status_file=True):
        """
        Instantiate the JMXFetch parameters, cleaning potential previous run leftovers.

        command: the JMX_LIST_JVMS command forces the use of the JDK `tools.jar`.
        checks_list: forwarded to `get_configuration`.
        clean_status_file: when truthy, the JMX status file is cleaned first.

        Raises InvalidJMXConfiguration when `command` is JMX_LIST_JVMS and no
        `tools_jar_path` is configured.
        """
        if clean_status_file:
            JMXFiles.clean_status_file()

        loaded = self.get_configuration(self.confd_path, checks_list=checks_list)
        (self.jmx_checks,
         self.invalid_checks,
         self.java_bin_path,
         self.java_options,
         jar_path,
         self.custom_jar_paths,
         attach_api_enabled) = loaded

        # Set up the JDK `tools.jar`
        if command == JMX_LIST_JVMS:
            if not jar_path:
                raise InvalidJMXConfiguration(
                    u"Command `{}` requires access to the JDK `tools.jar` file. "
                    u"See `tools_jar_path` parameter in JMX YAML configuration files.".format(
                        JMX_LIST_JVMS
                    )
                )
            # Listing the JVMs always keeps `tools.jar`, whatever the configuration says.
            attach_api_enabled = True

        if attach_api_enabled:
            self.tools_jar_path = jar_path
        else:
            self.tools_jar_path = None
Ejemplo n.º 10
0
    def start(self):
        """
        Start the process, cleaning the JMX exit file first when JMXFetch is enabled.

        The parent `start` is called whether or not JMXFetch is enabled.
        """
        if self.is_enabled():
            # Presumably removes an exit file left by a previous stop request, so that
            # the new JMXFetch does not exit right away -- TODO confirm.
            JMXFiles.clean_exit_file()

        super(JMXFetchProcess, self).start()
Ejemplo n.º 11
0
 def terminate(self):
     """
     Override `terminate` method to properly exit JMXFetch.

     The exit file is written first, then `join()` is called on this object
     (presumably blocking until JMXFetch has exited -- TODO confirm).
     """
     JMXFiles.write_exit_file()
     self.join()
Ejemplo n.º 12
0
 def run(self):
     """
     Run the JMXFetch daemon when it is enabled; do nothing otherwise.

     The exit file is cleaned right before the daemon is run.
     """
     if not self.is_enabled:
         return

     JMXFiles.clean_exit_file()
     self.jmx_daemon.run()
Ejemplo n.º 13
0
    def _start(self, path_to_java, java_run_opts, jmx_checks, command,
               reporter, tools_jar_path, custom_jar_paths,
               redirect_std_streams):
        """
        Build the JMXFetch command line, run it as a subprocess and wait for it to exit.

        path_to_java: java binary to run; a falsy value falls back to "java".
        java_run_opts: whitespace-separated JVM options; default heap settings are appended
        when no maximum or initial heap size option is found in it.
        jmx_checks: check names passed after `--check`; also used to build the status file
        when the subprocess cannot be launched.
        command: JMXFetch command, placed last on the command line.
        reporter: JMXFetch reporter; when None, a statsd reporter is built from the
        `bind_host` and `monitorstatsd_port` agent settings.
        tools_jar_path: when not None, prepended to the classpath.
        custom_jar_paths: when non-empty, joined with ':' and prepended to the classpath.
        redirect_std_streams: when True, the subprocess output is captured in temporary
        files and copied to sys.stdout and sys.stderr once the process has exited.

        Returns the return code of the JMXFetch process.
        On OSError, every check is reported as invalid in the status file and the
        exception is re-raised. Any other exception is logged and re-raised.
        """
        if reporter is None:
            # Default reporter, built from the agent configuration.
            statsd_host = self.agentConfig.get('bind_host', 'localhost')
            statsd_port = self.agentConfig.get('monitorstatsd_port', "8125")
            reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

        log.info("Starting jmxfetch:")
        try:
            path_to_java = path_to_java or "java"
            java_run_opts = java_run_opts or ""
            path_to_jmxfetch = self._get_path_to_jmxfetch()
            path_to_status_file = JMXFiles.get_status_file_path()

            # Classpath order: custom jars, then tools.jar, then the jmxfetch jar.
            # NOTE(review): ':' is hard-coded as the separator although a Windows branch
            # exists below; Java on Windows expects ';' -- confirm whether os.pathsep
            # should be used here.
            classpath = path_to_jmxfetch
            if tools_jar_path is not None:
                classpath = r"%s:%s" % (tools_jar_path, classpath)
            if custom_jar_paths:
                classpath = r"%s:%s" % (':'.join(custom_jar_paths), classpath)

            # The positions in this list matter: the `insert` calls below rely on them.
            subprocess_args = [
                path_to_java,
                '-classpath',
                classpath,
                JMXFETCH_MAIN_CLASS,
                '--check_period',
                str(self.check_frequency * 1000),
                '--conf_directory',
                r"%s" % self.confd_path,
                '--log_level',
                JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"),
                                       "INFO"),
                '--log_location',
                r"%s" % self.logging_config.get('jmxfetch_log_file'),
                '--reporter',
                reporter,
                '--status_location',
                r"%s" % path_to_status_file,
                command,
            ]

            if Platform.is_windows():
                # Both values are inserted just before the trailing `command`, giving
                # `--exit_file_location <path> <command>`.
                path_to_exit_file = JMXFiles.get_python_exit_file_path()
                subprocess_args.insert(
                    len(subprocess_args) - 1, '--exit_file_location')
                subprocess_args.insert(
                    len(subprocess_args) - 1, path_to_exit_file)

            # `--check` goes right after the main class; each check is inserted right
            # after it, so the checks end up in reverse iteration order.
            subprocess_args.insert(4, '--check')
            for check in jmx_checks:
                subprocess_args.insert(5, check)

            # Append default heap settings unless the options already set them.
            if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
            if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

            # JVM options go right after the java binary (in reverse order).
            for opt in java_run_opts.split():
                subprocess_args.insert(1, opt)

            log.info("Running %s" % " ".join(subprocess_args))

            # The temporary files only receive the output when the streams are redirected.
            with nested(tempfile.TemporaryFile(),
                        tempfile.TemporaryFile()) as (stdout_f, stderr_f):
                jmx_process = subprocess.Popen(
                    subprocess_args,
                    close_fds=not redirect_std_streams,
                    stdout=stdout_f if redirect_std_streams else None,
                    stderr=stderr_f if redirect_std_streams else None)
                self.jmx_process = jmx_process

                self.register_signal_handlers()

                # Block until JMXFetch exits.
                jmx_process.wait()

                if redirect_std_streams:
                    # Rewind the temporary files and replay what JMXFetch wrote.
                    stderr_f.seek(0)
                    err = stderr_f.read()
                    stdout_f.seek(0)
                    out = stdout_f.read()
                    sys.stdout.write(out)
                    sys.stderr.write(err)

            return jmx_process.returncode

        except OSError:
            # Report every check as invalid with the same message, then propagate.
            java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
            log.exception(java_path_msg)
            invalid_checks = {}
            for check in jmx_checks:
                # The check name is what precedes the first '.' of the entry.
                check_name = check.split('.')[0]
                check_name = check_name.encode('ascii', 'ignore')
                invalid_checks[check_name] = java_path_msg
            JMXFiles.write_status_file(invalid_checks)
            raise
        except Exception:
            log.exception("Couldn't launch JMXFetch")
            raise
Ejemplo n.º 14
0
 def terminate(self):
     """
     Override `terminate` method to properly exit JMXFetch.

     Writes the JMX exit file, then calls `join()` on this object
     (presumably waiting for JMXFetch to exit -- TODO confirm).
     """
     JMXFiles.write_exit_file()
     self.join()
Ejemplo n.º 15
0
 def terminate(self):
     # Write the JMX exit file, then call `join()` on this object
     # (presumably waiting for JMXFetch to exit -- TODO confirm).
     JMXFiles.write_exit_file()
     self.join()
Ejemplo n.º 16
0
    def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, custom_jar_paths, redirect_std_streams):
        """
        Build the JMXFetch command line and hand it over to `self.execute`.

        path_to_java: java binary to run; a falsy value falls back to "java".
        java_run_opts: whitespace-separated JVM options; default heap settings are appended
        when no maximum or initial heap size option is found in it.
        jmx_checks: check names passed after `--check` (skipped when falsy); also used to
        build the status file when an OSError is raised.
        command: JMXFetch command, placed last on the command line.
        reporter: JMXFetch reporter; when None, a statsd reporter is built from the
        `bind_host` and `dogstatsd_port` agent settings.
        tools_jar_path: when not None, prepended to the classpath.
        custom_jar_paths: when non-empty, joined with ':' and prepended to the classpath.
        redirect_std_streams: forwarded to `self.execute`.

        Returns whatever `self.execute` returns.
        On OSError, every check is reported as invalid in the status file and the
        exception is re-raised. Any other exception is logged and re-raised.
        """
        if reporter is None:
            statsd_host = self.agent_config.get('bind_host', 'localhost')
            if statsd_host == "0.0.0.0":
                # If statsd is bound to all interfaces, just use localhost for clients
                statsd_host = "localhost"
            statsd_port = self.agent_config.get('dogstatsd_port', "8125")
            reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

        log.info("Starting jmxfetch:")
        try:
            path_to_java = path_to_java or "java"
            java_run_opts = java_run_opts or ""
            path_to_jmxfetch = self._get_path_to_jmxfetch()
            path_to_status_file = JMXFiles.get_status_file_path()

            # Classpath order: config jar, custom jars, tools.jar, then the jmxfetch jar.
            # NOTE(review): ':' is hard-coded as the separator although a Windows branch
            # exists below; Java on Windows expects ';' -- confirm whether os.pathsep
            # should be used here.
            classpath = path_to_jmxfetch
            if tools_jar_path is not None:
                classpath = r"%s:%s" % (tools_jar_path, classpath)
            if custom_jar_paths:
                classpath = r"%s:%s" % (':'.join(custom_jar_paths), classpath)
            if self.config_jar_path:
                classpath = r"%s:%s" % (self.config_jar_path, classpath)

            # The positions in this list matter: the `insert` calls below rely on them.
            subprocess_args = [
                path_to_java,  # Path to the java bin
                '-classpath',
                classpath,
                JMXFETCH_MAIN_CLASS,
                '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
                '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
                '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
                '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
                '--reporter', reporter,  # Reporter to use
                '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
                command,  # Name of the command
            ]

            if Platform.is_windows():
                # Signal handlers are not supported on Windows:
                # use a file to trigger JMXFetch exit instead.
                # Both values are inserted just before the trailing `command`.
                path_to_exit_file = JMXFiles.get_python_exit_file_path()
                subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
                subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

            if self.service_discovery:
                # Each pair is inserted right after the main class, so the resulting order
                # is `--sd_enabled --sd_pipe <name> --tmp_directory <path>`.
                pipe_path = get_jmx_pipe_path()
                subprocess_args.insert(4, '--tmp_directory')
                subprocess_args.insert(5, pipe_path)
                subprocess_args.insert(4, '--sd_pipe')
                subprocess_args.insert(5, SD_PIPE_NAME)
                subprocess_args.insert(4, '--sd_enabled')

            if jmx_checks:
                # `--check` goes right after the main class; each check is inserted right
                # after it, so the checks end up in reverse iteration order.
                subprocess_args.insert(4, '--check')
                for check in jmx_checks:
                    subprocess_args.insert(5, check)

            # Specify a maximum memory allocation pool for the JVM
            if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_SD_MAX_MEMORY_ALLOCATION if self.service_discovery else _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
            # Specify the initial memory allocation pool for the JVM
            if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

            # JVM options go right after the java binary (in reverse order).
            for opt in java_run_opts.split():
                subprocess_args.insert(1, opt)

            log.info("Running %s" % " ".join(subprocess_args))
            return self.execute(subprocess_args, redirect_std_streams)

        except OSError:
            # Report every check as invalid with the same message, then propagate.
            java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
            log.exception(java_path_msg)
            invalid_checks = {}
            for check in jmx_checks:
                # The check name is what precedes the first '.' of the entry.
                check_name = check.split('.')[0]
                check_name = check_name.encode('ascii', 'ignore')
                invalid_checks[check_name] = java_path_msg
            JMXFiles.write_status_file(invalid_checks)
            raise
        except Exception:
            # NOTE(review): logged at info level and without traceback, unlike the OSError
            # branch above -- confirm this is intended.
            log.info("unable to launch JMXFetch")
            raise
Ejemplo n.º 17
0
def get_jmx_status():
    """Read the two jmxfetch status files and return a list of CheckStatus objects.

    Both files are YAML files; their paths are given by `JMXFiles`:
        - One is generated by the Agent itself, for the jmx checks that can't be
        initialized because something is missing in their configuration.
        Its format is as follows:

        ###
        invalid_checks:
              jmx: !!python/object/apply:jmxfetch.InvalidJMXConfiguration [You need to have at
                              least one instance defined in the YAML file for this check]
        timestamp: 1391040927.136523
        ###

        - One is generated by jmxfetch and reports on the collection of metrics.
        Its format is as follows:

        ###
        timestamp: 1391037347435
        checks:
          failed_checks:
            jmx:
            - {message: Unable to create instance. Please check your yaml file, status: ERROR}
          initialized_checks:
            tomcat:
            - {message: null, status: OK, metric_count: 7, instance_name: jmx-remihakim.fr-3000}
        ###

    When the jmxfetch status is more than 60 seconds old, a single "jmx" CheckStatus in
    error is reported in place of the per-check statuses.

    Returns an empty list when neither file exists, or when anything goes wrong while
    reading them (the error is logged).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s" % (java_status_path, python_status_path))
        return []

    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # NOTE(review): `yaml.load` can build arbitrary Python objects. It is kept
            # as is because the Agent status file relies on python-specific tags, but
            # these files must only ever be written by trusted processes.
            # The file is opened in a `with` block so the handle is always released.
            with open(java_status_path) as java_status_file:
                java_jmx_stats = yaml.load(java_status_file)

            status_age = time.time() - java_jmx_stats.get('timestamp')/1000  # JMX timestamp is saved in milliseconds
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ])
                )
            else:
                # Failed and initialized checks share the same layout and are
                # aggregated per check name, failed ones first.
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)
                            check_data[check_name]['statuses'].append(get_jmx_instance_status(instance_name, status, message, metric_count))
                            check_data[check_name]['metric_count'].append(metric_count)
                            check_data[check_name]['service_check_count'].append(service_check_count)

                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(check_name, data['statuses'],
                                               metric_count=sum(data['metric_count']),
                                               service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            with open(python_status_path) as python_status_file:
                python_jmx_stats = yaml.load(python_status_file)
            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        log.exception("Couldn't load latest jmx status")
        return []
Ejemplo n.º 18
0
    def _start(self, path_to_java, java_run_opts, jmx_checks, command,
               reporter, tools_jar_path, custom_jar_paths,
               redirect_std_streams):
        """
        Build the JMXFetch command line, run it as a subprocess and wait for it to exit.

        path_to_java: java binary to run; a falsy value falls back to "java".
        java_run_opts: whitespace-separated JVM options; default heap settings are appended
        when no maximum or initial heap size option is found in it.
        jmx_checks: check names passed after `--check`; also used to build the status file
        when the subprocess cannot be launched.
        command: JMXFetch command, placed last on the command line.
        reporter: JMXFetch reporter; when None, "statsd:<dogstatsd_port>" is used.
        tools_jar_path: when not None, prepended to the classpath.
        custom_jar_paths: when non-empty, joined with ':' and prepended to the classpath.
        redirect_std_streams: when True, the subprocess output is captured in temporary
        files and copied to sys.stdout and sys.stderr once the process has exited.

        Returns the return code of the JMXFetch process.
        On OSError, every check is reported as invalid in the status file and the
        exception is re-raised. Any other exception is logged and re-raised.
        """
        statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
        if reporter is None:
            reporter = "statsd:%s" % str(statsd_port)

        log.info("Starting jmxfetch:")
        try:
            path_to_java = path_to_java or "java"
            java_run_opts = java_run_opts or ""
            path_to_jmxfetch = self._get_path_to_jmxfetch()
            path_to_status_file = JMXFiles.get_status_file_path()

            # Classpath order: custom jars, then tools.jar, then the jmxfetch jar.
            classpath = path_to_jmxfetch
            if tools_jar_path is not None:
                classpath = r"%s:%s" % (tools_jar_path, classpath)
            if custom_jar_paths:
                classpath = r"%s:%s" % (':'.join(custom_jar_paths), classpath)

            # The positions in this list matter: the `insert` calls below rely on them.
            subprocess_args = [
                path_to_java,  # Path to the java bin
                '-classpath',
                classpath,
                JMXFETCH_MAIN_CLASS,
                '--check_period',
                str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
                '--conf_directory',
                r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch
                '--log_level',
                # Log level: mapping from Python log level to log4j log levels
                JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),
                '--log_location',
                r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
                '--reporter',
                reporter,  # Reporter to use
                '--status_location',
                r"%s" % path_to_status_file,  # Path to the status file to write
                command,  # Name of the command
            ]

            if Platform.is_windows():
                # Signal handlers are not supported on Windows:
                # use a file to trigger JMXFetch exit instead.
                # Both values are inserted just before the trailing `command`.
                path_to_exit_file = JMXFiles.get_python_exit_file_path()
                subprocess_args.insert(
                    len(subprocess_args) - 1, '--exit_file_location')
                subprocess_args.insert(
                    len(subprocess_args) - 1, path_to_exit_file)

            # `--check` goes right after the main class; each check is inserted right
            # after it, so the checks end up in reverse iteration order.
            subprocess_args.insert(4, '--check')
            for check in jmx_checks:
                subprocess_args.insert(5, check)

            # Specify a maximum memory allocation pool for the JVM
            if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
            # Specify the initial memory allocation pool for the JVM
            if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

            # JVM options go right after the java binary (in reverse order).
            for opt in java_run_opts.split():
                subprocess_args.insert(1, opt)

            log.info("Running %s" % " ".join(subprocess_args))

            # Launch JMXfetch subprocess manually, w/o get_subprocess_output(), since it's
            # a special case. The temporary files use the default read/write mode: 'rw'
            # is not a valid mode.
            with nested(tempfile.TemporaryFile(),
                        tempfile.TemporaryFile()) as (stdout_f, stderr_f):
                jmx_process = subprocess.Popen(
                    subprocess_args,
                    # only set to True when the streams are not redirected, for WIN compatibility
                    close_fds=not redirect_std_streams,
                    stdout=stdout_f if redirect_std_streams else None,
                    stderr=stderr_f if redirect_std_streams else None)
                self.jmx_process = jmx_process

                # Register SIGINT and SIGTERM signal handlers
                self.register_signal_handlers()

                # Wait for JMXFetch to return
                jmx_process.wait()

                if redirect_std_streams:
                    # The streams were redirected to files, not pipes, so `communicate()`
                    # would return (None, None): rewind the temporary files and replay
                    # their content to sys.stdout and sys.stderr instead.
                    stdout_f.seek(0)
                    sys.stdout.write(stdout_f.read())
                    stderr_f.seek(0)
                    sys.stderr.write(stderr_f.read())

            return jmx_process.returncode

        except OSError:
            # Report every check as invalid with the same message, then propagate.
            java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
            log.exception(java_path_msg)
            invalid_checks = {}
            for check in jmx_checks:
                # The check name is what precedes the first '.' of the entry.
                check_name = check.split('.')[0]
                check_name = check_name.encode('ascii', 'ignore')
                invalid_checks[check_name] = java_path_msg
            JMXFiles.write_status_file(invalid_checks)
            raise
        except Exception:
            log.exception("Couldn't launch JMXFetch")
            raise
Ejemplo n.º 19
0
def get_jmx_status():
    """Build the list of check statuses from the two JMXFetch status files.

    Both files are YAML files whose paths are provided by JMXFiles:
        - One generated by the Agent itself, for JMX checks that can't be initialized because
        something is missing in their configuration.
        Its format is as follows:

        ###
        invalid_checks:
              jmx: !!python/object/apply:jmxfetch.InvalidJMXConfiguration [You need to have at
                              least one instance defined in the YAML file for this check]
        timestamp: 1391040927.136523
        ###

        - One generated by jmxfetch that reports on the collection of metrics.
        Its format is as follows:

        ###
        timestamp: 1391037347435
        checks:
          failed_checks:
            jmx:
            - {message: Unable to create instance. Please check your yaml file, status: ERROR}
          initialized_checks:
            tomcat:
            - {message: null, status: OK, metric_count: 7, instance_name: jmx-remihakim.fr-3000}
        ###

    Returns a list of CheckStatus objects. The list is empty when neither file exists or when
    anything goes wrong while loading/parsing them (the error is logged, not raised).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s" % (java_status_path, python_status_path))
        return []

    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # Open the file in a `with` block so the handle is closed deterministically.
            # NOTE(review): yaml.load can instantiate arbitrary Python objects; kept as-is
            # for parity with the Agent-generated file below - confirm before tightening.
            with open(java_status_path) as java_status_file:
                java_jmx_stats = yaml.load(java_status_file)

            status_age = time.time() - java_jmx_stats.get('timestamp')/1000  # JMX timestamp is saved in milliseconds
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                # A stale status file is reported as a single failing "jmx" check
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ])
                )
            else:
                # Failed and initialized checks share the same layout: aggregate them per
                # check name, failed ones first.
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)
                            check_data[check_name]['statuses'].append(get_jmx_instance_status(instance_name, status, message, metric_count))
                            check_data[check_name]['metric_count'].append(metric_count)
                            check_data[check_name]['service_check_count'].append(service_check_count)

                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(check_name, data['statuses'],
                                               metric_count=sum(data['metric_count']),
                                               service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            # This file carries python-specific YAML tags (see the docstring example), so a
            # safe loader is not a drop-in replacement here.
            with open(python_status_path) as python_status_file:
                python_jmx_stats = yaml.load(python_status_file)
            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        log.exception("Couldn't load latest jmx status")
        return []
Ejemplo n.º 20
0
 def run(self):
     """
     Clean the JMX exit file and run the JMX daemon; do nothing when `self.is_enabled`
     is falsy.
     """
     # NOTE(review): `is_enabled` is read as an attribute here, not called - confirm it is
     # a property/flag and not a method.
     if not self.is_enabled:
         return
     JMXFiles.clean_exit_file()
     self.jmx_daemon.run()
Ejemplo n.º 21
0
    def _start(self, path_to_java, java_run_opts, jmx_checks, command,
               reporter, tools_jar_path, custom_jar_paths,
               redirect_std_streams):
        """
        Build the JMXFetch command line and hand it over to `self.execute`.

        path_to_java: java binary to launch; "java" is used when falsy.
        java_run_opts: whitespace-separated JVM options; default max/initial heap options are
        appended when none are already present.
        jmx_checks: check entries passed after `--check` (the part before the first '.' of
        each entry is used as the check name when reporting a launch failure).
        command: JMXFetch command, always kept as the last argument.
        reporter: reporter string; when None, "statsd:<host>:<port>" is built from the
        `bind_host` and `dogstatsd_port` agent settings.
        tools_jar_path: optional path prepended to the classpath.
        custom_jar_paths: optional list of paths prepended to the classpath.
        redirect_std_streams: forwarded untouched to `self.execute`.

        Returns whatever `self.execute` returns. On OSError every entry of `jmx_checks` is
        reported through `JMXFiles.write_status_file` before the error is re-raised; any other
        exception is logged with its traceback and re-raised.
        """
        if reporter is None:
            statsd_host = self.agent_config.get('bind_host', 'localhost')
            if statsd_host == "0.0.0.0":
                # If statsd is bound to all interfaces, just use localhost for clients
                statsd_host = "localhost"
            statsd_port = self.agent_config.get('dogstatsd_port', "8125")
            reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

        log.info("Starting jmxfetch:")
        try:
            path_to_java = path_to_java or "java"
            java_run_opts = java_run_opts or ""
            path_to_jmxfetch = self._get_path_to_jmxfetch()
            path_to_status_file = JMXFiles.get_status_file_path()

            # NOTE(review): ':' is hard-coded as the classpath separator - confirm this is
            # never used with extra jars on platforms that expect ';'.
            classpath = path_to_jmxfetch
            if tools_jar_path is not None:
                classpath = r"%s:%s" % (tools_jar_path, classpath)
            if custom_jar_paths:
                classpath = r"%s:%s" % (':'.join(custom_jar_paths), classpath)

            subprocess_args = [
                path_to_java,  # Path to the java bin
                '-classpath',
                classpath,
                JMXFETCH_MAIN_CLASS,
                '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
                '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch
                '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
                '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
                '--reporter', reporter,  # Reporter to use
                '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
                command,  # Name of the command
            ]

            if Platform.is_windows():
                # Signal handlers are not supported on Windows:
                # use a file to trigger JMXFetch exit instead.
                # Both values are inserted right before the trailing command.
                path_to_exit_file = JMXFiles.get_python_exit_file_path()
                subprocess_args.insert(
                    len(subprocess_args) - 1, '--exit_file_location')
                subprocess_args.insert(
                    len(subprocess_args) - 1, path_to_exit_file)

            # Index 4 is the slot right after the main class: every group inserted there
            # ends up in front of the groups inserted before it.
            if self.service_discovery:
                pipe_path = get_jmx_pipe_path()
                subprocess_args.insert(4, '--tmp_directory')
                subprocess_args.insert(5, pipe_path)
                subprocess_args.insert(4, '--sd_pipe')
                subprocess_args.insert(5, SD_PIPE_NAME)
                subprocess_args.insert(4, '--sd_enabled')

            if jmx_checks:
                subprocess_args.insert(4, '--check')
                # Each check is inserted right after '--check', so they appear in reverse order
                for check in jmx_checks:
                    subprocess_args.insert(5, check)

            # Specify a maximum memory allocation pool for the JVM
            if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_SD_MAX_MEMORY_ALLOCATION if self.service_discovery else _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
            # Specify the initial memory allocation pool for the JVM
            if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

            # JVM options go right after the java binary (inserted one by one at index 1)
            for opt in java_run_opts.split():
                subprocess_args.insert(1, opt)

            log.info("Running %s" % " ".join(subprocess_args))
            return self.execute(subprocess_args, redirect_std_streams)

        except OSError:
            java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
            log.exception(java_path_msg)
            invalid_checks = {}
            for check in jmx_checks:
                check_name = check.split('.')[0]
                check_name = check_name.encode('ascii', 'ignore')
                invalid_checks[check_name] = java_path_msg
            JMXFiles.write_status_file(invalid_checks)
            raise
        except Exception:
            # Log at error level with the traceback: the failure is re-raised, and an
            # info-level message without context would hide its cause.
            log.exception("unable to launch JMXFetch")
            raise
Ejemplo n.º 22
0
    def _populate_payload_metadata(self, payload, check_statuses, start_event=True):
        """
        Periodically populate the payload with metadata related to the system, host, and/or checks.
        """
        now = time.time()

        # Include system stats on first postback
        if start_event and self._is_first_run():
            payload["systemStats"] = self.agentConfig.get("system_stats", {})
            # Also post an event in the newsfeed
            payload["events"]["System"] = [
                {
                    "api_key": self.agentConfig["api_key"],
                    "host": payload["internalHostname"],
                    "timestamp": now,
                    "event_type": "Agent Startup",
                    "msg_text": "Version %s" % get_version(),
                }
            ]

        # Periodically send the host metadata.
        if self._should_send_additional_data("host_metadata"):
            # gather metadata with gohai
            try:
                if not Platform.is_windows():
                    command = "gohai"
                else:
                    command = "gohai\gohai.exe"
                gohai_metadata, gohai_err, _ = get_subprocess_output([command], log)
                payload["gohai"] = gohai_metadata
                if gohai_err:
                    log.warning("GOHAI LOG | {0}".format(gohai_err))
            except OSError as e:
                if e.errno == 2:  # file not found, expected when install from source
                    log.info("gohai file not found")
                else:
                    raise e
            except Exception as e:
                log.warning("gohai command failed with error %s" % str(e))

            payload["systemStats"] = get_system_stats()
            payload["meta"] = self._get_hostname_metadata()

            self.hostname_metadata_cache = payload["meta"]
            # Add static tags from the configuration file
            host_tags = []
            if self.agentConfig["tags"] is not None:
                host_tags.extend([unicode(tag.strip()) for tag in self.agentConfig["tags"].split(",")])

            if self.agentConfig["collect_ec2_tags"]:
                host_tags.extend(EC2.get_tags(self.agentConfig))

            if host_tags:
                payload["host-tags"]["system"] = host_tags

            # If required by the user, let's create the dd_check:xxx host tags
            if self.agentConfig["create_dd_check_tags"]:
                app_tags_list = [DD_CHECK_TAG.format(c.name) for c in self.initialized_checks_d]
                app_tags_list.extend([DD_CHECK_TAG.format(cname) for cname in JMXFiles.get_jmx_appnames()])

                if "system" not in payload["host-tags"]:
                    payload["host-tags"]["system"] = []

                payload["host-tags"]["system"].extend(app_tags_list)

            GCE_tags = GCE.get_tags(self.agentConfig)
            if GCE_tags is not None:
                payload["host-tags"][GCE.SOURCE_TYPE_NAME] = GCE_tags

            # Log the metadata on the first run
            if self._is_first_run():
                log.info("Hostnames: %s, tags: %s" % (repr(self.hostname_metadata_cache), payload["host-tags"]))

        # Periodically send extra hosts metadata (vsphere)
        # Metadata of hosts that are not the host where the agent runs, not all the checks use
        # that
        external_host_tags = []
        if self._should_send_additional_data("external_host_tags"):
            for check in self.initialized_checks_d:
                try:
                    getter = getattr(check, "get_external_host_tags")
                    check_tags = getter()
                    external_host_tags.extend(check_tags)
                except AttributeError:
                    pass

        if external_host_tags:
            payload["external_host_tags"] = external_host_tags

        # Periodically send agent_checks metadata
        if self._should_send_additional_data("agent_checks"):
            # Add agent checks statuses and error/warning messages
            agent_checks = []
            for check in check_statuses:
                if check.instance_statuses is not None:
                    for i, instance_status in enumerate(check.instance_statuses):
                        agent_checks.append(
                            (
                                check.name,
                                check.source_type_name,
                                instance_status.instance_id,
                                instance_status.status,
                                # put error message or list of warning messages in the same field
                                # it will be handled by the UI
                                instance_status.error or instance_status.warnings or "",
                                check.service_metadata[i],
                            )
                        )
                else:
                    agent_checks.append(
                        (
                            check.name,
                            check.source_type_name,
                            "initialization",
                            check.status,
                            repr(check.init_failed_error),
                        )
                    )
            payload["agent_checks"] = agent_checks
            payload["meta"] = self.hostname_metadata_cache  # add hostname metadata
Ejemplo n.º 23
0
    def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, redirect_std_streams):
        """
        Build the JMXFetch command line, launch it as a subprocess and wait for it to exit.

        path_to_java: java binary to launch; "java" is used when falsy.
        java_run_opts: whitespace-separated JVM options; default max/initial heap options are
        appended when none are already present.
        jmx_checks: check entries passed after `--check`.
        command: JMXFetch command, always kept as the last argument.
        reporter: reporter string; when None, "statsd:<port>" is built from the
        `dogstatsd_port` agent setting.
        tools_jar_path: optional path prepended to the classpath.
        redirect_std_streams: when True, the subprocess output is captured through pipes and
        written to sys.stdout / sys.stderr once the process has returned.

        Returns the return code of the JMXFetch process. On OSError every entry of
        `jmx_checks` is reported through `JMXFiles.write_status_file` before the error is
        re-raised; any other exception is logged and re-raised.
        """
        # The port is looked up unconditionally, but only used when no reporter is given
        statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
        if reporter is None:
            reporter = "statsd:%s" % str(statsd_port)

        log.info("Starting jmxfetch:")
        try:
            path_to_java = path_to_java or "java"
            java_run_opts = java_run_opts or ""
            path_to_jmxfetch = self._get_path_to_jmxfetch()
            path_to_status_file = JMXFiles.get_status_file_path()

            if tools_jar_path is None:
                classpath = path_to_jmxfetch
            else:
                classpath = r"%s:%s" % (tools_jar_path, path_to_jmxfetch)

            subprocess_args = [
                path_to_java,  # Path to the java bin
                '-classpath',
                classpath,
                JMXFETCH_MAIN_CLASS,
                '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
                '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
                '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
                '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
                '--reporter', reporter,  # Reporter to use
                '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
                command,  # Name of the command
            ]

            if Platform.is_windows():
                # Signal handlers are not supported on Windows:
                # use a file to trigger JMXFetch exit instead
                # (both values are inserted right before the trailing command)
                path_to_exit_file = JMXFiles.get_python_exit_file_path()
                subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
                subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

            # '--check' goes right after the main class (index 4); each check is then
            # inserted right after it, so the checks end up in reverse order
            subprocess_args.insert(4, '--check')
            for check in jmx_checks:
                subprocess_args.insert(5, check)

            # Specify a maximum memory allocation pool for the JVM
            if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
            # Specify the initial memory allocation pool for the JVM
            if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

            # JVM options go right after the java binary (inserted one by one at index 1)
            for opt in java_run_opts.split():
                subprocess_args.insert(1, opt)

            log.info("Running %s" % " ".join(subprocess_args))

            # Launch JMXfetch subprocess
            jmx_process = subprocess.Popen(
                subprocess_args,
                close_fds=not redirect_std_streams,  # False whenever the streams are redirected, for WIN compatibility
                stdout=subprocess.PIPE if redirect_std_streams else None,
                stderr=subprocess.PIPE if redirect_std_streams else None
            )
            # Keep a handle on the process on the instance
            self.jmx_process = jmx_process

            # Register SIGINT and SIGTERM signal handlers
            self.register_signal_handlers()

            if redirect_std_streams:
                # Wait for JMXFetch to return, and write out the stdout and stderr of JMXFetch to sys.stdout and sys.stderr
                out, err = jmx_process.communicate()
                sys.stdout.write(out)
                sys.stderr.write(err)
            else:
                # Wait for JMXFetch to return
                jmx_process.wait()

            return jmx_process.returncode

        except OSError:
            java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
            log.exception(java_path_msg)
            # Flag every requested check as invalid, keyed by the part of the entry that
            # precedes the first '.'
            invalid_checks = {}
            for check in jmx_checks:
                check_name = check.split('.')[0]
                check_name = check_name.encode('ascii', 'ignore')
                invalid_checks[check_name] = java_path_msg
            JMXFiles.write_status_file(invalid_checks)
            raise
        except Exception:
            log.exception("Couldn't launch JMXFetch")
            raise
Ejemplo n.º 24
0
    def _populate_payload_metadata(self, payload, check_statuses, start_event=True):
        now = time.time()

        if start_event and self._is_first_run():
            payload['systemStats'] = self.agentConfig.get('system_stats', {})
            payload['events']['System'] = [{
                'api_key': self.agentConfig['api_key'],
                'host': payload['internalHostname'],
                'timestamp': now,
                'event_type': 'Agent Startup',
                'msg_text': 'Version %s' % get_version()
            }]

        if self._should_send_additional_data('host_metadata'):
            payload['gohai'] = get_gohai_data()
            payload['systemStats'] = get_system_stats()
            payload['meta'] = self._get_hostname_metadata()
            log.info('GOHAI data: {0}'.format(payload['gohai']))

            self.hostname_metadata_cache = payload['meta']
            host_tags = []
            if self.agentConfig['tags'] is not None:
                host_tags.extend([tag.strip()
                                  for tag in self.agentConfig['tags'].split(",")])

            if self.agentConfig['collect_ec2_tags']:
                host_tags.extend(EC2.get_tags(self.agentConfig))

            if host_tags:
                payload['host-tags']['system'] = host_tags

            if self.agentConfig['create_dd_check_tags']:
                app_tags_list = [DD_CHECK_TAG.format(c.name) for c in self.initialized_checks_d]
                app_tags_list.extend([DD_CHECK_TAG.format(cname) for cname
                                      in JMXFiles.get_jmx_appnames()])

                if 'system' not in payload['host-tags']:
                    payload['host-tags']['system'] = []

                payload['host-tags']['system'].extend(app_tags_list)

            GCE_tags = GCE.get_tags(self.agentConfig)
            if GCE_tags is not None:
                payload['host-tags'][GCE.SOURCE_TYPE_NAME] = GCE_tags

            if self._is_first_run():
                log.info("Hostnames: %s, tags: %s" %
                         (repr(self.hostname_metadata_cache), payload['host-tags']))

        external_host_tags = []
        if self._should_send_additional_data('external_host_tags'):
            for check in self.initialized_checks_d:
                try:
                    getter = getattr(check, 'get_external_host_tags')
                    check_tags = getter()
                    external_host_tags.extend(check_tags)
                except AttributeError:
                    pass

        if external_host_tags:
            payload['external_host_tags'] = external_host_tags

        if self._should_send_additional_data('agent_checks'):
            agent_checks = []
            for check in check_statuses:
                if check.instance_statuses is not None:
                    for i, instance_status in enumerate(check.instance_statuses):
                        agent_checks.append(
                            (
                                check.name, check.source_type_name,
                                instance_status.instance_id,
                                instance_status.status,

                                instance_status.error or instance_status.warnings or "",
                                check.service_metadata[i]
                            )
                        )
                else:
                    agent_checks.append(
                        (
                            check.name, check.source_type_name,
                            "initialization",
                            check.status, repr(check.init_failed_error)
                        )
                    )
            payload['agent_checks'] = agent_checks
            payload['meta'] = self.hostname_metadata_cache
Ejemplo n.º 25
0
    def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, custom_jar_paths, redirect_std_streams):
        """
        Build the JMXFetch command line, launch it as a subprocess and wait for it to exit.

        path_to_java: java binary to launch; "java" is used when falsy.
        java_run_opts: whitespace-separated JVM options; default max/initial heap options are
        appended when none are already present.
        jmx_checks: check entries passed after `--check`; the flag is omitted when empty.
        command: JMXFetch command, always kept as the last argument.
        reporter: reporter string; when None, "statsd:<host>:<port>" is built from the
        `bind_host` and `dogstatsd_port` agent settings.
        tools_jar_path: optional path prepended to the classpath.
        custom_jar_paths: optional list of paths prepended to the classpath.
        redirect_std_streams: when True, the subprocess output is captured in temporary files
        and written to sys.stdout / sys.stderr once the process has returned.

        Returns the return code of the JMXFetch process. On OSError every entry of
        `jmx_checks` is reported through `JMXFiles.write_status_file` before the error is
        re-raised; any other exception is logged and re-raised.
        """
        if reporter is None:
            statsd_host = self.agentConfig.get('bind_host', 'localhost')
            if statsd_host == "0.0.0.0":
                # If statsd is bound to all interfaces, just use localhost for clients
                statsd_host = "localhost"
            statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
            reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

        log.info("Starting jmxfetch:")
        try:
            path_to_java = path_to_java or "java"
            java_run_opts = java_run_opts or ""
            path_to_jmxfetch = self._get_path_to_jmxfetch()
            path_to_status_file = JMXFiles.get_status_file_path()

            # Extra jars are prepended, so the final order is custom jars, tools jar, jmxfetch
            classpath = path_to_jmxfetch
            if tools_jar_path is not None:
                classpath = r"%s:%s" % (tools_jar_path, classpath)
            if custom_jar_paths:
                classpath = r"%s:%s" % (':'.join(custom_jar_paths), classpath)

            subprocess_args = [
                path_to_java,  # Path to the java bin
                '-classpath',
                classpath,
                JMXFETCH_MAIN_CLASS,
                '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
                '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
                '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
                '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
                '--reporter', reporter,  # Reporter to use
                '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
                command,  # Name of the command
            ]

            if Platform.is_windows():
                # Signal handlers are not supported on Windows:
                # use a file to trigger JMXFetch exit instead
                # (both values are inserted right before the trailing command)
                path_to_exit_file = JMXFiles.get_python_exit_file_path()
                subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
                subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

            # Index 4 is the slot right after the main class: every group inserted there
            # ends up in front of the groups inserted before it
            if self.service_discovery:
                pipe_path = get_jmx_pipe_path()
                subprocess_args.insert(4, '--tmp_directory')
                subprocess_args.insert(5, pipe_path)
                subprocess_args.insert(4, '--sd_standby')

            if jmx_checks:
                subprocess_args.insert(4, '--check')
                # Each check is inserted right after '--check', so they appear in reverse order
                for check in jmx_checks:
                    subprocess_args.insert(5, check)

            # Specify a maximum memory allocation pool for the JVM
            if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_SD_MAX_MEMORY_ALLOCATION if self.service_discovery else _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
            # Specify the initial memory allocation pool for the JVM
            if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

            # JVM options go right after the java binary (inserted one by one at index 1)
            for opt in java_run_opts.split():
                subprocess_args.insert(1, opt)

            log.info("Running %s" % " ".join(subprocess_args))

            # Launch JMXfetch subprocess manually, w/o get_subprocess_output(), since it's a special case
            # The two temporary files are always created, but only handed to the subprocess
            # when the streams are redirected
            with nested(tempfile.TemporaryFile(), tempfile.TemporaryFile()) as (stdout_f, stderr_f):
                jmx_process = subprocess.Popen(
                    subprocess_args,
                    close_fds=not redirect_std_streams,  # only set to True when the streams are not redirected, for WIN compatibility
                    stdout=stdout_f if redirect_std_streams else None,
                    stderr=stderr_f if redirect_std_streams else None
                )
                # Keep a handle on the process on the instance
                self.jmx_process = jmx_process

                # Register SIGINT and SIGTERM signal handlers
                self.register_signal_handlers()

                # Wait for JMXFetch to return
                jmx_process.wait()

                if redirect_std_streams:
                    # Write out the stdout and stderr of JMXFetch to sys.stdout and sys.stderr
                    # (rewind each file first: the subprocess left the position at the end)
                    stderr_f.seek(0)
                    err = stderr_f.read()
                    stdout_f.seek(0)
                    out = stdout_f.read()
                    sys.stdout.write(out)
                    sys.stderr.write(err)

            return jmx_process.returncode

        except OSError:
            java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
            log.exception(java_path_msg)
            # Flag every requested check as invalid, keyed by the part of the entry that
            # precedes the first '.'
            invalid_checks = {}
            for check in jmx_checks:
                check_name = check.split('.')[0]
                check_name = check_name.encode('ascii', 'ignore')
                invalid_checks[check_name] = java_path_msg
            JMXFiles.write_status_file(invalid_checks)
            raise
        except Exception:
            log.exception("Couldn't launch JMXFetch")
            raise
Ejemplo n.º 26
0
    def start(self):
        """
        Call `JMXFiles.clean_exit_file()` when the process is enabled, then delegate to the
        parent class `start`.
        """
        enabled = self.is_enabled()
        if enabled:
            JMXFiles.clean_exit_file()
        super(JMXFetchProcess, self).start()
Ejemplo n.º 27
0
    def _populate_payload_metadata(self, payload, check_statuses, start_event=True):
        """
        Periodically populate the payload with metadata related to the system, host, and/or checks.
        """
        now = time.time()

        # Include system stats on first postback
        if start_event and self._is_first_run():
            payload['systemStats'] = self.agentConfig.get('system_stats', {})
            # Also post an event in the newsfeed
            payload['events']['System'] = [{
                'api_key': self.agentConfig['api_key'],
                'host': payload['internalHostname'],
                'timestamp': now,
                'event_type':'Agent Startup',
                'msg_text': 'Version %s' % get_version()
            }]

        # Periodically send the host metadata.
        if self._should_send_additional_data('host_metadata'):
            # gather metadata with gohai
            gohai_metadata = self._run_gohai_metadata()
            if gohai_metadata:
                payload['gohai'] = gohai_metadata

            payload['systemStats'] = get_system_stats(
                proc_path=self.agentConfig.get('procfs_path', '/proc').rstrip('/')
            )
            payload['meta'] = self._get_hostname_metadata()

            self.hostname_metadata_cache = payload['meta']
            # Add static tags from the configuration file
            host_tags = []
            if self.agentConfig['tags'] is not None:
                host_tags.extend([unicode(tag.strip())
                                 for tag in self.agentConfig['tags'].split(",")])

            if self.agentConfig['collect_ec2_tags']:
                host_tags.extend(EC2.get_tags(self.agentConfig))

            if host_tags:
                payload['host-tags']['system'] = host_tags

            # If required by the user, let's create the dd_check:xxx host tags
            if self.agentConfig['create_dd_check_tags']:
                app_tags_list = [DD_CHECK_TAG.format(c.name) for c in self.initialized_checks_d]
                app_tags_list.extend([DD_CHECK_TAG.format(cname) for cname
                                      in JMXFiles.get_jmx_appnames()])

                if 'system' not in payload['host-tags']:
                    payload['host-tags']['system'] = []

                payload['host-tags']['system'].extend(app_tags_list)

            GCE_tags = GCE.get_tags(self.agentConfig)
            if GCE_tags is not None:
                payload['host-tags'][GCE.SOURCE_TYPE_NAME] = GCE_tags

            # Log the metadata on the first run
            if self._is_first_run():
                log.info("Hostnames: %s, tags: %s" %
                         (repr(self.hostname_metadata_cache), payload['host-tags']))

        # Periodically send extra hosts metadata (vsphere)
        # Metadata of hosts that are not the host where the agent runs, not all the checks use
        # that
        external_host_tags = []
        if self._should_send_additional_data('external_host_tags'):
            for check in self.initialized_checks_d:
                try:
                    getter = getattr(check, 'get_external_host_tags')
                    check_tags = getter()
                    external_host_tags.extend(check_tags)
                except AttributeError:
                    pass

        if external_host_tags:
            payload['external_host_tags'] = external_host_tags

        # Periodically send agent_checks metadata
        if self._should_send_additional_data('agent_checks'):
            # Add agent checks statuses and error/warning messages
            agent_checks = []
            for check in check_statuses:
                if check.instance_statuses is not None:
                    for i, instance_status in enumerate(check.instance_statuses):
                        agent_checks.append(
                            (
                                check.name, check.source_type_name,
                                instance_status.instance_id,
                                instance_status.status,
                                # put error message or list of warning messages in the same field
                                # it will be handled by the UI
                                instance_status.error or instance_status.warnings or "",
                                check.service_metadata[i]
                            )
                        )
                else:
                    agent_checks.append(
                        (
                            check.name, check.source_type_name,
                            "initialization",
                            check.status, repr(check.init_failed_error)
                        )
                    )
            payload['agent_checks'] = agent_checks
            payload['meta'] = self.hostname_metadata_cache  # add hostname metadata
Ejemplo n.º 28
0
def get_jmx_status():
    """
    Build the list of check statuses describing the latest JMX status files.

    Two optional files are read:

    - the file at `JMXFiles.get_status_file_path()` (the "java" status file),
      expected to hold a `timestamp` in milliseconds and, under `checks`, the
      `failed_checks` and `initialized_checks` mappings of
      check name -> list of per-instance info dicts;
    - the file at `JMXFiles.get_python_status_file_path()`, expected to hold an
      `invalid_checks` mapping of check name -> initialization error.

    Returns a list of `CheckStatus` objects. An empty list is returned when
    neither file exists, or when any error occurs while loading/parsing them
    (the error is logged and partial results are discarded).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not (os.path.exists(java_status_path) or os.path.exists(python_status_path)):
        log.debug("There is no jmx_status file at: %s or at: %s",
                  java_status_path, python_status_path)
        return []

    # check name -> {'statuses': [...], 'metric_count': [...], 'service_check_count': [...]}
    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # NOTE(review): yaml.load can instantiate arbitrary objects; consider
            # yaml.safe_load if this file can ever be written by an untrusted party.
            # The context manager guarantees the handle is closed even on parse errors.
            with open(java_status_path) as status_file:
                java_jmx_stats = yaml.load(status_file)

            # The timestamp is stored in milliseconds; time.time() is in seconds.
            status_age = time.time() - java_jmx_stats.get('timestamp') / 1000
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                # A stale status file is reported as a single errored "jmx" check.
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ]))
            else:
                # Failed and initialized checks share the same per-instance layout,
                # so both sections are aggregated identically (failed ones first).
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)

                            # Looked up inside the loop on purpose: a check with
                            # no instances must not create an entry in check_data.
                            data = check_data[check_name]
                            data['statuses'].append(
                                get_jmx_instance_status(instance_name, status,
                                                        message, metric_count))
                            data['metric_count'].append(metric_count)
                            data['service_check_count'].append(service_check_count)

                for check_name, data in check_data.iteritems():
                    check_statuses.append(
                        CheckStatus(
                            check_name,
                            data['statuses'],
                            metric_count=sum(data['metric_count']),
                            service_check_count=sum(data['service_check_count'])))

        if os.path.exists(python_status_path):
            # NOTE(review): same yaml.load caveat as above.
            with open(python_status_path) as status_file:
                python_jmx_stats = yaml.load(status_file)

            # Checks listed here are reported with no instance statuses,
            # carrying only their initialization error.
            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(
                    CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        # Best effort: any failure (unreadable file, malformed YAML, missing
        # timestamp, ...) is logged and reported as "no status available".
        log.exception("Couldn't load latest jmx status")
        return []