Ejemplo n.º 1
0
def get_jmx_status():
    """This function tries to read the 2 jmxfetch status file which are yaml file
    located in the temp directory.

    There are 2 files:
        - One generated by the Agent itself, for jmx checks that can't be initialized because
        there are missing stuff.
        Its format is as following:

        ###
        invalid_checks:
              jmx: !!python/object/apply:jmxfetch.InvalidJMXConfiguration [You need to have at
                              least one instance defined in the YAML file for this check]
        timestamp: 1391040927.136523
        ###

        - One generated by jmxfetch that return information about the collection of metrics
        its format is as following:

        ###
        timestamp: 1391037347435
        checks:
          failed_checks:
            jmx:
            - {message: Unable to create instance. Please check your yaml file, status: ERROR}
          initialized_checks:
            tomcat:
            - {message: null, status: OK, metric_count: 7, instance_name: jmx-remihakim.fr-3000}
        ###
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s" % (java_status_path, python_status_path))
        return []

    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            java_jmx_stats = yaml.load(file(java_status_path))

            status_age = time.time() - java_jmx_stats.get('timestamp')/1000 # JMX timestamp is saved in milliseconds
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ])
                )
            else:
                for check_name, instances in jmx_checks.get('failed_checks', {}).iteritems():
                    for info in instances:
                        message = info.get('message', None)
                        metric_count = info.get('metric_count', 0)
                        service_check_count = info.get('service_check_count', 0)
                        status = info.get('status')
                        instance_name = info.get('instance_name', None)
                        check_data[check_name]['statuses'].append(get_jmx_instance_status(instance_name, status, message, metric_count))
                        check_data[check_name]['metric_count'].append(metric_count)
                        check_data[check_name]['service_check_count'].append(service_check_count)

                for check_name, instances in jmx_checks.get('initialized_checks', {}).iteritems():
                    for info in instances:
                        message = info.get('message', None)
                        metric_count = info.get('metric_count', 0)
                        service_check_count = info.get('service_check_count', 0)
                        status = info.get('status')
                        instance_name = info.get('instance_name', None)
                        check_data[check_name]['statuses'].append(get_jmx_instance_status(instance_name, status, message, metric_count))
                        check_data[check_name]['metric_count'].append(metric_count)
                        check_data[check_name]['service_check_count'].append(service_check_count)

                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(check_name, data['statuses'],
                                               metric_count=sum(data['metric_count']),
                                               service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            python_jmx_stats = yaml.load(file(python_status_path))
            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(CheckStatus(check_name, [], init_failed_error=excep))


        return check_statuses

    except Exception:
        log.exception("Couldn't load latest jmx status")
        return []
Ejemplo n.º 2
0
    def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path):
        statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
        if reporter is None:
            reporter = "statsd:%s" % str(statsd_port)

        log.info("Starting jmxfetch:")
        try:
            path_to_java = path_to_java or "java"
            java_run_opts = java_run_opts or ""
            path_to_jmxfetch = self._get_path_to_jmxfetch()
            path_to_status_file = JMXFiles.get_status_file_path()

            if tools_jar_path is None:
                classpath = path_to_jmxfetch
            else:
                classpath = r"%s:%s" % (tools_jar_path, path_to_jmxfetch)

            subprocess_args = [
                path_to_java,  # Path to the java bin
                '-classpath',
                classpath,
                JMXFETCH_MAIN_CLASS,
                '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
                '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
                '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
                '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
                '--reporter', reporter,  # Reporter to use
                '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
                command,  # Name of the command
            ]

            if Platform.is_windows():
                # Signal handlers are not supported on Windows:
                # use a file to trigger JMXFetch exit instead
                path_to_exit_file = JMXFiles.get_python_exit_file_path()
                subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
                subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

            subprocess_args.insert(4, '--check')
            for check in jmx_checks:
                subprocess_args.insert(5, check)

            # Specify a maximum memory allocation pool for the JVM
            if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
            # Specify the initial memory allocation pool for the JVM
            if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

            for opt in java_run_opts.split():
                subprocess_args.insert(1, opt)

            log.info("Running %s" % " ".join(subprocess_args))
            jmx_process = subprocess.Popen(subprocess_args, close_fds=True)
            self.jmx_process = jmx_process

            # Register SIGINT and SIGTERM signal handlers
            self.register_signal_handlers()

            # Wait for JMXFetch to return
            jmx_process.wait()

            return jmx_process.returncode

        except OSError:
            java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
            log.exception(java_path_msg)
            invalid_checks = {}
            for check in jmx_checks:
                check_name = check.split('.')[0]
                check_name = check_name.encode('ascii', 'ignore')
                invalid_checks[check_name] = java_path_msg
            JMXFiles.write_status_file(invalid_checks)
            raise
        except Exception:
            log.exception("Couldn't launch JMXFetch")
            raise