def _add_jmxinfo_tar(self):
    """Collect JMX diagnostics under the ``jmxinfo`` directory.

    Nothing is collected unless the third value returned by
    ``self._capture_output(self._should_run_jmx)`` is truthy. When it is,
    three things are added, in this order:

    1. the two JMX status files, if readable (via ``_add_file_tar``);
    2. the output of the ``list_matching_attributes`` and
       ``list_everything`` jmx commands (via ``_add_command_output_tar``);
    3. the output of ``<java binary> -version``.
    """
    _, _, jmx_enabled = self._capture_output(self._should_run_jmx)
    if not jmx_enabled:
        return

    # Status files are gathered before listing beans because executing
    # jmxfetch overwrites the status files.
    status_files = (
        (JMXFiles._STATUS_FILE, JMXFiles.get_status_file_path()),
        (JMXFiles._PYTHON_STATUS_FILE, JMXFiles.get_python_status_file_path()),
    )
    for archive_name, source_path in status_files:
        if not self._can_read(source_path, warn=False):
            continue
        self._add_file_tar(source_path, os.path.join('jmxinfo', archive_name))

    # Beans lists: one log file per jmx command.
    for command in ('list_matching_attributes', 'list_everything'):
        log.info(" * datadog-agent jmx {0} output".format(command))
        output_name = os.path.join('jmxinfo', '{0}.log'.format(command))
        self._add_command_output_tar(
            output_name,
            partial(self._jmx_command_call, command)
        )

    # Java version: use the third element of the JMXFetch configuration as
    # the binary path, falling back to plain 'java' when it is falsy.
    log.info(" * java -version output")
    _, _, java_bin_path = self._capture_output(
        lambda: JMXFetch.get_configuration(get_confd_path())[2] or 'java'
    )
    version_log = os.path.join('jmxinfo', 'java_version.log')
    self._add_command_output_tar(
        version_log,
        lambda: self._java_version(java_bin_path),
        command_desc="{0} -version".format(java_bin_path)
    )
def _add_jmxinfo_tar(self):
    """Add the JMX-related files and command outputs to ``jmxinfo/``.

    The work is skipped entirely when the result captured from
    ``self._should_run_jmx`` is falsy. Otherwise the readable JMX status
    files are added first, then the output of the two jmx listing commands,
    and finally the output of ``java -version`` for the configured binary.
    """
    def _in_jmxinfo(name):
        # Every entry produced here lives under the same directory.
        return os.path.join('jmxinfo', name)

    _, _, run_jmx = self._capture_output(self._should_run_jmx)
    if not run_jmx:
        return

    # status files (before listing beans because executing jmxfetch
    # overwrites status files)
    java_status = (JMXFiles._STATUS_FILE, JMXFiles.get_status_file_path())
    python_status = (JMXFiles._PYTHON_STATUS_FILE,
                     JMXFiles.get_python_status_file_path())
    for status_name, status_path in (java_status, python_status):
        if self._can_read(status_path, warn=False):
            self._add_file_tar(status_path, _in_jmxinfo(status_name))

    # beans lists
    for jmx_command in ('list_matching_attributes', 'list_everything'):
        log.info(" * datadog-agent jmx {0} output".format(jmx_command))
        self._add_command_output_tar(
            _in_jmxinfo('{0}.log'.format(jmx_command)),
            partial(self._jmx_command_call, jmx_command)
        )

    # java version
    log.info(" * java -version output")
    captured = self._capture_output(
        lambda: JMXFetch.get_configuration(get_confd_path())[2] or 'java'
    )
    _, _, java_bin_path = captured
    self._add_command_output_tar(
        _in_jmxinfo('java_version.log'),
        lambda: self._java_version(java_bin_path),
        command_desc="{0} -version".format(java_bin_path)
    )
def get_jmx_status():
    """Build the list of check statuses from the two jmxfetch status files.

    Both files are YAML files located in the temp directory:

    - One is generated by the Agent itself, for jmx checks that can't be
      initialized because something is missing. Its format is as follows:

      ###
      invalid_checks:
        jmx: !!python/object/apply:jmxfetch.InvalidJMXConfiguration [You need to have at least one instance defined in the YAML file for this check]
      timestamp: 1391040927.136523
      ###

    - One is generated by jmxfetch and returns information about the
      collection of metrics. Its format is as follows:

      ###
      timestamp: 1391037347435
      checks:
        failed_checks:
          jmx:
          - {message: Unable to create instance. Please check your yaml file, status: ERROR}
        initialized_checks:
          tomcat:
          - {message: null, status: OK, metric_count: 7, instance_name: jmx-remihakim.fr-3000}
      ###

    Returns:
        A list of ``CheckStatus`` objects. The list is empty when neither
        status file exists, or when reading or parsing a file raises (the
        exception is logged, not propagated).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s",
                  java_status_path, python_status_path)
        return []

    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # NOTE(review): yaml.load can instantiate arbitrary Python objects.
            # It is kept because the Agent-written file uses a
            # !!python/object/apply tag; confirm both files are only writable
            # by trusted processes.
            with open(java_status_path) as java_status_file:
                java_jmx_stats = yaml.load(java_status_file)

            # JMX timestamp is saved in milliseconds
            status_age = time.time() - java_jmx_stats.get('timestamp') / 1000
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                # A status file older than a minute is reported as a single
                # error instead of showing stale per-check data.
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ])
                )
            else:
                # Failed and initialized checks share the same entry layout,
                # so both sections go through one loop (failed first).
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)

                            check_data[check_name]['statuses'].append(
                                get_jmx_instance_status(instance_name, status, message, metric_count))
                            check_data[check_name]['metric_count'].append(metric_count)
                            check_data[check_name]['service_check_count'].append(service_check_count)

                # One CheckStatus per check name, with counts summed over
                # all of its instances.
                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(
                        check_name, data['statuses'],
                        metric_count=sum(data['metric_count']),
                        service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            with open(python_status_path) as python_status_file:
                python_jmx_stats = yaml.load(python_status_file)
            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        # Best effort: any read or parse problem yields an empty status list.
        log.exception("Couldn't load latest jmx status")
        return []
def get_jmx_status():
    """Read the two jmxfetch status files and return the check statuses.

    The two files are YAML files located in the temp directory:

    - One is generated by the Agent itself, for jmx checks that can't be
      initialized because something is missing. Its format is as follows:

      ###
      invalid_checks:
        jmx: !!python/object/apply:jmxfetch.InvalidJMXConfiguration [You need to have at least one instance defined in the YAML file for this check]
      timestamp: 1391040927.136523
      ###

    - One is generated by jmxfetch and returns information about the
      collection of metrics. Its format is as follows:

      ###
      timestamp: 1391037347435
      checks:
        failed_checks:
          jmx:
          - {message: Unable to create instance. Please check your yaml file, status: ERROR}
        initialized_checks:
          tomcat:
          - {message: null, status: OK, metric_count: 7, instance_name: jmx-remihakim.fr-3000}
      ###

    Returns:
        A list of ``CheckStatus`` objects; an empty list when no status
        file exists or when loading one of them raises (the exception is
        logged and swallowed).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s",
                  java_status_path, python_status_path)
        return []

    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # NOTE(review): yaml.load can build arbitrary Python objects; the
            # Agent-written file depends on that (python/object/apply tag),
            # so this is flagged rather than swapped for safe_load.
            with open(java_status_path) as stream:
                java_jmx_stats = yaml.load(stream)

            # JMX timestamp is saved in milliseconds
            status_age = time.time() - java_jmx_stats.get('timestamp') / 1000
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                # Stale file: report one generic error for "jmx".
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ])
                )
            else:
                # Both sections hold the same kind of entries; process the
                # failed ones first, then the initialized ones.
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)

                            check_data[check_name]['statuses'].append(
                                get_jmx_instance_status(instance_name, status, message, metric_count))
                            check_data[check_name]['metric_count'].append(metric_count)
                            check_data[check_name]['service_check_count'].append(service_check_count)

                # Aggregate the per-instance data into one CheckStatus per
                # check name.
                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(
                        check_name, data['statuses'],
                        metric_count=sum(data['metric_count']),
                        service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            with open(python_status_path) as stream:
                python_jmx_stats = yaml.load(stream)
            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        # Deliberate catch-all: a broken status file must not break callers.
        log.exception("Couldn't load latest jmx status")
        return []
def get_jmx_status():
    """Return the check statuses described by the JMX status files.

    Two YAML files are read, at the paths given by
    ``JMXFiles.get_status_file_path()`` and
    ``JMXFiles.get_python_status_file_path()``:

    - From the first, the ``timestamp`` key (milliseconds) and the
      ``failed_checks`` / ``initialized_checks`` mappings under ``checks``
      are used. If the timestamp is more than 60 seconds old, a single
      error status for "jmx" is reported instead of the per-check data.
    - From the second, each entry of ``invalid_checks`` becomes a
      ``CheckStatus`` carrying the stored value as ``init_failed_error``.

    Returns:
        A list of ``CheckStatus`` objects; an empty list when neither file
        exists or when loading a file raises (the exception is logged).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(
            python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s",
                  java_status_path, python_status_path)
        return []

    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # NOTE(review): yaml.load can instantiate arbitrary Python
            # objects. The value under invalid_checks is presumably a
            # serialized exception object, so confirm before switching to
            # safe_load, and confirm these files are trusted.
            with open(java_status_path) as java_status_file:
                java_jmx_stats = yaml.load(java_status_file)

            # The timestamp is divided by 1000 to compare it with
            # time.time(), which is in seconds.
            status_age = time.time() - java_jmx_stats.get('timestamp') / 1000
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ]))
            else:
                # The two sections are handled identically, failed checks
                # first, so a single loop covers both.
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(
                            section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get(
                                'service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)

                            check_data[check_name]['statuses'].append(
                                get_jmx_instance_status(
                                    instance_name, status, message,
                                    metric_count))
                            check_data[check_name]['metric_count'].append(
                                metric_count)
                            check_data[check_name]['service_check_count'].append(
                                service_check_count)

                # Sum the counts of every instance of a check into one
                # CheckStatus.
                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(
                        check_name,
                        data['statuses'],
                        metric_count=sum(data['metric_count']),
                        service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            with open(python_status_path) as python_status_file:
                python_jmx_stats = yaml.load(python_status_file)
            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(
                    CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        # Best effort: log the failure and report no statuses.
        log.exception("Couldn't load latest jmx status")
        return []