def _add_conf_tar(self):
    """Add the agent configuration files to the archive.

    Handles the main config file, ``supervisor.conf`` (skipped on Windows)
    and every ``*.yaml`` / ``*.yaml.default`` file of the conf.d directory.
    Each file is only added when ``self._can_read`` accepts it, and goes
    through ``self._add_clean_conf``.
    """
    main_conf = get_config_path()
    if self._can_read(main_conf, output=False):
        self._add_clean_conf(main_conf, 'etc', self.MAIN_CREDENTIALS)

    if not Platform.is_windows():
        # supervisor.conf lives next to the main config file
        conf_dir = os.path.dirname(get_config_path())
        supervisor_conf = os.path.join(conf_dir, 'supervisor.conf')
        if self._can_read(supervisor_conf, output=False):
            self._add_clean_conf(supervisor_conf, 'etc')

    check_confs = glob.glob(os.path.join(get_confd_path(), '*.yaml'))
    check_confs = check_confs + glob.glob(os.path.join(get_confd_path(), '*.yaml.default'))
    for check_conf in check_confs:
        if not self._can_read(check_conf, output=False):
            continue
        self._add_clean_conf(
            check_conf, os.path.join('etc', 'confd'), self.CHECK_CREDENTIALS)
def _add_conf_tar(self):
    """Add the agent configuration files to the archive.

    The main config file and ``supervisor.conf`` (skipped on Windows) are
    passed through ``self._strip_comment`` and added with
    ``self._add_file_tar``; conf.d ``*.yaml`` / ``*.yaml.default`` files are
    handed to ``self._add_clean_confd``. Unreadable files are skipped.
    """
    main_conf = get_config_path()
    if self._can_read(main_conf):
        stripped_main = self._strip_comment(main_conf)
        self._add_file_tar(
            stripped_main,
            os.path.join('etc', 'datadog.conf'),
            original_file_path=main_conf,
        )

    if not Platform.is_windows():
        # supervisor.conf lives next to the main config file
        conf_dir = os.path.dirname(get_config_path())
        supervisor_conf = os.path.join(conf_dir, 'supervisor.conf')
        if self._can_read(supervisor_conf):
            stripped_supervisor = self._strip_comment(supervisor_conf)
            self._add_file_tar(
                stripped_supervisor,
                os.path.join('etc', 'supervisor.conf'),
                original_file_path=supervisor_conf,
            )

    check_confs = glob.glob(os.path.join(get_confd_path(), '*.yaml'))
    check_confs = check_confs + glob.glob(os.path.join(get_confd_path(), '*.yaml.default'))
    for check_conf in check_confs:
        if self._can_read(check_conf, output=False):
            self._add_clean_confd(check_conf)
def _add_conf_tar(self):
    """Add the agent configuration files to ``self._tar``.

    The main config file and ``supervisor.conf`` (skipped on Windows) are
    logged, passed through ``self._strip_comment`` and stored under
    ``<prefix>/etc``; conf.d ``*.yaml`` / ``*.yaml.default`` files are handed
    to ``self._add_clean_confd``.
    """
    main_conf = get_config_path()
    log.info(" * {0}".format(main_conf))
    stripped_main = self._strip_comment(main_conf)
    self._tar.add(stripped_main, os.path.join(self._prefix, "etc", "datadog.conf"))

    if not Platform.is_windows():
        # supervisor.conf lives next to the main config file
        conf_dir = os.path.dirname(get_config_path())
        supervisor_conf = os.path.join(conf_dir, "supervisor.conf")
        log.info(" * {0}".format(supervisor_conf))
        stripped_supervisor = self._strip_comment(supervisor_conf)
        self._tar.add(
            stripped_supervisor, os.path.join(self._prefix, "etc", "supervisor.conf"))

    check_confs = glob.glob(os.path.join(get_confd_path(), "*.yaml"))
    check_confs = check_confs + glob.glob(os.path.join(get_confd_path(), "*.yaml.default"))
    for check_conf in check_confs:
        self._add_clean_confd(check_conf)
def jmx_command(args, agent_config, redirect_std_streams=False):
    """
    Run JMXFetch with the command given in ``args[0]``.

    When no command is given, or it is not one of ``JMX_LIST_COMMANDS``,
    a usage message is printed instead. Remaining ``args`` are the
    optional list of checks the command applies to.
    """
    from jmxfetch import JMX_LIST_COMMANDS, JMXFetch

    if len(args) < 1 or args[0] not in JMX_LIST_COMMANDS:
        # Invalid invocation: print the help and stop here
        banner = "#" * 80
        print(banner)
        print("JMX tool to be used to help configuring your JMX checks.")
        print("See http://docs.datadoghq.com/integrations/java/ for more information")
        print(banner)
        print("\n")
        print("You have to specify one of the following commands:")
        for command, desc in JMX_LIST_COMMANDS.iteritems():
            print(" - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc))
        print("Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr")
        print("\n")
        return

    requested_command, checks_list = args[0], args[1:]
    confd_directory = get_confd_path()

    jmx_process = JMXFetch(confd_directory, agent_config)
    jmx_process.configure()

    if not jmx_process.should_run():
        print("Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory)
        print("Have you enabled any JMX check ?")
        print("If you think it's not normal please get in touch with Datadog Support")
        return

    jmx_process.run(requested_command, checks_list, reporter="console",
                    redirect_std_streams=redirect_std_streams)
def run_check(name, path=None):
    """Run the check ``name`` once per configured instance and print its output.

    Args:
        name: name of the check; also used to locate ``<name>.yaml`` in the
            conf.d directory when ``path`` is not given.
        path: optional explicit path to the YAML configuration file.

    Raises:
        Exception: if the configuration file cannot be opened, or if it
            yields no instances.
    """
    from tests.common import get_check

    # Read the config file
    confd_path = path or os.path.join(get_confd_path(get_os()), '%s.yaml' % name)

    try:
        f = open(confd_path)
    except IOError:
        raise Exception('Unable to open configuration at %s' % confd_path)

    # Close the handle even when reading fails
    try:
        config_str = f.read()
    finally:
        f.close()

    # Run the check
    check, instances = get_check(name, config_str)
    if not instances:
        raise Exception('YAML configuration returned no instances.')
    for instance in instances:
        check.check(instance)
        if check.has_events():
            print("Events:\n")
            pprint(check.get_events(), indent=4)
        print("Metrics:\n")
        pprint(check.get_metrics(), indent=4)
def run_check(name, path=None):
    """
    Test custom checks on Windows.

    Runs the check ``name`` once per configured instance and prints the
    events and metrics it produced.

    Args:
        name: name of the check; also used to locate ``<name>.yaml`` in the
            conf.d directory when ``path`` is not given.
        path: optional explicit path to the YAML configuration file.

    Raises:
        Exception: if the configuration file cannot be opened, or if it
            yields no instances.
    """
    # Read the config file
    confd_path = path or os.path.join(get_confd_path(get_os()), "%s.yaml" % name)

    try:
        f = open(confd_path)
    except IOError:
        raise Exception("Unable to open configuration at %s" % confd_path)

    # Close the handle even when reading fails
    try:
        config_str = f.read()
    finally:
        f.close()

    # Run the check
    check, instances = get_check(name, config_str)
    if not instances:
        raise Exception("YAML configuration returned no instances.")
    for instance in instances:
        check.check(instance)
        if check.has_events():
            print("Events:\n")
            pprint(check.get_events(), indent=4)
        print("Metrics:\n")
        pprint(check.get_metrics(), indent=4)
def get_hostname():
    """Return the `Name` param from `docker info` to use as the hostname

    Returns None when the conf.d folder or the docker check configuration
    cannot be found, or when the configuration lists no instance.
    """
    from config import get_confd_path, check_yaml, PathNotFound

    try:
        confd_dir = get_confd_path()
    except PathNotFound:
        log.error("Couldn't find the check configuration folder, not using the docker hostname.")
        return None

    # Prefer <check>.yaml, fall back on <check>.yaml.default
    conf_path = os.path.join(confd_dir, "%s.yaml" % CHECK_NAME)
    if not os.path.exists(conf_path):
        conf_path = os.path.join(confd_dir, "%s.yaml.default" % CHECK_NAME)
        if not os.path.exists(conf_path):
            log.error("Couldn't find any configuration file for the docker check."
                      " Not using the docker hostname.")
            return None

    check_config = check_yaml(conf_path)
    init_config = check_config.get("init_config", {})
    instances = check_config["instances"]
    if init_config is None:
        init_config = {}

    if len(instances) == 0:
        return None

    set_docker_settings(init_config, instances[0])
    return get_client().info().get("Name")
def to_dict(self):
    """Return the status as a dict.

    Extends ``AgentStatus.to_dict`` with the keys ``hostnames``, ``checks``,
    ``emitter``, ``confd_path`` and ``checksd_path``.
    """
    status_info = AgentStatus.to_dict(self)

    # Hostnames: keep only the metadata entries whose key contains a whitelisted word
    hostnames = {}
    status_info['hostnames'] = hostnames
    metadata_whitelist = ['hostname', 'fqdn', 'ipv4', 'instance-id']
    if self.metadata:
        for key, host in self.metadata.items():
            if any(item in key for item in metadata_whitelist):
                hostnames[key] = host

    # Checks.d Status
    checks = {}
    status_info['checks'] = checks
    for cs in self.check_statuses:
        instances = {}
        check_info = {'instances': instances}
        checks[cs.name] = check_info
        for s in cs.instance_statuses:
            instance_info = {
                'status': s.status,
                'has_error': s.has_error(),
                'has_warnings': s.has_warnings(),
            }
            instances[s.instance_id] = instance_info
            if s.has_error():
                instance_info['error'] = s.error
            if s.has_warnings():
                instance_info['warnings'] = s.warnings
        check_info['metric_count'] = cs.metric_count
        check_info['event_count'] = cs.event_count

    # Emitter status
    emitters = []
    status_info['emitter'] = emitters
    for es in self.emitter_statuses:
        emitter_info = {
            'name': es.name,
            'status': es.status,
            'has_error': es.has_error(),
        }
        if es.has_error():
            emitter_info['error'] = es.error
        emitters.append(emitter_info)

    # Paths to conf.d / checks.d
    osname = config.get_os()
    for key, getter in (('confd_path', config.get_confd_path),
                        ('checksd_path', config.get_checksd_path)):
        try:
            status_info[key] = getter(osname)
        except config.PathNotFound:
            status_info[key] = 'Not found'

    return status_info
def get_checks():
    """Return the checks found in the conf.d directory, sorted by module name.

    Only files ending in ``.yaml``, ``.example`` or ``.disabled`` are
    considered, and checks excluded for the current platform are skipped.
    When several files map to the same module name, an enabled file wins;
    otherwise a non-example file replaces a stored entry that is not enabled.

    Returns:
        list of ``AgentCheck`` objects ordered by ``module_name``.
    """
    conf_d_directory = get_confd_path()

    # The platform does not change while listing: pick the exclusion list once
    if Platform.is_windows():
        excluded_checks = EXCLUDED_WINDOWS_CHECKS
    else:
        excluded_checks = EXCLUDED_MAC_CHECKS

    checks = {}
    for filename in sorted(os.listdir(conf_d_directory)):
        ext = osp.splitext(filename)[1]
        if filename.split(".")[0] in excluded_checks:
            continue
        if ext not in (".yaml", ".example", ".disabled"):
            continue

        agent_check = AgentCheck(filename, ext, conf_d_directory)
        if (
            agent_check.enabled
            or agent_check.module_name not in checks
            or (not agent_check.is_example and not checks[agent_check.module_name].enabled)
        ):
            checks[agent_check.module_name] = agent_check

    # sorted() works whether values() returns a list or a view
    return sorted(checks.values(), key=lambda c: c.module_name)
def init(config_path=None):
    """Load the agent configuration and look up the conf.d directory.

    Args:
        config_path: optional path handed to ``get_config`` as ``cfg_path``.

    A missing conf.d folder is logged as an error. Nothing is returned.
    """
    agentConfig = get_config(parse_args=False, cfg_path=config_path)
    try:
        confd_path = get_confd_path()
    except PathNotFound as e:
        # Trailing space added so the two literals no longer run together
        log.error("No conf.d folder found at '%s' or in the directory where "
                  "the Agent is currently deployed.\n" % e.args[0])
def _add_jmxinfo_tar(self):
    """Add the JMX related information to the archive, under ``jmxinfo``.

    Does nothing unless ``self._should_run_jmx`` reports that JMX should
    run. Otherwise adds the JMX status files, the output of the bean
    listing commands and the output of ``java -version``.
    """
    _, _, should_run_jmx = self._capture_output(self._should_run_jmx)
    if not should_run_jmx:
        return

    # status files (before listing beans because executing jmxfetch overwrites status files)
    status_files = [
        (JMXFiles._STATUS_FILE, JMXFiles.get_status_file_path()),
        (JMXFiles._PYTHON_STATUS_FILE, JMXFiles.get_python_status_file_path()),
    ]
    for file_name, file_path in status_files:
        if not self._can_read(file_path, warn=False):
            continue
        self._add_file_tar(file_path, os.path.join('jmxinfo', file_name))

    # beans lists
    for command in ['list_matching_attributes', 'list_everything']:
        log.info(" * datadog-agent jmx {0} output".format(command))
        target = os.path.join('jmxinfo', '{0}.log'.format(command))
        self._add_command_output_tar(target, partial(self._jmx_command_call, command))

    # java version
    log.info(" * java -version output")

    def _java_bin_path():
        return JMXFetch.get_configuration(get_confd_path())[2] or 'java'

    _, _, java_bin_path = self._capture_output(_java_bin_path)

    def _java_version_call():
        return self._java_version(java_bin_path)

    self._add_command_output_tar(
        os.path.join('jmxinfo', 'java_version.log'),
        _java_version_call,
        command_desc="{0} -version".format(java_bin_path),
    )
def to_dict(self):
    """Return the status as a dict.

    Extends ``AgentStatus.to_dict`` with the keys ``hostnames``, ``checks``
    (regular checks plus the JMX statuses), ``emitter``, ``confd_path`` and
    ``checksd_path``.
    """
    status_info = AgentStatus.to_dict(self)

    # Hostnames: keep only the metadata entries whose key contains a whitelisted word
    hostnames = {}
    status_info["hostnames"] = hostnames
    metadata_whitelist = ["hostname", "fqdn", "ipv4", "instance-id"]
    if self.metadata:
        for key, host in self.metadata.items():
            if any(item in key for item in metadata_whitelist):
                hostnames[key] = host

    # Checks.d Status
    checks = {}
    status_info["checks"] = checks
    for cs in self.check_statuses + get_jmx_status():
        instances = {}
        check_info = {"instances": instances}
        checks[cs.name] = check_info

        if cs.init_failed_error:
            # The check never initialized: only report the failure
            check_info["init_failed"] = True
            check_info["traceback"] = cs.init_failed_traceback
            continue

        check_info["init_failed"] = False
        for s in cs.instance_statuses:
            instance_info = {
                "status": s.status,
                "has_error": s.has_error(),
                "has_warnings": s.has_warnings(),
            }
            instances[s.instance_id] = instance_info
            if s.has_error():
                instance_info["error"] = s.error
            if s.has_warnings():
                instance_info["warnings"] = s.warnings
        check_info["metric_count"] = cs.metric_count
        check_info["event_count"] = cs.event_count

    # Emitter status
    emitters = []
    status_info["emitter"] = emitters
    for es in self.emitter_statuses:
        emitter_info = {"name": es.name, "status": es.status, "has_error": es.has_error()}
        if es.has_error():
            emitter_info["error"] = es.error
        emitters.append(emitter_info)

    # Paths to conf.d / checks.d
    osname = config.get_os()
    for key, getter in (("confd_path", config.get_confd_path),
                        ("checksd_path", config.get_checksd_path)):
        try:
            status_info[key] = getter(osname)
        except config.PathNotFound:
            status_info[key] = "Not found"

    return status_info
def init(config_path=None):
    """Load the agent configuration and locate the conf.d directory.

    Args:
        config_path: optional path handed to ``get_config`` as ``cfg_path``.

    Returns:
        A ``(confd_path, agentConfig)`` tuple.

    Raises:
        PathNotFound: when the conf.d folder cannot be found. The error is
            logged first.
    """
    agentConfig = get_config(parse_args=False, cfg_path=config_path)
    try:
        confd_path = get_confd_path()
    except PathNotFound as e:
        log.error("No conf.d folder found at '%s' or in the directory where "
                  "the Agent is currently deployed.\n" % e.args[0])
        # Without a path there is nothing valid to return: propagate the
        # original error instead of failing on an unbound local below.
        raise

    return confd_path, agentConfig
def _add_conf_tar(self):
    """Add the agent configuration files to ``self._tar``.

    The main config file and ``supervisor.conf`` (skipped on Windows) are
    passed through ``self._strip_comment`` and stored under ``<prefix>/etc``;
    conf.d ``*.yaml`` / ``*.yaml.default`` files are handed to
    ``self._add_clean_confd``. Unreadable files are skipped.
    """
    main_conf = get_config_path()
    if self._can_read(main_conf):
        stripped_main = self._strip_comment(main_conf)
        self._tar.add(stripped_main, os.path.join(self._prefix, 'etc', 'datadog.conf'))

    if not Platform.is_windows():
        # supervisor.conf lives next to the main config file
        conf_dir = os.path.dirname(get_config_path())
        supervisor_conf = os.path.join(conf_dir, 'supervisor.conf')
        if self._can_read(supervisor_conf):
            stripped_supervisor = self._strip_comment(supervisor_conf)
            self._tar.add(
                stripped_supervisor,
                os.path.join(self._prefix, 'etc', 'supervisor.conf'))

    check_confs = glob.glob(os.path.join(get_confd_path(), '*.yaml'))
    check_confs = check_confs + glob.glob(os.path.join(get_confd_path(), '*.yaml.default'))
    for check_conf in check_confs:
        if self._can_read(check_conf, output=False):
            self._add_clean_confd(check_conf)
def set_user_ntp_settings(instance=None):
    """Store the NTP settings in the module-level ``user_ntp_settings``.

    When ``instance`` is None, the first instance of ``ntp.yaml`` in the
    conf.d directory is used; any failure while loading it falls back on
    an empty dict.
    """
    global user_ntp_settings

    settings = instance
    if settings is None:
        try:
            ntp_yaml_path = os.path.join(get_confd_path(), "ntp.yaml")
            settings = check_yaml(ntp_yaml_path)["instances"][0]
        except Exception:
            settings = {}

    user_ntp_settings = settings
def to_dict(self):
    """Return the status as a dict.

    Extends ``AgentStatus.to_dict`` with the keys ``hostnames`` (left
    empty), ``checks`` (regular checks plus the JMX statuses), ``emitter``,
    ``confd_path`` and ``checksd_path``.
    """
    status_info = AgentStatus.to_dict(self)

    # Hostnames
    status_info['hostnames'] = {}

    # Checks.d Status
    checks = {}
    status_info['checks'] = checks
    for cs in self.check_statuses + get_jmx_status():
        instances = {}
        check_info = {'instances': instances}
        checks[cs.name] = check_info

        if cs.init_failed_error:
            # The check never initialized: only report the failure
            check_info['init_failed'] = True
            check_info['traceback'] = cs.init_failed_traceback
            continue

        check_info['init_failed'] = False
        for s in cs.instance_statuses:
            instance_info = {
                'status': s.status,
                'has_error': s.has_error(),
                'has_warnings': s.has_warnings(),
            }
            instances[s.instance_id] = instance_info
            if s.has_error():
                instance_info['error'] = s.error
            if s.has_warnings():
                instance_info['warnings'] = s.warnings
        check_info['metric_count'] = cs.metric_count
        check_info['event_count'] = cs.event_count

    # Emitter status
    emitters = []
    status_info['emitter'] = emitters
    for es in self.emitter_statuses:
        emitter_info = {'name': es.name, 'status': es.status, 'has_error': es.has_error()}
        if es.has_error():
            emitter_info['error'] = es.error
        emitters.append(emitter_info)

    # Paths to conf.d / checks.d
    osname = config.get_os()
    for key, getter in (('confd_path', config.get_confd_path),
                        ('checksd_path', config.get_checksd_path)):
        try:
            status_info[key] = getter(osname)
        except config.PathNotFound:
            status_info[key] = 'Not found'

    return status_info
def set_user_ntp_settings(instance=None):
    """Store the NTP settings in the module-level ``user_ntp_settings``.

    When ``instance`` is None, the first instance of ``ntp.yaml`` in the
    conf.d directory is used; any failure while loading it falls back on
    an empty dict.
    """
    global user_ntp_settings

    if instance is not None:
        user_ntp_settings = instance
        return

    try:
        ntp_yaml_path = os.path.join(get_confd_path(), 'ntp.yaml')
        loaded = check_yaml(ntp_yaml_path)['instances'][0]
    except Exception:
        loaded = {}
    user_ntp_settings = loaded
def __init__(self):
    """Initialize the instance from the agent configuration.

    Reads ``request_interval`` (default 120) and ``api_key`` from the
    config, opens a ``requests`` session, and derives the ``run`` and
    ``scripts`` directories from the conf.d and checks.d paths.
    """
    agent_config = get_config()

    self.request_interval = agent_config.get("request_interval", 120)
    self.requests = requests.session()
    self.uuid = get_uuid()

    api_key = agent_config.get("api_key")
    self.apikey = api_key
    self.post_param = {"apikey": api_key}
    self.urls = self.init_url(agent_config)

    # "run" is a sibling of the conf.d directory
    confd_parent = os.path.dirname(get_confd_path())
    self.run_path = os.path.join(confd_parent, 'run')

    # scripts path is the checks.d path with "checks.d" swapped for "scripts"
    checksd_path = get_checksd_path()
    self.script_path = checksd_path.replace("checks.d", "scripts")

    self.need_restart = False
def configcheck():
    """Validate every ``*.yaml`` file of the conf.d directory with ``check_yaml``.

    Prints one line per file saying whether it is valid or what the error is.

    Returns:
        0 when every file is valid, 1 otherwise.
    """
    all_valid = True
    for conf_path in glob.glob(os.path.join(get_confd_path(), "*.yaml")):
        basename = os.path.basename(conf_path)
        try:
            check_yaml(conf_path)
        except Exception as e:
            all_valid = False
            print("%s contains errors:\n %s" % (basename, e))
        else:
            print("%s is valid" % basename)

    # Report the outcome to the caller instead of dropping it
    return 0 if all_valid else 1
def __init__(self, agentConfig, hostname):
    """Set up the ``jmxfetch`` process object.

    Args:
        agentConfig: agent configuration, stored as ``self.config``.
        hostname: stored as ``self.hostname``.

    A missing conf.d folder is logged as an error.
    """
    multiprocessing.Process.__init__(self, name='jmxfetch')
    self.config = agentConfig
    self.is_enabled = True
    self.hostname = hostname

    osname = get_os()
    try:
        confd_path = get_confd_path(osname)
    except PathNotFound as e:
        # Trailing space added so the two literals no longer run together
        log.error("No conf.d folder found at '%s' or in the directory where "
                  "the Agent is currently deployed.\n" % e.args[0])
def __init__(self, agentConfig, hostname):
    """Set up the ``jmxfetch`` process object and its JMXFetch daemon.

    Args:
        agentConfig: agent configuration, stored as ``self.config``.
        hostname: stored as ``self.hostname``.

    ``self.is_enabled`` is what the configured daemon's ``should_run()``
    returns, or False when ``PathNotFound`` is raised during the setup.
    """
    multiprocessing.Process.__init__(self, name='jmxfetch')
    self.config, self.hostname = agentConfig, hostname

    try:
        self.jmx_daemon = JMXFetch(get_confd_path(), agentConfig)
        self.jmx_daemon.configure()
        enabled = self.jmx_daemon.should_run()
    except PathNotFound:
        enabled = False
    self.is_enabled = enabled
def __init__(self, agentConfig, hostname):
    """Set up the ``jmxfetch`` process object.

    Args:
        agentConfig: agent configuration, stored as ``self.config``.
        hostname: stored as ``self.hostname``.

    A missing conf.d folder is logged as an error.
    """
    multiprocessing.Process.__init__(self, name='jmxfetch')
    self.config = agentConfig
    self.is_enabled = True
    self.hostname = hostname

    osname = get_os()
    try:
        confd_path = get_confd_path(osname)
    except PathNotFound as e:
        # Trailing space added so the two literals no longer run together
        log.error(
            "No conf.d folder found at '%s' or in the directory where "
            "the Agent is currently deployed.\n" % e.args[0])
def _add_conf_tar(self):
    """Add the agent configuration files to the archive.

    Handles the main config file, ``supervisor.conf`` (skipped on Windows),
    the conf.d ``*.yaml`` / ``*.yaml.default`` files and the ``*.yaml``
    files of the auto_conf directory. Each file is only added when
    ``self._can_read`` accepts it, and goes through ``self._add_clean_conf``.
    """
    main_conf = get_config_path()
    if self._can_read(main_conf, output=False):
        self._add_clean_conf(main_conf, 'etc', self.MAIN_CREDENTIALS)

    if not Platform.is_windows():
        # supervisor.conf lives next to the main config file
        conf_dir = os.path.dirname(get_config_path())
        supervisor_conf = os.path.join(conf_dir, 'supervisor.conf')
        if self._can_read(supervisor_conf, output=False):
            self._add_clean_conf(supervisor_conf, 'etc')

    check_confs = glob.glob(os.path.join(get_confd_path(), '*.yaml'))
    check_confs = check_confs + glob.glob(os.path.join(get_confd_path(), '*.yaml.default'))
    for check_conf in check_confs:
        if not self._can_read(check_conf, output=False):
            continue
        self._add_clean_conf(
            check_conf, os.path.join('etc', 'confd'), self.CHECK_CREDENTIALS)

    auto_confs = glob.glob(os.path.join(get_auto_confd_path(), '*.yaml'))
    for auto_conf in auto_confs:
        if not self._can_read(auto_conf, output=False):
            continue
        self._add_clean_conf(
            auto_conf,
            os.path.join('etc', 'confd', 'auto_conf'),
            self.CHECK_CREDENTIALS)
def _add_conf_tar(self):
    """Add the agent configuration files to ``self._tar``.

    The main config file and ``supervisor.conf`` (skipped on Windows) are
    logged, passed through ``self._strip_comment`` and stored under
    ``<prefix>/etc``; conf.d ``*.yaml`` files are handed to
    ``self._add_clean_confd``.
    """
    main_conf = get_config_path()
    log.info(" * {0}".format(main_conf))
    stripped_main = self._strip_comment(main_conf)
    self._tar.add(stripped_main, os.path.join(self._prefix, 'etc', 'datadog.conf'))

    if not Platform.is_windows():
        # supervisor.conf lives next to the main config file
        conf_dir = os.path.dirname(get_config_path())
        supervisor_conf = os.path.join(conf_dir, 'supervisor.conf')
        log.info(" * {0}".format(supervisor_conf))
        stripped_supervisor = self._strip_comment(supervisor_conf)
        self._tar.add(
            stripped_supervisor,
            os.path.join(self._prefix, 'etc', 'supervisor.conf'))

    yaml_pattern = os.path.join(get_confd_path(), '*.yaml')
    for check_conf in glob.glob(yaml_pattern):
        self._add_clean_confd(check_conf)
def main():
    """Entry point: start the updater process when conf.d can be located.

    The path of ``updater.yaml`` in conf.d is logged. The process is only
    started when that lookup raises neither ``PathNotFound`` nor ``IOError``.
    """
    options, args = get_parsed_args()
    agent_config = get_config(options=options)

    try:
        updater_conf = os.path.join(get_confd_path(), '%s.yaml' % "updater")
        log.info(updater_conf)
    except PathNotFound as e:
        log.warn("Path conf.d does not exist %s." % e)
        return
    except IOError:
        log.info("Not starting updater: no valid configuration found")
        return

    UpdaterProcess().run()
def get_jmx_checks(confd_path=None, auto_conf=False):
    """Collect the JMX checks configured under the conf.d directory.

    A check counts as JMX when its ``init_config`` sets ``is_jmx`` or when
    its name is listed in ``JMX_CHECKS``.

    Args:
        confd_path: directory to scan; ``get_confd_path()`` is used when falsy.
        auto_conf: when true, scan the ``auto_conf`` sub-directory and return
            check names only, completed with every name of ``JMX_CHECKS``.

    Returns:
        A list of check names when ``auto_conf`` is true, otherwise a list of
        dicts with the keys ``check_config``, ``check_name`` and ``filename``.
    """
    jmx_checks = []

    if not confd_path:
        confd_path = get_confd_path()

    path = os.path.join(confd_path, 'auto_conf') if auto_conf else confd_path

    for conf in glob.glob(os.path.join(path, '*.yaml')):
        # The file may have disappeared since glob listed it: skip it rather
        # than going on without a freshly parsed config.
        if not os.path.exists(conf):
            continue

        filename = os.path.basename(conf)
        check_name = filename.split('.')[0]

        with open(conf, 'r') as f:
            try:
                # NOTE(review): safe only if yLoader is a safe loader - confirm.
                check_config = yaml.load(f.read(), Loader=yLoader)
            except Exception:
                check_config = None

        # Explicit test instead of `assert`, which is stripped under -O
        if check_config is None:
            log.error("Unable to parse yaml config in %s" % conf)
            continue

        init_config = check_config.get('init_config', {}) or {}

        if init_config.get('is_jmx') or check_name in JMX_CHECKS:
            # If called by `get_configuration()` we should return the check_config and check_name
            if auto_conf:
                jmx_checks.append(check_name)
            else:
                jmx_checks.append({
                    'check_config': check_config,
                    'check_name': check_name,
                    'filename': filename
                })

    if auto_conf:
        # Calls from SD expect all JMX checks, let's add check names in JMX_CHECKS
        for check in JMX_CHECKS:
            if check not in jmx_checks:
                jmx_checks.append(check)

    return jmx_checks
def configcheck():
    """Validate every ``*.yaml`` file of the conf.d directory with ``check_yaml``.

    Prints one line per file, then a summary.

    Returns:
        0 when every file is valid, 1 otherwise.
    """
    invalid_count = 0
    yaml_pattern = os.path.join(get_confd_path(), "*.yaml")

    for conf_path in glob.glob(yaml_pattern):
        basename = os.path.basename(conf_path)
        try:
            check_yaml(conf_path)
        except Exception as e:
            invalid_count += 1
            print("%s contains errors:\n %s" % (basename, e))
        else:
            print("%s is valid" % basename)

    if invalid_count:
        print("Fix the invalid yaml files above in order to start the Datadog agent. "
              "A useful external tool for yaml parsing can be found at "
              "http://yaml-online-parser.appspot.com/")
        return 1

    print("All yaml files passed. You can now run the Datadog agent.")
    return 0
def get_conf_path(check_name):
    """Return the yaml config file path for a given check name or raise an IOError.

    ``<check_name>.yaml`` is preferred over ``<check_name>.yaml.default``.
    Returns None (after logging an error) when the conf.d folder itself
    cannot be found.
    """
    from config import get_confd_path, PathNotFound

    try:
        confd_dir = get_confd_path()
    except PathNotFound:
        log.error("Couldn't find the check configuration folder, this shouldn't happen.")
        return None

    for suffix in ('%s.yaml', '%s.yaml.default'):
        candidate = os.path.join(confd_dir, suffix % check_name)
        if os.path.exists(candidate):
            return candidate

    raise IOError("Couldn't find any configuration file for the %s check." % check_name)
def configcheck():
    """Validate every ``*.yaml`` file of the conf.d directory with ``check_yaml``.

    Prints one line per file, then a summary.

    Returns:
        0 when every file is valid, 1 otherwise.
    """
    invalid_count = 0
    yaml_pattern = os.path.join(get_confd_path(), "*.yaml")

    for conf_path in glob.glob(yaml_pattern):
        basename = os.path.basename(conf_path)
        try:
            check_yaml(conf_path)
        except Exception as e:
            invalid_count += 1
            print("%s contains errors:\n %s" % (basename, e))
        else:
            print("%s is valid" % basename)

    if invalid_count:
        print("Fix the invalid yaml files above in order to start the StackState agent. "
              "A useful external tool for yaml parsing can be found at "
              "http://yaml-online-parser.appspot.com/")
        return 1

    print("All yaml files passed. You can now run the StackState agent.")
    return 0
def get_checks():
    """Return the checks found in the conf.d directory, sorted by module name.

    Only files ending in ``.yaml``, ``.example`` or ``.disabled`` are
    considered, and checks listed in ``EXCLUDED_WINDOWS_CHECKS`` are skipped.
    When several files map to the same module name, an enabled file wins;
    otherwise a non-example file replaces a stored entry that is not enabled.

    Returns:
        list of ``AgentCheck`` objects ordered by ``module_name``.
    """
    checks = {}
    conf_d_directory = get_confd_path(get_os())

    for filename in sorted(os.listdir(conf_d_directory)):
        ext = osp.splitext(filename)[1]
        if filename.split('.')[0] in EXCLUDED_WINDOWS_CHECKS:
            continue
        if ext not in ('.yaml', '.example', '.disabled'):
            continue

        agent_check = AgentCheck(filename, ext, conf_d_directory)
        if (agent_check.enabled or agent_check.module_name not in checks or
                (not agent_check.is_example and not checks[agent_check.module_name].enabled)):
            checks[agent_check.module_name] = agent_check

    # sorted() works whether values() returns a list or a view
    return sorted(checks.values(), key=lambda c: c.module_name)
def __init__(self, config=None):
    """Read the NTP settings and expose them as attributes and ``self.args``.

    Settings come from the first instance of ``config`` when it is truthy,
    else from ``ntp.yaml`` in the conf.d directory. Any failure while
    loading them falls back on empty settings, so every value takes its
    default.
    """
    try:
        ntp_config = config if config else check_yaml(
            os.path.join(get_confd_path(), 'ntp.yaml'))
        settings = ntp_config['instances'][0]
    except Exception:
        settings = {}

    # Missing or falsy settings fall back on the defaults
    host = settings.get('host')
    if not host:
        host = "{0}.datadog.pool.ntp.org".format(random.randint(0, 3))
    self.host = host
    self.version = int(settings.get("version") or NTPUtil.DEFAULT_VERSION)
    self.port = settings.get('port') or NTPUtil.DEFAULT_PORT
    self.timeout = float(settings.get('timeout') or NTPUtil.DEFAULT_TIMEOUT)

    self.args = dict(
        host=self.host,
        port=self.port,
        version=self.version,
        timeout=self.timeout,
    )
def run_check(name, path=None):
    """Run the check ``name`` once per configured instance and print its output.

    Args:
        name: name of the check; also used to locate ``<name>.yaml`` in the
            conf.d directory when ``path`` is not given.
        path: optional explicit path to the YAML configuration file.

    Raises:
        Exception: if the configuration file cannot be opened, or if it
            yields no instances.
    """
    confd_path = path or os.path.join(get_confd_path(get_os()), '%s.yaml' % name)

    try:
        f = open(confd_path)
    except IOError:
        raise Exception('Unable to open configuration at %s' % confd_path)

    # Close the handle even when reading fails
    try:
        config_str = f.read()
    finally:
        f.close()

    check, instances = get_check(name, config_str)
    if not instances:
        raise Exception('YAML configuration returned no instances.')
    for instance in instances:
        check.check(instance)
        if check.has_events():
            print("Events:\n")
            pprint(check.get_events(), indent=4)
        print("Metrics:\n")
        pprint(check.get_metrics(), indent=4)
def get_jmx_checks(confd_path=None, auto_conf=False):
    """Collect the JMX checks configured under the conf.d directory.

    A check counts as JMX when its ``init_config`` sets ``is_jmx`` or when
    its name is listed in ``JMX_CHECKS``.

    Args:
        confd_path: directory to scan; ``get_confd_path()`` is used when falsy.
        auto_conf: when true, scan the ``auto_conf`` sub-directory and return
            check names only, completed with every name of ``JMX_CHECKS``.

    Returns:
        A list of check names when ``auto_conf`` is true, otherwise a list of
        dicts with the keys ``check_config``, ``check_name`` and ``filename``.
    """
    jmx_checks = []

    if not confd_path:
        confd_path = get_confd_path()

    path = os.path.join(confd_path, 'auto_conf') if auto_conf else confd_path

    for conf in glob.glob(os.path.join(path, '*.yaml')):
        # The file may have disappeared since glob listed it: skip it rather
        # than going on without a freshly parsed config.
        if not os.path.exists(conf):
            continue

        filename = os.path.basename(conf)
        check_name = filename.split('.')[0]

        with open(conf, 'r') as f:
            try:
                # NOTE(review): safe only if yLoader is a safe loader - confirm.
                check_config = yaml.load(f.read(), Loader=yLoader)
            except Exception:
                check_config = None

        # Explicit test instead of `assert`, which is stripped under -O
        if check_config is None:
            log.error("Unable to parse yaml config in %s" % conf)
            continue

        init_config = check_config.get('init_config', {}) or {}

        if init_config.get('is_jmx') or check_name in JMX_CHECKS:
            # If called by `get_configuration()` we should return the check_config and check_name
            if auto_conf:
                jmx_checks.append(check_name)
            else:
                jmx_checks.append({'check_config': check_config,
                                   'check_name': check_name,
                                   'filename': filename})

    if auto_conf:
        # Calls from SD expect all JMX checks, let's add check names in JMX_CHECKS
        for check in JMX_CHECKS:
            if check not in jmx_checks:
                jmx_checks.append(check)

    return jmx_checks
def main():
    """Entry point: start the net collector process when it is configured.

    Loads ``net_collector.yaml`` from conf.d with ``check_yaml``. When
    conf.d is missing or an ``IOError`` is raised while loading, the reason
    is logged and 0 is returned after a 6 second pause. Otherwise the
    process is run.
    """
    options, args = get_parsed_args()
    agent_config = get_config(options=options)

    skip_reason = None
    try:
        conf_path = os.path.join(get_confd_path(), '%s.yaml' % "net_collector")
        config = check_yaml(conf_path)
        log.debug("Net scan config: %s" % config)
    except PathNotFound as e:
        log.warn(
            "Not starting net_collector_process: path conf.d does not exist %s." % e)
        skip_reason = 'path'
    except IOError:
        log.info(
            "Not starting net_collector_process: no valid configuration found")
        skip_reason = 'config'

    if skip_reason is not None:
        time.sleep(6)
        return 0

    NetCollectorProcess(agent_config, config).run()
def _should_run_jmx(self):
    """Return what a freshly configured JMXFetch reports from ``should_run()``.

    The JMXFetch is configured with ``clean_status_file=False``.
    """
    confd_directory = get_confd_path()
    fetcher = JMXFetch(confd_directory, self._config)
    fetcher.configure(clean_status_file=False)
    should_run = fetcher.should_run()
    return should_run
def main(): options, args = get_parsed_args() agentConfig = get_config(options=options) autorestart = agentConfig.get('autorestart', False) COMMANDS = [ 'start', 'stop', 'restart', 'foreground', 'status', 'info', 'check', 'configcheck', 'jmx', ] if len(args) < 1: sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS))) return 2 command = args[0] if command not in COMMANDS: sys.stderr.write("Unknown command: %s\n" % command) return 3 pid_file = PidFile('dd-agent') if options.clean: pid_file.clean() agent = Agent(pid_file.get_path(), autorestart) if command in START_COMMANDS: log.info('Agent version %s' % get_version()) if 'start' == command: log.info('Start daemon') agent.start() elif 'stop' == command: log.info('Stop daemon') agent.stop() elif 'restart' == command: log.info('Restart daemon') agent.restart() elif 'status' == command: agent.status() elif 'info' == command: return agent.info(verbose=options.verbose) elif 'foreground' == command: logging.info('Running in foreground') if autorestart: # Set-up the supervisor callbacks and fork it. logging.info('Running Agent with auto-restart ON') def child_func(): agent.run() def parent_func(): agent.start_event = False AgentSupervisor.start(parent_func, child_func) else: # Run in the standard foreground. 
agent.run(config=agentConfig) elif 'check' == command: check_name = args[1] try: import checks.collector # Try the old-style check first print getattr(checks.collector, check_name)(log).check(agentConfig) except Exception: # If not an old-style check, try checks.d checks = load_check_directory(agentConfig) for check in checks['initialized_checks']: if check.name == check_name: check.run() print check.get_metrics() print check.get_events() if len(args) == 3 and args[2] == 'check_rate': print "Running 2nd iteration to capture rate metrics" time.sleep(1) check.run() print check.get_metrics() print check.get_events() elif 'configcheck' == command or 'configtest' == command: osname = get_os() all_valid = True for conf_path in glob.glob( os.path.join(get_confd_path(osname), "*.yaml")): basename = os.path.basename(conf_path) try: check_yaml(conf_path) except Exception, e: all_valid = False print "%s contains errors:\n %s" % (basename, e) else: print "%s is valid" % basename if all_valid: print "All yaml files passed. You can now run the Datadog agent." return 0 else: print( "Fix the invalid yaml files above in order to start the Datadog agent. " "A useful external tool for yaml parsing can be found at " "http://yaml-online-parser.appspot.com/") return 1
print "#" * 80 print "JMX tool to be used to help configuring your JMX checks." print "See http://docs.datadoghq.com/integrations/java/ for more information" print "#" * 80 print "\n" print "You have to specify one of the following command:" for command, desc in JMX_LIST_COMMANDS.iteritems(): print " - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc) print "Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr" print "\n" else: jmx_command = args[1] checks_list = args[2:] confd_directory = get_confd_path(get_os()) should_run = JMXFetch.init(confd_directory, agentConfig, get_logging_config(), 15, jmx_command, checks_list, reporter="console") if not should_run: print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory print "Have you enabled any JMX check ?" print "If you think it's not normal please get in touch with Datadog Support" return 0
# uuid lines = ['UUID', '======', ''] try: uuid = get_uuid() lines.append(' System uuid: ' + str(uuid)) except Exception, e: lines.append(' System uuid: Unknown (%s)' % str(e)) lines.append('') # Paths to checks.d/conf.d lines += ['Paths', '=====', ''] osname = config.get_os() try: confd_path = config.get_confd_path(osname) except config.PathNotFound: confd_path = 'Not found' try: checksd_path = config.get_checksd_path(osname) except config.PathNotFound: checksd_path = 'Not found' lines.append(' conf.d: ' + confd_path) lines.append(' checks.d: ' + checksd_path) lines.append('') # Hostnames lines += ['Hostnames', '=========', '']
def main(): options, args = get_parsed_args() agentConfig = get_config(options=options) autorestart = agentConfig.get('autorestart', False) hostname = get_hostname(agentConfig) COMMANDS = [ 'start', 'stop', 'restart', 'foreground', 'status', 'info', 'check', 'configcheck', 'jmx', ] if len(args) < 1: sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS))) return 2 command = args[0] if command not in COMMANDS: sys.stderr.write("Unknown command: %s\n" % command) return 3 pid_file = PidFile('dd-agent') if options.clean: pid_file.clean() agent = Agent(pid_file.get_path(), autorestart) if command in START_COMMANDS: log.info('Agent version %s' % get_version()) if 'start' == command: log.info('Start daemon') agent.start() elif 'stop' == command: log.info('Stop daemon') agent.stop() elif 'restart' == command: log.info('Restart daemon') agent.restart() elif 'status' == command: agent.status() elif 'info' == command: return agent.info(verbose=options.verbose) elif 'foreground' == command: logging.info('Running in foreground') if autorestart: # Set-up the supervisor callbacks and fork it. logging.info('Running Agent with auto-restart ON') def child_func(): agent.run() def parent_func(): agent.start_event = False AgentSupervisor.start(parent_func, child_func) else: # Run in the standard foreground. 
agent.run(config=agentConfig) elif 'check' == command: check_name = args[1] try: import checks.collector # Try the old-style check first print getattr(checks.collector, check_name)(log).check(agentConfig) except Exception: # If not an old-style check, try checks.d checks = load_check_directory(agentConfig, hostname) for check in checks['initialized_checks']: if check.name == check_name: check.run() print check.get_metrics() print check.get_events() if len(args) == 3 and args[2] == 'check_rate': print "Running 2nd iteration to capture rate metrics" time.sleep(1) check.run() print check.get_metrics() print check.get_events() elif 'configcheck' == command or 'configtest' == command: osname = get_os() all_valid = True for conf_path in glob.glob(os.path.join(get_confd_path(osname), "*.yaml")): basename = os.path.basename(conf_path) try: check_yaml(conf_path) except Exception, e: all_valid = False print "%s contains errors:\n %s" % (basename, e) else: print "%s is valid" % basename if all_valid: print "All yaml files passed. You can now run the Datadog agent." return 0 else: print("Fix the invalid yaml files above in order to start the Datadog agent. " "A useful external tool for yaml parsing can be found at " "http://yaml-online-parser.appspot.com/") return 1
if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys(): print "#" * 80 print "JMX tool to be used to help configuring your JMX checks." print "See http://docs.datadoghq.com/integrations/java/ for more information" print "#" * 80 print "\n" print "You have to specify one of the following commands:" for command, desc in JMX_LIST_COMMANDS.iteritems(): print " - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc) print "Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr" print "\n" else: jmx_command = args[1] checks_list = args[2:] confd_directory = get_confd_path(get_os()) should_run = JMXFetch.init(confd_directory, agentConfig, get_logging_config(), 15, jmx_command, checks_list, reporter="console") if not should_run: print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory print "Have you enabled any JMX check ?" print "If you think it's not normal please get in touch with Datadog Support" return 0 if __name__ == '__main__': try: sys.exit(main()) except StandardError: # Try our best to log the error.
def body_lines(self):
    """Build the body of the collector status report.

    Sections are emitted in this order: clocks, paths, hostnames, checks,
    service metadata and emitters.

    Returns:
        list: the report, one string per output line.
    """
    # Only host metadata keys containing one of these tokens are displayed.
    host_tokens = ('hostname', 'fqdn', 'ipv4', 'instance-id')

    # Clocks
    report = ['Clocks', '======', '']
    try:
        ntp_offset, ntp_styles = get_ntp_info()
        report.append(' ' + style('NTP offset', *ntp_styles) + ': ' +
                      style('%s s' % round(ntp_offset, 4), *ntp_styles))
    except Exception as e:
        report.append(' NTP offset: Unknown (%s)' % str(e))
    report.append(' System UTC time: ' + str(datetime.datetime.utcnow()))
    report.append('')

    # Paths to checks.d/conf.d
    report.extend(['Paths', '=====', ''])
    osname = config.get_os()
    try:
        confd_path = config.get_confd_path(osname)
    except config.PathNotFound:
        confd_path = 'Not found'
    try:
        checksd_path = config.get_checksd_path(osname)
    except config.PathNotFound:
        checksd_path = 'Not found'
    report.append(' conf.d: ' + confd_path)
    report.append(' checks.d: ' + checksd_path)
    report.append('')

    # Hostnames
    report.extend(['Hostnames', '=========', ''])
    if self.host_metadata:
        for key, host in self.host_metadata.iteritems():
            if any(token in key for token in host_tokens):
                report.append(" " + key + ": " + host)
    else:
        report.append(" No host information available yet.")
    report.append('')

    # Checks.d Status
    report.extend(['Checks', '======', ''])
    check_statuses = self.check_statuses + get_jmx_status()
    if not check_statuses:
        report.append(" No checks have run yet.")
    else:
        for check in check_statuses:
            block = [
                ' ' + check.name + ' ({})'.format(check.check_version),
                ' ' + '-' * (len(check.name) + 3 + len(check.check_version)),
            ]
            if check.init_failed_error:
                block.append(" - initialize check class [%s]: %s" %
                             (style(STATUS_ERROR, 'red'), repr(check.init_failed_error)))
                if self.verbose and check.init_failed_traceback:
                    block.extend(' ' + tb_line
                                 for tb_line in check.init_failed_traceback.split('\n'))
            else:
                for inst in check.instance_statuses:
                    # An error takes precedence over warnings for the colour.
                    color = 'yellow' if inst.has_warnings() else 'green'
                    if inst.has_error():
                        color = 'red'
                    summary = " - instance #%s [%s]" % (
                        inst.instance_id, style(inst.status, color))
                    if inst.has_error():
                        summary += u": %s" % inst.error
                    if inst.metric_count is not None:
                        summary += " collected %s metrics" % inst.metric_count
                    if inst.instance_check_stats is not None:
                        summary += " Last run duration: %s" % inst.instance_check_stats.get('run_time')
                    block.append(summary)

                    if inst.has_warnings():
                        for warning in inst.warnings:
                            parts = warning.split('\n')
                            if not parts:
                                continue
                            # First line carries the "Warning" label, the rest follow as-is.
                            block.append(u" %s: %s" % (style("Warning", 'yellow'), parts[0]))
                            block.extend(u" %s" % extra for extra in parts[1:])
                    if self.verbose and inst.traceback is not None:
                        block.extend(' ' + tb_line for tb_line in inst.traceback.split('\n'))

                block.append(
                    " - Collected %s metric%s, %s event%s & %s service check%s" % (
                        check.metric_count, plural(check.metric_count),
                        check.event_count, plural(check.event_count),
                        check.service_check_count, plural(check.service_check_count)))

                if check.check_stats is not None:
                    block.append(" - Stats: %s" % pretty_statistics(check.check_stats))

                if check.library_versions is not None:
                    block.append(" - Dependencies:")
                    for library, version in check.library_versions.iteritems():
                        block.append(" - %s: %s" % (library, version))

                block.append("")

            report.extend(block)

    # Metadata status
    # NOTE(review): the flag is hard-coded on, so this section is always
    # rendered; the config-driven expression is left commented out. Confirm
    # whether this is a debugging leftover.
    metadata_enabled = 1  # _is_affirmative(get_config().get('display_service_metadata', False))

    if metadata_enabled:
        report.extend(["", "Service metadata", "================", ""])
        if not check_statuses:
            report.append(" No checks have run yet.")
        else:
            meta_report = []
            for check in check_statuses:
                # Check title
                title = [' ' + check.name, ' ' + '-' * len(check.name)]
                details = []
                for index, meta in enumerate(check.service_metadata):
                    if not meta:
                        continue
                    details.append(" - instance #%s:" % index)
                    for k, v in meta.iteritems():
                        details.append(" - %s: %s" % (k, v))
                # Checks without any instance metadata are left out entirely.
                if details:
                    meta_report.extend(title + details)
            if meta_report:
                report.extend(meta_report)
            else:
                report.append(" No metadata were collected.")

    # Emitter status
    report.extend(["", "Emitters", "========", ""])
    if not self.emitter_statuses:
        report.append(" No emitters have run yet.")
    else:
        for emitter in self.emitter_statuses:
            color = 'red' if emitter.has_error() else 'green'
            entry = " - %s [%s]" % (emitter.name, style(emitter.status, color))
            if emitter.status != STATUS_OK:
                entry += ": %s" % emitter.error
            report.append(entry)

    return report
def to_dict(self):
    """Return the collector status as a dictionary.

    Starts from ``AgentStatus.to_dict`` and adds whitelisted hostnames,
    per-check and per-instance state, emitter state, the conf.d/checks.d
    paths and clock information.
    """
    result = AgentStatus.to_dict(self)

    # Hostnames: keep only metadata keys containing a whitelisted token.
    host_tokens = ('hostname', 'fqdn', 'ipv4', 'instance-id')
    hostnames = {}
    result['hostnames'] = hostnames
    if self.metadata:
        for key, host in self.metadata.items():
            if any(token in key for token in host_tokens):
                hostnames[key] = host

    # Checks.d Status
    checks = {}
    result['checks'] = checks
    for check in self.check_statuses + get_jmx_status():
        entry = {'instances': {}}
        checks[check.name] = entry
        if check.init_failed_error:
            entry['init_failed'] = True
            entry['traceback'] = check.init_failed_traceback
            continue

        entry['init_failed'] = False
        for inst in check.instance_statuses:
            detail = {
                'status': inst.status,
                'has_error': inst.has_error(),
                'has_warnings': inst.has_warnings(),
            }
            if inst.has_error():
                detail['error'] = inst.error
            if inst.has_warnings():
                detail['warnings'] = inst.warnings
            entry['instances'][inst.instance_id] = detail
        # Counters are only reported for checks that initialized.
        entry['metric_count'] = check.metric_count
        entry['event_count'] = check.event_count
        entry['service_check_count'] = check.service_check_count

    # Emitter status
    emitters = []
    result['emitter'] = emitters
    for emitter in self.emitter_statuses:
        info = {
            'name': emitter.name,
            'status': emitter.status,
            'has_error': emitter.has_error(),
        }
        if emitter.has_error():
            info['error'] = emitter.error
        emitters.append(info)

    # Paths to conf.d / checks.d
    osname = config.get_os()
    try:
        confd_path = config.get_confd_path(osname)
    except config.PathNotFound:
        confd_path = 'Not found'
    result['confd_path'] = confd_path

    try:
        checksd_path = config.get_checksd_path(osname)
    except config.PathNotFound:
        checksd_path = 'Not found'
    result['checksd_path'] = checksd_path

    # Clocks: any failure is reported as a warning with the error text.
    try:
        offset, ntp_style = get_ntp_info()
        ntp_warning = len(ntp_style) > 0
        offset = round(offset, 4)
    except Exception as e:
        offset = 'Unknown (%s)' % str(e)
        ntp_warning = True
    result['ntp_offset'] = offset
    result['ntp_warning'] = ntp_warning
    result['utc_time'] = str(datetime.datetime.utcnow())

    return result
def body_lines(self):
    """Build the body of the collector status report.

    Sections are emitted in this order: paths, hostnames, checks and
    emitters.

    Returns:
        list: the report, one string per output line.
    """
    # Only metadata keys containing one of these tokens are displayed.
    host_tokens = ('hostname', 'fqdn', 'ipv4', 'instance-id')

    # Paths to checks.d/conf.d
    report = ['Paths', '=====', '']
    osname = config.get_os()
    try:
        confd_path = config.get_confd_path(osname)
    except config.PathNotFound:
        confd_path = 'Not found'
    try:
        checksd_path = config.get_checksd_path(osname)
    except config.PathNotFound:
        checksd_path = 'Not found'
    report.append(' conf.d: ' + confd_path)
    report.append(' checks.d: ' + checksd_path)
    report.append('')

    # Hostnames
    report.extend(['Hostnames', '=========', ''])
    if self.metadata:
        for key, host in self.metadata.items():
            if any(token in key for token in host_tokens):
                report.append(" " + key + ": " + host)
    else:
        report.append(" No host information available yet.")
    report.append('')

    # Checks.d Status
    report.extend(['Checks', '======', ''])
    check_statuses = self.check_statuses + get_jmx_status()
    if not check_statuses:
        report.append(" No checks have run yet.")
    else:
        for check in check_statuses:
            block = [' ' + check.name, ' ' + '-' * len(check.name)]
            if check.init_failed_error:
                block.append(" - initialize check class [%s]: %s" %
                             (style(STATUS_ERROR, 'red'), repr(check.init_failed_error)))
                if self.verbose and check.init_failed_traceback:
                    block.extend(' ' + tb_line
                                 for tb_line in check.init_failed_traceback.split('\n'))
            else:
                for inst in check.instance_statuses:
                    # An error takes precedence over warnings for the colour.
                    color = 'yellow' if inst.has_warnings() else 'green'
                    if inst.has_error():
                        color = 'red'
                    summary = " - instance #%s [%s]" % (
                        inst.instance_id, style(inst.status, color))
                    if inst.has_error():
                        summary += u": %s" % inst.error
                    block.append(summary)

                    if inst.has_warnings():
                        for warning in inst.warnings:
                            parts = warning.split('\n')
                            if not parts:
                                continue
                            block.append(u" %s: %s" % (style("Warning", 'yellow'), parts[0]))
                            block.extend(u" %s" % extra for extra in parts[1:])
                    if self.verbose and inst.traceback is not None:
                        block.extend(' ' + tb_line for tb_line in inst.traceback.split('\n'))

                block.append(" - Collected %s metrics & %s events" %
                             (check.metric_count, check.event_count))

                if check.library_versions is not None:
                    block.append(" - Dependencies:")
                    for library, version in check.library_versions.iteritems():
                        block.append(" - %s: %s" % (library, version))

                block.append("")

            report.extend(block)

    # Emitter status
    report.extend(["", "Emitters", "========", ""])
    if not self.emitter_statuses:
        report.append(" No emitters have run yet.")
    else:
        for emitter in self.emitter_statuses:
            color = 'red' if emitter.has_error() else 'green'
            entry = " - %s [%s]" % (emitter.name, style(emitter.status, color))
            if emitter.status != STATUS_OK:
                entry += ": %s" % emitter.error
            report.append(entry)

    return report
class CollectorStatus(AgentStatus):
    """Status of a collector run: check statuses, emitter statuses and host metadata."""

    NAME = 'Collector'

    def __init__(self, check_statuses=None, emitter_statuses=None, metadata=None):
        """Store the statuses to report; every argument defaults to empty.

        Args:
            check_statuses: list of per-check status objects.
            emitter_statuses: list of per-emitter status objects.
            metadata: host metadata; read through ``.items()`` in
                ``body_lines`` when non-empty.
        """
        AgentStatus.__init__(self)
        self.check_statuses = check_statuses or []
        self.emitter_statuses = emitter_statuses or []
        self.metadata = metadata or []

    @property
    def status(self):
        """STATUS_ERROR if any check status is in error, STATUS_OK otherwise."""
        for check_status in self.check_statuses:
            if check_status.status == STATUS_ERROR:
                return STATUS_ERROR
        return STATUS_OK

    def has_error(self):
        """Return True when the overall status is not STATUS_OK."""
        return self.status != STATUS_OK

    def body_lines(self):
        """Build the body of the collector status report.

        Sections are emitted in this order: clocks, paths, hostnames,
        checks and emitters.

        Returns:
            list: the report, one string per output line.
        """
        # Metadata whitelist
        metadata_whitelist = ['hostname', 'fqdn', 'ipv4', 'instance-id']

        lines = ['Clocks', '======', '']
        try:
            ntp_offset, ntp_styles = get_ntp_info()
            lines.append(' ' + style('NTP offset', *ntp_styles) + ': ' +
                         style('%s s' % round(ntp_offset, 4), *ntp_styles))
        except Exception as e:
            # Any failure to obtain the offset is shown inline, not raised.
            lines.append(' NTP offset: Unknown (%s)' % str(e))
        lines.append(' System UTC time: ' + datetime.datetime.utcnow().__str__())
        lines.append('')

        # Paths to checks.d/conf.d
        lines += ['Paths', '=====', '']
        osname = config.get_os()
        try:
            confd_path = config.get_confd_path(osname)
        except config.PathNotFound:
            confd_path = 'Not found'
        try:
            checksd_path = config.get_checksd_path(osname)
        except config.PathNotFound:
            checksd_path = 'Not found'
        lines.append(' conf.d: ' + confd_path)
        lines.append(' checks.d: ' + checksd_path)
        lines.append('')

        # Hostnames
        lines += ['Hostnames', '=========', '']
        if not self.metadata:
            lines.append(" No host information available yet.")
        else:
            for key, host in self.metadata.items():
                # Show a key once as soon as it matches any whitelisted token.
                for whitelist_item in metadata_whitelist:
                    if whitelist_item in key:
                        lines.append(" " + key + ": " + host)
                        break
        lines.append('')

        # Checks.d Status
        lines += ['Checks', '======', '']
        check_statuses = self.check_statuses + get_jmx_status()
        if not check_statuses:
            lines.append(" No checks have run yet.")
        else:
            for cs in check_statuses:
                check_lines = [' ' + cs.name, ' ' + '-' * len(cs.name)]
                if cs.init_failed_error:
                    check_lines.append(
                        " - initialize check class [%s]: %s" %
                        (style(STATUS_ERROR, 'red'), repr(cs.init_failed_error)))
                    if self.verbose and cs.init_failed_traceback:
                        check_lines.extend(
                            ' ' + line
                            for line in cs.init_failed_traceback.split('\n'))
                else:
                    for s in cs.instance_statuses:
                        # An error takes precedence over warnings for the colour.
                        c = 'green'
                        if s.has_warnings():
                            c = 'yellow'
                        if s.has_error():
                            c = 'red'
                        line = " - instance #%s [%s]" % (s.instance_id,
                                                          style(s.status, c))
                        if s.has_error():
                            line += u": %s" % s.error
                        if s.metric_count is not None:
                            line += " collected %s metrics" % s.metric_count

                        check_lines.append(line)

                        if s.has_warnings():
                            for warning in s.warnings:
                                warn = warning.split('\n')
                                if not len(warn):
                                    continue
                                check_lines.append(
                                    u" %s: %s" %
                                    (style("Warning", 'yellow'), warn[0]))
                                check_lines.extend(u" %s" % l for l in warn[1:])
                        if self.verbose and s.traceback is not None:
                            check_lines.extend(
                                ' ' + line for line in s.traceback.split('\n'))

                    check_lines += [
                        " - Collected %s metric%s, %s event%s & %s service check%s"
                        % (cs.metric_count, plural(cs.metric_count),
                           cs.event_count, plural(cs.event_count),
                           cs.service_check_count, plural(cs.service_check_count)),
                    ]

                    if cs.library_versions is not None:
                        check_lines += [" - Dependencies:"]
                        for library, version in cs.library_versions.iteritems():
                            check_lines += [" - %s: %s" % (library, version)]

                    check_lines += [""]

                lines += check_lines

        # Emitter status
        lines += ["", "Emitters", "========", ""]
        if not self.emitter_statuses:
            lines.append(" No emitters have run yet.")
        else:
            for es in self.emitter_statuses:
                c = 'green'
                if es.has_error():
                    c = 'red'
                line = " - %s [%s]" % (es.name, style(es.status, c))
                if es.status != STATUS_OK:
                    line += ": %s" % es.error
                lines.append(line)

        return lines
def body_lines(self):
    """Build the body of the collector status report.

    Sections are emitted in this order: clocks, paths, hostnames (header
    only), checks and emitters.

    Returns:
        list: the report, one string per output line.
    """
    lines = ['Clocks', '======', '']
    try:
        ntp_offset, ntp_styles = get_ntp_info()
        lines.append(' ' + style('NTP offset', *ntp_styles) + ': ' +
                     style('%s s' % round(ntp_offset, 4), *ntp_styles))
    except Exception as e:
        # Any failure to obtain the offset is shown inline, not raised.
        lines.append(' NTP offset: Unknown (%s)' % str(e))
    lines.append(' System UTC time: ' + datetime.datetime.utcnow().__str__())
    lines.append('')

    # Paths to checks_d/conf.d
    lines += ['Paths', '=====', '']
    osname = config.get_os()
    try:
        confd_path = config.get_confd_path(osname)
    except config.PathNotFound:
        confd_path = 'Not found'
    try:
        checksd_path = config.get_checksd_path(osname)
    except config.PathNotFound:
        checksd_path = 'Not found'
    lines.append(' conf.d: ' + confd_path)
    lines.append(' checks_d: ' + checksd_path)
    lines.append('')

    # Hostnames
    # NOTE(review): only the section header is emitted; no hostname entries
    # are written here. Confirm whether that is intended.
    lines += ['Hostnames', '=========', '']

    # Checks.d Status
    lines += ['Checks', '======', '']
    check_statuses = self.check_statuses + get_jmx_status()
    if not check_statuses:
        lines.append(" No checks have run yet.")
    else:
        for cs in check_statuses:
            check_lines = [' ' + cs.name, ' ' + '-' * len(cs.name)]
            if cs.init_failed_error:
                check_lines.append(" - initialize check class [%s]: %s" %
                                   (style(STATUS_ERROR, 'red'), repr(cs.init_failed_error)))
                if self.verbose and cs.init_failed_traceback:
                    check_lines.extend(' ' + line for line in
                                       cs.init_failed_traceback.split('\n'))
            else:
                for s in cs.instance_statuses:
                    # An error takes precedence over warnings for the colour.
                    c = 'green'
                    if s.has_warnings():
                        c = 'yellow'
                    if s.has_error():
                        c = 'red'
                    line = " - instance #%s [%s]" % (s.instance_id, style(s.status, c))
                    if s.has_error():
                        line += u": %s" % s.error
                    if s.metric_count is not None:
                        line += " collected %s metrics" % s.metric_count

                    check_lines.append(line)

                    if s.has_warnings():
                        for warning in s.warnings:
                            warn = warning.split('\n')
                            if not len(warn):
                                continue
                            check_lines.append(u" %s: %s" %
                                               (style("Warning", 'yellow'), warn[0]))
                            check_lines.extend(u" %s" % l for l in warn[1:])
                    if self.verbose and s.traceback is not None:
                        check_lines.extend(' ' + line for line in s.traceback.split('\n'))

                check_lines += [
                    " - Collected %s metrics & %s events" % (
                        cs.metric_count, cs.event_count),
                ]

                if cs.library_versions is not None:
                    check_lines += [" - Dependencies:"]
                    for library, version in cs.library_versions.iteritems():
                        check_lines += [" - %s: %s" % (library, version)]

                check_lines += [""]

            lines += check_lines

    # Emitter status
    lines += ["", "Emitters", "========", ""]
    if not self.emitter_statuses:
        lines.append(" No emitters have run yet.")
    else:
        for es in self.emitter_statuses:
            c = 'green'
            if es.has_error():
                c = 'red'
            line = " - %s [%s]" % (es.name, style(es.status, c))
            if es.status != STATUS_OK:
                line += ": %s" % es.error
            lines.append(line)

    return lines
def _is_jmxfetch_enabled(self, config):
    """Return what JMXFetch.should_run() reports once configured from conf.d.

    Args:
        config: agent configuration handed to the JMXFetch constructor.
    """
    fetcher = JMXFetch(get_confd_path(), config)
    fetcher.configure()
    return fetcher.should_run()
def main():
    """Command-line entry point: parse arguments and dispatch one command.

    Returns:
        2 when no command is given, 3 for an unknown command, 1 when
        ``check`` is called without a check name, the value of
        ``Agent.info`` for ``info``, and None (implicitly) otherwise.
    """
    options, args = get_parsed_args()
    agentConfig = get_config(options=options)
    autorestart = agentConfig.get('autorestart', False)
    hostname = get_hostname(agentConfig)

    # Commands that need an Agent instance.
    COMMANDS_AGENT = [
        'start',
        'stop',
        'restart',
        'status',
        'foreground',
    ]

    # Commands handled without building an Agent instance.
    COMMANDS_NO_AGENT = [
        'info',
        'check',
        'configcheck',
        'jmx',
        'flare',
    ]

    COMMANDS = COMMANDS_AGENT + COMMANDS_NO_AGENT

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    # Deprecation notice
    if command not in DD_AGENT_COMMANDS:
        # Will become an error message and exit after deprecation period
        from utils.deprecations import deprecate_old_command_line_tools
        deprecate_old_command_line_tools()

    if command in COMMANDS_AGENT:
        agent = Agent(PidFile('dd-agent').get_path(), autorestart)

    if command in START_COMMANDS:
        log.info('Agent version %s' % get_version())

    if 'start' == command:
        log.info('Start daemon')
        agent.start()

    elif 'stop' == command:
        log.info('Stop daemon')
        agent.stop()

    elif 'restart' == command:
        log.info('Restart daemon')
        agent.restart()

    elif 'status' == command:
        agent.status()

    elif 'info' == command:
        return Agent.info(verbose=options.verbose)

    elif 'foreground' == command:
        logging.info('Running in foreground')
        if autorestart:
            # Set-up the supervisor callbacks and fork it.
            logging.info('Running Agent with auto-restart ON')

            def child_func():
                agent.start(foreground=True)

            def parent_func():
                agent.start_event = False

            AgentSupervisor.start(parent_func, child_func)
        else:
            # Run in the standard foreground.
            agent.start(foreground=True)

    elif 'check' == command:
        if len(args) < 2:
            sys.stderr.write(
                "Usage: %s check <check_name> [check_rate]\n"
                "Add check_rate as last argument to compute rates\n"
                % sys.argv[0])
            return 1

        check_name = args[1]
        try:
            import checks.collector
            # Try the old-style check first
            print getattr(checks.collector, check_name)(log).check(agentConfig)
        except Exception:
            # If not an old-style check, try checks.d
            # Any failure above (including a missing attribute) lands here.
            checks = load_check_directory(agentConfig, hostname)
            for check in checks['initialized_checks']:
                if check.name == check_name:
                    check.run()
                    print check.get_metrics()
                    print check.get_events()
                    print check.get_service_checks()
                    if len(args) == 3 and args[2] == 'check_rate':
                        # A second run after a pause, so rates can be computed.
                        print "Running 2nd iteration to capture rate metrics"
                        time.sleep(1)
                        check.run()
                        print check.get_metrics()
                        print check.get_events()
                        print check.get_service_checks()
                    check.stop()

    # NOTE(review): 'configtest' is not in COMMANDS, so it is rejected above
    # as an unknown command and only 'configcheck' can reach this branch.
    elif 'configcheck' == command or 'configtest' == command:
        configcheck()

    elif 'jmx' == command:
        from jmxfetch import JMX_LIST_COMMANDS, JMXFetch

        if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys():
            # Missing or unknown sub-command: print the usage banner.
            print "#" * 80
            print "JMX tool to be used to help configuring your JMX checks."
            print "See http://docs.datadoghq.com/integrations/java/ for more information"
            print "#" * 80
            print "\n"
            print "You have to specify one of the following commands:"
            # Note: this loop rebinds the outer `command` variable.
            for command, desc in JMX_LIST_COMMANDS.iteritems():
                print " - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc)
            print "Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr"
            print "\n"

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            confd_directory = get_confd_path(get_os())

            jmx_process = JMXFetch(confd_directory, agentConfig)
            jmx_process.configure()
            should_run = jmx_process.should_run()

            if should_run:
                jmx_process.run(jmx_command, checks_list, reporter="console")
            else:
                print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory
                print "Have you enabled any JMX check ?"
                print "If you think it's not normal please get in touch with Datadog Support"

    elif 'flare' == command:
        Flare.check_user_rights()
        # Optional case id; a non-numeric value makes int() raise ValueError.
        case_id = int(args[1]) if len(args) > 1 else None
        f = Flare(True, case_id)
        f.collect()
        try:
            f.upload()
        except Exception, e:
            # An upload failure is printed, not raised.
            print 'The upload failed:\n{0}'.format(str(e))
def body_lines(self):
    """Build the body of the collector status report.

    Sections are emitted in this order: clocks, paths, hostnames, checks,
    service metadata (only when ``display_service_metadata`` is
    affirmative in the configuration) and emitters.

    Returns:
        list: the report, one string per output line.
    """
    # Only host metadata keys containing one of these tokens are displayed.
    host_tokens = ('hostname', 'fqdn', 'ipv4', 'instance-id')

    # Clocks
    out = ['Clocks', '======', '']
    try:
        ntp_offset, ntp_styles = get_ntp_info()
        out.append(' ' + style('NTP offset', *ntp_styles) + ': ' +
                   style('%s s' % round(ntp_offset, 4), *ntp_styles))
    except Exception as e:
        out.append(' NTP offset: Unknown (%s)' % str(e))
    out.append(' System UTC time: ' + str(datetime.datetime.utcnow()))
    out.append('')

    # Paths to checks.d/conf.d
    out.extend(['Paths', '=====', ''])
    osname = config.get_os()
    try:
        confd_path = config.get_confd_path(osname)
    except config.PathNotFound:
        confd_path = 'Not found'
    try:
        checksd_path = config.get_checksd_path(osname)
    except config.PathNotFound:
        checksd_path = 'Not found'
    out.append(' conf.d: ' + confd_path)
    out.append(' checks.d: ' + checksd_path)
    out.append('')

    # Hostnames
    out.extend(['Hostnames', '=========', ''])
    if self.host_metadata:
        for key, host in self.host_metadata.iteritems():
            if any(token in key for token in host_tokens):
                out.append(" " + key + ": " + host)
    else:
        out.append(" No host information available yet.")
    out.append('')

    # Checks.d Status
    out.extend(['Checks', '======', ''])
    check_statuses = self.check_statuses + get_jmx_status()
    if not check_statuses:
        out.append(" No checks have run yet.")
    else:
        for check in check_statuses:
            section = [
                ' ' + check.name + ' ({})'.format(check.check_version),
                ' ' + '-' * (len(check.name) + 3 + len(check.check_version)),
            ]
            if check.init_failed_error:
                section.append(" - initialize check class [%s]: %s" %
                               (style(STATUS_ERROR, 'red'), repr(check.init_failed_error)))
                if self.verbose and check.init_failed_traceback:
                    section.extend(' ' + tb_line
                                   for tb_line in check.init_failed_traceback.split('\n'))
            else:
                for inst in check.instance_statuses:
                    # An error takes precedence over warnings for the colour.
                    color = 'yellow' if inst.has_warnings() else 'green'
                    if inst.has_error():
                        color = 'red'
                    summary = " - instance #%s [%s]" % (
                        inst.instance_id, style(inst.status, color))
                    if inst.has_error():
                        summary += u": %s" % inst.error
                    if inst.metric_count is not None:
                        summary += " collected %s metrics" % inst.metric_count
                    if inst.instance_check_stats is not None:
                        summary += " Last run duration: %s" % inst.instance_check_stats.get('run_time')
                    section.append(summary)

                    if inst.has_warnings():
                        for warning in inst.warnings:
                            parts = warning.split('\n')
                            if not parts:
                                continue
                            section.append(u" %s: %s" % (style("Warning", 'yellow'), parts[0]))
                            section.extend(u" %s" % extra for extra in parts[1:])
                    if self.verbose and inst.traceback is not None:
                        section.extend(' ' + tb_line for tb_line in inst.traceback.split('\n'))

                section.append(
                    " - Collected %s metric%s, %s event%s & %s service check%s" % (
                        check.metric_count, plural(check.metric_count),
                        check.event_count, plural(check.event_count),
                        check.service_check_count, plural(check.service_check_count)))

                if check.check_stats is not None:
                    section.append(" - Stats: %s" % pretty_statistics(check.check_stats))

                if check.library_versions is not None:
                    section.append(" - Dependencies:")
                    for library, version in check.library_versions.iteritems():
                        section.append(" - %s: %s" % (library, version))

                section.append("")

            out.extend(section)

    # Metadata status
    if _is_affirmative(get_config().get('display_service_metadata', False)):
        out.extend(["", "Service metadata", "================", ""])
        if not check_statuses:
            out.append(" No checks have run yet.")
        else:
            collected = []
            for check in check_statuses:
                # Check title
                title = [' ' + check.name, ' ' + '-' * len(check.name)]
                details = []
                for index, meta in enumerate(check.service_metadata):
                    if not meta:
                        continue
                    details.append(" - instance #%s:" % index)
                    for k, v in meta.iteritems():
                        details.append(" - %s: %s" % (k, v))
                # Checks without any instance metadata are left out entirely.
                if details:
                    collected.extend(title + details)
            if collected:
                out.extend(collected)
            else:
                out.append(" No metadata were collected.")

    # Emitter status
    out.extend(["", "Emitters", "========", ""])
    if not self.emitter_statuses:
        out.append(" No emitters have run yet.")
    else:
        for emitter in self.emitter_statuses:
            color = 'red' if emitter.has_error() else 'green'
            entry = " - %s [%s]" % (emitter.name, style(emitter.status, color))
            if emitter.status != STATUS_OK:
                entry += ": %s" % emitter.error
            out.append(entry)

    return out
def to_dict(self):
    """Return the collector status as a dictionary.

    Starts from ``AgentStatus.to_dict`` and adds whitelisted hostnames,
    per-check state (including the check version), emitter state, the
    conf.d/checks.d paths and clock information.
    """
    result = AgentStatus.to_dict(self)

    # Hostnames: keep only metadata keys containing a whitelisted token.
    host_tokens = ('hostname', 'fqdn', 'ipv4', 'instance-id')
    hostnames = {}
    result['hostnames'] = hostnames
    if self.host_metadata:
        for key, host in self.host_metadata.iteritems():
            if any(token in key for token in host_tokens):
                hostnames[key] = host

    # Checks.d Status
    checks = {}
    result['checks'] = checks
    for check in self.check_statuses + get_jmx_status():
        entry = {'instances': {}}
        checks[check.name] = entry
        entry['check_version'] = check.check_version
        if check.init_failed_error:
            entry['init_failed'] = True
            # Fall back to the error itself when no traceback was recorded.
            entry['traceback'] = check.init_failed_traceback or check.init_failed_error
            continue

        entry['init_failed'] = False
        for inst in check.instance_statuses:
            detail = {
                'status': inst.status,
                'has_error': inst.has_error(),
                'has_warnings': inst.has_warnings(),
            }
            if inst.has_error():
                detail['error'] = inst.error
            if inst.has_warnings():
                detail['warnings'] = inst.warnings
            entry['instances'][inst.instance_id] = detail
        # Counters are only reported for checks that initialized.
        entry['metric_count'] = check.metric_count
        entry['event_count'] = check.event_count
        entry['service_check_count'] = check.service_check_count

    # Emitter status
    emitters = []
    result['emitter'] = emitters
    for emitter in self.emitter_statuses:
        info = {
            'name': emitter.name,
            'status': emitter.status,
            'has_error': emitter.has_error(),
        }
        if emitter.has_error():
            info['error'] = emitter.error
        emitters.append(info)

    # Paths to conf.d / checks.d
    osname = config.get_os()
    try:
        confd_path = config.get_confd_path(osname)
    except config.PathNotFound:
        confd_path = 'Not found'
    result['confd_path'] = confd_path

    try:
        checksd_path = config.get_checksd_path(osname)
    except config.PathNotFound:
        checksd_path = 'Not found'
    result['checksd_path'] = checksd_path

    # Clocks: any failure is reported as a warning with the error text.
    try:
        offset, ntp_style = get_ntp_info()
        ntp_warning = len(ntp_style) > 0
        offset = round(offset, 4)
    except Exception as e:
        offset = "Unknown (%s)" % str(e)
        ntp_warning = True
    result["ntp_offset"] = offset
    result["ntp_warning"] = ntp_warning
    result["utc_time"] = str(datetime.datetime.utcnow())

    return result
def to_dict(self):
    """Return the collector status as a dictionary.

    Starts from ``AgentStatus.to_dict`` and adds whitelisted hostnames,
    per-check and per-instance state, emitter state, the conf.d/checks.d
    paths and clock information.
    """
    info = AgentStatus.to_dict(self)

    # Hostnames: keep only metadata keys containing a whitelisted token.
    host_tokens = ('hostname', 'fqdn', 'ipv4', 'instance-id')
    hostnames = {}
    info['hostnames'] = hostnames
    if self.metadata:
        for key, host in self.metadata.items():
            if any(token in key for token in host_tokens):
                hostnames[key] = host

    # Checks.d Status
    checks = {}
    info['checks'] = checks
    for check in self.check_statuses + get_jmx_status():
        entry = {'instances': {}}
        checks[check.name] = entry
        if check.init_failed_error:
            entry['init_failed'] = True
            entry['traceback'] = check.init_failed_traceback
            continue

        entry['init_failed'] = False
        instances = entry['instances']
        for inst in check.instance_statuses:
            detail = {
                'status': inst.status,
                'has_error': inst.has_error(),
                'has_warnings': inst.has_warnings(),
            }
            if inst.has_error():
                detail['error'] = inst.error
            if inst.has_warnings():
                detail['warnings'] = inst.warnings
            instances[inst.instance_id] = detail
        # Counters are only reported for checks that initialized.
        entry['metric_count'] = check.metric_count
        entry['event_count'] = check.event_count
        entry['service_check_count'] = check.service_check_count

    # Emitter status
    emitters = []
    info['emitter'] = emitters
    for emitter in self.emitter_statuses:
        emitter_info = {
            'name': emitter.name,
            'status': emitter.status,
            'has_error': emitter.has_error(),
        }
        if emitter.has_error():
            emitter_info['error'] = emitter.error
        emitters.append(emitter_info)

    # Paths to conf.d / checks.d
    osname = config.get_os()
    try:
        confd_path = config.get_confd_path(osname)
    except config.PathNotFound:
        confd_path = 'Not found'
    info['confd_path'] = confd_path

    try:
        checksd_path = config.get_checksd_path(osname)
    except config.PathNotFound:
        checksd_path = 'Not found'
    info['checksd_path'] = checksd_path

    # Clocks: any failure is reported as a warning with the error text.
    try:
        offset, ntp_style = get_ntp_info()
        ntp_warning = len(ntp_style) > 0
        offset = round(offset, 4)
    except Exception as e:
        offset = "Unknown (%s)" % str(e)
        ntp_warning = True
    info["ntp_offset"] = offset
    info["ntp_warning"] = ntp_warning
    info["utc_time"] = str(datetime.datetime.utcnow())

    return info
def to_dict(self):
    """Return the collector status as a dictionary.

    Starts from ``AgentStatus.to_dict`` and adds whitelisted hostnames,
    per-check and per-instance state, emitter state and the
    conf.d/checks.d paths.
    """
    info = AgentStatus.to_dict(self)

    # Hostnames: keep only metadata keys containing a whitelisted token.
    host_tokens = ('hostname', 'fqdn', 'ipv4', 'instance-id')
    hostnames = {}
    info['hostnames'] = hostnames
    if self.metadata:
        for key, host in self.metadata.items():
            if any(token in key for token in host_tokens):
                hostnames[key] = host

    # Checks.d Status
    checks = {}
    info['checks'] = checks
    for check in self.check_statuses + get_jmx_status():
        entry = {'instances': {}}
        checks[check.name] = entry
        if check.init_failed_error:
            entry['init_failed'] = True
            entry['traceback'] = check.init_failed_traceback
            continue

        entry['init_failed'] = False
        instances = entry['instances']
        for inst in check.instance_statuses:
            detail = {
                'status': inst.status,
                'has_error': inst.has_error(),
                'has_warnings': inst.has_warnings(),
            }
            if inst.has_error():
                detail['error'] = inst.error
            if inst.has_warnings():
                detail['warnings'] = inst.warnings
            instances[inst.instance_id] = detail
        # Counters are only reported for checks that initialized.
        entry['metric_count'] = check.metric_count
        entry['event_count'] = check.event_count

    # Emitter status
    emitters = []
    info['emitter'] = emitters
    for emitter in self.emitter_statuses:
        emitter_info = {
            'name': emitter.name,
            'status': emitter.status,
            'has_error': emitter.has_error(),
        }
        if emitter.has_error():
            emitter_info['error'] = emitter.error
        emitters.append(emitter_info)

    # Paths to conf.d / checks.d
    osname = config.get_os()
    try:
        confd_path = config.get_confd_path(osname)
    except config.PathNotFound:
        confd_path = 'Not found'
    info['confd_path'] = confd_path

    try:
        checksd_path = config.get_checksd_path(osname)
    except config.PathNotFound:
        checksd_path = 'Not found'
    info['checksd_path'] = checksd_path

    return info
def body_lines(self):
    """Build the human-readable body of the status report.

    The report is made of the following sections, in order: ``Clocks``
    (NTP offset and system UTC time), ``Paths`` (conf.d / checks_d
    directories), ``Hostnames`` (whitelisted entries of ``self.metadata``),
    ``Checks`` (one sub-section per check status, including the statuses
    returned by ``get_jmx_status()``) and ``Emitters``.

    When ``self.verbose`` is set, available tracebacks are appended below
    the check or instance they belong to.

    Returns:
        A list of strings, one per output line.
    """
    # Clocks
    lines = ['Clocks', '======', '']
    try:
        ntp_offset, ntp_styles = get_ntp_info()
        lines.append(
            ' ' + style('NTP offset', *ntp_styles) + ': ' +
            style('%s s' % round(ntp_offset, 4), *ntp_styles))
    except Exception as e:
        # Best effort: a failing NTP lookup must not break the whole report.
        lines.append(' NTP offset: Unknown (%s)' % str(e))
    lines.append(' System UTC time: ' + str(datetime.datetime.utcnow()))
    lines.append('')

    # Paths to checks_d/conf.d
    lines += ['Paths', '=====', '']
    osname = config.get_os()
    try:
        confd_path = config.get_confd_path(osname)
    except config.PathNotFound:
        confd_path = 'Not found'
    try:
        checksd_path = config.get_checksd_path(osname)
    except config.PathNotFound:
        checksd_path = 'Not found'
    lines.append(' conf.d: ' + confd_path)
    lines.append(' checks_d: ' + checksd_path)
    lines.append('')

    # Hostnames: same whitelist filter as the 'hostnames' entry of to_dict,
    # so the text report and the dictionary expose the same keys.
    lines += ['Hostnames', '=========', '']
    hostname_markers = ('hostname', 'fqdn', 'ipv4', 'instance-id')
    if not self.metadata:
        lines.append(' No host information available yet.')
    else:
        for key, host in self.metadata.items():
            if any(marker in key for marker in hostname_markers):
                lines.append(' %s: %s' % (key, host))
    lines.append('')

    # Checks.d Status
    lines += ['Checks', '======', '']
    check_statuses = self.check_statuses + get_jmx_status()
    if not check_statuses:
        lines.append(" No checks have run yet.")
    else:
        for cs in check_statuses:
            check_lines = [' ' + cs.name, ' ' + '-' * len(cs.name)]
            if cs.init_failed_error:
                # The check class failed to initialize: report the error
                # instead of per-instance results.
                check_lines.append(
                    " - initialize check class [%s]: %s" % (
                        style(STATUS_ERROR, 'red'),
                        repr(cs.init_failed_error)))
                if self.verbose and cs.init_failed_traceback:
                    check_lines.extend(
                        ' ' + tb_line
                        for tb_line in cs.init_failed_traceback.split('\n'))
            else:
                for s in cs.instance_statuses:
                    # An error takes precedence over warnings for the color.
                    c = 'green'
                    if s.has_warnings():
                        c = 'yellow'
                    if s.has_error():
                        c = 'red'
                    line = " - instance #%s [%s]" % (
                        s.instance_id, style(s.status, c))
                    if s.has_error():
                        line += u": %s" % s.error
                    if s.metric_count is not None:
                        line += " collected %s metrics" % s.metric_count
                    check_lines.append(line)

                    if s.has_warnings():
                        for warning in s.warnings:
                            # split() always yields at least one element,
                            # so warn[0] is safe even for an empty warning.
                            warn = warning.split('\n')
                            check_lines.append(
                                u" %s: %s" % (
                                    style("Warning", 'yellow'), warn[0]))
                            check_lines.extend(
                                u" %s" % extra for extra in warn[1:])
                    if self.verbose and s.traceback is not None:
                        check_lines.extend(
                            ' ' + tb_line
                            for tb_line in s.traceback.split('\n'))

                check_lines += [
                    " - Collected %s metrics & %s events" % (
                        cs.metric_count, cs.event_count),
                ]

                if cs.library_versions is not None:
                    check_lines += [" - Dependencies:"]
                    for library, version in cs.library_versions.items():
                        check_lines += [" - %s: %s" % (library, version)]

                check_lines += [""]

            lines += check_lines

    # Emitter status
    lines += ["", "Emitters", "========", ""]
    if not self.emitter_statuses:
        lines.append(" No emitters have run yet.")
    else:
        for es in self.emitter_statuses:
            c = 'green'
            if es.has_error():
                c = 'red'
            line = " - %s [%s]" % (es.name, style(es.status, c))
            if es.status != STATUS_OK:
                line += ": %s" % es.error
            lines.append(line)

    return lines