def __init__(self, name, init_config, instances):
    """Reset connection state and derive the instance key and tags.

    Reads ``host``, ``port``, ``dbname``, ``relations`` and ``tags`` from
    ``self.instance``. A non-empty ``port`` is converted to ``int``.

    Raises:
        ConfigurationError: if ``relations`` is set while ``dbname`` is empty.
    """
    AgentCheck.__init__(self, name, init_config, instances)
    self._clean_state()
    self.db = None
    self._version = None
    self.custom_metrics = None

    # custom_metrics is deprecated in favor of custom_queries
    if 'custom_metrics' in self.instance:
        self.warning(
            "DEPRECATION NOTICE: Please use the new custom_queries option "
            "rather than the now deprecated custom_metrics"
        )

    config = self.instance
    host = config.get('host', '')
    raw_port = config.get('port', '')
    # An empty string means "no port configured" and is kept as-is.
    port = raw_port if raw_port == '' else int(raw_port)
    database = config.get('dbname', 'postgres')

    self.relations = config.get('relations', [])
    if self.relations and not database:
        raise ConfigurationError(
            '"dbname" parameter must be set when using the "relations" parameter.'
        )

    self.key = (host, port, database)
    self.tags = self._build_tags(config.get('tags', []), host, port, database)
def __init__(self, name, init_config, agentConfig, instances=None):
    """Load the single configured disk-check instance into attributes.

    Options are read from ``instances[0]``. When ``is_agent_6`` is false,
    ``use_mount`` and ``excluded_disk_re`` go through
    ``_load_legacy_option`` instead of being read from the instance directly.

    Raises:
        ConfigurationError: if more than one instance is configured.
    """
    if instances is not None and len(instances) > 1:
        raise ConfigurationError('Disk check only supports one configured instance.')
    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)

    config = instances[0]
    self._all_partitions = is_affirmative(config.get('all_partitions', False))

    # Whitelist/blacklist options all default to a fresh empty list and are
    # stored under the option name prefixed with an underscore.
    for option in (
        'file_system_whitelist',
        'file_system_blacklist',
        'device_whitelist',
        'device_blacklist',
        'mount_point_whitelist',
        'mount_point_blacklist',
    ):
        setattr(self, '_' + option, config.get(option, []))

    self._tag_by_filesystem = is_affirmative(config.get('tag_by_filesystem', False))
    self._tag_by_label = is_affirmative(config.get('tag_by_label', True))
    self._device_tag_re = config.get('device_tag_re', {})
    self._custom_tags = config.get('tags', [])
    self._service_check_rw = is_affirmative(config.get('service_check_rw', False))

    # TODO Remove this v5/v6 fork when agent 5 will be fully deprecated
    if not is_agent_6:
        # FIXME: 6.x, drop use_mount option in datadog.conf
        self._load_legacy_option(config, 'use_mount', False, operation=is_affirmative)
        # FIXME: 6.x, drop device_blacklist_re option in datadog.conf
        self._load_legacy_option(
            config, 'excluded_disk_re', '^$', legacy_name='device_blacklist_re', operation=re.compile
        )
    else:
        self._use_mount = is_affirmative(config.get('use_mount', False))

    self._compile_pattern_filters(config)
    self._compile_tag_re()
    self._blkid_label_re = re.compile('LABEL=\"(.*?)\"', re.I)
    self.devices_label = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    """Load the single configured disk-check instance into attributes.

    Options are read from ``instances[0]``. ``min_disk_size`` is multiplied
    by 1024 * 1024 before being stored.

    Raises:
        ConfigurationError: if more than one instance is configured.
    """
    if instances is not None and len(instances) > 1:
        raise ConfigurationError('Disk check only supports one configured instance.')
    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)

    config = instances[0]
    self._use_mount = is_affirmative(config.get('use_mount', False))
    self._all_partitions = is_affirmative(config.get('all_partitions', False))

    # Whitelist/blacklist options all default to a fresh empty list and are
    # stored under the option name prefixed with an underscore.
    for option in (
        'file_system_whitelist',
        'file_system_blacklist',
        'device_whitelist',
        'device_blacklist',
        'mount_point_whitelist',
        'mount_point_blacklist',
    ):
        setattr(self, '_' + option, config.get(option, []))

    self._tag_by_filesystem = is_affirmative(config.get('tag_by_filesystem', False))
    self._tag_by_label = is_affirmative(config.get('tag_by_label', True))
    self._device_tag_re = config.get('device_tag_re', {})
    self._custom_tags = config.get('tags', [])
    self._service_check_rw = is_affirmative(config.get('service_check_rw', False))

    configured_min_size = config.get('min_disk_size', 0)
    self._min_disk_size = configured_min_size * 1024 * 1024

    self._compile_pattern_filters(config)
    self._compile_tag_re()
    self._blkid_label_re = re.compile('LABEL=\"(.*?)\"', re.I)
    self.devices_label = {}
def __init__(self, name, init_config, instances):
    """Read tuning options from ``init_config`` and build per-instance state.

    Batch sizes are clamped to be non-negative, and the morlist clean
    interval is never smaller than the refresh interval. For every instance,
    cache intervals and the ``event_config`` option are registered under the
    key returned by ``_instance_key``.
    """
    AgentCheck.__init__(self, name, init_config, instances)
    self.time_started = time.time()

    morlist_batch = init_config.get("batch_morlist_size", BATCH_MORLIST_SIZE)
    self.batch_morlist_size = max(morlist_batch, 0)
    collector_batch = init_config.get("batch_property_collector_size", BATCH_COLLECTOR_SIZE)
    self.batch_collector_size = max(collector_batch, 0)

    refresh_interval = init_config.get('refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
    self.refresh_morlist_interval = refresh_interval
    # Defaults to twice the refresh interval.
    clean_interval = init_config.get('clean_morlist_interval', 2 * refresh_interval)
    self.clean_morlist_interval = max(clean_interval, refresh_interval)

    metadata_interval = init_config.get(
        'refresh_metrics_metadata_interval', REFRESH_METRICS_METADATA_INTERVAL
    )
    self.refresh_metrics_metadata_interval = metadata_interval

    # Connections open to vCenter instances
    self.server_instances = {}
    self.server_instances_lock = threading.RLock()

    # Event configuration
    self.event_config = {}

    # Host tags exclusion: the first instance's value wins over init_config
    self.excluded_host_tags = instances[0].get(
        "excluded_host_tags", init_config.get("excluded_host_tags", [])
    )

    # Caching configuration
    self.cache_config = CacheConfig()

    # build up configurations
    for instance in instances:
        instance_key = self._instance_key(instance)
        # caches
        self.cache_config.set_interval(CacheConfig.Morlist, instance_key, refresh_interval)
        self.cache_config.set_interval(CacheConfig.Metadata, instance_key, metadata_interval)
        # events
        self.event_config[instance_key] = instance.get('event_config')

    # Queue of raw Mor objects to process
    self.mor_objects_queue = ObjectsQueue()

    # Cache of processed Mor objects
    self.mor_cache = MorCache(self.log)

    # managed entity raw view
    self.registry = {}

    # Metrics metadata, for each instance keeps the mapping:
    # perfCounterKey -> {name, group, description}
    self.metadata_cache = MetadataCache()

    self.latest_event_query = {}
    self.exception_printed = 0
def __init__(self, name, init_config, agentConfig, instances=None):
    """Record the construction time and wrap the first instance in ``Config``.

    ``self.config`` stays ``None`` when no instances are supplied.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.last_run = datetime.now()
    self.config = Config(instances[0]) if instances else None
def __init__(self, name, init_config, agentConfig, instances=None):
    """Create the two-level, auto-creating ``host_status`` mapping."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    def _unknown_status():
        # Leaf default: a status that has never been recorded reads as None.
        return None

    def _per_host_statuses():
        return defaultdict(_unknown_status)

    # Host status needs to persist across all checks.
    # We'll create keys when they are referenced. See:
    # https://en.wikipedia.org/wiki/Autovivification
    # https://gist.github.com/hrldcpr/2012250
    self.host_status = defaultdict(_per_host_statuses)
def __init__(self, *args, **kwargs):
    """Create the instance cache and apply the legacy default timeout.

    ``timeout`` is set to 2 on the instance only when neither the instance
    nor ``init_config`` provides one.
    """
    AgentCheck.__init__(self, *args, **kwargs)
    self.instance_cache = {}

    # preserve backwards compatible default timeouts
    needs_default_timeout = (
        self.instance
        and self.instance.get('timeout') is None
        and self.init_config.get('timeout') is None
    )
    if needs_default_timeout:
        self.instance['timeout'] = 2
def __init__(self, name, init_config, agentConfig, instances=None):
    """Read integer tuning options from ``init_config`` and create empty caches."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)

    def int_option(option, fallback):
        # Every option here is coerced with int(), whether configured or defaulted.
        return int(init_config.get(option, fallback))

    self._zk_timeout = int_option('zk_timeout', DEFAULT_ZK_TIMEOUT)
    self._kafka_timeout = int_option('kafka_timeout', DEFAULT_KAFKA_TIMEOUT)
    self.context_limit = int_option('max_partition_contexts', CONTEXT_UPPER_BOUND)
    self._broker_retries = int_option('kafka_retries', DEFAULT_KAFKA_RETRIES)

    self._zk_last_ts = {}
    self.kafka_clients = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    """Validate connector settings and pre-compute metrics for each instance.

    The ``connector`` and ``adoprovider`` options are read from
    ``init_config``; an unrecognised value is logged and replaced by the
    default. For every instance the default database is probed: when the
    target database exists (and no ``stored_procedure`` is configured) the
    metric list is built, otherwise the instance is either disabled
    (``ignore_missing_database``) or an error is logged.

    Any exception raised while preparing one instance is logged and does not
    prevent the remaining instances from being prepared.

    Args:
        name: passed through to ``AgentCheck.__init__``.
        init_config: mapping holding ``connector``, ``adoprovider`` and
            ``custom_metrics``.
        agentConfig: passed through to ``AgentCheck.__init__``.
        instances: iterable of instance mappings, or ``None``.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Cache connections
    self.connections = {}
    self.failed_connections = {}
    self.instances_metrics = {}
    self.instances_per_type_metrics = defaultdict(dict)
    self.existing_databases = None
    self.do_check = {}
    self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}
    self.adoprovider = self.default_adoprovider

    self.connector = init_config.get('connector', 'adodbapi')
    if self.connector.lower() not in self.valid_connectors:
        self.log.error("Invalid database connector %s, defaulting to adodbapi", self.connector)
        self.connector = 'adodbapi'

    self.adoprovider = init_config.get('adoprovider', self.default_adoprovider)
    if self.adoprovider.upper() not in self.valid_adoproviders:
        self.log.error(
            "Invalid ADODB provider string %s, defaulting to %s", self.adoprovider, self.default_adoprovider
        )
        self.adoprovider = self.default_adoprovider

    # Pre-process the list of metrics to collect
    self.custom_metrics = init_config.get('custom_metrics', [])

    # `instances` defaults to None in the signature; iterating None would
    # raise TypeError, so treat it as "no instances".
    for instance in instances or []:
        try:
            instance_key = self._conn_key(instance, self.DEFAULT_DB_KEY)
            self.do_check[instance_key] = True

            # check to see if the database exists before we try any connections to it
            with self.open_managed_db_connections(instance, None, db_name=self.DEFAULT_DATABASE):
                db_exists, context = self._check_db_exists(instance)

            if db_exists:
                if instance.get('stored_procedure') is None:
                    with self.open_managed_db_connections(instance, self.DEFAULT_DB_KEY):
                        self._make_metric_list_to_collect(instance, self.custom_metrics)
            elif is_affirmative(instance.get("ignore_missing_database", False)):
                # The missing database is expected: leave checks disabled.
                self.do_check[instance_key] = False
                self.log.warning("Database %s does not exist. Disabling checks for this instance.", context)
            else:
                # The missing database is not expected: keep trying.
                self.log.error("Database %s does not exist. Fix issue and restart agent", context)
        except SQLConnectionError:
            self.log.exception("Skipping SQL Server instance")
        except Exception as e:
            # Initialization is best-effort per instance; log and move on.
            self.log.exception("Initialization exception %s", e)
def __init__(self, name, init_config, agentConfig, instances=None):
    """Build the table mapping statistic type names to submission methods."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    gauge = self.gauge
    # Only CountStatistic and DoubleStatistic are not submitted as gauges.
    self.metric_type_mapping = {
        'AverageStatistic': gauge,
        'BoundedRangeStatistic': gauge,
        'CountStatistic': self.monotonic_count,
        'DoubleStatistic': self.rate,
        'RangeStatistic': gauge,
        'TimeStatistic': gauge,
    }
def __init__(self, name, init_config, agentConfig, instances=None):
    """Create empty per-server state and derive the collection metric names.

    ``collection_metrics_names`` holds the second dot-separated component of
    every key in ``COLLECTION_METRICS``.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Members' last replica set states
    self._last_state_by_server = {}

    # List of metrics to collect per instance
    self.metrics_to_collect_by_instance = {}

    self.collection_metrics_names = [
        metric_key.split('.')[1] for metric_key in self.COLLECTION_METRICS
    ]
def __init__(self, name, init_config, agentConfig, instances=None):
    """Load the single configured instance and compile its tag patterns.

    Raises:
        Exception: if more than one instance is configured.
    """
    has_multiple_instances = instances is not None and len(instances) > 1
    if has_multiple_instances:
        raise Exception('Disk check only supports one configured instance.')

    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)

    # Get the configuration once for all
    only_instance = instances[0]
    self._load_conf(only_instance)
    self._compile_tag_re()
def __init__(self, name, init_config, agentConfig, instances=None):
    """Append the suffixed variants of each metric family to ``self.keys``.

    Every name in ``stat_keys``, ``search_latency_keys`` and ``vnodeq_keys``
    is combined with each of its family's suffixes as ``<name>_<suffix>``,
    grouped by suffix.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    stat_suffixes = ["mean", "median", "95", "99", "100"]
    self.keys.extend(
        [metric + "_" + suffix for suffix in stat_suffixes for metric in self.stat_keys]
    )

    latency_suffixes = ["min", "max", "mean", "median", "95", "99", "999"]
    self.keys.extend(
        [metric + "_" + suffix for suffix in latency_suffixes for metric in self.search_latency_keys]
    )

    vnodeq_suffixes = ["min", "max", "mean", "median", "total"]
    self.keys.extend(
        [metric + "_" + suffix for suffix in vnodeq_suffixes for metric in self.vnodeq_keys]
    )

    self.prev_coord_redirs_total = -1
def __init__(self, name, init_config, agentConfig, instances):
    """Resolve the ``nfsiostat`` command line stored in ``self.nfs_cmd``.

    A configured ``nfsiostat_path`` wins; otherwise two well-known install
    locations are probed in order. The arguments ``'1', '2'`` are always
    appended.

    Raises:
        Exception: if no path is configured and neither location exists.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # if they set the path, use that
    configured_path = init_config.get('nfsiostat_path')
    if configured_path:
        self.nfs_cmd = configured_path.split() + ['1', '2']
        return

    # if not, check the opt dir first, then the default place
    for candidate in (
        '/opt/datadog-agent/embedded/sbin/nfsiostat',
        '/usr/local/sbin/nfsiostat',
    ):
        if os.path.exists(candidate):
            self.nfs_cmd = [candidate, '1', '2']
            return

    raise Exception(
        'nfsstat check requires nfsiostat be installed, please install it '
        '(through nfs-utils) or set the path to the installed version'
    )
def __init__(self, name, init_config, agentConfig, instances=None):
    """Create the empty metric caches and warn about deprecated options.

    A deprecation warning is emitted when any instance contains the
    ``custom_metrics`` key.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    self.dbs = {}
    self.versions = {}

    self.instance_metrics = {}
    self.bgw_metrics = {}
    self.archiver_metrics = {}
    self.db_bgw_metrics = []
    self.db_archiver_metrics = []
    self.replication_metrics = {}
    self.activity_metrics = {}
    self.custom_metrics = {}

    # Deprecate custom_metrics in favor of custom_queries
    uses_custom_metrics = instances is not None and any(
        'custom_metrics' in instance for instance in instances
    )
    if uses_custom_metrics:
        self.warning(
            "DEPRECATION NOTICE: Please use the new custom_queries option "
            "rather than the now deprecated custom_metrics"
        )
def __init__(self, name, init_config, agentConfig, instances=None):
    """Create the PID/process caches and resolve the procfs path override.

    Cache durations are read from ``init_config`` and coerced with ``int``.
    On Linux, an ``init_config`` ``procfs_path`` that disagrees with the one
    in ``agentConfig`` only sets ``_conflicting_procfs``; otherwise it is
    applied to ``psutil.PROCFS_PATH`` and ``_deprecated_init_procfs`` is set.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # ad stands for access denied
    # We cache the PIDs getting this error and don't iterate on them more often than
    # `access_denied_cache_duration`
    # This cache is for all PIDs so it's global, but it should be refreshed by instance
    self.last_ad_cache_ts = {}
    self.ad_cache = set()
    ad_duration = init_config.get('access_denied_cache_duration', DEFAULT_AD_CACHE_DURATION)
    self.access_denied_cache_duration = int(ad_duration)

    # By default cache the PID list for a while
    # Sometimes it's not wanted b/c it can mess with no-data monitoring
    # This cache is indexed per instance
    self.last_pid_cache_ts = {}
    self.pid_cache = {}
    pid_duration = init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION)
    self.pid_cache_duration = int(pid_duration)

    self._conflicting_procfs = False
    self._deprecated_init_procfs = False

    # The procfs override is only considered on Linux.
    procfs_path = init_config.get('procfs_path') if Platform.is_linux() else None
    if procfs_path:
        conflicts_with_agent = (
            'procfs_path' in agentConfig
            and procfs_path != agentConfig.get('procfs_path').rstrip('/')
        )
        if conflicts_with_agent:
            self._conflicting_procfs = True
        else:
            self._deprecated_init_procfs = True
            psutil.PROCFS_PATH = procfs_path

    # Process cache, indexed by instance
    self.process_cache = defaultdict(dict)

    shared_duration = init_config.get(
        'shared_process_list_cache_duration', DEFAULT_SHARED_PROCESS_LIST_CACHE_DURATION
    )
    self.process_list_cache.cache_duration = int(shared_duration)
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the base check and start with an empty ``counts`` mapping."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Starts empty on every construction.
    self.counts = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the base check and the ``_masters`` lookup.

    Unknown keys in ``_masters`` read as the empty string.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # str() returns "", which is the default for any key not seen yet.
    self._masters = defaultdict(str)
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the base check with empty connection and timestamp state.

    Keys missing from ``last_timestamp_seen`` read as 0.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    self.connections = {}
    # int() yields 0 for keys that have no timestamp recorded yet.
    self.last_timestamp_seen = defaultdict(int)
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the base check and start with an empty ``cluster_status``."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Host status needs to persist across all checks
    self.cluster_status = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the base check with empty alert and vhost caches."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    self.already_alerted = []

    # this is used to send CRITICAL rabbitmq.aliveness check if the server goes down
    self.cached_vhosts = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    """Create the Nagios tailers for every configured instance.

    ``self.nagios_tails`` maps an instance key to the list of tailers built
    for that instance. The key is the ``nagios_conf`` path, else the legacy
    ``nagios_perf_cfg`` path, else the ``nagios_log`` path. Instances that
    yield no Nagios configuration are skipped with a warning.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.nagios_tails = {}
    check_freq = init_config.get("check_freq", 15)

    if instances is None:
        return

    for instance in instances:
        tailers = []
        nagios_conf = {}
        instance_key = None
        custom_tag = instance.get('tags', [])

        if 'nagios_conf' in instance:  # conf.d check
            config_path = instance['nagios_conf']
            nagios_conf = self.parse_nagios_config(config_path)
            instance_key = config_path
        # Retrocompatibility Code
        elif 'nagios_perf_cfg' in instance:
            config_path = instance['nagios_perf_cfg']
            nagios_conf = self.parse_nagios_config(config_path)
            # The legacy option switches on both perfdata collections.
            instance["collect_host_performance_data"] = True
            instance["collect_service_performance_data"] = True
            instance_key = config_path
        if 'nagios_log' in instance:
            nagios_conf["log_file"] = instance['nagios_log']
            if instance_key is None:
                instance_key = instance['nagios_log']
        # End of retrocompatibility code

        if not nagios_conf:
            self.log.warning("Missing path to nagios_conf")
            continue

        tail_events = 'log_file' in nagios_conf and instance.get('collect_events', True)
        if tail_events:
            self.log.debug("Starting to tail the event log")
            event_tailer = NagiosEventLogTailer(
                log_path=nagios_conf['log_file'],
                file_template=None,
                logger=self.log,
                hostname=self.hostname,
                tags=custom_tag,
                event_func=self.event,
                gauge_func=self.gauge,
                freq=check_freq,
                passive_checks=instance.get('passive_checks_events', False),
            )
            tailers.append(event_tailer)

        tail_host_perfdata = (
            'host_perfdata_file' in nagios_conf
            and 'host_perfdata_file_template' in nagios_conf
            and instance.get('collect_host_performance_data', False)
        )
        if tail_host_perfdata:
            self.log.debug("Starting to tail the host_perfdata file")
            host_tailer = NagiosHostPerfDataTailer(
                log_path=nagios_conf['host_perfdata_file'],
                file_template=nagios_conf['host_perfdata_file_template'],
                logger=self.log,
                hostname=self.hostname,
                event_func=self.event,
                gauge_func=self.gauge,
                freq=check_freq,
                tags=custom_tag,
            )
            tailers.append(host_tailer)

        tail_service_perfdata = (
            'service_perfdata_file' in nagios_conf
            and 'service_perfdata_file_template' in nagios_conf
            and instance.get('collect_service_performance_data', False)
        )
        if tail_service_perfdata:
            self.log.debug("Starting to tail the service_perfdata file")
            service_tailer = NagiosServicePerfDataTailer(
                log_path=nagios_conf['service_perfdata_file'],
                file_template=nagios_conf['service_perfdata_file_template'],
                logger=self.log,
                hostname=self.hostname,
                event_func=self.event,
                gauge_func=self.gauge,
                freq=check_freq,
                tags=custom_tag,
            )
            tailers.append(service_tailer)

        self.nagios_tails[instance_key] = tailers
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the base check and resolve the ``nodetool`` command path.

    The path comes from the ``nodetool`` option of ``init_config`` and
    defaults to ``/usr/bin/nodetool``.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    configured_nodetool = init_config.get("nodetool", "/usr/bin/nodetool")
    self.nodetool_cmd = configured_nodetool
def __init__(self, name, init_config, instances):
    """Create the Nagios tailers for the first configured instance.

    ``self.nagios_tails`` maps the instance key to the list of tailers. The
    key is the ``nagios_conf`` path, else the legacy ``nagios_perf_cfg``
    path, else the ``nagios_log`` path. When no Nagios configuration can be
    determined, a warning is logged and ``nagios_tails`` is left empty.
    """
    AgentCheck.__init__(self, name, init_config, instances)
    self.nagios_tails = {}

    instance = self.instances[0]
    tailers = []
    nagios_conf = {}
    instance_key = None
    custom_tag = instance.get('tags', [])

    if 'nagios_conf' in instance:  # conf.d check
        config_path = instance['nagios_conf']
        nagios_conf = self.parse_nagios_config(config_path)
        instance_key = config_path
    # Retrocompatibility Code
    elif 'nagios_perf_cfg' in instance:
        config_path = instance['nagios_perf_cfg']
        nagios_conf = self.parse_nagios_config(config_path)
        # The legacy option switches on both perfdata collections.
        instance["collect_host_performance_data"] = True
        instance["collect_service_performance_data"] = True
        instance_key = config_path
    if 'nagios_log' in instance:
        nagios_conf["log_file"] = instance['nagios_log']
        if instance_key is None:
            instance_key = instance['nagios_log']
    # End of retrocompatibility code

    if not nagios_conf:
        self.log.warning("Missing path to nagios_conf")
        return

    tail_events = 'log_file' in nagios_conf and instance.get('collect_events', True)
    if tail_events:
        self.log.debug("Starting to tail the event log")
        event_tailer = NagiosEventLogTailer(
            log_path=nagios_conf['log_file'],
            logger=self.log,
            hostname=self.hostname,
            event_func=self.event,
            tags=custom_tag,
            passive_checks=instance.get('passive_checks_events', False),
        )
        tailers.append(event_tailer)

    tail_host_perfdata = (
        'host_perfdata_file' in nagios_conf
        and 'host_perfdata_file_template' in nagios_conf
        and instance.get('collect_host_performance_data', False)
    )
    if tail_host_perfdata:
        self.log.debug("Starting to tail the host_perfdata file")
        host_tailer = NagiosPerfDataTailer(
            log_path=nagios_conf['host_perfdata_file'],
            file_template=nagios_conf['host_perfdata_file_template'],
            logger=self.log,
            hostname=self.hostname,
            gauge_func=self.gauge,
            tags=custom_tag,
            perfdata_field='HOSTPERFDATA',
            metric_prefix=_get_host_metric_prefix,
        )
        tailers.append(host_tailer)

    tail_service_perfdata = (
        'service_perfdata_file' in nagios_conf
        and 'service_perfdata_file_template' in nagios_conf
        and instance.get('collect_service_performance_data', False)
    )
    if tail_service_perfdata:
        self.log.debug("Starting to tail the service_perfdata file")
        service_tailer = NagiosPerfDataTailer(
            log_path=nagios_conf['service_perfdata_file'],
            file_template=nagios_conf['service_perfdata_file_template'],
            logger=self.log,
            hostname=self.hostname,
            gauge_func=self.gauge,
            tags=custom_tag,
            perfdata_field='SERVICEPERFDATA',
            metric_prefix=_get_service_metric_prefix,
        )
        tailers.append(service_tailer)

    self.nagios_tails[instance_key] = tailers
def __init__(self, name, init_config, instances):
    """Initialize the base check and resolve the ``gunicorn`` command.

    The instance-level ``gunicorn`` option takes precedence over the
    ``init_config`` one; the final fallback is the literal ``gunicorn``.
    """
    AgentCheck.__init__(self, name, init_config, instances)

    shared_default = init_config.get('gunicorn', 'gunicorn')
    self.gunicorn_cmd = self.instance.get('gunicorn', shared_default)
def __init__(self, *args, **kwargs):
    """Forward all arguments to ``AgentCheck`` and create an empty instance cache."""
    AgentCheck.__init__(self, *args, **kwargs)

    # Starts empty on every construction.
    self.instance_cache = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the base check and the per-instance state registry.

    A fresh ``ConsulCheckInstanceState`` is created the first time a key of
    ``_instance_states`` is accessed.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # The class itself is the factory: calling it with no arguments builds a new state.
    self._instance_states = defaultdict(ConsulCheckInstanceState)
def __init__(self, *args, **kwargs):
    """Forward all arguments to ``AgentCheck`` and create empty offset/instance caches."""
    AgentCheck.__init__(self, *args, **kwargs)

    # Both mappings start empty on every construction.
    self._previous_offset = {}
    self.instance_cache = {}
def __init__(self, name, init_config, instances=None):
    """Initialize the base check with empty server-state and index-rate mappings."""
    AgentCheck.__init__(self, name, init_config, instances)

    # Both mappings start empty on every construction.
    self._last_state_by_server = {}
    self.idx_rates = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the base check and start with an empty ``assumed_url`` mapping."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Starts empty on every construction.
    self.assumed_url = {}