def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # ad stands for access denied
    # We cache the PIDs getting this error and don't iterate on them
    # more often than `access_denied_cache_duration`
    # This cache is for all PIDs so it's global, but it should be refreshed by instance
    self.last_ad_cache_ts = {}
    self.ad_cache = set()
    self.access_denied_cache_duration = int(
        init_config.get('access_denied_cache_duration', DEFAULT_AD_CACHE_DURATION)
    )

    # By default cache the PID list for a while
    # Sometimes it's not wanted b/c it can mess with no-data monitoring
    # This cache is indexed per instance
    self.last_pid_cache_ts = {}
    self.pid_cache = {}
    self.pid_cache_duration = int(
        init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION)
    )

    self._conflicting_procfs = False
    self._deprecated_init_procfs = False
    if Platform.is_linux():
        procfs_path = init_config.get('procfs_path')
        if procfs_path:
            if 'procfs_path' in agentConfig and procfs_path != agentConfig.get('procfs_path').rstrip('/'):
                self._conflicting_procfs = True
            else:
                self._deprecated_init_procfs = True
                psutil.PROCFS_PATH = procfs_path

    # Process cache, indexed by instance
    self.process_cache = defaultdict(dict)
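# Illustrative sketch (an assumption, not taken from the check itself): how the
# access-denied cache initialized above might be gated per instance, using
# `last_ad_cache_ts` keyed by an instance identifier and the configured duration.
# The helper name `_should_refresh_ad_cache` is hypothetical.
import time

def _should_refresh_ad_cache(self, instance_key):
    # Refresh only when the per-instance timestamp is older than the cache duration
    last = self.last_ad_cache_ts.get(instance_key, 0)
    return time.time() - last > self.access_denied_cache_duration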
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self._countersettypes = {}
    self._counters = {}
    self._metrics = {}
    self._tags = {}

    try:
        for instance in instances:
            key = hash_mutable(instance)

            counterset = instance.get('countersetname')

            # Keep the configured tags as a list; joining them into a string
            # and re-splitting would mangle them
            cfg_tags = instance.get('tags')
            self._tags[key] = list(cfg_tags) if cfg_tags else []

            metrics = instance.get('metrics')
            # list of the metrics. Each entry is itself an entry,
            # which is the pdh name, datadog metric name, type, and the
            # pdh counter object
            self._metrics[key] = []
            for inst_name, dd_name, mtype in metrics:
                m = getattr(self, mtype.lower())
                obj = WinPDHCounter(counterset, inst_name, self.log)
                if not obj:
                    continue

                entry = [inst_name, dd_name, m, obj]
                self.log.debug("entry: %s", entry)
                self._metrics[key].append(entry)
    except Exception as e:
        self.log.debug("Exception in PDH init: %s", str(e))
        raise
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    self._ssl_verify = is_affirmative(init_config.get("ssl_verify", True))
    self.keystone_server_url = init_config.get("keystone_server_url")
    self.hypervisor_name_cache = {}

    self.paginated_server_limit = init_config.get('paginated_server_limit') or DEFAULT_PAGINATED_SERVER_LIMIT
    self.request_timeout = init_config.get('request_timeout') or DEFAULT_API_REQUEST_TIMEOUT

    if not self.keystone_server_url:
        raise IncompleteConfig()

    # Current authentication scopes
    self._parent_scope = None
    self._current_scope = None

    # Cache some things between runs for values that change rarely
    self._aggregate_list = None

    # Mapping of check instances to associated OpenStack project scopes
    self.instance_map = {}

    # Mapping of Nova-managed servers to tags
    self.external_host_tags = {}

    self.exclude_network_id_rules = set([re.compile(ex) for ex in init_config.get('exclude_network_ids', [])])
    self.exclude_server_id_rules = set([re.compile(ex) for ex in init_config.get('exclude_server_ids', [])])
    self.include_project_name_rules = set([re.compile(ex) for ex in init_config.get('whitelist_project_names', [])])
    self.exclude_project_name_rules = set([re.compile(ex) for ex in init_config.get('blacklist_project_names', [])])

    skip_proxy = not is_affirmative(init_config.get('use_agent_proxy', True))
    self.proxy_config = None if skip_proxy else self.proxies

    self.backoff = BackOffRetry(self)

    # ISO8601 date time: used to filter the call to get the list of nova servers
    self.changes_since_time = {}

    # Ex: server_details_by_id = {
    #   UUID: {UUID: <value>, etc}
    #   1: {id: 1, name: hostA},
    #   2: {id: 2, name: hostB}
    # }
    self.server_details_by_id = {}
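# Illustrative sketch (not from the check itself): how the compiled
# exclude_server_id_rules above might be applied when filtering servers.
# The helper name `_is_excluded_server` is hypothetical.
def _is_excluded_server(self, server_id):
    # A server is skipped if any configured regex matches its ID
    return any(rule.match(server_id) for rule in self.exclude_server_id_rules)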
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Parse general counters
    self.general_counters = self._parse_general_counters(init_config)

    # Parse job specific counters
    self.job_specific_counters = self._parse_job_specific_counters(init_config)
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Host status needs to persist across all checks.
    # We'll create keys when they are referenced. See:
    # https://en.wikipedia.org/wiki/Autovivification
    # https://gist.github.com/hrldcpr/2012250
    self.host_status = defaultdict(lambda: defaultdict(lambda: None))
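# Quick demonstration of the autovivification pattern used above: nested keys
# spring into existence on first reference, defaulting to None, so the check
# never needs to pre-create host or service entries. Example values are made up.
from collections import defaultdict

host_status = defaultdict(lambda: defaultdict(lambda: None))
host_status['host-a']['mysql']          # both keys now exist; value is None
host_status['host-a']['mysql'] = 'OK'   # later checks can update in place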
def __init__(self, name, init_config, instances=None):
    AgentCheck.__init__(self, name, init_config, instances)
    self.assumed_url = {}

    if instances is not None and 'auth_type' in instances[0]:
        if instances[0]['auth_type'] == 'digest':
            auth = self.http.options['auth']
            self.http.options['auth'] = requests.auth.HTTPDigestAuth(auth[0], auth[1])
def __init__(self, name, init_config, agentConfig, instances=None):
    if instances is not None and len(instances) > 1:
        raise Exception("Disk check only supports one configured instance.")
    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)

    # Get the configuration once for all
    self._load_conf(instances[0])
    self._compile_tag_re()
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.tenant_metrics = aci_metrics.make_tenant_metrics()
    self.last_events_ts = {}
    self.external_host_tags = {}
    self._api_cache = {}
    self.check_tags = ['cisco']
    self.tagger = CiscoTags(log=self.log)
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)

    if instances is not None and len(instances) > 1:
        raise Exception("BTRFS check only supports one configured instance.")
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Cache connections
    self.connections = {}
    self.failed_connections = {}
    self.instances_metrics = {}
    self.instances_per_type_metrics = defaultdict(dict)
    self.existing_databases = None
    self.do_check = {}
    self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}
    self.adoprovider = self.default_adoprovider

    self.connector = init_config.get('connector', 'adodbapi')
    if self.connector.lower() not in self.valid_connectors:
        self.log.error("Invalid database connector %s, defaulting to adodbapi", self.connector)
        self.connector = 'adodbapi'

    self.adoprovider = init_config.get('adoprovider', self.default_adoprovider)
    if self.adoprovider.upper() not in self.valid_adoproviders:
        self.log.error(
            "Invalid ADODB provider string %s, defaulting to %s",
            self.adoprovider, self.default_adoprovider
        )
        self.adoprovider = self.default_adoprovider

    # Pre-process the list of metrics to collect
    self.custom_metrics = init_config.get('custom_metrics', [])
    for instance in instances:
        try:
            instance_key = self._conn_key(instance, self.DEFAULT_DB_KEY)
            self.do_check[instance_key] = True

            # check to see if the database exists before we try any connections to it
            with self.open_managed_db_connections(instance, None, db_name=self.DEFAULT_DATABASE):
                db_exists, context = self._check_db_exists(instance)

            if db_exists:
                if instance.get('stored_procedure') is None:
                    with self.open_managed_db_connections(instance, self.DEFAULT_DB_KEY):
                        self._make_metric_list_to_collect(instance, self.custom_metrics)
            else:
                # How much do we care that the DB doesn't exist?
                if is_affirmative(instance.get("ignore_missing_database", False)):
                    # Not much: we expect it. Leave checks disabled.
                    self.do_check[instance_key] = False
                    self.log.warning("Database %s does not exist. Disabling checks for this instance.", context)
                else:
                    # Yes we do. Keep trying.
                    self.log.error("Database %s does not exist. Fix issue and restart agent", context)
        except SQLConnectionError:
            self.log.exception("Skipping SQL Server instance")
            continue
        except Exception as e:
            self.log.exception("Initialization exception %s", e)
            continue
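# Illustrative sketch (an assumption): how the per-instance `do_check` flags
# set above would typically gate the run, so instances whose database was
# missing at init time are skipped. This check() body is hypothetical.
def check(self, instance):
    instance_key = self._conn_key(instance, self.DEFAULT_DB_KEY)
    if not self.do_check.get(instance_key, False):
        self.log.debug("Skipping disabled instance %s", instance_key)
        return
    # ... proceed with metric collection ...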
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.cluster_name = None

    for instance in instances or []:
        url = instance.get('url', '')
        parsed_url = urlparse(url)
        ssl_verify = not _is_affirmative(instance.get('disable_ssl_validation', False))
        if not ssl_verify and parsed_url.scheme == 'https':
            self.log.warning('Skipping SSL cert validation for %s based on configuration.', url)
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.time_started = time.time()
    self.pool_started = False
    self.exceptionq = Queue()

    self.batch_morlist_size = max(init_config.get("batch_morlist_size", BATCH_MORLIST_SIZE), 0)
    self.batch_collector_size = max(init_config.get("batch_property_collector_size", BATCH_COLLECTOR_SIZE), 0)

    self.refresh_morlist_interval = init_config.get('refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
    self.clean_morlist_interval = max(
        init_config.get('clean_morlist_interval', 2 * self.refresh_morlist_interval),
        self.refresh_morlist_interval
    )
    self.refresh_metrics_metadata_interval = init_config.get(
        'refresh_metrics_metadata_interval', REFRESH_METRICS_METADATA_INTERVAL
    )

    # Connections open to vCenter instances
    self.server_instances = {}
    self.server_instances_lock = threading.RLock()

    # Event configuration
    self.event_config = {}

    # Caching configuration
    self.cache_config = CacheConfig()

    # build up configurations
    for instance in instances:
        i_key = self._instance_key(instance)
        # caches
        self.cache_config.set_interval(CacheConfig.Morlist, i_key, self.refresh_morlist_interval)
        self.cache_config.set_interval(CacheConfig.Metadata, i_key, self.refresh_metrics_metadata_interval)
        # events
        self.event_config[i_key] = instance.get('event_config')

    # Queue of raw Mor objects to process
    self.mor_objects_queue = ObjectsQueue()

    # Cache of processed Mor objects
    self.mor_cache = MorCache()

    # managed entity raw view
    self.registry = {}

    # Metrics metadata, for each instance keeps the mapping: perfCounterKey -> {name, group, description}
    self.metadata_cache = MetadataCache()
    self.latest_event_query = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.dbs = {}
    self.versions = {}
    self.instance_metrics = {}
    self.bgw_metrics = {}
    self.archiver_metrics = {}
    self.db_bgw_metrics = []
    self.db_archiver_metrics = []
    self.replication_metrics = {}
    self.custom_metrics = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Members' last replica set states
    self._last_state_by_server = {}

    # List of metrics to collect per instance
    self.metrics_to_collect_by_instance = {}

    self.collection_metrics_names = []
    for (key, value) in self.COLLECTION_METRICS.iteritems():
        self.collection_metrics_names.append(key.split('.')[1])
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.time_started = time.time()
    self.pool_started = False
    self.exceptionq = Queue()

    self.batch_morlist_size = max(init_config.get("batch_morlist_size", BATCH_MORLIST_SIZE), 0)
    self.batch_collector_size = max(init_config.get("batch_property_collector_size", BATCH_COLLECTOR_SIZE), 0)

    self.refresh_morlist_interval = init_config.get('refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
    self.clean_morlist_interval = max(
        init_config.get('clean_morlist_interval', 2 * self.refresh_morlist_interval),
        self.refresh_morlist_interval
    )
    self.refresh_metrics_metadata_interval = init_config.get(
        'refresh_metrics_metadata_interval', REFRESH_METRICS_METADATA_INTERVAL
    )

    # Connections open to vCenter instances
    self.server_instances = {}

    # Event configuration
    self.event_config = {}

    # Caching configuration
    self.cache_config = CacheConfig()

    # build up configurations
    for instance in instances:
        i_key = self._instance_key(instance)
        # caches
        self.cache_config.set_interval(CacheConfig.Morlist, i_key, self.refresh_morlist_interval)
        self.cache_config.set_interval(CacheConfig.Metadata, i_key, self.refresh_metrics_metadata_interval)
        # events
        self.event_config[i_key] = instance.get('event_config')

    # managed entity raw view
    self.registry = {}

    # First layer of cache (get entities from the tree)
    self.morlist_raw = {}

    # Second layer, processed from the first one
    self.morlist = {}

    # Metrics metadata, basically perfCounterId -> {name, group, description}
    self.metrics_metadata = {}
    self.latest_event_query = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.dbs = {}
    self.versions = {}
    self.instance_metrics = {}
    self.bgw_metrics = {}
    self.archiver_metrics = {}
    self.db_bgw_metrics = []
    self.db_archiver_metrics = []
    self.replication_metrics = {}
    self.custom_metrics = {}

    # keep track of host/port present in any configured instance
    self._known_servers = set()
def __init__(self, name, init_config, agentConfig, instances=None): AgentCheck.__init__(self, name, init_config, agentConfig, instances) for k in ["mean", "median", "95", "99", "100"]: for m in self.stat_keys: self.keys.append(m + "_" + k) for k in ["min", "max", "mean", "median", "95", "99", "999"]: for m in self.search_latency_keys: self.keys.append(m + "_" + k) for k in ["min", "max", "mean", "median", "total"]: for m in self.vnodeq_keys: self.keys.append(m + "_" + k) self.prev_coord_redirs_total = -1
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)
    self._zk_timeout = int(init_config.get('zk_timeout', DEFAULT_ZK_TIMEOUT))
    self._kafka_timeout = int(init_config.get('kafka_timeout', DEFAULT_KAFKA_TIMEOUT))
    self.context_limit = int(init_config.get('max_partition_contexts', CONTEXT_UPPER_BOUND))
    self._broker_retries = int(init_config.get('kafka_retries', DEFAULT_KAFKA_RETRIES))
    self._zk_last_ts = {}

    self.kafka_clients = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.dbs = {}
    self.versions = {}
    self.instance_metrics = {}
    self.bgw_metrics = {}
    self.archiver_metrics = {}
    self.db_bgw_metrics = []
    self.db_archiver_metrics = []
    self.replication_metrics = {}
    self.activity_metrics = {}
    self.custom_metrics = {}

    # Deprecate custom_metrics in favor of custom_queries
    if instances is not None and any('custom_metrics' in instance for instance in instances):
        self.warning(
            "DEPRECATION NOTICE: Please use the new custom_queries option "
            "rather than the now deprecated custom_metrics"
        )
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # if they set the path, use that
    if init_config.get('nfsiostat_path'):
        self.nfs_cmd = [init_config.get('nfsiostat_path'), '1', '2']
    # if not, check if it's installed in the opt dir, if so use that
    elif os.path.exists('/opt/datadog-agent/embedded/sbin/nfsiostat'):
        self.nfs_cmd = ['/opt/datadog-agent/embedded/sbin/nfsiostat', '1', '2']
    # if not, then check if it is in the default place
    elif os.path.exists('/usr/local/sbin/nfsiostat'):
        self.nfs_cmd = ['/usr/local/sbin/nfsiostat', '1', '2']
    else:
        raise Exception(
            'nfsstat check requires nfsiostat be installed, please install it '
            '(through nfs-utils) or set the path to the installed version'
        )
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.time_started = time.time()
    self.pool_started = False
    self.jobs_status = {}
    self.exceptionq = Queue()

    # Connections open to vCenter instances
    self.server_instances = {}

    # Event configuration
    self.event_config = {}

    # Caching resources, timeouts
    self.cache_times = {}
    for instance in self.instances:
        i_key = self._instance_key(instance)
        self.cache_times[i_key] = {
            MORLIST: {
                LAST: 0,
                INTERVAL: init_config.get('refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
            },
            METRICS_METADATA: {
                LAST: 0,
                INTERVAL: init_config.get('refresh_metrics_metadata_interval', REFRESH_METRICS_METADATA_INTERVAL)
            }
        }
        self.event_config[i_key] = instance.get('event_config')

    # managed entity raw view
    self.registry = {}

    # First layer of cache (get entities from the tree)
    self.morlist_raw = {}

    # Second layer, processed from the first one
    self.morlist = {}

    # Metrics metadata, basically perfCounterId -> {name, group, description}
    self.metrics_metadata = {}
    self.latest_event_query = {}
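# Illustrative sketch (an assumption): how the LAST/INTERVAL pairs stored in
# `cache_times` above are typically consulted to decide whether a cached
# resource is stale. The helper name `_is_cache_expired` is hypothetical.
import time

def _is_cache_expired(self, i_key, resource):
    # `resource` is one of the cache keys, e.g. MORLIST or METRICS_METADATA
    entry = self.cache_times[i_key][resource]
    return time.time() - entry[LAST] > entry[INTERVAL]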
def __init__(self, *args, **kwargs):
    AgentCheck.__init__(self, *args, **kwargs)
    self.instance_cache = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self._last_state_by_server = {}
    self.idx_rates = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Keep track of last build IDs per instance
    self.last_build_ids = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.default_timeout = init_config.get('default_timeout', self.DEFAULT_TIMEOUT)
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.nodetool_cmd = init_config.get("nodetool", "/usr/bin/nodetool")
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Keep track of all instances
    self._instance_states = defaultdict(lambda: self.CouchbaseInstanceState())
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Host status needs to persist across all checks
    self.cluster_status = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.dbs = {}
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.wmi_samplers = {}
    self.wmi_props = {}