def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the BTRFS check; at most one configured instance is allowed."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)
    # Multiple instances are a configuration error for this check.
    if instances is not None and len(instances) > 1:
        raise Exception("BTRFS check only supports one configured instance.")
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the SQL Server check.

    Validates the connector/ADO provider configuration, then probes each
    configured instance's database once at startup to decide whether checks
    should run for it.

    Fixes: corrected the "INitialization" typo in the exception log message,
    dropped a dead first assignment to ``self.adoprovider`` (it was
    unconditionally reassigned below), and simplified the redundant
    ``ignore is not None and ignore`` test (exactly equivalent to ``if ignore:``).
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Cache connections
    self.connections = {}
    self.failed_connections = {}
    self.instances_metrics = {}
    self.instances_per_type_metrics = defaultdict(dict)
    self.existing_databases = None
    self.do_check = {}
    self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}

    # Validate the configured connector, falling back to adodbapi.
    self.connector = init_config.get('connector', 'adodbapi')
    if self.connector.lower() not in self.valid_connectors:
        self.log.error("Invalid database connector %s, defaulting to adodbapi", self.connector)
        self.connector = 'adodbapi'

    # Validate the configured ADO provider, falling back to the default.
    self.adoprovider = init_config.get('adoprovider', self.default_adoprovider)
    if self.adoprovider.upper() not in self.valid_adoproviders:
        self.log.error(
            "Invalid ADODB provider string %s, defaulting to %s", self.adoprovider, self.default_adoprovider
        )
        self.adoprovider = self.default_adoprovider

    # Pre-process the list of metrics to collect
    self.custom_metrics = init_config.get('custom_metrics', [])
    for instance in instances:
        try:
            instance_key = self._conn_key(instance, self.DEFAULT_DB_KEY)
            self.do_check[instance_key] = True

            # check to see if the database exists before we try any connections to it
            with self.open_managed_db_connections(instance, None, db_name=self.DEFAULT_DATABASE):
                db_exists, context = self._check_db_exists(instance)

            if db_exists:
                if instance.get('stored_procedure') is None:
                    with self.open_managed_db_connections(instance, self.DEFAULT_DB_KEY):
                        self._make_metric_list_to_collect(instance, self.custom_metrics)
            else:
                # How much do we care that the DB doesn't exist?
                ignore = is_affirmative(instance.get("ignore_missing_database", False))
                if ignore:
                    # not much: we expect it, so leave checks disabled
                    self.do_check[instance_key] = False
                    self.log.warning("Database %s does not exist. Disabling checks for this instance.", context)
                else:
                    # yes we do — keep trying
                    self.log.error("Database %s does not exist. Fix issue and restart agent", context)
        except SQLConnectionError:
            self.log.exception("Skipping SQL Server instance")
            continue
        except Exception as e:
            self.log.exception("Initialization exception %s", e)
            continue
def __init__(self, name, init_config, agentConfig, instances):
    """Initialize the check: timers, batching knobs, refresh intervals and caches."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.time_started = time.time()
    self.pool_started = False
    self.exceptionq = Queue()

    # Batch sizes, clamped so a negative configured value falls back to 0.
    self.batch_morlist_size = max(init_config.get("batch_morlist_size", BATCH_MORLIST_SIZE), 0)
    self.batch_collector_size = max(init_config.get("batch_property_collector_size", BATCH_COLLECTOR_SIZE), 0)

    # Refresh intervals; cleanup never runs more often than the refresh itself.
    self.refresh_morlist_interval = init_config.get('refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
    self.clean_morlist_interval = max(
        init_config.get('clean_morlist_interval', 2 * self.refresh_morlist_interval),
        self.refresh_morlist_interval,
    )
    self.refresh_metrics_metadata_interval = init_config.get(
        'refresh_metrics_metadata_interval', REFRESH_METRICS_METADATA_INTERVAL
    )

    # Connections open to vCenter instances, guarded by a re-entrant lock.
    self.server_instances = {}
    self.server_instances_lock = threading.RLock()

    # Per-instance event configuration.
    self.event_config = {}

    # Caching configuration, filled per instance below.
    self.cache_config = CacheConfig()
    for inst in instances:
        key = self._instance_key(inst)
        self.cache_config.set_interval(CacheConfig.Morlist, key, self.refresh_morlist_interval)
        self.cache_config.set_interval(CacheConfig.Metadata, key, self.refresh_metrics_metadata_interval)
        self.event_config[key] = inst.get('event_config')

    # Queue of raw Mor objects to process.
    self.mor_objects_queue = ObjectsQueue()
    # Cache of processed Mor objects.
    self.mor_cache = MorCache()
    # Managed entity raw view.
    self.registry = {}
    # Metrics metadata, per instance: perfCounterKey -> {name, group, description}.
    self.metadata_cache = MetadataCache()
    self.latest_event_query = {}
def test_default_string(self):
    """A plain string tag is normalized to its UTF-8 encoded form."""
    check = AgentCheck()
    tag = 'default:string'
    source = [tag]
    # NOTE(review): this exercises `_normalize_tags`, while the unicode/bytes
    # tests use `_normalize_tags_type` — presumably deliberate coverage of the
    # legacy path; confirm before consolidating.
    result = check._normalize_tags(source, None)
    # A fresh list is returned; the input list is not mutated in place.
    assert result is not source
    assert result[0] == tag.encode('utf-8')
def test_unicode_string(self):
    """A unicode tag is normalized to its UTF-8 encoded form."""
    check = AgentCheck()
    tag = u'unicode:string'
    source = [tag]
    result = check._normalize_tags_type(source, None)
    # Normalization returns a new list rather than mutating its argument.
    assert result is not source
    assert result[0] == tag.encode('utf-8')
def test_valid_event(self, aggregator):
    """A well-formed event submitted through the check reaches the aggregator."""
    check = AgentCheck()
    payload = {
        "event_type": "new.event",
        "msg_title": "new test event",
        "aggregation_key": "test.event",
        "msg_text": "test event test event",
        "tags": None,
    }
    check.event(payload)
    aggregator.assert_event('test event test event')
def test_bytes_string(self):
    """A tag that is already bytes passes through normalization untouched."""
    check = AgentCheck()
    tag = b'bytes:string'
    source = [tag]
    result = check._normalize_tags_type(source, None)
    # A new list is returned...
    assert result is not source
    # ...but the bytes object itself is reused — no new allocation occurs.
    assert result[0] is tag
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the per-server caches this check keeps across runs."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # Connection and metadata caches, populated during checks.
    self.dbs = dict()
    self.versions = dict()
    self.instance_metrics = dict()
    self.bgw_metrics = dict()
    self.archiver_metrics = dict()
    self.db_bgw_metrics = list()
    self.db_archiver_metrics = list()
    self.replication_metrics = dict()
    self.custom_metrics = dict()
def __init__(self, name, init_config, agentConfig, instances=None):
    """Warn at startup for every HTTPS instance that disables SSL validation."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    for cfg in instances or []:
        target = cfg.get('url', '')
        scheme = urlparse(target).scheme
        verify_ssl = not _is_affirmative(cfg.get('disable_ssl_validation', False))
        if scheme == 'https' and not verify_ssl:
            self.log.warning('Skipping SSL cert validation for {0} based on configuration.'.format(target))
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize per-server state and precompute collection metric names.

    Fix: the original iterated ``COLLECTION_METRICS.iteritems()`` — a
    Python-2-only API — while discarding the values. Iterating the mapping's
    keys directly is equivalent, works on Python 2 and 3, and avoids the
    unused-value lookup.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Members' last replica set states
    self._last_state_by_server = {}

    # List of metrics to collect per instance
    self.metrics_to_collect_by_instance = {}

    # e.g. a "collection.count" key contributes the name "count".
    self.collection_metrics_names = [key.split('.')[1] for key in self.COLLECTION_METRICS]
def test_https_proxy():
    """With skip_proxy set, a bad HTTPS_PROXY in the environment is bypassed."""
    saved_env = dict(os.environ)
    os.environ['HTTPS_PROXY'] = BAD_PROXY_SETTINGS['https']
    try:
        check = AgentCheck()
        proxies = check.get_instance_proxy({'skip_proxy': True}, 'uri/health')
        # The request must succeed despite the poisoned proxy env var.
        response = requests.get('https://google.com', proxies=proxies)
        response.raise_for_status()
    finally:
        # Restore the environment exactly as it was.
        os.environ.clear()
        os.environ.update(saved_env)
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the Disk check; exactly one instance is supported."""
    # Reject multi-instance configuration before doing any setup work.
    if instances is not None and len(instances) > 1:
        raise Exception("Disk check only supports one configured instance.")
    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)
    # Get the configuration once for all
    self._load_conf(instances[0])
    self._compile_tag_re()
def __init__(self, name, init_config, agentConfig, instances):
    """Initialize timers, batching limits, refresh intervals and the Mor caches."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.time_started = time.time()
    self.pool_started = False
    self.exceptionq = Queue()

    # Batch sizes, clamped so a negative configured value falls back to 0.
    self.batch_morlist_size = max(init_config.get("batch_morlist_size", BATCH_MORLIST_SIZE), 0)
    self.batch_collector_size = max(init_config.get("batch_property_collector_size", BATCH_COLLECTOR_SIZE), 0)

    # Refresh intervals; morlist cleanup never happens more often than its refresh.
    self.refresh_morlist_interval = init_config.get('refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
    self.clean_morlist_interval = max(
        init_config.get('clean_morlist_interval', 2 * self.refresh_morlist_interval),
        self.refresh_morlist_interval,
    )
    self.refresh_metrics_metadata_interval = init_config.get(
        'refresh_metrics_metadata_interval', REFRESH_METRICS_METADATA_INTERVAL
    )

    # Connections open to vCenter instances.
    self.server_instances = {}
    # Per-instance event configuration.
    self.event_config = {}
    # Caching configuration, filled per instance below.
    self.cache_config = CacheConfig()

    for inst in instances:
        key = self._instance_key(inst)
        self.cache_config.set_interval(CacheConfig.Morlist, key, self.refresh_morlist_interval)
        self.cache_config.set_interval(CacheConfig.Metadata, key, self.refresh_metrics_metadata_interval)
        self.event_config[key] = inst.get('event_config')

    # Managed entity raw view.
    self.registry = {}
    # First layer of cache (entities straight from the tree).
    self.morlist_raw = {}
    # Second layer, processed from the first one.
    self.morlist = {}
    # Metrics metadata: perfCounterId -> {name, group, description}.
    self.metrics_metadata = {}
    self.latest_event_query = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize per-server caches plus the set of known host/port pairs."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # Connection and metadata caches, populated during checks.
    self.dbs = dict()
    self.versions = dict()
    self.instance_metrics = dict()
    self.bgw_metrics = dict()
    self.archiver_metrics = dict()
    self.db_bgw_metrics = list()
    self.db_archiver_metrics = list()
    self.replication_metrics = dict()
    self.custom_metrics = dict()
    # keep track of host/port present in any configured instance
    self._known_servers = set()
def test_https_proxy_fail():
    """Without skip_proxy, the bad HTTPS_PROXY from the environment is used and fails."""
    saved_env = dict(os.environ)
    os.environ['HTTPS_PROXY'] = BAD_PROXY_SETTINGS['https']
    try:
        # Patch out the agent-supplied proxy so only the env var applies.
        with mock.patch('datadog_checks.checks.AgentCheck._get_requests_proxy', return_value={}):
            check = AgentCheck()
            proxies = check.get_instance_proxy({}, 'uri/health')
            with pytest.raises((ConnectTimeout, ProxyError)):
                requests.get('https://google.com', timeout=1, proxies=proxies)
    finally:
        # Restore the environment exactly as it was.
        os.environ.clear()
        os.environ.update(saved_env)
def test_valid_sc(self, aggregator):
    """Service checks are forwarded with status, tags, hostname and message intact."""
    check = AgentCheck()

    # Plain service check with an empty message.
    check.service_check("testservicecheck", AgentCheck.OK, tags=None, message="")
    aggregator.assert_service_check("testservicecheck", status=AgentCheck.OK)

    # Full payload: tags, explicit hostname and a message.
    check.service_check(
        "testservicecheckwithhostname",
        AgentCheck.OK,
        tags=["foo", "bar"],
        hostname="testhostname",
        message="a message",
    )
    aggregator.assert_service_check(
        "testservicecheckwithhostname",
        status=AgentCheck.OK,
        tags=["foo", "bar"],
        hostname="testhostname",
        message="a message",
    )

    # A None message must also be accepted.
    check.service_check("testservicecheckwithnonemessage", AgentCheck.OK, message=None)
    aggregator.assert_service_check("testservicecheckwithnonemessage", status=AgentCheck.OK)
def __init__(self, name, init_config, agentConfig, instances=None):
    """Expand each base-key family with its statistic suffixes into self.keys."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # (suffixes, base keys): every base key gets every suffix, suffix-major order.
    expansions = (
        (["mean", "median", "95", "99", "100"], self.stat_keys),
        (["min", "max", "mean", "median", "95", "99", "999"], self.search_latency_keys),
        (["min", "max", "mean", "median", "total"], self.vnodeq_keys),
    )
    for suffixes, bases in expansions:
        for suffix in suffixes:
            self.keys.extend(base + "_" + suffix for base in bases)
    # Sentinel meaning "no previous coordinator-redirect total observed yet".
    self.prev_coord_redirs_total = -1
def __init__(self, name, init_config, agentConfig, instances):
    """Locate nfsiostat: explicit config path first, then well-known install dirs."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    configured = init_config.get('nfsiostat_path')
    if configured:
        # An explicitly configured path always wins.
        self.nfs_cmd = [configured, '1', '2']
        return
    # Otherwise probe the known locations, preferring the agent-embedded copy.
    for candidate in ('/opt/datadog-agent/embedded/sbin/nfsiostat', '/usr/local/sbin/nfsiostat'):
        if os.path.exists(candidate):
            self.nfs_cmd = [candidate, '1', '2']
            return
    raise Exception(
        'nfsstat check requires nfsiostat be installed, please install it '
        '(through nfs-utils) or set the path to the installed version'
    )
def __init__(self, name, init_config, agentConfig, instances=None):
    """Read timeouts, the context cap and retry counts from init_config."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)
    conf = init_config.get
    self._zk_timeout = int(conf('zk_timeout', DEFAULT_ZK_TIMEOUT))
    self._kafka_timeout = int(conf('kafka_timeout', DEFAULT_KAFKA_TIMEOUT))
    # Hard cap on how many partition contexts may be submitted.
    self.context_limit = int(conf('max_partition_contexts', CONTEXT_UPPER_BOUND))
    self._broker_retries = int(conf('kafka_retries', DEFAULT_KAFKA_RETRIES))
    # Last zookeeper read timestamps and cached kafka clients, per instance.
    self._zk_last_ts = dict()
    self.kafka_clients = dict()
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize per-server caches and warn if the deprecated custom_metrics option is used.

    Fix: ``any()`` accepts any iterable — the intermediate list comprehension
    was a needless allocation; a generator expression is equivalent and lazy.
    """
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # Connection and metadata caches, populated during checks.
    self.dbs = {}
    self.versions = {}
    self.instance_metrics = {}
    self.bgw_metrics = {}
    self.archiver_metrics = {}
    self.db_bgw_metrics = []
    self.db_archiver_metrics = []
    self.replication_metrics = {}
    self.activity_metrics = {}
    self.custom_metrics = {}

    # Deprecate custom_metrics in favor of custom_queries
    if instances is not None and any('custom_metrics' in instance for instance in instances):
        self.warning("DEPRECATION NOTICE: Please use the new custom_queries option "
                     "rather than the now deprecated custom_metrics")
def __init__(self, name, init_config, agentConfig, instances):
    """Initialize pools, per-instance cache timeouts and event configuration."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.time_started = time.time()
    self.pool_started = False
    self.jobs_status = {}
    self.exceptionq = Queue()

    # Connections open to vCenter instances.
    self.server_instances = {}
    # Per-instance event configuration.
    self.event_config = {}

    # Caching resources, timeouts — the intervals are loop-invariant, read once.
    morlist_interval = init_config.get('refresh_morlist_interval', REFRESH_MORLIST_INTERVAL)
    metadata_interval = init_config.get('refresh_metrics_metadata_interval', REFRESH_METRICS_METADATA_INTERVAL)
    self.cache_times = {}
    for inst in self.instances:
        key = self._instance_key(inst)
        self.cache_times[key] = {
            MORLIST: {LAST: 0, INTERVAL: morlist_interval},
            METRICS_METADATA: {LAST: 0, INTERVAL: metadata_interval},
        }
        self.event_config[key] = inst.get('event_config')

    # Managed entity raw view.
    self.registry = {}
    # First layer of cache (entities straight from the tree).
    self.morlist_raw = {}
    # Second layer, processed from the first one.
    self.morlist = {}
    # Metrics metadata: perfCounterId -> {name, group, description}.
    self.metrics_metadata = {}
    self.latest_event_query = {}
def test_metric_limit_instance_config(self, aggregator):
    """max_returned_metrics caps submissions and warns exactly once past the cap."""
    check = AgentCheck("test", {}, [{"max_returned_metrics": 42}])
    assert check.get_warnings() == []

    # Exactly at the limit: everything is accepted, no warning yet.
    for _ in range(42):
        check.gauge("metric", 0)
    assert len(check.get_warnings()) == 0
    assert len(aggregator.metrics("metric")) == 42

    # One past the limit: the point is dropped and a single warning appears.
    check.gauge("metric", 0)
    assert len(check.get_warnings()) == 1
    assert len(aggregator.metrics("metric")) == 42
def __init__(self, name, init_config, agentConfig, instances=None):
    """Set up the access-denied and PID caches, plus optional procfs_path handling."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # ad stands for access denied.
    # PIDs hitting that error are cached and not retried more often than
    # `access_denied_cache_duration`. The cache is global (all PIDs) but is
    # meant to be refreshed by instance.
    self.last_ad_cache_ts = {}
    self.ad_cache = set()
    self.access_denied_cache_duration = int(
        init_config.get('access_denied_cache_duration', DEFAULT_AD_CACHE_DURATION)
    )

    # By default the PID list is cached for a while. Sometimes that is
    # unwanted because it can interfere with no-data monitoring.
    # This cache is indexed per instance.
    self.last_pid_cache_ts = {}
    self.pid_cache = {}
    self.pid_cache_duration = int(
        init_config.get('pid_cache_duration', DEFAULT_PID_CACHE_DURATION)
    )

    self._conflicting_procfs = False
    self._deprecated_init_procfs = False
    if Platform.is_linux():
        procfs_path = init_config.get('procfs_path')
        if procfs_path:
            if 'procfs_path' in agentConfig and procfs_path != agentConfig.get('procfs_path').rstrip('/'):
                # Both init_config and the agent config set it, and they disagree.
                self._conflicting_procfs = True
            else:
                # Setting it through init_config only is the deprecated path.
                self._deprecated_init_procfs = True
            psutil.PROCFS_PATH = procfs_path

    # Process cache, indexed by instance.
    self.process_cache = defaultdict(dict)
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize per-server replica-state tracking and index rate storage."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # Last known replica-set state, per server.
    self._last_state_by_server = dict()
    # Cached index rate values.
    self.idx_rates = dict()
def __init__(self, *args, **kwargs):
    """Forward all arguments to AgentCheck and start with an empty instance cache."""
    AgentCheck.__init__(self, *args, **kwargs)
    # Per-instance cached state, filled in during check runs.
    self.instance_cache = dict()
def __init__(self, name, init_config, agentConfig, instances=None):
    """Remember the last seen build ID of each instance across check runs."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # Keep track of last build IDs per instance.
    self.last_build_ids = dict()
def __init__(self, name, init_config, agentConfig, instances=None):
    """Create per-instance state containers that persist across check runs."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # A fresh CouchbaseInstanceState is created lazily on first access per instance.
    self._instance_states = defaultdict(lambda: self.CouchbaseInstanceState())
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize cluster status storage kept across all check runs."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # Host status needs to persist across all checks.
    self.cluster_status = dict()
def __init__(self, name, init_config, agentConfig, instances=None):
    """Initialize the database handle cache (filled in during checks)."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # Database handles, presumably keyed per instance — confirm with callers.
    self.dbs = dict()
def __init__(self, name, init_config, agentConfig, instances):
    """Initialize the WMI sampler and sampler-property caches."""
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # Cached WMI samplers and their property lists, reused across runs.
    self.wmi_samplers = dict()
    self.wmi_props = dict()