def __init__(self, name, init_config, agentConfig):
    AgentCheck.__init__(self, name, init_config, agentConfig)
    # Used to store the instances of the jmx connector (1 per instance)
    self.jmxs = {}
    self.jmx_metrics = []
    self.init_config = init_config
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # Initialize an HTTP opener with Unix socket support
    socket_timeout = int(init_config.get('socket_timeout', 0)) \
        or DEFAULT_SOCKET_TIMEOUT
    UnixHTTPConnection.socket_timeout = socket_timeout
    self.url_opener = urllib2.build_opener(UnixSocketHandler())
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    # message.type is the index in this array
    # see: https://github.com/prometheus/client_model/blob/master/ruby/lib/prometheus/client/model/metrics.pb.rb
    self.METRIC_TYPES = ['counter', 'gauge', 'summary', 'untyped', 'histogram']

    # Patterns used to extract metrics and labels from the Prometheus
    # text format. Do not overwrite them.
    self.metrics_pattern = re.compile(r'^(\w+)(.*)\s+([0-9.+eE,]+)$')
    self.lbl_pattern = re.compile(r'(\w+)="(.*?)"')

    # `NAMESPACE` is the prefix metrics will have. It needs to be hardcoded
    # in the child check class.
    self.NAMESPACE = ''

    # `metrics_mapper` is a dictionary whose keys are the metrics to capture
    # and whose values are the corresponding metric names to use in Datadog.
    # Note: it is empty in the parent class but needs to be
    # overloaded/hardcoded in the final check so the metrics are not counted
    # as custom metrics.
    self.metrics_mapper = {}

    # If the `labels_mapper` dictionary is provided, the metric labels listed
    # in `labels_mapper` will use the corresponding value as the tag name
    # when sending the gauges.
    self.labels_mapper = {}

    # `exclude_labels` is an array of label names to exclude. Those labels
    # will simply not be added as tags when submitting the metric.
    self.exclude_labels = []
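# A minimal, runnable sketch (not from the source) showing how the two
# patterns above split one line of the Prometheus text format into metric
# name, labels, and value; the sample line is illustrative.
import re

metrics_pattern = re.compile(r'^(\w+)(.*)\s+([0-9.+eE,]+)$')
lbl_pattern = re.compile(r'(\w+)="(.*?)"')

line = 'http_requests_total{method="post",code="200"} 1027'
match = metrics_pattern.match(line)
name, raw_labels, value = match.group(1), match.group(2), match.group(3)
labels = dict(lbl_pattern.findall(raw_labels))
# name == 'http_requests_total'
# labels == {'method': 'post', 'code': '200'}
# value == '1027'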
def test_service_check(self):
    check_name = "test.service_check"
    status = AgentCheck.CRITICAL
    tags = ["host:test", "other:thing"]
    host_name = "foohost"
    timestamp = time.time()

    check = AgentCheck("test", {}, {"checksd_hostname": "foo"})
    check.service_check(check_name, status, tags, timestamp, host_name)
    self.assertEquals(len(check.service_checks), 1, check.service_checks)

    val = check.get_service_checks()
    self.assertEquals(len(val), 1)
    check_run_id = val[0].get("id", None)
    self.assertNotEquals(check_run_id, None)
    self.assertEquals(
        [
            {
                "id": check_run_id,
                "check": check_name,
                "status": status,
                "host_name": host_name,
                "tags": tags,
                "timestamp": timestamp,
                "message": None,
            }
        ],
        val,
    )
    self.assertEquals(len(check.service_checks), 0, check.service_checks)
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    for k in ["mean", "median", "95", "99", "100"]:
        for m in self.stat_keys:
            self.keys.append(m + "_" + k)
    self.prev_coord_redirs_total = -1
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # ad stands for access denied
    # We cache the PIDs getting this error and don't iterate on them
    # more often than `access_denied_cache_duration`.
    # This cache is for all PIDs so it's global, but it should
    # be refreshed by instance.
    self.last_ad_cache_ts = {}
    self.ad_cache = set()
    self.access_denied_cache_duration = int(
        init_config.get(
            'access_denied_cache_duration',
            DEFAULT_AD_CACHE_DURATION
        )
    )

    # By default cache the PID list for a while.
    # Sometimes it's not wanted b/c it can mess with no-data monitoring.
    # This cache is indexed per instance.
    self.last_pid_cache_ts = {}
    self.pid_cache = {}
    self.pid_cache_duration = int(
        init_config.get(
            'pid_cache_duration',
            DEFAULT_PID_CACHE_DURATION
        )
    )

    # Process cache, indexed by instance
    self.process_cache = defaultdict(dict)
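# Hedged sketch (not in the source): one way the access-denied cache above
# could be refreshed per instance; `instance_key` and the method name are
# illustrative assumptions.
import time

def _refresh_ad_cache_if_needed(self, instance_key):
    now = time.time()
    last = self.last_ad_cache_ts.get(instance_key, 0)
    if now - last > self.access_denied_cache_duration:
        # Forget cached access-denied PIDs so they are retried
        self.ad_cache = set()
        self.last_ad_cache_ts[instance_key] = now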
def __init__(self, name, init_config, agentConfig, instances=None): if instances is not None and len(instances) > 1: raise Exception("Disk check only supports one configured instance.") AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances) # Get the configuration once for all self._load_conf(instances[0])
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # A dictionary to keep track of service statuses
    self.statuses = {}
    self.notified = {}
    self.start_pool()
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.time_started = time.time()
    self.pool_started = False
    self.exceptionq = Queue()

    # Connections open to vCenter instances
    self.server_instances = {}

    # Caching resources, timeouts
    self.cache_times = {}
    for instance in self.instances:
        i_key = self._instance_key(instance)
        self.cache_times[i_key] = {
            MORLIST: {
                LAST: 0,
                INTERVAL: init_config.get('refresh_morlist_interval',
                                          REFRESH_MORLIST_INTERVAL)
            },
            METRICS_METADATA: {
                LAST: 0,
                INTERVAL: init_config.get('refresh_metrics_metadata_interval',
                                          REFRESH_METRICS_METADATA_INTERVAL)
            }
        }

    # First layer of cache (get entities from the tree)
    self.morlist_raw = {}
    # Second layer, processed from the first one
    self.morlist = {}
    # Metrics metadata, basically perfCounterId -> {name, group, description}
    self.metrics_metadata = {}

    self.latest_event_query = {}
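# Hedged sketch (not in the source): how the LAST/INTERVAL pairs kept in
# self.cache_times above can drive refresh decisions; the method name is an
# illustrative assumption.
import time

def _is_cache_expired(self, instance, entity):
    # `entity` is MORLIST or METRICS_METADATA
    i_key = self._instance_key(instance)
    elapsed = time.time() - self.cache_times[i_key][entity][LAST]
    return elapsed > self.cache_times[i_key][entity][INTERVAL]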
def __init__(self, name, init_config, agentConfig, instances=None):
    if instances is not None and len(instances) > 1:
        raise Exception('Kubernetes check only supports one configured instance.')
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.kubeutil = KubeUtil()
    if not self.kubeutil.host:
        raise Exception('Unable to get default router and host parameter is not set')
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    for instance in instances or []:
        url = instance.get('url', '')
        parsed_url = urlparse(url)
        ssl_verify = not _is_affirmative(instance.get('disable_ssl_validation', False))
        if not ssl_verify and parsed_url.scheme == 'https':
            self.log.warning('Skipping SSL cert validation for %s based on configuration.' % url)
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Members' last replica set states
    self._last_state_by_server = {}

    # List of metrics to collect per instance
    self.metrics_to_collect_by_instance = {}
def __init__(self, name, init_config, agentConfig, instances=None): if instances is not None and len(instances) > 1: raise Exception("Docker check only supports one configured instance.") AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances) self.init_success = False self.init()
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Parse general counters
    self.general_counters = self._parse_general_counters(init_config)

    # Parse job specific counters
    self.job_specific_counters = self._parse_job_specific_counters(init_config)
def __init__(self, name, init_config, agentConfig, instances=None): AgentCheck.__init__(self, name, init_config, agentConfig, instances) if instances is not None and len(instances) > 1: raise Exception("Consul check only supports one configured instance.") self._local_config = None self._last_config_fetch_time = None self._last_known_leader = None
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances,
                        allow_no_data=True)

    # A dictionary to keep track of service statuses
    self.statuses = {}
    self.notified = {}
    self.start_pool()
    self.nb_failures = 0
def __init__(self, name, init_config, agentConfig, instances=None): AgentCheck.__init__(self, name, init_config, agentConfig, instances) # Load Custom MIB directory mibs_path = None if init_config is not None: mibs_path = init_config.get("mibs_folder") SnmpCheck.create_command_generator(mibs_path)
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # A dictionary to keep track of service statuses
    self.statuses = {}
    self.notified = {}
    self.nb_failures = 0
    self.pool_started = False
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.dbs = {}
    self.versions = {}
    self.instance_metrics = {}
    self.bgw_metrics = {}
    self.db_instance_metrics = []
    self.db_bgw_metrics = []
    self.replication_metrics = {}
def __init__(self, *args, **kwargs):
    AgentCheck.__init__(self, *args, **kwargs)
    self.log.info('key_file_location: %s' % self.init_config.get('key_file_location'))
    self.service = self.get_service(
        self.apiName,
        self.version,
        self.scope,
        self.init_config.get('key_file_location'))
def __init__(self, name, init_config, agentConfig):
    AgentCheck.__init__(self, name, init_config, agentConfig)
    try:
        import redis
    except ImportError:
        self.log.error('redisdb.yaml exists but redis module can not be imported. Skipping check.')

    self.previous_total_commands = {}
    self.connections = {}
def __init__(self, name, init_config, agentConfig):
    AgentCheck.__init__(self, name, init_config, agentConfig)

    self._mountpoints = {}
    docker_root = init_config.get('docker_root', '/')
    for metric in CGROUP_METRICS:
        self._mountpoints[metric["cgroup"]] = self._find_cgroup(metric["cgroup"], docker_root)
    self._last_event_collection_ts = defaultdict(lambda: None)
    self.url_opener = urllib2.build_opener(UnixSocketHandler())
    self.should_get_size = True
    self._cgroup_filename_pattern = None
def __init__(self, name, init_config, agentConfig, instances=None):
    if instances is not None and len(instances) > 1:
        raise Exception('Kubernetes check only supports one configured instance.')
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    inst = instances[0] if instances is not None else None
    self.kubeutil = KubeUtil(instance=inst)
    if not self.kubeutil.host:
        raise Exception('Unable to retrieve Docker hostname and host parameter is not set')
def __init__(self, name, init_config, agentConfig):
    AgentCheck.__init__(self, name, init_config, agentConfig)
    # Used to store the instances of the jmx connector (1 per instance)
    self.jmxs = {}
    self.jmx_metrics = []
    self.init_config = init_config
    # Used to store the number of times we opened a new jmx connector for this instance
    self.jmx_connections_watcher = {}
def __init__(self, name, init_config, agentConfig, instances=None): if instances is not None and len(instances) > 1: raise Exception("Docker check only supports one configured instance.") AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances) self.init_success = False self.init() self._service_discovery = agentConfig.get('service_discovery') and \ agentConfig.get('service_discovery_backend') == 'docker'
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Load Custom MIB directory
    mibs_path = None
    ignore_nonincreasing_oid = False
    if init_config is not None:
        mibs_path = init_config.get("mibs_folder")
        ignore_nonincreasing_oid = _is_affirmative(init_config.get("ignore_nonincreasing_oid", False))
    SnmpCheck.create_command_generator(mibs_path, ignore_nonincreasing_oid)
def test_monotonic_count(self):
    metric = 'test.count.type.2'
    tags = ['test', 'type:count']
    hostname = 'test.host'
    device_name = 'host1'

    agent_check = AgentCheck('test_count_check', {}, {})

    counters = [0, 1, 2, 4, 7, 12, 20]
    for counter in counters:
        agent_check.monotonic_count(metric, counter, tags=tags,
                                    hostname=hostname, device_name=device_name)
    flush_ts = time.time()
    results = agent_check.get_metrics()
    nt.assert_true(results is not None)
    nt.assert_equal(1, len(results))
    result = results[0]
    ret_metric, timestamp, value = result[0], result[1], result[2]
    nt.assert_equal(metric, ret_metric, msg=self.INCORRECT_METRIC)
    nt.ok_(abs(flush_ts - timestamp) <= 1, msg=self.DELAYED_TS)
    nt.assert_equal(counters[-1] - counters[0], value)

    # add a single point
    counters = [30]
    for counter in counters:
        agent_check.monotonic_count(metric, counter, tags=tags,
                                    hostname=hostname, device_name=device_name)
    flush_ts = time.time()
    results = agent_check.get_metrics()
    nt.assert_true(results is not None)
    nt.assert_equal(1, len(results))
    result = results[0]
    ret_metric, timestamp, value = result[0], result[1], result[2]
    nt.assert_equal(metric, ret_metric, msg=self.INCORRECT_METRIC)
    nt.ok_(abs(flush_ts - timestamp) <= 1, msg=self.DELAYED_TS)
    nt.assert_equal(10, value)

    # test non-monotonic sequence
    counters = [40, 35, 40, 45, 30, 32]
    for counter in counters:
        agent_check.monotonic_count(metric, counter, tags=tags,
                                    hostname=hostname, device_name=device_name)
    flush_ts = time.time()
    results = agent_check.get_metrics()
    nt.assert_true(results is not None)
    nt.assert_equal(1, len(results))
    result = results[0]
    ret_metric, timestamp, value = result[0], result[1], result[2]
    nt.assert_equal(metric, ret_metric, msg=self.INCORRECT_METRIC)
    nt.ok_(abs(flush_ts - timestamp) <= 1, msg=self.DELAYED_TS)
    # should skip when counter is reset
    # i.e. between 40 and 35 and between 45 and 30
    # 22 = (40-30) + (40-35) + (45-40) + (32-30) = 10 + 5 + 5 + 2
    nt.assert_equal(22, value)
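# Hedged illustration (not from the source): the reset-skipping accumulation
# that the assertions above rely on. A delta is added only when the counter
# did not decrease; a decrease is treated as a reset and contributes nothing.
def monotonic_delta_sum(previous, samples):
    total = 0
    for sample in samples:
        if sample >= previous:
            total += sample - previous
        previous = sample
    return total

assert monotonic_delta_sum(30, [40, 35, 40, 45, 30, 32]) == 22  # 10 + 5 + 5 + 2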
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Members' last replica set states
    self._last_state_by_server = {}

    # List of metrics to collect per instance
    self.metrics_to_collect_by_instance = {}

    self.collection_metrics_names = []
    for (key, value) in self.COLLECTION_METRICS.iteritems():
        self.collection_metrics_names.append(key.split('.')[1])
def __init__(self, name, init_config, agentConfig):
    AgentCheck.__init__(self, name, init_config, agentConfig)

    # If we can't import the redis module, we should always skip this check
    try:
        import redis
        self.enabled = True
    except ImportError:
        self.enabled = False
        self.log.error('redisdb.yaml exists but redis module can not be imported. Skipping check.')

    self.previous_total_commands = {}
    self.connections = {}
def __init__(self, name, init_config, agentConfig):
    AgentCheck.__init__(self, name, init_config, agentConfig)

    # Load any custom metrics from conf.d/sqlserver.yaml
    for row in init_config.get('custom_metrics', []):
        if row['type'] not in VALID_METRIC_TYPES:
            self.log.error('%s has an invalid metric type: %s'
                           % (row['name'], row['type']))
        self.METRICS.append(
            (row['name'], row['type'], row['counter_name'],
             row.get('instance_name', ''), row.get('tag_by', None))
        )

    # Cache connections
    self.connections = {}
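# Hedged example (not in the source): the shape of init_config that the loop
# above consumes; the metric name and counter name are illustrative.
example_init_config = {
    'custom_metrics': [
        {
            'name': 'sqlserver.clr.execution',   # metric name sent to Datadog
            'type': 'gauge',                     # must be in VALID_METRIC_TYPES
            'counter_name': 'CLR Execution',     # performance counter to read
            'instance_name': '',                 # optional
            'tag_by': None,                      # optional
        },
    ]
}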
def __init__(self, name, init_config, agentConfig, instances):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Cache
    self.wmi_samplers = {}
    self.wmi_props = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.NAMESPACE = r"root\virtualization\v2"
    self.SystemName = ""
    self.conn = ""
    self.ComputerName = ""
def __init__(self, name, init_config, agentConfig):
    AgentCheck.__init__(self, name, init_config, agentConfig)
    self.high_watermarks = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self._instance_states = defaultdict(lambda: ConsulCheckInstanceState())
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.connections = {}
    self.last_timestamp_seen = defaultdict(int)
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.assumed_url = {}
def __init__(self, *args, **kwargs):
    AgentCheck.__init__(self, *args, **kwargs)
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.already_alerted = []
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.checker = None
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.get_values()
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.already_alerted = []
    # Used to send a CRITICAL rabbitmq.aliveness service check if the server goes down
    self.cached_vhosts = {}
def __init__(self, *args, **kwargs):
    AgentCheck.__init__(self, *args, **kwargs)
    self._collector_payload = {}
    self._metric_context = {}
class TestCore(unittest.TestCase):
    "Tests to validate the core check logic"

    def setUp(self):
        self.c = Check(logger)
        self.c.gauge("test-metric")
        self.c.counter("test-counter")

    def setUpAgentCheck(self):
        self.ac = AgentCheck('test', {}, {'checksd_hostname': "foo"})

    def test_gauge(self):
        self.assertEquals(self.c.is_gauge("test-metric"), True)
        self.assertEquals(self.c.is_counter("test-metric"), False)
        self.c.save_sample("test-metric", 1.0)
        # call twice in a row, should be invariant
        self.assertEquals(self.c.get_sample("test-metric"), 1.0)
        self.assertEquals(self.c.get_sample("test-metric"), 1.0)
        self.assertEquals(self.c.get_sample_with_timestamp("test-metric")[1], 1.0)
        # new value, old one should be gone
        self.c.save_sample("test-metric", 2.0)
        self.assertEquals(self.c.get_sample("test-metric"), 2.0)
        self.assertEquals(len(self.c._sample_store["test-metric"]), 1)
        # with explicit timestamp
        self.c.save_sample("test-metric", 3.0, 1298066183.607717)
        self.assertEquals(self.c.get_sample_with_timestamp("test-metric"),
                          (1298066183.607717, 3.0, None, None))
        # get_samples()
        self.assertEquals(self.c.get_samples(), {"test-metric": 3.0})

    def testEdgeCases(self):
        self.assertRaises(CheckException, self.c.get_sample, "unknown-metric")
        # same value
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.assertRaises(Infinity, self.c.get_sample, "test-counter")

    def test_counter(self):
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.assertRaises(UnknownValue, self.c.get_sample, "test-counter", expire=False)
        self.c.save_sample("test-counter", 2.0, 2.0)
        self.assertEquals(self.c.get_sample("test-counter", expire=False), 1.0)
        self.assertEquals(self.c.get_sample_with_timestamp("test-counter", expire=False),
                          (2.0, 1.0, None, None))
        self.assertEquals(self.c.get_samples(expire=False), {"test-counter": 1.0})
        self.c.save_sample("test-counter", -2.0, 3.0)
        self.assertRaises(UnknownValue, self.c.get_sample_with_timestamp, "test-counter")

    def test_tags(self):
        # Test metric tagging
        now = int(time.time())
        # Tag metrics
        self.c.save_sample("test-counter", 1.0, 1.0, tags=["tag1", "tag2"])
        self.c.save_sample("test-counter", 2.0, 2.0, tags=["tag1", "tag2"])
        # Only 1 point recorded for this combination of tags, won't be sent
        self.c.save_sample("test-counter", 3.0, 3.0, tags=["tag1", "tag3"])
        self.c.save_sample("test-metric", 3.0, now, tags=["tag3", "tag4"])
        # Arg checks
        self.assertRaises(CheckException, self.c.save_sample,
                          "test-metric", 4.0, now + 5, tags="abc")
        # This is a different combination of tags
        self.c.save_sample("test-metric", 3.0, now, tags=["tag5", "tag3"])
        results = self.c.get_metrics()
        results.sort()
        self.assertEquals(results,
                          [("test-counter", 2.0, 1.0, {"tags": ["tag1", "tag2"]}),
                           ("test-metric", now, 3.0, {"tags": ["tag3", "tag4"]}),
                           ("test-metric", now, 3.0, {"tags": ["tag3", "tag5"]}),
                           ])
        # Tagged metrics are not available through get_samples anymore
        self.assertEquals(self.c.get_samples(), {})

    def test_samples(self):
        self.assertEquals(self.c.get_samples(), {})
        self.c.save_sample("test-metric", 1.0, 0.0)  # value, ts
        self.c.save_sample("test-counter", 1.0, 1.0)  # value, ts
        self.c.save_sample("test-counter", 4.0, 2.0)  # value, ts
        assert "test-metric" in self.c.get_samples_with_timestamps(expire=False), \
            self.c.get_samples_with_timestamps(expire=False)
        self.assertEquals(self.c.get_samples_with_timestamps(expire=False)["test-metric"],
                          (0.0, 1.0, None, None))
        assert "test-counter" in self.c.get_samples_with_timestamps(expire=False), \
            self.c.get_samples_with_timestamps(expire=False)
        self.assertEquals(self.c.get_samples_with_timestamps(expire=False)["test-counter"],
                          (2.0, 3.0, None, None))

    def test_name(self):
        self.assertEquals(self.c.normalize("metric"), "metric")
        self.assertEquals(self.c.normalize("metric", "prefix"), "prefix.metric")
        self.assertEquals(self.c.normalize("__metric__", "prefix"), "prefix.metric")
        self.assertEquals(self.c.normalize("abc.metric(a+b+c{}/5)", "prefix"),
                          "prefix.abc.metric_a_b_c_5")
        self.assertEquals(self.c.normalize("VBE.default(127.0.0.1,,8080).happy", "varnish"),
                          "varnish.VBE.default_127.0.0.1_8080.happy")
        self.assertEquals(self.c.normalize("metric@device"), "metric_device")

        # Same tests for the AgentCheck
        self.setUpAgentCheck()
        self.assertEquals(self.ac.normalize("metric"), "metric")
        self.assertEquals(self.ac.normalize("metric", "prefix"), "prefix.metric")
        self.assertEquals(self.ac.normalize("__metric__", "prefix"), "prefix.metric")
        self.assertEquals(self.ac.normalize("abc.metric(a+b+c{}/5)", "prefix"),
                          "prefix.abc.metric_a_b_c_5")
        self.assertEquals(self.ac.normalize("VBE.default(127.0.0.1,,8080).happy", "varnish"),
                          "varnish.VBE.default_127.0.0.1_8080.happy")
        self.assertEquals(self.ac.normalize("metric@device"), "metric_device")

        self.assertEqual(self.ac.normalize("PauseTotalNs", "prefix", fix_case=True),
                         "prefix.pause_total_ns")
        self.assertEqual(self.ac.normalize("Metric.wordThatShouldBeSeparated", "prefix",
                                           fix_case=True),
                         "prefix.metric.word_that_should_be_separated")

        self.assertEqual(self.ac.normalize_device_name(",@+*-()[]{}//device@name"),
                         "___________//device_name")

    def test_service_check(self):
        check_name = 'test.service_check'
        status = AgentCheck.CRITICAL
        tags = ['host:test', 'other:thing']
        host_name = 'foohost'
        timestamp = time.time()

        check = AgentCheck('test', {}, {'checksd_hostname': 'foo'})

        # No "message"/"tags" field
        check.service_check(check_name, status, timestamp=timestamp, hostname=host_name)
        self.assertEquals(len(check.service_checks), 1, check.service_checks)
        val = check.get_service_checks()
        self.assertEquals(len(val), 1)
        check_run_id = val[0].get('id', None)
        self.assertNotEquals(check_run_id, None)
        self.assertEquals([{
            'id': check_run_id,
            'check': check_name,
            'status': status,
            'host_name': host_name,
            'timestamp': timestamp,
        }], val)
        self.assertEquals(len(check.service_checks), 0, check.service_checks)

        # With "message" field
        check.service_check(check_name, status, tags, timestamp, host_name,
                            message='foomessage')
        self.assertEquals(len(check.service_checks), 1, check.service_checks)
        val = check.get_service_checks()
        self.assertEquals(len(val), 1)
        check_run_id = val[0].get('id', None)
        self.assertNotEquals(check_run_id, None)
        self.assertEquals([{
            'id': check_run_id,
            'check': check_name,
            'status': status,
            'host_name': host_name,
            'tags': tags,
            'timestamp': timestamp,
            'message': 'foomessage',
        }], val)
        self.assertEquals(len(check.service_checks), 0, check.service_checks)

    def test_no_proxy(self):
        """
        Starting with Agent 5.0.0, there should always be a local forwarder
        running and all payloads should go through it. So we should make sure
        that we pass the no_proxy environment variable that will be used by
        requests (See: https://github.com/kennethreitz/requests/pull/945)
        """
        from requests.utils import get_environ_proxies
        from os import environ as env

        env["http_proxy"] = "http://*****:*****@google.com:4444"

        proxy_from_env = get_proxy({})
        self.assertEqual(proxy_from_env, {
            "host": "google.com",
            "port": 4444,
            "user": "******",
            "password": "******"
        })
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.default_timeout = init_config.get('default_timeout', self.DEFAULT_TIMEOUT)
class TestCore(unittest.TestCase):
    "Tests to validate the core check logic"

    def setUp(self):
        self.c = Check(logger)
        self.c.gauge("test-metric")
        self.c.counter("test-counter")

    def setUpAgentCheck(self):
        self.ac = AgentCheck('test', {}, {'checksd_hostname': "foo"})

    def test_gauge(self):
        self.assertEquals(self.c.is_gauge("test-metric"), True)
        self.assertEquals(self.c.is_counter("test-metric"), False)
        self.c.save_sample("test-metric", 1.0)
        # call twice in a row, should be invariant
        self.assertEquals(self.c.get_sample("test-metric"), 1.0)
        self.assertEquals(self.c.get_sample("test-metric"), 1.0)
        self.assertEquals(self.c.get_sample_with_timestamp("test-metric")[1], 1.0)
        # new value, old one should be gone
        self.c.save_sample("test-metric", 2.0)
        self.assertEquals(self.c.get_sample("test-metric"), 2.0)
        self.assertEquals(len(self.c._sample_store["test-metric"]), 1)
        # with explicit timestamp
        self.c.save_sample("test-metric", 3.0, 1298066183.607717)
        self.assertEquals(self.c.get_sample_with_timestamp("test-metric"),
                          (1298066183.607717, 3.0, None, None))
        # get_samples()
        self.assertEquals(self.c.get_samples(), {"test-metric": 3.0})

    def testEdgeCases(self):
        self.assertRaises(CheckException, self.c.get_sample, "unknown-metric")
        # same value
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.assertRaises(Infinity, self.c.get_sample, "test-counter")

    def test_counter(self):
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.assertRaises(UnknownValue, self.c.get_sample, "test-counter", expire=False)
        self.c.save_sample("test-counter", 2.0, 2.0)
        self.assertEquals(self.c.get_sample("test-counter", expire=False), 1.0)
        self.assertEquals(self.c.get_sample_with_timestamp("test-counter", expire=False),
                          (2.0, 1.0, None, None))
        self.assertEquals(self.c.get_samples(expire=False), {"test-counter": 1.0})
        self.c.save_sample("test-counter", -2.0, 3.0)
        self.assertRaises(UnknownValue, self.c.get_sample_with_timestamp, "test-counter")

    def test_tags(self):
        # Test metric tagging
        now = int(time.time())
        # Tag metrics
        self.c.save_sample("test-counter", 1.0, 1.0, tags=["tag1", "tag2"])
        self.c.save_sample("test-counter", 2.0, 2.0, tags=["tag1", "tag2"])
        # Only 1 point recorded for this combination of tags, won't be sent
        self.c.save_sample("test-counter", 3.0, 3.0, tags=["tag1", "tag3"])
        self.c.save_sample("test-metric", 3.0, now, tags=["tag3", "tag4"])
        # Arg checks
        self.assertRaises(CheckException, self.c.save_sample,
                          "test-metric", 4.0, now + 5, tags="abc")
        # This is a different combination of tags
        self.c.save_sample("test-metric", 3.0, now, tags=["tag5", "tag3"])
        results = self.c.get_metrics()
        results.sort()
        self.assertEquals(results,
                          [("test-counter", 2.0, 1.0, {"tags": ["tag1", "tag2"]}),
                           ("test-metric", now, 3.0, {"tags": ["tag3", "tag4"]}),
                           ("test-metric", now, 3.0, {"tags": ["tag3", "tag5"]}),
                           ])
        # Tagged metrics are not available through get_samples anymore
        self.assertEquals(self.c.get_samples(), {})

    def test_samples(self):
        self.assertEquals(self.c.get_samples(), {})
        self.c.save_sample("test-metric", 1.0, 0.0)  # value, ts
        self.c.save_sample("test-counter", 1.0, 1.0)  # value, ts
        self.c.save_sample("test-counter", 4.0, 2.0)  # value, ts
        assert "test-metric" in self.c.get_samples_with_timestamps(expire=False), \
            self.c.get_samples_with_timestamps(expire=False)
        self.assertEquals(self.c.get_samples_with_timestamps(expire=False)["test-metric"],
                          (0.0, 1.0, None, None))
        assert "test-counter" in self.c.get_samples_with_timestamps(expire=False), \
            self.c.get_samples_with_timestamps(expire=False)
        self.assertEquals(self.c.get_samples_with_timestamps(expire=False)["test-counter"],
                          (2.0, 3.0, None, None))

    def test_name(self):
        self.assertEquals(self.c.normalize("metric"), "metric")
        self.assertEquals(self.c.normalize("metric", "prefix"), "prefix.metric")
        self.assertEquals(self.c.normalize("__metric__", "prefix"), "prefix.metric")
        self.assertEquals(self.c.normalize("abc.metric(a+b+c{}/5)", "prefix"),
                          "prefix.abc.metric_a_b_c_5")
        self.assertEquals(self.c.normalize("VBE.default(127.0.0.1,,8080).happy", "varnish"),
                          "varnish.VBE.default_127.0.0.1_8080.happy")

        # Same tests for the AgentCheck
        self.setUpAgentCheck()
        self.assertEquals(self.ac.normalize("metric"), "metric")
        self.assertEquals(self.ac.normalize("metric", "prefix"), "prefix.metric")
        self.assertEquals(self.ac.normalize("__metric__", "prefix"), "prefix.metric")
        self.assertEquals(self.ac.normalize("abc.metric(a+b+c{}/5)", "prefix"),
                          "prefix.abc.metric_a_b_c_5")
        self.assertEquals(self.ac.normalize("VBE.default(127.0.0.1,,8080).happy", "varnish"),
                          "varnish.VBE.default_127.0.0.1_8080.happy")

        self.assertEqual(self.ac.normalize("PauseTotalNs", "prefix", fix_case=True),
                         "prefix.pause_total_ns")
        self.assertEqual(self.ac.normalize("Metric.wordThatShouldBeSeparated", "prefix",
                                           fix_case=True),
                         "prefix.metric.word_that_should_be_separated")

    def test_service_check(self):
        check_name = 'test.service_check'
        status = AgentCheck.CRITICAL
        tags = ['host:test', 'other:thing']
        host_name = 'foohost'
        timestamp = time.time()

        check = AgentCheck('test', {}, {'checksd_hostname': 'foo'})
        check.service_check(check_name, status, tags, timestamp, host_name)
        self.assertEquals(len(check.service_checks), 1, check.service_checks)

        val = check.get_service_checks()
        self.assertEquals(len(val), 1)
        check_run_id = val[0].get('id', None)
        self.assertNotEquals(check_run_id, None)
        self.assertEquals([{
            'id': check_run_id,
            'check': check_name,
            'status': status,
            'host_name': host_name,
            'tags': tags,
            'timestamp': timestamp,
            'message': None,
        }], val)
        self.assertEquals(len(check.service_checks), 0, check.service_checks)

    def test_collector(self):
        agentConfig = {
            'api_key': 'test_apikey',
            'check_timings': True,
            'collect_ec2_tags': True,
            'collect_instance_metadata': False,
            'create_dd_check_tags': False,
            'version': 'test',
            'tags': '',
        }

        # Run a single checks.d check as part of the collector.
        redis_config = {
            "init_config": {},
            "instances": [{"host": "localhost", "port": 6379}]
        }
        checks = [load_check('redisdb', redis_config, agentConfig)]

        c = Collector(agentConfig, [], {}, get_hostname(agentConfig))
        payload = c.run({
            'initialized_checks': checks,
            'init_failed_checks': {}
        })
        metrics = payload['metrics']

        # Check that we got a timing metric for all checks.
        timing_metrics = [m for m in metrics
                          if m[0] == 'datadog.agent.check_run_time']
        all_tags = []
        for metric in timing_metrics:
            all_tags.extend(metric[3]['tags'])
        for check in checks:
            tag = "check:%s" % check.name
            assert tag in all_tags, all_tags

    def test_apptags(self):
        '''
        Tests that the app tags are sent if specified so
        '''
        agentConfig = {
            'api_key': 'test_apikey',
            'collect_ec2_tags': False,
            'collect_instance_metadata': False,
            'create_dd_check_tags': True,
            'version': 'test',
            'tags': '',
        }

        # Run a single checks.d check as part of the collector.
        redis_config = {
            "init_config": {},
            "instances": [{"host": "localhost", "port": 6379}]
        }
        checks = [load_check('redisdb', redis_config, agentConfig)]

        c = Collector(agentConfig, [], {}, get_hostname(agentConfig))
        payload = c.run({
            'initialized_checks': checks,
            'init_failed_checks': {}
        })

        # We check that the redis DD_CHECK_TAG is sent in the payload
        self.assertTrue('dd_check:redisdb' in payload['host-tags']['system'])

    def test_no_proxy(self):
        """
        Starting with Agent 5.0.0, there should always be a local forwarder
        running and all payloads should go through it. So we should make sure
        that we pass the no_proxy environment variable that will be used by
        requests (See: https://github.com/kennethreitz/requests/pull/945)
        """
        from requests.utils import get_environ_proxies
        from os import environ as env

        env["http_proxy"] = "http://*****:*****@google.com:4444"

        proxy_from_env = get_proxy({})
        self.assertEqual(proxy_from_env, {
            "host": "google.com",
            "port": 4444,
            "user": "******",
            "password": "******"
        })

    def test_min_collection_interval(self):
        config = {'instances': [{}], 'init_config': {}}
        agentConfig = {
            'version': '0.1',
            'api_key': 'toto'
        }

        # default min collection interval for that check is 20sec
        check = load_check('disk', config, agentConfig)
        check.DEFAULT_MIN_COLLECTION_INTERVAL = 20
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

        check.run()
        metrics = check.get_metrics()
        # No metrics should be collected as it's too early
        self.assertEquals(len(metrics), 0, metrics)

        # equivalent to time.sleep(20)
        check.last_collection_time[0] -= 20
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

        check.last_collection_time[0] -= 3
        check.run()
        metrics = check.get_metrics()
        self.assertEquals(len(metrics), 0, metrics)

        check.DEFAULT_MIN_COLLECTION_INTERVAL = 0
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

        config = {'instances': [{'min_collection_interval': 3}], 'init_config': {}}
        check = load_check('disk', config, agentConfig)
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

        check.run()
        metrics = check.get_metrics()
        self.assertEquals(len(metrics), 0, metrics)

        check.last_collection_time[0] -= 4
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

        config = {'instances': [{'min_collection_interval': 12}],
                  'init_config': {'min_collection_interval': 3}}
        check = load_check('disk', config, agentConfig)
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

        check.run()
        metrics = check.get_metrics()
        self.assertEquals(len(metrics), 0, metrics)

        check.last_collection_time[0] -= 4
        check.run()
        metrics = check.get_metrics()
        self.assertEquals(len(metrics), 0, metrics)

        check.last_collection_time[0] -= 8
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

    def test_ntp_global_settings(self):
        # Clear any existing ntp config
        NTPUtil._drop()

        config = {'instances': [{
            "host": "foo.com",
            "port": "bar",
            "version": 42,
            "timeout": 13.37}],
            'init_config': {}}
        agentConfig = {
            'version': '0.1',
            'api_key': 'toto'
        }

        # load this config in the ntp singleton
        ntp_util = NTPUtil(config)

        check = load_check('ntp', config, agentConfig)
        check.run()

        self.assertEqual(ntp_util.args["host"], "foo.com")
        self.assertEqual(ntp_util.args["port"], "bar")
        self.assertEqual(ntp_util.args["version"], 42)
        self.assertEqual(ntp_util.args["timeout"], 13.37)

        # Clear the singleton to prepare for the next config
        NTPUtil._drop()

        config = {'instances': [{}], 'init_config': {}}
        agentConfig = {
            'version': '0.1',
            'api_key': 'toto'
        }

        # load the new config
        ntp_util = NTPUtil(config)

        check = load_check('ntp', config, agentConfig)
        try:
            check.run()
        except Exception:
            pass

        self.assertTrue(ntp_util.args["host"].endswith("datadog.pool.ntp.org"))
        self.assertEqual(ntp_util.args["port"], "ntp")
        self.assertEqual(ntp_util.args["version"], 3)
        self.assertEqual(ntp_util.args["timeout"], 1.0)

        NTPUtil._drop()
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self._last_state_by_server = {}
    self.idx_rates = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Defaults
    self.LICEXPIRE = 30
    self.MONCOUNT = 100
    self.syslog = syslog.syslog
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Host status needs to persist across all checks
    self.host_status = defaultdict(lambda: defaultdict(lambda: None))
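# Hedged illustration (not from the source): the nested defaultdict above
# returns None for unseen instance/host pairs, so status lookups never raise
# KeyError; the keys are illustrative.
from collections import defaultdict

host_status = defaultdict(lambda: defaultdict(lambda: None))
assert host_status['cluster-a']['node-1'] is None  # unseen, but no KeyError
host_status['cluster-a']['node-1'] = 'OK'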
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)

    # Cluster status needs to persist across all checks
    self.cluster_status = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.previous_jobs = {}
    self.previous_stages = {}
def __init__(self, name, init_config, agentConfig):
    AgentCheck.__init__(self, name, init_config, agentConfig)
    self.dbs = {}
    self.versions = {}
def __init__(self, name, init_config, agentConfig):
    AgentCheck.__init__(self, name, init_config, agentConfig)
    self.wmi_conns = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.nodetool_cmd = init_config.get("nodetool", "/usr/bin/nodetool")
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.high_watermarks = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.cluster_name = None
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.mysql_version = {}
    self.greater_502 = {}
def __init__(self, name, init_config, agentConfig, instances=None):
    if instances is not None and len(instances) > 1:
        raise Exception('Kubernetes check only supports one configured instance.')
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self.kubeutil = KubeUtil()
def setUpAgentCheck(self):
    self.ac = AgentCheck('test', {}, {'checksd_hostname': "foo"})
DEFAULT_PUBLISH_ALIASES = False
DEFAULT_ENABLED_RATES = [
    'diskio.io_service_bytes.stats.total',
    'network.??_bytes',
    'cpu.*.total'
]
DEFAULT_COLLECT_EVENTS = False
DEFAULT_NAMESPACES = ['default']
DEFAULT_SERVICE_EVENT_FREQ = 5 * 60  # seconds

NET_ERRORS = ['rx_errors', 'tx_errors', 'rx_dropped', 'tx_dropped']

DEFAULT_ENABLED_GAUGES = ['memory.usage', 'filesystem.usage']

GAUGE = AgentCheck.gauge
RATE = AgentCheck.rate
HISTORATE = AgentCheck.generate_historate_func(["container_name"])
HISTO = AgentCheck.generate_histogram_func(["container_name"])
FUNC_MAP = {
    GAUGE: {True: HISTO, False: GAUGE},
    RATE: {True: HISTORATE, False: RATE}
}

EVENT_TYPE = 'kubernetes'

# Mapping between k8s events and ddog alert types per
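# Hedged sketch (not from the source): how FUNC_MAP above can pick the
# submission function, switching to the per-container histogram variant when
# requested; `use_histogram` and the metric values are illustrative.
def submit(check, func, name, value, tags, use_histogram):
    # FUNC_MAP[GAUGE][True] is HISTO, FUNC_MAP[RATE][False] is RATE, etc.
    FUNC_MAP[func][use_histogram](check, name, value, tags=tags)

# e.g.:
# submit(check, GAUGE, 'kubernetes.memory.usage', 42.0,
#        ['container_name:web'], use_histogram=False)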
def __init__(self, name, init_config, agentConfig, instances=None):
    AgentCheck.__init__(self, name, init_config, agentConfig, instances)
    self._last_gc_count = defaultdict(int)