Example #1
0
 def __init__(self, name, init_config, agentConfig):
     """Set up the JMX-based check and its per-instance connector cache."""
     AgentCheck.__init__(self, name, init_config, agentConfig)
     
     # Used to store the instances of the jmx connector (1 per instance)
     self.jmxs = {}
     # Metrics collected over JMX, flushed by the base class.
     self.jmx_metrics = []
     # NOTE(review): AgentCheck.__init__ presumably already stores
     # init_config; this re-assignment looks redundant — confirm upstream.
     self.init_config = init_config
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Build the check plus an urllib2 opener that supports Unix sockets."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Initialize a HTTP opener with Unix socket support
     # (0/unset falls back to DEFAULT_SOCKET_TIMEOUT via the `or`).
     socket_timeout = int(init_config.get('socket_timeout', 0)) \
                      or DEFAULT_SOCKET_TIMEOUT
     # NOTE(review): class-level assignment — the timeout is shared by every
     # UnixHTTPConnection in the process, not just this check instance.
     UnixHTTPConnection.socket_timeout = socket_timeout
     self.url_opener = urllib2.build_opener(UnixSocketHandler())
Example #3
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Base Prometheus check: set up parsing patterns and mapping hooks.

        Subclasses are expected to override NAMESPACE and metrics_mapper.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        # message.type is the index in this array
        # see: https://github.com/prometheus/client_model/blob/master/ruby/lib/prometheus/client/model/metrics.pb.rb
        self.METRIC_TYPES = ['counter', 'gauge', 'summary', 'untyped', 'histogram']

        # patterns used for metrics and labels extraction from the prometheus
        # text format. Do not overwrite those
        self.metrics_pattern = re.compile(r'^(\w+)(.*)\s+([0-9.+eE,]+)$')
        self.lbl_pattern = re.compile(r'(\w+)="(.*?)"')

        # `NAMESPACE` is the prefix metrics will have. Needs to be hardcoded in the
        # child check class.
        self.NAMESPACE = ''

        # `metrics_mapper` is a dictionary where the keys are the metrics to capture
        # and the values are the corresponding metrics names to have in datadog.
        # Note: it is empty in the mother class but will need to be
        # overloaded/hardcoded in the final check not to be counted as custom metric.
        self.metrics_mapper = {}

        # If the `labels_mapper` dictionary is provided, the metrics labels names
        # in the `labels_mapper` will use the corresponding value as tag name
        # when sending the gauges.
        self.labels_mapper = {}

        # `exclude_labels` is an array of labels names to exclude. Those labels
        # will just not be added as tags when submitting the metric.
        self.exclude_labels = []
Example #4
0
    def test_service_check(self):
        """service_check() queues exactly one run; get_service_checks() drains it."""
        check_name = "test.service_check"
        status = AgentCheck.CRITICAL
        tags = ["host:test", "other:thing"]
        host_name = "foohost"
        timestamp = time.time()

        check = AgentCheck("test", {}, {"checksd_hostname": "foo"})
        check.service_check(check_name, status, tags, timestamp, host_name)
        self.assertEquals(len(check.service_checks), 1, check.service_checks)
        val = check.get_service_checks()
        self.assertEquals(len(val), 1)
        # The run id is generated internally; only assert that it exists.
        check_run_id = val[0].get("id", None)
        self.assertNotEquals(check_run_id, None)
        self.assertEquals(
            [
                {
                    "id": check_run_id,
                    "check": check_name,
                    "status": status,
                    "host_name": host_name,
                    "tags": tags,
                    "timestamp": timestamp,
                    "message": None,
                }
            ],
            val,
        )
        # get_service_checks() flushes the internal queue.
        self.assertEquals(len(check.service_checks), 0, check.service_checks)
Example #5
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Expand every class-level stat key with its percentile suffixes."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        # For each suffix, register "<stat>_<suffix>" for every stat key
        # (same ordering as the original nested loops).
        self.keys.extend(
            stat + "_" + suffix
            for suffix in ("mean", "median", "95", "99", "100")
            for stat in self.stat_keys
        )

        # Sentinel meaning "no previous value seen yet".
        self.prev_coord_redirs_total = -1
Example #6
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Process check: set up the access-denied and PID caches.

        Cache durations come from init_config with module-level defaults.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # ad stands for access denied
        # We cache the PIDs getting this error and don't iterate on them
        # more often than `access_denied_cache_duration`
        # This cache is for all PIDs so it's global, but it should
        # be refreshed by instance
        self.last_ad_cache_ts = {}
        self.ad_cache = set()
        self.access_denied_cache_duration = int(
            init_config.get(
                'access_denied_cache_duration',
                DEFAULT_AD_CACHE_DURATION
            )
        )

        # By default cache the PID list for a while
        # Sometimes it's not wanted b/c it can mess with no-data monitoring
        # This cache is indexed per instance
        self.last_pid_cache_ts = {}
        self.pid_cache = {}
        self.pid_cache_duration = int(
            init_config.get(
                'pid_cache_duration',
                DEFAULT_PID_CACHE_DURATION
            )
        )

        # Process cache, indexed by instance
        self.process_cache = defaultdict(dict)
Example #7
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Disk check: single-instance only; loads its configuration once.

     Raises Exception when zero or more than one instance is configured
     (previously a missing/empty `instances` crashed with TypeError at
     `instances[0]` despite the `instances=None` default).
     """
     if not instances:
         raise Exception("Disk check requires one configured instance.")
     if len(instances) > 1:
         raise Exception("Disk check only supports one configured instance.")
     AgentCheck.__init__(self, name, init_config,
                         agentConfig, instances=instances)
     # Get the configuration once for all
     self._load_conf(instances[0])
Example #8
0
    def __init__(self, name, init_config, agentConfig, instances):
        """Set up status tracking and start the worker pool immediately."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # A dictionary to keep track of service statuses
        self.statuses = {}
        self.notified = {}
        # NOTE(review): starts the worker pool from __init__ — confirm the
        # pool is torn down elsewhere.
        self.start_pool()
Example #9
0
    def __init__(self, name, init_config, agentConfig, instances):
        """vSphere check: per-instance connections, caches and cache timers."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.time_started = time.time()
        self.pool_started = False
        # Exceptions raised by worker threads are funneled through this queue.
        self.exceptionq = Queue()

        # Connections open to vCenter instances
        self.server_instances = {}

        # Caching resources, timeouts
        self.cache_times = {}
        for instance in self.instances:
            i_key = self._instance_key(instance)
            self.cache_times[i_key] = {
                MORLIST: {
                    LAST: 0,
                    INTERVAL: init_config.get('refresh_morlist_interval',
                                    REFRESH_MORLIST_INTERVAL)
                },
                METRICS_METADATA: {
                    LAST: 0,
                    INTERVAL: init_config.get('refresh_metrics_metadata_interval',
                                    REFRESH_METRICS_METADATA_INTERVAL)
                }
            }

        # First layer of cache (get entities from the tree)
        self.morlist_raw = {}
        # Second layer, processed from the first one
        self.morlist = {}
        # Metrics metadata, basically perfCounterId -> {name, group, description}
        self.metrics_metadata = {}

        self.latest_event_query = {}
Example #10
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Kubernetes check: single instance only; requires a resolvable host."""
     if instances is not None and len(instances) > 1:
         raise Exception('Kubernetes check only supports one configured instance.')
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     self.kubeutil = KubeUtil()
     # Fail fast when no host could be determined at all.
     if not self.kubeutil.host:
         raise Exception('Unable to get default router and host parameter is not set')
Example #11
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Warn at start-up for each HTTPS instance that disables SSL validation."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     for inst in (instances or []):
         target = inst.get('url', '')
         scheme = urlparse(target).scheme
         disable_ssl = _is_affirmative(inst.get('disable_ssl_validation', False))
         if disable_ssl and scheme == 'https':
             self.log.warning('Skipping SSL cert validation for %s based on configuration.' % target)
Example #12
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """MongoDB check: track replica-set state and per-instance metric lists."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Members' last replica set states
        self._last_state_by_server = {}

        # List of metrics to collect per instance
        self.metrics_to_collect_by_instance = {}
Example #13
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Docker check: refuses more than one configured instance."""
        multiple_instances = instances is not None and len(instances) > 1
        if multiple_instances:
            raise Exception("Docker check only supports one configured instance.")
        AgentCheck.__init__(self, name, init_config, agentConfig, instances=instances)

        # init() flips this flag on success.
        self.init_success = False
        self.init()
Example #14
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Parse the counter configuration once at start-up."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Parse general counters
        self.general_counters = self._parse_general_counters(init_config)

        # Parse job specific counters
        self.job_specific_counters = self._parse_job_specific_counters(init_config)
Example #15
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Consul check: single-instance only; caches the last seen leader/config.

        Validates the instance count BEFORE the (side-effectful) base-class
        initialization, consistent with the other single-instance checks in
        this file; previously the base class was initialized first.
        """
        if instances is not None and len(instances) > 1:
            raise Exception("Consul check only supports one configured instance.")
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Cached agent-local config and when it was last fetched.
        self._local_config = None
        self._last_config_fetch_time = None
        self._last_known_leader = None
    def __init__(self, name, init_config, agentConfig, instances):
        """Pool-based check variant that tolerates instances reporting no data."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances, allow_no_data=True)

        # A dictionary to keep track of service statuses
        self.statuses = {}
        self.notified = {}
        # NOTE(review): starts the worker pool from __init__ — confirm teardown.
        self.start_pool()
        self.nb_failures = 0
Example #17
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """SNMP check: point the command generator at the custom MIB folder."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Load Custom MIB directory
        mibs_path = None
        if init_config is not None:
            mibs_path = init_config.get("mibs_folder")
        # NOTE(review): class-level call — the command generator appears to be
        # shared by all instances of this check; confirm that is intended.
        SnmpCheck.create_command_generator(mibs_path)
Example #18
0
    def __init__(self, name, init_config, agentConfig, instances):
        """Status-tracking check whose worker pool is started lazily."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # A dictionary to keep track of service statuses
        self.statuses = {}
        self.notified = {}
        self.nb_failures = 0
        # Worker pool is NOT started here; it is created on first use.
        self.pool_started = False
Example #19
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Postgres check: per-server connection and metric-definition caches."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # All of the containers below are populated per server/instance.
     self.dbs = {}
     self.versions = {}
     self.instance_metrics = {}
     self.bgw_metrics = {}
     self.db_instance_metrics = []
     self.db_bgw_metrics = []
     self.replication_metrics = {}
Example #20
0
 def __init__(self, *args, **kwargs):
   """Build the Google API client from class-level apiName/version/scope."""
   AgentCheck.__init__(self, *args, **kwargs)
   # NOTE(review): logs the configured key file path at INFO level — confirm
   # exposing the filesystem path in logs is acceptable.
   self.log.info('key_file_location: %s' % self.init_config.get('key_file_location'))
   
   self.service = self.get_service(
     self.apiName, 
     self.version, 
     self.scope, 
     self.init_config.get('key_file_location'))
Example #21
0
    def __init__(self, name, init_config, agentConfig):
        """Redis check: record whether the redis module is importable.

        Previously an ImportError was only logged and execution continued,
        so a later use of redis would fail with a NameError. Mirror the
        other redisdb variant in this file by recording `self.enabled`.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig)

        # If we can't import the redis module, remember it so the check can
        # skip itself instead of crashing later.
        try:
            import redis  # noqa: F401 - import check only
            self.enabled = True
        except ImportError:
            self.enabled = False
            self.log.error('redisdb.yaml exists but redis module can not be imported. Skipping check.')

        self.previous_total_commands = {}
        self.connections = {}
Example #22
0
 def __init__(self, name, init_config, agentConfig):
     """Docker (cgroup-based) check: locate cgroup mountpoints up front."""
     AgentCheck.__init__(self, name, init_config, agentConfig)
     # Mountpoint of each cgroup controller we collect from.
     self._mountpoints = {}
     docker_root = init_config.get('docker_root', '/')
     for metric in CGROUP_METRICS:
         self._mountpoints[metric["cgroup"]] = self._find_cgroup(metric["cgroup"], docker_root)
     self._last_event_collection_ts = defaultdict(lambda: None)
     # HTTP opener able to talk to the Docker daemon's Unix socket.
     self.url_opener = urllib2.build_opener(UnixSocketHandler())
     self.should_get_size = True
     self._cgroup_filename_pattern = None
Example #23
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Kubernetes check: single instance only; needs a reachable host."""
        if instances is not None and len(instances) > 1:
            raise Exception('Kubernetes check only supports one configured instance.')

        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Pass the (single) instance config through to KubeUtil, if any.
        inst = instances[0] if instances is not None else None
        self.kubeutil = KubeUtil(instance=inst)
        if not self.kubeutil.host:
            raise Exception('Unable to retrieve Docker hostname and host parameter is not set')
Example #24
0
    def __init__(self, name, init_config, agentConfig):
        """JMX check: per-instance connector and connection-attempt bookkeeping."""
        AgentCheck.__init__(self, name, init_config, agentConfig)
        
        # Used to store the instances of the jmx connector (1 per instance)
        self.jmxs = {}
        self.jmx_metrics = []
        # NOTE(review): likely redundant with the base class — confirm.
        self.init_config = init_config

        # Used to store the number of times we opened a new jmx connector for this instance
        self.jmx_connections_watcher = {}
Example #25
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Docker check: single instance; detects docker-backed service discovery."""
        if instances is not None and len(instances) > 1:
            raise Exception("Docker check only supports one configured instance.")
        AgentCheck.__init__(self, name, init_config,
                            agentConfig, instances=instances)

        self.init_success = False
        self.init()
        # Service discovery is active only when it is enabled AND its backend
        # is docker.
        self._service_discovery = agentConfig.get('service_discovery') and \
            agentConfig.get('service_discovery_backend') == 'docker'
Example #26
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """SNMP check: configure MIB folder and non-increasing-OID tolerance."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Load Custom MIB directory
        mibs_path = None
        ignore_nonincreasing_oid = False
        if init_config is not None:
            mibs_path = init_config.get("mibs_folder")
            ignore_nonincreasing_oid = _is_affirmative(init_config.get("ignore_nonincreasing_oid", False))
        # NOTE(review): class-level call shared across instances — confirm.
        SnmpCheck.create_command_generator(mibs_path, ignore_nonincreasing_oid)
    def test_monotonic_count(self):
        """monotonic_count sums only the positive deltas between samples."""
        metric = 'test.count.type.2'
        tags = ['test', 'type:count']
        hostname = 'test.host'
        device_name = 'host1'
        agent_check = AgentCheck('test_count_check', {}, {})
        # Strictly increasing sequence: value flushed is last - first.
        counters = [0, 1, 2, 4, 7, 12, 20]
        for counter in counters:
            agent_check.monotonic_count(metric, counter, tags=tags,
                                           hostname=hostname,
                                           device_name=device_name)
        flush_ts = time.time()
        results = agent_check.get_metrics()
        nt.assert_true(results is not None)
        nt.assert_equal(1, len(results))
        result = results[0]
        ret_metric, timestamp, value = result[0], result[1], result[2]
        nt.assert_equal(metric, ret_metric, msg=self.INCORRECT_METRIC)
        nt.ok_(abs(flush_ts-timestamp) <= 1, msg=self.DELAYED_TS)
        nt.assert_equal(counters[-1]-counters[0], value)

        # add a single point
        # (delta against the last pre-flush sample, 20 -> 30 == 10)
        counters = [30]
        for counter in counters:
            agent_check.monotonic_count(metric, counter, tags=tags,
                                           hostname=hostname,
                                           device_name=device_name)
        flush_ts = time.time()
        results = agent_check.get_metrics()
        nt.assert_true(results is not None)
        nt.assert_equal(1, len(results))
        result = results[0]
        ret_metric, timestamp, value = result[0], result[1], result[2]
        nt.assert_equal(metric, ret_metric, msg=self.INCORRECT_METRIC)
        nt.ok_(abs(flush_ts-timestamp) <= 1, msg=self.DELAYED_TS)
        nt.assert_equal(10, value)

        # test non-monotonic sequence
        counters = [40, 35, 40, 45, 30, 32]
        for counter in counters:
            agent_check.monotonic_count(metric, counter, tags=tags,
                                           hostname=hostname,
                                           device_name=device_name)
        flush_ts = time.time()
        results = agent_check.get_metrics()
        nt.assert_true(results is not None)
        nt.assert_equal(1, len(results))
        result = results[0]
        ret_metric, timestamp, value = result[0], result[1], result[2]
        nt.assert_equal(metric, ret_metric, msg=self.INCORRECT_METRIC)
        nt.ok_(abs(flush_ts-timestamp) <= 1, msg=self.DELAYED_TS)
        # should skip when counter is reset
        # i.e. between 40 and 35 and between 45 and 30
        # 22 = (40-30) + (40-35) + (45-40) + (32-30) = 10 + 5 + 5 + 2
        nt.assert_equal(22, value)
Example #28
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """MongoDB check: replica-set state tracking plus metric name lists."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Members' last replica set states
        self._last_state_by_server = {}

        # List of metrics to collect per instance
        self.metrics_to_collect_by_instance = {}

        # Short names of the collection-level metrics ("<db>.<metric>" -> "<metric>").
        # Only the keys of COLLECTION_METRICS are needed, so iterate the
        # mapping directly instead of materializing iteritems() pairs.
        self.collection_metrics_names = [
            key.split('.')[1] for key in self.COLLECTION_METRICS
        ]
Example #29
0
    def __init__(self, name, init_config, agentConfig):
        """Redis check: disable itself cleanly when the redis module is missing."""
        AgentCheck.__init__(self, name, init_config, agentConfig)

        # If we can't import the redis module, we should always skip this check
        try:
            import redis
            self.enabled = True
        except ImportError:
            self.enabled = False
            self.log.error('redisdb.yaml exists but redis module can not be imported. Skipping check.')

        self.previous_total_commands = {}
        self.connections = {}
Example #30
0
    def __init__(self, name, init_config, agentConfig):
        """SQL Server check: register custom metrics, prepare connection cache.

        Rows with an invalid metric type are now SKIPPED (previously they
        were logged as errors but still appended to METRICS, deferring the
        failure to metric submission time).
        """
        AgentCheck.__init__(self, name, init_config, agentConfig)

        # Load any custom metrics from conf.d/sqlserver.yaml
        for row in init_config.get('custom_metrics', []):
            if row['type'] not in VALID_METRIC_TYPES:
                self.log.error('%s has an invalid metric type: %s' \
                    % (row['name'], row['type']))
                # Skip the malformed row instead of registering it anyway.
                continue
            self.METRICS.append( (row['name'], row['type'], row['counter_name'],
                row.get('instance_name', ''), row.get('tag_by', None)) )

        # Cache connections
        self.connections = {}
Example #31
0
    def __init__(self, name, init_config, agentConfig, instances):
        """WMI check: cache samplers and property lists across runs."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Cache
        self.wmi_samplers = {}
        self.wmi_props = {}
Example #32
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Hyper-V (WMI virtualization namespace) check state."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # NOTE(review): "NAMESAPCE" is a typo but may be read elsewhere in the
     # check; keep it for backward compatibility and expose the correctly
     # spelled alias alongside it.
     self.NAMESAPCE = r"root\virtualization\v2"
     self.NAMESPACE = self.NAMESAPCE
     self.SystemName = ""
     # NOTE(review): connection placeholder initialized to "" rather than
     # None — confirm downstream code relies on a string here.
     self.conn = ""
     self.ComputerName = ""
Example #33
0
 def __init__(self, name, init_config, agentConfig):
     """Keep per-source high-water marks between runs."""
     AgentCheck.__init__(self, name, init_config, agentConfig)
     self.high_watermarks = {}
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Consul check: lazily create one state object per instance key."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        self._instance_states = defaultdict(lambda: ConsulCheckInstanceState())
Example #35
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Cache connections and the last event timestamp seen per key."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     self.connections = {}
     # defaultdict(int): unseen keys start at timestamp 0.
     self.last_timestamp_seen = defaultdict(int)
Example #36
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Remember the URL eventually assumed for each instance."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     self.assumed_url = {}
Example #37
0
 def __init__(self, *args, **kwargs):
     """Delegate construction entirely to AgentCheck."""
     AgentCheck.__init__(self, *args, **kwargs)
Example #38
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize alert bookkeeping."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Presumably used to de-duplicate alerts — confirm in the check logic.
     self.already_alerted = []
Example #39
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Create the check with no checker bound yet (set up later)."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     self.checker = None
Example #40
0
 def __init__(self, name, init_config, agentConfig, instances=None):
   """Initialize and immediately fetch the first set of values."""
   AgentCheck.__init__(self, name, init_config, agentConfig, instances)
   # NOTE(review): doing work in __init__ (get_values) may slow or break
   # agent start-up if it performs I/O — confirm this is intended.
   self.get_values()
Example #41
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """RabbitMQ check: alert de-duplication list and per-vhost cache."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     self.already_alerted = []
     # this is used to send CRITICAL rabbitmq.aliveness check if the
     # server goes down
     self.cached_vhosts = {}
Example #42
0
 def __init__(self, *args, **kwargs):
     """Set up empty payload and metric-context stores for the collector."""
     AgentCheck.__init__(self, *args, **kwargs)
     self._collector_payload = {}
     self._metric_context = {}
Example #43
0
class TestCore(unittest.TestCase):
    "Tests to validate the core check logic"

    def setUp(self):
        # Fresh Check with one gauge and one counter registered per test.
        self.c = Check(logger)
        self.c.gauge("test-metric")
        self.c.counter("test-counter")

    def setUpAgentCheck(self):
        # Helper (not auto-run by unittest); tests call it explicitly.
        self.ac = AgentCheck('test', {}, {'checksd_hostname': "foo"})

    def test_gauge(self):
        """Gauges keep only the latest sample."""
        self.assertEquals(self.c.is_gauge("test-metric"), True)
        self.assertEquals(self.c.is_counter("test-metric"), False)
        self.c.save_sample("test-metric", 1.0)
        # call twice in a row, should be invariant
        self.assertEquals(self.c.get_sample("test-metric"), 1.0)
        self.assertEquals(self.c.get_sample("test-metric"), 1.0)
        self.assertEquals(self.c.get_sample_with_timestamp("test-metric")[1], 1.0)
        # new value, old one should be gone
        self.c.save_sample("test-metric", 2.0)
        self.assertEquals(self.c.get_sample("test-metric"), 2.0)
        self.assertEquals(len(self.c._sample_store["test-metric"]), 1)
        # with explicit timestamp
        self.c.save_sample("test-metric", 3.0, 1298066183.607717)
        self.assertEquals(self.c.get_sample_with_timestamp("test-metric"), (1298066183.607717, 3.0, None, None))
        # get_samples()
        self.assertEquals(self.c.get_samples(), {"test-metric": 3.0})

    def testEdgeCases(self):
        """Unknown metrics raise; identical counter samples raise Infinity."""
        self.assertRaises(CheckException, self.c.get_sample, "unknown-metric")
        # same value
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.assertRaises(Infinity, self.c.get_sample, "test-counter")

    def test_counter(self):
        """Counters report rates; a negative delta is an UnknownValue."""
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.assertRaises(UnknownValue, self.c.get_sample, "test-counter", expire=False)
        self.c.save_sample("test-counter", 2.0, 2.0)
        self.assertEquals(self.c.get_sample("test-counter", expire=False), 1.0)
        self.assertEquals(self.c.get_sample_with_timestamp("test-counter", expire=False), (2.0, 1.0, None, None))
        self.assertEquals(self.c.get_samples(expire=False), {"test-counter": 1.0})
        self.c.save_sample("test-counter", -2.0, 3.0)
        self.assertRaises(UnknownValue, self.c.get_sample_with_timestamp, "test-counter")

    def test_tags(self):
        # Test metric tagging
        now = int(time.time())
        # Tag metrics
        self.c.save_sample("test-counter", 1.0, 1.0, tags = ["tag1", "tag2"])
        self.c.save_sample("test-counter", 2.0, 2.0, tags = ["tag1", "tag2"])
        # Only 1 point recording for this combination of tags, won't be sent
        self.c.save_sample("test-counter", 3.0, 3.0, tags = ["tag1", "tag3"])
        self.c.save_sample("test-metric", 3.0, now, tags = ["tag3", "tag4"])
        # Arg checks
        self.assertRaises(CheckException, self.c.save_sample, "test-metric", 4.0, now + 5, tags = "abc")
        # This is a different combination of tags
        self.c.save_sample("test-metric", 3.0, now, tags = ["tag5", "tag3"])
        results = self.c.get_metrics()
        results.sort()
        self.assertEquals(results,
                          [("test-counter", 2.0, 1.0, {"tags": ["tag1", "tag2"]}),
                           ("test-metric", now, 3.0, {"tags": ["tag3", "tag4"]}),
                           ("test-metric", now, 3.0, {"tags": ["tag3", "tag5"]}),
                           ])
        # Tagged metrics are not available through get_samples anymore
        self.assertEquals(self.c.get_samples(), {})

    def test_samples(self):
        """get_samples_with_timestamps yields (ts, value, hostname, device)."""
        self.assertEquals(self.c.get_samples(), {})
        self.c.save_sample("test-metric", 1.0, 0.0)  # value, ts
        self.c.save_sample("test-counter", 1.0, 1.0) # value, ts
        self.c.save_sample("test-counter", 4.0, 2.0) # value, ts
        assert "test-metric" in self.c.get_samples_with_timestamps(expire=False), self.c.get_samples_with_timestamps(expire=False)
        self.assertEquals(self.c.get_samples_with_timestamps(expire=False)["test-metric"], (0.0, 1.0, None, None))
        assert "test-counter" in self.c.get_samples_with_timestamps(expire=False), self.c.get_samples_with_timestamps(expire=False)
        self.assertEquals(self.c.get_samples_with_timestamps(expire=False)["test-counter"], (2.0, 3.0, None, None))

    def test_name(self):
        """normalize() sanitizes metric and device names consistently."""
        self.assertEquals(self.c.normalize("metric"), "metric")
        self.assertEquals(self.c.normalize("metric", "prefix"), "prefix.metric")
        self.assertEquals(self.c.normalize("__metric__", "prefix"), "prefix.metric")
        self.assertEquals(self.c.normalize("abc.metric(a+b+c{}/5)", "prefix"), "prefix.abc.metric_a_b_c_5")
        self.assertEquals(self.c.normalize("VBE.default(127.0.0.1,,8080).happy", "varnish"), "varnish.VBE.default_127.0.0.1_8080.happy")
        self.assertEquals(self.c.normalize("metric@device"), "metric_device")

        # Same tests for the AgentCheck
        self.setUpAgentCheck()
        self.assertEquals(self.ac.normalize("metric"), "metric")
        self.assertEquals(self.ac.normalize("metric", "prefix"), "prefix.metric")
        self.assertEquals(self.ac.normalize("__metric__", "prefix"), "prefix.metric")
        self.assertEquals(self.ac.normalize("abc.metric(a+b+c{}/5)", "prefix"), "prefix.abc.metric_a_b_c_5")
        self.assertEquals(self.ac.normalize("VBE.default(127.0.0.1,,8080).happy", "varnish"), "varnish.VBE.default_127.0.0.1_8080.happy")
        self.assertEquals(self.ac.normalize("metric@device"), "metric_device")

        self.assertEqual(self.ac.normalize("PauseTotalNs", "prefix", fix_case = True), "prefix.pause_total_ns")
        self.assertEqual(self.ac.normalize("Metric.wordThatShouldBeSeparated", "prefix", fix_case = True), "prefix.metric.word_that_should_be_separated")
        self.assertEqual(self.ac.normalize_device_name(",@+*-()[]{}//device@name"), "___________//device_name")

    def test_service_check(self):
        """Service checks omit absent fields and are flushed on retrieval."""
        check_name = 'test.service_check'
        status = AgentCheck.CRITICAL
        tags = ['host:test', 'other:thing']
        host_name = 'foohost'
        timestamp = time.time()

        check = AgentCheck('test', {}, {'checksd_hostname':'foo'})
        # No "message"/"tags" field
        check.service_check(check_name, status, timestamp=timestamp, hostname=host_name)
        self.assertEquals(len(check.service_checks), 1, check.service_checks)
        val = check.get_service_checks()
        self.assertEquals(len(val), 1)
        check_run_id = val[0].get('id', None)
        self.assertNotEquals(check_run_id, None)
        self.assertEquals([{
            'id': check_run_id,
            'check': check_name,
            'status': status,
            'host_name': host_name,
            'timestamp': timestamp,
        }], val)
        self.assertEquals(len(check.service_checks), 0, check.service_checks)

        # With "message" field
        check.service_check(check_name, status, tags, timestamp, host_name, message='foomessage')
        self.assertEquals(len(check.service_checks), 1, check.service_checks)
        val = check.get_service_checks()
        self.assertEquals(len(val), 1)
        check_run_id = val[0].get('id', None)
        self.assertNotEquals(check_run_id, None)
        self.assertEquals([{
            'id': check_run_id,
            'check': check_name,
            'status': status,
            'host_name': host_name,
            'tags': tags,
            'timestamp': timestamp,
            'message': 'foomessage',
        }], val)
        self.assertEquals(len(check.service_checks), 0, check.service_checks)


    def test_no_proxy(self):
        """ Starting with Agent 5.0.0, there should always be a local forwarder
        running and all payloads should go through it. So we should make sure
        that we pass the no_proxy environment variable that will be used by requests
        (See: https://github.com/kennethreitz/requests/pull/945 )
        """
        # NOTE(review): get_environ_proxies is imported but never used here —
        # presumably leftover from an earlier version of this test.
        from requests.utils import get_environ_proxies
        from os import environ as env

        # NOTE(review): mutates os.environ without cleanup — may leak into
        # other tests; confirm a tearDown resets it.
        env["http_proxy"] = "http://*****:*****@google.com:4444"
        proxy_from_env = get_proxy({})
        self.assertEqual(proxy_from_env,
            {
                "host": "google.com",
                "port": 4444,
                "user": "******",
                "password": "******"
            })
Example #44
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Read the default timeout once; DEFAULT_TIMEOUT is the class fallback."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     self.default_timeout = init_config.get('default_timeout',
                                            self.DEFAULT_TIMEOUT)
Example #45
0
class TestCore(unittest.TestCase):
    """Tests to validate the core check logic.

    Covers the legacy Check class (gauges, counters, tags, sample
    expiry), AgentCheck helpers (normalize, service checks), the
    Collector payload, proxy environment handling, minimum collection
    intervals and NTP global settings.
    """

    def setUp(self):
        # Fresh legacy Check with one gauge and one counter registered.
        self.c = Check(logger)
        self.c.gauge("test-metric")
        self.c.counter("test-counter")

    def setUpAgentCheck(self):
        # NOTE: not run automatically by unittest (name does not match
        # setUp); tests that need self.ac call this explicitly.
        self.ac = AgentCheck('test', {}, {'checksd_hostname': "foo"})

    def test_gauge(self):
        # The metric registered in setUp is a gauge, not a counter.
        self.assertEquals(self.c.is_gauge("test-metric"), True)
        self.assertEquals(self.c.is_counter("test-metric"), False)
        self.c.save_sample("test-metric", 1.0)
        # call twice in a row, should be invariant
        self.assertEquals(self.c.get_sample("test-metric"), 1.0)
        self.assertEquals(self.c.get_sample("test-metric"), 1.0)
        self.assertEquals(self.c.get_sample_with_timestamp("test-metric")[1], 1.0)
        # new value, old one should be gone
        self.c.save_sample("test-metric", 2.0)
        self.assertEquals(self.c.get_sample("test-metric"), 2.0)
        # only the latest point is retained in the sample store
        self.assertEquals(len(self.c._sample_store["test-metric"]), 1)
        # with explicit timestamp
        self.c.save_sample("test-metric", 3.0, 1298066183.607717)
        self.assertEquals(self.c.get_sample_with_timestamp("test-metric"), (1298066183.607717, 3.0, None, None))
        # get_samples()
        self.assertEquals(self.c.get_samples(), {"test-metric": 3.0})

    def testEdgeCases(self):
        # Reading a metric that was never registered raises.
        self.assertRaises(CheckException, self.c.get_sample, "unknown-metric")
        # same value
        # Two counter samples with identical timestamps: no elapsed time,
        # so the rate computation raises Infinity.
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.c.save_sample("test-counter", 1.0, 1.0)
        self.assertRaises(Infinity, self.c.get_sample, "test-counter")

    def test_counter(self):
        self.c.save_sample("test-counter", 1.0, 1.0)
        # A counter needs at least two samples before a rate exists.
        self.assertRaises(UnknownValue, self.c.get_sample, "test-counter", expire=False)
        self.c.save_sample("test-counter", 2.0, 2.0)
        # Rate over the two samples: (2.0 - 1.0) / (2.0 - 1.0) == 1.0.
        self.assertEquals(self.c.get_sample("test-counter", expire=False), 1.0)
        self.assertEquals(self.c.get_sample_with_timestamp("test-counter", expire=False), (2.0, 1.0, None, None))
        self.assertEquals(self.c.get_samples(expire=False), {"test-counter": 1.0})
        # A counter going backwards (e.g. after a reset) yields UnknownValue.
        self.c.save_sample("test-counter", -2.0, 3.0)
        self.assertRaises(UnknownValue, self.c.get_sample_with_timestamp, "test-counter")

    def test_tags(self):
        # Test metric tagging
        now = int(time.time())
        # Tag metrics
        self.c.save_sample("test-counter", 1.0, 1.0, tags = ["tag1", "tag2"])
        self.c.save_sample("test-counter", 2.0, 2.0, tags = ["tag1", "tag2"])
        # Only 1 point recording for this combination of tags, won't be sent
        self.c.save_sample("test-counter", 3.0, 3.0, tags = ["tag1", "tag3"])
        self.c.save_sample("test-metric", 3.0, now, tags = ["tag3", "tag4"])
        # Arg checks
        # tags must be a list, not a bare string.
        self.assertRaises(CheckException, self.c.save_sample, "test-metric", 4.0, now + 5, tags = "abc")
        # This is a different combination of tags
        self.c.save_sample("test-metric", 3.0, now, tags = ["tag5", "tag3"])
        results = self.c.get_metrics()
        results.sort()
        # Tags come back sorted in the payload ("tag3" before "tag5").
        self.assertEquals(results,
                          [("test-counter", 2.0, 1.0, {"tags": ["tag1", "tag2"]}),
                           ("test-metric", now, 3.0, {"tags": ["tag3", "tag4"]}),
                           ("test-metric", now, 3.0, {"tags": ["tag3", "tag5"]}),
                           ])
        # Tagged metrics are not available through get_samples anymore
        self.assertEquals(self.c.get_samples(), {})

    def test_samples(self):
        self.assertEquals(self.c.get_samples(), {})
        self.c.save_sample("test-metric", 1.0, 0.0)  # value, ts
        self.c.save_sample("test-counter", 1.0, 1.0) # value, ts
        self.c.save_sample("test-counter", 4.0, 2.0) # value, ts
        assert "test-metric" in self.c.get_samples_with_timestamps(expire=False), self.c.get_samples_with_timestamps(expire=False)
        self.assertEquals(self.c.get_samples_with_timestamps(expire=False)["test-metric"], (0.0, 1.0, None, None))
        assert "test-counter" in self.c.get_samples_with_timestamps(expire=False), self.c.get_samples_with_timestamps(expire=False)
        # Counter rate: (4.0 - 1.0) / (2.0 - 1.0) == 3.0, reported at ts 2.0.
        self.assertEquals(self.c.get_samples_with_timestamps(expire=False)["test-counter"], (2.0, 3.0, None, None))

    def test_name(self):
        # normalize() sanitizes metric names, optionally adding a prefix.
        self.assertEquals(self.c.normalize("metric"), "metric")
        self.assertEquals(self.c.normalize("metric", "prefix"), "prefix.metric")
        self.assertEquals(self.c.normalize("__metric__", "prefix"), "prefix.metric")
        self.assertEquals(self.c.normalize("abc.metric(a+b+c{}/5)", "prefix"), "prefix.abc.metric_a_b_c_5")
        self.assertEquals(self.c.normalize("VBE.default(127.0.0.1,,8080).happy", "varnish"), "varnish.VBE.default_127.0.0.1_8080.happy")

        # Same tests for the AgentCheck
        self.setUpAgentCheck()
        self.assertEquals(self.ac.normalize("metric"), "metric")
        self.assertEquals(self.ac.normalize("metric", "prefix"), "prefix.metric")
        self.assertEquals(self.ac.normalize("__metric__", "prefix"), "prefix.metric")
        self.assertEquals(self.ac.normalize("abc.metric(a+b+c{}/5)", "prefix"), "prefix.abc.metric_a_b_c_5")
        self.assertEquals(self.ac.normalize("VBE.default(127.0.0.1,,8080).happy", "varnish"), "varnish.VBE.default_127.0.0.1_8080.happy")

        # fix_case converts CamelCase names to snake_case.
        self.assertEqual(self.ac.normalize("PauseTotalNs", "prefix", fix_case = True), "prefix.pause_total_ns")
        self.assertEqual(self.ac.normalize("Metric.wordThatShouldBeSeparated", "prefix", fix_case = True), "prefix.metric.word_that_should_be_separated")

    def test_service_check(self):
        check_name = 'test.service_check'
        status = AgentCheck.CRITICAL
        tags = ['host:test', 'other:thing']
        host_name = 'foohost'
        timestamp = time.time()

        check = AgentCheck('test', {}, {'checksd_hostname':'foo'})
        check.service_check(check_name, status, tags, timestamp, host_name)
        self.assertEquals(len(check.service_checks), 1, check.service_checks)
        # get_service_checks() flushes the pending service checks.
        val = check.get_service_checks()
        self.assertEquals(len(val), 1)
        # 'id' is generated per check run, so recover it before comparing.
        check_run_id = val[0].get('id', None)
        self.assertNotEquals(check_run_id, None)
        self.assertEquals([{
            'id': check_run_id,
            'check': check_name,
            'status': status,
            'host_name': host_name,
            'tags': tags,
            'timestamp': timestamp,
            'message': None,
        }], val)
        # The queue must be empty after the flush.
        self.assertEquals(len(check.service_checks), 0, check.service_checks)

    def test_collector(self):
        agentConfig = {
            'api_key': 'test_apikey',
            'check_timings': True,
            'collect_ec2_tags': True,
            'collect_instance_metadata': False,
            'create_dd_check_tags': False,
            'version': 'test',
            'tags': '',
        }

        # Run a single checks.d check as part of the collector.
        # NOTE(review): requires a redis server on localhost:6379.
        redis_config = {
            "init_config": {},
            "instances": [{"host": "localhost", "port": 6379}]
        }
        checks = [load_check('redisdb', redis_config, agentConfig)]

        c = Collector(agentConfig, [], {}, get_hostname(agentConfig))
        payload = c.run({
            'initialized_checks': checks,
            'init_failed_checks': {}
        })
        metrics = payload['metrics']

        # Check that we got a timing metric for all checks.
        timing_metrics = [m for m in metrics
            if m[0] == 'datadog.agent.check_run_time']
        all_tags = []
        for metric in timing_metrics:
            all_tags.extend(metric[3]['tags'])
        for check in checks:
            tag = "check:%s" % check.name
            assert tag in all_tags, all_tags

    def test_apptags(self):
        '''
        Tests that the app tags (dd_check:<name>) are sent in the payload
        when create_dd_check_tags is enabled.
        '''
        agentConfig = {
            'api_key': 'test_apikey',
            'collect_ec2_tags': False,
            'collect_instance_metadata': False,
            'create_dd_check_tags': True,
            'version': 'test',
            'tags': '',
        }

        # Run a single checks.d check as part of the collector.
        redis_config = {
            "init_config": {},
            "instances": [{"host": "localhost", "port": 6379}]
        }
        checks = [load_check('redisdb', redis_config, agentConfig)]

        c = Collector(agentConfig, [], {}, get_hostname(agentConfig))
        payload = c.run({
            'initialized_checks': checks,
            'init_failed_checks': {}
        })

        # We check that the redis DD_CHECK_TAG is sent in the payload
        self.assertTrue('dd_check:redisdb' in payload['host-tags']['system'])

    def test_no_proxy(self):
        """ Starting with Agent 5.0.0, there should always be a local forwarder
        running and all payloads should go through it. So we should make sure
        that we pass the no_proxy environment variable that will be used by requests
        (See: https://github.com/kennethreitz/requests/pull/945 )
        """
        # NOTE(review): get_environ_proxies is imported but never used.
        from requests.utils import get_environ_proxies
        from os import environ as env

        env["http_proxy"] = "http://*****:*****@google.com:4444"
        proxy_from_env = get_proxy({})
        self.assertEqual(proxy_from_env,
            {
                "host": "google.com",
                "port": 4444,
                "user": "******",
                "password": "******"
            })

    def test_min_collection_interval(self):
        config = {'instances': [{}], 'init_config': {}}

        agentConfig = {
            'version': '0.1',
            'api_key': 'toto'
        }

        # default min collection interval for that check was 20sec
        check = load_check('disk', config, agentConfig)
        check.DEFAULT_MIN_COLLECTION_INTERVAL = 20

        # First run always collects.
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

        check.run()
        metrics = check.get_metrics()
        # No metrics should be collected as it's too early
        self.assertEquals(len(metrics), 0, metrics)

        # equivalent to time.sleep(20)
        check.last_collection_time[0] -= 20
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)
        # 3s elapsed < 20s interval: still nothing collected.
        check.last_collection_time[0] -= 3
        check.run()
        metrics = check.get_metrics()
        self.assertEquals(len(metrics), 0, metrics)
        # Interval of 0 disables throttling entirely.
        check.DEFAULT_MIN_COLLECTION_INTERVAL = 0
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

        # Per-instance min_collection_interval overrides the default.
        config = {'instances': [{'min_collection_interval':3}], 'init_config': {}}
        check = load_check('disk', config, agentConfig)
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)
        check.run()
        metrics = check.get_metrics()
        self.assertEquals(len(metrics), 0, metrics)
        check.last_collection_time[0] -= 4
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

        # The instance-level value (12) takes precedence over init_config (3).
        config = {'instances': [{'min_collection_interval': 12}], 'init_config': {'min_collection_interval':3}}
        check = load_check('disk', config, agentConfig)
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)
        check.run()
        metrics = check.get_metrics()
        self.assertEquals(len(metrics), 0, metrics)
        # 4s elapsed < 12s: still throttled.
        check.last_collection_time[0] -= 4
        check.run()
        metrics = check.get_metrics()
        self.assertEquals(len(metrics), 0, metrics)
        # 4s + 8s = 12s elapsed: collection happens again.
        check.last_collection_time[0] -= 8
        check.run()
        metrics = check.get_metrics()
        self.assertTrue(len(metrics) > 0, metrics)

    def test_ntp_global_settings(self):
        # Clear any existing ntp config
        NTPUtil._drop()

        config = {'instances': [{
            "host": "foo.com",
            "port": "bar",
            "version": 42,
            "timeout": 13.37}],
            'init_config': {}}

        agentConfig = {
            'version': '0.1',
            'api_key': 'toto'
        }

        # load this config in the ntp singleton
        ntp_util = NTPUtil(config)

        # default min collection interval for that check was 20sec
        check = load_check('ntp', config, agentConfig)
        check.run()

        # The singleton must reflect the explicit instance settings.
        self.assertEqual(ntp_util.args["host"], "foo.com")
        self.assertEqual(ntp_util.args["port"], "bar")
        self.assertEqual(ntp_util.args["version"], 42)
        self.assertEqual(ntp_util.args["timeout"], 13.37)

        # Clear the singleton to prepare for next config
        NTPUtil._drop()

        config = {'instances': [{}], 'init_config': {}}
        agentConfig = {
            'version': '0.1',
            'api_key': 'toto'
        }

        # load the new config
        ntp_util = NTPUtil(config)

        # default min collection interval for that check was 20sec
        check = load_check('ntp', config, agentConfig)
        # The run may fail (e.g. no network); only the parsed defaults matter.
        try:
            check.run()
        except Exception:
            pass

        # With an empty instance, the NTP defaults must be used.
        self.assertTrue(ntp_util.args["host"].endswith("datadog.pool.ntp.org"))
        self.assertEqual(ntp_util.args["port"], "ntp")
        self.assertEqual(ntp_util.args["version"], 3)
        self.assertEqual(ntp_util.args["timeout"], 1.0)

        NTPUtil._drop()
Example #46
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check with empty cross-run state stores."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Kept on the instance so state survives between check runs.
     self.idx_rates = dict()
     self._last_state_by_server = dict()
Example #47
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check and its hard-coded defaults."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Bind the syslog function once for later use.
     self.syslog = syslog.syslog
     # Defaults -- presumably a license-expiry window and a monitor-count
     # cap; confirm against the code that reads them.
     self.MONCOUNT = 100
     self.LICEXPIRE = 30
Example #48
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the check; host status persists across all runs."""
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        def _status_map():
            # Inner level: unknown statuses default to None.
            return defaultdict(lambda: None)

        # Two-level mapping kept on the instance between check runs.
        self.host_status = defaultdict(_status_map)
Example #49
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """Initialize the check.

        cluster_status is kept on the instance so it persists across
        all check runs.
        """
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)
        self.cluster_status = dict()
Example #50
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check with empty job/stage history."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # State from the previous run, used to compare against the next one.
     self.previous_stages = dict()
     self.previous_jobs = dict()
Example #51
0
 def __init__(self, name, init_config, agentConfig):
     """Initialize the check with empty per-instance caches."""
     AgentCheck.__init__(self, name, init_config, agentConfig)
     # Caches reused across runs: open DB handles and detected versions.
     self.versions = dict()
     self.dbs = dict()
Example #52
0
 def __init__(self, name, init_config, agentConfig):
     """Initialize the check with an empty WMI connection cache."""
     AgentCheck.__init__(self, name, init_config, agentConfig)
     # Reused across runs to avoid reconnecting every collection.
     self.wmi_conns = dict()
Example #53
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check and resolve the nodetool binary path."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # The nodetool location can be overridden via init_config.
     nodetool = init_config.get("nodetool", "/usr/bin/nodetool")
     self.nodetool_cmd = nodetool
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check with an empty high-watermark store."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Persists between runs so only new data is reported.
     self.high_watermarks = dict()
Example #55
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check; the cluster name is discovered later."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Unknown until set by a later check run.
     self.cluster_name = None
Example #56
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check with empty MySQL version caches."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Per-instance caches: detected server version, and a flag that --
     # judging by the name -- records whether the version exceeds 5.0.2
     # (confirm against the code that populates it).
     self.greater_502 = dict()
     self.mysql_version = dict()
Example #57
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the Kubernetes check.

     Raises:
         Exception: if more than one instance is configured -- this
             check only supports a single instance.
     """
     too_many_instances = instances is not None and len(instances) > 1
     if too_many_instances:
         raise Exception('Kubernetes check only supports one configured instance.')
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     self.kubeutil = KubeUtil()
Example #58
0
 def setUpAgentCheck(self):
     """Create a minimal AgentCheck instance for tests that need one."""
     agent_config = {'checksd_hostname': "foo"}
     self.ac = AgentCheck('test', {}, agent_config)
Example #59
0
# Defaults and feature toggles for the kubernetes check.
DEFAULT_PUBLISH_ALIASES = False
# Metric name patterns (with ? and * wildcards) reported as rates by default.
DEFAULT_ENABLED_RATES = [
    'diskio.io_service_bytes.stats.total', 'network.??_bytes', 'cpu.*.total'
]
DEFAULT_COLLECT_EVENTS = False
DEFAULT_NAMESPACES = ['default']

DEFAULT_SERVICE_EVENT_FREQ = 5 * 60  # seconds

# Network error/drop counters collected per interface.
NET_ERRORS = ['rx_errors', 'tx_errors', 'rx_dropped', 'tx_dropped']

# Metric name patterns reported as gauges by default.
DEFAULT_ENABLED_GAUGES = ['memory.usage', 'filesystem.usage']

# Unbound AgentCheck reporting methods. FUNC_MAP selects, for each base
# method, either the plain variant (False) or its histogram variant
# (True) -- presumably keyed by a "use histograms" flag at the call
# site; confirm against the check that consumes it.
GAUGE = AgentCheck.gauge
RATE = AgentCheck.rate
HISTORATE = AgentCheck.generate_historate_func(["container_name"])
HISTO = AgentCheck.generate_histogram_func(["container_name"])
FUNC_MAP = {
    GAUGE: {
        True: HISTO,
        False: GAUGE
    },
    RATE: {
        True: HISTORATE,
        False: RATE
    }
}

EVENT_TYPE = 'kubernetes'

# Mapping between k8s events and Datadog alert types (the mapping
# itself is defined past this excerpt).
Example #60
0
 def __init__(self, name, init_config, agentConfig, instances=None):
     """Initialize the check with zeroed GC counters."""
     AgentCheck.__init__(self, name, init_config, agentConfig, instances)
     # Last observed GC count per key; unseen keys start at 0 so the
     # first delta computation is well-defined.
     self._last_gc_count = defaultdict(int)