Example #1
    def test_service_check_tags(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_sc|check.1|0')
        stats.submit_packets('_sc|check.2|0|#t1')
        stats.submit_packets('_sc|check.3|0|h:i-abcd1234|#t1,t2|m:fakeout#t5')
        stats.submit_packets('_sc|check.4|0|#t1,t2:v2,t3,t4')

        service_checks = self.sort_service_checks(stats.flush_service_checks())

        assert len(service_checks) == 4
        first, second, third, fourth = service_checks

        nt.assert_equal(first['check'], 'check.1')
        assert first.get('tags') is None, "service_check['tags'] shouldn't be " + \
            "defined when no tags are explicitly set in the packet"

        nt.assert_equal(second['check'], 'check.2')
        nt.assert_equal(second['tags'], sorted(['t1']))

        nt.assert_equal(third['check'], 'check.3')
        nt.assert_equal(third['host_name'], 'i-abcd1234')
        nt.assert_equal(third['message'], 'fakeout#t5')
        nt.assert_equal(third['tags'], sorted(['t1', 't2']))

        nt.assert_equal(fourth['check'], 'check.4')
        nt.assert_equal(fourth['tags'], sorted(['t1', 't2:v2', 't3', 't4']))
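
The packets above use the dogstatsd service-check wire format: '_sc|<name>|<status>' followed by optional '|'-separated fields ('h:' hostname, '#' comma-separated tags, 'm:' message). The sketch below is an illustrative decoder only, not the real MetricsAggregator parser; the field prefixes and result keys are inferred from the test packets and assertions.

# Hedged sketch of '_sc' packet parsing, inferred from the test above.
def parse_service_check(packet):
    parts = packet.split('|')
    assert parts[0] == '_sc'
    result = {'check': parts[1], 'status': int(parts[2])}
    for field in parts[3:]:
        if field.startswith('#'):
            result['tags'] = sorted(field[1:].split(','))
        elif field.startswith('h:'):
            result['host_name'] = field[2:]
        elif field.startswith('m:'):
            result['message'] = field[2:]
    return result

parsed = parse_service_check('_sc|check.3|0|h:i-abcd1234|#t1,t2|m:fakeout#t5')
assert parsed['host_name'] == 'i-abcd1234'
assert parsed['message'] == 'fakeout#t5'  # '#t5' stays inside the message field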
Example #2
    def test_custom_aggregate(self):
        configstr = 'median, max'
        stats = MetricsAggregator(
            'myhost',
            histogram_aggregates=get_histogram_aggregates(configstr)
        )

        self.assertEquals(
            sorted(stats.metric_config[Histogram]['aggregates']),
            ['max', 'median'],
            stats.metric_config[Histogram]
        )

        for i in xrange(20):
            stats.submit_packets('myhistogram:{0}|h'.format(i))

        metrics = stats.flush()

        self.assertEquals(len(metrics), 3, metrics)

        value_by_type = {}
        for k in metrics:
            value_by_type[k['metric'][len('myhistogram')+1:]] = k['points'][0][1]

        self.assertEquals(value_by_type['median'], 9, value_by_type)
        self.assertEquals(value_by_type['max'], 19, value_by_type)
        self.assertEquals(value_by_type['95percentile'], 18, value_by_type)
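
The expected median and 95th percentile follow from rank-based selection over the 20 sorted samples 0..19. The nearest-rank formula below is an inference from the asserted numbers, not taken from the aggregator source:

# int(round(p * n)) - 1 reproduces the asserted indices.
samples = sorted(range(20))
assert samples[int(round(0.50 * 20)) - 1] == 9    # median
assert samples[int(round(0.95 * 20)) - 1] == 18   # 95percentile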
Example #3
    def test_custom_single_percentile(self):
        configstr = '0.40'
        stats = MetricsAggregator(
            'myhost',
            histogram_percentiles=get_histogram_percentiles(configstr)
        )

        self.assertEquals(
            stats.metric_config[Histogram]['percentiles'],
            [0.40],
            stats.metric_config[Histogram]
        )

        for i in xrange(20):
            stats.submit_packets('myhistogram:{0}|h'.format(i))

        metrics = stats.flush()

        self.assertEquals(len(metrics), 5, metrics)

        value_by_type = {}
        for k in metrics:
            value_by_type[k[0][len('myhistogram')+1:]] = k[2]

        self.assertEquals(value_by_type['40percentile'], 7, value_by_type)
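
The same nearest-rank arithmetic (an assumption inferred from the asserted value) explains the single custom percentile: with 20 samples 0..19, the 40th-percentile index is int(round(0.40 * 20)) - 1 == 7, whose sample value is 7.

assert sorted(range(20))[int(round(0.40 * 20)) - 1] == 7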
Example #4
    def test_tags(self):
        stats = MetricsAggregator("myhost")
        stats.submit_packets("gauge:1|c")
        stats.submit_packets("gauge:2|c|@1")
        stats.submit_packets("gauge:4|c|#tag1,tag2")
        stats.submit_packets("gauge:8|c|#tag2,tag1")  # Should be the same as above
        stats.submit_packets("gauge:16|c|#tag3,tag4")

        metrics = self.sort_metrics(stats.flush())

        assert len(metrics) == 3
        first, second, third = metrics

        nt.assert_equal(first["metric"], "gauge")
        nt.assert_equal(first["tags"], None)
        nt.assert_equal(first["points"][0][1], 3)
        nt.assert_equal(first["host"], "myhost")

        nt.assert_equal(second["metric"], "gauge")
        nt.assert_equal(second["tags"], ("tag1", "tag2"))
        nt.assert_equal(second["points"][0][1], 12)
        nt.assert_equal(second["host"], "myhost")

        nt.assert_equal(third["metric"], "gauge")
        nt.assert_equal(third["tags"], ("tag3", "tag4"))
        nt.assert_equal(third["points"][0][1], 16)
        nt.assert_equal(third["host"], "myhost")
Example #5
    def test_custom_multiple_percentile(self):
        configstr = '0.4, 0.65, 0.999'
        stats = MetricsAggregator(
            'myhost',
            histogram_percentiles=get_histogram_percentiles(configstr)
        )

        self.assertEquals(
            stats.metric_config[Histogram]['percentiles'],
            [0.4, 0.65, 0.99],  # 0.999 from the config string is rounded to two decimal places
            stats.metric_config[Histogram]
        )

        for i in xrange(20):
            stats.submit_packets('myhistogram:{0}|h'.format(i))

        metrics = stats.flush()

        self.assertEquals(len(metrics), 7, metrics)

        value_by_type = {}
        for k in metrics:
            value_by_type[k['metric'][len('myhistogram')+1:]] = k['points'][0][1]

        self.assertEquals(value_by_type['40percentile'], 7, value_by_type)
        self.assertEquals(value_by_type['65percentile'], 12, value_by_type)
        self.assertEquals(value_by_type['99percentile'], 19, value_by_type)
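
All three expectations again match nearest-rank selection (an inference from the asserted values, as in Example #2):

samples = sorted(range(20))
for p, expected in [(0.4, 7), (0.65, 12), (0.99, 19)]:
    assert samples[int(round(p * 20)) - 1] == expected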
Example #6
    def test_event_tags(self):
        stats = MetricsAggregator("myhost")
        stats.submit_packets("_e{6,4}:title1|text")
        stats.submit_packets("_e{6,4}:title2|text|#t1")
        stats.submit_packets("_e{6,4}:title3|text|#t1,t2:v2,t3,t4")
        stats.submit_packets("_e{6,4}:title4|text|k:key|p:normal|#t1,t2")

        events = self.sort_events(stats.flush_events())

        assert len(events) == 4
        first, second, third, fourth = events

        try:
            first["tags"]
        except Exception:
            assert True
        else:
            assert False, "event['tags'] shouldn't be defined when no tags aren't explicited in the packet"
        nt.assert_equal(first["msg_title"], "title1")
        nt.assert_equal(first["msg_text"], "text")

        nt.assert_equal(second["msg_title"], "title2")
        nt.assert_equal(second["msg_text"], "text")
        nt.assert_equal(second["tags"], sorted(["t1"]))

        nt.assert_equal(third["msg_title"], "title3")
        nt.assert_equal(third["msg_text"], "text")
        nt.assert_equal(third["tags"], sorted(["t1", "t2:v2", "t3", "t4"]))

        nt.assert_equal(fourth["msg_title"], "title4")
        nt.assert_equal(fourth["msg_text"], "text")
        nt.assert_equal(fourth["aggregation_key"], "key")
        nt.assert_equal(fourth["priority"], "normal")
        nt.assert_equal(fourth["tags"], sorted(["t1", "t2"]))
Example #7
    def test_event_tags(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_e{6,4}:title1|text')
        stats.submit_packets('_e{6,4}:title2|text|#t1')
        stats.submit_packets('_e{6,4}:title3|text|#t1,t2:v2,t3,t4')
        stats.submit_packets('_e{6,4}:title4|text|k:key|p:normal|#t1,t2')

        events = self.sort_events(stats.flush_events())

        assert len(events) == 4
        first, second, third, fourth = events

        try:
            first['tags']
        except Exception:
            assert True
        else:
            assert False, "event['tags'] shouldn't be defined when no tags are explicitly set in the packet"
        nt.assert_equal(first['msg_title'], 'title1')
        nt.assert_equal(first['msg_text'], 'text')

        nt.assert_equal(second['msg_title'], 'title2')
        nt.assert_equal(second['msg_text'], 'text')
        nt.assert_equal(second['tags'], sorted(['t1']))

        nt.assert_equal(third['msg_title'], 'title3')
        nt.assert_equal(third['msg_text'], 'text')
        nt.assert_equal(third['tags'], sorted(['t1', 't2:v2', 't3', 't4']))

        nt.assert_equal(fourth['msg_title'], 'title4')
        nt.assert_equal(fourth['msg_text'], 'text')
        nt.assert_equal(fourth['aggregation_key'], 'key')
        nt.assert_equal(fourth['priority'], 'normal')
        nt.assert_equal(fourth['tags'], sorted(['t1', 't2']))
Example #8
    def test_histogram(self):
        stats = MetricsAggregator('myhost')

        # Sample every integer in 0..99 twenty times (for both 'h' and
        # 'ms'), so the Nth percentile should land close to the value N.
        percentiles = range(100)
        random.shuffle(percentiles) # in place
        for i in percentiles:
            for j in xrange(20):
                for type_ in ['h', 'ms']:
                    m = 'my.p:%s|%s' % (i, type_)
                    stats.submit_packets(m)

        metrics = self.sort_metrics(stats.flush())

        nt.assert_equal(len(metrics), 5)
        p95, pavg, pcount, pmax, pmed = self.sort_metrics(metrics)
        nt.assert_equal(p95['metric'], 'my.p.95percentile')
        self.assert_almost_equal(p95['points'][0][1], 95, 10)
        self.assert_almost_equal(pmax['points'][0][1], 99, 1)
        self.assert_almost_equal(pmed['points'][0][1], 50, 2)
        self.assert_almost_equal(pavg['points'][0][1], 50, 2)
        self.assert_almost_equal(pcount['points'][0][1], 4000, 0) # 100 * 20 * 2
        nt.assert_equals(p95['host'], 'myhost')

        # Ensure that histograms are reset.
        metrics = self.sort_metrics(stats.flush())
        assert not metrics
Example #9
    def test_tags(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('gauge:1|c')
        stats.submit_packets('gauge:2|c|@1')
        stats.submit_packets('gauge:4|c|#tag1,tag2')
        stats.submit_packets('gauge:8|c|#tag2,tag1') # Should be the same as above
        stats.submit_packets('gauge:16|c|#tag3,tag4')

        metrics = self.sort_metrics(stats.flush())

        assert len(metrics) == 3
        first, second, third = metrics

        nt.assert_equal(first['metric'], 'gauge')
        nt.assert_equal(first['tags'], None)
        nt.assert_equal(first['points'][0][1], 3)
        nt.assert_equal(first['host'], 'myhost')

        nt.assert_equal(second['metric'], 'gauge')
        nt.assert_equal(second['tags'], ('tag1', 'tag2'))
        nt.assert_equal(second['points'][0][1], 12)
        nt.assert_equal(second['host'], 'myhost')

        nt.assert_equal(third['metric'], 'gauge')
        nt.assert_equal(third['tags'], ('tag3', 'tag4'))
        nt.assert_equal(third['points'][0][1], 16)
        nt.assert_equal(third['host'], 'myhost')
Example #10
    def test_magic_tags(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('my.gauge.a:1|c|#host:test-a')
        stats.submit_packets('my.gauge.b:4|c|#tag1,tag2,host:test-b')
        stats.submit_packets('my.gauge.b:8|c|#host:test-b,tag2,tag1')
        stats.submit_packets('my.gauge.c:10|c|#tag3')
        stats.submit_packets('my.gauge.c:16|c|#device:floppy,tag3')

        metrics = self.sort_metrics(stats.flush())

        nt.assert_equal(len(metrics), 4)
        first, second, third, fourth = metrics

        nt.assert_equal(first['metric'], 'my.gauge.a')
        nt.assert_equal(first['tags'], None)
        nt.assert_equal(first['points'][0][1], 1)
        nt.assert_equal(first['host'], 'test-a')

        nt.assert_equal(second['metric'], 'my.gauge.b')
        nt.assert_equal(second['tags'], ('tag1', 'tag2'))
        nt.assert_equal(second['points'][0][1], 12)
        nt.assert_equal(second['host'], 'test-b')

        nt.assert_equal(third['metric'], 'my.gauge.c')
        nt.assert_equal(third['tags'], ('tag3', ))
        nt.assert_equal(third['points'][0][1], 10)
        nt.assert_equal(third['device_name'], None)

        nt.assert_equal(fourth['metric'], 'my.gauge.c')
        nt.assert_equal(fourth['tags'], ('tag3', ))
        nt.assert_equal(fourth['points'][0][1], 16)
        nt.assert_equal(fourth['device_name'], 'floppy')
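
The 'host:' and 'device:' entries are the magic tags: they are stripped from the tag list and promoted to the sample's host and device_name fields, which is why third['tags'] is just ('tag3',). A rough sketch of that extraction, illustrative only and not the aggregator's actual code:

def split_magic_tags(tags):
    host = device = None
    remaining = []
    for tag in tags:
        if tag.startswith('host:'):
            host = tag[len('host:'):]
        elif tag.startswith('device:'):
            device = tag[len('device:'):]
        else:
            remaining.append(tag)
    return tuple(sorted(remaining)) or None, host, device

assert split_magic_tags(['device:floppy', 'tag3']) == (('tag3',), None, 'floppy')
assert split_magic_tags(['tag1', 'tag2', 'host:test-b']) == (('tag1', 'tag2'), 'test-b', None)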
Example #11
    def test_histogram(self):
        # The min is not enabled by default
        stats = MetricsAggregator(
            'myhost',
            histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']
        )

        # Sample every integer in 0..99 twenty times (for both 'h' and
        # 'ms'), so the Nth percentile should land close to the value N.
        percentiles = range(100)
        random.shuffle(percentiles)  # in place
        for i in percentiles:
            for j in xrange(20):
                for type_ in ['h', 'ms']:
                    m = 'my.p:%s|%s' % (i, type_)
                    stats.submit_packets(m)

        metrics = self.sort_metrics(stats.flush())

        nt.assert_equal(len(metrics), 6)
        p95, pavg, pcount, pmax, pmed, pmin = self.sort_metrics(metrics)
        nt.assert_equal(p95[0], 'my.p.95percentile')
        self.assert_almost_equal(p95[2], 95, 10)
        self.assert_almost_equal(pmax[2], 99, 1)
        self.assert_almost_equal(pmed[2], 50, 2)
        self.assert_almost_equal(pavg[2], 50, 2)
        self.assert_almost_equal(pmin[2], 1, 1)
        self.assert_almost_equal(pcount[2], 4000, 0)  # 100 * 20 * 2
        nt.assert_equals(p95[3]['hostname'], 'myhost')

        # Ensure that histograms are reset.
        metrics = self.sort_metrics(stats.flush())
        assert not metrics
Example #12
    def test_scientific_notation(self):
        stats = MetricsAggregator('myhost', interval=10)

        stats.submit_packets('test.scinot:9.512901e-05|g')
        metrics = self.sort_metrics(stats.flush())

        assert len(metrics) == 1
        ts, val = metrics[0].get('points')[0]
        nt.assert_almost_equal(val, 9.512901e-05)
Example #13
class TestAggregator(unittest.TestCase):
    def setUp(self):
        self.aggr = MetricsAggregator("test-aggr")

    def test_dupe_tags(self):
        self.aggr.increment("test-counter", 1, tags=["a", "b"])
        self.aggr.increment("test-counter", 1, tags=["a", "b", "b"])
        self.assertEquals(len(self.aggr.metrics), 1, self.aggr.metrics)
        metric = self.aggr.metrics.values()[0]
        self.assertEquals(metric.value, 2)
Example #14
class TestAggregator(unittest.TestCase):
    def setUp(self):
        self.aggr = MetricsAggregator('test-aggr')

    def test_dupe_tags(self):
        self.aggr.increment('test-counter', 1, tags=['a', 'b'])
        self.aggr.increment('test-counter', 1, tags=['a', 'b', 'b'])
        self.assertEquals(len(self.aggr.metrics), 1, self.aggr.metrics)
        metric = self.aggr.metrics.values()[0]
        self.assertEquals(metric.value, 2)
Example #15
    def test_sampled_counter(self):

        # Submit a sampled counter.
        stats = MetricsAggregator('myhost')
        stats.submit_packets('sampled.counter:1|c|@0.5')
        metrics = stats.flush()
        assert len(metrics) == 1
        m = metrics[0]
        assert m['metric'] == 'sampled.counter'
        nt.assert_equal(m['points'][0][1], 2)
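
The expected value of 2 comes from sample-rate scaling: '@0.5' declares that only half of the events were actually sent, so each received value is divided by the rate on aggregation.

value, sample_rate = 1, 0.5
assert value / sample_rate == 2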
Example #16
    def test_sampled_counter(self):

        # Submit a sampled counter.
        stats = MetricsAggregator("myhost")
        stats.submit_packets("sampled.counter:1|c|@0.5")
        metrics = stats.flush()
        assert len(metrics) == 1
        m = metrics[0]
        assert m["metric"] == "sampled.counter"
        nt.assert_equal(m["points"][0][1], 2)
Example #17
    def test_counter(self):
        stats = MetricsAggregator('myhost')

        # Track some counters.
        stats.submit_packets('my.first.counter:1|c')
        stats.submit_packets('my.first.counter:5|c')
        stats.submit_packets('my.second.counter:1|c')
        stats.submit_packets('my.third.counter:3|c')

        # Ensure they roll up nicely.
        metrics = self.sort_metrics(stats.flush())
        assert len(metrics) == 3

        first, second, third = metrics
        nt.assert_equals(first['metric'], 'my.first.counter')
        nt.assert_equals(first['points'][0][1], 6)
        nt.assert_equals(first['host'], 'myhost')

        nt.assert_equals(second['metric'], 'my.second.counter')
        nt.assert_equals(second['points'][0][1], 1)

        nt.assert_equals(third['metric'], 'my.third.counter')
        nt.assert_equals(third['points'][0][1], 3)

        # Ensure that counters reset to zero.
        metrics = self.sort_metrics(stats.flush())
        first, second, third = metrics
        nt.assert_equals(first['metric'], 'my.first.counter')
        nt.assert_equals(first['points'][0][1], 0)
        nt.assert_equals(second['metric'], 'my.second.counter')
        nt.assert_equals(second['points'][0][1], 0)
        nt.assert_equals(third['metric'], 'my.third.counter')
        nt.assert_equals(third['points'][0][1], 0)
Example #18
    def test_gauge_sample_rate(self):
        stats = MetricsAggregator('myhost')

        # Submit a sampled gauge metric.
        stats.submit_packets('sampled.gauge:10|g|@0.1')

        # Assert that it's treated normally.
        metrics = stats.flush()
        nt.assert_equal(len(metrics), 1)
        m = metrics[0]
        nt.assert_equal(m['metric'], 'sampled.gauge')
        nt.assert_equal(m['points'][0][1], 10)
Example #19
    def test_gauge_sample_rate(self):
        stats = MetricsAggregator("myhost")

        # Submit a sampled gauge metric.
        stats.submit_packets("sampled.gauge:10|g|@0.1")

        # Assert that it's treated normally.
        metrics = stats.flush()
        nt.assert_equal(len(metrics), 1)
        m = metrics[0]
        nt.assert_equal(m["metric"], "sampled.gauge")
        nt.assert_equal(m["points"][0][1], 10)
Example #20
    def test_batch_submission(self):
        # Submit a sampled histogram.
        stats = MetricsAggregator("myhost")
        metrics = ["counter:1|c", "counter:1|c", "gauge:1|g"]
        packet = "\n".join(metrics)
        stats.submit_packets(packet)

        metrics = self.sort_metrics(stats.flush())
        nt.assert_equal(2, len(metrics))
        counter, gauge = metrics
        assert counter["points"][0][1] == 2
        assert gauge["points"][0][1] == 1
Example #21
    def test_sampled_histogram(self):
        # Submit a sampled histogram.
        stats = MetricsAggregator("myhost")
        stats.submit_packets("sampled.hist:5|h|@0.5")

        # Assert we scale up properly.
        metrics = self.sort_metrics(stats.flush())
        p95, pavg, pcount, pmax, pmed = self.sort_metrics(metrics)

        nt.assert_equal(pcount["points"][0][1], 2)
        for p in [p95, pavg, pmed, pmax]:
            nt.assert_equal(p["points"][0][1], 5)
Example #22
    def test_monokey_batching_withtags_with_sampling(self):
        # The min is not enabled by default
        stats = MetricsAggregator(
            'host',
            histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']
        )
        stats.submit_packets('test_metric:1.5|c|#tag1:one,tag2:two:2.3|g|#tag3:three:3|g:42|h|#tag1:12,tag42:42|@0.22')

        stats_ref = MetricsAggregator(
            'host',
            histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']
        )
        packets = [
            'test_metric:1.5|c|#tag1:one,tag2:two',
            'test_metric:2.3|g|#tag3:three',
            'test_metric:3|g',
            'test_metric:42|h|#tag1:12,tag42:42|@0.22'
        ]
        stats_ref.submit_packets("\n".join(packets))

        metrics = self.sort_metrics(stats.flush())
        metrics_ref = self.sort_metrics(stats_ref.flush())

        self.assertTrue(len(metrics) == len(metrics_ref) == 9, (metrics, metrics_ref))
        for i in range(len(metrics)):
            nt.assert_equal(metrics[i]['points'][0][1], metrics_ref[i]['points'][0][1])
            nt.assert_equal(metrics[i]['tags'], metrics_ref[i]['tags'])
Example #23
    def test_monokey_batching_notags(self):
        # The min is not enabled by default
        stats = MetricsAggregator(
            'host',
            histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']
        )
        stats.submit_packets('test_hist:0.3|ms:2.5|ms|@0.5:3|ms')

        stats_ref = MetricsAggregator(
            'host',
            histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']
        )
        packets = [
            'test_hist:0.3|ms',
            'test_hist:2.5|ms|@0.5',
            'test_hist:3|ms'
        ]
        stats_ref.submit_packets("\n".join(packets))

        metrics = stats.flush()
        metrics_ref = stats_ref.flush()

        self.assertTrue(len(metrics) == len(metrics_ref) == 6, (metrics, metrics_ref))

        for i in range(len(metrics)):
            nt.assert_equal(metrics[i]['points'][0][1], metrics_ref[i]['points'][0][1])
Example #24
    def test_sampled_histogram(self):
        # Submit a sampled histogram.
        # The min is not enabled by default
        stats = MetricsAggregator(
            'myhost',
            histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']
        )
        stats.submit_packets('sampled.hist:5|h|@0.5')

        # Assert we scale up properly.
        metrics = self.sort_metrics(stats.flush())
        p95, pavg, pcount, pmax, pmed, pmin = self.sort_metrics(metrics)

        nt.assert_equal(pcount['points'][0][1], 2)
        for p in [p95, pavg, pmed, pmax, pmin]:
            nt.assert_equal(p['points'][0][1], 5)
Example #25
    def test_event_text(self):
        stats = MetricsAggregator("myhost")
        stats.submit_packets("_e{2,0}:t1|")
        stats.submit_packets("_e{2,12}:t2|text|content")
        stats.submit_packets("_e{2,23}:t3|First line\\nSecond line")  # \n is a newline
        stats.submit_packets(u"_e{2,19}:t4|♬ †øU †øU ¥ºu T0µ ♪")  # utf-8 compliant

        events = self.sort_events(stats.flush_events())

        assert len(events) == 4
        first, second, third, fourth = events

        nt.assert_equal(first["msg_text"], "")
        nt.assert_equal(second["msg_text"], "text|content")
        nt.assert_equal(third["msg_text"], "First line\nSecond line")
        nt.assert_equal(fourth["msg_text"], u"♬ †øU †øU ¥ºu T0µ ♪")
Example #26
    def __init__(self, name, init_config, agentConfig, instances=None):
        """
        Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agentConfig: The global configuration for the agent
        :param instances: A list of configuration objects for each instance.
        """
        from aggregator import MetricsAggregator

        self.name = name
        self.init_config = init_config or {}
        self.agentConfig = agentConfig
        self.in_developer_mode = agentConfig.get('developer_mode') and psutil is not None
        self._internal_profiling_stats = None

        self.hostname = agentConfig.get('checksd_hostname') or get_hostname(agentConfig)
        self.log = logging.getLogger('%s.%s' % (__name__, name))

        self.aggregator = MetricsAggregator(
            self.hostname,
            formatter=agent_formatter,
            recent_point_threshold=agentConfig.get('recent_point_threshold', None),
            histogram_aggregates=agentConfig.get('histogram_aggregates'),
            histogram_percentiles=agentConfig.get('histogram_percentiles')
        )

        self.events = []
        self.service_checks = []
        self.instances = instances or []
        self.warnings = []
        self.library_versions = None
        self.last_collection_time = defaultdict(int)
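
For context, a hypothetical instantiation of this constructor. This assumes the __init__ belongs to the agent's AgentCheck class (shown more fully in Example #40) and that the agent's modules are importable; the config values are made up, but the keys are exactly the ones the constructor reads.

check = AgentCheck(
    'my_check',
    init_config={},
    agentConfig={'checksd_hostname': 'myhost'},
    instances=[{}],
)
assert check.hostname == 'myhost'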
Example #27
    def test_event_text(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_e{2,0}:t1|')
        stats.submit_packets('_e{2,12}:t2|text|content')
        stats.submit_packets('_e{2,23}:t3|First line\\nSecond line') # \n is a newline
        stats.submit_packets(u'_e{2,19}:t4|♬ †øU †øU ¥ºu T0µ ♪') # utf-8 compliant

        events = self.sort_events(stats.flush_events())

        assert len(events) == 4
        first, second, third, fourth = events

        nt.assert_equal(first['msg_text'], '')
        nt.assert_equal(second['msg_text'], 'text|content')
        nt.assert_equal(third['msg_text'], 'First line\nSecond line')
        nt.assert_equal(fourth['msg_text'], u'♬ †øU †øU ¥ºu T0µ ♪')
Example #28
    def __init__(self, name, init_config, agentConfig, instances=None):
        """
        Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agentConfig: The global configuration for the agent
        :param instances: A list of configuration objects for each instance.
        """
        from aggregator import MetricsAggregator


        self.name = name
        self.init_config = init_config
        self.agentConfig = agentConfig
        self.hostname = get_hostname(agentConfig)
        self.log = logging.getLogger('%s.%s' % (__name__, name))

        self.aggregator = MetricsAggregator(
            self.hostname,
            formatter=agent_formatter,
            recent_point_threshold=agentConfig.get('recent_point_threshold', None)
        )

        self.events = []
        self.service_checks = []
        self.instances = instances or []
        self.warnings = []
        self.library_versions = None
Example #29
    def test_batch_submission(self):
        # Submit a sampled histogram.
        stats = MetricsAggregator('myhost')
        metrics = [
            'counter:1|c',
            'counter:1|c',
            'gauge:1|g'
        ]
        packet = "\n".join(metrics)
        stats.submit_packets(packet)

        metrics = self.sort_metrics(stats.flush())
        nt.assert_equal(2, len(metrics))
        counter, gauge = metrics
        assert counter[2] == 2
        assert gauge[2] == 1
Example #30
    def __init__(self, name, init_config, agentConfig, instances=None):
        """
        Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agentConfig: The global configuration for the agent
        :param instances: A list of configuration objects for each instance.
        """
        from aggregator import MetricsAggregator

        self._enabled_checks.append(name)
        self._enabled_checks = list(set(self._enabled_checks))

        self.name = name
        self.init_config = init_config or {}
        self.agentConfig = agentConfig
        self.in_developer_mode = agentConfig.get("developer_mode") and psutil
        self._internal_profiling_stats = None
        self.default_integration_http_timeout = float(agentConfig.get("default_integration_http_timeout", 9))

        self.hostname = agentConfig.get("checksd_hostname") or get_hostname(agentConfig)
        self.log = logging.getLogger("%s.%s" % (__name__, name))

        self.min_collection_interval = self.init_config.get(
            "min_collection_interval", self.DEFAULT_MIN_COLLECTION_INTERVAL
        )

        self.aggregator = MetricsAggregator(
            self.hostname,
            expiry_seconds=self.min_collection_interval + self.DEFAULT_EXPIRY_SECONDS,
            formatter=agent_formatter,
            recent_point_threshold=agentConfig.get("recent_point_threshold", None),
            histogram_aggregates=agentConfig.get("histogram_aggregates"),
            histogram_percentiles=agentConfig.get("histogram_percentiles"),
        )

        self.events = []
        self.service_checks = []
        self.instances = instances or []
        self.warnings = []
        self.library_versions = None
        self.last_collection_time = defaultdict(int)
        self._instance_metadata = []
        self.svc_metadata = []
        self.historate_dict = {}

        # Set proxy settings
        self.proxy_settings = get_proxy(self.agentConfig)
        self._use_proxy = False if init_config is None else init_config.get("use_agent_proxy", True)
        self.proxies = {"http": None, "https": None}
        if self.proxy_settings and self._use_proxy:
            uri = "{host}:{port}".format(host=self.proxy_settings["host"], port=self.proxy_settings["port"])
            if self.proxy_settings["user"] and self.proxy_settings["password"]:
                uri = "{user}:{password}@{uri}".format(
                    user=self.proxy_settings["user"], password=self.proxy_settings["password"], uri=uri
                )
            self.proxies["http"] = "http://{uri}".format(uri=uri)
            self.proxies["https"] = "https://{uri}".format(uri=uri)
Example #31
    def test_histogram_normalization(self):
        # The min is not enabled by default
        stats = MetricsAggregator(
            'myhost',
            interval=10,
            histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES + ['min'])
        for i in range(5):
            stats.submit_packets('h1:1|h')
        for i in range(20):
            stats.submit_packets('h2:1|h')

        metrics = self.sort_metrics(stats.flush())
        _, _, h1count, _, _, _, \
        _, _, h2count, _, _, _ = metrics

        nt.assert_equal(h1count['points'][0][1], 0.5)
        nt.assert_equal(h2count['points'][0][1], 2)
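
The fractional counts follow from interval normalization: with interval=10, histogram.count is flushed as a per-second rate, so 5 samples become 0.5 and 20 become 2.

interval = 10.0
assert 5 / interval == 0.5 and 20 / interval == 2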
Example #32
    def test_service_check_basic(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_sc|check.1|0')
        stats.submit_packets('_sc|check.2|1')
        stats.submit_packets('_sc|check.3|2')

        service_checks = self.sort_service_checks(stats.flush_service_checks())

        assert len(service_checks) == 3
        first, second, third = service_checks

        assert first['check'] == 'check.1'
        assert first['status'] == 0
        assert second['check'] == 'check.2'
        assert second['status'] == 1
        assert third['check'] == 'check.3'
        assert third['status'] == 2
Example #33
    def test_service_check_basic(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_sc|check.1|0')
        stats.submit_packets('_sc|check.2|1')
        stats.submit_packets('_sc|check.3|2')

        service_checks = self.sort_service_checks(stats.flush_service_checks())

        assert len(service_checks) == 3
        first, second, third = service_checks

        nt.assert_equal(first['check'], 'check.1')
        nt.assert_equal(first['status'], 0)
        nt.assert_equal(second['check'], 'check.2')
        nt.assert_equal(second['status'], 1)
        nt.assert_equal(third['check'], 'check.3')
        nt.assert_equal(third['status'], 2)
Example #34
    def test_event_text_utf8(self):
        stats = MetricsAggregator('myhost', utf8_decoding=True)
        # Should raise because the content is not utf-8 encoded bytes

        self.assertRaises(Exception, stats.submit_packets,
                          u'_e{2,19}:t4|♬ †øU †øU ¥ºu T0µ ♪')
        stats.submit_packets(u'_e{2,19}:t4|♬ †øU †øU ¥ºu T0µ ♪'.encode(
            'utf-8'))  # utf-8 compliant
        # Normal packet
        stats.submit_packets(
            '_e{2,23}:t3|First line\\nSecond line')  # \n is a newline

        events = self.sort_events(stats.flush_events())

        assert len(events) == 2

        nt.assert_equal(events[0]['msg_text'], 'First line\nSecond line')
        nt.assert_equal(events[1]['msg_text'], u'♬ †øU †øU ¥ºu T0µ ♪')
Example #35
def test_spurr(subprocess_patch):
    # defer import to test to avoid breaking get_subprocess_output
    # patching.
    from datadog_checks.lparstats import LPARStats

    hostname = 'foo'
    aggregator = MetricsAggregator(
        hostname,
        interval=1.0,
        histogram_aggregates=None,
        histogram_percentiles=None,
    )

    c = LPARStats("lparstats", {}, {}, aggregator)
    c.collect_spurr()
    metrics = c.aggregator.flush()[:-1]  # we remove the datadog.agent.running metric

    expected_metrics = [
        'system.lpar.spurr.user',
        'system.lpar.spurr.sys',
        'system.lpar.spurr.wait',
        'system.lpar.spurr.idle',
        'system.lpar.spurr.user.norm',
        'system.lpar.spurr.sys.norm',
        'system.lpar.spurr.wait.norm',
        'system.lpar.spurr.idle.norm',
        'system.lpar.spurr.user.pct',
        'system.lpar.spurr.sys.pct',
        'system.lpar.spurr.wait.pct',
        'system.lpar.spurr.idle.pct',
        'system.lpar.spurr.user.norm.pct',
        'system.lpar.spurr.sys.norm.pct',
        'system.lpar.spurr.wait.norm.pct',
        'system.lpar.spurr.idle.norm.pct',
    ]

    assert len(metrics) == len(expected_metrics)
    for metric in metrics:
        assert metric['metric'] in expected_metrics
Example #36
def init(config_path=None, use_watchdog=False, use_forwarder=False):
    """Configure the server and the reporting thread.
    """
    c = get_config(parse_args=False, cfg_path=config_path)
    log.debug("Configuration dogstatsd")

    port = c['dogstatsd_port']
    interval = int(c['dogstatsd_interval'])
    api_key = c['api_key']
    non_local_traffic = c['non_local_traffic']

    target = c['dd_url']
    if use_forwarder:
        target = c['dogstatsd_target']

    hostname = get_hostname(c)

    # Create the aggregator (which is the point of communication between
    # the server and reporting threads).
    assert 0 < interval

    aggregator = MetricsAggregator(hostname,
                                   interval,
                                   recent_point_threshold=c.get(
                                       'recent_point_threshold', None))

    # Start the reporting thread.
    reporter = Reporter(interval, aggregator, target, api_key, use_watchdog)

    # Start the server on an IPv4 stack
    # Default to loopback
    server_host = '127.0.0.1'
    # If specified, bind to all addresses
    if non_local_traffic:
        server_host = ''

    server = Server(aggregator, server_host, port)

    return reporter, server, c
Example #37
def test_memory_entitlements(subprocess_patch):
    # defer import to test to avoid breaking get_subprocess_output
    # patching.
    from datadog_checks.lparstats import LPARStats

    hostname = 'foo'
    aggregator = MetricsAggregator(
        hostname,
        interval=1.0,
        histogram_aggregates=None,
        histogram_percentiles=None,
    )

    c = LPARStats("lparstats", {}, {}, aggregator)
    c.collect_memory_entitlements()
    metrics = c.aggregator.flush()[:-1]  # we remove the datadog.agent.running metric

    expected_metrics = [
        'system.lpar.memory.entitlement.iomin',
        'system.lpar.memory.entitlement.iodes',
        'system.lpar.memory.entitlement.iomu',
        'system.lpar.memory.entitlement.iores',
        'system.lpar.memory.entitlement.iohwm',
        'system.lpar.memory.entitlement.iomaf',
    ]

    # compile entitlements from mock output
    output = list(filter(None, AIX_LPARSTATS_MEMORY_ENTITLEMENTS.splitlines()))
    output = output[c.MEMORY_ENTITLEMENTS_START_IDX + 1:]
    entitlements = collect_column(output, 0)

    assert len(metrics) == (len(expected_metrics) * len(entitlements))
    for metric in metrics:
        for tag in metric['tags']:
            tag = tag.decode('utf-8')
            if 'iompn' in tag:
                assert tag.split(':')[1] in entitlements
Example #38
def test_memory_page(subprocess_patch):
    # defer import to test to avoid breaking get_subprocess_output
    # patching.
    from datadog_checks.lparstats import LPARStats

    hostname = 'foo'
    aggregator = MetricsAggregator(
        hostname,
        interval=1.0,
        histogram_aggregates=None,
        histogram_percentiles=None,
    )

    c = LPARStats("lparstats", {}, {}, aggregator)
    c.collect_memory(page_stats=True)
    metrics = c.aggregator.flush()[:-1]  # we remove the datadog.agent.running metric

    # NOTE: iomf unavailable
    expected_metrics = [
        'system.lpar.memory.physb',
        'system.lpar.memory.hpi',
        'system.lpar.memory.hpit',
        'system.lpar.memory.pmem',
        'system.lpar.memory.iomu',
        'system.lpar.memory.iomin',
        'system.lpar.memory.iohwm',
        'system.lpar.memory.iomaf',
        'system.lpar.memory.pgcol',
        'system.lpar.memory.mpgcol',
        'system.lpar.memory.ccol',
        'system.lpar.memory.entc',
        'system.lpar.memory.vcsw',
    ]

    assert len(metrics) == len(expected_metrics)
    for metric in metrics:
        assert metric['metric'] in expected_metrics
Example #39
def test_disk_basic(disk_io_counters, disk_usage, disk_partitions):
    from datadog_checks.disk import Disk  # delayed import for good patching

    disk_partitions.return_value = MOCK_PARTITIONS

    aggregator = MetricsAggregator(
        HOSTNAME,
        interval=1.0,
        histogram_aggregates=None,
        histogram_percentiles=None,
    )

    total_gauges, expected_gauges = generate_expected_gauges()
    total_rates, expected_rates = generate_expected_rates()

    c = Disk("disk", {}, {}, aggregator)
    c.check({})

    metrics = c.aggregator.flush()[:-1]  # we remove the datadog.agent.running metric
    assert len(metrics) == total_gauges

    time.sleep(1)

    c.check({})
    metrics = c.aggregator.flush()[:-1]  # we remove the datadog.agent.running metric

    assert len(metrics) == (total_gauges + total_rates)
    for metric in metrics:
        assert (metric['metric'] in expected_gauges
                or metric['metric'] in expected_rates)
        assert len(metric['points']) == 1
        assert metric['host'] == HOSTNAME
        assert metric['type'] == GAUGE
        assert (is_metric_expected(expected_gauges, metric)
                or is_metric_expected(expected_rates, metric))
Example #40
class AgentCheck(object):
    OK, WARNING, CRITICAL, UNKNOWN = (0, 1, 2, 3)

    SOURCE_TYPE_NAME = None

    DEFAULT_MIN_COLLECTION_INTERVAL = 0

    def __init__(self, name, init_config, agentConfig, instances=None):
        """
        Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agentConfig: The global configuration for the agent
        :param instances: A list of configuration objects for each instance.
        """
        from aggregator import MetricsAggregator

        self.name = name
        self.init_config = init_config or {}
        self.agentConfig = agentConfig
        self.in_developer_mode = agentConfig.get(
            'developer_mode') and psutil is not None
        self._internal_profiling_stats = None

        self.hostname = agentConfig.get('checksd_hostname') or get_hostname(
            agentConfig)
        self.log = logging.getLogger('%s.%s' % (__name__, name))

        self.aggregator = MetricsAggregator(
            self.hostname,
            formatter=agent_formatter,
            recent_point_threshold=agentConfig.get('recent_point_threshold',
                                                   None),
            histogram_aggregates=agentConfig.get('histogram_aggregates'),
            histogram_percentiles=agentConfig.get('histogram_percentiles'))

        self.events = []
        self.service_checks = []
        self.instances = instances or []
        self.warnings = []
        self.library_versions = None
        self.last_collection_time = defaultdict(int)
        self._instance_metadata = []
        self.svc_metadata = []

    def instance_count(self):
        """ Return the number of instances that are configured for this check. """
        return len(self.instances)

    def gauge(self,
              metric,
              value,
              tags=None,
              hostname=None,
              device_name=None,
              timestamp=None):
        """
        Record the value of a gauge, with optional tags, hostname and device
        name.

        :param metric: The name of the metric
        :param value: The value of the gauge
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param timestamp: (optional) The timestamp for this metric value
        """
        self.aggregator.gauge(metric, value, tags, hostname, device_name,
                              timestamp)

    def increment(self,
                  metric,
                  value=1,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Increment a counter with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to increment by
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.increment(metric, value, tags, hostname, device_name)

    def decrement(self,
                  metric,
                  value=-1,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Decrement a counter with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to decrement by
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.decrement(metric, value, tags, hostname, device_name)

    def count(self,
              metric,
              value=0,
              tags=None,
              hostname=None,
              device_name=None):
        """
        Submit a raw count with optional tags, hostname and device name

        :param metric: The name of the metric
        :param value: The value
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.submit_count(metric, value, tags, hostname,
                                     device_name)

    def monotonic_count(self,
                        metric,
                        value=0,
                        tags=None,
                        hostname=None,
                        device_name=None):
        """
        Submits a raw count with optional tags, hostname and device name
        based on increasing counter values. E.g. 1, 3, 5, 7 will submit
        6 on flush. Note that reset counters are skipped.

        :param metric: The name of the metric
        :param value: The value of the rate
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.count_from_counter(metric, value, tags, hostname,
                                           device_name)

    def rate(self, metric, value, tags=None, hostname=None, device_name=None):
        """
        Submit a point for a metric that will be calculated as a rate on flush.
        Values will persist across calls to `check` if there are not enough
        points to compute a rate at flush time.

        :param metric: The name of the metric
        :param value: The value of the rate
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.rate(metric, value, tags, hostname, device_name)

    def histogram(self,
                  metric,
                  value,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Sample a histogram value, with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to sample for the histogram
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.histogram(metric, value, tags, hostname, device_name)

    def set(self, metric, value, tags=None, hostname=None, device_name=None):
        """
        Sample a set value, with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value for the set
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.set(metric, value, tags, hostname, device_name)

    def event(self, event):
        """
        Save an event.

        :param event: The event payload as a dictionary. Has the following
        structure:

            {
                "timestamp": int, the epoch timestamp for the event,
                "event_type": string, the event time name,
                "agent_key": string, the api key of the account to associate the event with,
                "msg_title": string, the title of the event,
                "msg_text": string, the text body of the event,
                "alert_type": (optional) string, one of ('error', 'warning', 'success', 'info').
                    Defaults to 'info'.
                "source_type_name": (optional) string, the source type name,
                "host": (optional) string, the name of the host,
                "tags": (optional) list, a list of tags to associate with this event
            }
        """
        # Events are disabled.
        return

        if event.get('agent_key') is None:
            event['agent_key'] = self.agentConfig['agent_key']
        self.events.append(event)

    def service_check(self,
                      check_name,
                      status,
                      tags=None,
                      timestamp=None,
                      hostname=None,
                      check_run_id=None,
                      message=None):
        """
        Save a service check.

        :param check_name: string, name of the service check
        :param status: int, describing the status.
                       0 for success, 1 for warning, 2 for failure
        :param tags: (optional) list of strings, a list of tags for this run
        :param timestamp: (optional) float, unix timestamp for when the run occurred
        :param hostname: (optional) str, host that generated the service
                          check. Defaults to the host_name of the agent
        :param check_run_id: (optional) int, id used for logging and tracing
                             purposes. It doesn't need to be unique. If not
                             specified, one will be generated.
        """
        if hostname is None:
            hostname = self.hostname
        if message is not None:
            message = str(message)
        self.service_checks.append(
            create_service_check(check_name, status, tags, timestamp, hostname,
                                 check_run_id, message))

    def service_metadata(self, meta_name, value):
        """
        Save metadata.

        :param meta_name: metadata key name
        :type meta_name: string

        :param value: metadata value
        :type value: string
        """
        self._instance_metadata.append((meta_name, str(value)))

    def has_events(self):
        """
        Check whether the check has saved any events

        @return whether or not the check has saved any events
        @rtype boolean
        """
        return len(self.events) > 0

    def get_metrics(self):
        """
        Get all metrics, including the ones that are tagged.

        @return the list of samples
        @rtype [(metric_name, timestamp, value, {"tags": ["tag1", "tag2"]}), ...]
        """
        return self.aggregator.flush()

    def get_events(self):
        """
        Return a list of the events saved by the check, if any

        @return the list of events saved by this check
        @rtype list of event dictionaries
        """
        events = self.events
        self.events = []
        return events

    def get_service_checks(self):
        """
        Return a list of the service checks saved by the check, if any,
        and clear them out of the instance's service_checks list

        @return the list of service checks saved by this check
        @rtype list of service check dicts
        """
        service_checks = self.service_checks
        self.service_checks = []
        return service_checks

    def _roll_up_instance_metadata(self):
        """
        Concatenate and flush instance metadata.
        """
        self.svc_metadata.append(
            dict((k, v) for (k, v) in self._instance_metadata))
        self._instance_metadata = []

    def get_service_metadata(self):
        """
        Return a list of the metadata dictionaries saved by the check, if any,
        and clear them out of the instance's svc_metadata list

        @return the list of metadata saved by this check
        @rtype list of metadata dicts
        """
        if self._instance_metadata:
            self._roll_up_instance_metadata()
        service_metadata = self.svc_metadata
        self.svc_metadata = []
        return service_metadata

    def has_warnings(self):
        """
        Check whether the instance run created any warnings
        """
        return len(self.warnings) > 0

    def warning(self, warning_message):
        """ Add a warning message that will be printed in the info page
        :param warning_message: String. Warning message to be displayed
        """
        self.warnings.append(str(warning_message))

    def get_library_info(self):
        if self.library_versions is not None:
            return self.library_versions
        try:
            self.library_versions = self.get_library_versions()
        except NotImplementedError:
            pass

    def get_library_versions(self):
        """ Should return a string that shows which version
        of the needed libraries are used """
        raise NotImplementedError

    def get_warnings(self):
        """
        Return the list of warnings messages to be displayed in the info page
        """
        warnings = self.warnings
        self.warnings = []
        return warnings

    @staticmethod
    def _get_statistic_name_from_method(method_name):
        return method_name[4:] if method_name.startswith('get_') else method_name

    @staticmethod
    def _collect_internal_stats(methods=None):
        current_process = psutil.Process(os.getpid())

        methods = methods or DEFAULT_PSUTIL_METHODS
        filtered_methods = [m for m in methods if hasattr(current_process, m)]

        stats = {}

        for method in filtered_methods:
            # Go from `get_memory_info` -> `memory_info`
            stat_name = AgentCheck._get_statistic_name_from_method(method)
            try:
                raw_stats = getattr(current_process, method)()
                try:
                    stats[stat_name] = raw_stats._asdict()
                except AttributeError:
                    if isinstance(raw_stats, numbers.Number):
                        stats[stat_name] = raw_stats
                    else:
                        log.warn(
                            "Could not serialize output of {0} to dict".format(
                                method))

            except psutil.AccessDenied:
                log.warn(
                    "Cannot call psutil method {0} : Access Denied".format(
                        method))

        return stats

    def _set_internal_profiling_stats(self, before, after):
        self._internal_profiling_stats = {'before': before, 'after': after}

    def _get_internal_profiling_stats(self):
        """
        If in developer mode, return a dictionary of statistics about the check run
        """
        stats = self._internal_profiling_stats
        self._internal_profiling_stats = None
        return stats

    def run(self):
        """ Run all instances. """

        # Store run statistics if needed
        before, after = None, None
        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                before = AgentCheck._collect_internal_stats()
            except Exception:  # It's fine if we can't collect stats for the run, just log and proceed
                self.log.debug(
                    "Failed to collect Agent Stats before check {0}".format(
                        self.name))

        instance_statuses = []
        for i, instance in enumerate(self.instances):
            try:
                min_collection_interval = instance.get(
                    'min_collection_interval',
                    self.init_config.get('min_collection_interval',
                                         self.DEFAULT_MIN_COLLECTION_INTERVAL))
                now = time.time()
                if now - self.last_collection_time[i] < min_collection_interval:
                    self.log.debug(
                        "Not running instance #{0} of check {1} as it ran less than {2}s ago"
                        .format(i, self.name, min_collection_interval))
                    continue

                self.last_collection_time[i] = now

                check_start_time = None
                if self.in_developer_mode:
                    check_start_time = timeit.default_timer()
                self.check(copy.deepcopy(instance))

                instance_check_stats = None
                if check_start_time is not None:
                    instance_check_stats = {
                        'run_time': timeit.default_timer() - check_start_time
                    }

                if self.has_warnings():
                    instance_status = check_status.InstanceStatus(
                        i,
                        check_status.STATUS_WARNING,
                        warnings=self.get_warnings(),
                        instance_check_stats=instance_check_stats)
                else:
                    instance_status = check_status.InstanceStatus(
                        i,
                        check_status.STATUS_OK,
                        instance_check_stats=instance_check_stats)
            except Exception, e:
                self.log.exception("Check '%s' instance #%s failed" %
                                   (self.name, i))
                instance_status = check_status.InstanceStatus(
                    i,
                    check_status.STATUS_ERROR,
                    error=str(e),
                    tb=traceback.format_exc())
            finally:
Example #41
    def test_gauge(self):
        stats = MetricsAggregator('myhost')

        # Track some gauges.
        stats.submit_packets('my.first.gauge:1|g')
        stats.submit_packets('my.first.gauge:5|g')
        stats.submit_packets('my.second.gauge:1.5|g')

        # Ensure that gauges roll up correctly.
        metrics = self.sort_metrics(stats.flush())
        assert len(metrics) == 2

        first, second = metrics

        nt.assert_equals(first['metric'], 'my.first.gauge')
        nt.assert_equals(first['points'][0][1], 5)
        nt.assert_equals(first['host'], 'myhost')

        nt.assert_equals(second['metric'], 'my.second.gauge')
        nt.assert_equals(second['points'][0][1], 1.5)

        # Ensure that old gauges get dropped due to old timestamps
        stats.gauge('my.first.gauge', 5)
        stats.gauge('my.first.gauge', 1, timestamp=1000000000)
        stats.gauge('my.second.gauge', 20, timestamp=1000000000)

        metrics = self.sort_metrics(stats.flush())
        assert len(metrics) == 1

        first = metrics[0]

        nt.assert_equals(first['metric'], 'my.first.gauge')
        nt.assert_equals(first['points'][0][1], 5)
        nt.assert_equals(first['host'], 'myhost')
Example #42
    def test_event_tags(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_e{6,4}:title1|text')
        stats.submit_packets('_e{6,4}:title2|text|#t1')
        stats.submit_packets('_e{6,4}:title3|text|#t1,t2:v2,t3,t4')
        stats.submit_packets('_e{6,4}:title4|text|k:key|p:normal|#t1,t2')

        events = self.sort_events(stats.flush_events())

        assert len(events) == 4
        first, second, third, fourth = events

        try:
            first['tags']
        except Exception:
            assert True
        else:
            assert False, "event['tags'] shouldn't be defined when no tags aren't explicited in the packet"
        nt.assert_equal(first['msg_title'], 'title1')
        nt.assert_equal(first['msg_text'], 'text')

        nt.assert_equal(second['msg_title'], 'title2')
        nt.assert_equal(second['msg_text'], 'text')
        nt.assert_equal(second['tags'], sorted(['t1']))

        nt.assert_equal(third['msg_title'], 'title3')
        nt.assert_equal(third['msg_text'], 'text')
        nt.assert_equal(third['tags'], sorted(['t1', 't2:v2', 't3', 't4']))

        nt.assert_equal(fourth['msg_title'], 'title4')
        nt.assert_equal(fourth['msg_text'], 'text')
        nt.assert_equal(fourth['aggregation_key'], 'key')
        nt.assert_equal(fourth['priority'], 'normal')
        nt.assert_equal(fourth['tags'], sorted(['t1', 't2']))
Ejemplo n.º 43
0
    def test_event_title(self):
        stats = MetricsAggregator('myhost', utf8_decoding=True)
        stats.submit_packets('_e{0,4}:|text')
        stats.submit_packets(
            u'_e{9,4}:2intitulé|text'.encode('utf-8'))  # comes from socket
        stats.submit_packets('_e{14,4}:3title content|text')
        stats.submit_packets('_e{14,4}:4title|content|text')
        stats.submit_packets(
            '_e{13,4}:5title\\ntitle|text')  # \n stays escaped

        events = self.sort_events(stats.flush_events())

        assert len(events) == 5

        nt.assert_equal(events[0]['msg_title'], '')
        nt.assert_equal(events[1]['msg_title'], u'2intitulé')
        nt.assert_equal(events[2]['msg_title'], '3title content')
        nt.assert_equal(events[3]['msg_title'], '4title|content')
        nt.assert_equal(events[4]['msg_title'], '5title\\ntitle')
Ejemplo n.º 44
0
    def test_metrics_expiry(self):
        # Ensure metrics eventually expire and stop submitting.
        ag_interval = 1
        expiry = ag_interval * 4 + 2
        # The min is not enabled by default
        stats = MetricsAggregator(
            'myhost',
            interval=ag_interval,
            expiry_seconds=expiry,
            histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES + ['min'])
        stats.submit_packets('test.counter:123|c')
        stats.submit_packets('test.gauge:55|g')
        stats.submit_packets('test.set:44|s')
        stats.submit_packets('test.histogram:11|h')

        # Ensure points keep submitting
        time.sleep(ag_interval)
        metrics = self.sort_metrics(stats.flush())
        nt.assert_equal(len(metrics), 9)
        nt.assert_equal(metrics[0]['metric'], 'test.counter')
        nt.assert_equal(metrics[0]['points'][0][1], 123)
        time.sleep(ag_interval)
        metrics = self.sort_metrics(stats.flush())
        nt.assert_equal(len(metrics), 1)
        nt.assert_equal(metrics[0]['metric'], 'test.counter')
        nt.assert_equal(metrics[0]['points'][0][1], 0)

        time.sleep(ag_interval)
        time.sleep(0.5)
        metrics = self.sort_metrics(stats.flush())
        nt.assert_equal(len(metrics), 1)
        nt.assert_equal(metrics[0]['metric'], 'test.counter')
        nt.assert_equal(metrics[0]['points'][0][1], 0)

        # Now sleep for longer than the expiry window and ensure
        # no points are submitted
        time.sleep(ag_interval)
        time.sleep(2)
        m = stats.flush()
        assert not m, str(m)

        # If we submit again, we're all good.
        stats.submit_packets('test.counter:123|c')
        stats.submit_packets('test.gauge:55|g')
        stats.submit_packets('test.set:44|s')
        stats.submit_packets('test.histogram:11|h')

        metrics = self.sort_metrics(stats.flush())
        nt.assert_equal(len(metrics), 9)
        nt.assert_equal(metrics[0]['metric'], 'test.counter')
        nt.assert_equal(metrics[0]['points'][0][1], 123)
Ejemplo n.º 45
0
    def test_histogram_counter(self):
        # Test whether histogram.count == increment
        # same deal with a sample rate
        cnt = 100000
        for run in [1, 2]:
            stats = MetricsAggregator('myhost')
            for i in xrange(cnt):
                if run == 2:
                    stats.submit_packets('test.counter:1|c|@0.5')
                    stats.submit_packets('test.hist:1|ms|@0.5')
                else:
                    stats.submit_packets('test.counter:1|c')
                    stats.submit_packets('test.hist:1|ms')
            metrics = self.sort_metrics(stats.flush())
            assert len(metrics) > 0

            nt.assert_equal([
                m['points'][0][1]
                for m in metrics if m['metric'] == 'test.counter'
            ], [cnt * run])
            nt.assert_equal([
                m['points'][0][1]
                for m in metrics if m['metric'] == 'test.hist.count'
            ], [cnt * run])
Ejemplo n.º 46
0
class AgentCheck(object):
    def __init__(self, name, init_config, agentConfig):
        """
        Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agentConfig: The global configuration for the agent
        """
        from aggregator import MetricsAggregator

        self.name = name
        self.init_config = init_config
        self.agentConfig = agentConfig
        self.hostname = gethostname(agentConfig)
        self.log = logging.getLogger('checks.%s' % name)
        self.aggregator = MetricsAggregator(self.hostname,
                                            formatter=agent_formatter)
        self.events = []

    def gauge(self,
              metric,
              value,
              tags=None,
              hostname=None,
              device_name=None,
              timestamp=None):
        """
        Record the value of a gauge, with optional tags, hostname and device
        name.

        :param metric: The name of the metric
        :param value: The value of the gauge
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param timestamp: (optional) The timestamp for this metric value
        """
        self.aggregator.gauge(metric, value, tags, hostname, device_name,
                              timestamp)

    def increment(self,
                  metric,
                  value=1,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Increment a counter with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to increment by
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.increment(metric, value, tags, hostname, device_name)

    def decrement(self,
                  metric,
                  value=-1,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Decrement a counter with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to decrement by
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.decrement(metric, value, tags, hostname, device_name)

    def rate(self, metric, value, tags=None, hostname=None, device_name=None):
        """
        Submit a point for a metric that will be calculated as a rate on flush.
        Values will persist across calls to `check` if there are not enough
        points to generate a rate at flush time.

        :param metric: The name of the metric
        :param value: The value of the rate
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.rate(metric, value, tags, hostname, device_name)
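
    # A hedged usage sketch (not from the original source): the aggregator
    # needs two successive points before a rate can be flushed, e.g.
    #
    #     self.rate('queries.per_second', 100)  # first run: cached, nothing flushed
    #     self.rate('queries.per_second', 160)  # next run: flushes (160 - 100) / elapsed seconds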

    def histogram(self,
                  metric,
                  value,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Sample a histogram value, with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to sample for the histogram
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.histogram(metric, value, tags, hostname, device_name)

    def set(self, metric, value, tags=None, hostname=None, device_name=None):
        """
        Sample a set value, with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value for the set
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.set(metric, value, tags, hostname, device_name)

    def event(self, event):
        """
        Save an event.

        :param event: The event payload as a dictionary. Has the following
        structure:

            {
                "timestamp": int, the epoch timestamp for the event,
                "event_type": string, the event time name,
                "api_key": string, the api key of the account to associate the event with,
                "msg_title": string, the title of the event,
                "msg_text": string, the text body of the event,
                "alert_type": (optional) string, one of ('error', 'warning', 'success', 'info').
                    Defaults to 'info'.
                "source_type_name": (optional) string, the source type name,
                "host": (optional) string, the name of the host,
                "tags": (optional) list, a list of tags to associate with this event
            }
        """
        self.events.append(event)
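
    # A minimal sketch of a payload using the documented keys (values are
    # illustrative, not from the original source):
    #
    #     self.event({
    #         'timestamp': int(time.time()),
    #         'event_type': 'deploy',
    #         'api_key': 'abc123',
    #         'msg_title': 'Deployed v1.2.3',
    #         'msg_text': 'Rolled out to all hosts',
    #         'alert_type': 'info',
    #         'tags': ['env:prod'],
    #     })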

    def has_events(self):
        """
        Check whether the check has saved any events

        @return whether or not the check has saved any events
        @rtype boolean
        """
        return len(self.events) > 0

    def get_metrics(self):
        """
        Get all metrics, including the ones that are tagged.

        @return the list of samples
        @rtype [(metric_name, timestamp, value, {"tags": ["tag1", "tag2"]}), ...]
        """
        return self.aggregator.flush()

    def get_events(self):
        """
        Return a list of the events saved by the check, if any

        @return the list of events saved by this check
        @rtype list of event dictionaries
        """
        events = self.events
        self.events = []
        return events

    def check(self, instance):
        """
        Overridden by the check subclass. This will be called to run the check.

        :param instance: A dict with the instance information. This will vary
        depending on your config structure.
        """
        raise NotImplementedError()

    @classmethod
    def from_yaml(cls,
                  path_to_yaml=None,
                  agentConfig=None,
                  yaml_text=None,
                  check_name=None):
        """
        A method used for testing your check without running the agent.
        """
        from util import yaml, yLoader
        if path_to_yaml:
            check_name = os.path.basename(path_to_yaml).split('.')[0]
            try:
                f = open(path_to_yaml)
            except IOError:
                raise Exception('Unable to open yaml config: %s' %
                                path_to_yaml)
            yaml_text = f.read()
            f.close()

        config = yaml.load(yaml_text, Loader=yLoader)
        check = cls(check_name,
                    config.get('init_config') or {}, agentConfig or {})

        return check, config.get('instances', [])

    def normalize(self, metric, prefix=None):
        """
        Turn a metric into a well-formed metric name
        prefix.b.c

        :param metric: The metric name to normalize
        :param prefix: A prefix to add to the normalized name, default None
        """
        name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
        # Eliminate multiple _
        name = re.sub(r"__+", "_", name)
        # Don't start/end with _
        name = re.sub(r"^_", "", name)
        name = re.sub(r"_$", "", name)
        # Drop ._ and _.
        name = re.sub(r"\._", ".", name)
        name = re.sub(r"_\.", ".", name)

        if prefix is not None:
            return prefix + "." + name
        else:
            return name
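
    # Worked examples, traced by hand through the regex rules above (not from
    # the original docs):
    #
    #     normalize('users.on-line')               -> 'users.on_line'
    #     normalize('queries/sec', prefix='myapp') -> 'myapp.queries_sec'
    #     normalize('a.(b).c')                     -> 'a.b.c'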
Ejemplo n.º 47
0
    def test_formatter(self):
        stats = MetricsAggregator('myhost',
                                  interval=10,
                                  formatter=get_formatter(
                                      {"statsd_metric_namespace": "datadog"}))
        stats.submit_packets('gauge:16|c|#tag3,tag4')
        metrics = self.sort_metrics(stats.flush())
        self.assertTrue(len(metrics) == 1)
        self.assertTrue(metrics[0]['metric'] == "datadog.gauge")

        stats = MetricsAggregator(
            'myhost',
            interval=10,
            formatter=get_formatter({"statsd_metric_namespace": "datadoge."}))
        stats.submit_packets('gauge:16|c|#tag3,tag4')
        metrics = self.sort_metrics(stats.flush())
        self.assertTrue(len(metrics) == 1)
        self.assertTrue(metrics[0]['metric'] == "datadoge.gauge")

        stats = MetricsAggregator('myhost',
                                  interval=10,
                                  formatter=get_formatter(
                                      {"statsd_metric_namespace": None}))
        stats.submit_packets('gauge:16|c|#tag3,tag4')
        metrics = self.sort_metrics(stats.flush())
        self.assertTrue(len(metrics) == 1)
        self.assertTrue(metrics[0]['metric'] == "gauge")
Ejemplo n.º 48
0
    def test_recent_point_threshold(self):
        threshold = 100
        # The min is not enabled by default
        stats = MetricsAggregator(
            'myhost',
            recent_point_threshold=threshold,
            histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES + ['min'])
        timestamp_beyond_threshold = time.time() - threshold * 2
        timestamp_within_threshold = time.time() - threshold / 2

        # Ensure that old gauges get dropped due to old timestamps
        stats.submit_metric('my.first.gauge', 5, 'g')
        stats.submit_metric('my.first.gauge',
                            1,
                            'g',
                            timestamp=timestamp_beyond_threshold)
        stats.submit_metric('my.second.gauge',
                            20,
                            'g',
                            timestamp=timestamp_beyond_threshold)

        metrics = self.sort_metrics(stats.flush())
        assert len(metrics) == 1

        first = metrics[0]
        nt.assert_equals(first['metric'], 'my.first.gauge')
        nt.assert_equals(first['points'][0][1], 5)
        nt.assert_equals(first['host'], 'myhost')

        # Ensure that points within the recent point threshold are kept
        stats.submit_metric('my.1.gauge', 5, 'g')
        stats.submit_metric('my.1.gauge',
                            1,
                            'g',
                            timestamp=timestamp_within_threshold)
        stats.submit_metric('my.2.counter',
                            20,
                            'c',
                            timestamp=timestamp_within_threshold)
        stats.submit_metric('my.3.set',
                            20,
                            's',
                            timestamp=timestamp_within_threshold)
        stats.submit_metric('my.4.histogram',
                            20,
                            'h',
                            timestamp=timestamp_within_threshold)

        flush_timestamp = time.time()
        metrics = self.sort_metrics(stats.flush())
        nt.assert_equal(len(metrics), 9)

        first, second, third, h1, h2, h3, h4, h5, h6 = metrics
        nt.assert_equals(first['metric'], 'my.1.gauge')
        nt.assert_equals(first['points'][0][1], 1)
        nt.assert_equals(first['host'], 'myhost')
        self.assert_almost_equal(first['points'][0][0],
                                 timestamp_within_threshold, 0.1)

        nt.assert_equals(second['metric'], 'my.2.counter')
        nt.assert_equals(second['points'][0][1], 20)
        self.assert_almost_equal(second['points'][0][0], flush_timestamp, 0.1)

        nt.assert_equals(third['metric'], 'my.3.set')
        nt.assert_equals(third['points'][0][1], 1)
        self.assert_almost_equal(third['points'][0][0], flush_timestamp, 0.1)

        nt.assert_equals(h1['metric'], 'my.4.histogram.95percentile')
        nt.assert_equals(h1['points'][0][1], 20)
        self.assert_almost_equal(h1['points'][0][0], flush_timestamp, 0.1)
        nt.assert_equal(h1['points'][0][0], h2['points'][0][0])
        nt.assert_equal(h1['points'][0][0], h3['points'][0][0])
        nt.assert_equal(h1['points'][0][0], h4['points'][0][0])
        nt.assert_equal(h1['points'][0][0], h5['points'][0][0])
Ejemplo n.º 49
0
    def test_service_check_message(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_sc|check.1|0|m:testing')
        stats.submit_packets('_sc|check.2|0|m:First line\\nSecond line')
        stats.submit_packets(u'_sc|check.3|0|m:♬ †øU †øU ¥ºu T0µ ♪')
        stats.submit_packets('_sc|check.4|0|m:|t:|m\:|d:')

        service_checks = self.sort_service_checks(stats.flush_service_checks())

        assert len(service_checks) == 4
        first, second, third, fourth = service_checks

        nt.assert_equal(first['check'], 'check.1')
        nt.assert_equal(first['message'], 'testing')
        nt.assert_equal(second['check'], 'check.2')
        nt.assert_equal(second['message'], 'First line\nSecond line')
        nt.assert_equal(third['check'], 'check.3')
        nt.assert_equal(third['message'], u'♬ †øU †øU ¥ºu T0µ ♪')
        nt.assert_equal(fourth['check'], 'check.4')
        nt.assert_equal(fourth['message'], '|t:|m:|d:')
Ejemplo n.º 50
0
    def test_magic_tags(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('my.gauge.a:1|c|#host:test-a')
        stats.submit_packets('my.gauge.b:4|c|#tag1,tag2,host:test-b')
        stats.submit_packets('my.gauge.b:8|c|#host:test-b,tag2,tag1')
        stats.submit_packets('my.gauge.c:10|c|#tag3')
        stats.submit_packets('my.gauge.c:16|c|#device:floppy,tag3')

        metrics = self.sort_metrics(stats.flush())

        nt.assert_equal(len(metrics), 4)
        first, second, third, fourth = metrics

        nt.assert_equal(first['metric'], 'my.gauge.a')
        nt.assert_equal(first['tags'], None)
        nt.assert_equal(first['points'][0][1], 1)
        nt.assert_equal(first['host'], 'test-a')

        nt.assert_equal(second['metric'], 'my.gauge.b')
        nt.assert_equal(second['tags'], ('tag1', 'tag2'))
        nt.assert_equal(second['points'][0][1], 12)
        nt.assert_equal(second['host'], 'test-b')

        nt.assert_equal(third['metric'], 'my.gauge.c')
        nt.assert_equal(third['tags'], ('tag3', ))
        nt.assert_equal(third['points'][0][1], 10)
        nt.assert_equal(third['device_name'], None)

        nt.assert_equal(fourth['metric'], 'my.gauge.c')
        nt.assert_equal(fourth['tags'], ('tag3', ))
        nt.assert_equal(fourth['points'][0][1], 16)
        nt.assert_equal(fourth['device_name'], 'floppy')
Ejemplo n.º 51
0
    def test_tags(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('gauge:1|c')
        stats.submit_packets('gauge:2|c|@1')
        stats.submit_packets('gauge:4|c|#tag1,tag2')
        stats.submit_packets(
            'gauge:8|c|#tag2,tag1')  # Should be the same as above
        stats.submit_packets('gauge:16|c|#tag3,tag4')

        metrics = self.sort_metrics(stats.flush())

        assert len(metrics) == 3
        first, second, third = metrics

        nt.assert_equal(first['metric'], 'gauge')
        nt.assert_equal(first['tags'], None)
        nt.assert_equal(first['points'][0][1], 3)
        nt.assert_equal(first['host'], 'myhost')

        nt.assert_equal(second['metric'], 'gauge')
        nt.assert_equal(second['tags'], ('tag1', 'tag2'))
        nt.assert_equal(second['points'][0][1], 12)
        nt.assert_equal(second['host'], 'myhost')

        nt.assert_equal(third['metric'], 'gauge')
        nt.assert_equal(third['tags'], ('tag3', 'tag4'))
        nt.assert_equal(third['points'][0][1], 16)
        nt.assert_equal(third['host'], 'myhost')
Ejemplo n.º 52
0
    def test_event_text(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_e{2,0}:t1|')
        stats.submit_packets('_e{2,12}:t2|text|content')
        stats.submit_packets(
            '_e{2,23}:t3|First line\\nSecond line')  # \n is a newline
        stats.submit_packets(
            u'_e{2,19}:t4|♬ †øU †øU ¥ºu T0µ ♪')  # utf-8 compliant

        events = self.sort_events(stats.flush_events())

        assert len(events) == 4
        first, second, third, fourth = events

        nt.assert_equal(first['msg_text'], '')
        nt.assert_equal(second['msg_text'], 'text|content')
        nt.assert_equal(third['msg_text'], 'First line\nSecond line')
        nt.assert_equal(fourth['msg_text'], u'♬ †øU †øU ¥ºu T0µ ♪')
Ejemplo n.º 53
0
    def test_service_check_tags(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_sc|check.1|0')
        stats.submit_packets('_sc|check.2|0|#t1')
        stats.submit_packets('_sc|check.3|0|h:i-abcd1234|#t1,t2|m:fakeout#t5')
        stats.submit_packets('_sc|check.4|0|#t1,t2:v2,t3,t4')

        service_checks = self.sort_service_checks(stats.flush_service_checks())

        assert len(service_checks) == 4
        first, second, third, fourth = service_checks

        nt.assert_equal(first['check'], 'check.1')
        assert first.get('tags') is None, "service_check['tags'] shouldn't " \
            "be defined when no tags are given explicitly in the packet"

        nt.assert_equal(second['check'], 'check.2')
        nt.assert_equal(second['tags'], sorted(['t1']))

        nt.assert_equal(third['check'], 'check.3')
        nt.assert_equal(third['host_name'], 'i-abcd1234')
        nt.assert_equal(third['message'], 'fakeout#t5')
        nt.assert_equal(third['tags'], sorted(['t1', 't2']))

        nt.assert_equal(fourth['check'], 'check.4')
        nt.assert_equal(fourth['tags'], sorted(['t1', 't2:v2', 't3', 't4']))
Ejemplo n.º 54
0
class AgentCheck(object):
    OK, WARNING, CRITICAL, UNKNOWN = (0, 1, 2, 3)

    SOURCE_TYPE_NAME = None

    DEFAULT_EXPIRY_SECONDS = 300

    DEFAULT_MIN_COLLECTION_INTERVAL = 0

    _enabled_checks = []

    @classmethod
    def is_check_enabled(cls, name):
        return name in cls._enabled_checks

    def __init__(self, name, init_config, agentConfig, instances=None):
        """
        Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agentConfig: The global configuration for the agent
        :param instances: A list of configuration objects for each instance.
        """
        from aggregator import MetricsAggregator

        self._enabled_checks.append(name)
        self._enabled_checks = list(set(self._enabled_checks))

        self.name = name
        self.init_config = init_config or {}
        self.agentConfig = agentConfig
        self.in_developer_mode = agentConfig.get('developer_mode') and psutil
        self._internal_profiling_stats = None
        self.default_integration_http_timeout = float(
            agentConfig.get('default_integration_http_timeout', 9))

        self.hostname = agentConfig.get('checksd_hostname') or get_hostname(
            agentConfig)
        self.log = logging.getLogger('%s.%s' % (__name__, name))

        self.min_collection_interval = self.init_config.get(
            'min_collection_interval', self.DEFAULT_MIN_COLLECTION_INTERVAL)

        self.aggregator = MetricsAggregator(
            self.hostname,
            expiry_seconds=self.min_collection_interval +
            self.DEFAULT_EXPIRY_SECONDS,
            formatter=agent_formatter,
            recent_point_threshold=agentConfig.get('recent_point_threshold',
                                                   None),
            histogram_aggregates=agentConfig.get('histogram_aggregates'),
            histogram_percentiles=agentConfig.get('histogram_percentiles'))

        self.events = []
        self.service_checks = []
        self.instances = instances or []
        self.warnings = []
        self.library_versions = None
        self.last_collection_time = defaultdict(int)
        self._instance_metadata = []
        self.svc_metadata = []
        self.historate_dict = {}

        # Set proxy settings
        self.proxy_settings = get_proxy(self.agentConfig)
        self._use_proxy = False if init_config is None else init_config.get(
            "use_agent_proxy", True)
        self.proxies = {
            "http": None,
            "https": None,
        }
        if self.proxy_settings and self._use_proxy:
            uri = "{host}:{port}".format(host=self.proxy_settings['host'],
                                         port=self.proxy_settings['port'])
            if self.proxy_settings['user'] and self.proxy_settings['password']:
                uri = "{user}:{password}@{uri}".format(
                    user=self.proxy_settings['user'],
                    password=self.proxy_settings['password'],
                    uri=uri)
            self.proxies['http'] = "http://{uri}".format(uri=uri)
            self.proxies['https'] = "https://{uri}".format(uri=uri)
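
            # Resulting shape (illustrative values only): with host
            # 'proxy.local', port 3128 and credentials set, self.proxies
            # becomes
            #     {'http': 'http://user:pass@proxy.local:3128',
            #      'https': 'https://user:pass@proxy.local:3128'}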

    def instance_count(self):
        """ Return the number of instances that are configured for this check. """
        return len(self.instances)

    def gauge(self,
              metric,
              value,
              tags=None,
              hostname=None,
              device_name=None,
              timestamp=None):
        """
        Record the value of a gauge, with optional tags, hostname and device
        name.

        :param metric: The name of the metric
        :param value: The value of the gauge
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param timestamp: (optional) The timestamp for this metric value
        """
        self.aggregator.gauge(metric, value, tags, hostname, device_name,
                              timestamp)

    def increment(self,
                  metric,
                  value=1,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Increment a counter with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to increment by
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.increment(metric, value, tags, hostname, device_name)

    def decrement(self,
                  metric,
                  value=-1,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Decrement a counter with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to decrement by
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.decrement(metric, value, tags, hostname, device_name)

    def count(self,
              metric,
              value=0,
              tags=None,
              hostname=None,
              device_name=None):
        """
        Submit a raw count with optional tags, hostname and device name

        :param metric: The name of the metric
        :param value: The value
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.submit_count(metric, value, tags, hostname,
                                     device_name)

    def monotonic_count(self,
                        metric,
                        value=0,
                        tags=None,
                        hostname=None,
                        device_name=None):
        """
        Submits a raw count with optional tags, hostname and device name
        based on increasing counter values. E.g. 1, 3, 5, 7 will submit
        6 on flush. Note that reset counters are skipped.

        :param metric: The name of the metric
        :param value: The current value of the counter
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.count_from_counter(metric, value, tags, hostname,
                                           device_name)
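
    # An illustrative sketch of the delta behaviour described above:
    # monotonically increasing readings within one flush window yield a
    # single count equal to the overall delta; a reading lower than the
    # previous one is treated as a counter reset and skipped.
    #
    #     self.monotonic_count('queries.count', 1)
    #     self.monotonic_count('queries.count', 7)  # flushes 6 (7 - 1)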

    def rate(self, metric, value, tags=None, hostname=None, device_name=None):
        """
        Submit a point for a metric that will be calculated as a rate on flush.
        Values will persist across calls to `check` if there are not enough
        points to generate a rate at flush time.

        :param metric: The name of the metric
        :param value: The value of the rate
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.rate(metric, value, tags, hostname, device_name)

    def histogram(self,
                  metric,
                  value,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Sample a histogram value, with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to sample for the histogram
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.histogram(metric, value, tags, hostname, device_name)

    @classmethod
    def generate_historate_func(cls, excluding_tags):
        def fct(self,
                metric,
                value,
                tags=None,
                hostname=None,
                device_name=None):
            cls.historate(self,
                          metric,
                          value,
                          excluding_tags,
                          tags=tags,
                          hostname=hostname,
                          device_name=device_name)

        return fct

    @classmethod
    def generate_histogram_func(cls, excluding_tags):
        def fct(self,
                metric,
                value,
                tags=None,
                hostname=None,
                device_name=None):
            # Work on a copy so we don't remove tags from the original list
            tags = list(tags)
            for tag in list(tags):
                for exc_tag in excluding_tags:
                    if tag.startswith(exc_tag + ":"):
                        tags.remove(tag)

            cls.histogram(self,
                          metric,
                          value,
                          tags=tags,
                          hostname=hostname,
                          device_name=device_name)

        return fct

    def historate(self,
                  metric,
                  value,
                  excluding_tags,
                  tags=None,
                  hostname=None,
                  device_name=None):
        """
        Function to create a histogram metric for "rate" like metrics.
        Warning: this does not use the harmonic mean, so be aware of the
        implications when using it.

        :param metric: The name of the metric
        :param value: The value to sample for the histogram
        :param excluding_tags: A list of tags that will be removed when computing the histogram
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """

        # Work on a copy so we don't remove tags from the original list
        tags = list(tags)
        context = [metric]
        if tags is not None:
            context.append("-".join(sorted(tags)))
        if hostname is not None:
            context.append("host:" + hostname)
        if device_name is not None:
            context.append("device:" + device_name)

        now = time.time()
        context = tuple(context)

        if context in self.historate_dict:
            if tags is not None:
                for tag in list(tags):
                    for exc_tag in excluding_tags:
                        if tag.startswith("{0}:".format(exc_tag)):
                            tags.remove(tag)

            prev_value, prev_ts = self.historate_dict[context]
            rate = float(value - prev_value) / float(now - prev_ts)
            self.aggregator.histogram(metric, rate, tags, hostname,
                                      device_name)

        self.historate_dict[context] = (value, now)
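
    # Worked example with hypothetical numbers: a first call caches
    # (value=10, t0); a second call 30s later with value=40 submits a
    # histogram sample of (40 - 10) / 30 = 1.0.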

    def set(self, metric, value, tags=None, hostname=None, device_name=None):
        """
        Sample a set value, with optional tags, hostname and device name.

        :param metric: The name of the metric
        :param value: The value for the set
        :param tags: (optional) A list of tags for this metric
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        """
        self.aggregator.set(metric, value, tags, hostname, device_name)

    def event(self, event):
        """
        Save an event.

        :param event: The event payload as a dictionary. Has the following
        structure:

            {
                "timestamp": int, the epoch timestamp for the event,
                "event_type": string, the event time name,
                "msg_title": string, the title of the event,
                "msg_text": string, the text body of the event,
                "alert_type": (optional) string, one of ('error', 'warning', 'success', 'info').
                    Defaults to 'info'.
                "source_type_name": (optional) string, the source type name,
                "host": (optional) string, the name of the host,
                "tags": (optional) list, a list of tags to associate with this event
            }
        """
        self.events.append(event)

    def service_check(self,
                      check_name,
                      status,
                      tags=None,
                      timestamp=None,
                      hostname=None,
                      check_run_id=None,
                      message=None):
        """
        Save a service check.

        :param check_name: string, name of the service check
        :param status: int, describing the status.
                       0 for success, 1 for warning, 2 for failure
        :param tags: (optional) list of strings, a list of tags for this run
        :param timestamp: (optional) float, unix timestamp for when the run occurred
        :param hostname: (optional) str, host that generated the service
                          check. Defaults to the host_name of the agent
        :param check_run_id: (optional) int, id used for logging and tracing
                             purposes. Doesn't need to be unique. If not
                             specified, one will be generated.
        :param message: (optional) str, a message to attach to this run of
                        the service check
        """
        if hostname is None:
            hostname = self.hostname
        if message is not None:
            # an ascii str converts to unicode, but not vice versa
            message = unicode(message)
        self.service_checks.append(
            create_service_check(check_name, status, tags, timestamp, hostname,
                                 check_run_id, message))
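
    # A hedged usage sketch, using the status constants defined on this class:
    #
    #     self.service_check('myapp.can_connect', AgentCheck.OK,
    #                        tags=['server:db1'], message='connected')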

    def service_metadata(self, meta_name, value):
        """
        Save metadata.

        :param meta_name: metadata key name
        :type meta_name: string

        :param value: metadata value
        :type value: string
        """
        self._instance_metadata.append((meta_name, unicode(value)))

    def has_events(self):
        """
        Check whether the check has saved any events

        @return whether or not the check has saved any events
        @rtype boolean
        """
        return len(self.events) > 0

    def get_metrics(self):
        """
        Get all metrics, including the ones that are tagged.

        @return the list of samples
        @rtype [(metric_name, timestamp, value, {"tags": ["tag1", "tag2"]}), ...]
        """
        return self.aggregator.flush()

    def get_events(self):
        """
        Return a list of the events saved by the check, if any

        @return the list of events saved by this check
        @rtype list of event dictionaries
        """
        events = self.events
        self.events = []
        return events

    def get_service_checks(self):
        """
        Return a list of the service checks saved by the check, if any,
        and clear them from the instance's service_checks list

        @return the list of service checks saved by this check
        @rtype list of service check dicts
        """
        service_checks = self.service_checks
        self.service_checks = []
        return service_checks

    def _roll_up_instance_metadata(self):
        """
        Concatenate and flush instance metadata.
        """
        self.svc_metadata.append(
            dict((k, v) for (k, v) in self._instance_metadata))
        self._instance_metadata = []

    def get_service_metadata(self):
        """
        Return a list of the metadata dictionaries saved by the check, if any,
        and clear them from the instance's svc_metadata list

        @return the list of metadata saved by this check
        @rtype list of metadata dicts
        """
        if self._instance_metadata:
            self._roll_up_instance_metadata()
        service_metadata = self.svc_metadata
        self.svc_metadata = []
        return service_metadata

    def has_warnings(self):
        """
        Check whether the instance run created any warnings
        """
        return len(self.warnings) > 0

    def warning(self, warning_message):
        """ Add a warning message that will be printed in the info page
        :param warning_message: String. Warning message to be displayed
        """
        warning_message = str(warning_message)

        self.log.warning(warning_message)
        self.warnings.append(warning_message)

    def get_library_info(self):
        if self.library_versions is not None:
            return self.library_versions
        try:
            self.library_versions = self.get_library_versions()
        except NotImplementedError:
            pass

    def get_library_versions(self):
        """ Should return a string that shows which version
        of the needed libraries are used """
        raise NotImplementedError

    def get_warnings(self):
        """
        Return the list of warnings messages to be displayed in the info page
        """
        warnings = self.warnings
        self.warnings = []
        return warnings

    @staticmethod
    def _get_statistic_name_from_method(method_name):
        return method_name[4:] if method_name.startswith(
            'get_') else method_name

    @staticmethod
    def _collect_internal_stats(methods=None):
        current_process = psutil.Process(os.getpid())

        methods = methods or DEFAULT_PSUTIL_METHODS
        filtered_methods = [m for m in methods if hasattr(current_process, m)]

        stats = {}

        for method in filtered_methods:
            # Go from `get_memory_info` -> `memory_info`
            stat_name = AgentCheck._get_statistic_name_from_method(method)
            try:
                raw_stats = getattr(current_process, method)()
                try:
                    stats[stat_name] = raw_stats._asdict()
                except AttributeError:
                    if isinstance(raw_stats, numbers.Number):
                        stats[stat_name] = raw_stats
                    else:
                        log.warn(
                            "Could not serialize output of {0} to dict".format(
                                method))

            except psutil.AccessDenied:
                log.warn("Cannot call psutil method {} : Access Denied".format(
                    method))

        return stats

    def _set_internal_profiling_stats(self, before, after):
        self._internal_profiling_stats = {'before': before, 'after': after}

    def _get_internal_profiling_stats(self):
        """
        If in developer mode, return a dictionary of statistics about the check run
        """
        stats = self._internal_profiling_stats
        self._internal_profiling_stats = None
        return stats

    def run(self):
        """ Run all instances. """

        # Store run statistics if needed
        before, after = None, None
        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                before = AgentCheck._collect_internal_stats()
            except Exception:  # It's fine if we can't collect stats for the run, just log and proceed
                self.log.debug(
                    "Failed to collect Agent Stats before check {0}".format(
                        self.name))

        instance_statuses = []
        for i, instance in enumerate(self.instances):
            try:
                min_collection_interval = instance.get(
                    'min_collection_interval', self.min_collection_interval)

                now = time.time()
                if now - self.last_collection_time[i] < min_collection_interval:
                    self.log.debug(
                        "Not running instance #{0} of check {1} as it ran less than {2}s ago"
                        .format(i, self.name, min_collection_interval))
                    continue

                self.last_collection_time[i] = now

                check_start_time = None
                if self.in_developer_mode:
                    check_start_time = timeit.default_timer()
                self.check(copy.deepcopy(instance))

                instance_check_stats = None
                if check_start_time is not None:
                    instance_check_stats = {
                        'run_time': timeit.default_timer() - check_start_time
                    }

                if self.has_warnings():
                    instance_status = check_status.InstanceStatus(
                        i,
                        check_status.STATUS_WARNING,
                        warnings=self.get_warnings(),
                        instance_check_stats=instance_check_stats)
                else:
                    instance_status = check_status.InstanceStatus(
                        i,
                        check_status.STATUS_OK,
                        instance_check_stats=instance_check_stats)
            except Exception as e:
                self.log.exception("Check '%s' instance #%s failed" %
                                   (self.name, i))
                instance_status = check_status.InstanceStatus(
                    i,
                    check_status.STATUS_ERROR,
                    error=str(e),
                    tb=traceback.format_exc())
            finally:
                self._roll_up_instance_metadata()

            instance_statuses.append(instance_status)

        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                after = AgentCheck._collect_internal_stats()
                self._set_internal_profiling_stats(before, after)
                log.info("\n \t %s %s" %
                         (self.name,
                          pretty_statistics(self._internal_profiling_stats)))
            except Exception:  # It's fine if we can't collect stats for the run, just log and proceed
                self.log.debug(
                    "Failed to collect Agent Stats after check {0}".format(
                        self.name))

        return instance_statuses

    def check(self, instance):
        """
        Overridden by the check subclass. This will be called to run the check.

        :param instance: A dict with the instance information. This will vary
        depending on your config structure.
        """
        raise NotImplementedError()

    def stop(self):
        """
        To be executed when the agent is stopped, to clean up resources
        """
        pass

    @classmethod
    def from_yaml(cls,
                  path_to_yaml=None,
                  agentConfig=None,
                  yaml_text=None,
                  check_name=None):
        """
        A method used for testing your check without running the agent.
        """
        if path_to_yaml:
            check_name = os.path.basename(path_to_yaml).split('.')[0]
            try:
                f = open(path_to_yaml)
            except IOError:
                raise Exception('Unable to open yaml config: %s' %
                                path_to_yaml)
            yaml_text = f.read()
            f.close()

        config = yaml.load(yaml_text, Loader=yLoader)
        try:
            check = cls(check_name,
                        config.get('init_config') or {}, agentConfig or {},
                        config.get('instances'))
        except TypeError:
            # Compatibility for the check not supporting instances
            check = cls(check_name,
                        config.get('init_config') or {}, agentConfig or {})
        return check, config.get('instances', [])
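
    # A hypothetical testing sketch (MyCheck and the YAML body are invented):
    #
    #     check, instances = MyCheck.from_yaml(
    #         yaml_text="init_config:\ninstances:\n  - host: localhost",
    #         check_name='my_check')
    #     for instance in instances:
    #         check.check(instance)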

    def normalize(self, metric, prefix=None, fix_case=False):
        """
        Turn a metric into a well-formed metric name
        prefix.b.c

        :param metric: The metric name to normalize
        :param prefix: A prefix to add to the normalized name, default None
        :param fix_case: A boolean indicating whether to make sure that
                         the metric name returned is in underscore_case
        """
        if isinstance(metric, unicode):
            metric_name = unicodedata.normalize('NFKD', metric).encode(
                'ascii', 'ignore')
        else:
            metric_name = metric

        if fix_case:
            name = self.convert_to_underscore_separated(metric_name)
            if prefix is not None:
                prefix = self.convert_to_underscore_separated(prefix)
        else:
            name = re.sub(r"[,\+\*\-/()\[\]{}\s]", "_", metric_name)
        # Eliminate multiple _
        name = re.sub(r"__+", "_", name)
        # Don't start/end with _
        name = re.sub(r"^_", "", name)
        name = re.sub(r"_$", "", name)
        # Drop ._ and _.
        name = re.sub(r"\._", ".", name)
        name = re.sub(r"_\.", ".", name)

        if prefix is not None:
            return prefix + "." + name
        else:
            return name

    FIRST_CAP_RE = re.compile('(.)([A-Z][a-z]+)')
    ALL_CAP_RE = re.compile('([a-z0-9])([A-Z])')
    METRIC_REPLACEMENT = re.compile(r'([^a-zA-Z0-9_.]+)|(^[^a-zA-Z]+)')
    DOT_UNDERSCORE_CLEANUP = re.compile(r'_*\._*')

    def convert_to_underscore_separated(self, name):
        """
        Convert from CamelCase to camel_case
        and substitute illegal metric characters
        """
        metric_name = self.FIRST_CAP_RE.sub(r'\1_\2', name)
        metric_name = self.ALL_CAP_RE.sub(r'\1_\2', metric_name).lower()
        metric_name = self.METRIC_REPLACEMENT.sub('_', metric_name)
        return self.DOT_UNDERSCORE_CLEANUP.sub('.', metric_name).strip('_')
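
    # Examples, traced by hand through the regexes above:
    #
    #     convert_to_underscore_separated('MyMetricName') -> 'my_metric_name'
    #     convert_to_underscore_separated('requestRate')  -> 'request_rate'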

    @staticmethod
    def read_config(instance, key, message=None, cast=None):
        val = instance.get(key)
        if val is None:
            message = message or 'Must provide `%s` value in instance config' % key
            raise Exception(message)

        if cast is None:
            return val
        else:
            return cast(val)
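
    # A hedged usage sketch: fail fast when `port` is missing from the
    # instance config, and coerce it to int when present.
    #
    #     port = AgentCheck.read_config(instance, 'port', cast=int)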
Ejemplo n.º 55
0
    def test_string_sets(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('my.set:string|s')
        stats.submit_packets('my.set:sets|s')
        stats.submit_packets('my.set:sets|s')
        stats.submit_packets('my.set:test|s')
        stats.submit_packets('my.set:test|s')
        stats.submit_packets('my.set:test|s')

        # Assert that it's treated normally.
        metrics = stats.flush()
        nt.assert_equal(len(metrics), 1)
        m = metrics[0]
        nt.assert_equal(m['metric'], 'my.set')
        nt.assert_equal(m['points'][0][1], 3)

        # Assert there are no more sets
        assert not stats.flush()
Ejemplo n.º 56
0
class AgentCheck(object):
    OK, WARNING, CRITICAL, UNKNOWN = (0, 1, 2, 3)

    SOURCE_TYPE_NAME = None

    DEFAULT_MIN_COLLECTION_INTERVAL = 0

    _enabled_checks = []

    @classmethod
    def is_check_enabled(cls, name):
        return name in cls._enabled_checks

    def __init__(self, name, init_config, agentConfig, instances=None):

        from aggregator import MetricsAggregator

        self._enabled_checks.append(name)
        self._enabled_checks = list(set(self._enabled_checks))

        self.name = name
        self.init_config = init_config or {}
        self.agentConfig = agentConfig
        self.in_developer_mode = agentConfig.get('developer_mode') and psutil
        self._internal_profiling_stats = None

        self.hostname = agentConfig.get('checksd_hostname') or get_hostname(
            agentConfig)
        self.log = logging.getLogger('%s.%s' % (__name__, name))
        self.aggregator = MetricsAggregator(
            self.hostname,
            formatter=agent_formatter,
            recent_point_threshold=agentConfig.get('recent_point_threshold',
                                                   None),
            histogram_aggregates=agentConfig.get('histogram_aggregates'),
            histogram_percentiles=agentConfig.get('histogram_percentiles'))

        self.events = []
        self.service_checks = []
        if instances:
            jsoned_instances = json.dumps(instances)
            encrypted_passwd_list = re.findall('>>>.*?<<<', jsoned_instances)
            if encrypted_passwd_list:
                for encrypted_passwd in encrypted_passwd_list:
                    decrypted_passwd = decrypted(encrypted_passwd)
                    jsoned_instances = jsoned_instances.replace(
                        encrypted_passwd, decrypted_passwd)
                self.instances = convert_to_str(
                    json.loads(jsoned_instances, encoding='utf-8'))
            else:
                self.instances = instances
        else:
            self.instances = []
        self.warnings = []
        self.library_versions = None
        self.last_collection_time = defaultdict(int)
        self._instance_metadata = []
        self.svc_metadata = []
        self.historate_dict = {}

    def instance_count(self):
        return len(self.instances)

    def gauge(self,
              metric,
              value,
              tags=None,
              hostname=None,
              device_name=None,
              timestamp=None):
        self.aggregator.gauge(metric, value, tags, hostname, device_name,
                              timestamp)

    def increment(self,
                  metric,
                  value=1,
                  tags=None,
                  hostname=None,
                  device_name=None):

        self.aggregator.increment(metric, value, tags, hostname, device_name)

    def decrement(self,
                  metric,
                  value=-1,
                  tags=None,
                  hostname=None,
                  device_name=None):
        self.aggregator.decrement(metric, value, tags, hostname, device_name)

    def count(self,
              metric,
              value=0,
              tags=None,
              hostname=None,
              device_name=None):
        self.aggregator.submit_count(metric, value, tags, hostname,
                                     device_name)

    def monotonic_count(self,
                        metric,
                        value=0,
                        tags=None,
                        hostname=None,
                        device_name=None):
        self.aggregator.count_from_counter(metric, value, tags, hostname,
                                           device_name)

    def rate(self, metric, value, tags=None, hostname=None, device_name=None):
        self.aggregator.rate(metric, value, tags, hostname, device_name)

    def histogram(self,
                  metric,
                  value,
                  tags=None,
                  hostname=None,
                  device_name=None):
        self.aggregator.histogram(metric, value, tags, hostname, device_name)

    @classmethod
    def generate_historate_func(cls, excluding_tags):
        def fct(self,
                metric,
                value,
                tags=None,
                hostname=None,
                device_name=None):
            cls.historate(self,
                          metric,
                          value,
                          excluding_tags,
                          tags=tags,
                          hostname=hostname,
                          device_name=device_name)

        return fct

    @classmethod
    def generate_histogram_func(cls, excluding_tags):
        def fct(self,
                metric,
                value,
                tags=None,
                hostname=None,
                device_name=None):
            tags = list(tags)
            for tag in list(tags):
                for exc_tag in excluding_tags:
                    if tag.startswith(exc_tag + ":"):
                        tags.remove(tag)

            cls.histogram(self,
                          metric,
                          value,
                          tags=tags,
                          hostname=hostname,
                          device_name=device_name)

        return fct

    def historate(self,
                  metric,
                  value,
                  excluding_tags,
                  tags=None,
                  hostname=None,
                  device_name=None):

        tags = list(tags)
        context = [metric]
        if tags is not None:
            context.append("-".join(sorted(tags)))
        if hostname is not None:
            context.append("host:" + hostname)
        if device_name is not None:
            context.append("device:" + device_name)

        now = time.time()
        context = tuple(context)

        if context in self.historate_dict:
            if tags is not None:
                for tag in list(tags):
                    for exc_tag in excluding_tags:
                        if tag.startswith("{0}:".format(exc_tag)):
                            tags.remove(tag)

            prev_value, prev_ts = self.historate_dict[context]
            rate = float(value - prev_value) / float(now - prev_ts)
            self.aggregator.histogram(metric, rate, tags, hostname,
                                      device_name)

        self.historate_dict[context] = (value, now)
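
A worked example of the rate computation above, with invented numbers:
two samples of a growing counter taken 10 seconds apart produce one
histogram point of (value - prev_value) / (now - prev_ts):

prev_value, prev_ts = 1200.0, 1000.0   # first sample: (value, timestamp)
value, now = 1500.0, 1010.0            # second sample, 10 seconds later
rate = float(value - prev_value) / float(now - prev_ts)
assert rate == 30.0   # submitted to the histogram as a single point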

    def set(self, metric, value, tags=None, hostname=None, device_name=None):
        self.aggregator.set(metric, value, tags, hostname, device_name)

    def event(self, event):
        if event.get('api_key') is None:
            event['api_key'] = self.agentConfig['api_key']
        self.events.append(event)

    def service_check(self,
                      check_name,
                      status,
                      tags=None,
                      timestamp=None,
                      hostname=None,
                      check_run_id=None,
                      message=None):

        if hostname is None:
            hostname = self.hostname
        if message is not None:
            message = unicode(message)
        self.service_checks.append(
            create_service_check(check_name, status, tags, timestamp, hostname,
                                 check_run_id, message))
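
A hypothetical call from inside a check's check() method; the integer
status follows the same 0=OK convention as the '_sc' packets in the
tests above, and the check name is invented:

self.service_check('my_integration.can_connect', 0,
                   tags=['instance:primary'],
                   message='connection OK')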

    def service_metadata(self, meta_name, value):

        self._instance_metadata.append((meta_name, unicode(value)))

    def has_events(self):

        return len(self.events) > 0

    def get_metrics(self):
        return self.aggregator.flush()

    def get_events(self):
        events = self.events
        self.events = []
        return events

    def get_service_checks(self):
        service_checks = self.service_checks
        self.service_checks = []
        return service_checks

    def _roll_up_instance_metadata(self):
        self.svc_metadata.append(
            dict((k, v) for (k, v) in self._instance_metadata))
        self._instance_metadata = []

    def get_service_metadata(self):
        if self._instance_metadata:
            self._roll_up_instance_metadata()
        service_metadata = self.svc_metadata
        self.svc_metadata = []
        return service_metadata
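
A sketch of the metadata lifecycle, assuming check is an AgentCheck
instance: per-run key/value pairs accumulate in _instance_metadata and
are rolled up into one dict per instance when read back:

check.service_metadata('version', '9.6.2')
check.service_metadata('flavor', 'postgres')
# Values were coerced to unicode by service_metadata() above.
assert check.get_service_metadata() == [{'version': u'9.6.2',
                                         'flavor': u'postgres'}]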

    def has_warnings(self):
        return len(self.warnings) > 0

    def warning(self, warning_message):
        warning_message = str(warning_message)

        self.log.warning(warning_message)
        self.warnings.append(warning_message)

    def get_library_info(self):
        if self.library_versions is not None:
            return self.library_versions
        try:
            self.library_versions = self.get_library_versions()
        except NotImplementedError:
            pass
        return self.library_versions

    def get_library_versions(self):
        raise NotImplementedError

    def get_warnings(self):
        warnings = self.warnings
        self.warnings = []
        return warnings

    @staticmethod
    def _get_statistic_name_from_method(method_name):
        return method_name[4:] if method_name.startswith(
            'get_') else method_name
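
Two examples of the mapping this helper performs (the method names are
illustrative): the 'get_' prefix is stripped when present, otherwise
the name passes through unchanged.

assert AgentCheck._get_statistic_name_from_method('get_memory_info') == 'memory_info'
assert AgentCheck._get_statistic_name_from_method('cpu_percent') == 'cpu_percent'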

    @staticmethod
    def _collect_internal_stats(methods=None):
        current_process = psutil.Process(os.getpid())

        methods = methods or DEFAULT_PSUTIL_METHODS
        filtered_methods = [m for m in methods if hasattr(current_process, m)]

        stats = {}

        for method in filtered_methods:
            stat_name = AgentCheck._get_statistic_name_from_method(method)
            try:
                raw_stats = getattr(current_process, method)()
                try:
                    stats[stat_name] = raw_stats._asdict()
                except AttributeError:
                    if isinstance(raw_stats, numbers.Number):
                        stats[stat_name] = raw_stats
                    else:
                        log.warn(
                            "Could not serialize output of {0} to dict".format(
                                method))

            except psutil.AccessDenied:
                log.warn("Cannot call psutil method {} : Access Denied".format(
                    method))

        return stats
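
A standalone sketch of the same pattern with a single psutil call
(the choice of methods here is illustrative): namedtuple results are
stored via _asdict(), plain numbers as-is.

import os

import psutil

proc = psutil.Process(os.getpid())
raw = proc.memory_info()            # a namedtuple of memory fields
stats = {'memory_info': raw._asdict()}
stats['num_threads'] = proc.num_threads()   # a plain integer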

    def _set_internal_profiling_stats(self, before, after):
        self._internal_profiling_stats = {'before': before, 'after': after}

    def _get_internal_profiling_stats(self):
        stats = self._internal_profiling_stats
        self._internal_profiling_stats = None
        return stats

    def run(self):
        before, after = None, None
        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                before = AgentCheck._collect_internal_stats()
            except Exception:
                self.log.debug(
                    "Failed to collect Agent Stats before check {0}".format(
                        self.name))

        instance_statuses = []
        for i, instance in enumerate(self.instances):
            try:
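                # Resolve the interval: a per-instance setting wins over
                # init_config, which wins over the class-level default.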
                min_collection_interval = instance.get(
                    'min_collection_interval',
                    self.init_config.get('min_collection_interval',
                                         self.DEFAULT_MIN_COLLECTION_INTERVAL))
                now = time.time()
                if now - self.last_collection_time[i] < min_collection_interval:
                    self.log.debug(
                        "Not running instance #{0} of check {1} as it ran less than {2}s ago"
                        .format(i, self.name, min_collection_interval))
                    continue

                self.last_collection_time[i] = now

                check_start_time = None
                if self.in_developer_mode:
                    check_start_time = timeit.default_timer()
                self.check(copy.deepcopy(instance))

                instance_check_stats = None
                if check_start_time is not None:
                    instance_check_stats = {
                        'run_time': timeit.default_timer() - check_start_time
                    }

                if self.has_warnings():
                    instance_status = check_status.InstanceStatus(
                        i,
                        check_status.STATUS_WARNING,
                        warnings=self.get_warnings(),
                        instance_check_stats=instance_check_stats)
                else:
                    instance_status = check_status.InstanceStatus(
                        i,
                        check_status.STATUS_OK,
                        instance_check_stats=instance_check_stats)
            except Exception, e:
                self.log.exception("Check '%s' instance #%s failed" %
                                   (self.name, i))
                instance_status = check_status.InstanceStatus(
                    i, check_status.STATUS_ERROR, error=str(e),
                    tb=traceback.format_exc())
            finally:
Example No. 57
0
    def __init__(self, name, init_config, agentConfig, instances=None):
        """
        Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agentConfig: The global configuration for the agent
        :param instances: A list of configuration objects for each instance.
        """
        from aggregator import MetricsAggregator

        self._enabled_checks.append(name)
        self._enabled_checks = list(set(self._enabled_checks))

        self.name = name
        self.init_config = init_config or {}
        self.agentConfig = agentConfig
        self.in_developer_mode = agentConfig.get('developer_mode') and psutil
        self._internal_profiling_stats = None
        self.default_integration_http_timeout = float(
            agentConfig.get('default_integration_http_timeout', 9))

        self.hostname = agentConfig.get('checksd_hostname') or get_hostname(
            agentConfig)
        self.log = logging.getLogger('%s.%s' % (__name__, name))

        self.min_collection_interval = self.init_config.get(
            'min_collection_interval', self.DEFAULT_MIN_COLLECTION_INTERVAL)

        self.aggregator = MetricsAggregator(
            self.hostname,
            expiry_seconds=self.min_collection_interval +
            self.DEFAULT_EXPIRY_SECONDS,
            formatter=agent_formatter,
            recent_point_threshold=agentConfig.get('recent_point_threshold',
                                                   None),
            histogram_aggregates=agentConfig.get('histogram_aggregates'),
            histogram_percentiles=agentConfig.get('histogram_percentiles'))

        self.events = []
        self.service_checks = []
        self.instances = instances or []
        self.warnings = []
        self.library_versions = None
        self.last_collection_time = defaultdict(int)
        self._instance_metadata = []
        self.svc_metadata = []
        self.historate_dict = {}

        # Set proxy settings
        self.proxy_settings = get_proxy(self.agentConfig)
        self._use_proxy = False if init_config is None else init_config.get(
            "use_agent_proxy", True)
        self.proxies = {
            "http": None,
            "https": None,
        }
        if self.proxy_settings and self._use_proxy:
            uri = "{host}:{port}".format(host=self.proxy_settings['host'],
                                         port=self.proxy_settings['port'])
            if self.proxy_settings['user'] and self.proxy_settings['password']:
                uri = "{user}:{password}@{uri}".format(
                    user=self.proxy_settings['user'],
                    password=self.proxy_settings['password'],
                    uri=uri)
            self.proxies['http'] = "http://{uri}".format(uri=uri)
            self.proxies['https'] = "https://{uri}".format(uri=uri)
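
A worked example of the proxy URI construction above, with invented
credentials:

proxy_settings = {'host': 'proxy.example.com', 'port': 3128,
                  'user': 'alice', 'password': 's3cret'}
uri = "{host}:{port}".format(host=proxy_settings['host'],
                             port=proxy_settings['port'])
if proxy_settings['user'] and proxy_settings['password']:
    uri = "{user}:{password}@{uri}".format(user=proxy_settings['user'],
                                           password=proxy_settings['password'],
                                           uri=uri)
assert "http://" + uri == "http://alice:s3cret@proxy.example.com:3128"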
Example No. 58
0
    def test_rate_errors(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('my.rate:10|_dd-r')
        # Sleep 1 second so the time interval > 0 (timestamp is converted to an int)
        time.sleep(1)
        stats.submit_packets('my.rate:9|_dd-r')

        # Since the difference is < 0, we shouldn't get a value
        metrics = stats.flush()
        nt.assert_equal(len(metrics), 0)

        stats.submit_packets('my.rate:10|_dd-r')
        # Submit immediately so both points land on the same integer
        # timestamp: with a zero-length interval no rate can be computed
        stats.submit_packets('my.rate:40|_dd-r')

        metrics = stats.flush()
        nt.assert_equal(len(metrics), 0)
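
A standalone sketch of the rule this test exercises (not the
aggregator's actual implementation): a rate point is only emitted when
both the delta and the interval are positive.

def rate_point(prev, cur):
    (v0, t0), (v1, t1) = prev, cur
    if v1 - v0 < 0 or t1 - t0 <= 0:
        return None   # counter reset or same-second samples: drop it
    return float(v1 - v0) / (t1 - t0)

assert rate_point((10, 100), (9, 101)) is None    # negative delta
assert rate_point((10, 100), (40, 100)) is None   # zero interval
assert rate_point((10, 100), (40, 110)) == 3.0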
Example No. 59
0
    def setUp(self):
        self.aggr = MetricsAggregator('test-aggr')
Example No. 60
0
    def test_event_title(self):
        stats = MetricsAggregator('myhost')
        stats.submit_packets('_e{0,4}:|text')
        stats.submit_packets(u'_e{9,4}:2intitulé|text')
        stats.submit_packets('_e{14,4}:3title content|text')
        stats.submit_packets('_e{14,4}:4title|content|text')
        stats.submit_packets(
            '_e{13,4}:5title\\ntitle|text')  # \n stays escaped

        events = self.sort_events(stats.flush_events())

        assert len(events) == 5
        first, second, third, fourth, fifth = events

        nt.assert_equal(first['msg_title'], '')
        nt.assert_equal(second['msg_title'], u'2intitulé')
        nt.assert_equal(third['msg_title'], '3title content')
        nt.assert_equal(fourth['msg_title'], '4title|content')
        nt.assert_equal(fifth['msg_title'], '5title\\ntitle')
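
These '_e{<title_len>,<text_len>}:<title>|<text>' packets are
length-prefixed, which is why '4title|content' above can itself
contain a '|'; a small sketch of how such a packet can be sliced:

packet = '_e{14,4}:4title|content|text'
header, _, payload = packet.partition(':')
title_len, text_len = map(int, header[3:-1].split(','))
assert payload[:title_len] == '4title|content'
assert payload[title_len + 1:title_len + 1 + text_len] == 'text'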