class DatadogMetrics(object):
    """DataDog Metric backend"""
    def __init__(self, api_key, app_key, flush_interval=10,
                 namespace="autopush"):
        datadog.initialize(api_key=api_key, app_key=app_key)
        self._client = ThreadStats()
        self._flush_interval = flush_interval
        self._host = get_hostname()
        self._namespace = namespace

    def _prefix_name(self, name):
        return "%s.%s" % (self._namespace, name)

    def start(self):
        self._client.start(flush_interval=self._flush_interval,
                           roll_up_interval=self._flush_interval)

    def increment(self, name, count=1, **kwargs):
        self._client.increment(self._prefix_name(name), count,
                               host=self._host, **kwargs)

    def gauge(self, name, count, **kwargs):
        self._client.gauge(self._prefix_name(name), count,
                           host=self._host, **kwargs)

    def timing(self, name, duration, **kwargs):
        self._client.timing(self._prefix_name(name), value=duration,
                            host=self._host, **kwargs)
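# A minimal usage sketch for the DatadogMetrics backend above (not part of
# the original source). The key values and metric names are placeholders;
# increment()/timing() forward to ThreadStats, so nothing reaches Datadog
# until start() has launched the flush thread.
if __name__ == "__main__":
    metrics = DatadogMetrics(api_key="<api_key>", app_key="<app_key>",
                             flush_interval=10, namespace="autopush")
    metrics.start()
    metrics.increment("client.connect")
    metrics.timing("client.flush", duration=12.5)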
def test_gauge(self):
    # Create some fake metrics.
    dog = ThreadStats()
    dog.start(roll_up_interval=10, flush_in_thread=False)
    reporter = dog.reporter = MemoryReporter()

    dog.gauge("test.gauge.1", 20, 100.0)
    dog.gauge("test.gauge.1", 22, 105.0)
    dog.gauge("test.gauge.2", 30, 115.0)
    dog.gauge("test.gauge.3", 30, 125.0)
    dog.flush(120.0)

    # Assert they've been properly flushed.
    metrics = self.sort_metrics(reporter.metrics)
    nt.assert_equal(len(metrics), 2)

    (first, second) = metrics
    nt.assert_equal(first["metric"], "test.gauge.1")
    nt.assert_equal(first["points"][0][0], 100.0)
    nt.assert_equal(first["points"][0][1], 22)
    nt.assert_equal(second["metric"], "test.gauge.2")

    # Flush again and make sure we're progressing.
    reporter.metrics = []
    dog.flush(130.0)
    nt.assert_equal(len(reporter.metrics), 1)

    # Finally, make sure we've flushed all metrics.
    reporter.metrics = []
    dog.flush(150.0)
    nt.assert_equal(len(reporter.metrics), 0)
def test_disabled_mode(self):
    dog = ThreadStats()
    reporter = dog.reporter = MemoryReporter()
    dog.start(disabled=True, flush_interval=1, roll_up_interval=1)
    dog.gauge("testing", 1, timestamp=1000)
    dog.gauge("testing", 2, timestamp=1000)
    dog.flush(2000.0)
    assert not reporter.metrics
def test_default_host_and_device(self):
    dog = ThreadStats()
    dog.start(roll_up_interval=1, flush_in_thread=False)
    reporter = dog.reporter = MemoryReporter()
    dog.gauge("my.gauge", 1, 100.0)
    dog.flush(1000)
    metric = reporter.metrics[0]
    assert not metric["device"]
    assert not metric["host"]
def test_custom_host_and_device(self):
    dog = ThreadStats()
    dog.start(roll_up_interval=1, flush_in_thread=False, device="dev")
    reporter = dog.reporter = MemoryReporter()
    dog.gauge("my.gauge", 1, 100.0, host="host")
    dog.flush(1000)
    metric = reporter.metrics[0]
    nt.assert_equal(metric["device"], "dev")
    nt.assert_equal(metric["host"], "host")
def test_metric_namespace(self):
    """
    Namespace prefixes all metric names.
    """
    # Set up ThreadStats with a namespace
    dog = ThreadStats(namespace="foo")
    dog.start(roll_up_interval=1, flush_in_thread=False)
    dog.reporter = self.reporter

    # Send a few metrics
    dog.gauge("gauge", 20, timestamp=100.0)
    dog.increment("counter", timestamp=100.0)
    dog.flush(200.0)

    # Metric names are prefixed with the namespace
    self.assertMetric(count=2)
    self.assertMetric(name="foo.gauge", count=1)
    self.assertMetric(name="foo.counter", count=1)
class DatadogMetrics(object):
    """DataDog Metric backend"""
    def __init__(self, api_key, app_key, hostname, flush_interval=10,
                 namespace="autopush"):
        datadog.initialize(api_key=api_key, app_key=app_key,
                           host_name=hostname)
        self._client = ThreadStats()
        self._flush_interval = flush_interval
        self._host = hostname
        self._namespace = namespace

    def _prefix_name(self, name):
        return "%s.%s" % (self._namespace, name)

    def start(self):
        self._client.start(flush_interval=self._flush_interval,
                           roll_up_interval=self._flush_interval)

    def increment(self, name, count=1, **kwargs):
        self._client.increment(self._prefix_name(name), count,
                               host=self._host, **kwargs)

    def gauge(self, name, count, **kwargs):
        self._client.gauge(self._prefix_name(name), count,
                           host=self._host, **kwargs)

    def timing(self, name, duration, **kwargs):
        self._client.timing(self._prefix_name(name), value=duration,
                            host=self._host, **kwargs)
def test_tags(self):
    dog = ThreadStats()
    dog.start(roll_up_interval=10, flush_in_thread=False)
    reporter = dog.reporter = MemoryReporter()

    # Post the same metric with different tags.
    dog.gauge('gauge', 10, timestamp=100.0)
    dog.gauge('gauge', 15, timestamp=100.0, tags=['env:production', 'db'])
    dog.gauge('gauge', 20, timestamp=100.0, tags=['env:staging'])

    dog.increment('counter', timestamp=100.0)
    dog.increment('counter', timestamp=100.0, tags=['env:production', 'db'])
    dog.increment('counter', timestamp=100.0, tags=['env:staging'])

    dog.flush(200.0)

    metrics = self.sort_metrics(reporter.metrics)
    nt.assert_equal(len(metrics), 6)
    [c1, c2, c3, g1, g2, g3] = metrics

    # Use a real loop: the original bare generator expression never ran
    # these assertions.
    for c in [c1, c2, c3]:
        nt.assert_equal(c['metric'], 'counter')
    nt.assert_equal(c1['tags'], None)
    nt.assert_equal(c1['points'][0][1], 1)
    nt.assert_equal(c2['tags'], ['env:production', 'db'])
    nt.assert_equal(c2['points'][0][1], 1)
    nt.assert_equal(c3['tags'], ['env:staging'])
    nt.assert_equal(c3['points'][0][1], 1)

    for g in [g1, g2, g3]:
        nt.assert_equal(g['metric'], 'gauge')
    nt.assert_equal(g1['tags'], None)
    nt.assert_equal(g1['points'][0][1], 10)
    nt.assert_equal(g2['tags'], ['env:production', 'db'])
    nt.assert_equal(g2['points'][0][1], 15)
    nt.assert_equal(g3['tags'], ['env:staging'])
    nt.assert_equal(g3['points'][0][1], 20)
def test_tags(self):
    dog = ThreadStats()
    dog.start(roll_up_interval=10, flush_in_thread=False)
    reporter = dog.reporter = MemoryReporter()

    # Post the same metric with different tags.
    dog.gauge('gauge', 10, timestamp=100.0)
    dog.gauge('gauge', 15, timestamp=100.0, tags=['env:production', 'db'])
    dog.gauge('gauge', 20, timestamp=100.0, tags=['env:staging'])

    dog.increment('counter', timestamp=100.0)
    dog.increment('counter', timestamp=100.0, tags=['env:production', 'db'])
    dog.increment('counter', timestamp=100.0, tags=['env:staging'])

    dog.flush(200.0)

    metrics = self.sort_metrics(reporter.metrics)
    assert_equal(len(metrics), 6)
    [c1, c2, c3, g1, g2, g3] = metrics

    # Use a real loop: the original bare generator expression never ran
    # these assertions.
    for c in [c1, c2, c3]:
        assert_equal(c['metric'], 'counter')
    # Counters flush as per-second rates over the 10s roll-up interval,
    # hence 0.1 rather than 1.
    assert_equal(c1['tags'], None)
    assert_equal(c1['points'][0][1], 0.1)
    assert_equal(c2['tags'], ['env:production', 'db'])
    assert_equal(c2['points'][0][1], 0.1)
    assert_equal(c3['tags'], ['env:staging'])
    assert_equal(c3['points'][0][1], 0.1)

    for g in [g1, g2, g3]:
        assert_equal(g['metric'], 'gauge')
    assert_equal(g1['tags'], None)
    assert_equal(g1['points'][0][1], 10)
    assert_equal(g2['tags'], ['env:production', 'db'])
    assert_equal(g2['points'][0][1], 15)
    assert_equal(g3['tags'], ['env:staging'])
    assert_equal(g3['points'][0][1], 20)
def test_host(self):
    dog = ThreadStats()
    dog.start(roll_up_interval=10, flush_in_thread=False)
    reporter = dog.reporter = MemoryReporter()

    # Post the same metric with different tags.
    dog.gauge('gauge', 12, timestamp=100.0, host='')  # unset the host
    dog.gauge('gauge', 10, timestamp=100.0)
    dog.gauge('gauge', 15, timestamp=100.0, host='test')
    dog.gauge('gauge', 15, timestamp=100.0, host='test')

    dog.increment('counter', timestamp=100.0)
    dog.increment('counter', timestamp=100.0)
    dog.increment('counter', timestamp=100.0, host='test')
    dog.increment('counter', timestamp=100.0, host='test', tags=['tag'])
    dog.increment('counter', timestamp=100.0, host='test', tags=['tag'])

    dog.flush(200.0)

    metrics = self.sort_metrics(reporter.metrics)
    assert len(metrics) == 6

    [c1, c2, c3, g1, g2, g3] = metrics
    assert c1['metric'] == 'counter'
    assert c2['metric'] == 'counter'
    assert c3['metric'] == 'counter'
    assert c1['host'] is None
    assert c1['tags'] is None
    assert c1['points'][0][1] == 0.2
    assert c2['host'] == 'test'
    assert c2['tags'] is None
    assert c2['points'][0][1] == 0.1
    assert c3['host'] == 'test'
    assert c3['tags'] == ['tag']
    assert c3['points'][0][1] == 0.2

    assert g1['metric'] == 'gauge'
    assert g2['metric'] == 'gauge'
    assert g3['metric'] == 'gauge'
    assert g1['host'] is None
    assert g1['points'][0][1] == 10
    assert g2['host'] == ''
    assert g2['points'][0][1] == 12
    assert g3['host'] == 'test'
    assert g3['points'][0][1] == 15

    # Ensure histograms work as well.
    @dog.timed('timed', host='test')
    def test():
        pass
    test()
    dog.histogram('timed', 20, timestamp=300.0, host='test')
    reporter.metrics = []
    dog.flush(400)
    for metric in reporter.metrics:
        assert metric['host'] == 'test'
def send_metric(metric_name: str, data_value: float, **kwargs):
    """Submit a gauge metric via ThreadStats.

    Keyword arguments whose names start with 'tag' become Datadog tags:
    the 'tag' prefix is stripped and the rest is joined with the value,
    e.g. tagsite='nyc' becomes 'site:nyc'.
    """
    tags = ['metric_submission:threadstats']
    # kwargs is always a dict, so no None check is needed.
    for key, value in kwargs.items():
        if 'tag' in key:
            tags.append('{0}:{1}'.format(key[3:], value))

    datadog_conf = config.Get().datadog_config()
    options = {
        'api_key': datadog_conf['api_key'],
        'app_key': datadog_conf['app_key'],
    }
    initialize(**options)

    stats = ThreadStats()
    stats.start()
    try:
        stats.gauge(metric_name, value=data_value, tags=tags)
        return True
    except Exception as e:
        print(e)
        return False
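# A hypothetical call illustrating the 'tag'-prefixed kwargs convention in
# send_metric above (not part of the original source; the metric name and
# the kwarg 'tagregion' are made-up examples). This submits the gauge with
# tags ['metric_submission:threadstats', 'region:us-east'].
ok = send_metric('network.latency_ms', 42.0, tagregion='us-east')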
def test_host(self):
    dog = ThreadStats()
    dog.start(roll_up_interval=10, flush_in_thread=False)
    reporter = dog.reporter = MemoryReporter()

    # Post the same metric with different tags.
    dog.gauge('gauge', 12, timestamp=100.0, host='')  # unset the host
    dog.gauge('gauge', 10, timestamp=100.0)
    dog.gauge('gauge', 15, timestamp=100.0, host='test')
    dog.gauge('gauge', 15, timestamp=100.0, host='test')

    dog.increment('counter', timestamp=100.0)
    dog.increment('counter', timestamp=100.0)
    dog.increment('counter', timestamp=100.0, host='test')
    dog.increment('counter', timestamp=100.0, host='test', tags=['tag'])
    dog.increment('counter', timestamp=100.0, host='test', tags=['tag'])

    dog.flush(200.0)

    metrics = self.sort_metrics(reporter.metrics)
    nt.assert_equal(len(metrics), 6)

    [c1, c2, c3, g1, g2, g3] = metrics
    # Use a real loop: the original bare generator expression never ran
    # these assertions.
    for c in [c1, c2, c3]:
        nt.assert_equal(c['metric'], 'counter')
    nt.assert_equal(c1['host'], None)
    nt.assert_equal(c1['tags'], None)
    nt.assert_equal(c1['points'][0][1], 0.2)
    nt.assert_equal(c2['host'], 'test')
    nt.assert_equal(c2['tags'], None)
    nt.assert_equal(c2['points'][0][1], 0.1)
    nt.assert_equal(c3['host'], 'test')
    nt.assert_equal(c3['tags'], ['tag'])
    nt.assert_equal(c3['points'][0][1], 0.2)

    for g in [g1, g2, g3]:
        nt.assert_equal(g['metric'], 'gauge')
    nt.assert_equal(g1['host'], None)
    nt.assert_equal(g1['points'][0][1], 10)
    nt.assert_equal(g2['host'], '')
    nt.assert_equal(g2['points'][0][1], 12)
    nt.assert_equal(g3['host'], 'test')
    nt.assert_equal(g3['points'][0][1], 15)

    # Ensure histograms work as well.
    @dog.timed('timed', host='test')
    def test():
        pass
    test()
    dog.histogram('timed', 20, timestamp=300.0, host='test')
    reporter.metrics = []
    dog.flush(400)
    for metric in reporter.metrics:
        assert metric['host'] == 'test'
def test_host(self):
    dog = ThreadStats()
    dog.start(roll_up_interval=10, flush_in_thread=False)
    reporter = dog.reporter = MemoryReporter()

    # Post the same metric with different tags.
    dog.gauge("gauge", 12, timestamp=100.0, host="")  # unset the host
    dog.gauge("gauge", 10, timestamp=100.0)
    dog.gauge("gauge", 15, timestamp=100.0, host="test")
    dog.gauge("gauge", 15, timestamp=100.0, host="test")

    dog.increment("counter", timestamp=100.0)
    dog.increment("counter", timestamp=100.0)
    dog.increment("counter", timestamp=100.0, host="test")
    dog.increment("counter", timestamp=100.0, host="test", tags=["tag"])
    dog.increment("counter", timestamp=100.0, host="test", tags=["tag"])

    dog.flush(200.0)

    metrics = self.sort_metrics(reporter.metrics)
    nt.assert_equal(len(metrics), 6)

    [c1, c2, c3, g1, g2, g3] = metrics
    # Use a real loop: the original bare generator expression never ran
    # these assertions.
    for c in [c1, c2, c3]:
        nt.assert_equal(c["metric"], "counter")
    nt.assert_equal(c1["host"], None)
    nt.assert_equal(c1["tags"], None)
    nt.assert_equal(c1["points"][0][1], 2)
    nt.assert_equal(c2["host"], "test")
    nt.assert_equal(c2["tags"], None)
    nt.assert_equal(c2["points"][0][1], 1)
    nt.assert_equal(c3["host"], "test")
    nt.assert_equal(c3["tags"], ["tag"])
    nt.assert_equal(c3["points"][0][1], 2)

    for g in [g1, g2, g3]:
        nt.assert_equal(g["metric"], "gauge")
    nt.assert_equal(g1["host"], None)
    nt.assert_equal(g1["points"][0][1], 10)
    nt.assert_equal(g2["host"], "")
    nt.assert_equal(g2["points"][0][1], 12)
    nt.assert_equal(g3["host"], "test")
    nt.assert_equal(g3["points"][0][1], 15)

    # Ensure histograms work as well.
    @dog.timed("timed", host="test")
    def test():
        pass
    test()
    dog.histogram("timed", 20, timestamp=300.0, host="test")
    reporter.metrics = []
    dog.flush(400)
    for metric in reporter.metrics:
        assert metric["host"] == "test"
def test_constant_tags(self):
    """
    Constant tags are attached to all metrics.
    """
    # Set up ThreadStats with constant tags
    dog = ThreadStats(constant_tags=["type:constant"])
    dog.start(roll_up_interval=1, flush_in_thread=False)
    dog.reporter = self.reporter

    # Post the same metric with different tags.
    dog.gauge("gauge", 10, timestamp=100.0)
    dog.gauge("gauge", 15, timestamp=100.0, tags=["env:production", "db"])
    dog.gauge("gauge", 20, timestamp=100.0, tags=["env:staging"])

    dog.increment("counter", timestamp=100.0)
    dog.increment("counter", timestamp=100.0, tags=["env:production", "db"])
    dog.increment("counter", timestamp=100.0, tags=["env:staging"])

    dog.flush(200.0)

    # Assertions on all metrics
    self.assertMetric(count=6)

    # Assertions on gauges
    self.assertMetric(name="gauge", value=10,
                      tags=["type:constant"], count=1)
    self.assertMetric(name="gauge", value=15,
                      tags=["env:production", "db", "type:constant"], count=1)
    self.assertMetric(name="gauge", value=20,
                      tags=["env:staging", "type:constant"], count=1)

    # Assertions on counters
    self.assertMetric(name="counter", value=1,
                      tags=["type:constant"], count=1)
    self.assertMetric(name="counter", value=1,
                      tags=["env:production", "db", "type:constant"], count=1)
    self.assertMetric(name="counter", value=1,
                      tags=["env:staging", "type:constant"], count=1)

    # Ensure histograms work as well.
    @dog.timed("timed", tags=["version:1"])
    def do_nothing():
        """
        A function that does nothing, but being timed.
        """
        pass

    with patch("datadog.threadstats.base.time", return_value=300):
        do_nothing()

    dog.histogram("timed", 20, timestamp=300.0, tags=["db", "version:2"])
    self.reporter.metrics = []
    dog.flush(400.0)

    # Histograms, and related metric types, produce 8 different metrics
    self.assertMetric(tags=["version:1", "type:constant"], count=8)
    self.assertMetric(tags=["db", "version:2", "type:constant"], count=8)
def test_constant_tags(self):
    dog = ThreadStats(constant_tags=['type:constant'])
    dog.start(roll_up_interval=10, flush_in_thread=False)
    reporter = dog.reporter = MemoryReporter()

    # Post the same metric with different tags.
    dog.gauge('gauge', 10, timestamp=100.0)
    dog.gauge('gauge', 15, timestamp=100.0, tags=['env:production', 'db'])
    dog.gauge('gauge', 20, timestamp=100.0, tags=['env:staging'])

    dog.increment('counter', timestamp=100.0)
    dog.increment('counter', timestamp=100.0, tags=['env:production', 'db'])
    dog.increment('counter', timestamp=100.0, tags=['env:staging'])

    dog.flush(200.0)

    metrics = self.sort_metrics(reporter.metrics)
    nt.assert_equal(len(metrics), 6)
    [c1, c2, c3, g1, g2, g3] = metrics

    # Use a real loop: the original bare generator expression never ran
    # these assertions.
    for c in [c1, c2, c3]:
        nt.assert_equal(c['metric'], 'counter')
    nt.assert_equal(c1['tags'], ['env:production', 'db', 'type:constant'])
    nt.assert_equal(c1['points'][0][1], 1)
    nt.assert_equal(c2['tags'], ['env:staging', 'type:constant'])
    nt.assert_equal(c2['points'][0][1], 1)
    nt.assert_equal(c3['tags'], ['type:constant'])
    nt.assert_equal(c3['points'][0][1], 1)

    for g in [g1, g2, g3]:
        nt.assert_equal(g['metric'], 'gauge')
    nt.assert_equal(g1['tags'], ['env:production', 'db', 'type:constant'])
    nt.assert_equal(g1['points'][0][1], 15)
    nt.assert_equal(g2['tags'], ['env:staging', 'type:constant'])
    nt.assert_equal(g2['points'][0][1], 20)
    nt.assert_equal(g3['tags'], ['type:constant'])
    nt.assert_equal(g3['points'][0][1], 10)

    # Ensure histograms work as well.
    @dog.timed('timed', tags=['version:1'])
    def test():
        pass
    test()
    dog.histogram('timed', 20, timestamp=300.0, tags=['db', 'version:2'])
    reporter.metrics = []
    dog.flush(400)
    for metric in reporter.metrics:
        assert metric['tags']  # this is enough
def test_stop(self):
    dog = ThreadStats()
    dog.start(flush_interval=1, roll_up_interval=1)
    for i in range(10):
        dog.gauge("metric", i)
    time.sleep(2)
    flush_count = dog.flush_count
    assert flush_count

    dog.stop()
    for i in range(10):
        dog.gauge("metric", i)
    time.sleep(2)
    for i in range(10):
        dog.gauge("metric", i)
    time.sleep(2)
    # At most one flush already in flight when stop() was called may still
    # land afterwards.
    assert dog.flush_count in [flush_count, flush_count + 1]
def test_tags(self):
    dog = ThreadStats()
    dog.start(roll_up_interval=10, flush_in_thread=False)
    reporter = dog.reporter = MemoryReporter()

    # Post the same metric with different tags.
    dog.gauge('gauge', 10, timestamp=100.0)
    dog.gauge('gauge', 15, timestamp=100.0, tags=['env:production', 'db'])
    dog.gauge('gauge', 20, timestamp=100.0, tags=['env:staging'])

    dog.increment('counter', timestamp=100.0)
    dog.increment('counter', timestamp=100.0, tags=['env:production', 'db'])
    dog.increment('counter', timestamp=100.0, tags=['env:staging'])

    dog.flush(200.0)

    metrics = self.sort_metrics(reporter.metrics)
    assert len(metrics) == 6

    [c1, c2, c3, g1, g2, g3] = metrics
    assert c1['metric'] == 'counter'
    assert c2['metric'] == 'counter'
    assert c3['metric'] == 'counter'
    assert c1['tags'] is None
    assert c1['points'][0][1] == 0.1
    assert c2['tags'] == ['env:production', 'db']
    assert c2['points'][0][1] == 0.1
    assert c3['tags'] == ['env:staging']
    assert c3['points'][0][1] == 0.1

    assert g1['metric'] == 'gauge'
    assert g2['metric'] == 'gauge'
    assert g3['metric'] == 'gauge'
    assert g1['tags'] is None
    assert g1['points'][0][1] == 10
    assert g2['tags'] == ['env:production', 'db']
    assert g2['points'][0][1] == 15
    assert g3['tags'] == ['env:staging']
    assert g3['points'][0][1] == 20
stats.start()
initialize(**options)

# Iperf client settings
client = iperf3.Client()
client.server_hostname = remote_site
client.zerocopy = True
client.verbose = False
client.reverse = True
client.duration = int(test_duration)

# Run the test and load the JSON result into a dictionary
data = json.loads(str(client.run()))

# Extract the averages and convert to megabits per second
sent_mbps_avg = int(data['end']['sum_sent']['bits_per_second']) / 1000000
received_mbps_avg = int(
    data['end']['sum_received']['bits_per_second']) / 1000000

# Debugging output
print('Sent Average ' + str(sent_mbps_avg) + ' Mbps')
print('Received Average ' + str(received_mbps_avg) + ' Mbps')

# Feed the metrics into ThreadStats
print('iperf.' + siteabbrv + '.mbps.avg.ingress')
stats.increment('iperf.' + siteabbrv + '.mbps.avg.ingress')
stats.gauge('iperf.' + siteabbrv + '.mbps.avg.ingress', sent_mbps_avg)
stats.increment('iperf.' + siteabbrv + '.mbps.avg.egress')
stats.gauge('iperf.' + siteabbrv + '.mbps.avg.egress', received_mbps_avg)
def check(self):
    logging.info('check info')
    try:
        yaml_file = os.environ.get(
            'DATADOG_CONF',
            '%s/aws_redshift_status.yaml' % config.get_confd_path())
        # open() replaces the Python 2 only file() builtin used originally.
        yaml_data = yaml.load(open(yaml_file))
        init_config = yaml_data['init_config']
        interval = init_config.get('min_collection_interval', 300)

        stats = ThreadStats()
        stats.start(flush_interval=10, roll_up_interval=1, device=None,
                    flush_in_thread=False, flush_in_greenlet=False,
                    disabled=False)

        start = time.time()
        for instance in yaml_data['instances']:
            logging.debug('instance name is %s' % instance['name'])
            name, cluster_name, cluster_address, cluster_port, db_name, \
                user_name, user_password, aws_access_key_id, \
                aws_secret_access_key, aws_region, query, \
                tags = self._load_conf(instance)

            # Resolve the cluster endpoint via the AWS API when it is not
            # configured explicitly.
            if cluster_address is None and cluster_port is None:
                redshift = boto.redshift.connect_to_region(
                    aws_region,
                    aws_access_key_id=aws_access_key_id,
                    aws_secret_access_key=aws_secret_access_key)
                clusters = redshift.describe_clusters(cluster_name)
                if len(clusters) == 0:
                    raise Exception('Cluster is empty')
                cluster = clusters['DescribeClustersResponse'][
                    'DescribeClustersResult']['Clusters'][0]
                endpoint = cluster['Endpoint']
                cluster_address = endpoint['Address']
                cluster_port = endpoint['Port']

            conn = None
            try:
                connect_timeout = init_config.get('connect_timeout', 5)
                conn = psycopg2.connect(
                    host=cluster_address,
                    port=cluster_port,
                    database=db_name,
                    user=user_name,
                    password=user_password,
                    connect_timeout=connect_timeout,
                )

                today = datetime.datetime.utcnow()
                starttime = (today - datetime.timedelta(seconds=interval)
                             ).strftime('%Y-%m-%d %H:%M:%S.%f')
                endtime = today.strftime('%Y-%m-%d %H:%M:%S.%f')

                results = self._db_query(conn, QUERY_TABLE_COUNT)
                stats.gauge('aws.redshift_status.table_count',
                            results[0][0], tags=tags)
                logging.debug('aws.redshift_status.table_count is %s'
                              % results[0][0])

                results = self._db_query(conn, QUERY_NODE)
                for row in results:
                    gauge_tags = tags[:]
                    gauge_tags.append('node:%s' % row[0])
                    stats.gauge('aws_redshift_status.node_slice',
                                row[1], tags=gauge_tags)
                    logging.debug('aws_redshift_status.node_slice is %s'
                                  % row[1])

                results = self._db_query(conn, QUERY_TABLE_RECORD)
                for row in results:
                    gauge_tags = tags[:]
                    gauge_tags.append('table:%s' % row[0])
                    stats.gauge('aws_redshift_status.table_records',
                                row[1], tags=gauge_tags)
                    logging.debug('aws_redshift_status.table_records is %s'
                                  % row[1])

                results = self._db_query(conn, QUERY_TABLE_STATUS)
                for row in results:
                    gauge_tags = tags[:]
                    gauge_tags.append('table:%s' % row[0])
                    stats.gauge('aws_redshift_status.table_status.size',
                                row[1], tags=gauge_tags)
                    logging.debug(
                        'aws_redshift_status.table_status.size is %s'
                        % row[1])
                    stats.gauge('aws_redshift_status.table_status.tbl_rows',
                                row[2], tags=gauge_tags)
                    logging.debug(
                        'aws_redshift_status.table_status.tbl_rows is %s'
                        % row[2])
                    stats.gauge('aws_redshift_status.table_status.skew_rows',
                                row[3], tags=gauge_tags)
                    logging.debug(
                        'aws_redshift_status.table_status.skew_rows is %s'
                        % row[3])

                for q in ['select', 'insert', 'update', 'delete', 'analyze']:
                    results = self._db_query(
                        conn,
                        QUERY_LOG_TYPE % (starttime, endtime, '%s %%' % q))
                    for row in results:
                        stats.gauge('aws_redshift_status.query.%s' % q,
                                    row[0], tags=tags)
                        logging.debug('aws_redshift_status.query.%s is %s'
                                      % (q, row[0]))

                running_time = time.time() - start
                stats.gauge('aws_redshift_status.response_time',
                            running_time, tags=tags)
                logging.debug('aws_redshift_status.response_time is %s'
                              % running_time)
            finally:
                if conn:
                    conn.close()

        stats.flush()
        stop = stats.stop()
        logging.debug('Stopping is %s' % stop)
    except Exception:
        logging.warning(sys.exc_info())
def ingest_currentmetrics():
    stats = ThreadStats()
    stats.start()
    counter = 0

    mbta_perf_api_key = os.environ.get('MBTA_PERF_API_KEY')
    routes = ['red', 'orange', 'blue', 'green-B', 'green-C', 'green-D',
              'green-E']

    # (threshold_id, threshold_name, threshold_type). The repeated dict
    # literals in the original are generated from this table instead; the
    # trailing space in the original 'travel_time ' tag for threshold 06
    # was a typo and is dropped here.
    thresholds = [
        ('threshold_id_01', 'Headway', 'wait_time_headway_based'),
        ('threshold_id_02', 'Big Gap', 'wait_time_headway_based'),
        ('threshold_id_03', '2X Headway', 'wait_time_headway_based'),
        ('threshold_id_04', 'delayed < 3 min.', 'travel_time'),
        ('threshold_id_05', 'delayed < 6 min.', 'travel_time'),
        ('threshold_id_06', 'delayed 10 min.', 'travel_time'),
    ]

    for route in routes:
        currentmetrics_url = (
            'http://realtime.mbta.com/developer/api/v2.1/currentmetrics'
            '?api_key={api_key}&format=json&route={route}'.format(
                route=route,
                api_key=mbta_perf_api_key,
            ))
        currentmetrics_response = requests.get(currentmetrics_url)
        print("Response code from perf API for {route}: {status}, "
              "size: {size}".format(
                  route=route,
                  status=currentmetrics_response.status_code,
                  size=len(currentmetrics_response.text)))
        if currentmetrics_response.status_code != 200:
            print("Error loading perf metrics: {error}".format(
                error=currentmetrics_response.text))
        currentmetrics = json.loads(currentmetrics_response.content)

        # In the absence of data, assume good service, which means 100% of
        # customers under all thresholds.
        metrics = {}
        for threshold_id, threshold_name, threshold_type in thresholds:
            tags = [
                'route:{}'.format(route),
                'threshold_name:{}'.format(threshold_name),
                'threshold_type:{}'.format(threshold_type),
            ]
            for window in ('metric_result_last_hour',
                           'metric_result_current_day'):
                metrics['{}.{}'.format(threshold_id, window)] = {
                    'value': 1,
                    'tags': tags[:],  # fresh copy per metric, as originally
                }

        if route.startswith('green'):
            for key in metrics:
                metrics[key]['tags'].append('route:green')

        if 'current_metrics' in currentmetrics:
            for threshold in currentmetrics['current_metrics']:
                metric_last_hour = '{}.metric_result_last_hour'.format(
                    threshold['threshold_id'])
                metric_current_day = '{}.metric_result_current_day'.format(
                    threshold['threshold_id'])
                metrics[metric_last_hour]['value'] = \
                    threshold['metric_result_last_hour']
                metrics[metric_current_day]['value'] = \
                    threshold['metric_result_current_day']

        for metric_name, values in metrics.items():
            stats.gauge('mbta.perf.{}'.format(metric_name),
                        values['value'], tags=values['tags'])
            counter += 1
            if counter % 50 == 0:
                print("Flushing currentmetrics {}...".format(counter))
                stats.flush()
                print("Done")

    print("Flushing currentmetrics {}...".format(counter))
    stats.flush()
    print("Done")
class OpenvpnMonitor():
    def __init__(self, monitor_host, monitor_port, interval, datadog=True,
                 elastic=False):
        self.host = monitor_host
        self.port = monitor_port
        self.interval = interval
        self.s = None
        self.datadog = datadog
        self.init_datadog()
        self.stats = ThreadStats()
        self.stats.start(flush_interval=interval, flush_in_thread=False)
        self.tags = ['server:{}'.format(os.uname()[1]), 'type:openvpn']

    def connect(self):
        try:
            self.s = socket.create_connection((self.host, self.port), 2)
        except socket.error:
            print('Unable to connect')
            sys.exit()

    def init_datadog(self):
        options = {
            'api_key': os.getenv('DD_API_KEY'),
            'app_key': os.getenv('DD_APP_KEY')
        }
        initialize(**options)
        logging.basicConfig(level=logging.DEBUG)

    def flush_datadog(self):
        self.stats.flush()

    def disconnect(self):
        self.s.send('quit\n'.encode('ascii'))
        self.s.shutdown(socket.SHUT_RDWR)
        self.s.close()

    def get_loadstats(self):
        self.s.send('load-stats\n'.encode('ascii'))
        return self.get_data()

    def get_status(self):
        self.s.send('status 2\n'.encode('ascii'))
        return self.get_data()

    def get_version(self):
        self.s.send('version\n'.encode('ascii'))
        return self.get_data()

    def get_data(self):
        socket_list = [self.s]
        read_sockets, write_sockets, error_sockets = select.select(
            socket_list, [], [])
        for sock in read_sockets:
            data = sock.recv(65565)
            return data.decode('utf8')

    def parse_version(self, version, datadog=True, elastic=False):
        """OpenVPN Version: OpenVPN 2.4.3 x86_64-redhat-linux-gnu [Fedora EPEL patched] [SSL (OpenSSL)] [LZO] [LZ4] [EPOLL] [PKCS11] [MH/PKTINFO] [AEAD] built on Jun 21 2017
        OpenVPN Version: OpenVPN 2.3.14 x86_64-alpine-linux-musl [SSL (OpenSSL)] [LZO] [EPOLL] [MH] [IPv6] built on Dec 18 2016"""
        ver = version.split(" ")
        tags = ["version:{}_{}".format(ver[2], ver[3])]
        self.tags += tags

    def parse_loadstats(self, loadstats, datadog=True, elastic=False):
        pattern = re.compile(
            r"SUCCESS:.*nclients=(?P<nclients>\d*),"
            r"bytesin=(?P<bytesin>\d*),bytesout=(?P<bytesout>\d*).*")
        for line in loadstats.splitlines():
            o_stats = pattern.match(line)
            if o_stats:
                if self.datadog:
                    self.stats.gauge('openvpn.nclients',
                                     int(o_stats.group('nclients')),
                                     tags=self.tags)
                    self.stats.gauge('openvpn.bytesin',
                                     int(o_stats.group('bytesin')),
                                     tags=self.tags)
                    self.stats.gauge('openvpn.bytesout',
                                     int(o_stats.group('bytesout')),
                                     tags=self.tags)

    def parse_status(self, status):
        """HEADER,CLIENT_LIST,Common Name,Real Address,Virtual Address,Bytes Received,Bytes Sent,Connected Since,Connected Since (time_t),Username
        HEADER,CLIENT_LIST,Common Name,Real Address,Virtual Address,Virtual IPv6 Address,Bytes Received,Bytes Sent,Connected Since,Connected Since (time_t),Username,Client ID,Peer ID
        CLIENT_LIST,globbi,192.168.1.112:56513,10.8.0.18,,2735402,5955826,Sun Oct 1 20:15:18 2017,1506888918,jakobant,36,1"""
        # Field offsets for the longer (OpenVPN 2.4) status format; they are
        # shifted down below when the shorter 2.3 format is detected.
        COMMONNAME = 1
        REAL_ADDR = 2
        VIRT_ADDR = 3
        BYTESIN = 5
        BYTESOUT = 6
        USERNAME = 9
        CONN_SINCET = 8
        for line in status.splitlines():
            if line.startswith('CLIENT_LIST'):
                o_stats = line.split(',')
                if len(o_stats) < 10:
                    BYTESIN = 4
                    BYTESOUT = 5
                    USERNAME = 8
                    CONN_SINCET = 7
                if self.datadog:
                    tags = [
                        'commonname:{}'.format(o_stats[COMMONNAME]),
                        'real_addr:{}'.format(
                            o_stats[REAL_ADDR].split(":")[0]),
                        'virt_addr:{}'.format(o_stats[VIRT_ADDR]),
                        'username:{}'.format(o_stats[USERNAME])
                    ] + self.tags
                    connected_time = int(time.time()) - int(
                        o_stats[CONN_SINCET])
                    self.stats.gauge('openvpn.client.bytesin',
                                     int(o_stats[BYTESIN]), tags=tags)
                    self.stats.gauge('openvpn.client.bytesout',
                                     int(o_stats[BYTESOUT]), tags=tags)
                    self.stats.gauge('openvpn.client.conntime',
                                     connected_time, tags=tags)

    def tail_log(self, logfile):
        """Fri Sep 29 21:29:59 2017 192.168.1.112:62493 TLS: Username/Password authentication succeeded for username 'jakobant'
        Fri Sep 29 21:31:57 2017 192.168.1.112:62787 VERIFY OK: depth=1, C=IS, ST=Rkv, L=Reykjavik, O=Heima, OU=Ops, CN=Heima CA, name=EasyRSA, [email protected]
        Fri Sep 29 21:31:57 2017 192.168.1.112:62787 VERIFY OK: depth=0, C=IS, ST=Rkv, L=Reykjavik, O=Heima, OU=Ops, CN=globbi, name=EasyRSA, [email protected]
        AUTH-PAM: BACKGROUND: user 'jakobant' failed to authenticate: Authentication failure"""
        login = re.compile(r".*authentication succeeded.*")
        faild_login = re.compile(
            r".*(failed to authenticate|Incorrect password|was not found).*")
        for line in Pygtail(logfile):
            match = login.match(line)
            if match:
                print(line)
                self.stats.event('Login success', line,
                                 alert_type='success', tags=self.tags)
            match = faild_login.match(line)
            if match:
                print(line)
                self.stats.event('Authentication failure', line,
                                 alert_type='error', tags=self.tags)
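# A minimal polling-loop sketch for the monitor above (not part of the
# original source). The host 'vpn.example.org' and port 7505 are placeholder
# values for an OpenVPN management interface.
if __name__ == '__main__':
    monitor = OpenvpnMonitor('vpn.example.org', 7505, interval=10)
    monitor.connect()
    monitor.parse_version(monitor.get_version())
    while True:
        monitor.parse_loadstats(monitor.get_loadstats())
        monitor.parse_status(monitor.get_status())
        # start() was called with flush_in_thread=False, so flush manually.
        monitor.flush_datadog()
        time.sleep(monitor.interval)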
def main():
    start = time.time()

    parser = argparse.ArgumentParser()
    parser.add_argument("--artifacts-dir", required=True)
    parser.add_argument("--sha1-signing-cert", required=True)
    parser.add_argument("--sha384-signing-cert", required=True)
    parser.add_argument("--task-definition", required=True,
                        type=argparse.FileType('r'))
    parser.add_argument("--filename-template",
                        default=DEFAULT_FILENAME_TEMPLATE)
    parser.add_argument("--no-freshclam", action="store_true", default=False,
                        help="Do not refresh ClamAV DB")
    parser.add_argument("-q", "--quiet", dest="log_level",
                        action="store_const", const=logging.WARNING,
                        default=logging.DEBUG)
    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    log.setLevel(args.log_level)

    task = json.load(args.task_definition)
    # TODO: verify task["extra"]["funsize"]["partials"] with jsonschema

    signing_certs = {
        'sha1': open(args.sha1_signing_cert, 'rb').read(),
        'sha384': open(args.sha384_signing_cert, 'rb').read(),
    }

    assert get_keysize(signing_certs['sha1']) == 2048
    assert get_keysize(signing_certs['sha384']) == 4096

    # Intended for local testing.
    dd_api_key = os.environ.get('DATADOG_API_KEY')
    # Intended for Taskcluster.
    if not dd_api_key and os.environ.get('DATADOG_API_SECRET'):
        dd_api_key = get_secret(
            os.environ.get('DATADOG_API_SECRET')).get('key')

    # Create this even when not sending metrics, so the context manager
    # statements work.
    ddstats = ThreadStats(namespace='releng.releases.partials')

    if dd_api_key:
        dd_options = {
            'api_key': dd_api_key,
        }
        log.info("Starting metric collection")
        initialize(**dd_options)
        ddstats.start(flush_interval=1)
    else:
        log.info("No metric collection")

    if args.no_freshclam:
        log.info("Skipping freshclam")
    else:
        log.info("Refreshing clamav db...")
        try:
            redo.retry(lambda: sh.freshclam("--stdout", "--verbose",
                                            _timeout=300, _err_to_out=True))
            log.info("Done.")
        except sh.ErrorReturnCode:
            log.warning("Freshclam failed, skipping DB update")

    manifest = []
    for e in task["extra"]["funsize"]["partials"]:
        for mar in (e["from_mar"], e["to_mar"]):
            verify_allowed_url(mar)

        work_env = WorkEnv()
        # TODO: run setup once
        work_env.setup()
        complete_mars = {}
        use_old_format = False
        for mar_type, f in (("from", e["from_mar"]), ("to", e["to_mar"])):
            dest = os.path.join(work_env.workdir, "{}.mar".format(mar_type))
            unpack_dir = os.path.join(work_env.workdir, mar_type)
            with ddstats.timer('mar.download.time'):
                download(f, dest)
            if not os.getenv("MOZ_DISABLE_MAR_CERT_VERIFICATION"):
                verify_signature(dest, signing_certs)
            complete_mars["%s_size" % mar_type] = os.path.getsize(dest)
            complete_mars["%s_hash" % mar_type] = get_hash(dest)
            with ddstats.timer('mar.unpack.time'):
                unpack(work_env, dest, unpack_dir)
            if mar_type == 'from':
                version = get_option(unpack_dir, filename="application.ini",
                                     section="App", option="Version")
                major = int(version.split(".")[0])
                # The updater for versions less than 56.0 requires BZ2
                # compressed MAR files
                if major < 56:
                    use_old_format = True
                    log.info("Forcing BZ2 compression for %s", f)
            log.info("AV-scanning %s ...", unpack_dir)
            metric_tags = [
                "platform:{}".format(e['platform']),
            ]
            with ddstats.timer('mar.clamscan.time', tags=metric_tags):
                sh.clamscan("-r", unpack_dir, _timeout=600, _err_to_out=True)
            log.info("Done.")

        path = os.path.join(work_env.workdir, "to")
        from_path = os.path.join(work_env.workdir, "from")
        mar_data = {
            "ACCEPTED_MAR_CHANNEL_IDS": get_option(
                path, filename="update-settings.ini", section="Settings",
                option="ACCEPTED_MAR_CHANNEL_IDS"),
            "version": get_option(path, filename="application.ini",
                                  section="App", option="Version"),
            "to_buildid": get_option(path, filename="application.ini",
                                     section="App", option="BuildID"),
            "from_buildid": get_option(from_path, filename="application.ini",
                                       section="App", option="BuildID"),
            "appName": get_option(from_path, filename="application.ini",
                                  section="App", option="Name"),
            # Use Gecko repo and rev from platform.ini, not application.ini
            "repo": get_option(path, filename="platform.ini",
                               section="Build", option="SourceRepository"),
            "revision": get_option(path, filename="platform.ini",
                                   section="Build", option="SourceStamp"),
            "from_mar": e["from_mar"],
            "to_mar": e["to_mar"],
            "platform": e["platform"],
            "locale": e["locale"],
        }
        # Override ACCEPTED_MAR_CHANNEL_IDS if needed
        if "ACCEPTED_MAR_CHANNEL_IDS" in os.environ:
            mar_data["ACCEPTED_MAR_CHANNEL_IDS"] = os.environ[
                "ACCEPTED_MAR_CHANNEL_IDS"]
        for field in ("update_number", "previousVersion",
                      "previousBuildNumber", "toVersion", "toBuildNumber"):
            if field in e:
                mar_data[field] = e[field]
        mar_data.update(complete_mars)
        # if branch not set explicitly use repo-name
        mar_data["branch"] = e.get(
            "branch", mar_data["repo"].rstrip("/").split("/")[-1])
        if 'dest_mar' in e:
            mar_name = e['dest_mar']
        else:
            # default to formatted name if not specified
            mar_name = args.filename_template.format(**mar_data)
        mar_data["mar"] = mar_name
        dest_mar = os.path.join(work_env.workdir, mar_name)
        # TODO: download these once
        work_env.download_buildsystem_bits(repo=mar_data["repo"],
                                           revision=mar_data["revision"])

        metric_tags = [
            "branch:{}".format(mar_data['branch']),
            "platform:{}".format(mar_data['platform']),
            # If required. Shouldn't add much useful info, but increases
            # cardinality of metrics substantially, so avoided.
            # "locale:{}".format(mar_data['locale']),
        ]
        with ddstats.timer('generate_partial.time', tags=metric_tags):
            generate_partial(work_env, from_path, path, dest_mar,
                             mar_data["ACCEPTED_MAR_CHANNEL_IDS"],
                             mar_data["version"], use_old_format)

        mar_data["size"] = os.path.getsize(dest_mar)
        metric_tags.append("unit:bytes")
        # Allows us to find out how many releases there were between the two,
        # making buckets of the file sizes easier.
        metric_tags.append("update_number:{}".format(
            mar_data.get('update_number', 0)))
        ddstats.gauge('partial_mar_size', mar_data['size'], tags=metric_tags)

        mar_data["hash"] = get_hash(dest_mar)

        shutil.copy(dest_mar, args.artifacts_dir)
        work_env.cleanup()
        manifest.append(mar_data)

    manifest_file = os.path.join(args.artifacts_dir, "manifest.json")
    with open(manifest_file, "w") as fp:
        json.dump(manifest, fp, indent=2, sort_keys=True)

    # Warning: Assumption that one partials task will always be for one branch.
    metric_tags = [
        "branch:{}".format(mar_data['branch']),
    ]
    ddstats.timing('task_duration', time.time() - start, start,
                   tags=metric_tags)

    # Wait for all the metrics to flush. If the program ends before
    # they've been sent, they'll be dropped.
    # Should be more than the flush_interval for the ThreadStats object
    time.sleep(10)
def ingest_trip_updates():
    stats = ThreadStats()
    stats.start()
    counter = 0

    trip_feed = gtfs_realtime_pb2.FeedMessage()
    trip_response = requests.get(
        'https://cdn.mbta.com/realtime/TripUpdates.pb')
    trip_feed.ParseFromString(trip_response.content)
    trip_feed_ts = trip_feed.header.timestamp

    for entity in trip_feed.entity:
        if entity.HasField('trip_update'):
            trip_update = entity.trip_update
            if trip_update.trip.route_id not in enabled_routes:
                continue

            route_name = trip_update.trip.route_id
            if trip_update.trip.route_id in route_names:
                route_name = route_names[trip_update.trip.route_id]

            last_stop_id = trip_update.stop_time_update[
                len(trip_update.stop_time_update) - 1].stop_id
            destination = stop_names[last_stop_id]
            trip_id = trip_update.trip.trip_id
            vehicle = trip_update.vehicle.label

            for stop in trip_update.stop_time_update:
                stop_name = stop_names[stop.stop_id]
                # Renamed from 'time' in the original, which shadowed the
                # time module.
                if stop.departure.time > 0:
                    if stop.arrival.time > 0:
                        # mid-route stop, use arrival time
                        stop_time = stop.arrival.time
                    else:
                        # first stop, use departure time
                        stop_time = stop.departure.time
                else:
                    # last stop, ignore
                    continue

                arrives_in = (stop_time - trip_feed_ts)
                catchable_tag = 'catchable:false'
                if arrives_in > 120:
                    catchable_tag = 'catchable:true'

                tags = [
                    'trip_id:{}'.format(trip_id),
                    'stop:{}'.format(stop_name),
                    'destination:{}'.format(destination),
                    'vehicle:{}'.format(vehicle),
                    'route:{}'.format(route_name),
                    catchable_tag,
                ]
                if route_name.startswith('Green'):
                    tags.append('route:green')

                stats.gauge('mbta.trip.arrival_secs', arrives_in, tags=tags)
                stats.gauge('mbta.trip.arrival_min', arrives_in / 60,
                            tags=tags)
                counter += 1
                if counter % 50 == 0:
                    print("Flushing trip updates {}...".format(counter))
                    stats.flush()
                    print("Done")

    print("Flushing trip updates {}...".format(counter))
    stats.flush()
    print("Done")