-
Notifications
You must be signed in to change notification settings - Fork 9
/
ic2datadog.py
executable file
·80 lines (66 loc) · 3.48 KB
/
ic2datadog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
__author__ = 'ben.slater@instaclustr.com'
from datadog import initialize
from time import sleep
from datadog import statsd
import requests, json
from requests.auth import HTTPBasicAuth
import os
# Load the JSON settings file that sits in the same directory as this script.
configFile = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configuration.json")
# The context manager closes the file even when reading or JSON parsing fails.
with open(configFile) as f:
    configuration = json.load(f)

# The 'dd_options' section is passed straight through to the DataDog client
# as keyword arguments.
dd_options = configuration['dd_options']
initialize(**dd_options)

# HTTP basic-auth credentials (user name + API key) sent with the Instaclustr
# monitoring API requests.
auth_details = HTTPBasicAuth(username=configuration['ic_options']['user_name'],
                             password=configuration['ic_options']['api_key'])
# Seconds to sleep between two polls of the Instaclustr monitoring API.
POLL_INTERVAL_SECONDS = 20
# Seconds to wait on the Instaclustr API before the poll is treated as failed.
# Without a timeout a stalled connection would block this script forever.
REQUEST_TIMEOUT_SECONDS = 30
# A DataDog event is raised once MORE than this many polls failed in a row
# (i.e. starting with the 4th consecutive failure).
MAX_CONSECUTIVE_FAILS = 3


def build_node_tags(node):
    """Return the list of DataDog tags describing one node of the API response.

    node -- one element of the monitoring API response; must contain
            'publicIp', 'privateIp' and a 'rack' dict with 'name',
            'dataCentre' (customDCName / name / provider) and
            'providerAccount' (name / provider).

    Nodes whose data centre provider is 'AWS_VPC' additionally get 'region'
    and 'availability_zone' tags.
    Raises KeyError when an expected key is missing.
    """
    rack = node["rack"]
    data_centre = rack["dataCentre"]
    provider_account = rack["providerAccount"]

    tags = [
        'ic_public_ip:' + node["publicIp"],
        'ic_private_ip:' + node["privateIp"],
        'ic_rack_name:' + rack["name"],
        'ic_data_centre_custom_name:' + data_centre["customDCName"],
        'ic_data_centre_name:' + data_centre["name"],
        'ic_data_centre_provider:' + data_centre["provider"],
        'ic_provider_account_name:' + provider_account["name"],
        'ic_provider_account_provider:' + provider_account["provider"],
    ]
    if data_centre["provider"] == 'AWS_VPC':
        tags += [
            # Data centre name lower-cased with '_' turned into '-',
            # e.g. "US_EAST_1" -> "us-east-1".
            'region:' + data_centre["name"].lower().replace("_", "-"),
            'availability_zone:' + rack["name"],
        ]
    return tags


def report_node_metrics(node, tags):
    """Send every metric in node['payload'] to DataDog through statsd.

    node -- one element of the monitoring API response.
    tags -- complete tag list attached to every submitted metric.

    The 'nodeStatus' metric is submitted as a service check (1 = WARN,
    0 = OK); every other metric is submitted as a gauge. Only the first
    entry of a metric's 'values' list is used. Metrics without values are
    reported on stdout and skipped.
    """
    for metric in node["payload"]:
        dd_metric_name = 'instaclustr.{0}'.format(metric["metric"])
        values = metric["values"]
        if not values:
            print("Metric {0} of type '{1}' returned no value.".format(dd_metric_name, metric["type"]))
            continue

        first_value = values[0]["value"]
        if metric["metric"] == "nodeStatus":
            # Node status maps to a DataDog service check rather than a gauge.
            status = 1 if first_value == "WARN" else 0
            statsd.service_check(dd_metric_name, status, tags=tags)
        else:
            statsd.gauge(dd_metric_name, first_value, tags=tags)


def poll_forever():
    """Poll the Instaclustr monitoring API in an endless loop and forward the results.

    Every POLL_INTERVAL_SECONDS the configured cluster's metrics are fetched
    and pushed to DataDog. A poll counts as failed when the request raises a
    requests exception (connection error, timeout, ...) or the API answers
    with a non-OK status; after more than MAX_CONSECUTIVE_FAILS failures in a
    row a DataDog event is raised on every further failure. Never returns.
    """
    # The URL only depends on the configuration, so build it once.
    url = "https://api.instaclustr.com/monitoring/v1/clusters/{0}?metrics={1},".format(
        configuration['cluster_id'], configuration['metrics_list'])
    consecutive_fails = 0

    while True:
        error_text = None
        try:
            response = requests.get(url=url, auth=auth_details, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.exceptions.RequestException as err:
            # Network-level problems are handled like an error response
            # instead of terminating the script.
            print("Error retrieving metrics from Instaclustr API: {0}".format(err))
            error_text = "Request failed: {0}".format(err)
        else:
            if not response.ok:
                print("Error retrieving metrics from Instaclustr API: {0} - {1}".format(response.status_code, response.content))
                error_text = "Error code is: {0}".format(response.status_code)

        if error_text is not None:
            consecutive_fails += 1
            if consecutive_fails > MAX_CONSECUTIVE_FAILS:
                statsd.event("Instaclustr monitoring API error", error_text)
            sleep(POLL_INTERVAL_SECONDS)
            continue

        consecutive_fails = 0
        # response.json() decodes the body itself, so this also works on
        # interpreters where json.loads() does not accept bytes.
        for node in response.json():
            # Build the combined tag list once per node, not once per metric.
            node_tags = configuration['tags'] + build_node_tags(node)
            report_node_metrics(node, node_tags)
        sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    poll_forever()