/
monitor.py
92 lines (78 loc) · 2.88 KB
/
monitor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import logging.handlers
from allocations import allocations
from health import health
from shards import shards
from indices import indices
from mailer import mail
from master import get_master
from nodes import nodes
from os import mkdir
from url_checker import url_checker
from os.path import isdir

# Make sure the "logs" directory exists. mkdir() raises OSError when the
# directory is already there, which is the normal case on every run after the
# first, so that case is tolerated. Any other failure (for example missing
# permissions) leaves no directory behind and is re-raised instead of being
# silently ignored.
try:
    mkdir("logs")
except OSError:
    if not isdir("logs"):
        raise
# Log destination and the message layout shared by the console and file
# handlers.
LOG_FILENAME = "logs/es_monitor.log"
LOG_MSG_FORMAT = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"
LOG_DATE_FORMAT = "%d-%m-%Y %H:%M:%S"

# Rotation policy for the log file. The size limit is a byte count, so it is
# kept as an integer (10,000,000) rather than the float literal 1e7.
LOG_MAX_BYTES = 10 * 1000 * 1000
LOG_BACKUP_COUNT = 20

# Setup default stream logger
logging.basicConfig(
    level=logging.INFO, format=LOG_MSG_FORMAT, datefmt=LOG_DATE_FORMAT
)

# Setup rotating log file handler
rotating_file_handler = logging.handlers.RotatingFileHandler(
    LOG_FILENAME, maxBytes=LOG_MAX_BYTES, backupCount=LOG_BACKUP_COUNT
)
rotating_file_handler.setFormatter(
    logging.Formatter(fmt=LOG_MSG_FORMAT, datefmt=LOG_DATE_FORMAT)
)

# Attach the file handler to the root logger so that every record is written
# to the rotating file in addition to the stream handler configured above.
logging.getLogger().addHandler(rotating_file_handler)
# Use simplejson when it is installed; otherwise fall back to the standard
# library's json module. Either way the module is bound to the name `json`.
try:
    import simplejson as json
except ImportError:
    import json
# Main run: read the cluster definitions from clusters.json, check every
# enabled cluster and mail one report per cluster, then mail the result of the
# URL checker.
with open("clusters.json") as f:
    logging.info("")
    logging.info("The Watch Dog wakes up")
    # Parse the whole file here so it is closed before the checks start.
    clusters = json.load(f)

# dict.items() exists on both Python 2 and Python 3, whereas the previously
# used dict.iteritems() exists on Python 2 only.
for cluster, config in clusters.items():
    result = []
    logging.info("")
    logging.info("Begin processing Cluster [%s]", cluster)

    # A cluster is processed unless its config sets "enabled" to a false value.
    if not config.get("enabled", True):
        logging.info("Cluster [%s] is disabled", cluster)
        logging.info("End processing Cluster [%s]", cluster)
        continue

    # Only the master lookup is guarded: any failure here is reported as
    # "no valid master" for this cluster and the run moves on to the next one.
    try:
        master = get_master(cluster, config)
        master_host = master["host"]
        connection = master["connection"]
    except Exception:
        # logging.exception records the traceback, so the reason the lookup
        # failed is kept in the log instead of being discarded.
        logging.exception(
            "Cluster [%s] does not have a valid master in the config", cluster
        )
        logging.info("End processing Cluster [%s]", cluster)
        result.append(
            {
                "title": "No valid elasticsearch instance in the configuration",
                "severity": "FATAL",
                # One configured host per line in the mail body. .get() keeps
                # a missing "eshosts" key from raising inside this handler and
                # aborting the remaining clusters.
                "body": "<br />".join(config.get("eshosts", "").split(",")),
            }
        )
        mail(cluster, result)
        continue

    logging.info(
        "Cluster [%s] has a valid master [%s] in the config", cluster, master_host
    )

    cluster_health = health(connection, config)
    result.append(cluster_health)

    # These metrics are not alarming if the cluster health is good
    result.append(indices(connection, config, cluster_health["severity"]))
    result.append(shards(connection, config, cluster_health["severity"]))

    # These metrics are alarming irrespective of the cluster health
    result.append(allocations(connection, config))
    result.append(nodes(cluster, connection, config))

    mail(cluster, result)
    logging.info("End processing Cluster [%s]", cluster)

# The URL check is independent of the clusters and is mailed as its own report.
mail("URLs Checker", [url_checker()])
logging.info("")
logging.info("The Watch Dog rests")