예제 #1
0
파일: monitor.py 프로젝트: Stamped/Stamped
    def __init__(self, options=None):
        """Create the notification handler, an empty status map and options.

        options -- settings object to store on the instance. When omitted
                   (None), an AttributeDict with stack=None, time=10,
                   noop=False and verbose=False is used instead.
        """
        self.handler = StampedNotificationHandler()
        self.status = {}
        self._info = None

        # Fall back to the built-in defaults only when the caller gave nothing.
        if options is None:
            options = utils.AttributeDict({"stack": None, "time": 10, "noop": False, "verbose": False})

        self.options = options
예제 #2
0
파일: monitor.py 프로젝트: Stamped/Stamped
class Monitor(object):
    """Repeatedly ping the nodes of a stack and notify when a node fails.

    Instance fields:
      handler -- StampedNotificationHandler used to send email / SMS.
      status  -- maps node.instance_id to the last recorded ping result
                 (1 = all pings succeeded, -1 = a ping raised).
      options -- settings object; `stack`, `time` and `noop` are read here.
      _info   -- cached result of libs.ec2_utils.get_stack, or None.
    """

    def __init__(self, options=None):
        """Create the handler, an empty status map and store the options.

        options -- settings object; when None, an AttributeDict with
                   stack=None, time=10, noop=False, verbose=False is used.
        """
        self.handler = StampedNotificationHandler()
        self.status = {}
        self._info = None

        if options is not None:
            self.options = options
        else:
            self.options = utils.AttributeDict({"stack": None, "time": 10, "noop": False, "verbose": False})

    def update(self, force=False):
        """Fetch stack info if none is cached, or unconditionally if `force`.

        On failure (get_stack returns None) an error is logged and
        `self._info` stays None.
        """
        if self._info is None or force:
            self._info = libs.ec2_utils.get_stack(stack=self.options.stack)

            if self._info is None:
                utils.log("error retrieving stack data from AWS")

    def run(self):
        """Loop forever: refresh stack info, ping every node, then sleep.

        Never returns. `self.options.time` is passed to time.sleep between
        passes.
        """
        while True:
            self.update(force=True)
            self.ping()

            # The pause has to live inside the loop: placed after it, the
            # call is unreachable and the monitor polls without any delay.
            time.sleep(self.options.time)

    def ping(self):
        """Ping each node once and record the outcome in `self.status`.

        A node is pinged through `_try_ping_apiServer`, `_try_ping_webServer`
        and `_try_ping_db` according to the entries in `node.roles`. Any
        exception raised marks the node as failed (-1) and is passed to
        `_handle_failure`; otherwise the node is marked healthy (1).
        """
        self.update()

        if self._info is None:
            # update() could not load the stack and has already logged it;
            # there is nothing to iterate over on this pass.
            return

        # With an explicit stack option no node is skipped; otherwise the
        # node whose id equals self._info.instance.instance_id is excluded.
        if self.options.stack:
            own_instance_id = None
        else:
            own_instance_id = self._info.instance.instance_id

        for node in self._info.nodes:
            # don't try to monitor myself
            if node.instance_id == own_instance_id:
                continue

            node_status = 1

            try:
                utils.logRaw("pinging node '%s.%s'..." % (node.stack, node.name), True)

                if "apiServer" in node.roles:
                    self._try_ping_apiServer(node)

                if "webServer" in node.roles:
                    self._try_ping_webServer(node)

                if "db" in node.roles:
                    self._try_ping_db(node)

                utils.logRaw("success!\n")
            except Exception as e:
                node_status = -1
                self._handle_failure(node, e)

            self.status[node.instance_id] = node_status

    def _handle_failure(self, node, e):
        """Log a failed ping of `node` and notify if its status just changed.

        node -- the node whose ping raised.
        e    -- the exception raised. A MonitorException supplies `detail`,
                `email` and `sms`; any other exception is treated as
                unexpected and is sent through both channels.
        """
        unexpected = not isinstance(e, MonitorException)
        detail = None

        if unexpected:
            logs.error("unexpected error: %s" % e)
            utils.log("unexpected error: %s" % e)
        else:
            logs.error("monitor error: %s" % e)
            utils.log("monitor error: %s" % e)
            detail = e.detail

        # only send a notification if this node's status has changed since
        # the last time we checked, so we don't get duplicate notifications
        # related to the same incident.
        # NOTE(review): a node with no recorded status is treated as already
        # failed, so a node whose first observed ping fails never notifies --
        # confirm this is intended.
        notify = self.status.get(node.instance_id, -1) != -1

        if not notify or self.options.noop:
            return

        subject = "%s.%s error" % (node.stack, node.name)
        message = str(e)

        if unexpected or e.email:
            if detail is not None:
                message = "%s\n\n--- detail ---\n\n%s" % (message, detail)

            self.handler.email(subject, message)

        if unexpected or e.sms:
            # NOTE(review): when an email was sent above, `message` already
            # carries the detail section, so the SMS includes it as well.
            self.handler.sms(subject, message)