Beispiel #1
0
 def __init__(self):
     """Initialise the monitor in the stopped state.

     Builds the "saas_mon" logger, reads the two thresholds from the
     module-level ``conf`` object and creates the alarm queue and e-mail
     alarm, both of which share the monitor's logger.
     """
     self.is_running = False
     self.hostname = socket.gethostname()

     log = Log("saas_mon", config=conf)
     self.logger = log

     # A falsy configured threshold (0, None, ...) falls back to the default.
     configured_recover = conf.SAAS_RECOVER_THRESHOLD
     self.recover_thres = (
         configured_recover if configured_recover else (30 * 5))
     configured_bad = conf.SAAS_BAD_THRESHOLD
     self.bad_thres = configured_bad if configured_bad else 5

     self.alarm_q = JobQueue(log)
     self.emailalarm = EmailAlarm(log)
     self.last_state = True
Beispiel #2
0
 def __init__(self):
     """Create the loggers and alarm queue, then load the links from config.

     Reads these attributes of the module-level ``config`` object:

     * ``log_length_per_link`` (optional, defaults to 128).
     * ``links`` -- when it is a dict, each key (``ip``) identifies a link
       and each value may be a dict carrying per-link ``alarm_levels`` and
       ``recover`` settings.  A non-dict ``links`` is ignored; a missing
       ``links`` logs an error and leaves ``linkage_dict`` empty.
     * ``alarm_levels`` / ``recover`` (optional) -- global fallbacks used
       by links that do not define their own value.

     A link without usable ``alarm_levels`` or ``recover`` is skipped after
     an error is logged; every other link is stored in
     ``self.linkage_dict`` as a ``Linkage`` keyed by ``ip``.

     Raises:
         ValueError, TypeError: if the global ``config.recover`` cannot be
             converted with ``int()``.
     """
     self.is_running = False
     self.linkage_dict = dict()
     self.logger = Log("icmp_mon", config=config)
     self.alarm_q = JobQueue(self.logger)
     self.emailalarm = EmailAlarm(Log("alarm", config=config))
     self.logger_links = Log("links", config=config)
     self.log_length_per_link = getattr(config, 'log_length_per_link', 128)

     if not hasattr(config, 'links'):
         self.logger.error("no 'links' in config")
         return

     g_alarm_levels = None
     if hasattr(config, 'alarm_levels'):
         g_alarm_levels = self._parse_alarm_levels(config.alarm_levels)
     g_recover = None
     if hasattr(config, 'recover'):
         g_recover = int(config.recover)

     links = config.links
     if isinstance(links, dict):
         for ip, v in links.items():
             if not isinstance(v, dict):
                 v = dict()

             alarm_levels = v.get('alarm_levels')
             if alarm_levels:
                 # Pass the link key so a parse error names the bad entry.
                 alarm_levels = self._parse_alarm_levels(alarm_levels, ip)
                 if not alarm_levels:
                     continue
             elif g_alarm_levels:
                 alarm_levels = g_alarm_levels
             else:
                 self.logger.error(
                     "config: %s, missing alarm_levels value" % (ip,))
                 continue

             recover = v.get('recover')
             if recover:
                 # A malformed per-link value skips only this link, the
                 # same way malformed alarm_levels do.
                 try:
                     recover = int(recover)
                 except (TypeError, ValueError):
                     self.logger.error(
                         "config: %s, recover must be an integer" % (ip,))
                     continue
             elif g_recover:
                 recover = g_recover
             else:
                 self.logger.error(
                     "config: %s, missing recover value" % (ip,))
                 continue

             self.linkage_dict[ip] = Linkage(ip, alarm_levels, recover)

     self.logger.info("%d link loaded from config" %
                      (len(self.linkage_dict)))
Beispiel #3
0
 def __init__(self):
     """Prepare a stopped monitor: logger, thresholds and alarm plumbing.

     The thresholds come from the module-level ``conf`` object; the alarm
     queue and the e-mail alarm are both handed the monitor's own logger.
     """
     self.is_running = False
     self.hostname = socket.gethostname()

     monitor_log = Log("saas_mon", config=conf)
     self.logger = monitor_log

     # Falsy configured values (0, None, ...) are replaced by the defaults.
     recover_setting = conf.SAAS_RECOVER_THRESHOLD
     if recover_setting:
         self.recover_thres = recover_setting
     else:
         self.recover_thres = 30 * 5
     bad_setting = conf.SAAS_BAD_THRESHOLD
     if bad_setting:
         self.bad_thres = bad_setting
     else:
         self.bad_thres = 5

     self.alarm_q = JobQueue(monitor_log)
     self.emailalarm = EmailAlarm(monitor_log)
     self.last_state = True
 def __init__(self):
     """Create the loggers and alarm queue, then load the links from config.

     Reads these attributes of the module-level ``config`` object:

     * ``log_length_per_link`` (optional, defaults to 128).
     * ``links`` -- when it is a dict, each key (``ip``) identifies a link
       and each value may be a dict carrying per-link ``alarm_levels`` and
       ``recover`` settings.  A non-dict ``links`` is ignored; a missing
       ``links`` logs an error and leaves ``linkage_dict`` empty.
     * ``alarm_levels`` / ``recover`` (optional) -- global fallbacks used
       by links that do not define their own value.

     A link without usable ``alarm_levels`` or ``recover`` is skipped after
     an error is logged; every other link is stored in
     ``self.linkage_dict`` as a ``Linkage`` keyed by ``ip``.

     Raises:
         ValueError, TypeError: if the global ``config.recover`` cannot be
             converted with ``int()``.
     """
     self.is_running = False
     self.linkage_dict = dict()
     self.logger = Log("icmp_mon", config=config)
     self.alarm_q = JobQueue(self.logger)
     self.emailalarm = EmailAlarm(Log("alarm", config=config))
     self.logger_links = Log("links", config=config)
     self.log_length_per_link = getattr(config, 'log_length_per_link', 128)

     if not hasattr(config, 'links'):
         self.logger.error("no 'links' in config")
         return

     g_alarm_levels = None
     if hasattr(config, 'alarm_levels'):
         g_alarm_levels = self._parse_alarm_levels(config.alarm_levels)
     g_recover = None
     if hasattr(config, 'recover'):
         g_recover = int(config.recover)

     links = config.links
     if isinstance(links, dict):
         for ip, v in links.items():
             if not isinstance(v, dict):
                 v = dict()

             alarm_levels = v.get('alarm_levels')
             if alarm_levels:
                 # Pass the link key so a parse error names the bad entry.
                 alarm_levels = self._parse_alarm_levels(alarm_levels, ip)
                 if not alarm_levels:
                     continue
             elif g_alarm_levels:
                 alarm_levels = g_alarm_levels
             else:
                 self.logger.error(
                     "config: %s, missing alarm_levels value" % (ip,))
                 continue

             recover = v.get('recover')
             if recover:
                 # A malformed per-link value skips only this link, the
                 # same way malformed alarm_levels do.
                 try:
                     recover = int(recover)
                 except (TypeError, ValueError):
                     self.logger.error(
                         "config: %s, recover must be an integer" % (ip,))
                     continue
             elif g_recover:
                 recover = g_recover
             else:
                 self.logger.error(
                     "config: %s, missing recover value" % (ip,))
                 continue

             self.linkage_dict[ip] = Linkage(ip, alarm_levels, recover)

     self.logger.info("%d link loaded from config" %
                      (len(self.linkage_dict)))
Beispiel #5
0
class SaasMonitor(object):
    """Monitor that probes the SAAS service through a ``SAAS_Client`` call.

    ``start``/``stop`` control the alarm queue worker; ``check`` performs a
    single probe and reports success as a boolean.
    """

    def __init__(self):
        """Build the logger, thresholds and alarm plumbing; nothing is started.

        ``recover_thres`` and ``bad_thres`` come from the module-level
        ``conf`` object and fall back to 150 and 5 when the configured value
        is falsy.
        """
        self.is_running = False
        self.hostname = socket.gethostname()
        self.logger = Log("saas_mon", config=conf)
        self.recover_thres = conf.SAAS_RECOVER_THRESHOLD or (30 * 5)
        self.bad_thres = conf.SAAS_BAD_THRESHOLD or 5
        self.alarm_q = JobQueue(self.logger)
        self.emailalarm = EmailAlarm(self.logger)
        self.last_state = True

    def start(self):
        """Start one alarm queue worker; a no-op when already running."""
        if self.is_running:
            return
        self.is_running = True
        self.alarm_q.start_worker(1)
        self.logger.info("started")

    def stop(self):
        """Stop the alarm queue; a no-op when not running."""
        if not self.is_running:
            return
        self.is_running = False
        self.alarm_q.stop()
        # Mirror the "started" message so the log shows both transitions.
        self.logger.info("stopped")

    def check(self):
        """Run a single probe against the SAAS service.

        Connects a ``SAAS_Client`` for ``conf.HOST_ID``, issues
        ``CMD.MONITOR`` and closes the connection again.

        Returns:
            True when the probe completed, False when any exception was
            raised (the exception is logged, not propagated).
        """
        try:
            rpc = SAAS_Client(conf.HOST_ID, self.logger)
            rpc.connect()
            try:
                rpc.todo(CMD.MONITOR)
            finally:
                # Always release the connection once it has been opened.
                rpc.close()
            self.logger.info("ok")
            return True
        except Exception as e:
            # Probe boundary: any failure counts as "service not ok".
            self.logger.exception(e)
            return False
Beispiel #6
0
class SaasMonitor(object):
    """Monitor that probes the SAAS service through a ``SAAS_Client`` call.

    ``start``/``stop`` control the alarm queue worker; ``check`` performs a
    single probe and reports success as a boolean.
    """

    def __init__(self):
        """Build the logger, thresholds and alarm plumbing; nothing is started.

        ``recover_thres`` and ``bad_thres`` come from the module-level
        ``conf`` object and fall back to 150 and 5 when the configured value
        is falsy.
        """
        self.is_running = False
        self.hostname = socket.gethostname()
        self.logger = Log("saas_mon", config=conf)
        self.recover_thres = conf.SAAS_RECOVER_THRESHOLD or (30 * 5)
        self.bad_thres = conf.SAAS_BAD_THRESHOLD or 5
        self.alarm_q = JobQueue(self.logger)
        self.emailalarm = EmailAlarm(self.logger)
        self.last_state = True

    def start(self):
        """Start one alarm queue worker; a no-op when already running."""
        if self.is_running:
            return
        self.is_running = True
        self.alarm_q.start_worker(1)
        self.logger.info("started")

    def stop(self):
        """Stop the alarm queue; a no-op when not running."""
        if not self.is_running:
            return
        self.is_running = False
        self.alarm_q.stop()
        # Same message ICMPMonitor.stop emits, so both monitors log alike.
        self.logger.info("stopped")

    def check(self):
        """Run a single probe against the SAAS service.

        Connects a ``SAAS_Client`` for ``conf.HOST_ID``, issues
        ``CMD.MONITOR`` and closes the connection again.

        Returns:
            True when the probe completed, False when any exception was
            raised (the exception is logged, not propagated).
        """
        try:
            rpc = SAAS_Client(conf.HOST_ID, self.logger)
            rpc.connect()
            try:
                rpc.todo(CMD.MONITOR)
            finally:
                # Always release the connection once it has been opened.
                rpc.close()
            self.logger.info("ok")
            return True
        except Exception as e:
            # Probe boundary: any failure counts as "service not ok".
            self.logger.exception(e)
            return False
class ICMPMonitor (object):

    def __init__(self):
        self.is_running = False
        self.linkage_dict = dict()
        self.logger = Log("icmp_mon", config=config)
        self.alarm_q = JobQueue(self.logger)
        self.emailalarm = EmailAlarm(Log("alarm", config=config))
        self.logger_links = Log("links", config=config)
        if 'log_length_per_link' in dir(config):
            self.log_length_per_link = config.log_length_per_link
        else:
            self.log_length_per_link = 128
        if 'links' not in dir(config):
            self.logger.error("no 'links' in config")
            return
        g_alarm_levels = None
        g_recover = None
        if 'alarm_levels' in dir(config):
            g_alarm_levels = self._parse_alarm_levels(config.alarm_levels)
        if 'recover' in dir(config):
            g_recover = int(config.recover)
        links = config.links
        if isinstance(links, dict):
            for ip, v in links.iteritems():
                if not isinstance(v, dict):
                    v = dict()
                ttl = v.get('ttl')
                if ttl >= 0:
                    pass
                else:
                    ttl = 0
                alarm_levels = v.get('alarm_levels')
                if not alarm_levels and g_alarm_levels:
                    alarm_levels = g_alarm_levels
                elif alarm_levels:
                    alarm_levels = self._parse_alarm_levels(alarm_levels)
                    if not alarm_levels:
                        continue
                else:
                    self.logger.error(
                        "config: %s, missing alarm_levels value" % (ip))
                    continue
                recover = v.get('recover')
                if recover:
                    recover = int(recover)
                elif not recover and g_recover:
                    recover = g_recover
                else:
                    self.logger.error(
                        "config: %s, missing recover value" % (ip))
                    continue
                self.linkage_dict[ip] = Linkage(ip, alarm_levels, recover)
        self.logger.info("%d link loaded from config" %
                         (len(self.linkage_dict.keys())))

    def _parse_alarm_levels(self, alarm_levels, ip=""):
        if not isinstance(alarm_levels, (tuple, list)):
            self.logger.error("config: %s, alarm_levels is not a list" % (ip))
            return
        _alarm_levels = filter(lambda x: isinstance(x, int), alarm_levels)
        if len(_alarm_levels) != len(alarm_levels):
            self.logger.error(
                "config: %s, elements in alarm_levels must be integers" % (ip))
            return
        return _alarm_levels

    def start(self):
        if self.is_running:
            return
        self.is_running = True
        self.alarm_q.start_worker(1)
        self.logger.info("started")

    def stop(self):
        if not self.is_running:
            return
        self.is_running = False
        self.alarm_q.stop()
        self.logger.info("stopped")

    def _alarm_enqueue(self, link):
        t = "%Y-%m-%d %H:%M:%S"
        ts = "[%s]" % (time.strftime(t, time.localtime()))
        job = AlarmJob(
            self.emailalarm, ts + link.alarm_text(), link.details())
        self.alarm_q.put_job(job)

    def loop(self):
        ips = self.linkage_dict.keys()
        fping = FPing(ips)
        while self.is_running:
            start_time = time.time()
            recv_dict, error_dict = fping.ping(1)
            for ip, rtt in recv_dict.iteritems():
                link = self.linkage_dict[ip]
                res = link.new_state(True, rtt)
                if res:
                    self._alarm_enqueue(link)
                print ip, "ok", rtt
                if len(link.bitmap) == self.log_length_per_link:
                    self.logger_links.info(link.details())
                    link.reset_bitmap()
            for ip, err in error_dict.iteritems():
                link = self.linkage_dict[ip]
                res = link.new_state(False, 0)
                if res is False:
                    self._alarm_enqueue(link)
                print ip, "err", link.bitmap
                if len(link.bitmap) == self.log_length_per_link:
                    self.logger_links.info(link.details())
                    link.reset_bitmap()

            end_time = time.time()
            if end_time < start_time + 1:
                time.sleep(1 - end_time + start_time)
Beispiel #8
0
class ICMPMonitor(object):
    """Ping the configured links with ``FPing`` and queue alarms on changes.

    Links are loaded from the module-level ``config`` object at
    construction time; ``loop`` then pings them about once per second while
    the monitor is running.
    """

    def __init__(self):
        """Create the loggers and alarm queue, then load the links from config.

        Reads these attributes of the module-level ``config`` object:

        * ``log_length_per_link`` (optional, defaults to 128).
        * ``links`` -- when it is a dict, each key (``ip``) identifies a
          link and each value may be a dict carrying per-link
          ``alarm_levels`` and ``recover`` settings.  A non-dict ``links``
          is ignored; a missing ``links`` logs an error and leaves
          ``linkage_dict`` empty.
        * ``alarm_levels`` / ``recover`` (optional) -- global fallbacks
          used by links that do not define their own value.

        A link without usable ``alarm_levels`` or ``recover`` is skipped
        after an error is logged; every other link is stored in
        ``self.linkage_dict`` as a ``Linkage`` keyed by ``ip``.

        Raises:
            ValueError, TypeError: if the global ``config.recover`` cannot
                be converted with ``int()``.
        """
        self.is_running = False
        self.linkage_dict = dict()
        self.logger = Log("icmp_mon", config=config)
        self.alarm_q = JobQueue(self.logger)
        self.emailalarm = EmailAlarm(Log("alarm", config=config))
        self.logger_links = Log("links", config=config)
        self.log_length_per_link = getattr(
            config, 'log_length_per_link', 128)

        if not hasattr(config, 'links'):
            self.logger.error("no 'links' in config")
            return

        g_alarm_levels = None
        if hasattr(config, 'alarm_levels'):
            g_alarm_levels = self._parse_alarm_levels(config.alarm_levels)
        g_recover = None
        if hasattr(config, 'recover'):
            g_recover = int(config.recover)

        links = config.links
        if isinstance(links, dict):
            for ip, v in links.items():
                if not isinstance(v, dict):
                    v = dict()

                alarm_levels = v.get('alarm_levels')
                if alarm_levels:
                    # Pass the link key so a parse error names the bad entry.
                    alarm_levels = self._parse_alarm_levels(alarm_levels, ip)
                    if not alarm_levels:
                        continue
                elif g_alarm_levels:
                    alarm_levels = g_alarm_levels
                else:
                    self.logger.error(
                        "config: %s, missing alarm_levels value" % (ip,))
                    continue

                recover = v.get('recover')
                if recover:
                    # A malformed per-link value skips only this link, the
                    # same way malformed alarm_levels do.
                    try:
                        recover = int(recover)
                    except (TypeError, ValueError):
                        self.logger.error(
                            "config: %s, recover must be an integer" % (ip,))
                        continue
                elif g_recover:
                    recover = g_recover
                else:
                    self.logger.error(
                        "config: %s, missing recover value" % (ip,))
                    continue

                self.linkage_dict[ip] = Linkage(ip, alarm_levels, recover)

        self.logger.info("%d link loaded from config" %
                         (len(self.linkage_dict)))

    def _parse_alarm_levels(self, alarm_levels, ip=""):
        """Validate an ``alarm_levels`` setting.

        Args:
            alarm_levels: value taken from the configuration; accepted only
                when it is a list or tuple whose elements are all ``int``.
            ip: link key, used only to label the error messages.

        Returns:
            A shallow copy of ``alarm_levels`` of the same sequence kind
            (list stays list, tuple stays tuple) when it is valid, otherwise
            ``None`` after an error has been logged.
        """
        if not isinstance(alarm_levels, (tuple, list)):
            self.logger.error(
                "config: %s, alarm_levels is not a list" % (ip,))
            return None
        if not all(isinstance(x, int) for x in alarm_levels):
            self.logger.error(
                "config: %s, elements in alarm_levels must be integers"
                % (ip,))
            return None
        return alarm_levels[:]

    def start(self):
        """Start one alarm queue worker; a no-op when already running."""
        if self.is_running:
            return
        self.is_running = True
        self.alarm_q.start_worker(1)
        self.logger.info("started")

    def stop(self):
        """Stop the alarm queue; a no-op when not running."""
        if not self.is_running:
            return
        self.is_running = False
        self.alarm_q.stop()
        self.logger.info("stopped")

    def _alarm_enqueue(self, link):
        """Queue an ``AlarmJob`` for ``link``, prefixed with a local timestamp."""
        t = "%Y-%m-%d %H:%M:%S"
        ts = "[%s]" % (time.strftime(t, time.localtime()))
        job = AlarmJob(self.emailalarm, ts + link.alarm_text(), link.details())
        self.alarm_q.put_job(job)

    def _flush_link_log(self, link):
        """Log and reset the link's bitmap once it holds ``log_length_per_link`` entries."""
        if len(link.bitmap) == self.log_length_per_link:
            self.logger_links.info(link.details())
            link.reset_bitmap()

    def loop(self):
        """Ping every loaded link about once per second while running.

        Each round feeds the ping outcome into the link's ``new_state`` and
        queues an alarm when that call signals one: a truthy result for a
        reply, exactly ``False`` for an error.  Errors other than
        ``"timeout"`` are also written to the monitor's log.  The loop ends
        after ``is_running`` has been cleared.
        """
        fping = FPing(list(self.linkage_dict))
        while self.is_running:
            start_time = time.time()
            recv_dict, error_dict = fping.ping(1)
            for ip, rtt in recv_dict.items():
                link = self.linkage_dict[ip]
                if link.new_state(True, rtt):
                    self._alarm_enqueue(link)
                print("%s ok %s" % (ip, rtt))
                self._flush_link_log(link)
            for ip, err in error_dict.items():
                link = self.linkage_dict[ip]
                # Only an explicit False queues an alarm here; other falsy
                # results such as None do not.
                if link.new_state(False, 0) is False:
                    self._alarm_enqueue(link)
                if err != "timeout":
                    self.logger.error("ip %s error %s" % (ip, err))
                print("%s err %s" % (ip, link.bitmap))
                self._flush_link_log(link)

            # Pad the round to one second so pings go out at a steady rate.
            elapsed = time.time() - start_time
            if elapsed < 1:
                time.sleep(1 - elapsed)