def __init__(self):
    """Set up the monitor's state, logger, thresholds and alarm plumbing.

    Thresholds come from ``conf``; a falsy configured value (``None``,
    ``0``) falls back to the built-in default.
    """
    default_recover_thres = 30 * 5
    default_bad_thres = 5

    self.is_running = False
    self.hostname = socket.gethostname()

    saas_logger = Log("saas_mon", config=conf)
    self.logger = saas_logger

    configured_recover = conf.SAAS_RECOVER_THRESHOLD
    self.recover_thres = (
        configured_recover if configured_recover else default_recover_thres)
    configured_bad = conf.SAAS_BAD_THRESHOLD
    self.bad_thres = configured_bad if configured_bad else default_bad_thres

    # The alarm queue and the e-mail alarm share the monitor's own logger.
    self.alarm_q = JobQueue(saas_logger)
    self.emailalarm = EmailAlarm(saas_logger)
    self.last_state = True
def __init__(self):
    """Build the monitor and load one ``Linkage`` per configured link.

    Reads these attributes of the module-level ``config`` object:

    * ``log_length_per_link`` (optional, default 128)
    * ``links`` (required): dict mapping an address to a per-link dict
      that may carry ``alarm_levels`` and ``recover``
    * ``alarm_levels`` / ``recover`` (optional): global fallbacks used
      when a link does not provide its own truthy value

    A link whose settings are missing or invalid is logged and skipped;
    it never aborts loading of the remaining links.

    Raises:
        ValueError, TypeError: if the *global* ``config.recover`` cannot
            be converted with ``int()``.
    """
    self.is_running = False
    self.linkage_dict = dict()
    self.logger = Log("icmp_mon", config=config)
    self.alarm_q = JobQueue(self.logger)
    self.emailalarm = EmailAlarm(Log("alarm", config=config))
    self.logger_links = Log("links", config=config)
    self.log_length_per_link = getattr(config, 'log_length_per_link', 128)

    if not hasattr(config, 'links'):
        self.logger.error("no 'links' in config")
        return

    g_alarm_levels = None
    g_recover = None
    if hasattr(config, 'alarm_levels'):
        g_alarm_levels = self._parse_alarm_levels(config.alarm_levels)
    if hasattr(config, 'recover'):
        g_recover = int(config.recover)

    links = config.links
    if isinstance(links, dict):
        for ip, v in links.items():
            if not isinstance(v, dict):
                v = dict()

            alarm_levels = v.get('alarm_levels')
            if alarm_levels:
                # Pass the address so parser errors identify the link.
                alarm_levels = self._parse_alarm_levels(alarm_levels, ip)
                if not alarm_levels:
                    continue
            elif g_alarm_levels:
                alarm_levels = g_alarm_levels
            else:
                self.logger.error(
                    "config: %s, missing alarm_levels value" % (ip))
                continue

            recover = v.get('recover')
            if recover:
                try:
                    recover = int(recover)
                except (TypeError, ValueError):
                    # One malformed link must not take the monitor down.
                    self.logger.error(
                        "config: %s, invalid recover value" % (ip))
                    continue
            elif g_recover:
                recover = g_recover
            else:
                self.logger.error(
                    "config: %s, missing recover value" % (ip))
                continue

            self.linkage_dict[ip] = Linkage(ip, alarm_levels, recover)

    self.logger.info("%d link loaded from config" %
                     (len(self.linkage_dict)))
def __init__(self):
    """Set up the monitor's state, logger, thresholds and alarm plumbing.

    Thresholds come from ``conf``; a falsy configured value (``None``,
    ``0``) falls back to the built-in default.
    """
    default_recover_thres = 30 * 5
    default_bad_thres = 5

    self.is_running = False
    self.hostname = socket.gethostname()

    saas_logger = Log("saas_mon", config=conf)
    self.logger = saas_logger

    configured_recover = conf.SAAS_RECOVER_THRESHOLD
    self.recover_thres = (
        configured_recover if configured_recover else default_recover_thres)
    configured_bad = conf.SAAS_BAD_THRESHOLD
    self.bad_thres = configured_bad if configured_bad else default_bad_thres

    # The alarm queue and the e-mail alarm share the monitor's own logger.
    self.alarm_q = JobQueue(saas_logger)
    self.emailalarm = EmailAlarm(saas_logger)
    self.last_state = True
def __init__(self):
    """Build the monitor and load one ``Linkage`` per configured link.

    Reads these attributes of the module-level ``config`` object:

    * ``log_length_per_link`` (optional, default 128)
    * ``links`` (required): dict mapping an address to a per-link dict
      that may carry ``alarm_levels`` and ``recover``
    * ``alarm_levels`` / ``recover`` (optional): global fallbacks used
      when a link does not provide its own truthy value

    A link whose settings are missing or invalid is logged and skipped;
    it never aborts loading of the remaining links.

    Raises:
        ValueError, TypeError: if the *global* ``config.recover`` cannot
            be converted with ``int()``.
    """
    self.is_running = False
    self.linkage_dict = dict()
    self.logger = Log("icmp_mon", config=config)
    self.alarm_q = JobQueue(self.logger)
    self.emailalarm = EmailAlarm(Log("alarm", config=config))
    self.logger_links = Log("links", config=config)
    self.log_length_per_link = getattr(config, 'log_length_per_link', 128)

    if not hasattr(config, 'links'):
        self.logger.error("no 'links' in config")
        return

    g_alarm_levels = None
    g_recover = None
    if hasattr(config, 'alarm_levels'):
        g_alarm_levels = self._parse_alarm_levels(config.alarm_levels)
    if hasattr(config, 'recover'):
        g_recover = int(config.recover)

    links = config.links
    if isinstance(links, dict):
        for ip, v in links.items():
            if not isinstance(v, dict):
                v = dict()

            alarm_levels = v.get('alarm_levels')
            if alarm_levels:
                # Pass the address so parser errors identify the link.
                alarm_levels = self._parse_alarm_levels(alarm_levels, ip)
                if not alarm_levels:
                    continue
            elif g_alarm_levels:
                alarm_levels = g_alarm_levels
            else:
                self.logger.error(
                    "config: %s, missing alarm_levels value" % (ip))
                continue

            recover = v.get('recover')
            if recover:
                try:
                    recover = int(recover)
                except (TypeError, ValueError):
                    # One malformed link must not take the monitor down.
                    self.logger.error(
                        "config: %s, invalid recover value" % (ip))
                    continue
            elif g_recover:
                recover = g_recover
            else:
                self.logger.error(
                    "config: %s, missing recover value" % (ip))
                continue

            self.linkage_dict[ip] = Linkage(ip, alarm_levels, recover)

    self.logger.info("%d link loaded from config" %
                     (len(self.linkage_dict)))
class SaasMonitor(object):
    """Health monitor that probes the SAAS RPC endpoint.

    ``check()`` performs one probe; ``start()`` / ``stop()`` control the
    alarm queue worker. The threshold, hostname, e-mail alarm and
    ``last_state`` attributes are initialised here but not used by any
    method in this class (presumably consumed by the caller's polling
    loop -- confirm against the caller).
    """

    def __init__(self):
        """Initialise state, logger, thresholds and alarm plumbing.

        A falsy ``conf.SAAS_RECOVER_THRESHOLD`` / ``conf.SAAS_BAD_THRESHOLD``
        falls back to ``30 * 5`` / ``5`` respectively.
        """
        self.is_running = False
        self.hostname = socket.gethostname()
        self.logger = Log("saas_mon", config=conf)
        self.recover_thres = conf.SAAS_RECOVER_THRESHOLD or (30 * 5)
        self.bad_thres = conf.SAAS_BAD_THRESHOLD or 5
        self.alarm_q = JobQueue(self.logger)
        self.emailalarm = EmailAlarm(self.logger)
        self.last_state = True

    def start(self):
        """Start one alarm-queue worker; a no-op when already running."""
        if self.is_running:
            return
        self.is_running = True
        self.alarm_q.start_worker(1)
        self.logger.info("started")

    def stop(self):
        """Stop the alarm queue; a no-op when not running."""
        if not self.is_running:
            return
        self.is_running = False
        self.alarm_q.stop()

    def check(self):
        """Probe the SAAS service once.

        Connects a ``SAAS_Client``, issues ``CMD.MONITOR`` and closes the
        connection again.

        Returns:
            True when the probe completed, False when any exception was
            raised (the exception is logged, never propagated).
        """
        try:
            rpc = SAAS_Client(conf.HOST_ID, self.logger)
            rpc.connect()
            try:
                rpc.todo(CMD.MONITOR)
            finally:
                # Always release the connection once it has been opened.
                rpc.close()
            self.logger.info("ok")
            return True
        except Exception as e:
            # Top-level boundary: a failed probe is reported as False.
            self.logger.exception(e)
            return False
class SaasMonitor(object):
    """Health monitor that probes the SAAS RPC endpoint.

    ``check()`` performs one probe; ``start()`` / ``stop()`` control the
    alarm queue worker. The threshold, hostname, e-mail alarm and
    ``last_state`` attributes are initialised here but not used by any
    method in this class (presumably consumed by the caller's polling
    loop -- confirm against the caller).
    """

    def __init__(self):
        """Initialise state, logger, thresholds and alarm plumbing.

        A falsy ``conf.SAAS_RECOVER_THRESHOLD`` / ``conf.SAAS_BAD_THRESHOLD``
        falls back to ``30 * 5`` / ``5`` respectively.
        """
        self.is_running = False
        self.hostname = socket.gethostname()
        self.logger = Log("saas_mon", config=conf)
        self.recover_thres = conf.SAAS_RECOVER_THRESHOLD or (30 * 5)
        self.bad_thres = conf.SAAS_BAD_THRESHOLD or 5
        self.alarm_q = JobQueue(self.logger)
        self.emailalarm = EmailAlarm(self.logger)
        self.last_state = True

    def start(self):
        """Start one alarm-queue worker; a no-op when already running."""
        if self.is_running:
            return
        self.is_running = True
        self.alarm_q.start_worker(1)
        self.logger.info("started")

    def stop(self):
        """Stop the alarm queue; a no-op when not running."""
        if not self.is_running:
            return
        self.is_running = False
        self.alarm_q.stop()

    def check(self):
        """Probe the SAAS service once.

        Connects a ``SAAS_Client``, issues ``CMD.MONITOR`` and closes the
        connection again.

        Returns:
            True when the probe completed, False when any exception was
            raised (the exception is logged, never propagated).
        """
        try:
            rpc = SAAS_Client(conf.HOST_ID, self.logger)
            rpc.connect()
            try:
                rpc.todo(CMD.MONITOR)
            finally:
                # Always release the connection once it has been opened.
                rpc.close()
            self.logger.info("ok")
            return True
        except Exception as e:
            # Top-level boundary: a failed probe is reported as False.
            self.logger.exception(e)
            return False
class ICMPMonitor(object):
    """Ping a configured set of links once per second and raise alarms.

    Each link is tracked by a ``Linkage`` object; state changes reported
    by ``Linkage.new_state`` are turned into ``AlarmJob`` instances and
    handed to the alarm queue.
    """

    def __init__(self):
        """Build the monitor and load one ``Linkage`` per configured link.

        Reads these attributes of the module-level ``config`` object:

        * ``log_length_per_link`` (optional, default 128)
        * ``links`` (required): dict mapping an address to a per-link dict
          that may carry ``alarm_levels`` and ``recover``
        * ``alarm_levels`` / ``recover`` (optional): global fallbacks used
          when a link does not provide its own truthy value

        A link whose settings are missing or invalid is logged and skipped;
        it never aborts loading of the remaining links.

        Raises:
            ValueError, TypeError: if the *global* ``config.recover`` cannot
                be converted with ``int()``.
        """
        self.is_running = False
        self.linkage_dict = dict()
        self.logger = Log("icmp_mon", config=config)
        self.alarm_q = JobQueue(self.logger)
        self.emailalarm = EmailAlarm(Log("alarm", config=config))
        self.logger_links = Log("links", config=config)
        self.log_length_per_link = getattr(
            config, 'log_length_per_link', 128)

        if not hasattr(config, 'links'):
            self.logger.error("no 'links' in config")
            return

        g_alarm_levels = None
        g_recover = None
        if hasattr(config, 'alarm_levels'):
            g_alarm_levels = self._parse_alarm_levels(config.alarm_levels)
        if hasattr(config, 'recover'):
            g_recover = int(config.recover)

        links = config.links
        if isinstance(links, dict):
            for ip, v in links.items():
                if not isinstance(v, dict):
                    v = dict()

                alarm_levels = v.get('alarm_levels')
                if alarm_levels:
                    # Pass the address so parser errors identify the link.
                    alarm_levels = self._parse_alarm_levels(alarm_levels, ip)
                    if not alarm_levels:
                        continue
                elif g_alarm_levels:
                    alarm_levels = g_alarm_levels
                else:
                    self.logger.error(
                        "config: %s, missing alarm_levels value" % (ip))
                    continue

                recover = v.get('recover')
                if recover:
                    try:
                        recover = int(recover)
                    except (TypeError, ValueError):
                        # One malformed link must not take the monitor down.
                        self.logger.error(
                            "config: %s, invalid recover value" % (ip))
                        continue
                elif g_recover:
                    recover = g_recover
                else:
                    self.logger.error(
                        "config: %s, missing recover value" % (ip))
                    continue

                self.linkage_dict[ip] = Linkage(ip, alarm_levels, recover)

        self.logger.info("%d link loaded from config" %
                         (len(self.linkage_dict)))

    def _parse_alarm_levels(self, alarm_levels, ip=""):
        """Validate an ``alarm_levels`` setting.

        Args:
            alarm_levels: value taken from the configuration.
            ip: link address used only to label error messages.

        Returns:
            A copy of ``alarm_levels`` (tuple for a tuple, list for a list)
            when every element is an ``int``; ``None`` after logging an
            error otherwise.
        """
        if not isinstance(alarm_levels, (tuple, list)):
            self.logger.error("config: %s, alarm_levels is not a list" % (ip))
            return None
        if not all(isinstance(level, int) for level in alarm_levels):
            self.logger.error(
                "config: %s, elements in alarm_levels must be integers" % (ip))
            return None
        # Same container kinds Python 2's filter() produced here.
        if isinstance(alarm_levels, tuple):
            return tuple(alarm_levels)
        return list(alarm_levels)

    def start(self):
        """Start one alarm-queue worker; a no-op when already running."""
        if self.is_running:
            return
        self.is_running = True
        self.alarm_q.start_worker(1)
        self.logger.info("started")

    def stop(self):
        """Stop the alarm queue; a no-op when not running."""
        if not self.is_running:
            return
        self.is_running = False
        self.alarm_q.stop()
        self.logger.info("stopped")

    def _alarm_enqueue(self, link):
        """Queue an ``AlarmJob`` for ``link``, prefixed with a local timestamp."""
        t = "%Y-%m-%d %H:%M:%S"
        ts = "[%s]" % (time.strftime(t, time.localtime()))
        job = AlarmJob(
            self.emailalarm, ts + link.alarm_text(), link.details())
        self.alarm_q.put_job(job)

    def _log_link_if_full(self, link):
        """Write the link's details to the links log and reset its bitmap
        once the bitmap has reached ``log_length_per_link`` entries."""
        if len(link.bitmap) == self.log_length_per_link:
            self.logger_links.info(link.details())
            link.reset_bitmap()

    def loop(self):
        """Ping every loaded link about once per second while running.

        Blocks until ``is_running`` becomes false. Successful replies and
        failures are fed to ``Linkage.new_state``; its return value decides
        whether an alarm is queued.
        """
        ips = list(self.linkage_dict.keys())
        fping = FPing(ips)
        while self.is_running:
            start_time = time.time()
            recv_dict, error_dict = fping.ping(1)

            for ip, rtt in recv_dict.items():
                link = self.linkage_dict[ip]
                if link.new_state(True, rtt):
                    self._alarm_enqueue(link)
                print("%s ok %s" % (ip, rtt))
                self._log_link_if_full(link)

            for ip, err in error_dict.items():
                link = self.linkage_dict[ip]
                # Only an explicit False (not None) triggers an alarm here.
                if link.new_state(False, 0) is False:
                    self._alarm_enqueue(link)
                # Anything other than a plain timeout is worth recording.
                if err != "timeout":
                    self.logger.error("ip %s error %s" % (ip, err))
                print("%s err %s" % (ip, link.bitmap))
                self._log_link_if_full(link)

            # Pad the iteration to one second.
            end_time = time.time()
            if end_time < start_time + 1:
                time.sleep(1 - end_time + start_time)
class ICMPMonitor(object):
    """Ping a configured set of links once per second and raise alarms.

    Each link is tracked by a ``Linkage`` object; state changes reported
    by ``Linkage.new_state`` are turned into ``AlarmJob`` instances and
    handed to the alarm queue.
    """

    def __init__(self):
        """Build the monitor and load one ``Linkage`` per configured link.

        Reads these attributes of the module-level ``config`` object:

        * ``log_length_per_link`` (optional, default 128)
        * ``links`` (required): dict mapping an address to a per-link dict
          that may carry ``alarm_levels`` and ``recover``
        * ``alarm_levels`` / ``recover`` (optional): global fallbacks used
          when a link does not provide its own truthy value

        A link whose settings are missing or invalid is logged and skipped;
        it never aborts loading of the remaining links.

        Raises:
            ValueError, TypeError: if the *global* ``config.recover`` cannot
                be converted with ``int()``.
        """
        self.is_running = False
        self.linkage_dict = dict()
        self.logger = Log("icmp_mon", config=config)
        self.alarm_q = JobQueue(self.logger)
        self.emailalarm = EmailAlarm(Log("alarm", config=config))
        self.logger_links = Log("links", config=config)
        self.log_length_per_link = getattr(
            config, 'log_length_per_link', 128)

        if not hasattr(config, 'links'):
            self.logger.error("no 'links' in config")
            return

        g_alarm_levels = None
        g_recover = None
        if hasattr(config, 'alarm_levels'):
            g_alarm_levels = self._parse_alarm_levels(config.alarm_levels)
        if hasattr(config, 'recover'):
            g_recover = int(config.recover)

        links = config.links
        if isinstance(links, dict):
            for ip, v in links.items():
                if not isinstance(v, dict):
                    v = dict()

                alarm_levels = v.get('alarm_levels')
                if alarm_levels:
                    # Pass the address so parser errors identify the link.
                    alarm_levels = self._parse_alarm_levels(alarm_levels, ip)
                    if not alarm_levels:
                        continue
                elif g_alarm_levels:
                    alarm_levels = g_alarm_levels
                else:
                    self.logger.error(
                        "config: %s, missing alarm_levels value" % (ip))
                    continue

                recover = v.get('recover')
                if recover:
                    try:
                        recover = int(recover)
                    except (TypeError, ValueError):
                        # One malformed link must not take the monitor down.
                        self.logger.error(
                            "config: %s, invalid recover value" % (ip))
                        continue
                elif g_recover:
                    recover = g_recover
                else:
                    self.logger.error(
                        "config: %s, missing recover value" % (ip))
                    continue

                self.linkage_dict[ip] = Linkage(ip, alarm_levels, recover)

        self.logger.info("%d link loaded from config" %
                         (len(self.linkage_dict)))

    def _parse_alarm_levels(self, alarm_levels, ip=""):
        """Validate an ``alarm_levels`` setting.

        Args:
            alarm_levels: value taken from the configuration.
            ip: link address used only to label error messages.

        Returns:
            A copy of ``alarm_levels`` (tuple for a tuple, list for a list)
            when every element is an ``int``; ``None`` after logging an
            error otherwise.
        """
        if not isinstance(alarm_levels, (tuple, list)):
            self.logger.error("config: %s, alarm_levels is not a list" % (ip))
            return None
        if not all(isinstance(level, int) for level in alarm_levels):
            self.logger.error(
                "config: %s, elements in alarm_levels must be integers" % (ip))
            return None
        # Same container kinds Python 2's filter() produced here.
        if isinstance(alarm_levels, tuple):
            return tuple(alarm_levels)
        return list(alarm_levels)

    def start(self):
        """Start one alarm-queue worker; a no-op when already running."""
        if self.is_running:
            return
        self.is_running = True
        self.alarm_q.start_worker(1)
        self.logger.info("started")

    def stop(self):
        """Stop the alarm queue; a no-op when not running."""
        if not self.is_running:
            return
        self.is_running = False
        self.alarm_q.stop()
        self.logger.info("stopped")

    def _alarm_enqueue(self, link):
        """Queue an ``AlarmJob`` for ``link``, prefixed with a local timestamp."""
        t = "%Y-%m-%d %H:%M:%S"
        ts = "[%s]" % (time.strftime(t, time.localtime()))
        job = AlarmJob(self.emailalarm, ts + link.alarm_text(), link.details())
        self.alarm_q.put_job(job)

    def _log_link_if_full(self, link):
        """Write the link's details to the links log and reset its bitmap
        once the bitmap has reached ``log_length_per_link`` entries."""
        if len(link.bitmap) == self.log_length_per_link:
            self.logger_links.info(link.details())
            link.reset_bitmap()

    def loop(self):
        """Ping every loaded link about once per second while running.

        Blocks until ``is_running`` becomes false. Successful replies and
        failures are fed to ``Linkage.new_state``; its return value decides
        whether an alarm is queued. Errors other than ``"timeout"`` are
        also written to the monitor's log.
        """
        ips = list(self.linkage_dict.keys())
        fping = FPing(ips)
        while self.is_running:
            start_time = time.time()
            recv_dict, error_dict = fping.ping(1)

            for ip, rtt in recv_dict.items():
                link = self.linkage_dict[ip]
                if link.new_state(True, rtt):
                    self._alarm_enqueue(link)
                print("%s ok %s" % (ip, rtt))
                self._log_link_if_full(link)

            for ip, err in error_dict.items():
                link = self.linkage_dict[ip]
                # Only an explicit False (not None) triggers an alarm here.
                if link.new_state(False, 0) is False:
                    self._alarm_enqueue(link)
                if err != "timeout":
                    self.logger.error("ip %s error %s" % (ip, err))
                print("%s err %s" % (ip, link.bitmap))
                self._log_link_if_full(link)

            # Pad the iteration to one second.
            end_time = time.time()
            if end_time < start_time + 1:
                time.sleep(1 - end_time + start_time)