def __init__(self, mysql_host, mysql_database, mysql_user, mysql_password,
             zabbix_user, zabbix_password, zabbix_url, executor_driver):
    """Record the given settings and wire up the agent's collaborators.

    The MySQL and Zabbix arguments are stored on the instance unchanged.
    The database handle, the mail/SMS notification agents and the autofix
    proxy are all configured from the global ``options`` object.

    :param mysql_host: stored as ``self.mysql_host``.
    :param mysql_database: stored as ``self.mysql_database``.
    :param mysql_user: stored as ``self.mysql_user``.
    :param mysql_password: stored as ``self.mysql_password``.
    :param zabbix_user: stored as ``self.zabbix_user``.
    :param zabbix_password: stored as ``self.zabbix_password``.
    :param zabbix_url: stored as ``self.zabbix_url``.
    :param executor_driver: executor passed on to the notification agents
        and to the autofix proxy.
    """
    # Settings exactly as supplied by the caller.
    self.mysql_host = mysql_host
    self.mysql_database = mysql_database
    self.mysql_user = mysql_user
    self.mysql_password = mysql_password
    self.zabbix_user = zabbix_user
    self.zabbix_password = zabbix_password
    self.zabbix_url = zabbix_url

    # Starts empty; filled in as notifications are sent.
    self.events_notification_history = {}

    # NOTE(review): the connection below reads ``options`` rather than the
    # constructor arguments stored above -- confirm this is intended.
    self.db = HMonitorDB(
        mysql_user=options.mysql_user,
        mysql_passwd=options.mysql_password,
        mysql_host=options.mysql_host,
        mysql_database=options.mysql_database,
    )
    self.executor = executor_driver

    mail_agent = MailAgent(
        db=self.db,
        executor=self.executor,
        api_user=options.mail_api_user,
        api_key=options.mail_api_key,
        sender=options.mail_sender,
        endpoint=options.mail_endpoint,
    )
    sms_agent = SmsAgent(
        db=self.db,
        executor=self.executor,
        username=options.sms_user,
        password=options.sms_password,
        epid=options.sms_epid,
        endpoint=options.sms_endpoint,
        charset=options.sms_charset,
    )
    self.notification_agents = [mail_agent, sms_agent]

    self.am = AutoFixProxy(
        db=self.db,
        executor=self.executor,
        url=options.autofix_url,
    )
def main():
    """Synchronise the Zabbix triggers into the HMonitor database.

    What happens depends on ``options.force`` (compared case-insensitively):

    * ``"NO"``  -- clear the HMonitor triggers, then create one HMonitor
      trigger per Zabbix trigger.
    * ``"YES"`` -- warn, wait five seconds, then hand both trigger lists to
      ``force_sync``.

    Any other value is logged as an error and nothing is modified.
    """
    db = HMonitorDB(mysql_user=options.mysql_user,
                    mysql_passwd=options.mysql_password,
                    mysql_host=options.mysql_host,
                    mysql_database=options.mysql_database)
    zabbix = ZabbixProxy(username=options.zabbix_user,
                         password=options.zabbix_password,
                         url=options.zabbix_url)

    zabbix_triggers = zabbix.get_triggers()
    # NOTE(review): only the forced sync consumes hm_triggers; it is still
    # fetched up front so the order of remote calls is unchanged.
    hm_triggers = zabbix.get_triggers(db=db)

    # Normalise once instead of upper-casing for every comparison.
    force = options.force.upper()
    if force == "NO":
        db.clear_hm_triggers()
        for trigger in zabbix_triggers:
            logging.info("CREATE TRIGGER: {t}".format(
                t=trigger["description"]))
            db.create_hm_triggers(trigger["description"],
                                  trigger["priority"],
                                  trigger["comments"])
    elif force == "YES":
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling on every Python version.
        logging.warning("RUN SYNC WITH FORCE OPTION, SLEEP 5 SECONDS FOR "
                        "THINK AGAIN.")
        time.sleep(5)
        logging.warning("RUN SYNC WITH FORCE NOW")
        force_sync(db, zabbix_triggers, hm_triggers)
    else:
        # Previously an unexpected value fell through without any feedback.
        logging.error("UNKNOWN FORCE OPTION: {f}, EXPECT YES OR NO".format(
            f=options.force))
def __init__(self, *args, **kwargs):
    """Initialise the handler, then attach database and Zabbix clients.

    All positional and keyword arguments are forwarded untouched to the
    parent class. Both clients are configured from the global ``options``
    object.
    """
    super(BaseHandler, self).__init__(*args, **kwargs)

    db_settings = dict(
        mysql_user=options.mysql_user,
        mysql_passwd=options.mysql_password,
        mysql_host=options.mysql_host,
        mysql_database=options.mysql_database,
    )
    self.db = HMonitorDB(**db_settings)

    zabbix_settings = dict(
        username=options.zabbix_user,
        password=options.zabbix_password,
        url=options.zabbix_url,
    )
    self.zabbix = ZabbixProxy(**zabbix_settings)
def __init__(self, worker=8, executor="ssh"):
    """Set up the manager and start its daemon worker threads.

    :param worker: number of worker threads to start.
    :param executor: executor driver name, resolved through
        ``get_executor``.
    """
    self.worker = worker
    self.executor_driver_name = executor
    self.executor = get_executor(executor)
    self.db = HMonitorDB(mysql_user=options.mysql_user,
                         mysql_passwd=options.mysql_password,
                         mysql_host=options.mysql_host,
                         mysql_database=options.mysql_database)
    self.queue = Queue.Queue()

    def _serve_forever():
        # do_autofix() handles a single queued event per call.  Targeting
        # it directly made every thread exit after its first event, so the
        # pool went silent once `worker` events had been processed.
        while True:
            try:
                self.do_autofix()
            except Exception as e:
                # Keep the worker alive whatever a single task does.
                logging.exception(e)

    for _ in range(self.worker):
        t = threading.Thread(target=_serve_forever)
        t.daemon = True  # do not block interpreter shutdown
        t.start()
    logging.debug("START {0} WORKERS FOR AUTOFIX".format(self.worker))
class BaseHandler(tornado.web.RequestHandler):
    """Request handler base that carries a database and a Zabbix client.

    The current user is identified by the ``mail`` secure cookie.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the handler, then attach the two backend clients.

        All arguments are forwarded untouched to the parent class. Both
        clients are configured from the global ``options`` object.
        """
        super(BaseHandler, self).__init__(*args, **kwargs)

        db_settings = dict(
            mysql_user=options.mysql_user,
            mysql_passwd=options.mysql_password,
            mysql_host=options.mysql_host,
            mysql_database=options.mysql_database,
        )
        self.db = HMonitorDB(**db_settings)

        zabbix_settings = dict(
            username=options.zabbix_user,
            password=options.zabbix_password,
            url=options.zabbix_url,
        )
        self.zabbix = ZabbixProxy(**zabbix_settings)

    def get_current_user(self):
        """Return the value of the ``mail`` secure cookie."""
        mail = self.get_secure_cookie("mail")
        return mail

    def get_user(self):
        """Return the database user looked up by the ``mail`` cookie."""
        return self.db.get_user_by_mail(self.get_secure_cookie("mail"))
def main():
    """Synchronise the Zabbix triggers into the HMonitor database.

    What happens depends on ``options.force`` (compared case-insensitively):

    * ``"NO"``  -- clear the HMonitor triggers, then create one HMonitor
      trigger per Zabbix trigger.
    * ``"YES"`` -- warn, wait five seconds, then hand both trigger lists to
      ``force_sync``.

    Any other value is logged as an error and nothing is modified.
    """
    db = HMonitorDB(mysql_user=options.mysql_user,
                    mysql_passwd=options.mysql_password,
                    mysql_host=options.mysql_host,
                    mysql_database=options.mysql_database)
    zabbix = ZabbixProxy(username=options.zabbix_user,
                         password=options.zabbix_password,
                         url=options.zabbix_url)

    zabbix_triggers = zabbix.get_triggers()
    # NOTE(review): only the forced sync consumes hm_triggers; it is still
    # fetched up front so the order of remote calls is unchanged.
    hm_triggers = zabbix.get_triggers(db=db)

    # Normalise once instead of upper-casing for every comparison.
    force = options.force.upper()
    if force == "NO":
        db.clear_hm_triggers()
        for trigger in zabbix_triggers:
            logging.info("CREATE TRIGGER: {t}".format(
                t=trigger["description"]))
            db.create_hm_triggers(trigger["description"],
                                  trigger["priority"],
                                  trigger["comments"])
    elif force == "YES":
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling on every Python version.
        logging.warning("RUN SYNC WITH FORCE OPTION, SLEEP 5 SECONDS FOR "
                        "THINK AGAIN.")
        time.sleep(5)
        logging.warning("RUN SYNC WITH FORCE NOW")
        force_sync(db, zabbix_triggers, hm_triggers)
    else:
        # Previously an unexpected value fell through without any feedback.
        logging.error("UNKNOWN FORCE OPTION: {f}, EXPECT YES OR NO".format(
            f=options.force))
class Agent(object):
    """Poll the database for problem events and act on each of them.

    Every polling round expires stale trigger events, fetches the events
    still in problem state and, for each one: skips it when it matches an
    alert filter, tries an automatic fix, and otherwise hands it to the
    notification agents unless the same trigger/host pair was notified
    within the last ``NOTIFICATION_INTERVAL`` seconds.
    """

    # Minimum number of seconds between two notifications for the same
    # trigger/host pair.
    NOTIFICATION_INTERVAL = 300
    # Seconds to wait between two polling rounds.
    POLL_INTERVAL = 30

    def __init__(self, mysql_host, mysql_database, mysql_user,
                 mysql_password, zabbix_user, zabbix_password, zabbix_url,
                 executor_driver):
        """Record the given settings and build the agent's collaborators.

        The MySQL and Zabbix arguments are stored on the instance. The
        database handle, notification agents and autofix proxy are
        configured from the global ``options`` object.

        :param executor_driver: executor passed on to the notification
            agents and to the autofix proxy.
        """
        self.mysql_host = mysql_host
        self.mysql_database = mysql_database
        self.mysql_user = mysql_user
        self.mysql_password = mysql_password
        self.zabbix_user = zabbix_user
        self.zabbix_password = zabbix_password
        self.zabbix_url = zabbix_url
        # Maps "<trigger>_<host>" to {"last_send_time": datetime}.
        self.events_notification_history = dict()
        # NOTE(review): the connection reads ``options`` rather than the
        # constructor arguments stored above -- confirm this is intended.
        self.db = HMonitorDB(mysql_user=options.mysql_user,
                             mysql_passwd=options.mysql_password,
                             mysql_host=options.mysql_host,
                             mysql_database=options.mysql_database)
        self.executor = executor_driver
        self.notification_agents = [
            MailAgent(db=self.db,
                      executor=self.executor,
                      api_user=options.mail_api_user,
                      api_key=options.mail_api_key,
                      sender=options.mail_sender,
                      endpoint=options.mail_endpoint),
            SmsAgent(db=self.db,
                     executor=self.executor,
                     username=options.sms_user,
                     password=options.sms_password,
                     epid=options.sms_epid,
                     endpoint=options.sms_endpoint,
                     charset=options.sms_charset),
        ]
        self.am = AutoFixProxy(db=self.db,
                               executor=self.executor,
                               url=options.autofix_url)

    def initialize(self):
        """Call ``initialize()`` on every notification agent."""
        for agent in self.notification_agents:
            agent.initialize()

    def _auto_fix(self, event):
        """Ask the autofix proxy to fix *event* and return its result."""
        logging.debug("BEGIN AUTO FIX ON EVENT: {0}".format(event))
        result = self.am.do_fix(event)
        logging.debug("AUTOFIX RESULT IS: {0}".format(result))
        return result

    def _get_history_key(self, event):
        """Return the notification-history key ``<trigger>_<host>``."""
        return "{t}_{h}".format(t=event["trigger_name"],
                                h=event["hostname"])

    def _is_history_expired(self, notice_obj):
        """Tell whether the last notification is older than the interval.

        :param notice_obj: history entry holding a ``last_send_time``
            datetime.
        :returns: ``True`` when more than ``NOTIFICATION_INTERVAL`` seconds
            have elapsed since ``last_send_time``.
        """
        elapsed = datetime.datetime.now() - notice_obj["last_send_time"]
        # timedelta.seconds is only the sub-day remainder; total_seconds()
        # also counts whole days, so day-old entries really do expire.
        return elapsed.total_seconds() > self.NOTIFICATION_INTERVAL

    def _purge_expired_history(self):
        """Drop history entries whose notification interval has passed.

        An expired entry is treated exactly like a missing one, so removing
        it changes no decision; it only stops the history from growing for
        trigger/host pairs that no longer fire.
        """
        history = self.events_notification_history
        expired_keys = [key for key, notice_obj in list(history.items())
                        if self._is_history_expired(notice_obj)]
        for key in expired_keys:
            del history[key]

    def _alert_in_filter(self, event):
        """Return the database's alert-filter check for *event*."""
        return self.db.check_alert_in_filter(event["trigger_name"],
                                             event["hostname"])

    def _do_actions(self, events):
        """Filter, auto-fix or notify for each event in *events*."""
        self._purge_expired_history()
        for event in events:
            if self._alert_in_filter(event):
                logging.info("EVENT MATCH ALERT FILTER. IGNORE THIS EVENT. "
                             "{t} on {h}".format(t=event["trigger_name"],
                                                 h=event["hostname"]))
                continue

            if self._auto_fix(event):
                # TODO(tianhuan) send notification here?
                self.db.expire_trigger_event(event["id"])
                continue

            h_key = self._get_history_key(event)
            notice_obj = self.events_notification_history.get(h_key, None)
            if notice_obj and not self._is_history_expired(notice_obj):
                # Notified recently enough: stay quiet for this pair.
                logging.debug("{e}'s history is not expired".format(
                    e=h_key))
                continue

            notice_obj = dict(last_send_time=datetime.datetime.now())
            self.events_notification_history[h_key] = notice_obj
            for agent in self.notification_agents:
                agent.notice(event)

    def _run_notification_agents(self):
        """Call ``run()`` on every notification agent."""
        for agent in self.notification_agents:
            agent.run()

    def _run(self):
        """Execute one polling round."""
        # Expire events
        self.db.expire_trigger_events()
        # Trigger actions
        events = self.db.get_trigger_events_in_problem()
        logging.debug("Events in problem:\n {0}".format(events))
        self._do_actions(events)

    def run(self):
        """Start the notification agents, then poll forever."""
        self._run_notification_agents()
        while True:
            try:
                self._run()
            except Exception as e:
                # TODO(tianhuan) use specific exception here
                logging.exception(e)
            # Sleep after failures too; otherwise a persistent error (for
            # example an unreachable database) turns into a busy loop.
            time.sleep(self.POLL_INTERVAL)
class AutoFixManager(object):
    """Run autofix scripts for queued events on a pool of worker threads.

    Events are queued with :meth:`add_task`. Each daemon worker repeatedly
    takes one event, looks up the script bound to its trigger, records an
    autofix log entry and executes the script.
    """

    def __init__(self, worker=8, executor="ssh"):
        """Set up the manager and start its daemon worker threads.

        :param worker: number of worker threads to start.
        :param executor: executor driver name, resolved through
            ``get_executor``.
        """
        self.worker = worker
        self.executor_driver_name = executor
        self.executor = get_executor(executor)
        self.db = HMonitorDB(mysql_user=options.mysql_user,
                             mysql_passwd=options.mysql_password,
                             mysql_host=options.mysql_host,
                             mysql_database=options.mysql_database)
        self.queue = Queue.Queue()
        for _ in range(self.worker):
            # Target the loop, not do_autofix(): do_autofix() returns after
            # a single event, which used to end the thread after one task.
            t = threading.Thread(target=self._worker_loop)
            t.daemon = True  # do not block interpreter shutdown
            t.start()
        logging.debug("START {0} WORKERS FOR AUTOFIX".format(self.worker))

    def _worker_loop(self):
        """Process queued events one after another; never returns."""
        while True:
            try:
                self.do_autofix()
            except Exception as e:
                # do_autofix() can still raise while recording the outcome;
                # log it and keep the worker alive.
                logging.exception(e)

    def add_task(self, event):
        """Queue *event* for processing by a worker."""
        self.queue.put(event)

    def get_autofix_script(self, trigger_name):
        """Return the autofix script bound to *trigger_name*.

        :raises RuntimeError: when no binding matches the trigger.
        """
        for binding in self.db.get_autofix_bindings():
            if binding["trigger_name"] == trigger_name:
                return binding["auto_fix_script"]
        raise RuntimeError(
            "NO SUCH SCRIPT FOR TRIGGER: {t}".format(t=trigger_name))

    def do_autofix(self):
        """Take one event off the queue and run its autofix script.

        Blocks until an event is available. When ``create_autofix_log``
        returns ``None`` the event is skipped with a warning. Otherwise the
        log entry is updated to the success status, or to the failed status
        with the error text when any step raised.
        """
        log_id = None
        try:
            event = self.queue.get()
            autofix_script = self.get_autofix_script(event["trigger_name"])
            log_id = self.db.create_autofix_log(
                trigger_name=event["trigger_name"],
                hostname=event["hostname"],
                script=autofix_script,
                event_id=event["event_id"])
            if log_id is None:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning(
                    "AUTOFIX ALREADY IN WORKING, IGNORE THIS EVENT "
                    "{e}.".format(e=event["trigger_name"]))
            else:
                self._do_autofix(event, autofix_script, log_id)
        except Exception as e:
            logging.exception(e)
            if log_id:
                self.db.update_autofix_log(log_id,
                                           AUTOFIX_STATUS["failed"],
                                           str(e))
        else:
            if log_id:
                self.db.update_autofix_log(log_id,
                                           AUTOFIX_STATUS["success"],
                                           "")

    def _do_autofix(self, event, autofix_script, log_id):
        """Execute *autofix_script* against the event's host.

        :raises RuntimeError: when *autofix_script* is not registered in
            ``get_autofix_scripts()``.
        """
        trigger_name = event["trigger_name"]
        hostname = event["hostname"]
        executor = self.executor(hostname=hostname,
                                 user=options.executor_user)
        script_entry = get_autofix_scripts().get(autofix_script)
        if script_entry is None:
            # Clear message instead of an AttributeError on None.
            raise RuntimeError(
                "AUTOFIX SCRIPT NOT REGISTERED: {s}".format(
                    s=autofix_script))
        autofix_method = script_entry.get("fix_method")
        # do autofix, if it fix failed, it should raise an exception and
        # upper level codes will catch it then record it into database.
        autofix_method(trigger_name, hostname, executor, event)
class AutoFixManager(object):
    """Run autofix scripts for queued events on a pool of worker threads.

    Events are queued with :meth:`add_task`. Each daemon worker repeatedly
    takes one event, looks up the script bound to its trigger, records an
    autofix log entry and executes the script.
    """

    def __init__(self, worker=8, executor="ssh"):
        """Set up the manager and start its daemon worker threads.

        :param worker: number of worker threads to start.
        :param executor: executor driver name, resolved through
            ``get_executor``.
        """
        self.worker = worker
        self.executor_driver_name = executor
        self.executor = get_executor(executor)
        self.db = HMonitorDB(mysql_user=options.mysql_user,
                             mysql_passwd=options.mysql_password,
                             mysql_host=options.mysql_host,
                             mysql_database=options.mysql_database)
        self.queue = Queue.Queue()
        for _ in range(self.worker):
            # Target the loop, not do_autofix(): do_autofix() returns after
            # a single event, which used to end the thread after one task.
            t = threading.Thread(target=self._worker_loop)
            t.daemon = True  # do not block interpreter shutdown
            t.start()
        logging.debug("START {0} WORKERS FOR AUTOFIX".format(self.worker))

    def _worker_loop(self):
        """Process queued events one after another; never returns."""
        while True:
            try:
                self.do_autofix()
            except Exception as e:
                # do_autofix() can still raise while recording the outcome;
                # log it and keep the worker alive.
                logging.exception(e)

    def add_task(self, event):
        """Queue *event* for processing by a worker."""
        self.queue.put(event)

    def get_autofix_script(self, trigger_name):
        """Return the autofix script bound to *trigger_name*.

        :raises RuntimeError: when no binding matches the trigger.
        """
        for binding in self.db.get_autofix_bindings():
            if binding["trigger_name"] == trigger_name:
                return binding["auto_fix_script"]
        raise RuntimeError("NO SUCH SCRIPT FOR TRIGGER: {t}".format(
            t=trigger_name
        ))

    def do_autofix(self):
        """Take one event off the queue and run its autofix script.

        Blocks until an event is available. When ``create_autofix_log``
        returns ``None`` the event is skipped with a warning. Otherwise the
        log entry is updated to the success status, or to the failed status
        with the error text when any step raised.
        """
        log_id = None
        try:
            event = self.queue.get()
            autofix_script = self.get_autofix_script(event["trigger_name"])
            log_id = self.db.create_autofix_log(
                trigger_name=event["trigger_name"],
                hostname=event["hostname"],
                script=autofix_script,
                event_id=event["event_id"]
            )
            if log_id is None:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning(
                    "AUTOFIX ALREADY IN WORKING, IGNORE THIS EVENT "
                    "{e}.".format(e=event["trigger_name"]))
            else:
                self._do_autofix(event, autofix_script, log_id)
        except Exception as e:
            logging.exception(e)
            if log_id:
                self.db.update_autofix_log(log_id,
                                           AUTOFIX_STATUS["failed"],
                                           str(e))
        else:
            if log_id:
                self.db.update_autofix_log(log_id,
                                           AUTOFIX_STATUS["success"],
                                           "")

    def _do_autofix(self, event, autofix_script, log_id):
        """Execute *autofix_script* against the event's host.

        :raises RuntimeError: when *autofix_script* is not registered in
            ``get_autofix_scripts()``.
        """
        trigger_name = event["trigger_name"]
        hostname = event["hostname"]
        executor = self.executor(hostname=hostname,
                                 user=options.executor_user)
        script_entry = get_autofix_scripts().get(autofix_script)
        if script_entry is None:
            # Clear message instead of an AttributeError on None.
            raise RuntimeError(
                "AUTOFIX SCRIPT NOT REGISTERED: {s}".format(
                    s=autofix_script))
        autofix_method = script_entry.get("fix_method")
        # do autofix, if it fix failed, it should raise an exception and
        # upper level codes will catch it then record it into database.
        autofix_method(trigger_name, hostname, executor, event)