def update_chart_status(id, level):
    """Refresh the ``warnings`` / ``criticals`` stamps of one chart.

    The chart is looked up by primary key; nothing happens when it does not
    exist or when ``level`` is not one of the three ``Events.CONST_TYPE_*``
    values handled below.

    * OK: ``warnings`` is reset to 0, and ``criticals`` is reset to 0 once
      it is more than ``NOTIFY_SCREEN_LATENCY`` older than the check time.
    * WARNING: ``warnings`` is stamped with the check time, unless a
      critical stamp is set and not yet older than
      ``NOTIFY_SCREEN_LATENCY`` (an older critical stamp is reset to 0
      first).
    * CRITICAL: ``warnings`` is reset to 0 and ``criticals`` is stamped with
      the check time.

    Reads the module-level ``check_time`` and commits ``session`` when a
    handled level was applied.

    :param id: primary key of the chart.
    :param level: one of the ``Events.CONST_TYPE_*`` levels.
    """
    chart = session.query(Charts).get(id)
    if chart is None:
        return

    now_ts = util.timestamp(check_time)

    def critical_expired():
        # Evaluated lazily: only called after ``chart.criticals`` was found
        # to be truthy, so the subtraction never sees an empty stamp.
        return now_ts - chart.criticals > NOTIFY_SCREEN_LATENCY

    if level == Events.CONST_TYPE_OK:
        chart.warnings = 0
        if chart.criticals and critical_expired():
            chart.criticals = 0

    elif level == Events.CONST_TYPE_WARNING:
        # Drop an expired critical stamp first; the warning is then recorded
        # only when no critical stamp is left.
        if chart.criticals and critical_expired():
            chart.criticals = 0
        if not chart.criticals:
            chart.warnings = now_ts

    elif level == Events.CONST_TYPE_CRITICAL:
        chart.warnings = 0
        chart.criticals = now_ts

    else:
        # Unknown level: leave the chart untouched and do not commit.
        return

    session.commit()
def compare_and_notify_one(chart): logging.debug('Checking chart %d.' % chart.id) # get rules rules = get_rules(chart.id) if not rules: logging.debug('No rule to check.') return # parse haopan title if haopan.is_haopan(chart.cate_id): chart.name = haopan.format_title(chart.name.encode('utf-8')).decode('utf-8') chart_level, chart_msg = (Events.CONST_TYPE_OK, None) rule_warn = OrderedDict() for rule in rules: # get current data end = check_time start = end - datetime.timedelta(minutes=rule['latency']) data = get_data(chart.id, start, end) if data: if rule['warn_type'] == 'HWM': level, msg = get_alert_level_hwm(chart.name, rule['hwm_warning'], rule['hwm_critical'], data) elif rule['warn_type'] == 'LWM': level, msg = get_alert_level_lwm(chart.name, rule['lwm_warning'], rule['lwm_critical'], data) elif rule['warn_type'] == 'RANGE': prev_end = end - datetime.timedelta(7) prev_start = start - datetime.timedelta(7) prev_data = get_data(chart.id, prev_start, prev_end) if not prev_data: prev_end = end - datetime.timedelta(1) prev_start = start - datetime.timedelta(1) prev_data = get_data(chart.id, prev_start, prev_end) level, msg = get_alert_level_range(chart.name, rule['hwm_warning'], rule['hwm_critical'], rule['lwm_warning'], rule['lwm_critical'], data, prev_data) else: if 'rule' in chart.get_ext(): no_data = True else: level = Events.CONST_TYPE_CRITICAL msg = u'%s 没有数据' % chart.name if level > chart_level: chart_level, chart_msg = level, msg rule_warn[rule['id']] = (level, msg, rule) # for chart if chart_level == Events.CONST_TYPE_OK: logging.debug('Chart %s' % LEVEL_TEXT[chart_level]) else: logging.debug('Chart %s' % ' - '.join((LEVEL_TEXT[chart_level], chart_msg))) event_exists = session.\ query(Events).\ filter_by(cid=chart.id).\ filter(Events.type >= chart_level).\ filter(Events.time > check_time - datetime.timedelta(seconds=NOTIFY_SCREEN_LATENCY)).\ count() > 0 if event_exists: logging.debug('Chart event exists (within %d minutes).' 
% (NOTIFY_SCREEN_LATENCY / 60)) else: add_events(chart.id, chart_level, chart_msg) update_chart_status(chart.id, chart_level) # per user for follow in session.\ query(Follows).\ filter_by(cid=chart.id, recv_warning=True): if follow.follower in skipped_user: continue try: user = session.query(Users).get(follow.follower) if user is None: raise ValueError('User not found') if not user.email: raise ValueError('User email is empty') email_list = util.unique(user.email.splitlines()) if not email_list: raise ValueError('User email is empty') except Exception, e: logging.warn('%s: %d' % (e, follow.follower)) skipped_user.add(follow.follower) continue recv_rules = follow.get_recv_rules() if recv_rules == 'all': recv_rules = rule_warn.keys() level, msg, rule = Events.CONST_TYPE_OK, None, None for rid in recv_rules: if rid not in rule_warn: continue if rule_warn[rid][0] > level: level, msg, rule = rule_warn[rid] if level == Events.CONST_TYPE_CRITICAL: logging.debug('User %s %s' % (user.truename, ' - '.join((LEVEL_TEXT[level], msg)))) if util.timestamp(check_time) - rule['last_critical'] <= rule['interval'] * 60: logging.debug('User event exists (within %d minutes).' % rule['interval']) else: update_rule_status(rule['id']) if user.id not in user_events: user_events[user.id] = { 'truename': user.truename, 'email_list': email_list, 'events': [] } user_events[user.id]['events'].append((chart, msg))
return session.commit() def update_rule_status(id): rule = session.query(WarnRules).get(id) if rule is None: return try: content = json.loads(rule.content) except Exception, e: logging.exception(e) return check_timestamp = util.timestamp(check_time) content['last_critical'] = check_timestamp rule.content = json.dumps(content) session.commit() def add_events(id, level, message, dealt=False): event = Events() event.cid = id event.info = message event.time = datetime.datetime.now() event.type = level event.deal_status = 1 if dealt else 0 session.add(event) session.commit() def main():
row = LogstuffTmpl.gen(dt.strftime('%Y%m%d'))() row.ds_id = tid row.time = dt row.detail = detail sakuya_db.add(row) sakuya_db.commit() except Exception, e: sakuya_db.rollback() traceback.print_exc() return util.output('error', msg='Internal error.') try: chart.api_ip = util.ip2long(request['REMOTE_ADDR']) chart.api_ts = util.timestamp() sakuya_db.commit() except Exception, e: sakuya_db.rollback() return util.output('ok') socket = zmq.Socket(zmq.Context.instance(), zmq.PUSH) socket.connect(get_config('webapp')['hp_aggregate_connect']) @app.post('/api/haopan') def haopan(sakuya_db): try: dt = datetime.datetime.strptime(request.forms['time'], '%Y-%m-%d %H:%M')
def compare_and_notify_one(chart):
    """Evaluate every warning rule of one chart and queue alerts for followers.

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    source; the nesting (in particular of the chart-event check) is inferred
    from syntax -- confirm it against the original layout.

    Steps, in order:

    1. Load the chart's rules with ``get_rules``; return early if there are
       none.
    2. For each rule, fetch the data of the last ``rule["latency"]`` minutes
       before ``check_time`` and compute ``(level, msg)`` with the helper
       matching ``rule["warn_type"]`` (``HWM``, ``LWM`` or ``RANGE``). The
       highest level seen becomes the chart level; per-rule results are kept
       in ``rule_warn``.
    3. For a non-OK chart level, add a chart event unless an event of the
       same or higher type already exists within ``NOTIFY_SCREEN_LATENCY``
       seconds; then call ``update_chart_status``.
    4. For every follower with ``recv_warning`` set, pick the highest level
       among the rules that follower receives. When it is critical and the
       rule's ``last_critical`` is older than ``rule["interval"]`` minutes,
       stamp the rule and append ``(chart, msg)`` to ``user_events``.

    Reads and updates the module-level ``check_time``, ``session``,
    ``skipped_user`` and ``user_events``, which are defined outside this
    block.

    :param chart: chart row; ``id``, ``name``, ``cate_id`` and ``get_ext()``
        are used, and ``name`` is rewritten for haopan charts.
    """
    logging.debug("Checking chart %d." % chart.id)

    # get rules
    rules = get_rules(chart.id)
    if not rules:
        logging.debug("No rule to check.")
        return

    # parse haopan title
    if haopan.is_haopan(chart.cate_id):
        chart.name = haopan.format_title(chart.name.encode("utf-8")).decode("utf-8")

    chart_level, chart_msg = (Events.CONST_TYPE_OK, None)
    rule_warn = OrderedDict()
    for rule in rules:

        # get current data
        end = check_time
        start = end - datetime.timedelta(minutes=rule["latency"])
        data = get_data(chart.id, start, end)

        if data:
            if rule["warn_type"] == "HWM":
                level, msg = get_alert_level_hwm(
                    chart.name, rule["hwm_warning"], rule["hwm_critical"], data
                )

            elif rule["warn_type"] == "LWM":
                level, msg = get_alert_level_lwm(
                    chart.name, rule["lwm_warning"], rule["lwm_critical"], data
                )

            elif rule["warn_type"] == "RANGE":
                # Reference window: the same period 7 days earlier, falling
                # back to 1 day earlier when that window has no data.
                prev_end = end - datetime.timedelta(7)
                prev_start = start - datetime.timedelta(7)
                prev_data = get_data(chart.id, prev_start, prev_end)
                if not prev_data:
                    prev_end = end - datetime.timedelta(1)
                    prev_start = start - datetime.timedelta(1)
                    prev_data = get_data(chart.id, prev_start, prev_end)

                level, msg = get_alert_level_range(
                    chart.name,
                    rule["hwm_warning"],
                    rule["hwm_critical"],
                    rule["lwm_warning"],
                    rule["lwm_critical"],
                    data,
                    prev_data,
                )

            else:
                # Previously an unrecognised type silently reused the level
                # and message of the preceding rule (or raised on the first
                # rule); skip the rule instead.
                logging.warning(
                    "Unknown warn_type %r of rule %s." % (rule["warn_type"], rule["id"])
                )
                continue

        else:
            if "rule" in chart.get_ext():
                # Missing data is not alerted for these charts. The original
                # only set an unused flag here, leaving ``level``/``msg``
                # unbound or stale; skip the rule so it cannot raise a false
                # alert.
                continue
            level = Events.CONST_TYPE_CRITICAL
            # The message text means "<chart name> has no data".
            msg = u"%s 没有数据" % chart.name

        if level > chart_level:
            chart_level, chart_msg = level, msg

        rule_warn[rule["id"]] = (level, msg, rule)

    # for chart
    if chart_level == Events.CONST_TYPE_OK:
        logging.debug("Chart %s" % LEVEL_TEXT[chart_level])

    else:
        logging.debug("Chart %s" % " - ".join((LEVEL_TEXT[chart_level], chart_msg)))

        # NOTE(review): assumed to run only for non-OK levels -- confirm.
        window_start = check_time - datetime.timedelta(seconds=NOTIFY_SCREEN_LATENCY)
        event_exists = (
            session.query(Events)
            .filter_by(cid=chart.id)
            .filter(Events.type >= chart_level)
            .filter(Events.time > window_start)
            .count()
            > 0
        )
        if event_exists:
            logging.debug(
                "Chart event exists (within %d minutes)." % (NOTIFY_SCREEN_LATENCY / 60)
            )
        else:
            add_events(chart.id, chart_level, chart_msg)

    update_chart_status(chart.id, chart_level)

    # per user
    for follow in session.query(Follows).filter_by(cid=chart.id, recv_warning=True):

        if follow.follower in skipped_user:
            continue

        try:
            user = session.query(Users).get(follow.follower)
            if user is None:
                raise ValueError("User not found")
            if not user.email:
                raise ValueError("User email is empty")
            email_list = util.unique(user.email.splitlines())
            if not email_list:
                raise ValueError("User email is empty")

        except Exception as e:
            # Remember the follower so later charts do not retry the lookup.
            logging.warning("%s: %d" % (e, follow.follower))
            skipped_user.add(follow.follower)
            continue

        recv_rules = follow.get_recv_rules()
        if recv_rules == "all":
            recv_rules = rule_warn.keys()

        # Highest-level result among the rules this follower receives.
        level, msg, rule = Events.CONST_TYPE_OK, None, None
        for rid in recv_rules:
            if rid not in rule_warn:
                continue
            if rule_warn[rid][0] > level:
                level, msg, rule = rule_warn[rid]

        if level == Events.CONST_TYPE_CRITICAL:
            logging.debug("User %s %s" % (user.truename, " - ".join((LEVEL_TEXT[level], msg))))

            if util.timestamp(check_time) - rule["last_critical"] <= rule["interval"] * 60:
                logging.debug("User event exists (within %d minutes)." % rule["interval"])

            else:
                update_rule_status(rule["id"])
                if user.id not in user_events:
                    user_events[user.id] = {
                        "truename": user.truename,
                        "email_list": email_list,
                        "events": [],
                    }
                user_events[user.id]["events"].append((chart, msg))

    session.commit()


def update_rule_status(id):
    """Stamp a warning rule with the current check time as its last critical.

    The rule's ``content`` column is decoded as JSON, its ``last_critical``
    key is set to ``util.timestamp(check_time)`` and the result is written
    back and committed. Nothing is changed when the rule does not exist or
    its content cannot be decoded (the failure is logged).

    :param id: primary key of the ``WarnRules`` row.
    """
    rule = session.query(WarnRules).get(id)
    if rule is None:
        return

    try:
        content = json.loads(rule.content)
    except Exception as e:
        logging.exception(e)
        return

    check_timestamp = util.timestamp(check_time)
    content["last_critical"] = check_timestamp
    rule.content = json.dumps(content)
    session.commit()


def add_events(id, level, message, dealt=False):
    """Insert one ``Events`` row for a chart and commit it.

    The event time is the wall clock at insertion
    (``datetime.datetime.now()``), not the module-level ``check_time``.

    :param id: chart id stored in ``cid``.
    :param level: event type stored in ``type``.
    :param message: text stored in ``info``.
    :param dealt: when true ``deal_status`` is stored as 1, otherwise 0.
    """
    event = Events()
    event.cid = id
    event.info = message
    event.time = datetime.datetime.now()
    event.type = level
    event.deal_status = 1 if dealt else 0
    session.add(event)
    session.commit()
row = LogstuffTmpl.gen(dt.strftime('%Y%m%d'))() row.ds_id = tid row.time = dt row.detail = detail sakuya_db.add(row) sakuya_db.commit() except Exception, e: sakuya_db.rollback() traceback.print_exc() return util.output('error', msg='Internal error.') try: chart.api_ip = util.ip2long(request['REMOTE_ADDR']) chart.api_ts = util.timestamp() sakuya_db.commit() except Exception, e: sakuya_db.rollback() return util.output('ok') socket = zmq.Socket(zmq.Context.instance(), zmq.PUSH) socket.connect(get_config('webapp')['hp_aggregate_connect']) @app.post('/api/haopan') def haopan(sakuya_db):