def send_alerts(settings, db): """ BLINDLY SENDS ALERTS FROM THE ALERTS TABLE, ASSUMING ALL HAVE THE SAME STRUCTURE. """ debug = settings.param.debug db.debug = debug #TODO: REMOVE, LEAVE IN DB if db.debug: db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", { "template": CNV.object2JSON(TEMPLATE), "subject": CNV.object2JSON(SUBJECT), "reason": REASON }) db.flush() try: new_alerts = db.query(""" SELECT a.id alert_id, a.reason, r.description, a.details, a.severity, a.confidence, a.revision, r.email_template, r.email_subject FROM alerts a JOIN reasons r on r.code = a.reason WHERE a.last_sent IS NULL AND a.status <> 'obsolete' AND math.bayesian_add(a.severity, a.confidence) > {{alert_limit}} AND a.solution IS NULL AND a.reason in {{reasons}} AND a.create_time > {{min_time}} ORDER BY math.bayesian_add(a.severity, a.confidence) DESC, json.number(left(details, 65000), "diff_percent") DESC LIMIT 10 """, { "last_sent": datetime.utcnow() - RESEND_AFTER, "alert_limit": ALERT_LIMIT - EPSILON, "min_time": datetime.utcnow()-LOOK_BACK, "reasons": SQL("("+", ".join(db.quote_value(v) for v in SEND_REASONS)+")") }) if not new_alerts: if debug: Log.note("Nothing important to email") return for alert in new_alerts: #poor souls that signed up for emails listeners = ";".join(db.query("SELECT email FROM listeners WHERE reason={{reason}}", {"reason": alert.reason}).email) body = [HEADER] if alert.confidence >= 1: alert.confidence = 0.999999 alert.details = CNV.JSON2object(alert.details) try: alert.revision = CNV.JSON2object(alert.revision) except Exception, e: pass alert.score = str(-log(1.0-Math.bayesian_add(alert.severity, alert.confidence), 10)) #SHOW NUMBER OF NINES alert.details.url = alert.details.page_url example = alert.details.example for e in alert.details.tests.example + [example]: if e.push_date_min: e.push_date_max = (2 * e.push_date) - e.push_date_min e.date_range = 
(datetime.utcnow()-CNV.milli2datetime(e.push_date_min)).total_seconds()/(24*60*60) #REQUIRED FOR DATAZILLA B2G CHART REFERENCE e.date_range = nvl(nvl(*[v for v in (7, 30, 60) if v > e.date_range]), 90) #PICK FIRST v > CURRENT VALUE subject = expand_template(CNV.JSON2object(alert.email_subject), alert) body.append(expand_template(CNV.JSON2object(alert.email_template), alert)) body = "".join(body)+FOOTER if debug: Log.note("EMAIL: {{email}}", {"email": body}) if len(body) > MAX_EMAIL_LENGTH: Log.note("Truncated the email body") suffix = "... (has been truncated)" body = body[0:MAX_EMAIL_LENGTH - len(suffix)] + suffix #keep it reasonable db.call("mail.send", ( listeners, #to subject, body, #body None )) #I HOPE I CAN SEND ARRAYS OF NUMBERS db.execute( "UPDATE alerts SET last_sent={{time}} WHERE {{where}}", { "time": datetime.utcnow(), "where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(new_alerts, "alert_id")}}) }) except Exception, e: Log.error("Could not send alerts", e)
def page_threshold_limit(db, debug): """ simplest of rules to test the dataflow from test_run, to alert, to email may prove slightly useful also! #point out any pages that are breaking human-set threshold limits """ db.debug = debug try: # CALCULATE HOW FAR BACK TO LOOK lasttime = db.query("SELECT last_run, description FROM reasons WHERE code={{type}}", {"type": REASON})[0] lasttime = nvl(lasttime.last_run, datetime.utcnow()) min_date = lasttime + LOOK_BACK # FIND ALL PAGES THAT HAVE LIMITS TO TEST # BRING BACK ONES THAT BREAK LIMITS # BUT DO NOT ALREADY HAVE AN ALERTS EXISTING pages = db.query( """ SELECT t.id tdad_id, t.n_replicates, t.mean, t.std, h.threshold, h.severity, h.reason, m.id alert_id FROM alert_page_thresholds h JOIN test_data_all_dimensions t ON t.page_id=h.page LEFT JOIN alerts m on m.tdad_id=t.test_run_id AND m.reason={{type}} WHERE h.threshold<t.mean AND t.push_date>{{min_date}} AND (m.id IS NULL OR m.status='obsol11ete') """, {"type": REASON, "min_date": min_date}, ) # FOR EACH PAGE THAT BREAKS LIMITS for page in pages: if page.alert_id != None: break alert = { "id": SQL("util.newid()"), "status": "new", "create_time": datetime.utcnow(), "last_updated": datetime.utcnow(), "tdad_id": page.tdad_id, "reason": REASON, "details": CNV.object2JSON( {"expected": float(page.threshold), "actual": float(page.mean), "reason": page.reason} ), "severity": page.severity, "confidence": 1.0 # USING NORMAL DIST ASSUMPTION WE CAN ADJUST # CONFIDENCE EVEN BEFORE THRESHOLD IS HIT! 
# FOR NOW WE KEEP IT SIMPLE } db.insert("alerts", alert) for page in pages: if page.alert_id == None: break db.update("alerts", None) # ERROR FOR NOW # OBSOLETE THE ALERTS THAT SHOULD NO LONGER GET SENT obsolete = db.query( """ SELECT m.id, m.tdad_id FROM alerts m JOIN test_data_all_dimensions t ON m.tdad_id=t.id JOIN alert_page_thresholds h on t.page_id=h.page WHERE m.reason={{reason}} AND h.threshold>=t.mean AND t.push_date>{{time}} """, {"reason": REASON, "time": min_date}, ) if obsolete: db.execute( "UPDATE alerts SET status='obsolete' WHERE {{where}}", {"where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(obsolete, "id")}})}, ) db.execute( "UPDATE reasons SET last_run={{now}} WHERE code={{reason}}", {"now": datetime.utcnow(), "reason": REASON} ) except Exception, e: Log.error("Could not perform threshold comparisons", e)
# NOTE(review): fragment of a larger function whose `def` header is not visible in
# this chunk — `a`, `curr`, `obsolete_alerts`, `alerts_db`, and `all_touched` are
# bound earlier, outside this view.  Presumably: update each changed alert whose
# severity/confidence/reason moved significantly, mark no-longer-valid alerts
# obsolete, then stamp the reason's last_run — confirm against the full function.
if a == None: Log.error("Programmer error, changed_alerts must have {{key_value}}", {"key_value": curr.tdad.id}) if significant_difference(curr.severity, a.severity) or \ significant_difference(curr.confidence, a.confidence) or \ curr.reason != a.reason: curr.last_updated = NOW alerts_db.update("alerts", {"id": curr.id}, a) #OBSOLETE THE ALERTS THAT ARE NO LONGER VALID if obsolete_alerts: alerts_db.execute("UPDATE alerts SET status='obsolete' WHERE {{where}}", { "where": esfilter2sqlwhere( alerts_db, {"and": [ {"terms": {"id": obsolete_alerts.id}}, {"not": {"term": {"status": "obsolete"}}} ]} ) }) alerts_db.execute("UPDATE reasons SET last_run={{now}} WHERE {{where}}", { "now": NOW, "where": esfilter2sqlwhere(alerts_db, {"term": {"code": REASON}}) }) alerts_db.flush() if debug: Log.note("Marking {{num}} test_run_id as 'done'", {"num": len(all_touched)})
def talos_alert_revision(settings):
    """
    ROLL UP SUSTAINED-MEDIAN ALERTS TO REVISION-LEVEL ALERTS.

    For each revision that has sustained-median exceptions: summarize the worst
    exception per test, insert new revision-level alerts, link the sustained
    alerts as children in the hierarchy table, update existing revision alerts
    that changed significantly, and obsolete the ones no longer supported.

    settings - configuration; requires settings.alerts (db), settings.query["from"]
               (elasticsearch), settings.dimension.filename, settings.param.debug
    """
    # NOTE(review): assert is stripped under -O; consider an explicit check + Log.error
    assert settings.alerts != None
    settings.db.debug = settings.param.debug

    with DB(settings.alerts) as db:
        with ESQuery(ElasticSearch(settings.query["from"])) as esq:
            dbq = DBQuery(db)
            esq.addDimension(CNV.JSON2object(File(settings.dimension.filename).read()))

            #TODO: REMOVE, LEAVE IN DB
            if db.debug:
                db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
                    "template": CNV.object2JSON(TEMPLATE),
                    "subject": CNV.object2JSON(SUBJECT),
                    "reason": REASON
                })
                db.flush()

            #EXISTING SUSTAINED EXCEPTIONS
            existing_sustained_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": talos_sustained_median.REASON}},
                    {"not": {"term": {"status": "obsolete"}}},
                    {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                ]}
            })
            tests = Q.index(existing_sustained_alerts, ["revision", "details.Talos.Test"])

            #EXISTING REVISION-LEVEL ALERTS
            old_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": REASON}},
                    {"or": [
                        {"terms": {"revision": set(existing_sustained_alerts.revision)}},
                        {"term": {"reason": talos_sustained_median.REASON}},
                        {"term": {"status": "obsolete"}},
                        {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                    ]}
                ]}
            })
            old_alerts = Q.unique_index(old_alerts, "revision")

            #SUMMARIZE
            known_alerts = StructList()
            for revision in set(existing_sustained_alerts.revision):
                #FIND TOTAL TDAD FOR EACH INTERESTING REVISION
                total_tests = esq.query({
                    "from": "talos",
                    "select": {"name": "count", "aggregate": "count"},
                    "where": {"and": [
                        {"terms": {"Talos.Revision": revision}}
                    ]}
                })
                total_exceptions = tests[(revision, )]  # FILTER BY revision

                parts = StructList()
                for g, exceptions in Q.groupby(total_exceptions, ["details.Talos.Test"]):
                    worst_in_test = Q.sort(exceptions, ["confidence", "details.diff_percent"]).last()
                    example = worst_in_test.details

                    # ADD SOME DATAZILLA SPECIFIC URL PARAMETERS
                    branch = example.Talos.Branch.replace("-Non-PGO", "")
                    example.tbpl.url.branch = TBPL_PATH.get(branch, branch)
                    example.mercurial.url.branch = MECURIAL_PATH.get(branch, branch)
                    example.datazilla.url.branch = example.Talos.Branch #+ ("" if worst_in_test.Talos.Branch.pgo else "-Non-PGO")
                    example.datazilla.url.x86 = "true" if example.Talos.Platform == "x86" else "false"
                    example.datazilla.url.x86_64 = "true" if example.Talos.Platform == "x86_64" else "false"
                    example.datazilla.url.stop = nvl(example.push_date_max, (2 * example.push_date) - example.push_date_min)

                    # NOTE(review): this guard fires after `example` was already mutated;
                    # groupby presumably never yields an empty group, so it looks unreachable
                    num_except = len(exceptions)
                    if num_except == 0:
                        continue

                    part = {
                        "test": g.details.Talos.Test,
                        "num_exceptions": num_except,
                        "num_tests": total_tests,
                        "confidence": worst_in_test.confidence,
                        "example": example
                    }
                    parts.append(part)

                # WORST TEST (HIGHEST CONFIDENCE) REPRESENTS THE REVISION
                parts = Q.sort(parts, [{"field": "confidence", "sort": -1}])
                worst_in_revision = parts[0].example

                known_alerts.append({
                    "status": "new",
                    "create_time": CNV.milli2datetime(worst_in_revision.push_date),
                    "reason": REASON,
                    "revision": revision,
                    "tdad_id": revision,
                    "details": {
                        "revision": revision,
                        "total_tests": total_tests,
                        "total_exceptions": len(total_exceptions),
                        "tests": parts,
                        "example": worst_in_revision
                    },
                    "severity": SEVERITY,
                    "confidence": worst_in_revision.result.confidence
                })

            known_alerts = Q.unique_index(known_alerts, "revision")

            #NEW ALERTS, JUST INSERT
            new_alerts = known_alerts - old_alerts
            if new_alerts:
                for revision in new_alerts:
                    revision.id = SQL("util.newid()")
                    revision.last_updated = NOW
                db.insert_list("alerts", new_alerts)

            #SHOW SUSTAINED ALERTS ARE COVERED
            db.execute("""
                INSERT INTO hierarchy (parent, child)
                SELECT
                    r.id parent,
                    p.id child
                FROM
                    alerts p
                LEFT JOIN
                    hierarchy h on h.child=p.id
                LEFT JOIN
                    alerts r on r.revision=p.revision AND r.reason={{parent_reason}}
                WHERE
                    {{where}}
            """, {
                "where": esfilter2sqlwhere(db, {"and": [
                    {"term": {"p.reason": talos_sustained_median.REASON}},
                    {"terms": {"p.revision": Q.select(existing_sustained_alerts, "revision")}},
                    {"missing": "h.parent"}
                ]}),
                "parent_reason": REASON
            })

            #CURRENT ALERTS, UPDATE IF DIFFERENT
            for known_alert in known_alerts & old_alerts:
                if len(nvl(known_alert.solution, "").strip()) != 0:
                    continue  # DO NOT TOUCH SOLVED ALERTS

                old_alert = old_alerts[known_alert]
                if old_alert.status == 'obsolete' or significant_difference(known_alert.severity, old_alert.severity) or significant_difference(known_alert.confidence, old_alert.confidence):
                    known_alert.last_updated = NOW
                    db.update("alerts", {"id": old_alert.id}, known_alert)

            #OLD ALERTS, OBSOLETE
            for old_alert in old_alerts - known_alerts:
                if old_alert.status == 'obsolete':
                    continue
                db.update("alerts", {"id": old_alert.id}, {"status": "obsolete", "last_updated": NOW, "details": None})