def help_send_alerts(self, to_list):
    # Drive one full send_alerts() cycle against the freshly seeded database,
    # then verify (a) exactly one email goes to to_list, (b) last_sent is
    # stamped on the expected alerts, and (c) the email body names exactly the
    # alerts whose test data was marked expect == 'pass'.
    try:
        self.setup(to_list)

        ########################################################################
        # TEST
        ########################################################################
        send_alerts(
            settings=struct.wrap({"param": {"debug": True}}),
            db=self.db
        )

        ########################################################################
        # VERIFY
        ########################################################################
        emails = self.get_new_emails()  # id, to, body

        # with no recipients, nothing should have been delivered at all
        if len(to_list) == 0:
            assert len(emails) == 0
            return

        #VERIFY ONE MAIL SENT
        assert len(emails) == 1
        #VERIFY to MATCHES WHAT WAS PASSED TO THIS FUNCTION
        assert set(emails[0].to) == set(to_list), "mail.delivery not matching what's send"

        #VERIFY last_sent IS WRITTEN
        alert_state = self.db.query("""
            SELECT
                id
            FROM
                alerts
            WHERE
                reason={{reason}} AND
                last_sent>={{send_time}}
            """, {
            "reason": self.reason,
            "send_time": self.now
        })
        # the fixture embeds the expected outcome in each alert's details JSON
        expected_marked = set([d.id for d in self.test_data if CNV.JSON2object(d.details).expect == 'pass'])
        actual_marked = set(Q.select(alert_state, "id"))
        assert expected_marked == actual_marked, expand_template(
            "Expecting only id in {{expected}}, but instead got {{actual}}", {
                "expected": str(expected_marked),
                "actual": str(actual_marked)
            })

        #VERIFY BODY HAS THE CORRECT ALERTS
        expecting_alerts = set([d.id for d in map(lambda d: CNV.JSON2object(d.details), self.test_data) if d.expect == 'pass'])
        # alert ids are embedded in the body between ">>>>" and "<<<<" markers
        # (the special pattern written into the reason template by setup())
        actual_alerts_sent = set([
            CNV.value2int(between(b, ">>>>", "<<<<"))
            for b in emails[0].body.split(dzAlerts.daemons.alert.SEPARATOR)
            if CNV.value2int(between(b, ">>>>", "<<<<")) != None
        ])
        assert expecting_alerts == actual_alerts_sent
    except Exception, e:
        Log.error("Test failure", e)
def main(): settings = startup.read_settings() Log.start(settings.debug) try: for repo in settings.param.repos: with DB(settings.database) as db: try: pull_repo(repo) #GET LATEST DATE existing_range = db.query(""" SELECT max(`date`) `max`, min(`date`) `min`, min(revision) min_rev, max(revision) max_rev FROM changesets WHERE repo={{repo}} """, {"repo": repo.name})[0] ranges = struct.wrap([ {"min": nvl(existing_range.max, CNV.milli2datetime(0)) + timedelta(0, 1)}, {"max": existing_range.min} ]) for r in ranges: for g, docs in Q.groupby(get_changesets(date_range=r, repo=repo), size=100): for doc in docs: doc.file_changes = None doc.file_adds = None doc.file_dels = None doc.description = doc.description[0:16000] db.insert_list("changesets", docs) db.flush() missing_revisions = sql.find_holes(db, "changesets", "revision", {"term":{"repo":repo.name}}, {"min": 0, "max": existing_range.max_rev + 1}) for _range in missing_revisions: for g, docs in Q.groupby(get_changesets(revision_range=_range, repo=repo), size=100): for doc in docs: doc.file_changes = None doc.file_adds = None doc.file_dels = None doc.description = doc.description[0:16000] db.insert_list("changesets", docs) db.flush() except Exception, e: Log.warning("Failure to pull from {{repo.name}}", {"repo":repo}, e) finally: Log.stop()
def test_1(settings):
    # Feed a known time series through the exception-alert tester.  Only the
    # final row carries reject=1 (the deliberate spike); the commented-out row
    # is a dip, which the current code ignores.
    test_data1 = struct.wrap({
        "header": ("date", "count", "mean-std", "mean", "mean+std", "reject"),
        "rows": [
            ("2013-Apr-05 13:55:00", "23", "655.048136994614", "668.5652173913044", "682.0822977879948"),
            ("2013-Apr-05 13:59:00", "23", "657.8717192954238", "673.3478260869565", "688.8239328784892"),
            ("2013-Apr-05 14:05:00", "23", "658.3247270429598", "673", "687.6752729570402"),
            ("2013-Apr-05 14:08:00", "23", "658.5476631609771", "673.6521739130435", "688.7566846651099"),
            ("2013-Apr-05 14:16:00", "23", "653.2311994952266", "666.1739130434783", "679.1166265917299"),
            ("2013-Apr-05 14:26:00", "23", "659.5613845589426", "671.8260869565217", "684.0907893541009"),
            ("2013-Apr-05 14:42:00", "23", "662.3517791831357", "677.1739130434783", "691.9960469038208"),
            ("2013-Apr-05 15:26:00", "23", "659.8270045518033", "672", "684.1729954481967"),
            ("2013-Apr-05 15:30:00", "23", "659.4023663187861", "674", "688.5976336812139"),
            ("2013-Apr-05 15:32:00", "23", "652.8643631817508", "666.9565217391304", "681.0486802965099"),
            ("2013-Apr-05 15:35:00", "23", "661.6037178485499", "675.1739130434783", "688.7441082384066"),
            ("2013-Apr-05 15:39:00", "23", "658.0124378440726", "670.1304347826087", "682.2484317211449"),
            ("2013-Apr-05 16:20:00", "46", "655.9645219644624", "667.4782608695652", "678.9919997746681"),
            ("2013-Apr-05 16:30:00", "23", "660.2572506418051", "671.8695652173913", "683.4818797929775"),
            ("2013-Apr-05 16:31:00", "23", "661.011102554583", "673.4347826086956", "685.8584626628083"),
            ("2013-Apr-05 16:55:00", "23", "655.9407699325201", "671.304347826087", "686.6679257196539"),
            ("2013-Apr-05 17:07:00", "23", "657.6412277100247", "667.5217391304348", "677.4022505508448"),
            # ("2013-Apr-05 17:12:00", "23", "598.3432138277318", "617.7391304347826", "637.1350470418334"),  # <--DIP IN DATA
            ("2013-Apr-05 17:23:00", "23", "801.0537973113723", "822.1739130434783", "843.2940287755843", 1)  # <--SPIKE IN DATA
        ]
    })

    # convert the tabular form into the record form the tester expects;
    # the variance is reconstructed from ((mean+std) - mean)^2
    test_data1 = [
        struct.wrap({
            "timestamp": CNV.datetime2unix(CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S")),
            "datetime": CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S"),
            "count": int(t.count),
            "mean": float(t.mean),
            "variance": pow(float(t["mean+std"]) - float(t.mean), 2),
            "reject": t.reject
        })
        for t in CNV.table2list(test_data1.header, test_data1.rows)
    ]

    with DB(settings.perftest) as db:
        tester = test_alert_exception(db)
        tester.test_alert_generated(settings, test_data1)
def not_test_2(settings):
    """
    THIS WAS TESTING FOR A DECREASE IN THE MEAN, BUT THE CURRENT CODE
    IGNORES THOSE

    (Prefixed "not_" so the test runner skips it.  Row with h0_rejected=1 is
    the decrease that would have been an alert.)
    """
    test_data2 = struct.wrap({
        "header": ("timestamp", "mean", "std", "h0_rejected", "count"),
        "rows": [
            (1366388389, 295.36, 32.89741631, 0, 25),
            (1366387915, 307.92, 32.86198412, 0, 25),
            (1366390777, 309, 41.22802445, 0, 25),
            (1366398771, 309.24, 34.18488945, 0, 25),
            (1366401499, 308.2, 30.36170834, 0, 25),
            (1366412504, 192.8, 46.27634385, 1, 25),  # Should be an alert
            (1366421699, 298.04, 29.09249617, 0, 25),
            (1366433920, 324.52, 28.13378752, 0, 25),
            (1366445744, 302.2, 28.19131072, 0, 25),
            (1366455408, 369.96, 31.25363979, 0, 25),
            (1366474119, 313.12, 33.66541252, 0, 25),
            (1366483789, 369.96, 30.81460693, 0, 25),
            (1366498412, 311.76, 36.02462121, 0, 25),
            (1366507773, 291.08, 27.86562996, 0, 25)
        ]
    })

    # convert the tabular form into the record form the tester expects
    test_data2 = [
        struct.wrap({
            "timestamp": t.timestamp,
            "datetime": CNV.unix2datetime(t.timestamp),
            "count": t.count,
            "mean": t.mean,
            "variance": pow(t.std, 2),
            "reject": t.h0_rejected
        })
        for t in CNV.table2list(test_data2.header, test_data2.rows)
    ]

    with DB(settings.perftest) as db:
        tester = test_alert_exception(db)
        # FIX: pass settings first, matching test_1 and the tester signature;
        # the old call was missing it and would have raised TypeError
        tester.test_alert_generated(settings, test_data2)
def _setup(self):
    # Build the database fixture for a threshold-alert test: verify the
    # reason row exists, create one page, attach a threshold to it, clear old
    # alerts, then insert the test series shifted so its newest point is "now".
    uid = self.db.query("SELECT util.newid() uid FROM DUAL")[0].uid

    ## VERFIY THE alert_reason EXISTS
    exists = self.db.query("""
        SELECT
            count(1) num
        FROM
            reasons
        WHERE
            code={{reason}}
        """, {"reason": REASON}
    )[0].num
    if exists == 0:
        Log.error("Expecting the database to have an alert_reason={{reason}}", {"reason": REASON})

    ## MAKE A 'PAGE' TO TEST
    self.db.execute("DELETE FROM pages")
    self.db.insert("pages", {
        "test_id": 0,
        "url": self.url
    })
    self.page_id = self.db.query("SELECT id FROM pages")[0].id

    ## ADD A THRESHOLD TO TEST WITH
    # column order: id, page, threshold, severity, reason, time_added, contact
    self.db.execute("""
        INSERT INTO alert_page_thresholds (
            id,
            page,
            threshold,
            severity,
            reason,
            time_added,
            contact
        ) VALUES (
            {{uid}},
            {{page_id}},
            {{threshold}},
            {{severity}},
            concat("(", {{url}}, ") for test"),
            now(),
            "*****@*****.**"
        )
        """, {
        "uid": uid,
        "url": self.url,
        "page_id": self.page_id,
        "severity": self.severity,
        "threshold": 800
    })

    ## ENSURE THERE ARE NO ALERTS IN DB
    self.db.execute("DELETE FROM alerts WHERE reason={{reason}}", {"reason": REASON})

    ## diff_time IS REQUIRED TO TRANSLATE THE TEST DATE DATES TO SOMETHING MORE CURRENT
    now_time = CNV.datetime2unix(datetime.utcnow())
    max_time = max([CNV.datetime2unix(CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S")) for t in CNV.table2list(self.test_data.header, self.test_data.rows)])
    diff_time = now_time - max_time

    ## INSERT THE TEST RESULTS
    for t in CNV.table2list(self.test_data.header, self.test_data.rows):
        time = CNV.datetime2unix(CNV.string2datetime(t.date, "%Y-%b-%d %H:%M:%S"))
        time += diff_time  # shift into the present

        self.db.insert("test_data_all_dimensions", {
            "id": SQL("util.newid()"),
            "test_run_id": SQL("util.newid()"),
            "product_id": 0,
            "operating_system_id": 0,
            "test_id": 0,
            "page_id": self.page_id,
            "date_received": time,
            "revision": "ba928cbd5191",
            "product": "Firefox",
            "branch": "Mozilla-Inbound",
            "branch_version": "23.0a1",
            "operating_system_name": "mac",
            "operating_system_version": "OS X 10.8",
            "processor": "x86_64",
            "build_type": "opt",
            "machine_name": "talos-mtnlion-r5-049",
            "pushlog_id": 19998363,
            "push_date": time,
            "test_name": "tp5o",
            "page_url": self.url,
            "mean": float(t.mean),
            # std reconstructed from the (mean+std) column of the test table
            "std": float(t["mean+std"]) - float(t.mean),
            "h0_rejected": 0,
            "p": None,
            "n_replicates": t.count,
            "fdr": 0,
            "trend_mean": None,
            "trend_std": None,
            "test_evaluation": 0,
            "status": 1
        })
def send_alerts(settings, db):
    """
    BLINDLY SENDS ALERTS FROM THE ALERTS TABLE, ASSUMING ALL HAVE THE SAME STRUCTURE.
    """
    debug = settings.param.debug
    db.debug = debug

    #TODO: REMOVE, LEAVE IN DB
    # debug-only: push the current hard-coded templates into the reasons table
    if db.debug:
        db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
            "template": CNV.object2JSON(TEMPLATE),
            "subject": CNV.object2JSON(SUBJECT),
            "reason": REASON
        })
        db.flush()

    try:
        # unsent, unsolved, recent alerts above the score limit, worst first
        new_alerts = db.query("""
            SELECT
                a.id alert_id,
                a.reason,
                r.description,
                a.details,
                a.severity,
                a.confidence,
                a.revision,
                r.email_template,
                r.email_subject
            FROM
                alerts a
            JOIN
                reasons r on r.code = a.reason
            WHERE
                a.last_sent IS NULL AND
                a.status <> 'obsolete' AND
                math.bayesian_add(a.severity, a.confidence) > {{alert_limit}} AND
                a.solution IS NULL AND
                a.reason in {{reasons}} AND
                a.create_time > {{min_time}}
            ORDER BY
                math.bayesian_add(a.severity, a.confidence) DESC,
                json.number(left(details, 65000), "diff_percent") DESC
            LIMIT
                10
        """, {
            # NOTE(review): {{last_sent}} is not referenced by the SQL above —
            # looks vestigial (RESEND_AFTER not applied); confirm intent
            "last_sent": datetime.utcnow() - RESEND_AFTER,
            "alert_limit": ALERT_LIMIT - EPSILON,
            "min_time": datetime.utcnow()-LOOK_BACK,
            "reasons": SQL("("+", ".join(db.quote_value(v) for v in SEND_REASONS)+")")
        })

        if not new_alerts:
            if debug:
                Log.note("Nothing important to email")
            return

        for alert in new_alerts:
            #poor souls that signed up for emails
            listeners = ";".join(db.query("SELECT email FROM listeners WHERE reason={{reason}}", {"reason": alert.reason}).email)

            body = [HEADER]
            # clamp so the "number of nines" score below stays finite
            if alert.confidence >= 1:
                alert.confidence = 0.999999

            alert.details = CNV.JSON2object(alert.details)
            try:
                alert.revision = CNV.JSON2object(alert.revision)
            except Exception, e:
                # revision may be a plain string rather than JSON
                pass
            alert.score = str(-log(1.0-Math.bayesian_add(alert.severity, alert.confidence), 10))  #SHOW NUMBER OF NINES
            alert.details.url = alert.details.page_url
            example = alert.details.example
            for e in alert.details.tests.example + [example]:
                if e.push_date_min:
                    # mirror the window around push_date to get a max bound
                    e.push_date_max = (2 * e.push_date) - e.push_date_min
                    e.date_range = (datetime.utcnow()-CNV.milli2datetime(e.push_date_min)).total_seconds()/(24*60*60)  #REQUIRED FOR DATAZILLA B2G CHART REFERENCE
                    e.date_range = nvl(nvl(*[v for v in (7, 30, 60) if v > e.date_range]), 90)  #PICK FIRST v > CURRENT VALUE

            subject = expand_template(CNV.JSON2object(alert.email_subject), alert)
            body.append(expand_template(CNV.JSON2object(alert.email_template), alert))
            body = "".join(body)+FOOTER

            if debug:
                Log.note("EMAIL: {{email}}", {"email": body})

            if len(body) > MAX_EMAIL_LENGTH:
                Log.note("Truncated the email body")
                suffix = "... (has been truncated)"
                body = body[0:MAX_EMAIL_LENGTH - len(suffix)] + suffix  #keep it reasonable

            db.call("mail.send", (
                listeners,  #to
                subject,
                body,  #body
                None
            ))

        #I HOPE I CAN SEND ARRAYS OF NUMBERS
        # mark every queried alert as sent (not only those actually emailed)
        db.execute(
            "UPDATE alerts SET last_sent={{time}} WHERE {{where}}", {
                "time": datetime.utcnow(),
                "where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(new_alerts, "alert_id")}})
            })

    except Exception, e:
        Log.error("Could not send alerts", e)
def get_changesets(date_range=None, revision_range=None, repo=None):
    # Run `hg log` over the given date range OR revision range and parse its
    # tab-separated template output into changeset documents (generator).
    if date_range is not None:
        # build the hg --date expression; "<ts> 0" is unix-seconds + utc-offset
        if date_range.max == None:
            if date_range.min == None:
                drange = ">0 0"
            else:
                drange = ">" + unicode(CNV.datetime2unix(date_range.min)) + " 0"
        else:
            if date_range.min == None:
                # max is exclusive, hence the -1 second
                drange = "<" + unicode(CNV.datetime2unix(date_range.max) - 1) + " 0"
            else:
                drange = unicode(CNV.datetime2unix(date_range.min)) + " 0 to " + unicode(CNV.datetime2unix(date_range.max) - 1) + " 0"

    #GET ALL CHANGESET INFO
    args = [
        "hg",
        "log",
        "--cwd",
        File(repo.directory).filename,
        "-v",
        # "-p",   #TO GET PATCH CONTENTS
        "--style",
        TEMPLATE_FILE.filename
    ]
    if date_range is not None:
        args.extend(["--date", drange])
    elif revision_range is not None:
        args.extend(["-r", str(revision_range.min)+":"+str(revision_range.max)])

    proc = subprocess.Popen(
        args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=-1
    )

    def iterator():
        # lazily yield one scrubbed changeset doc per hg log output line
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':
                        # EOF: make sure hg exited cleanly before stopping
                        proc.wait()
                        if proc.returncode:
                            Log.error("Unable to pull hg log: return code {{return_code}}", {
                                "return_code": proc.returncode
                            })
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)

                # the --style template emits one tab-separated record per line:
                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (
                    date,
                    node,
                    rev,
                    author,
                    branches,
                    files,
                    file_adds,
                    file_dels,
                    p1rev,
                    p1node,
                    parents,
                    children,
                    tags,
                    desc
                ) = (CNV.latin12unicode(urllib.unquote(c)) for c in line.split("\t"))

                # multi-valued fields are newline-joined; drop the empties
                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - set()
                doc = {
                    "repo": repo.name,
                    "date": CNV.unix2datetime(CNV.value2number(date.split(" ")[0])),
                    "node": node,
                    "revision": rev,
                    "author": author,
                    "branches": set(branches.split("\n")) - {""},
                    "file_changes": files - file_adds - file_dels - {""},
                    "file_adds": file_adds,
                    "file_dels": file_dels,
                    "parents": set(parents.split("\n")) - {""} | {p1rev+":"+p1node},
                    "children": set(children.split("\n")) - {""},
                    "tags": set(tags.split("\n")) - {""},
                    "description": desc
                }
                doc = ElasticSearch.scrub(doc)
                yield doc
        except Exception, e:
            if isinstance(e, ValueError) and e.message.startswith("need more than "):
                # tuple-unpack failure: the line had too few tab fields
                Log.error("Problem iterating through log ({{message}})", {
                    "message": line
                }, e)
            Log.error("Problem iterating through log", e)
def iterator():
    # NOTE(review): this appears to be a verbatim duplicate of the iterator()
    # closure nested inside get_changesets(); it reads `proc` and `repo` as
    # free variables, so it only works in a scope that defines them — confirm
    # whether this copy is dead code that should be removed.
    try:
        while True:
            try:
                line = proc.stdout.readline()
                if line == '':
                    # EOF: make sure hg exited cleanly before stopping
                    proc.wait()
                    if proc.returncode:
                        Log.error("Unable to pull hg log: return code {{return_code}}", {
                            "return_code": proc.returncode
                        })
                    return
            except Exception, e:
                Log.error("Problem getting another line", e)

            if line.strip() == "":
                continue
            Log.note(line)

            # the --style template emits one tab-separated record per line:
            # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
            # branch = "{branch}%0A"
            # parent = "{parent}%0A"
            # tag = "{tag}%0A"
            # child = "{child}%0A"
            (
                date,
                node,
                rev,
                author,
                branches,
                files,
                file_adds,
                file_dels,
                p1rev,
                p1node,
                parents,
                children,
                tags,
                desc
            ) = (CNV.latin12unicode(urllib.unquote(c)) for c in line.split("\t"))

            # multi-valued fields are newline-joined; drop the empties
            file_adds = set(file_adds.split("\n")) - {""}
            file_dels = set(file_dels.split("\n")) - {""}
            files = set(files.split("\n")) - set()
            doc = {
                "repo": repo.name,
                "date": CNV.unix2datetime(CNV.value2number(date.split(" ")[0])),
                "node": node,
                "revision": rev,
                "author": author,
                "branches": set(branches.split("\n")) - {""},
                "file_changes": files - file_adds - file_dels - {""},
                "file_adds": file_adds,
                "file_dels": file_dels,
                "parents": set(parents.split("\n")) - {""} | {p1rev+":"+p1node},
                "children": set(children.split("\n")) - {""},
                "tags": set(tags.split("\n")) - {""},
                "description": desc
            }
            doc = ElasticSearch.scrub(doc)
            yield doc
    except Exception, e:
        if isinstance(e, ValueError) and e.message.startswith("need more than "):
            # tuple-unpack failure: the line had too few tab fields
            Log.error("Problem iterating through log ({{message}})", {
                "message": line
            }, e)
        Log.error("Problem iterating through log", e)
def insert_test_results(self, test_data):
    """
    Load the given samples into test_data_all_dimensions, time-shifted so the
    newest sample lands at "now", then add one objectstore row per test run.
    """
    ## diff_time IS REQUIRED TO TRANSLATE THE TEST DATE DATES TO SOMETHING MORE CURRENT
    current_time = CNV.datetime2unix(datetime.utcnow())
    newest_sample = max(Q.select(test_data, "timestamp"))
    shift = current_time - newest_sample

    ## INSERT THE TEST RESULTS
    for sample in test_data:
        adjusted_time = sample.timestamp + shift
        record = {
            "id": SQL("util.newid()"),
            "test_run_id": SQL("util.newid()"),
            "product_id": 0,
            "operating_system_id": 0,
            "test_id": 0,
            "page_id": self.page_id,
            "date_received": adjusted_time,
            "revision": "ba928cbd5191",
            "product": "Firefox",
            "branch": "Mozilla-Inbound",
            "branch_version": "23.0a1",
            "operating_system_name": "mac",
            "operating_system_version": "OS X 10.8",
            "processor": "x86_64",
            "build_type": "opt",
            "machine_name": "talos-mtnlion-r5-049",
            "pushlog_id": 19998363,
            "push_date": adjusted_time,
            "test_name": "tp5o",
            "page_url": self.url,
            "mean": float(sample.mean),
            "std": sqrt(sample.variance),  # samples carry variance, table wants std
            "h0_rejected": 0,
            "p": None,
            "n_replicates": sample.count,
            "fdr": 0,
            "trend_mean": None,
            "trend_std": None,
            "test_evaluation": 0,
            "status": 1
        }
        self.db.insert("test_data_all_dimensions", record)
    self.db.flush()

    # one objectstore row per distinct test_run_id, marked already-processed
    self.db.execute("""
        INSERT INTO objectstore (id, test_run_id, date_loaded, processed_exception, branch, json_blob)
        SELECT {{id}}, test_run_id, {{now}}, 'complete', branch, '{}'
        FROM test_data_all_dimensions
        GROUP BY test_run_id
        """, {
        "id": SQL("util.newid()"),
        "now": CNV.datetime2unix(datetime.utcnow())
    })
def alert_sustained_median(settings, qb, alerts_db):
    """
    find single points that deviate from the trend
    """
    # OBJECTSTORE = settings.objectstore.schema + ".objectstore"
    # TDAD = settings.perftest.schema + ".test_data_all_dimensions"
    TDAD = settings.query["from"]
    PUSH_DATE = "datazilla.date_loaded"

    debug = nvl(settings.param.debug, DEBUG)
    query = settings.query

    def is_bad(r):
        # a point is an exception when its median-test confidence beats the
        # trigger AND the change is in the bad direction for this test (per
        # test/suite config) AND the change exceeds the min regression size
        if settings.param.sustained_median.trigger < r.result.confidence:
            test_param = nvl(settings.param.test[literal_field(r.Talos.Test.name)], settings.param.suite[literal_field(r.Talos.Test.suite)])

            if test_param == None:
                return True  # no config: any confident change is bad

            if test_param.disable:
                return False

            if test_param.better == "higher":
                diff = -r.diff
            elif test_param.better == "lower":
                diff = r.diff
            else:
                diff = abs(r.diff)  # DEFAULT = ANY DIRECTION IS BAD

            # min regression size: "N%" of the past mean, or an absolute value;
            # default is 1% of the past mean
            if test_param.min_regression:
                if unicode(test_param.min_regression.strip()[-1]) == "%":
                    min_diff = Math.abs(r.past_stats.mean * float(test_param.min_regression.strip()[:-1]) / 100.0)
                else:
                    min_diff = Math.abs(float(test_param.min_regression))
            else:
                min_diff = Math.abs(r.past_stats.mean * 0.01)

            if diff > min_diff:
                return True
        return False

    with Timer("pull combinations"):
        disabled_suites = [s for s, p in settings.param.suite.items() if p.disable]
        disabled_tests = [t for t, p in settings.param.test.items() if p.disable]
        # one row per edge combination that still has unprocessed test results
        temp = Query({
            "from": TDAD,
            "select": {"name": "min_push_date", "value": PUSH_DATE, "aggregate": "min"},
            "edges": query.edges,
            "where": {"and": [
                # on --restart, re-evaluate everything, not just unmarked rows
                True if settings.args.restart else {"missing": {"field": settings.param.mark_complete}},
                {"exists": {"field": "result.test_name"}},
                {"range": {PUSH_DATE: {"gte": OLDEST_TS}}},
                {"not": {"terms": {"Talos.Test.suite": disabled_suites}}},
                {"not": {"terms": {"Talos.Test.name": disabled_tests}}}
                # {"term": {"testrun.suite": "cart"}},
                # {"term": {"result.test_name": "1-customize-enter.error.TART"}},
                # {"term": {"test_machine.osversion": "OS X 10.8"}}
                #FOR DEBUGGING SPECIFIC SERIES
                # {"term": {"test_machine.type": "hamachi"}},
                # {"term": {"test_machine.platform": "Gonk"}},
                # {"term": {"test_machine.os": "Firefox OS"}},
                # {"term": {"test_build.branch": "master"}},
                # {"term": {"testrun.suite": "communications/ftu"}},
                # {"term": {"result.test_name": "startup_time"}}
            ]},
            "limit": nvl(settings.param.combo_limit, 1000)
        }, qb)
        new_test_points = qb.query(temp)

    #BRING IN ALL NEEDED DATA
    if debug:
        Log.note("Pull all data for {{num}} groups:\n{{groups.name}}", {
            "num": len(new_test_points),
            "groups": query.edges
        })

    # all_min_date = Null
    all_touched = set()
    evaled_tests = set()
    alerts = []   # PUT ALL THE EXCEPTION ITEMS HERE
    for g, test_points in Q.groupby(new_test_points, query.edges):
        if not test_points.min_push_date:
            continue
        try:
            if settings.args.restart:
                first_sample = OLDEST_TS
            else:
                first_sample = MAX(MIN(test_points.min_push_date), OLDEST_TS)

            # FOR THIS g, HOW FAR BACK IN TIME MUST WE GO TO COVER OUR WINDOW_SIZE?
            first_in_window = qb.query({
                "select": {"name": "min_date", "value": "push_date", "aggregate": "min"},
                "from": {
                    "from": TDAD,
                    "select": {"name": "push_date", "value": PUSH_DATE},
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"lt": first_sample}}}
                    ]},
                    "sort": {"field": PUSH_DATE, "sort": -1},
                    "limit": settings.param.sustained_median.window_size * 2
                }
            })
            # if we got the full 2x window of history, the early points can be
            # ignored (do_all=False); otherwise evaluate everything
            if len(first_in_window) > settings.param.sustained_median.window_size * 2:
                do_all = False
            else:
                do_all = True

            min_date = MIN(first_sample, first_in_window.min_date)

            #LOAD TEST RESULTS FROM DATABASE
            test_results = qb.query({
                "from": {
                    "from": "talos",
                    "select": [{"name": "push_date", "value": PUSH_DATE}] + query.select + query.edges,
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"gte": min_date}}}
                    ]},
                },
                "sort": "push_date"
            })

            Log.note("{{num}} test results found for {{group}} dating back no further than {{start_date}}", {
                "num": len(test_results),
                "group": g,
                "start_date": CNV.milli2datetime(min_date)
            })

            if debug:
                Log.note("Find sustained_median exceptions")

            #APPLY WINDOW FUNCTIONS
            stats = Q.run({
                "from": {
                    "from": test_results,
                    "where": {"exists": {"field": "value"}}
                },
                "window": [
                    {
                        # WE DO NOT WANT TO CONSIDER THE POINTS BEFORE FULL WINDOW SIZE
                        "name": "ignored",
                        "value": lambda r, i: False if do_all or i > settings.param.sustained_median.window_size else True
                    },
                    {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_min",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Min,
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    },
                    {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_max",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Max,
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    },
                    {
                        "name": "past_revision",
                        "value": lambda r, i, rows: rows[i - 1].Talos.Revision,
                        "sort": "push_date"
                    },
                    {
                        # robust stats over the trailing window (trimmed to middle 60%)
                        "name": "past_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    },
                    {
                        # robust stats over the leading window
                        "name": "future_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    },
                    {
                        # median test comparing trailing vs leading windows
                        "name": "result",
                        "value": lambda r, i, rows: median_test(
                            rows[-settings.param.sustained_median.window_size + i:i:].value,
                            rows[i:settings.param.sustained_median.window_size + i:].value,
                            interpolate=False
                        ),
                        "sort": "push_date"
                    },
                    {
                        "name": "diff",
                        "value": lambda r: r.future_stats.mean - r.past_stats.mean
                    },
                    {
                        "name": "diff_percent",
                        "value": lambda r: (r.future_stats.mean - r.past_stats.mean) / r.past_stats.mean
                    },
                    {
                        "name": "is_diff",
                        "value": is_bad
                    },
                    {
                        #USE THIS TO FILL CONFIDENCE HOLES
                        #WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "future_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": "push_date"
                    },
                    {
                        #WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "past_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": {"value": "push_date", "sort": -1}
                    },
                ]
            })

            #PICK THE BEST SCORE FOR EACH is_diff==True REGION
            for g2, data in Q.groupby(stats, "is_diff", contiguous=True):
                if g2.is_diff:
                    best = Q.sort(data, ["result.confidence", "diff"]).last()
                    best["pass"] = True

            all_touched.update(Q.select(test_results, ["test_run_id", "Talos.Test"]))

            # TESTS THAT HAVE BEEN (RE)EVALUATED GIVEN THE NEW INFORMATION
            evaled_tests.update(Q.run({
                "from": test_results,
                "select": ["test_run_id", "Talos.Test"],
                "where": {"term": {"ignored": False}}
            }))

            # dump the evaluated series for offline inspection
            File("test_values.txt").write(CNV.list2tab(Q.select(stats, [
                {"name": "push_date", "value": lambda x: CNV.datetime2string(CNV.milli2datetime(x.push_date), "%d-%b-%Y %H:%M:%S")},
                "value",
                {"name": "revision", "value": "Talos.Revision"},
                {"name": "confidence", "value": "result.confidence"},
                "pass"
            ])))

            #TESTS THAT HAVE SHOWN THEMSELVES TO BE EXCEPTIONAL
            new_exceptions = Q.filter(stats, {"term": {"pass": True}})
            for v in new_exceptions:
                if v.ignored:
                    continue
                alert = Struct(
                    status="new",
                    create_time=CNV.milli2datetime(v.push_date),
                    tdad_id={"test_run_id": v.test_run_id, "Talos": {"Test": v.Talos.Test}},
                    reason=REASON,
                    revision=v.Talos.Revision,
                    details=v,
                    severity=SEVERITY,
                    confidence=v.result.confidence
                )
                alerts.append(alert)

            if debug:
                Log.note("{{num}} new exceptions found", {"num": len(new_exceptions)})
        except Exception, e:
            # one bad group must not stop processing of the rest
            Log.warning("Problem with alert identification, continue to log existing alerts and stop cleanly", e)
from dzAlerts.util.queries.query import Query from dzAlerts.util.struct import nvl, StructList, literal_field, unwrap from dzAlerts.util.sql.db import SQL from dzAlerts.util.env.logs import Log from dzAlerts.util.struct import Struct from dzAlerts.util.queries import Q from dzAlerts.util.sql.db import DB from dzAlerts.util.times.timer import Timer SEVERITY = 0.8 # THERE ARE MANY FALSE POSITIVES (0.99 == positive indicator, 0.5==not an indicator, 0.01 == negative indicator) # MIN_CONFIDENCE = 0.9999 REASON = "talos_alert_sustained_median" # name of the reason in alert_reason NOW = datetime.utcnow() MAX_AGE = timedelta(days=90) OLDEST_TS = CNV.datetime2milli(NOW - MAX_AGE) TEMPLATE = """<div><h3>{{score}} - {{reason}}</h3><br> On page {{page_url}}<br> <a href=\"https://tbpl.mozilla.org/?tree={{branch}}&rev={{revision}}\">TBPL</a><br> <a href=\"https://hg.mozilla.org/rev/{{revision}}\">Mercurial</a><br> <a href=\"https://bugzilla.mozilla.org/show_bug.cgi?id={{bug_id}}\">Bugzilla - {{bug_description}}</a><br> <a href=\"https://datazilla.mozilla.org/?start={{push_date_min}}&stop={{push_date_max}}&product={{product}}&repository={{branch}}&os={{operating_system_name}}&os_version={{operating_system_version}}&test={{test_name}}&graph_search={{revision}}&error_bars=false&project=talos\">Datazilla</a><br> <a href=\"http://people.mozilla.com/~klahnakoski/test/es/DZ-ShowPage.html#page={{page_url}}&sampleMax={{push_date}}000&sampleMin={{push_date_min}}000&branch={{branch}}\">Kyle's ES</a><br> Raw data: {{details}} </div>""" DEBUG = True def alert_sustained_median(settings, qb, alerts_db):
def page_threshold_limit(db, debug): """ simplest of rules to test the dataflow from test_run, to alert, to email may prove slightly useful also! #point out any pages that are breaking human-set threshold limits """ db.debug = debug try: # CALCULATE HOW FAR BACK TO LOOK lasttime = db.query("SELECT last_run, description FROM reasons WHERE code={{type}}", {"type": REASON})[0] lasttime = nvl(lasttime.last_run, datetime.utcnow()) min_date = lasttime + LOOK_BACK # FIND ALL PAGES THAT HAVE LIMITS TO TEST # BRING BACK ONES THAT BREAK LIMITS # BUT DO NOT ALREADY HAVE AN ALERTS EXISTING pages = db.query( """ SELECT t.id tdad_id, t.n_replicates, t.mean, t.std, h.threshold, h.severity, h.reason, m.id alert_id FROM alert_page_thresholds h JOIN test_data_all_dimensions t ON t.page_id=h.page LEFT JOIN alerts m on m.tdad_id=t.test_run_id AND m.reason={{type}} WHERE h.threshold<t.mean AND t.push_date>{{min_date}} AND (m.id IS NULL OR m.status='obsol11ete') """, {"type": REASON, "min_date": min_date}, ) # FOR EACH PAGE THAT BREAKS LIMITS for page in pages: if page.alert_id != None: break alert = { "id": SQL("util.newid()"), "status": "new", "create_time": datetime.utcnow(), "last_updated": datetime.utcnow(), "tdad_id": page.tdad_id, "reason": REASON, "details": CNV.object2JSON( {"expected": float(page.threshold), "actual": float(page.mean), "reason": page.reason} ), "severity": page.severity, "confidence": 1.0 # USING NORMAL DIST ASSUMPTION WE CAN ADJUST # CONFIDENCE EVEN BEFORE THRESHOLD IS HIT! 
# FOR NOW WE KEEP IT SIMPLE } db.insert("alerts", alert) for page in pages: if page.alert_id == None: break db.update("alerts", None) # ERROR FOR NOW # OBSOLETE THE ALERTS THAT SHOULD NO LONGER GET SENT obsolete = db.query( """ SELECT m.id, m.tdad_id FROM alerts m JOIN test_data_all_dimensions t ON m.tdad_id=t.id JOIN alert_page_thresholds h on t.page_id=h.page WHERE m.reason={{reason}} AND h.threshold>=t.mean AND t.push_date>{{time}} """, {"reason": REASON, "time": min_date}, ) if obsolete: db.execute( "UPDATE alerts SET status='obsolete' WHERE {{where}}", {"where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(obsolete, "id")}})}, ) db.execute( "UPDATE reasons SET last_run={{now}} WHERE code={{reason}}", {"now": datetime.utcnow(), "reason": REASON} ) except Exception, e: Log.error("Could not perform threshold comparisons", e)
def talos_alert_revision(settings):
    # Roll the per-test sustained-median alerts up into one alert per
    # revision: insert new revision alerts, update changed ones, obsolete
    # stale ones, and link child (sustained) alerts to their revision parent.
    assert settings.alerts != None
    settings.db.debug = settings.param.debug

    with DB(settings.alerts) as db:
        with ESQuery(ElasticSearch(settings.query["from"])) as esq:
            dbq = DBQuery(db)
            esq.addDimension(CNV.JSON2object(File(settings.dimension.filename).read()))

            #TODO: REMOVE, LEAVE IN DB
            # debug-only: push the current hard-coded templates into the db
            if db.debug:
                db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
                    "template": CNV.object2JSON(TEMPLATE),
                    "subject": CNV.object2JSON(SUBJECT),
                    "reason": REASON
                })
                db.flush()

            #EXISTING SUSTAINED EXCEPTIONS
            existing_sustained_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": talos_sustained_median.REASON}},
                    {"not": {"term": {"status": "obsolete"}}},
                    {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                ]}
            })

            tests = Q.index(existing_sustained_alerts, ["revision", "details.Talos.Test"])

            #EXISTING REVISION-LEVEL ALERTS
            # NOTE(review): the nested {"or": [...]} mixes conditions that look
            # like they belong under "and" (reason + status + create_time);
            # confirm this filter returns what is intended
            old_alerts = dbq.query({
                "from": "alerts",
                "select": "*",
                "where": {"and": [
                    {"term": {"reason": REASON}},
                    {"or": [
                        {"terms": {"revision": set(existing_sustained_alerts.revision)}},
                        {"term": {"reason": talos_sustained_median.REASON}},
                        {"term": {"status": "obsolete"}},
                        {"range": {"create_time": {"gte": NOW - LOOK_BACK}}}
                    ]}
                ]}
            })
            old_alerts = Q.unique_index(old_alerts, "revision")

            #SUMMARIZE
            known_alerts = StructList()
            for revision in set(existing_sustained_alerts.revision):
                #FIND TOTAL TDAD FOR EACH INTERESTING REVISION
                total_tests = esq.query({
                    "from": "talos",
                    "select": {"name": "count", "aggregate": "count"},
                    "where": {"and": [
                        {"terms": {"Talos.Revision": revision}}
                    ]}
                })
                total_exceptions = tests[(revision, )]  # FILTER BY revision

                # one summary "part" per test, using its worst exception
                parts = StructList()
                for g, exceptions in Q.groupby(total_exceptions, ["details.Talos.Test"]):
                    worst_in_test = Q.sort(exceptions, ["confidence", "details.diff_percent"]).last()
                    example = worst_in_test.details
                    # ADD SOME DATAZILLA SPECIFIC URL PARAMETERS
                    branch = example.Talos.Branch.replace("-Non-PGO", "")
                    example.tbpl.url.branch = TBPL_PATH.get(branch, branch)
                    example.mercurial.url.branch = MECURIAL_PATH.get(branch, branch)
                    example.datazilla.url.branch = example.Talos.Branch  #+ ("" if worst_in_test.Talos.Branch.pgo else "-Non-PGO")
                    example.datazilla.url.x86 = "true" if example.Talos.Platform == "x86" else "false"
                    example.datazilla.url.x86_64 = "true" if example.Talos.Platform == "x86_64" else "false"
                    # mirror the window around push_date when max is missing
                    example.datazilla.url.stop = nvl(example.push_date_max, (2*example.push_date) - example.push_date_min)

                    num_except = len(exceptions)
                    if num_except == 0:
                        continue

                    part = {
                        "test": g.details.Talos.Test,
                        "num_exceptions": num_except,
                        "num_tests": total_tests,
                        "confidence": worst_in_test.confidence,
                        "example": example
                    }
                    parts.append(part)

                parts = Q.sort(parts, [{"field": "confidence", "sort": -1}])
                worst_in_revision = parts[0].example

                known_alerts.append({
                    "status": "new",
                    "create_time": CNV.milli2datetime(worst_in_revision.push_date),
                    "reason": REASON,
                    "revision": revision,
                    "tdad_id": revision,
                    "details": {
                        "revision": revision,
                        "total_tests": total_tests,
                        "total_exceptions": len(total_exceptions),
                        "tests": parts,
                        "example": worst_in_revision
                    },
                    "severity": SEVERITY,
                    "confidence": worst_in_revision.result.confidence
                })

            known_alerts = Q.unique_index(known_alerts, "revision")

            #NEW ALERTS, JUST INSERT
            new_alerts = known_alerts - old_alerts
            if new_alerts:
                for revision in new_alerts:
                    revision.id = SQL("util.newid()")
                    revision.last_updated = NOW
                db.insert_list("alerts", new_alerts)

            #SHOW SUSTAINED ALERTS ARE COVERED
            # link each uncovered sustained (child) alert to the revision-level
            # (parent) alert for the same revision
            db.execute("""
                INSERT INTO hierarchy (parent, child)
                SELECT
                    r.id parent,
                    p.id child
                FROM
                    alerts p
                LEFT JOIN
                    hierarchy h on h.child=p.id
                LEFT JOIN
                    alerts r on r.revision=p.revision AND r.reason={{parent_reason}}
                WHERE
                    {{where}}
            """, {
                "where": esfilter2sqlwhere(db, {"and": [
                    {"term": {"p.reason": talos_sustained_median.REASON}},
                    {"terms": {"p.revision": Q.select(existing_sustained_alerts, "revision")}},
                    {"missing": "h.parent"}
                ]}),
                "parent_reason": REASON
            })

            #CURRENT ALERTS, UPDATE IF DIFFERENT
            for known_alert in known_alerts & old_alerts:
                if len(nvl(known_alert.solution, "").strip()) != 0:
                    continue  # DO NOT TOUCH SOLVED ALERTS

                old_alert = old_alerts[known_alert]
                # refresh when resurrected, or when severity/confidence moved
                # by a significant amount
                if old_alert.status == 'obsolete' or significant_difference(known_alert.severity, old_alert.severity) or significant_difference(known_alert.confidence, old_alert.confidence):
                    known_alert.last_updated = NOW
                    db.update("alerts", {"id": old_alert.id}, known_alert)

            #OLD ALERTS, OBSOLETE
            for old_alert in old_alerts - known_alerts:
                if old_alert.status == 'obsolete':
                    continue
                db.update("alerts", {"id": old_alert.id}, {"status": "obsolete", "last_updated": NOW, "details": None})
def setup(self, to_list):
    """
    Reset the test database to a known state for one alert-mail test run.

    Clears the mail queue and the listener registry, registers every address
    in `to_list` as a listener, creates a throw-away "reason" row, inserts a
    single test_data_all_dimensions row for the alerts to point at, and
    finally inserts the canned `alerts` rows (kept in self.test_data).  Each
    alert's serialized details carry an "expect" flag ("pass"/"fail") so the
    verification step (help_send_alerts) can tell from the delivered email
    body which alerts should, and should not, have been sent.

    @param to_list list of email addresses to register as listeners; may be
                   empty, in which case the test expects no mail at all
    """
    # Fresh unique id base for this run; alert ids below are self.uid + n.
    self.uid = self.db.query("SELECT util.newid() uid FROM DUAL")[0].uid

    #CLEAR EMAILS
    self.db.execute("DELETE FROM mail.delivery")
    self.db.execute("DELETE FROM mail.attachment")
    self.db.execute("DELETE FROM mail.content")

    #TEST NUMBER OF LISTENERS IN listeners TABLE
    self.db.execute("DELETE FROM listeners")
    for l in to_list:
        self.db.insert("listeners", {"email": l})

    #MAKE A REASON FOR USE IN THIS TESTING
    # Remove residue from earlier runs before (re)inserting the reason row;
    # alerts referencing the reason are deleted first.
    self.db.execute("DELETE FROM alerts WHERE reason={{reason}}", {"reason": self.reason})
    self.db.execute("DELETE FROM reasons WHERE code={{reason}}", {"reason": self.reason})
    self.db.insert("reasons", {
        "code": self.reason,
        "description": ">>>>{{id}}<<<<",  #SPECIAL PATTERN TO DISTINGUISH BETWEEN RESULTING MAILS
        "config": None,
        # a day in the past — presumably so the alert run treats this reason
        # as due for processing; confirm against send_alerts' scheduling
        "last_run": self.now - timedelta(days=1)
    })

    #MAKE SOME TEST DATA (AND GET ID)
    # One test_data_all_dimensions row for the alerts to reference (tdad_id)
    all_dim = struct.wrap({
        "header": ("id", "test_run_id", "product_id", "operating_system_id", "test_id", "page_id",
                   "date_received", "revision", "product", "branch", "branch_version",
                   "operating_system_name", "operating_system_version", "processor", "build_type",
                   "machine_name", "pushlog_id", "push_date", "test_name", "page_url", "mean",
                   "std", "h0_rejected", "p", "n_replicates", "fdr", "trend_mean", "trend_std",
                   "test_evaluation", "status"),
        "data": [
            (0, 117679, 65, 20, 64, 860, 1366261267, "d6b34be6fb4c", "Firefox", "Mozilla-Inbound",
             "23.0a1", "win", "6.2.9200", "x86_64", "opt", "t-w864-ix-022", "19801727",
             "1366245741", "tp5o", "bbc.co.uk", 138.8, 40.5257120028, 0, 0.650194865224, 25, 0,
             144.37333333365, 12.96130778322, 1, 1)
        ]})
    self.db.insert_list("test_data_all_dimensions", CNV.table2list(all_dim.header, all_dim.data))
    # Recover the id of the row just inserted (the literal 0 above appears to
    # be replaced by a DB-assigned id — TODO confirm against the schema)
    self.series = self.db.query("SELECT min(id) id FROM test_data_all_dimensions")[0].id

    # WE INJECT THE EXPECTED TEST RESULTS RIGHT INTO THE DETAILS, THAT WAY
    # WE CAN SEE THEM IN THE EMAIL DELIVERED
    test_data = struct.wrap({
        "header": ("id", "status", "create_time", "last_updated", "last_sent", "tdad_id", "reason",
                   "details", "severity", "confidence", "solution"),
        "data": [
            #TEST last_sent IS NOT TOO YOUNG
            (self.uid + 0, "new", self.far_past, self.far_past, self.recent_past, self.series, self.reason, CNV.object2JSON({"id": 0, "expect": "fail"}), self.high_severity, self.high_confidence, None),
            #TEST last_sent IS TOO OLD, SHOULD BE (RE)SENT
            (self.uid + 1, "new", self.far_past, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 1, "expect": "pass"}), self.high_severity, self.high_confidence, None),
            (self.uid + 2, "new", self.far_past, self.now, self.far_past, self.series, self.reason, CNV.object2JSON({"id": 2, "expect": "pass"}), self.high_severity, self.high_confidence, None),
            (self.uid + 3, "new", self.now, self.now, self.recent_past, self.series, self.reason, CNV.object2JSON({"id": 3, "expect": "pass"}), self.high_severity, self.high_confidence, None),
            #TEST obsolete ARE NOT SENT
            (self.uid + 4, "obsolete", self.now, self.now, self.far_past, self.series, self.reason, CNV.object2JSON({"id": 4, "expect": "fail"}), self.high_severity, self.high_confidence, None),
            #TEST ONLY IMPORTANT ARE SENT
            (self.uid + 5, "new", self.now, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 5, "expect": "pass"}), self.important, 0.5, None),
            (self.uid + 6, "new", self.now, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 6, "expect": "fail"}), self.low_severity, self.high_confidence, None),
            (self.uid + 7, "new", self.now, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 7, "expect": "fail"}), self.high_severity, self.low_confidence, None),
            #TEST ONES WITH SOLUTION ARE NOT SENT
            (self.uid + 8, "new", self.now, self.now, None, self.series, self.reason, CNV.object2JSON({"id": 8, "expect": "fail"}), self.high_severity, self.high_confidence, "a solution!")
        ]
    })
    self.test_data = CNV.table2list(test_data.header, test_data.data)
    self.db.insert_list("alerts", self.test_data)