def main(): settings = startup.read_settings() Log.start(settings.debug) try: Log.note("Summarize by revision {{schema}}", {"schema": settings.perftest.schema}) talos_alert_revision(settings) finally: Log.stop()
def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error("Can not clone {{repo.url}}, because {{problem}}", {
                            "repo": repo,
                            "problem": line
                        })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()
    else:
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd", File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error("Problem pulling repo, try \"hg recover\"\n{{reason|indent}}", {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}", {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}", {"pull_results": output})

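# Hedged usage sketch (not part of the original source): pull_repo() only needs an object
# carrying .name, .url and .directory, which the daemons normally read from settings.
# Struct is the pyLibrary record type used elsewhere in this code; the URL and local path
# below are example values, not the project's actual configuration.
example_repo = Struct(
    name="mozilla-central",
    url="https://hg.mozilla.org/mozilla-central",
    directory="./hg/mozilla-central"
)
pull_repo(example_repo)
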
def calc_safe(good, bad, c):
    fr = confident_fail_rate(good, bad, c)
    Log.note("range(good={{good}}, bad={{bad}}, confidence={{confidence}}) = {{safe}}", {
        "good": good,
        "bad": bad,
        "confidence": c,
        "safe": fr
    })
    return fr

def main(): settings = startup.read_settings("test_settings.json") Log.start(settings.debug) try: test_a_fail_b_pass() test_a_pass_b_fail() test_a_fail_b_fail() test_safe_combinations() Log.note("SUCCESS!!") finally: Log.stop()
def email_send(db, emailer, debug):
    db.debug = debug

    ## VERIFY self SHOULD BE THE ONE PERFORMING OPS (TO PREVENT MULTIPLE INSTANCES NEEDLESSLY RUNNING)
    try:
        ## EXIT EARLY IF THERE ARE NO EMAILS TO SEND
        has_mail = db.query("SELECT max(new_mail) new_mail FROM mail.notify")
        if has_mail[0]["new_mail"] == 0:
            Log.note("No emails to send")
            return

        ## GET LIST OF MAILS TO SEND
        emails = db.query("""
            SELECT
                c.id,
                group_concat(d.deliver_to SEPARATOR ',') `to`,
                c.subject,
                c.body
            FROM
                mail.content c
            LEFT JOIN
                mail.delivery d ON d.content = c.id
            WHERE
                d.content IS NOT NULL AND
                c.date_sent IS NULL
            GROUP BY
                c.id
        """)

        ## SEND MAILS
        not_done = 0  # SET TO ONE IF THERE ARE MAIL FAILURES, AND THERE ARE MAILS STILL LEFT TO SEND
        num_done = 0
        for email in emails:
            try:
                emailer.send_email(
                    to_addrs=email.to.split(','),
                    subject=email.subject,
                    html_data=email.body
                )
                db.execute("UPDATE mail.content SET date_sent={{now}} WHERE id={{id}}", {"id": email.id, "now": datetime.utcnow()})
                db.flush()
                num_done += len(email.to.split(','))
            except Exception, e:
                Log.warning("Problem sending email", e)
                not_done = 1

        db.execute("UPDATE mail.notify SET new_mail={{not_done}}", {"not_done": not_done})
        Log.note(str(num_done) + " emails have been sent")
    except Exception, e:
        # ASSUMED HANDLER: the excerpt ends before the original except clause, so this
        # closes the outer try by reporting the failure
        Log.error("Could not send emails", e)

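# Hedged illustration (not taken from the original source) of how a message could be
# queued so that email_send() above finds it: one row in mail.content, one mail.delivery
# row per recipient, and mail.notify.new_mail raised. The table and column names are
# inferred from the queries in email_send(); the real schema may carry more columns.
def queue_email(db, deliver_to, subject, body):
    db.execute(
        "INSERT INTO mail.content (subject, body, date_sent) VALUES ({{subject}}, {{body}}, NULL)",
        {"subject": subject, "body": body}
    )
    content_id = db.query("SELECT last_insert_id() id")[0].id
    for address in deliver_to:
        db.execute(
            "INSERT INTO mail.delivery (content, deliver_to) VALUES ({{content}}, {{to}})",
            {"content": content_id, "to": address}
        )
    db.execute("UPDATE mail.notify SET new_mail=1")
    db.flush()
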
def main(): settings = startup.read_settings() Log.start(settings.debug) try: Log.note("Running email using schema {{schema}}", {"schema": settings.perftest.schema}) with DB(settings.alerts) as db: email_send( db=db, emailer=Emailer(settings.email), debug=nvl(settings.debug, False) ) except Exception, e: Log.warning("Failure to send emails", cause=e)
def make_test_database(settings):
    try:
        settings.perftest.debug = True
        no_schema = settings.perftest.copy()
        no_schema.schema = ""

        Log.note("CLEAR DATABASE {{database}}", {"database": settings.perftest.schema})
        with DB(no_schema) as db:
            db.execute("DROP DATABASE IF EXISTS " + settings.perftest.schema)
            db.flush()
            db.execute("CREATE DATABASE " + settings.perftest.schema)

        # TEMPLATE HAS {engine} TAG THAT MUST BE REPLACED
        Log.note("BUILD NEW DATABASE {{database}}", {"database": settings.perftest.schema})
        DB.execute_file(settings.perftest, "tests/resources/sql/schema_perftest.sql")
        DB.execute_file(settings.perftest, "tests/resources/sql/Add test_data_all_dimensions.sql")

        Log.note("MIGRATE {{database}} TO NEW SCHEMA", {"database": settings.perftest.schema})
        DB.execute_file(settings.perftest, "resources/migration/alerts.sql")
        DB.execute_file(settings.perftest, "resources/migration/v1.2 email.sql")

        with DB(settings.perftest) as db:
            db.execute("ALTER TABLE test_data_all_dimensions DROP FOREIGN KEY `fk_test_run_id_tdad`")
            db.execute("ALTER TABLE pages DROP FOREIGN KEY `fk_pages_test`")
            db.execute("DELETE FROM mail.delivery")
            db.execute("DELETE FROM mail.attachment")
            db.execute("DELETE FROM mail.content")

        # ADD FUNCTIONS FOR TEST VERIFICATION
        DB.execute_file(settings.perftest, "tests/resources/sql/add_objectstore.sql")
        DB.execute_file(settings.perftest, "tests/resources/sql/json.sql")

        Log.note("DATABASE READY {{database}}", {"database": settings.perftest.schema})
    except Exception, e:
        Log.error("Database setup failed", e)

def main():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)

        DB.execute_file(settings.database, settings.sql.rstrip("/") + "/util/util.sql")
        DB.execute_file(settings.database, settings.sql.rstrip("/") + "/util/debug.sql")
        DB.execute_file(settings.database, settings.sql.rstrip("/") + "/util/cnv.sql")
        DB.execute_file(settings.database, settings.sql.rstrip("/") + "/util/string.sql")
        DB.execute_file(settings.database, settings.sql.rstrip("/") + "/util/math.sql")
        DB.execute_file(settings.database, settings.sql.rstrip("/") + "/util/json.sql")
        DB.execute_file(settings.database, settings.sql.rstrip("/") + "/util/mail.sql")
        DB.execute_file(settings.database, settings.sql.rstrip("/") + "/alerts.sql")

        Log.note("DB setup complete")
    except Exception, e:
        Log.warning("Failure to setup DB", cause=e)

def main():
    settings = startup.read_settings(defs=[{
        "name": ["--restart", "--reset", "--redo"],
        "help": "use this to recalc alerts",
        "action": "store_true",
        "dest": "restart"
    }])
    Log.start(settings.debug)
    try:
        with startup.SingleInstance(flavor_id=settings.args.filename):
            Log.note("Finding exceptions in index {{index_name}}", {"index_name": settings.query["from"].name})

            with ESQuery(ElasticSearch(settings.query["from"])) as qb:
                qb.addDimension(CNV.JSON2object(File(settings.dimension.filename).read()))

                with DB(settings.alerts) as alerts_db:
                    alert_sustained_median(
                        settings,
                        qb,
                        alerts_db
                    )
    except Exception, e:
        Log.warning("Failure to find sustained_median exceptions", e)

def send_alerts(settings, db):
    """
    BLINDLY SENDS ALERTS FROM THE ALERTS TABLE, ASSUMING ALL HAVE THE SAME STRUCTURE.
    """
    debug = settings.param.debug
    db.debug = debug

    # TODO: REMOVE, LEAVE IN DB
    if db.debug:
        db.execute("update reasons set email_subject={{subject}}, email_template={{template}} where code={{reason}}", {
            "template": CNV.object2JSON(TEMPLATE),
            "subject": CNV.object2JSON(SUBJECT),
            "reason": REASON
        })
        db.flush()

    try:
        new_alerts = db.query("""
            SELECT
                a.id alert_id,
                a.reason,
                r.description,
                a.details,
                a.severity,
                a.confidence,
                a.revision,
                r.email_template,
                r.email_subject
            FROM
                alerts a
            JOIN
                reasons r on r.code = a.reason
            WHERE
                a.last_sent IS NULL AND
                a.status <> 'obsolete' AND
                math.bayesian_add(a.severity, a.confidence) > {{alert_limit}} AND
                a.solution IS NULL AND
                a.reason in {{reasons}} AND
                a.create_time > {{min_time}}
            ORDER BY
                math.bayesian_add(a.severity, a.confidence) DESC,
                json.number(left(details, 65000), "diff_percent") DESC
            LIMIT 10
        """, {
            "last_sent": datetime.utcnow() - RESEND_AFTER,
            "alert_limit": ALERT_LIMIT - EPSILON,
            "min_time": datetime.utcnow() - LOOK_BACK,
            "reasons": SQL("(" + ", ".join(db.quote_value(v) for v in SEND_REASONS) + ")")
        })

        if not new_alerts:
            if debug:
                Log.note("Nothing important to email")
            return

        for alert in new_alerts:
            # POOR SOULS THAT SIGNED UP FOR EMAILS
            listeners = ";".join(db.query("SELECT email FROM listeners WHERE reason={{reason}}", {"reason": alert.reason}).email)

            body = [HEADER]
            if alert.confidence >= 1:
                alert.confidence = 0.999999

            alert.details = CNV.JSON2object(alert.details)
            try:
                alert.revision = CNV.JSON2object(alert.revision)
            except Exception, e:
                pass
            alert.score = str(-log(1.0 - Math.bayesian_add(alert.severity, alert.confidence), 10))  # SHOW NUMBER OF NINES
            alert.details.url = alert.details.page_url
            example = alert.details.example
            for e in alert.details.tests.example + [example]:
                if e.push_date_min:
                    e.push_date_max = (2 * e.push_date) - e.push_date_min
                    e.date_range = (datetime.utcnow() - CNV.milli2datetime(e.push_date_min)).total_seconds() / (24 * 60 * 60)  # REQUIRED FOR DATAZILLA B2G CHART REFERENCE
                    e.date_range = nvl(nvl(*[v for v in (7, 30, 60) if v > e.date_range]), 90)  # PICK FIRST v > CURRENT VALUE

            subject = expand_template(CNV.JSON2object(alert.email_subject), alert)
            body.append(expand_template(CNV.JSON2object(alert.email_template), alert))
            body = "".join(body) + FOOTER
            if debug:
                Log.note("EMAIL: {{email}}", {"email": body})

            if len(body) > MAX_EMAIL_LENGTH:
                Log.note("Truncated the email body")
                suffix = "... (has been truncated)"
                body = body[0:MAX_EMAIL_LENGTH - len(suffix)] + suffix  # keep it reasonable

            db.call("mail.send", (
                listeners,  # to
                subject,
                body,  # body
                None
            ))

        # I HOPE I CAN SEND ARRAYS OF NUMBERS
        db.execute(
            "UPDATE alerts SET last_sent={{time}} WHERE {{where}}", {
                "time": datetime.utcnow(),
                "where": esfilter2sqlwhere(db, {"terms": {"id": Q.select(new_alerts, "alert_id")}})
            })
    except Exception, e:
        Log.error("Could not send alerts", e)

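# Small illustration (an assumption, not taken from the source) of the expand_template()
# convention used in send_alerts() above: the stored email_subject/email_template contain
# {{name}} placeholders that are filled from the alert record, the same moustache-style
# convention Log.note() uses throughout this code. The template text and values here are
# made up for demonstration only.
example_subject = expand_template(
    "Performance regression found (score {{score}}) on {{url}}",
    {"score": "2.5", "url": "tp5/amazon.com"}
)
# example_subject == "Performance regression found (score 2.5) on tp5/amazon.com"
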
if __name__ == '__main__':
    settings = startup.read_settings()
    Log.start(settings.debug)
    try:
        Log.note("Running alerts off of schema {{schema}}", {"schema": settings.perftest.schema})
        with DB(settings.alerts) as db:
            send_alerts(
                settings=settings,
                db=db
            )
    except Exception, e:
        Log.warning("Failure to run alerts", cause=e)
    finally:
        Log.stop()

def iterator():
    try:
        while True:
            try:
                line = proc.stdout.readline()
                if line == '':
                    proc.wait()
                    if proc.returncode:
                        Log.error("Unable to pull hg log: return code {{return_code}}", {
                            "return_code": proc.returncode
                        })
                    return
            except Exception, e:
                Log.error("Problem getting another line", e)

            if line.strip() == "":
                continue
            Log.note(line)

            # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
            # branch = "{branch}%0A"
            # parent = "{parent}%0A"
            # tag = "{tag}%0A"
            # child = "{child}%0A"
            (
                date,
                node,
                rev,
                author,
                branches,
                files,
                file_adds,
                file_dels,
                p1rev,
                p1node,
                parents,
                children,
                tags,
                desc
            ) = (CNV.latin12unicode(urllib.unquote(c)) for c in line.split("\t"))

            file_adds = set(file_adds.split("\n")) - {""}
            file_dels = set(file_dels.split("\n")) - {""}
            files = set(files.split("\n")) - set()
            doc = {
                "repo": repo.name,
                "date": CNV.unix2datetime(CNV.value2number(date.split(" ")[0])),
                "node": node,
                "revision": rev,
                "author": author,
                "branches": set(branches.split("\n")) - {""},
                "file_changes": files - file_adds - file_dels - {""},
                "file_adds": file_adds,
                "file_dels": file_dels,
                "parents": set(parents.split("\n")) - {""} | {p1rev + ":" + p1node},
                "children": set(children.split("\n")) - {""},
                "tags": set(tags.split("\n")) - {""},
                "description": desc
            }
            doc = ElasticSearch.scrub(doc)
            yield doc
    except Exception, e:
        if isinstance(e, ValueError) and e.message.startswith("need more than "):
            Log.error("Problem iterating through log ({{message}})", {
                "message": line
            }, e)
        Log.error("Problem iterating through log", e)

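# A hedged sketch (an assumption; only part of the real template survives in the comments
# above) of the kind of "hg log" invocation that could feed iterator(): each stdout line
# is one tab-separated, URL-escaped changeset. Because iterator() also unpacks files,
# file_adds and file_dels, the real template presumably emits {files}, {file_adds} and
# {file_dels} in the slots left empty in the commented template. The path is hypothetical.
import subprocess

CHANGESET_TEMPLATE = (
    "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}"
    "\t{files}\t{file_adds}\t{file_dels}"
    "\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
)
proc = subprocess.Popen(
    ["hg", "log", "--cwd", "./hg/mozilla-central", "--template", CHANGESET_TEMPLATE],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    bufsize=-1
)
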
def alert_sustained_median(settings, qb, alerts_db):
    """
    find single points that deviate from the trend
    """
    # OBJECTSTORE = settings.objectstore.schema + ".objectstore"
    # TDAD = settings.perftest.schema + ".test_data_all_dimensions"
    TDAD = settings.query["from"]
    PUSH_DATE = "datazilla.date_loaded"

    debug = nvl(settings.param.debug, DEBUG)
    query = settings.query

    def is_bad(r):
        if settings.param.sustained_median.trigger < r.result.confidence:
            test_param = nvl(settings.param.test[literal_field(r.Talos.Test.name)], settings.param.suite[literal_field(r.Talos.Test.suite)])

            if test_param == None:
                return True

            if test_param.disable:
                return False

            if test_param.better == "higher":
                diff = -r.diff
            elif test_param.better == "lower":
                diff = r.diff
            else:
                diff = abs(r.diff)  # DEFAULT = ANY DIRECTION IS BAD

            if test_param.min_regression:
                if unicode(test_param.min_regression.strip()[-1]) == "%":
                    min_diff = Math.abs(r.past_stats.mean * float(test_param.min_regression.strip()[:-1]) / 100.0)
                else:
                    min_diff = Math.abs(float(test_param.min_regression))
            else:
                min_diff = Math.abs(r.past_stats.mean * 0.01)

            if diff > min_diff:
                return True

        return False

    with Timer("pull combinations"):
        disabled_suites = [s for s, p in settings.param.suite.items() if p.disable]
        disabled_tests = [t for t, p in settings.param.test.items() if p.disable]
        temp = Query({
            "from": TDAD,
            "select": {"name": "min_push_date", "value": PUSH_DATE, "aggregate": "min"},
            "edges": query.edges,
            "where": {"and": [
                True if settings.args.restart else {"missing": {"field": settings.param.mark_complete}},
                {"exists": {"field": "result.test_name"}},
                {"range": {PUSH_DATE: {"gte": OLDEST_TS}}},
                {"not": {"terms": {"Talos.Test.suite": disabled_suites}}},
                {"not": {"terms": {"Talos.Test.name": disabled_tests}}}
                # {"term": {"testrun.suite": "cart"}},
                # {"term": {"result.test_name": "1-customize-enter.error.TART"}},
                # {"term": {"test_machine.osversion": "OS X 10.8"}}
                # FOR DEBUGGING SPECIFIC SERIES
                # {"term": {"test_machine.type": "hamachi"}},
                # {"term": {"test_machine.platform": "Gonk"}},
                # {"term": {"test_machine.os": "Firefox OS"}},
                # {"term": {"test_build.branch": "master"}},
                # {"term": {"testrun.suite": "communications/ftu"}},
                # {"term": {"result.test_name": "startup_time"}}
            ]},
            "limit": nvl(settings.param.combo_limit, 1000)
        }, qb)

        new_test_points = qb.query(temp)

    # BRING IN ALL NEEDED DATA
    if debug:
        Log.note("Pull all data for {{num}} groups:\n{{groups.name}}", {
            "num": len(new_test_points),
            "groups": query.edges
        })

    # all_min_date = Null
    all_touched = set()
    evaled_tests = set()
    alerts = []  # PUT ALL THE EXCEPTION ITEMS HERE
    for g, test_points in Q.groupby(new_test_points, query.edges):
        if not test_points.min_push_date:
            continue
        try:
            if settings.args.restart:
                first_sample = OLDEST_TS
            else:
                first_sample = MAX(MIN(test_points.min_push_date), OLDEST_TS)

            # FOR THIS g, HOW FAR BACK IN TIME MUST WE GO TO COVER OUR WINDOW_SIZE?
            first_in_window = qb.query({
                "select": {"name": "min_date", "value": "push_date", "aggregate": "min"},
                "from": {
                    "from": TDAD,
                    "select": {"name": "push_date", "value": PUSH_DATE},
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"lt": first_sample}}}
                    ]},
                    "sort": {"field": PUSH_DATE, "sort": -1},
                    "limit": settings.param.sustained_median.window_size * 2
                }
            })

            if len(first_in_window) > settings.param.sustained_median.window_size * 2:
                do_all = False
            else:
                do_all = True

            min_date = MIN(first_sample, first_in_window.min_date)

            # LOAD TEST RESULTS FROM DATABASE
            test_results = qb.query({
                "from": {
                    "from": "talos",
                    "select": [{"name": "push_date", "value": PUSH_DATE}] + query.select + query.edges,
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"gte": min_date}}}
                    ]},
                },
                "sort": "push_date"
            })

            Log.note("{{num}} test results found for {{group}} dating back no further than {{start_date}}", {
                "num": len(test_results),
                "group": g,
                "start_date": CNV.milli2datetime(min_date)
            })

            if debug:
                Log.note("Find sustained_median exceptions")

            # APPLY WINDOW FUNCTIONS
            stats = Q.run({
                "from": {
                    "from": test_results,
                    "where": {"exists": {"field": "value"}}
                },
                "window": [
                    {
                        # WE DO NOT WANT TO CONSIDER THE POINTS BEFORE FULL WINDOW SIZE
                        "name": "ignored",
                        "value": lambda r, i: False if do_all or i > settings.param.sustained_median.window_size else True
                    },
                    {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_min",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Min,
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    },
                    {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_max",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Max,
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    },
                    {
                        "name": "past_revision",
                        "value": lambda r, i, rows: rows[i - 1].Talos.Revision,
                        "sort": "push_date"
                    },
                    {
                        "name": "past_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    },
                    {
                        "name": "future_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    },
                    {
                        "name": "result",
                        "value": lambda r, i, rows: median_test(
                            rows[-settings.param.sustained_median.window_size + i:i:].value,
                            rows[i:settings.param.sustained_median.window_size + i:].value,
                            interpolate=False
                        ),
                        "sort": "push_date"
                    },
                    {
                        "name": "diff",
                        "value": lambda r: r.future_stats.mean - r.past_stats.mean
                    },
                    {
                        "name": "diff_percent",
                        "value": lambda r: (r.future_stats.mean - r.past_stats.mean) / r.past_stats.mean
                    },
                    {
                        "name": "is_diff",
                        "value": is_bad
                    },
                    {
                        # USE THIS TO FILL CONFIDENCE HOLES
                        # WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "future_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": "push_date"
                    },
                    {
                        # WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "past_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": {"value": "push_date", "sort": -1}
                    },
                ]
            })

            # PICK THE BEST SCORE FOR EACH is_diff==True REGION
            for g2, data in Q.groupby(stats, "is_diff", contiguous=True):
                if g2.is_diff:
                    best = Q.sort(data, ["result.confidence", "diff"]).last()
                    best["pass"] = True

            all_touched.update(Q.select(test_results, ["test_run_id", "Talos.Test"]))
            # TESTS THAT HAVE BEEN (RE)EVALUATED GIVEN THE NEW INFORMATION
            evaled_tests.update(Q.run({
                "from": test_results,
                "select": ["test_run_id", "Talos.Test"],
                "where": {"term": {"ignored": False}}
            }))

            File("test_values.txt").write(CNV.list2tab(Q.select(stats, [
                {"name": "push_date", "value": lambda x: CNV.datetime2string(CNV.milli2datetime(x.push_date), "%d-%b-%Y %H:%M:%S")},
                "value",
                {"name": "revision", "value": "Talos.Revision"},
                {"name": "confidence", "value": "result.confidence"},
                "pass"
            ])))

            # TESTS THAT HAVE SHOWN THEMSELVES TO BE EXCEPTIONAL
            new_exceptions = Q.filter(stats, {"term": {"pass": True}})
            for v in new_exceptions:
                if v.ignored:
                    continue
                alert = Struct(
                    status="new",
                    create_time=CNV.milli2datetime(v.push_date),
                    tdad_id={"test_run_id": v.test_run_id, "Talos": {"Test": v.Talos.Test}},
                    reason=REASON,
                    revision=v.Talos.Revision,
                    details=v,
                    severity=SEVERITY,
                    confidence=v.result.confidence
                )
                alerts.append(alert)

            if debug:
                Log.note("{{num}} new exceptions found", {"num": len(new_exceptions)})
        except Exception, e:
            Log.warning("Problem with alert identification, continue to log existing alerts and stop cleanly", e)
    if debug:
        Log.note("Get Current Alerts")

    # CHECK THE CURRENT ALERTS
    if not evaled_tests:
        current_alerts = StructList.EMPTY
    else:
        # THIS IS QUITE TOUCHY, IT DEPENDS ON THE JSON SERIALIZATION OF THE
        # GROUP (g) TO BE COMPLETE IN THE details COLUMN OF THE ALERTS DB
        # ANY EXTRA COLUMNS WILL CAUSE A MISMATCH
        # WE MUST DO THIS SO WE ONLY OBSOLETE THE ALERTS WE COULD HAVE
        # COVERED (BOTH IN TIME AND IN GROUPS)
        # or_list = []
        # for g in touched_groups:
        #     g = unwrap(g)
        #     output = Struct()
        #     for key, val in g.items():