def test_smaller_series(self):
    sample1 = [20, 20, 20, 20, 20, 21, 21, 21]
    sample2 = [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21]

    simple_result = median_test(sample1, sample2, interpolate=False)  # EXAMPLE OF GOING WRONG
    smooth_result = median_test(sample1, sample2)
    assert smooth_result["confidence"] < 0.90, "These are not different!"
def test_bimodal_series(self):
    # EVEN NUMBER FROM EACH SERIES, FIRST SERIES HAS SAMPLE FROM SECOND MODE
    sample1 = [
        43.35910744, 43.65596955, 43.6805196, 43.78713329, 43.54635098,
        43.9086471, 43.54120044, 43.27229271, 43.35015387, 40.03955818
    ]
    sample2 = [
        40.18726543, 40.71542234, 40.15441333, 39.95611288,
        38.30201645, 35.48697324, 40.16275306, 39.96934014
    ]
    smooth_result_a = median_test(sample1, sample2)
    assert smooth_result_a["confidence"] > 0.997

    # ODD NUMBER OF SAMPLES OVERALL; A NAIVE MEDIAN TEST WILL PICK THE MIDDLE VALUE
    # (FROM THE FIRST SERIES) AND ASSUME THAT MEDIAN IS 50/50 IN EITHER MODE
    sample1 = [
        43.41440184, 43.35910744, 43.65596955, 43.6805196, 43.78713329,
        43.54635098, 43.9086471, 43.54120044, 43.27229271, 43.35015387
    ]
    sample2 = [
        40.03955818, 40.18726543, 40.71542234, 40.15441333, 39.95611288,
        38.30201645, 35.48697324, 40.16275306, 39.96934014
    ]
    smooth_result_b = median_test(sample1, sample2)
    assert smooth_result_b["confidence"] > smooth_result_a["confidence"]
def test_tight_series(self):
    # MORE 20s IN sample1
    sample1 = [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21]
    sample2 = [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21]
    smooth_result = median_test(sample1, sample2)
    assert smooth_result["confidence"] < 0.90, "These are not different!"

    # MORE 21s IN sample2
    sample1 = [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21]
    sample2 = [20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21]
    smooth_result = median_test(sample1, sample2)
    assert smooth_result["confidence"] < 0.90, "These are not different!"
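
# A minimal sketch of the contract the tests above assume for median_test; the
# helper name and the [0, 1] range are assumptions inferred from the assertions,
# not taken from the module under test.  median_test() is called with two
# sequences of measurements plus an optional interpolate flag and is expected to
# return a dict-like result whose "confidence" value acts as a probability-like
# score that the two samples have different medians.  The tight integer series
# above rely on the default (interpolated) form; the interpolate=False call is
# the "EXAMPLE OF GOING WRONG".
def check_median_test_contract():
    sample1 = [20, 20, 20, 21, 21]
    sample2 = [20, 21, 21, 21, 21]
    smooth = median_test(sample1, sample2)                     # default, interpolated form
    simple = median_test(sample1, sample2, interpolate=False)  # uninterpolated form
    for result in (smooth, simple):
        assert 0.0 <= result["confidence"] <= 1.0              # assumed probability-like range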
def alert_sustained_median(settings, qb, alerts_db):
    """
    find single points that deviate from the trend
    """
    # OBJECTSTORE = settings.objectstore.schema + ".objectstore"
    # TDAD = settings.perftest.schema + ".test_data_all_dimensions"
    TDAD = settings.query["from"]
    PUSH_DATE = "datazilla.date_loaded"

    debug = nvl(settings.param.debug, DEBUG)
    query = settings.query

    def is_bad(r):
        if settings.param.sustained_median.trigger < r.result.confidence:
            test_param = nvl(
                settings.param.test[literal_field(r.Talos.Test.name)],
                settings.param.suite[literal_field(r.Talos.Test.suite)]
            )

            if test_param == None:
                return True

            if test_param.disable:
                return False

            if test_param.better == "higher":
                diff = -r.diff
            elif test_param.better == "lower":
                diff = r.diff
            else:
                diff = abs(r.diff)  # DEFAULT = ANY DIRECTION IS BAD

            if test_param.min_regression:
                if unicode(test_param.min_regression.strip()[-1]) == "%":
                    min_diff = Math.abs(r.past_stats.mean * float(test_param.min_regression.strip()[:-1]) / 100.0)
                else:
                    min_diff = Math.abs(float(test_param.min_regression))
            else:
                min_diff = Math.abs(r.past_stats.mean * 0.01)

            if diff > min_diff:
                return True

        return False

    with Timer("pull combinations"):
        disabled_suites = [s for s, p in settings.param.suite.items() if p.disable]
        disabled_tests = [t for t, p in settings.param.test.items() if p.disable]
        temp = Query({
            "from": TDAD,
            "select": {"name": "min_push_date", "value": PUSH_DATE, "aggregate": "min"},
            "edges": query.edges,
            "where": {"and": [
                True if settings.args.restart else {"missing": {"field": settings.param.mark_complete}},
                {"exists": {"field": "result.test_name"}},
                {"range": {PUSH_DATE: {"gte": OLDEST_TS}}},
                {"not": {"terms": {"Talos.Test.suite": disabled_suites}}},
                {"not": {"terms": {"Talos.Test.name": disabled_tests}}}
                # {"term": {"testrun.suite": "cart"}},
                # {"term": {"result.test_name": "1-customize-enter.error.TART"}},
                # {"term": {"test_machine.osversion": "OS X 10.8"}}  # FOR DEBUGGING SPECIFIC SERIES
                # {"term": {"test_machine.type": "hamachi"}},
                # {"term": {"test_machine.platform": "Gonk"}},
                # {"term": {"test_machine.os": "Firefox OS"}},
                # {"term": {"test_build.branch": "master"}},
                # {"term": {"testrun.suite": "communications/ftu"}},
                # {"term": {"result.test_name": "startup_time"}}
            ]},
            "limit": nvl(settings.param.combo_limit, 1000)
        }, qb)

        new_test_points = qb.query(temp)

    # BRING IN ALL NEEDED DATA
    if debug:
        Log.note("Pull all data for {{num}} groups:\n{{groups.name}}", {
            "num": len(new_test_points),
            "groups": query.edges
        })

    # all_min_date = Null
    all_touched = set()
    evaled_tests = set()
    alerts = []  # PUT ALL THE EXCEPTION ITEMS HERE

    for g, test_points in Q.groupby(new_test_points, query.edges):
        if not test_points.min_push_date:
            continue
        try:
            if settings.args.restart:
                first_sample = OLDEST_TS
            else:
                first_sample = MAX(MIN(test_points.min_push_date), OLDEST_TS)
            # FOR THIS g, HOW FAR BACK IN TIME MUST WE GO TO COVER OUR WINDOW_SIZE?
            first_in_window = qb.query({
                "select": {"name": "min_date", "value": "push_date", "aggregate": "min"},
                "from": {
                    "from": TDAD,
                    "select": {"name": "push_date", "value": PUSH_DATE},
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"lt": first_sample}}}
                    ]},
                    "sort": {"field": PUSH_DATE, "sort": -1},
                    "limit": settings.param.sustained_median.window_size * 2
                }
            })

            if len(first_in_window) > settings.param.sustained_median.window_size * 2:
                do_all = False
            else:
                do_all = True

            min_date = MIN(first_sample, first_in_window.min_date)

            # LOAD TEST RESULTS FROM DATABASE
            test_results = qb.query({
                "from": {
                    "from": "talos",
                    "select": [{"name": "push_date", "value": PUSH_DATE}] + query.select + query.edges,
                    "where": {"and": [
                        {"term": g},
                        {"range": {PUSH_DATE: {"gte": min_date}}}
                    ]}
                },
                "sort": "push_date"
            })

            Log.note("{{num}} test results found for {{group}} dating back no further than {{start_date}}", {
                "num": len(test_results),
                "group": g,
                "start_date": CNV.milli2datetime(min_date)
            })

            if debug:
                Log.note("Find sustained_median exceptions")

            # APPLY WINDOW FUNCTIONS
            stats = Q.run({
                "from": {
                    "from": test_results,
                    "where": {"exists": {"field": "value"}}
                },
                "window": [
                    {
                        # WE DO NOT WANT TO CONSIDER THE POINTS BEFORE FULL WINDOW SIZE
                        "name": "ignored",
                        "value": lambda r, i: False if do_all or i > settings.param.sustained_median.window_size else True
                    },
                    {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_min",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Min,
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    },
                    {
                        # SO WE CAN SHOW A DATAZILLA WINDOW
                        "name": "push_date_max",
                        "value": lambda r: r.push_date,
                        "sort": "push_date",
                        "aggregate": windows.Max,
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    },
                    {
                        "name": "past_revision",
                        "value": lambda r, i, rows: rows[i - 1].Talos.Revision,
                        "sort": "push_date"
                    },
                    {
                        "name": "past_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": -settings.param.sustained_median.window_size, "max": 0}
                    },
                    {
                        "name": "future_stats",
                        "value": lambda r: r.value,
                        "sort": "push_date",
                        "aggregate": windows.Stats(middle=0.60),
                        "range": {"min": 0, "max": settings.param.sustained_median.window_size}
                    },
                    {
                        "name": "result",
                        "value": lambda r, i, rows: median_test(
                            rows[-settings.param.sustained_median.window_size + i:i].value,
                            rows[i:settings.param.sustained_median.window_size + i].value,
                            interpolate=False
                        ),
                        "sort": "push_date"
                    },
                    {
                        "name": "diff",
                        "value": lambda r: r.future_stats.mean - r.past_stats.mean
                    },
                    {
                        "name": "diff_percent",
                        "value": lambda r: (r.future_stats.mean - r.past_stats.mean) / r.past_stats.mean
                    },
                    {
                        "name": "is_diff",
                        "value": is_bad
                    },
                    {
                        # USE THIS TO FILL CONFIDENCE HOLES
                        # WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "future_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": "push_date"
                    },
                    {
                        # WE CAN MARK IT is_diff KNOWING THERE IS A HIGHER CONFIDENCE
                        "name": "past_is_diff",
                        "value": lambda r, i, rows: rows[i - 1].is_diff and r.result.confidence < rows[i - 1].result.confidence,
                        "sort": {"value": "push_date", "sort": -1}
                    },
                ]
            })

            # PICK THE BEST SCORE FOR EACH is_diff==True REGION
            for g2, data in Q.groupby(stats, "is_diff", contiguous=True):
                if g2.is_diff:
                    best = Q.sort(data, ["result.confidence", "diff"]).last()
                    best["pass"] = True

            all_touched.update(Q.select(test_results, ["test_run_id", "Talos.Test"]))

            # TESTS THAT HAVE BEEN (RE)EVALUATED GIVEN THE NEW INFORMATION
            evaled_tests.update(Q.run({
                "from": test_results,
                "select": ["test_run_id", "Talos.Test"],
                "where": {"term": {"ignored": False}}
            }))

            File("test_values.txt").write(CNV.list2tab(Q.select(stats, [
                {"name": "push_date", "value": lambda x: CNV.datetime2string(CNV.milli2datetime(x.push_date), "%d-%b-%Y %H:%M:%S")},
                "value",
                {"name": "revision", "value": "Talos.Revision"},
                {"name": "confidence", "value": "result.confidence"},
                "pass"
            ])))

            # TESTS THAT HAVE SHOWN THEMSELVES TO BE EXCEPTIONAL
            new_exceptions = Q.filter(stats, {"term": {"pass": True}})

            for v in new_exceptions:
                if v.ignored:
                    continue
                alert = Struct(
                    status="new",
                    create_time=CNV.milli2datetime(v.push_date),
                    tdad_id={"test_run_id": v.test_run_id, "Talos": {"Test": v.Talos.Test}},
                    reason=REASON,
                    revision=v.Talos.Revision,
                    details=v,
                    severity=SEVERITY,
                    confidence=v.result.confidence
                )
                alerts.append(alert)

            if debug:
                Log.note("{{num}} new exceptions found", {"num": len(new_exceptions)})
        except Exception, e:
            Log.warning("Problem with alert identification, continue to log existing alerts and stop cleanly", e)
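
# A self-contained sketch (stdlib only; detect_step and window_size are
# illustrative names, not part of the module above) of the sliding-window idea
# behind the "past_stats"/"future_stats"/"result" windows: at each index i the
# window_size points before i are compared against the window_size points from
# i onward.  A plain mean gap stands in here for the median_test confidence
# score used by the real code.
def detect_step(values, window_size):
    """Return (index, past_window, future_window) at the largest mean shift."""
    best = None
    for i in range(window_size, len(values) - window_size + 1):
        past = values[i - window_size:i]      # points strictly before i
        future = values[i:i + window_size]    # point i and the ones after it
        shift = abs(sum(future) / float(window_size) - sum(past) / float(window_size))
        if best is None or shift > best[0]:
            best = (shift, i, past, future)
    if best is None:
        return None  # NOT ENOUGH POINTS FOR TWO FULL WINDOWS
    return best[1], best[2], best[3]

# EXAMPLE: THE SUSTAINED JUMP FROM ~20 TO ~25 IS LOCATED AT THE FIRST 25
# detect_step([20, 20, 21, 20, 25, 25, 26, 25], window_size=3) -> (4, [20, 21, 20], [25, 25, 26])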