Example #1
    def test_normal_and_small(self):
        data = [
            593, 543.5, 660.5, 612, 549.5, 561, 456, 387.5, 451.5, 390, 424.5,
            490, 446, 504, 470.5, 417.5, 517, 806.5, 413.5, 625, 494.5, 479.5,
            421, 467, 432.5, 537, 472, 618, 372.5, 474.5, 479.5, 413.5, 442.5,
            666, 453, 441, 454.5, 464, 589, 435.5, 392.5, 265.5, 471, 266,
            631.5, 422.5, 389.5, 430.5, 418, 441, 364, 269, 394, 589, 254.5,
            427, 397.5, 398, 454, 461.5, 428.5, 393.5, 458, 466, 271, 479,
            392.5, 385.5, 399.5, 450, 388, 468, 486, 381, 399, 389, 417, 473.5,
            514, 268.5, 453, 452.5, 390, 271, 271, 403, 462.5, 405, 403, 415.5,
            388, 264, 492.5, 435, 471.5, 457, 494, 427, 433, 431
        ]
        plot(data)
        description, score = deviance(data)
        self.assertEqual(description, "OK")

        data = [
            379.5, 381, 381.5, 370, 371.5, 367, 368.5, 372.5, 258, 361.5, 373,
            260, 365.5, 366.5, 366.5, 366.5, 369.5, 366.5, 359.5, 357.5, 365,
            363.5, 359.5, 360.5, 264.5, 360.5, 357.5, 370, 372, 376.5, 363,
            362, 263, 355.5, 368.5, 374, 265, 328.5, 359.5, 369, 368.5, 361.5,
            369, 370.5, 364.5, 365, 339.5, 257, 372
        ]
        plot(data)
        description, score = deviance(data)
        self.assertEqual(description, "SKEWED")
Example #2
    def test_distracting_point(self):
        data = [
            333.5, 322.5, 316.5, 326, 321, 330, 345.5, 668.5, 342, 319.5,
            323.5, 320.5, 328, 340.5, 339, 322, 350.5, 284.5, 331, 362,
            323.5, 323, 317.5, 338
        ]
        plot(data)
        description, score = deviance(data)
        self.assertEqual(description, "OK")  # SINGLE-POINT ERRORS WILL NOT BE DETECTED

        data = [
            454, 751.5, 715, 731.5, 443, 425, 729.5, 709, 739.5, 733.5,
            791, 720.5
        ]
        plot(data)
        description, score = deviance(data)
        self.assertEqual(description, "OK")
Example #3
    def test_normal(self):
        data = [
            229.5, 244, 226.5, 245, 234.5, 228, 231.5, 242, 250.5, 237.5,
            227, 245, 226.5, 238, 231.5, 233.5, 231.5, 230, 231, 242.5,
            242, 239.5, 243.5, 234, 233.5, 241.5, 241.5, 236.5, 243, 240.5,
            241, 247, 253, 244, 241.5, 226, 223.5, 221.5, 238.5, 234.5,
            242, 223.5, 220.5, 230, 235.5, 227.5, 241, 232.5, 239.5, 228.5,
            234.5, 238.5, 246, 228.5, 263.5, 244, 229.5, 249, 234.5, 248,
            231.5, 225.5, 247.5, 250, 249.5, 242, 228.5, 232.5, 229.5, 242,
            244, 203.5, 246, 240.5, 239, 245, 238.5, 244.5, 244, 251.5,
            241.5, 248.5, 239.5, 237, 234.5, 244, 224.5, 240, 238, 248,
            229, 243, 250, 230.5, 240, 244.5, 229, 248, 237.5, 241,
            232, 247.5, 236, 234, 242, 241.5, 245.5, 235.5, 242, 234,
            248.5, 249.5, 230.5, 227, 238, 246, 225, 243.5, 226.5, 233.5,
            235.5, 228, 244.5, 228, 241.5, 237.5, 240, 244, 237, 246,
            239.5, 238.5, 244, 238.5, 248, 245.5, 247, 244, 253.5, 245.5,
            256, 242.5, 248.5, 250, 246.5, 249.5, 234, 250, 252, 250,
            243, 236, 237.5, 252.5, 245, 248, 230, 246, 250.5, 247,
            246, 255, 240, 246.5, 240, 233.5, 233.5, 244, 239, 247.5,
            241, 241, 237, 240.5, 239.5, 227.5, 242.5, 248, 230.5, 248,
            229.5, 239.5, 248.5, 237.5, 244.5, 253, 236, 239.5, 245, 228,
            249, 246, 235, 234, 241, 240, 237.5, 245, 242.5, 249
        ]
        plot(data)
        description, score = deviance(data)
        self.assertEqual(description, "OK")

        data = [
            379.5, 381, 381.5, 370, 371.5, 367, 368.5, 372.5, 258, 361.5,
            373, 260, 365.5, 366.5, 366.5, 366.5, 369.5, 366.5, 359.5, 357.5,
            365, 363.5, 359.5, 360.5, 264.5, 360.5, 357.5, 370, 372, 376.5,
            363, 362, 263, 355.5, 368.5, 374, 265, 328.5, 359.5, 369,
            368.5, 361.5, 369, 370.5, 364.5, 365, 339.5, 257, 372
        ]
        plot(data)
        description, score = deviance(data)
        self.assertEqual(description, "SKEWED")
Example #4
    def test_normal(self):
        num = 1000

        results = []
        for i in range(0, num):
            samples = numpy.random.normal(size=20)
            desc, score = deviance(samples)
            results.append(desc)

        self.assertEqual(list(jx.groupby(["ok", "not ok"])), [("not ok", ["not ok"]), ("ok", ["ok"])], "expecting version >=2.53")

        # normal samples are OK
        for desc, n in jx.groupby(results):
            Log.note("{{desc}}: {{count}}", desc=desc, count=len(n))
            if desc == "OK":
                self.assertLessEqual(num * 0.99, len(n))
Example #5
    def test_bimodal(self):
        data = [
            2178, 1288, 1211, 1265, 2361, 2275, 1197, 1241, 2317, 1309, 2368,
            1190, 1245, 2340, 1264, 2395, 2326, 2260, 2347, 1341, 1202, 1367,
            2283, 2419, 2411, 2291, 2289, 2269, 2321, 1246, 1220, 1301, 2345,
            2208, 2360, 2385, 1201, 2281, 1235, 1243, 1265, 1262, 1273, 2342,
            2259, 2312, 1192, 1391, 1259, 1226, 1283, 2289, 1359, 1442, 2528,
            2397, 2376, 2350, 1313, 2322, 2291, 2241, 2273, 2291, 1231, 1220,
            2272, 2328, 2418, 2435, 1278, 2444, 2341, 2332, 2298, 2316, 2398,
            1198, 2393, 1289, 1241, 1170, 1336, 1255, 2360, 2287, 1240, 2260,
            1323, 1231, 1272, 1183, 2285, 2258, 2307, 2307, 2165, 2406, 2295,
            1234, 1189, 1186, 2320, 2312, 2400, 2321, 1170, 2318, 1209, 1270,
            2374, 2299, 1206, 1225, 2329, 2324, 1248, 2346, 2285, 2247
        ]
        plot(data)
        description, score = deviance(data)
        self.assertEqual(description, "MODAL")
Example #6
    def test_imbalance(self):
        data = [
            425, 430.5, 429, 422, 424.5, 436, 426.5, 425.5, 426, 781.5, 427.5,
            420, 431, 425.5, 784, 423.5, 421, 780.5, 427.5, 426, 425, 423,
            421.5, 424, 421.5, 425.5, 429.5, 425.5, 423.5, 426, 430.5, 423.5,
            787, 432, 432, 431, 427, 438.5, 426.5, 807.5, 431, 450, 434, 427.5,
            422.5, 432.5, 442, 427, 443, 439, 434, 446, 431, 443.5, 432, 424,
            434.5, 424, 431, 428.5, 418, 430, 426.5, 428.5, 423, 422.5, 429.5,
            425, 783.5, 429, 432, 443, 427.5, 434.5, 427.5, 428.5, 419.5,
            458.5, 426, 427.5, 431, 431.5, 428, 428.5, 424, 427.5, 427.5, 419,
            776, 414.5, 420.5, 418, 423.5, 417.5, 419, 454, 416.5, 419, 418.5,
            763.5, 785.5, 418.5, 413, 418.5, 431, 425.5, 429, 419, 427.5, 428,
            429.5, 423.5, 430.5, 426, 423.5, 419, 795.5, 427.5, 422, 429.5
        ]
        plot(data)
        description, score = deviance(data)
        self.assertEqual(description, "OUTLIERS")
Example #7
    def test_alert(self):
        num_pre = 25
        num_post = 12
        num_resume = 2
        pre_samples = numpy.random.normal(loc=10, size=num_pre)
        post_samples = numpy.random.normal(loc=100, size=num_post - num_resume)
        resume_samples = numpy.random.normal(loc=10, size=num_resume)

        samples = list(pre_samples) + list(post_samples) + list(resume_samples)
        plot(samples)

        result, changes = perfherder_alert(samples)
        alert = any(changes)
        self.assertTrue(alert)

        desc, score = deviance(list(post_samples) + list(resume_samples))
        self.assertEqual("OUTLIERS", desc)
Example #8
    def test_one_bad_point(self):
        data = [
            3117, 3215, 3219, 3174, 3011, 3017, 2984, 3075, 3248, 3120, 3158,
            2994, 3224, 3105, 3131, 3141, 3033, 2986, 3184, 3235, 3190, 3100,
            3359, 3098, 3279, 3165, 3270, 3213, 3223, 3079, 3157, 3256, 3090,
            2984, 3131, 3029, 3121, 3006, 3278, 3043, 3042, 2963, 2974, 3401,
            3226, 3307, 3092, 3156, 3291, 3030, 3162, 3154, 3072, 3265, 3284,
            3182, 2985, 2967, 3191, 3278, 3210, 3234, 3037, 3189, 3046, 2992,
            2994, 3249, 3150, 3126, 3068, 3185, 3249, 3209, 3257, 2964, 3199,
            3320, 3070, 3261, 3171, 3240, 3136, 3017, 3167, 3043, 3278, 3047,
            3272, 8104, 3103, 3163, 3200, 3233, 3162, 3366, 3213, 3047, 3018,
            3042, 3138, 3065, 3235, 3370, 3020, 3120, 3201, 3008, 3084, 3259,
            3073, 3271, 3036, 3306, 2998, 3260, 3187, 3079, 3146, 3007, 3196,
            3126, 3097, 3074, 3323, 3169, 3223, 3216, 3238, 3034, 3255, 3083,
            3208, 3071, 3243, 3192, 3284, 3241, 3190, 3062, 3376, 3277, 3222,
            3313, 3036, 3113, 3155, 3129, 3065, 3229, 2969, 3016, 3116, 3015,
            3204, 3000, 3318, 3125, 3329, 3055
        ]
        plot(data)
        description, score = deviance(data)
        self.assertEqual(description, "OK")
Example #9
def process(
    about_deviant,
    since,
    source,
    deviant_summary,
    show=False,
    show_limit=MAX_POINTS,
    show_old=False,
    show_distribution=None,
):
    """
    :param signature_hash: The performance hash
    :param since: Only data after this date
    :param show:
    :param show_limit:
    :param show_old:
    :param show_distribution:
    :return:
    """
    sig_id = about_deviant.id
    if not isinstance(sig_id, int):
        Log.error("expecting id")

    # GET SIGNATURE DETAILS
    sig = get_signature(db_config=source, signature_id=sig_id)

    # GET PERFORMANCE DATA
    data = get_dataum(source, sig.id, since=since, limit=show_limit)

    min_date = since.unix
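    # ONE POINT PER PUSH: TAKE THE MEDIAN OF ALL RUNS AT EACH PUSH TIME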
    pushes = jx.sort(
        [{
            "value": median(rows.value),
            "runs": rows,
            "push": {
                "time": unwrap(t)["push.time"]
            },
        } for t, rows in jx.groupby(data, "push.time")
         if t["push\\.time"] > min_date],
        "push.time",
    )

    values = list(pushes.value)
    title = "-".join(
        map(
            str,
            [
                sig.id,
                sig.framework,
                sig.suite,
                sig.test,
                sig.repository,
                sig.platform,
                about_deviant.overall_dev_status,
            ],
        ))
    # EG https://treeherder.mozilla.org/perf.html#/graphs?highlightAlerts=1&series=mozilla-central,fee739b45f7960e4a520d8e0bd781dd9d0a3bec4,1,10&timerange=31536000
    url = "https://treeherder.mozilla.org/perf.html#/graphs?" + value2url_param(
        {
            "highlightAlerts":
            1,
            "series": [
                sig.repository, sig.id, 1,
                coalesce(sig.framework_id, sig.framework)
            ],
            "timerange":
            Duration(TREEHERDER_RANGE).seconds
        })

    Log.note("With {{title}}: {{url}}", title=title, url=url)

    with Timer("find segments"):
        new_segments, new_diffs = find_segments(values, sig.alert_change_type,
                                                sig.alert_threshold)

    # USE PERFHERDER ALERTS TO IDENTIFY OLD SEGMENTS
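    # BOUNDARIES ARE THE PUSH INDEXES THAT CARRY A PERFHERDER ALERT, PLUS
    # THE SERIES ENDPOINTS 0 AND len(pushes)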
    old_segments = tuple(
        sorted(
            set([
                i for i, p in enumerate(pushes) if any(r.alert.id
                                                       for r in p.runs)
            ] + [0, len(pushes)])))
    old_medians = [0.0] + [
        np.median(values[s:e])
        for s, e in zip(old_segments[:-1], old_segments[1:])
    ]
    old_diffs = np.array(
        [b / a - 1 for a, b in zip(old_medians[:-1], old_medians[1:])] + [0])

    if len(new_segments) == 1:
        overall_dev_status = None
        overall_dev_score = None
        last_mean = None
        last_std = None
        last_dev_status = None
        last_dev_score = None
        relative_noise = None
        Log.note("not ")
    else:
        # NOISE OF LAST SEGMENT
        s, e = new_segments[-2], new_segments[-1]
        last_segment = np.array(values[s:e])
        ignore = IGNORE_TOP
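        # DROP THE ignore SMALLEST AND ignore LARGEST VALUES TO LIMIT THE
        # INFLUENCE OF OUTLIERS ON THE NOISE MEASURE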
        trimmed_segment = last_segment[np.argsort(last_segment)
                                       [ignore:-ignore]]
        last_mean = np.mean(trimmed_segment)
        last_std = np.std(trimmed_segment)
        last_dev_status, last_dev_score = deviance(trimmed_segment)
        relative_noise = last_std / last_mean

        # FOR EACH SEGMENT, NORMALIZE MEAN AND VARIANCE
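        # SHIFT AND RESCALE EACH SEGMENT TO THE LAST SEGMENT'S MEAN AND
        # STANDARD DEVIATION SO deviance() SEES ONE HOMOGENEOUS SAMPLE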
        normalized = []
        for s, e in jx.pairs(new_segments):
            data = np.array(values[s:e])
            norm = (data - np.mean(data)) / np.std(data) * last_std + last_mean
            normalized.extend(norm)

        overall_dev_status, overall_dev_score = deviance(normalized)
        Log.note(
            "\n\tdeviance = {{deviance}}\n\tnoise={{std}}\n\tpushes={{pushes}}\n\tsegments={{num_segments}}",
            title=title,
            deviance=(overall_dev_status, overall_dev_score),
            std=relative_noise,
            pushes=len(values),
            num_segments=len(new_segments) - 1,
        )

        if show_distribution:
            histogram(trimmed_segment,
                      title=last_dev_status + "=" + text(last_dev_score))

    max_extra_diff = None
    max_missing_diff = None
    _is_diff = is_diff(new_segments, old_segments)
    if _is_diff:
        # FOR EXTRA AND MISSING POINTS, CALC THE BIGGEST DIFF
        max_extra_diff = mo_math.MAX(
            abs(d) for s, d in zip(new_segments, new_diffs)
            if all(not (s - TOLERANCE <= o <= s + TOLERANCE)
                   for o in old_segments))
        max_missing_diff = mo_math.MAX(
            abs(d) for s, d in zip(old_segments, old_diffs)
            if all(not (s - TOLERANCE <= n <= s + TOLERANCE)
                   for n in new_segments))

        Log.alert(
            "Disagree max_extra_diff={{max_extra_diff|round(places=3)}}, max_missing_diff={{max_missing_diff|round(places=3)}}",
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
        )
        Log.note("old={{old}}, new={{new}}",
                 old=old_segments,
                 new=new_segments)
    else:
        Log.note("Agree")

    if show and len(pushes):
        show_old and assign_colors(values, old_segments, title="OLD " + title)
        assign_colors(values, new_segments, title="NEW " + title)
        if url:
            webbrowser.open(url)

    if isinstance(deviant_summary, bigquery.Table):
        Log.note("BigQuery summary not updated")
        return

    deviant_summary.upsert(
        where={"eq": {
            "id": sig.id
        }},
        doc=Data(
            id=sig_id,
            title=title,
            num_pushes=len(values),
            num_segments=len(new_segments) - 1,
            relative_noise=relative_noise,
            overall_dev_status=overall_dev_status,
            overall_dev_score=overall_dev_score,
            last_mean=last_mean,
            last_std=last_std,
            last_dev_status=last_dev_status,
            last_dev_score=last_dev_score,
            last_updated=Date.now(),
            is_diff=_is_diff,
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
            num_new_segments=len(new_segments),
            num_old_segments=len(old_segments),
        ),
    )
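process() above (and the variants in the later examples) depends on find_segments(values, change_type, threshold), assumed to return segment boundaries (starting at 0 and ending at len(values)) together with the relative difference across each boundary. A hypothetical median-based sketch; the real function honors sig.alert_change_type and sig.alert_threshold:

    import numpy as np

    def sketch_find_segments(values, threshold=0.05, window=10):
        values = np.asarray(values, dtype=float)
        boundaries = [0]
        for i in range(window, len(values) - window):
            left = np.median(values[i - window:i])
            right = np.median(values[i:i + window])
            # open a new segment where the local median jumps by more than
            # threshold (assumes strictly positive measurements)
            if abs(right / left - 1) > threshold and i - boundaries[-1] >= window:
                boundaries.append(i)
        boundaries.append(len(values))
        medians = [np.median(values[s:e]) for s, e in zip(boundaries, boundaries[1:])]
        diffs = np.array([0.0] + [b / a - 1 for a, b in zip(medians, medians[1:])])
        return tuple(boundaries), diffs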
Example #10
def process(sig_id,
            show=False,
            show_limit=MAX_POINTS,
            show_old=True,
            show_distribution=None):
    if not mo_math.is_integer(sig_id):
        Log.error("expecting integer id")
    sig = first(get_signature(config.database, sig_id))
    data = get_dataum(config.database, sig_id)

    min_date = (Date.today() - 3 * MONTH).unix
    pushes = jx.sort(
        [{
            "value": median(rows.value),
            "runs": rows,
            "push": {
                "time": unwrap(t)["push.time"]
            },
        } for t, rows in jx.groupby(data, "push.time")
         if t["push\\.time"] > min_date],
        "push.time",
    )

    values = pushes.value
    title = "-".join(
        map(
            text,
            [
                sig.id,
                sig.framework,
                sig.suite,
                sig.test,
                sig.platform,
                sig.repository.name,
            ],
        ))
    Log.note("With {{title}}", title=title)

    with Timer("find segments"):
        new_segments, new_diffs = find_segments(values, sig.alert_change_type,
                                                sig.alert_threshold)

    # USE PERFHERDER ALERTS TO IDENTIFY OLD SEGMENTS
    old_segments = tuple(
        sorted(
            set([
                i for i, p in enumerate(pushes) if any(r.alert.id
                                                       for r in p.runs)
            ] + [0, len(pushes)])))
    old_medians = [0] + [
        np.median(values[s:e])
        for s, e in zip(old_segments[:-1], old_segments[1:])
    ]
    old_diffs = np.array(
        [b / a - 1 for a, b in zip(old_medians[:-1], old_medians[1:])] + [0])

    if len(new_segments) == 1:
        dev_status = None
        dev_score = None
        relative_noise = None
    else:
        # MEASURE DEVIANCE (USE THE LAST SEGMENT)
        s, e = new_segments[-2], new_segments[-1]
        last_segment = np.array(values[s:e])
        trimmed_segment = last_segment[np.argsort(last_segment)
                                       [IGNORE_TOP:-IGNORE_TOP]]
        dev_status, dev_score = deviance(trimmed_segment)
        relative_noise = np.std(trimmed_segment) / np.mean(trimmed_segment)
        Log.note(
            "\n\tdeviance = {{deviance}}\n\tnoise={{std}}",
            title=title,
            deviance=(dev_status, dev_score),
            std=relative_noise,
        )

        if show_distribution:
            histogram(last_segment, title=dev_status + "=" + text(dev_score))

    max_extra_diff = None
    max_missing_diff = None
    _is_diff = is_diff(new_segments, old_segments)
    if _is_diff:
        # FOR EXTRA AND MISSING POINTS, CALC THE BIGGEST DIFF
        max_extra_diff = mo_math.MAX(
            abs(d) for s, d in zip(new_segments, new_diffs)
            if all(not (s - TOLERANCE <= o <= s + TOLERANCE)
                   for o in old_segments))
        max_missing_diff = mo_math.MAX(
            abs(d) for s, d in zip(old_segments, old_diffs)
            if all(not (s - TOLERANCE <= n <= s + TOLERANCE)
                   for n in new_segments))

        Log.alert(
            "Disagree max_extra_diff={{max_extra_diff|round(places=3)}}, max_missing_diff={{max_missing_diff|round(places=3)}}",
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
        )
        Log.note("old={{old}}, new={{new}}",
                 old=old_segments,
                 new=new_segments)
        if show and len(pushes):
            show_old and assign_colors(
                values, old_segments, title="OLD " + title)
            assign_colors(values, new_segments, title="NEW " + title)
    else:
        Log.note("Agree")
        if show and len(pushes):
            show_old and assign_colors(
                values, old_segments, title="OLD " + title)
            assign_colors(values, new_segments, title="NEW " + title)

    summary_table.upsert(
        where={"eq": {
            "id": sig.id
        }},
        doc=Data(
            id=sig.id,
            title=title,
            num_pushes=len(pushes),
            is_diff=_is_diff,
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
            num_new_segments=len(new_segments),
            num_old_segments=len(old_segments),
            relative_noise=relative_noise,
            dev_status=dev_status,
            dev_score=dev_score,
            last_updated=Date.now(),
        ),
    )
Example #11
def generate_new_alerts_in_series(signature):
    # get series data starting from either:
    # (1) the last alert, if there is one
    # (2) the alerts max age
    # (use whichever is newer)
    max_alert_age = datetime.now() - settings.PERFHERDER_ALERTS_MAX_AGE
    series = PerformanceDatum.objects.filter(signature=signature,
                                             push_timestamp__gte=max_alert_age)
    latest_alert_timestamp = (
        PerformanceAlert.objects.filter(series_signature=signature)
        .select_related('summary__push__time')
        .order_by('-summary__push__time')
        .values_list('summary__push__time', flat=True)[:1]
    )
    if latest_alert_timestamp:
        series = series.filter(push_timestamp__gt=latest_alert_timestamp[0])

    revision_data = {}
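    # group the measured values by push, so each RevisionDatum carries all
    # replicate values for a single push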
    for d in series:
        if not revision_data.get(d.push_id):
            revision_data[d.push_id] = RevisionDatum(
                int(time.mktime(d.push_timestamp.timetuple())), d.push_id, [])
        revision_data[d.push_id].values.append(d.value)

    min_back_window = signature.min_back_window
    if min_back_window is None:
        min_back_window = settings.PERFHERDER_ALERTS_MIN_BACK_WINDOW
    max_back_window = signature.max_back_window
    if max_back_window is None:
        max_back_window = settings.PERFHERDER_ALERTS_MAX_BACK_WINDOW
    fore_window = signature.fore_window
    if fore_window is None:
        fore_window = settings.PERFHERDER_ALERTS_FORE_WINDOW
    alert_threshold = signature.alert_threshold
    if alert_threshold is None:
        alert_threshold = settings.PERFHERDER_REGRESSION_THRESHOLD

    data = revision_data.values()
    analyzed_series = detect_changes(
        data,
        min_back_window=min_back_window,
        max_back_window=max_back_window,
        fore_window=fore_window,
    )

    with transaction.atomic():
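        # walk consecutive (prev, cur) pairs; an alert is considered where
        # cur shows a detected change relative to its back window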
        for (prev, cur) in zip(analyzed_series, analyzed_series[1:]):
            if cur.change_detected:
                prev_value = cur.historical_stats['avg']
                new_value = cur.forward_stats['avg']
                alert_properties = get_alert_properties(
                    prev_value, new_value, signature.lower_is_better)

                noise_profile = "N/A"
                try:
                    # Gather all data up to, but not including, the data
                    # point that shows the regression, and compute a noise
                    # profile from it; stopping there keeps this alert (and
                    # any later ones) from influencing the profile.
                    noise_data = []
                    for point in analyzed_series:
                        if point == cur:
                            break
                        noise_data.append(geomean(point.values))

                    noise_profile, _ = deviance(noise_data)

                    if not isinstance(noise_profile, str):
                        raise Exception(
                            "Expecting a string as a "
                            f"noise profile, got: {type(noise_profile)}")
                except Exception:
                    # Fail without breaking the alert computation
                    newrelic.agent.record_exception()
                    logger.error("Failed to obtain a noise profile.")

                # ignore regressions below the configured regression
                # threshold
                if ((signature.alert_change_type is None
                     or signature.alert_change_type
                     == PerformanceSignature.ALERT_PCT)
                        and alert_properties.pct_change < alert_threshold) or (
                            signature.alert_change_type
                            == PerformanceSignature.ALERT_ABS
                            and alert_properties.delta < alert_threshold):
                    continue

                summary, _ = PerformanceAlertSummary.objects.get_or_create(
                    repository=signature.repository,
                    framework=signature.framework,
                    push_id=cur.push_id,
                    prev_push_id=prev.push_id,
                    defaults={
                        'manually_created': False,
                        'created':
                        datetime.utcfromtimestamp(cur.push_timestamp),
                    },
                )

                # django/mysql doesn't understand "inf", so just use some
                # arbitrarily high value for that case
                t_value = cur.t
                if t_value == float('inf'):
                    t_value = 1000

                PerformanceAlert.objects.update_or_create(
                    summary=summary,
                    series_signature=signature,
                    defaults={
                        'noise_profile': noise_profile,
                        'is_regression': alert_properties.is_regression,
                        'amount_pct': alert_properties.pct_change,
                        'amount_abs': alert_properties.delta,
                        'prev_value': prev_value,
                        'new_value': new_value,
                        't_value': t_value,
                    },
                )
Example #12
def process(
    sig_id,
    since,
    source,
    destination,
):
    """
    :param sig_id: The performance hash
    :param since: Only data after this date
    :param show:
    :param show_limit:
    :param show_old:
    :param show_distribution:
    :return:
    """
    if not isinstance(sig_id, int):
        Log.error("expecting id")

    # GET SIGNATURE DETAILS
    sig = get_signature(source, sig_id)

    # GET PUSH DATA
    pushes = get_dataum(source, sig_id, since, LIMIT)

    pushes = jx.sort(
        [{
            "value": median(rows.value),
            "runs": rows,
            "push": {
                "time": unwrap(t)["push.time"]
            },
        } for t, rows in jx.groupby(pushes, "push.time")
         if t["push\\.time"] > since],
        "push.time",
    )

    values = list(pushes.value)
    title = "-".join(
        map(
            str,
            [
                sig.framework,
                sig.suite,
                sig.test,
                sig.platform,
                sig.repository,
            ],
        ))
    Log.note("With {{title}}", title=title)

    if len(values) > LIMIT:
        Log.alert(
            "Too many values for {{title}} ({at least {num}}), choosing last {{limit}}",
            title=title,
            num=len(values),
            limit=LIMIT,
        )
        values = values[-LIMIT:]

    with Timer("find segments"):
        new_segments, new_diffs = find_segments(values, sig.alert_change_type,
                                                sig.alert_threshold)

    if len(new_segments) == 1:
        overall_dev_status = None
        overall_dev_score = None
        last_mean = None
        last_std = None
        last_dev_status = None
        last_dev_score = None
        relative_noise = None
    else:
        # NOISE OF LAST SEGMENT
        s, e = new_segments[-2], new_segments[-1]
        last_segment = np.array(values[s:e])
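        # NOTE: THIS VARIANT DOES NOT TRIM OUTLIERS BEFORE MEASURING NOISE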
        trimmed_segment = last_segment
        last_mean = np.mean(trimmed_segment)
        last_std = np.std(trimmed_segment)
        last_dev_status, last_dev_score = deviance(trimmed_segment)
        relative_noise = last_std / last_mean

        # FOR EACH SEGMENT, NORMALIZE MEAN AND VARIANCE
        normalized = []
        for s, e in jx.pairs(new_segments):
            data = np.array(values[s:e])
            norm = (data - np.mean(data)) / np.std(data) * last_std + last_mean
            normalized.extend(norm)

        overall_dev_status, overall_dev_score = deviance(normalized)
        Log.note(
            "\n\tdeviance = {{deviance}}\n\tnoise={{std}}\n\tpushes={{pushes}}\n\tsegments={{num_segments}}",
            title=title,
            deviance=(overall_dev_status, overall_dev_score),
            std=relative_noise,
            pushes=len(values),
            num_segments=len(new_segments) - 1,
        )

    destination.add(
        Data(
            id=sig_id,
            title=title,
            num_pushes=len(values),
            num_segments=len(new_segments) - 1,
            relative_noise=relative_noise,
            overall_dev_status=overall_dev_status,
            overall_dev_score=overall_dev_score,
            last_mean=last_mean,
            last_std=last_std,
            last_dev_status=last_dev_status,
            last_dev_score=last_dev_score,
            last_updated=Date.now(),
            values=values,
        )
        | scrub(sig))