def test_detect_changes_few_revisions_many_values():
    """
    Check that a regression is detected when there are only a few
    revisions but each revision carries a large number of values.
    """
    # (push_timestamp, push_id, count of 0 values, count of 1 values)
    layout = [
        (0, 0, 50, 30),
        (1, 1, 10, 30),
        (1, 1, 10, 30),
    ]
    data = [
        RevisionDatum(timestamp, push_id, [0] * zeros + [1] * ones)
        for (timestamp, push_id, zeros, ones) in layout
    ]

    analyzed = detect_changes(
        data,
        min_back_window=5,
        max_back_window=10,
        fore_window=5,
        t_threshold=2,
    )
    flagged = [(datum.push_timestamp, datum.change_detected) for datum in analyzed]

    assert flagged == [(0, False), (1, True), (1, False)]
def test_detect_changes():
    """
    Feed detect_changes sixteen single-value revisions whose value steps
    from 0.0 to 1.0 and expect only the first stepped revision to be flagged.
    """
    revision_count = 16
    # Revisions 0..7 hold 0.0, revisions 8..15 hold 1.0.
    step_at = 8

    data = [
        RevisionDatum(t, t, [float(0 if t < step_at else 1)])
        for t in range(revision_count)
    ]

    analyzed = detect_changes(
        data, min_back_window=5, max_back_window=5, fore_window=5, t_threshold=2
    )
    result = [(d.push_timestamp, d.change_detected) for d in analyzed]

    expected = [(t, t == step_at) for t in range(revision_count)]
    assert result == expected
def test_detect_changes_historical_data(filename, expected_timestamps):
    """
    Run detect_changes over a stored sample and compare the flagged timestamps.

    The sample is JSON produced by http://graphs.mozilla.org/api/test/runs,
    loaded from the 'graphs' sample directory via SampleData.
    """
    # Configuration for Analyzer
    analyzer_config = {
        'min_back_window': 12,
        'max_back_window': 24,
        'fore_window': 12,
        't_threshold': 7,
    }

    sample_path = os.path.join('graphs', filename)
    runs = SampleData.get_perf_data(sample_path)['test_runs']

    # Element 2 of each run is used for both leading RevisionDatum arguments;
    # element 3 is the single value of that revision.
    data = [RevisionDatum(run[2], run[2], [run[3]]) for run in runs]

    regression_timestamps = [
        datum.push_timestamp
        for datum in detect_changes(data, **analyzer_config)
        if datum.change_detected
    ]
    assert regression_timestamps == expected_timestamps
def generate_new_alerts_in_series(signature):
    """
    Detect changes in the recent data of ``signature`` and record them as
    alerts.

    The series is grouped per push, analyzed with ``detect_changes`` and, for
    every detected change that reaches the alert threshold, a
    ``PerformanceAlertSummary`` is fetched or created and a
    ``PerformanceAlert`` is created or updated. All writes happen inside one
    transaction.

    :param signature: the series signature to analyze; its window and
        threshold attributes override the ``settings`` defaults when they
        are not ``None``.
    """
    # get series data starting from either:
    # (1) the last alert, if there is one
    # (2) the alerts max age
    # (use whichever is newer)
    oldest_allowed = datetime.now() - settings.PERFHERDER_ALERTS_MAX_AGE
    series = PerformanceDatum.objects.filter(
        signature=signature, push_timestamp__gte=oldest_allowed
    )
    latest_alert_timestamp = (
        PerformanceAlert.objects.filter(series_signature=signature)
        .select_related('summary__push__time')
        .order_by('-summary__push__time')
        .values_list('summary__push__time', flat=True)[:1]
    )
    if latest_alert_timestamp:
        series = series.filter(push_timestamp__gt=latest_alert_timestamp[0])

    # Collect every value of a push into a single RevisionDatum.
    revision_data = {}
    for datum in series:
        bucket = revision_data.get(datum.push_id)
        if not bucket:
            bucket = revision_data[datum.push_id] = RevisionDatum(
                int(time.mktime(datum.push_timestamp.timetuple())),
                datum.push_id,
                [],
            )
        bucket.values.append(datum.value)

    # Per-signature window sizes win; the settings value is only read when
    # the signature leaves the field unset.
    window_defaults = (
        ('min_back_window', 'PERFHERDER_ALERTS_MIN_BACK_WINDOW'),
        ('max_back_window', 'PERFHERDER_ALERTS_MAX_BACK_WINDOW'),
        ('fore_window', 'PERFHERDER_ALERTS_FORE_WINDOW'),
    )
    window_kwargs = {}
    for field_name, setting_name in window_defaults:
        configured = getattr(signature, field_name)
        if configured is None:
            configured = getattr(settings, setting_name)
        window_kwargs[field_name] = configured

    alert_threshold = signature.alert_threshold
    if alert_threshold is None:
        alert_threshold = settings.PERFHERDER_REGRESSION_THRESHOLD

    analyzed_series = detect_changes(revision_data.values(), **window_kwargs)

    with transaction.atomic():
        for prev, cur in zip(analyzed_series, analyzed_series[1:]):
            if not cur.change_detected:
                continue

            prev_value = cur.historical_stats['avg']
            new_value = cur.forward_stats['avg']
            alert_properties = get_alert_properties(
                prev_value, new_value, signature.lower_is_better
            )

            # ignore regressions below the configured regression
            # threshold
            change_type = signature.alert_change_type
            uses_pct = (
                change_type is None
                or change_type == PerformanceSignature.ALERT_PCT
            )
            if uses_pct and alert_properties.pct_change < alert_threshold:
                continue
            if (
                change_type == PerformanceSignature.ALERT_ABS
                and alert_properties.delta < alert_threshold
            ):
                continue

            summary, _ = PerformanceAlertSummary.objects.get_or_create(
                repository=signature.repository,
                framework=signature.framework,
                push_id=cur.push_id,
                prev_push_id=prev.push_id,
                defaults={
                    'manually_created': False,
                    'created': datetime.utcfromtimestamp(cur.push_timestamp),
                },
            )

            # django/mysql doesn't understand "inf", so just use some
            # arbitrarily high value for that case
            t_value = 1000 if cur.t == float('inf') else cur.t

            PerformanceAlert.objects.update_or_create(
                summary=summary,
                series_signature=signature,
                defaults={
                    'is_regression': alert_properties.is_regression,
                    'amount_pct': alert_properties.pct_change,
                    'amount_abs': alert_properties.delta,
                    'prev_value': prev_value,
                    'new_value': new_value,
                    't_value': t_value,
                },
            )
def test_calc_t(old_data, new_data, expected):
    """calc_t over one old and one new revision must equal ``expected``."""
    before = [RevisionDatum(0, 0, old_data)]
    after = [RevisionDatum(1, 1, new_data)]

    assert calc_t(before, after) == expected
def test_analyze_fn(revision_data, weight_fn, expected):
    """
    Wrap each entry of ``revision_data`` in a RevisionDatum keyed by its
    position and check what ``analyze`` returns for ``weight_fn``.
    """
    data = [
        RevisionDatum(index, index, values)
        for index, values in enumerate(revision_data)
    ]

    assert analyze(data, weight_fn) == expected
def handle(self, *args, **options):
    """
    Print a CSV report of the regressions detected in Perfherder series.

    Reads the ``project``, ``server``, ``signature`` and ``time_interval``
    options. For every requested project the relevant series are fetched
    through ``PerfherderClient``, run through ``detect_changes``, and one
    CSV row is printed per result whose state is ``'regression'``.

    :raises CommandError: when no project was given.
    """
    projects = options['project']
    if not projects:
        raise CommandError("Must specify at least one project with --project")

    pc = PerfherderClient(server_url=options['server'])
    option_collection_hash = pc.get_option_collection_hash()

    # print csv header
    header = [
        "project",
        "platform",
        "signature",
        "series",
        "testrun_id",
        "push_timestamp",
        "change",
        "percent change",
        "t-value",
        "revision",
    ]
    print(','.join(header))

    for project in projects:
        if options['signature']:
            signatures = [options['signature']]
            signature_data = pc.get_performance_signatures(
                project,
                signatures=signatures,
                interval=options['time_interval'],
            )
        else:
            signature_data = pc.get_performance_signatures(
                project, interval=options['time_interval']
            )

            # if doing everything, only handle summary series
            candidates = []
            summarized_subtests = set()
            for name, properties in signature_data.items():
                candidates.append(name)
                if 'subtest_signatures' in properties:
                    # Don't alert on subtests which have a summary
                    summarized_subtests.update(properties['subtest_signatures'])

            signatures = [
                name for name in candidates if name not in summarized_subtests
            ]

        for signature in signatures:
            performance_data = pc.get_performance_data(
                project,
                signatures=signature,
                interval=options['time_interval'],
            )
            series = performance_data[signature]
            series_properties = signature_data.get(signature)

            data = [
                RevisionDatum(timestamp, value)
                for timestamp, value in zip(
                    series['push_timestamp'], series['value']
                )
            ]

            for r in detect_changes(data):
                if r.state != 'regression':
                    continue

                pushes = pc.get_pushes(project, id=r.testrun_id)
                revision = pushes[0]['revision'] if pushes else ''

                initial_value = r.historical_stats['avg']
                new_value = r.forward_stats['avg']
                # A zero baseline has no meaningful relative change, so it
                # is reported as 0.0 rather than dividing by zero.
                pct_change = (
                    100.0 * abs(new_value - initial_value) / float(initial_value)
                    if initial_value != 0
                    else 0.0
                )
                delta = new_value - initial_value

                row = [
                    project,
                    series_properties['machine_platform'],
                    signature,
                    self._get_series_description(
                        option_collection_hash, series_properties
                    ),
                    r.testrun_id,
                    r.push_timestamp,
                    delta,
                    pct_change,
                    r.t,
                    revision[0:12],
                ]
                print(','.join(str(cell) for cell in row))
def generate_new_alerts_in_series(signature):
    """
    Detect changes in the recent data of ``signature`` and record them as
    alerts, each tagged with a noise profile.

    The series is grouped per push, analyzed with ``detect_changes`` and, for
    every detected change that reaches the alert threshold, a
    ``PerformanceAlertSummary`` is fetched or created and a
    ``PerformanceAlert`` is created or updated. All writes happen inside one
    transaction.

    The noise profile stored on the alert is the first element returned by
    ``deviance`` for the data preceding the change. It falls back to
    ``"N/A"`` whenever that computation raises or yields a non-string.

    :param signature: the series signature to analyze; its window and
        threshold attributes override the ``settings`` defaults when they
        are not ``None``.
    """
    # get series data starting from either:
    # (1) the last alert, if there is one
    # (2) the alerts max age
    # (use whichever is newer)
    max_alert_age = datetime.now() - settings.PERFHERDER_ALERTS_MAX_AGE
    series = PerformanceDatum.objects.filter(
        signature=signature, push_timestamp__gte=max_alert_age
    )
    latest_alert_timestamp = (
        PerformanceAlert.objects.filter(series_signature=signature)
        .select_related('summary__push__time')
        .order_by('-summary__push__time')
        .values_list('summary__push__time', flat=True)[:1]
    )
    if latest_alert_timestamp:
        series = series.filter(push_timestamp__gt=latest_alert_timestamp[0])

    # Collect every value of a push into a single RevisionDatum.
    revision_data = {}
    for d in series:
        if not revision_data.get(d.push_id):
            revision_data[d.push_id] = RevisionDatum(
                int(time.mktime(d.push_timestamp.timetuple())), d.push_id, []
            )
        revision_data[d.push_id].values.append(d.value)

    # Per-signature configuration wins; settings provide the defaults.
    min_back_window = signature.min_back_window
    if min_back_window is None:
        min_back_window = settings.PERFHERDER_ALERTS_MIN_BACK_WINDOW
    max_back_window = signature.max_back_window
    if max_back_window is None:
        max_back_window = settings.PERFHERDER_ALERTS_MAX_BACK_WINDOW
    fore_window = signature.fore_window
    if fore_window is None:
        fore_window = settings.PERFHERDER_ALERTS_FORE_WINDOW
    alert_threshold = signature.alert_threshold
    if alert_threshold is None:
        alert_threshold = settings.PERFHERDER_REGRESSION_THRESHOLD

    data = revision_data.values()
    analyzed_series = detect_changes(
        data,
        min_back_window=min_back_window,
        max_back_window=max_back_window,
        fore_window=fore_window,
    )

    with transaction.atomic():
        for (prev, cur) in zip(analyzed_series, analyzed_series[1:]):
            if cur.change_detected:
                prev_value = cur.historical_stats['avg']
                new_value = cur.forward_stats['avg']
                alert_properties = get_alert_properties(
                    prev_value, new_value, signature.lower_is_better
                )

                noise_profile = "N/A"
                try:
                    # Gather all data up to the current data point that
                    # shows the regression and obtain a noise profile on it.
                    # This helps us to ignore this alert and others in the
                    # calculation that could influence the profile.
                    noise_data = []
                    for point in analyzed_series:
                        if point == cur:
                            break
                        noise_data.append(geomean(point.values))

                    # Validate into a temporary first: assigning straight to
                    # noise_profile would let a rejected non-string value
                    # leak into the stored alert after the except below.
                    candidate_profile, _ = deviance(noise_data)
                    if not isinstance(candidate_profile, str):
                        raise Exception(
                            "Expecting a string as a "
                            f"noise profile, got: {type(candidate_profile)}"
                        )
                    noise_profile = candidate_profile
                except Exception:
                    # Fail without breaking the alert computation;
                    # noise_profile keeps its "N/A" default.
                    newrelic.agent.record_exception()
                    logger.error("Failed to obtain a noise profile.")

                # ignore regressions below the configured regression
                # threshold
                if (
                    (
                        signature.alert_change_type is None
                        or signature.alert_change_type == PerformanceSignature.ALERT_PCT
                    )
                    and alert_properties.pct_change < alert_threshold
                ) or (
                    signature.alert_change_type == PerformanceSignature.ALERT_ABS
                    and alert_properties.delta < alert_threshold
                ):
                    continue

                summary, _ = PerformanceAlertSummary.objects.get_or_create(
                    repository=signature.repository,
                    framework=signature.framework,
                    push_id=cur.push_id,
                    prev_push_id=prev.push_id,
                    defaults={
                        'manually_created': False,
                        'created': datetime.utcfromtimestamp(cur.push_timestamp),
                    },
                )

                # django/mysql doesn't understand "inf", so just use some
                # arbitrarily high value for that case
                t_value = cur.t
                if t_value == float('inf'):
                    t_value = 1000

                PerformanceAlert.objects.update_or_create(
                    summary=summary,
                    series_signature=signature,
                    defaults={
                        'noise_profile': noise_profile,
                        'is_regression': alert_properties.is_regression,
                        'amount_pct': alert_properties.pct_change,
                        'amount_abs': alert_properties.delta,
                        'prev_value': prev_value,
                        'new_value': new_value,
                        't_value': t_value,
                    },
                )