def test_detect_changes(self):
    data = []
    times = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
    values = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
    for (t, v) in zip(times, values):
        data.append(Datum(t, float(v)))

    result = [(d.push_timestamp, d.state) for d in
              detect_changes(data, min_back_window=5, max_back_window=5,
                             fore_window=5, t_threshold=2)]
    self.assertEqual(result, [
        (1, 'good'),
        (2, 'good'),
        (3, 'good'),
        (4, 'good'),
        (5, 'good'),
        (6, 'good'),
        (7, 'good'),
        (8, 'regression'),
        (9, 'good'),
        (10, 'good')])
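
# Hedged sketch (not Perfherder's actual implementation): detect_changes is
# assumed to slide a "back" window and a "fore" window over the series and
# mark a datum as 'regression' when a t statistic comparing the two windows
# exceeds t_threshold. The helper below illustrates that comparison on plain
# lists; the name windowed_t_value is invented for illustration.
import math


def windowed_t_value(back, fore):
    """Welch-style t statistic between a back window and a fore window."""
    mean_b = sum(back) / float(len(back))
    mean_f = sum(fore) / float(len(fore))
    var_b = sum((v - mean_b) ** 2 for v in back) / max(len(back) - 1, 1)
    var_f = sum((v - mean_f) ** 2 for v in fore) / max(len(fore) - 1, 1)
    denom = math.sqrt(var_b / len(back) + var_f / len(fore))
    if denom == 0:
        # zero variance in both windows: infinitely confident if means differ
        return float('inf') if mean_f != mean_b else 0.0
    return abs(mean_f - mean_b) / denom

# On the step series in the test above, back=[0]*5 vs fore=[1]*5 at
# timestamp 8 gives t=inf, comfortably past t_threshold=2, matching the
# single 'regression' state asserted.
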
def check_json(self, filename, expected_timestamps):
    """Parse JSON produced by http://graphs.mozilla.org/api/test/runs"""
    # Configuration for Analyzer
    FORE_WINDOW = 12
    MIN_BACK_WINDOW = 12
    MAX_BACK_WINDOW = 24
    THRESHOLD = 7

    payload = SampleData.get_perf_data(os.path.join('graphs', filename))
    runs = payload['test_runs']
    data = []
    for r in runs:
        data.append(Datum(r[2], r[3], testrun_id=r[0], revision_id=r[1][2]))

    results = detect_changes(data,
                             min_back_window=MIN_BACK_WINDOW,
                             max_back_window=MAX_BACK_WINDOW,
                             fore_window=FORE_WINDOW,
                             t_threshold=THRESHOLD)
    regression_timestamps = [d.push_timestamp for d in results
                             if d.state == 'regression']
    self.assertEqual(regression_timestamps, expected_timestamps)
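
# Hedged sketch of the assumed payload shape: judging only from the indexing
# above, each entry of payload['test_runs'] looks roughly like the row below
# -- [testrun_id, [..., ..., revision_id], push_timestamp, value]. The
# concrete numbers are invented for illustration.
sample_run = [
    4835769,               # r[0]: testrun_id
    [None, None, 104456],  # r[1]: metadata triple; r[1][2] is revision_id
    1367520024,            # r[2]: push timestamp (epoch seconds)
    561.75,                # r[3]: measured value
]
d = Datum(sample_run[2], sample_run[3],
          testrun_id=sample_run[0], revision_id=sample_run[1][2])
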
def handle(self, *args, **options):
    if options['server']:
        server_params = urlparse(options['server'])
        server_protocol = server_params.scheme
        server_host = server_params.netloc
    else:
        server_protocol = settings.TREEHERDER_REQUEST_PROTOCOL
        server_host = settings.TREEHERDER_REQUEST_HOST

    if not options['project']:
        raise CommandError("Must specify at least one project with "
                           "--project")

    pc = PerfherderClient(protocol=server_protocol, host=server_host)
    option_collection_hash = pc.get_option_collection_hash()

    # print csv header
    print ','.join(["project", "platform", "signature", "series",
                    "testrun_id", "push_timestamp", "change",
                    "percent change", "t-value", "revision"])

    for project in options['project']:
        if options['signature']:
            signatures = [options['signature']]
            signature_data = pc.get_performance_signatures(
                project, signatures=signatures,
                interval=options['time_interval'])
        else:
            signature_data = pc.get_performance_signatures(
                project, interval=options['time_interval'])
            signatures = []
            signatures_to_ignore = set()
            # if doing everything, only handle summary series
            for (signature, properties) in signature_data.iteritems():
                signatures.append(signature)
                if 'subtest_signatures' in properties:
                    # Don't alert on subtests which have a summary
                    signatures_to_ignore.update(
                        properties['subtest_signatures'])
            signatures = [signature for signature in signatures
                          if signature not in signatures_to_ignore]

        for signature in signatures:
            series = pc.get_performance_data(
                project, signatures=signature,
                interval=options['time_interval'])[signature]
            series_properties = signature_data.get(signature)

            data = []
            for (result_set_id, timestamp, value) in zip(
                    series['result_set_id'], series['push_timestamp'],
                    series['value']):
                data.append(Datum(timestamp, value,
                                  testrun_id=result_set_id))

            for r in detect_changes(data):
                if r.state == 'regression':
                    resultsets = pc.get_resultsets(project, id=r.testrun_id)
                    if len(resultsets):
                        revision = resultsets[0]['revision']
                    else:
                        revision = ''
                    initial_value = r.historical_stats['avg']
                    new_value = r.forward_stats['avg']
                    if initial_value != 0:
                        pct_change = (100.0 * abs(new_value - initial_value) /
                                      float(initial_value))
                    else:
                        pct_change = 0.0
                    delta = (new_value - initial_value)
                    print ','.join(map(
                        lambda v: str(v),
                        [project,
                         series_properties['machine_platform'],
                         signature,
                         self._get_series_description(
                             option_collection_hash, series_properties),
                         r.testrun_id, r.push_timestamp, delta,
                         pct_change, r.t, revision[0:12]]))
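
# Hedged usage sketch: handle() reads like the body of a Django management
# command; the command name below is hypothetical, invented for illustration:
#
#   ./manage.py analyze_perf --project mozilla-inbound \
#       --time-interval 2592000 --server https://treeherder.mozilla.org
#
# Worked example of the percent-change arithmetic printed per regression
# (numbers invented):
initial_value, new_value = 200.0, 230.0
delta = new_value - initial_value
pct_change = 100.0 * abs(new_value - initial_value) / float(initial_value)
assert (delta, pct_change) == (30.0, 15.0)
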
def generate_new_alerts_in_series(signature):
    # get series data starting from either:
    # (1) the last alert, if there is one
    # (2) the alerts max age
    # (use whichever is newer)
    max_alert_age = (datetime.datetime.now() -
                     settings.PERFHERDER_ALERTS_MAX_AGE)
    series = PerformanceDatum.objects.filter(signature=signature).filter(
        push_timestamp__gte=max_alert_age).order_by('push_timestamp')
    latest_alert_timestamp = PerformanceAlert.objects.filter(
        series_signature=signature).select_related(
            'summary__push__time').order_by(
                '-summary__push__time').values_list(
                    'summary__push__time', flat=True)[:1]
    if latest_alert_timestamp:
        series = series.filter(push_timestamp__gt=latest_alert_timestamp[0])

    data = [Datum(int(time.mktime(d.push_timestamp.timetuple())),
                  d.value, testrun_id=d.push_id)
            for d in series]

    min_back_window = signature.min_back_window
    if min_back_window is None:
        min_back_window = settings.PERFHERDER_ALERTS_MIN_BACK_WINDOW
    max_back_window = signature.max_back_window
    if max_back_window is None:
        max_back_window = settings.PERFHERDER_ALERTS_MAX_BACK_WINDOW
    fore_window = signature.fore_window
    if fore_window is None:
        fore_window = settings.PERFHERDER_ALERTS_FORE_WINDOW
    alert_threshold = signature.alert_threshold
    if alert_threshold is None:
        alert_threshold = settings.PERFHERDER_REGRESSION_THRESHOLD

    analyzed_series = detect_changes(data,
                                     min_back_window=min_back_window,
                                     max_back_window=max_back_window,
                                     fore_window=fore_window)

    prev_testrun_id = None
    with transaction.atomic():
        for (prev, cur) in zip(analyzed_series, analyzed_series[1:]):
            # we can have the same testrun id in a sequence if there are
            # retriggers, so only set the prev_testrun_id if that isn't
            # the case
            if prev.testrun_id != cur.testrun_id:
                prev_testrun_id = prev.testrun_id
            if cur.state == 'regression' and prev_testrun_id:
                prev_value = cur.historical_stats['avg']
                new_value = cur.forward_stats['avg']
                alert_properties = get_alert_properties(
                    prev_value, new_value, signature.lower_is_better)
                if alert_properties.pct_change < alert_threshold:
                    # ignore regressions below the configured regression
                    # threshold
                    continue

                summary, _ = PerformanceAlertSummary.objects.get_or_create(
                    repository=signature.repository,
                    framework=signature.framework,
                    push_id=cur.testrun_id,
                    prev_push_id=prev_testrun_id,
                    defaults={
                        'manually_created': False,
                        'last_updated': datetime.datetime.utcfromtimestamp(
                            cur.push_timestamp)
                    })

                # django/mysql doesn't understand "inf", so just use some
                # arbitrarily high value for that case
                t_value = cur.t
                if t_value == float('inf'):
                    t_value = 1000

                PerformanceAlert.objects.update_or_create(
                    summary=summary,
                    series_signature=signature,
                    defaults={
                        'is_regression': alert_properties.is_regression,
                        'amount_pct': alert_properties.pct_change,
                        'amount_abs': alert_properties.delta,
                        'prev_value': prev_value,
                        'new_value': new_value,
                        't_value': t_value
                    })
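
# Hedged sketch of get_alert_properties, reconstructed only from how it is
# called above (the real Perfherder helper may differ): it must expose
# .pct_change, .delta and .is_regression for a prev/new value pair, where a
# change counts as a regression depending on signature.lower_is_better. The
# name get_alert_properties_sketch marks this as illustrative, not the
# actual implementation.
import collections

AlertProperties = collections.namedtuple(
    'AlertProperties', ['pct_change', 'delta', 'is_regression'])


def get_alert_properties_sketch(prev_value, new_value, lower_is_better):
    delta = new_value - prev_value
    pct_change = (100.0 * abs(delta) / float(prev_value)
                  if prev_value else 0.0)
    # a rise is a regression when lower is better, and vice versa
    is_regression = (delta > 0) if lower_is_better else (delta < 0)
    return AlertProperties(pct_change, delta, is_regression)
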