def get_history(failure_classification_id, push_date, num_days, option_map, repository_ids):
    """Return prior failure counts for a classification, served from cache when possible.

    The window examined starts ``num_days`` before ``push_date`` and ends two
    days before it (both bounds exclusive).  On a cache miss, the distinct
    (test, platform, option collection hash) rows of tier-1 ``testfailed``
    jobs in ``repository_ids`` are tallied and the tally is cached as JSON
    for ``ONE_WEEK_IN_SECONDS``.

    Returns:
        A ``(previous_failures, cache_key)`` tuple.  ``previous_failures``
        maps cleaned test name -> cleaned platform -> cleaned config -> count.
    """
    window_start = push_date - datetime.timedelta(days=num_days)
    window_end = push_date - datetime.timedelta(days=2)
    cache_key = 'failure_history:{}:{}'.format(failure_classification_id, push_date)

    cached_json = cache.get(cache_key)
    if cached_json:
        return json.loads(cached_json), cache_key

    matching_lines = FailureLine.objects.filter(
        job_log__job__result='testfailed',
        job_log__job__tier=1,
        job_log__job__failure_classification_id=failure_classification_id,
        job_log__job__push__repository_id__in=repository_ids,
        job_log__job__push__time__gt=window_start,
        job_log__job__push__time__lt=window_end,
    ).exclude(test=None)
    rows = (
        matching_lines.select_related('job_log__job__machine_platform', 'job_log__job__push')
        .values(
            'test',
            'job_log__job__machine_platform__platform',
            'job_log__job__option_collection_hash',
        )
        .distinct()
    )

    # test name -> platform -> config -> number of distinct rows seen
    previous_failures = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for row in rows:
        by_platform = previous_failures[clean_test(row['test'])]
        by_config = by_platform[clean_platform(row['job_log__job__machine_platform__platform'])]
        config = clean_config(option_map[row['job_log__job__option_collection_hash']])
        by_config[config] += 1

    cache.set(cache_key, json.dumps(previous_failures), ONE_WEEK_IN_SECONDS)
    return previous_failures, cache_key
def get_push_failures(push, option_map):
    """Collect the tier-1 ``test_result`` failures of ``push``, grouped per test.

    Failure lines are grouped by the combination of cleaned test name, config,
    platform and job type name.  Each group records its log lines and the
    distinct jobs the test failed in.

    Returns:
        A list of group dicts sorted by ``testName``.
    """
    # .distinct(<fields>) would remove duplicate FailureLines for the same job
    # (with different sub-tests), but it is postgres-only; a bare .distinct()
    # has no effect here.
    failure_lines = (
        FailureLine.objects.filter(
            action='test_result',
            job_log__job__push=push,
            job_log__job__result='testfailed',
            job_log__job__tier=1,
        )
        .exclude(test=None)
        .select_related('job_log__job__job_type', 'job_log__job__machine_platform')
    )

    # Keyed by test_key so several failure lines of one job collapse into a
    # single entry.
    entries = {}
    for failure_line in failure_lines:
        test_name = clean_test(failure_line.test)
        if not test_name:
            continue

        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        job_name = job.job_type.name
        job_symbol = job.job_type.symbol
        test_key = '{}{}{}{}'.format(test_name, config, platform, job_name)

        entry = entries.get(test_key)
        if entry is None:
            entry = entries[test_key] = {
                'testName': test_name,
                'jobName': job_name,
                'jobSymbol': job_symbol,
                'platform': platform,
                'config': config,
                'key': test_key,
                'failJobs': [],
                'passJobs': [],
                'logLines': [],
                'suggestedClassification': 'New Failure',
                'confidence': 0,
            }

        entry['logLines'].append(failure_line.to_mozlog_format())
        already_listed = any(seen['id'] == job.id for seen in entry['failJobs'])
        if not already_listed:
            entry['failJobs'].append(job_to_dict(job))

    # One entry per test file per platform/config; every entry has at least
    # one failing job.
    return sorted(entries.values(), key=lambda entry: entry['testName'])
def get_push_failures(push, option_map):
    """Collect the tier-1 ``test_result`` failures of ``push``, grouped per test.

    Groups are keyed by cleaned test name + config + platform + job type name.
    Each group carries its log lines and the distinct jobs (as produced by
    ``model_to_dict``) in which the test failed.

    Returns:
        A list of group dicts sorted by ``testName``.
    """
    # .distinct(<fields>) would remove duplicate FailureLines for the same job
    # (with different sub-tests), but it is postgres-only; a bare .distinct()
    # has no effect here.
    base_query = FailureLine.objects.filter(
        action='test_result',
        job_log__job__push=push,
        job_log__job__result='testfailed',
        job_log__job__tier=1,
    )
    failure_lines = base_query.exclude(test=None).select_related(
        'job_log__job__job_type', 'job_log__job__machine_platform'
    )

    # A dict avoids duplicate entries when a job has multiple failure lines.
    grouped = {}
    for failure_line in failure_lines:
        test_name = clean_test(failure_line.test)
        if not test_name:
            continue

        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        job_name = job.job_type.name
        job_symbol = job.job_type.symbol
        test_key = '{}{}{}{}'.format(test_name, config, platform, job_name)

        if test_key not in grouped:
            grouped[test_key] = {
                'testName': test_name,
                'jobName': job_name,
                'jobSymbol': job_symbol,
                'platform': platform,
                'config': config,
                'key': test_key,
                'jobs': [],
                'logLines': [],
                'suggestedClassification': 'New Failure',
                'confidence': 0,
            }
        group = grouped[test_key]

        group['logLines'].append(failure_line.to_mozlog_format())
        job_is_new = not any(known['id'] == job.id for known in group['jobs'])
        if job_is_new:
            group['jobs'].append(model_to_dict(job))

    return sorted(grouped.values(), key=lambda group: group['testName'])
def get_history(
    failure_classification_id, push_date, num_days, option_map, repository_ids, force_update=False
):
    """Return prior failure counts for a classification, served from cache when possible.

    The window examined starts ``num_days`` before ``push_date`` and ends two
    days before it (both bounds exclusive).  The tally is rebuilt when nothing
    usable is cached or when ``force_update`` is true, and is then cached as
    JSON for ``ONE_WEEK_IN_SECONDS``.

    Returns:
        A mapping of cleaned test name -> cleaned platform -> cleaned config
        -> count of distinct matching failure-line rows.
    """
    window_start = push_date - datetime.timedelta(days=num_days)
    window_end = push_date - datetime.timedelta(days=2)
    cache_key = f'{CACHE_KEY_ROOT}:{failure_classification_id}:{push_date}'

    cached_json = cache.get(cache_key)
    if cached_json and not force_update:
        return json.loads(cached_json)

    matching_lines = FailureLine.objects.filter(
        job_log__job__result='testfailed',
        job_log__job__tier__lte=2,
        job_log__job__failure_classification_id=failure_classification_id,
        job_log__job__push__repository_id__in=repository_ids,
        job_log__job__push__time__gt=window_start,
        job_log__job__push__time__lt=window_end,
    ).exclude(test=None)
    rows = (
        matching_lines.select_related('job_log__job__machine_platform', 'job_log__job__push')
        .values(
            'action',
            'test',
            'signature',
            'message',
            'job_log__job__machine_platform__platform',
            'job_log__job__option_collection_hash',
        )
        .distinct()
    )

    # test name -> platform -> config -> number of distinct rows seen
    previous_failures = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for row in rows:
        test_name = clean_test(row['action'], row['test'], row['signature'], row['message'])
        by_platform = previous_failures[test_name]
        by_config = by_platform[clean_platform(row['job_log__job__machine_platform__platform'])]
        config = clean_config(option_map[row['job_log__job__option_collection_hash']])
        by_config[config] += 1

    cache.set(cache_key, json.dumps(previous_failures), ONE_WEEK_IN_SECONDS)
    return previous_failures
def get_history(
    failure_classification_id, push_date, num_days, option_map, repository_ids, force_update=False
):
    """Return prior failure counts and the cache key they are stored under.

    The window examined starts ``num_days`` before ``push_date`` and ends two
    days before it (both bounds exclusive).  The tally of tier-1
    ``testfailed`` failure lines is rebuilt when nothing usable is cached or
    when ``force_update`` is true, and is then cached as JSON for
    ``ONE_WEEK_IN_SECONDS``.

    Returns:
        A ``(previous_failures, cache_key)`` tuple.  ``previous_failures``
        maps cleaned test name -> cleaned platform -> cleaned config -> count.
    """
    window_start = push_date - datetime.timedelta(days=num_days)
    window_end = push_date - datetime.timedelta(days=2)
    cache_key = 'failure_history:{}:{}'.format(failure_classification_id, push_date)

    cached_json = cache.get(cache_key)
    if cached_json and not force_update:
        return json.loads(cached_json), cache_key

    matching_lines = FailureLine.objects.filter(
        job_log__job__result='testfailed',
        job_log__job__tier=1,
        job_log__job__failure_classification_id=failure_classification_id,
        job_log__job__push__repository_id__in=repository_ids,
        job_log__job__push__time__gt=window_start,
        job_log__job__push__time__lt=window_end,
    ).exclude(test=None)
    rows = (
        matching_lines.select_related('job_log__job__machine_platform', 'job_log__job__push')
        .values(
            'test',
            'job_log__job__machine_platform__platform',
            'job_log__job__option_collection_hash',
        )
        .distinct()
    )

    # test name -> platform -> config -> number of distinct rows seen
    previous_failures = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for row in rows:
        by_platform = previous_failures[clean_test(row['test'])]
        by_config = by_platform[clean_platform(row['job_log__job__machine_platform__platform'])]
        config = clean_config(option_map[row['job_log__job__option_collection_hash']])
        by_config[config] += 1

    cache.set(cache_key, json.dumps(previous_failures), ONE_WEEK_IN_SECONDS)
    return previous_failures, cache_key
def test_clean_test(action, test, signature, message, expected):
    """Check that ``clean_test`` turns the given failure-line fields into ``expected``."""
    cleaned = clean_test(action, test, signature, message)
    assert expected == cleaned
def get_push_failures(push, option_map):
    """Collect the tier-1 ``test_result`` failures of ``push``, grouped per test.

    Groups are keyed by cleaned test name + config + platform + job type name.
    Each group lists the jobs the test failed in (``failJobs``) and the failed
    jobs with the same config/platform/job name in which this test did not
    fail (``passInFailedJobs``).

    Returns:
        A list of group dicts sorted by ``testName``.
    """
    # .distinct(<fields>) would remove duplicate FailureLines for the same job
    # (with different sub-tests), but it is postgres-only; a bare .distinct()
    # has no effect here.
    failure_lines = (
        FailureLine.objects.filter(
            action='test_result',
            job_log__job__push=push,
            job_log__job__result='testfailed',
            job_log__job__tier=1,
        )
        .exclude(test=None)
        .select_related('job_log__job__job_type', 'job_log__job__machine_platform')
    )

    # Dicts avoid duplicates caused by multiple failure lines per job.
    entries = {}
    failed_jobs_by_id = {}
    for failure_line in failure_lines:
        test_name = clean_test(failure_line.test)
        if not test_name:
            continue

        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        job_name = job.job_type.name
        job_symbol = job.job_type.symbol
        # Stash the grouping key on the job so the second pass can match
        # jobs against tests.
        job.job_key = '{}{}{}'.format(config, platform, job_name)
        failed_jobs_by_id[job.id] = job

        test_key = '{}{}{}{}'.format(test_name, config, platform, job_name)
        entry = entries.get(test_key)
        if entry is None:
            entry = entries[test_key] = {
                'testName': test_name,
                'jobName': job_name,
                'jobSymbol': job_symbol,
                'platform': platform,
                'config': config,
                'key': test_key,
                'jobKey': job.job_key,
                'failJobs': [],
                'passJobs': [],
                # This test passed in a job that failed for another test
                'passInFailedJobs': [],
                'logLines': [],
                'suggestedClassification': 'New Failure',
                'confidence': 0,
            }

        entry['logLines'].append(failure_line.to_mozlog_format())
        if not has_job(job, entry['failJobs']):
            entry['failJobs'].append(job_to_dict(job))

    # Second pass: for each test, find failed jobs of the same kind in which
    # this particular test is not among the failures.
    for entry in entries.values():
        entry['passInFailedJobs'].extend(
            job_to_dict(candidate)
            for candidate in failed_jobs_by_id.values()
            if not has_job(candidate, entry['failJobs'])
            and entry['jobKey'] == candidate.job_key
        )

    # One entry per test file per platform/config; every entry has at least
    # one failing job.
    return sorted(entries.values(), key=lambda entry: entry['testName'])
def get_current_test_failures(push, option_map):
    """Summarise the failing tests of ``push`` and the failed jobs lacking failure lines.

    Failure lines with action ``test_result``, ``log`` or ``crash`` from
    ``testfailed`` jobs of tier 2 or lower are grouped by cleaned test name +
    config + platform + job type name + job group name.

    Returns:
        A ``(tests, unsupported_jobs)`` tuple: ``tests`` is the list of group
        dicts sorted by ``testName``; ``unsupported_jobs`` holds
        ``job_to_dict`` output for every non-lint ``testfailed`` job of the
        push that produced no usable failure line.
    """
    testfailed_jobs = Job.objects.filter(
        push=push,
        tier__lte=2,
        result='testfailed',
    ).exclude(machine_platform__platform='lint')

    # .distinct(<fields>) would remove duplicate FailureLines for the same job
    # (with different sub-tests), but it is postgres-only; a bare .distinct()
    # has no effect here.
    failure_lines = FailureLine.objects.filter(
        action__in=['test_result', 'log', 'crash'],
        job_log__job__push=push,
        job_log__job__result='testfailed',
        job_log__job__tier__lte=2,
    ).select_related(
        'job_log__job__job_type',
        'job_log__job__job_group',
        'job_log__job__machine_platform',
    )

    # Dicts avoid duplicates caused by multiple failure lines per job.
    entries = {}
    failed_jobs_by_id = {}
    for failure_line in failure_lines:
        test_name = clean_test(
            failure_line.action, failure_line.test, failure_line.signature, failure_line.message
        )
        if not test_name:
            continue

        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        job_name = job.job_type.name
        job_symbol = job.job_type.symbol
        job_group = job.job_group.name
        job_group_symbol = job.job_group.symbol
        # Stash the grouping key on the job so the second pass can match
        # jobs against tests.
        job.job_key = '{}{}{}{}'.format(config, platform, job_name, job_group)
        failed_jobs_by_id[job.id] = job

        raw_key = '{}{}{}{}{}'.format(test_name, config, platform, job_name, job_group)
        test_key = re.sub(r'\W+', '', raw_key)

        entry = entries.get(test_key)
        if entry is None:
            entry = entries[test_key] = {
                'testName': test_name,
                'action': failure_line.action.split('_')[0],
                'jobName': job_name,
                'jobSymbol': job_symbol,
                'jobGroup': job_group,
                'jobGroupSymbol': job_group_symbol,
                'platform': platform,
                'config': config,
                'key': test_key,
                'jobKey': job.job_key,
                'inProgressJobs': [],
                'failJobs': [],
                'passJobs': [],
                # This test passed in a job that failed for another test
                'passInFailedJobs': [],
                'logLines': [],
                'suggestedClassification': 'New Failure',
                'confidence': 0,
                'tier': job.tier,
            }

        if not has_line(failure_line, entry['logLines']):
            entry['logLines'].append(failure_line.to_mozlog_format())
        if not has_job(job, entry['failJobs']):
            entry['failJobs'].append(job_to_dict(job))

    # Second pass: for each test, find failed jobs of the same kind in which
    # this particular test is not among the failures.
    for entry in entries.values():
        entry['passInFailedJobs'].extend(
            job_to_dict(candidate)
            for candidate in failed_jobs_by_id.values()
            if not has_job(candidate, entry['failJobs'])
            and entry['jobKey'] == candidate.job_key
        )

    # testfailed jobs that never showed up through a failure line are the
    # "unsupported" ones.
    unsupported_jobs = [
        job_to_dict(job) for job in testfailed_jobs if job.id not in failed_jobs_by_id
    ]

    # One entry per test file per platform/config; every entry has at least
    # one failing job.
    sorted_tests = sorted(entries.values(), key=lambda entry: entry['testName'])
    return (sorted_tests, unsupported_jobs)
def get_current_test_failures(push, option_map, jobs):
    """Summarise the failing tests of ``push``, one entry per test/config/platform/job.

    Failure lines with action ``test_result``, ``log`` or ``crash`` from
    ``testfailed`` jobs of tier 2 or lower are grouped by cleaned test name +
    config + platform + job type name + job group name.  Only the first
    failure line of each group contributes to the entry.

    Args:
        push: The push whose failure lines are examined.
        option_map: Mapping indexed by ``job.option_collection_hash``; the
            looked-up value is passed to ``clean_config``.
        jobs: Mapping from job type name to an iterable of job dicts.  Each
            dict is read for its ``'result'`` and
            ``'failure_classification_id'`` keys.

    Returns:
        A list of entry dicts sorted by ``testName``.  ``passFailRatio`` is
        ``passed / (passed + failed)`` over ``jobs[job_name]``, or ``0`` when
        nothing passed.

    Raises:
        KeyError: If a failing job's type name is missing from ``jobs`` or its
            option collection hash is missing from ``option_map``.
    """
    # .distinct(<fields>) would remove duplicate FailureLines for the same job
    # (with different sub-tests), but it is postgres-only; a bare .distinct()
    # has no effect here.
    new_failure_lines = FailureLine.objects.filter(
        action__in=['test_result', 'log', 'crash'],
        job_log__job__push=push,
        job_log__job__result='testfailed',
        job_log__job__tier__lte=2,
    ).select_related(
        'job_log__job__job_type',
        'job_log__job__job_group',
        'job_log__job__machine_platform',
        'job_log__job__taskcluster_metadata',
    )

    # A dict avoids duplicates caused by multiple failure lines per job.
    tests = {}
    for failure_line in new_failure_lines:
        test_name = clean_test(
            failure_line.action, failure_line.test, failure_line.signature, failure_line.message
        )
        if not test_name:
            continue

        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        job_name = job.job_type.name
        job_symbol = job.job_type.symbol
        job_group = job.job_group.name
        job_group_symbol = job.job_group.symbol
        job_key = '{}{}{}{}'.format(config, platform, job_name, job_group)

        # The 't' ensures the key starts with a character, as required for a
        # query selector.
        test_key = re.sub(
            r'\W+', '', 't{}{}{}{}{}'.format(test_name, config, platform, job_name, job_group)
        )

        # Looked up for every failure line so an unknown job name still
        # raises KeyError, even when the entry already exists.
        jobs_for_name = jobs[job_name]
        if test_key in tests:
            continue

        passed_count = sum(1 for entry in jobs_for_name if entry['result'] == 'success')
        failed_count = sum(1 for entry in jobs_for_name if entry['result'] == 'testfailed')
        # Parenthesised denominator: without the parentheses the expression
        # evaluates as (passed / passed) + failed, i.e. 1 + failed.
        pass_fail_ratio = passed_count / (passed_count + failed_count) if passed_count else 0
        # NOTE(review): 4 is presumably the "intermittent" failure
        # classification id -- confirm against the classification table.
        is_classified_intermittent = any(
            entry['failure_classification_id'] == 4 for entry in jobs_for_name
        )

        tests[test_key] = {
            'testName': test_name,
            'action': failure_line.action.split('_')[0],
            'jobName': job_name,
            'jobSymbol': job_symbol,
            'jobGroup': job_group,
            'jobGroupSymbol': job_group_symbol,
            'platform': platform,
            'config': config,
            'key': test_key,
            'jobKey': job_key,
            'suggestedClassification': 'New Failure',
            'confidence': 0,
            'tier': job.tier,
            'failedInParent': False,
            'passFailRatio': pass_fail_ratio,
            'isClassifiedIntermittent': is_classified_intermittent,
        }

    # One entry per test file per platform/config/job.
    return sorted(tests.values(), key=lambda k: k['testName'])
def get_test_failures(push, failed_jobs, likely_regression_labels, result_status):
    """Split the failures of ``push`` into regressions and known issues.

    Failure lines of the ``testfailed`` jobs named in ``failed_jobs`` are
    turned into lines via ``get_line``.  A line goes to the regression bucket
    when its job name is in ``likely_regression_labels``, otherwise to the
    known-issues bucket.  Job labels that yield no usable failure line are
    added to the matching bucket's ``unstructuredFailures`` list.

    Returns:
        A ``(result, buckets)`` tuple.  ``result`` is ``'none'`` when
        ``failed_jobs`` is empty, ``'fail'`` when any regression test was
        found, ``'unknown'`` when ``'unknown'`` is in ``result_status``, and
        ``'pass'`` otherwise.  ``buckets`` has the keys ``needInvestigation``
        and ``knownIssues``.
    """
    # option_map is used to map platforms for the job.option_collection_hash
    option_map = OptionCollection.objects.get_option_collection_map()
    failed_job_labels = list(failed_jobs.keys())

    # 'tests' is a dict to avoid duplicates caused by multiple failure lines
    # per job.
    regressions = {
        'tests': {},
        'unstructuredFailures': [],
    }
    known_issues = {
        'tests': {},
        'unstructuredFailures': [],
    }
    buckets = {'needInvestigation': regressions, 'knownIssues': known_issues}

    if not failed_job_labels:
        return ('none', buckets)

    failure_lines = (
        FailureLine.objects.filter(
            action__in=['test_result', 'log', 'crash'],
            job_log__job__push=push,
            job_log__job__job_type__name__in=failed_job_labels,
            job_log__job__result='testfailed',
        )
        .select_related(
            'job_log',
            'job_log__job',
            'job_log__job__job_type',
        )
        .values('job_log__job__job_type__name', 'test', 'signature', 'message', 'action')
    )

    investigatedTests = InvestigatedTests.objects.filter(push=push)

    # Labels still in this list after the loop had no usable failure line.
    pending_labels = failed_job_labels.copy()

    for row in failure_lines:
        test_name = clean_test(row['test'], row['signature'], row['message'])
        if not test_name:
            continue

        action = row['action'].split('_')[0]
        job_name = row['job_log__job__job_type__name']
        bucket = regressions if job_name in likely_regression_labels else known_issues

        if job_name in pending_labels:
            pending_labels.remove(job_name)

        line = get_line(test_name, action, failed_jobs[job_name][0], option_map, investigatedTests)
        # Keep the first line seen for each key.
        bucket['tests'].setdefault(line['key'], line)

    # Any label that was not in a FailureLine goes into the appropriate
    # bucket's 'unstructuredFailures' list.
    for label in pending_labels:
        bucket = regressions if label in likely_regression_labels else known_issues
        bucket['unstructuredFailures'].append(
            get_line(None, None, failed_jobs[label][0], option_map, investigatedTests)
        )

    regressions['tests'] = regressions['tests'].values()
    known_issues['tests'] = known_issues['tests'].values()

    if regressions['tests']:
        result = 'fail'
    elif 'unknown' in result_status:
        result = 'unknown'
    else:
        result = 'pass'

    return (result, buckets)