def _sanitize_data(runnable_jobs_data):
    """Return a sanitized subset of the runnable jobs API data.

    This is a loop to remove duplicates (including buildsystem -> *
    transformations if needed). By doing this, it allows us to have a single
    database query.

    It returns sanitized_list which will contain a subset which excludes:
    * jobs that don't specify the platform
    * jobs that don't specify the testtype
    * if the job appears again, we replace build_system_type with '*'. By
      doing so, if a job appears under both 'buildbot' and 'taskcluster',
      its build_system_type will be '*'
    """
    # Maps _unique_key(job) -> the dict we stored in sanitized_list, so a
    # duplicate can be updated in O(1) instead of via list.index(). The
    # previous implementation compared against the *first-seen* build system
    # type and looked the dict up by value, which raised ValueError once the
    # stored dict had already been flipped to '*'.
    job_by_key = {}
    sanitized_list = []
    for job in runnable_jobs_data:
        if not valid_platform(job['platform']):
            logger.info('Invalid platform %s', job['platform'])
            continue

        testtype = parse_testtype(
            build_system_type=job['build_system_type'],
            job_type_name=job['job_type_name'],
            platform_option=job['platform_option'],
            ref_data_name=job['ref_data_name'],
        )
        if not testtype:
            continue

        # NOTE: This is *all* the data we need from the runnable API
        new_job = {
            'build_system_type': job['build_system_type'],  # e.g. {buildbot,taskcluster,*}
            'platform': job['platform'],  # e.g. windows8-64
            'platform_option': job['platform_option'],  # e.g. {opt,debug}
            'testtype': testtype,  # e.g. web-platform-tests-1
        }

        key = _unique_key(new_job)
        existing = job_by_key.get(key)
        if existing is None:
            job_by_key[key] = new_job
            sanitized_list.append(new_job)
        elif existing['build_system_type'] not in (job['build_system_type'], '*'):
            # Same job seen under a different build system: mark the stored
            # entry as '*'. This guarantees that we don't have duplicates.
            existing['build_system_type'] = '*'

    return sanitized_list
def _sanitize_data(runnable_jobs_data):
    """Return a sanitized subset of the runnable jobs API data.

    This is a loop to remove duplicates (including buildsystem -> *
    transformations if needed). By doing this, it allows us to have a single
    database query.

    It returns sanitized_list which will contain a subset which excludes:
    * jobs that don't specify the platform
    * jobs that don't specify the testtype
    * if the job appears again, we replace build_system_type with '*'. By
      doing so, if a job appears under both 'buildbot' and 'taskcluster',
      its build_system_type will be '*'
    """
    # key -> the dict appended to sanitized_list. Mutating that dict in
    # place replaces the fragile/linear ``sanitized_list.index(new_job)``
    # lookup, which raised ValueError whenever a key was seen a third time
    # with yet another build system type (the stored dict already held '*'
    # and no longer compared equal to new_job).
    job_by_key = {}
    sanitized_list = []
    for job in runnable_jobs_data:
        if not valid_platform(job['platform']):
            logger.info('Invalid platform %s', job['platform'])
            continue

        testtype = parse_testtype(
            build_system_type=job['build_system_type'],
            job_type_name=job['job_type_name'],
            platform_option=job['platform_option'],
            ref_data_name=job['ref_data_name'],
        )
        if not testtype:
            continue

        # NOTE: This is *all* the data we need from the runnable API
        new_job = {
            'build_system_type': job['build_system_type'],  # e.g. {buildbot,taskcluster,*}
            'platform': job['platform'],  # e.g. windows8-64
            'platform_option': job['platform_option'],  # e.g. {opt,debug}
            'testtype': testtype,  # e.g. web-platform-tests-1
        }

        key = _unique_key(new_job)
        stored = job_by_key.get(key)
        if stored is None:
            job_by_key[key] = new_job
            sanitized_list.append(new_job)
        elif stored['build_system_type'] not in (job['build_system_type'], '*'):
            # Duplicate under a different build system -> collapse to '*'.
            # This guarantees that we don't have duplicates.
            stored['build_system_type'] = '*'

    return sanitized_list
def _build_ref_data_names(self, project, build_system):
    '''Return a map of unique job key -> ref_data_name for runnable jobs.

    We want all reference data names for every task that runs on a specific project.

    For example:
    * Buildbot - "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1"
    * TaskCluster = "test-linux64/opt-mochitest-webgl-e10s-1"
    '''
    ignored_jobs = []
    ref_data_names = {}

    runnable_jobs = list_runnable_jobs(project)['results']

    for job in runnable_jobs:
        # get testtype e.g. web-platform-tests-4
        testtype = parse_testtype(
            build_system_type=job['build_system_type'],
            job_type_name=job['job_type_name'],
            platform_option=job['platform_option'],
            ref_data_name=job['ref_data_name'],
        )

        if not valid_platform(job['platform']):
            continue

        if is_job_blacklisted(testtype):
            ignored_jobs.append(job['ref_data_name'])
            continue

        key = unique_key(testtype=testtype,
                         buildtype=job['platform_option'],
                         platform=job['platform'])

        # '*' acts as a wildcard matching every build system type
        if build_system == '*' or job['build_system_type'] == build_system:
            ref_data_names[key] = job['ref_data_name']

    for ref_data_name in sorted(ignored_jobs):
        # Lazy %s formatting for consistency with the rest of the module
        # (avoids building the string when INFO logging is disabled).
        logger.info('Ignoring %s', ref_data_name)

    return ref_data_names
def get_failures_fixed_by_commit():
    """ Return all job failures annotated with "fixed by commit" grouped by reason given for annotation.

    It returns a dictionary with a revision or bug ID as the key (bug ID is used for
    intermittent failures and the revision is used for real failures). For SETA's purposes
    we only care about revisions (real failures).
    The failures for *real failures* will contain all jobs that have been starred as "fixed by commit".

    Notice that the data does not tell you on which repository a root failure was fixed.

    For instance, in the raw data you might see a reference to 9fa614d8310d which is a back out
    and it is referenced by 12 starred jobs:
        https://treeherder.mozilla.org/#/jobs?repo=autoland&filter-searchStr=android%20debug%20cpp&tochange=9fa614d8310db9aabe85cc3c3cff6281fe1edb0c
    The raw data will show those 12 jobs.

    The returned data will look like this:
    {
       "44d29bac3654": [
          ["android-4-0-armv7-api15", "opt", "android-lint"],
          ["android-4-0-armv7-api15", "opt", "android-api-15-gradle-dependencies"],
        ]
    }
    """
    failures = defaultdict(list)
    option_collection_map = models.OptionCollection.objects.get_option_collection_map()
    fixed_by_commit_data_set = models.JobNote.objects.filter(
        failure_classification=2,
        created__gt=timezone.now() - timedelta(days=SETA_FIXED_BY_COMMIT_DAYS),
        text__isnull=False,
        job__repository__name__in=SETA_FIXED_BY_COMMIT_REPOS
    ).exclude(
        job__signature__build_platform__in=SETA_UNSUPPORTED_PLATFORMS
    ).exclude(
        text=""
    ).select_related('job', 'job__signature', 'job__job_type')

    # check if at least one fixed by commit job meets our requirements without populating queryset
    if not fixed_by_commit_data_set.exists():
        # logger.warn() is a deprecated alias of logger.warning()
        logger.warning("We couldn't find any fixed-by-commit jobs")
        return failures

    # now process the fixed by commit jobs in batches using django's queryset iterator
    for job_note in fixed_by_commit_data_set.iterator():
        # if we have http://hg.mozilla.org/rev/<rev> and <rev>, we will only use <rev>
        revision_id = job_note.text.strip('/')
        revision_id = revision_id.split('/')[-1]

        # This prevents the empty string case and ignores bug ids
        if not revision_id or len(revision_id) < 12:
            continue

        # We currently don't guarantee that text is actually a revision
        # Even if not perfect the main idea is that a bunch of jobs were annotated with
        # a unique identifier. The assumption is that the text is unique
        #
        # I've seen these values being used:
        #  * 12 char revision
        #  * 40 char revision
        #  * link to revision on hg
        #  * revisionA & revisionB
        #  * should be fixed by <revision>
        #  * bug id
        #
        # Note that if some jobs are annotated with the 12char revision and others with the
        # 40char revision we will have two disjunct set of failures
        #
        # Some of this will be improved in https://bugzilla.mozilla.org/show_bug.cgi?id=1323536

        try:
            # check if jobtype is supported by SETA (see treeherder/seta/settings.py)
            if job_note.job.signature.build_system_type != 'buildbot':
                if not job_note.job.job_type.name.startswith(tuple(SETA_SUPPORTED_TC_JOBTYPES)):
                    continue

            testtype = parse_testtype(
                build_system_type=job_note.job.signature.build_system_type,  # e.g. taskcluster
                job_type_name=job_note.job.job_type.name,  # e.g. Mochitest
                platform_option=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                ref_data_name=job_note.job.signature.name,  # buildername or task label
            )

            if testtype:
                if is_job_blacklisted(testtype):
                    continue
            else:
                # Lazy %s args: the string is only built if the record is emitted
                logger.warning('We were unable to parse %s/%s',
                               job_note.job.job_type.name, job_note.job.signature.name)
                continue

            # we now have a legit fixed-by-commit job failure
            failures[revision_id].append(unique_key(
                testtype=testtype,
                buildtype=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                platform=job_note.job.signature.build_platform
            ))
        except models.Job.DoesNotExist:
            logger.warning('job_note %s has no job associated to it', job_note.id)
            continue

    logger.warning("Number of fixed_by_commit revisions: %s", len(failures))
    return failures
def get_failures_fixed_by_commit():
    """ Return all job failures annotated with "fixed by commit" grouped by reason given for annotation.

    It returns a dictionary with a revision or bug ID as the key (bug ID is used for
    intermittent failures and the revision is used for real failures). For SETA's purposes
    we only care about revisions (real failures).
    The failures for *real failures* will contain all jobs that have been starred as "fixed by commit".

    Notice that the data does not tell you on which repository a root failure was fixed.

    For instance, in the raw data you might see a reference to 9fa614d8310d which is a back out
    and it is referenced by 12 starred jobs:
        https://treeherder.mozilla.org/#/jobs?repo=autoland&filter-searchStr=android%20debug%20cpp&tochange=9fa614d8310db9aabe85cc3c3cff6281fe1edb0c
    The raw data will show those 12 jobs.

    The returned data will look like this:
    {
       "44d29bac3654": [
          ["android-4-0-armv7-api15", "opt", "android-lint"],
          ["android-4-0-armv7-api15", "opt", "android-api-15-gradle-dependencies"],
        ]
    }
    """
    # revision_id -> list of unique_key tuples; defaultdict means keys are
    # only created when a job actually gets appended.
    failures = defaultdict(list)
    option_collection_map = models.OptionCollection.objects.get_option_collection_map()
    # failure_classification=2 is the "fixed by commit" annotation; limit to
    # recent notes on supported repos/platforms with non-empty text.
    fixed_by_commit_data_set = models.JobNote.objects.filter(
        failure_classification=2,
        created__gt=timezone.now() - timedelta(days=SETA_FIXED_BY_COMMIT_DAYS),
        text__isnull=False,
        job__repository__name__in=SETA_FIXED_BY_COMMIT_REPOS
    ).exclude(
        job__signature__build_platform__in=SETA_UNSUPPORTED_PLATFORMS
    ).exclude(
        text=""
    ).select_related('job', 'job__signature', 'job__job_type')

    # check if at least one fixed by commit job meets our requirements without populating queryset
    if not fixed_by_commit_data_set.exists():
        logger.warning("We couldn't find any fixed-by-commit jobs")
        return failures

    # now process the fixed by commit jobs in batches using django's queryset iterator
    for job_note in fixed_by_commit_data_set.iterator():
        # if we have http://hg.mozilla.org/rev/<rev> and <rev>, we will only use <rev>
        revision_id = job_note.text.strip('/')
        revision_id = revision_id.split('/')[-1]

        # This prevents the empty string case and ignores bug ids
        if not revision_id or len(revision_id) < 12:
            continue

        # We currently don't guarantee that text is actually a revision
        # Even if not perfect the main idea is that a bunch of jobs were annotated with
        # a unique identifier. The assumption is that the text is unique
        #
        # I've seen these values being used:
        #  * 12 char revision
        #  * 40 char revision
        #  * link to revision on hg
        #  * revisionA & revisionB
        #  * should be fixed by <revision>
        #  * bug id
        #
        # Note that if some jobs are annotated with the 12char revision and others with the
        # 40char revision we will have two disjunct set of failures
        #
        # Some of this will be improved in https://bugzilla.mozilla.org/show_bug.cgi?id=1323536

        try:
            # check if jobtype is supported by SETA (see treeherder/seta/settings.py)
            if job_note.job.signature.build_system_type != 'buildbot':
                if not job_note.job.job_type.name.startswith(tuple(SETA_SUPPORTED_TC_JOBTYPES)):
                    continue

            testtype = parse_testtype(
                build_system_type=job_note.job.signature.build_system_type,  # e.g. taskcluster
                job_type_name=job_note.job.job_type.name,  # e.g. Mochitest
                platform_option=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                ref_data_name=job_note.job.signature.name,  # buildername or task label
            )

            if testtype:
                # blacklisted test types are intentionally skipped, not logged
                if is_job_blacklisted(testtype):
                    continue
            else:
                logger.warning('We were unable to parse %s/%s',
                               job_note.job.job_type.name, job_note.job.signature.name)
                continue

            # we now have a legit fixed-by-commit job failure
            failures[revision_id].append(unique_key(
                testtype=testtype,
                buildtype=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                platform=job_note.job.signature.build_platform
            ))
        except models.Job.DoesNotExist:
            # select_related can still leave a dangling note if the job row is gone
            logger.warning('job_note %s has no job associated to it', job_note.id)
            continue

    logger.warning("Number of fixed_by_commit revisions: %s", len(failures))
    return failures
def get_failures_fixed_by_commit():
    """ Return all job failures annotated with "fixed by commit" grouped by reason given for annotation.

    It returns a dictionary with a revision or bug ID as the key (bug ID is used for
    intermittent failures and the revision is used for real failures). For SETA's purposes
    we only care about revisions (real failures).
    The failures for *real failures* will contain all jobs that have been starred as "fixed by commit".

    Notice that the data does not tell you on which repository a root failure was fixed.

    For instance, in the raw data you might see a reference to 9fa614d8310d which is a back out
    and it is referenced by 12 starred jobs:
        https://treeherder.mozilla.org/#/jobs?repo=autoland&filter-searchStr=android%20debug%20cpp&tochange=9fa614d8310db9aabe85cc3c3cff6281fe1edb0c
    The raw data will show those 12 jobs.

    The returned data will look like this:
    {
       "44d29bac3654": [
          ["android-4-0-armv7-api15", "opt", "android-lint"],
          ["android-4-0-armv7-api15", "opt", "android-api-15-gradle-dependencies"],
        ]
    }
    """
    failures = {}

    # We're assuming that sheriffs always anotate failed jobs correctly using "fixed by commit"
    for job_note in models.JobNote.objects.filter(failure_classification=2):
        # This prevents the empty string case and ignores bug ids
        if not job_note.text or len(job_note.text) < 12:
            continue

        # We currently don't guarantee that text is actually a revision
        # Even if not perfect the main idea is that a bunch of jobs were annotated with
        # a unique identifier. The assumption is that the text is unique
        #
        # I've seen these values being used:
        #  * 12 char revision
        #  * 40 char revision
        #  * link to revision on hg
        #  * revisionA & revisionB
        #  * should be fixed by <revision>
        #  * bug id
        #
        # Note that if some jobs are annotated with the 12char revision and others with the
        # 40char revision we will have two disjunct set of failures
        #
        # Some of this will be improved in https://bugzilla.mozilla.org/show_bug.cgi?id=1323536

        # NOTE(review): the key is created *before* we know parsing succeeds,
        # so revisions whose every job fails to parse remain as empty lists in
        # the result — preserved here; confirm whether callers rely on it.
        if job_note.text not in failures:
            failures[job_note.text] = []

        try:
            testtype = parse_testtype(
                build_system_type=job_note.job.signature.build_system_type,  # e.g. taskcluster
                job_type_name=job_note.job.job_type.name,  # e.g. Mochitest
                platform_option=job_note.job.get_platform_option(),  # e.g. 'opt'
                ref_data_name=job_note.job.signature.name,  # buildername or task label
            )

            # This prevents any jobs that we cannot parse properly
            if not testtype:
                # Lazy %s args instead of eager .format()
                logger.warning('We were unable to parse %s/%s',
                               job_note.job.job_type.name, job_note.job.signature.name)
                continue

            failures[job_note.text].append(unique_key(
                testtype=testtype,
                buildtype=job_note.job.get_platform_option(),  # e.g. 'opt'
                platform=job_note.job.signature.build_platform
            ))
        except models.Job.DoesNotExist:
            logger.warning('job_note %s has no job associated to it', job_note.id)
            continue

    # logger.warn() is a deprecated alias; use logger.warning()
    logger.warning("failures: %s", len(failures))
    return failures