def __init__(self, repo_name='mozilla-inbound'):
    """Build job-type keys and job-name records for *repo_name*.

    Defaults to querying all jobs on the mozilla-inbound branch.
    Populates ``self.jobtypes`` with unique_key tuples and
    ``self.jobnames`` with per-job metadata dicts; jobs whose testtype
    is rejected by ``_ignore`` are skipped and logged at the end.
    """
    self.jobtypes = []
    self.jobnames = []
    skipped = []
    runnable = RunnableJobsClient().query_runnable_jobs(repo_name)['results']
    for job in runnable:
        # testtype, e.g. "web-platform-tests-4", derived from either
        # "Ubuntu VM 12.04 x64 mozilla-inbound opt test web-platform-tests-4"
        # or "test-linux64/opt-web-platform-tests-4"
        testtype = job_testtype(job)
        if _ignore(testtype):
            skipped.append(job['ref_data_name'])
            continue
        key = unique_key(testtype=testtype,
                         buildtype=job['platform_option'],
                         platform=job['platform'])
        self.jobtypes.append(key)
        self.jobnames.append({
            'buildplatform': job['build_system_type'],
            'buildtype': job['platform_option'],
            'platform': job['platform'],
            'ref_data_name': job['ref_data_name'],
            'testtype': testtype,
        })
    for ref_data_name in sorted(skipped):
        LOG.info('Ignoring {}'.format(ref_data_name))
def build_ref_data_names(project, build_system):
    '''Map unique_key tuples to reference data names for *project*.

    We want all reference data names for every task that runs on a
    specific project. For example:
    * Buildbot - "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1"
    * TaskCluster = "test-linux64/opt-mochitest-webgl-e10s-1"

    A *build_system* of '*' matches every build system type.
    '''
    skipped = []
    names = {}
    for job in list_runnable_jobs(project)['results']:
        testtype = job_testtype(job)  # e.g. web-platform-tests-4
        if _ignore(testtype):
            skipped.append(job['ref_data_name'])
            continue
        key = unique_key(testtype=testtype,
                         buildtype=job['platform_option'],
                         platform=job['platform'])
        if build_system == '*' or job['build_system_type'] == build_system:
            names[key] = job['ref_data_name']
    for ref_data_name in sorted(skipped):
        LOG.info('Ignoring {}'.format(ref_data_name))
    return names
def _ref_data_names(build_system):
    '''Return {unique_key: ref_data_name} for jobs known to Treecodes.

    Sample data from Treecodes().query_jobnames() (skipping irrelevant fields):
    {
        "buildplatform": "buildbot",
        "buildtype": "debug",
        "platform": "windows8-64",
        "ref_data_name": "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1",
        "testtype": "web-platform-tests-1",
    },{
        "buildplatform": "taskcluster",
        "buildtype": "opt",
        "platform": "linux64",
        "ref_data_name": "test-linux64/opt-mochitest-webgl-e10s-1",
        "testtype": "mochitest-webgl-e10s-1",
    }

    A *build_system* of '*' matches every build platform.
    '''
    return {
        unique_key(testtype=job['testtype'],
                   buildtype=job['buildtype'],
                   platform=job['platform']): job['ref_data_name']
        for job in Treecodes().query_jobnames()
        if build_system == '*' or job['buildplatform'] == build_system
    }
def __init__(self, repo_name='mozilla-inbound'):
    """Collect job type keys and job name records for *repo_name*.

    Defaults to querying all jobs on the mozilla-inbound branch.
    Blacklisted test types are skipped and logged once at the end.
    """
    self.jobtypes = []
    self.jobnames = []
    blacklisted = []
    runnable = RunnableJobsClient().query_runnable_jobs(repo_name)['results']
    for job in runnable:
        # Derive the testtype (e.g. "web-platform-tests-4") from either a
        # buildbot buildername ("Ubuntu VM 12.04 x64 mozilla-inbound opt
        # test web-platform-tests-4") or a taskcluster label
        # ("test-linux64/opt-web-platform-tests-4").
        testtype = parse_testtype(
            build_system_type=job['build_system_type'],
            job_type_name=job['job_type_name'],
            platform_option=job['platform_option'],
            ref_data_name=job['ref_data_name'],
        )
        if is_job_blacklisted(testtype):
            blacklisted.append(job['ref_data_name'])
            continue
        key = unique_key(testtype=testtype,
                         buildtype=job['platform_option'],
                         platform=job['platform'])
        self.jobtypes.append(key)
        self.jobnames.append({
            'buildplatform': job['build_system_type'],
            'buildtype': job['platform_option'],
            'platform': job['platform'],
            'ref_data_name': job['ref_data_name'],
            'testtype': testtype,
        })
    for ref_data_name in sorted(blacklisted):
        LOG.info('Ignoring {}'.format(ref_data_name))
def _unique_key(job):
    """Build the canonical uniqueness key for *job*.

    This makes sure that we use a consistent key between our code and
    selecting jobs from the table.
    """
    testtype = str(job['testtype'])
    buildtype = str(job['platform_option'])
    platform = str(job['platform'])
    return unique_key(testtype=testtype, buildtype=buildtype, platform=platform)
def get_reference_data_names(project="autoland", build_system="taskcluster"):
    '''Return {unique_key: ref_data_name} for every task on *project*.

    We want all reference data names for every task that runs on a specific
    project. For example: "test-linux64/opt-mochitest-webgl-e10s-1"

    Results are cached for SETA_REF_DATA_NAMES_CACHE_TIMEOUT seconds to
    reduce API calls. A *build_system* of '*' matches every build system.
    '''
    # we cache the reference data names in order to reduce API calls
    cache_key = '{}-{}-ref_data_names_cache'.format(project, build_system)
    ref_data_names_map = cache.get(cache_key)
    if ref_data_names_map:
        return ref_data_names_map

    logger.debug("We did not hit the cache.")
    # cache expired so re-build the reference data names map; the map
    # contains the ref_data_name of every Treeherder task for this project
    ignored_jobs = []
    ref_data_names = {}

    runnable_jobs = list_runnable_jobs(project)

    for job in runnable_jobs:
        # get testtype e.g. web-platform-tests-4
        testtype = parse_testtype(
            build_system_type=job['build_system_type'],
            job_type_name=job['job_type_name'],
            platform_option=job['platform_option'],
            ref_data_name=job['ref_data_name'],
        )

        if not valid_platform(job['platform']):
            continue

        if is_job_blacklisted(testtype):
            ignored_jobs.append(job['ref_data_name'])
            if testtype:
                logger.debug(
                    'get_reference_data_names: blacklisted testtype {} for job {}'.format(
                        testtype, job
                    )
                )
            continue

        key = unique_key(
            testtype=testtype,
            buildtype=job['platform_option'],
            platform=job['platform']
        )

        if build_system == '*':
            ref_data_names[key] = job['ref_data_name']
        elif job['build_system_type'] == build_system:
            ref_data_names[key] = job['ref_data_name']

    # only emit the ignore summary when there is something to report
    if ignored_jobs:
        logger.debug('Ignoring %s', ', '.join(sorted(ignored_jobs)))

    # BUG FIX: this previously cached ``ref_data_names_map``, which is always
    # falsy on this (cache-miss) code path, so nothing was ever stored and
    # every call re-queried the API. Cache the freshly built map instead.
    cache.set(cache_key, ref_data_names, SETA_REF_DATA_NAMES_CACHE_TIMEOUT)

    return ref_data_names
def _unique_key(job):
    """Return the canonical uniqueness key for *job*.

    This makes sure that we use a consistent key between our code and
    selecting jobs from the table. Raises when the job carries an empty
    testtype, since such a key would be meaningless.
    """
    if not str(job['testtype']):
        raise Exception('Bad job {}'.format(job))
    return unique_key(
        testtype=str(job['testtype']),
        buildtype=str(job['platform_option']),
        platform=str(job['platform']),
    )
def _build_ref_data_names(self, project, build_system):
    '''Collect every reference data name for tasks that run on *project*.

    For example:
    * Buildbot - "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1"
    * TaskCluster = "test-linux64/opt-mochitest-webgl-e10s-1"

    A *build_system* of '*' matches every build system type.
    '''
    skipped = []
    names = {}
    for job in list_runnable_jobs(project)['results']:
        # get testtype e.g. web-platform-tests-4
        testtype = parse_testtype(
            build_system_type=job['build_system_type'],
            job_type_name=job['job_type_name'],
            platform_option=job['platform_option'],
            ref_data_name=job['ref_data_name'],
        )
        if not valid_platform(job['platform']):
            continue
        if is_job_blacklisted(testtype):
            skipped.append(job['ref_data_name'])
            continue
        if build_system == '*' or job['build_system_type'] == build_system:
            key = unique_key(testtype=testtype,
                             buildtype=job['platform_option'],
                             platform=job['platform'])
            names[key] = job['ref_data_name']
    for ref_data_name in sorted(skipped):
        logger.info('Ignoring {}'.format(ref_data_name))
    return names
def _build_ref_data_names(self, project, build_system):
    '''Return {unique_key: ref_data_name} for all tasks on *project*.

    Reference data names look like:
    * Buildbot - "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1"
    * TaskCluster = "test-linux64/opt-mochitest-webgl-e10s-1"

    A *build_system* of '*' matches every build system type.
    '''
    ignored = []
    result = {}
    for job in list_runnable_jobs(project):
        # get testtype e.g. web-platform-tests-4
        testtype = parse_testtype(
            build_system_type=job['build_system_type'],
            job_type_name=job['job_type_name'],
            platform_option=job['platform_option'],
            ref_data_name=job['ref_data_name'])
        if not valid_platform(job['platform']):
            continue
        if is_job_blacklisted(testtype):
            ignored.append(job['ref_data_name'])
            continue
        matches = build_system == '*' or job['build_system_type'] == build_system
        if matches:
            result[unique_key(testtype=testtype,
                              buildtype=job['platform_option'],
                              platform=job['platform'])] = job['ref_data_name']
    for name in sorted(ignored):
        logger.info('Ignoring %s', name)
    return result
def get_failures_fixed_by_commit():
    """ Return all job failures annotated with "fixed by commit" grouped by reason given for annotation.

    It returns a dictionary with a revision or bug ID as the key (bug ID is used for
    intermittent failures and the revision is used for real failures). For SETA's purposes
    we only care about revisions (real failures).
    The failures for *real failures* will contain all jobs that have been starred as "fixed by commit".

    Notice that the data does not tell you on which repository a root failure was fixed.

    For instance, in the raw data you might see a reference to 9fa614d8310d which is a back out
    and it is referenced by 12 starred jobs:
        https://treeherder.mozilla.org/#/jobs?repo=autoland&filter-searchStr=android%20debug%20cpp&tochange=9fa614d8310db9aabe85cc3c3cff6281fe1edb0c
    The raw data will show those 12 jobs.

    The returned data will look like this:
    {
       "44d29bac3654": [
          ["android-4-0-armv7-api15", "opt", "android-lint"],
          ["android-4-0-armv7-api15", "opt", "android-api-15-gradle-dependencies"],
        ]
    }
    """
    failures = defaultdict(list)
    option_collection_map = models.OptionCollection.objects.get_option_collection_map()

    fixed_by_commit_data_set = models.JobNote.objects.filter(
        failure_classification=2,
        created__gt=timezone.now() - timedelta(days=SETA_FIXED_BY_COMMIT_DAYS),
        text__isnull=False,
        job__repository__name__in=SETA_FIXED_BY_COMMIT_REPOS
    ).exclude(
        job__signature__build_platform__in=SETA_UNSUPPORTED_PLATFORMS
    ).exclude(
        text=""
    ).select_related('job', 'job__signature', 'job__job_type')

    # check if at least one fixed by commit job meets our requirements without populating queryset
    if not fixed_by_commit_data_set.exists():
        # BUG FIX: logger.warn is a deprecated alias of logger.warning
        logger.warning("We couldn't find any fixed-by-commit jobs")
        return failures

    # now process the fixed by commit jobs in batches using django's queryset iterator
    for job_note in fixed_by_commit_data_set.iterator():
        # if we have http://hg.mozilla.org/rev/<rev> and <rev>, we will only use <rev>
        revision_id = job_note.text.strip('/')
        revision_id = revision_id.split('/')[-1]

        # This prevents the empty string case and ignores bug ids
        if not revision_id or len(revision_id) < 12:
            continue

        # We currently don't guarantee that text is actually a revision
        # Even if not perfect the main idea is that a bunch of jobs were annotated with
        # a unique identifier. The assumption is that the text is unique
        #
        # I've seen these values being used:
        #  * 12 char revision
        #  * 40 char revision
        #  * link to revision on hg
        #  * revisionA & revisionB
        #  * should be fixed by <revision>
        #  * bug id
        #
        # Note that if some jobs are annotated with the 12char revision and others with the
        # 40char revision we will have two disjunct set of failures
        #
        # Some of this will be improved in https://bugzilla.mozilla.org/show_bug.cgi?id=1323536
        try:
            # check if jobtype is supported by SETA (see treeherder/seta/settings.py)
            if job_note.job.signature.build_system_type != 'buildbot':
                if not job_note.job.job_type.name.startswith(tuple(SETA_SUPPORTED_TC_JOBTYPES)):
                    continue

            testtype = parse_testtype(
                build_system_type=job_note.job.signature.build_system_type,  # e.g. taskcluster
                job_type_name=job_note.job.job_type.name,  # e.g. Mochitest
                platform_option=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                ref_data_name=job_note.job.signature.name,  # buildername or task label
            )

            if testtype:
                if is_job_blacklisted(testtype):
                    continue
            else:
                logger.warning('We were unable to parse {}/{}'.format(
                    job_note.job.job_type.name, job_note.job.signature.name))
                continue

            # we now have a legit fixed-by-commit job failure
            failures[revision_id].append(unique_key(
                testtype=testtype,
                buildtype=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                platform=job_note.job.signature.build_platform
            ))
        except models.Job.DoesNotExist:
            logger.warning('job_note {} has no job associated to it'.format(job_note.id))
            continue

    # BUG FIX: logger.warn -> logger.warning (warn has been deprecated since Python 3.3)
    logger.warning("Number of fixed_by_commit revisions: {}".format(len(failures)))
    return failures
def get_failures_fixed_by_commit():
    """ Return all job failures annotated with "fixed by commit" grouped by reason given for annotation.

    It returns a dictionary with a revision or bug ID as the key (bug ID is used for
    intermittent failures and the revision is used for real failures). For SETA's purposes
    we only care about revisions (real failures).
    The failures for *real failures* will contain all jobs that have been starred as "fixed by commit".

    Notice that the data does not tell you on which repository a root failure was fixed.

    For instance, in the raw data you might see a reference to 9fa614d8310d which is a back out
    and it is referenced by 12 starred jobs:
        https://treeherder.mozilla.org/#/jobs?repo=autoland&filter-searchStr=android%20debug%20cpp&tochange=9fa614d8310db9aabe85cc3c3cff6281fe1edb0c
    The raw data will show those 12 jobs.

    The returned data will look like this:
    {
       "44d29bac3654": [
          ["android-4-0-armv7-api15", "opt", "android-lint"],
          ["android-4-0-armv7-api15", "opt", "android-api-15-gradle-dependencies"],
        ]
    }
    """
    failures = defaultdict(list)
    option_collection_map = models.OptionCollection.objects.get_option_collection_map()

    # NOTE(review): failure_classification=2 presumably means "fixed by commit"
    # in the FailureClassification table — confirm against the fixture data.
    fixed_by_commit_data_set = models.JobNote.objects.filter(
        failure_classification=2,
        created__gt=timezone.now() - timedelta(days=SETA_FIXED_BY_COMMIT_DAYS),
        text__isnull=False,
        job__repository__name__in=SETA_FIXED_BY_COMMIT_REPOS
    ).exclude(
        job__signature__build_platform__in=SETA_UNSUPPORTED_PLATFORMS
    ).exclude(
        text=""
    ).select_related('job', 'job__signature', 'job__job_type')

    # check if at least one fixed by commit job meets our requirements without populating queryset
    if not fixed_by_commit_data_set.exists():
        logger.warning("We couldn't find any fixed-by-commit jobs")
        return failures

    # now process the fixed by commit jobs in batches using django's queryset iterator
    for job_note in fixed_by_commit_data_set.iterator():
        # if we have http://hg.mozilla.org/rev/<rev> and <rev>, we will only use <rev>
        revision_id = job_note.text.strip('/')
        revision_id = revision_id.split('/')[-1]

        # This prevents the empty string case and ignores bug ids
        if not revision_id or len(revision_id) < 12:
            continue

        # We currently don't guarantee that text is actually a revision
        # Even if not perfect the main idea is that a bunch of jobs were annotated with
        # a unique identifier. The assumption is that the text is unique
        #
        # I've seen these values being used:
        #  * 12 char revision
        #  * 40 char revision
        #  * link to revision on hg
        #  * revisionA & revisionB
        #  * should be fixed by <revision>
        #  * bug id
        #
        # Note that if some jobs are annotated with the 12char revision and others with the
        # 40char revision we will have two disjunct set of failures
        #
        # Some of this will be improved in https://bugzilla.mozilla.org/show_bug.cgi?id=1323536
        try:
            # check if jobtype is supported by SETA (see treeherder/seta/settings.py)
            if job_note.job.signature.build_system_type != 'buildbot':
                if not job_note.job.job_type.name.startswith(tuple(SETA_SUPPORTED_TC_JOBTYPES)):
                    continue

            testtype = parse_testtype(
                build_system_type=job_note.job.signature.build_system_type,  # e.g. taskcluster
                job_type_name=job_note.job.job_type.name,  # e.g. Mochitest
                platform_option=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                ref_data_name=job_note.job.signature.name,  # buildername or task label
            )

            # unparseable or blacklisted testtypes contribute nothing to the result
            if testtype:
                if is_job_blacklisted(testtype):
                    continue
            else:
                logger.warning('We were unable to parse %s/%s',
                               job_note.job.job_type.name, job_note.job.signature.name)
                continue

            # we now have a legit fixed-by-commit job failure
            failures[revision_id].append(unique_key(
                testtype=testtype,
                buildtype=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                platform=job_note.job.signature.build_platform
            ))
        except models.Job.DoesNotExist:
            # the note's job row disappeared; skip the note rather than crash
            logger.warning('job_note %s has no job associated to it', job_note.id)
            continue

    logger.warning("Number of fixed_by_commit revisions: %s", len(failures))
    return failures
def unique_identifier(self):
    """Return the unique_key tuple that identifies this record."""
    return unique_key(
        testtype=self.testtype,
        buildtype=self.buildtype,
        platform=self.platform,
    )
def get_failures_fixed_by_commit():
    """ Return all job failures annotated with "fixed by commit" grouped by reason given for annotation.

    It returns a dictionary with a revision or bug ID as the key (bug ID is used for
    intermittent failures and the revision is used for real failures). For SETA's purposes
    we only care about revisions (real failures).
    The failures for *real failures* will contain all jobs that have been starred as "fixed by commit".

    Notice that the data does not tell you on which repository a root failure was fixed.

    For instance, in the raw data you might see a reference to 9fa614d8310d which is a back out
    and it is referenced by 12 starred jobs:
        https://treeherder.mozilla.org/#/jobs?repo=autoland&filter-searchStr=android%20debug%20cpp&tochange=9fa614d8310db9aabe85cc3c3cff6281fe1edb0c
    The raw data will show those 12 jobs.

    The returned data will look like this:
    {
       "44d29bac3654": [
          ["android-4-0-armv7-api15", "opt", "android-lint"],
          ["android-4-0-armv7-api15", "opt", "android-api-15-gradle-dependencies"],
        ]
    }
    """
    failures = {}

    # We're assuming that sheriffs always annotate failed jobs correctly using "fixed by commit"
    for job_note in models.JobNote.objects.filter(failure_classification=2):
        # This prevents the empty string case and ignores bug ids
        if not job_note.text or len(job_note.text) < 12:
            continue

        # We currently don't guarantee that text is actually a revision
        # Even if not perfect the main idea is that a bunch of jobs were annotated with
        # a unique identifier. The assumption is that the text is unique
        #
        # I've seen these values being used:
        #  * 12 char revision
        #  * 40 char revision
        #  * link to revision on hg
        #  * revisionA & revisionB
        #  * should be fixed by <revision>
        #  * bug id
        #
        # Note that if some jobs are annotated with the 12char revision and others with the
        # 40char revision we will have two disjunct set of failures
        #
        # Some of this will be improved in https://bugzilla.mozilla.org/show_bug.cgi?id=1323536
        if job_note.text not in failures:
            failures[job_note.text] = []

        try:
            testtype = parse_testtype(
                build_system_type=job_note.job.signature.build_system_type,  # e.g. taskcluster
                job_type_name=job_note.job.job_type.name,  # e.g. Mochitest
                platform_option=job_note.job.get_platform_option(),  # e.g. 'opt'
                ref_data_name=job_note.job.signature.name,  # buildername or task label
            )
            # This prevents any jobs that we cannot parse properly
            if not testtype:
                logger.warning('We were unable to parse {}/{}'.format(
                    job_note.job.job_type.name, job_note.job.signature.name))
                continue

            failures[job_note.text].append(unique_key(
                testtype=testtype,
                buildtype=job_note.job.get_platform_option(),  # e.g. 'opt'
                platform=job_note.job.signature.build_platform
            ))
        except models.Job.DoesNotExist:
            logger.warning('job_note {} has no job associated to it'.format(job_note.id))
            continue

    # BUG FIX: logger.warn is a deprecated alias (since Python 3.3); use warning
    logger.warning("failures: {}".format(len(failures)))
    return failures