Ejemplo n.º 1
0
def _sanitize_data(runnable_jobs_data):
    """We receive data from runnable jobs api and return the sanitized data that meets our needs.

    This is a loop to remove duplicates (including buildsystem -> * transformations if needed)
    By doing this, it allows us to have a single database query

    It returns sanitized_list which will contain a subset which excludes:
    * jobs that don't specify the platform
    * jobs that don't specify the testtype
    * if the job appears again, we replace build_system_type with '*'. By doing so, if a job appears
      under both 'buildbot' and 'taskcluster', its build_system_type will be '*'
    """
    # key -> build_system_type of the *first* job seen for that key
    job_build_system_type = {}
    # key -> the dict stored in sanitized_list, so duplicates can be updated
    # in O(1) instead of scanning the list with list.index()
    job_by_key = {}
    sanitized_list = []
    for job in runnable_jobs_data:
        if not valid_platform(job['platform']):
            logger.info('Invalid platform %s', job['platform'])
            continue

        testtype = parse_testtype(
            build_system_type=job['build_system_type'],
            job_type_name=job['job_type_name'],
            platform_option=job['platform_option'],
            ref_data_name=job['ref_data_name']
        )

        if not testtype:
            continue

        # NOTE: This is *all* the data we need from the runnable API
        new_job = {
            'build_system_type': job['build_system_type'],  # e.g. {buildbot,taskcluster,*}
            'platform': job['platform'],  # e.g. windows8-64
            'platform_option': job['platform_option'],  # e.g. {opt,debug}
            'testtype': testtype,  # e.g. web-platform-tests-1
        }
        key = _unique_key(new_job)

        # Let's build a map of all the jobs and if duplicated change the build_system_type to *
        if key not in job_build_system_type:
            job_build_system_type[key] = job['build_system_type']
            job_by_key[key] = new_job
            sanitized_list.append(new_job)
        elif job['build_system_type'] != job_build_system_type[key]:
            # Seen under more than one build system: mark the stored job with '*'.
            # BUG FIX: the previous code copied the first-seen type into new_job
            # and located it with sanitized_list.index(new_job); on a *third*
            # duplicate the stored job had already been rewritten to '*', so
            # index() raised ValueError. Updating via job_by_key is idempotent.
            job_by_key[key]['build_system_type'] = '*'

    return sanitized_list
Ejemplo n.º 2
0
def _sanitize_data(runnable_jobs_data):
    """We receive data from runnable jobs api and return the sanitized data that meets our needs.

    This is a loop to remove duplicates (including buildsystem -> * transformations if needed)
    By doing this, it allows us to have a single database query

    It returns sanitized_list which will contain a subset which excludes:
    * jobs that don't specify the platform
    * jobs that don't specify the testtype
    * if the job appears again, we replace build_system_type with '*'. By doing so, if a job appears
      under both 'buildbot' and 'taskcluster', its build_system_type will be '*'
    """
    # key -> build_system_type of the *first* job seen for that key
    job_build_system_type = {}
    # key -> the dict stored in sanitized_list; lets duplicates be updated in
    # O(1) rather than scanning the list with list.index()
    job_by_key = {}
    sanitized_list = []
    for job in runnable_jobs_data:
        if not valid_platform(job['platform']):
            logger.info('Invalid platform %s', job['platform'])
            continue

        testtype = parse_testtype(build_system_type=job['build_system_type'],
                                  job_type_name=job['job_type_name'],
                                  platform_option=job['platform_option'],
                                  ref_data_name=job['ref_data_name'])

        if not testtype:
            continue

        # NOTE: This is *all* the data we need from the runnable API
        new_job = {
            'build_system_type': job['build_system_type'],  # e.g. {buildbot,taskcluster,*}
            'platform': job['platform'],  # e.g. windows8-64
            'platform_option': job['platform_option'],  # e.g. {opt,debug}
            'testtype': testtype,  # e.g. web-platform-tests-1
        }
        key = _unique_key(new_job)

        # Let's build a map of all the jobs and if duplicated change the build_system_type to *
        if key not in job_build_system_type:
            job_build_system_type[key] = job['build_system_type']
            job_by_key[key] = new_job
            sanitized_list.append(new_job)
        elif job['build_system_type'] != job_build_system_type[key]:
            # Job appears under a second build system: replace the stored
            # job's build_system_type with '*'.
            # BUG FIX: the old code mutated new_job to match the stored entry
            # and then called sanitized_list.index(new_job); once the stored
            # entry had been rewritten to '*' (third duplicate), index() raised
            # ValueError. The key->job map avoids the lookup entirely.
            job_by_key[key]['build_system_type'] = '*'

    return sanitized_list
Ejemplo n.º 3
0
    def _build_ref_data_names(self, project, build_system):
        '''Map unique (testtype, buildtype, platform) keys to reference data names.

        We want all reference data names for every task that runs on a specific project.

        For example:
            * Buildbot - "Windows 8 64-bit mozilla-inbound debug test web-platform-tests-1"
            * TaskCluster = "test-linux64/opt-mochitest-webgl-e10s-1"
        '''
        ignored_jobs = []
        ref_data_names = {}

        for job in list_runnable_jobs(project)['results']:
            # Derive the testtype for this job, e.g. web-platform-tests-4
            testtype = parse_testtype(
                build_system_type=job['build_system_type'],
                job_type_name=job['job_type_name'],
                platform_option=job['platform_option'],
                ref_data_name=job['ref_data_name'],
            )

            if not valid_platform(job['platform']):
                continue

            if is_job_blacklisted(testtype):
                ignored_jobs.append(job['ref_data_name'])
                continue

            key = unique_key(
                testtype=testtype,
                buildtype=job['platform_option'],
                platform=job['platform'],
            )

            # '*' means "any build system"; otherwise require an exact match
            if build_system == '*' or job['build_system_type'] == build_system:
                ref_data_names[key] = job['ref_data_name']

        for ref_data_name in sorted(ignored_jobs):
            logger.info('Ignoring {}'.format(ref_data_name))

        return ref_data_names
Ejemplo n.º 4
0
def get_failures_fixed_by_commit():
    """ Return all job failures annotated with "fixed by commit" grouped by reason given for annotation.

        It returns a dictionary with a revision or bug ID as the key (bug ID is used for
        intermittent failures and the revision is used for real failures). For SETA's purposes
        we only care about revisions (real failures).
        The failures for *real failures* will contain all jobs that have been starred as "fixed by commit".

        Notice that the data does not tell you on which repository a root failure was fixed.

        For instance, in the raw data you might see a reference to 9fa614d8310d which is a back out
        and it is referenced by 12 starred jobs:
            https://treeherder.mozilla.org/#/jobs?repo=autoland&filter-searchStr=android%20debug%20cpp&tochange=9fa614d8310db9aabe85cc3c3cff6281fe1edb0c
        The raw data will show those 12 jobs.

        The returned data will look like this:
        {
           "44d29bac3654": [
              ["android-4-0-armv7-api15", "opt", "android-lint"],
              ["android-4-0-armv7-api15", "opt", "android-api-15-gradle-dependencies"],
            ]
        }
    """
    failures = defaultdict(list)
    option_collection_map = models.OptionCollection.objects.get_option_collection_map()

    fixed_by_commit_data_set = models.JobNote.objects.filter(
        failure_classification=2,
        created__gt=timezone.now() - timedelta(days=SETA_FIXED_BY_COMMIT_DAYS),
        text__isnull=False,
        job__repository__name__in=SETA_FIXED_BY_COMMIT_REPOS,
    ).exclude(
        job__signature__build_platform__in=SETA_UNSUPPORTED_PLATFORMS,
    ).exclude(
        text="",
    ).select_related('job', 'job__signature', 'job__job_type')

    # check if at least one fixed by commit job meets our requirements without populating queryset
    if not fixed_by_commit_data_set.exists():
        # FIX: logger.warn() is a deprecated alias of logger.warning()
        logger.warning("We couldn't find any fixed-by-commit jobs")
        return failures

    # now process the fixed by commit jobs in batches using django's queryset iterator
    for job_note in fixed_by_commit_data_set.iterator():
        # if we have http://hg.mozilla.org/rev/<rev> and <rev>, we will only use <rev>
        revision_id = job_note.text.strip('/')
        revision_id = revision_id.split('/')[-1]

        # This prevents the empty string case and ignores bug ids
        if not revision_id or len(revision_id) < 12:
            continue

        # We currently don't guarantee that text is actually a revision
        # Even if not perfect the main idea is that a bunch of jobs were annotated with
        # a unique identifier. The assumption is that the text is unique
        #
        # I've seen these values being used:
        #  * 12 char revision
        #  * 40 char revision
        #  * link to revision on hg
        #  * revisionA & revisionB
        #  * should be fixed by <revision>
        #  * bug id
        #
        # Note that if some jobs are annotated with the 12char revision and others with the
        # 40char revision we will have two disjunct set of failures
        #
        # Some of this will be improved in https://bugzilla.mozilla.org/show_bug.cgi?id=1323536

        try:
            # check if jobtype is supported by SETA (see treeherder/seta/settings.py)
            if job_note.job.signature.build_system_type != 'buildbot':
                if not job_note.job.job_type.name.startswith(tuple(SETA_SUPPORTED_TC_JOBTYPES)):
                    continue

            testtype = parse_testtype(
                build_system_type=job_note.job.signature.build_system_type,  # e.g. taskcluster
                job_type_name=job_note.job.job_type.name,  # e.g. Mochitest
                platform_option=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                ref_data_name=job_note.job.signature.name,  # buildername or task label
            )

            if testtype:
                if is_job_blacklisted(testtype):
                    continue
            else:
                # FIX: use lazy %-style logging args instead of eager .format()
                logger.warning('We were unable to parse %s/%s',
                               job_note.job.job_type.name, job_note.job.signature.name)
                continue

            # we now have a legit fixed-by-commit job failure
            failures[revision_id].append(unique_key(
                testtype=testtype,
                buildtype=job_note.job.get_platform_option(option_collection_map),  # e.g. 'opt'
                platform=job_note.job.signature.build_platform,
            ))
        except models.Job.DoesNotExist:
            logger.warning('job_note %s has no job associated to it', job_note.id)
            continue

    logger.warning("Number of fixed_by_commit revisions: %s", len(failures))
    return failures
Ejemplo n.º 5
0
def get_failures_fixed_by_commit():
    """ Return all job failures annotated with "fixed by commit" grouped by reason given for annotation.

        The result maps a revision or bug ID to the jobs starred against it (bug IDs are
        used for intermittent failures, revisions for real failures). SETA only cares
        about the revision entries (real failures).

        Note that the data does not say on which repository a root failure was fixed.

        For instance, in the raw data you might see a reference to 9fa614d8310d which is a back out
        and it is referenced by 12 starred jobs:
            https://treeherder.mozilla.org/#/jobs?repo=autoland&filter-searchStr=android%20debug%20cpp&tochange=9fa614d8310db9aabe85cc3c3cff6281fe1edb0c
        The raw data will show those 12 jobs.

        The returned data will look like this:
        {
           "44d29bac3654": [
              ["android-4-0-armv7-api15", "opt", "android-lint"],
              ["android-4-0-armv7-api15", "opt", "android-api-15-gradle-dependencies"],
            ]
        }
    """
    failures = defaultdict(list)
    option_collection_map = models.OptionCollection.objects.get_option_collection_map()

    recent_notes = models.JobNote.objects.filter(
        failure_classification=2,
        created__gt=timezone.now() - timedelta(days=SETA_FIXED_BY_COMMIT_DAYS),
        text__isnull=False,
        job__repository__name__in=SETA_FIXED_BY_COMMIT_REPOS,
    ).exclude(
        job__signature__build_platform__in=SETA_UNSUPPORTED_PLATFORMS,
    ).exclude(
        text="",
    ).select_related('job', 'job__signature', 'job__job_type')

    # Bail out early, without populating the queryset, if nothing matches
    if not recent_notes.exists():
        logger.warning("We couldn't find any fixed-by-commit jobs")
        return failures

    # Stream the matching notes in batches via Django's queryset iterator
    for note in recent_notes.iterator():
        # Both "http://hg.mozilla.org/rev/<rev>" and a bare "<rev>" reduce to <rev>
        revision_id = note.text.strip('/').split('/')[-1]

        # Skip empty strings and (shorter) bug IDs
        if not revision_id or len(revision_id) < 12:
            continue

        # The text is not guaranteed to be a revision; the assumption is only
        # that a batch of jobs was annotated with the same unique identifier.
        # Observed values include: 12-char revision, 40-char revision, hg link,
        # "revisionA & revisionB", "should be fixed by <revision>", bug id.
        # If some jobs use the 12-char form and others the 40-char form we end
        # up with two disjoint failure sets.
        # Some of this will be improved in https://bugzilla.mozilla.org/show_bug.cgi?id=1323536

        try:
            job = note.job
            # Only jobtypes supported by SETA (see treeherder/seta/settings.py)
            if job.signature.build_system_type != 'buildbot':
                if not job.job_type.name.startswith(tuple(SETA_SUPPORTED_TC_JOBTYPES)):
                    continue

            buildtype = job.get_platform_option(option_collection_map)  # e.g. 'opt'
            testtype = parse_testtype(
                build_system_type=job.signature.build_system_type,  # e.g. taskcluster
                job_type_name=job.job_type.name,  # e.g. Mochitest
                platform_option=buildtype,
                ref_data_name=job.signature.name,  # buildername or task label
            )

            if not testtype:
                logger.warning('We were unable to parse %s/%s',
                               job.job_type.name, job.signature.name)
                continue

            if is_job_blacklisted(testtype):
                continue

            # A legit fixed-by-commit job failure
            failures[revision_id].append(unique_key(
                testtype=testtype,
                buildtype=buildtype,
                platform=job.signature.build_platform,
            ))
        except models.Job.DoesNotExist:
            logger.warning('job_note %s has no job associated to it', note.id)
            continue

    logger.warning("Number of fixed_by_commit revisions: %s", len(failures))
    return failures
Ejemplo n.º 6
0
def get_failures_fixed_by_commit():
    """ Return all job failures annotated with "fixed by commit" grouped by reason given for annotation.

        It returns a dictionary with a revision or bug ID as the key (bug ID is used for
        intermittent failures and the revision is used for real failures). For SETA's purposes
        we only care about revisions (real failures).
        The failures for *real failures* will contain all jobs that have been starred as "fixed by commit".

        Notice that the data does not tell you on which repository a root failure was fixed.

        For instance, in the raw data you might see a reference to 9fa614d8310d which is a back out
        and it is referenced by 12 starred jobs:
            https://treeherder.mozilla.org/#/jobs?repo=autoland&filter-searchStr=android%20debug%20cpp&tochange=9fa614d8310db9aabe85cc3c3cff6281fe1edb0c
        The raw data will show those 12 jobs.

        The returned data will look like this:
        {
           "44d29bac3654": [
              ["android-4-0-armv7-api15", "opt", "android-lint"],
              ["android-4-0-armv7-api15", "opt", "android-api-15-gradle-dependencies"],
            ]
        }
    """
    failures = {}
    # We're assuming that sheriffs always annotate failed jobs correctly using "fixed by commit"
    for job_note in models.JobNote.objects.filter(failure_classification=2):
        # This prevents the empty string case and ignores bug ids
        if not job_note.text or len(job_note.text) < 12:
            continue

        # We currently don't guarantee that text is actually a revision
        # Even if not perfect the main idea is that a bunch of jobs were annotated with
        # a unique identifier. The assumption is that the text is unique
        #
        # I've seen these values being used:
        #  * 12 char revision
        #  * 40 char revision
        #  * link to revision on hg
        #  * revisionA & revisionB
        #  * should be fixed by <revision>
        #  * bug id
        #
        # Note that if some jobs are annotated with the 12char revision and others with the
        # 40char revision we will have two disjunct set of failures
        #
        # Some of this will be improved in https://bugzilla.mozilla.org/show_bug.cgi?id=1323536

        try:
            testtype = parse_testtype(
                build_system_type=job_note.job.signature.build_system_type,  # e.g. taskcluster
                job_type_name=job_note.job.job_type.name,  # e.g. Mochitest
                platform_option=job_note.job.get_platform_option(),  # e.g. 'opt'
                ref_data_name=job_note.job.signature.name,  # buildername or task label
            )
            # This prevents any jobs that we cannot parse properly
            if not testtype:
                # FIX: lazy %-style logging args instead of eager .format()
                logger.warning('We were unable to parse %s/%s',
                               job_note.job.job_type.name, job_note.job.signature.name)
                continue

            # BUG FIX: create the entry only when a valid job is recorded.
            # Previously failures[job_note.text] was pre-created before parsing,
            # which left empty-list entries behind for notes whose job could not
            # be parsed or raised DoesNotExist.
            failures.setdefault(job_note.text, []).append(unique_key(
                testtype=testtype,
                buildtype=job_note.job.get_platform_option(),  # e.g. 'opt'
                platform=job_note.job.signature.build_platform
            ))
        except models.Job.DoesNotExist:
            # FIX: logger.warn() is a deprecated alias of logger.warning()
            logger.warning('job_note %s has no job associated to it', job_note.id)
            continue

    logger.warning("failures: %s", len(failures))
    return failures