def test_buildername_translation(buildername, exp_result):
    """
    test getting the right platform based on the buildername
    """

    assert buildbot.extract_platform_info(buildername) == exp_result["platform"]
    assert buildbot.extract_job_type(buildername, default="not found") == exp_result["job_type"]
    assert buildbot.extract_build_type(buildername) == exp_result["build_type"]
    assert buildbot.extract_name_info(buildername) == exp_result["name"]
Beispiel #2
0
def test_buildername_translation(buildername, exp_result):
    """
    test getting the right platform based on the buildername
    """

    assert buildbot.extract_platform_info(buildername) == exp_result["platform"]
    assert buildbot.extract_job_type(buildername, default="not found") == exp_result["job_type"]
    assert buildbot.extract_build_type(buildername) == exp_result["build_type"]
    assert buildbot.extract_name_info(buildername) == exp_result["name"]
Beispiel #3
0
    def get_platform_regex_misses(self, analysis_type, build, buildername, job_guid):

        if not buildername:
            return

        # Match platforms
        platform_target = buildbot.extract_platform_info(buildername)

        if platform_target["os"] == "unknown":
            self._load_missed_buildername(analysis_type, buildername, job_guid)
Beispiel #4
0
    def get_platform_regex_misses(self, analysis_type, build, buildername,
                                  job_guid):

        if not buildername:
            return

        # Match platforms
        platform_target = buildbot.extract_platform_info(buildername)

        if platform_target['os'] == 'unknown':
            self._load_missed_buildername(analysis_type, buildername, job_guid)
    def get_buildername_data(self, attr, value, data):
        """Callback function for the buildername property in the pulse stream"""

        #set buildername
        data[attr] = value

        #extend data with platform attributes
        platform_info = buildbot.extract_platform_info(value)
        data.update(platform_info)

        #extend data with build type attributes
        data['buildtype'] = buildbot.extract_build_type(value)

        #extend data with job type data
        data['jobtype'] = buildbot.extract_job_type(value)

        job_name_info = buildbot.extract_name_info(value)
        data['test_name'] = job_name_info["name"]
        data.update(job_name_info)

        return data
Beispiel #6
0
    def get_buildername_data(self, attr, value, data):
        """Callback function for the buildername property in the pulse stream"""

        #set buildername
        data[attr] = value

        #extend data with platform attributes
        platform_info = buildbot.extract_platform_info(value)
        data.update(platform_info)

        #extend data with build type attributes
        data['buildtype'] = buildbot.extract_build_type(value)

        #extend data with job type data
        data['jobtype'] = buildbot.extract_job_type(value)

        job_name_info = buildbot.extract_name_info(value)
        data['test_name'] = job_name_info["name"]
        data.update(job_name_info)

        return data
Beispiel #7
0
    def transform(self,
                  data,
                  source,
                  revision_filter=None,
                  project_filter=None,
                  job_group_filter=None):
        """
        transform the buildapi structure into something we can ingest via
        our restful api
        """
        valid_projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)

        # loop to catch all the revisions
        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects,
                                          project_filter):
                continue

            for rev in revisions.iterkeys():
                if common.should_skip_revision(rev, revision_filter):
                    continue
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
        job_ids_seen_now = set()

        th_collections = {}

        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects,
                                          project_filter):
                continue

            for revision, jobs in revisions.items():
                if common.should_skip_revision(revision, revision_filter):
                    continue

                try:
                    resultset = revisions_lookup[project][revision]
                except KeyError:
                    logger.warning(
                        "skipping jobs since %s revision %s not yet ingested",
                        project, revision)
                    continue

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for job in jobs:
                    job_ids_seen_now.add(job['id'])

                    # Don't process jobs that were already present in this datasource
                    # the last time this task completed successfully.
                    if job['id'] in job_ids_seen_last_time:
                        continue

                    treeherder_data = {
                        'revision': revision,
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    buildername = job['buildername']
                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)

                    if (job_group_filter
                            and job_name_info.get('group_symbol', '').lower()
                            != job_group_filter.lower()):
                        continue

                    if source == 'pending':
                        request_id = job['id']
                    elif source == 'running':
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job['request_ids'][-1]

                    new_job = {
                        'job_guid':
                        common.generate_job_guid(request_id, buildername),
                        'name':
                        job_name_info.get('name', ''),
                        'job_symbol':
                        job_name_info.get('job_symbol', ''),
                        'group_name':
                        job_name_info.get('group_name', ''),
                        'group_symbol':
                        job_name_info.get('group_symbol', ''),
                        'reference_data_name':
                        buildername,
                        'state':
                        source,
                        'submit_timestamp':
                        job['submitted_at'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        # where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        'who':
                        'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername):
                            True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': buildername,
                                    'request_id': request_id
                                }
                            },
                        ]
                    }

                    if source == 'running':
                        new_job['start_timestamp'] = job['start_time']
                        # We store the original values to help debugging.
                        new_job['artifacts'].append({
                            'type': 'json',
                            'name': 'buildapi_running',
                            'log_urls': [],
                            'blob': {
                                'revision': revision,
                                'request_ids': job['request_ids'],
                                'submitted_at': job['submitted_at'],
                                'start_time': job['start_time'],
                            }
                        })

                    treeherder_data['job'] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection()

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d %s jobs, skipped %d previously seen",
                    num_new_jobs, source,
                    len(job_ids_seen_now) - num_new_jobs)

        return th_collections, job_ids_seen_now
Beispiel #8
0
    def transform(self, data, filter_to_project=None, filter_to_revision=None,
                  filter_to_job_group=None):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)
        missing_resultsets = defaultdict(set)

        projects = set(x.project for x in Datasource.objects.cached())

        for build in data['builds']:
            prop = build['properties']

            if 'buildername' not in prop:
                logger.warning("skipping builds-4hr job since no buildername found")
                continue

            if 'branch' not in prop:
                logger.warning("skipping builds-4hr job since no branch found: %s", prop['buildername'])
                continue

            if prop['branch'] not in projects:
                # Fuzzer jobs specify a branch of 'idle', and we intentionally don't display them.
                if prop['branch'] != 'idle':
                    logger.warning("skipping builds-4hr job on unknown branch %s: %s", prop['branch'], prop['buildername'])
                continue

            if filter_to_project and prop['branch'] != filter_to_project:
                continue

            prop['revision'] = prop.get('revision',
                                        prop.get('got_revision',
                                                 prop.get('sourcestamp', None)))

            if not prop['revision']:
                logger.warning("skipping builds-4hr job since no revision found: %s", prop['buildername'])
                continue

            prop['revision'] = prop['revision'][0:12]

            if prop['revision'] == prop.get('l10n_revision', None):
                # Some l10n jobs specify the l10n repo revision under 'revision', rather
                # than the gecko revision. If we did not skip these, it would result in
                # fetch_missing_resultsets requests that were guaranteed to 404.
                # This needs to be fixed upstream in builds-4hr by bug 1125433.
                logger.warning("skipping builds-4hr job since revision refers to wrong repo: %s", prop['buildername'])
                continue

            revisions[prop['branch']].append(prop['revision'])

        revisions_lookup = common.lookup_revisions(revisions)

        job_ids_seen_last_time = cache.get(CACHE_KEYS['complete'], set())
        job_ids_seen_now = set()

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 prop['revision'],
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # skip this job, at least at this point
                continue
            if filter_to_revision and filter_to_revision != resultset['revision']:
                continue

            # We record the id here rather than at the start of the loop, since we
            # must not count jobs whose revisions were not yet imported as processed,
            # or we'll never process them once we've ingested their associated revision.
            job_ids_seen_now.add(build['id'])

            # Don't process jobs that were already present in builds-4hr
            # the last time this task completed successfully.
            if build['id'] in job_ids_seen_last_time:
                continue

            platform_info = buildbot.extract_platform_info(prop['buildername'])
            job_name_info = buildbot.extract_name_info(prop['buildername'])

            if (filter_to_job_group and job_name_info.get('group_symbol', '').lower() !=
                    filter_to_job_group.lower()):
                continue

            treeherder_data = {
                'revision_hash': resultset['revision_hash'],
                'resultset_id': resultset['id'],
                'project': project,
                'coalesced': []
            }

            device_name = buildbot.get_device_or_unknown(
                job_name_info.get('name', ''),
                platform_info['vm']
            )

            log_reference = []
            if 'log_url' in prop:
                log_reference.append({
                    'url': prop['log_url'],
                    'name': 'buildbot_text'
                })

            # add structured logs to the list of log references
            if 'blobber_files' in prop:
                try:
                    blobber_files = json.loads(prop['blobber_files'])
                    for bf, url in blobber_files.items():
                        if bf and url and bf.endswith('_raw.log'):
                            log_reference.append({
                                'url': url,
                                'name': 'mozlog_json'
                            })
                except Exception as e:
                    logger.warning("invalid blobber_files json for build id %s (%s): %s",
                                   build['id'], prop['buildername'], e)

            try:
                job_guid_data = self.find_job_guid(build)
                # request_ids is mandatory, but can be found in several places.
                request_ids = prop.get('request_ids', build['request_ids'])
                # The last element in request_ids corresponds to the request id of this job,
                # the others are for the requests that were coalesced into this one.
                request_id = request_ids[-1]
            except KeyError:
                continue

            treeherder_data['coalesced'] = job_guid_data['coalesced']

            job = {
                'job_guid': job_guid_data['job_guid'],
                'name': job_name_info.get('name', ''),
                'job_symbol': job_name_info.get('job_symbol', ''),
                'group_name': job_name_info.get('group_name', ''),
                'group_symbol': job_name_info.get('group_symbol', ''),
                'reference_data_name': prop['buildername'],
                'product_name': prop.get('product', ''),
                'state': 'completed',
                'result': buildbot.RESULT_DICT[build['result']],
                'reason': build['reason'],
                # scheduler, if 'who' property is not present
                'who': prop.get('who', prop.get('scheduler', '')),
                'submit_timestamp': build['requesttime'],
                'start_timestamp': build['starttime'],
                'end_timestamp': build['endtime'],
                'machine': prop.get('slavename', 'unknown'),
                # build_url not present in all builds
                'build_url': prop.get('build_url', ''),
                # build_platform same as machine_platform
                'build_platform': {
                    # platform attributes sometimes parse without results
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'machine_platform': {
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'device_name': device_name,
                # pgo or non-pgo dependent on buildername parsing
                'option_collection': {
                    buildbot.extract_build_type(prop['buildername']): True
                },
                'log_references': log_reference,
                'artifacts': [
                    {
                        'type': 'json',
                        'name': 'buildapi',
                        'log_urls': [],
                        'blob': {
                            'buildername': build['properties']['buildername'],
                            'request_id': request_id
                        }
                    },
                ]
            }

            treeherder_data['job'] = job

            if project not in th_collections:
                th_collections[project] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add(th_job)

        if missing_resultsets and not filter_to_revision:
            common.fetch_missing_resultsets("builds4h", missing_resultsets, logger)

        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d completed jobs, skipped %d previously seen",
                    num_new_jobs, len(job_ids_seen_now) - num_new_jobs)

        return th_collections, job_ids_seen_now
Beispiel #9
0
    def transform(self, data):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)

        projects = set(x.project for x in Datasource.objects.cached())

        for build in data["builds"]:
            prop = build["properties"]

            if not "branch" in prop:
                logger.warning("property 'branch' not found in build4h")
                continue

            if not prop["branch"] in projects:
                logger.warning("skipping job on branch {0}".format(prop["branch"]))
                continue

            prop["revision"] = prop.get("revision", prop.get("got_revision", prop.get("sourcestamp", None)))

            if not prop["revision"]:
                logger.warning("property 'revision' not found in build4h")
                continue

            prop["revision"] = prop["revision"][0:12]
            revisions[prop["branch"]].append(prop["revision"])

        revisions_lookup = common.lookup_revisions(revisions)

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data["builds"]:
            prop = build["properties"]

            try:
                resultset = revisions_lookup[prop["branch"]][prop["revision"]]
            except KeyError:
                # this branch is not one of those we care about
                continue

            project = prop["branch"]

            treeherder_data = {
                "revision_hash": resultset["revision_hash"],
                "resultset_id": resultset["id"],
                "project": project,
                "coalesced": [],
            }

            platform_info = buildbot.extract_platform_info(prop["buildername"])
            job_name_info = buildbot.extract_name_info(prop["buildername"])

            if "log_url" in prop:
                log_reference = [{"url": prop["log_url"], "name": "builds-4h"}]
            else:
                log_reference = []

            job_guid_data = self.find_job_guid(build)

            treeherder_data["coalesced"] = job_guid_data["coalesced"]

            job = {
                "job_guid": job_guid_data["job_guid"],
                "name": job_name_info.get("name", ""),
                "job_symbol": job_name_info.get("job_symbol", ""),
                "group_name": job_name_info.get("group_name", ""),
                "group_symbol": job_name_info.get("group_symbol", ""),
                "buildername": prop["buildername"],
                "product_name": prop.get("product", ""),
                "state": "completed",
                "result": buildbot.RESULT_DICT[build["result"]],
                "reason": build["reason"],
                # scheduler, if 'who' property is not present
                "who": prop.get("who", prop.get("scheduler", "")),
                "submit_timestamp": build["requesttime"],
                "start_timestamp": build["starttime"],
                "end_timestamp": build["endtime"],
                "machine": prop.get("slavename", "unknown"),
                # build_url not present in all builds
                "build_url": prop.get("build_url", ""),
                # build_platform same as machine_platform
                "build_platform": {
                    # platform attributes sometimes parse without results
                    "os_name": platform_info.get("os", ""),
                    "platform": platform_info.get("os_platform", ""),
                    "architecture": platform_info.get("arch", ""),
                },
                "machine_platform": {
                    "os_name": platform_info.get("os", ""),
                    "platform": platform_info.get("os_platform", ""),
                    "architecture": platform_info.get("arch", ""),
                },
                # pgo or non-pgo dependent on buildername parsing
                "option_collection": {buildbot.extract_build_type(prop["buildername"]): True},
                "log_references": log_reference,
                "artifact": {"type": "", "name": "", "log_urls": [], "blob": ""},
            }

            treeherder_data["job"] = job

            if project not in th_collections:
                th_collections[project] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add(th_job)

        return th_collections
Beispiel #10
0
    def transform(self, data, source, revision_filter=None, project_filter=None,
                  job_group_filter=None):
        """
        transform the buildapi structure into something we can ingest via
        our restful api
        """
        valid_projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)
        missing_resultsets = defaultdict(set)

        # loop to catch all the revisions
        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            for rev, jobs in revisions.items():
                if common.should_skip_revision(rev, revision_filter):
                    continue
                for job in jobs:
                    if not common.is_blacklisted_buildername(job['buildername']):
                        # Add the revision to the list to be fetched so long as we
                        # find at least one valid job associated with it.
                        revision_dict[project].append(rev)
                        break

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
        job_ids_seen_now = set()

        th_collections = {}

        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            for revision, jobs in revisions.items():
                if common.should_skip_revision(revision, revision_filter):
                    continue

                try:
                    resultset = common.get_resultset(project,
                                                     revisions_lookup,
                                                     revision,
                                                     missing_resultsets,
                                                     logger)
                except KeyError:
                    # There was no matching resultset, skip the job.
                    continue

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for job in jobs:
                    buildername = job['buildername']
                    if common.is_blacklisted_buildername(buildername):
                        continue

                    job_ids_seen_now.add(job['id'])

                    # Don't process jobs that were already present in this datasource
                    # the last time this task completed successfully.
                    if job['id'] in job_ids_seen_last_time:
                        continue

                    treeherder_data = {
                        'revision_hash': resultset['revision_hash'],
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)

                    if (job_group_filter and job_name_info.get('group_symbol', '').lower() !=
                            job_group_filter.lower()):
                        continue

                    if source == 'pending':
                        request_id = job['id']
                    elif source == 'running':
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job['request_ids'][-1]

                    new_job = {
                        'job_guid': common.generate_job_guid(
                            request_id,
                            buildername
                        ),
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': buildername,
                        'state': source,
                        'submit_timestamp': job['submitted_at'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        # where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        'who': 'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': buildername,
                                    'request_id': request_id
                                }
                            },
                        ]
                    }

                    if source == 'running':
                        new_job['start_timestamp'] = job['start_time']
                        # We store the original values to help debugging.
                        new_job['artifacts'].append(
                            {
                                'type': 'json',
                                'name': 'buildapi_running',
                                'log_urls': [],
                                'blob': {
                                    'revision': revision,
                                    'request_ids': job['request_ids'],
                                    'submitted_at': job['submitted_at'],
                                    'start_time': job['start_time'],
                                }
                            }
                        )

                    treeherder_data['job'] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection()

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        if missing_resultsets and not revision_filter:
            common.fetch_missing_resultsets(source, missing_resultsets, logger)

        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d %s jobs, skipped %d previously seen",
                    num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs)

        return th_collections, job_ids_seen_now
    def transform(self, data):
        """
        transform the buildapi structure into something we can ingest via
        our restful api
        """
        projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)
        missing_resultsets = defaultdict(set)

        # loop to catch all the revisions
        for project, revisions in data['running'].items():
            # this skips those projects we don't care about
            if project not in projects:
                continue

            for rev, jobs in revisions.items():
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        th_collections = {}

        for project, revisions in data['running'].items():

            for revision, jobs in revisions.items():

                try:
                    resultset = common.get_resultset(project,
                                                     revisions_lookup,
                                                     revision,
                                                     missing_resultsets,
                                                     logger)
                except KeyError:
                    # skip this job, at least at this point
                    continue

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for running_job in jobs:
                    treeherder_data = {
                        'revision_hash': resultset['revision_hash'],
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    platform_info = buildbot.extract_platform_info(running_job['buildername'])
                    job_name_info = buildbot.extract_name_info(running_job['buildername'])
                    device_name = buildbot.get_device_or_unknown(
                        job_name_info.get('name', ''),
                        platform_info['vm']
                    )

                    new_job = {
                        'job_guid': common.generate_job_guid(
                            running_job['request_ids'][0],
                            running_job['submitted_at']
                        ),
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': running_job['buildername'],
                        'state': 'running',
                        'submit_timestamp': running_job['submitted_at'],
                        'start_timestamp': running_job['start_time'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        #where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        'device_name': device_name,
                        'who': 'unknown',

                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(running_job['buildername']): True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi_running',
                                'log_urls': [],
                                'blob': running_job
                            },
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': running_job['buildername'],
                                    'request_id': running_job['request_ids'][0]
                                }
                            },
                        ]
                    }

                    treeherder_data['job'] = new_job

                    if project not in th_collections:
                        th_collections[ project ] = TreeherderJobCollection(
                            job_type='update'
                            )

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        if missing_resultsets:
            common.fetch_missing_resultsets("running", missing_resultsets, logger)

        return th_collections
Beispiel #12
0
    def transform(self, data, source, revision_filter=None, project_filter=None,
                  job_group_filter=None):
        """
        transform the buildapi structure into something we can ingest via
        our restful api
        """
        valid_projects = set(Repository.objects.values_list('name', flat=True))
        revision_dict = defaultdict(list)

        # loop to catch all the revisions
        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            for rev in revisions.iterkeys():
                if common.should_skip_revision(rev, revision_filter):
                    continue
                revision_dict[project].append(rev)

        job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
        job_ids_seen_now = set()

        th_collections = {}

        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            revisions_seen_now_for_project = set()

            for revision, jobs in revisions.items():
                if common.should_skip_revision(revision, revision_filter):
                    continue

                # it should be quite rare for a job to be ingested before a
                # revision, but it could happen
                if revision not in revisions_seen_now_for_project and \
                   not Push.objects.filter(repository__name=project,
                                           revision__startswith=revision).exists():
                    logger.warning("skipping jobs since %s revision %s "
                                   "not yet ingested", project, revision)
                    continue
                revisions_seen_now_for_project.add(revision)

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for job in jobs:
                    job_ids_seen_now.add(job['id'])

                    # Don't process jobs that we saw the last time this task
                    # completed successfully.
                    if job['id'] in job_ids_seen_last_time:
                        continue

                    treeherder_data = {
                        'revision': revision,
                        'project': project,
                    }

                    buildername = job['buildername']
                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)

                    if (job_group_filter and job_name_info.get('group_symbol', '').lower() !=
                            job_group_filter.lower()):
                        continue

                    if source == 'pending':
                        request_id = job['id']
                    elif source == 'running':
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job['request_ids'][-1]

                    new_job = {
                        'job_guid': common.generate_job_guid(
                            request_id,
                            buildername
                        ),
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': buildername,
                        'state': source,
                        'submit_timestamp': job['submitted_at'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        # where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        'who': 'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': buildername,
                                    'request_id': request_id
                                }
                            },
                        ]
                    }

                    if source == 'running':
                        new_job['start_timestamp'] = job['start_time']

                    treeherder_data['job'] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection()

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d %s jobs, skipped %d previously seen",
                    num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs)

        return th_collections, job_ids_seen_now
Beispiel #13
0
    def transform(self, data, project_filter=None, revision_filter=None, job_group_filter=None):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)

        valid_projects = set(x.project for x in Datasource.objects.cached())

        for build in data["builds"]:
            try:
                prop = build["properties"]
                project = prop["branch"]

                if common.should_skip_project(project, valid_projects, project_filter):
                    continue

                if common.should_skip_revision(prop["revision"], revision_filter):
                    continue

            except KeyError as e:
                logger.warning("skipping builds-4hr job %s since missing property: %s", build["id"], str(e))
                continue

            revisions[project].append(prop["revision"])

        revisions_lookup = common.lookup_revisions(revisions)

        job_ids_seen_last_time = cache.get(CACHE_KEYS["complete"], set())
        job_ids_seen_now = set()

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data["builds"]:
            try:
                prop = build["properties"]
                project = prop["branch"]
                buildername = prop["buildername"]
                if common.should_skip_project(project, valid_projects, project_filter):
                    continue
                if common.should_skip_revision(prop["revision"], revision_filter):
                    continue
            except KeyError:
                continue

            try:
                resultset = revisions_lookup[project][prop["revision"]]
            except KeyError:
                logger.warning(
                    "skipping builds-4hr job %s since %s revision %s not yet ingested",
                    build["id"],
                    project,
                    prop["revision"],
                )
                continue

            # We record the id here rather than at the start of the loop, since we
            # must not count jobs whose revisions were not yet imported as processed,
            # or we'll never process them once we've ingested their associated revision.
            job_ids_seen_now.add(build["id"])

            # Don't process jobs that were already present in builds-4hr
            # the last time this task completed successfully.
            if build["id"] in job_ids_seen_last_time:
                continue

            platform_info = buildbot.extract_platform_info(buildername)
            job_name_info = buildbot.extract_name_info(buildername)

            if job_group_filter and job_name_info.get("group_symbol", "").lower() != job_group_filter.lower():
                continue

            treeherder_data = {
                "revision": prop["revision"],
                "resultset_id": resultset["id"],
                "project": project,
                "coalesced": [],
            }

            log_reference = []
            if "log_url" in prop:
                log_reference.append({"url": prop["log_url"], "name": "buildbot_text"})

            # add structured logs to the list of log references
            if "blobber_files" in prop:
                try:
                    blobber_files = json.loads(prop["blobber_files"])
                    for bf, url in blobber_files.items():
                        if bf and url and bf.endswith("_errorsummary.log"):
                            log_reference.append({"url": url, "name": "errorsummary_json"})
                except Exception as e:
                    logger.warning("invalid blobber_files json for build id %s (%s): %s", build["id"], buildername, e)

            try:
                job_guid_data = self.find_job_guid(build)
                # request_ids is mandatory, but can be found in several places.
                request_ids = prop.get("request_ids", build["request_ids"])
                # The last element in request_ids corresponds to the request id of this job,
                # the others are for the requests that were coalesced into this one.
                request_id = request_ids[-1]
            except KeyError:
                continue

            treeherder_data["coalesced"] = job_guid_data["coalesced"]

            job = {
                "job_guid": job_guid_data["job_guid"],
                "name": job_name_info.get("name", ""),
                "job_symbol": job_name_info.get("job_symbol", ""),
                "group_name": job_name_info.get("group_name", ""),
                "group_symbol": job_name_info.get("group_symbol", ""),
                "reference_data_name": buildername,
                "product_name": prop.get("product", ""),
                "state": "completed",
                "result": buildbot.RESULT_DICT[build["result"]],
                "reason": build["reason"],
                # scheduler, if 'who' property is not present
                "who": prop.get("who", prop.get("scheduler", "")),
                "submit_timestamp": build["requesttime"],
                "start_timestamp": build["starttime"],
                "end_timestamp": build["endtime"],
                "machine": prop.get("slavename", "unknown"),
                # build_platform same as machine_platform
                "build_platform": {
                    # platform attributes sometimes parse without results
                    "os_name": platform_info.get("os", ""),
                    "platform": platform_info.get("os_platform", ""),
                    "architecture": platform_info.get("arch", ""),
                },
                "machine_platform": {
                    "os_name": platform_info.get("os", ""),
                    "platform": platform_info.get("os_platform", ""),
                    "architecture": platform_info.get("arch", ""),
                },
                # pgo or non-pgo dependent on buildername parsing
                "option_collection": {buildbot.extract_build_type(buildername): True},
                "log_references": log_reference,
                "artifacts": [
                    {
                        "type": "json",
                        "name": "buildapi",
                        "log_urls": [],
                        "blob": {"buildername": buildername, "request_id": request_id},
                    }
                ],
            }

            treeherder_data["job"] = job

            if project not in th_collections:
                th_collections[project] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add(th_job)

        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info(
            "Imported %d completed jobs, skipped %d previously seen", num_new_jobs, len(job_ids_seen_now) - num_new_jobs
        )

        return th_collections, job_ids_seen_now
Beispiel #14
0
    def transform(self, data, source, revision_filter=None, project_filter=None, job_group_filter=None):
        """
        transform the buildapi structure into something we can ingest via
        our restful api
        """
        valid_projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)

        # loop to catch all the revisions
        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            for rev in revisions.iterkeys():
                if common.should_skip_revision(rev, revision_filter):
                    continue
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
        job_ids_seen_now = set()

        th_collections = {}

        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            for revision, jobs in revisions.items():
                if common.should_skip_revision(revision, revision_filter):
                    continue

                try:
                    resultset = revisions_lookup[project][revision]
                except KeyError:
                    logger.warning("skipping jobs since %s revision %s not yet ingested", project, revision)
                    continue

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for job in jobs:
                    job_ids_seen_now.add(job["id"])

                    # Don't process jobs that were already present in this datasource
                    # the last time this task completed successfully.
                    if job["id"] in job_ids_seen_last_time:
                        continue

                    treeherder_data = {"revision": revision, "resultset_id": resultset["id"], "project": project}

                    buildername = job["buildername"]
                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)

                    if job_group_filter and job_name_info.get("group_symbol", "").lower() != job_group_filter.lower():
                        continue

                    if source == "pending":
                        request_id = job["id"]
                    elif source == "running":
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job["request_ids"][-1]

                    new_job = {
                        "job_guid": common.generate_job_guid(request_id, buildername),
                        "name": job_name_info.get("name", ""),
                        "job_symbol": job_name_info.get("job_symbol", ""),
                        "group_name": job_name_info.get("group_name", ""),
                        "group_symbol": job_name_info.get("group_symbol", ""),
                        "reference_data_name": buildername,
                        "state": source,
                        "submit_timestamp": job["submitted_at"],
                        "build_platform": {
                            "os_name": platform_info["os"],
                            "platform": platform_info["os_platform"],
                            "architecture": platform_info["arch"],
                        },
                        # where are we going to get this data from?
                        "machine_platform": {
                            "os_name": platform_info["os"],
                            "platform": platform_info["os_platform"],
                            "architecture": platform_info["arch"],
                        },
                        "who": "unknown",
                        "option_collection": {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True
                        },
                        "log_references": [],
                        "artifacts": [
                            {
                                "type": "json",
                                "name": "buildapi",
                                "log_urls": [],
                                "blob": {"buildername": buildername, "request_id": request_id},
                            }
                        ],
                    }

                    if source == "running":
                        new_job["start_timestamp"] = job["start_time"]
                        # We store the original values to help debugging.
                        new_job["artifacts"].append(
                            {
                                "type": "json",
                                "name": "buildapi_running",
                                "log_urls": [],
                                "blob": {
                                    "revision": revision,
                                    "request_ids": job["request_ids"],
                                    "submitted_at": job["submitted_at"],
                                    "start_time": job["start_time"],
                                },
                            }
                        )

                    treeherder_data["job"] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection()

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info(
            "Imported %d %s jobs, skipped %d previously seen",
            num_new_jobs,
            source,
            len(job_ids_seen_now) - num_new_jobs,
        )

        return th_collections, job_ids_seen_now
Beispiel #15
0
    def transform(self,
                  data,
                  project_filter=None,
                  revision_filter=None,
                  job_group_filter=None):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)

        valid_projects = set(x.project for x in Datasource.objects.cached())

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']

                if common.should_skip_project(project, valid_projects,
                                              project_filter):
                    continue

                if common.should_skip_revision(prop['revision'],
                                               revision_filter):
                    continue

            except KeyError as e:
                logger.warning(
                    "skipping builds-4hr job %s since missing property: %s",
                    build['id'], str(e))
                continue

            revisions[project].append(prop['revision'])

        revisions_lookup = common.lookup_revisions(revisions)

        job_ids_seen_last_time = cache.get(CACHE_KEYS['complete'], set())
        job_ids_seen_now = set()

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                buildername = prop['buildername']
                if common.should_skip_project(project, valid_projects,
                                              project_filter):
                    continue
                if common.should_skip_revision(prop['revision'],
                                               revision_filter):
                    continue
            except KeyError:
                continue

            try:
                resultset = revisions_lookup[project][prop['revision']]
            except KeyError:
                logger.warning(
                    "skipping builds-4hr job %s since %s revision %s not yet ingested",
                    build['id'], project, prop['revision'])
                continue

            # We record the id here rather than at the start of the loop, since we
            # must not count jobs whose revisions were not yet imported as processed,
            # or we'll never process them once we've ingested their associated revision.
            job_ids_seen_now.add(build['id'])

            # Don't process jobs that were already present in builds-4hr
            # the last time this task completed successfully.
            if build['id'] in job_ids_seen_last_time:
                continue

            platform_info = buildbot.extract_platform_info(buildername)
            job_name_info = buildbot.extract_name_info(buildername)

            if (job_group_filter and job_name_info.get(
                    'group_symbol', '').lower() != job_group_filter.lower()):
                continue

            treeherder_data = {
                'revision': prop['revision'],
                'resultset_id': resultset['id'],
                'project': project,
                'coalesced': []
            }

            log_reference = []
            if 'log_url' in prop:
                log_reference.append({
                    'url': prop['log_url'],
                    'name': 'buildbot_text'
                })

            # add structured logs to the list of log references
            if 'blobber_files' in prop:
                try:
                    blobber_files = json.loads(prop['blobber_files'])
                    for bf, url in blobber_files.items():
                        if bf and url and bf.endswith('_errorsummary.log'):
                            log_reference.append({
                                'url': url,
                                'name': 'errorsummary_json'
                            })
                except Exception as e:
                    logger.warning(
                        "invalid blobber_files json for build id %s (%s): %s",
                        build['id'], buildername, e)

            try:
                job_guid_data = self.find_job_guid(build)
                # request_ids is mandatory, but can be found in several places.
                request_ids = prop.get('request_ids', build['request_ids'])
                # The last element in request_ids corresponds to the request id of this job,
                # the others are for the requests that were coalesced into this one.
                request_id = request_ids[-1]
            except KeyError:
                continue

            treeherder_data['coalesced'] = job_guid_data['coalesced']

            job = {
                'job_guid':
                job_guid_data['job_guid'],
                'name':
                job_name_info.get('name', ''),
                'job_symbol':
                job_name_info.get('job_symbol', ''),
                'group_name':
                job_name_info.get('group_name', ''),
                'group_symbol':
                job_name_info.get('group_symbol', ''),
                'reference_data_name':
                buildername,
                'product_name':
                prop.get('product', ''),
                'state':
                'completed',
                'result':
                buildbot.RESULT_DICT[build['result']],
                'reason':
                build['reason'],
                # scheduler, if 'who' property is not present
                'who':
                prop.get('who', prop.get('scheduler', '')),
                'submit_timestamp':
                build['requesttime'],
                'start_timestamp':
                build['starttime'],
                'end_timestamp':
                build['endtime'],
                'machine':
                prop.get('slavename', 'unknown'),
                # build_platform same as machine_platform
                'build_platform': {
                    # platform attributes sometimes parse without results
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'machine_platform': {
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                # pgo or non-pgo dependent on buildername parsing
                'option_collection': {
                    buildbot.extract_build_type(buildername): True
                },
                'log_references':
                log_reference,
                'artifacts': [
                    {
                        'type': 'json',
                        'name': 'buildapi',
                        'log_urls': [],
                        'blob': {
                            'buildername': buildername,
                            'request_id': request_id
                        }
                    },
                ]
            }

            treeherder_data['job'] = job

            if project not in th_collections:
                th_collections[project] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add(th_job)

        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d completed jobs, skipped %d previously seen",
                    num_new_jobs,
                    len(job_ids_seen_now) - num_new_jobs)

        return th_collections, job_ids_seen_now
    def transform(self, data):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)
        missing_resultsets = defaultdict(set)

        projects = set(x.project for x in Datasource.objects.cached())

        for build in data['builds']:
            prop = build['properties']

            if not 'branch' in prop:
                logger.warning("property 'branch' not found in build4h")
                continue

            if not prop['branch'] in projects:
                logger.warning("skipping job on unsupported branch {0}".format(prop['branch']))
                continue

            prop['revision'] = prop.get('revision',
                                        prop.get('got_revision',
                                        prop.get('sourcestamp', None)))

            if not prop['revision']:
                logger.warning("property 'revision' not found in build4h")
                continue

            prop['revision'] = prop['revision'][0:12]
            revisions[prop['branch']].append(prop['revision'])

        revisions_lookup = common.lookup_revisions(revisions)

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                artifact_build = copy.deepcopy(build)
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 prop['revision'],
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # skip this job, at least at this point
                continue

            treeherder_data = {
                'revision_hash': resultset['revision_hash'],
                'resultset_id': resultset['id'],
                'project': project,
                'coalesced': []
            }

            platform_info = buildbot.extract_platform_info(prop['buildername'])
            job_name_info = buildbot.extract_name_info(prop['buildername'])

            device_name = buildbot.get_device_or_unknown(
                job_name_info.get('name', ''),
                platform_info['vm']
            )

            if 'log_url' in prop:
                log_reference = [{
                    'url': prop['log_url'],
                    'name': 'builds-4h'
                }]
            else:
                log_reference = []

            # request_id and request_time are mandatory
            # and they can be found in a couple of different places
            try:
                job_guid_data = self.find_job_guid(build)
                request_ids = build['properties'].get('request_ids',
                                      build['request_ids'])
            except KeyError:
                continue

            treeherder_data['coalesced'] = job_guid_data['coalesced']

            def prop_remove(field):
                try:
                    del(artifact_build['properties'][field])
                except:
                    pass

            prop_remove("product")
            prop_remove("project")
            prop_remove("buildername")
            prop_remove("slavename")
            prop_remove("build_url")
            prop_remove("log_url")
            prop_remove("slavebuilddir")
            prop_remove("branch")
            prop_remove("repository")
            prop_remove("revision")

            del(artifact_build['requesttime'])
            del(artifact_build['starttime'])
            del(artifact_build['endtime'])


            job = {
                'job_guid': job_guid_data['job_guid'],
                'name': job_name_info.get('name', ''),
                'job_symbol': job_name_info.get('job_symbol', ''),
                'group_name': job_name_info.get('group_name', ''),
                'group_symbol': job_name_info.get('group_symbol', ''),
                'reference_data_name': prop['buildername'],
                'product_name': prop.get('product', ''),
                'state': 'completed',
                'result': buildbot.RESULT_DICT[build['result']],
                'reason': build['reason'],
                #scheduler, if 'who' property is not present
                'who': prop.get('who', prop.get('scheduler', '')),
                'submit_timestamp': build['requesttime'],
                'start_timestamp': build['starttime'],
                'end_timestamp': build['endtime'],
                'machine': prop.get('slavename', 'unknown'),
                #build_url not present in all builds
                'build_url': prop.get('build_url', ''),
                #build_platform same as machine_platform
                'build_platform': {
                    #platform attributes sometimes parse without results
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'machine_platform': {
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'device_name': device_name,
                #pgo or non-pgo dependent on buildername parsing
                'option_collection': {
                    buildbot.extract_build_type(prop['buildername']): True
                },
                'log_references': log_reference,
                'artifacts': [
                    {
                        'type': 'json',
                        'name': 'buildapi_complete',
                        'log_urls': [],
                        'blob': artifact_build
                    },
                    {
                        'type': 'json',
                        'name': 'buildapi',
                        'log_urls': [],
                        'blob': {
                            'buildername': build['properties']['buildername'],
                            'request_id': request_ids[0]
                        }
                    },
                ]
            }

            treeherder_data['job'] = job

            if project not in th_collections:
                th_collections[ project ] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add( th_job )

        if missing_resultsets:
            common.fetch_missing_resultsets("builds4h", missing_resultsets, logger)

        return th_collections
Beispiel #17
0
    def transform(self, data):
        """
        transform the buildapi structure into something we can ingest via
        our restful api
        """
        projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)
        missing_resultsets = defaultdict(set)

        # loop to catch all the revisions
        for project, revisions in data['running'].items():
            # this skips those projects we don't care about
            if project not in projects:
                continue

            for rev, jobs in revisions.items():
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        th_collections = {}

        for project, revisions in data['running'].items():

            for revision, jobs in revisions.items():

                try:
                    resultset = common.get_resultset(project, revisions_lookup,
                                                     revision,
                                                     missing_resultsets,
                                                     logger)
                except KeyError:
                    # skip this job, at least at this point
                    continue

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for running_job in jobs:
                    treeherder_data = {
                        'revision_hash': resultset['revision_hash'],
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    platform_info = buildbot.extract_platform_info(
                        running_job['buildername'])
                    job_name_info = buildbot.extract_name_info(
                        running_job['buildername'])
                    device_name = buildbot.get_device_or_unknown(
                        job_name_info.get('name', ''), platform_info['vm'])

                    new_job = {
                        'job_guid':
                        common.generate_job_guid(running_job['request_ids'][0],
                                                 running_job['submitted_at']),
                        'name':
                        job_name_info.get('name', ''),
                        'job_symbol':
                        job_name_info.get('job_symbol', ''),
                        'group_name':
                        job_name_info.get('group_name', ''),
                        'group_symbol':
                        job_name_info.get('group_symbol', ''),
                        'reference_data_name':
                        running_job['buildername'],
                        'state':
                        'running',
                        'submit_timestamp':
                        running_job['submitted_at'],
                        'start_timestamp':
                        running_job['start_time'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        #where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        'device_name':
                        device_name,
                        'who':
                        'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(running_job['buildername']):
                            True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi_running',
                                'log_urls': [],
                                'blob': running_job
                            },
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': running_job['buildername'],
                                    'request_id':
                                    max(running_job['request_ids'])
                                }
                            },
                        ]
                    }

                    treeherder_data['job'] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection(
                            job_type='update')

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        if missing_resultsets:
            common.fetch_missing_resultsets("running", missing_resultsets,
                                            logger)

        return th_collections
Beispiel #18
0
    def transform(self, data, source, filter_to_revision=None, filter_to_project=None,
                  filter_to_job_group=None):
        """
        transform the buildapi structure into something we can ingest via
        our restful api
        """
        projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)
        missing_resultsets = defaultdict(set)

        # loop to catch all the revisions
        for project, revisions in data[source].iteritems():
            # this skips those projects we don't care about
            if project not in projects:
                continue

            if filter_to_project and project != filter_to_project:
                continue

            for rev, jobs in revisions.items():
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        th_collections = {}

        for project, revisions in data[source].iteritems():

            for revision, jobs in revisions.items():

                try:
                    resultset = common.get_resultset(project,
                                                     revisions_lookup,
                                                     revision,
                                                     missing_resultsets,
                                                     logger)
                except KeyError:
                    # skip this job, at least at this point
                    continue

                if filter_to_revision and filter_to_revision != resultset['revision']:
                    continue

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for job in jobs:
                    treeherder_data = {
                        'revision_hash': resultset['revision_hash'],
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    platform_info = buildbot.extract_platform_info(job['buildername'])
                    job_name_info = buildbot.extract_name_info(job['buildername'])

                    if (filter_to_job_group and job_name_info.get('group_symbol', '').lower() !=
                            filter_to_job_group.lower()):
                        continue

                    if source == 'pending':
                        request_id = job['id']
                    elif source == 'running':
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job['request_ids'][-1]

                    device_name = buildbot.get_device_or_unknown(
                        job_name_info.get('name', ''),
                        platform_info['vm']
                    )

                    new_job = {
                        'job_guid': common.generate_job_guid(
                            request_id,
                            job['buildername']
                        ),
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': job['buildername'],
                        'state': source,
                        'submit_timestamp': job['submitted_at'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        # where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        'device_name': device_name,
                        'who': 'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(job['buildername']): True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': job['buildername'],
                                    'request_id': request_id
                                }
                            },
                        ]
                    }

                    if source == 'running':
                        new_job['start_timestamp'] = job['start_time']
                        # We store the original values to help debugging.
                        new_job['artifacts'].append(
                            {
                                'type': 'json',
                                'name': 'buildapi_running',
                                'log_urls': [],
                                'blob': {
                                    'revision': revision,
                                    'request_ids': job['request_ids'],
                                    'submitted_at': job['submitted_at'],
                                    'start_time': job['start_time'],
                                }
                            }
                        )

                    treeherder_data['job'] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection(
                            job_type='update'
                        )

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        if missing_resultsets and not filter_to_revision:
            common.fetch_missing_resultsets(source, missing_resultsets, logger)

        return th_collections
Beispiel #19
0
    def transform(self, data):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)
        missing_resultsets = defaultdict(set)

        projects = set(x.project for x in Datasource.objects.cached())

        for build in data['builds']:
            prop = build['properties']

            if not 'branch' in prop:
                logger.warning("property 'branch' not found in build4h")
                continue

            if not prop['branch'] in projects:
                logger.warning("skipping job on unsupported branch {0}".format(
                    prop['branch']))
                continue

            prop['revision'] = prop.get(
                'revision',
                prop.get('got_revision', prop.get('sourcestamp', None)))

            if not prop['revision']:
                logger.warning("property 'revision' not found in build4h")
                continue

            prop['revision'] = prop['revision'][0:12]
            revisions[prop['branch']].append(prop['revision'])

        revisions_lookup = common.lookup_revisions(revisions)

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                artifact_build = copy.deepcopy(build)
                resultset = common.get_resultset(project, revisions_lookup,
                                                 prop['revision'],
                                                 missing_resultsets, logger)
            except KeyError:
                # skip this job, at least at this point
                continue

            treeherder_data = {
                'revision_hash': resultset['revision_hash'],
                'resultset_id': resultset['id'],
                'project': project,
                'coalesced': []
            }

            platform_info = buildbot.extract_platform_info(prop['buildername'])
            job_name_info = buildbot.extract_name_info(prop['buildername'])

            device_name = buildbot.get_device_or_unknown(
                job_name_info.get('name', ''), platform_info['vm'])

            if 'log_url' in prop:
                log_reference = [{'url': prop['log_url'], 'name': 'builds-4h'}]
            else:
                log_reference = []

            # request_id and request_time are mandatory
            # and they can be found in a couple of different places
            try:
                job_guid_data = self.find_job_guid(build)
                request_ids = build['properties'].get('request_ids',
                                                      build['request_ids'])
            except KeyError:
                continue

            treeherder_data['coalesced'] = job_guid_data['coalesced']

            def prop_remove(field):
                try:
                    del (artifact_build['properties'][field])
                except:
                    pass

            prop_remove("product")
            prop_remove("project")
            prop_remove("buildername")
            prop_remove("slavename")
            prop_remove("build_url")
            prop_remove("log_url")
            prop_remove("slavebuilddir")
            prop_remove("branch")
            prop_remove("repository")
            prop_remove("revision")

            del (artifact_build['requesttime'])
            del (artifact_build['starttime'])
            del (artifact_build['endtime'])

            job = {
                'job_guid':
                job_guid_data['job_guid'],
                'name':
                job_name_info.get('name', ''),
                'job_symbol':
                job_name_info.get('job_symbol', ''),
                'group_name':
                job_name_info.get('group_name', ''),
                'group_symbol':
                job_name_info.get('group_symbol', ''),
                'reference_data_name':
                prop['buildername'],
                'product_name':
                prop.get('product', ''),
                'state':
                'completed',
                'result':
                buildbot.RESULT_DICT[build['result']],
                'reason':
                build['reason'],
                #scheduler, if 'who' property is not present
                'who':
                prop.get('who', prop.get('scheduler', '')),
                'submit_timestamp':
                build['requesttime'],
                'start_timestamp':
                build['starttime'],
                'end_timestamp':
                build['endtime'],
                'machine':
                prop.get('slavename', 'unknown'),
                #build_url not present in all builds
                'build_url':
                prop.get('build_url', ''),
                #build_platform same as machine_platform
                'build_platform': {
                    #platform attributes sometimes parse without results
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'machine_platform': {
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'device_name':
                device_name,
                #pgo or non-pgo dependent on buildername parsing
                'option_collection': {
                    buildbot.extract_build_type(prop['buildername']): True
                },
                'log_references':
                log_reference,
                'artifacts': [
                    {
                        'type': 'json',
                        'name': 'buildapi_complete',
                        'log_urls': [],
                        'blob': artifact_build
                    },
                    {
                        'type': 'json',
                        'name': 'buildapi',
                        'log_urls': [],
                        'blob': {
                            'buildername': build['properties']['buildername'],
                            'request_id': max(request_ids)
                        }
                    },
                ]
            }

            treeherder_data['job'] = job

            if project not in th_collections:
                th_collections[project] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add(th_job)

        if missing_resultsets:
            common.fetch_missing_resultsets("builds4h", missing_resultsets,
                                            logger)

        return th_collections
Beispiel #20
0
    def transform(self, data, filter_to_project=None, filter_to_revision=None,
                  filter_to_job_group=None):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)
        missing_resultsets = defaultdict(set)

        projects = set(x.project for x in Datasource.objects.cached())

        for build in data['builds']:
            prop = build['properties']

            if 'buildername' not in prop:
                logger.warning("skipping builds-4hr job since no buildername found")
                continue

            if 'branch' not in prop:
                logger.warning("skipping builds-4hr job since no branch found: %s", prop['buildername'])
                continue

            if prop['branch'] not in projects:
                # Fuzzer jobs specify a branch of 'idle', and we intentionally don't display them.
                if prop['branch'] != 'idle':
                    logger.warning("skipping builds-4hr job on unknown branch %s: %s", prop['branch'], prop['buildername'])
                continue

            if filter_to_project and prop['branch'] != filter_to_project:
                continue

            prop['revision'] = prop.get('revision',
                                        prop.get('got_revision',
                                                 prop.get('sourcestamp', None)))

            if not prop['revision']:
                logger.warning("skipping builds-4hr job since no revision found: %s", prop['buildername'])
                continue

            prop['revision'] = prop['revision'][0:12]

            if prop['revision'] == prop.get('l10n_revision', None):
                # Some l10n jobs specify the l10n repo revision under 'revision', rather
                # than the gecko revision. If we did not skip these, it would result in
                # fetch_missing_resultsets requests that were guaranteed to 404.
                # This needs to be fixed upstream in builds-4hr by bug 1125433.
                logger.warning("skipping builds-4hr job since revision refers to wrong repo: %s", prop['buildername'])
                continue

            revisions[prop['branch']].append(prop['revision'])

        revisions_lookup = common.lookup_revisions(revisions)

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 prop['revision'],
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # skip this job, at least at this point
                continue
            if filter_to_revision and filter_to_revision != resultset['revision']:
                continue

            platform_info = buildbot.extract_platform_info(prop['buildername'])
            job_name_info = buildbot.extract_name_info(prop['buildername'])

            if (filter_to_job_group and job_name_info.get('group_symbol', '').lower() !=
                    filter_to_job_group.lower()):
                continue

            treeherder_data = {
                'revision_hash': resultset['revision_hash'],
                'resultset_id': resultset['id'],
                'project': project,
                'coalesced': []
            }

            device_name = buildbot.get_device_or_unknown(
                job_name_info.get('name', ''),
                platform_info['vm']
            )

            log_reference = []
            if 'log_url' in prop:
                log_reference.append({
                    'url': prop['log_url'],
                    'name': 'buildbot_text'
                })

            # add structured logs to the list of log references
            if 'blobber_files' in prop:
                blobber_files = json.loads(prop['blobber_files'])
                for bf, url in blobber_files.items():
                    if bf and url and bf.endswith('_raw.log'):
                        log_reference.append({
                            'url': url,
                            'name': 'mozlog_json'
                        })

            try:
                job_guid_data = self.find_job_guid(build)
                # request_ids is mandatory, but can be found in several places.
                request_ids = prop.get('request_ids', build['request_ids'])
                # The last element in request_ids corresponds to the request id of this job,
                # the others are for the requests that were coalesced into this one.
                request_id = request_ids[-1]
            except KeyError:
                continue

            treeherder_data['coalesced'] = job_guid_data['coalesced']

            job = {
                'job_guid': job_guid_data['job_guid'],
                'name': job_name_info.get('name', ''),
                'job_symbol': job_name_info.get('job_symbol', ''),
                'group_name': job_name_info.get('group_name', ''),
                'group_symbol': job_name_info.get('group_symbol', ''),
                'reference_data_name': prop['buildername'],
                'product_name': prop.get('product', ''),
                'state': 'completed',
                'result': buildbot.RESULT_DICT[build['result']],
                'reason': build['reason'],
                # scheduler, if 'who' property is not present
                'who': prop.get('who', prop.get('scheduler', '')),
                'submit_timestamp': build['requesttime'],
                'start_timestamp': build['starttime'],
                'end_timestamp': build['endtime'],
                'machine': prop.get('slavename', 'unknown'),
                # build_url not present in all builds
                'build_url': prop.get('build_url', ''),
                # build_platform same as machine_platform
                'build_platform': {
                    # platform attributes sometimes parse without results
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'machine_platform': {
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'device_name': device_name,
                # pgo or non-pgo dependent on buildername parsing
                'option_collection': {
                    buildbot.extract_build_type(prop['buildername']): True
                },
                'log_references': log_reference,
                'artifacts': [
                    {
                        'type': 'json',
                        'name': 'buildapi',
                        'log_urls': [],
                        'blob': {
                            'buildername': build['properties']['buildername'],
                            'request_id': request_id
                        }
                    },
                ]
            }

            treeherder_data['job'] = job

            if project not in th_collections:
                th_collections[project] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add(th_job)

        if missing_resultsets and not filter_to_revision:
            common.fetch_missing_resultsets("builds4h", missing_resultsets, logger)

        return th_collections
Beispiel #21
0
    def transform(self, data, project_filter=None, revision_filter=None,
                  job_group_filter=None):
        """
        transform the builds4h structure into something we can ingest via
        our restful api
        """
        revisions = defaultdict(list)
        missing_resultsets = defaultdict(set)

        valid_projects = set(x.project for x in Datasource.objects.cached())

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                buildername = prop['buildername']

                if common.should_skip_project(project, valid_projects, project_filter):
                    continue

                if common.should_skip_revision(prop['revision'], revision_filter):
                    continue

                if common.is_blacklisted_buildername(buildername):
                    continue

                prop['short_revision'] = prop['revision'][0:12]

            except KeyError as e:
                logger.warning("skipping builds-4hr job %s since missing property: %s", build['id'], str(e))
                continue

            revisions[project].append(prop['short_revision'])

        revisions_lookup = common.lookup_revisions(revisions)

        job_ids_seen_last_time = cache.get(CACHE_KEYS['complete'], set())
        job_ids_seen_now = set()

        # Holds one collection per unique branch/project
        th_collections = {}

        for build in data['builds']:
            try:
                prop = build['properties']
                project = prop['branch']
                buildername = prop['buildername']
                if common.should_skip_project(project, valid_projects, project_filter):
                    continue
                if common.should_skip_revision(prop['revision'], revision_filter):
                    continue
                if common.is_blacklisted_buildername(buildername):
                    continue
                # todo: Continue using short revisions until Bug 1199364
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 prop['short_revision'],
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # There was no matching resultset, skip the job.
                continue

            # We record the id here rather than at the start of the loop, since we
            # must not count jobs whose revisions were not yet imported as processed,
            # or we'll never process them once we've ingested their associated revision.
            job_ids_seen_now.add(build['id'])

            # Don't process jobs that were already present in builds-4hr
            # the last time this task completed successfully.
            if build['id'] in job_ids_seen_last_time:
                continue

            platform_info = buildbot.extract_platform_info(buildername)
            job_name_info = buildbot.extract_name_info(buildername)

            if (job_group_filter and job_name_info.get('group_symbol', '').lower() !=
                    job_group_filter.lower()):
                continue

            treeherder_data = {
                'revision_hash': resultset['revision_hash'],
                'resultset_id': resultset['id'],
                'project': project,
                'coalesced': []
            }

            log_reference = []
            if 'log_url' in prop:
                log_reference.append({
                    'url': prop['log_url'],
                    'name': 'buildbot_text'
                })

            # add structured logs to the list of log references
            if 'blobber_files' in prop:
                try:
                    blobber_files = json.loads(prop['blobber_files'])
                    for bf, url in blobber_files.items():
                        if bf and url and bf.endswith('_raw.log'):
                            log_reference.append({
                                'url': url,
                                'name': 'mozlog_json'
                            })
                except Exception as e:
                    logger.warning("invalid blobber_files json for build id %s (%s): %s",
                                   build['id'], buildername, e)

            try:
                job_guid_data = self.find_job_guid(build)
                # request_ids is mandatory, but can be found in several places.
                request_ids = prop.get('request_ids', build['request_ids'])
                # The last element in request_ids corresponds to the request id of this job,
                # the others are for the requests that were coalesced into this one.
                request_id = request_ids[-1]
            except KeyError:
                continue

            treeherder_data['coalesced'] = job_guid_data['coalesced']

            job = {
                'job_guid': job_guid_data['job_guid'],
                'name': job_name_info.get('name', ''),
                'job_symbol': job_name_info.get('job_symbol', ''),
                'group_name': job_name_info.get('group_name', ''),
                'group_symbol': job_name_info.get('group_symbol', ''),
                'reference_data_name': buildername,
                'product_name': prop.get('product', ''),
                'state': 'completed',
                'result': buildbot.RESULT_DICT[build['result']],
                'reason': build['reason'],
                # scheduler, if 'who' property is not present
                'who': prop.get('who', prop.get('scheduler', '')),
                'submit_timestamp': build['requesttime'],
                'start_timestamp': build['starttime'],
                'end_timestamp': build['endtime'],
                'machine': prop.get('slavename', 'unknown'),
                # build_platform same as machine_platform
                'build_platform': {
                    # platform attributes sometimes parse without results
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                'machine_platform': {
                    'os_name': platform_info.get('os', ''),
                    'platform': platform_info.get('os_platform', ''),
                    'architecture': platform_info.get('arch', '')
                },
                # pgo or non-pgo dependent on buildername parsing
                'option_collection': {
                    buildbot.extract_build_type(buildername): True
                },
                'log_references': log_reference,
                'artifacts': [
                    {
                        'type': 'json',
                        'name': 'buildapi',
                        'log_urls': [],
                        'blob': {
                            'buildername': buildername,
                            'request_id': request_id
                        }
                    },
                ]
            }

            treeherder_data['job'] = job

            if project not in th_collections:
                th_collections[project] = TreeherderJobCollection()

            # get treeherder job instance and add the job instance
            # to the collection instance
            th_job = th_collections[project].get_job(treeherder_data)
            th_collections[project].add(th_job)

        if missing_resultsets and not revision_filter:
            common.fetch_missing_resultsets("builds4h", missing_resultsets, logger)

        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d completed jobs, skipped %d previously seen",
                    num_new_jobs, len(job_ids_seen_now) - num_new_jobs)

        return th_collections, job_ids_seen_now
Beispiel #22
0
    def transform(self, data):
        """
        transform the buildapi structure into something we can ingest via
        our restful api
        """
        projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)

        # loop to catch all the revisions
        for project, revisions in data["running"].items():
            # this skips those projects we don't care about
            if project not in projects:
                continue

            for rev, jobs in revisions.items():
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        th_collections = {}

        for project, revisions in revisions_lookup.items():

            for revision in revisions:

                resultset = revisions[revision]
                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for job in data["running"][project][revision]:
                    treeherder_data = {
                        "revision_hash": resultset["revision_hash"],
                        "resultset_id": resultset["id"],
                        "project": project,
                    }

                    platform_info = buildbot.extract_platform_info(job["buildername"])
                    job_name_info = buildbot.extract_name_info(job["buildername"])

                    job = {
                        "job_guid": common.generate_job_guid(job["request_ids"][0], job["submitted_at"]),
                        "name": job_name_info.get("name", ""),
                        "job_symbol": job_name_info.get("job_symbol", ""),
                        "group_name": job_name_info.get("group_name", ""),
                        "group_symbol": job_name_info.get("group_symbol", ""),
                        "buildername": job["buildername"],
                        "state": "running",
                        "submit_timestamp": job["submitted_at"],
                        "build_platform": {
                            "os_name": platform_info["os"],
                            "platform": platform_info["os_platform"],
                            "architecture": platform_info["arch"],
                            "vm": platform_info["vm"],
                        },
                        # where are we going to get this data from?
                        "machine_platform": {
                            "os_name": platform_info["os"],
                            "platform": platform_info["os_platform"],
                            "architecture": platform_info["arch"],
                            "vm": platform_info["vm"],
                        },
                        "who": "unknown",
                        "option_collection": {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(job["buildername"]): True
                        },
                        "log_references": [],
                    }

                    treeherder_data["job"] = job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection(job_type="update")

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        return th_collections