Example #1
0
    def find_job_guid(self, build):
        """
        Return the job guid for a builds4h build, together with the guids
        of the pending jobs that were coalesced into it.

        Necessary because the request ids are inconsistently represented
        in builds4h: they may be stored in the build properties or at the
        top level of the build. The guids are generated from a request id
        and the buildername (plus the endtime for retried builds).

        :param build: builds4h build dict. ``properties`` (holding
            ``buildername``) and ``result`` are required, as is
            ``endtime`` when the result maps to 'retry'.
        :returns: ``{'job_guid': <guid>, 'coalesced': [<guid>, ...]}``
        :raises KeyError: when a required field is missing.
        :raises IndexError: when the request id list is empty.
        """

        # this is reused in the transformer and the analyzer, so reverting
        # the field getters to this function.

        prop = build['properties']

        # The branch only prefixes the log messages below; read it leniently
        # so that a missing branch can never mask the error being reported.
        branch = prop.get('branch')

        try:
            # request_ids can be found in a couple of different places.
            # The top-level value is only read when the property is absent:
            # passing it as the default of prop.get() would evaluate it
            # eagerly and raise KeyError for builds that only carry
            # request_ids in their properties.
            if 'request_ids' in prop:
                request_ids = prop['request_ids']
            else:
                request_ids = build['request_ids']
            # By experimentation we've found that the last id in the list
            # corresponds to the request that was used to schedule the job.
            request_id = request_ids[-1]
        except (KeyError, IndexError):
            logger.error("({0})request_id not found in {1}".format(
                branch, build))
            # A bare raise keeps the original traceback.
            raise

        try:
            buildername = prop['buildername']
        except KeyError:
            logger.error("({0})buildername not found in {1}".format(
                branch, build))
            raise

        # The endtime is only handed to the guid generator for retried
        # builds (presumably so each retry gets its own guid -- confirm
        # against common.generate_job_guid).
        endtime = None
        if buildbot.RESULT_DICT[build['result']] == 'retry':
            try:
                endtime = build['endtime']
            except KeyError:
                logger.error("({0})endtime not found in {1}".format(
                    branch, build))
                raise

        # If request_ids contains more than one element, then jobs were coalesced into
        # this one. In that case, the last element corresponds to the request id of
        # the job that actually ran (ie this one), and the rest are for the pending
        # jobs that were coalesced. We must generate guids for these coalesced jobs,
        # so they can be marked as coalesced, and not left as orphaned pending jobs.
        coalesced = [
            common.generate_job_guid(coalesced_request_id, buildername)
            for coalesced_request_id in request_ids[:-1]
        ]

        return {
            'job_guid': common.generate_job_guid(
                request_id, buildername, endtime),
            'coalesced': coalesced,
        }
Example #2
0
    def find_job_guid(self, build):
        """
        Return the guid of the job behind a builds4h build, plus the guids
        of the pending jobs that were coalesced into it.

        The request ids are inconsistently represented in builds4h, so they
        are looked up in the build properties first and at the top level of
        the build second. Guids are generated from a request id and the
        buildername (and the endtime, for retried builds).

        :param build: builds4h build dict. Needs ``properties`` (with
            ``buildername``) and ``result``; ``endtime`` is also needed
            when the result maps to 'retry'.
        :returns: ``{'job_guid': <guid>, 'coalesced': [<guid>, ...]}``
        :raises KeyError: when a required field is missing.
        :raises IndexError: when the request id list is empty.
        """

        # this is reused in the transformer and the analyzer, so reverting
        # the field getters to this function.

        prop = build['properties']

        # Only used to prefix log messages. Looked up with .get() so that a
        # build without a branch still reports the real problem.
        branch = prop.get('branch')

        try:
            # request_ids can be found in a couple of different places.
            # Do not use prop.get('request_ids', build['request_ids']):
            # the default is evaluated eagerly, which raises KeyError for
            # builds that only have request_ids in their properties.
            if 'request_ids' in prop:
                request_ids = prop['request_ids']
            else:
                request_ids = build['request_ids']
            # By experimentation we've found that the last id in the list
            # corresponds to the request that was used to schedule the job.
            request_id = request_ids[-1]
        except (KeyError, IndexError):
            logger.error("({0})request_id not found in {1}".format(
                branch, build))
            # Re-raise with the original traceback intact.
            raise

        try:
            buildername = prop['buildername']
        except KeyError:
            logger.error("({0})buildername not found in {1}".format(
                branch, build))
            raise

        # Only retried builds pass an endtime to the guid generator
        # (presumably to tell the retries apart -- confirm against
        # common.generate_job_guid).
        endtime = None
        if buildbot.RESULT_DICT[build['result']] == 'retry':
            try:
                endtime = build['endtime']
            except KeyError:
                logger.error("({0})endtime not found in {1}".format(
                    branch, build))
                raise

        # If request_ids contains more than one element, then jobs were coalesced into
        # this one. In that case, the last element corresponds to the request id of
        # the job that actually ran (ie this one), and the rest are for the pending
        # jobs that were coalesced. We must generate guids for these coalesced jobs,
        # so they can be marked as coalesced, and not left as orphaned pending jobs.
        coalesced_guids = [
            common.generate_job_guid(coalesced_request_id, buildername)
            for coalesced_request_id in request_ids[:-1]
        ]
        job_guid = common.generate_job_guid(request_id, buildername, endtime)

        return {'job_guid': job_guid, 'coalesced': coalesced_guids}
Example #3
0
    def find_job_guid(self, build):
        """
        Build the job guid data for a builds4h build from its request ids
        and request times, both of which are inconsistently represented
        in builds4h and may live in two different places.

        Returns a dict holding the 'job_guid' of the build and the list of
        'coalesced' job guids.
        """
        properties = build['properties']

        # request ids: taken from the properties, or from the top level of
        # the build when the properties have none
        request_ids = properties.get('request_ids', [])
        if request_ids == []:
            ids_to_join = build.get('request_ids', [])
        else:
            ids_to_join = request_ids
        request_ids_str = ','.join(str(r_id) for r_id in ids_to_join)

        # request times: taken from the properties, or from the
        # 'requesttime' of the build when the properties have none
        request_time_dict = properties.get('request_times', {})
        if request_time_dict == {}:
            request_times_str = str(build['requesttime'])
        else:
            request_times_str = ','.join(
                str(r_time) for r_time in request_time_dict.values())

        # NOTE(review): only the ids found in the properties are considered
        # for coalescing; ids that come from the top level of the build never
        # produce coalesced guids -- confirm this is intended.
        coalesced_guids = []
        if len(request_ids) > 1:
            # coalesced job detected: one guid for each request id that
            # has a known request time
            id_strs = [str(r_id) for r_id in request_ids]
            coalesced_guids = [
                common.generate_job_guid(id_str, request_time_dict[id_str])
                for id_str in id_strs
                if id_str in request_time_dict
            ]

        return {
            'job_guid': common.generate_job_guid(
                request_ids_str, request_times_str),
            'coalesced': coalesced_guids,
        }
Example #4
0
    def transform(self, data, source, revision_filter=None, project_filter=None,
                  job_group_filter=None):
        """
        Turn the buildapi jobs listed under data[source] into per-project
        TreeherderJobCollections that can be ingested via our restful api.

        source is used as the state of each job and selects where the
        request id is read from ('pending' or 'running'). The optional
        filters restrict the projects, revisions and job group symbols
        (compared case-insensitively) that get processed.

        Returns a tuple of (collections keyed by project, set of the job
        ids seen during this run).
        """
        known_projects = set(ds.project for ds in Datasource.objects.cached())
        revisions_to_lookup = defaultdict(list)
        missing_resultsets = defaultdict(set)
        jobs_by_project = data[source]

        # First pass: collect every revision that has to be looked up.
        for project, revisions in jobs_by_project.iteritems():
            if common.should_skip_project(project, known_projects, project_filter):
                continue

            for rev, jobs in revisions.items():
                if common.should_skip_revision(rev, revision_filter):
                    continue
                # A revision is only fetched when at least one valid
                # (non-blacklisted) job is associated with it.
                if any(not common.is_blacklisted_buildername(job['buildername'])
                       for job in jobs):
                    revisions_to_lookup[project].append(rev)

        # revision -> resultset lookups
        revisions_lookup = common.lookup_revisions(revisions_to_lookup)

        job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
        job_ids_seen_now = set()

        th_collections = {}

        # Second pass: build the treeherder jobs.
        for project, revisions in jobs_by_project.iteritems():
            if common.should_skip_project(project, known_projects, project_filter):
                continue

            for revision, jobs in revisions.items():
                if common.should_skip_revision(revision, revision_filter):
                    continue

                try:
                    resultset = common.get_resultset(
                        project, revisions_lookup, revision,
                        missing_resultsets, logger)
                except KeyError:
                    # No matching resultset, so none of these jobs can be used.
                    continue

                for job in jobs:
                    buildername = job['buildername']
                    if common.is_blacklisted_buildername(buildername):
                        continue

                    job_id = job['id']
                    job_ids_seen_now.add(job_id)

                    # Jobs that were already present in this datasource the
                    # last time this task completed successfully are skipped.
                    if job_id in job_ids_seen_last_time:
                        continue

                    treeherder_data = {
                        'revision_hash': resultset['revision_hash'],
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)

                    if job_group_filter:
                        group_symbol = job_name_info.get('group_symbol', '')
                        if group_symbol.lower() != job_group_filter.lower():
                            continue

                    if source == 'pending':
                        request_id = job_id
                    elif source == 'running':
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job['request_ids'][-1]

                    job_guid = common.generate_job_guid(request_id, buildername)

                    new_job = {
                        'job_guid': job_guid,
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': buildername,
                        'state': source,
                        'submit_timestamp': job['submitted_at'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        # where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        'who': 'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': buildername,
                                    'request_id': request_id
                                }
                            },
                        ]
                    }

                    if source == 'running':
                        new_job['start_timestamp'] = job['start_time']
                        # The original values are stored to help debugging.
                        running_artifact = {
                            'type': 'json',
                            'name': 'buildapi_running',
                            'log_urls': [],
                            'blob': {
                                'revision': revision,
                                'request_ids': job['request_ids'],
                                'submitted_at': job['submitted_at'],
                                'start_time': job['start_time'],
                            }
                        }
                        new_job['artifacts'].append(running_artifact)

                    treeherder_data['job'] = new_job

                    collection = th_collections.get(project)
                    if collection is None:
                        collection = th_collections[project] = TreeherderJobCollection()

                    # create the treeherder job instance and store it in
                    # the collection of its project
                    collection.add(collection.get_job(treeherder_data))

        if missing_resultsets and not revision_filter:
            common.fetch_missing_resultsets(source, missing_resultsets, logger)

        num_seen = len(job_ids_seen_now)
        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d %s jobs, skipped %d previously seen",
                    num_new_jobs, source, num_seen - num_new_jobs)

        return th_collections, job_ids_seen_now
Example #5
0
    def transform(self, data):
        """
        transform the buildapi structure into something we can ingest via
        our restful api

        :param data: buildapi payload. Only ``data['running']`` is read, as
            a mapping of project -> revision -> list of running jobs.
        :returns: dict mapping each project to a TreeherderJobCollection
            (created with ``job_type='update'``) holding its running jobs.
        """
        projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)
        missing_resultsets = defaultdict(set)

        # loop to catch all the revisions
        for project, revisions in data['running'].items():
            # this skips those projects we don't care about
            if project not in projects:
                continue

            for rev in revisions:
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        th_collections = {}

        for project, revisions in data['running'].items():
            # Skip the projects we don't care about here as well. They were
            # left out of the lookup above, so asking for their resultsets
            # could only fail, and they must not be handed to
            # fetch_missing_resultsets.
            if project not in projects:
                continue

            for revision, jobs in revisions.items():

                try:
                    resultset = common.get_resultset(project, revisions_lookup,
                                                     revision,
                                                     missing_resultsets,
                                                     logger)
                except KeyError:
                    # skip this job, at least at this point
                    continue

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for running_job in jobs:
                    treeherder_data = {
                        'revision_hash': resultset['revision_hash'],
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    buildername = running_job['buildername']
                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)
                    device_name = buildbot.get_device_or_unknown(
                        job_name_info.get('name', ''), platform_info['vm'])

                    new_job = {
                        # NOTE(review): the guid is based on the first
                        # request id while the 'buildapi' artifact below
                        # records the highest one -- confirm this is
                        # intended.
                        'job_guid': common.generate_job_guid(
                            running_job['request_ids'][0],
                            running_job['submitted_at']),
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': buildername,
                        'state': 'running',
                        'submit_timestamp': running_job['submitted_at'],
                        'start_timestamp': running_job['start_time'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        #where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        'device_name': device_name,
                        'who': 'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                # the untouched buildapi job
                                'type': 'json',
                                'name': 'buildapi_running',
                                'log_urls': [],
                                'blob': running_job
                            },
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': buildername,
                                    'request_id':
                                    max(running_job['request_ids'])
                                }
                            },
                        ]
                    }

                    treeherder_data['job'] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection(
                            job_type='update')

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        if missing_resultsets:
            common.fetch_missing_resultsets("running", missing_resultsets,
                                            logger)

        return th_collections
Example #6
0
    def find_job_guid(self, build):
        """
        returns the job_guid, based on request id and request time.
        necessary because request id and request time is inconsistently
        represented in builds4h

        :param build: builds4h build dict. Needs ``properties`` and
            ``result``; the request ids are read from
            ``properties['request_ids']`` or ``build['request_ids']``, the
            request times from ``properties['request_times']`` or
            ``build['requesttime']``. ``endtime`` is needed when the result
            maps to 'retry'.
        :returns: ``{'job_guid': <guid>, 'coalesced': [<guid>, ...]}``
        :raises KeyError: when a required field is missing, or when the
            request times are a dict that lacks one of the request ids.
        """

        # this is reused in the transformer and the analyzer, so reverting
        # the field getters to this function.

        prop = build['properties']

        # The branch only prefixes the log messages below; read it leniently
        # so that a missing branch can never mask the error being reported.
        branch = prop.get('branch')

        # request_id and request_time are mandatory
        # and they can be found in a couple of different places.
        # The top-level values are only read when the property is absent:
        # passing them as the default of prop.get() would evaluate them
        # eagerly and raise KeyError even though the property is there.
        try:
            if 'request_ids' in prop:
                request_ids = prop['request_ids']
            else:
                request_ids = build['request_ids']
        except KeyError:
            logger.error("({0})request_id not found in {1}".format(
                branch, build))
            # A bare raise keeps the original traceback.
            raise

        try:
            if 'request_times' in prop:
                request_times = prop['request_times']
            else:
                request_times = build['requesttime']
        except KeyError:
            logger.error("({0})request_time not found in {1}".format(
                branch, build))
            raise

        endtime = None
        if buildbot.RESULT_DICT[build['result']] == 'retry':
            try:
                endtime = build['endtime']
            except KeyError:
                logger.error("({0})endtime not found in {1}".format(
                    branch, build))
                raise

        request_ids_str = ",".join(map(str, request_ids))

        # When the request times are a dict (keyed by the stringified
        # request id) they are listed in request id order; otherwise there is
        # a single request time and no per-request list.
        request_time_list = []
        if isinstance(request_times, dict):
            request_time_list = [request_times[str(request_id)]
                                 for request_id in request_ids]
            request_times_str = ','.join(map(str, request_time_list))
        else:
            request_times_str = str(request_times)

        coalesced = []
        if len(request_ids) > 1:
            # coalesced job detected, generate the coalesced job guids.
            # zip() stops at the shorter sequence, so nothing is generated
            # when there is no per-request time to pair the ids with.
            coalesced = [
                common.generate_job_guid(str(r_id), r_time)
                for r_id, r_time in zip(request_ids, request_time_list)
            ]

        return {
            'job_guid': common.generate_job_guid(
                request_ids_str, request_times_str, endtime),
            'coalesced': coalesced,
        }
Example #7
0
    def transform(self,
                  data,
                  source,
                  revision_filter=None,
                  project_filter=None,
                  job_group_filter=None):
        """
        Turn the buildapi jobs listed under data[source] into per-project
        TreeherderJobCollections that can be ingested via our restful api.

        source is used as the state of each job and selects where the
        request id is read from ('pending' or 'running'). The optional
        filters restrict the projects, revisions and job group symbols
        (compared case-insensitively) that get processed.

        Returns a tuple of (collections keyed by project, set of the job
        ids seen during this run).
        """
        known_projects = set(ds.project for ds in Datasource.objects.cached())
        revisions_to_lookup = defaultdict(list)
        jobs_by_project = data[source]

        # First pass: collect every revision that has to be looked up.
        for project, revisions in jobs_by_project.iteritems():
            if common.should_skip_project(project, known_projects,
                                          project_filter):
                continue

            for rev in revisions.iterkeys():
                if not common.should_skip_revision(rev, revision_filter):
                    revisions_to_lookup[project].append(rev)

        # revision -> resultset lookups
        revisions_lookup = common.lookup_revisions(revisions_to_lookup)

        job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
        job_ids_seen_now = set()

        th_collections = {}

        # Second pass: build the treeherder jobs.
        for project, revisions in jobs_by_project.iteritems():
            if common.should_skip_project(project, known_projects,
                                          project_filter):
                continue

            for revision, jobs in revisions.items():
                if common.should_skip_revision(revision, revision_filter):
                    continue

                try:
                    resultset = revisions_lookup[project][revision]
                except KeyError:
                    logger.warning(
                        "skipping jobs since %s revision %s not yet ingested",
                        project, revision)
                    continue

                for job in jobs:
                    job_id = job['id']
                    job_ids_seen_now.add(job_id)

                    # Jobs that were already present in this datasource the
                    # last time this task completed successfully are skipped.
                    if job_id in job_ids_seen_last_time:
                        continue

                    treeherder_data = {
                        'revision': revision,
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    buildername = job['buildername']
                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)

                    if job_group_filter:
                        group_symbol = job_name_info.get('group_symbol', '')
                        if group_symbol.lower() != job_group_filter.lower():
                            continue

                    if source == 'pending':
                        request_id = job_id
                    elif source == 'running':
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job['request_ids'][-1]

                    job_guid = common.generate_job_guid(request_id, buildername)

                    new_job = {
                        'job_guid': job_guid,
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': buildername,
                        'state': source,
                        'submit_timestamp': job['submitted_at'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        # where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        'who': 'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': buildername,
                                    'request_id': request_id
                                }
                            },
                        ]
                    }

                    if source == 'running':
                        new_job['start_timestamp'] = job['start_time']
                        # The original values are stored to help debugging.
                        running_artifact = {
                            'type': 'json',
                            'name': 'buildapi_running',
                            'log_urls': [],
                            'blob': {
                                'revision': revision,
                                'request_ids': job['request_ids'],
                                'submitted_at': job['submitted_at'],
                                'start_time': job['start_time'],
                            }
                        }
                        new_job['artifacts'].append(running_artifact)

                    treeherder_data['job'] = new_job

                    collection = th_collections.get(project)
                    if collection is None:
                        collection = th_collections[project] = TreeherderJobCollection()

                    # create the treeherder job instance and store it in
                    # the collection of its project
                    collection.add(collection.get_job(treeherder_data))

        num_seen = len(job_ids_seen_now)
        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d %s jobs, skipped %d previously seen",
                    num_new_jobs, source, num_seen - num_new_jobs)

        return th_collections, job_ids_seen_now
    def adapt_data(self, data):
        """
        Convert a completed job taken from the pulse stream into the
        treeherder input data structure.

        Returns an empty dict when no resultset could be looked up for the
        branch/revision of the job, a JobData instance otherwise.
        """
        branch = data['branch']
        revision = data['revision']

        lookup = common.lookup_revisions({branch: [revision]})
        if not lookup:
            return {}

        # The looked up resultset is used as the base of the treeherder
        # data, minus its id and with the project added.
        treeherder_data = lookup[branch][revision]
        del treeherder_data['id']
        treeherder_data['project'] = branch

        ####
        #TODO: This is a temporary fix, this data will not be located
        #      in the sourceStamp in the pulse stream. It will likely
        #      be in other build properties but for now this will work.
        #      Once the new properties are added they need to be incorporated
        #      here.
        ####

        #This assumes the 0 element in request_ids is the id for the
        #job which is not always true if there are coalesced jobs. This will need
        #to be updated when https://bugzilla.mozilla.org/show_bug.cgi?id=862633
        #is resolved.
        request_id = data['request_ids'][0]

        #The keys of request_times are unicode but the values in
        #request_ids are not, this explicit cast could cause
        #problems if the data added to the pulse stream is
        #modified
        submit_time = data['request_times'][unicode(request_id)]

        def _platform():
            # a fresh dict per call, so that the build and machine
            # platforms never share the same object
            return {
                'os_name': data['os'],
                'platform': data['os_platform'],
                'architecture': data['arch'],
                'vm': data['vm']
            }

        job = {
            'job_guid': common.generate_job_guid(request_id, submit_time),
            'revision_hash': treeherder_data.pop('revision_hash'),
            'name': data['test_name'],
            'product_name': data['product'],
            'state': 'completed',

            #Do we need to map this to the strings in the sample structure?
            'result': buildbot.RESULT_DICT.get(int(data['results']), 'unknown'),
            'reason': data['reason'],

            #There is both a who and blame that appear to be identical in the
            #pulse stream, is who the way to go?
            'who': data['who'],

            'submit_timestamp': submit_time,
            'start_timestamp': data['times']['start_timestamp'],

            # the end time is the moment the job gets adapted
            'end_timestamp': str(int(time.time())),
            'machine': data['slave'],

            'build_url': data['buildurl'],

            'build_platform': _platform(),
            #where are we going to get this data from?
            'machine_platform': _platform(),

            'option_collection': {
                data['buildtype']: True
            },
            'log_references': [{
                'url': data['log_url'],
                #using the jobtype as a name for now, the name allows us
                #to have different log types with their own processing
                'name': data['jobtype']
            }],

            'artifact': {}
        }

        treeherder_data['job'] = job

        return JobData(treeherder_data)
Example #9
0
    def adapt_data(self, data):
        """
        Adapt a pulse message into the treeherder input data structure.

        The resultset for ``data['branch']`` / ``data['revision']`` is fetched
        with ``common.lookup_revisions``. When that lookup comes back empty an
        empty dict is returned. Otherwise the resultset entry, with its ``id``
        removed and its ``revision_hash`` moved into the job, is reused as the
        outer structure: it receives a ``project`` and a ``job`` entry and is
        returned wrapped in ``JobData``.
        """
        branch = data['branch']
        revision = data['revision']

        lookup = common.lookup_revisions({branch: [revision]})
        if not lookup:
            return {}

        # The resultset entry is modified in place and becomes the outer
        # structure that is handed to JobData.
        treeherder_data = lookup[branch][revision]
        del treeherder_data['id']
        treeherder_data['project'] = branch
        ####
        #TODO: This is a temporary fix, this data will not be located
        #      in the sourceStamp in the pulse stream. It will likely
        #      be in other build properties but for now this will work.
        #      Once the new properties are added they need to be incorporated
        #      here.
        ####

        #This assumes the 0 element in request_ids is the id for the
        #job which is not always true if there are coalesced jobs. This will need
        #to be updated when https://bugzilla.mozilla.org/show_bug.cgi?id=862633
        #is resolved.
        request_id = data['request_ids'][0]

        #The keys in this dict are unicode but the values in
        #request_ids are not, this explicit cast could cause
        #problems if the data added to the pulse stream is
        #modified
        submit_timestamp = data['request_times'][unicode(request_id)]

        job = {
            'job_guid': common.generate_job_guid(request_id, submit_timestamp),
            'revision_hash': treeherder_data.pop('revision_hash'),
            'name': data['test_name'],
            'product_name': data['product'],
            'state': 'completed',
            #Do we need to map this to the strings in the sample structure?
            'result': buildbot.RESULT_DICT.get(int(data['results']), 'unknown'),
            'reason': data['reason'],
            #There is both a who and blame that appear to be identical in the
            #pulse stream, is who the way to go?
            'who': data['who'],
            'submit_timestamp': submit_timestamp,
            'start_timestamp': data['times']['start_timestamp'],
            # the end time is the moment of adaptation, not a message field
            'end_timestamp': str(int(time.time())),
            'machine': data['slave'],
            'build_url': data['buildurl'],
        }

        platform = {
            'os_name': data['os'],
            'platform': data['os_platform'],
            'architecture': data['arch'],
            'vm': data['vm'],
        }
        job['build_platform'] = platform
        #where are we going to get this data from?
        # (for now an independent copy of the build platform)
        job['machine_platform'] = dict(platform)

        job['option_collection'] = {data['buildtype']: True}

        #using the jobtype as a name for now, the name allows us
        #to have different log types with their own processing
        job['log_references'] = [
            {'url': data['log_url'], 'name': data['jobtype']},
        ]
        job['artifact'] = {}

        treeherder_data['job'] = job

        return JobData(treeherder_data)
Example #10
0
    def transform(self, data, source, filter_to_revision=None, filter_to_project=None,
                  filter_to_job_group=None):
        """
        transform the buildapi structure into something we can ingest via
        our restful api

        :param data: buildapi payload; ``data[source]`` maps
            project -> revision -> list of job dicts
        :param source: ``'pending'`` or ``'running'``; picks the section of
            ``data`` to read, becomes every job's ``state`` and decides
            where the request id is read from
        :param filter_to_revision: when set, only resultsets whose
            ``revision`` equals it are processed, and missing resultsets
            are not fetched at the end
        :param filter_to_project: when set, only that project's revisions
            are passed to the revision lookup
        :param filter_to_job_group: when set, only jobs whose group symbol
            matches it (case-insensitively) are kept
        :returns: dict of project name -> ``TreeherderJobCollection``
        :raises ValueError: when a job is reached with a ``source`` other
            than ``'pending'`` or ``'running'``
        """
        projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)
        missing_resultsets = defaultdict(set)

        # loop to catch all the revisions
        for project, revisions in data[source].iteritems():
            # this skips those projects we don't care about
            if project not in projects:
                continue

            if filter_to_project and project != filter_to_project:
                continue

            # only the revision keys are needed here; the jobs are read below
            for rev in revisions:
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        th_collections = {}

        # NOTE(review): this loop does not repeat the project filtering done
        # above; revisions for which common.get_resultset raises KeyError are
        # skipped -- confirm that this covers the filtered-out projects.
        for project, revisions in data[source].iteritems():

            for revision, jobs in revisions.items():

                try:
                    resultset = common.get_resultset(project,
                                                     revisions_lookup,
                                                     revision,
                                                     missing_resultsets,
                                                     logger)
                except KeyError:
                    # skip this job, at least at this point
                    continue

                if filter_to_revision and filter_to_revision != resultset['revision']:
                    continue

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for job in jobs:
                    treeherder_data = {
                        'revision_hash': resultset['revision_hash'],
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    buildername = job['buildername']
                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)

                    if (filter_to_job_group and job_name_info.get('group_symbol', '').lower() !=
                            filter_to_job_group.lower()):
                        continue

                    if source == 'pending':
                        request_id = job['id']
                    elif source == 'running':
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job['request_ids'][-1]
                    else:
                        # Without this branch request_id would be unbound and
                        # the guid generation below would fail with an
                        # UnboundLocalError that hides the real cause.
                        raise ValueError(
                            "unsupported source {0!r}: expected 'pending' "
                            "or 'running'".format(source))

                    device_name = buildbot.get_device_or_unknown(
                        job_name_info.get('name', ''),
                        platform_info['vm']
                    )

                    new_job = {
                        'job_guid': common.generate_job_guid(
                            request_id,
                            buildername
                        ),
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': buildername,
                        'state': source,
                        'submit_timestamp': job['submitted_at'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        # where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm']
                        },
                        'device_name': device_name,
                        'who': 'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': buildername,
                                    'request_id': request_id
                                }
                            },
                        ]
                    }

                    if source == 'running':
                        new_job['start_timestamp'] = job['start_time']
                        # We store the original values to help debugging.
                        new_job['artifacts'].append(
                            {
                                'type': 'json',
                                'name': 'buildapi_running',
                                'log_urls': [],
                                'blob': {
                                    'revision': revision,
                                    'request_ids': job['request_ids'],
                                    'submitted_at': job['submitted_at'],
                                    'start_time': job['start_time'],
                                }
                            }
                        )

                    treeherder_data['job'] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection(
                            job_type='update'
                        )

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        # resultsets are only back-filled for unfiltered runs
        if missing_resultsets and not filter_to_revision:
            common.fetch_missing_resultsets(source, missing_resultsets, logger)

        return th_collections
    def transform(self, data):
        """
        Turn the ``running`` section of the buildapi structure into job
        collections that can be submitted through our restful api.

        ``data['running']`` is read as project -> revision -> list of job
        dicts. Returns a dict of project name -> ``TreeherderJobCollection``
        (created with ``job_type='update'``). Revisions for which
        ``common.get_resultset`` raises ``KeyError`` are skipped, and any
        resultsets recorded as missing are requested at the end through
        ``common.fetch_missing_resultsets``.
        """
        known_projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)
        missing_resultsets = defaultdict(set)

        # first pass: gather the revisions of the projects we care about
        for project, revisions in data['running'].items():
            if project in known_projects:
                for rev in revisions.keys():
                    revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        th_collections = {}

        # second pass: build one treeherder job per running buildapi job
        for project, revisions in data['running'].items():
            for revision, jobs in revisions.items():
                try:
                    resultset = common.get_resultset(
                        project, revisions_lookup, revision,
                        missing_resultsets, logger)
                except KeyError:
                    # skip this job, at least at this point
                    continue

                for running_job in jobs:
                    treeherder_data = {
                        'revision_hash': resultset['revision_hash'],
                        'resultset_id': resultset['id'],
                        'project': project,
                    }

                    buildername = running_job['buildername']
                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)
                    device_name = buildbot.get_device_or_unknown(
                        job_name_info.get('name', ''), platform_info['vm'])

                    # the first request id identifies the job in this version
                    request_id = running_job['request_ids'][0]

                    treeherder_data['job'] = {
                        'job_guid': common.generate_job_guid(
                            request_id, running_job['submitted_at']),
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': buildername,
                        'state': 'running',
                        'submit_timestamp': running_job['submitted_at'],
                        'start_timestamp': running_job['start_time'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm'],
                        },
                        #where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                            'vm': platform_info['vm'],
                        },
                        'device_name': device_name,
                        'who': 'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True,
                        },
                        'log_references': [],
                        'artifacts': [
                            # the untouched buildapi job, kept as-is
                            {
                                'type': 'json',
                                'name': 'buildapi_running',
                                'log_urls': [],
                                'blob': running_job,
                            },
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': buildername,
                                    'request_id': request_id,
                                },
                            },
                        ],
                    }

                    # one collection per project, created on first use
                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection(
                            job_type='update')
                    collection = th_collections[project]

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    collection.add(collection.get_job(treeherder_data))

        if missing_resultsets:
            common.fetch_missing_resultsets("running", missing_resultsets, logger)

        return th_collections
    def find_job_guid(self, build):
        """
        returns the job_guid, based on request id and request time.
        necessary because request id and request time is inconsistently
        represented in builds4h

        :param build: a build dict with a ``properties`` dict and a
            ``result`` key; request ids are read from
            ``properties['request_ids']`` or ``build['request_ids']`` and
            request times from ``properties['request_times']`` or
            ``build['requesttime']``
        :returns: dict with ``job_guid`` (guid of this job) and
            ``coalesced`` (guids of the individual requests, filled only
            when there is more than one request id and per-request times
            are available)
        :raises KeyError: when the request ids or request times are found
            in neither place, or when ``endtime`` is missing for a build
            whose result maps to ``'retry'``
        """

        # this is reused in the transformer and the analyzer, so reverting
        # the field getters to this function.

        prop = build['properties']
        # Only used in log messages; .get() so that a missing branch cannot
        # mask the KeyError that is actually being reported.
        branch = prop.get('branch')

        # request_id and request_time are mandatory
        # and they can be found in a couple of different places.
        # The top-level fallback is only read when the property is absent:
        # passing it as the default of dict.get() would evaluate it eagerly
        # and raise KeyError even though the property is present.
        try:
            if 'request_ids' in prop:
                request_ids = prop['request_ids']
            else:
                request_ids = build['request_ids']
        except KeyError:
            logger.error("({0})request_id not found in {1}".format(
                branch, build))
            raise

        try:
            if 'request_times' in prop:
                request_times = prop['request_times']
            else:
                request_times = build['requesttime']
        except KeyError:
            logger.error("({0})request_time not found in {1}".format(
                branch, build))
            raise

        endtime = None
        if buildbot.RESULT_DICT[build['result']] == 'retry':
            try:
                endtime = build['endtime']
            except KeyError:
                logger.error("({0})endtime not found in {1}".format(
                    branch, build))
                raise

        request_ids_str = ",".join(map(str, request_ids))
        request_time_list = []

        if isinstance(request_times, dict):
            # per-request times, keyed by the stringified request id
            request_time_list = [
                request_times[str(request_id)] for request_id in request_ids
            ]
            request_times_str = ','.join(map(str, request_time_list))
        else:
            # a single request time for the whole build
            request_times_str = str(request_times)

        job_guid_data = {'job_guid': '', 'coalesced': []}

        if len(request_ids) > 1:
            # coallesced job detected, generate the coalesced
            # job guids; zip() skips ids that have no matching time
            for r_id, r_time in zip(request_ids, request_time_list):
                job_guid_data['coalesced'].append(
                    common.generate_job_guid(str(r_id), r_time))

        job_guid_data['job_guid'] = common.generate_job_guid(
            request_ids_str, request_times_str, endtime)

        return job_guid_data
Example #13
0
    def transform(self, data, source, revision_filter=None, project_filter=None,
                  job_group_filter=None):
        """
        transform the buildapi structure into something we can ingest via
        our restful api

        :param data: buildapi payload; ``data[source]`` maps
            project -> revision -> list of job dicts
        :param source: ``'pending'`` or ``'running'``; picks the section of
            ``data`` to read, the cache key of previously seen job ids,
            every job's ``state`` and where the request id is read from
        :param revision_filter: passed to ``common.should_skip_revision``
        :param project_filter: passed to ``common.should_skip_project``
        :param job_group_filter: when set, only jobs whose group symbol
            matches it (case-insensitively) are kept
        :returns: tuple ``(th_collections, job_ids_seen_now)``: a dict of
            project name -> ``TreeherderJobCollection`` holding the jobs not
            seen last time, and the set of all job ids visited in this run
        :raises ValueError: when a job is reached with a ``source`` other
            than ``'pending'`` or ``'running'``
        """
        valid_projects = set(Repository.objects.values_list('name', flat=True))
        revision_dict = defaultdict(list)

        # loop to catch all the revisions
        # NOTE(review): revision_dict is filled here but never read again in
        # this method -- this loop looks like a leftover; confirm and remove.
        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            for rev in revisions.iterkeys():
                if common.should_skip_revision(rev, revision_filter):
                    continue
                revision_dict[project].append(rev)

        job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
        job_ids_seen_now = set()

        th_collections = {}

        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            revisions_seen_now_for_project = set()

            for revision, jobs in revisions.items():
                if common.should_skip_revision(revision, revision_filter):
                    continue

                # it should be quite rare for a job to be ingested before a
                # revision, but it could happen
                if revision not in revisions_seen_now_for_project and \
                   not Push.objects.filter(repository__name=project,
                                           revision__startswith=revision).exists():
                    logger.warning("skipping jobs since %s revision %s "
                                   "not yet ingested", project, revision)
                    continue
                revisions_seen_now_for_project.add(revision)

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for job in jobs:
                    job_ids_seen_now.add(job['id'])

                    # Don't process jobs that we saw the last time this task
                    # completed successfully.
                    if job['id'] in job_ids_seen_last_time:
                        continue

                    treeherder_data = {
                        'revision': revision,
                        'project': project,
                    }

                    buildername = job['buildername']
                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)

                    if (job_group_filter and job_name_info.get('group_symbol', '').lower() !=
                            job_group_filter.lower()):
                        continue

                    if source == 'pending':
                        request_id = job['id']
                    elif source == 'running':
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job['request_ids'][-1]
                    else:
                        # Without this branch request_id would be unbound and
                        # the guid generation below would fail with an
                        # UnboundLocalError that hides the real cause.
                        raise ValueError(
                            "unsupported source {0!r}: expected 'pending' "
                            "or 'running'".format(source))

                    new_job = {
                        'job_guid': common.generate_job_guid(
                            request_id,
                            buildername
                        ),
                        'name': job_name_info.get('name', ''),
                        'job_symbol': job_name_info.get('job_symbol', ''),
                        'group_name': job_name_info.get('group_name', ''),
                        'group_symbol': job_name_info.get('group_symbol', ''),
                        'reference_data_name': buildername,
                        'state': source,
                        'submit_timestamp': job['submitted_at'],
                        'build_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        # where are we going to get this data from?
                        'machine_platform': {
                            'os_name': platform_info['os'],
                            'platform': platform_info['os_platform'],
                            'architecture': platform_info['arch'],
                        },
                        'who': 'unknown',
                        'option_collection': {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True
                        },
                        'log_references': [],
                        'artifacts': [
                            {
                                'type': 'json',
                                'name': 'buildapi',
                                'log_urls': [],
                                'blob': {
                                    'buildername': buildername,
                                    'request_id': request_id
                                }
                            },
                        ]
                    }

                    if source == 'running':
                        new_job['start_timestamp'] = job['start_time']

                    treeherder_data['job'] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection()

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        # ids visited now but not present in the cached set from last time
        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info("Imported %d %s jobs, skipped %d previously seen",
                    num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs)

        return th_collections, job_ids_seen_now
Example #14
0
    def transform(self, data, source, revision_filter=None, project_filter=None, job_group_filter=None):
        """
        transform the buildapi structure into something we can ingest via
        our restful api

        :param data: buildapi payload; ``data[source]`` maps
            project -> revision -> list of job dicts
        :param source: ``"pending"`` or ``"running"``; picks the section of
            ``data`` to read, the cache key of previously seen job ids,
            every job's ``state`` and where the request id is read from
        :param revision_filter: passed to ``common.should_skip_revision``
        :param project_filter: passed to ``common.should_skip_project``
        :param job_group_filter: when set, only jobs whose group symbol
            matches it (case-insensitively) are kept
        :returns: tuple ``(th_collections, job_ids_seen_now)``: a dict of
            project name -> ``TreeherderJobCollection`` holding the jobs not
            seen last time, and the set of all job ids visited in this run
        :raises ValueError: when a job is reached with a ``source`` other
            than ``"pending"`` or ``"running"``
        """
        valid_projects = set(x.project for x in Datasource.objects.cached())
        revision_dict = defaultdict(list)

        # loop to catch all the revisions
        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            for rev in revisions.iterkeys():
                if common.should_skip_revision(rev, revision_filter):
                    continue
                revision_dict[project].append(rev)

        # retrieving the revision->resultset lookups
        revisions_lookup = common.lookup_revisions(revision_dict)

        job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
        job_ids_seen_now = set()

        th_collections = {}

        for project, revisions in data[source].iteritems():
            if common.should_skip_project(project, valid_projects, project_filter):
                continue

            for revision, jobs in revisions.items():
                if common.should_skip_revision(revision, revision_filter):
                    continue

                try:
                    resultset = revisions_lookup[project][revision]
                except KeyError:
                    logger.warning("skipping jobs since %s revision %s not yet ingested", project, revision)
                    continue

                # using project and revision form the revision lookups
                # to filter those jobs with unmatched revision
                for job in jobs:
                    job_ids_seen_now.add(job["id"])

                    # Don't process jobs that were already present in this datasource
                    # the last time this task completed successfully.
                    if job["id"] in job_ids_seen_last_time:
                        continue

                    treeherder_data = {"revision": revision, "resultset_id": resultset["id"], "project": project}

                    buildername = job["buildername"]
                    platform_info = buildbot.extract_platform_info(buildername)
                    job_name_info = buildbot.extract_name_info(buildername)

                    if job_group_filter and job_name_info.get("group_symbol", "").lower() != job_group_filter.lower():
                        continue

                    if source == "pending":
                        request_id = job["id"]
                    elif source == "running":
                        # The last element in request_ids corresponds to the request id of this job,
                        # the others are for the requests that were coalesced into this one.
                        request_id = job["request_ids"][-1]
                    else:
                        # Without this branch request_id would be unbound and
                        # the guid generation below would fail with an
                        # UnboundLocalError that hides the real cause.
                        raise ValueError(
                            "unsupported source {0!r}: expected 'pending' "
                            "or 'running'".format(source)
                        )

                    new_job = {
                        "job_guid": common.generate_job_guid(request_id, buildername),
                        "name": job_name_info.get("name", ""),
                        "job_symbol": job_name_info.get("job_symbol", ""),
                        "group_name": job_name_info.get("group_name", ""),
                        "group_symbol": job_name_info.get("group_symbol", ""),
                        "reference_data_name": buildername,
                        "state": source,
                        "submit_timestamp": job["submitted_at"],
                        "build_platform": {
                            "os_name": platform_info["os"],
                            "platform": platform_info["os_platform"],
                            "architecture": platform_info["arch"],
                        },
                        # where are we going to get this data from?
                        "machine_platform": {
                            "os_name": platform_info["os"],
                            "platform": platform_info["os_platform"],
                            "architecture": platform_info["arch"],
                        },
                        "who": "unknown",
                        "option_collection": {
                            # build_type contains an option name, eg. PGO
                            buildbot.extract_build_type(buildername): True
                        },
                        "log_references": [],
                        "artifacts": [
                            {
                                "type": "json",
                                "name": "buildapi",
                                "log_urls": [],
                                "blob": {"buildername": buildername, "request_id": request_id},
                            }
                        ],
                    }

                    if source == "running":
                        new_job["start_timestamp"] = job["start_time"]
                        # We store the original values to help debugging.
                        new_job["artifacts"].append(
                            {
                                "type": "json",
                                "name": "buildapi_running",
                                "log_urls": [],
                                "blob": {
                                    "revision": revision,
                                    "request_ids": job["request_ids"],
                                    "submitted_at": job["submitted_at"],
                                    "start_time": job["start_time"],
                                },
                            }
                        )

                    treeherder_data["job"] = new_job

                    if project not in th_collections:
                        th_collections[project] = TreeherderJobCollection()

                    # get treeherder job instance and add the job instance
                    # to the collection instance
                    th_job = th_collections[project].get_job(treeherder_data)
                    th_collections[project].add(th_job)

        # ids visited now but not present in the cached set from last time
        num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
        logger.info(
            "Imported %d %s jobs, skipped %d previously seen",
            num_new_jobs,
            source,
            len(job_ids_seen_now) - num_new_jobs,
        )

        return th_collections, job_ids_seen_now
Example #15
0
    def transform(self, data):
        """
        Convert the buildapi "running" structure into per-project job
        collections that can be ingested via our restful api.

        ``data["running"]`` is read as a mapping of
        project -> revision -> iterable of jobs.  Projects that have no
        cached ``Datasource`` are ignored, and only the revisions returned
        by ``common.lookup_revisions`` are processed, so jobs with an
        unmatched revision are dropped.

        Returns a dict mapping each project name to a
        ``TreeherderJobCollection`` created with ``job_type="update"``.
        """
        known_projects = {ds.project for ds in Datasource.objects.cached()}
        revisions_by_project = defaultdict(list)
        running = data["running"]

        # first pass: collect every revision of the projects we care about
        for project, revisions in running.items():
            if project in known_projects:
                for rev in revisions:
                    revisions_by_project[project].append(rev)

        # resolve the revision -> resultset lookups
        revisions_lookup = common.lookup_revisions(revisions_by_project)

        collections_by_project = {}

        # second pass: walk the lookup result (not the raw payload) so that
        # only jobs whose revision matched a resultset are emitted
        for project, resultsets in revisions_lookup.items():
            for revision, resultset in resultsets.items():
                for running_job in running[project][revision]:
                    revision_hash = resultset["revision_hash"]
                    resultset_id = resultset["id"]

                    buildername = running_job["buildername"]
                    platform_info = buildbot.extract_platform_info(buildername)
                    name_info = buildbot.extract_name_info(buildername)

                    # the guid is derived from the first request id and the
                    # submission time of the running job
                    first_request_id = running_job["request_ids"][0]
                    submitted_at = running_job["submitted_at"]
                    job_guid = common.generate_job_guid(first_request_id, submitted_at)

                    build_platform = {
                        "os_name": platform_info["os"],
                        "platform": platform_info["os_platform"],
                        "architecture": platform_info["arch"],
                        "vm": platform_info["vm"],
                    }
                    # the machine platform currently just mirrors the build
                    # platform: where are we going to get this data from?
                    machine_platform = dict(build_platform)

                    treeherder_data = {
                        "revision_hash": revision_hash,
                        "resultset_id": resultset_id,
                        "project": project,
                        "job": {
                            "job_guid": job_guid,
                            "name": name_info.get("name", ""),
                            "job_symbol": name_info.get("job_symbol", ""),
                            "group_name": name_info.get("group_name", ""),
                            "group_symbol": name_info.get("group_symbol", ""),
                            "buildername": buildername,
                            "state": "running",
                            "submit_timestamp": submitted_at,
                            "build_platform": build_platform,
                            "machine_platform": machine_platform,
                            "who": "unknown",
                            "option_collection": {
                                # build_type contains an option name, eg. PGO
                                buildbot.extract_build_type(buildername): True
                            },
                            "log_references": [],
                        },
                    }

                    # lazily create one collection per project
                    collection = collections_by_project.get(project)
                    if collection is None:
                        collection = TreeherderJobCollection(job_type="update")
                        collections_by_project[project] = collection

                    # get a treeherder job instance from the collection and
                    # add it back to that same collection
                    collection.add(collection.get_job(treeherder_data))

        return collections_by_project