def find_job_guid(self, build):
    """
    returns the job_guid, based on request id and request time.
    necessary because request id and request time are inconsistently
    represented in builds4h
    """
    # this is reused in the transformer and the analyzer, so reverting
    # the field getters to this function.
    prop = build['properties']

    try:
        # request_ids can be found in a couple of different places
        request_ids = prop.get('request_ids', build['request_ids'])
        # By experimentation we've found that the last id in the list
        # corresponds to the request that was used to schedule the job.
        request_id = request_ids[-1]
    except KeyError as e:
        logger.error("({0})request_id not found in {1}".format(
            prop["branch"], build))
        raise e

    try:
        buildername = prop['buildername']
    except KeyError as e:
        logger.error("({0})buildername not found in {1}".format(
            prop["branch"], build))
        raise e

    endtime = None
    if buildbot.RESULT_DICT[build['result']] == 'retry':
        try:
            endtime = build['endtime']
        except KeyError as e:
            logger.error("({0})endtime not found in {1}".format(
                prop["branch"], build))
            raise e

    job_guid_data = {'job_guid': '', 'coalesced': []}

    # If request_ids contains more than one element, then jobs were coalesced
    # into this one. In that case, the last element corresponds to the request
    # id of the job that actually ran (i.e. this one), and the rest are for
    # the pending jobs that were coalesced. We must generate guids for these
    # coalesced jobs, so they can be marked as coalesced, and not left as
    # orphaned pending jobs.
    coalesced_requests = request_ids[:-1]
    for coalesced_request_id in coalesced_requests:
        job_guid_data['coalesced'].append(
            common.generate_job_guid(coalesced_request_id, buildername))

    job_guid_data['job_guid'] = common.generate_job_guid(
        request_id, buildername, endtime)

    return job_guid_data
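# A minimal usage sketch for find_job_guid, assuming a hypothetical builds4h
# build payload shaped like the fields the function reads ('properties',
# 'request_ids', 'result'); all names and values here are invented for
# illustration, not real builds4h data.
example_build = {
    'properties': {
        'branch': 'mozilla-central',
        'buildername': 'Linux mozilla-central build',
        # two request ids: the first was coalesced into the second,
        # which is the request that actually ran
        'request_ids': [101, 102],
    },
    'result': 0,  # index into buildbot.RESULT_DICT (0 is typically 'success')
}
# guid_data = transformer.find_job_guid(example_build)
# guid_data['job_guid']  -> guid derived from request id 102 and buildername
# guid_data['coalesced'] -> [guid derived from request id 101 and buildername]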
def find_job_guid(self, build):
    """
    returns the job_guid, based on request id and request time.
    necessary because request id and request time are inconsistently
    represented in builds4h
    """
    prop = build['properties']

    # get the request_id from two possible places
    request_ids = prop.get('request_ids', [])
    request_ids_str = ""
    if request_ids == []:
        request_ids_str = ','.join(
            map(str, build.get('request_ids', [])))
    else:
        request_ids_str = ','.join(map(str, request_ids))

    # get the request_time from two possible places
    request_time_dict = prop.get('request_times', {})
    if request_time_dict != {}:
        request_times_str = ','.join(
            map(str, request_time_dict.values()))
    else:
        request_times_str = str(build['requesttime'])

    job_guid_data = {'job_guid': '', 'coalesced': []}

    if len(request_ids) > 1:
        # coalesced job detected, generate the coalesced job guids
        for r_id in request_ids:
            r_id_str = str(r_id)
            if r_id_str in request_time_dict:
                job_guid_data['coalesced'].append(
                    common.generate_job_guid(
                        r_id_str, request_time_dict[r_id_str]))

    job_guid_data['job_guid'] = common.generate_job_guid(
        request_ids_str, request_times_str)

    return job_guid_data
def transform(self, data, source, revision_filter=None, project_filter=None,
              job_group_filter=None):
    """
    transform the buildapi structure into something we can ingest via
    our restful api
    """
    valid_projects = set(x.project for x in Datasource.objects.cached())
    revision_dict = defaultdict(list)
    missing_resultsets = defaultdict(set)

    # loop to catch all the revisions
    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        for rev, jobs in revisions.items():
            if common.should_skip_revision(rev, revision_filter):
                continue
            for job in jobs:
                if not common.is_blacklisted_buildername(job['buildername']):
                    # Add the revision to the list to be fetched so long as we
                    # find at least one valid job associated with it.
                    revision_dict[project].append(rev)
                    break

    # retrieving the revision->resultset lookups
    revisions_lookup = common.lookup_revisions(revision_dict)

    job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
    job_ids_seen_now = set()

    th_collections = {}

    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        for revision, jobs in revisions.items():
            if common.should_skip_revision(revision, revision_filter):
                continue

            try:
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 revision,
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # There was no matching resultset, skip the job.
                continue

            # using project and revision from the revision lookups
            # to filter those jobs with unmatched revision
            for job in jobs:
                buildername = job['buildername']
                if common.is_blacklisted_buildername(buildername):
                    continue

                job_ids_seen_now.add(job['id'])

                # Don't process jobs that were already present in this datasource
                # the last time this task completed successfully.
                if job['id'] in job_ids_seen_last_time:
                    continue

                treeherder_data = {
                    'revision_hash': resultset['revision_hash'],
                    'resultset_id': resultset['id'],
                    'project': project,
                }

                platform_info = buildbot.extract_platform_info(buildername)
                job_name_info = buildbot.extract_name_info(buildername)

                if (job_group_filter and
                        job_name_info.get('group_symbol', '').lower() !=
                        job_group_filter.lower()):
                    continue

                if source == 'pending':
                    request_id = job['id']
                elif source == 'running':
                    # The last element in request_ids corresponds to the request
                    # id of this job, the others are for the requests that were
                    # coalesced into this one.
                    request_id = job['request_ids'][-1]

                new_job = {
                    'job_guid': common.generate_job_guid(
                        request_id,
                        buildername
                    ),
                    'name': job_name_info.get('name', ''),
                    'job_symbol': job_name_info.get('job_symbol', ''),
                    'group_name': job_name_info.get('group_name', ''),
                    'group_symbol': job_name_info.get('group_symbol', ''),
                    'reference_data_name': buildername,
                    'state': source,
                    'submit_timestamp': job['submitted_at'],
                    'build_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    # where are we going to get this data from?
                    'machine_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    'who': 'unknown',
                    'option_collection': {
                        # build_type contains an option name, e.g. PGO
                        buildbot.extract_build_type(buildername): True
                    },
                    'log_references': [],
                    'artifacts': [
                        {
                            'type': 'json',
                            'name': 'buildapi',
                            'log_urls': [],
                            'blob': {
                                'buildername': buildername,
                                'request_id': request_id
                            }
                        },
                    ]
                }

                if source == 'running':
                    new_job['start_timestamp'] = job['start_time']
                    # We store the original values to help debugging.
                    new_job['artifacts'].append(
                        {
                            'type': 'json',
                            'name': 'buildapi_running',
                            'log_urls': [],
                            'blob': {
                                'revision': revision,
                                'request_ids': job['request_ids'],
                                'submitted_at': job['submitted_at'],
                                'start_time': job['start_time'],
                            }
                        }
                    )

                treeherder_data['job'] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection()

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    if missing_resultsets and not revision_filter:
        common.fetch_missing_resultsets(source, missing_resultsets, logger)

    num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
    logger.info("Imported %d %s jobs, skipped %d previously seen",
                num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs)

    return th_collections, job_ids_seen_now
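# The common.should_skip_project and common.should_skip_revision helpers used
# above are simple predicates. A hedged sketch of their likely logic, matching
# the call sites in transform(); the real implementations live in treeherder's
# etl.common module and may differ in detail:
def should_skip_project(project, valid_projects, project_filter):
    # skip projects excluded by an explicit filter, or ones we don't ingest
    if project_filter and project != project_filter:
        return True
    return project not in valid_projects


def should_skip_revision(revision, revision_filter):
    # skip revisions excluded by an explicit filter
    return bool(revision_filter and revision != revision_filter)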
def transform(self, data):
    """
    transform the buildapi structure into something we can ingest via
    our restful api
    """
    projects = set(x.project for x in Datasource.objects.cached())
    revision_dict = defaultdict(list)
    missing_resultsets = defaultdict(set)

    # loop to catch all the revisions
    for project, revisions in data['running'].items():
        # this skips those projects we don't care about
        if project not in projects:
            continue

        for rev, jobs in revisions.items():
            revision_dict[project].append(rev)

    # retrieving the revision->resultset lookups
    revisions_lookup = common.lookup_revisions(revision_dict)

    th_collections = {}

    for project, revisions in data['running'].items():
        for revision, jobs in revisions.items():
            try:
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 revision,
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # skip this job, at least at this point
                continue

            # using project and revision from the revision lookups
            # to filter those jobs with unmatched revision
            for running_job in jobs:
                treeherder_data = {
                    'revision_hash': resultset['revision_hash'],
                    'resultset_id': resultset['id'],
                    'project': project,
                }

                platform_info = buildbot.extract_platform_info(
                    running_job['buildername'])
                job_name_info = buildbot.extract_name_info(
                    running_job['buildername'])
                device_name = buildbot.get_device_or_unknown(
                    job_name_info.get('name', ''),
                    platform_info['vm'])

                new_job = {
                    'job_guid': common.generate_job_guid(
                        running_job['request_ids'][0],
                        running_job['submitted_at']),
                    'name': job_name_info.get('name', ''),
                    'job_symbol': job_name_info.get('job_symbol', ''),
                    'group_name': job_name_info.get('group_name', ''),
                    'group_symbol': job_name_info.get('group_symbol', ''),
                    'reference_data_name': running_job['buildername'],
                    'state': 'running',
                    'submit_timestamp': running_job['submitted_at'],
                    'start_timestamp': running_job['start_time'],
                    'build_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                        'vm': platform_info['vm']
                    },
                    # where are we going to get this data from?
                    'machine_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                        'vm': platform_info['vm']
                    },
                    'device_name': device_name,
                    'who': 'unknown',
                    'option_collection': {
                        # build_type contains an option name, e.g. PGO
                        buildbot.extract_build_type(running_job['buildername']): True
                    },
                    'log_references': [],
                    'artifacts': [
                        {
                            'type': 'json',
                            'name': 'buildapi_running',
                            'log_urls': [],
                            'blob': running_job
                        },
                        {
                            'type': 'json',
                            'name': 'buildapi',
                            'log_urls': [],
                            'blob': {
                                'buildername': running_job['buildername'],
                                'request_id': max(running_job['request_ids'])
                            }
                        },
                    ]
                }

                treeherder_data['job'] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection(
                        job_type='update')

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    if missing_resultsets:
        common.fetch_missing_resultsets("running", missing_resultsets, logger)

    return th_collections
def find_job_guid(self, build):
    """
    returns the job_guid, based on request id and request time.
    necessary because request id and request time are inconsistently
    represented in builds4h
    """
    # this is reused in the transformer and the analyzer, so reverting
    # the field getters to this function.

    # request_id and request_time are mandatory
    # and they can be found in a couple of different places
    prop = build['properties']
    try:
        request_ids = build['properties'].get('request_ids',
                                              build['request_ids'])
    except KeyError as e:
        logger.error("({0})request_id not found in {1}".format(
            prop["branch"], build))
        raise e

    try:
        request_times = build['properties'].get('request_times',
                                                build['requesttime'])
    except KeyError as e:
        logger.error("({0})request_time not found in {1}".format(
            prop["branch"], build))
        raise e

    endtime = None
    if buildbot.RESULT_DICT[build['result']] == 'retry':
        try:
            endtime = build['endtime']
        except KeyError as e:
            logger.error("({0})endtime not found in {1}".format(
                prop["branch"], build))
            raise e

    request_ids_str = ",".join(map(str, request_ids))
    request_time_list = []

    if isinstance(request_times, dict):
        for request_id in request_ids:
            request_time_list.append(request_times[str(request_id)])
        request_times_str = ','.join(map(str, request_time_list))
    else:
        request_times_str = str(request_times)

    job_guid_data = {'job_guid': '', 'coalesced': []}

    if len(request_ids) > 1:
        # coalesced job detected, generate the coalesced job guids
        for index, r_id in enumerate(request_ids):
            # skip if buildbot doesn't have a matching number of ids and times
            if len(request_time_list) > index:
                job_guid_data['coalesced'].append(
                    common.generate_job_guid(str(r_id),
                                             request_time_list[index]))

    job_guid_data['job_guid'] = common.generate_job_guid(
        request_ids_str, request_times_str, endtime)

    return job_guid_data
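# Illustrative shape (not real data) for the request_times handling above:
# builds4h keys the 'request_times' property by *stringified* request ids,
# which is why the loop indexes it with str(request_id).
example_properties = {
    'request_ids': [101, 102],
    'request_times': {'101': 1400000000, '102': 1400000060},
}
# request_ids_str   -> "101,102"
# request_times_str -> "1400000000,1400000060"
# and one coalesced guid is generated per matching (id, time) pair.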
def transform(self, data, source, revision_filter=None, project_filter=None,
              job_group_filter=None):
    """
    transform the buildapi structure into something we can ingest via
    our restful api
    """
    valid_projects = set(x.project for x in Datasource.objects.cached())
    revision_dict = defaultdict(list)

    # loop to catch all the revisions
    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        for rev in revisions.iterkeys():
            if common.should_skip_revision(rev, revision_filter):
                continue
            revision_dict[project].append(rev)

    # retrieving the revision->resultset lookups
    revisions_lookup = common.lookup_revisions(revision_dict)

    job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
    job_ids_seen_now = set()

    th_collections = {}

    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        for revision, jobs in revisions.items():
            if common.should_skip_revision(revision, revision_filter):
                continue

            try:
                resultset = revisions_lookup[project][revision]
            except KeyError:
                logger.warning(
                    "skipping jobs since %s revision %s not yet ingested",
                    project, revision)
                continue

            # using project and revision from the revision lookups
            # to filter those jobs with unmatched revision
            for job in jobs:
                job_ids_seen_now.add(job['id'])

                # Don't process jobs that were already present in this datasource
                # the last time this task completed successfully.
                if job['id'] in job_ids_seen_last_time:
                    continue

                treeherder_data = {
                    'revision': revision,
                    'resultset_id': resultset['id'],
                    'project': project,
                }

                buildername = job['buildername']
                platform_info = buildbot.extract_platform_info(buildername)
                job_name_info = buildbot.extract_name_info(buildername)

                if (job_group_filter and
                        job_name_info.get('group_symbol', '').lower() !=
                        job_group_filter.lower()):
                    continue

                if source == 'pending':
                    request_id = job['id']
                elif source == 'running':
                    # The last element in request_ids corresponds to the request
                    # id of this job, the others are for the requests that were
                    # coalesced into this one.
                    request_id = job['request_ids'][-1]

                new_job = {
                    'job_guid': common.generate_job_guid(request_id, buildername),
                    'name': job_name_info.get('name', ''),
                    'job_symbol': job_name_info.get('job_symbol', ''),
                    'group_name': job_name_info.get('group_name', ''),
                    'group_symbol': job_name_info.get('group_symbol', ''),
                    'reference_data_name': buildername,
                    'state': source,
                    'submit_timestamp': job['submitted_at'],
                    'build_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    # where are we going to get this data from?
                    'machine_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    'who': 'unknown',
                    'option_collection': {
                        # build_type contains an option name, e.g. PGO
                        buildbot.extract_build_type(buildername): True
                    },
                    'log_references': [],
                    'artifacts': [
                        {
                            'type': 'json',
                            'name': 'buildapi',
                            'log_urls': [],
                            'blob': {
                                'buildername': buildername,
                                'request_id': request_id
                            }
                        },
                    ]
                }

                if source == 'running':
                    new_job['start_timestamp'] = job['start_time']
                    # We store the original values to help debugging.
                    new_job['artifacts'].append({
                        'type': 'json',
                        'name': 'buildapi_running',
                        'log_urls': [],
                        'blob': {
                            'revision': revision,
                            'request_ids': job['request_ids'],
                            'submitted_at': job['submitted_at'],
                            'start_time': job['start_time'],
                        }
                    })

                treeherder_data['job'] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection()

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
    logger.info("Imported %d %s jobs, skipped %d previously seen",
                num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs)

    return th_collections, job_ids_seen_now
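# The job_ids_seen_now return value is meant to be written back under
# CACHE_KEYS[source] once ingestion succeeds, so the next run can skip jobs it
# has already processed. A hedged sketch of that handshake; the 'transformer'
# name and the surrounding task code are assumptions, and only
# cache.get/cache.set (Django's cache API) are taken as given:
th_collections, job_ids_seen = transformer.transform(data, 'pending')
# ... load th_collections into treeherder ...
cache.set(CACHE_KEYS['pending'], job_ids_seen)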
def adapt_data(self, data):
    """Adapts the PulseDataAdapter into the treeherder input data structure"""
    resultset = common.lookup_revisions({data['branch']: [data['revision']]})
    if not resultset:
        return {}

    del resultset[data['branch']][data['revision']]['id']
    treeherder_data = resultset[data['branch']][data['revision']]
    treeherder_data['project'] = data['branch']

    ####
    # TODO: This is a temporary fix, this data will not be located
    #       in the sourceStamp in the pulse stream. It will likely
    #       be in other build properties but for now this will work.
    #       Once the new properties are added they need to be incorporated
    #       here.
    ####
    request_id = data['request_ids'][0]

    job = {
        'job_guid': common.generate_job_guid(
            # The keys in this dict are unicode but the values in
            # request_ids are not, this explicit cast could cause
            # problems if the data added to the pulse stream is
            # modified
            request_id,
            data['request_times'][unicode(request_id)]
        ),
        'revision_hash': treeherder_data.pop('revision_hash'),
        'name': data['test_name'],
        'product_name': data['product'],
        'state': 'completed',
        # Do we need to map this to the strings in the sample structure?
        'result': buildbot.RESULT_DICT.get(int(data['results']), 'unknown'),
        'reason': data['reason'],
        # There is both a who and blame that appear to be identical in the
        # pulse stream, is who the way to go?
        'who': data['who'],
        # This assumes the 0 element in request_ids is the id for the job,
        # which is not always true if there are coalesced jobs. This will
        # need to be updated when
        # https://bugzilla.mozilla.org/show_bug.cgi?id=862633 is resolved.
        'submit_timestamp': data['request_times'][unicode(request_id)],
        'start_timestamp': data['times']['start_timestamp'],
        'end_timestamp': str(int(time.time())),
        'machine': data['slave'],
        'build_url': data['buildurl'],
        'build_platform': {
            'os_name': data['os'],
            'platform': data['os_platform'],
            'architecture': data['arch'],
            'vm': data['vm']
        },
        # where are we going to get this data from?
        'machine_platform': {
            'os_name': data['os'],
            'platform': data['os_platform'],
            'architecture': data['arch'],
            'vm': data['vm']
        },
        'option_collection': {
            data['buildtype']: True
        },
        'log_references': [{
            'url': data['log_url'],
            # using the jobtype as a name for now, the name allows us
            # to have different log types with their own processing
            'name': data['jobtype']
        }],
        'artifact': {}
    }

    treeherder_data['job'] = job

    return JobData(treeherder_data)
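# A hedged sketch of the pulse payload adapt_data() consumes, listing only
# the keys the function actually reads; every value here is invented for
# illustration and the real pulse messages carry many more fields.
example_pulse_data = {
    'branch': 'mozilla-central',
    'revision': '123456789abc',
    'request_ids': [101],
    'request_times': {u'101': 1400000000},  # keyed by unicode request id
    'test_name': 'mochitest-1',
    'product': 'Firefox',
    'results': '0',
    'reason': 'scheduler',
    'who': 'sendchange-unittest',
    'times': {'start_timestamp': 1400000100},
    'slave': 'tst-linux64-ec2-001',
    'buildurl': 'http://example.com/build',
    'os': 'linux',
    'os_platform': 'linux64',
    'arch': 'x86_64',
    'vm': False,
    'buildtype': 'opt',
    'log_url': 'http://example.com/log.txt',
    'jobtype': 'mochitest',
}
# job_data = adapter.adapt_data(example_pulse_data)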
def transform(self, data, source, filter_to_revision=None,
              filter_to_project=None, filter_to_job_group=None):
    """
    transform the buildapi structure into something we can ingest via
    our restful api
    """
    projects = set(x.project for x in Datasource.objects.cached())
    revision_dict = defaultdict(list)
    missing_resultsets = defaultdict(set)

    # loop to catch all the revisions
    for project, revisions in data[source].iteritems():
        # this skips those projects we don't care about
        if project not in projects:
            continue

        if filter_to_project and project != filter_to_project:
            continue

        for rev, jobs in revisions.items():
            revision_dict[project].append(rev)

    # retrieving the revision->resultset lookups
    revisions_lookup = common.lookup_revisions(revision_dict)

    th_collections = {}

    for project, revisions in data[source].iteritems():
        for revision, jobs in revisions.items():
            try:
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 revision,
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # skip this job, at least at this point
                continue

            if filter_to_revision and filter_to_revision != resultset['revision']:
                continue

            # using project and revision from the revision lookups
            # to filter those jobs with unmatched revision
            for job in jobs:
                treeherder_data = {
                    'revision_hash': resultset['revision_hash'],
                    'resultset_id': resultset['id'],
                    'project': project,
                }

                platform_info = buildbot.extract_platform_info(job['buildername'])
                job_name_info = buildbot.extract_name_info(job['buildername'])

                if (filter_to_job_group and
                        job_name_info.get('group_symbol', '').lower() !=
                        filter_to_job_group.lower()):
                    continue

                if source == 'pending':
                    request_id = job['id']
                elif source == 'running':
                    # The last element in request_ids corresponds to the request
                    # id of this job, the others are for the requests that were
                    # coalesced into this one.
                    request_id = job['request_ids'][-1]

                device_name = buildbot.get_device_or_unknown(
                    job_name_info.get('name', ''),
                    platform_info['vm']
                )

                new_job = {
                    'job_guid': common.generate_job_guid(
                        request_id,
                        job['buildername']
                    ),
                    'name': job_name_info.get('name', ''),
                    'job_symbol': job_name_info.get('job_symbol', ''),
                    'group_name': job_name_info.get('group_name', ''),
                    'group_symbol': job_name_info.get('group_symbol', ''),
                    'reference_data_name': job['buildername'],
                    'state': source,
                    'submit_timestamp': job['submitted_at'],
                    'build_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                        'vm': platform_info['vm']
                    },
                    # where are we going to get this data from?
                    'machine_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                        'vm': platform_info['vm']
                    },
                    'device_name': device_name,
                    'who': 'unknown',
                    'option_collection': {
                        # build_type contains an option name, e.g. PGO
                        buildbot.extract_build_type(job['buildername']): True
                    },
                    'log_references': [],
                    'artifacts': [
                        {
                            'type': 'json',
                            'name': 'buildapi',
                            'log_urls': [],
                            'blob': {
                                'buildername': job['buildername'],
                                'request_id': request_id
                            }
                        },
                    ]
                }

                if source == 'running':
                    new_job['start_timestamp'] = job['start_time']
                    # We store the original values to help debugging.
                    new_job['artifacts'].append(
                        {
                            'type': 'json',
                            'name': 'buildapi_running',
                            'log_urls': [],
                            'blob': {
                                'revision': revision,
                                'request_ids': job['request_ids'],
                                'submitted_at': job['submitted_at'],
                                'start_time': job['start_time'],
                            }
                        }
                    )

                treeherder_data['job'] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection(
                        job_type='update'
                    )

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    if missing_resultsets and not filter_to_revision:
        common.fetch_missing_resultsets(source, missing_resultsets, logger)

    return th_collections
def transform(self, data):
    """
    transform the buildapi structure into something we can ingest via
    our restful api
    """
    projects = set(x.project for x in Datasource.objects.cached())
    revision_dict = defaultdict(list)
    missing_resultsets = defaultdict(set)

    # loop to catch all the revisions
    for project, revisions in data['running'].items():
        # this skips those projects we don't care about
        if project not in projects:
            continue

        for rev, jobs in revisions.items():
            revision_dict[project].append(rev)

    # retrieving the revision->resultset lookups
    revisions_lookup = common.lookup_revisions(revision_dict)

    th_collections = {}

    for project, revisions in data['running'].items():
        for revision, jobs in revisions.items():
            try:
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 revision,
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # skip this job, at least at this point
                continue

            # using project and revision from the revision lookups
            # to filter those jobs with unmatched revision
            for running_job in jobs:
                treeherder_data = {
                    'revision_hash': resultset['revision_hash'],
                    'resultset_id': resultset['id'],
                    'project': project,
                }

                platform_info = buildbot.extract_platform_info(
                    running_job['buildername'])
                job_name_info = buildbot.extract_name_info(
                    running_job['buildername'])
                device_name = buildbot.get_device_or_unknown(
                    job_name_info.get('name', ''),
                    platform_info['vm']
                )

                new_job = {
                    'job_guid': common.generate_job_guid(
                        running_job['request_ids'][0],
                        running_job['submitted_at']
                    ),
                    'name': job_name_info.get('name', ''),
                    'job_symbol': job_name_info.get('job_symbol', ''),
                    'group_name': job_name_info.get('group_name', ''),
                    'group_symbol': job_name_info.get('group_symbol', ''),
                    'reference_data_name': running_job['buildername'],
                    'state': 'running',
                    'submit_timestamp': running_job['submitted_at'],
                    'start_timestamp': running_job['start_time'],
                    'build_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                        'vm': platform_info['vm']
                    },
                    # where are we going to get this data from?
                    'machine_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                        'vm': platform_info['vm']
                    },
                    'device_name': device_name,
                    'who': 'unknown',
                    'option_collection': {
                        # build_type contains an option name, e.g. PGO
                        buildbot.extract_build_type(running_job['buildername']): True
                    },
                    'log_references': [],
                    'artifacts': [
                        {
                            'type': 'json',
                            'name': 'buildapi_running',
                            'log_urls': [],
                            'blob': running_job
                        },
                        {
                            'type': 'json',
                            'name': 'buildapi',
                            'log_urls': [],
                            'blob': {
                                'buildername': running_job['buildername'],
                                'request_id': running_job['request_ids'][0]
                            }
                        },
                    ]
                }

                treeherder_data['job'] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection(
                        job_type='update'
                    )

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    if missing_resultsets:
        common.fetch_missing_resultsets("running", missing_resultsets, logger)

    return th_collections
def transform(self, data, source, revision_filter=None, project_filter=None,
              job_group_filter=None):
    """
    transform the buildapi structure into something we can ingest via
    our restful api
    """
    valid_projects = set(Repository.objects.values_list('name', flat=True))
    revision_dict = defaultdict(list)

    # loop to catch all the revisions
    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        for rev in revisions.iterkeys():
            if common.should_skip_revision(rev, revision_filter):
                continue
            revision_dict[project].append(rev)

    job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
    job_ids_seen_now = set()

    th_collections = {}

    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        revisions_seen_now_for_project = set()

        for revision, jobs in revisions.items():
            if common.should_skip_revision(revision, revision_filter):
                continue

            # it should be quite rare for a job to be ingested before a
            # revision, but it could happen
            if revision not in revisions_seen_now_for_project and \
                    not Push.objects.filter(
                        repository__name=project,
                        revision__startswith=revision).exists():
                logger.warning("skipping jobs since %s revision %s "
                               "not yet ingested", project, revision)
                continue
            revisions_seen_now_for_project.add(revision)

            # using project and revision from the revision lookups
            # to filter those jobs with unmatched revision
            for job in jobs:
                job_ids_seen_now.add(job['id'])

                # Don't process jobs that we saw the last time this task
                # completed successfully.
                if job['id'] in job_ids_seen_last_time:
                    continue

                treeherder_data = {
                    'revision': revision,
                    'project': project,
                }

                buildername = job['buildername']
                platform_info = buildbot.extract_platform_info(buildername)
                job_name_info = buildbot.extract_name_info(buildername)

                if (job_group_filter and
                        job_name_info.get('group_symbol', '').lower() !=
                        job_group_filter.lower()):
                    continue

                if source == 'pending':
                    request_id = job['id']
                elif source == 'running':
                    # The last element in request_ids corresponds to the request
                    # id of this job, the others are for the requests that were
                    # coalesced into this one.
                    request_id = job['request_ids'][-1]

                new_job = {
                    'job_guid': common.generate_job_guid(
                        request_id,
                        buildername
                    ),
                    'name': job_name_info.get('name', ''),
                    'job_symbol': job_name_info.get('job_symbol', ''),
                    'group_name': job_name_info.get('group_name', ''),
                    'group_symbol': job_name_info.get('group_symbol', ''),
                    'reference_data_name': buildername,
                    'state': source,
                    'submit_timestamp': job['submitted_at'],
                    'build_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    # where are we going to get this data from?
                    'machine_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    'who': 'unknown',
                    'option_collection': {
                        # build_type contains an option name, e.g. PGO
                        buildbot.extract_build_type(buildername): True
                    },
                    'log_references': [],
                    'artifacts': [
                        {
                            'type': 'json',
                            'name': 'buildapi',
                            'log_urls': [],
                            'blob': {
                                'buildername': buildername,
                                'request_id': request_id
                            }
                        },
                    ]
                }

                if source == 'running':
                    new_job['start_timestamp'] = job['start_time']

                treeherder_data['job'] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection()

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
    logger.info("Imported %d %s jobs, skipped %d previously seen",
                num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs)

    return th_collections, job_ids_seen_now
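# How this transform() is driven, as a hedged sketch: buildapi publishes
# separate pending and running feeds, each nested as
# {source: {project: {revision: [job, ...]}}}, and the transform runs once
# per source. The 'transformer' and 'pending_data' names are assumptions.
pending_collections, seen_ids = transformer.transform(pending_data, 'pending')
running_collections, seen_ids = transformer.transform(running_data, 'running')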
def transform(self, data, source, revision_filter=None, project_filter=None,
              job_group_filter=None):
    """
    transform the buildapi structure into something we can ingest via
    our restful api
    """
    valid_projects = set(x.project for x in Datasource.objects.cached())
    revision_dict = defaultdict(list)

    # loop to catch all the revisions
    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        for rev in revisions.iterkeys():
            if common.should_skip_revision(rev, revision_filter):
                continue
            revision_dict[project].append(rev)

    # retrieving the revision->resultset lookups
    revisions_lookup = common.lookup_revisions(revision_dict)

    job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
    job_ids_seen_now = set()

    th_collections = {}

    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        for revision, jobs in revisions.items():
            if common.should_skip_revision(revision, revision_filter):
                continue

            try:
                resultset = revisions_lookup[project][revision]
            except KeyError:
                logger.warning(
                    "skipping jobs since %s revision %s not yet ingested",
                    project, revision)
                continue

            # using project and revision from the revision lookups
            # to filter those jobs with unmatched revision
            for job in jobs:
                job_ids_seen_now.add(job["id"])

                # Don't process jobs that were already present in this datasource
                # the last time this task completed successfully.
                if job["id"] in job_ids_seen_last_time:
                    continue

                treeherder_data = {
                    "revision": revision,
                    "resultset_id": resultset["id"],
                    "project": project,
                }

                buildername = job["buildername"]
                platform_info = buildbot.extract_platform_info(buildername)
                job_name_info = buildbot.extract_name_info(buildername)

                if (job_group_filter and
                        job_name_info.get("group_symbol", "").lower() !=
                        job_group_filter.lower()):
                    continue

                if source == "pending":
                    request_id = job["id"]
                elif source == "running":
                    # The last element in request_ids corresponds to the request
                    # id of this job, the others are for the requests that were
                    # coalesced into this one.
                    request_id = job["request_ids"][-1]

                new_job = {
                    "job_guid": common.generate_job_guid(request_id, buildername),
                    "name": job_name_info.get("name", ""),
                    "job_symbol": job_name_info.get("job_symbol", ""),
                    "group_name": job_name_info.get("group_name", ""),
                    "group_symbol": job_name_info.get("group_symbol", ""),
                    "reference_data_name": buildername,
                    "state": source,
                    "submit_timestamp": job["submitted_at"],
                    "build_platform": {
                        "os_name": platform_info["os"],
                        "platform": platform_info["os_platform"],
                        "architecture": platform_info["arch"],
                    },
                    # where are we going to get this data from?
                    "machine_platform": {
                        "os_name": platform_info["os"],
                        "platform": platform_info["os_platform"],
                        "architecture": platform_info["arch"],
                    },
                    "who": "unknown",
                    "option_collection": {
                        # build_type contains an option name, e.g. PGO
                        buildbot.extract_build_type(buildername): True
                    },
                    "log_references": [],
                    "artifacts": [
                        {
                            "type": "json",
                            "name": "buildapi",
                            "log_urls": [],
                            "blob": {
                                "buildername": buildername,
                                "request_id": request_id,
                            },
                        }
                    ],
                }

                if source == "running":
                    new_job["start_timestamp"] = job["start_time"]
                    # We store the original values to help debugging.
                    new_job["artifacts"].append(
                        {
                            "type": "json",
                            "name": "buildapi_running",
                            "log_urls": [],
                            "blob": {
                                "revision": revision,
                                "request_ids": job["request_ids"],
                                "submitted_at": job["submitted_at"],
                                "start_time": job["start_time"],
                            },
                        }
                    )

                treeherder_data["job"] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection()

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
    logger.info(
        "Imported %d %s jobs, skipped %d previously seen",
        num_new_jobs,
        source,
        len(job_ids_seen_now) - num_new_jobs,
    )

    return th_collections, job_ids_seen_now
def transform(self, data):
    """
    transform the buildapi structure into something we can ingest via
    our restful api
    """
    projects = set(x.project for x in Datasource.objects.cached())
    revision_dict = defaultdict(list)

    # loop to catch all the revisions
    for project, revisions in data["running"].items():
        # this skips those projects we don't care about
        if project not in projects:
            continue

        for rev, jobs in revisions.items():
            revision_dict[project].append(rev)

    # retrieving the revision->resultset lookups
    revisions_lookup = common.lookup_revisions(revision_dict)

    th_collections = {}

    for project, revisions in revisions_lookup.items():
        for revision in revisions:
            resultset = revisions[revision]

            # using project and revision from the revision lookups
            # to filter those jobs with unmatched revision
            for job in data["running"][project][revision]:
                treeherder_data = {
                    "revision_hash": resultset["revision_hash"],
                    "resultset_id": resultset["id"],
                    "project": project,
                }

                platform_info = buildbot.extract_platform_info(job["buildername"])
                job_name_info = buildbot.extract_name_info(job["buildername"])

                new_job = {
                    "job_guid": common.generate_job_guid(
                        job["request_ids"][0], job["submitted_at"]),
                    "name": job_name_info.get("name", ""),
                    "job_symbol": job_name_info.get("job_symbol", ""),
                    "group_name": job_name_info.get("group_name", ""),
                    "group_symbol": job_name_info.get("group_symbol", ""),
                    "buildername": job["buildername"],
                    "state": "running",
                    "submit_timestamp": job["submitted_at"],
                    "build_platform": {
                        "os_name": platform_info["os"],
                        "platform": platform_info["os_platform"],
                        "architecture": platform_info["arch"],
                        "vm": platform_info["vm"],
                    },
                    # where are we going to get this data from?
                    "machine_platform": {
                        "os_name": platform_info["os"],
                        "platform": platform_info["os_platform"],
                        "architecture": platform_info["arch"],
                        "vm": platform_info["vm"],
                    },
                    "who": "unknown",
                    "option_collection": {
                        # build_type contains an option name, e.g. PGO
                        buildbot.extract_build_type(job["buildername"]): True
                    },
                    "log_references": [],
                }

                treeherder_data["job"] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection(
                        job_type="update")

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    return th_collections
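# A hedged sketch of the nested structure this transform() walks; the job
# fields shown are the ones the function reads, and all values are invented
# for illustration.
example_data = {
    "running": {
        "mozilla-central": {
            "123456789abc": [
                {
                    "buildername": "Linux mozilla-central test mochitest-1",
                    "request_ids": [101],
                    "submitted_at": 1400000000,
                },
            ],
        },
    },
}
# th_collections = transformer.transform(example_data)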