def test_post_job_collection(self, mock_post):
    """A TreeherderJobCollection can be POSTed via TreeherderClient."""
    mock_post.return_value = self._expected_response_return_object()

    collection = TreeherderJobCollection()
    for job_blob in self.job_data:
        collection.add(collection.get_job(job_blob))

    client = TreeherderClient(
        protocol='http',
        host='host',
        client_id='client-abc',
        secret='secret123',
    )
    client.post_collection('project', collection)

    # Exactly one POST, whose JSON payload is the collection data.
    args, kwargs = mock_post.call_args
    self.assertEqual(mock_post.call_count, 1)
    self.assertEqual(collection.get_collection_data(), kwargs['json'])
def test_objectstore_create(job_sample, jm):
    """
    Test posting data to the objectstore via webtest.

    Expected results:
    - return code 200
    - return message successful
    - 1 job stored in the objectstore
    """
    collection = TreeherderJobCollection()
    collection.add(collection.get_job(job_sample))

    response = test_utils.post_collection(jm.project, collection)
    assert response.status_int == 200
    assert response.json['message'] == 'well-formed JSON stored'

    # Verify the job actually landed in the objectstore.
    guid = job_sample["job"]["job_guid"]
    stored_objs = jm.get_os_dhub().execute(
        proc="objectstore_test.selects.row_by_guid",
        placeholders=[guid],
    )
    assert len(stored_objs) == 1
    assert stored_objs[0]['job_guid'] == guid

    jm.disconnect()
def test_send_with_oauth(self, mock_post, mock_time, mock_generate_nonce):
    """Tests that oauth data is sent to server"""
    # Freeze timestamp and nonce so the OAuth signature is deterministic.
    mock_time.return_value = 1342229050
    mock_generate_nonce.return_value = "46810593"
    mock_post.return_value = self._expected_response_return_object()

    client = TreeherderClient(protocol='http', host='host')

    collection = TreeherderJobCollection()
    # One job is enough to exercise the signing path.
    for job_blob in self.job_data:
        collection.add(collection.get_job(job_blob))
        break

    client.post_collection('project', 'key', 'secret', collection)

    self.assertEqual(mock_post.call_count, 1)
    args, kwargs = mock_post.call_args
    self.assertEqual(args[0], "http://host/api/project/project/objectstore/?oauth_body_hash=IKbDoi5GvTRaqjRTCDyKIN5wWiY%3D&oauth_nonce=46810593&oauth_timestamp=1342229050&oauth_consumer_key=key&oauth_signature_method=HMAC-SHA1&oauth_version=1.0&oauth_token=&user=project&oauth_signature=uq%2BrkJCRPyPUdXExSasm25ab8m4%3D")
def test_treeheder_auth(self, mock_time, mock_generate_nonce):
    """Tests that oauth data is sent to server"""
    # Pin time and nonce so the signed URL is reproducible.
    mock_time.return_value = 1342229050
    mock_generate_nonce.return_value = "46810593"

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(self.job_data[0]))

    request = requests.Request(
        url="http://host/api/project/project/jobs/",
        json=collection.get_collection_data(),
        auth=TreeherderAuth("key", "secret", "project"),
        method="POST",
    )
    prepared = request.prepare()

    expected_url = (
        "http://host/api/project/project/jobs/?"
        "oauth_body_hash=IKbDoi5GvTRaqjRTCDyKIN5wWiY%3D&"
        "oauth_nonce=46810593&"
        "oauth_timestamp=1342229050&"
        "oauth_consumer_key=key&"
        "oauth_signature_method=HMAC-SHA1&"
        "oauth_version=1.0&"
        "oauth_token=&"
        "user=project&"
        "oauth_signature=DJe%2F%2FJtw7s2XUrciG%2Bl1tfJJen8%3D"
    )
    self.assertEqual(prepared.url, expected_url)
def test_objectstore_create(job_sample, jm):
    """
    Test posting data to the objectstore via webtest.

    Expected results:
    - return code 200
    - return message successful
    - 1 job stored in the objectstore
    """
    job_collection = TreeherderJobCollection()
    new_job = job_collection.get_job(job_sample)
    job_collection.add(new_job)

    resp = test_utils.post_collection(jm.project, job_collection)
    assert resp.status_int == 200
    assert resp.json['message'] == 'well-formed JSON stored'

    # The posted job must be retrievable from the objectstore by guid.
    rows = jm.get_os_dhub().execute(
        proc="objectstore_test.selects.row_by_guid",
        placeholders=[job_sample["job"]["job_guid"]],
    )
    assert len(rows) == 1
    assert rows[0]['job_guid'] == job_sample["job"]["job_guid"]

    jm.disconnect()
def test_send_with_oauth(self, mock_post, mock_time, mock_generate_nonce):
    """Tests that oauth data is sent to server"""
    # Deterministic time/nonce make the expected signed URL stable.
    mock_time.return_value = 1342229050
    mock_generate_nonce.return_value = "46810593"
    mock_post.return_value = self._expected_response_return_object()

    client = TreeherderClient(
        protocol='http',
        host='host',
    )

    collection = TreeherderJobCollection()
    # Add only the first sample job; that is enough for signing.
    for sample in self.job_data:
        collection.add(collection.get_job(sample))
        break

    client.post_collection('project', 'key', 'secret', collection)

    self.assertEqual(mock_post.call_count, 1)
    call_args, call_kwargs = mock_post.call_args
    expected = "http://host/api/project/project/objectstore/?oauth_body_hash=IKbDoi5GvTRaqjRTCDyKIN5wWiY%3D&oauth_nonce=46810593&oauth_timestamp=1342229050&oauth_consumer_key=key&oauth_signature_method=HMAC-SHA1&oauth_version=1.0&oauth_token=&user=project&oauth_signature=uq%2BrkJCRPyPUdXExSasm25ab8m4%3D"
    self.assertEqual(call_args[0], expected)
def test_post_job_collection(self):
    """Can add a treeherder collections to a TreeherderRequest."""
    collection = TreeherderJobCollection()
    for job_blob in self.job_data:
        collection.add(collection.get_job(job_blob))

    client = TreeherderClient(
        server_url='http://host',
        client_id='client-abc',
        secret='secret123',
    )

    def request_callback(request):
        # Check that the expected content was POSTed.
        self.assertEqual(json.loads(request.body),
                         collection.get_collection_data())
        return (200, {}, '{"message": "Job successfully updated"}')

    endpoint = client._get_endpoint_url(collection.endpoint_base,
                                        project='project')
    responses.add_callback(responses.POST, endpoint,
                           match_querystring=True,
                           callback=request_callback,
                           content_type='application/json')

    client.post_collection('project', collection)
def test_send_with_oauth(self, mock_post, mock_time, mock_generate_nonce):
    """Tests that oauth data is sent to server"""
    # Fixed timestamp/nonce -> fixed signature in the expected URL.
    mock_time.return_value = 1342229050
    mock_generate_nonce.return_value = "46810593"
    mock_post.return_value = self._expected_response_return_object()

    client = TreeherderClient(protocol='http', host='host')

    collection = TreeherderJobCollection()
    for sample in self.job_data:
        collection.add(collection.get_job(sample))
        break  # a single job suffices

    client.post_collection('project', 'key', 'secret', collection)

    self.assertEqual(mock_post.call_count, 1)
    positional, keyword = mock_post.call_args
    self.assertEqual(positional[0], "http://host/api/project/project/objectstore/?oauth_body_hash=C4jFXK8TBoFeh9wHOu1IkU7tERw%3D&oauth_nonce=46810593&oauth_timestamp=1342229050&oauth_consumer_key=key&oauth_signature_method=HMAC-SHA1&oauth_version=1.0&oauth_token=&user=project&oauth_signature=hNqHsAd7sdGyDLfWf7n9Bb%2B2rzM%3D")
def test_job_collection(self):
    """Confirm the collection matches the sample data"""
    tjc = TreeherderJobCollection()
    for job in self.job_data:
        tjc.add(TreeherderJob(job))

    # assertEqual reports both values on failure, unlike the original
    # assertTrue(len(...) == len(...)) which only said "False is not true".
    self.assertEqual(len(self.job_data), len(tjc.data))
def test_job_collection(self):
    """Confirm the collection matches the sample data"""
    tjc = TreeherderJobCollection()
    for job in self.job_data:
        tjc.add(TreeherderJob(job))

    # assertEqual gives an informative failure message, unlike
    # assertTrue(a == b) which hides the compared values.
    self.assertEqual(len(self.job_data), len(tjc.data))
def test_job_collection_job_type(self):
    """
    Confirm that the job_type argument changes the endpoint_base property
    """
    tjc = TreeherderJobCollection()
    tjc_update = TreeherderJobCollection(job_type='update')

    # The original used assertTrue(x, y): the second argument is the
    # failure *message*, so the assertions passed for any truthy x and
    # never compared the values. assertEqual performs the intended check.
    self.assertEqual(tjc.endpoint_base, 'objectstore')
    self.assertEqual(tjc_update.endpoint_base, 'jobs')
def running_jobs_stored(jm, running_jobs, result_set_stored):
    """
    stores a list of buildapi running jobs
    """
    # Attach the stored result set's fields to the running-job blob.
    running_jobs.update(result_set_stored[0])

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(running_jobs))

    test_utils.post_collection(jm.project, collection)
def running_jobs_stored(
        jm, running_jobs, result_set_stored):
    """
    stores a list of buildapi running jobs into the objectstore
    """
    running_jobs.update(result_set_stored[0])

    # job_type='update' targets the jobs (update) endpoint.
    update_collection = TreeherderJobCollection(job_type='update')
    update_collection.add(update_collection.get_job(running_jobs))

    test_utils.post_collection(jm.project, update_collection)
def completed_jobs_stored(jm, completed_jobs, result_set_stored, mock_post_json):
    """
    stores a list of buildapi completed jobs
    """
    # Link the completed job to the stored result set by revision hash.
    completed_jobs['revision_hash'] = result_set_stored[0]['revision_hash']

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(completed_jobs))

    test_utils.post_collection(jm.project, collection)
def completed_jobs_stored(
        jm, completed_jobs, result_set_stored, mock_post_json):
    """
    stores a list of buildapi completed jobs
    """
    completed_jobs['revision_hash'] = result_set_stored[0]['revision_hash']

    job_collection = TreeherderJobCollection()
    job = job_collection.get_job(completed_jobs)
    job_collection.add(job)

    test_utils.post_collection(jm.project, job_collection)
def running_jobs_stored(
        jm, running_jobs, result_set_stored, mock_post_json):
    """
    stores a list of buildapi running jobs
    """
    # Merge the stored result set and the project name into the job blob.
    running_jobs.update(result_set_stored[0])
    running_jobs.update({'project': jm.project})

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(running_jobs))

    test_utils.post_collection(jm.project, collection)
def pending_jobs_stored(jm, pending_jobs, result_set_stored):
    """
    stores a list of buildapi pending jobs into the jobs store
    using BuildApiTreeHerderAdapter
    """
    pending_jobs.update(result_set_stored[0])

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(pending_jobs))

    test_utils.post_collection(jm.project, collection)
def pending_jobs_stored(
        jm, pending_jobs, result_set_stored):
    """
    stores a list of buildapi pending jobs into the jobs store
    using BuildApiTreeHerderAdapter
    """
    pending_jobs.update(result_set_stored[0])

    # job_type='update' routes the collection to the update endpoint.
    update_collection = TreeherderJobCollection(job_type='update')
    update_collection.add(update_collection.get_job(pending_jobs))

    test_utils.post_collection(jm.project, update_collection)
def completed_jobs_stored(
        test_repository, failure_classifications, completed_jobs,
        result_set_stored, mock_post_json):
    """
    stores a list of buildapi completed jobs
    """
    # Tie the job to the stored result set's revision and repository.
    completed_jobs['revision'] = result_set_stored[0]['revision']
    completed_jobs.update({'project': test_repository.name})

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(completed_jobs))

    test_utils.post_collection(test_repository.name, collection)
def running_jobs_stored(
        test_repository, failure_classifications, running_jobs,
        result_set_stored, mock_post_json):
    """
    stores a list of buildapi running jobs
    """
    running_jobs.update(result_set_stored[0])
    running_jobs.update({'project': test_repository.name})

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(running_jobs))

    test_utils.post_collection(test_repository.name, collection)
def pending_jobs_stored(
        test_repository, failure_classifications, pending_jobs,
        result_set_stored, mock_post_json):
    """
    stores a list of buildapi pending jobs into the jobs store
    using BuildApiTreeHerderAdapter
    """
    pending_jobs.update(result_set_stored[0])
    pending_jobs.update({'project': test_repository.name})

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(pending_jobs))

    test_utils.post_collection(test_repository.name, collection)
def test_post_job_collection(self, mock_post):
    """Can add a treeherder collections to a TreeherderRequest."""
    mock_post.return_value = self._expected_response_return_object()

    collection = TreeherderJobCollection()
    for job_blob in self.job_data:
        collection.add(collection.get_job(job_blob))

    client = TreeherderClient(protocol="http", host="host")
    client.post_collection(
        "project", collection,
        auth=TreeherderAuth("key", "secret", "project"))

    # One POST was made, carrying the collection data as JSON.
    args, kwargs = mock_post.call_args
    self.assertEqual(mock_post.call_count, 1)
    self.assertEqual(collection.get_collection_data(), kwargs["json"])
def test_objectstore_with_bad_key(job_sample, jm):
    """
    Test calling with the wrong project key.

    Expected results:
    - return code 403
    - return message failed
    """
    collection = TreeherderJobCollection()
    collection.add(collection.get_job(job_sample))

    resp = test_utils.post_collection(
        jm.project,
        collection,
        status=403,
        consumer_key='wrong-key',
    )

    assert resp.status_int == 403
    assert resp.json['response'] == "access_denied"
    assert resp.json['detail'] == "oauth_consumer_key does not match project, {0}, credentials".format(jm.project)
def test_objectstore_with_bad_secret(job_sample, jm):
    """
    Test calling with the wrong project secret.

    Expected results:
    - return code 403
    - return message authentication failed
    """
    collection = TreeherderJobCollection()
    collection.add(collection.get_job(job_sample))

    resp = test_utils.post_collection(
        jm.project,
        collection,
        status=403,
        consumer_secret='not-so-secret',
    )

    assert resp.status_int == 403
    assert resp.json['detail'] == "Client authentication failed for project, {0}".format(jm.project)
    assert resp.json['response'] == "invalid_client"
def test_send_without_oauth(self, mock_post, mock_time, mock_generate_nonce):
    """Can send data to the server."""
    # Time/nonce are frozen even though no credentials are supplied.
    mock_time.return_value = 1342229050
    mock_generate_nonce.return_value = "46810593"

    host = 'host'
    request = TreeherderRequest(
        protocol='http',
        host=host,
        project='project',
        oauth_key=None,
        oauth_secret=None,
    )

    collection = TreeherderJobCollection()
    for job_blob in self.job_data:
        collection.add(collection.get_job(job_blob))
        break  # one job is enough

    response = request.post(collection)

    # The mocked POST's return value is handed straight back to us.
    self.assertEqual(mock_post.return_value, response)
    self.assertEqual(mock_post.call_count, 1)

    args, kwargs = mock_post.call_args
    self.assertEqual(json.loads(kwargs['data']),
                     collection.get_collection_data())
    self.assertEqual(kwargs['headers']['Content-Type'], 'application/json')
def test_post_job_collection(self):
    """Can add a treeherder collections to a TreeherderRequest."""
    collection = TreeherderJobCollection()
    for job_blob in self.job_data:
        collection.add(collection.get_job(job_blob))

    client = TreeherderClient(
        protocol="http",
        host="host",
        client_id="client-abc",
        secret="secret123",
    )

    def request_callback(request):
        # Check that the expected content was POSTed.
        self.assertEqual(json.loads(request.body),
                         collection.get_collection_data())
        return (200, {}, '{"message": "Job successfully updated"}')

    endpoint = client._get_project_uri("project", collection.endpoint_base)
    responses.add_callback(responses.POST, endpoint,
                           match_querystring=True,
                           callback=request_callback,
                           content_type="application/json")

    client.post_collection("project", collection)
def test_treeheder_auth(self, mock_time, mock_generate_nonce):
    """Tests that oauth data is sent to server"""
    # Deterministic timestamp and nonce yield a stable signed URL.
    mock_time.return_value = 1342229050
    mock_generate_nonce.return_value = "46810593"

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(self.job_data[0]))

    auth = TreeherderAuth('key', 'secret', 'project')
    request = requests.Request(
        url='http://host/api/project/project/jobs/',
        json=collection.get_collection_data(),
        auth=auth,
        method='POST',
    )
    prepared = request.prepare()

    self.assertEqual(
        prepared.url,
        ("http://host/api/project/project/jobs/?"
         "oauth_body_hash=IKbDoi5GvTRaqjRTCDyKIN5wWiY%3D&"
         "oauth_nonce=46810593&"
         "oauth_timestamp=1342229050&"
         "oauth_consumer_key=key&"
         "oauth_signature_method=HMAC-SHA1&"
         "oauth_version=1.0&"
         "oauth_token=&"
         "user=project&"
         "oauth_signature=DJe%2F%2FJtw7s2XUrciG%2Bl1tfJJen8%3D"))
def test_objectstore_with_bad_key(job_sample, jm):
    """
    Test calling with the wrong project key.

    Expected results:
    - return code 403
    - return message failed
    """
    job_collection = TreeherderJobCollection()
    job_collection.add(job_collection.get_job(job_sample))

    resp = test_utils.post_collection(jm.project, job_collection,
                                      status=403,
                                      consumer_key='wrong-key')

    assert resp.status_int == 403
    assert resp.json['response'] == "access_denied"
    expected_detail = (
        "oauth_consumer_key does not match project, {0}, credentials"
        .format(jm.project))
    assert resp.json['detail'] == expected_detail
def test_objectstore_with_bad_secret(job_sample, jm):
    """
    Test calling with the wrong project secret.

    Expected results:
    - return code 403
    - return message authentication failed
    """
    job_collection = TreeherderJobCollection()
    job_collection.add(job_collection.get_job(job_sample))

    resp = test_utils.post_collection(jm.project, job_collection,
                                      status=403,
                                      consumer_secret='not-so-secret')

    assert resp.status_int == 403
    expected_detail = "Client authentication failed for project, {0}".format(
        jm.project)
    assert resp.json['detail'] == expected_detail
    assert resp.json['response'] == "invalid_client"
def test_treeheder_auth(self, mock_time, mock_generate_nonce):
    """Tests that oauth data is sent to server"""
    # Pin time and nonce for a reproducible OAuth signature.
    mock_time.return_value = 1342229050
    mock_generate_nonce.return_value = "46810593"

    collection = TreeherderJobCollection()
    collection.add(collection.get_job(self.job_data[0]))

    auth = TreeherderAuth('key', 'secret', 'project')
    prepared = requests.Request(
        url='http://host/api/project/project/jobs/',
        json=collection.get_collection_data(),
        auth=auth,
        method='POST',
    ).prepare()

    expected = (
        "http://host/api/project/project/jobs/?"
        "oauth_body_hash=DEn0vGleFUlmCzsFtv1fzBEpNHg%3D&"
        "oauth_nonce=46810593&"
        "oauth_timestamp=1342229050&"
        "oauth_consumer_key=key&"
        "oauth_signature_method=HMAC-SHA1&"
        "oauth_version=1.0&"
        "oauth_token=&"
        "user=project&"
        "oauth_signature=kxmsE%2BCqRDtV%2Bqk9GYeA7n4F%2FCI%3D"
    )
    self.assertEqual(prepared.url, expected)
def test_send_job_collection(self, mock_send):
    """Can add a treeherder collections to a TreeherderRequest."""
    collection = TreeherderJobCollection()
    for job_blob in self.job_data:
        collection.add(collection.get_job(job_blob))

    request = TreeherderRequest(
        protocol='http',
        host='host',
        project='project',
        oauth_key='key',
        oauth_secret='secret',
    )
    request.post(collection)

    # Exactly one send, whose payload is the serialised collection.
    self.assertEqual(mock_send.call_count, 1)
    self.assertEqual(collection.to_json(),
                     mock_send.call_args_list[0][1]['data'])
def test_post_job_collection(self):
    """Can add a treeherder collections to a TreeherderRequest."""
    tjc = TreeherderJobCollection()
    for sample in self.job_data:
        tjc.add(tjc.get_job(sample))

    client = TreeherderClient(server_url='http://host',
                              client_id='client-abc',
                              secret='secret123')

    def request_callback(request):
        # Check that the expected content was POSTed.
        body = json.loads(request.body)
        self.assertEqual(body, tjc.get_collection_data())
        return (200, {}, '{"message": "Job successfully updated"}')

    url = client._get_endpoint_url(tjc.endpoint_base, project='project')
    responses.add_callback(
        responses.POST,
        url,
        match_querystring=True,
        callback=request_callback,
        content_type='application/json',
    )

    client.post_collection('project', tjc)
def test_post_job_collection(self, mock_post):
    """Can add a treeherder collections to a TreeherderRequest."""
    mock_post.return_value = self._expected_response_return_object()

    collection = TreeherderJobCollection()
    for job_blob in self.job_data:
        collection.add(collection.get_job(job_blob))

    client = TreeherderClient(
        protocol='http',
        host='host',
    )
    client.post_collection('project', 'key', 'secret', collection)

    # A single POST whose 'data' kwarg is the serialised collection.
    args, kwargs = mock_post.call_args
    self.assertEqual(mock_post.call_count, 1)
    self.assertEqual(collection.to_json(), kwargs['data'])
def transform(self, data, source, revision_filter=None, project_filter=None, job_group_filter=None):
    """
    transform the buildapi structure into something we can ingest via our restful api

    :param data: raw buildapi payload; data[source] maps project -> revision -> jobs.
    :param source: either 'pending' or 'running' — selects the section of
        the payload to read and the cache key of previously-seen job ids.
    :param revision_filter: optional; skip revisions rejected by
        common.should_skip_revision.
    :param project_filter: optional; skip projects rejected by
        common.should_skip_project.
    :param job_group_filter: optional; only ingest jobs whose group symbol
        matches (case-insensitive).
    :returns: tuple (th_collections, job_ids_seen_now) where th_collections
        maps project name -> TreeherderJobCollection and job_ids_seen_now is
        the set of buildapi job ids encountered in this pass.
    """
    valid_projects = set(x.project for x in Datasource.objects.cached())
    revision_dict = defaultdict(list)

    # loop to catch all the revisions
    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        for rev in revisions.iterkeys():
            if common.should_skip_revision(rev, revision_filter):
                continue
            revision_dict[project].append(rev)

    # retrieving the revision->resultset lookups
    revisions_lookup = common.lookup_revisions(revision_dict)

    # Job ids ingested by the previous successful run of this task;
    # used below to avoid re-processing unchanged jobs.
    job_ids_seen_last_time = cache.get(CACHE_KEYS[source], set())
    job_ids_seen_now = set()

    # One TreeherderJobCollection per project.
    th_collections = {}

    for project, revisions in data[source].iteritems():
        if common.should_skip_project(project, valid_projects, project_filter):
            continue

        for revision, jobs in revisions.items():
            if common.should_skip_revision(revision, revision_filter):
                continue

            try:
                resultset = revisions_lookup[project][revision]
            except KeyError:
                # Revision not ingested yet; its jobs are retried on a
                # later pass (their ids are not added to job_ids_seen_now).
                logger.warning(
                    "skipping jobs since %s revision %s not yet ingested",
                    project, revision)
                continue

            # using project and revision form the revision lookups
            # to filter those jobs with unmatched revision
            for job in jobs:
                job_ids_seen_now.add(job['id'])

                # Don't process jobs that were already present in this datasource
                # the last time this task completed successfully.
                if job['id'] in job_ids_seen_last_time:
                    continue

                treeherder_data = {
                    'revision': revision,
                    'resultset_id': resultset['id'],
                    'project': project,
                }

                buildername = job['buildername']
                platform_info = buildbot.extract_platform_info(buildername)
                job_name_info = buildbot.extract_name_info(buildername)

                if (job_group_filter and job_name_info.get(
                        'group_symbol', '').lower() != job_group_filter.lower()):
                    continue

                if source == 'pending':
                    request_id = job['id']
                elif source == 'running':
                    # The last element in request_ids corresponds to the request id of this job,
                    # the others are for the requests that were coalesced into this one.
                    request_id = job['request_ids'][-1]

                new_job = {
                    'job_guid': common.generate_job_guid(request_id, buildername),
                    'name': job_name_info.get('name', ''),
                    'job_symbol': job_name_info.get('job_symbol', ''),
                    'group_name': job_name_info.get('group_name', ''),
                    'group_symbol': job_name_info.get('group_symbol', ''),
                    'reference_data_name': buildername,
                    'state': source,
                    'submit_timestamp': job['submitted_at'],
                    'build_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    # where are we going to get this data from?
                    'machine_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                    },
                    'who': 'unknown',
                    'option_collection': {
                        # build_type contains an option name, eg. PGO
                        buildbot.extract_build_type(buildername): True
                    },
                    'log_references': [],
                    'artifacts': [
                        {
                            'type': 'json',
                            'name': 'buildapi',
                            'log_urls': [],
                            'blob': {
                                'buildername': buildername,
                                'request_id': request_id
                            }
                        },
                    ]
                }

                if source == 'running':
                    new_job['start_timestamp'] = job['start_time']
                    # We store the original values to help debugging.
                    new_job['artifacts'].append({
                        'type': 'json',
                        'name': 'buildapi_running',
                        'log_urls': [],
                        'blob': {
                            'revision': revision,
                            'request_ids': job['request_ids'],
                            'submitted_at': job['submitted_at'],
                            'start_time': job['start_time'],
                        }
                    })

                treeherder_data['job'] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection()

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
    logger.info("Imported %d %s jobs, skipped %d previously seen",
                num_new_jobs, source, len(job_ids_seen_now) - num_new_jobs)

    return th_collections, job_ids_seen_now
def transform(self, data, source, filter_to_revision=None, filter_to_project=None, filter_to_job_group=None):
    """
    transform the buildapi structure into something we can ingest via our restful api

    :param data: raw buildapi payload; data[source] maps project -> revision -> jobs.
    :param source: either 'pending' or 'running' — selects the payload
        section and is stored as each job's state.
    :param filter_to_revision: optional; only ingest jobs for this revision.
    :param filter_to_project: optional; only ingest jobs for this project.
    :param filter_to_job_group: optional; only ingest jobs whose group
        symbol matches (case-insensitive).
    :returns: dict mapping project name -> TreeherderJobCollection
        (created with job_type='update').
    """
    projects = set(x.project for x in Datasource.objects.cached())
    revision_dict = defaultdict(list)
    missing_resultsets = defaultdict(set)

    # loop to catch all the revisions
    for project, revisions in data[source].iteritems():
        # this skips those projects we don't care about
        if project not in projects:
            continue

        if filter_to_project and project != filter_to_project:
            continue

        for rev, jobs in revisions.items():
            revision_dict[project].append(rev)

    # retrieving the revision->resultset lookups
    revisions_lookup = common.lookup_revisions(revision_dict)

    # One TreeherderJobCollection per project.
    th_collections = {}

    for project, revisions in data[source].iteritems():

        for revision, jobs in revisions.items():

            try:
                resultset = common.get_resultset(project,
                                                 revisions_lookup,
                                                 revision,
                                                 missing_resultsets,
                                                 logger)
            except KeyError:
                # skip this job, at least at this point
                continue

            if filter_to_revision and filter_to_revision != resultset['revision']:
                continue

            # using project and revision form the revision lookups
            # to filter those jobs with unmatched revision
            for job in jobs:
                treeherder_data = {
                    'revision_hash': resultset['revision_hash'],
                    'resultset_id': resultset['id'],
                    'project': project,
                }

                platform_info = buildbot.extract_platform_info(job['buildername'])
                job_name_info = buildbot.extract_name_info(job['buildername'])

                if (filter_to_job_group and job_name_info.get(
                        'group_symbol', '').lower() != filter_to_job_group.lower()):
                    continue

                if source == 'pending':
                    request_id = job['id']
                elif source == 'running':
                    # The last element in request_ids corresponds to the request id of this job,
                    # the others are for the requests that were coalesced into this one.
                    request_id = job['request_ids'][-1]

                device_name = buildbot.get_device_or_unknown(
                    job_name_info.get('name', ''),
                    platform_info['vm']
                )

                new_job = {
                    'job_guid': common.generate_job_guid(
                        request_id,
                        job['buildername']
                    ),
                    'name': job_name_info.get('name', ''),
                    'job_symbol': job_name_info.get('job_symbol', ''),
                    'group_name': job_name_info.get('group_name', ''),
                    'group_symbol': job_name_info.get('group_symbol', ''),
                    'reference_data_name': job['buildername'],
                    'state': source,
                    'submit_timestamp': job['submitted_at'],
                    'build_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                        'vm': platform_info['vm']
                    },
                    # where are we going to get this data from?
                    'machine_platform': {
                        'os_name': platform_info['os'],
                        'platform': platform_info['os_platform'],
                        'architecture': platform_info['arch'],
                        'vm': platform_info['vm']
                    },
                    'device_name': device_name,
                    'who': 'unknown',
                    'option_collection': {
                        # build_type contains an option name, eg. PGO
                        buildbot.extract_build_type(job['buildername']): True
                    },
                    'log_references': [],
                    'artifacts': [
                        {
                            'type': 'json',
                            'name': 'buildapi',
                            'log_urls': [],
                            'blob': {
                                'buildername': job['buildername'],
                                'request_id': request_id
                            }
                        },
                    ]
                }

                if source == 'running':
                    new_job['start_timestamp'] = job['start_time']
                    # We store the original values to help debugging.
                    new_job['artifacts'].append(
                        {
                            'type': 'json',
                            'name': 'buildapi_running',
                            'log_urls': [],
                            'blob': {
                                'revision': revision,
                                'request_ids': job['request_ids'],
                                'submitted_at': job['submitted_at'],
                                'start_time': job['start_time'],
                            }
                        }
                    )

                treeherder_data['job'] = new_job

                if project not in th_collections:
                    th_collections[project] = TreeherderJobCollection(
                        job_type='update'
                    )

                # get treeherder job instance and add the job instance
                # to the collection instance
                th_job = th_collections[project].get_job(treeherder_data)
                th_collections[project].add(th_job)

    if missing_resultsets and not filter_to_revision:
        common.fetch_missing_resultsets(source, missing_resultsets, logger)

    return th_collections
def transform(self, data, filter_to_project=None, filter_to_revision=None, filter_to_job_group=None):
    """
    transform the builds4h structure into something we can ingest via our restful api

    :param data: raw builds-4hr payload; data['builds'] is a list of
        completed buildbot builds with a 'properties' dict each.
    :param filter_to_project: optional; only ingest jobs for this project.
    :param filter_to_revision: optional; only ingest jobs for this revision.
    :param filter_to_job_group: optional; only ingest jobs whose group
        symbol matches (case-insensitive).
    :returns: dict mapping project name -> TreeherderJobCollection.
    """
    revisions = defaultdict(list)
    missing_resultsets = defaultdict(set)

    projects = set(x.project for x in Datasource.objects.cached())

    # First pass: validate each build and gather project -> revisions,
    # so result sets can be looked up in bulk before building jobs.
    for build in data['builds']:
        prop = build['properties']

        if 'buildername' not in prop:
            logger.warning("skipping builds-4hr job since no buildername found")
            continue

        if 'branch' not in prop:
            logger.warning("skipping builds-4hr job since no branch found: %s",
                           prop['buildername'])
            continue

        if prop['branch'] not in projects:
            # Fuzzer jobs specify a branch of 'idle', and we intentionally don't display them.
            if prop['branch'] != 'idle':
                logger.warning("skipping builds-4hr job on unknown branch %s: %s",
                               prop['branch'], prop['buildername'])
            continue

        if filter_to_project and prop['branch'] != filter_to_project:
            continue

        # The revision can live under several property names depending on
        # the build; normalise it into prop['revision'].
        prop['revision'] = prop.get('revision',
                                    prop.get('got_revision',
                                             prop.get('sourcestamp', None)))

        if not prop['revision']:
            logger.warning("skipping builds-4hr job since no revision found: %s",
                           prop['buildername'])
            continue

        # Truncate to the short (12-char) revision form.
        prop['revision'] = prop['revision'][0:12]

        if prop['revision'] == prop.get('l10n_revision', None):
            # Some l10n jobs specify the l10n repo revision under 'revision', rather
            # than the gecko revision. If we did not skip these, it would result in
            # fetch_missing_resultsets requests that were guaranteed to 404.
            # This needs to be fixed upstream in builds-4hr by bug 1125433.
            logger.warning("skipping builds-4hr job since revision refers to wrong repo: %s",
                           prop['buildername'])
            continue

        revisions[prop['branch']].append(prop['revision'])

    revisions_lookup = common.lookup_revisions(revisions)

    # Holds one collection per unique branch/project
    th_collections = {}

    # Second pass: build a completed-job blob per build and add it to its
    # project's collection.
    for build in data['builds']:
        try:
            prop = build['properties']
            project = prop['branch']
            resultset = common.get_resultset(project,
                                             revisions_lookup,
                                             prop['revision'],
                                             missing_resultsets,
                                             logger)
        except KeyError:
            # skip this job, at least at this point
            continue

        if filter_to_revision and filter_to_revision != resultset['revision']:
            continue

        platform_info = buildbot.extract_platform_info(prop['buildername'])
        job_name_info = buildbot.extract_name_info(prop['buildername'])

        if (filter_to_job_group and job_name_info.get(
                'group_symbol', '').lower() != filter_to_job_group.lower()):
            continue

        treeherder_data = {
            'revision_hash': resultset['revision_hash'],
            'resultset_id': resultset['id'],
            'project': project,
            'coalesced': []
        }

        device_name = buildbot.get_device_or_unknown(
            job_name_info.get('name', ''),
            platform_info['vm']
        )

        log_reference = []
        if 'log_url' in prop:
            log_reference.append({
                'url': prop['log_url'],
                'name': 'buildbot_text'
            })

        # add structured logs to the list of log references
        if 'blobber_files' in prop:
            blobber_files = json.loads(prop['blobber_files'])
            for bf, url in blobber_files.items():
                if bf and url and bf.endswith('_raw.log'):
                    log_reference.append({
                        'url': url,
                        'name': 'mozlog_json'
                    })

        try:
            job_guid_data = self.find_job_guid(build)
            # request_ids is mandatory, but can be found in several places.
            request_ids = prop.get('request_ids', build['request_ids'])
            # The last element in request_ids corresponds to the request id of this job,
            # the others are for the requests that were coalesced into this one.
            request_id = request_ids[-1]
        except KeyError:
            continue

        treeherder_data['coalesced'] = job_guid_data['coalesced']

        job = {
            'job_guid': job_guid_data['job_guid'],
            'name': job_name_info.get('name', ''),
            'job_symbol': job_name_info.get('job_symbol', ''),
            'group_name': job_name_info.get('group_name', ''),
            'group_symbol': job_name_info.get('group_symbol', ''),
            'reference_data_name': prop['buildername'],
            'product_name': prop.get('product', ''),
            'state': 'completed',
            'result': buildbot.RESULT_DICT[build['result']],
            'reason': build['reason'],
            # scheduler, if 'who' property is not present
            'who': prop.get('who', prop.get('scheduler', '')),
            'submit_timestamp': build['requesttime'],
            'start_timestamp': build['starttime'],
            'end_timestamp': build['endtime'],
            'machine': prop.get('slavename', 'unknown'),
            # build_url not present in all builds
            'build_url': prop.get('build_url', ''),
            # build_platform same as machine_platform
            'build_platform': {
                # platform attributes sometimes parse without results
                'os_name': platform_info.get('os', ''),
                'platform': platform_info.get('os_platform', ''),
                'architecture': platform_info.get('arch', '')
            },
            'machine_platform': {
                'os_name': platform_info.get('os', ''),
                'platform': platform_info.get('os_platform', ''),
                'architecture': platform_info.get('arch', '')
            },
            'device_name': device_name,
            # pgo or non-pgo dependent on buildername parsing
            'option_collection': {
                buildbot.extract_build_type(prop['buildername']): True
            },
            'log_references': log_reference,
            'artifacts': [
                {
                    'type': 'json',
                    'name': 'buildapi',
                    'log_urls': [],
                    'blob': {
                        'buildername': build['properties']['buildername'],
                        'request_id': request_id
                    }
                },
            ]
        }

        treeherder_data['job'] = job

        if project not in th_collections:
            th_collections[project] = TreeherderJobCollection()

        # get treeherder job instance and add the job instance
        # to the collection instance
        th_job = th_collections[project].get_job(treeherder_data)
        th_collections[project].add(th_job)

    if missing_resultsets and not filter_to_revision:
        common.fetch_missing_resultsets("builds4h", missing_resultsets, logger)

    return th_collections
def transform(self, data, project_filter=None, revision_filter=None, job_group_filter=None):
    """
    Transform the builds-4hr structure into per-project Treeherder job
    collections that can be ingested via our restful api.

    Parameters
    ----------
    data : dict
        Parsed builds-4hr payload; must contain a ``'builds'`` list, each
        build having ``'id'``, ``'properties'``, ``'result'``, ``'reason'``
        and the request/start/end timestamps read below.
    project_filter :
        Passed through to ``common.should_skip_project`` to restrict which
        branches/projects are ingested.
    revision_filter :
        Passed through to ``common.should_skip_revision`` to restrict which
        revisions are ingested.
    job_group_filter : str or None
        If set, only jobs whose parsed ``group_symbol`` matches it
        (case-insensitively) are kept.

    Returns
    -------
    tuple
        ``(th_collections, job_ids_seen_now)`` where ``th_collections`` maps
        project name -> ``TreeherderJobCollection`` and ``job_ids_seen_now``
        is the set of build ids observed in this pass (used by the caller to
        update the ``CACHE_KEYS['complete']`` dedup cache — presumably; the
        cache write is not visible in this method).
    """
    # First pass: gather revisions per project so they can be resolved to
    # resultsets in one bulk lookup below.
    revisions = defaultdict(list)

    # Only branches backed by a known datasource are ingested.
    valid_projects = set(x.project for x in Datasource.objects.cached())

    for build in data['builds']:
        try:
            prop = build['properties']
            project = prop['branch']
            if common.should_skip_project(project, valid_projects, project_filter):
                continue
            if common.should_skip_revision(prop['revision'], revision_filter):
                continue
        except KeyError as e:
            # A build missing 'properties'/'branch'/'revision' cannot be
            # ingested; log which key was absent and move on.
            logger.warning(
                "skipping builds-4hr job %s since missing property: %s",
                build['id'], str(e))
            continue

        revisions[project].append(prop['revision'])

    # Bulk-resolve the collected revisions to already-ingested resultsets.
    revisions_lookup = common.lookup_revisions(revisions)

    # Build ids processed by the previous successful run of this task;
    # used to skip jobs that are still present in the builds-4hr window.
    job_ids_seen_last_time = cache.get(CACHE_KEYS['complete'], set())
    job_ids_seen_now = set()

    # Holds one collection per unique branch/project
    th_collections = {}

    # Second pass: build a Treeherder job for each remaining build.
    for build in data['builds']:
        try:
            prop = build['properties']
            project = prop['branch']
            buildername = prop['buildername']
            if common.should_skip_project(project, valid_projects, project_filter):
                continue
            if common.should_skip_revision(prop['revision'], revision_filter):
                continue
        except KeyError:
            # Already logged (where loggable) in the first pass; skip quietly.
            continue

        try:
            resultset = revisions_lookup[project][prop['revision']]
        except KeyError:
            logger.warning(
                "skipping builds-4hr job %s since %s revision %s not yet ingested",
                build['id'], project, prop['revision'])
            continue

        # We record the id here rather than at the start of the loop, since we
        # must not count jobs whose revisions were not yet imported as processed,
        # or we'll never process them once we've ingested their associated revision.
        job_ids_seen_now.add(build['id'])

        # Don't process jobs that were already present in builds-4hr
        # the last time this task completed successfully.
        if build['id'] in job_ids_seen_last_time:
            continue

        # Parse platform and job-name metadata out of the buildername string.
        platform_info = buildbot.extract_platform_info(buildername)
        job_name_info = buildbot.extract_name_info(buildername)

        if (job_group_filter and job_name_info.get(
                'group_symbol', '').lower() != job_group_filter.lower()):
            continue

        treeherder_data = {
            'revision': prop['revision'],
            'resultset_id': resultset['id'],
            'project': project,
            'coalesced': []
        }

        log_reference = []
        if 'log_url' in prop:
            log_reference.append({
                'url': prop['log_url'],
                'name': 'buildbot_text'
            })

        # add structured logs to the list of log references
        if 'blobber_files' in prop:
            try:
                blobber_files = json.loads(prop['blobber_files'])
                for bf, url in blobber_files.items():
                    if bf and url and bf.endswith('_errorsummary.log'):
                        log_reference.append({
                            'url': url,
                            'name': 'errorsummary_json'
                        })
            except Exception as e:
                # Best-effort: malformed blobber_files JSON must not abort
                # ingestion of the job itself.
                logger.warning(
                    "invalid blobber_files json for build id %s (%s): %s",
                    build['id'], buildername, e)

        try:
            job_guid_data = self.find_job_guid(build)
            # request_ids is mandatory, but can be found in several places.
            request_ids = prop.get('request_ids', build['request_ids'])
            # The last element in request_ids corresponds to the request id of this job,
            # the others are for the requests that were coalesced into this one.
            request_id = request_ids[-1]
        except KeyError:
            # No guid / request_ids available anywhere — the job cannot be
            # identified, so skip it.
            continue

        treeherder_data['coalesced'] = job_guid_data['coalesced']

        job = {
            'job_guid': job_guid_data['job_guid'],
            'name': job_name_info.get('name', ''),
            'job_symbol': job_name_info.get('job_symbol', ''),
            'group_name': job_name_info.get('group_name', ''),
            'group_symbol': job_name_info.get('group_symbol', ''),
            'reference_data_name': buildername,
            'product_name': prop.get('product', ''),
            'state': 'completed',
            # NOTE(review): raises KeyError if build['result'] is not a key of
            # buildbot.RESULT_DICT — presumably the feed only emits known codes.
            'result': buildbot.RESULT_DICT[build['result']],
            'reason': build['reason'],
            # scheduler, if 'who' property is not present
            'who': prop.get('who', prop.get('scheduler', '')),
            'submit_timestamp': build['requesttime'],
            'start_timestamp': build['starttime'],
            'end_timestamp': build['endtime'],
            'machine': prop.get('slavename', 'unknown'),
            # build_platform same as machine_platform
            'build_platform': {
                # platform attributes sometimes parse without results
                'os_name': platform_info.get('os', ''),
                'platform': platform_info.get('os_platform', ''),
                'architecture': platform_info.get('arch', '')
            },
            'machine_platform': {
                'os_name': platform_info.get('os', ''),
                'platform': platform_info.get('os_platform', ''),
                'architecture': platform_info.get('arch', '')
            },
            # pgo or non-pgo dependent on buildername parsing
            'option_collection': {
                buildbot.extract_build_type(buildername): True
            },
            'log_references': log_reference,
            'artifacts': [
                {
                    'type': 'json',
                    'name': 'buildapi',
                    'log_urls': [],
                    'blob': {
                        'buildername': buildername,
                        'request_id': request_id
                    }
                },
            ]
        }

        treeherder_data['job'] = job

        if project not in th_collections:
            th_collections[project] = TreeherderJobCollection()

        # get treeherder job instance and add the job instance
        # to the collection instance
        th_job = th_collections[project].get_job(treeherder_data)
        th_collections[project].add(th_job)

    num_new_jobs = len(job_ids_seen_now.difference(job_ids_seen_last_time))
    logger.info("Imported %d completed jobs, skipped %d previously seen",
                num_new_jobs, len(job_ids_seen_now) - num_new_jobs)

    return th_collections, job_ids_seen_now