def create(self, request, project):
    """
    POST method implementation
    """
    try:
        repository = Repository.objects.get(name=project)
    except Repository.DoesNotExist:
        return Response({
            "detail": "No project with name {}".format(project)
        }, status=HTTP_404_NOT_FOUND)

    # check if any revisions are shorter than the expected 40 characters
    # The volume of resultsets is fairly low, so this loop won't be
    # onerous.
    for resultset in request.data:
        for revision in resultset['revisions']:
            try:
                if len(revision['revision']) < 40:
                    raise ValueError("Revision < 40 characters")
            except ValueError:
                # The id of the submitter will be automatically included
                # in the params via the ``hawk_lookup`` call
                params = {
                    "revision": revision["revision"]
                }
                newrelic.agent.record_exception(params=params)

    store_result_set_data(repository, request.data)

    return Response({"message": "well-formed JSON stored"})

def test_create_error_summary(failure_classifications, jobs_with_local_log,
                              sample_resultset, test_repository):
    """
    check that a bug suggestions artifact gets inserted when running
    a parse_log task for a failed job, and that the number of
    bug search terms/suggestions matches the number of error lines.
    """
    store_result_set_data(test_repository, sample_resultset)

    jobs = jobs_with_local_log
    for job in jobs:
        job['job']['result'] = "testfailed"
        job['revision'] = sample_resultset[0]['revision']

    store_job_data(test_repository, jobs)

    bug_suggestions = get_error_summary(Job.objects.get(id=1))

    # we must have one bugs item per error in bug_suggestions.
    # errors with no bug suggestions will just have an empty
    # bugs list
    assert TextLogError.objects.count() == len(bug_suggestions)

    # We really need to add some tests that check the values of each entry
    # in bug_suggestions, but for now this is better than nothing.
    expected_keys = set(["search", "search_terms", "bugs"])
    for failure_line in bug_suggestions:
        assert set(failure_line.keys()) == expected_keys

def test_resultset_list_single_long_revision_stored_long(webapp, sample_resultset,
                                                         test_repository):
    """
    test retrieving a resultset list with store long revision, filtered by a
    single long revision
    """
    long_revision = "21fb3eed1b5f3456789012345678901234567890"

    # store a resultset with long revision
    resultset = copy.deepcopy(sample_resultset[0])
    resultset["revisions"][0]["revision"] = long_revision
    store_result_set_data(test_repository, [resultset])

    resp = webapp.get(
        reverse("resultset-list", kwargs={"project": test_repository.name}),
        {"revision": long_revision}
    )

    assert resp.status_int == 200
    results = resp.json['results']
    meta = resp.json['meta']
    assert len(results) == 1
    assert set([rs["revision"] for rs in results]) == {sample_resultset[0]['revision']}
    assert meta == {
        'count': 1,
        'revision': long_revision,
        'filter_params': {
            'revisions_long_revision': long_revision
        },
        'repository': test_repository.name
    }

def test_resultset_create(test_repository, sample_resultset, mock_post_json):
    """
    test posting data to the resultset endpoint via webtest.
    expected results are:
    - return code 200
    - return message successful
    - resultsets stored in the jobs schema
    """
    assert Push.objects.count() == 0

    # store the first two, so we submit all, but should properly not re-
    # add the others.
    store_result_set_data(test_repository, sample_resultset[:2])
    assert Push.objects.count() == 2

    trsc = TreeherderResultSetCollection()
    exp_revision_hashes = set()
    for rs in sample_resultset:
        rs.update({'author': 'John Doe'})
        result_set = trsc.get_resultset(rs)
        trsc.add(result_set)
        exp_revision_hashes.add(rs["revision"])

    test_utils.post_collection(test_repository.name, trsc)

    assert Push.objects.count() == len(sample_resultset)
    assert set(Push.objects.values_list('revision', flat=True)) == set(
        [rs['revision'] for rs in sample_resultset])

def process(self, message_body, exchange):
    try:
        transformer = self.get_transformer_class(exchange)(message_body)
        try:
            repo = Repository.objects.get(url=transformer.repo_url,
                                          branch=transformer.branch,
                                          active_status="active")
        except ObjectDoesNotExist:
            repo_info = message_body.get("details", message_body["payload"])
            newrelic.agent.record_custom_event("skip_unknown_repository",
                                               repo_info)
            logger.warn("Skipping unsupported repo: {} {}".format(
                transformer.repo_url, transformer.branch))
            return

        transformed_data = transformer.transform(repo.name)

        logger.info("Storing resultset for {} {} {}".format(
            repo.name, transformer.repo_url, transformer.branch))
        store_result_set_data(repo, [transformed_data])
    except Exception as ex:
        newrelic.agent.record_exception(exc=ex)
        logger.exception("Error transforming resultset", exc_info=ex)

def push_with_three_jobs(sample_data, sample_resultset, test_repository):
    """
    Stores a number of jobs in the same resultset.
    """
    num_jobs = 3
    resultset = sample_resultset[0]
    jobs = copy.deepcopy(sample_data.job_data[0:num_jobs])

    # Only store data for the first resultset....
    store_result_set_data(test_repository, [resultset])

    blobs = []
    for index, blob in enumerate(jobs):
        # Modify job structure to sync with the resultset sample data
        if 'sources' in blob:
            del blob['sources']

        # Skip log references since they do not work correctly in pending state.
        if 'log_references' in blob['job']:
            del blob['job']['log_references']

        blob['revision'] = resultset['revision']
        blob['job']['state'] = 'pending'
        blobs.append(blob)

    # Store and process the jobs so they are present in the tables.
    store_job_data(test_repository, blobs)
    return Push.objects.get(repository=test_repository,
                            revision=resultset['revision'])

def test_ingest_running_to_retry_to_success_sample_job(test_repository,
                                                       failure_classifications,
                                                       sample_data,
                                                       sample_resultset,
                                                       mock_log_parser,
                                                       ingestion_cycles):
    # verifies that retries to success work, no matter how jobs are batched
    store_result_set_data(test_repository, sample_resultset)

    job_datum = copy.deepcopy(sample_data.job_data[0])
    job_datum['revision'] = sample_resultset[0]['revision']

    job = job_datum['job']
    job_guid_root = job['job_guid']

    job_data = []
    for (state, result, job_guid) in [
            ('running', 'unknown', job_guid_root),
            ('completed', 'retry',
             job_guid_root + "_" + str(job['end_timestamp'])[-5:]),
            ('completed', 'success', job_guid_root)]:
        new_job_datum = copy.deepcopy(job_datum)
        new_job_datum['job']['state'] = state
        new_job_datum['job']['result'] = result
        new_job_datum['job']['job_guid'] = job_guid
        job_data.append(new_job_datum)

    for (i, j) in ingestion_cycles:
        store_job_data(test_repository, job_data[i:j])

    assert Job.objects.count() == 2
    assert Job.objects.get(id=1).result == 'retry'
    assert Job.objects.get(id=2).result == 'success'
    assert JobLog.objects.count() == 2

def eleven_job_blobs(sample_data, sample_resultset, test_repository, mock_log_parser):
    store_result_set_data(test_repository, sample_resultset)

    num_jobs = 11
    jobs = sample_data.job_data[0:num_jobs]

    max_index = len(sample_resultset) - 1
    resultset_index = 0

    blobs = []
    for index, blob in enumerate(jobs):

        if resultset_index > max_index:
            resultset_index = 0

        # Modify job structure to sync with the resultset sample data
        if 'sources' in blob:
            del blob['sources']

        blob['revision'] = sample_resultset[resultset_index]['revision']

        blobs.append(blob)

        resultset_index += 1
    return blobs

def process(self, message_body, exchange):
    transformer = self.get_transformer_class(exchange)(message_body)
    try:
        newrelic.agent.add_custom_parameter("url", transformer.repo_url)
        newrelic.agent.add_custom_parameter("branch", transformer.branch)
        repo = Repository.objects.get(url=transformer.repo_url,
                                      branch=transformer.branch,
                                      active_status="active")
        newrelic.agent.add_custom_parameter("repository", repo.name)
    except ObjectDoesNotExist:
        repo_info = transformer.get_info()
        repo_info.update({
            "url": transformer.repo_url,
            "branch": transformer.branch,
        })
        newrelic.agent.record_custom_event("skip_unknown_repository",
                                           repo_info)
        logger.warn("Skipping unsupported repo: {} {}".format(
            transformer.repo_url, transformer.branch))
        return

    transformed_data = transformer.transform(repo.name)

    logger.info("Storing resultset for {} {} {}".format(
        repo.name, transformer.repo_url, transformer.branch))
    store_result_set_data(repo, [transformed_data])

def test_bad_date_value_ingestion(test_repository, failure_classifications,
                                  sample_resultset, mock_log_parser):
    """
    Test ingesting a job blob with bad date value
    """
    blob = job_data(start_timestamp="foo",
                    revision=sample_resultset[0]['revision'])

    store_result_set_data(test_repository, sample_resultset[:1])

    store_job_data(test_repository, [blob])

def test_resultset_list_empty_rs_still_show(webapp, sample_resultset,
                                            test_repository):
    """
    test retrieving a resultset list, when the resultset has no jobs.
    should show.
    """
    store_result_set_data(test_repository, sample_resultset)

    resp = webapp.get(
        reverse("resultset-list", kwargs={"project": test_repository.name}),
    )
    assert resp.status_int == 200
    assert len(resp.json['results']) == 10

def test_parse_log(test_repository, failure_classifications,
                   jobs_with_local_log, sample_resultset):
    """
    check that 2 job_artifacts get inserted when running a parse_log
    task for a successful job and that JobDetail objects get created
    """
    store_result_set_data(test_repository, sample_resultset)

    jobs = jobs_with_local_log
    for job in jobs:
        # make this a successful job, to check it's still parsed for errors
        job['job']['result'] = "success"
        job['revision'] = sample_resultset[0]['revision']

    store_job_data(test_repository, jobs)

    # this log generates 4 job detail objects at present
    assert JobDetail.objects.count() == 4

def test_ingest_running_to_retry_sample_job(test_repository,
                                            failure_classifications,
                                            sample_data, sample_resultset,
                                            mock_log_parser,
                                            same_ingestion_cycle):
    """Process a single job structure in the job_data.txt file"""
    store_result_set_data(test_repository, sample_resultset)

    job_data = copy.deepcopy(sample_data.job_data[:1])
    job = job_data[0]['job']
    job_data[0]['revision'] = sample_resultset[0]['revision']
    job['state'] = 'running'
    job['result'] = 'unknown'

    def _simulate_retry_job(job):
        job['state'] = 'completed'
        job['result'] = 'retry'
        # convert the job_guid to what it would be on a retry
        job['job_guid'] = job['job_guid'] + "_" + str(
            job['end_timestamp'])[-5:]
        return job

    if same_ingestion_cycle:
        # now we simulate the complete version of the job coming in (on the
        # same push)
        new_job_datum = copy.deepcopy(job_data[0])
        new_job_datum['job'] = _simulate_retry_job(new_job_datum['job'])
        job_data.append(new_job_datum)
        store_job_data(test_repository, job_data)
    else:
        # store the job in the initial state
        store_job_data(test_repository, job_data)

        # now we simulate the complete version of the job coming in and
        # ingest a second time
        job = _simulate_retry_job(job)
        store_job_data(test_repository, job_data)

    assert Job.objects.count() == 1
    job = Job.objects.get(id=1)
    assert job.result == 'retry'
    # guid should be the retry one
    assert job.guid == job_data[-1]['job']['job_guid']

def test_ingest_job_revision_hash_blank_revision(test_repository,
                                                 failure_classifications,
                                                 sample_data, mock_log_parser,
                                                 sample_resultset):
    # Given a resultset with a revision_hash value that is NOT the
    # top revision SHA, ingest a job with a different revision_hash, but a
    # matching revision SHA. Ensure the job still goes to the right resultset.
    rs_revision_hash = "12345abc"
    resultset = sample_resultset[0].copy()
    resultset["revision_hash"] = rs_revision_hash
    store_result_set_data(test_repository, [resultset])

    first_job = sample_data.job_data[0]
    first_job["revision_hash"] = rs_revision_hash
    first_job["revision"] = ""
    store_job_data(test_repository, [first_job])

    assert Job.objects.count() == 1
    assert Job.objects.get(id=1).push_id == Push.objects.values_list(
        'id', flat=True).get(revision_hash=rs_revision_hash)

def test_resultset_list_filter_by_date(webapp, test_repository,
                                       sample_resultset):
    """
    test retrieving a resultset list, filtered by a date range
    """
    for (i, datestr) in zip([3, 4, 5, 6, 7],
                            ["2013-08-09", "2013-08-10", "2013-08-11",
                             "2013-08-12", "2013-08-13"]):
        sample_resultset[i]['push_timestamp'] = utils.to_timestamp(
            utils.to_datetime(datestr))

    store_result_set_data(test_repository, sample_resultset)

    resp = webapp.get(
        reverse("resultset-list", kwargs={"project": test_repository.name}),
        {"startdate": "2013-08-10", "enddate": "2013-08-13"}
    )
    assert resp.status_int == 200
    results = resp.json['results']
    meta = resp.json['meta']

    assert len(results) == 4
    assert set([rs["revision"] for rs in results]) == {
        u'ce17cad5d554cfffddee13d1d8421ae9ec5aad82',
        u'7f417c3505e3d2599ac9540f02e3dbee307a3963',
        u'a69390334818373e2d7e6e9c8d626a328ed37d47',
        u'f361dcb60bbedaa01257fbca211452972f7a74b2'
    }
    assert meta == {
        u'count': 4,
        u'enddate': u'2013-08-13',
        u'filter_params': {
            u'push_timestamp__gte': 1376092800.0,
            u'push_timestamp__lt': 1376438400.0
        },
        u'repository': test_repository.name,
        u'startdate': u'2013-08-10'
    }

def test_resultset_list_without_jobs(webapp, test_repository,
                                     sample_resultset):
    """
    test retrieving a resultset list without jobs
    """
    store_result_set_data(test_repository, sample_resultset)

    resp = webapp.get(
        reverse("resultset-list", kwargs={"project": test_repository.name}))
    assert resp.status_int == 200

    results = resp.json['results']
    assert len(results) == 10
    assert all([('platforms' not in result) for result in results])

    meta = resp.json['meta']

    assert meta == {
        u'count': len(results),
        u'filter_params': {},
        u'repository': test_repository.name
    }

def test_ingest_running_to_retry_to_success_sample_job_multiple_retries(
        test_repository, failure_classifications, sample_data,
        sample_resultset, mock_log_parser, ingestion_cycles):
    # this verifies that if we ingest multiple retries:
    # (1) nothing errors out
    # (2) we end up with three jobs (the original + 2 retry jobs)
    store_result_set_data(test_repository, sample_resultset)

    job_datum = copy.deepcopy(sample_data.job_data[0])
    job_datum['revision'] = sample_resultset[0]['revision']

    job = job_datum['job']
    job_guid_root = job['job_guid']

    job_data = []
    for (state, result, job_guid) in [
            ('running', 'unknown', job_guid_root),
            ('completed', 'retry',
             job_guid_root + "_" + str(job['end_timestamp'])[-5:]),
            ('completed', 'retry', job_guid_root + "_12345"),
            ('completed', 'success', job_guid_root)]:
        new_job_datum = copy.deepcopy(job_datum)
        new_job_datum['job']['state'] = state
        new_job_datum['job']['result'] = result
        new_job_datum['job']['job_guid'] = job_guid
        job_data.append(new_job_datum)

    for (i, j) in ingestion_cycles:
        ins = job_data[i:j]
        store_job_data(test_repository, ins)

    assert Job.objects.count() == 3
    assert Job.objects.get(id=1).result == 'retry'
    assert Job.objects.get(id=2).result == 'retry'
    assert Job.objects.get(id=3).result == 'success'
    assert JobLog.objects.count() == 3

def test_ingest_job_with_revision_hash(test_repository,
                                       failure_classifications, sample_data,
                                       mock_log_parser, sample_resultset):
    """
    Test ingesting a job with only a revision hash, no revision.  And the
    revision_hash must NOT be the same SHA value as the top revision.

    This can happen if a user submits a new resultset in the API with their
    own revision_hash value.  If we just use the latest revision value, then
    their subsequent job submissions with the revision_hash they generated
    will fail and the jobs will be skipped.
    """
    revision_hash = "12345abc"
    resultset = sample_resultset[0].copy()
    resultset["revision_hash"] = revision_hash
    store_result_set_data(test_repository, [resultset])

    first_job = sample_data.job_data[0]
    first_job["revision_hash"] = revision_hash
    del first_job["revision"]
    store_job_data(test_repository, [first_job])

    assert Job.objects.count() == 1

def test_ingest_retry_sample_job_no_running(test_repository,
                                            failure_classifications,
                                            sample_data, sample_resultset,
                                            mock_log_parser):
    """Process a single job structure in the job_data.txt file"""
    job_data = copy.deepcopy(sample_data.job_data[:1])
    job = job_data[0]['job']
    job_data[0]['revision'] = sample_resultset[0]['revision']

    store_result_set_data(test_repository, sample_resultset)

    # complete version of the job coming in
    job['state'] = 'completed'
    job['result'] = 'retry'
    # convert the job_guid to what it would be on a retry
    retry_guid = job['job_guid'] + "_" + str(job['end_timestamp'])[-5:]
    job['job_guid'] = retry_guid

    store_job_data(test_repository, job_data)

    assert Job.objects.count() == 1
    job = Job.objects.get(id=1)
    assert job.result == 'retry'
    assert job.guid == retry_guid

def do_job_ingestion(test_repository, job_data, sample_resultset,
                     verify_data=True):
    """
    Ingest ``job_data`` which will be JSON job blobs.

    ``verify_data`` - whether or not to run the ingested jobs
      through the verifier.
    """
    store_result_set_data(test_repository, sample_resultset)

    max_index = len(sample_resultset) - 1
    resultset_index = 0

    # Structures to test if we stored everything
    build_platforms_ref = set()
    machine_platforms_ref = set()
    machines_ref = set()
    options_ref = set()
    job_types_ref = set()
    products_ref = set()
    result_sets_ref = set()
    log_urls_ref = set()
    coalesced_job_guids = {}
    artifacts_ref = {}

    blobs = []
    for index, blob in enumerate(job_data):

        if resultset_index > max_index:
            resultset_index = 0

        # Modify job structure to sync with the resultset sample data
        if 'sources' in blob:
            del blob['sources']

        blob['revision'] = sample_resultset[resultset_index]['revision']

        blobs.append(blob)

        resultset_index += 1

        # Build data structures to confirm everything is stored
        # as expected
        if verify_data:
            job_guid = blob['job']['job_guid']

            job = blob['job']

            build_platforms_ref.add(
                "-".join([
                    job.get('build_platform', {}).get('os_name', 'unknown'),
                    job.get('build_platform', {}).get('platform', 'unknown'),
                    job.get('build_platform', {}).get('architecture', 'unknown')
                ]))

            machine_platforms_ref.add(
                "-".join([
                    job.get('machine_platform', {}).get('os_name', 'unknown'),
                    job.get('machine_platform', {}).get('platform', 'unknown'),
                    job.get('machine_platform', {}).get('architecture', 'unknown')
                ]))

            machines_ref.add(job.get('machine', 'unknown'))

            options_ref = options_ref.union(
                job.get('option_collection', []).keys())

            job_types_ref.add(job.get('name', 'unknown'))
            products_ref.add(job.get('product_name', 'unknown'))
            result_sets_ref.add(blob['revision'])

            log_url_list = job.get('log_references', [])
            for log_data in log_url_list:
                log_urls_ref.add(log_data['url'])

            artifact_name = job.get('artifact', {}).get('name')
            if artifact_name:
                artifacts_ref[artifact_name] = job.get('artifact')

            coalesced = blob.get('coalesced', [])
            if coalesced:
                coalesced_job_guids[job_guid] = coalesced

    # Store the modified json blobs
    store_job_data(test_repository, blobs)

    if verify_data:
        # Confirms stored data matches what's in the reference data structs
        verify_build_platforms(build_platforms_ref)
        verify_machine_platforms(machine_platforms_ref)
        verify_machines(machines_ref)
        verify_options(options_ref)
        verify_job_types(job_types_ref)
        verify_products(products_ref)
        verify_result_sets(test_repository, result_sets_ref)
        verify_log_urls(test_repository, log_urls_ref)
        verify_coalesced(coalesced_job_guids)

def result_set_stored(test_repository, sample_resultset):
    store_result_set_data(test_repository, sample_resultset)
    return sample_resultset

def run(self, source_url, repository_name, changeset=None, last_push_id=None):
    print(repository_name)
    if not last_push_id:
        # get the last object seen from cache. this will
        # reduce the number of pushes processed every time
        last_push_id = cache.get("{0}:last_push_id".format(repository_name))

    if not changeset and last_push_id:
        startid_url = "{}&startID={}".format(source_url, last_push_id)
        logger.info("Extracted last push for '%s', '%s', from cache, "
                    "attempting to get changes only from that point at: %s" %
                    (repository_name, last_push_id, startid_url))
        # Use the cached ``last_push_id`` value (saved from the last time
        # this API was called) for this repo.  Use that value as the
        # ``startID`` to get all new pushes from that point forward.
        extracted_content = self.extract(startid_url)

        if extracted_content['lastpushid'] < last_push_id:
            # Push IDs from Mercurial are incremental.  If we cached a value
            # from one call to this API, and a subsequent call told us that
            # the ``lastpushid`` is LOWER than the one we have cached, then
            # the Mercurial IDs were reset.
            # In this circumstance, we can't rely on the cached id, so must
            # throw it out and get the latest 10 pushes.
            logger.warning(("Got a ``lastpushid`` value of {} lower than "
                            "the cached value of {} due to Mercurial repo reset.  "
                            "Getting latest changes for '{}' instead").format(
                                extracted_content['lastpushid'],
                                last_push_id,
                                repository_name))
            cache.delete("{0}:last_push_id".format(repository_name))
            extracted_content = self.extract(source_url)
    else:
        if changeset:
            logger.info("Getting all pushes for '%s' corresponding to "
                        "changeset '%s'" % (repository_name, changeset))
            extracted_content = self.extract(source_url + "&changeset=" +
                                             changeset)
        else:
            logger.warning("Unable to get last push from cache for '%s', "
                           "getting all pushes" % repository_name)
            extracted_content = self.extract(source_url)

    # ``pushes`` could be empty if there are no new ones since we last
    # fetched
    pushes = extracted_content['pushes']
    if not pushes:
        return None

    last_push_id = max(map(lambda x: int(x), pushes.keys()))
    last_push = pushes[str(last_push_id)]
    top_revision = last_push["changesets"][-1]["node"]
    # TODO: further remove the use of client types here
    transformed = self.transform(pushes, repository_name)

    errors = []
    repository = Repository.objects.get(name=repository_name)
    for collection in transformed[repository_name].get_chunks(chunk_size=1):
        try:
            collection.validate()
            store_result_set_data(repository,
                                  collection.get_collection_data())
        except Exception:
            newrelic.agent.record_exception()
            errors.append({
                "project": repository,
                "collection": "result_set",
                "message": traceback.format_exc()
            })

    if errors:
        raise CollectionNotStoredException(errors)

    if not changeset:
        # only cache the last push if we're not fetching a specific
        # changeset
        cache.set("{0}:last_push_id".format(repository_name), last_push_id)

    return top_revision

def retry_mock(exc=None, countdown=None):
    assert isinstance(exc, MissingPushException)
    thread_data.retries += 1
    store_result_set_data(test_repository, [rs])
    return orig_retry(exc=exc, countdown=countdown)