def test_ingest_pulse_jobs(pulse_jobs, test_repository, push_stored,
                           failure_classifications, mock_log_parser):
    """
    Ingest a job through the JSON Schema validated JobLoader used by Pulse
    """
    jl = JobLoader()
    revision = push_stored[0]["revision"]
    for job in pulse_jobs:
        job["origin"]["revision"] = revision
        jl.process_job(job)

    jobs = Job.objects.all()
    assert len(jobs) == 5

    assert [job.taskcluster_metadata for job in jobs]
    assert set(TaskclusterMetadata.objects.values_list(
        'task_id', flat=True)) == set(['IYyscnNMTLuxzna7PNqUJQ',
                                       'XJCbbRQ6Sp-UL1lL-tw5ng',
                                       'ZsSzJQu3Q7q2MfehIBAzKQ',
                                       'bIzVZt9jQQKgvQYD3a2HQw'])

    job_logs = JobLog.objects.filter(job_id=1)
    assert job_logs.count() == 2
    logs_expected = [{"name": "builds-4h",
                      "url": "http://ftp.mozilla.org/pub/mozilla.org/spidermonkey/tinderbox-builds/mozilla-inbound-linux64/mozilla-inbound_linux64_spidermonkey-warnaserr-bm57-build1-build352.txt.gz",
                      "parse_status": 0},
                     {"name": "errorsummary_json",
                      "url": "http://mozilla-releng-blobs.s3.amazonaws.com/blobs/Mozilla-Inbound-Non-PGO/sha512/05c7f57df6583c6351c6b49e439e2678e0f43c2e5b66695ea7d096a7519e1805f441448b5ffd4cc3b80b8b2c74b244288fda644f55ed0e226ef4e25ba02ca466",
                      "parse_status": 0}]
    assert [{"name": item.name, "url": item.url, "parse_status": item.status}
            for item in job_logs.all()] == logs_expected

    assert JobDetail.objects.count() == 2

def test_job_transformation(pulse_jobs, transformed_pulse_jobs):
    import json
    jl = JobLoader()
    for idx, pulse_job in enumerate(pulse_jobs):
        assert jl._is_valid_job(pulse_job)
        assert transformed_pulse_jobs[idx] == json.loads(
            json.dumps(jl.transform(pulse_job)))

def test_skip_unscheduled(first_job, jm, mock_log_parser):
    jl = JobLoader()
    first_job["state"] = "unscheduled"
    jl.process_job_list([first_job])

    jobs = jm.get_job_list(0, 10)
    assert len(jobs) == 0

def test_skip_unscheduled(first_job, jm):
    jl = JobLoader()
    first_job["state"] = "unscheduled"
    jl.process_job_list([first_job], raise_errors=True)

    jobs = jm.get_job_list(0, 10)
    assert len(jobs) == 0

def test_job_transformation(pulse_jobs, transformed_pulse_jobs): jl = JobLoader() validated_jobs = jl._get_validated_jobs_by_project(pulse_jobs) import json for (idx, job) in enumerate(validated_jobs["test_treeherder_jobs"]): assert transformed_pulse_jobs[idx] == json.loads( json.dumps(jl.transform(job)))
def test_ingest_pulse_jobs(pulse_jobs, test_project, jm, result_set_stored,
                           mock_log_parser):
    """
    Ingest a job through the JSON Schema validated JobLoader used by Pulse
    """
    jl = JobLoader()
    jl.process_job_list(pulse_jobs)

    jobs = jm.get_job_list(0, 10)
    assert len(jobs) == 4

    job_logs = JobLog.objects.filter(job__project_specific_id=jobs[0]["id"])
    assert job_logs.count() == 2
    logs_expected = [{"name": "builds-4h",
                      "url": "http://ftp.mozilla.org/pub/mozilla.org/spidermonkey/tinderbox-builds/mozilla-inbound-linux64/mozilla-inbound_linux64_spidermonkey-warnaserr-bm57-build1-build352.txt.gz",
                      "parse_status": 0},
                     {"name": "errorsummary_json",
                      "url": "http://mozilla-releng-blobs.s3.amazonaws.com/blobs/Mozilla-Inbound-Non-PGO/sha512/05c7f57df6583c6351c6b49e439e2678e0f43c2e5b66695ea7d096a7519e1805f441448b5ffd4cc3b80b8b2c74b244288fda644f55ed0e226ef4e25ba02ca466",
                      # Note that the test causes store_failure_lines to be
                      # run, which sets this to parsed.
                      "parse_status": 1}]
    assert [{"name": item.name, "url": item.url, "parse_status": item.status}
            for item in job_logs.all()] == logs_expected

    with ArtifactsModel(test_project) as am:
        artifacts = am.get_job_artifact_list(0, 10)
        assert len(artifacts) == 3

    assert JobDetail.objects.count() == 2

def test_ingest_pending_pulse_job(pulse_jobs, push_stored,
                                  failure_classifications, mock_log_parser):
    """
    Test that ingesting a pending job (1) works and (2) ingests the
    taskcluster metadata
    """
    jl = JobLoader()

    pulse_job = pulse_jobs[0]
    revision = push_stored[0]["revision"]
    pulse_job["origin"]["revision"] = revision
    pulse_job["state"] = "pending"
    jl.process_job(pulse_job, 'https://firefox-ci-tc.services.mozilla.com')

    jobs = Job.objects.all()
    assert len(jobs) == 1

    job = jobs[0]
    assert job.taskcluster_metadata
    assert job.taskcluster_metadata.task_id == 'IYyscnNMTLuxzna7PNqUJQ'

    # should not have processed any log or details for pending jobs
    assert JobLog.objects.count() == 2

    # we're no longer storing artifacts in this table
    assert JobDetail.objects.count() == 0

def test_skip_unscheduled(first_job, failure_classifications, mock_log_parser): jl = JobLoader() first_job["state"] = "unscheduled" jl.process_job(first_job, 'https://firefox-ci-tc.services.mozilla.com') assert not Job.objects.count()
def test_skip_unscheduled(first_job, failure_classifications, mock_log_parser): jl = JobLoader() first_job["state"] = "unscheduled" jl.process_job(first_job) assert not Job.objects.count()
def process_job_with_threads(pulse_job, root_url):
    logger.info("Loading into DB:\t%s", pulse_job["taskId"])
    with Connection():
        try:
            JobLoader().process_job(pulse_job, root_url)
        except MissingPushException:
            logger.warning(
                'The push was not in the DB. We are going to try that first')
            ingest_push(pulse_job["origin"]["project"],
                        pulse_job["origin"]["revision"])
            JobLoader().process_job(pulse_job, root_url)

def test_new_job_transformation(new_pulse_jobs, new_transformed_jobs, failure_classifications):
    jl = JobLoader()
    for message in new_pulse_jobs:
        # "task_id" which is not really the task_id
        job_guid = message["taskId"]
        (decoded_task_id, _) = job_guid.split("/")
        # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
        taskId = slugid.encode(uuid.UUID(decoded_task_id))
        transformed_job = jl.process_job(message, 'https://firefox-ci-tc.services.mozilla.com')
        # Not all messages from Taskcluster will be processed
        if transformed_job:
            assert new_transformed_jobs[taskId] == transformed_job

def test_job_transformation(pulse_jobs, transformed_pulse_jobs, result_set_stored):
    revision = result_set_stored[0]["revisions"][0]["revision"][:12]
    rs_lookup = {revision: {"revision_hash": "123"}}
    jl = JobLoader()
    validated_jobs = jl._get_validated_jobs_by_project(pulse_jobs)
    import json
    import pprint
    for (idx, job) in enumerate(validated_jobs["test_treeherder_jobs"]):
        xformed = jl.transform(job, rs_lookup)
        pprint.pprint(xformed)
        # assert transformed_pulse_jobs[idx] == jl.transform(job, rs_lookup)
        assert transformed_pulse_jobs[idx] == json.loads(
            json.dumps(jl.transform(job, rs_lookup)))

def test_ingest_pulse_jobs_bad_project(pulse_jobs, test_project, jm, result_set_stored, mock_log_parser): """ Ingest a job through the JSON Schema validated JobLoader used by Pulse """ jl = JobLoader() revision = result_set_stored[0]["revision"] for job in pulse_jobs: job["origin"]["revision"] = revision job["origin"]["project"] = "ferd" with pytest.raises(DatasetNotFoundError): jl.process_job_list(pulse_jobs)
def test_ingest_pulse_jobs_bad_project(pulse_jobs, test_repository, result_set_stored, failure_classifications, mock_log_parser): """ Ingest a job through the JSON Schema validated JobLoader used by Pulse """ jl = JobLoader() revision = result_set_stored[0]["revision"] for job in pulse_jobs: job["origin"]["revision"] = revision job["origin"]["project"] = "ferd" with pytest.raises(Repository.DoesNotExist): jl.process_job_list(pulse_jobs)
def test_ingest_pulse_jobs_bad_project(pulse_jobs, test_project, jm, result_set_stored, mock_log_parser): """ Ingest a job through the JSON Schema validated JobLoader used by Pulse """ jl = JobLoader() revision = result_set_stored[0]["revision"] for job in pulse_jobs: job["origin"]["revision"] = revision job["origin"]["project"] = "ferd" with pytest.raises(Repository.DoesNotExist): jl.process_job_list(pulse_jobs)
def test_ingest_pulse_jobs_bad_project(pulse_jobs, test_repository, result_set_stored, failure_classifications, mock_log_parser): """ Test ingesting a pulse job with bad repo will skip, ingest others """ jl = JobLoader() revision = result_set_stored[0]["revision"] job = pulse_jobs[0] job["origin"]["revision"] = revision job["origin"]["project"] = "ferd" jl.process_job_list(pulse_jobs) # length of pulse jobs is 5, so one will be skipped due to bad project assert Job.objects.count() == 4
def test_ingest_pulse_jobs_with_missing_resultset(pulse_jobs):
    """
    Ingest jobs with missing resultsets, so they should throw an exception
    """
    jl = JobLoader()
    job = pulse_jobs[0]
    job["origin"]["revision"] = "1234567890123456789012345678901234567890"

    with pytest.raises(MissingPushException):
        jl.process_job_list(pulse_jobs)

    # if one job isn't ready, except on the whole batch. They'll retry as a
    # task after the timeout.
    assert Job.objects.count() == 0

async def handleTask(task, root_url):
    taskId = task["status"]["taskId"]
    runs = task["status"]["runs"]
    # If we iterate in order of the runs, we will not be able to mark older runs as
    # "retry" instead of exception
    for run in reversed(runs):
        message = {
            "exchange": stateToExchange[run["state"]],
            "payload": {
                "status": {
                    "taskId": taskId,
                    "runs": runs,
                },
                "runId": run["runId"],
            },
            "root_url": root_url,
        }

        try:
            taskRuns = await handleMessage(message, task["task"])
            if taskRuns:
                for run in taskRuns:
                    logger.info("Loading into DB:\t%s/%s", taskId, run["retryId"])
                    # XXX: This seems our current bottleneck
                    JobLoader().process_job(run, root_url)
        except Exception as e:
            logger.exception(e)

def test_ingest_pulse_jobs_with_missing_push(pulse_jobs):
    """
    Ingest jobs with missing pushes, so they should throw an exception
    """
    jl = JobLoader()
    job = pulse_jobs[0]
    job["origin"]["revision"] = "1234567890123456789012345678901234567890"

    with pytest.raises(MissingPushException):
        for pulse_job in pulse_jobs:
            jl.process_job(pulse_job, 'https://tc.example.com')

    # if one job isn't ready, except on the whole batch. They'll retry as a
    # task after the timeout.
    assert Job.objects.count() == 0

def test_ingest_pulse_jobs_bad_project(pulse_jobs, test_repository, push_stored,
                                       failure_classifications, mock_log_parser):
    """
    Test ingesting a pulse job with bad repo will skip, ingest others
    """
    jl = JobLoader()
    revision = push_stored[0]["revision"]
    job = pulse_jobs[0]
    job["origin"]["revision"] = revision
    job["origin"]["project"] = "ferd"

    jl.process_job_list(pulse_jobs)
    # length of pulse jobs is 5, so one will be skipped due to bad project
    assert Job.objects.count() == 4

def test_transition_running_pending_stays_running(first_job, jm):
    jl = JobLoader()
    change_state_result(first_job, jl, jm, "running", "unknown", "running", "unknown")
    change_state_result(first_job, jl, jm, "pending", "unknown", "running", "unknown")

def test_transition_complete_running_stays_complete(first_job, jm):
    jl = JobLoader()
    change_state_result(first_job, jl, jm, "completed", "fail", "completed", "testfailed")
    change_state_result(first_job, jl, jm, "running", "unknown", "completed", "testfailed")

def store_pulse_jobs(pulse_job, exchange, routing_key):
    """
    Fetches the jobs pending from pulse exchanges and loads them.
    """
    newrelic.agent.add_custom_parameter("exchange", exchange)
    newrelic.agent.add_custom_parameter("routing_key", routing_key)

    JobLoader().process_job(pulse_job)

def test_transition_complete_pending_stays_complete(first_job, failure_classifications,
                                                    mock_log_parser):
    jl = JobLoader()
    change_state_result(first_job, jl, "completed", "fail", "completed", "testfailed")
    change_state_result(first_job, jl, "pending", "unknown", "completed", "testfailed")

def test_transition_running_pending_stays_running(first_job, failure_classifications,
                                                  mock_log_parser):
    jl = JobLoader()
    change_state_result(first_job, jl, "running", "unknown", "running", "unknown")
    change_state_result(first_job, jl, "pending", "unknown", "running", "unknown")

def test_transition_running_superseded(first_job, failure_classifications, mock_log_parser):
    jl = JobLoader()
    change_state_result(first_job, jl, "running", "unknown", "running", "unknown")
    change_state_result(first_job, jl, "completed", "superseded", "completed", "superseded")

def test_ingest_pulse_jobs(pulse_jobs, test_project, jm, result_set_stored, mock_log_parser): """ Ingest a job through the JSON Schema validated JobLoader used by Pulse """ jl = JobLoader() jl.process_job_list(pulse_jobs) jobs = jm.get_job_list(0, 10) assert len(jobs) == 4 logs = jm.get_job_log_url_list([jobs[0]["id"]]) assert len(logs) == 1 with ArtifactsModel(test_project) as am: artifacts = am.get_job_artifact_list(0, 10) assert len(artifacts) == 4
def test_ingest_pulse_job_with_long_job_type_name(
    pulse_jobs, test_repository, push_stored, failure_classifications, mock_log_parser
):
    """
    Ingest a job through the JSON Schema validated JobLoader used by Pulse
    """
    job = pulse_jobs[0]
    jl = JobLoader()
    revision = push_stored[0]["revision"]
    job["display"][
        "jobName"
    ] = "this is a very long string that exceeds the 100 character size that was the previous limit by just a little bit"
    job["origin"]["revision"] = revision
    jl.process_job(job, 'https://firefox-ci-tc.services.mozilla.com')

    jobs = Job.objects.all()
    assert len(jobs) == 1

def test_transition_pending_retry_fail_stays_retry(first_job, jm, mock_log_parser):
    jl = JobLoader()
    change_state_result(first_job, jl, jm, "pending", "unknown", "pending", "unknown")
    first_job["isRetried"] = True
    change_state_result(first_job, jl, jm, "completed", "fail", "completed", "retry")
    first_job["isRetried"] = False
    change_state_result(first_job, jl, jm, "completed", "fail", "completed", "retry")

def handle(self, *args, **options):
    UpdateJobFixtures.maxMessages = 100
    self.stdout.write("The Pulse consumer will consume {number} messages".format(
        number=UpdateJobFixtures.maxMessages))
    with pulse_conn as connection:
        consumer = prepare_consumer(
            connection,
            UpdateJobFixtures,
            job_sources,
            lambda key: "#.{}".format(key),
        )
        try:
            consumer.run()
        except Exception:
            tc_messages = {}
            tc_tasks = {}
            th_jobs = {}
            jl = JobLoader()

            for message in consumer.messages:
                taskId = message["payload"]["status"]["taskId"]
                task = fetchTask(taskId)
                runs = handleMessage(message, task)
                for run in runs:
                    try:
                        th_jobs[taskId] = jl.transform(run)
                        tc_messages[taskId] = message
                        tc_tasks[taskId] = task
                    except Exception:
                        logger.info('Issue validating this message: %s', run)

            logger.info("Updating Taskcluster jobs: %s entries", len(tc_messages))
            with open(os.path.join(tests_path, 'taskcluster_pulse_messages.json'), 'w') as fh:
                # Write new line at the end to satisfy prettier
                fh.write(json.dumps(tc_messages, sort_keys=True, indent=2) + "\n")

            logger.info("Updating Taskcluster task: %s entries", len(tc_tasks))
            with open(os.path.join(tests_path, 'taskcluster_tasks.json'), 'w') as fh:
                # Write new line at the end to satisfy prettier
                fh.write(json.dumps(tc_tasks, sort_keys=True, indent=2) + "\n")

            logger.info("Updating transformed messages: %s entries", len(th_jobs))
            with open(os.path.join(tests_path, 'taskcluster_transformed_jobs.json'), 'w') as fh:
                # Write new line at the end to satisfy prettier
                fh.write(json.dumps(th_jobs, sort_keys=True, indent=2) + "\n")

        self.stdout.write("Pulse Job listening stopped...")

def test_ingest_pulse_jobs(pulse_jobs, test_project, jm, result_set_stored, mock_log_parser): """ Ingest a job through the JSON Schema validated JobLoader used by Pulse """ jl = JobLoader() revision = result_set_stored[0]["revision"] for job in pulse_jobs: job["origin"]["revision"] = revision jl.process_job_list(pulse_jobs) jobs = jm.get_job_list(0, 10) assert len(jobs) == 4 job_logs = JobLog.objects.filter(job__project_specific_id=jobs[0]["id"]) assert job_logs.count() == 2 logs_expected = [ { "name": "builds-4h", "url": "http://ftp.mozilla.org/pub/mozilla.org/spidermonkey/tinderbox-builds/mozilla-inbound-linux64/mozilla-inbound_linux64_spidermonkey-warnaserr-bm57-build1-build352.txt.gz", "parse_status": 0 }, { "name": "errorsummary_json", "url": "http://mozilla-releng-blobs.s3.amazonaws.com/blobs/Mozilla-Inbound-Non-PGO/sha512/05c7f57df6583c6351c6b49e439e2678e0f43c2e5b66695ea7d096a7519e1805f441448b5ffd4cc3b80b8b2c74b244288fda644f55ed0e226ef4e25ba02ca466", # Note that the test causes store_failure_lines to be # run, which sets this to parsed. "parse_status": 1 } ] assert [{ "name": item.name, "url": item.url, "parse_status": item.status } for item in job_logs.all()] == logs_expected with ArtifactsModel(test_project) as am: artifacts = am.get_job_artifact_list(0, 10) assert len(artifacts) == 2 assert JobDetail.objects.count() == 2
def test_ingest_pulse_jobs_with_revision_hash(pulse_jobs, test_project, jm, result_set_stored, mock_log_parser): """ Ingest a revision_hash job with the JobLoader used by Pulse """ jl = JobLoader() revision_hash = Push.objects.values_list('revision_hash', flat=True).get(id=1) for job in pulse_jobs: origin = job["origin"] del(origin["revision"]) origin["revision_hash"] = revision_hash jl.process_job_list(pulse_jobs) assert Job.objects.count() == 4
def test_ingest_pulse_jobs_with_revision_hash(pulse_jobs, test_project, jm, result_set_stored, mock_log_parser): """ Ingest a revision_hash job with the JobLoader used by Pulse """ jl = JobLoader() rs = jm.get_result_set_list(0, 10)[0] revision_hash = rs["revision_hash"] for job in pulse_jobs: origin = job["origin"] del (origin["revision"]) origin["revision_hash"] = revision_hash jl.process_job_list(pulse_jobs) assert Job.objects.count() == 4
def test_transition_pending_running_complete(first_job, jm, mock_log_parser):
    jl = JobLoader()
    change_state_result(first_job, jl, jm, "pending", "unknown", "pending", "unknown")
    change_state_result(first_job, jl, jm, "running", "unknown", "running", "unknown")
    change_state_result(first_job, jl, jm, "completed", "fail", "completed", "testfailed")

def test_ingest_pulse_jobs_with_revision_hash(pulse_jobs, test_project, jm, result_set_stored, mock_log_parser): """ Ingest a revision_hash job with the JobLoader used by Pulse """ jl = JobLoader() rs = jm.get_result_set_list(0, 10)[0] revision_hash = rs["revision_hash"] for job in pulse_jobs: origin = job["origin"] del(origin["revision"]) origin["revision_hash"] = revision_hash jl.process_job_list(pulse_jobs) assert Job.objects.count() == 4
def test_ingest_pulse_jobs(pulse_jobs, test_project, jm, result_set_stored, mock_log_parser): """ Ingest a job through the JSON Schema validated JobLoader used by Pulse """ jl = JobLoader() jl.process_job_list(pulse_jobs) jobs = jm.get_job_list(0, 10) assert len(jobs) == 4 assert JobLog.objects.filter(job__project_specific_id=jobs[0]["id"]).count() == 1 with ArtifactsModel(test_project) as am: artifacts = am.get_job_artifact_list(0, 10) assert len(artifacts) == 3 assert JobDetail.objects.count() == 2
def test_ingest_pulse_jobs_with_revision_hash(pulse_jobs, test_project, jm, result_set_stored, mock_log_parser): """ Ingest a revision_hash job with the JobLoader used by Pulse """ jl = JobLoader() revision_hash = Push.objects.values_list('revision_hash', flat=True).get(id=1) for job in pulse_jobs: origin = job["origin"] del (origin["revision"]) origin["revision_hash"] = revision_hash jl.process_job_list(pulse_jobs) assert Job.objects.count() == 4
def test_ingest_pulse_job(sample_data, test_project, jm, result_set_stored):
    """
    Ingest a job through the JSON Schema validated JobLoader used by Pulse
    """
    revision = result_set_stored[0]["revisions"][0]["revision"]
    sample_jobs = sample_data.pulse_jobs
    for job in sample_jobs:
        job["origin"]["project"] = test_project
        job["origin"]["revision"] = revision

    jl = JobLoader()
    jl.process_job_list(sample_jobs, raise_errors=True)

    jobs = jm.get_job_list(0, 10)
    assert len(jobs) == 3

    logs = jm.get_job_log_url_list([jobs[0]["id"]])
    assert len(logs) == 1
    with ArtifactsModel(test_project) as am:
        artifacts = am.get_job_artifact_list(0, 10)
        assert len(artifacts) == 2

def test_ingest_pending_pulse_job(pulse_jobs, push_stored, failure_classifications, mock_log_parser): """ Test that ingesting a pending job (1) works and (2) ingests the taskcluster metadata """ jl = JobLoader() pulse_job = pulse_jobs[0] revision = push_stored[0]["revision"] pulse_job["origin"]["revision"] = revision pulse_job["state"] = "pending" jl.process_job(pulse_job) jobs = Job.objects.all() assert len(jobs) == 1 job = jobs[0] assert job.taskcluster_metadata assert job.taskcluster_metadata.task_id == 'IYyscnNMTLuxzna7PNqUJQ' # should not have processed any log or details for pending jobs assert JobLog.objects.count() == 2 assert JobDetail.objects.count() == 2
def test_job_transformation(pulse_jobs, transformed_pulse_jobs):
    import json
    jl = JobLoader()
    for idx, pulse_job in enumerate(pulse_jobs):
        assert jl._is_valid_job(pulse_job)
        assert transformed_pulse_jobs[idx] == json.loads(json.dumps(jl.transform(pulse_job)))

def test_job_transformation(pulse_jobs, transformed_pulse_jobs): jl = JobLoader() validated_jobs = jl._get_validated_jobs_by_project(pulse_jobs) import json for (idx, job) in enumerate(validated_jobs["test_treeherder_jobs"]): assert transformed_pulse_jobs[idx] == json.loads(json.dumps(jl.transform(job)))