def test_ingest_running_to_complete_job(result_set_stored,
                                        failure_classifications,
                                        mock_buildapi_running_url,
                                        mock_buildapi_builds4h_url,
                                        mock_log_parser):
    """
    a new buildapi running job transitions to a new completed job
    """
    RunningJobsProcess().run()
    assert Job.objects.count() == 1

    # the first job in the sample data should overwrite the running job
    # we just ingested. Leaving us with only 32 jobs, not 33.
    Builds4hJobsProcess().run()
    assert Job.objects.count() == 32

    # all jobs should be completed, including the original one which
    # transitioned from running.
    assert all(job.state == 'completed' for job in Job.objects.all())
def _handle(self, *args, **options):
    """
    Ingest a single changeset for a single project: fetch the hg pushlog
    entry for the changeset, then ingest the buildapi jobs for it.

    args must be exactly (project, changeset); raises CommandError
    otherwise, or when no repository matches the given project name.
    """
    if len(args) != 2:
        raise CommandError("Need to specify (only) branch and changeset")
    (project, changeset) = args

    # get reference to repo
    rdm = RefDataManager()
    # Materialize the matches as a list: on Python 3, filter() returns a
    # lazy iterator which is always truthy (breaking the "not repos"
    # check) and is not subscriptable (breaking "repos[0]").
    repos = [repo_info for repo_info in rdm.get_all_repository_info()
             if repo_info['name'] == project]
    if not repos:
        raise CommandError("No project found named '%s'" % project)
    repo = repos[0]

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1' % repo['url']

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    process.run(pushlog_url, project, changeset=changeset)
    self._process_all_objects_for_project(project)

    Builds4hJobsProcess().run(filter_to_project=project,
                              filter_to_revision=changeset)
    PendingJobsProcess().run(filter_to_project=project,
                             filter_to_revision=changeset)
    RunningJobsProcess().run(filter_to_project=project,
                             filter_to_revision=changeset)
    self._process_all_objects_for_project(project)
def _handle(self, *args, **options):
    """
    Ingest a single changeset for a single project: fetch the hg pushlog
    entry for the changeset, then ingest the buildapi jobs for it,
    optionally filtered to a single job group.

    args must be exactly (project, changeset); raises CommandError
    otherwise, or when no repository matches the given project name.
    """
    if len(args) != 2:
        raise CommandError("Need to specify (only) branch and changeset")
    (project, changeset) = args

    # get reference to repo
    rdm = RefDataManager()
    # Materialize the matches as a list: on Python 3, filter() returns a
    # lazy iterator which is always truthy (breaking the "not repos"
    # check) and is not subscriptable (breaking "repos[0]").
    repos = [repo_info for repo_info in rdm.get_all_repository_info()
             if repo_info['name'] == project]
    if not repos:
        raise CommandError("No project found named '%s'" % project)
    repo = repos[0]

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1&version=2' % repo['url']

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA.
    push_sha = process.run(pushlog_url, project, changeset=changeset)[:12]

    Builds4hJobsProcess().run(project_filter=project,
                              revision_filter=push_sha,
                              job_group_filter=options['filter_job_group'])
    PendingJobsProcess().run(project_filter=project,
                             revision_filter=push_sha,
                             job_group_filter=options['filter_job_group'])
    RunningJobsProcess().run(project_filter=project,
                             revision_filter=push_sha,
                             job_group_filter=options['filter_job_group'])
def test_ingest_running_to_complete_job(jm, result_set_stored,
                                        mock_buildapi_running_url,
                                        mock_buildapi_builds4h_url,
                                        mock_log_parser):
    """
    a new buildapi running job transitions to a new completed job
    """
    RunningJobsProcess().run()
    running_jobs = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    assert len(running_jobs) == 1

    # the first job in the sample data should overwrite the running job
    # we just ingested. Leaving us with only 32 jobs, not 33.
    Builds4hJobsProcess().run()
    completed_jobs = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    assert len(completed_jobs) == 32

    # all jobs should be completed, including the original one which
    # transitioned from running.
    assert all(job['state'] == 'completed' for job in completed_jobs)
def test_ingest_builds4h_jobs_1_missing_resultset(
        jm, initial_data, sample_resultset, test_repository,
        mock_buildapi_builds4h_missing1_url, mock_post_json,
        mock_log_parser, mock_get_resultset, mock_get_remote_content,
        activate_responses):
    """
    Ensure the builds4h job with the missing resultset is queued for
    refetching
    """
    # delegate the shared assertions to the common helper
    _do_missing_resultset_test(jm, Builds4hJobsProcess())
def test_ingest_builds4h_jobs_1_missing_resultset(
        jm, initial_data, sample_resultset, test_repository,
        mock_buildapi_builds4h_missing1_url, mock_post_json_data,
        mock_log_parser, mock_get_resultset, mock_get_remote_content):
    """
    Ensure the builds4h job with the missing resultset is queued for
    refetching
    """
    from treeherder.etl.buildapi import Builds4hJobsProcess

    # delegate the shared assertions to the common helper
    _do_missing_resultset_test(jm, Builds4hJobsProcess())
def test_ingest_builds4h_jobs_missing_branch(
        push_stored, failure_classifications,
        mock_buildapi_builds4h_missing_branch_url, mock_log_parser):
    """
    Ensure the builds4h job with the missing branch is not ingested
    """
    Builds4hJobsProcess().run()

    # nothing should have been stored for the unknown branch
    assert Job.objects.count() == 0
def test_ingest_builds4h_jobs_1_missing_resultset(
        result_set_stored, failure_classifications,
        mock_buildapi_builds4h_missing1_url, mock_log_parser):
    """
    Ensure the builds4h job with the missing resultset is not ingested
    """
    Builds4hJobsProcess().run()

    # only the job with a known resultset should have been stored
    assert Job.objects.count() == 1
def test_ingest_builds4h_jobs_missing_branch(
        jm, result_set_stored,
        mock_buildapi_builds4h_missing_branch_url, mock_log_parser):
    """
    Ensure the builds4h job with the missing branch is not ingested
    """
    Builds4hJobsProcess().run()

    # nothing should have been stored for the unknown branch
    ingested = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    assert len(ingested) == 0
def test_ingest_builds4h_jobs_1_missing_resultset(
        jm, sample_resultset, mock_buildapi_builds4h_missing1_url,
        mock_post_json, mock_log_parser, mock_get_resultset):
    """
    Ensure the builds4h job with the missing resultset is not ingested
    """
    Builds4hJobsProcess().run()

    # only the job with a known resultset should have been stored
    ingested = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    assert len(ingested) == 1
def test_ingest_builds4h_jobs_missing_branch(
        jm, initial_data, sample_resultset, test_repository,
        mock_buildapi_builds4h_missing_branch_url, mock_post_json,
        mock_log_parser, mock_get_resultset, mock_get_remote_content):
    """
    Ensure the builds4h job with the missing branch is not ingested
    """
    # NOTE: the previous docstring ("missing resultset is queued for
    # refetching") was a copy-paste error from the missing-resultset
    # test; this test checks that a job on an unknown branch is dropped.
    etl_process = Builds4hJobsProcess()
    etl_process.run()
    stored_obj = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    assert len(stored_obj) == 0
def test_ingest_builds4h_jobs(push_stored, failure_classifications,
                              mock_buildapi_builds4h_url,
                              mock_log_parser):
    """
    a new buildapi completed job creates a new obj in the job table
    """
    etl_process = Builds4hJobsProcess()

    # first run ingests all completed jobs from the sample data
    assert etl_process.run() is True
    assert len(cache.get(CACHE_KEYS['complete'])) == 32

    # re-running sees nothing new to ingest
    assert etl_process.run() is False
    assert Job.objects.count() == 32
def _handle(self, *args, **options):
    """
    Ingest pushes for a project: either the single changeset given as a
    positional argument, or (with --last-n-pushes) the most recent N
    pushes on the repository. For a single changeset, also ingest its
    buildapi jobs, optionally filtered to one job group.

    Raises CommandError when neither --last-n-pushes nor a changeset is
    supplied.
    """
    project = options['project']
    changeset = options['changeset']
    if not options['last_n_pushes'] and not changeset:
        raise CommandError('must specify --last-n-pushes or a positional '
                           'changeset argument')

    # get reference to repo
    repo = Repository.objects.get(name=project, active_status='active')

    if options['last_n_pushes']:
        last_push_id = last_push_id_from_server(repo)
        fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
        # lazy %-style logging args: formatting only happens if the
        # record is actually emitted
        logger.info('last server push id: %d; fetching push %d and newer',
                    last_push_id, fetch_push_id)
    else:
        fetch_push_id = None

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA.
    push_sha = process.run(pushlog_url, project, changeset=changeset,
                           last_push_id=fetch_push_id)

    # Only perform additional processing if fetching a single changeset
    # because we only have the sha1 of the tip-most push in "last N
    # pushes" mode and can't filter appropriately.
    if not fetch_push_id:
        group_filter = options['filter_job_group']
        Builds4hJobsProcess().run(project_filter=project,
                                  revision_filter=push_sha,
                                  job_group_filter=group_filter)
        PendingJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=group_filter)
        RunningJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=group_filter)
def test_ingest_builds4h_jobs(jm, result_set_stored,
                              mock_buildapi_builds4h_url,
                              mock_log_parser):
    """
    a new buildapi completed job creates a new obj in the job table
    """
    etl_process = Builds4hJobsProcess()

    # first run ingests all completed jobs from the sample data
    assert etl_process.run() is True
    assert len(cache.get(CACHE_KEYS['complete'])) == 32

    # re-running sees nothing new to ingest
    assert etl_process.run() is False
    stored_jobs = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    assert len(stored_jobs) == 32
def test_ingest_builds4h_jobs(jm, initial_data,
                              mock_buildapi_builds4h_url,
                              mock_post_json_data, mock_log_parser,
                              mock_get_resultset,
                              mock_get_remote_content):
    """
    a new buildapi completed job creates a new obj in the job table
    """
    from treeherder.etl.buildapi import Builds4hJobsProcess

    Builds4hJobsProcess().run()
    jm.process_objects(20)
    stored_jobs = jm.get_jobs_dhub().execute(proc="jobs_test.selects.jobs")
    jm.disconnect()
    assert len(stored_jobs) == 20
def test_ingest_running_to_complete_job(jm, initial_data,
                                        mock_buildapi_running_url,
                                        mock_buildapi_builds4h_url,
                                        mock_post_json_data,
                                        mock_log_parser,
                                        mock_get_resultset,
                                        mock_get_remote_content):
    """
    a new buildapi running job transitions to a new completed job

    Also ensure that a running job does NOT go through the objectstore.
    """
    from treeherder.etl.buildapi import Builds4hJobsProcess, RunningJobsProcess

    RunningJobsProcess().run()
    stored_running = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.jobs")
    stored_objectstore = jm.get_os_dhub().execute(
        proc="objectstore_test.selects.all")

    # ensure running jobs do not go to the objectstore, but go directly
    # to the jobs table without needing process_objects
    assert len(stored_objectstore) == 0
    assert len(stored_running) == 1

    # the first job in the sample data should overwrite the running job
    # we just ingested. Leaving us with only 20 jobs, not 21.
    Builds4hJobsProcess().run()
    jm.process_objects(20)
    stored_jobs = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.jobs")
    jm.disconnect()
    assert len(stored_jobs) == 20

    # all jobs should be completed, including the original one which
    # transitioned from running.
    assert all(job['state'] == 'completed' for job in stored_jobs)
def test_ingest_builds4h_jobs(jm, initial_data,
                              mock_buildapi_builds4h_url,
                              mock_post_json, mock_log_parser,
                              mock_get_resultset,
                              mock_get_remote_content):
    """
    a new buildapi completed job creates a new obj in the job table
    """
    etl_process = Builds4hJobsProcess()

    # first run ingests all completed jobs from the sample data
    assert etl_process.run() is True
    assert len(cache.get(CACHE_KEYS['complete'])) == 32

    # re-running sees nothing new to ingest
    assert etl_process.run() is False

    stored_jobs = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    jm.disconnect()
    assert len(stored_jobs) == 32
def fetch_buildapi_build4h():
    """
    Fetches the buildapi builds4h (completed jobs) api and loads the jobs

    (Previous docstring incorrectly said "running jobs"; this task runs
    Builds4hJobsProcess, which ingests completed jobs.)
    """
    Builds4hJobsProcess().run()
def fetch_buildapi_build4h():
    """
    Fetches the buildapi builds4h (completed jobs) api and loads them
    to the objectstore ingestion endpoint

    (Previous docstring incorrectly said "running jobs"; this task runs
    Builds4hJobsProcess, which ingests completed jobs.)
    """
    Builds4hJobsProcess().run()