def test_cycle_all_data(test_repository, failure_classifications, sample_data,
                        sample_resultset, mock_log_parser, failure_lines):
    """
    Test cycling the sample data
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_resultset, False)

    # set the submit time to be a week before today
    cycle_date_ts = datetime.datetime.now() - datetime.timedelta(weeks=1)
    for job in Job.objects.all():
        job.submit_time = cycle_date_ts
        job.save()

    call_command('cycle_data', sleep_time=0, days=1)
    refresh_all()

    # There should be no jobs or failure lines after cycling
    assert Job.objects.count() == 0
    assert FailureLine.objects.count() == 0
    assert JobDetail.objects.count() == 0
    assert JobLog.objects.count() == 0

    # There should be nothing in elastic search after cycling
    assert TestFailureLine.search().count() == 0

def test_cycle_all_data(jm, sample_data, sample_resultset, test_repository,
                        mock_log_parser, failure_lines):
    """
    Test cycling the sample data
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)

    time_now = time.time()
    cycle_date_ts = time_now - 7 * 24 * 3600

    jm.execute(
        proc="jobs_test.updates.set_jobs_last_modified",
        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.execute(
        proc="jobs_test.selects.get_jobs_for_cycling",
        placeholders=[time_now - 24 * 3600]
    )

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    call_command('cycle_data', sleep_time=0, days=1)

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)

    # There should be no jobs or failure lines after cycling
    assert len(jobs_after) == 0
    assert FailureLine.objects.count() == 0
    assert Job.objects.count() == 0

def test_cycle_job_model_reference_data(test_repository, failure_classifications,
                                        sample_data, sample_resultset,
                                        mock_log_parser):
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_resultset, False)

    # get a list of ids of original reference data
    original_job_type_ids = JobType.objects.values_list('id', flat=True)
    original_job_group_ids = JobGroup.objects.values_list('id', flat=True)
    original_machine_ids = Machine.objects.values_list('id', flat=True)

    # create a bunch of job model data that should be cycled, since they don't
    # reference any current jobs
    jg = JobGroup.objects.create(symbol='moo', name='moo')
    jt = JobType.objects.create(job_group=jg, symbol='mu', name='mu')
    m = Machine.objects.create(name='machine_with_no_job')
    (jg_id, jt_id, m_id) = (jg.id, jt.id, m.id)

    call_command('cycle_data', sleep_time=0, days=1, chunk_size=3)

    # assert that reference data that should have been cycled, was cycled
    assert JobGroup.objects.filter(id=jg_id).count() == 0
    assert JobType.objects.filter(id=jt_id).count() == 0
    assert Machine.objects.filter(id=m_id).count() == 0

    # assert that we still have everything that shouldn't have been cycled
    assert JobType.objects.filter(id__in=original_job_type_ids).count() == len(original_job_type_ids)
    assert JobGroup.objects.filter(id__in=original_job_group_ids).count() == len(original_job_group_ids)
    assert Machine.objects.filter(id__in=original_machine_ids).count() == len(original_machine_ids)

def test_cycle_one_job(jm, refdata, sample_data, initial_data,
                       sample_resultset, mock_log_parser):
    """
    Test cycling one job in a group of jobs to confirm there are no
    unexpected deletions
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset, False)

    time_now = time.time()
    cycle_date_ts = int(time_now - 7 * 24 * 3600)

    jm.execute(proc="jobs_test.updates.set_result_sets_push_timestamp",
               placeholders=[time_now])

    jm.execute(proc="jobs_test.updates.set_one_result_set_push_timestamp",
               placeholders=[cycle_date_ts])

    jobs_to_be_deleted = jm.execute(proc="jobs_test.selects.get_result_set_jobs",
                                    placeholders=[1])

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    call_command("cycle_data", sleep_time=0, cycle_interval=1, debug=True)

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    # Confirm that the target result set has no jobs in the
    # jobs table
    jobs_to_be_deleted_after = jm.execute(proc="jobs_test.selects.get_result_set_jobs",
                                          placeholders=[1])
    assert len(jobs_to_be_deleted_after) == 0

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)

def test_ingest_all_sample_jobs(test_repository, failure_classifications,
                                sample_data, sample_push, mock_log_parser):
    """
    Process each job structure in the job_data.txt file and verify.
    """
    job_data = sample_data.job_data
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)

def test_cycle_all_data_in_chunks(jm, refdata, sample_data, initial_data,
                                  sample_resultset, mock_log_parser):
    """
    Test cycling the sample data in chunks.
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset, False)

    # build a date that will cause the data to be cycled
    time_now = time.time()
    cycle_date_ts = int(time_now - 7 * 24 * 3600)

    jm.execute(proc="jobs_test.updates.set_result_sets_push_timestamp",
               placeholders=[cycle_date_ts])

    jobs_to_be_deleted = jm.execute(proc="jobs_test.selects.get_jobs_for_cycling",
                                    placeholders=[time_now - 24 * 3600])

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    call_command("cycle_data", sleep_time=0, cycle_interval=1, chunk_size=3)

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)

    # There should be no jobs after cycling
    assert len(jobs_after) == 0

def test_cycle_all_data_in_chunks(test_repository, failure_classifications,
                                  sample_data, sample_resultset, mock_log_parser):
    """
    Test cycling the sample data in chunks.
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_resultset, False)

    # build a date that will cause the data to be cycled
    cycle_date_ts = datetime.datetime.now() - datetime.timedelta(weeks=1)
    for job in Job.objects.all():
        job.submit_time = cycle_date_ts
        job.save()

    create_failure_lines(Job.objects.get(id=1),
                         [(test_line, {})] * 7)

    assert TestFailureLine.search().count() > 0

    call_command('cycle_data', sleep_time=0, days=1, chunk_size=3)
    refresh_all()

    # There should be no jobs after cycling
    assert Job.objects.count() == 0
    assert FailureLine.objects.count() == 0
    assert JobDetail.objects.count() == 0
    assert TestFailureLine.search().count() == 0

def test_cycle_all_data(jm, refdata, sample_data, initial_data,
                        sample_resultset, mock_log_parser):
    """
    Test cycling the sample data
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset, False)

    time_now = time.time()
    cycle_date_ts = time_now - 7 * 24 * 3600

    jm.execute(
        proc="jobs_test.updates.set_result_sets_push_timestamp",
        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.execute(
        proc="jobs_test.selects.get_jobs_for_cycling",
        placeholders=[time_now - 24 * 3600]
    )

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    call_command('cycle_data', sleep_time=0, cycle_interval=1)

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)

    # There should be no jobs after cycling
    assert len(jobs_after) == 0

def test_ingest_all_sample_jobs(jm, sample_data, sample_resultset,
                                test_repository, mock_log_parser):
    """
    Process each job structure in the job_data.txt file and verify.
    """
    job_data = sample_data.job_data
    test_utils.do_job_ingestion(jm, job_data, sample_resultset)

def test_ingest_single_sample_job(jm, refdata, sample_data, initial_data,
                                  mock_log_parser, sample_resultset):
    """Process a single job structure in the job_data.txt file"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)

    jm.disconnect()
    refdata.disconnect()

def test_ingest_all_sample_jobs(test_repository, failure_classifications,
                                sample_data, sample_resultset, mock_log_parser):
    """
    Process each job structure in the job_data.txt file and verify.
    """
    job_data = sample_data.job_data
    test_utils.do_job_ingestion(test_repository, job_data, sample_resultset)

def test_ingesting_skip_existing(test_repository, failure_classifications,
                                 sample_data, sample_push, mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)

    store_job_data(test_repository, sample_data.job_data[:2])

    assert Job.objects.count() == 2

def test_ingest_all_sample_jobs(jm, sample_data, sample_resultset,
                                test_repository, mock_log_parser):
    """
    Process each job structure in the job_data.txt file and verify.
    """
    job_data = sample_data.job_data
    test_utils.do_job_ingestion(jm, job_data, sample_resultset)

    jm.disconnect()

def test_cycle_one_job(jm, sample_data, sample_resultset, test_repository,
                       mock_log_parser, failure_lines):
    """
    Test cycling one job in a group of jobs to confirm there are no
    unexpected deletions
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)

    job_not_deleted = jm.get_job(2)[0]

    failure_lines_remaining = create_failure_lines(test_repository,
                                                   job_not_deleted["job_guid"],
                                                   [(test_line, {}),
                                                    (test_line, {"subtest": "subtest2"})])

    time_now = time.time()
    cycle_date_ts = int(time_now - 7 * 24 * 3600)

    jm.execute(
        proc="jobs_test.updates.set_jobs_last_modified",
        placeholders=[time_now]
    )

    jm.execute(
        proc="jobs_test.updates.set_one_job_last_modified_timestamp",
        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.execute(
        proc="jobs_test.selects.get_one_job_for_cycling",
        placeholders=[1]
    )

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    call_command('cycle_data', sleep_time=0, days=1, debug=True)

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    # Confirm that the target result set has no jobs in the
    # jobs table
    jobs_to_be_deleted_after = jm.execute(
        proc="jobs_test.selects.get_one_job_for_cycling",
        placeholders=[1]
    )
    assert len(jobs_to_be_deleted_after) == 0

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)
    assert len(jobs_after) == Job.objects.count()

    assert (set(item.id for item in FailureLine.objects.all()) ==
            set(item.id for item in failure_lines_remaining))

def test_cycle_all_but_one_job(
        test_repository, failure_classifications, sample_data, sample_push,
        mock_log_parser, failure_lines,
):
    """
    Test cycling all but one job in a group of jobs to confirm there are no
    unexpected deletions
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)

    # one job should not be deleted, set its submit time to now
    job_not_deleted = Job.objects.get(
        id=Job.objects.aggregate(Max("id"))["id__max"])
    job_not_deleted.submit_time = datetime.now()
    job_not_deleted.save()

    extra_objects = {
        'failure_lines': (
            FailureLine,
            create_failure_lines(job_not_deleted,
                                 [(test_line, {}),
                                  (test_line, {"subtest": "subtest2"})]),
        ),
    }

    # set other job's submit time to be a week ago from now
    cycle_date_ts = datetime.now() - timedelta(weeks=1)
    for job in Job.objects.all().exclude(id=job_not_deleted.id):
        job.submit_time = cycle_date_ts
        job.save()

    num_job_logs_to_be_deleted = JobLog.objects.all().exclude(
        job__id=job_not_deleted.id).count()
    num_job_logs_before = JobLog.objects.count()

    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1,
                 debug=True, chunk_size=1)

    assert Job.objects.count() == 1
    assert JobLog.objects.count() == (num_job_logs_before -
                                      num_job_logs_to_be_deleted)

    for (object_type, objects) in extra_objects.values():
        actual = set(item.id for item in object_type.objects.all())
        expected = set(item.id for item in objects)
        assert actual == expected

def test_ingesting_skip_existing(jm, sample_data, initial_data, refdata,
                                 mock_log_parser, sample_resultset):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)

    jm.store_job_data(sample_data.job_data[:2])

    jl = jm.get_job_list(0, 10)
    assert len(jl) == 2

def test_ingest_single_sample_job(test_repository, failure_classifications,
                                  sample_data, sample_push, mock_log_parser):
    """Process a single job structure in the job_data.txt file"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)

    assert Job.objects.count() == 1
    job = Job.objects.get(id=1)
    # Ensure we don't inadvertently change the way we generate job-related hashes.
    assert job.option_collection_hash == '32faaecac742100f7753f0c1d0aa0add01b4046b'
    assert job.signature.signature == '4dabe44cc898e585228c43ea21337a9b00f5ddf7'

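# The two hard-coded hashes above act as regression pins: if the way the
# reference-data hashes are generated ever changes, this test fails. As an
# illustration only (not Treeherder's actual implementation), such a signature
# hash can be built as a SHA-1 over a canonical ordering of reference fields;
# `example_signature_hash` below is a hypothetical helper, not part of the code
# under test.
import hashlib


def example_signature_hash(*reference_fields):
    """Illustrative sketch: hash a tuple of reference-data strings
    deterministically, so any change in the inputs changes the digest."""
    canonical = '\n'.join(str(field) for field in reference_fields)
    return hashlib.sha1(canonical.encode('utf-8')).hexdigest()

# e.g. example_signature_hash('linux64', 'opt', 'buildbot', 'mochitest-1')
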
def test_get_job_data(jm, test_project, refdata, sample_data, initial_data,
                      mock_log_parser, sample_resultset):
    target_len = 10
    job_data = sample_data.job_data[:target_len]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)

    with ArtifactsModel(test_project) as artifacts_model:
        job_data = artifacts_model.get_job_signatures_from_ids(range(1, 11))

    assert len(job_data) == target_len

def test_remove_existing_jobs_one_existing_one_new(jm, sample_data, initial_data,
                                                   refdata, mock_log_parser,
                                                   sample_resultset):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)

    data = jm._remove_existing_jobs(sample_data.job_data[:2])
    assert len(data) == 1

def test_get_job_data(jm, refdata, sample_data, initial_data,
                      mock_log_parser, sample_resultset):
    target_len = 10
    job_data = sample_data.job_data[:target_len]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)

    job_data = jm.get_job_signatures_from_ids(range(1, 11))

    assert len(job_data) == target_len

def test_remove_existing_jobs_one_existing_one_new(jm, sample_data,
                                                   sample_resultset,
                                                   mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset)

    data = jm._remove_existing_jobs(sample_data.job_data[:2])
    assert len(data) == 1

def test_ingest_all_sample_jobs(jm, refdata, sample_data, initial_data,
                                sample_resultset, mock_log_parser):
    """
    @@@ - Re-enable when our job_data.txt has been re-created with
          correct data.

    Process each job structure in the job_data.txt file and verify.
    """
    job_data = sample_data.job_data
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)

def test_get_job_data(jm, refdata, sample_data, initial_data,
                      mock_log_parser, sample_resultset):
    target_len = 10
    job_data = sample_data.job_data[:target_len]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)

    job_data = jm.get_job_signatures_from_ids(range(1, 11))

    assert len(job_data) == target_len

def test_cycle_all_but_one_job(test_repository, failure_classifications,
                               sample_data, sample_resultset, mock_log_parser,
                               elasticsearch, failure_lines):
    """
    Test cycling all but one job in a group of jobs to confirm there are no
    unexpected deletions
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_resultset, False)

    # one job should not be deleted, set its submit time to now
    job_not_deleted = Job.objects.get(id=2)
    job_not_deleted.submit_time = datetime.datetime.now()
    job_not_deleted.save()

    extra_objects = {
        'failure_lines': (FailureLine,
                          create_failure_lines(job_not_deleted,
                                               [(test_line, {}),
                                                (test_line, {"subtest": "subtest2"})])),
        'job_details': (JobDetail,
                        [JobDetail.objects.create(job=job_not_deleted,
                                                  title='test',
                                                  value='testvalue')])
    }

    # set other job's submit time to be a week ago from now
    cycle_date_ts = datetime.datetime.now() - datetime.timedelta(weeks=1)
    for job in Job.objects.all().exclude(id=job_not_deleted.id):
        job.submit_time = cycle_date_ts
        job.save()

    # count the logs of every job except the one we keep
    num_job_logs_to_be_deleted = JobLog.objects.all().exclude(
        job__id=job_not_deleted.id).count()
    num_job_logs_before = JobLog.objects.count()

    call_command('cycle_data', sleep_time=0, days=1, debug=True)
    refresh_all()

    assert Job.objects.count() == 1
    assert JobLog.objects.count() == (num_job_logs_before -
                                      num_job_logs_to_be_deleted)

    for (object_type, objects) in extra_objects.values():
        assert (set(item.id for item in object_type.objects.all()) ==
                set(item.id for item in objects))

    assert set(int(item.meta.id)
               for item in TestFailureLine.search().execute()) == set(
                   item.id for item in extra_objects["failure_lines"][1])

def test_cycle_one_job(jm, sample_data, sample_resultset, test_repository,
                       mock_log_parser, failure_lines):
    """
    Test cycling one job in a group of jobs to confirm there are no
    unexpected deletions
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)

    job_not_deleted = jm.get_job(2)[0]

    failure_lines_remaining = create_failure_lines(test_repository,
                                                   job_not_deleted["job_guid"],
                                                   [(test_line, {}),
                                                    (test_line, {"subtest": "subtest2"})])

    time_now = time.time()
    cycle_date_ts = int(time_now - 7 * 24 * 3600)

    jm.execute(
        proc="jobs_test.updates.set_jobs_last_modified",
        placeholders=[time_now]
    )

    jm.execute(
        proc="jobs_test.updates.set_one_job_last_modified_timestamp",
        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.execute(
        proc="jobs_test.selects.get_one_job_for_cycling",
        placeholders=[1]
    )

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    call_command('cycle_data', sleep_time=0, days=1, debug=True)

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    # Confirm that the target result set has no jobs in the
    # jobs table
    jobs_to_be_deleted_after = jm.execute(
        proc="jobs_test.selects.get_one_job_for_cycling",
        placeholders=[1]
    )
    assert len(jobs_to_be_deleted_after) == 0

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)

    assert (set(item.id for item in FailureLine.objects.all()) ==
            set(item.id for item in failure_lines_remaining))

def test_remove_existing_jobs_single_existing(test_repository,
                                              failure_classifications,
                                              sample_data, sample_push,
                                              mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)
    assert Job.objects.count() == 1

    data = _remove_existing_jobs(job_data)
    assert len(data) == 0

def test_remove_existing_jobs_single_existing(test_repository,
                                              failure_classifications,
                                              sample_data, sample_resultset,
                                              mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(test_repository, job_data, sample_resultset)
    assert Job.objects.count() == 1

    data = _remove_existing_jobs(job_data)
    assert len(data) == 0

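# For context on what the two tests above exercise: the helper they call
# (whether the standalone _remove_existing_jobs or the JobsModel method in the
# older versions) filters an incoming batch of job blobs down to the ones not
# already stored. The function below is an illustrative sketch of that idea,
# not the project's actual implementation; `remove_existing_jobs_sketch` is a
# hypothetical name.
def remove_existing_jobs_sketch(job_blobs):
    """Return only the job blobs whose guid is not already in the Job table."""
    incoming_guids = [blob['job']['job_guid'] for blob in job_blobs]
    existing_guids = set(
        Job.objects.filter(guid__in=incoming_guids).values_list('guid', flat=True))
    return [blob for blob in job_blobs
            if blob['job']['job_guid'] not in existing_guids]
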
def test_ingesting_skip_existing(jm, sample_data, sample_resultset,
                                 mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset)

    jm.store_job_data(sample_data.job_data[:2])

    jl = jm.get_job_list(0, 10)
    assert len(jl) == 2
    assert Job.objects.count() == 2

def test_remove_existing_jobs_one_existing_one_new(jm, sample_data,
                                                   sample_resultset,
                                                   mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset)

    data = jm._remove_existing_jobs(sample_data.job_data[:2])
    assert len(data) == 1
    assert Job.objects.count() == 1

def test_get_job_data(jm, test_project, sample_data, sample_resultset,
                      test_repository, mock_log_parser):
    target_len = 10
    job_data = sample_data.job_data[:target_len]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset)

    with ArtifactsModel(test_project) as artifacts_model:
        job_data = artifacts_model.get_job_signatures_from_ids(range(1, 11))

    assert len(job_data) == target_len

def test_ingest_buildbot_tier2_job(test_repository, sample_data, sample_push,
                                   failure_classifications, mock_log_parser):
    """Tier is set to 2 if it matches the signature object"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)
    job = Job.objects.all().first()

    lower_tier_signatures = {job.signature.signature: 2}
    job_data_2 = copy.deepcopy(job_data)
    job_data_2[0]['job']['job_guid'] = "foo"
    store_job_data(test_repository, job_data_2, lower_tier_signatures)
    job2 = Job.objects.get(guid="foo")
    assert job2.tier == 2

def test_cycle_all_but_one_job(test_repository, failure_classifications,
                               sample_data, sample_push, mock_log_parser,
                               elasticsearch, failure_lines):
    """
    Test cycling all but one job in a group of jobs to confirm there are no
    unexpected deletions
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)

    # one job should not be deleted, set its submit time to now
    job_not_deleted = Job.objects.get(id=2)
    job_not_deleted.submit_time = datetime.datetime.now()
    job_not_deleted.save()

    extra_objects = {
        'failure_lines': (FailureLine,
                          create_failure_lines(job_not_deleted,
                                               [(test_line, {}),
                                                (test_line, {"subtest": "subtest2"})])),
        'job_details': (JobDetail,
                        [JobDetail.objects.create(job=job_not_deleted,
                                                  title='test',
                                                  value='testvalue')])
    }

    # set other job's submit time to be a week ago from now
    cycle_date_ts = datetime.datetime.now() - datetime.timedelta(weeks=1)
    for job in Job.objects.all().exclude(id=job_not_deleted.id):
        job.submit_time = cycle_date_ts
        job.save()

    # count the logs of every job except the one we keep
    num_job_logs_to_be_deleted = JobLog.objects.all().exclude(
        job__id=job_not_deleted.id).count()
    num_job_logs_before = JobLog.objects.count()

    call_command('cycle_data', sleep_time=0, days=1, debug=True)
    refresh_index()

    assert Job.objects.count() == 1
    assert JobLog.objects.count() == (num_job_logs_before -
                                      num_job_logs_to_be_deleted)

    for (object_type, objects) in extra_objects.values():
        assert (set(item.id for item in object_type.objects.all()) ==
                set(item.id for item in objects))

    # get all documents
    indexed_ids = set(int(item['_id']) for item in all_documents())
    expected = set(item.id for item in extra_objects["failure_lines"][1])
    assert indexed_ids == expected

def test_remove_existing_jobs_single_existing(jm, sample_data, sample_resultset,
                                              mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset)
    jl = jm.get_job_list(0, 10)

    data = jm._remove_existing_jobs(job_data)
    assert len(data) == 0

    jl = jm.get_job_list(0, 10)
    assert len(jl) == 1

def test_remove_existing_jobs_single_existing(jm, sample_data, initial_data,
                                              refdata, mock_log_parser,
                                              sample_resultset):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)
    jl = jm.get_job_list(0, 10)

    data = jm._remove_existing_jobs(job_data)
    assert len(data) == 0

    jl = jm.get_job_list(0, 10)
    assert len(jl) == 1

def test_ingest_all_sample_jobs(jm, refdata, sample_data, initial_data,
                                sample_resultset, mock_log_parser):
    """
    @@@ - Re-enable when our job_data.txt has been re-created with
          correct data.

    Process each job structure in the job_data.txt file and verify.
    """
    job_data = sample_data.job_data
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)

    jm.disconnect()
    refdata.disconnect()

def test_remove_existing_jobs_one_existing_one_new(test_repository,
                                                   failure_classifications,
                                                   sample_data, sample_push,
                                                   mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)

    data = _remove_existing_jobs(sample_data.job_data[:2])
    assert len(data) == 1
    assert Job.objects.count() == 1

def test_ingest_buildbot_tier2_job(test_repository, sample_data, sample_push,
                                   failure_classifications, mock_log_parser):
    """Tier is set to 2 if it matches the signature object"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)
    job = Job.objects.all().first()

    lower_tier_signatures = {
        job.signature.signature: 2
    }
    job_data_2 = copy.deepcopy(job_data)
    job_data_2[0]['job']['job_guid'] = "foo"
    store_job_data(test_repository, job_data_2, lower_tier_signatures)
    job2 = Job.objects.get(guid="foo")
    assert job2.tier == 2

def test_remove_existing_jobs_one_existing_one_new(test_repository,
                                                   failure_classifications,
                                                   sample_data, sample_resultset,
                                                   mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(test_repository, job_data, sample_resultset)

    data = _remove_existing_jobs(sample_data.job_data[:2])
    assert len(data) == 1
    assert Job.objects.count() == 1

def test_remove_existing_jobs_single_existing(jm, sample_data, sample_resultset,
                                              mock_log_parser):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset)
    jl = jm.get_job_list(0, 10)

    data = jm._remove_existing_jobs(job_data)
    assert len(data) == 0

    jl = jm.get_job_list(0, 10)
    assert len(jl) == 1
    assert Job.objects.count() == 1

def test_remove_existing_jobs_single_existing(jm, sample_data, initial_data,
                                              refdata, mock_log_parser,
                                              sample_resultset):
    """Remove single existing job prior to loading"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset)
    jl = jm.get_job_list(0, 10)

    data = jm._remove_existing_jobs(job_data)
    assert len(data) == 0

    jl = jm.get_job_list(0, 10)
    assert len(jl) == 1

def test_cycle_job_model_reference_data(jm, sample_data, sample_resultset,
                                        test_repository, mock_log_parser):
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)

    # create a fake original runnable job, since we don't fetch those via
    # job ingestion
    RunnableJob.objects.create(build_platform=BuildPlatform.objects.all()[0],
                               machine_platform=MachinePlatform.objects.all()[0],
                               job_type=JobType.objects.all()[0],
                               option_collection_hash='test1',
                               ref_data_name='test1',
                               build_system_type='test1',
                               repository=test_repository)

    # get a list of ids of original reference data
    original_job_type_ids = JobType.objects.values_list('id', flat=True)
    original_job_group_ids = JobGroup.objects.values_list('id', flat=True)
    original_runnable_job_ids = RunnableJob.objects.values_list('id', flat=True)
    original_machine_ids = Machine.objects.values_list('id', flat=True)

    # create a bunch of job model data that should be cycled, since they don't
    # reference any current jobs
    jg = JobGroup.objects.create(symbol='moo', name='moo')
    jt = JobType.objects.create(job_group=jg, symbol='mu', name='mu')
    rj = RunnableJob.objects.create(build_platform=BuildPlatform.objects.all()[0],
                                    machine_platform=MachinePlatform.objects.all()[0],
                                    job_type=jt,
                                    option_collection_hash='test2',
                                    ref_data_name='test2',
                                    build_system_type='test2',
                                    repository=test_repository)
    m = Machine.objects.create(name='machine_with_no_job')
    (jg_id, jt_id, rj_id, m_id) = (jg.id, jt.id, rj.id, m.id)

    call_command('cycle_data', sleep_time=0, days=1, chunk_size=3)

    # assert that reference data that should have been cycled, was cycled
    assert JobGroup.objects.filter(id=jg_id).count() == 0
    assert JobType.objects.filter(id=jt_id).count() == 0
    assert RunnableJob.objects.filter(id=rj_id).count() == 0
    assert Machine.objects.filter(id=m_id).count() == 0

    # assert that we still have everything that shouldn't have been cycled
    assert JobType.objects.filter(id__in=original_job_type_ids).count() == len(original_job_type_ids)
    assert JobGroup.objects.filter(id__in=original_job_group_ids).count() == len(original_job_group_ids)
    assert RunnableJob.objects.filter(id__in=original_runnable_job_ids).count() == len(original_runnable_job_ids)
    assert Machine.objects.filter(id__in=original_machine_ids).count() == len(original_machine_ids)

def test_cycle_one_job(jm, refdata, sample_data, initial_data,
                       sample_resultset, mock_log_parser):
    """
    Test cycling one job in a group of jobs to confirm there are no
    unexpected deletions
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset, False)

    # set all the result_sets to a non cycle time
    non_cycle_date_ts = int(time.time() - (jm.DATA_CYCLE_INTERVAL - 100000))
    jm.get_dhub(jm.CT_JOBS).execute(
        proc="jobs_test.updates.set_result_sets_push_timestamp",
        placeholders=[non_cycle_date_ts]
    )

    # build a date that will cause the data to be cycled
    cycle_date_ts = int(time.time() - (jm.DATA_CYCLE_INTERVAL + 100000))
    jm.get_dhub(jm.CT_JOBS).execute(
        proc="jobs_test.updates.set_one_result_set_push_timestamp",
        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.get_dhub(jm.CT_JOBS).execute(
        proc="jobs_test.selects.get_result_set_jobs",
        placeholders=[1]
    )
    job_count = len(jobs_to_be_deleted)

    sql_targets = jm.cycle_data({}, False)

    assert sql_targets['jobs.deletes.cycle_job'] == job_count

    # Confirm that the target result set has no jobs in the
    # jobs table
    jobs_count_after_delete = jm.get_dhub(jm.CT_JOBS).execute(
        proc="jobs_test.selects.get_result_set_jobs",
        placeholders=[1]
    )
    assert len(jobs_count_after_delete) == 0

    jm.disconnect()
    refdata.disconnect()

def test_cycle_all_data_in_chunks(jm, sample_data, sample_resultset,
                                  test_repository, mock_log_parser):
    """
    Test cycling the sample data in chunks.
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)

    # build a date that will cause the data to be cycled
    time_now = time.time()
    cycle_date_ts = int(time_now - 7 * 24 * 3600)

    jm.execute(
        proc="jobs_test.updates.set_jobs_last_modified",
        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.execute(
        proc="jobs_test.selects.get_jobs_for_cycling",
        placeholders=[time_now - 24 * 3600]
    )

    job = jm.get_job(jobs_to_be_deleted[0]['id'])[0]
    create_failure_lines(test_repository, job["job_guid"],
                         [(test_line, {})] * 7)

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    assert TestFailureLine.search().params(search_type="count").execute().hits.total > 0

    call_command('cycle_data', sleep_time=0, days=1, chunk_size=3)
    refresh_all()

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)

    # There should be no jobs after cycling
    assert len(jobs_after) == 0
    assert Job.objects.count() == 0
    assert FailureLine.objects.count() == 0
    assert JobDetail.objects.count() == 0
    assert TestFailureLine.search().params(search_type="count").execute().hits.total == 0

def test_cycle_one_job(jm, refdata, sample_data, initial_data,
                       sample_resultset, mock_log_parser):
    """
    Test cycling one job in a group of jobs to confirm there are no
    unexpected deletions
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset, False)

    time_now = time.time()
    cycle_date_ts = int(time_now - 7 * 24 * 3600)

    jm.jobs_execute(
        proc="jobs_test.updates.set_result_sets_push_timestamp",
        placeholders=[time_now]
    )

    jm.jobs_execute(
        proc="jobs_test.updates.set_one_result_set_push_timestamp",
        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.jobs_execute(
        proc="jobs_test.selects.get_result_set_jobs",
        placeholders=[1]
    )

    jobs_before = jm.jobs_execute(proc="jobs_test.selects.jobs")

    call_command('cycle_data', sleep_time=0, cycle_interval=1, debug=True)

    jobs_after = jm.jobs_execute(proc="jobs_test.selects.jobs")

    # Confirm that the target result set has no jobs in the
    # jobs table
    jobs_to_be_deleted_after = jm.jobs_execute(
        proc="jobs_test.selects.get_result_set_jobs",
        placeholders=[1]
    )
    assert len(jobs_to_be_deleted_after) == 0

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)

def test_cycle_all_data_in_chunks(jm, sample_data, sample_resultset,
                                  test_repository, mock_log_parser):
    """
    Test cycling the sample data in chunks.
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)

    # build a date that will cause the data to be cycled
    time_now = time.time()
    cycle_date_ts = int(time_now - 7 * 24 * 3600)

    jm.execute(
        proc="jobs_test.updates.set_jobs_submit_timestamp",
        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.execute(
        proc="jobs_test.selects.get_jobs_for_cycling",
        placeholders=[time_now - 24 * 3600]
    )

    job = jm.get_job(jobs_to_be_deleted[0]['id'])[0]
    create_failure_lines(test_repository, job["job_guid"],
                         [(test_line, {})] * 7)

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    assert TestFailureLine.search().params(search_type="count").execute().hits.total > 0

    call_command('cycle_data', sleep_time=0, days=1, chunk_size=3)
    refresh_all()

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)

    # There should be no jobs after cycling
    assert len(jobs_after) == 0
    assert Job.objects.count() == 0
    assert FailureLine.objects.count() == 0
    assert JobDetail.objects.count() == 0
    assert TestFailureLine.search().params(search_type="count").execute().hits.total == 0

def test_ingest_twice_log_parsing_status_changed(test_repository,
                                                 failure_classifications,
                                                 sample_data, sample_push,
                                                 mock_log_parser):
    """Process a single job twice, but change the log parsing status
    between runs, and verify that nothing changes"""
    job_data = sample_data.job_data[:1]

    job_data[0]['job']['state'] = 'running'
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)
    assert JobLog.objects.count() == 1
    for job_log in JobLog.objects.all():
        job_log.update_status(JobLog.FAILED)

    job_data[0]['job']['state'] = 'completed'
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)
    assert JobLog.objects.count() == 1
    for job_log in JobLog.objects.all():
        assert job_log.status == JobLog.FAILED

def test_cycle_all_data(jm, sample_data, sample_resultset, test_repository,
                        mock_log_parser, failure_lines):
    """
    Test cycling the sample data
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)

    time_now = time.time()
    cycle_date_ts = time_now - 7 * 24 * 3600

    jm.execute(
        proc="jobs_test.updates.set_jobs_submit_timestamp",
        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.execute(
        proc="jobs_test.selects.get_jobs_for_cycling",
        placeholders=[time_now - 24 * 3600]
    )

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    call_command('cycle_data', sleep_time=0, days=1)
    refresh_all()

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)

    # There should be no jobs or failure lines after cycling
    assert len(jobs_after) == 0
    assert FailureLine.objects.count() == 0
    assert Job.objects.count() == 0
    assert JobDetail.objects.count() == 0
    assert JobLog.objects.count() == 0

    # There should be nothing in elastic search after cycling
    assert TestFailureLine.search().params(search_type="count").execute().hits.total == 0

def test_cycle_all_data(test_repository, failure_classifications, sample_data,
                        sample_push, mock_log_parser, failure_lines):
    """
    Test cycling the sample data
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)

    # set the submit time to be a week before today
    cycle_date_ts = datetime.datetime.now() - datetime.timedelta(weeks=1)
    for job in Job.objects.all():
        job.submit_time = cycle_date_ts
        job.save()

    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1)

    # There should be no jobs or failure lines after cycling
    assert Job.objects.count() == 0
    assert FailureLine.objects.count() == 0
    assert JobDetail.objects.count() == 0
    assert JobLog.objects.count() == 0

def test_cycle_all_data_in_chunks(
        test_repository, failure_classifications, sample_data, sample_push,
        mock_log_parser,
):
    """
    Test cycling the sample data in chunks.
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push, False)

    # build a date that will cause the data to be cycled
    cycle_date_ts = datetime.now() - timedelta(weeks=1)
    for job in Job.objects.all():
        job.submit_time = cycle_date_ts
        job.save()

    create_failure_lines(Job.objects.get(id=1),
                         [(test_line, {})] * 7)

    call_command('cycle_data', 'from:treeherder', sleep_time=0, days=1,
                 chunk_size=3)

    # There should be no jobs after cycling
    assert Job.objects.count() == 0
    assert FailureLine.objects.count() == 0

def test_cycle_all_data(jm, refdata, sample_data, initial_data,
                        sample_resultset, mock_log_parser):
    """
    Test cycling the sample data
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset, False)

    # build a date that will cause the data to be cycled
    cycle_date_ts = int(time.time() - (jm.DATA_CYCLE_INTERVAL + 100000))

    jm.get_dhub(jm.CT_JOBS).execute(
        proc="jobs_test.updates.set_result_sets_push_timestamp",
        placeholders=[cycle_date_ts])

    jobs_to_be_deleted = jm.get_dhub(jm.CT_JOBS).execute(
        proc="jobs_test.selects.get_jobs_for_cycling",
        placeholders=[cycle_date_ts])
    job_count = len(jobs_to_be_deleted)

    jobs_before = jm.get_dhub(jm.CT_JOBS).execute(proc="jobs_test.selects.jobs")

    sql_targets = jm.cycle_data({}, False)

    jobs_after = jm.get_dhub(jm.CT_JOBS).execute(proc="jobs_test.selects.jobs")

    jm.disconnect()
    refdata.disconnect()

    assert len(jobs_before) == job_count

    # There should be no jobs after cycling
    assert len(jobs_after) == 0
    assert sql_targets['jobs.deletes.cycle_job'] == job_count

def test_ingest_single_sample_job(jm, sample_data, sample_resultset,
                                  test_repository, mock_log_parser):
    """Process a single job structure in the job_data.txt file"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset)

def test_cycle_one_job(jm, sample_data, sample_resultset, test_repository,
                       mock_log_parser, elasticsearch, failure_lines):
    """
    Test cycling one job in a group of jobs to confirm there are no
    unexpected deletions
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)

    job_not_deleted = jm.get_job(2)[0]

    extra_objects = {
        'failure_lines': (FailureLine,
                          create_failure_lines(test_repository,
                                               job_not_deleted["job_guid"],
                                               [(test_line, {}),
                                                (test_line, {"subtest": "subtest2"})])),
        'job_details': (JobDetail,
                        [JobDetail.objects.create(
                            job=Job.objects.get(guid=job_not_deleted["job_guid"]),
                            title='test',
                            value='testvalue')])
    }

    time_now = time.time()
    cycle_date_ts = int(time_now - 7 * 24 * 3600)

    jm.execute(proc="jobs_test.updates.set_jobs_submit_timestamp",
               placeholders=[time_now])

    jm.execute(proc="jobs_test.updates.set_one_job_submit_timestamp",
               placeholders=[cycle_date_ts])

    jobs_to_be_deleted = jm.execute(
        proc="jobs_test.selects.get_one_job_for_cycling",
        placeholders=[1])
    num_job_logs_to_be_deleted = JobLog.objects.filter(
        job__project_specific_id__in=[job['id'] for job in jobs_to_be_deleted]).count()

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")
    job_logs_before = JobLog.objects.count()

    call_command('cycle_data', sleep_time=0, days=1, debug=True)
    refresh_all()

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    # Confirm that the target result set has no jobs in the
    # jobs table
    jobs_to_be_deleted_after = jm.execute(
        proc="jobs_test.selects.get_one_job_for_cycling",
        placeholders=[1])
    assert len(jobs_to_be_deleted_after) == 0

    assert len(jobs_after) == len(jobs_before) - len(jobs_to_be_deleted)
    assert len(jobs_after) == Job.objects.count()

    assert JobLog.objects.count() == (job_logs_before -
                                      num_job_logs_to_be_deleted)

    for (object_type, objects) in extra_objects.values():
        assert (set(item.id for item in object_type.objects.all()) ==
                set(item.id for item in objects))

    assert set(int(item.meta.id)
               for item in TestFailureLine.search().execute()) == set(
                   item.id for item in extra_objects["failure_lines"][1])

def test_ingest_single_sample_job(test_repository, failure_classifications,
                                  sample_data, sample_push, mock_log_parser):
    """Process a single job structure in the job_data.txt file"""
    job_data = sample_data.job_data[:1]
    test_utils.do_job_ingestion(test_repository, job_data, sample_push)

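# The excerpts above omit their module-level imports and fixtures, which live
# in the surrounding test modules and conftest. A minimal header that the
# newer Django-ORM-style versions appear to assume is sketched below; the
# module paths are assumptions inferred from the names used above, not
# confirmed against the source, and the older `jm`/`refdata` versions pull
# their helpers (get_job_list, cycle_data, the jobs_test.* procs) from the
# legacy JobsModel/Datasource layer instead. Some versions use
# `import datetime`, others `from datetime import datetime, timedelta`.
import copy
import datetime
import time

from django.core.management import call_command
from django.db.models import Max

# assumed locations of the test helpers and model classes used above
from tests import test_utils
from treeherder.model.models import (FailureLine, Job, JobDetail, JobGroup,
                                     JobLog, JobType, Machine)
# create_failure_lines, test_line, TestFailureLine, refresh_all, store_job_data
# and _remove_existing_jobs come from the autoclassification / elasticsearch /
# ETL helpers of the corresponding Treeherder revision; their import paths
# differ between the versions shown here.
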