def test_cycle_all_data_in_chunks(test_repository, failure_classifications,
                                  sample_data, sample_resultset,
                                  mock_log_parser):
    """
    Cycle week-old sample jobs with ``chunk_size=3`` and expect nothing left.

    Ingests the first 20 sample jobs, backdates all of them, attaches
    failure lines to the job with id 1 and then runs the ``cycle_data``
    command.  Afterwards no Job, FailureLine or JobDetail rows and no
    TestFailureLine search hits may remain.

    The arguments are presumably fixtures supplied by the test harness.
    """
    test_utils.do_job_ingestion(test_repository,
                                sample_data.job_data[:20],
                                sample_resultset,
                                False)

    # Backdate every job by a week, which is older than the ``days=1``
    # passed to cycle_data below.
    one_week_ago = datetime.datetime.now() - datetime.timedelta(weeks=1)
    for stale_job in Job.objects.all():
        stale_job.submit_time = one_week_ago
        stale_job.save()

    first_job = Job.objects.get(id=1)
    create_failure_lines(first_job, [(test_line, {})] * 7)

    # Sanity check: the search index has something to delete.
    assert TestFailureLine.search().count() > 0

    call_command('cycle_data', sleep_time=0, days=1, chunk_size=3)
    refresh_all()

    # Nothing should survive cycling, in the database or in the index.
    for model in (Job, FailureLine, JobDetail):
        assert model.objects.count() == 0
    assert TestFailureLine.search().count() == 0
def test_cycle_all_data(test_repository, failure_classifications,
                        sample_data, sample_resultset, mock_log_parser,
                        failure_lines):
    """
    Cycle week-old sample jobs in one go and expect nothing left behind.

    Ingests the first 20 sample jobs, sets every job's submit time to one
    week ago and runs ``cycle_data`` with ``days=1``.  Afterwards the Job,
    FailureLine, JobDetail and JobLog tables and the TestFailureLine
    search index must all be empty.

    The arguments are presumably fixtures supplied by the test harness.
    """
    ingested = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, ingested,
                                sample_resultset, False)

    # A submit time of one week before now makes every job eligible.
    week = datetime.timedelta(weeks=1)
    backdated = datetime.datetime.now() - week
    for old_job in Job.objects.all():
        old_job.submit_time = backdated
        old_job.save()

    call_command('cycle_data', sleep_time=0, days=1)
    refresh_all()

    # No jobs, failure lines, details or logs may remain in the database.
    for model in (Job, FailureLine, JobDetail, JobLog):
        assert model.objects.count() == 0

    # The elasticsearch index must be empty as well.
    remaining_hits = TestFailureLine.search().count()
    assert remaining_hits == 0
def __call__(self, failure_lines):
    """
    Search elasticsearch for a classified match for each failure line.

    Lines whose ``action`` is not ``"test_result"`` or that have no
    message are skipped.  For the others a phrase query on the first 1024
    characters of the message is run, restricted to documents with the
    same test, status and expected values (and subtest, when set) that
    have a ``best_classification``.  The hits are handed to
    ``MatchScorer`` and, if it picks one, a ``Match`` is recorded.

    Increments ``self.lines`` by the number of lines received and
    ``self.calls`` once per search attempted.

    :param failure_lines: sized iterable of failure line objects.
    :returns: list of ``Match`` objects, possibly empty.
    :raises: whatever ``search.execute()`` raises, after logging it.
    """
    rv = []
    self.lines += len(failure_lines)
    for failure_line in failure_lines:
        if failure_line.action != "test_result" or not failure_line.message:
            logger.debug("Skipped elasticsearch matching")
            continue

        match = ESMatch(message={"query": failure_line.message[:1024],
                                 "type": "phrase"})
        search = (TestFailureLine.search()
                  .filter("term", test=failure_line.test)
                  .filter("term", status=failure_line.status)
                  .filter("term", expected=failure_line.expected)
                  .filter("exists", field="best_classification")
                  .query(match))
        if failure_line.subtest:
            search = search.filter("term", subtest=failure_line.subtest)

        try:
            self.calls += 1
            resp = search.execute()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit propagate without being
            # reported as a failed lookup.
            logger.error("Elastic search lookup failed: %s %s %s %s %s",
                         failure_line.test, failure_line.subtest,
                         failure_line.status, failure_line.expected,
                         failure_line.message)
            raise

        scorer = MatchScorer(failure_line.message)
        matches = [(item, item.message) for item in resp]
        best_match = scorer.best_match(matches)
        if best_match:
            logger.debug("Matched using elastic search test matcher")
            # NOTE(review): best_match[1] appears to be the matched
            # document and best_match[0] its score -- confirm against
            # MatchScorer.best_match.
            rv.append(Match(failure_line,
                            ClassifiedFailure.objects.get(
                                id=best_match[1].best_classification),
                            best_match[0]))
    return rv
def query_best(self, text_log_error):
    """
    Search elasticsearch for the best classified match for one error.

    The failure line is read from ``text_log_error.metadata.failure_line``.
    If its ``action`` is not ``"test_result"`` or it has no message the
    lookup is skipped.  Otherwise a phrase query on the first 1024
    characters of the message is run, restricted to documents with the
    same test, status and expected values (and subtest, when set) that
    have a ``best_classification``, and the hits are scored with
    ``MatchScorer``.

    Increments ``self.calls`` once per search attempted.

    :returns: ``(best_classification, best_match[0])`` for the chosen hit,
        or ``None`` when the lookup is skipped or nothing is selected.
    :raises: whatever ``search.execute()`` raises, after logging it.
    """
    failure_line = text_log_error.metadata.failure_line

    if failure_line.action != "test_result" or not failure_line.message:
        logger.debug("Skipped elasticsearch matching")
        return None

    match = ESMatch(message={"query": failure_line.message[:1024],
                             "type": "phrase"})
    search = (TestFailureLine.search()
              .filter("term", test=failure_line.test)
              .filter("term", status=failure_line.status)
              .filter("term", expected=failure_line.expected)
              .filter("exists", field="best_classification")
              .query(match))
    if failure_line.subtest:
        search = search.filter("term", subtest=failure_line.subtest)

    try:
        self.calls += 1
        resp = search.execute()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit propagate without being reported
        # as a failed lookup.
        logger.error("Elastic search lookup failed: %s %s %s %s %s",
                     failure_line.test, failure_line.subtest,
                     failure_line.status, failure_line.expected,
                     failure_line.message)
        raise

    scorer = MatchScorer(failure_line.message)
    matches = [(item, item.message) for item in resp]
    best_match = scorer.best_match(matches)
    if best_match:
        # NOTE(review): best_match[1] appears to be the matched document
        # and best_match[0] its score -- confirm against MatchScorer.
        return (best_match[1].best_classification, best_match[0])
    return None
def query_best(self, text_log_error):
    """
    Find the best already-classified elasticsearch hit for an error line.

    Reads the failure line from ``text_log_error.metadata.failure_line``.
    Only ``"test_result"`` lines with a message are searched; others are
    skipped.  The search is a phrase query on the first 1024 characters of
    the message, filtered on test, status, expected, the existence of
    ``best_classification`` and, when set, subtest.  ``self.calls`` is
    incremented for every search attempted.

    :returns: ``(best_classification, best_match[0])`` of the hit picked
        by ``MatchScorer``, or ``None`` if skipped or nothing is picked.
    :raises: any exception from ``search.execute()``, after logging it.
    """
    line = text_log_error.metadata.failure_line

    searchable = line.action == "test_result" and line.message
    if not searchable:
        logger.debug("Skipped elasticsearch matching")
        return

    phrase_query = ESMatch(message={
        "query": line.message[:1024],
        "type": "phrase",
    })

    # Build the query one step at a time, in the same order as a single
    # chained expression would.
    search = TestFailureLine.search()
    search = search.filter("term", test=line.test)
    search = search.filter("term", status=line.status)
    search = search.filter("term", expected=line.expected)
    search = search.filter("exists", field="best_classification")
    search = search.query(phrase_query)
    if line.subtest:
        search = search.filter("term", subtest=line.subtest)

    try:
        self.calls += 1
        hits = search.execute()
    except Exception:
        logger.error("Elastic search lookup failed: %s %s %s %s %s",
                     line.test, line.subtest, line.status,
                     line.expected, line.message)
        raise

    candidates = [(hit, hit.message) for hit in hits]
    winner = MatchScorer(line.message).best_match(candidates)
    if not winner:
        return
    return (winner[1].best_classification, winner[0])
def test_cycle_all_data_in_chunks(jm, sample_data, sample_resultset,
                                  test_repository, mock_log_parser):
    """
    Cycle week-old sample jobs with ``chunk_size=3`` and expect nothing left.

    Ingests the first 20 sample jobs, sets the jobs' last-modified value to
    one week ago through the ``set_jobs_last_modified`` proc, attaches
    failure lines to the first job selected for cycling and runs the
    ``cycle_data`` command.  Afterwards the jobs proc, the Job,
    FailureLine and JobDetail models and the TestFailureLine index must
    all report zero entries.

    The arguments are presumably fixtures supplied by the test harness.
    """
    def indexed_failure_lines():
        # Number of TestFailureLine documents currently in the index.
        counting = TestFailureLine.search().params(search_type="count")
        return counting.execute().hits.total

    test_utils.do_job_ingestion(jm, sample_data.job_data[:20],
                                sample_resultset, False)

    # build a date that will cause the data to be cycled
    seconds_per_day = 24 * 3600
    now = time.time()
    week_ago = int(now - 7 * seconds_per_day)
    jm.execute(proc="jobs_test.updates.set_jobs_last_modified",
               placeholders=[week_ago])

    doomed_jobs = jm.execute(proc="jobs_test.selects.get_jobs_for_cycling",
                             placeholders=[now - seconds_per_day])

    first_doomed = jm.get_job(doomed_jobs[0]['id'])[0]
    create_failure_lines(test_repository,
                         first_doomed["job_guid"],
                         [(test_line, {})] * 7)

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")

    assert indexed_failure_lines() > 0

    call_command('cycle_data', sleep_time=0, days=1, chunk_size=3)
    refresh_all()

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    assert len(jobs_after) == len(jobs_before) - len(doomed_jobs)

    # There should be no jobs after cycling
    assert len(jobs_after) == 0
    for model in (Job, FailureLine, JobDetail):
        assert model.objects.count() == 0
    assert indexed_failure_lines() == 0
def test_cycle_all_data_in_chunks(jm, sample_data, sample_resultset,
                                  test_repository, mock_log_parser):
    """
    Cycle week-old sample jobs with ``chunk_size=3`` and expect nothing left.

    Ingests the first 20 sample jobs, sets the jobs' submit timestamp to
    one week ago through the ``set_jobs_submit_timestamp`` proc, attaches
    failure lines to the first job selected for cycling and runs the
    ``cycle_data`` command.  Afterwards the jobs proc, the Job,
    FailureLine and JobDetail models and the TestFailureLine index must
    all report zero entries.

    The arguments are presumably fixtures supplied by the test harness.
    """
    jobs_proc = "jobs_test.selects.jobs"

    test_utils.do_job_ingestion(jm, sample_data.job_data[:20],
                                sample_resultset, False)

    # build a date that will cause the data to be cycled
    started = time.time()
    one_day = 24 * 3600
    cutoff_week = int(started - 7 * one_day)
    jm.execute(proc="jobs_test.updates.set_jobs_submit_timestamp",
               placeholders=[cutoff_week])

    to_delete = jm.execute(proc="jobs_test.selects.get_jobs_for_cycling",
                           placeholders=[started - one_day])

    target_job = jm.get_job(to_delete[0]['id'])[0]
    repeated_lines = [(test_line, {})] * 7
    create_failure_lines(test_repository, target_job["job_guid"],
                         repeated_lines)

    before = jm.execute(proc=jobs_proc)

    # The index must hold something for the cycling to remove.
    count_search = TestFailureLine.search().params(search_type="count")
    assert count_search.execute().hits.total > 0

    call_command('cycle_data', sleep_time=0, days=1, chunk_size=3)
    refresh_all()

    after = jm.execute(proc=jobs_proc)

    assert len(after) == len(before) - len(to_delete)

    # There should be no jobs after cycling
    assert len(after) == 0
    assert Job.objects.count() == 0
    assert FailureLine.objects.count() == 0
    assert JobDetail.objects.count() == 0

    count_search = TestFailureLine.search().params(search_type="count")
    assert count_search.execute().hits.total == 0
def test_cycle_all_but_one_job(test_repository, failure_classifications,
                               sample_data, sample_resultset,
                               mock_log_parser, elasticsearch,
                               failure_lines):
    """
    Test cycling all but one job in a group of jobs to confirm there are no
    unexpected deletions.

    The job with id 2 gets a submit time of "now" plus two failure lines
    and one job detail; every other job is backdated by a week.  After
    ``cycle_data`` runs with ``days=1`` only that job, its job logs, and
    the failure lines / job detail created here may remain, both in the
    database and in the TestFailureLine search index.

    The arguments are presumably fixtures supplied by the test harness.
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_resultset,
                                False)

    # one job should not be deleted, set its submit time to now
    job_not_deleted = Job.objects.get(id=2)
    job_not_deleted.submit_time = datetime.datetime.now()
    job_not_deleted.save()

    extra_objects = {
        'failure_lines': (
            FailureLine,
            create_failure_lines(job_not_deleted,
                                 [(test_line, {}),
                                  (test_line, {"subtest": "subtest2"})])),
        'job_details': (
            JobDetail,
            [JobDetail.objects.create(job=job_not_deleted,
                                      title='test',
                                      value='testvalue')]),
    }

    # set other job's submit time to be a week ago from now
    cycle_date_ts = datetime.datetime.now() - datetime.timedelta(weeks=1)
    for job in Job.objects.all().exclude(id=job_not_deleted.id):
        job.submit_time = cycle_date_ts
        job.save()

    # Exclude by the owning job rather than by the JobLog's own primary
    # key: the logs expected to disappear are those of every other job.
    num_job_logs_to_be_deleted = JobLog.objects.all().exclude(
        job__id=job_not_deleted.id).count()
    num_job_logs_before = JobLog.objects.count()

    call_command('cycle_data', sleep_time=0, days=1, debug=True)
    refresh_all()

    assert Job.objects.count() == 1
    assert JobLog.objects.count() == (num_job_logs_before -
                                      num_job_logs_to_be_deleted)

    # Exactly the objects attached to the surviving job must remain.
    for (object_type, objects) in extra_objects.values():
        assert ({item.id for item in object_type.objects.all()} ==
                {item.id for item in objects})

    # The search index must hold exactly the surviving failure lines.
    indexed_ids = {int(item.meta.id)
                   for item in TestFailureLine.search().execute()}
    assert indexed_ids == {item.id
                           for item in extra_objects["failure_lines"][1]}
def test_cycle_all_but_one_job(test_repository, failure_classifications,
                               sample_data, sample_resultset,
                               mock_log_parser, elasticsearch,
                               failure_lines):
    """
    Test cycling all but one job in a group of jobs to confirm there are no
    unexpected deletions.

    The job with id 2 is given a current submit time, two failure lines
    and one job detail, while all other jobs are backdated by one week.
    Running ``cycle_data`` with ``days=1`` must then leave exactly one
    job, that job's logs, and the objects created here -- in the database
    as well as in the TestFailureLine search index.

    The arguments are presumably fixtures supplied by the test harness.
    """
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(test_repository, job_data, sample_resultset,
                                False)

    # one job should not be deleted, set its submit time to now
    job_not_deleted = Job.objects.get(id=2)
    job_not_deleted.submit_time = datetime.datetime.now()
    job_not_deleted.save()

    extra_objects = {
        'failure_lines': (FailureLine, create_failure_lines(
            job_not_deleted,
            [(test_line, {}), (test_line, {"subtest": "subtest2"})])),
        'job_details': (JobDetail, [JobDetail.objects.create(
            job=job_not_deleted, title='test', value='testvalue')]),
    }

    # set other job's submit time to be a week ago from now
    cycle_date_ts = datetime.datetime.now() - datetime.timedelta(weeks=1)
    for job in Job.objects.all().exclude(id=job_not_deleted.id):
        job.submit_time = cycle_date_ts
        job.save()

    # Filter on the related job, not on JobLog.id: comparing a JobLog
    # primary key with a Job primary key only gives the right count when
    # the two id sequences happen to line up.
    num_job_logs_to_be_deleted = JobLog.objects.all().exclude(
        job__id=job_not_deleted.id).count()
    num_job_logs_before = JobLog.objects.count()

    call_command('cycle_data', sleep_time=0, days=1, debug=True)
    refresh_all()

    assert Job.objects.count() == 1
    assert JobLog.objects.count() == (num_job_logs_before -
                                      num_job_logs_to_be_deleted)

    # Only the objects attached to the surviving job may remain.
    for (object_type, objects) in extra_objects.values():
        remaining_ids = {item.id for item in object_type.objects.all()}
        assert remaining_ids == {item.id for item in objects}

    # Same check against the search index.
    expected_ids = {item.id for item in extra_objects["failure_lines"][1]}
    assert {int(item.meta.id)
            for item in TestFailureLine.search().execute()} == expected_ids
def test_cycle_all_data(jm, sample_data, sample_resultset, test_repository,
                        mock_log_parser, failure_lines):
    """
    Cycle week-old sample jobs in one go and expect nothing left behind.

    Ingests the first 20 sample jobs, sets the jobs' last-modified value to
    one week ago through the ``set_jobs_last_modified`` proc and runs
    ``cycle_data`` with ``days=1``.  Afterwards the jobs proc, the
    FailureLine, Job, JobDetail and JobLog models and the TestFailureLine
    index must all report zero entries.

    The arguments are presumably fixtures supplied by the test harness.
    """
    test_utils.do_job_ingestion(jm, sample_data.job_data[:20],
                                sample_resultset, False)

    day_seconds = 24 * 3600
    now = time.time()
    a_week_back = now - 7 * day_seconds

    jm.execute(proc="jobs_test.updates.set_jobs_last_modified",
               placeholders=[a_week_back])

    doomed = jm.execute(proc="jobs_test.selects.get_jobs_for_cycling",
                        placeholders=[now - day_seconds])

    before = jm.execute(proc="jobs_test.selects.jobs")

    call_command('cycle_data', sleep_time=0, days=1)
    refresh_all()

    after = jm.execute(proc="jobs_test.selects.jobs")

    assert len(after) == len(before) - len(doomed)

    # There should be no jobs or failure lines after cycling
    assert len(after) == 0
    for model in (FailureLine, Job, JobDetail, JobLog):
        assert model.objects.count() == 0

    # There should be nothing in elastic search after cycling
    count_only = TestFailureLine.search().params(search_type="count")
    assert count_only.execute().hits.total == 0
def test_cycle_all_data(jm, sample_data, sample_resultset, test_repository,
                        mock_log_parser, failure_lines):
    """
    Cycle week-old sample jobs in one go and expect nothing left behind.

    Ingests the first 20 sample jobs, sets the jobs' submit timestamp to
    one week ago through the ``set_jobs_submit_timestamp`` proc and runs
    ``cycle_data`` with ``days=1``.  Afterwards the jobs proc, the
    FailureLine, Job, JobDetail and JobLog models and the TestFailureLine
    index must all report zero entries.

    The arguments are presumably fixtures supplied by the test harness.
    """
    all_jobs_proc = "jobs_test.selects.jobs"

    sample_jobs = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, sample_jobs, sample_resultset, False)

    started_at = time.time()
    week_old_ts = started_at - 7 * 24 * 3600
    jm.execute(proc="jobs_test.updates.set_jobs_submit_timestamp",
               placeholders=[week_old_ts])

    day_old_ts = started_at - 24 * 3600
    expected_deletions = jm.execute(
        proc="jobs_test.selects.get_jobs_for_cycling",
        placeholders=[day_old_ts])

    rows_before = jm.execute(proc=all_jobs_proc)

    call_command('cycle_data', sleep_time=0, days=1)
    refresh_all()

    rows_after = jm.execute(proc=all_jobs_proc)

    assert len(rows_after) == len(rows_before) - len(expected_deletions)

    # There should be no jobs or failure lines after cycling
    assert len(rows_after) == 0
    assert FailureLine.objects.count() == 0
    assert Job.objects.count() == 0
    assert JobDetail.objects.count() == 0
    assert JobLog.objects.count() == 0

    # There should be nothing in elastic search after cycling
    es_result = (TestFailureLine.search()
                 .params(search_type="count")
                 .execute())
    assert es_result.hits.total == 0
def test_cycle_one_job(jm, sample_data, sample_resultset, test_repository,
                       mock_log_parser, elasticsearch, failure_lines):
    """
    Cycle a single job out of a group and check nothing else is deleted.

    Ingests the first 20 sample jobs and attaches two failure lines and a
    job detail to the job returned by ``jm.get_job(2)``.  All jobs then get
    a current submit timestamp and the ``set_one_job_submit_timestamp``
    proc is called with a week-old value.  After ``cycle_data`` runs with
    ``days=1`` the job counts, job log counts, the objects created here
    and the TestFailureLine index are checked.

    The arguments are presumably fixtures supplied by the test harness.
    """
    test_utils.do_job_ingestion(jm, sample_data.job_data[:20],
                                sample_resultset, False)

    kept_job = jm.get_job(2)[0]
    kept_guid = kept_job["job_guid"]

    # Objects expected to survive: failure lines first, then the detail.
    kept_failure_lines = create_failure_lines(
        test_repository,
        kept_guid,
        [(test_line, {}), (test_line, {"subtest": "subtest2"})])
    kept_detail = JobDetail.objects.create(
        job=Job.objects.get(guid=kept_guid),
        title='test',
        value='testvalue')
    survivors = [
        (FailureLine, kept_failure_lines),
        (JobDetail, [kept_detail]),
    ]

    now = time.time()
    week_ago = int(now - 7 * 24 * 3600)

    jm.execute(proc="jobs_test.updates.set_jobs_submit_timestamp",
               placeholders=[now])
    jm.execute(proc="jobs_test.updates.set_one_job_submit_timestamp",
               placeholders=[week_ago])

    doomed = jm.execute(proc="jobs_test.selects.get_one_job_for_cycling",
                        placeholders=[1])
    doomed_ids = [row['id'] for row in doomed]
    doomed_log_count = JobLog.objects.filter(
        job__project_specific_id__in=doomed_ids).count()

    jobs_before = jm.execute(proc="jobs_test.selects.jobs")
    logs_before = JobLog.objects.count()

    call_command('cycle_data', sleep_time=0, days=1, debug=True)
    refresh_all()

    jobs_after = jm.execute(proc="jobs_test.selects.jobs")

    # Confirm that the target result set has no jobs in the
    # jobs table
    doomed_after = jm.execute(
        proc="jobs_test.selects.get_one_job_for_cycling",
        placeholders=[1])
    assert len(doomed_after) == 0

    assert len(jobs_after) == len(jobs_before) - len(doomed)
    assert len(jobs_after) == Job.objects.count()

    assert JobLog.objects.count() == (logs_before - doomed_log_count)

    # Each model must hold exactly the objects created above.
    for model, expected in survivors:
        found_ids = {obj.id for obj in model.objects.all()}
        assert found_ids == {obj.id for obj in expected}

    # The search index must hold exactly the created failure lines.
    indexed_ids = {int(hit.meta.id)
                   for hit in TestFailureLine.search().execute()}
    assert indexed_ids == {line.id for line in kept_failure_lines}
def test_cycle_one_job(jm, sample_data, sample_resultset, test_repository,
                       mock_log_parser, elasticsearch, failure_lines):
    """
    Cycle a single job out of a group and check nothing else is deleted.

    Ingests the first 20 sample jobs and attaches two failure lines and a
    job detail to the job returned by ``jm.get_job(2)``.  All jobs then get
    a current last-modified value and the
    ``set_one_job_last_modified_timestamp`` proc is called with a week-old
    value.  After ``cycle_data`` runs with ``days=1`` the job counts, job
    log counts, the objects created here and the TestFailureLine index are
    checked.

    The arguments are presumably fixtures supplied by the test harness.
    """
    one_job_proc = "jobs_test.selects.get_one_job_for_cycling"
    all_jobs_proc = "jobs_test.selects.jobs"

    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)

    survivor = jm.get_job(2)[0]

    # Created in this order: failure lines first, then the job detail.
    survivor_lines = create_failure_lines(
        test_repository, survivor["job_guid"],
        [(test_line, {}), (test_line, {"subtest": "subtest2"})])
    survivor_details = [JobDetail.objects.create(
        job=Job.objects.get(guid=survivor["job_guid"]),
        title='test', value='testvalue')]
    expected_by_model = (
        (FailureLine, survivor_lines),
        (JobDetail, survivor_details),
    )

    current_ts = time.time()
    stale_ts = int(current_ts - 7 * 24 * 3600)

    jm.execute(proc="jobs_test.updates.set_jobs_last_modified",
               placeholders=[current_ts])
    jm.execute(
        proc="jobs_test.updates.set_one_job_last_modified_timestamp",
        placeholders=[stale_ts])

    to_be_cycled = jm.execute(proc=one_job_proc, placeholders=[1])
    logs_to_be_cycled = JobLog.objects.filter(
        job__project_specific_id__in=[row['id'] for row in to_be_cycled]
    ).count()

    jobs_before = jm.execute(proc=all_jobs_proc)
    job_logs_before = JobLog.objects.count()

    call_command('cycle_data', sleep_time=0, days=1, debug=True)
    refresh_all()

    jobs_after = jm.execute(proc=all_jobs_proc)

    # Confirm that the target result set has no jobs in the
    # jobs table
    still_to_be_cycled = jm.execute(proc=one_job_proc, placeholders=[1])
    assert len(still_to_be_cycled) == 0

    assert len(jobs_after) == len(jobs_before) - len(to_be_cycled)
    assert len(jobs_after) == Job.objects.count()

    assert JobLog.objects.count() == (job_logs_before - logs_to_be_cycled)

    # Each model must contain exactly the objects created above.
    for model, kept in expected_by_model:
        assert ({row.id for row in model.objects.all()} ==
                {row.id for row in kept})

    # The index must contain exactly the created failure lines.
    hits = TestFailureLine.search().execute()
    assert ({int(hit.meta.id) for hit in hits} ==
            {line.id for line in survivor_lines})