def test_integration_five_person_data_retraction(self, mock_hpo_bucket,
                                                 mock_bucket,
                                                 mock_extract_pids):
    """Verify GCS retraction removes exactly the rows belonging to self.pids.

    For each five-person CDM file, count the data rows and how many of them
    reference a retracted participant id, upload the file to two test
    folders, run the retraction, and confirm each file shrank by exactly
    the expected number of rows.

    :param mock_hpo_bucket: mock returning the site bucket name
    :param mock_bucket: mock returning the drc bucket name
    :param mock_extract_pids: mock returning the participant ids to retract
    """
    mock_hpo_bucket.return_value = self.site_bucket
    mock_bucket.return_value = self.bucket
    mock_extract_pids.return_value = self.pids
    lines_to_remove = {}
    total_lines_prior = {}
    for file_path in test_util.FIVE_PERSONS_FILES:
        # generate results files
        file_name = file_path.split('/')[-1]
        table_name = file_name.split('.')[0]
        lines_to_remove[file_name] = 0
        total_lines_prior[file_name] = 0
        with open(file_path) as f:
            # skip header
            next(f)
            for line in f:
                line = line.strip()
                if line != '':
                    # split each row once instead of once per condition
                    fields = line.split(",")
                    if (table_name in rd.PID_IN_COL1 and
                            int(fields[0]) in self.pids) or \
                            (table_name in rd.PID_IN_COL2 and
                             int(fields[1]) in self.pids):
                        lines_to_remove[file_name] += 1
                    total_lines_prior[file_name] += 1

        # write file to cloud for testing
        test_util.write_cloud_file(self.bucket,
                                   file_path,
                                   prefix=self.folder_prefix_1)
        test_util.write_cloud_file(self.bucket,
                                   file_path,
                                   prefix=self.folder_prefix_2)

    retract_result = rd.run_gcs_retraction(self.project_id,
                                           self.sandbox_dataset_id,
                                           self.pid_table_id,
                                           self.hpo_id,
                                           folder='all_folders',
                                           force_flag=True)

    total_lines_post = {}
    for file_path in test_util.FIVE_PERSONS_FILES:
        file_name = file_path.split('/')[-1]
        actual_result_contents = test_util.read_cloud_file(
            self.bucket, self.folder_prefix_1 + file_name)
        # convert to list and remove header and last list item since it is a newline
        total_lines_post[file_name] = len(
            actual_result_contents.split('\n')[1:-1])

    # lines_to_remove and total_lines_prior are filled over the same keys
    # in the loop above, so the former `if key in lines_to_remove` guard
    # (and its unreachable else branch) has been removed
    for file_name, prior_count in total_lines_prior.items():
        self.assertEqual(lines_to_remove[file_name],
                         prior_count - total_lines_post[file_name])

    # metadata for each updated file is returned
    self.assertEqual(len(retract_result[self.folder_prefix_1]),
                     len(lines_to_remove))
def _test_html_report_five_person(self, mock_check_cron):
    """Validate the HTML results report produced for the five-person files.

    Disabled (leading underscore). Uploads the five-person CDM files,
    triggers validation, and checks the generated results HTML against the
    expected success/failure fixtures.

    Achilles sometimes fails due to rate limits; accepting either the
    success or the failure fixture lets the test degrade gracefully until
    there is a fix for achilles.

    :param mock_check_cron: mock bypassing the cron-only request check
    """
    # Not sure this test is still relevant (see hpo_report module and tests)
    # TODO refactor or remove this test
    folder_prefix = '2019-01-01/'
    for cdm_file in test_util.FIVE_PERSONS_FILES:
        test_util.write_cloud_file(self.hpo_bucket,
                                   cdm_file,
                                   prefix=folder_prefix)

    with open(test_util.FIVE_PERSON_RESULTS_FILE, 'r') as f:
        expected_result_achilles_success = self._remove_timestamp_tags_from_results(
            f.read())
    with open(test_util.FIVE_PERSON_RESULTS_ACHILLES_ERROR_FILE, 'r') as f:
        expected_result_achilles_failure = self._remove_timestamp_tags_from_results(
            f.read())
    expected_results = [
        expected_result_achilles_success, expected_result_achilles_failure
    ]

    main.app.testing = True
    with main.app.test_client() as c:
        c.get(test_util.VALIDATE_HPO_FILES_URL)
        actual_result = test_util.read_cloud_file(
            self.hpo_bucket, folder_prefix + common.RESULTS_HTML)
        # NOTE: the previous StringIO(actual_result).getvalue() round-trip
        # was a no-op and has been removed
        actual_result_file = self._remove_timestamp_tags_from_results(
            actual_result)
        self.assertIn(actual_result_file, expected_results)
def test_html_report_five_person(self, mock_check_cron, mock_first_run,
                                 mock_rdr_date, mock_required_files_loaded):
    """Check the validation results HTML for the five-person submission.

    Uploads the five-person CDM files, loads the RDR person table and the
    required-labs lookup tables, runs validation, then parses the
    generated report and asserts on the missing-PII and required-lab
    tables.

    :param mock_check_cron: mock bypassing the cron-only request check
    :param mock_first_run: mock forcing is-first-run to False
    :param mock_rdr_date: mock supplying a fixed RDR export date
    :param mock_required_files_loaded: mock forcing required-files-loaded
        to False
    """
    mock_required_files_loaded.return_value = False
    mock_first_run.return_value = False
    rdr_date = '2020-01-01'
    mock_rdr_date.return_value = rdr_date

    # stage the five-person CDM files in the HPO bucket
    for cdm_file in test_util.FIVE_PERSONS_FILES:
        test_util.write_cloud_file(self.hpo_bucket,
                                   cdm_file,
                                   prefix=self.folder_prefix)

    # load the person table into the RDR dataset
    bq_utils.load_table_from_csv(self.project_id, self.rdr_dataset_id,
                                 common.PERSON,
                                 test_util.FIVE_PERSONS_PERSON_CSV)

    # load the required-labs lookup tables
    required_labs.load_measurement_concept_sets_table(
        project_id=self.project_id, dataset_id=self.bigquery_dataset_id)
    required_labs.load_measurement_concept_sets_descendants_table(
        project_id=self.project_id, dataset_id=self.bigquery_dataset_id)

    main.app.testing = True
    with main.app.test_client() as c:
        c.get(test_util.VALIDATE_HPO_FILES_URL)
        actual_result = test_util.read_cloud_file(
            self.hpo_bucket, self.folder_prefix + common.RESULTS_HTML)

    # confirm no notification emails would be sent for this run
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    self.assertFalse(main.is_first_validation_run(folder_items))

    # parse the generated report
    report = bs(actual_result, parser="lxml", features="lxml")

    # missing-PII table: headers and expected record types
    pii_table = report.find('table', id='missing_pii')
    pii_headers = pii_table.find_all('th')
    self.assertEqual('Missing Participant Record Type',
                     pii_headers[0].get_text())
    self.assertEqual('Count', pii_headers[1].get_text())
    pii_rows = pii_table.find_next('tbody').find_all('tr')
    record_types = [row.find('td').text for row in pii_rows]
    self.assertIn(main_consts.EHR_NO_PII, record_types)
    self.assertIn(main_consts.PII_NO_EHR, record_types)
    self.assertIn(main_consts.EHR_NO_RDR.format(date=rdr_date),
                  record_types)
    self.assertIn(main_consts.EHR_NO_PARTICIPANT_MATCH, record_types)

    # required-lab table: headers and result classes in the last column
    lab_table = report.find('table', id='required-lab')
    lab_headers = lab_table.find_all('th')
    self.assertEqual(3, len(lab_headers))
    self.assertEqual('Ancestor Concept ID', lab_headers[0].get_text())
    self.assertEqual('Ancestor Concept Name', lab_headers[1].get_text())
    self.assertEqual('Found', lab_headers[2].get_text())
    lab_rows = lab_table.find_next('tbody').find_all('tr')
    last_cells = [row.find_all('td')[-1] for row in lab_rows]
    submitted_labs = [
        cell for cell in last_cells if 'result-1' in cell.attrs['class']
    ]
    missing_labs = [
        cell for cell in last_cells if 'result-0' in cell.attrs['class']
    ]
    self.assertTrue(len(lab_rows) > 0)
    self.assertTrue(len(submitted_labs) > 0)
    self.assertTrue(len(missing_labs) > 0)
def test_html_report_five_person(self, mock_check_cron, mock_first_run,
                                 mock_required_files_loaded,
                                 mock_has_all_required_files,
                                 mock_updated_datetime_object):
    """Check the validation results HTML for the five-person submission.

    Uploads the five-person CDM files via the storage client, loads the
    RDR person table and the required-labs lookup tables, runs
    validation, then parses the generated report and asserts on the
    missing-PII and required-lab tables. Also confirms the obsolete
    missing-from-RDR row is gone (see DC-1932).

    :param mock_check_cron: mock bypassing the cron-only request check
    :param mock_first_run: mock forcing is-first-run to False
    :param mock_required_files_loaded: mock forcing
        required-files-loaded to False
    :param mock_has_all_required_files: mock reporting all required
        files present
    :param mock_updated_datetime_object: mock supplying a recent
        last-updated timestamp
    """
    mock_required_files_loaded.return_value = False
    mock_first_run.return_value = False
    mock_has_all_required_files.return_value = True
    mock_updated_datetime_object.return_value = (
        datetime.datetime.today() - datetime.timedelta(minutes=7))

    # stage the five-person CDM files in the HPO bucket
    for cdm_file in test_util.FIVE_PERSONS_FILES:
        blob_name = f'{self.folder_prefix}{os.path.basename(cdm_file)}'
        upload_blob = self.storage_bucket.blob(blob_name)
        upload_blob.upload_from_filename(cdm_file)

    # load the person table into the RDR dataset
    bq_utils.load_table_from_csv(self.project_id, self.rdr_dataset_id,
                                 common.PERSON,
                                 test_util.FIVE_PERSONS_PERSON_CSV)

    # load the required-labs lookup tables
    required_labs.load_measurement_concept_sets_table(
        project_id=self.project_id, dataset_id=self.bigquery_dataset_id)
    required_labs.load_measurement_concept_sets_descendants_table(
        project_id=self.project_id, dataset_id=self.bigquery_dataset_id)

    main.app.testing = True
    with main.app.test_client() as c:
        c.get(test_util.VALIDATE_HPO_FILES_URL)
        actual_result = test_util.read_cloud_file(
            self.hpo_bucket, self.folder_prefix + common.RESULTS_HTML)

    # confirm no notification emails would be sent for this run
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    self.assertFalse(main.is_first_validation_run(folder_items))

    # parse the generated report
    report = bs(actual_result, parser="lxml", features="lxml")

    # missing-PII table: headers and expected record types
    pii_table = report.find('table', id='missing_pii')
    pii_headers = pii_table.find_all('th')
    self.assertEqual('Missing Participant Record Type',
                     pii_headers[0].get_text())
    self.assertEqual('Count', pii_headers[1].get_text())
    pii_rows = pii_table.find_next('tbody').find_all('tr')
    record_types = [row.find('td').text for row in pii_rows]
    self.assertIn(main_consts.EHR_NO_PII, record_types)
    self.assertIn(main_consts.PII_NO_EHR, record_types)

    # the missing from RDR component is obsolete (see DC-1932)
    # this is to confirm it was removed successfully from the report
    rdr_date = '2020-01-01'
    self.assertNotIn(main_consts.EHR_NO_RDR.format(date=rdr_date),
                     record_types)
    self.assertIn(main_consts.EHR_NO_PARTICIPANT_MATCH, record_types)

    # required-lab table: headers and result classes in the last column
    lab_table = report.find('table', id='required-lab')
    lab_headers = lab_table.find_all('th')
    self.assertEqual(3, len(lab_headers))
    self.assertEqual('Ancestor Concept ID', lab_headers[0].get_text())
    self.assertEqual('Ancestor Concept Name', lab_headers[1].get_text())
    self.assertEqual('Found', lab_headers[2].get_text())
    lab_rows = lab_table.find_next('tbody').find_all('tr')
    last_cells = [row.find_all('td')[-1] for row in lab_rows]
    submitted_labs = [
        cell for cell in last_cells if 'result-1' in cell.attrs['class']
    ]
    missing_labs = [
        cell for cell in last_cells if 'result-0' in cell.attrs['class']
    ]
    self.assertTrue(len(lab_rows) > 0)
    self.assertTrue(len(submitted_labs) > 0)
    self.assertTrue(len(missing_labs) > 0)