def test_pii_files_loaded(self, mock_check_cron):
    # tests if pii files are loaded
    test_file_paths = [
        test_util.PII_NAME_FILE, test_util.PII_MRN_BAD_PERSON_ID_FILE
    ]
    test_file_names = [os.path.basename(f) for f in test_file_paths]
    test_util.write_cloud_file(self.hpo_bucket,
                               test_util.PII_NAME_FILE,
                               prefix=self.folder_prefix)
    test_util.write_cloud_file(self.hpo_bucket,
                               test_util.PII_MRN_BAD_PERSON_ID_FILE,
                               prefix=self.folder_prefix)

    rs = resources.csv_to_list(test_util.PII_FILE_LOAD_RESULT_CSV)
    expected_results = [(r['file_name'], int(r['found']), int(r['parsed']),
                         int(r['loaded'])) for r in rs]
    for f in common.SUBMISSION_FILES:
        if f not in test_file_names:
            expected_result = (f, 0, 0, 0)
            expected_results.append(expected_result)

    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(expected_results), set(r['results']))
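# The test above builds its expected (file_name, found, parsed, loaded) tuples
# from a CSV via resources.csv_to_list. A minimal sketch of how such a helper
# could work — an illustrative assumption, not the repo's actual
# resources.csv_to_list. Reading rows through csv.DictReader is what lets the
# test index them as r['file_name'], r['found'], and so on.
import csv


def csv_to_list_sketch(csv_path):
    """Return the rows of a CSV file as a list of dicts keyed by header."""
    with open(csv_path, newline='') as f:
        return [dict(row) for row in csv.DictReader(f)]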
def test_validate_five_persons_success(self, mock_check_cron):
    expected_results: list = []
    test_file_names: list = [
        os.path.basename(f) for f in test_util.FIVE_PERSONS_FILES
    ]

    for cdm_filename in common.SUBMISSION_FILES:
        if cdm_filename in test_file_names:
            expected_result: tuple = (cdm_filename, 1, 1, 1)
            test_filepath: str = os.path.join(test_util.FIVE_PERSONS_PATH,
                                              cdm_filename)
            test_blob = self.storage_bucket.blob(
                f'{self.folder_prefix}{cdm_filename}')
            test_blob.upload_from_filename(test_filepath)
        else:
            expected_result: tuple = (cdm_filename, 0, 0, 0)
        expected_results.append(expected_result)

    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(r['results']), set(expected_results))

    # check tables exist and are clustered as expected
    for table in resources.CDM_TABLES + common.PII_TABLES:
        table_id = bq_utils.get_table_id(test_util.FAKE_HPO_ID, table)
        table_info = bq_utils.get_table_info(table_id)
        fields = resources.fields_for(table)
        field_names = [field['name'] for field in fields]
        if 'person_id' in field_names:
            self.table_has_clustering(table_info)
def test_validate_five_persons_success(self, mock_check_cron):
    expected_results = []
    test_file_names = [
        os.path.basename(f) for f in test_util.FIVE_PERSONS_FILES
    ]

    for cdm_file in common.SUBMISSION_FILES:
        if cdm_file in test_file_names:
            expected_result = (cdm_file, 1, 1, 1)
            test_file = os.path.join(test_util.FIVE_PERSONS_PATH, cdm_file)
            test_util.write_cloud_file(self.hpo_bucket,
                                       test_file,
                                       prefix=self.folder_prefix)
        else:
            expected_result = (cdm_file, 0, 0, 0)
        expected_results.append(expected_result)

    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(r['results']), set(expected_results))

    # check tables exist and are clustered as expected
    for table in resources.CDM_TABLES + common.PII_TABLES:
        fields_file = os.path.join(resources.fields_path, table + '.json')
        table_id = bq_utils.get_table_id(test_util.FAKE_HPO_ID, table)
        table_info = bq_utils.get_table_info(table_id)
        with open(fields_file, 'r') as fp:
            fields = json.load(fp)
        field_names = [field['name'] for field in fields]
        if 'person_id' in field_names:
            self.table_has_clustering(table_info)
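# The two variants of this test upload files differently: one goes through
# test_util.write_cloud_file, the other calls the google-cloud-storage client
# directly. A hedged sketch of what such a helper might look like — an
# assumption about test_util's internals, not its actual code:
import os

from google.cloud import storage


def write_cloud_file_sketch(bucket_name, file_path, prefix=''):
    """Upload a local file to a GCS bucket under prefix; return the blob name."""
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    blob_name = f'{prefix}{os.path.basename(file_path)}'
    bucket.blob(blob_name).upload_from_filename(file_path)
    return blob_name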
def test_all_files_unparseable_output(self):
    # TODO possible bug: if no pre-existing table, results in bq table not found error
    for cdm_table in common.SUBMISSION_FILES:
        test_util.write_cloud_str(self.hpo_bucket,
                                  self.folder_prefix + cdm_table, ".\n .")
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    expected_results = [(f, 1, 0, 0) for f in common.SUBMISSION_FILES]
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(expected_results), set(r['results']))
def test_all_files_unparseable_output(self):
    # TODO possible bug: if no pre-existing table, results in bq table not found error
    for cdm_table in common.SUBMISSION_FILES:
        cdm_blob = self.storage_bucket.blob(
            f'{self.folder_prefix}{cdm_table}')
        cdm_blob.upload_from_string('.\n .')

    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    expected_results = [(f, 1, 0, 0) for f in common.SUBMISSION_FILES]
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(expected_results), set(r['results']))
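# Re the TODO in both variants above: one way to sidestep the "table not
# found" error would be to ensure every HPO table exists before validation
# runs. A minimal sketch with the google-cloud-bigquery client, assuming a
# fields_for(table) helper that returns schema dicts of the form
# [{'name': ..., 'type': ..., 'mode': ...}, ...] — not the repo's actual fix:
from google.cloud import bigquery


def ensure_tables_exist(project_id, dataset_id, hpo_id, tables, fields_for):
    """Create any missing HPO tables with their expected schemas."""
    client = bigquery.Client(project=project_id)
    for table in tables:
        schema = [
            bigquery.SchemaField(f['name'],
                                 f['type'],
                                 mode=f.get('mode', 'NULLABLE'))
            for f in fields_for(table)
        ]
        table_ref = f'{project_id}.{dataset_id}.{hpo_id}_{table}'
        # exists_ok=True makes this a no-op for tables that already exist
        client.create_table(bigquery.Table(table_ref, schema=schema),
                            exists_ok=True)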
def test_bad_file_names(self):
    bad_file_names = [
        "avisit_occurrence.csv",
        "condition_occurence.csv",  # misspelled
        "person_final.csv",
        "procedure_occurrence.tsv"  # unsupported file extension
    ]
    expected_warnings = []
    for file_name in bad_file_names:
        test_util.write_cloud_str(self.hpo_bucket,
                                  self.folder_prefix + file_name, ".")
        expected_item = (file_name, common.UNKNOWN_FILE)
        expected_warnings.append(expected_item)
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertCountEqual(expected_warnings, r['warnings'])
def test_bad_file_names(self):
    bad_file_names: list = [
        "avisit_occurrence.csv",
        "condition_occurence.csv",  # misspelled
        "person_final.csv",
        "procedure_occurrence.tsv"  # unsupported file extension
    ]
    expected_warnings: list = []
    for file_name in bad_file_names:
        bad_blob = self.storage_bucket.blob(
            f'{self.folder_prefix}{file_name}')
        bad_blob.upload_from_string('.')

        expected_item: tuple = (file_name, common.UNKNOWN_FILE)
        expected_warnings.append(expected_item)

    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertCountEqual(expected_warnings, r['warnings'])
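# The warnings asserted above come from filename screening: any upload that is
# not an expected CDM/PII submission file is flagged UNKNOWN_FILE. A minimal
# sketch of that classification rule, assuming common.SUBMISSION_FILES and
# common.UNKNOWN_FILE as used in the test — not the validator's actual code:
def classify_upload_sketch(file_name, submission_files, unknown_file_msg):
    """Return a (file_name, warning) tuple for unrecognized uploads, else None."""
    # exact-match screening: misspellings ('condition_occurence.csv'), extra
    # tokens ('person_final.csv'), and unsupported extensions
    # ('procedure_occurrence.tsv') all fail this check
    if file_name not in submission_files:
        return file_name, unknown_file_msg
    return None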
def test_html_report_five_person(self, mock_check_cron, mock_first_run,
                                 mock_rdr_date, mock_required_files_loaded):
    mock_required_files_loaded.return_value = False
    mock_first_run.return_value = False
    rdr_date = '2020-01-01'
    mock_rdr_date.return_value = rdr_date

    for cdm_file in test_util.FIVE_PERSONS_FILES:
        test_util.write_cloud_file(self.hpo_bucket,
                                   cdm_file,
                                   prefix=self.folder_prefix)
    # load person table in RDR
    bq_utils.load_table_from_csv(self.project_id, self.rdr_dataset_id,
                                 common.PERSON,
                                 test_util.FIVE_PERSONS_PERSON_CSV)

    # Load measurement_concept_sets
    required_labs.load_measurement_concept_sets_table(
        project_id=self.project_id, dataset_id=self.bigquery_dataset_id)
    # Load measurement_concept_sets_descendants
    required_labs.load_measurement_concept_sets_descendants_table(
        project_id=self.project_id, dataset_id=self.bigquery_dataset_id)

    main.app.testing = True
    with main.app.test_client() as c:
        c.get(test_util.VALIDATE_HPO_FILES_URL)
        actual_result = test_util.read_cloud_file(
            self.hpo_bucket, self.folder_prefix + common.RESULTS_HTML)

    # ensure emails are not sent
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    self.assertFalse(main.is_first_validation_run(folder_items))

    # parse html
    soup = bs(actual_result, parser="lxml", features="lxml")

    missing_pii_html_table = soup.find('table', id='missing_pii')
    table_headers = missing_pii_html_table.find_all('th')
    self.assertEqual('Missing Participant Record Type',
                     table_headers[0].get_text())
    self.assertEqual('Count', table_headers[1].get_text())

    table_rows = missing_pii_html_table.find_next('tbody').find_all('tr')
    missing_record_types = [
        table_row.find('td').text for table_row in table_rows
    ]
    self.assertIn(main_consts.EHR_NO_PII, missing_record_types)
    self.assertIn(main_consts.PII_NO_EHR, missing_record_types)
    self.assertIn(main_consts.EHR_NO_RDR.format(date=rdr_date),
                  missing_record_types)
    self.assertIn(main_consts.EHR_NO_PARTICIPANT_MATCH, missing_record_types)

    required_lab_html_table = soup.find('table', id='required-lab')
    table_headers = required_lab_html_table.find_all('th')
    self.assertEqual(3, len(table_headers))
    self.assertEqual('Ancestor Concept ID', table_headers[0].get_text())
    self.assertEqual('Ancestor Concept Name', table_headers[1].get_text())
    self.assertEqual('Found', table_headers[2].get_text())

    table_rows = required_lab_html_table.find_next('tbody').find_all('tr')
    table_rows_last_column = [
        table_row.find_all('td')[-1] for table_row in table_rows
    ]
    submitted_labs = [
        row for row in table_rows_last_column
        if 'result-1' in row.attrs['class']
    ]
    missing_labs = [
        row for row in table_rows_last_column
        if 'result-0' in row.attrs['class']
    ]
    self.assertTrue(len(table_rows) > 0)
    self.assertTrue(len(submitted_labs) > 0)
    self.assertTrue(len(missing_labs) > 0)
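# Both versions of test_html_report_five_person receive mock_* arguments,
# implying a @mock.patch decorator stack that is elided from this excerpt.
# Stacked patches are injected bottom-up: the decorator nearest the function
# supplies the first mock argument (mock_check_cron above). A runnable
# demonstration of that ordering rule, using stand-in targets rather than the
# module's actual patch paths:
import os
from unittest import mock


@mock.patch('os.path.isdir')   # outermost patch -> last mock argument
@mock.patch('os.path.isfile')  # innermost patch -> first mock argument
def demo_patch_order(mock_isfile, mock_isdir):
    mock_isfile.return_value = True
    mock_isdir.return_value = False
    assert os.path.isfile('anything') and not os.path.isdir('anything')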
def test_html_report_five_person(self, mock_check_cron, mock_first_run,
                                 mock_required_files_loaded,
                                 mock_has_all_required_files,
                                 mock_updated_datetime_object):
    mock_required_files_loaded.return_value = False
    mock_first_run.return_value = False
    mock_has_all_required_files.return_value = True
    mock_updated_datetime_object.return_value = (
        datetime.datetime.today() - datetime.timedelta(minutes=7))

    for cdm_file in test_util.FIVE_PERSONS_FILES:
        blob_name = f'{self.folder_prefix}{os.path.basename(cdm_file)}'
        test_blob = self.storage_bucket.blob(blob_name)
        test_blob.upload_from_filename(cdm_file)

    # load person table in RDR
    bq_utils.load_table_from_csv(self.project_id, self.rdr_dataset_id,
                                 common.PERSON,
                                 test_util.FIVE_PERSONS_PERSON_CSV)

    # Load measurement_concept_sets
    required_labs.load_measurement_concept_sets_table(
        project_id=self.project_id, dataset_id=self.bigquery_dataset_id)
    # Load measurement_concept_sets_descendants
    required_labs.load_measurement_concept_sets_descendants_table(
        project_id=self.project_id, dataset_id=self.bigquery_dataset_id)

    main.app.testing = True
    with main.app.test_client() as c:
        c.get(test_util.VALIDATE_HPO_FILES_URL)
        actual_result = test_util.read_cloud_file(
            self.hpo_bucket, self.folder_prefix + common.RESULTS_HTML)

    # ensure emails are not sent
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    self.assertFalse(main.is_first_validation_run(folder_items))

    # parse html
    soup = bs(actual_result, parser="lxml", features="lxml")

    missing_pii_html_table = soup.find('table', id='missing_pii')
    table_headers = missing_pii_html_table.find_all('th')
    self.assertEqual('Missing Participant Record Type',
                     table_headers[0].get_text())
    self.assertEqual('Count', table_headers[1].get_text())

    table_rows = missing_pii_html_table.find_next('tbody').find_all('tr')
    missing_record_types = [
        table_row.find('td').text for table_row in table_rows
    ]
    self.assertIn(main_consts.EHR_NO_PII, missing_record_types)
    self.assertIn(main_consts.PII_NO_EHR, missing_record_types)

    # the missing-from-RDR component is obsolete (see DC-1932); this
    # confirms it was successfully removed from the report
    rdr_date = '2020-01-01'
    self.assertNotIn(main_consts.EHR_NO_RDR.format(date=rdr_date),
                     missing_record_types)
    self.assertIn(main_consts.EHR_NO_PARTICIPANT_MATCH, missing_record_types)

    required_lab_html_table = soup.find('table', id='required-lab')
    table_headers = required_lab_html_table.find_all('th')
    self.assertEqual(3, len(table_headers))
    self.assertEqual('Ancestor Concept ID', table_headers[0].get_text())
    self.assertEqual('Ancestor Concept Name', table_headers[1].get_text())
    self.assertEqual('Found', table_headers[2].get_text())

    table_rows = required_lab_html_table.find_next('tbody').find_all('tr')
    table_rows_last_column = [
        table_row.find_all('td')[-1] for table_row in table_rows
    ]
    submitted_labs = [
        row for row in table_rows_last_column
        if 'result-1' in row.attrs['class']
    ]
    missing_labs = [
        row for row in table_rows_last_column
        if 'result-0' in row.attrs['class']
    ]
    self.assertTrue(len(table_rows) > 0)
    self.assertTrue(len(submitted_labs) > 0)
    self.assertTrue(len(missing_labs) > 0)
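# mock_updated_datetime_object above is set 7 minutes in the past, which
# suggests the validator only treats a submission as settled once its most
# recent upload is older than some freshness window. A minimal sketch of that
# idea — the 5-minute threshold and function name are assumptions for
# illustration, not the validator's actual logic:
import datetime


def submission_is_settled(last_updated, window_minutes=5):
    """True if the newest file in a submission is older than the window."""
    age = datetime.datetime.today() - last_updated
    return age > datetime.timedelta(minutes=window_minutes)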