def test_validate_five_persons_success(self, mock_check_cron):
    """Uploaded five-person CDM files validate as (found, parsed, loaded) == (1, 1, 1).

    Submission files that were not uploaded must report (0, 0, 0). Also
    verifies that every CDM/PII table whose schema includes a person_id
    column was created with clustering.
    """
    expected_results = []
    test_file_names = [
        os.path.basename(f) for f in test_util.FIVE_PERSONS_FILES
    ]
    for cdm_file in common.SUBMISSION_FILES:
        if cdm_file in test_file_names:
            expected_result = (cdm_file, 1, 1, 1)
            test_file = os.path.join(test_util.FIVE_PERSONS_PATH, cdm_file)
            test_util.write_cloud_file(self.hpo_bucket,
                                       test_file,
                                       prefix=self.folder_prefix)
        else:
            expected_result = (cdm_file, 0, 0, 0)
        expected_results.append(expected_result)
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(r['results']), set(expected_results))

    # check tables exist and are clustered as expected
    for table in resources.CDM_TABLES + common.PII_TABLES:
        table_id = bq_utils.get_table_id(test_util.FAKE_HPO_ID, table)
        table_info = bq_utils.get_table_info(table_id)
        # use the shared schema helper instead of re-parsing the JSON
        # fields file by hand -- consistent with the other variant of
        # this test in the file
        fields = resources.fields_for(table)
        field_names = [field['name'] for field in fields]
        if 'person_id' in field_names:
            self.table_has_clustering(table_info)
def test_pii_files_loaded(self, mock_check_cron):
    """Uploaded PII files are loaded; absent submission files report zeros."""
    pii_paths = [
        test_util.PII_NAME_FILE, test_util.PII_MRN_BAD_PERSON_ID_FILE
    ]
    uploaded_names = [os.path.basename(p) for p in pii_paths]
    for pii_path in pii_paths:
        test_util.write_cloud_file(self.hpo_bucket,
                                   pii_path,
                                   prefix=self.folder_prefix)

    # expected counts for the uploaded PII files come from a fixture CSV
    load_rows = resources.csv_to_list(test_util.PII_FILE_LOAD_RESULT_CSV)
    expected_results = [(row['file_name'], int(row['found']),
                         int(row['parsed']), int(row['loaded']))
                        for row in load_rows]
    # every submission file we did not upload must report (0, 0, 0)
    expected_results += [(name, 0, 0, 0)
                         for name in common.SUBMISSION_FILES
                         if name not in uploaded_names]

    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(expected_results), set(r['results']))
def test_validate_five_persons_success(self, mock_check_cron):
    """Upload the five-person CDM files and validate the submission.

    Uploaded files must validate as (1, 1, 1); all other submission files
    as (0, 0, 0). Tables with a person_id column must be clustered.
    """
    uploaded_names = [
        os.path.basename(f) for f in test_util.FIVE_PERSONS_FILES
    ]
    expected_results = []
    for submission_file in common.SUBMISSION_FILES:
        if submission_file in uploaded_names:
            expected_results.append((submission_file, 1, 1, 1))
            local_path = os.path.join(test_util.FIVE_PERSONS_PATH,
                                      submission_file)
            blob = self.storage_bucket.blob(
                f'{self.folder_prefix}{submission_file}')
            blob.upload_from_filename(local_path)
        else:
            expected_results.append((submission_file, 0, 0, 0))

    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(r['results']), set(expected_results))

    # every table whose schema includes person_id should be clustered
    for table in resources.CDM_TABLES + common.PII_TABLES:
        table_id = bq_utils.get_table_id(test_util.FAKE_HPO_ID, table)
        table_info = bq_utils.get_table_info(table_id)
        schema_fields = resources.fields_for(table)
        if any(f['name'] == 'person_id' for f in schema_fields):
            self.table_has_clustering(table_info)
def test_curation_report_ignored(self, mock_check_cron):
    """Excluded files are left in the bucket and produce no validation errors."""
    excluded = [
        self.folder_prefix + name for name in ("person.csv",)
    ]
    for object_name in excluded:
        test_util.write_cloud_str(self.hpo_bucket, object_name, ".")

    main.app.testing = True
    with main.app.test_client() as c:
        c.get(test_util.VALIDATE_HPO_FILES_URL)

    # bucket should contain the excluded file plus every ignore-list item
    expected_bucket_items = excluded + [
        self.folder_prefix + name for name in resources.IGNORE_LIST
    ]
    listing = gcs_utils.list_bucket(self.hpo_bucket)
    actual_bucket_items = [
        item['name']
        for item in listing
        if not main._is_string_excluded_file(
            item['name'][len(self.folder_prefix):])
    ]
    self.assertSetEqual(set(expected_bucket_items),
                        set(actual_bucket_items))

    # validation of the submission should yield no errors
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, bucket_items,
                                 self.folder_prefix)
    self.assertListEqual([], r['errors'])
def test_all_files_unparseable_output(self):
    """Unparseable uploads are found (1) but neither parsed nor loaded (0, 0)."""
    # TODO possible bug: if no pre-existing table, results in bq table not found error
    for submission_file in common.SUBMISSION_FILES:
        test_util.write_cloud_str(self.hpo_bucket,
                                  self.folder_prefix + submission_file,
                                  ".\n .")
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    expected = [(name, 1, 0, 0) for name in common.SUBMISSION_FILES]
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, bucket_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(expected), set(r['results']))
def test_validate_submission(self, mock_check_cron,
                             mock_perform_validation_on_file,
                             mock_create_standard_table):
    """
    Checks the return value of validate_submission

    :param mock_check_cron:
    :param mock_perform_validation_on_file:
    :param mock_create_standard_table:
    :return:
    """
    folder_prefix = '2019-01-01/'
    bucket_items = [{
        'name': folder_prefix + 'person.csv'
    }, {
        'name': folder_prefix + 'invalid_file.csv'
    }]

    validation_returns = dict()
    expected_results = []
    expected_errors = []
    expected_warnings = [('invalid_file.csv', 'Unknown file')]
    all_files = sorted(resources.CDM_FILES) + sorted(common.PII_FILES)
    for file_name in all_files:
        errors = []
        # person.csv is present and loads cleanly; visit_occurrence.csv
        # is simulated as present but failing to parse; everything else
        # is absent
        if file_name == 'person.csv':
            counts = (1, 1, 1)
        elif file_name == 'visit_occurrence.csv':
            counts = (1, 0, 0)
            errors.append((file_name, 'Fake parsing error'))
        else:
            counts = (0, 0, 0)
        result = [(file_name,) + counts]
        validation_returns[file_name] = result, errors
        expected_results += result
        expected_errors += errors

    def perform_validation_on_file(cdm_file_name, found_cdm_files, hpo_id,
                                   folder_prefix, bucket):
        return validation_returns.get(cdm_file_name)

    mock_perform_validation_on_file.side_effect = perform_validation_on_file
    actual_result = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                             bucket_items, folder_prefix)
    self.assertListEqual(expected_results, actual_result.get('results'))
    self.assertListEqual(expected_errors, actual_result.get('errors'))
    self.assertListEqual(expected_warnings, actual_result.get('warnings'))
def test_all_files_unparseable_output(self):
    """Unparseable uploads are found (1) but neither parsed nor loaded (0, 0)."""
    # TODO possible bug: if no pre-existing table, results in bq table not found error
    unparseable_content = '.\n .'
    for submission_file in common.SUBMISSION_FILES:
        blob = self.storage_bucket.blob(
            f'{self.folder_prefix}{submission_file}')
        blob.upload_from_string(unparseable_content)
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    expected = [(name, 1, 0, 0) for name in common.SUBMISSION_FILES]
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(expected), set(r['results']))
def test_bad_file_names(self):
    """Files with unknown names or unsupported extensions produce warnings."""
    bad_file_names = [
        "avisit_occurrence.csv",
        "condition_occurence.csv",  # misspelled
        "person_final.csv",
        "procedure_occurrence.tsv"  # unsupported file extension
    ]
    expected_warnings = []
    for file_name in bad_file_names:
        test_util.write_cloud_str(self.hpo_bucket,
                                  self.folder_prefix + file_name, ".")
        expected_item = (file_name, common.UNKNOWN_FILE)
        expected_warnings.append(expected_item)
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, bucket_items,
                                 self.folder_prefix)
    # warning order depends on bucket listing order, so compare
    # order-independently (consistent with the GCS-client variant of
    # this test, which uses assertCountEqual)
    self.assertCountEqual(expected_warnings, r['warnings'])
def test_bad_file_names(self):
    """Unknown file names and unsupported extensions are reported as warnings."""
    bad_file_names = [
        "avisit_occurrence.csv",
        "condition_occurence.csv",  # misspelled
        "person_final.csv",
        "procedure_occurrence.tsv"  # unsupported file extension
    ]
    expected_warnings = []
    for bad_name in bad_file_names:
        blob = self.storage_bucket.blob(
            f'{self.folder_prefix}{bad_name}')
        blob.upload_from_string('.')
        expected_warnings.append((bad_name, common.UNKNOWN_FILE))
    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                 self.folder_prefix)
    self.assertCountEqual(expected_warnings, r['warnings'])