def _test_html_report_five_person(self, mock_check_cron):
    # Not sure this test is still relevant (see hpo_report module and tests)
    # TODO refactor or remove this test
    folder_prefix = '2019-01-01/'
    for cdm_file in test_util.FIVE_PERSONS_FILES:
        test_util.write_cloud_file(self.hpo_bucket,
                                   cdm_file,
                                   prefix=folder_prefix)

    # achilles sometimes fails due to rate limits.
    # using both success and failure cases allows it to fail gracefully
    # until there is a fix for achilles
    with open(test_util.FIVE_PERSON_RESULTS_FILE, 'r') as f:
        expected_result_achilles_success = self._remove_timestamp_tags_from_results(
            f.read())
    with open(test_util.FIVE_PERSON_RESULTS_ACHILLES_ERROR_FILE, 'r') as f:
        expected_result_achilles_failure = self._remove_timestamp_tags_from_results(
            f.read())
    expected_results = [
        expected_result_achilles_success, expected_result_achilles_failure
    ]

    main.app.testing = True
    with main.app.test_client() as c:
        c.get(test_util.VALIDATE_HPO_FILES_URL)
        actual_result = test_util.read_cloud_file(
            self.hpo_bucket, folder_prefix + common.RESULTS_HTML)
        actual_result_file = self._remove_timestamp_tags_from_results(
            StringIO.StringIO(actual_result).getvalue())
        self.assertIn(actual_result_file, expected_results)
def test_pii_files_loaded(self, mock_check_cron):
    # tests if pii files are loaded
    test_file_paths = [
        test_util.PII_NAME_FILE, test_util.PII_MRN_BAD_PERSON_ID_FILE
    ]
    test_file_names = [os.path.basename(f) for f in test_file_paths]
    test_util.write_cloud_file(self.hpo_bucket,
                               test_util.PII_NAME_FILE,
                               prefix=self.folder_prefix)
    test_util.write_cloud_file(self.hpo_bucket,
                               test_util.PII_MRN_BAD_PERSON_ID_FILE,
                               prefix=self.folder_prefix)

    rs = resources._csv_to_list(test_util.PII_FILE_LOAD_RESULT_CSV)
    expected_results = [(r['file_name'], int(r['found']), int(r['parsed']),
                         int(r['loaded'])) for r in rs]
    for f in common.SUBMISSION_FILES:
        if f not in test_file_names:
            expected_result = (f, 0, 0, 0)
            expected_results.append(expected_result)

    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, bucket_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(expected_results), set(r['results']))
def test_validate_five_persons_success(self, mock_check_cron):
    expected_results = []
    test_file_names = [
        os.path.basename(f) for f in test_util.FIVE_PERSONS_FILES
    ]

    for cdm_file in common.SUBMISSION_FILES:
        if cdm_file in test_file_names:
            expected_result = (cdm_file, 1, 1, 1)
            test_file = os.path.join(test_util.FIVE_PERSONS_PATH, cdm_file)
            test_util.write_cloud_file(self.hpo_bucket,
                                       test_file,
                                       prefix=self.folder_prefix)
        else:
            expected_result = (cdm_file, 0, 0, 0)
        expected_results.append(expected_result)

    bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
    r = main.validate_submission(self.hpo_id, self.hpo_bucket, bucket_items,
                                 self.folder_prefix)
    self.assertSetEqual(set(r['results']), set(expected_results))

    # check tables exist and are clustered as expected
    for table in resources.CDM_TABLES + common.PII_TABLES:
        fields_file = os.path.join(resources.fields_path, table + '.json')
        table_id = bq_utils.get_table_id(test_util.FAKE_HPO_ID, table)
        table_info = bq_utils.get_table_info(table_id)
        with open(fields_file, 'r') as fp:
            fields = json.load(fp)
            field_names = [field['name'] for field in fields]
            if 'person_id' in field_names:
                self.table_has_clustering(table_info)
def test_copy_five_persons(self, mock_check_cron):
    # upload all five_persons files
    for cdm_file in test_util.FIVE_PERSONS_FILES:
        test_util.write_cloud_file(self.hpo_bucket,
                                   cdm_file,
                                   prefix=self.folder_prefix)
        test_util.write_cloud_file(self.hpo_bucket,
                                   cdm_file,
                                   prefix=self.folder_prefix +
                                   self.folder_prefix)

    main.app.testing = True
    with main.app.test_client() as c:
        c.get(test_util.COPY_HPO_FILES_URL)

        prefix = test_util.FAKE_HPO_ID + '/' + self.hpo_bucket + '/' + self.folder_prefix
        expected_bucket_items = [
            prefix + item.split(os.sep)[-1]
            for item in test_util.FIVE_PERSONS_FILES
        ]
        expected_bucket_items.extend([
            prefix + self.folder_prefix + item.split(os.sep)[-1]
            for item in test_util.FIVE_PERSONS_FILES
        ])

        list_bucket_result = gcs_utils.list_bucket(gcs_utils.get_drc_bucket())
        actual_bucket_items = [item['name'] for item in list_bucket_result]
        self.assertSetEqual(set(expected_bucket_items),
                            set(actual_bucket_items))
def test_integration_five_person_data_retraction_skip(self):
    self.folder_prefix_1 = self.hpo_id + '/' + self.site_bucket + '/' + self.folder_1
    self.folder_prefix_2 = self.hpo_id + '/' + self.site_bucket + '/' + self.folder_2
    lines_to_remove = {}
    total_lines_prior = {}
    for file_path in test_util.FIVE_PERSONS_FILES:
        # generate results files
        file_name = file_path.split('/')[-1]
        table_name = file_name.split('.')[0]
        lines_to_remove[file_name] = 0
        total_lines_prior[file_name] = 0
        with open(file_path) as f:
            for line in f:
                line = line.strip()
                if line != '':
                    if (table_name in rd.PID_IN_COL1 and
                        rd.get_integer(line.split(",")[0]) in self.skip_pids) or \
                            (table_name in rd.PID_IN_COL2 and
                             rd.get_integer(line.split(",")[1]) in self.skip_pids):
                        lines_to_remove[file_name] += 1
                    total_lines_prior[file_name] += 1

        # write file to cloud for testing
        test_util.write_cloud_file(self.bucket,
                                   file_path,
                                   prefix=self.folder_prefix_1)
        test_util.write_cloud_file(self.bucket,
                                   file_path,
                                   prefix=self.folder_prefix_2)

    retract_result = rd.run_retraction(self.skip_pids,
                                       self.bucket,
                                       self.hpo_id,
                                       self.site_bucket,
                                       folder=None,
                                       force_flag=True)

    total_lines_post = {}
    for file_path in test_util.FIVE_PERSONS_FILES:
        file_name = file_path.split('/')[-1]
        actual_result_contents = test_util.read_cloud_file(
            self.bucket, self.folder_prefix_1 + file_name)
        # convert to list and remove last list item since it is a newline
        total_lines_post[file_name] = len(
            actual_result_contents.split('\n')[:-1])

    for key in total_lines_prior.keys():
        if key in lines_to_remove:
            self.assertEqual(lines_to_remove[key],
                             total_lines_prior[key] - total_lines_post[key])
        else:
            self.assertEqual(total_lines_prior[key], total_lines_post[key])

    # metadata for each updated file is returned
    # iterate over a copy so entries can be deleted safely during iteration
    for key, val in list(lines_to_remove.items()):
        if val == 0:
            del lines_to_remove[key]
    self.assertEqual(len(retract_result[self.folder_prefix_1]),
                     len(lines_to_remove.keys()))