    def _test_html_report_five_person(self, mock_check_cron):
        # Not sure this test is still relevant (see hpo_report module and tests)
        # TODO refactor or remove this test
        folder_prefix = '2019-01-01/'
        for cdm_file in test_util.FIVE_PERSONS_FILES:
            test_util.write_cloud_file(self.hpo_bucket,
                                       cdm_file,
                                       prefix=folder_prefix)
        # achilles sometimes fails due to rate limits.
        # using both success and failure cases allows it to fail gracefully
        # until there is a fix for achilles
        with open(test_util.FIVE_PERSON_RESULTS_FILE, 'r') as f:
            expected_result_achilles_success = self._remove_timestamp_tags_from_results(
                f.read())
        with open(test_util.FIVE_PERSON_RESULTS_ACHILLES_ERROR_FILE, 'r') as f:
            expected_result_achilles_failure = self._remove_timestamp_tags_from_results(
                f.read())
        expected_results = [
            expected_result_achilles_success, expected_result_achilles_failure
        ]
        main.app.testing = True
        with main.app.test_client() as c:
            c.get(test_util.VALIDATE_HPO_FILES_URL)
            actual_result = test_util.read_cloud_file(
                self.hpo_bucket, folder_prefix + common.RESULTS_HTML)
            actual_result_file = self._remove_timestamp_tags_from_results(
                actual_result)
            self.assertIn(actual_result_file, expected_results)

    def test_pii_files_loaded(self, mock_check_cron):
        # tests if pii files are loaded
        test_file_paths = [
            test_util.PII_NAME_FILE, test_util.PII_MRN_BAD_PERSON_ID_FILE
        ]
        test_file_names = [os.path.basename(f) for f in test_file_paths]
        test_util.write_cloud_file(self.hpo_bucket,
                                   test_util.PII_NAME_FILE,
                                   prefix=self.folder_prefix)
        test_util.write_cloud_file(self.hpo_bucket,
                                   test_util.PII_MRN_BAD_PERSON_ID_FILE,
                                   prefix=self.folder_prefix)

        rs = resources._csv_to_list(test_util.PII_FILE_LOAD_RESULT_CSV)
        expected_results = [(r['file_name'], int(r['found']), int(r['parsed']),
                             int(r['loaded'])) for r in rs]
        for f in common.SUBMISSION_FILES:
            if f not in test_file_names:
                expected_result = (f, 0, 0, 0)
                expected_results.append(expected_result)

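        # list the bucket and run validation directly; each result row is a
        # (file_name, found, parsed, loaded) tuple for one expected submission file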
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        r = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                     bucket_items, self.folder_prefix)
        self.assertSetEqual(set(expected_results), set(r['results']))

    def test_validate_five_persons_success(self, mock_check_cron):
        expected_results = []
        test_file_names = [
            os.path.basename(f) for f in test_util.FIVE_PERSONS_FILES
        ]

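        # uploaded files should report found/parsed/loaded counts of 1,
        # everything else in the expected submission set should report 0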
        for cdm_file in common.SUBMISSION_FILES:
            if cdm_file in test_file_names:
                expected_result = (cdm_file, 1, 1, 1)
                test_file = os.path.join(test_util.FIVE_PERSONS_PATH, cdm_file)
                test_util.write_cloud_file(self.hpo_bucket,
                                           test_file,
                                           prefix=self.folder_prefix)
            else:
                expected_result = (cdm_file, 0, 0, 0)
            expected_results.append(expected_result)
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        r = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                     bucket_items, self.folder_prefix)
        self.assertSetEqual(set(r['results']), set(expected_results))

        # check tables exist and are clustered as expected
        for table in resources.CDM_TABLES + common.PII_TABLES:
            fields_file = os.path.join(resources.fields_path, table + '.json')
            table_id = bq_utils.get_table_id(test_util.FAKE_HPO_ID, table)
            table_info = bq_utils.get_table_info(table_id)
            with open(fields_file, 'r') as fp:
                fields = json.load(fp)
                field_names = [field['name'] for field in fields]
                if 'person_id' in field_names:
                    self.table_has_clustering(table_info)

    def test_copy_five_persons(self, mock_check_cron):
        # upload all five_persons files
        for cdm_file in test_util.FIVE_PERSONS_FILES:
            test_util.write_cloud_file(self.hpo_bucket,
                                       cdm_file,
                                       prefix=self.folder_prefix)
            test_util.write_cloud_file(self.hpo_bucket,
                                       cdm_file,
                                       prefix=self.folder_prefix +
                                       self.folder_prefix)

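        # hitting the copy endpoint should mirror each uploaded file (including
        # the nested folder copy) under <hpo_id>/<site bucket>/<folder prefix>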
        main.app.testing = True
        with main.app.test_client() as c:
            c.get(test_util.COPY_HPO_FILES_URL)
            prefix = test_util.FAKE_HPO_ID + '/' + self.hpo_bucket + '/' + self.folder_prefix
            expected_bucket_items = [
                prefix + item.split(os.sep)[-1]
                for item in test_util.FIVE_PERSONS_FILES
            ]
            expected_bucket_items.extend([
                prefix + self.folder_prefix + item.split(os.sep)[-1]
                for item in test_util.FIVE_PERSONS_FILES
            ])

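            # copied objects are written to the central DRC bucket, not the HPO bucket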
            list_bucket_result = gcs_utils.list_bucket(
                gcs_utils.get_drc_bucket())
            actual_bucket_items = [item['name'] for item in list_bucket_result]
            self.assertSetEqual(set(expected_bucket_items),
                                set(actual_bucket_items))

    def test_integration_five_person_data_retraction_skip(self):
        self.folder_prefix_1 = self.hpo_id + '/' + self.site_bucket + '/' + self.folder_1
        self.folder_prefix_2 = self.hpo_id + '/' + self.site_bucket + '/' + self.folder_2
        lines_to_remove = {}
        total_lines_prior = {}
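        # for each five-person file, count the total lines and the lines whose
        # person_id column matches a pid in self.skip_pids (the lines retraction
        # is expected to remove)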
        for file_path in test_util.FIVE_PERSONS_FILES:
            # generate results files
            file_name = file_path.split('/')[-1]
            table_name = file_name.split('.')[0]
            lines_to_remove[file_name] = 0
            total_lines_prior[file_name] = 0
            with open(file_path) as f:
                for line in f:
                    line = line.strip()
                    if line != '':
                        if (table_name in rd.PID_IN_COL1 and rd.get_integer(line.split(",")[0]) in self.skip_pids) or \
                                (table_name in rd.PID_IN_COL2 and rd.get_integer(line.split(",")[1]) in self.skip_pids):
                            lines_to_remove[file_name] += 1
                        total_lines_prior[file_name] += 1

            # write file to cloud for testing
            test_util.write_cloud_file(self.bucket,
                                       file_path,
                                       prefix=self.folder_prefix_1)
            test_util.write_cloud_file(self.bucket,
                                       file_path,
                                       prefix=self.folder_prefix_2)

        retract_result = rd.run_retraction(self.skip_pids,
                                           self.bucket,
                                           self.hpo_id,
                                           self.site_bucket,
                                           folder=None,
                                           force_flag=True)

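        # re-read the retracted files from the bucket and recount their lines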
        total_lines_post = {}
        for file_path in test_util.FIVE_PERSONS_FILES:
            file_name = file_path.split('/')[-1]
            actual_result_contents = test_util.read_cloud_file(
                self.bucket, self.folder_prefix_1 + file_name)
            # split into lines and drop the trailing empty entry left by the final newline
            total_lines_post[file_name] = len(
                actual_result_contents.split('\n')[:-1])

        for key in total_lines_prior.keys():
            if key in lines_to_remove:
                self.assertEqual(
                    lines_to_remove[key],
                    total_lines_prior[key] - total_lines_post[key])
            else:
                self.assertEqual(total_lines_prior[key], total_lines_post[key])

        # metadata is only returned for files that actually had lines removed
        lines_to_remove = {
            key: val for key, val in lines_to_remove.items() if val > 0
        }
        self.assertEqual(len(retract_result[self.folder_prefix_1]),
                         len(lines_to_remove))