Example #1
0
    def test_validate_five_persons_success(self, mock_check_cron):
        """Uploading the five-persons fixture files should validate as found/parsed/loaded.

        Files not uploaded must be reported as (0, 0, 0); tables containing a
        person_id column must be clustered.
        """
        uploaded_names = [
            os.path.basename(path) for path in test_util.FIVE_PERSONS_FILES
        ]

        expected_results = []
        for submission_file in common.SUBMISSION_FILES:
            was_uploaded = submission_file in uploaded_names
            if was_uploaded:
                source_path = os.path.join(test_util.FIVE_PERSONS_PATH,
                                           submission_file)
                test_util.write_cloud_file(self.hpo_bucket,
                                           source_path,
                                           prefix=self.folder_prefix)
            expected_results.append(
                (submission_file, 1, 1, 1) if was_uploaded else
                (submission_file, 0, 0, 0))
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                     self.folder_prefix)
        self.assertSetEqual(set(r['results']), set(expected_results))

        # check tables exist and are clustered as expected
        for table in resources.CDM_TABLES + common.PII_TABLES:
            fields_file = os.path.join(resources.fields_path, table + '.json')
            table_id = bq_utils.get_table_id(test_util.FAKE_HPO_ID, table)
            table_info = bq_utils.get_table_info(table_id)
            with open(fields_file, 'r') as fp:
                schema_fields = json.load(fp)
                column_names = [field['name'] for field in schema_fields]
                # any table carrying person_id must be clustered on it
                if 'person_id' in column_names:
                    self.table_has_clustering(table_info)
Example #2
0
    def test_pii_files_loaded(self, mock_check_cron):
        """Uploaded PII files should be counted per the expected-results fixture CSV."""
        uploaded_paths = [
            test_util.PII_NAME_FILE, test_util.PII_MRN_BAD_PERSON_ID_FILE
        ]
        uploaded_names = [os.path.basename(path) for path in uploaded_paths]
        for path in uploaded_paths:
            test_util.write_cloud_file(self.hpo_bucket,
                                       path,
                                       prefix=self.folder_prefix)

        # expected counts for the uploaded files come from the fixture CSV
        rows = resources.csv_to_list(test_util.PII_FILE_LOAD_RESULT_CSV)
        expected_results = [(row['file_name'], int(row['found']),
                             int(row['parsed']), int(row['loaded']))
                            for row in rows]
        # every submission file that was not uploaded is reported as absent
        expected_results += [(name, 0, 0, 0)
                             for name in common.SUBMISSION_FILES
                             if name not in uploaded_names]

        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                     self.folder_prefix)
        self.assertSetEqual(set(expected_results), set(r['results']))
Example #3
0
    def test_validate_five_persons_success(self, mock_check_cron):
        """Uploading the five-persons fixture files should validate as found/parsed/loaded.

        Files not uploaded must be reported as (0, 0, 0); tables containing a
        person_id column must be clustered.
        """
        uploaded_names = [
            os.path.basename(path) for path in test_util.FIVE_PERSONS_FILES
        ]

        expected_results = []
        for submission_file in common.SUBMISSION_FILES:
            if submission_file not in uploaded_names:
                expected_results.append((submission_file, 0, 0, 0))
                continue
            expected_results.append((submission_file, 1, 1, 1))
            local_path = os.path.join(test_util.FIVE_PERSONS_PATH,
                                      submission_file)
            blob = self.storage_bucket.blob(
                f'{self.folder_prefix}{submission_file}')
            blob.upload_from_filename(local_path)

        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                     self.folder_prefix)
        self.assertSetEqual(set(r['results']), set(expected_results))

        # check tables exist and are clustered as expected
        for table in resources.CDM_TABLES + common.PII_TABLES:
            table_id = bq_utils.get_table_id(test_util.FAKE_HPO_ID, table)
            table_info = bq_utils.get_table_info(table_id)
            column_names = [f['name'] for f in resources.fields_for(table)]
            # any table carrying person_id must be clustered on it
            if 'person_id' in column_names:
                self.table_has_clustering(table_info)
    def test_curation_report_ignored(self, mock_check_cron):
        """Files on the ignore list should stay in the bucket and cause no validation errors."""
        excluded_items = [
            self.folder_prefix + name for name in ["person.csv"]
        ]
        for object_name in excluded_items:
            test_util.write_cloud_str(self.hpo_bucket, object_name, ".")

        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.VALIDATE_HPO_FILES_URL)

        # check content of the bucket is correct
        expected_bucket_items = excluded_items + [
            self.folder_prefix + name for name in resources.IGNORE_LIST
        ]
        list_bucket_result = gcs_utils.list_bucket(self.hpo_bucket)
        actual_bucket_items = [
            item['name']
            for item in list_bucket_result
            if not main._is_string_excluded_file(
                item['name'][len(self.folder_prefix):])
        ]
        self.assertSetEqual(set(expected_bucket_items),
                            set(actual_bucket_items))

        # check that the errors file is empty
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        r = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                     bucket_items, self.folder_prefix)
        self.assertListEqual([], r['errors'])
 def test_all_files_unparseable_output(self):
     """Unparseable content for every CDM file should report found=1, parsed=0, loaded=0."""
     # TODO possible bug: if no pre-existing table, results in bq table not found error
     for submission_file in common.SUBMISSION_FILES:
         test_util.write_cloud_str(self.hpo_bucket,
                                   self.folder_prefix + submission_file,
                                   ".\n .")
     bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
     r = main.validate_submission(self.hpo_id, self.hpo_bucket, bucket_items,
                                  self.folder_prefix)
     self.assertSetEqual({(f, 1, 0, 0) for f in common.SUBMISSION_FILES},
                         set(r['results']))
    def test_validate_submission(self, mock_check_cron,
                                 mock_perform_validation_on_file,
                                 mock_create_standard_table):
        """
        Checks the return value of validate_submission

        :param mock_check_cron:
        :param mock_perform_validation_on_file:
        :param mock_create_standard_table:
        :return:
        """
        folder_prefix = '2019-01-01/'
        bucket_items = [{'name': folder_prefix + name}
                        for name in ('person.csv', 'invalid_file.csv')]

        # map each known file name to the (results, errors) the stub will return
        stubbed_returns = {}
        expected_results = []
        expected_errors = []
        expected_warnings = [('invalid_file.csv', 'Unknown file')]
        all_file_names = sorted(resources.CDM_FILES) + sorted(common.PII_FILES)
        for file_name in all_file_names:
            errors = []
            if file_name == 'person.csv':
                # the one file actually in the bucket: fully loaded
                counts = (1, 1, 1)
            elif file_name == 'visit_occurrence.csv':
                # simulate a file that was found but failed to parse
                counts = (1, 0, 0)
                errors.append((file_name, 'Fake parsing error'))
            else:
                counts = (0, 0, 0)
            result = [(file_name,) + counts]
            stubbed_returns[file_name] = result, errors
            expected_results += result
            expected_errors += errors

        def perform_validation_on_file(cdm_file_name, found_cdm_files, hpo_id,
                                       folder_prefix, bucket):
            return stubbed_returns.get(cdm_file_name)

        mock_perform_validation_on_file.side_effect = perform_validation_on_file

        actual_result = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                                 bucket_items, folder_prefix)
        self.assertListEqual(expected_results, actual_result.get('results'))
        self.assertListEqual(expected_errors, actual_result.get('errors'))
        self.assertListEqual(expected_warnings, actual_result.get('warnings'))
Example #7
0
    def test_all_files_unparseable_output(self):
        """Unparseable content for every CDM file should report found=1, parsed=0, loaded=0."""
        # TODO possible bug: if no pre-existing table, results in bq table not found error
        for submission_file in common.SUBMISSION_FILES:
            blob = self.storage_bucket.blob(
                f'{self.folder_prefix}{submission_file}')
            blob.upload_from_string('.\n .')

        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                     self.folder_prefix)
        self.assertSetEqual({(f, 1, 0, 0) for f in common.SUBMISSION_FILES},
                            set(r['results']))
 def test_bad_file_names(self):
     """Each unrecognized file name should produce an unknown-file warning."""
     bad_file_names = [
         "avisit_occurrence.csv",
         "condition_occurence.csv",  # misspelled
         "person_final.csv",
         "procedure_occurrence.tsv",  # unsupported file extension
     ]
     expected_warnings = []
     for file_name in bad_file_names:
         test_util.write_cloud_str(self.hpo_bucket,
                                   self.folder_prefix + file_name, ".")
         expected_warnings.append((file_name, common.UNKNOWN_FILE))
     bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
     r = main.validate_submission(self.hpo_id, self.hpo_bucket, bucket_items,
                                  self.folder_prefix)
     self.assertListEqual(expected_warnings, r['warnings'])
Example #9
0
    def test_bad_file_names(self):
        """Each unrecognized file name should produce an unknown-file warning."""
        bad_file_names = [
            "avisit_occurrence.csv",
            "condition_occurence.csv",  # misspelled
            "person_final.csv",
            "procedure_occurrence.tsv",  # unsupported file extension
        ]
        expected_warnings = [(name, common.UNKNOWN_FILE)
                             for name in bad_file_names]
        for file_name in bad_file_names:
            bad_blob = self.storage_bucket.blob(
                f'{self.folder_prefix}{file_name}')
            bad_blob.upload_from_string('.')
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        r = main.validate_submission(self.hpo_id, self.hpo_bucket, folder_items,
                                     self.folder_prefix)
        self.assertCountEqual(expected_warnings, r['warnings'])