Ejemplo n.º 1
0
    def test_curation_report_ignored(self, mock_check_cron):
        """Files on the exclude list are uploaded but never produce errors."""
        excluded = [self.folder_prefix + name for name in ["person.csv"]]
        for blob_name in excluded:
            test_util.write_cloud_str(self.hpo_bucket, blob_name, ".")

        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.VALIDATE_HPO_FILES_URL)

        # The bucket should hold the excluded uploads plus the ignore-list
        # files the validation run writes.
        expected_names = excluded + [
            self.folder_prefix + name for name in resources.IGNORE_LIST
        ]
        listing = gcs_utils.list_bucket(self.hpo_bucket)
        observed_names = []
        for entry in listing:
            relative_name = entry['name'][len(self.folder_prefix):]
            if not main._is_string_excluded_file(relative_name):
                observed_names.append(entry['name'])
        self.assertSetEqual(set(expected_names), set(observed_names))

        # Validating the submission should report no errors at all.
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        report = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                          bucket_items, self.folder_prefix)
        self.assertListEqual([], report['errors'])
Ejemplo n.º 2
0
 def test_upload_object(self):
     """Uploading a file creates exactly one object with the given name."""
     # Bucket starts out empty.
     self.assertEqual(0, len(gcs_utils.list_bucket(self.hpo_bucket)))
     with open(FIVE_PERSONS_PERSON_CSV, 'rb') as person_file:
         gcs_utils.upload_object(self.hpo_bucket, 'person.csv', person_file)
     listing = gcs_utils.list_bucket(self.hpo_bucket)
     self.assertEqual(1, len(listing))
     self.assertEqual('person.csv', listing[0]['name'])
Ejemplo n.º 3
0
def bucket_access_configured(bucket_name):
    """
    Determine if the service account has appropriate permissions on the bucket

    :param bucket_name: identifies the GCS bucket
    :return: True if the service account can list the bucket's contents
    :raises HttpError: if accessing the bucket fails (e.g. permission denied)
    """
    # A successful listing is taken as proof of access.  The previous
    # `try/except HttpError: raise` wrapper was a no-op (it re-raised
    # unconditionally) and the old docstring promised a `False` return path
    # that never existed, so both have been corrected: errors simply
    # propagate to the caller.
    gcs_utils.list_bucket(bucket_name)
    return True
Ejemplo n.º 4
0
 def tearDown(self):
     """Clean up test artifacts: empty the HPO bucket, delete every object
     in the DRC bucket and deactivate the testbed."""
     self._empty_bucket()
     # Resolve the DRC bucket once instead of re-fetching it on every
     # delete call inside the loop.
     drc_bucket = gcs_utils.get_drc_bucket()
     for bucket_item in gcs_utils.list_bucket(drc_bucket):
         gcs_utils.delete_object(drc_bucket, bucket_item['name'])
     self.testbed.deactivate()
Ejemplo n.º 5
0
    def test_pii_files_loaded(self, mock_check_cron):
        # Upload two PII files and verify they are reported as loaded.
        pii_paths = [
            test_util.PII_NAME_FILE, test_util.PII_MRN_BAD_PERSON_ID_FILE
        ]
        uploaded_names = [os.path.basename(path) for path in pii_paths]
        for pii_path in pii_paths:
            test_util.write_cloud_file(self.hpo_bucket,
                                       pii_path,
                                       prefix=self.folder_prefix)

        # Expected results come from a fixture CSV; every submission file
        # that was not uploaded should report (found=0, parsed=0, loaded=0).
        fixture_rows = resources.csv_to_list(test_util.PII_FILE_LOAD_RESULT_CSV)
        expected = [(row['file_name'], int(row['found']), int(row['parsed']),
                     int(row['loaded'])) for row in fixture_rows]
        expected.extend((name, 0, 0, 0)
                        for name in common.SUBMISSION_FILES
                        if name not in uploaded_names)

        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        result = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                          folder_items, self.folder_prefix)
        self.assertSetEqual(set(expected), set(result['results']))
Ejemplo n.º 6
0
    def test_errors_csv(self, mock_check_cron):
        folder_prefix = 'dummy-prefix-2018-03-22/'
        # Upload a malformed person.csv so validation records an error.
        test_util.write_cloud_str(self.hpo_bucket,
                                  folder_prefix + 'person.csv', ".\n .,.,.")

        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.VALIDATE_HPO_FILES_URL)

            # The folder should contain the upload plus the ignore-list files.
            listing = gcs_utils.list_bucket(self.hpo_bucket)
            found_names = [
                entry['name'] for entry in listing
                if entry['name'].startswith(folder_prefix)
            ]
            expected_names = [
                folder_prefix + name
                for name in ['person.csv'] + common.IGNORE_LIST
            ]
            self.assertSetEqual(set(found_names), set(expected_names))

            # errors.csv should flag person.csv with a single error record
            # (the free-text message column is dropped before comparing).
            raw_errors = test_util.read_cloud_file(
                self.hpo_bucket, folder_prefix + common.ERRORS_CSV)
            error_rows = resources._csv_file_to_list(
                StringIO.StringIO(raw_errors))
            for error_row in error_rows:
                error_row.pop('message', None)
            self.assertEqual(error_rows,
                             [{'file_name': 'person.csv', 'type': 'error'}])
Ejemplo n.º 7
0
    def test_errors_csv(self, mock_check_cron):
        folder_prefix = 'dummy-prefix-2018-03-22/'
        # A malformed person.csv should trigger a BigQuery load error.
        test_util.write_cloud_str(self.hpo_bucket,
                                  folder_prefix + 'person.csv', ".\n .,.,.")

        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.VALIDATE_HPO_FILES_URL)

            # The folder should contain the upload plus the ignore-list files.
            listing = gcs_utils.list_bucket(self.hpo_bucket)
            found_names = [
                entry['name'] for entry in listing
                if entry['name'].startswith(folder_prefix)
            ]
            expected_names = [
                folder_prefix + name
                for name in ['person.csv'] + common.IGNORE_LIST
            ]
            self.assertSetEqual(set(found_names), set(expected_names))

            # The generated errors file should match the stored fixture.
            actual_result = test_util.read_cloud_file(
                self.hpo_bucket, folder_prefix + common.ERRORS_CSV)
            with open(test_util.BAD_PERSON_FILE_BQ_LOAD_ERRORS_CSV,
                      'r') as fixture:
                self.assertEqual(fixture.read(), actual_result)
Ejemplo n.º 8
0
    def test_run_export(self):
        folder_prefix = 'dummy-prefix-2018-03-24/'
        main._upload_achilles_files(test_util.FAKE_HPO_ID, folder_prefix)
        main.run_export(hpo_id=test_util.FAKE_HPO_ID,
                        folder_prefix=folder_prefix)

        # Every Achilles report should land under the HPO's export prefix.
        object_names = [
            obj['name'] for obj in gcs_utils.list_bucket(self.hpo_bucket)
        ]
        export_prefix = (folder_prefix + common.ACHILLES_EXPORT_PREFIX_STRING +
                         test_util.FAKE_HPO_ID + '/')
        for report in common.ALL_REPORT_FILES:
            self.assertIn(export_prefix + report, object_names)

        # datasources.json should exist and describe the fake HPO.
        datasources_json_path = (folder_prefix +
                                 common.ACHILLES_EXPORT_DATASOURCES_JSON)
        self.assertIn(datasources_json_path, object_names)
        datasources_actual = json.loads(
            gcs_utils.get_object(self.hpo_bucket, datasources_json_path))
        self.assertDictEqual(
            {
                'datasources': [{
                    'name': test_util.FAKE_HPO_ID,
                    'folder': test_util.FAKE_HPO_ID,
                    'cdmVersion': 5
                }]
            }, datasources_actual)
Ejemplo n.º 9
0
    def test_run_export_with_target_bucket(self):
        folder_prefix = 'dummy-prefix-2018-03-24/'
        bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
        test_util.get_synpuf_results_files()
        test_util.populate_achilles(self.hpo_bucket, hpo_id=None)
        main.run_export(folder_prefix=folder_prefix, target_bucket=bucket_nyc)

        # With no hpo_id, everything is exported under the 'default' name
        # into the target bucket.
        object_names = [
            obj['name'] for obj in gcs_utils.list_bucket(bucket_nyc)
        ]
        for report in common.ALL_REPORT_FILES:
            self.assertIn(
                folder_prefix + common.ACHILLES_EXPORT_PREFIX_STRING +
                'default' + '/' + report, object_names)

        # datasources.json should exist and describe the default datasource.
        datasources_json_path = (folder_prefix +
                                 common.ACHILLES_EXPORT_DATASOURCES_JSON)
        self.assertIn(datasources_json_path, object_names)
        datasources_actual = json.loads(
            gcs_utils.get_object(bucket_nyc, datasources_json_path))
        self.assertDictEqual(
            {
                'datasources': [{
                    'name': 'default',
                    'folder': 'default',
                    'cdmVersion': 5
                }]
            }, datasources_actual)
Ejemplo n.º 10
0
    def test_validate_five_persons_success(self, mock_check_cron):
        uploaded_names = [
            os.path.basename(path) for path in test_util.FIVE_PERSONS_FILES
        ]
        expected_results = []
        # Upload the five-persons files; uploaded files should validate as
        # (found=1, parsed=1, loaded=1), everything else as all zeros.
        for cdm_file in common.SUBMISSION_FILES:
            if cdm_file in uploaded_names:
                expected_results.append((cdm_file, 1, 1, 1))
                test_util.write_cloud_file(
                    self.hpo_bucket,
                    os.path.join(test_util.FIVE_PERSONS_PATH, cdm_file),
                    prefix=self.folder_prefix)
            else:
                expected_results.append((cdm_file, 0, 0, 0))
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        result = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                          folder_items, self.folder_prefix)
        self.assertSetEqual(set(result['results']), set(expected_results))

        # Tables containing a person_id field should be clustered.
        for table in resources.CDM_TABLES + common.PII_TABLES:
            table_info = bq_utils.get_table_info(
                bq_utils.get_table_id(test_util.FAKE_HPO_ID, table))
            fields_file = os.path.join(resources.fields_path, table + '.json')
            with open(fields_file, 'r') as fields_fp:
                fields = json.load(fields_fp)
            if 'person_id' in [field['name'] for field in fields]:
                self.table_has_clustering(table_info)
Ejemplo n.º 11
0
 def test_run_export_with_target_bucket_and_datasource_id(
         self, mock_is_hpo_id):
     # validation/main.py INTEGRATION TEST
     mock_is_hpo_id.return_value = True
     folder_prefix = 'dummy-prefix-2018-03-24/'
     bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
     main.run_export(datasource_id=FAKE_HPO_ID,
                     folder_prefix=folder_prefix,
                     target_bucket=bucket_nyc)

     # All Achilles reports should be exported under the datasource prefix.
     object_names = [
         obj['name'] for obj in gcs_utils.list_bucket(bucket_nyc)
     ]
     export_prefix = (folder_prefix + common.ACHILLES_EXPORT_PREFIX_STRING +
                      FAKE_HPO_ID + '/')
     for report in common.ALL_REPORT_FILES:
         self.assertIn(export_prefix + report, object_names)

     # datasources.json should describe the requested datasource id.
     datasources_json_path = (folder_prefix +
                              common.ACHILLES_EXPORT_DATASOURCES_JSON)
     self.assertIn(datasources_json_path, object_names)
     datasources_actual = json.loads(
         gcs_utils.get_object(bucket_nyc, datasources_json_path))
     self.assertDictEqual(
         {
             'datasources': [{
                 'name': FAKE_HPO_ID,
                 'folder': FAKE_HPO_ID,
                 'cdmVersion': 5
             }]
         }, datasources_actual)
Ejemplo n.º 12
0
    def test_bad_file_names(self, mock_check_cron):
        folder_prefix = 'dummy-prefix-2018-03-22/'
        bad_file_names = [
            "person_final.csv",
            "condition_occurence.csv",  # misspelled
            "avisit_occurrence.csv",
            "procedure_occurrence.tsv",  # unsupported file extension
        ]
        bad_blob_names = [folder_prefix + name for name in bad_file_names]
        expected_result_items = []
        for blob_name in bad_blob_names:
            test_util.write_cloud_str(self.hpo_bucket, blob_name, ".")
            expected_result_items.append(
                dict(file_name=blob_name.split('/')[1],
                     message=main.UNKNOWN_FILE))

        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.VALIDATE_HPO_FILES_URL)

            # The bucket holds the bad uploads plus the ignore-list files.
            expected_bucket_items = bad_blob_names + [
                folder_prefix + name for name in common.IGNORE_LIST
            ]
            listing = gcs_utils.list_bucket(self.hpo_bucket)
            actual_bucket_items = [entry['name'] for entry in listing]
            self.assertSetEqual(set(expected_bucket_items),
                                set(actual_bucket_items))

            # Every bad file should appear in warnings.csv as unknown.
            raw_warnings = test_util.read_cloud_file(
                self.hpo_bucket, folder_prefix + common.WARNINGS_CSV)
            actual_result_items = resources._csv_file_to_list(
                StringIO.StringIO(raw_warnings))
            # sort both sides so the comparison is order-independent
            expected_result_items.sort()
            actual_result_items.sort()
            self.assertListEqual(expected_result_items, actual_result_items)
Ejemplo n.º 13
0
    def test_all_files_unparseable_output(self, mock_check_cron):
        # TODO possible bug: if no pre-existing table, results in bq table not found error
        folder_prefix = 'dummy-prefix-2018-03-22/'
        # Upload unparseable content for every CDM file.
        for cdm_table in common.CDM_FILES:
            test_util.write_cloud_str(self.hpo_bucket,
                                      folder_prefix + cdm_table, ".\n .")

        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.VALIDATE_HPO_FILES_URL)

            # The folder should contain all uploads plus ignore-list files.
            listing = gcs_utils.list_bucket(self.hpo_bucket)
            found_names = [
                entry['name'] for entry in listing
                if entry['name'].startswith(folder_prefix)
            ]
            expected_names = [
                folder_prefix + name
                for name in common.CDM_FILES + common.IGNORE_LIST
            ]
            self.assertSetEqual(set(found_names), set(expected_names))

            # result.csv: every file found, none parsed, none loaded.
            raw_result = test_util.read_cloud_file(
                self.hpo_bucket, folder_prefix + common.RESULT_CSV)
            result_rows = resources._csv_file_to_list(
                StringIO.StringIO(raw_result))
            expected_rows = [{
                'cdm_file_name': cdm_file_name,
                'found': '1',
                'parsed': '0',
                'loaded': '0'
            } for cdm_file_name in common.CDM_FILES]
            self.assertEqual(expected_rows, result_rows)
Ejemplo n.º 14
0
    def test_copy_five_persons(self, mock_check_cron):
        # Upload each five-persons file at the folder prefix and again at a
        # doubled prefix, then verify the copy endpoint mirrors both.
        for cdm_file in test_util.FIVE_PERSONS_FILES:
            for upload_prefix in (self.folder_prefix,
                                  self.folder_prefix + self.folder_prefix):
                test_util.write_cloud_file(self.hpo_bucket,
                                           cdm_file,
                                           prefix=upload_prefix)

        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.COPY_HPO_FILES_URL)
            prefix = (test_util.FAKE_HPO_ID + '/' + self.hpo_bucket + '/' +
                      self.folder_prefix)
            expected_bucket_items = [
                prefix + item.split(os.sep)[-1]
                for item in test_util.FIVE_PERSONS_FILES
            ] + [
                prefix + self.folder_prefix + item.split(os.sep)[-1]
                for item in test_util.FIVE_PERSONS_FILES
            ]

            listing = gcs_utils.list_bucket(gcs_utils.get_drc_bucket())
            actual_bucket_items = [entry['name'] for entry in listing]
            self.assertSetEqual(set(expected_bucket_items),
                                set(actual_bucket_items))
Ejemplo n.º 15
0
    def test_copy_five_persons(self, mock_check_cron):
        # Upload each five-persons file at the folder prefix and again at a
        # doubled prefix, then verify the copy endpoint mirrors both.
        for cdm_pathfile in test_util.FIVE_PERSONS_FILES:
            test_filename: str = os.path.basename(cdm_pathfile)
            for upload_prefix in (self.folder_prefix,
                                  self.folder_prefix + self.folder_prefix):
                test_blob = self.storage_bucket.blob(
                    f'{upload_prefix}{test_filename}')
                test_blob.upload_from_filename(cdm_pathfile)

        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.COPY_HPO_FILES_URL)
            prefix = (test_util.FAKE_HPO_ID + '/' + self.hpo_bucket + '/' +
                      self.folder_prefix)
            expected_bucket_items = [
                prefix + item.split(os.sep)[-1]
                for item in test_util.FIVE_PERSONS_FILES
            ] + [
                prefix + self.folder_prefix + item.split(os.sep)[-1]
                for item in test_util.FIVE_PERSONS_FILES
            ]

            listing = gcs_utils.list_bucket(gcs_utils.get_drc_bucket())
            actual_bucket_items = [entry['name'] for entry in listing]
            self.assertSetEqual(set(expected_bucket_items),
                                set(actual_bucket_items))
Ejemplo n.º 16
0
    def test_check_processed(self, mock_updated_datetime_object):
        # Pretend the last upload happened 7 minutes ago.
        mock_updated_datetime_object.return_value = (
            datetime.datetime.today() - datetime.timedelta(minutes=7))

        for fname in common.AOU_REQUIRED_FILES:
            required_blob = self.storage_bucket.blob(
                f'{self.folder_prefix}{fname}')
            required_blob.upload_from_string('\n')
            sleep(1)

        # A processed.txt marker should exclude the folder unless forced.
        marker_blob = self.storage_bucket.blob(
            f'{self.folder_prefix}{common.PROCESSED_TXT}')
        marker_blob.upload_from_string('\n')

        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        self.assertIsNone(
            main._get_submission_folder(self.hpo_bucket,
                                        bucket_items,
                                        force_process=False))
        self.assertEqual(
            main._get_submission_folder(self.hpo_bucket,
                                        bucket_items,
                                        force_process=True),
            self.folder_prefix)
Ejemplo n.º 17
0
    def test_validate_five_persons_success(self, mock_check_cron):
        uploaded_names: list = [
            os.path.basename(path) for path in test_util.FIVE_PERSONS_FILES
        ]
        expected_results: list = []

        # Upload available submission files; those should validate as
        # (found=1, parsed=1, loaded=1), everything else as all zeros.
        for cdm_filename in common.SUBMISSION_FILES:
            if cdm_filename in uploaded_names:
                expected_results.append((cdm_filename, 1, 1, 1))
                source_path: str = os.path.join(test_util.FIVE_PERSONS_PATH,
                                                cdm_filename)
                test_blob = self.storage_bucket.blob(
                    f'{self.folder_prefix}{cdm_filename}')
                test_blob.upload_from_filename(source_path)
            else:
                expected_results.append((cdm_filename, 0, 0, 0))
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        result = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                          folder_items, self.folder_prefix)
        self.assertSetEqual(set(result['results']), set(expected_results))

        # Tables containing a person_id field should be clustered.
        for table in resources.CDM_TABLES + common.PII_TABLES:
            table_info = bq_utils.get_table_info(
                bq_utils.get_table_id(test_util.FAKE_HPO_ID, table))
            field_names = [
                field['name'] for field in resources.fields_for(table)
            ]
            if 'person_id' in field_names:
                self.table_has_clustering(table_info)
Ejemplo n.º 18
0
    def _test_validate_missing_files_output(self, mock_check_cron):
        # enable exception propagation as described at https://goo.gl/LqDgnj
        folder_prefix = 'dummy-prefix-2018-03-22/'
        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.VALIDATE_HPO_FILES_URL)

            # Ignore-list files should have been written into the folder.
            item_names = [
                item['name']
                for item in gcs_utils.list_bucket(self.hpo_bucket)
            ]
            for ignore_file in common.IGNORE_LIST:
                self.assertIn(folder_prefix + ignore_file, item_names)

            # check content of result.csv is correct
            # TODO fix this for all cdm files and use object comparison
            raw_result = test_util.read_cloud_file(
                self.hpo_bucket, folder_prefix + common.RESULT_CSV)
            result_rows = resources._csv_file_to_list(
                StringIO.StringIO(raw_result))
            expected_rows = [{
                'cdm_file_name': cdm_file_name,
                'found': '0',
                'parsed': '0',
                'loaded': '0'
            } for cdm_file_name in common.REQUIRED_FILES]
            self.assertEqual(expected_rows, result_rows)
            self.assertFalse(
                main.all_required_files_loaded(test_util.FAKE_HPO_ID,
                                               folder_prefix))
Ejemplo n.º 19
0
 def test_all_files_unparseable_output(self):
     # TODO possible bug: if no pre-existing table, results in bq table not found error
     # Upload junk content for every submission file.
     for cdm_table in common.SUBMISSION_FILES:
         test_util.write_cloud_str(self.hpo_bucket,
                                   self.folder_prefix + cdm_table, ".\n .")
     bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
     # Each file should be found but neither parsed nor loaded.
     expected = [(name, 1, 0, 0) for name in common.SUBMISSION_FILES]
     result = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                       bucket_items, self.folder_prefix)
     self.assertSetEqual(set(expected), set(result['results']))
Ejemplo n.º 20
0
 def _empty_bucket(self, bucket):
     """Best-effort deletion of every object in ``bucket``.

     Listing failures (e.g. the bucket does not exist or is unreachable)
     are swallowed so that test teardown never breaks the run.
     """
     try:
         # TODO : catch specific errors
         bucket_items = gcs_utils.list_bucket(bucket)
     except Exception:
         # Was a bare `except:`, which also trapped SystemExit and
         # KeyboardInterrupt; `except Exception` keeps the best-effort
         # behavior without swallowing those.
         return
     for bucket_item in bucket_items:
         gcs_utils.delete_object(bucket, bucket_item['name'])
Ejemplo n.º 21
0
    def test_check_processed(self):
        # A folder containing processed.txt is skipped unless forced.
        for name in ('person.csv', common.PROCESSED_TXT):
            test_util.write_cloud_str(self.hpo_bucket,
                                      self.folder_prefix + name, '\n')

        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        self.assertIsNone(
            main._get_submission_folder(self.hpo_bucket,
                                        bucket_items,
                                        force_process=False))
        self.assertEqual(
            main._get_submission_folder(self.hpo_bucket,
                                        bucket_items,
                                        force_process=True),
            self.folder_prefix)
Ejemplo n.º 22
0
    def test_target_bucket_upload(self):
        bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
        folder_prefix = 'test-folder-fake/'
        test_util.empty_bucket(bucket_nyc)

        # Uploading with an explicit target bucket should place every
        # Achilles index file under the given prefix in that bucket.
        main._upload_achilles_files(hpo_id=None,
                                    folder_prefix=folder_prefix,
                                    target_bucket=bucket_nyc)
        actual_bucket_files = {
            item['name'] for item in gcs_utils.list_bucket(bucket_nyc)
        }
        expected_bucket_files = {
            'test-folder-fake/' + item
            for item in common.ALL_ACHILLES_INDEX_FILES
        }
        self.assertSetEqual(expected_bucket_files, actual_bucket_files)
Ejemplo n.º 23
0
    def test_validate_five_persons_success(self, mock_check_cron):
        prefix = 'dummy-prefix-2018-03-22/'
        expected_result_items = resources._csv_to_list(
            test_util.FIVE_PERSONS_SUCCESS_RESULT_CSV)
        json_export_files = self.get_json_export_files(test_util.FAKE_HPO_ID)

        # upload all five_persons files
        for cdm_file in test_util.FIVE_PERSONS_FILES:
            test_util.write_cloud_file(self.hpo_bucket, cdm_file,
                                       prefix=prefix)

        expected_tables = [
            'person', 'visit_occurrence', 'condition_occurrence',
            'procedure_occurrence', 'drug_exposure', 'measurement'
        ]
        cdm_files = [table + '.csv' for table in expected_tables]

        main.app.testing = True
        with main.app.test_client() as client:
            client.get(test_util.VALIDATE_HPO_FILES_URL)

            # CDM files, ignore-list files and JSON exports should all be in
            # the bucket under the prefix.
            expected_objects = [
                prefix + name for name in
                cdm_files + common.IGNORE_LIST + json_export_files
            ]
            listing = gcs_utils.list_bucket(self.hpo_bucket)
            actual_objects = [item['name'] for item in listing]
            self.assertSetEqual(set(expected_objects), set(actual_objects))

            # result.csv says every file was found, parsed and loaded.
            raw_result = test_util.read_cloud_file(
                self.hpo_bucket, prefix + common.RESULT_CSV)
            actual_result_items = resources._csv_file_to_list(
                StringIO.StringIO(raw_result))

            # sort both sides so the comparison is order-independent
            expected_result_items.sort()
            actual_result_items.sort()
            self.assertListEqual(expected_result_items, actual_result_items)
            self.assertTrue(
                main.all_required_files_loaded(test_util.FAKE_HPO_ID,
                                               folder_prefix=prefix))

        # check tables exist and are clustered as expected
        for table in expected_tables:
            table_id = bq_utils.get_table_id(test_util.FAKE_HPO_ID, table)
            table_info = bq_utils.get_table_info(table_id)
            fields_file = os.path.join(resources.fields_path, table + '.json')
            with open(fields_file, 'r') as fields_fp:
                fields = json.load(fields_fp)
            if 'person_id' in [field['name'] for field in fields]:
                self.table_has_clustering(table_info)
Ejemplo n.º 24
0
def list_bucket(bucket):
    """
    List objects in a GCS bucket, translating a 404 into a domain error.

    :param bucket: identifies the GCS bucket
    :return: the listing produced by gcs_utils.list_bucket
    :raises BucketDoesNotExistError: when the bucket is not found (HTTP 404)
    :raises HttpError: for any other HTTP failure
    """
    try:
        return gcs_utils.list_bucket(bucket)
    except HttpError as err:
        # Only a 404 is translated; every other status propagates unchanged.
        if err.resp.status == 404:
            raise BucketDoesNotExistError('Failed to list objects in bucket ',
                                          bucket)
        raise
    # The former `except Exception: raise` clause was a no-op and has been
    # removed; non-HttpError exceptions propagate naturally.
Ejemplo n.º 25
0
    def test_check_processed(self):
        folder_prefix = 'folder/'
        # A folder marked with processed.txt is excluded unless forced.
        for name in ('person.csv', common.PROCESSED_TXT):
            test_util.write_cloud_str(self.hpo_bucket, folder_prefix + name,
                                      '\n')

        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        self.assertListEqual(
            [],
            main._get_to_process_list(self.hpo_bucket,
                                      bucket_items,
                                      force_process=False))
        self.assertListEqual(
            main._get_to_process_list(self.hpo_bucket,
                                      bucket_items,
                                      force_process=True), [folder_prefix])
Ejemplo n.º 26
0
def list_bucket(bucket):
    """List the objects in ``bucket``, mapping a 404 to a domain error.

    Any other HttpError propagates unchanged; unexpected exceptions are
    logged with a traceback and then re-raised.
    """
    try:
        return gcs_utils.list_bucket(bucket)
    except HttpError as err:
        if err.resp.status != 404:
            raise
        raise BucketDoesNotExistError(
            f"Failed to list objects in bucket {bucket}", bucket)
    except Exception as e:
        msg = getattr(e, 'message', repr(e))
        logging.exception(f"Unknown error {msg}")
        raise
Ejemplo n.º 27
0
 def test_bad_file_names(self):
     # Unknown names (misspelled or unsupported extension) should each
     # produce an UNKNOWN_FILE warning.
     bad_file_names = [
         "avisit_occurrence.csv",
         "condition_occurence.csv",  # misspelled
         "person_final.csv",
         "procedure_occurrence.tsv",  # unsupported file extension
     ]
     expected_warnings = []
     for file_name in bad_file_names:
         test_util.write_cloud_str(self.hpo_bucket,
                                   self.folder_prefix + file_name, ".")
         expected_warnings.append((file_name, common.UNKNOWN_FILE))
     bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
     result = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                       bucket_items, self.folder_prefix)
     self.assertListEqual(expected_warnings, result['warnings'])
Ejemplo n.º 28
0
    def test_all_files_unparseable_output(self):
        # TODO possible bug: if no pre-existing table, results in bq table not found error
        # Upload junk content for every submission file.
        for cdm_table in common.SUBMISSION_FILES:
            cdm_blob = self.storage_bucket.blob(
                f'{self.folder_prefix}{cdm_table}')
            cdm_blob.upload_from_string('.\n .')

        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        # Each file should be found but neither parsed nor loaded.
        expected = [(name, 1, 0, 0) for name in common.SUBMISSION_FILES]
        result = main.validate_submission(self.hpo_id, self.hpo_bucket,
                                          folder_items, self.folder_prefix)
        self.assertSetEqual(set(expected), set(result['results']))
Ejemplo n.º 29
0
    def test_folder_list(self):
        # Only the latest versioned folder should be queued for processing;
        # the root-level file is ignored entirely.
        version_prefixes = [
            'dummy-prefix-2018-03-22-v1/',
            'dummy-prefix-2018-03-22-v2/',
            'dummy-prefix-2018-03-22-v3/',
        ]
        uploads = [
            version_prefixes[0] + 'person.csv',
            version_prefixes[1] + 'blah.csv',
            version_prefixes[2] + 'visit_occurrence.csv',
            'person.csv',
        ]
        for filename in uploads:
            test_util.write_cloud_str(self.hpo_bucket, filename, ".\n .")

        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_list = main._get_to_process_list(self.hpo_bucket, bucket_items)
        self.assertListEqual(folder_list, [version_prefixes[2]])
Ejemplo n.º 30
0
    def _test_site_generation(self, mock_check_cron):
        """Generate the DRC site and verify only the expected pages appear."""
        self._empty_drc_bucket()
        self._empty_hpo_buckets()
        with main.app.test_request_context():
            result = main._generate_site()
            self.assertEqual(result, 'okay')

            # verify that page worked
            bucket = gcs_utils.get_drc_bucket()
            # Build the expected list with a comprehension: the original
            # `map(...) + [...]` only worked on Python 2, where map()
            # returned a list.  (assertEquals is also a deprecated alias.)
            expected_files = [
                name + '.html' for name in main.PAGE_NAMES
            ] + [common.LOG_JSON]
            file_count = 0
            for stat in gcs_utils.list_bucket(bucket):
                filename = stat['name']
                self.assertIn(filename, expected_files)
                file_count += 1
            self.assertEqual(file_count, len(expected_files))