Esempio n. 1
0
 def setUp(self):
     """Reset DRC bucket and dataset state, then load NYC HPO test data.

     NOTE(review): relies on project helpers (app_identity, bq_utils,
     gcs_utils, test_util) — their semantics are assumed from names; confirm.
     """
     self.app_id = app_identity.get_application_id()
     self.dataset_id = bq_utils.get_dataset_id()
     self.bucket = gcs_utils.get_drc_bucket()
     # Clean slate before loading fixtures: empty bucket, drop all tables.
     test_util.empty_bucket(self.bucket)
     test_util.delete_all_tables(self.dataset_id)
     self.load_test_data(hpo_id=HPO_NYC)
Esempio n. 2
0
 def setUp(self):
     """Empty the fake HPO's bucket and drop every table in the dataset."""
     self.hpo_bucket = gcs_utils.get_hpo_bucket(FAKE_HPO_ID)
     self.dataset = bq_utils.get_dataset_id()
     self.project_id = app_identity.get_application_id()
     self.storage_client = StorageClient(self.project_id)
     # Clean slate: no objects in the bucket, no tables in the dataset.
     self.storage_client.empty_bucket(self.hpo_bucket)
     test_util.delete_all_tables(self.dataset)
    def setUp(self):
        """Prepare a clean combined dataset with the tables this test needs.

        Reads project/dataset ids from bq_utils configuration, validates
        them, wipes all existing tables, then (re)creates 'person',
        'observation' and — only when absent — 'concept' from their
        standard schemas with all fields forced nullable.
        """
        self.project_id = bq_utils.app_identity.get_application_id()
        self.dataset_id = bq_utils.get_combined_dataset_id()
        self.sandbox_dataset_id = bq_utils.get_unioned_dataset_id()
        if not self.project_id or not self.dataset_id:
            # TODO: Fix handling of globals, push these assertions down if they are required.
            raise ValueError(
                f"missing configuration for project ('{self.project_id}') " +
                f"and/or dataset ('{self.dataset_id}')")

        # TODO: Reconcile this with a consistent integration testing model. Ideally each test should
        # clean up after itself so that we don't need this defensive check.
        test_util.delete_all_tables(self.dataset_id)

        # drop concept table
        drop_concept_table(self.dataset_id)

        # Map each table name to the schema name used to create it.
        schema_by_table = {
            'person': 'post_deid_person',
            'observation': 'observation',
            'concept': 'concept'
        }
        tables_to_create = ['person', 'observation']
        # Only (re)create the vocabulary table when it does not already exist.
        tables_to_create.extend(
            vocab_table for vocab_table in ['concept']
            if not bq_utils.table_exists(vocab_table,
                                         dataset_id=self.dataset_id))
        for table_name in tables_to_create:
            bq_utils.create_standard_table(schema_by_table[table_name],
                                           table_name,
                                           dataset_id=self.dataset_id,
                                           force_all_nullable=True)
Esempio n. 4
0
 def setUpClass(cls):
     """Print a banner with the class name, then wipe the dataset and
     repopulate the achilles fixtures."""
     banner = '**************************************************************'
     print('\n' + banner)
     print(cls.__name__)
     print(banner)
     target_dataset = bq_utils.get_dataset_id()
     test_util.delete_all_tables(target_dataset)
     test_util.populate_achilles()
Esempio n. 5
0
    def setUp(self):
        """Reset DRC bucket and dataset state, then load NYC HPO test data."""
        self.project_id = app_identity.get_application_id()
        self.dataset_id = bq_utils.get_dataset_id()
        self.bucket: str = gcs_utils.get_drc_bucket()
        self.storage_client = StorageClient(self.project_id)

        # Clean slate before loading fixtures: empty bucket, drop all tables.
        self.storage_client.empty_bucket(self.bucket)
        test_util.delete_all_tables(self.dataset_id)
        self.load_test_data(hpo_id=HPO_NYC)
Esempio n. 6
0
 def setUpClass(cls):
     """Print a banner, wipe the dataset, then stage synpuf results and
     achilles data into the fake HPO bucket."""
     banner = '**************************************************************'
     print('\n' + banner)
     print(cls.__name__)
     print(banner)
     synpuf_bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
     test_util.delete_all_tables(bq_utils.get_dataset_id())
     test_util.get_synpuf_results_files()
     test_util.populate_achilles(synpuf_bucket)
Esempio n. 7
0
 def setUp(self):
     """Reset fake-HPO state and define the field schema fixture used by
     the tests.

     Wipes all dataset tables and empties the HPO bucket so each test
     starts clean.  TEST_FIELDS is a BigQuery-style field schema covering
     one field of each supported type (kept as a literal so field order is
     preserved exactly).
     """
     self.hpo_bucket = gcs_utils.get_hpo_bucket(FAKE_HPO_ID)
     self.person_table_id = bq_utils.get_table_id(FAKE_HPO_ID,
                                                  common.PERSON)
     self.dataset_id = bq_utils.get_dataset_id()
     test_util.delete_all_tables(self.dataset_id)
     self.project_id = app_identity.get_application_id()
     # One field per supported type; all required except the DC-586 case.
     self.TEST_FIELDS = [
         {
             "type": "integer",
             "name": "integer_field",
             "mode": "required",
             "description": "An integer field"
         },
         # DC-586 Import RDR rules should support null fields
         {
             "type": "integer",
             "name": "nullable_integer_field",
             "mode": "nullable",
             "description": "A nullable integer field"
         },
         {
             "type": "string",
             "name": "string_field",
             "mode": "required",
             "description": "A string field"
         },
         {
             "type": "date",
             "name": "date_field",
             "mode": "required",
             "description": "A date field"
         },
         {
             "type": "timestamp",
             "name": "timestamp_field",
             "mode": "required",
             "description": "A timestamp field"
         },
         {
             "type": "boolean",
             "name": "boolean_field",
             "mode": "required",
             "description": "A boolean field"
         },
         {
             "type": "float",
             "name": "float_field",
             "mode": "required",
             "description": "A float field"
         }
     ]
     # strftime/strptime format for timestamp values in the fixtures.
     self.DT_FORMAT = '%Y-%m-%d %H:%M:%S'
     self.client = StorageClient(self.project_id)
     self.client.empty_bucket(self.hpo_bucket)
Esempio n. 8
0
 def setUpClass(cls):
     """Print a banner, then rebuild the EHR and RDR datasets from the
     NYC five-persons and RDR fixture files."""
     banner = '**************************************************************'
     print(banner)
     print(cls.__name__)
     print(banner)
     # TODO base class this
     ehr_ds = bq_utils.get_dataset_id()
     rdr_ds = bq_utils.get_rdr_dataset_id()
     test_util.delete_all_tables(ehr_ds)
     test_util.delete_all_tables(rdr_ds)
     cls.load_dataset_from_files(ehr_ds, test_util.NYC_FIVE_PERSONS_PATH,
                                 True)
     cls.load_dataset_from_files(rdr_ds, test_util.RDR_PATH)
    def tearDown(self):
        """Drop all tables, then rebuild `concept` from the vocabulary
        dataset so later tests see a pristine copy."""
        test_util.delete_all_tables(self.dataset_id)
        # Delete concept table
        drop_concept_table(self.dataset_id)

        # Recreate concept as a fresh copy of the vocabulary's concept table.
        recreate_sql = f"""CREATE or REPLACE table `{self.project_id}.{self.dataset_id}.concept`  as (
            SELECT * FROM `{self.project_id}.{common.VOCABULARY_DATASET}.concept`)"""
        bq_utils.query(recreate_sql)
Esempio n. 10
0
    def setUp(self):
        """Point the test at the fake HPO, stub get_hpo_name, and reset
        bucket and dataset state."""
        self.hpo_id = test_util.FAKE_HPO_ID
        self.hpo_bucket = gcs_utils.get_hpo_bucket(self.hpo_id)

        # Patch validation.main.get_hpo_name for the duration of the test.
        hpo_name_patcher = mock.patch('validation.main.get_hpo_name')
        self.mock_get_hpo_name = hpo_name_patcher.start()
        self.mock_get_hpo_name.return_value = 'Fake HPO'
        self.addCleanup(hpo_name_patcher.stop)

        self.bigquery_dataset_id = bq_utils.get_dataset_id()
        self.folder_prefix = '2019-01-01/'
        self._empty_bucket()
        test_util.delete_all_tables(self.bigquery_dataset_id)
 def setUpClass(cls):
     """Print a banner, route the module logger to stdout, and rebuild the
     EHR and RDR datasets from fixture files."""
     banner = '**************************************************************'
     print(banner)
     print(cls.__name__)
     print(banner)
     # TODO base class this
     logger.level = logging.INFO
     logger.addHandler(logging.StreamHandler(sys.stdout))
     ehr_ds = bq_utils.get_dataset_id()
     rdr_ds = bq_utils.get_rdr_dataset_id()
     test_util.delete_all_tables(ehr_ds)
     test_util.delete_all_tables(rdr_ds)
     cls.load_dataset_from_files(ehr_ds, test_util.NYC_FIVE_PERSONS_PATH,
                                 True)
     cls.load_dataset_from_files(rdr_ds, test_util.RDR_PATH)
Esempio n. 12
0
    def setUp(self):
        """Record ids, wipe dataset and bucket, stub get_hpo_name, then
        load the test data."""
        self.hpo_bucket = gcs_utils.get_hpo_bucket(FAKE_HPO_ID)
        self.project_id = app_identity.get_application_id()
        self.dataset_id = bq_utils.get_dataset_id()
        self.rdr_dataset_id = bq_utils.get_rdr_dataset_id()
        self.folder_prefix = '2019-01-01/'
        test_util.delete_all_tables(self.dataset_id)
        test_util.empty_bucket(self.hpo_bucket)

        # Patch validation.main.get_hpo_name for the duration of the test.
        hpo_name_patcher = mock.patch('validation.main.get_hpo_name')
        self.mock_get_hpo_name = hpo_name_patcher.start()
        self.mock_get_hpo_name.return_value = 'Fake HPO'
        self.addCleanup(hpo_name_patcher.stop)

        self._load_data()
Esempio n. 13
0
    def setUp(self):
        """Point the test at the fake HPO, stub get_hpo_name, reset state,
        and create the drug-class lookup table."""
        self.hpo_id = test_util.FAKE_HPO_ID
        self.hpo_bucket = gcs_utils.get_hpo_bucket(self.hpo_id)
        self.project_id = app_identity.get_application_id()
        self.rdr_dataset_id = bq_utils.get_rdr_dataset_id()

        # Patch validation.main.get_hpo_name for the duration of the test.
        hpo_name_patcher = mock.patch('validation.main.get_hpo_name')
        self.mock_get_hpo_name = hpo_name_patcher.start()
        self.mock_get_hpo_name.return_value = 'Fake HPO'
        self.addCleanup(hpo_name_patcher.stop)

        self.bigquery_dataset_id = bq_utils.get_dataset_id()
        self.folder_prefix = '2019-01-01-v1/'
        self._empty_bucket()
        test_util.delete_all_tables(self.bigquery_dataset_id)
        self._create_drug_class_table(self.bigquery_dataset_id)
    def test_execute_queries(self):
        """End-to-end check that drop_participants_without_ppi_or_ehr removes
        participants lacking both PPI and EHR data.

        Creates empty standard tables, inserts fake participants from the
        templates, runs the cleaning rule via clean_cdr_engine, then asserts
        which person_ids survive in each table.
        """
        project_id = bq_utils.app_identity.get_application_id()
        dataset_id = bq_utils.get_combined_dataset_id()
        sandbox_id = bq_utils.get_unioned_dataset_id()
        test_util.delete_all_tables(dataset_id)

        create_tables = (
            ['person'] + common.CLINICAL_DATA_TABLES +
            ['_mapping_' + t for t in common.MAPPED_CLINICAL_DATA_TABLES])
        # TODO(calbach): Make the setup/teardown of these concept tables hermetic.
        for tbl in ['concept', 'concept_ancestor']:
            if not bq_utils.table_exists(tbl, dataset_id=dataset_id):
                # Fixed: Python lists have no .push() (JS-ism); use append().
                create_tables.append(tbl)
        for tbl in create_tables:
            bq_utils.create_standard_table(tbl,
                                           tbl,
                                           dataset_id=dataset_id,
                                           force_all_nullable=True)

        for tmpl in INSERT_FAKE_PARTICIPANTS_TMPLS:
            resp = bq_utils.query(
                tmpl.render(project_id=project_id,
                            dataset_id=dataset_id,
                            rdr_basics_concept_id=123,
                            rdr_consent_concept_id=345,
                            ehr_obs_concept_id=567,
                            rdr_basics_module_concept_id=
                            drop_participants_without_ppi_or_ehr.
                            BASICS_MODULE_CONCEPT_ID))
            self.assertTrue(resp["jobComplete"])

        clean_cdr_engine.clean_dataset(
            project_id, dataset_id, sandbox_id,
            [(drop_participants_without_ppi_or_ehr.get_queries, )])

        def table_to_person_ids(t):
            """Return the set of person_ids present in table `t`."""
            rows = bq_utils.response2rows(
                bq_utils.query("SELECT person_id FROM `{}.{}.{}`".format(
                    project_id, dataset_id, t)))
            return set([r["person_id"] for r in rows])

        # We expect participants 1, 5 to have been removed from all tables.
        self.assertEqual(set([2, 3, 4, 6]), table_to_person_ids("person"))
        self.assertEqual(set([2, 4, 6]), table_to_person_ids("observation"))
        # Fixed: assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(set([3, 4]), table_to_person_ids("drug_exposure"))

        test_util.delete_all_tables(dataset_id)
Esempio n. 15
0
    def setUp(self):
        """Empty the HPO buckets, wipe input/output datasets, and record the
        mapped primary-key and implemented foreign-key field names."""
        self.project_id = bq_utils.app_identity.get_application_id()
        self.hpo_ids = [NYC_HPO_ID, PITT_HPO_ID]
        self.input_dataset_id = bq_utils.get_dataset_id()
        self.output_dataset_id = bq_utils.get_unioned_dataset_id()
        # Done in tearDown().  this is redundant.
        self._empty_hpo_buckets()
        test_util.delete_all_tables(self.input_dataset_id)
        test_util.delete_all_tables(self.output_dataset_id)

        # TODO Generalize to work for all foreign key references
        # Collect all primary key fields in CDM tables
        self.mapped_fields = [
            cdm_table + '_id' for cdm_table in cdm.tables_to_map()
        ]
        self.implemented_foreign_keys = [
            eu_constants.VISIT_OCCURRENCE_ID, eu_constants.CARE_SITE_ID,
            eu_constants.LOCATION_ID
        ]
Esempio n. 16
0
 def tearDown(self):
     """Drop every table the test created in the dataset."""
     test_util.delete_all_tables(self.dataset_id)
Esempio n. 17
0
 def setUp(self):
     """Record dataset/bucket ids and wipe the combined dataset only."""
     self.ehr_dataset_id = bq_utils.get_dataset_id()
     self.rdr_dataset_id = bq_utils.get_rdr_dataset_id()
     self.combined_dataset_id = bq_utils.get_combined_dataset_id()
     self.drc_bucket = gcs_utils.get_drc_bucket()
     # Only the combined dataset is rebuilt per-test; EHR/RDR are left alone.
     test_util.delete_all_tables(self.combined_dataset_id)
Esempio n. 18
0
 def tearDownClass(cls):
     """Drop every table in both the EHR and the RDR datasets."""
     for dataset in (bq_utils.get_dataset_id(),
                     bq_utils.get_rdr_dataset_id()):
         test_util.delete_all_tables(dataset)
Esempio n. 19
0
 def tearDown(self):
     """Empty the HPO buckets and drop tables in both datasets."""
     self._empty_hpo_buckets()
     test_util.delete_all_tables(self.input_dataset_id)
     test_util.delete_all_tables(self.output_dataset_id)
Esempio n. 20
0
 def setUp(self):
     """Empty the fake HPO's bucket and wipe the BigQuery dataset."""
     self.hpo_bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
     test_util.empty_bucket(self.hpo_bucket)
     dataset_id = bq_utils.get_dataset_id()
     test_util.delete_all_tables(dataset_id)
Esempio n. 21
0
 def tearDown(self):
     """Empty every bucket the test touched, then drop all dataset tables."""
     self.storage_client.empty_bucket(self.hpo_bucket)
     self.storage_client.empty_bucket(gcs_utils.get_hpo_bucket('nyc'))
     self.storage_client.empty_bucket(gcs_utils.get_drc_bucket())
     test_util.delete_all_tables(self.bigquery_dataset_id)
Esempio n. 22
0
 def tearDownClass(cls):
     """Drop every table in the configured dataset."""
     test_util.delete_all_tables(bq_utils.get_dataset_id())
 def tearDown(self):
     """Drop all tables, then remove the sandbox dataset entirely.

     Tables in the sandbox dataset are deleted before the dataset itself.
     """
     test_util.delete_all_tables(self.bq_dataset_id)
     test_util.delete_all_tables(self.bq_sandbox_dataset_id)
     self.client.delete_dataset(self.bq_sandbox_dataset_id)
Esempio n. 24
0
 def tearDown(self):
     """Drop all dataset tables and delete the fully-qualified concept table."""
     test_util.delete_all_tables(self.dataset_id)
     self.client.delete_table(self.fq_concept_table)
Esempio n. 25
0
 def tearDown(self):
     """Empty the test bucket and drop every table in the dataset."""
     self.storage_client.empty_bucket(self.bucket)
     test_util.delete_all_tables(self.dataset_id)
Esempio n. 26
0
 def tearDown(self):
     """Drop every dataset table and empty the HPO bucket."""
     test_util.delete_all_tables(self.dataset_id)
     self.client.empty_bucket(self.hpo_bucket)
Esempio n. 27
0
 def tearDown(self):
     """Drop every dataset table, then empty the HPO bucket."""
     dataset_id = bq_utils.get_dataset_id()
     test_util.delete_all_tables(dataset_id)
     test_util.empty_bucket(self.hpo_bucket)
Esempio n. 28
0
 def tearDown(self):
     """Empty all buckets used by the test, then drop the dataset tables."""
     self._empty_bucket()
     test_util.empty_bucket(gcs_utils.get_hpo_bucket('nyc'))
     test_util.empty_bucket(gcs_utils.get_drc_bucket())
     test_util.delete_all_tables(self.bigquery_dataset_id)
Esempio n. 29
0
 def tearDown(self):
     """Empty the test bucket and drop every table in the dataset."""
     test_util.empty_bucket(self.bucket)
     test_util.delete_all_tables(self.dataset_id)