Code Example #1
File: required_labs.py Project: rfrancis1/curation
def get_lab_concept_summary_query(hpo_id):
    """
    Get the query that checks if the HPO site has submitted the required labs
    :param hpo_id: 
    :return: 
    """
    project_id = app_identity.get_application_id()
    dataset_id = bq_utils.get_dataset_id()
    hpo_measurement_table = bq_utils.get_table_id(hpo_id, common.MEASUREMENT)

    # Create measurement_concept_sets_table if it does not exist
    if not bq_utils.table_exists(MEASUREMENT_CONCEPT_SETS_TABLE, dataset_id):
        load_measurement_concept_sets_table(project_id, dataset_id)

    # Create measurement_concept_sets_descendants_table if it does not exist
    if not bq_utils.table_exists(MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE,
                                 dataset_id):
        load_measurement_concept_sets_descendants_table(project_id, dataset_id)

    return CHECK_REQUIRED_LAB_QUERY.format(
        project_id=project_id,
        ehr_ops_dataset_id=dataset_id,
        hpo_measurement_table=hpo_measurement_table,
        measurement_concept_sets_descendants=
        MEASUREMENT_CONCEPT_SETS_DESCENDANTS_TABLE)
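Since the function returns a fully formatted query string, a caller could run it with the same bq_utils helpers seen throughout these examples. A minimal usage sketch, not from the original project; 'fake_hpo' is a placeholder HPO id:

# Minimal usage sketch; 'fake_hpo' is a placeholder HPO id
query = get_lab_concept_summary_query('fake_hpo')
response = bq_utils.query(query)
rows = bq_utils.response2rows(response)
print(rows)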
Code Example #2
    def _load_data(self):

        # Load measurement_concept_sets
        required_labs.load_measurement_concept_sets_table(
            project_id=self.project_id, dataset_id=self.dataset_id)
        # Load measurement_concept_sets_descendants
        required_labs.load_measurement_concept_sets_descendants_table(
            project_id=self.project_id, dataset_id=self.dataset_id)

        # Create empty concept and concept_ancestor tables if they do not exist
        if not bq_utils.table_exists(common.CONCEPT, self.dataset_id):
            bq_utils.create_standard_table(common.CONCEPT, common.CONCEPT)
        if not bq_utils.table_exists(common.CONCEPT_ANCESTOR, self.dataset_id):
            bq_utils.create_standard_table(common.CONCEPT_ANCESTOR,
                                           common.CONCEPT_ANCESTOR)

        # measurement.csv must be loaded into the dataset in advance for the other integration tests
        ehr_measurement_result = bq_utils.load_table_from_csv(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            table_name=bq_utils.get_table_id(FAKE_HPO_ID, common.MEASUREMENT),
            csv_path=test_util.FIVE_PERSONS_MEASUREMENT_CSV,
            fields=resources.fields_for(common.MEASUREMENT))
        bq_utils.wait_on_jobs(
            [ehr_measurement_result['jobReference']['jobId']])
Code Example #3
    def test_copy_rdr_tables(self):
        for table in RDR_TABLES_TO_COPY:
            self.assertFalse(
                bq_utils.table_exists(
                    table, self.combined_dataset_id))  # sanity check
            copy_rdr_table(table)
            actual = bq_utils.table_exists(table, self.combined_dataset_id)
            self.assertTrue(
                actual,
                msg='RDR table {table} should be copied'.format(table=table))

            # Check that row count in combined is same as rdr
            query = (
                'WITH rdr AS '
                ' (SELECT COUNT(1) n FROM `{rdr_dataset_id}.{table}`), '
                'combined AS '
                ' (SELECT COUNT(1) n FROM `{combined_dataset_id}.{table}`) '
                'SELECT '
                'rdr.n AS rdr_count, '
                'combined.n AS combined_count '
                'FROM rdr, combined ').format(
                    rdr_dataset_id=self.rdr_dataset_id,
                    combined_dataset_id=self.combined_dataset_id,
                    table=table)
            response = bq_utils.query(query)
            rows = bq_utils.response2rows(response)
            self.assertEqual(len(rows), 1)  # sanity check
            row = rows[0]
            rdr_count, combined_count = row['rdr_count'], row['combined_count']
            msg_fmt = 'Table {table} has {rdr_count} in rdr and {combined_count} in combined (expected to be equal)'
            self.assertEqual(
                rdr_count, combined_count,
                msg_fmt.format(table=table,
                               rdr_count=rdr_count,
                               combined_count=combined_count))
Code Example #4
    def test_consented_person_id(self):
        """
        Test observation data has seven (7) persons with consent records as described below
         1: No
         2: Yes
         3: NULL
         4: No  followed by Yes
         5: Yes followed by No
         6: Yes followed by NULL
         7: NULL and Yes with same date/time
        """
        # sanity check
        # pre-conditions
        self.assertFalse(
            bq_utils.table_exists(EHR_CONSENT_TABLE_ID,
                                  self.combined_dataset_id))

        # test
        ehr_consent()

        # post conditions
        self.assertTrue(
            bq_utils.table_exists(EHR_CONSENT_TABLE_ID,
                                  self.combined_dataset_id),
            'Table {dataset}.{table} created by consented_person'.format(
                dataset=self.combined_dataset_id, table=EHR_CONSENT_TABLE_ID))
        response = bq_utils.query('SELECT * FROM {dataset}.{table}'.format(
            dataset=self.combined_dataset_id, table=EHR_CONSENT_TABLE_ID))
        rows = bq_utils.response2rows(response)
        expected = {2, 4}
        actual = set(row['person_id'] for row in rows)
        self.assertSetEqual(
            expected, actual, 'Records in {dataset}.{table}'.format(
                dataset=self.combined_dataset_id, table=EHR_CONSENT_TABLE_ID))
Code Example #5
    def test_integration_create_drug_route_mappings_table(self):
        if bq_utils.table_exists(populate_route_ids.DRUG_ROUTES_TABLE_ID,
                                 dataset_id=self.dataset_id):
            bq_utils.delete_table(populate_route_ids.DRUG_ROUTES_TABLE_ID,
                                  dataset_id=self.dataset_id)

        if not bq_utils.table_exists(
                populate_route_ids.DOSE_FORM_ROUTES_TABLE_ID,
                dataset_id=self.dataset_id):
            populate_route_ids.create_dose_form_route_mappings_table(
                self.project_id, self.dataset_id)

        populate_route_ids.create_drug_route_mappings_table(
            self.project_id, self.dataset_id,
            populate_route_ids.DOSE_FORM_ROUTES_TABLE_ID,
            self.route_mapping_prefix)
        time.sleep(10)
        query = ("SELECT COUNT(*) AS n "
                 "FROM `{project_id}.{dataset_id}.{table_id}`").format(
                     project_id=self.project_id,
                     dataset_id=self.dataset_id,
                     table_id=populate_route_ids.DRUG_ROUTES_TABLE_ID)

        result = bq_utils.query(query)
        actual = bq_utils.response2rows(result)
        self.assertGreater(actual[0]["n"], 0)
Code Example #6
def run_analyses(hpo_id):
    """
    Run the achilles analyses
    :param hpo_id:
    :return:
    """
    commands = _get_run_analysis_commands(hpo_id)
    for command in commands:
        logging.debug(' ---- Running `%s`...', command)
        if sql_wrangle.is_to_temp_table(command):
            table_id = sql_wrangle.get_temp_table_name(command)
            query = sql_wrangle.get_temp_table_query(command)
            insert_query_job_result = bq_utils.query(query, False, table_id)
            query_job_id = insert_query_job_result['jobReference']['jobId']

            incomplete_jobs = bq_utils.wait_on_jobs([query_job_id])
            if len(incomplete_jobs) > 0:
                logging.critical('tempresults table was not created within 15 seconds')
                raise RuntimeError('tempresults table took too long to create')
        elif sql_wrangle.is_truncate(command):
            table_id = sql_wrangle.get_truncate_table_name(command)
            if bq_utils.table_exists(table_id):
                bq_utils.delete_table(table_id)
        elif sql_wrangle.is_drop(command):
            table_id = sql_wrangle.get_drop_table_name(command)
            if bq_utils.table_exists(table_id):
                bq_utils.delete_table(table_id)
        else:
            bq_utils.query(command)
Code Example #7
 def test_create_cdm_tables(self):
     # Sanity check
     for table in common.CDM_TABLES:
         self.assertFalse(bq_utils.table_exists(table, self.combined_dataset_id))
     create_cdm_tables()
     for table in common.CDM_TABLES:
         actual = bq_utils.table_exists(table, self.combined_dataset_id)
         self.assertTrue(actual, 'Table {table} not created in combined dataset'.format(table=table))
Code Example #8
File: main.py Project: berneskaracay/curation
def get_query_result(hpo_id,
                     query_string,
                     table_id,
                     query_wrapper,
                     is_subquery,
                     app_id=None,
                     dataset_id=None):
    """
    :param hpo_id: the name of the hpo_id for which validation is being done
    :param query_string: variable name of the query string stored in the constants
    :param table_id: Name of the table running analysis on
    :param query_wrapper: wrapper over the unioned query if required
    :param is_subquery: binary flag(true/false) to indicate if parsing is needed or not.
    :param app_id: name of the big query application id
    :param dataset_id: name of the big query dataset id
    :return: returns dictionary of rows
    """
    if app_id is None:
        app_id = app_identity.get_application_id()
    if dataset_id is None:
        dataset_id = bq_utils.get_dataset_id()
    query = None
    result = None
    if is_subquery:
        sub_queries = []
        for table in cdm.tables_to_map():
            hpo_table = '{hpo_id}_{table_name}'.format(hpo_id=hpo_id,
                                                       table_name=table)
            if bq_utils.table_exists(hpo_table):
                sub_query = query_string.format(hpo_id=hpo_id,
                                                app_id=app_id,
                                                dataset_id=dataset_id,
                                                domain_table=table)
                sub_queries.append(sub_query)
        unioned_query = main_constants.UNION_ALL.join(sub_queries)
        if unioned_query and query_wrapper is not None:
            query = query_wrapper.format(union_of_subqueries=unioned_query)
        else:
            query = unioned_query
    else:
        table_name = '{hpo_name}_{results_table}'.format(
            hpo_name=hpo_id, results_table=table_id)
        if bq_utils.table_exists(table_name):
            query = query_string.format(application=app_id,
                                        dataset=dataset_id,
                                        table_id=table_name)
    if query:
        # Found achilles_heel_results table(s), run the query
        response = bq_utils.query(query)
        result = bq_utils.response2rows(response)
    if result is None:
        result = []
    return result
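A hedged sketch of invoking this for the non-subquery case; the query template below is an illustrative placeholder, not a confirmed constant from the project, though achilles_heel_results is the results table mentioned in the code above:

# Hypothetical invocation; the template string is an illustrative placeholder
QUERY_TMPL = 'SELECT COUNT(*) AS n FROM `{application}.{dataset}.{table_id}`'
rows = get_query_result(hpo_id='fake_hpo',
                        query_string=QUERY_TMPL,
                        table_id='achilles_heel_results',
                        query_wrapper=None,
                        is_subquery=False)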
Code Example #9
def get_id_deduplicate_queries(project_id, dataset_id):
    """
    This function gets the queries required to remove rows with duplicate ids from a dataset

    :param project_id: Project name
    :param dataset_id: Name of the dataset where a rule should be applied
    :return: a list of queries.
    """
    queries = []
    tables_with_primary_key = cdm.tables_to_map()
    for table in tables_with_primary_key:
        if 'unioned' in dataset_id:
            table_name = 'unioned_ehr_{table}'.format(table=table)
        else:
            table_name = table
        if bq_utils.table_exists(table_name, dataset_id):
            fields = resources.fields_for(table)
            # Generate column expressions for select
            col_exprs = [field['name'] for field in fields]
            cols = ',\n        '.join(col_exprs)
            query = ID_DE_DUP_QUERY.format(columns=cols,
                                           project_id=project_id,
                                           dataset_id=dataset_id,
                                           domain_table=table,
                                           table_name=table_name)
            queries.append(query)
    return queries
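The function only builds the queries; a caller still has to run them. A minimal sketch using bq_utils.query as in the other examples, with placeholder project and dataset names:

# Sketch: execute each generated de-duplication query; ids below are placeholders
for q in get_id_deduplicate_queries('my-project', 'unioned_ehr_dataset'):
    bq_utils.query(q)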
Code Example #10
    def setUp(self):
        self.project_id = bq_utils.app_identity.get_application_id()
        self.dataset_id = bq_utils.get_combined_dataset_id()
        self.sandbox_dataset_id = bq_utils.get_unioned_dataset_id()
        if not self.project_id or not self.dataset_id:
            # TODO: Fix handling of globals, push these assertions down if they are required.
            raise ValueError(
                f"missing configuration for project ('{self.project_id}') " +
                f"and/or dataset ('{self.dataset_id}')")

        # TODO: Reconcile this with a consistent integration testing model. Ideally each test should
        # clean up after itself so that we don't need this defensive check.
        test_util.delete_all_tables(self.dataset_id)

        # drop concept table
        drop_concept_table(self.dataset_id)

        create_tables = ['person', 'observation']
        table_fields = {
            'person': 'post_deid_person',
            'observation': 'observation',
            'concept': 'concept'
        }
        for tbl in ['concept']:
            if not bq_utils.table_exists(tbl, dataset_id=self.dataset_id):
                create_tables.append(tbl)
        for tbl in create_tables:
            bq_utils.create_standard_table(table_fields[tbl],
                                           tbl,
                                           dataset_id=self.dataset_id,
                                           force_all_nullable=True)
Code Example #11
File: common_heel_errors.py Project: cl3777/curation
def most_common_heel_errors(app_id=None, dataset_id=None, hpo_ids=None):
    """
    :param app_id: Application Id
    :param dataset_id: Dataset Id
    :param hpo_ids: list of Hpo_ids
    :return: None
    """
    heel_errors = list()
    if app_id is None:
        app_id = app_identity.get_application_id()
    if dataset_id is None:
        dataset_id = bq_utils.get_dataset_id()
    if not os.path.exists(HEEL_ERRORS_JSON) and not os.path.exists(
            HEEL_ERRORS_CSV):
        for hpo_id in hpo_ids:
            if bq_utils.table_exists(
                    table_id='{hpo_id}_achilles_heel_results'.format(
                        hpo_id=hpo_id),
                    dataset_id=dataset_id):
                query = heel_error_query.format(app_id=app_id,
                                                dataset_id=dataset_id,
                                                hpo_id=hpo_id)
                query_job = bq_utils.query(query)
                result = bq_utils.response2rows(query_job)
                heel_errors.extend(result)
    with open(HEEL_ERRORS_JSON, 'w') as fp:
        json.dump(heel_errors, fp, sort_keys=True, indent=4)
    parse_json_csv()
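Since app_id and dataset_id fall back to environment defaults, only hpo_ids is strictly required. A sketch call with placeholder HPO ids:

# Sketch call; the HPO ids are placeholders
most_common_heel_errors(hpo_ids=['fake_hpo_1', 'fake_hpo_2'])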
Code Example #12
def drop_concept_table(dataset_id):
    if bq_utils.table_exists(common.CONCEPT, dataset_id):
        q = "DROP TABLE {dataset}.concept;".format(dataset=dataset_id)
        try:
            bq_utils.query(q)
        except HttpError as err:
            if err.resp.status != 404:
                raise
Code Example #13
File: bq_utils_test.py Project: all-of-us/curation
 def test_create_standard_table(self):
     standard_tables = list(resources.CDM_TABLES) + ACHILLES_TABLES
     for standard_table in standard_tables:
         table_id = f'prefix_for_test_{standard_table}'
         result = bq_utils.create_standard_table(standard_table, table_id)
         self.assertTrue('kind' in result)
         self.assertEqual(result['kind'], 'bigquery#table')
         # sanity check
         self.assertTrue(bq_utils.table_exists(table_id))
Code Example #14
def exist_participant_match(ehr_dataset_id, hpo_id):
    """
    This function checks if the hpo has submitted the participant_match data 
    
    :param ehr_dataset_id: 
    :param hpo_id: 
    :return: 
    """
    return bq_utils.table_exists(
        bq_utils.get_table_id(hpo_id, PARTICIPANT_MATCH), ehr_dataset_id)
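Because the helper returns a boolean, it can gate downstream validation. A sketch with a placeholder HPO id, reusing bq_utils.get_dataset_id() as in the other examples:

# Sketch: skip validation when the site has not submitted participant_match
ehr_dataset_id = bq_utils.get_dataset_id()
if not exist_participant_match(ehr_dataset_id, 'fake_hpo'):
    print('participant_match missing; skipping validation for this HPO')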
Code Example #15
    def test_validation_creation_and_population(self, mock_table_schema,
                                                mock_fields_for):
        # Preconditions
        mock_table_schema.return_value = self.schema
        mock_fields_for.return_value = self.id_match_fields

        expected = [{
            'person_id': 1,
            'first_name': 'missing_ehr',
            'last_name': 'missing_ehr',
            'algorithm': 'no'
        }, {
            'person_id': 2,
            'first_name': 'missing_rdr',
            'last_name': 'missing_ehr',
            'algorithm': 'no'
        }, {
            'person_id': 3,
            'first_name': 'missing_ehr',
            'last_name': 'missing_rdr',
            'algorithm': 'no'
        }, {
            'person_id': 4,
            'first_name': 'missing_rdr',
            'last_name': 'missing_rdr',
            'algorithm': 'no'
        }]

        # Creates validation table if it does not already exist
        # Will need to be created if this test is run individually
        if not bq_utils.table_exists(self.id_match_table_id, self.dataset_id):
            id_validation.create_drc_validation_table(
                self.client,
                self.project_id,
                self.id_match_table_id,
                drc_dataset_id=self.dataset_id)

        # Test validation table population
        id_validation.populate_validation_table(self.client,
                                                self.project_id,
                                                self.id_match_table_id,
                                                self.hpo_id,
                                                drc_dataset_id=self.dataset_id)

        query_contents = CONTENT_QUERY.render(
            project_id=self.project_id,
            drc_dataset_id=self.dataset_id,
            id_match_table_id=self.id_match_table_id)

        content_job = self.client.query(query_contents)
        contents = list(content_job.result())
        actual = [dict(row.items()) for row in contents]

        self.assertCountEqual(actual, expected)
Code Example #16
 def test_create_table(self):
     table_id = 'some_random_table_id'
     fields = [
         dict(name='id', type='integer', mode='required'),
         dict(name='name', type='string', mode='nullable')
     ]
     result = bq_utils.create_table(table_id, fields)
     self.assertTrue('kind' in result)
     self.assertEqual(result['kind'], 'bigquery#table')
     # sanity check
     self.assertTrue(bq_utils.table_exists(table_id))
Code Example #17
def create_metadata_table(dataset_id, fields_list):
    """
    Creates a metadata table in a given dataset.
    :param dataset_id: name of the dataset
    :param fields_list: list of field definitions for the metadata table
    :return:
    """
    if not bq_utils.table_exists(METADATA_TABLE, dataset_id):
        bq_utils.create_table(table_id=METADATA_TABLE,
                              fields=fields_list,
                              dataset_id=dataset_id)
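A sketch of a call, using the same field-dictionary shape shown in Code Example #16; the field names here are illustrative only:

# Illustrative fields; real metadata fields come from the project's schema definitions
metadata_fields = [
    dict(name='etl_version', type='string', mode='nullable'),
    dict(name='copied_from', type='string', mode='nullable')
]
create_metadata_table('my_dataset', metadata_fields)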
Code Example #18
def main(args):
    hpo_id = args.hpo_id
    for table_name in common.CDM_TABLES:
        table_id = hpo_id + '_' + table_name
        if bq_utils.table_exists(table_id):
            print(table_id, 'exists')
        else:
            print(table_id, 'being created')
            bq_utils.create_standard_table(table_name, table_id, False)

    _run_achilles(hpo_id)
    _run_export(hpo_id)
Code Example #19
 def test_create_table_drop_existing_success(self):
     table_id = 'some_random_table_id'
     fields = [dict(name='id', type='integer', mode='required'),
               dict(name='name', type='string', mode='nullable')]
     result_1 = bq_utils.create_table(table_id, fields)
     # sanity check
     table_id = result_1['tableReference']['tableId']
     self.assertTrue(bq_utils.table_exists(table_id))
     result_2 = bq_utils.create_table(table_id, fields, drop_existing=True)
     # same id and second one created after first one
     self.assertEqual(result_1['id'], result_2['id'])
     self.assertGreater(result_2['creationTime'], result_1['creationTime'])
Code Example #20
File: achilles.py Project: dcampbell-vumc/curation
def drop_or_truncate_table(command):
    """
    Deletes or truncates table
    Previously, deletion was used for both truncate and drop, and this function retains the behavior
    :param command: query to run
    :return: None
    """
    if sql_wrangle.is_truncate(command):
        table_id = sql_wrangle.get_truncate_table_name(command)
    else:
        table_id = sql_wrangle.get_drop_table_name(command)
    if bq_utils.table_exists(table_id):
        bq_utils.delete_table(table_id)
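The exact command text recognized by sql_wrangle is project-specific; assuming it parses a plain DROP statement, a call might look like this, with a placeholder table name:

# Sketch only: assumes sql_wrangle recognizes this command form; the table name is a placeholder
drop_or_truncate_table('DROP TABLE tempresults')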
Code Example #21
def main(args):
    hpo_id = args.hpo_id
    folder = args.folder
    folder_prefix = folder + '/'
    for table_name in common.CDM_TABLES:
        table_id = hpo_id + '_' + table_name
        if bq_utils.table_exists(table_id):
            print(table_id, 'exists')
        else:
            print(table_id, 'being created')
            bq_utils.create_standard_table(table_name, table_id, False)

    _run_achilles(hpo_id)
    _run_export(hpo_id, folder_prefix)
    _upload_achilles_files(hpo_id, folder_prefix)
Code Example #22
def main(args):
    folder = args.folder
    target_bucket = args.bucket
    folder_prefix = folder + '/'
    for table_name in common.CDM_TABLES:
        table_id = table_name
        if bq_utils.table_exists(table_id):
            print(table_id, 'exists')
        else:
            print(table_id, 'being created')
            bq_utils.create_standard_table(table_name, table_id, False)

    _run_achilles()
    _run_export(folder_prefix=folder_prefix, target_bucket=target_bucket)
    _upload_achilles_files(folder_prefix=folder_prefix,
                           target_bucket=target_bucket)
Code Example #23
    def test_execute_queries(self):
        project_id = bq_utils.app_identity.get_application_id()
        dataset_id = bq_utils.get_combined_dataset_id()
        sandbox_id = bq_utils.get_unioned_dataset_id()
        test_util.delete_all_tables(dataset_id)

        create_tables = (
            ['person'] + common.CLINICAL_DATA_TABLES +
            ['_mapping_' + t for t in common.MAPPED_CLINICAL_DATA_TABLES])
        # TODO(calbach): Make the setup/teardown of these concept tables hermetic.
        for tbl in ['concept', 'concept_ancestor']:
            if not bq_utils.table_exists(tbl, dataset_id=dataset_id):
                create_tables.append(tbl)
        for tbl in create_tables:
            bq_utils.create_standard_table(tbl,
                                           tbl,
                                           dataset_id=dataset_id,
                                           force_all_nullable=True)

        for tmpl in INSERT_FAKE_PARTICIPANTS_TMPLS:
            resp = bq_utils.query(
                tmpl.render(project_id=project_id,
                            dataset_id=dataset_id,
                            rdr_basics_concept_id=123,
                            rdr_consent_concept_id=345,
                            ehr_obs_concept_id=567,
                            rdr_basics_module_concept_id=
                            drop_participants_without_ppi_or_ehr.
                            BASICS_MODULE_CONCEPT_ID))
            self.assertTrue(resp["jobComplete"])

        clean_cdr_engine.clean_dataset(
            project_id, dataset_id, sandbox_id,
            [(drop_participants_without_ppi_or_ehr.get_queries, )])

        def table_to_person_ids(t):
            rows = bq_utils.response2rows(
                bq_utils.query("SELECT person_id FROM `{}.{}.{}`".format(
                    project_id, dataset_id, t)))
            return set([r["person_id"] for r in rows])

        # We expect participants 1, 5 to have been removed from all tables.
        self.assertEqual(set([2, 3, 4, 6]), table_to_person_ids("person"))
        self.assertEqual(set([2, 4, 6]), table_to_person_ids("observation"))
        self.assertEqual(set([3, 4]), table_to_person_ids("drug_exposure"))

        test_util.delete_all_tables(dataset_id)
Code Example #24
    def _load_dataset(self, hpo_id):
        for cdm_table in resources.CDM_TABLES:
            cdm_file_name = os.path.join(test_util.FIVE_PERSONS_PATH,
                                         cdm_table + '.csv')
            if os.path.exists(cdm_file_name):
                test_util.write_cloud_file(self.hpo_bucket, cdm_file_name)
            else:
                test_util.write_cloud_str(self.hpo_bucket, cdm_table + '.csv',
                                          'dummy\n')
            bq_utils.load_cdm_csv(hpo_id, cdm_table)

        # ensure concept table exists
        if not bq_utils.table_exists(common.CONCEPT):
            bq_utils.create_standard_table(common.CONCEPT, common.CONCEPT)
            q = """INSERT INTO {dataset}.concept
            SELECT * FROM {vocab}.concept""".format(
                dataset=self.dataset, vocab=common.VOCABULARY_DATASET)
            bq_utils.query(q)
Code Example #25
def main():
    parser = get_arg_parser()
    args = parser.parse_args()

    # get credentials and create client
    impersonation_creds = auth.get_impersonation_credentials(
        args.run_as_email, SCOPES)

    client = bq.get_client(args.project_id, credentials=impersonation_creds)

    table_id = f'{IDENTITY_MATCH_TABLE}_{args.hpo_id}'

    # Creates hpo_site identity match table if it does not exist
    if not table_exists(table_id, DRC_OPS):
        create_drc_validation_table(client, args.project_id, table_id)

    # Populates the validation table for the site
    populate_validation_table(client, args.project_id, table_id, args.hpo_id)
Code Example #26
def load_deid_map_table(deid_map_dataset_name, age_limit):

    # Create _deid_map table in input dataset
    project_id = app_identity.get_application_id()
    client = bq.get_client(project_id)
    deid_map_table = f'{project_id}.{deid_map_dataset_name}.{DEID_MAP_TABLE}'
    # Copy master _deid_map table records to _deid_map table
    if bq_utils.table_exists(DEID_MAP_TABLE,
                             dataset_id=PIPELINE_TABLES_DATASET):
        copy_deid_map_table(deid_map_table, project_id,
                            PIPELINE_TABLES_DATASET, deid_map_dataset_name,
                            age_limit, client)
        logging.info(
            f"copied participants younger than {age_limit} to the table {deid_map_table}"
        )
    else:
        raise RuntimeError(
            f'{DEID_MAP_TABLE} is not available in {project_id}.{PIPELINE_TABLES_DATASET}'
        )
Code Example #27
    def _load_dataset(self, hpo_id):
        for cdm_table in resources.CDM_TABLES:

            cdm_filename: str = f'{cdm_table}.csv'
            cdm_filepath: str = os.path.join(test_util.FIVE_PERSONS_PATH,
                                             cdm_filename)

            bucket = self.storage_client.get_bucket(self.hpo_bucket)
            cdm_blob = bucket.blob(cdm_filename)
            if os.path.exists(cdm_filepath):
                cdm_blob.upload_from_filename(cdm_filepath)
            else:
                cdm_blob.upload_from_string('dummy\n')

            bq_utils.load_cdm_csv(hpo_id, cdm_table)

        # ensure concept table exists
        if not bq_utils.table_exists(common.CONCEPT):
            bq_utils.create_standard_table(common.CONCEPT, common.CONCEPT)
            q = """INSERT INTO {dataset}.concept
            SELECT * FROM {vocab}.concept""".format(
                dataset=self.dataset, vocab=common.VOCABULARY_DATASET)
            bq_utils.query(q)
Code Example #28
    def _create_drug_class_table(bigquery_dataset_id):

        table_name = 'drug_class'
        fields = [{
            "type": "integer",
            "name": "concept_id",
            "mode": "required"
        }, {
            "type": "string",
            "name": "concept_name",
            "mode": "required"
        }, {
            "type": "string",
            "name": "drug_class_name",
            "mode": "required"
        }]
        bq_utils.create_table(table_id=table_name,
                              fields=fields,
                              drop_existing=True,
                              dataset_id=bigquery_dataset_id)

        bq_utils.query(q=main_consts.DRUG_CLASS_QUERY.format(
            dataset_id=bigquery_dataset_id),
                       use_legacy_sql=False,
                       destination_table_id='drug_class',
                       retry_count=bq_consts.BQ_DEFAULT_RETRY_COUNT,
                       write_disposition='WRITE_TRUNCATE',
                       destination_dataset_id=bigquery_dataset_id)

        # ensure concept ancestor table exists
        if not bq_utils.table_exists(common.CONCEPT_ANCESTOR):
            bq_utils.create_standard_table(common.CONCEPT_ANCESTOR,
                                           common.CONCEPT_ANCESTOR)
            q = """INSERT INTO {dataset}.concept_ancestor
            SELECT * FROM {vocab}.concept_ancestor""".format(
                dataset=bigquery_dataset_id, vocab=common.VOCABULARY_DATASET)
            bq_utils.query(q)
Code Example #29
def copy_vocabulary_tables(input_dataset, dest_dataset):
    for table in VOCABULARY_TABLES:
        if bq_utils.table_exists(table, dataset_id=input_dataset):
            pass