Ejemplo n.º 1
0
def mapping_query(domain_table):
    """
    Returns query used to get mapping of all records from RDR combined with EHR records of consented participants

    :param domain_table: one of the domain tables (e.g. 'visit_occurrence', 'condition_occurrence')
    :return:
    """

    if combine_consts.PERSON_ID in [
            field['name'] for field in resources.fields_for(domain_table)
    ]:
        return combine_consts.MAPPING_QUERY_WITH_PERSON_CHECK.format(
            rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
            ehr_dataset_id=bq_utils.get_dataset_id(),
            ehr_rdr_dataset_id=bq_utils.get_ehr_rdr_dataset_id(),
            domain_table=domain_table,
            mapping_constant=common.RDR_ID_CONSTANT,
            ehr_consent_table_id=combine_consts.EHR_CONSENT_TABLE_ID)
    else:
        return combine_consts.MAPPING_QUERY_WITHOUT_PERSON_CHECK.format(
            rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
            ehr_dataset_id=bq_utils.get_dataset_id(),
            ehr_rdr_dataset_id=bq_utils.get_ehr_rdr_dataset_id(),
            domain_table=domain_table,
            mapping_constant=common.RDR_ID_CONSTANT)
Ejemplo n.º 2
0
def get_hpo_missing_pii_query(hpo_id):
    """
    Query to retrieve counts of drug classes in an HPO site's drug_exposure table

    :param hpo_id: identifies the HPO site
    :return: the query
    """
    person_table_id = bq_utils.get_table_id(hpo_id, common.PERSON)
    pii_name_table_id = bq_utils.get_table_id(hpo_id, common.PII_NAME)
    pii_wildcard = bq_utils.get_table_id(hpo_id, common.PII_WILDCARD)
    participant_match_table_id = bq_utils.get_table_id(
        hpo_id, common.PARTICIPANT_MATCH)
    rdr_dataset_id = bq_utils.get_rdr_dataset_id()
    rdr_date = extract_date_from_rdr_dataset_id(rdr_dataset_id)
    ehr_no_rdr_with_date = consts.EHR_NO_RDR.format(date=rdr_date)
    rdr_person_table_id = common.PERSON
    return render_query(
        consts.MISSING_PII_QUERY,
        person_table_id=person_table_id,
        rdr_dataset_id=rdr_dataset_id,
        rdr_person_table_id=rdr_person_table_id,
        ehr_no_pii=consts.EHR_NO_PII,
        ehr_no_rdr=ehr_no_rdr_with_date,
        pii_no_ehr=consts.PII_NO_EHR,
        ehr_no_participant_match=consts.EHR_NO_PARTICIPANT_MATCH,
        pii_name_table_id=pii_name_table_id,
        pii_wildcard=pii_wildcard,
        participant_match_table_id=participant_match_table_id)
Ejemplo n.º 3
0
def mapping_query(domain_table):
    """
    Returns query used to get mapping of all records from RDR combined with EHR records of consented participants

    :param domain_table: one of the domain tables (e.g. 'visit_occurrence', 'condition_occurrence')
    :return:
    """

    return '''SELECT DISTINCT
          '{rdr_dataset_id}'  AS src_dataset_id,
          {domain_table}_id  AS src_{domain_table}_id,
          'rdr' as src_hpo_id,
          {domain_table}_id + {mapping_constant}  AS {domain_table}_id
        FROM {rdr_dataset_id}.{domain_table}

        UNION ALL

        SELECT DISTINCT
          '{ehr_dataset_id}'  AS src_dataset_id,
          t.{domain_table}_id AS src_{domain_table}_id,
          v.src_hpo_id AS src_hpo_id,
          t.{domain_table}_id  AS {domain_table}_id
        FROM {ehr_dataset_id}.{domain_table} t
        JOIN {ehr_dataset_id}._mapping_{domain_table}  v on t.{domain_table}_id = v.{domain_table}_id
        WHERE EXISTS
           (SELECT 1 FROM {ehr_rdr_dataset_id}.{ehr_consent_table_id} c
            WHERE t.person_id = c.person_id)
    '''.format(rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
               ehr_dataset_id=bq_utils.get_dataset_id(),
               ehr_rdr_dataset_id=bq_utils.get_ehr_rdr_dataset_id(),
               domain_table=domain_table,
               mapping_constant=common.RDR_ID_CONSTANT,
               ehr_consent_table_id=EHR_CONSENT_TABLE_ID)
Ejemplo n.º 4
0
def fact_relationship_query():
    """
    Load fact_relationship, using mapped IDs based on domain concept in fact 1 and fact 2
    :return:
    """
    return '''
    SELECT 
      fr.domain_concept_id_1 AS domain_concept_id_1,
      CASE
          WHEN domain_concept_id_1 = 21 
            THEN m1.measurement_id 
          WHEN domain_concept_id_1 = 27
            THEN o1.observation_id
      END AS fact_id_1,
      fr.domain_concept_id_2,
      CASE 
          WHEN domain_concept_id_2 = 21 
            THEN m2.measurement_id
          WHEN domain_concept_id_2 = 27
            THEN o2.observation_id
      END AS fact_id_2,
      fr.relationship_concept_id AS relationship_concept_id
    FROM {rdr_dataset_id}.fact_relationship fr
      LEFT JOIN {combined_dataset_id}.{mapping_measurement} m1
        ON m1.src_measurement_id = fr.fact_id_1 AND fr.domain_concept_id_1=21
      LEFT JOIN {combined_dataset_id}.{mapping_observation} o1
        ON o1.src_observation_id = fr.fact_id_1 AND fr.domain_concept_id_1=27
      LEFT JOIN {combined_dataset_id}.{mapping_measurement} m2
        ON m2.src_measurement_id = fr.fact_id_2 AND fr.domain_concept_id_2=21
      LEFT JOIN {combined_dataset_id}.{mapping_observation} o2
        ON o2.src_observation_id = fr.fact_id_2 AND fr.domain_concept_id_2=27
    '''.format(rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
               combined_dataset_id=bq_utils.get_ehr_rdr_dataset_id(),
               mapping_measurement=mapping_table_for('measurement'),
               mapping_observation=mapping_table_for('observation'))
Ejemplo n.º 5
0
 def _all_rdr_records_included(self):
     """
     All rdr records are included whether or not there is corresponding ehr record
     """
     for domain_table in DOMAIN_TABLES:
         mapping_table = mapping_table_for(domain_table)
         q = '''SELECT rt.{domain_table}_id as id
            FROM {rdr_dataset_id}.{domain_table} rt
            LEFT JOIN {ehr_rdr_dataset_id}.{mapping_table} m
            ON rt.{domain_table}_id = m.src_{domain_table}_id
            WHERE
              m.{domain_table}_id IS NULL
            OR NOT EXISTS
              (SELECT 1 FROM {ehr_rdr_dataset_id}.{domain_table} t
               WHERE t.{domain_table}_id = m.{domain_table}_id)'''.format(
             domain_table=domain_table,
             rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
             ehr_rdr_dataset_id=bq_utils.get_ehr_rdr_dataset_id(),
             mapping_table=mapping_table)
         response = bq_utils.query(q)
         rows = test_util.response2rows(response)
         self.assertEqual(
             0, len(rows),
             "RDR records should map to records in mapping and combined tables"
         )
Ejemplo n.º 6
0
 def tearDownClass(cls):
     ehr_dataset_id = bq_utils.get_dataset_id()
     rdr_dataset_id = bq_utils.get_rdr_dataset_id()
     test_util.delete_all_tables(ehr_dataset_id)
     test_util.delete_all_tables(rdr_dataset_id)
     cls.testbed.deactivate()
     logger.handlers = []
Ejemplo n.º 7
0
def clean_rdr_dataset(project_id=None, dataset_id=None):
    """
    Run all clean rules defined for the rdr dataset.

    :param project_id:  Name of the BigQuery project.
    :param dataset_id:  Name of the dataset to clean
    """
    if project_id is None:
        project_id = app_identity.get_application_id()
        LOGGER.info('Project is unspecified.  Using default value of:\t%s',
                    project_id)

    if dataset_id is None:
        dataset_id = bq_utils.get_rdr_dataset_id()
        LOGGER.info('Dataset is unspecified.  Using default value of:\t%s',
                    dataset_id)

    sandbox_dataset_id = sandbox.create_sandbox_dataset(project_id=project_id,
                                                        dataset_id=dataset_id)

    query_list = _gather_rdr_queries(project_id, dataset_id,
                                     sandbox_dataset_id)

    LOGGER.info("Cleaning rdr_dataset")
    clean_engine.clean_dataset(project_id, query_list, stage.RDR)
Ejemplo n.º 8
0
def ehr_consent_query():
    """
    Returns query used to get the `person_id` of only those participants who have consented to share EHR data

    :return:
    """
    # Consenting are strictly those whose *most recent* (based on observation_datetime) consent record is YES
    # If the most recent record is NO or NULL, they are NOT consenting
    return '''
    WITH ordered_response AS
     (SELECT
        person_id, 
        value_source_concept_id,
        observation_datetime,
        ROW_NUMBER() OVER(PARTITION BY person_id ORDER BY observation_datetime DESC, value_source_concept_id ASC) AS rn
      FROM {dataset_id}.observation
      WHERE observation_source_value = '{source_value_ehr_consent}')
    
     SELECT person_id 
     FROM ordered_response
     WHERE rn = 1 
       AND value_source_concept_id = {concept_id_consent_permission_yes}
    '''.format(
        dataset_id=bq_utils.get_rdr_dataset_id(),
        source_value_ehr_consent=SOURCE_VALUE_EHR_CONSENT,
        concept_id_consent_permission_yes=CONCEPT_ID_CONSENT_PERMISSION_YES)
Ejemplo n.º 9
0
 def _all_rdr_records_included(self):
     """
     All rdr records are included whether or not there is corresponding ehr record
     """
     for domain_table in DOMAIN_TABLES:
         mapping_table = mapping_table_for(domain_table)
         query = (
             'SELECT rt.{domain_table}_id as id '
             'FROM `{rdr_dataset_id}.{domain_table}` AS rt '
             'LEFT JOIN `{combined_dataset_id}.{mapping_table}` AS m '
             'ON rt.{domain_table}_id = m.src_{domain_table}_id '
             'WHERE '
             '  m.{domain_table}_id IS NULL '
             'OR NOT EXISTS '
             ' (SELECT 1 FROM `{combined_dataset_id}.{domain_table}` AS t '
             '  WHERE t.{domain_table}_id = m.{domain_table}_id)').format(
                 domain_table=domain_table,
                 rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
                 combined_dataset_id=bq_utils.get_combined_dataset_id(),
                 mapping_table=mapping_table)
         response = bq_utils.query(query)
         rows = bq_utils.response2rows(response)
         self.assertEqual(
             0, len(rows),
             "RDR records should map to records in mapping and combined tables"
         )
Ejemplo n.º 10
0
def mapping_query(domain_table):
    """
    Returns query used to get mapping of all records from RDR combined with EHR records of consented participants

    :param domain_table: one of the domain tables (e.g. 'visit_occurrence', 'condition_occurrence')
    :return:
    """
    return '''
    WITH all_records AS
    (
        SELECT
          '{rdr_dataset_id}'  AS src_dataset_id, 
          {domain_table}_id AS src_{domain_table}_id
        FROM {rdr_dataset_id}.{domain_table}

        UNION ALL

        SELECT
          '{ehr_dataset_id}'  AS src_dataset_id, 
          {domain_table}_id AS src_{domain_table}_id
        FROM {ehr_dataset_id}.{domain_table} t
        WHERE EXISTS
           (SELECT 1 FROM {ehr_rdr_dataset_id}.{ehr_consent_table_id} c 
            WHERE t.person_id = c.person_id)
    )
    SELECT 
      ROW_NUMBER() OVER (ORDER BY src_dataset_id, src_{domain_table}_id) AS {domain_table}_id,
      src_dataset_id,
      src_{domain_table}_id
    FROM all_records
    '''.format(rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
               ehr_dataset_id=bq_utils.get_dataset_id(),
               ehr_rdr_dataset_id=bq_utils.get_ehr_rdr_dataset_id(),
               domain_table=domain_table,
               ehr_consent_table_id=EHR_CONSENT_TABLE_ID)
 def setUp(self):
     self.project_id = app_identity.get_application_id()
     self.dataset_id = bq_utils.get_rdr_dataset_id()
     self.sandbox_dataset_id = sandbox.get_sandbox_dataset_id(
         self.dataset_id)
     sandbox.check_and_create_sandbox_dataset(self.project_id,
                                              self.dataset_id)
Ejemplo n.º 12
0
 def setUp(self):
     super(CombineEhrRdrTest, self).setUp()
     self.APP_ID = bq_utils.app_identity.get_application_id()
     self.ehr_dataset_id = bq_utils.get_dataset_id()
     self.rdr_dataset_id = bq_utils.get_rdr_dataset_id()
     self.combined_dataset_id = bq_utils.get_ehr_rdr_dataset_id()
     self.drc_bucket = gcs_utils.get_drc_bucket()
     test_util.delete_all_tables(self.combined_dataset_id)
Ejemplo n.º 13
0
def assert_ehr_and_rdr_tables():
    """
    Raise assertion error if any CDM tables missing from EHR or RDR dataset
    """
    ehr_dataset_id = bq_utils.get_dataset_id()
    assert_tables_in(ehr_dataset_id)
    rdr_dataset_id = bq_utils.get_rdr_dataset_id()
    assert_tables_in(rdr_dataset_id)
Ejemplo n.º 14
0
def copy_rdr_table(table):
    """
    Copy table from the RDR dataset to the combined dataset

    Note: Overwrites if a table already exists
    """
    q = '''SELECT * FROM {rdr_dataset_id}.{table}'''.format(rdr_dataset_id=bq_utils.get_rdr_dataset_id(), table=table)
    logger.debug('Query for {table} is `{q}`'.format(table=table, q=q))
    query(q, table)
Ejemplo n.º 15
0
 def get_unconsented_ehr_records_count(self, table_name):
     q = UNCONSENTED_EHR_COUNTS_QUERY.format(rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
                                             ehr_dataset_id=bq_utils.get_dataset_id(),
                                             ehr_rdr_dataset_id=self.combined_dataset_id,
                                             domain_table=table_name,
                                             ehr_consent_table_id='_ehr_consent')
     response = bq_utils.query(q)
     rows = bq_utils.response2rows(response)
     return rows[0]['n']
Ejemplo n.º 16
0
def copy_rdr_table(table):
    """
    Copy table from the RDR dataset to the combined dataset

    Note: Overwrites if a table already exists
    """
    q = combine_consts.COPY_RDR_QUERY.format(
        rdr_dataset_id=bq_utils.get_rdr_dataset_id(), table=table)
    logger.debug('Query for {table} is `{q}`'.format(table=table, q=q))
    query(q, table)
Ejemplo n.º 17
0
def clean_rdr_dataset(project=None, dataset=None):
    if dataset is None or dataset == '' or dataset.isspace():
        dataset = bq_utils.get_rdr_dataset_id()
        LOGGER.info('Dataset is unspecified.  Using default value of:\t%s',
                    dataset)

    query_list = _gather_rdr_queries(project, dataset)

    LOGGER.info("Cleaning rdr_dataset")
    clean_engine.clean_dataset(project, dataset, query_list)
Ejemplo n.º 18
0
    def test_mapping_query(self):
        table_name = 'visit_occurrence'
        q = mapping_query(table_name)
        expected_query = EXPECTED_MAPPING_QUERY.format(rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
                                                       ehr_dataset_id=bq_utils.get_dataset_id(),
                                                       ehr_rdr_dataset_id=self.combined_dataset_id,
                                                       domain_table=table_name,
                                                       mapping_constant=common.RDR_ID_CONSTANT,
                                                       ehr_consent_table_id=EHR_CONSENT_TABLE_ID)

        self.assertEqual(expected_query, q, "Mapping query for \n {q} \n to is not as expected".format(q=q))
Ejemplo n.º 19
0
 def setUpClass(cls):
     print('**************************************************************')
     print(cls.__name__)
     print('**************************************************************')
     # TODO base class this
     ehr_dataset_id = bq_utils.get_dataset_id()
     rdr_dataset_id = bq_utils.get_rdr_dataset_id()
     test_util.delete_all_tables(ehr_dataset_id)
     test_util.delete_all_tables(rdr_dataset_id)
     cls.load_dataset_from_files(ehr_dataset_id,
                                 test_util.NYC_FIVE_PERSONS_PATH, True)
     cls.load_dataset_from_files(rdr_dataset_id, test_util.RDR_PATH)
Ejemplo n.º 20
0
def ehr_consent_query():
    """
    Returns query used to get the `person_id` of only those participants who have consented to share EHR data

    :return:
    """
    # Consenting are strictly those whose *most recent* (based on observation_datetime) consent record is YES
    # If the most recent record is NO or NULL, they are NOT consenting
    return combine_consts.EHR_CONSENT_QUERY.format(
        dataset_id=bq_utils.get_rdr_dataset_id(),
        source_value_ehr_consent=combine_consts.SOURCE_VALUE_EHR_CONSENT,
        concept_id_consent_permission_yes=combine_consts.
        CONCEPT_ID_CONSENT_PERMISSION_YES)
Ejemplo n.º 21
0
def fact_relationship_query():
    """
    Load fact_relationship, using mapped IDs based on domain concept in fact 1 and fact 2
    :return:
    """
    return combine_consts.FACT_RELATIONSHIP_QUERY.format(
        rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
        combined_dataset_id=bq_utils.get_ehr_rdr_dataset_id(),
        mapping_measurement=mapping_table_for('measurement'),
        ehr_dataset=bq_utils.get_dataset_id(),
        mapping_observation=mapping_table_for('observation'),
        measurement_domain_concept_id=common.MEASUREMENT_DOMAIN_CONCEPT_ID,
        observation_domain_concept_id=common.OBSERVATION_DOMAIN_CONCEPT_ID)
Ejemplo n.º 22
0
 def setUpClass(cls):
     print('**************************************************************')
     print(cls.__name__)
     print('**************************************************************')
     # TODO base class this
     logger.level = logging.INFO
     stream_handler = logging.StreamHandler(sys.stdout)
     logger.addHandler(stream_handler)
     ehr_dataset_id = bq_utils.get_dataset_id()
     rdr_dataset_id = bq_utils.get_rdr_dataset_id()
     test_util.delete_all_tables(ehr_dataset_id)
     test_util.delete_all_tables(rdr_dataset_id)
     cls.load_dataset_from_files(ehr_dataset_id,
                                 test_util.NYC_FIVE_PERSONS_PATH, True)
     cls.load_dataset_from_files(rdr_dataset_id, test_util.RDR_PATH)
Ejemplo n.º 23
0
def get_dataset_and_project_names():
    """
    Get project and dataset names from environment variables.

    :return: A dictionary of dataset names and project name
    """
    project_and_dataset_names = dict()
    project_and_dataset_names[clean_cdr_consts.EHR_DATASET] = bq_utils.get_dataset_id()
    project_and_dataset_names[clean_cdr_consts.UNIONED_EHR_DATASET] = bq_utils.get_unioned_dataset_id()
    project_and_dataset_names[clean_cdr_consts.RDR_DATASET] = bq_utils.get_rdr_dataset_id()
    project_and_dataset_names[clean_cdr_consts.EHR_RDR_DATASET] = bq_utils.get_ehr_rdr_dataset_id()
    project_and_dataset_names[clean_cdr_consts.EHR_RDR_DE_IDENTIFIED] = bq_utils.get_combined_deid_dataset_id()
    project_and_dataset_names[clean_cdr_consts.PROJECT] = app_identity.get_application_id()

    return project_and_dataset_names
Ejemplo n.º 24
0
def clean_rdr_dataset(project_id=None, dataset_id=None):
    """
    Run all clean rules defined for the rdr dataset.

    :param project_id:  Name of the BigQuery project.
    :param dataset_id:  Name of the dataset to clean
    """
    if dataset_id is None or dataset_id == '' or dataset_id.isspace():
        dataset_id = bq_utils.get_rdr_dataset_id()
        LOGGER.info('Dataset is unspecified.  Using default value of:\t%s', dataset_id)

    query_list = _gather_rdr_queries(project_id, dataset_id)

    LOGGER.info("Cleaning rdr_dataset")
    clean_engine.clean_dataset(project_id, dataset_id, query_list)
Ejemplo n.º 25
0
    def setUp(self):
        self.hpo_bucket = gcs_utils.get_hpo_bucket(FAKE_HPO_ID)
        self.project_id = app_identity.get_application_id()
        self.dataset_id = bq_utils.get_dataset_id()
        self.rdr_dataset_id = bq_utils.get_rdr_dataset_id()
        self.folder_prefix = '2019-01-01/'
        test_util.delete_all_tables(self.dataset_id)
        test_util.empty_bucket(self.hpo_bucket)

        mock_get_hpo_name = mock.patch('validation.main.get_hpo_name')

        self.mock_get_hpo_name = mock_get_hpo_name.start()
        self.mock_get_hpo_name.return_value = 'Fake HPO'
        self.addCleanup(mock_get_hpo_name.stop)

        self._load_data()
Ejemplo n.º 26
0
    def setUp(self):
        self.hpo_id = test_util.FAKE_HPO_ID
        self.hpo_bucket = gcs_utils.get_hpo_bucket(self.hpo_id)
        self.project_id = app_identity.get_application_id()
        self.rdr_dataset_id = bq_utils.get_rdr_dataset_id()
        mock_get_hpo_name = mock.patch('validation.main.get_hpo_name')

        self.mock_get_hpo_name = mock_get_hpo_name.start()
        self.mock_get_hpo_name.return_value = 'Fake HPO'
        self.addCleanup(mock_get_hpo_name.stop)

        self.bigquery_dataset_id = bq_utils.get_dataset_id()
        self.folder_prefix = '2019-01-01-v1/'
        self._empty_bucket()
        test_util.delete_all_tables(self.bigquery_dataset_id)
        self._create_drug_class_table(self.bigquery_dataset_id)
Ejemplo n.º 27
0
 def setUpClass(cls):
     # TODO base class this
     logger.level = logging.INFO
     stream_handler = logging.StreamHandler(sys.stdout)
     logger.addHandler(stream_handler)
     cls.testbed = testbed.Testbed()
     cls.testbed.activate()
     cls.testbed.init_app_identity_stub()
     cls.testbed.init_memcache_stub()
     cls.testbed.init_urlfetch_stub()
     cls.testbed.init_blobstore_stub()
     cls.testbed.init_datastore_v3_stub()
     ehr_dataset_id = bq_utils.get_dataset_id()
     rdr_dataset_id = bq_utils.get_rdr_dataset_id()
     test_util.delete_all_tables(ehr_dataset_id)
     test_util.delete_all_tables(rdr_dataset_id)
     cls.load_dataset_from_files(ehr_dataset_id, test_util.NYC_FIVE_PERSONS_PATH)
     cls.load_dataset_from_files(rdr_dataset_id, test_util.RDR_PATH)
Ejemplo n.º 28
0
    def setUpClass(cls):
        print('**************************************************************')
        print(cls.__name__)
        print('**************************************************************')

        super().initialize_class_vars()
        project_id = app_identity.get_application_id()
        dataset_id = bq_utils.get_rdr_dataset_id()
        sandbox_dataset_id = sandbox.get_sandbox_dataset_id(dataset_id)
        rule = PpiBranching(project_id, dataset_id, sandbox_dataset_id)
        cls.dataset_id = dataset_id
        cls.sandbox_dataset_id = sandbox_dataset_id
        cls.project_id = project_id
        cls.rule_instance = rule
        cls.fq_sandbox_table_names = [
            _fq_table_name(table)
            for table in (rule.lookup_table, rule.backup_table)
        ]
        cls.fq_table_names = [_fq_table_name(rule.observation_table)]
        super().setUpClass()
Ejemplo n.º 29
0
def load_query(domain_table):
    """
    Returns query used to load a domain table

    :param domain_table: one of the domain tables (e.g. 'visit_occurrence', 'condition_occurrence')
    :return:
    """
    rdr_dataset_id = bq_utils.get_rdr_dataset_id()
    ehr_dataset_id = bq_utils.get_dataset_id()
    ehr_rdr_dataset_id = bq_utils.get_ehr_rdr_dataset_id()
    mapping_table = mapping_table_for(domain_table)
    cols, join_expression = join_expression_generator(domain_table,
                                                      ehr_rdr_dataset_id)

    return combine_consts.LOAD_QUERY.format(
        cols=cols,
        domain_table=domain_table,
        rdr_dataset_id=rdr_dataset_id,
        ehr_dataset_id=ehr_dataset_id,
        mapping_table=mapping_table,
        join_expr=join_expression,
        ehr_rdr_dataset_id=ehr_rdr_dataset_id)
Ejemplo n.º 30
0
def fact_relationship_query():
    """
    Load fact_relationship, using mapped IDs based on domain concept in fact 1 and fact 2
    :return:
    """
    return '''
    SELECT
      fr.domain_concept_id_1 AS domain_concept_id_1,
      CASE
          WHEN domain_concept_id_1 = {measurement_domain_concept_id}
            THEN m1.measurement_id
          WHEN domain_concept_id_1 = {observation_domain_concept_id}
            THEN o1.observation_id
      END AS fact_id_1,
      fr.domain_concept_id_2,
      CASE
          WHEN domain_concept_id_2 = {measurement_domain_concept_id}
            THEN m2.measurement_id
          WHEN domain_concept_id_2 = {observation_domain_concept_id}
            THEN o2.observation_id
      END AS fact_id_2,
      fr.relationship_concept_id AS relationship_concept_id
    FROM {rdr_dataset_id}.fact_relationship fr
      LEFT JOIN {combined_dataset_id}.{mapping_measurement} m1
        ON m1.src_measurement_id = fr.fact_id_1 AND fr.domain_concept_id_1={measurement_domain_concept_id}
      LEFT JOIN {combined_dataset_id}.{mapping_observation} o1
        ON o1.src_observation_id = fr.fact_id_1 AND fr.domain_concept_id_1={observation_domain_concept_id}
      LEFT JOIN {combined_dataset_id}.{mapping_measurement} m2
        ON m2.src_measurement_id = fr.fact_id_2 AND fr.domain_concept_id_2={measurement_domain_concept_id}
      LEFT JOIN {combined_dataset_id}.{mapping_observation} o2
        ON o2.src_observation_id = fr.fact_id_2 AND fr.domain_concept_id_2={observation_domain_concept_id}
    '''.format(
        rdr_dataset_id=bq_utils.get_rdr_dataset_id(),
        combined_dataset_id=bq_utils.get_ehr_rdr_dataset_id(),
        mapping_measurement=mapping_table_for('measurement'),
        mapping_observation=mapping_table_for('observation'),
        measurement_domain_concept_id=common.MEASUREMENT_DOMAIN_CONCEPT_ID,
        observation_domain_concept_id=common.OBSERVATION_DOMAIN_CONCEPT_ID)