Esempio n. 1
0
def tables_to_map():
    """
    Determine which CDM tables must have ids remapped

    :return: the list of table names
    """
    result = []
    for table in resources.CDM_TABLES:
        if table != 'person' and bq_utils.has_primary_key(table):
            result.append(table)
    return result
Esempio n. 2
0
def table_hpo_subquery(table_name, hpo_id, input_dataset_id,
                       output_dataset_id):
    """
    Returns query used to retrieve all records in a submitted table

    :param table_name: one of the domain tables (e.g. 'visit_occurrence', 'condition_occurrence')
    :param hpo_id: identifies the HPO
    :param input_dataset_id: identifies dataset containing HPO submission
    :param output_dataset_id:
    :return:
    """
    tables_to_ref = []
    for table in resources.CDM_TABLES:
        if bq_utils.has_primary_key(table):
            tables_to_ref.append(table)

    is_id_mapped = table_name in tables_to_ref
    fields = resources.fields_for(table_name)
    table_id = bq_utils.get_table_id(hpo_id, table_name)

    # Generate column expressions for select
    if not is_id_mapped:
        # e.g. death
        col_exprs = [field['name'] for field in fields]
        cols = ',\n        '.join(col_exprs)
        return f'''
    SELECT {cols} 
    FROM `{input_dataset_id}.{table_id}`'''
    else:
        # Ensure that we
        #  1) populate primary key from the mapping table and
        #  2) populate any foreign key fields from the mapping visit table
        # NOTE: Assumes that besides person_id foreign keys exist only for visit_occurrence, location, care_site
        mapping_table = mapping_table_for(table_name) if is_id_mapped else None
        has_visit_occurrence_id = False
        has_care_site_id = False
        has_location_id = False
        id_col = f'{table_name}_id'
        col_exprs = []

        for field in fields:
            field_name = field['name']
            if field_name == id_col:
                # Use mapping for record ID column
                # m is an alias that should resolve to the associated mapping table
                if field_name == eu_constants.PERSON_ID:
                    col_expr = f'{field_name}'
                else:
                    col_expr = f'm.{field_name}'
            elif field_name == eu_constants.VISIT_OCCURRENCE_ID:
                # Replace with mapped visit_occurrence_id
                # mv is an alias that should resolve to the mapping visit table
                # Note: This is only reached when table_name != visit_occurrence
                col_expr = 'mv.' + eu_constants.VISIT_OCCURRENCE_ID
                has_visit_occurrence_id = True
            elif field_name == eu_constants.CARE_SITE_ID:
                # Replace with mapped care_site_id
                # cs is an alias that should resolve to the mapping care_site table
                # Note: This is only reached when table_name != care_site
                col_expr = 'mcs.' + eu_constants.CARE_SITE_ID
                has_care_site_id = True
            elif field_name == eu_constants.LOCATION_ID:
                # Replace with mapped location_id
                # lc is an alias that should resolve to the mapping visit table
                # Note: This is only reached when table_name != location
                col_expr = 'loc.' + eu_constants.LOCATION_ID
                has_location_id = True
            else:
                col_expr = field_name
            col_exprs.append(col_expr)
        cols = ',\n        '.join(col_exprs)

        visit_join_expr = ''
        location_join_expr = ''
        care_site_join_expr = ''

        if has_visit_occurrence_id:
            # Include a join to mapping visit table
            # Note: Using left join in order to keep records that aren't mapped to visits
            mv = mapping_table_for(common.VISIT_OCCURRENCE)
            src_visit_table_id = bq_utils.get_table_id(hpo_id,
                                                       common.VISIT_OCCURRENCE)
            visit_join_expr = f'''
            LEFT JOIN `{output_dataset_id}.{mv}` mv 
              ON t.visit_occurrence_id = mv.src_visit_occurrence_id 
             AND mv.src_table_id = '{src_visit_table_id}'
            '''

        if has_care_site_id:
            # Include a join to mapping visit table
            # Note: Using left join in order to keep records that aren't mapped to visits
            cs = mapping_table_for(common.CARE_SITE)
            src_care_site_table_id = bq_utils.get_table_id(
                hpo_id, common.CARE_SITE)
            care_site_join_expr = f'''
                        LEFT JOIN `{output_dataset_id}.{cs}` mcs 
                          ON t.care_site_id = mcs.src_care_site_id 
                         AND mcs.src_table_id = '{src_care_site_table_id}'
                        '''

        if has_location_id:
            # Include a join to mapping visit table
            # Note: Using left join in order to keep records that aren't mapped to visits
            lc = mapping_table_for(common.LOCATION)
            src_location_table_id = bq_utils.get_table_id(
                hpo_id, common.LOCATION)
            location_join_expr = f'''
                        LEFT JOIN `{output_dataset_id}.{lc}` loc 
                          ON t.location_id = loc.src_location_id 
                         AND loc.src_table_id = '{src_location_table_id}'
                        '''

        if table_name == common.PERSON:
            return f'''
                    SELECT {cols} 
                    FROM `{input_dataset_id}.{table_id}` t
                       {location_join_expr}
                       {care_site_join_expr} 
                    '''

        return f'''