def tables_to_map(): """ Determine which CDM tables must have ids remapped :return: the list of table names """ result = [] for table in resources.CDM_TABLES: if table != 'person' and bq_utils.has_primary_key(table): result.append(table) return result
def table_hpo_subquery(table_name, hpo_id, input_dataset_id, output_dataset_id): """ Returns query used to retrieve all records in a submitted table :param table_name: one of the domain tables (e.g. 'visit_occurrence', 'condition_occurrence') :param hpo_id: identifies the HPO :param input_dataset_id: identifies dataset containing HPO submission :param output_dataset_id: :return: """ tables_to_ref = [] for table in resources.CDM_TABLES: if bq_utils.has_primary_key(table): tables_to_ref.append(table) is_id_mapped = table_name in tables_to_ref fields = resources.fields_for(table_name) table_id = bq_utils.get_table_id(hpo_id, table_name) # Generate column expressions for select if not is_id_mapped: # e.g. death col_exprs = [field['name'] for field in fields] cols = ',\n '.join(col_exprs) return f''' SELECT {cols} FROM `{input_dataset_id}.{table_id}`''' else: # Ensure that we # 1) populate primary key from the mapping table and # 2) populate any foreign key fields from the mapping visit table # NOTE: Assumes that besides person_id foreign keys exist only for visit_occurrence, location, care_site mapping_table = mapping_table_for(table_name) if is_id_mapped else None has_visit_occurrence_id = False has_care_site_id = False has_location_id = False id_col = f'{table_name}_id' col_exprs = [] for field in fields: field_name = field['name'] if field_name == id_col: # Use mapping for record ID column # m is an alias that should resolve to the associated mapping table if field_name == eu_constants.PERSON_ID: col_expr = f'{field_name}' else: col_expr = f'm.{field_name}' elif field_name == eu_constants.VISIT_OCCURRENCE_ID: # Replace with mapped visit_occurrence_id # mv is an alias that should resolve to the mapping visit table # Note: This is only reached when table_name != visit_occurrence col_expr = 'mv.' + eu_constants.VISIT_OCCURRENCE_ID has_visit_occurrence_id = True elif field_name == eu_constants.CARE_SITE_ID: # Replace with mapped care_site_id # cs is an alias that should resolve to the mapping care_site table # Note: This is only reached when table_name != care_site col_expr = 'mcs.' + eu_constants.CARE_SITE_ID has_care_site_id = True elif field_name == eu_constants.LOCATION_ID: # Replace with mapped location_id # lc is an alias that should resolve to the mapping visit table # Note: This is only reached when table_name != location col_expr = 'loc.' + eu_constants.LOCATION_ID has_location_id = True else: col_expr = field_name col_exprs.append(col_expr) cols = ',\n '.join(col_exprs) visit_join_expr = '' location_join_expr = '' care_site_join_expr = '' if has_visit_occurrence_id: # Include a join to mapping visit table # Note: Using left join in order to keep records that aren't mapped to visits mv = mapping_table_for(common.VISIT_OCCURRENCE) src_visit_table_id = bq_utils.get_table_id(hpo_id, common.VISIT_OCCURRENCE) visit_join_expr = f''' LEFT JOIN `{output_dataset_id}.{mv}` mv ON t.visit_occurrence_id = mv.src_visit_occurrence_id AND mv.src_table_id = '{src_visit_table_id}' ''' if has_care_site_id: # Include a join to mapping visit table # Note: Using left join in order to keep records that aren't mapped to visits cs = mapping_table_for(common.CARE_SITE) src_care_site_table_id = bq_utils.get_table_id( hpo_id, common.CARE_SITE) care_site_join_expr = f''' LEFT JOIN `{output_dataset_id}.{cs}` mcs ON t.care_site_id = mcs.src_care_site_id AND mcs.src_table_id = '{src_care_site_table_id}' ''' if has_location_id: # Include a join to mapping visit table # Note: Using left join in order to keep records that aren't mapped to visits lc = mapping_table_for(common.LOCATION) src_location_table_id = bq_utils.get_table_id( hpo_id, common.LOCATION) location_join_expr = f''' LEFT JOIN `{output_dataset_id}.{lc}` loc ON t.location_id = loc.src_location_id AND loc.src_table_id = '{src_location_table_id}' ''' if table_name == common.PERSON: return f''' SELECT {cols} FROM `{input_dataset_id}.{table_id}` t {location_join_expr} {care_site_join_expr} ''' return f'''