def parse_domain_mapping_query_for_same_domains(project_id, dataset_id):
    """
    Build the id-mapping query for records that stay in their own domain table.

    One inner query is rendered per table in domain_mapping.DOMAIN_TABLE_NAMES,
    using that table as both the source and the destination, and the rendered
    queries are chained together with UNION_ALL. The result feeds the id
    mappings kept in _logging_domain_alignment.

    :param project_id: the project_id in which the query is run
    :param dataset_id: the dataset_id in which the query is run
    :return: the combined query; EMPTY_STRING if there are no domain tables
    """
    combined_query = EMPTY_STRING
    for table in domain_mapping.DOMAIN_TABLE_NAMES:
        table_domain = resources.get_domain(table)
        id_field = resources.get_domain_id_field(table)
        concept_id_field = resources.get_domain_concept_id(table)

        # Single-quote each accepted domain and comma-separate them so the
        # value can be dropped into the template as a list of string literals.
        quoted_domains = "'{}'".format(
            "','".join([table_domain, METADATA_DOMAIN]))

        inner_query = DOMAIN_REROUTE_INCLUDED_INNER_QUERY.render(
            project_id=project_id,
            dataset_id=dataset_id,
            src_table=table,
            dest_table=table,
            src_id=id_field,
            dest_id=id_field,
            domain_concept_id=concept_id_field,
            domain=quoted_domains)

        # The separator only goes between queries, never before the first one.
        if combined_query != EMPTY_STRING:
            combined_query += UNION_ALL
        combined_query += inner_query

    return combined_query
def parse_src_concept_id_update_query(project_id, dataset_id, table_name):
    """
    Fill in the template query used to generate the updated domain table.

    Every column of the table is selected as-is, except the domain id,
    concept id and source concept id columns, which are wrapped in a
    coalesce() that prefers the replacement column and falls back to the
    original column.

    :param project_id: identifies the project containing the dataset
    :param dataset_id: identifies the dataset containing the OMOP data
    :param table_name: name of a domain table
    :return: parsed src_concept_id_update query
    """
    column_names = [
        column['name'] for column in resources.fields_for(table_name)
    ]

    # Maps a column of the domain table to the column that should override it.
    replacements = {
        resources.get_domain_id_field(table_name): 'dest_id',
        resources.get_domain_concept_id(table_name): 'new_concept_id',
        resources.get_domain_source_concept_id(table_name):
            'new_src_concept_id'
    }

    select_exprs = [
        'coalesce({replace_field}, {field}) AS {field}'.format(
            replace_field=replacements[column], field=column)
        if column in replacements else column
        for column in column_names
    ]

    return SRC_CONCEPT_ID_UPDATE_QUERY.format(
        cols=', '.join(select_exprs),
        project=project_id,
        dataset=dataset_id,
        domain_table=table_name,
        logging_table=SRC_CONCEPT_ID_TABLE_NAME)
def parse_domain_mapping_query_cross_domain(project_id, dataset_id, dest_table):
    """
    Build the id-mapping query for records being rerouted into dest_table.

    For every other domain table that has a domain mapping to dest_table, an
    inner query is rendered (optionally followed by AND plus the rerouting
    criteria for that pair) and the pieces are chained with UNION_ALL. When
    at least one piece exists, the union is wrapped in the outer mapping query
    together with the query that finds the maximum id of dest_table. The
    result feeds the id mappings kept in _logging_domain_alignment.

    :param project_id: the project_id in which the query is run
    :param dataset_id: the dataset_id in which the query is run
    :param dest_table: the destination table to which the records are rerouted
    :return: the query that generates id mappings for the rerouting records;
        EMPTY_STRING if no source table reroutes into dest_table
    """
    dest_domain = resources.get_domain(dest_table)
    dest_id_field = resources.get_domain_id_field(dest_table)

    rerouting_query = EMPTY_STRING
    for src_table in domain_mapping.DOMAIN_TABLE_NAMES:
        # A table never reroutes into itself.
        if src_table == dest_table:
            continue
        if not domain_mapping.exist_domain_mappings(src_table, dest_table):
            continue

        src_id_field = resources.get_domain_id_field(src_table)
        src_concept_id_field = resources.get_domain_concept_id(src_table)

        if rerouting_query != EMPTY_STRING:
            rerouting_query += UNION_ALL

        rerouting_query += DOMAIN_REROUTE_INCLUDED_INNER_QUERY.render(
            project_id=project_id,
            dataset_id=dataset_id,
            src_table=src_table,
            dest_table=dest_table,
            src_id=src_id_field,
            dest_id=NULL_VALUE,
            domain_concept_id=src_concept_id_field,
            domain="'{}'".format(dest_domain))

        # Extra criteria are appended directly to the inner query that was
        # just added, so they must come right after it.
        extra_criteria = domain_mapping.get_rerouting_criteria(
            src_table, dest_table)
        if extra_criteria != EMPTY_STRING:
            rerouting_query += AND + extra_criteria

    if rerouting_query == EMPTY_STRING:
        return EMPTY_STRING

    # the query to get the max id for the dest table
    max_id_query = MAXIMUM_DOMAIN_ID_QUERY.render(
        project_id=project_id,
        dataset_id=dataset_id,
        domain_table=dest_table,
        domain_id_field=dest_id_field)

    return DOMAIN_MAPPING_OUTER_QUERY.render(union_query=rerouting_query,
                                             domain_query=max_id_query)
def parse_src_concept_id_logging_query(self, domain_table):
    """
    Render the _logging_standard_concept_id_replacement query for one table.

    The project and dataset are read from this instance; the concept id and
    source concept id column names are looked up for the given domain table.

    :param domain_table: name of the domain_table for which a query needs to
        be generated.
    :return: the rendered SRC_CONCEPT_ID_MAPPING_QUERY for domain_table
    """
    return SRC_CONCEPT_ID_MAPPING_QUERY.render(
        table_name=domain_table,
        project=self.project_id,
        dataset=self.dataset_id,
        domain_concept_id=resources.get_domain_concept_id(domain_table),
        domain_source=resources.get_domain_source_concept_id(domain_table))
def parse_src_concept_id_logging_query(project_id, dataset_id, domain_table):
    """
    Fill in the _logging_standard_concept_id_replacement query for one table.

    :param project_id: identifies the project containing the dataset
    :param dataset_id: identifies the dataset containing the OMOP data
    :param domain_table: name of the domain_table for which a query needs to
        be generated.
    :return: SRC_CONCEPT_ID_MAPPING_QUERY formatted for domain_table
    """
    concept_id_field = resources.get_domain_concept_id(domain_table)
    source_concept_id_field = resources.get_domain_source_concept_id(
        domain_table)

    template_params = {
        'table_name': domain_table,
        'project': project_id,
        'dataset': dataset_id,
        'domain_concept_id': concept_id_field,
        'domain_source': source_concept_id_field,
    }
    return SRC_CONCEPT_ID_MAPPING_QUERY.format(**template_params)
def test_get_domain_concept_id(self):
    """
    Check resources.get_domain_concept_id for the condition and procedure
    tables against the concept id fields stored on the test case.
    """
    condition_result = resources.get_domain_concept_id(self.condition_table)
    self.assertEqual(condition_result, self.condition_concept_id)

    procedure_result = resources.get_domain_concept_id(self.procedure_table)
    self.assertEqual(procedure_result, self.procedure_concept_id)
def get_clean_domain_queries(project_id, dataset_id, sandbox_dataset_id):
    """
    Generate the query dicts that drop records which do not belong to a
    domain table after rerouting.

    For each table in domain_mapping.DOMAIN_TABLE_NAMES three query dicts are
    produced: one that writes records to the table's sandbox table, one that
    rewrites the domain table itself, and one that rewrites the domain
    table's mapping table. All sandbox query dicts are returned ahead of all
    clean-up query dicts.

    :param project_id: the project_id in which the query is run
    :param dataset_id: the dataset_id in which the query is run
    :param sandbox_dataset_id: sandbox dataset for dataset_id
    :return: list of query dicts to run
    """
    sandbox_queries = []
    clean_queries = []

    for table in domain_mapping.DOMAIN_TABLE_NAMES:
        # Use non-standard concept if table is observation
        concept_field = ('observation_source_concept_id'
                         if table == OBSERVATION else
                         resources.get_domain_concept_id(table))

        sandbox_sql = SANDBOX_DOMAIN_RECORD_QUERY_TEMPLATE.render(
            project_id=project_id,
            dataset_id=dataset_id,
            domain_table=table,
            domain_concept_id=concept_field)
        sandbox_queries.append({
            cdr_consts.QUERY: sandbox_sql,
            cdr_consts.DESTINATION_TABLE: sandbox_name_for(table),
            cdr_consts.DISPOSITION: bq_consts.WRITE_TRUNCATE,
            cdr_consts.DESTINATION_DATASET: sandbox_dataset_id
        })

        # First the clean-up query for the domain table itself, then the one
        # for its corresponding mapping table.
        for is_mapping in (False, True):
            clean_sql = CLEAN_DOMAIN_RECORD_QUERY_TEMPLATE.render(
                project_id=project_id,
                dataset_id=dataset_id,
                sandbox_dataset_id=sandbox_dataset_id,
                domain_table=table,
                sandbox_table=sandbox_name_for(table),
                is_mapping=is_mapping)
            destination = mapping_table_for(table) if is_mapping else table
            clean_queries.append({
                cdr_consts.QUERY: clean_sql,
                cdr_consts.DESTINATION_TABLE: destination,
                cdr_consts.DISPOSITION: bq_consts.WRITE_TRUNCATE,
                cdr_consts.DESTINATION_DATASET: dataset_id
            })

    return sandbox_queries + clean_queries