예제 #1
0
def parse_domain_mapping_query_for_same_domains(project_id, dataset_id):
    """
    Build the query that generates id mappings in _logging_domain_alignment
    for records that stay in (are copied to) their own domain table.

    One DOMAIN_REROUTE_INCLUDED_INNER_QUERY is rendered per table in
    domain_mapping.DOMAIN_TABLE_NAMES, using that table as both source and
    destination, and the rendered pieces are chained with UNION_ALL.

    :param project_id: the project_id in which the query is run
    :param dataset_id: the dataset_id in which the query is run
    :return: the combined query; EMPTY_STRING if there are no domain tables
    """
    combined = EMPTY_STRING

    for table in domain_mapping.DOMAIN_TABLE_NAMES:
        table_domain = resources.get_domain(table)
        id_field = resources.get_domain_id_field(table)
        concept_id_field = resources.get_domain_concept_id(table)

        # Single-quoted, comma-separated list made of the table's own domain
        # followed by METADATA_DOMAIN.
        quoted_domains = "'{}'".format(
            "','".join((table_domain, METADATA_DOMAIN)))

        rendered = DOMAIN_REROUTE_INCLUDED_INNER_QUERY.render(
            project_id=project_id,
            dataset_id=dataset_id,
            src_table=table,
            dest_table=table,
            src_id=id_field,
            dest_id=id_field,
            domain_concept_id=concept_id_field,
            domain=quoted_domains,
        )

        # Every piece after the first is preceded by the UNION ALL separator.
        if combined != EMPTY_STRING:
            rendered = UNION_ALL + rendered
        combined += rendered

    return combined
def parse_src_concept_id_update_query(project_id, dataset_id, table_name):
    """
    Fill in the template query used to generate the updated domain table.

    The id, concept_id and source_concept_id columns of the table are selected
    as ``coalesce(<replacement>, <column>) AS <column>``; every other column
    is selected unchanged.

    :param project_id: identifies the project containing the dataset
    :param dataset_id: identifies the dataset containing the OMOP data
    :param table_name: name of a domain table
    :return: parsed src_concept_id_update query
    """
    column_names = [
        column['name'] for column in resources.fields_for(table_name)
    ]

    # Maps a column of the domain table to the replacement column that
    # should take precedence over it.
    replacements = {
        resources.get_domain_id_field(table_name): 'dest_id',
        resources.get_domain_concept_id(table_name): 'new_concept_id',
        resources.get_domain_source_concept_id(table_name): 'new_src_concept_id',
    }

    select_exprs = [
        'coalesce({replace_field}, {field}) AS {field}'.format(
            replace_field=replacements[name], field=name)
        if name in replacements else name
        for name in column_names
    ]

    return SRC_CONCEPT_ID_UPDATE_QUERY.format(
        cols=', '.join(select_exprs),
        project=project_id,
        dataset=dataset_id,
        domain_table=table_name,
        logging_table=SRC_CONCEPT_ID_TABLE_NAME,
    )
예제 #3
0
def parse_domain_mapping_query_cross_domain(project_id, dataset_id,
                                            dest_table):
    """
    Build the query that generates id mappings in _logging_domain_alignment
    for the records being rerouted into dest_table from other domain tables.

    :param project_id: the project_id in which the query is run
    :param dataset_id: the dataset_id in which the query is run
    :param dest_table: the destination table to which the records are rerouted
    :return: the query that generates id mappings for the rerouting records;
        EMPTY_STRING when no source table contributes a query
    """
    dest_domain = resources.get_domain(dest_table)
    dest_id_field = resources.get_domain_id_field(dest_table)

    rerouting_union = EMPTY_STRING

    for src_table in domain_mapping.DOMAIN_TABLE_NAMES:
        # Skip the destination table itself and any source table that has no
        # domain mappings to the destination.
        if src_table == dest_table or not domain_mapping.exist_domain_mappings(
                src_table, dest_table):
            continue

        src_id_field = resources.get_domain_id_field(src_table)
        src_concept_id_field = resources.get_domain_concept_id(src_table)

        if rerouting_union != EMPTY_STRING:
            rerouting_union += UNION_ALL

        rerouting_union += DOMAIN_REROUTE_INCLUDED_INNER_QUERY.render(
            project_id=project_id,
            dataset_id=dataset_id,
            src_table=src_table,
            dest_table=dest_table,
            src_id=src_id_field,
            dest_id=NULL_VALUE,
            domain_concept_id=src_concept_id_field,
            domain="'{}'".format(dest_domain),
        )

        # Extra rerouting criteria, when present, are AND-ed onto the
        # sub-query that was just appended.
        extra_criteria = domain_mapping.get_rerouting_criteria(
            src_table, dest_table)
        if extra_criteria != EMPTY_STRING:
            rerouting_union += AND + extra_criteria

    if rerouting_union == EMPTY_STRING:
        return EMPTY_STRING

    # the query to get the max id for the dest table
    max_id_query = MAXIMUM_DOMAIN_ID_QUERY.render(
        project_id=project_id,
        dataset_id=dataset_id,
        domain_table=dest_table,
        domain_id_field=dest_id_field,
    )

    return DOMAIN_MAPPING_OUTER_QUERY.render(union_query=rerouting_union,
                                             domain_query=max_id_query)
예제 #4
0
    def parse_src_concept_id_logging_query(self, domain_table):
        """
        Render the _logging_standard_concept_id_replacement query for one
        domain table, using this instance's project_id and dataset_id.

        :param domain_table: name of the domain_table for which a query needs to be generated.
        :return: the rendered SRC_CONCEPT_ID_MAPPING_QUERY for domain_table
        """
        concept_id_field = resources.get_domain_concept_id(domain_table)
        source_concept_id_field = resources.get_domain_source_concept_id(
            domain_table)

        template_context = {
            'table_name': domain_table,
            'project': self.project_id,
            'dataset': self.dataset_id,
            'domain_concept_id': concept_id_field,
            'domain_source': source_concept_id_field,
        }
        return SRC_CONCEPT_ID_MAPPING_QUERY.render(**template_context)
def parse_src_concept_id_logging_query(project_id, dataset_id, domain_table):
    """
    Fill in the _logging_standard_concept_id_replacement query template for
    one domain table.

    :param project_id: identifies the project containing the dataset
    :param dataset_id: identifies the dataset containing the OMOP data
    :param domain_table: name of the domain_table for which a query needs to be generated.
    :return: the formatted SRC_CONCEPT_ID_MAPPING_QUERY for domain_table
    """
    concept_id_field = resources.get_domain_concept_id(domain_table)
    source_concept_id_field = resources.get_domain_source_concept_id(
        domain_table)

    format_args = {
        'table_name': domain_table,
        'project': project_id,
        'dataset': dataset_id,
        'domain_concept_id': concept_id_field,
        'domain_source': source_concept_id_field,
    }
    return SRC_CONCEPT_ID_MAPPING_QUERY.format(**format_args)
 def test_get_domain_concept_id(self):
     """Check get_domain_concept_id for the condition and procedure tables."""
     condition_actual = resources.get_domain_concept_id(self.condition_table)
     self.assertEqual(condition_actual, self.condition_concept_id)

     procedure_actual = resources.get_domain_concept_id(self.procedure_table)
     self.assertEqual(procedure_actual, self.procedure_concept_id)
예제 #7
0
def get_clean_domain_queries(project_id, dataset_id, sandbox_dataset_id):
    """
    Generate the list of query dicts for dropping records that do not belong
    to their domain table after rerouting.

    For every table in domain_mapping.DOMAIN_TABLE_NAMES three query dicts are
    produced: one that writes the sandbox table for the domain table, one that
    rewrites the domain table, and one that rewrites its mapping table. All
    use the WRITE_TRUNCATE disposition.

    :param project_id: the project_id in which the query is run
    :param dataset_id: the dataset_id in which the query is run
    :param sandbox_dataset_id: sandbox dataset for dataset_id
    :return: list of query dicts to run; all sandbox queries come first,
        followed by the clean-up queries
    """
    sandbox_jobs = []
    cleanup_jobs = []

    for domain_table in domain_mapping.DOMAIN_TABLE_NAMES:
        # Use non-standard concept if table is observation
        concept_id_field = ('observation_source_concept_id'
                            if domain_table == OBSERVATION else
                            resources.get_domain_concept_id(domain_table))

        sandbox_sql = SANDBOX_DOMAIN_RECORD_QUERY_TEMPLATE.render(
            project_id=project_id,
            dataset_id=dataset_id,
            domain_table=domain_table,
            domain_concept_id=concept_id_field,
        )
        sandbox_jobs.append({
            cdr_consts.QUERY: sandbox_sql,
            cdr_consts.DESTINATION_TABLE: sandbox_name_for(domain_table),
            cdr_consts.DISPOSITION: bq_consts.WRITE_TRUNCATE,
            cdr_consts.DESTINATION_DATASET: sandbox_dataset_id,
        })

        # First the clean-up query for the domain table itself, then the one
        # for the corresponding mapping table.
        for is_mapping in (False, True):
            cleanup_sql = CLEAN_DOMAIN_RECORD_QUERY_TEMPLATE.render(
                project_id=project_id,
                dataset_id=dataset_id,
                sandbox_dataset_id=sandbox_dataset_id,
                domain_table=domain_table,
                sandbox_table=sandbox_name_for(domain_table),
                is_mapping=is_mapping,
            )
            if is_mapping:
                destination_table = mapping_table_for(domain_table)
            else:
                destination_table = domain_table

            cleanup_jobs.append({
                cdr_consts.QUERY: cleanup_sql,
                cdr_consts.DESTINATION_TABLE: destination_table,
                cdr_consts.DISPOSITION: bq_consts.WRITE_TRUNCATE,
                cdr_consts.DESTINATION_DATASET: dataset_id,
            })

    return sandbox_jobs + cleanup_jobs