Example #1
0
def parse_domain_mapping_query_for_same_domains(project_id, dataset_id):
    """
    Build the query that produces id mappings in _logging_domain_alignment for the records
    that are copied into the same domain table they came from.

    One sub-query is rendered for every table in domain_mapping.DOMAIN_TABLE_NAMES, with that
    table used as both the source and the destination; the sub-queries are chained with UNION_ALL.

    :param project_id: the project_id in which the query is run
    :param dataset_id: the dataset_id in which the query is run
    :return: the combined query, or EMPTY_STRING when there is no domain table to process
    """

    def _render_same_domain_query(table_name):
        # Renders the inner query for a single table mapped onto itself
        table_domain = resources.get_domain(table_name)
        id_field = resources.get_domain_id_field(table_name)
        concept_id_field = resources.get_domain_concept_id(table_name)

        # Quoted, comma-separated list holding the table's own domain and the metadata domain
        quoted_domains = "'{}'".format("','".join(
            (table_domain, METADATA_DOMAIN)))

        return DOMAIN_REROUTE_INCLUDED_INNER_QUERY.render(
            project_id=project_id,
            dataset_id=dataset_id,
            src_table=table_name,
            dest_table=table_name,
            src_id=id_field,
            dest_id=id_field,
            domain_concept_id=concept_id_field,
            domain=quoted_domains)

    sub_queries = [
        _render_same_domain_query(table_name)
        for table_name in domain_mapping.DOMAIN_TABLE_NAMES
    ]

    if not sub_queries:
        return EMPTY_STRING
    return UNION_ALL.join(sub_queries)
Example #2
0
def parse_domain_mapping_query_cross_domain(project_id, dataset_id,
                                            dest_table):
    """
    Build the query that produces id mappings in _logging_domain_alignment for the records
    that are rerouted from other domain tables into dest_table.

    Every other table in domain_mapping.DOMAIN_TABLE_NAMES that has domain mappings to dest_table
    contributes one sub-query (plus its rerouting criteria, if any). The sub-queries are chained
    with UNION_ALL and wrapped in the outer mapping query together with the max-id query of dest_table.

    :param project_id: the project_id in which the query is run
    :param dataset_id: the dataset_id in which the query is run
    :param dest_table: the destination table to which the records are rerouted
    :return: the query that generates id mappings for the rerouting records, or EMPTY_STRING
        when no source table reroutes into dest_table
    """
    dest_domain = resources.get_domain(dest_table)
    dest_id_field = resources.get_domain_id_field(dest_table)

    sub_queries = []
    for src_table in domain_mapping.DOMAIN_TABLE_NAMES:
        # Same-table records are not rerouted, and pairs without mappings contribute nothing
        if src_table == dest_table:
            continue
        if not domain_mapping.exist_domain_mappings(src_table, dest_table):
            continue

        sub_query = DOMAIN_REROUTE_INCLUDED_INNER_QUERY.render(
            project_id=project_id,
            dataset_id=dataset_id,
            src_table=src_table,
            dest_table=dest_table,
            src_id=resources.get_domain_id_field(src_table),
            dest_id=NULL_VALUE,
            domain_concept_id=resources.get_domain_concept_id(src_table),
            domain="'{}'".format(dest_domain))

        # Extra rerouting criteria are appended directly after the rendered sub-query
        extra_criteria = domain_mapping.get_rerouting_criteria(
            src_table, dest_table)
        if extra_criteria != EMPTY_STRING:
            sub_query += AND + extra_criteria

        sub_queries.append(sub_query)

    if not sub_queries:
        return EMPTY_STRING

    # the query to get the max id for the dest table
    max_id_query = MAXIMUM_DOMAIN_ID_QUERY.render(
        project_id=project_id,
        dataset_id=dataset_id,
        domain_table=dest_table,
        domain_id_field=dest_id_field)

    return DOMAIN_MAPPING_OUTER_QUERY.render(
        union_query=UNION_ALL.join(sub_queries), domain_query=max_id_query)
Example #3
0
def create_domain_field_dict():
    """
    Sort the fields of every CDM domain table into 'buckets' so that the downstream process can
    map fields between tables.

    For each table in domain_mapping.DOMAIN_TABLE_NAMES the result holds an OrderedDict with three entries:
    DOMAIN_COMMON_FIELDS (field suffix -> field name), DOMAIN_SPECIFIC_FIELDS (list of field names)
    and DOMAIN_DATE_FIELDS (field suffix -> field name). The table's own id field is left out of
    every bucket.

    :return: an OrderedDict keyed by domain table name that contains the categorized CDM table fields
    """
    fields_by_table = OrderedDict()

    for domain_table in domain_mapping.DOMAIN_TABLE_NAMES:
        common_fields = OrderedDict()
        date_fields = OrderedDict()
        table_specific_fields = []

        domain = get_domain(domain_table)
        id_field = get_domain_id_field(domain_table)

        for field_name in get_domain_fields(domain_table):
            if field_name == id_field:
                continue

            # Some tables (e.g. drug_exposure) prefix their fields with the full table name
            # instead of the domain name, so strip the table name when it appears in the field
            # and fall back to the lowercased domain otherwise.
            prefix = domain_table if domain_table in field_name else domain.lower()
            suffix = re.sub(prefix, '', field_name)

            # Put different types of fields into their buckets
            if suffix in COMMON_DOMAIN_FIELD_SUFFIXES:
                common_fields[suffix] = field_name
            elif suffix in DATE_FIELD_SUFFIXES:
                date_fields[suffix] = field_name
            elif field_name in COMMON_DOMAIN_FIELD_SUFFIXES:
                common_fields[field_name] = field_name
            else:
                table_specific_fields.append(field_name)

        fields_by_table[domain_table] = OrderedDict([
            (DOMAIN_COMMON_FIELDS, common_fields),
            (DOMAIN_SPECIFIC_FIELDS, table_specific_fields),
            (DOMAIN_DATE_FIELDS, date_fields),
        ])

    return fields_by_table
 def test_get_domain(self):
     """
     Check that resources.get_domain returns the expected domain for the condition and
     procedure tables set up on this test case.
     """
     expectations = (
         (self.condition_table, self.condition),
         (self.procedure_table, self.procedure),
     )
     for table_name, expected_domain in expectations:
         self.assertEqual(resources.get_domain(table_name), expected_domain)