Exemple #1
0
 def test_is_field_nullable(self):
     """
     Check is_field_required against the patched CDM table schemas.

     With CDM_TABLE_SCHEMAS replaced by self.cdm_schemas, the lookup for
     self.condition_occurrence_id must be truthy and the lookup for
     self.condition_end_date must be falsy.
     """
     schemas_target = (
         'cdr_cleaner.cleaning_rules.field_mapping.CDM_TABLE_SCHEMAS')

     with patch.dict(schemas_target, self.cdm_schemas):
         occurrence_id_required = field_mapping.is_field_required(
             self.condition_table, self.condition_occurrence_id)
         self.assertTrue(occurrence_id_required)

         end_date_required = field_mapping.is_field_required(
             self.condition_table, self.condition_end_date)
         self.assertFalse(end_date_required)
def parse_remove_records_with_wrong_date_query(project_id, dataset_id,
                                               table_id, year_threshold):
    """
    Build the query that filters a table on its required date fields.

    Every date field of the table that is_field_required reports as required
    contributes one WHERE_CLAUSE_REQUIRED_FIELD condition; the conditions are
    chained with AND. The selected column expressions come from
    generate_field_expr.

    :param project_id: the project id
    :param dataset_id: the dataset id
    :param table_id: the table id
    :param year_threshold: the year threshold for removing the records
    :return: REMOVE_RECORDS_WITH_WRONG_DATE_FIELD_TEMPLATE filled in with the
        identifiers, the column expressions and the where clause
    """
    required_dates = [
        name for name in get_date_fields(table_id)
        if field_mapping.is_field_required(table_id, name)
    ]

    # One condition per required date field; an empty list yields ''.
    conditions = AND.join(
        WHERE_CLAUSE_REQUIRED_FIELD.format(date_field_name=name,
                                           year_threshold=year_threshold)
        for name in required_dates)

    columns = generate_field_expr(table_id, year_threshold)

    return REMOVE_RECORDS_WITH_WRONG_DATE_FIELD_TEMPLATE.format(
        project_id=project_id,
        dataset_id=dataset_id,
        table_id=table_id,
        col_expr=columns,
        where_clause=conditions)
def generate_field_expr(table_id, year_threshold):
    """
    Build the comma-separated select list for the table.

    Date fields that is_field_required does not report as required are
    wrapped in NULLABLE_DATE_FIELD_EXPRESSION (formatted with the field name
    and the year threshold); every other domain field is selected by name.

    :param table_id: the table id
    :param year_threshold: the year threshold passed to the nullable date
        field expression
    :return: the column expressions joined by ','
    """
    optional_dates = [
        name for name in get_date_fields(table_id)
        if not field_mapping.is_field_required(table_id, name)
    ]

    def render(column):
        # Plain columns pass through untouched.
        if column not in optional_dates:
            return column
        return NULLABLE_DATE_FIELD_EXPRESSION.format(
            date_field_name=column, year_threshold=year_threshold)

    return ','.join(
        render(column) for column in field_mapping.get_domain_fields(table_id))
def get_cols(table_id):
    """
    Build the select list for the table, with CASE statements on its date fields.

    A field found in TABLE_DATES[table_id] is kept as-is when its date part
    equals the mapped date field. Otherwise a required field is rebuilt from
    the mapped date field and the field's own time part, and a non-required
    field becomes NULL. Fields not found in TABLE_DATES[table_id] are selected
    by name.

    :param table_id: table for which the fields are generated
    :return: the column expressions joined by ', '
    """
    case_head = (' CASE'
                 ' WHEN EXTRACT(DATE FROM {field}) = {date_field}'
                 ' THEN {field}')
    case_tail = ' END AS {field}'
    rebuild_branch = (
        ' ELSE CAST(DATETIME({date_field}, EXTRACT(TIME FROM {field})) AS TIMESTAMP)'
    )
    null_branch = ' ELSE NULL'

    rendered = []
    for name in field_mapping.get_domain_fields(table_id):
        if name not in TABLE_DATES[table_id]:
            rendered.append(name)
            continue

        if field_mapping.is_field_required(table_id, name):
            else_branch = rebuild_branch
        else:
            else_branch = null_branch

        template = case_head + else_branch + case_tail
        rendered.append(
            template.format(field=name,
                            date_field=TABLE_DATES[table_id][name]))

    return ', '.join(rendered)
Exemple #5
0
def resolve_field_mappings(src_table, dest_table):
    """
    Build the body of the SQL select statement that reroutes src_table into dest_table.

    Each destination field yields one select expression:
    - no value translation needed: the source field aliased as the destination field
    - translation needed but no value mappings: ZERO_AS_DEST_FIELD for a
      required destination field, NULL_AS_DEST_FIELD otherwise
    - translation needed with value mappings: a CASE statement of WHEN
      clauses whose dummy value comes from fetch_dummy_value for a required
      destination field and is NULL_VALUE otherwise

    :param src_table: the source CDM table for rerouting
    :param dest_table: the destination CDM table for rerouting
    :return: the select expressions joined by ',\\n\\t'
    """
    columns = []
    mappings = domain_mapping.get_field_mappings(src_table, dest_table)

    for dest_field, src_field in mappings.items():
        needs_translation = domain_mapping.value_requires_translation(
            src_table, dest_table, src_field, dest_field)

        if not needs_translation:
            columns.append(
                SRC_FIELD_AS_DEST_FIELD.format(src_field=src_field,
                                               dest_field=dest_field))
            continue

        translations = domain_mapping.get_value_mappings(
            src_table, dest_table, src_field, dest_field)

        if len(translations) == 0:
            # Nothing to translate to: emit a constant placeholder column.
            if field_mapping.is_field_required(dest_table, dest_field):
                placeholder = ZERO_AS_DEST_FIELD
            else:
                placeholder = NULL_AS_DEST_FIELD
            columns.append(placeholder.format(dest_field=dest_field))
            continue

        # The value mapping is keyed by destination value.
        when_clauses = '\n\t\t'.join(
            WHEN_STATEMENT.format(src_value=src_value, dest_value=dest_value)
            for dest_value, src_value in translations.items())

        if field_mapping.is_field_required(dest_table, dest_field):
            fallback = fetch_dummy_value(dest_table, dest_field)
        else:
            fallback = NULL_VALUE

        columns.append(
            CASE_STATEMENT.format(src_field=src_field,
                                  dest_field=dest_field,
                                  dummy_value=fallback,
                                  statements=when_clauses))

    return ',\n\t'.join(columns)