Example #1
def insert_caseitem_note(db_config, target_db):

    chunk_size = db_config["chunk_size"]
    offset = 0
    chunk_no = 1

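    # Mapping metadata (sirius_details stage) passed through to
    # target_db.insert_data below; mapping_file_name comes from the
    # enclosing module.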
    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )

    cases_query = (
        f'select "id", "caserecnumber" from {db_config["target_schema"]}.cases;'
    )
    cases_df = pd.read_sql_query(cases_query,
                                 db_config["db_connection_string"])
    log.debug(f"df size - cases_df: {len(cases_df)}")

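    # Page through the notes table in fixed-size chunks, ordered by id.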
    while True:

        notes_query = f'select "id", "c_case" from {db_config["target_schema"]}.notes order by "id" limit {chunk_size} offset {offset};'
        notes_df = pd.read_sql_query(notes_query,
                                     db_config["db_connection_string"])

        log.debug(f"df size - notes_df: {len(notes_df)}")

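        # Left join each chunk of notes onto cases via the case reference
        # number, then keep only the notes that matched a case.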
        notes_caseitem_df = notes_df.merge(
            cases_df,
            how="left",
            left_on="c_case",
            right_on="caserecnumber",
            suffixes=["_notes", "_case"],
        )

        notes_caseitem_df = notes_caseitem_df.drop(columns=["caserecnumber"])
        notes_caseitem_df = notes_caseitem_df.rename(columns={
            "id_case": "caseitem_id",
            "id_notes": "note_id"
        })

        notes_caseitem_df = notes_caseitem_df[
            notes_caseitem_df["caseitem_id"].notna()]

        notes_caseitem_df = reapply_datatypes_to_fk_cols(
            columns=["note_id", "caseitem_id"], df=notes_caseitem_df)

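        # Only write the chunk when at least one note matched a case.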
        if len(notes_caseitem_df) > 0:
            log.debug(f"df size - notes_caseitem_df: {len(notes_caseitem_df)}")
            target_db.insert_data(
                table_name=definition["destination_table_name"],
                df=notes_caseitem_df,
                sirius_details=sirius_details,
                chunk_no=chunk_no,
            )
        offset += chunk_size
        chunk_no += 1
        # Stop once the source query returns fewer notes than a full chunk;
        # the merged frame can be shorter than chunk_size even when more
        # notes remain, because unmatched notes are filtered out above.
        if len(notes_df) < chunk_size:
            break
Example #2
def get_basic_data_table(mapping_file_name,
                         table_definition,
                         db_config,
                         chunk_details=None):
    log.debug(f"Getting basic data using {mapping_file_name}")

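    # The transform_casrec stage drives both the source SELECT and the
    # transformations applied below.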
    mapping_dict = get_mapping_dict(file_name=mapping_file_name,
                                    stage_name="transform_casrec")

    source_table = get_source_table(mapping_dict=mapping_dict)

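    # Full column details (including incomplete fields) are returned to the
    # caller alongside the transformed data.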
    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )

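    # Build the SELECT for the source table; chunk_details, when supplied,
    # limits the query to a single chunk.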
    source_data_query = generate_select_string_from_mapping(
        mapping=mapping_dict,
        source_table_name=source_table,
        additional_columns=table_definition["source_table_additional_columns"],
        db_schema=db_config["source_schema"],
        chunk_details=chunk_details,
    )

    source_data_df = pd.read_sql_query(sql=source_data_query,
                                       con=db_config["db_connection_string"])

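    # Apply the mapping-defined transformations to the raw source rows.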
    result_df = transform.perform_transformations(
        mapping_definitions=mapping_dict,
        table_definition=table_definition,
        source_data_df=source_data_df,
        db_conn_string=db_config["db_connection_string"],
        db_schema=db_config["target_schema"],
        sirius_details=sirius_details,
    )

    result_df["casrec_mapping_file_name"] = mapping_file_name

    log.debug(f"Basic data for {mapping_file_name} has {len(result_df)} rows")

    return sirius_details, result_df
Example #3
def test_get_mapping_dict():

    result = get_mapping_dict(file_name="test_client_persons_mapping",
                              stage_name="transform_casrec")

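    # The expected mapping is stored as a JSON fixture next to the tests.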
    dirname = os.path.dirname(__file__)
    file_path = os.path.join(
        dirname, "expected_results/test_client_persons_expected.json")

    with open(file_path) as expected_json:
        expected_result = json.load(expected_json)

        assert result == expected_result
Example #4
def insert_deputy_person_warning(db_config, target_db):

    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )

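    # Link warnings produced by the deputy violent warnings mapping to the
    # corresponding deputy person records.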
    try:
        deputys_query = (
            f'select "id", "c_deputy_no" from {db_config["target_schema"]}.persons '
            f"where \"type\" = 'actor_deputy';")
        deputys_df = pd.read_sql_query(deputys_query,
                                       db_config["db_connection_string"])

        deputy_warning_query = f"""
                select "id", "c_deputy_no" from {db_config["target_schema"]}.warnings
                where casrec_mapping_file_name = 'deputy_violent_warnings_mapping';"""
        deputy_warning_df = pd.read_sql_query(
            deputy_warning_query, db_config["db_connection_string"])

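        # Left join warnings to deputies on deputy number so each warning
        # gains a person_id.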
        deputy_warning_df = deputy_warning_df.merge(
            deputys_df,
            how="left",
            left_on="c_deputy_no",
            right_on="c_deputy_no",
            suffixes=["_warning", "_deputy"],
        )

        deputy_warning_df = deputy_warning_df.rename(columns={
            "id_warning": "warning_id",
            "id_deputy": "person_id"
        })
        deputy_warning_df["casrec_details"] = None

        deputy_warning_df = reapply_datatypes_to_fk_cols(
            columns=["person_id", "warning_id"], df=deputy_warning_df)

        target_db.insert_data(
            table_name=definition["destination_table_name"],
            df=deputy_warning_df,
            sirius_details=sirius_details,
        )
    except Exception as e:
        log.debug(
            "No data to insert",
            extra={
                "file_name": "",
                "error": format_error_message(e=e),
            },
        )
Example #5
def insert_client_person_warning(db_config, target_db):

    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )

    try:

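        # Clients are persons rows with type actor_client; warnings are
        # matched to them by case reference number.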
        clients_query = (
            f'select "id", "caserecnumber" from {db_config["target_schema"]}.persons '
            f"where \"type\" = 'actor_client';")
        clients_df = pd.read_sql_query(clients_query,
                                       db_config["db_connection_string"])

        client_warning_query = (
            f'select "id", "c_case" from {db_config["target_schema"]}.warnings;'
        )
        client_warning_df = pd.read_sql_query(
            client_warning_query, db_config["db_connection_string"])

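        # Left join warnings to clients on case reference number.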
        client_warning_df = client_warning_df.merge(
            clients_df,
            how="left",
            left_on="c_case",
            right_on="caserecnumber",
            suffixes=["_warning", "_client"],
        )

        client_warning_df = client_warning_df.drop(columns=["caserecnumber"])
        client_warning_df = client_warning_df.rename(columns={
            "id_warning": "warning_id",
            "id_client": "person_id"
        })
        client_warning_df["casrec_details"] = None

        target_db.insert_data(
            table_name=definition["destination_table_name"],
            df=client_warning_df,
            sirius_details=sirius_details,
        )
    except Exception as e:
        log.debug(
            "No data to insert",
            extra={
                "file_name": "",
                "error": format_error_message(e=e),
            },
        )
Example #6
def get_col_definition(mapped_item):
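    # A mapped item is either a reference into another mapping table
    # ("table.column"), an inline column definition, or a key into the
    # module-level mapping_dict.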
    if isinstance(mapped_item, dict) and "mapping_table" in mapped_item:
        pieces = mapped_item["mapping_table"].split(".")
        col_mapping = helpers.get_mapping_dict(
            file_name=pieces[0] + "_mapping",
            only_complete_fields=True,
            include_pk=False,
        )
        col_definition = col_mapping[pieces[1]]
    elif isinstance(mapped_item, dict):
        col_definition = mapped_item
    else:
        col_definition = mapping_dict[mapped_item]

    return col_definition
Example #7
def pre_validation():
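    # Stage the source data, install the SQL transformation routines and
    # generate the per-mapping validation SQL.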
    if is_staging is False:
        log.info(f"Validating with SIRIUS")
        log.info(
            f"Copying casrec csv source data to Sirius for comparison work")
        copy_schema(
            log=log,
            sql_path=sql_path,
            from_config=config.db_config["migration"],
            from_schema=config.schemas["pre_transform"],
            to_config=config.db_config["target"],
            to_schema=config.schemas["pre_transform"],
        )
    else:
        log.info(f"Validating with STAGING schema")

    log.info(f"INSTALL TRANSFORMATION ROUTINES")
    execute_sql_file(sql_path, transformations_sqlfile, conn_target,
                     config.schemas["public"])

    log.info(f"GENERATE SQL")

    log.info("- Lookup Functions")
    build_lookup_functions()
    log.info("- Drop Exception Tables")
    drop_exception_tables()

    global mapping_dict

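    # Build exception tables and validation statements for every mapping
    # in scope.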
    for mapping_name in mappings_to_run:
        mapping_dict = helpers.get_mapping_dict(
            file_name=mapping_name + "_mapping",
            only_complete_fields=True,
            include_pk=False,
        )

        log.info(mapping_name)

        log.info("- Exception Table")
        build_exception_table(mapping_name)

        log.info("- Table Validation Statement")
        build_validation_statements(mapping_name)

        log.info("- Column Validation Statements")
        build_column_validation_statements(mapping_name)

    write_validation_sql()
Example #8
def insert_person_caseitem(db_config, target_db):

    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )

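    # Client person records, keyed by case reference number.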
    persons_query = (
        f'select "id", "caserecnumber" from {db_config["target_schema"]}.persons '
        f"where \"type\" = 'actor_client';")
    persons_df = pd.read_sql_query(persons_query,
                                   db_config["db_connection_string"])

    cases_query = (
        f'select "id", "caserecnumber" from {db_config["target_schema"]}.cases;'
    )
    cases_df = pd.read_sql_query(cases_query,
                                 db_config["db_connection_string"])

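    # Left join cases to client persons on case reference number to build
    # the person/caseitem link rows.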
    person_caseitem_df = cases_df.merge(
        persons_df,
        how="left",
        left_on="caserecnumber",
        right_on="caserecnumber",
        suffixes=["_case", "_person"],
    )

    person_caseitem_df = person_caseitem_df.drop(columns=["caserecnumber"])
    person_caseitem_df = person_caseitem_df.rename(columns={
        "id_case": "caseitem_id",
        "id_person": "person_id"
    })

    target_db.insert_data(
        table_name=definition["destination_table_name"],
        df=person_caseitem_df,
        sirius_details=sirius_details,
    )