def insert_caseitem_note(db_config, target_db):
    """Build and insert the caseitem↔note link rows, one chunk of notes at a time.

    Loads all cases once, then pages through the notes table (`limit/offset`),
    joins each page to cases on caserecnumber, and inserts the resulting
    (note_id, caseitem_id) pairs via ``target_db.insert_data``.

    Args:
        db_config: dict with "chunk_size", "target_schema" and
            "db_connection_string" keys.
        target_db: object exposing ``insert_data(table_name, df,
            sirius_details, chunk_no)``.
    """
    chunk_size = db_config["chunk_size"]
    offset = 0
    chunk_no = 1
    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )
    cases_query = (
        f'select "id", "caserecnumber" from {db_config["target_schema"]}.cases;'
    )
    cases_df = pd.read_sql_query(cases_query, db_config["db_connection_string"])
    log.debug(f"df size - cases_df: {len(cases_df)}")
    while True:
        # Deterministic paging: order by id so limit/offset windows don't overlap.
        notes_query = f'select "id", "c_case" from {db_config["target_schema"]}.notes order by "id" limit {chunk_size} offset {offset};'
        notes_df = pd.read_sql_query(notes_query, db_config["db_connection_string"])
        log.debug(f"df size - notes_df: {len(notes_df)}")
        notes_caseitem_df = notes_df.merge(
            cases_df,
            how="left",
            left_on="c_case",
            right_on="caserecnumber",
            suffixes=["_notes", "_case"],
        )
        notes_caseitem_df = notes_caseitem_df.drop(columns=["caserecnumber"])
        notes_caseitem_df = notes_caseitem_df.rename(
            columns={"id_case": "caseitem_id", "id_notes": "note_id"}
        )
        # Drop notes whose case wasn't found; their FK would be null.
        notes_caseitem_df = notes_caseitem_df[
            notes_caseitem_df["caseitem_id"].notna()
        ]
        # Left-merge turns int FK columns into floats (NaN support); restore them.
        notes_caseitem_df = reapply_datatypes_to_fk_cols(
            columns=["note_id", "caseitem_id"], df=notes_caseitem_df
        )
        if len(notes_caseitem_df) > 0:
            log.debug(f"df size - notes_caseitem_df: {len(notes_caseitem_df)}")
            target_db.insert_data(
                table_name=definition["destination_table_name"],
                df=notes_caseitem_df,
                sirius_details=sirius_details,
                chunk_no=chunk_no,
            )
        offset += chunk_size
        chunk_no += 1
        # BUG FIX: terminate on the size of the raw chunk (notes_df), not the
        # filtered join result. Filtering out unmatched rows above can shrink
        # the frame below chunk_size even when a full page of notes was read,
        # which previously ended the loop early and silently skipped notes.
        if len(notes_df) < chunk_size:
            break
def get_basic_data_table(mapping_file_name, table_definition, db_config, chunk_details=None):
    """Load one source table, run the casrec transformations on it, and return
    the sirius mapping details together with the transformed frame.

    Args:
        mapping_file_name: name of the mapping JSON to drive the transform.
        table_definition: table config, including any extra source columns.
        db_config: dict holding schemas and the DB connection string.
        chunk_details: optional paging details passed through to the select
            generator.

    Returns:
        tuple of (sirius_details dict, transformed DataFrame).
    """
    log.debug(f"Getting basic data using {mapping_file_name}")

    # Two views of the same mapping file: the transform rules and the
    # full sirius column details.
    mapping_dict = get_mapping_dict(
        file_name=mapping_file_name, stage_name="transform_casrec"
    )
    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )
    source_table = get_source_table(mapping_dict=mapping_dict)

    # Pull the raw source rows the mapping asks for.
    source_data_query = generate_select_string_from_mapping(
        mapping=mapping_dict,
        source_table_name=source_table,
        additional_columns=table_definition["source_table_additional_columns"],
        db_schema=db_config["source_schema"],
        chunk_details=chunk_details,
    )
    source_data_df = pd.read_sql_query(
        sql=source_data_query, con=db_config["db_connection_string"]
    )

    # Apply the casrec -> sirius transformations.
    result_df = transform.perform_transformations(
        mapping_definitions=mapping_dict,
        table_definition=table_definition,
        source_data_df=source_data_df,
        db_conn_string=db_config["db_connection_string"],
        db_schema=db_config["target_schema"],
        sirius_details=sirius_details,
    )
    # Tag every row with its mapping file for later traceability.
    result_df["casrec_mapping_file_name"] = mapping_file_name

    log.debug(f"Basic data for {mapping_file_name} has {len(result_df)} rows")
    return sirius_details, result_df
def test_get_mapping_dict():
    """get_mapping_dict output for the client persons mapping matches the
    stored expected-results JSON fixture exactly."""
    result = get_mapping_dict(
        file_name="test_client_persons_mapping", stage_name="transform_casrec"
    )
    dirname = os.path.dirname(__file__)
    # Plain string literal: the original was an f-string with no placeholders.
    file_path = os.path.join(
        dirname, "expected_results/test_client_persons_expected.json"
    )
    with open(file_path) as expected_json:
        expected_result = json.load(expected_json)
    assert result == expected_result
def insert_deputy_person_warning(db_config, target_db):
    """Link violent-warning rows to deputy persons and insert the join rows.

    Matches warnings created from the deputy_violent_warnings_mapping to
    deputy persons by deputy number, then writes (warning_id, person_id)
    pairs. Any failure is logged at debug level and swallowed — this step
    is best-effort by design.
    """
    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )
    try:
        # Deputy persons, keyed by deputy number.
        deputys_query = (
            f'select "id", "c_deputy_no" from {db_config["target_schema"]}.persons '
            f"where \"type\" = 'actor_deputy';"
        )
        deputys_df = pd.read_sql_query(
            deputys_query, db_config["db_connection_string"]
        )

        # Only the warnings that came from the violent-warnings mapping.
        deputy_warning_query = f"""
        select "id", "c_deputy_no" from {db_config["target_schema"]}.warnings
        where casrec_mapping_file_name = 'deputy_violent_warnings_mapping';"""
        deputy_warning_df = pd.read_sql_query(
            deputy_warning_query, db_config["db_connection_string"]
        )

        joined_df = deputy_warning_df.merge(
            deputys_df,
            how="left",
            left_on="c_deputy_no",
            right_on="c_deputy_no",
            suffixes=["_warning", "_deputy"],
        )
        joined_df = joined_df.rename(
            columns={"id_warning": "warning_id", "id_deputy": "person_id"}
        )
        joined_df["casrec_details"] = None
        # Left-merge produces float FK columns; restore the expected dtypes.
        joined_df = reapply_datatypes_to_fk_cols(
            columns=["person_id", "warning_id"], df=joined_df
        )

        target_db.insert_data(
            table_name=definition["destination_table_name"],
            df=joined_df,
            sirius_details=sirius_details,
        )
    except Exception as e:
        log.debug(
            "No data to insert",
            extra={
                "file_name": "",
                "error": format_error_message(e=e),
            },
        )
def insert_client_person_warning(db_config, target_db):
    """Link warning rows to client persons and insert the join rows.

    Matches warnings to client persons by case record number, then writes
    (warning_id, person_id) pairs. Any failure is logged at debug level and
    swallowed — this step is best-effort by design, matching its deputy
    counterpart.
    """
    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )
    try:
        # Client persons, keyed by case record number.
        clients_query = (
            f'select "id", "caserecnumber" from {db_config["target_schema"]}.persons '
            f"where \"type\" = 'actor_client';"
        )
        clients_df = pd.read_sql_query(
            clients_query, db_config["db_connection_string"]
        )
        client_warning_query = (
            f'select "id", "c_case" from {db_config["target_schema"]}.warnings;'
        )
        client_warning_df = pd.read_sql_query(
            client_warning_query, db_config["db_connection_string"]
        )
        client_warning_df = client_warning_df.merge(
            clients_df,
            how="left",
            left_on="c_case",
            right_on="caserecnumber",
            suffixes=["_warning", "_client"],
        )
        client_warning_df = client_warning_df.drop(columns=["caserecnumber"])
        client_warning_df = client_warning_df.rename(
            columns={"id_warning": "warning_id", "id_client": "person_id"}
        )
        client_warning_df["casrec_details"] = None
        # CONSISTENCY FIX: the deputy variant reapplies FK dtypes after the
        # left merge (which floats int columns to hold NaN); this function
        # previously skipped that step and inserted float-typed FKs.
        client_warning_df = reapply_datatypes_to_fk_cols(
            columns=["person_id", "warning_id"], df=client_warning_df
        )
        target_db.insert_data(
            table_name=definition["destination_table_name"],
            df=client_warning_df,
            sirius_details=sirius_details,
        )
    except Exception as e:
        log.debug(
            "No data to insert",
            extra={
                "file_name": "",
                "error": format_error_message(e=e),
            },
        )
def get_col_definition(mapped_item):
    """Resolve a mapped item to its column definition.

    A mapped item is either a plain column name (looked up in the current
    global ``mapping_dict``), an inline definition dict, or a dict with a
    "mapping_table" reference of the form "<file>.<column>" pointing into
    another mapping file.
    """
    if not isinstance(mapped_item, dict):
        # Plain column name: resolve against the active mapping.
        return mapping_dict[mapped_item]

    if "mapping_table" not in mapped_item:
        # Already a complete inline definition.
        return mapped_item

    # Cross-mapping reference: "<file>.<column>".
    pieces = mapped_item["mapping_table"].split(".")
    referenced_mapping = helpers.get_mapping_dict(
        file_name=pieces[0] + "_mapping",
        only_complete_fields=True,
        include_pk=False,
    )
    return referenced_mapping[pieces[1]]
def pre_validation():
    """Prepare the validation run: stage source data, install SQL routines,
    and generate per-mapping validation SQL.

    When not validating against staging, the casrec pre-transform schema is
    copied into the Sirius target DB for comparison. Then the transformation
    SQL routines are installed, lookup functions and exception tables are
    (re)built, and validation statements are generated for every mapping in
    ``mappings_to_run`` before being written out.
    """
    if is_staging is False:
        # Constant messages: no f-prefix needed (removed pointless f-strings).
        log.info("Validating with SIRIUS")
        log.info("Copying casrec csv source data to Sirius for comparison work")
        copy_schema(
            log=log,
            sql_path=sql_path,
            from_config=config.db_config["migration"],
            from_schema=config.schemas["pre_transform"],
            to_config=config.db_config["target"],
            to_schema=config.schemas["pre_transform"],
        )
    else:
        log.info("Validating with STAGING schema")

    log.info("INSTALL TRANSFORMATION ROUTINES")
    execute_sql_file(
        sql_path, transformations_sqlfile, conn_target, config.schemas["public"]
    )

    log.info("GENERATE SQL")
    log.info("- Lookup Functions")
    build_lookup_functions()
    log.info("- Drop Exception Tables")
    drop_exception_tables()

    # mapping_dict is read by helpers such as get_col_definition, so it must
    # be rebound at module level for each mapping.
    global mapping_dict
    for mapping_name in mappings_to_run:
        mapping_dict = helpers.get_mapping_dict(
            file_name=mapping_name + "_mapping",
            only_complete_fields=True,
            include_pk=False,
        )
        log.info(mapping_name)
        log.info("- Exception Table")
        build_exception_table(mapping_name)
        log.info("- Table Validation Statement")
        build_validation_statements(mapping_name)
        log.info("- Column Validation Statements")
        build_column_validation_statements(mapping_name)

    write_validation_sql()
def insert_person_caseitem(db_config, target_db):
    """Link client persons to cases and insert the person↔caseitem join rows.

    Joins every case to its client person on caserecnumber and writes
    (caseitem_id, person_id) pairs via ``target_db.insert_data``.
    """
    sirius_details = get_mapping_dict(
        file_name=mapping_file_name,
        stage_name="sirius_details",
        only_complete_fields=False,
    )
    persons_query = (
        f'select "id", "caserecnumber" from {db_config["target_schema"]}.persons '
        f"where \"type\" = 'actor_client';"
    )
    persons_df = pd.read_sql_query(persons_query, db_config["db_connection_string"])
    cases_query = (
        f'select "id", "caserecnumber" from {db_config["target_schema"]}.cases;'
    )
    cases_df = pd.read_sql_query(cases_query, db_config["db_connection_string"])
    person_caseitem_df = cases_df.merge(
        persons_df,
        how="left",
        left_on="caserecnumber",
        right_on="caserecnumber",
        suffixes=["_case", "_person"],
    )
    person_caseitem_df = person_caseitem_df.drop(columns=["caserecnumber"])
    person_caseitem_df = person_caseitem_df.rename(
        columns={"id_case": "caseitem_id", "id_person": "person_id"}
    )
    # CONSISTENCY FIX: sibling link-builders (insert_caseitem_note,
    # insert_deputy_person_warning) reapply FK dtypes after the left merge,
    # which floats int columns to accommodate NaN; this one previously
    # inserted float-typed FKs.
    person_caseitem_df = reapply_datatypes_to_fk_cols(
        columns=["person_id", "caseitem_id"], df=person_caseitem_df
    )
    target_db.insert_data(
        table_name=definition["destination_table_name"],
        df=person_caseitem_df,
        sirius_details=sirius_details,
    )