def validate_prod(self):
    """Verify the deleted ('D') fact record for one student migrated to prod.

    Expects exactly one rec_status 'D' row in fact_asmt_outcome_vw for the
    hard-coded student guid. NOTE(review): the second lookup reopens the same
    'fact_asmt_outcome_vw' table although its message mentions a "primary"
    table -- looks like it was meant to hit a different physical table; confirm.
    """
    with get_prod_connection(self.tenant) as conn:
        fact_table = conn.get_table('fact_asmt_outcome_vw')
        query = select([fact_table],
                       and_(fact_table.c.student_id == '3efe8485-9c16-4381-ab78-692353104cce',
                            fact_table.c.rec_status == 'D'))
        result = conn.execute(query).fetchall()
        expected_no_rows = 1
        # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12)
        self.assertEqual(len(result), expected_no_rows,
                         "Data has not been loaded to prod_fact_table after edmigrate")
        fact_table_pr = conn.get_table('fact_asmt_outcome_vw')
        fact_asmt_pr = select([fact_table_pr],
                              and_(fact_table_pr.c.student_id == '3efe8485-9c16-4381-ab78-692353104cce',
                                   fact_table_pr.c.rec_status == 'D'))
        fact_result = conn.execute(fact_asmt_pr).fetchall()
        self.assertEqual(len(fact_result), expected_no_rows,
                         "Data has not been loaded to prod_fact_table_primary after edmigrate")
def validate_prod_after_sec_migration(self):
    """Verify prod state after the second migration batch.

    The newly migrated fact record must be 'C' (current), the previously
    migrated record must have flipped to 'I' (inactive), and no rows from
    this batch may have been loaded into any dim table.
    """
    with get_prod_connection(self.tenant) as conn:
        fact_table = conn.get_table('fact_asmt_outcome_vw')
        query = select([fact_table.c.rec_status]).where(fact_table.c.batch_guid == self.guid_batch_id)
        result = conn.execute(query).fetchall()
        expected_status = [('C',)]
        # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12)
        self.assertEqual(result, expected_status,
                         "Error: Error in Status of newly added record")
        # for new record , status change to C
        new_query = select([fact_table.c.rec_status]).where(fact_table.c.batch_guid == self.guid)
        new_result = conn.execute(new_query).fetchall()
        expected_old_status = [('I',)]
        self.assertEqual(new_result, expected_old_status,
                         "Error:Error in status of inactive record ")
        dim_asmt = conn.get_table('dim_asmt')
        dim_inst_hier = conn.get_table('dim_inst_hier')
        dim_student = conn.get_table('dim_student')
        dim_tables = [dim_inst_hier, dim_asmt, dim_student]
        for table in dim_tables:
            rows_in_dim_tables = select([table]).where(table.c.batch_guid == self.guid_batch_id)
            actual_dim_table_rows = conn.execute(rows_in_dim_tables).fetchall()
            expected_rows_in_dim_tables = 0
            self.assertEqual(len(actual_dim_table_rows), expected_rows_in_dim_tables,
                             "Error: Data has been loaded to dim_tables")
def validate_edware_prod(self):
    """Verify update migration results for one student/assessment in prod.

    Checks that the fact update produced both a 'D' (old) and a 'C' (new)
    record, that the 'C' record carries the updated asmt_score (1900), and
    that dim_student shows the last-name change with the old row inactivated.
    """
    with get_prod_connection(self.tenant) as connection:
        fact_table = connection.get_table('fact_asmt_outcome_vw')
        dim_student = connection.get_table('dim_student')
        update_output_data = select([fact_table.c.rec_status],
                                    and_(fact_table.c.student_id == 'f7251065-ca82-4248-9397-cc722e97bbdc',
                                         fact_table.c.asmt_guid == 'a685f0ec-a0a6-4b1e-93b8-0c4298ff6374'))
        update_output_table = connection.execute(update_output_data).fetchall()
        self.assertIn(('D',), update_output_table, "Delete status D is not found in the Update record")
        self.assertIn(('C',), update_output_table, "Insert status C is not found in the Update record")
        # verify update asmt_score in fact_table
        update_asmt_score = select([fact_table.c.asmt_score],
                                   and_(fact_table.c.student_id == 'f7251065-ca82-4248-9397-cc722e97bbdc',
                                        fact_table.c.rec_status == 'C',
                                        fact_table.c.asmt_guid == 'a685f0ec-a0a6-4b1e-93b8-0c4298ff6374'))
        new_asmt_score = connection.execute(update_asmt_score).fetchall()
        expected_asmt_score = [(1900,)]
        # verify that score is updated in fact_Asmt
        # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12)
        self.assertEqual(new_asmt_score, expected_asmt_score)
        # verify that there is only one record with status C
        self.assertEqual(len(new_asmt_score), 1)
        # verification for dim_student update : last name change to Bush
        update_last_name = select([dim_student.c.last_name],
                                  and_(dim_student.c.student_id == 'f7251065-ca82-4248-9397-cc722e97bbdc',
                                       dim_student.c.batch_guid == self.guid_batch_id,
                                       dim_student.c.rec_status == "C"))
        result_dim_student = connection.execute(update_last_name).fetchall()
        expected_last_name = [('Bush',)]
        self.assertEqual(result_dim_student, expected_last_name)
        # verify that old recod is deactive
        inactive_rec = select([dim_student],
                              and_(dim_student.c.student_id == 'f7251065-ca82-4248-9397-cc722e97bbdc',
                                   dim_student.c.rec_status == "I"))
        inactive_result = connection.execute(inactive_rec).fetchall()
        # NOTE(review): count is only printed, never asserted -- debug leftover? confirm intent
        print(len(inactive_result))
def handle_duplicates_in_dimensions(tenant_name, guid_batch):
    """Soft-delete pre-prod dimension records that already exist in prod.

    For every dim_* table in the pre-prod schema, each record is matched
    against production by natural key; matches are marked rec_status 'S'
    and given the prod record's rec_id so that fact_asmt_outcome foreign
    keys can later be remapped.

    :param tenant_name: tenant name, to get target database connection
    :param guid_batch: batch guid
    :return: number of affected rows
    """
    duplicates = 0
    with get_target_connection(tenant_name, guid_batch) as target_conn, \
            get_prod_connection(tenant_name) as prod_conn:
        dim_tables = get_tables_starting_with(target_conn.get_metadata(),
                                              Constants.DIM_TABLES_PREFIX)
        for name in dim_tables:
            pre_prod = HandleUpsertHelper(target_conn, guid_batch, name)
            prod = HandleUpsertHelper(prod_conn, guid_batch, name)
            for rec in pre_prod.find_all():
                prod_match = prod.find_by_natural_key(rec)
                if prod_match:
                    # soft delete the record and adopt the matched prod record's pk
                    pre_prod.soft_delete_and_update(rec, prod_match)
                    duplicates += 1
    return duplicates
def handle_duplicates_in_dimensions(tenant_name, guid_batch):
    """Mark pre-prod dim records that duplicate prod records as deleted.

    Steps:
    1. Soft delete records (rec_status 'S') in pre-prod dimensions that
       already exist in production, matched on natural-key columns.
    2. Copy the matching prod record's rec_id onto the soft-deleted record
       so the later fact-table foreign-key update can use it.

    :param tenant_name: tenant name, to get target database connection
    :param guid_batch: batch guid
    :return: number of affected rows
    """
    count = 0
    with get_target_connection(tenant_name, guid_batch) as target_conn, \
            get_prod_connection(tenant_name) as prod_conn:
        for table_name in get_tables_starting_with(target_conn.get_metadata(),
                                                   Constants.DIM_TABLES_PREFIX):
            target_helper = HandleUpsertHelper(target_conn, guid_batch, table_name)
            prod_helper = HandleUpsertHelper(prod_conn, guid_batch, table_name)
            for rec in target_helper.find_all():
                match = prod_helper.find_by_natural_key(rec)
                if not match:
                    continue
                # soft delete the record and set its pk as the pk of the matched record
                target_helper.soft_delete_and_update(rec, match)
                count += 1
    return count
def validate_after_migration(self, guid_batch_id, guid):
    """Verify that after migration the new batch's fact record is 'C' and the old batch's is 'I'.

    :param guid_batch_id: batch guid of the newly migrated batch (expected 'C')
    :param guid: batch guid of the earlier batch (expected 'I')
    """
    with get_prod_connection(self.tenant) as conn:
        fact_table = conn.get_table('fact_asmt_outcome_vw')
        query = select([fact_table.c.rec_status],
                       and_(fact_table.c.batch_guid == guid_batch_id,
                            fact_table.c.student_id == '72947dea9aef496089d39fa47556ae'))
        result = conn.execute(query).fetchall()
        query_new = select([fact_table.c.rec_status],
                           and_(fact_table.c.batch_guid == guid,
                                fact_table.c.student_id == '72947dea9aef496089d39fa47556ae'))
        result_new = conn.execute(query_new).fetchall()
        # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12)
        self.assertEqual(result, [('C',)])
        self.assertEqual(result_new, [('I',)])
def get_udl_record_by_batch_guid(batch_guid, tenant):
    """Return current ('C') fact and block-fact records for a UDL batch.

    Projects (student_id, asmt_guid, date_taken) from fact_asmt_outcome and
    fact_block_asmt_outcome, restricted to the batch, and unions the two
    result sets.
    """
    with get_prod_connection(tenant=tenant) as connection:
        fao = connection.get_table('fact_asmt_outcome')
        fbao = connection.get_table('fact_block_asmt_outcome')

        def _current_records(table):
            # one labeled projection per fact table, restricted to this batch
            return select([table.c.student_id.label(STUDENT_ID),
                           table.c.asmt_guid.label(ASMT_GUID),
                           table.c.date_taken.label(DATE_TAKEN)]).where(
                and_(table.c.batch_guid == batch_guid,
                     table.c.rec_status == 'C'))

        records = connection.get_result(_current_records(fao).union(_current_records(fbao)))
    return records
def validate_prod(self):
    """Verify the deleted ('D') fact record for one student migrated to prod.

    NOTE(review): both lookups open 'fact_asmt_outcome_vw'; the second
    assertion message refers to a "primary" table -- possibly meant to
    query a different physical table; confirm.
    """
    with get_prod_connection(self.tenant) as conn:
        fact_table = conn.get_table('fact_asmt_outcome_vw')
        query = select([fact_table],
                       and_(fact_table.c.student_id == '3efe8485-9c16-4381-ab78-692353104cce',
                            fact_table.c.rec_status == 'D'))
        result = conn.execute(query).fetchall()
        expected_no_rows = 1
        # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12)
        self.assertEqual(len(result), expected_no_rows,
                         "Data has not been loaded to prod_fact_table after edmigrate")
        fact_table_pr = conn.get_table('fact_asmt_outcome_vw')
        fact_asmt_pr = select([fact_table_pr],
                              and_(fact_table_pr.c.student_id == '3efe8485-9c16-4381-ab78-692353104cce',
                                   fact_table_pr.c.rec_status == 'D'))
        fact_result = conn.execute(fact_asmt_pr).fetchall()
        self.assertEqual(len(fact_result), expected_no_rows,
                         "Data has not been loaded to prod_fact_table_primary after edmigrate")
def test_match_all_records_to_be_deleted_with_prod(self):
    """All four pre-prod records marked for deletion should find a prod match."""
    tenant = MatchAndDeleteFTest.tenant_code
    batch = MatchAndDeleteFTest.guid_batch
    with get_target_connection(tenant, batch) as target_conn, \
            get_prod_connection(tenant) as prod_conn:
        marked = move_to_target.get_records_marked_for_deletion(
            self.conf, target_conn, 'fact_asmt_outcome_vw')
        matched = []
        # yield_records_to_be_deleted returns chunks of at most batch_size rows
        for chunk in move_to_target.yield_records_to_be_deleted(
                prod_conn, 'fact_asmt_outcome_vw', marked, batch_size=10):
            matched.extend(chunk)
        self.assertEqual(4, len(matched))
def validate_prod(self, guid_batch_id):
    """Verify each migrated table has exactly one 'C' record for the batch.

    :param guid_batch_id: batch guid whose rows are checked across the fact
        table and the dim tables
    """
    with get_prod_connection(self.tenant) as conn:
        fact_table = conn.get_table('fact_asmt_outcome_vw')
        dim_asmt = conn.get_table('dim_asmt')
        dim_inst_hier = conn.get_table('dim_inst_hier')
        dim_student = conn.get_table('dim_student')
        dim_tables = [fact_table, dim_inst_hier, dim_asmt, dim_student]
        for table in dim_tables:
            query = select([table.c.rec_status]).where(table.c.batch_guid == guid_batch_id)
            result = conn.execute(query).fetchall()
            rec_status = result
            actual_status = [('C',)]
            actual_rows = len(result)
            expected_no_rows = 1
            # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12)
            self.assertEqual(rec_status, actual_status, "Error: Status is not C")
            self.assertEqual(actual_rows, expected_no_rows,
                             "Data has not been loaded to prod after edmigrate")
def validate_edware_prod(self):
    """Verify duplicate handling: first-batch rows flipped to 'I', second-batch rows are 'C'."""
    latest_guid = self.guid
    with get_prod_connection(self.tenant) as connection:
        fact_vw = connection.get_table('fact_asmt_outcome_vw')
        # the original reflected dim_inst_hier without keeping the result;
        # the call is preserved for identical behavior
        connection.get_table('dim_inst_hier')
        dim_student = connection.get_table('dim_student')
        for tbl in [fact_vw, dim_student]:
            old_query = select([tbl.c.rec_status]).where(tbl.c.batch_guid == self.first_guid)
            old_rows = connection.execute(old_query).fetchall()
            self.assertEqual(old_rows, [('I',), ('I',), ('I',)],
                             "Duplicate record inserted into dim tables")
            new_query = select([tbl.c.rec_status]).where(tbl.c.batch_guid == latest_guid)
            new_rows = connection.execute(new_query).fetchall()
            self.assertEqual(new_rows, [('C',), ('C',), ('C',)],
                             "Duplicate record inserted into dim tables")
def handle_updates_and_deletes(conf):
    """Main handler for updates and deletes.

    Opens pre-prod and prod connections, enumerates all fact tables in the
    pre-prod schema, and runs process_records_to_be_deleted on each.

    :param conf: udl configuration object
    """
    tenant = conf[mk.TENANT_NAME]
    schema = conf[mk.TARGET_DB_SCHEMA]
    with get_target_connection(tenant, schema) as target_conn, \
            get_prod_connection(tenant) as prod_conn:
        fact_tables = get_tables_starting_with(target_conn.get_metadata(),
                                               Constants.FACT_TABLES_PREFIX)
        for fact_table in fact_tables:
            process_records_to_be_deleted(conf, target_conn, prod_conn, fact_table)
def test_update_pre_prod_for_records_to_be_deleted(self):
    """After rec_id updates are applied, no records should remain marked for deletion."""
    with get_target_connection(MatchAndDeleteFTest.tenant_code,
                               MatchAndDeleteFTest.guid_batch) as target_conn, \
            get_prod_connection(MatchAndDeleteFTest.tenant_code) as prod_conn:
        table = 'fact_asmt_outcome_vw'
        to_delete = move_to_target.get_records_marked_for_deletion(
            self.conf, target_conn, table)
        for batch in move_to_target.yield_records_to_be_deleted(
                prod_conn, table, to_delete, batch_size=10):
            move_to_target.update_rec_id_for_records_to_delete(
                self.conf, target_conn, table, batch)
        leftovers = move_to_target.get_records_marked_for_deletion(
            self.conf, target_conn, table)
        self.assertEqual(0, len(leftovers))
def validate_prod(self, guid_batch_id):
    """Verify exactly one 'C' record per migrated table for the given batch.

    :param guid_batch_id: batch guid checked across fact and dim tables
    """
    with get_prod_connection(self.tenant) as conn:
        fact_table = conn.get_table('fact_asmt_outcome_vw')
        dim_asmt = conn.get_table('dim_asmt')
        dim_inst_hier = conn.get_table('dim_inst_hier')
        dim_student = conn.get_table('dim_student')
        dim_tables = [fact_table, dim_inst_hier, dim_asmt, dim_student]
        for table in dim_tables:
            query = select([table.c.rec_status]).where(table.c.batch_guid == guid_batch_id)
            result = conn.execute(query).fetchall()
            rec_status = result
            actual_status = [('C',)]
            actual_rows = len(result)
            expected_no_rows = 1
            # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12)
            self.assertEqual(rec_status, actual_status, "Error: Status is not C")
            self.assertEqual(actual_rows, expected_no_rows,
                             "Data has not been loaded to prod after edmigrate")
def validate_prod_after_sec_migration(self):
    """Verify prod state after the second migration batch.

    New batch's fact record must be 'C', the earlier batch's record must be
    'I', and the new batch must not have written any dim-table rows.
    """
    with get_prod_connection(self.tenant) as conn:
        fact_table = conn.get_table('fact_asmt_outcome_vw')
        query = select([fact_table.c.rec_status]).where(fact_table.c.batch_guid == self.guid_batch_id)
        result = conn.execute(query).fetchall()
        expected_status = [('C',)]
        # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12)
        self.assertEqual(result, expected_status,
                         "Error: Error in Status of newly added record")
        # for new record , status change to C
        new_query = select([fact_table.c.rec_status]).where(fact_table.c.batch_guid == self.guid)
        new_result = conn.execute(new_query).fetchall()
        expected_old_status = [('I',)]
        self.assertEqual(new_result, expected_old_status,
                         "Error:Error in status of inactive record ")
        dim_asmt = conn.get_table('dim_asmt')
        dim_inst_hier = conn.get_table('dim_inst_hier')
        dim_student = conn.get_table('dim_student')
        dim_tables = [dim_inst_hier, dim_asmt, dim_student]
        for table in dim_tables:
            rows_in_dim_tables = select([table]).where(table.c.batch_guid == self.guid_batch_id)
            actual_dim_table_rows = conn.execute(rows_in_dim_tables).fetchall()
            expected_rows_in_dim_tables = 0
            self.assertEqual(len(actual_dim_table_rows), expected_rows_in_dim_tables,
                             "Error: Data has been loaded to dim_tables")
def get_udl_record_by_batch_guid(batch_guid, tenant):
    """Return current ('C') records from both fact tables for the batch, unioned.

    Each side selects (student_id, asmt_guid, date_taken) labeled with the
    shared column constants so the union lines up.
    """
    results = []
    with get_prod_connection(tenant=tenant) as connection:
        fao_table = connection.get_table('fact_asmt_outcome')
        fbao_table = connection.get_table('fact_block_asmt_outcome')
        fao_query = select(
            [fao_table.c.student_id.label(STUDENT_ID),
             fao_table.c.asmt_guid.label(ASMT_GUID),
             fao_table.c.date_taken.label(DATE_TAKEN)]
        ).where(and_(fao_table.c.batch_guid == batch_guid,
                     fao_table.c.rec_status == 'C'))
        fbao_query = select(
            [fbao_table.c.student_id.label(STUDENT_ID),
             fbao_table.c.asmt_guid.label(ASMT_GUID),
             fbao_table.c.date_taken.label(DATE_TAKEN)]
        ).where(and_(fbao_table.c.batch_guid == batch_guid,
                     fbao_table.c.rec_status == 'C'))
        results = connection.get_result(fao_query.union(fbao_query))
    return results
def validate_edware_prod(self):
    """Verify duplicate handling across fact and dim_student for two batches."""
    second_guid = self.guid
    expected_inactive = [('I',), ('I',), ('I',)]
    expected_active = [('C',), ('C',), ('C',)]
    with get_prod_connection(self.tenant) as connection:
        fact_asmt_outcome_vw = connection.get_table('fact_asmt_outcome_vw')
        # reflected but unused in the original; call kept for identical behavior
        connection.get_table('dim_inst_hier')
        dim_student = connection.get_table('dim_student')
        for table in (fact_asmt_outcome_vw, dim_student):
            # rows of the first batch must have been inactivated
            first_batch_q = select([table.c.rec_status]).where(
                table.c.batch_guid == self.first_guid)
            first_batch_rows = connection.execute(first_batch_q).fetchall()
            self.assertEqual(first_batch_rows, expected_inactive,
                             "Duplicate record inserted into dim tables")
            # rows of the second batch must be current
            second_batch_q = select([table.c.rec_status]).where(
                table.c.batch_guid == second_guid)
            second_batch_rows = connection.execute(second_batch_q).fetchall()
            self.assertEqual(second_batch_rows, expected_active,
                             "Duplicate record inserted into dim tables")
def handle_updates_and_deletes(conf):
    """Main handler for updates and deletes.

    Grabs the fact tables to be processed for updates and deletes and runs
    process_records_to_be_deleted for each one.

    :param conf: udl configuration object
    """
    with get_target_connection(conf[mk.TENANT_NAME],
                               conf[mk.TARGET_DB_SCHEMA]) as target_conn, \
            get_prod_connection(conf[mk.TENANT_NAME]) as prod_conn:
        for table_name in get_tables_starting_with(target_conn.get_metadata(),
                                                   Constants.FACT_TABLES_PREFIX):
            process_records_to_be_deleted(conf, target_conn, prod_conn, table_name)
def test_match_all_records_to_be_deleted_with_prod(self):
    """Every pre-prod record marked for deletion matches a prod row (4 total)."""
    with get_target_connection(MatchAndDeleteFTest.tenant_code,
                               MatchAndDeleteFTest.guid_batch) as target_conn, \
            get_prod_connection(MatchAndDeleteFTest.tenant_code) as prod_conn:
        fact_view = 'fact_asmt_outcome_vw'
        pending = move_to_target.get_records_marked_for_deletion(
            self.conf, target_conn, fact_view)
        row_chunks = move_to_target.yield_records_to_be_deleted(
            prod_conn, fact_view, pending, batch_size=10)
        collected = []
        for rows in row_chunks:
            collected.extend(rows)
        self.assertEqual(4, len(collected))
def test_update_pre_prod_for_records_to_be_deleted(self):
    """Applying rec_id updates clears the marked-for-deletion set."""
    tenant = MatchAndDeleteFTest.tenant_code
    batch_guid = MatchAndDeleteFTest.guid_batch
    with get_target_connection(tenant, batch_guid) as target_conn, \
            get_prod_connection(tenant) as prod_conn:
        marked = move_to_target.get_records_marked_for_deletion(
            self.conf, target_conn, 'fact_asmt_outcome_vw')
        chunks = move_to_target.yield_records_to_be_deleted(
            prod_conn, 'fact_asmt_outcome_vw', marked, batch_size=10)
        for rows in chunks:
            move_to_target.update_rec_id_for_records_to_delete(
                self.conf, target_conn, 'fact_asmt_outcome_vw', rows)
        remaining = move_to_target.get_records_marked_for_deletion(
            self.conf, target_conn, 'fact_asmt_outcome_vw')
        self.assertEqual(0, len(remaining))
def delete_prod_tables(self, tenant='cat'):
    """Delete all rows from every table in the tenant's prod schema.

    :param tenant: tenant code whose prod tables are wiped; defaults to
        'cat' for backward compatibility (TODO: read from ini the name of schema)
    """
    with get_prod_connection(tenant) as conn:
        metadata = conn.get_metadata()
        # delete children before parents so FK constraints are not violated
        for table in reversed(metadata.sorted_tables):
            conn.execute(table.delete())