def task(msg):
    """Load the file named in ``msg`` through ``load_file`` and record a benchmark.

    The loader configuration is produced by ``generate_conf_for_loading`` from
    the file path, start row, load type, headers, batch guid and tenant name
    read out of ``msg``.  The timestamps taken at the top of the function and
    right after ``load_file`` returns are handed to ``BatchTableBenchmark``.

    :param msg: mapping indexed by ``mk`` keys
    :return: the ``msg`` object that was passed in
    """
    started_at = datetime.datetime.now()
    logger.info(task.name)

    source_file = msg[mk.FILE_TO_LOAD]
    logger.info('LOAD_CSV_TO_STAGING: Loading file <%s> ' % (source_file))

    # Pull the loader inputs out of the message once; they are reused below.
    batch_guid = msg[mk.GUID_BATCH]
    first_row = msg[mk.ROW_START]
    load_type = msg[mk.LOAD_TYPE]
    headers = msg[mk.HEADERS]
    tenant_name = msg[mk.TENANT_NAME]

    loader_conf = generate_conf_for_loading(
        source_file, first_row, load_type, headers, batch_guid, tenant_name)
    load_file(loader_conf)
    finished_at = datetime.datetime.now()

    # Record benchmark
    BatchTableBenchmark(
        batch_guid,
        load_type,
        task.name,
        started_at,
        finished_at,
        task_id=str(task.request.id),
        working_schema=loader_conf[mk.TARGET_DB_SCHEMA],
        udl_leaf=True,
        size_records=msg[mk.SIZE_RECORDS],
        tenant=tenant_name,
    ).record_benchmark()
    return msg
def test_stu_reg_row_number(self):
    """Load with the 'studentregistration' config and compare row counts.

    Asserts that ``get_row_number_in_csv`` for the configured file equals
    ``get_row_number_in_table`` once ``load_file`` has run.
    """
    # load data
    self.load_config('studentregistration')
    self.conf[mk.ROW_START] = 10
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    load_file(self.conf)

    # verify
    csv_path = self.conf[mk.FILE_TO_LOAD]
    csv_row_count = self.get_row_number_in_csv(csv_path)
    table_row_count = self.get_row_number_in_table()
    self.assertEqual(csv_row_count, table_row_count)
def test_stu_reg_row_number(self):
    """Check that the row count from the CSV matches the row count from the table.

    Uses the 'studentregistration' configuration with a start row of 10 and
    a batch guid from ``generate_non_exsisting_guid_batch``.
    """
    self.load_config('studentregistration')
    self.conf[mk.ROW_START] = 10
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    load_file(self.conf)

    loaded_file = self.conf[mk.FILE_TO_LOAD]
    rows_in_csv = self.get_row_number_in_csv(loaded_file)
    rows_in_db = self.get_row_number_in_table()
    self.assertEqual(rows_in_csv, rows_in_db)
def test_assessment_row_number(self):
    """Load with the 'assessment' config and compare row counts.

    Asserts that ``get_row_number_in_csv`` for the configured file equals
    ``get_row_number_in_table`` once ``load_file`` has run.
    """
    # load data
    self.load_config('assessment')
    self.conf[mk.ROW_START] = 10
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    load_file(self.conf)

    # verify
    csv_path = self.conf[mk.FILE_TO_LOAD]
    csv_row_count = self.get_row_number_in_csv(csv_path)
    table_row_count = self.get_row_number_in_table()
    self.assertEqual(csv_row_count, table_row_count)
def test_stu_reg_transformations_occur_during_load(self):
    """Load the 'before' student registration CSV with rules enabled.

    After ``load_file`` runs with ``mk.APPLY_RULES`` set to True, the 'after'
    CSV is handed to ``compare_csv_table_data`` together with the
    'StudentIdentifier' column name.
    """
    self.load_config('studentregistration')
    self.conf[mk.ROW_START] = 124
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    source_csv = self.get_csv_file('student_registration_data/test_stu_reg_before_stored_proc.csv')
    self.conf[mk.FILE_TO_LOAD] = source_csv
    self.conf[mk.APPLY_RULES] = True
    load_file(self.conf)

    # Get newly loaded data for comparison
    expected_csv = self.get_csv_file('student_registration_data/test_stu_reg_after_stored_proc.csv')
    self.compare_csv_table_data(expected_csv, 'StudentIdentifier')
def test_assessment_transformations_occur_during_load(self):
    """Load the assessment stored-proc CSV with rules enabled.

    After ``load_file`` runs with ``mk.APPLY_RULES`` set to True, the
    '_CLEAN' CSV is handed to ``compare_csv_table_data`` together with the
    'StudentIdentifier' column name.
    """
    self.load_config('assessment')
    self.conf[mk.ROW_START] = 124
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    source_csv = self.get_csv_file('test_file_stored_proc_data.csv')
    self.conf[mk.FILE_TO_LOAD] = source_csv
    self.conf[mk.APPLY_RULES] = True
    load_file(self.conf)

    # get newly loaded data for comparison
    expected_csv = self.get_csv_file('test_file_stored_proc_data_CLEAN.csv')
    self.compare_csv_table_data(expected_csv, 'StudentIdentifier')
def test_stu_reg_compare_data(self):
    """Load with the 'studentregistration' config and verify the table rows.

    The rows fetched for ``STG_SBAC_STU_REG_COLUMNS`` are passed to
    ``verify_table_content`` after ``load_file`` has run.
    """
    # load data
    self.load_config('studentregistration')
    self.conf[mk.ROW_START] = 24
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    load_file(self.conf)

    # fetch what landed in the db and hand it to the verifier
    db_rows = self.get_rows_in_table(STG_SBAC_STU_REG_COLUMNS)
    self.verify_table_content(db_rows)
def test_assessment_row_number(self):
    """Check that the row count from the CSV matches the row count from the table.

    Uses the 'assessment' configuration with a start row of 10 and a batch
    guid from ``generate_non_exsisting_guid_batch``.
    """
    self.load_config('assessment')
    self.conf[mk.ROW_START] = 10
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    load_file(self.conf)

    loaded_file = self.conf[mk.FILE_TO_LOAD]
    rows_in_csv = self.get_row_number_in_csv(loaded_file)
    rows_in_db = self.get_row_number_in_table()
    self.assertEqual(rows_in_csv, rows_in_db)
def test_assessment_compare_data(self):
    """Load with the 'assessment' config and verify the table rows.

    After ``load_file`` runs, the test sleeps for 20 seconds, then passes the
    rows fetched for ``STG_SBAC_ASMT_OUTCOME_COLUMNS`` to
    ``verify_regular_table_content``.
    """
    # load data
    self.load_config('assessment')
    self.conf[mk.ROW_START] = 24
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    load_file(self.conf)

    # wait for a while to avoid timing issue.
    pause_seconds = 20
    time.sleep(pause_seconds)

    # fetch what landed in the db and hand it to the verifier
    db_rows = self.get_rows_in_table(STG_SBAC_ASMT_OUTCOME_COLUMNS)
    self.verify_regular_table_content(db_rows)
def test_stu_reg_transformations_occur_during_load(self):
    """Run a rules-enabled load of the 'before' CSV and compare with the 'after' CSV.

    Uses the 'studentregistration' configuration with a start row of 124;
    the comparison is delegated to ``compare_csv_table_data`` with the
    'StudentIdentifier' column name.
    """
    self.load_config('studentregistration')
    self.conf[mk.ROW_START] = 124
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    before_path = self.get_csv_file(
        'student_registration_data/test_stu_reg_before_stored_proc.csv')
    self.conf[mk.FILE_TO_LOAD] = before_path
    self.conf[mk.APPLY_RULES] = True
    load_file(self.conf)

    # Get newly loaded data for comparison
    after_path = self.get_csv_file(
        'student_registration_data/test_stu_reg_after_stored_proc.csv')
    self.compare_csv_table_data(after_path, 'StudentIdentifier')
def test_assessment_transformations_occur_during_load(self):
    """Run a rules-enabled load of the raw CSV and compare with the '_CLEAN' CSV.

    Uses the 'assessment' configuration with a start row of 124; the
    comparison is delegated to ``compare_csv_table_data`` with the
    'StudentIdentifier' column name.
    """
    self.load_config('assessment')
    self.conf[mk.ROW_START] = 124
    self.conf[mk.GUID_BATCH] = self.generate_non_exsisting_guid_batch()
    raw_path = self.get_csv_file('test_file_stored_proc_data.csv')
    self.conf[mk.FILE_TO_LOAD] = raw_path
    self.conf[mk.APPLY_RULES] = True
    load_file(self.conf)

    # get newly loaded data for comparison
    clean_path = self.get_csv_file('test_file_stored_proc_data_CLEAN.csv')
    self.compare_csv_table_data(clean_path, 'StudentIdentifier')
def load_file_to_stage(self, data_file, header_file, load_type, staging_table, guid):
    """Load ``data_file`` into ``staging_table`` and rewrite ``record_sid`` for the batch.

    A loader configuration is assembled from files under ``self.data_dir``
    and the schema in ``self.udl2_conf`` and passed to ``load_file``.  Then
    every row whose ``guid_batch`` equals ``guid`` gets
    ``record_sid = 1000 + src_file_rec_num - 1``.

    :param data_file: data file name, relative to ``self.data_dir``
    :param header_file: header file name, relative to ``self.data_dir``
    :param load_type: passed to ``Constants.UDL2_REF_MAPPING_TABLE``
    :param staging_table: name of the target table
    :param guid: batch guid stored in the config and used to filter the update
    """
    # file contain 30 rows
    data_path = os.path.join(self.data_dir, data_file)
    header_path = os.path.join(self.data_dir, header_file)
    db_schema = self.udl2_conf['udl2_db_conn']['db_schema']

    loader_conf = {
        mk.FILE_TO_LOAD: data_path,
        mk.HEADERS: header_path,
        mk.CSV_TABLE: 'csv_table_for_file_loader',
        mk.CSV_SCHEMA: db_schema,
        mk.REF_TABLE: Constants.UDL2_REF_MAPPING_TABLE(load_type),
        mk.CSV_LZ_TABLE: Constants.UDL2_CSV_LZ_TABLE,
        mk.FDW_SERVER: 'udl2_fdw_server',
        mk.TARGET_DB_SCHEMA: db_schema,
        mk.TARGET_DB_TABLE: staging_table,
        mk.APPLY_RULES: False,
        mk.ROW_START: 10,
        mk.GUID_BATCH: guid,
        mk.TENANT_NAME: 'cat',
    }
    load_file(loader_conf)

    with get_udl_connection() as connection:
        stage = connection.get_table(staging_table)
        # record_sid is derived from the source-file record number.
        new_record_sid = 1000 + stage.c.src_file_rec_num - 1
        batch_filter = stage.c.guid_batch == guid
        statement = update(stage).values(record_sid=new_record_sid).where(batch_filter)
        connection.execute(statement)