def exec_dml(self, dml, trans, commit=False):
    #log.debug(dml)
    try:
        out = trans.cur.execute(dml).rowcount
        if commit:
            trans.conn.commit()
            pfmt([[dml]], ['DML'], 'Committed'.upper())
        return out
    except Exception:
        # Roll back on any failure so the caller can retry or abort cleanly.
        trans.conn.rollback()
        pfmt([[dml]], ['DML'], 'Rolled back'.upper())
        raise
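
# A minimal usage sketch (the DELETE text and table are hypothetical; toDB /
# to_conn follow the naming used elsewhere in this module): exec_dml runs a
# statement inside an open transaction, returns the affected-row count, and
# rolls back and re-raises on failure.
#
#   toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
#   rows = toDB.exec_dml('DELETE FROM SomeTable WHERE 1=2',
#                        trans=to_conn, commit=False)
#   toDB.commit_transaction(trans=to_conn)
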
def MoveSnapFolder(self, okdir):
    dest_folder = self.get_dest_folder(okdir)
    snap_ok_file = okdir + '.ok'
    dest_ok_file = dest_folder + '.ok'
    status_file = dest_folder + '.' + self.status_dir
    cdt = datetime.now()
    cd = cdt.strftime('%Y-%m-%d %H:%M:%S')
    pfmt([[okdir, dest_folder], [snap_ok_file, dest_ok_file]],
         ['From', 'To'], 'Backup')
    # The snapshot folder and its .ok marker must exist; the destination must not.
    assert os.path.isdir(okdir)
    assert os.path.isfile(snap_ok_file)
    assert not os.path.isdir(dest_folder)
    assert not os.path.isfile(dest_ok_file)
    shutil.move(okdir, dest_folder)
    shutil.move(snap_ok_file, dest_ok_file)
    # Stamp the move time; the context manager closes the file.
    with open(status_file, 'w') as fw:
        fw.write(cd)
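
# Snapshot layout inferred from the asserts above (paths illustrative):
#
#   before:  <dumpDir>/snap_20200101        <- okdir (data folder)
#            <dumpDir>/snap_20200101.ok     <- snap_ok_file (completion marker)
#   after:   <destDir>/snap_20200101        <- dest_folder
#            <destDir>/snap_20200101.ok     <- dest_ok_file
#            <destDir>/snap_20200101.<status_dir>  containing 'YYYY-MM-DD HH:MM:SS'
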
def run():
    skip = 2
    serviceName = 'gfin'
    #deleted = {}
    #loaded = {}
    #not_loaded = {}
    #masterTbl = 'gtxMasterPKData'
    #do_not_delete = ['TxFinancingRateHist', masterTbl]
    do_not_load = ['TxFinancingRate', 'TxFinancingRateHist']  #'TxFinancingRate',
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(aname=_src_class, app_init=app_init)

        # Resolve the dump directory and collect the *.ok marker files.
        cli.set_source(_source)
        dir_scfg = cli.get_dcfg(_src_class)
        path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
        ok_files = InOut(file_names=[])
        DirReader.glob_dir(path=path, out=ok_files, ext='*.ok')

        for _trg_class, val in cli.cfg['target'][_source].items():
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)
            masterTabTag = tcfg['masterTableTag']
            masterTbl = tcfg['targetTables'][masterTabTag]['table_name']
            masterTblCol = tcfg['targetTables'][masterTabTag]['column_name']
            do_not_delete = tcfg['doNotDeleteTables'] + [masterTbl]
            do_not_load = tcfg['doNotLoadTables']
            to_conn = InOut()
            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            to_conn.cur.execute('set search_path to CIGRpt')

            if ok_files.file_names:
                # Master first: (re)create the temp table that holds the master PKs.
                try:
                    stmt = 'drop table %s' % masterTbl
                    to_conn.cur.execute(stmt)
                except Exception as ex:
                    #raise
                    if not 'Table "%s" does not exist' % masterTbl in str(ex):
                        raise
                stmt = ('create local temporary table %s ( %s bigint not null, '
                        'MartModifiedDate timestamp)\n ON COMMIT PRESERVE ROWS'
                        % (masterTbl, masterTblCol))
                pfmt([[stmt]], ['Create master temp PK'])
                to_conn.cur.execute(stmt)
                #e()

            stats = {}
            deleted = {}
            processed = []
            not_processed = []
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File', app_init=app_init,
                                       file_name=okfn, scfg=dir_scfg)
                okdir, _ = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)
                #e()
                assert os.path.isdir(okdir)
                snap_df = cli.get_dest_folder(okdir)
                if os.path.isdir(snap_df):
                    log.warning('[%s]Destination folder exists: [%s]' % (okdir, snap_df))
                    not_processed.append(okfn)
                    continue
                OkReader = create_reader(aname="Dir", app_init=app_init)
                out_files = InOut(file_names=[])
                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                apx = dict(MartModifiedDate=okFile.get_value(coords=(0, 0), skip=skip))

                # Map each *.out file to its table name (second dot-separated token).
                ftlist = []
                for out_fn in out_files.file_names:
                    print(out_fn)
                    ftlist.append(os.path.basename(out_fn).split('.')[1])
                pfmt([[x] for x in ftlist], ['Files->Tables'])
                #e()

                # Cross-check the file tables against config["targetTables"].
                ctables = cli.tcfg['targetTables'].keys()
                extra_file_tables = list(set(ftlist) - set(ctables))
                pfmt([[x] for x in extra_file_tables], ['Tables not in config.'])
                extra_config_tables = list(set(ctables) - set(ftlist))
                pfmt([[x] for x in extra_config_tables],
                     ['Tables in config but not in file names.'])
                assert not extra_file_tables, \
                    'Tables %s are not listed in config["targetTables"].' % extra_file_tables
                if 0:
                    g = input("Continue?")

                #//create PK file
                fromFile = create_reader(aname='File', app_init=app_init,
                                         file_name=os.path.join(okdir, 'gfin.Instrument.out'),
                                         scfg=dir_scfg)
                toFile = create_reader(aname='File', app_init=app_init,
                                       file_name=os.path.join(okdir, '%s.PK.out' % serviceName),
                                       scfg=dir_scfg, parse=False)
                rowcnt = cli.createPrimaryKeyFile(
                    ffObj=fromFile,
                    pkfn=os.path.join(okdir, '%s.PK.out' % serviceName))
                assert masterTabTag in ftlist, '"%s" file is missing' % masterTabTag

                stmt = 'TRUNCATE TABLE %s' % masterTbl
                toDB.exec_dml(stmt, trans=to_conn, commit=False)
                deleted[masterTbl] = -1
                #e()

                loaded = {}
                not_loaded = {}

                # Load the master PK file first.
                pkfn = [x for x in out_files.file_names
                        if os.path.basename(x).split('.')[1] in [masterTabTag]][0]
                schema = tcfg['targetSchema']
                outFile = create_reader(aname="File", app_init=app_init,
                                        file_name=pkfn, scfg=dir_scfg)
                fmt_cols = tcfg['targetTables'][masterTabTag].get('formatColumns', [])
                outFile.set_alt_cols()
                toDB.load_gfin_file(trans=to_conn, file_obj=outFile, schema=schema,
                                    table_name=masterTbl, qname='insertStmt',
                                    fmt_cols=fmt_cols, cfg=(dir_scfg, tcfg),
                                    skip=skip, apx=apx, stats=stats)
                loaded[pkfn] = masterTbl
                #e()

                # Sanity check: loaded PK row count must match the source file.
                stmt = 'SELECT count(*) FROM %s t' % masterTbl
                pkcnt = toDB.exec_query(stmt).fetchall()[0][0]
                assert pkcnt == (rowcnt - skip)

                for out_fn in [x for x in out_files.file_names
                               if not os.path.basename(x).split('.')[1] in [masterTabTag]]:
                    outFile = create_reader(aname="File", app_init=app_init,
                                            file_name=out_fn, scfg=dir_scfg)
                    outCols = [col[0] for col in outFile.get_header_cols()]
                    tbl = os.path.basename(out_fn).split('.')[1]
                    assert tbl
                    if tbl not in [masterTabTag] + do_not_load:
                        if tbl not in do_not_delete:
                            stmt = ('DELETE FROM %s WHERE %s in (SELECT t.%s FROM %s t)'
                                    % (tbl, masterTblCol, masterTblCol, masterTbl))
                            deleted[tbl] = toDB.exec_dml(stmt, trans=to_conn, commit=False)
                            pfmt([[deleted[tbl]]], ['Deleted from %s' % tbl])
                        else:
                            deleted[tbl] = -1
                        tblCols = toDB.get_columns(tbl).values()
                        pfmt([[x] for x in list(set(tblCols) - set(outCols)
                                                - set(['MartModifiedDate']))],
                             ['Columns in Target, but not Source'])
                        missing_cols = list(set(outCols) - set(tblCols))
                        pfmt([(tbl, x) for x in missing_cols], ['Table', 'Missing columns'])
                        if missing_cols:
                            to_conn.conn.rollback()
                            schema = tcfg["targetSchema"]
                            toDB.desc_table(schema, tbl)
                            raise Exception('File column %s missing in table "%s".'
                                            % (missing_cols, tbl))
                        schema = tcfg['targetSchema']
                        fmt_cols = tcfg['targetTables'][tbl].get('formatColumns', [])
                        outFile.set_alt_cols()
                        toDB.load_gfin_file(trans=to_conn, file_obj=outFile, schema=schema,
                                            table_name=tbl, qname='insertStmt',
                                            fmt_cols=fmt_cols, cfg=(dir_scfg, tcfg),
                                            skip=skip, apx=apx, stats=stats)
                        loaded[out_fn] = tbl
                    else:
                        not_loaded[out_fn] = tbl
                else:
                    # for/else: commit and report once every *.out file is processed
                    # without a break.
                    toDB.commit_transaction(trans=to_conn)
                    #pfmt([[k]+[deleted[k]]+list(v)[1:] for k,v in stats.items() if deleted[k]>=0], ['Table','Deleted', 'Accepted', 'Rejected','Line count','Skip', 'Diff'],'Load completed (deleted)'.upper())
                    #pfmt([(k,v) for k, v in loaded.items()], ['Loaded Files','Loaded Tables'])
                    #pfmt([(k,v) for k, v in not_loaded.items()], ['Not loaded Files','Not loaded Tables'])
                    pfmt([[k] + [deleted[k]] + list(v.values())[1:]
                          for k, v in stats.items() if deleted[k] >= 0],
                         ['Table', 'Deleted', 'Accepted', 'Rejected',
                          'Line count', 'Skip', 'Diff'],
                         'Load completed/deleted'.upper())
                    pfmt([(k, v) for k, v in loaded.items()],
                         ['Loaded Files', 'Loaded Tables'])
                    pfmt([(k, v) for k, v in not_loaded.items()],
                         ['Not loaded Files', 'Not loaded Tables'])

                assert os.path.isdir(okdir)
                if 0:
                    cli.MoveSnapFolder(okdir)
                processed.append(okfn)
                #break

            if not ok_files.file_names:
                counter = itertools.count(1)
                pfmt([['No OK files at working dir: [ %s ]' % cli.pa[0]]], ['No files'])
            if processed:
                counter = itertools.count(1)
                pfmt([[next(counter), x] for x in processed], ['##', 'Processed'])
            if not_processed:
                counter = itertools.count(1)
                pfmt([[next(counter), x] for x in not_processed],
                     ['##', 'Not processed (backup exists)'])

    if 0:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
    cli.done()
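
# Worked example of the statements this run() generates, assuming a master tag
# mapped to table 'gtxMasterPKData' (the name in the commented-out default
# above) with key column 'TxMasterGUID', and a hypothetical detail table
# 'TxLeg':
#
#   create local temporary table gtxMasterPKData ( TxMasterGUID bigint not null,
#       MartModifiedDate timestamp)
#    ON COMMIT PRESERVE ROWS
#
#   DELETE FROM TxLeg WHERE TxMasterGUID in (SELECT t.TxMasterGUID FROM gtxMasterPKData t)
#
# i.e. the snapshot's master keys are staged into a temp table, each detail
# table is purged of those keys, and the fresh rows are loaded in the same
# transaction before the single commit.
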
def load_md5(self, trans, file_obj, table_name, qname, fmt_cols, cfg,
             skip=0, apx=None, stats=None):
    scfg, tcfg = cfg
    file_name = file_obj.file_name
    assert os.path.isfile(file_name)
    colsep = scfg['columnDelimiter']
    assert colsep
    lcnt = file_obj.line_count(file_name)

    cols = ','.join([col[0] for col in file_obj.cols])
    trans.conn.autocommit = False
    # Binary (GUID) columns are loaded with FORMAT 'hex'; all others as plain text.
    copyfmt = ',\n'.join([
        "%s FORMAT 'hex'" % col[0] if col[0] in fmt_cols else "%s" % col[0]
        for col in file_obj.cols
    ])
    assert os.path.isfile(file_obj.file_name)
    #print (table_name, apx)
    #e()
    # Constant columns (e.g. MartModifiedDate) are appended as "<column> AS <value>".
    apxq = ',\n'.join([''] + ["%s AS %s" % (k, v) for k, v in apx.items()]) if apx else ''
    stmt = """
    COPY %s (%s %s)
    FROM LOCAL '%s'
    DELIMITER '|'
    ESCAPE AS '^'
    NULL ''
    SKIP %d
    ABORT ON ERROR
    NO COMMIT
    """ % (table_name, copyfmt, apxq, file_obj.file_name, skip)
    try:
        psql(stmt, 'Load')
        trans.cur.execute(stmt)
    except Exception:
        trans.conn.rollback()
        pfmt([[stmt]])
        raise
    accepted, rejected = trans.cur.execute(
        'SELECT GET_NUM_ACCEPTED_ROWS(),GET_NUM_REJECTED_ROWS()').fetchall()[0]
    pfmt([[lcnt - skip, accepted, rejected]],
         ['Line count', 'Accepted', 'Rejected'], 'Load stats')
    assert lcnt - skip == accepted
    out = OrderedDict()
    out['table_name'] = table_name
    out['accepted'] = accepted
    out['rejected'] = rejected
    out['linecount'] = lcnt
    out['skip'] = skip
    out['diff'] = lcnt - skip - accepted
    stats.append(out)
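
# For illustration, with table_name='TxLeg' (hypothetical), two file columns,
# fmt_cols=['TxMasterGUID'], skip=2, and apx holding a MartModifiedDate value
# that is assumed to arrive already quoted, the generated statement would look
# roughly like:
#
#   COPY TxLeg (TxMasterGUID FORMAT 'hex',
#   TradeDate ,
#   MartModifiedDate AS '2020-01-01 00:00:00')
#   FROM LOCAL '/data/dump/gfin.TxLeg.out'
#   DELIMITER '|'
#   ESCAPE AS '^'
#   NULL ''
#   SKIP 2
#   ABORT ON ERROR
#   NO COMMIT
#
# GET_NUM_ACCEPTED_ROWS()/GET_NUM_REJECTED_ROWS() are Vertica functions that
# report the row counts of the preceding COPY on the same session, which is
# why they are queried on the same cursor right after the load.
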
def __load_file_2(self, trans, file_obj, table_name, qname, cfg, create_table=False):
    scfg, tcfg = cfg
    file_name = file_obj.file_name
    #pp(file_obj.cols)
    fmt_cols = ['TxMasterGUID', 'SwapEventGUID']
    assert os.path.isfile(file_name)
    with open(file_name, 'r') as fh:
        colsep = scfg['columnDelimiter']
        assert colsep
        if create_table:
            self.create_table(fh, cfg, table_name)
        else:
            # Skip the two header lines (column names and types).
            fh.readline()
            fh.readline()
        data = []
        intdata = [[]]
        intcols = []
        for line in [x.strip() for x in fh]:
            data.append([x if x else None for x in line.split(colsep)[:-1]])

    cols = ','.join([col[0] for col in file_obj.cols])
    assert len(file_obj.cols) == len(data[0])
    if 0:
        tmpTbl = 'tmp_%s' % table_name
        stmt = ('CREATE LOCAL TEMPORARY TABLE %s AS SELECT * FROM %s WHERE 1=2'
                % (tmpTbl, table_name))
        #print(stmt)
        trans.cur.execute(stmt)
        #e()
    assert len(intdata[0]) == len(intcols)
    trans.conn.autocommit = False
    if 0:
        stmt = "COPY %s FROM LOCAL '/home/s_dev_rdm/ab_gtx/iris.csv' DELIMITER '|'" % tmpTbl
    copyfmt = ',\n'.join([
        "%s FORMAT 'hex'" % col[0] if col[0] in fmt_cols else "%s" % col[0]
        for col in file_obj.cols
    ])
    stmt = ("COPY %s (%s) FROM LOCAL '/home/s_dev_rdm/ab_gtx/iris.csv' DELIMITER '|'"
            % (table_name, copyfmt))
    trans.cur.execute(stmt)
    pfmt(trans.cur.execute(
        'SELECT GET_NUM_ACCEPTED_ROWS(),GET_NUM_REJECTED_ROWS()').fetchall(),
        ['Accepted', 'Rejected'], 'Load stats')
    if fmt_cols:
        # Show a sample of the hex-formatted (binary) columns after the load.
        show = []
        for row in trans.cur.execute(
                'select TO_HEX(%s) from %s LIMIT 5'
                % ('), TO_HEX('.join(fmt_cols), table_name)).fetchall():
            show.append(row)
            #print binascii.hexlify(row[0])
        pfmt(show, fmt_cols, 'Sample')
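
# Note on the parsing above: each data line is expected to end with a trailing
# delimiter, e.g. (illustrative, colsep='|')
#
#   a1b2|2020-01-01|100.5|
#
# so split(colsep)[:-1] drops the empty token after the final '|', and empty
# fields are stored as None.
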
def run():
    skip = 1
    total_ins = 0
    term_line = True
    # NOTE: to_conn, from_conn, dump_file, data_files, file_size_rows, file_stats,
    # stats, loaded, not_loaded, deleted, insert_stats and email_args are expected
    # to be defined at module level in this script.

    #//validate cols
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)
        for _trg_class, val in cli.cfg['target'][_source].items() or []:
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)
            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
            toDB.desc_table(schema=tcfg['targetSchema'],
                            tbl=tcfg['targetTable'], col_ord=False)
            #// validate cols
            cfg_cols = [x[u'columnName'] for x in cli.scfg[u'columnMappings']]
            tcols = toDB.get_cols()
            t_vs_c = set(tcols) - set(cfg_cols)
            c_vs_t = set(cfg_cols) - set(tcols)
            if t_vs_c:
                pfmtd([dict(c_vs_t=c_vs_t)],
                      'Config has columns missing in target table.')
                raise Exception(
                    'Target table has columns missing in config: %s' % t_vs_c)
            if c_vs_t:
                pfmtd([dict(t_vs_c=t_vs_c)],
                      'Target table has columns missing in config.')
                raise Exception(
                    'Config has columns missing in target table: %s' % c_vs_t)
            toDB.commit_transaction(trans=to_conn)

    #// transfer
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)
        _dbname = cli.scfg["sourceDb"]
        fromDB = create_reader(aname=_src_class, app_init=app_init)
        fromDB.begin_transaction(env=cli.scfg['sourceDb'], out=from_conn)

        #//Extract to Dir
        for _dmp_class, val in cli.cfg['dump'][_source].items() or []:
            FileWriter = create_writer(aname=_dmp_class, app_init=app_init)
            fromDB.set_loader(FileWriter)
            cli.dcfg = cli.get_dcfg(_dmp_class)
            for _trg_class, val in cli.cfg['target'][_source].items() or []:
                cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                file_ins_cnt = 0
                FileWriter.open_file(out=dump_file)
                for iq_data in fromDB.fetch_many(chunk_size=file_size_rows,
                                                 source=cli.scfg,
                                                 qname='sourceStmt',
                                                 out=InOut(),
                                                 skip_header=0,
                                                 terminate_line=term_line):
                    if not file_ins_cnt:
                        FileWriter.create_header(file=dump_file,
                                                 header=fromDB.get_header(),
                                                 cfg=cli.dcfg,
                                                 terminate_line=term_line)
                    FileWriter.append_data(file=dump_file, data=iq_data, cfg=cli.dcfg)
                    file_ins_cnt += len(iq_data.data)
                if not file_ins_cnt:
                    # In case there's no data, still emit the header line.
                    FileWriter.create_header(file=dump_file,
                                             header=fromDB.get_header(),
                                             cfg=cli.dcfg,
                                             terminate_line=term_line)
                FileWriter.close_file(file=dump_file)
                total_ins += file_ins_cnt
        fromDB.desc_cur(cur=from_conn.cur, colord=False)
        fromDB.commit_transaction(trans=from_conn)
    log.info('Total records saved: %d' % total_ins)

    #// Load to IQ
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(aname=_src_class, app_init=app_init)

        #//Get the file names
        cli.set_source(_source)
        dir_scfg = cli.get_dcfg(_src_class)
        path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
        DirReader.glob_dir(path=path, out=data_files, ext='*.*')

        #//Load to DB
        for _trg_class, val in cli.cfg['target'][_source].items() or []:
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)
            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
            toDB.desc_table(schema=tcfg['targetSchema'],
                            tbl=tcfg['targetTable'], col_ord=None)
            #// validate cols
            cfg_cols = [x[u'columnName'] for x in cli.scfg[u'columnMappings']]
            acols = cli.get_alt_cols(scfg)
            tcols = toDB.get_cols()
            fcols_alt = []
            for data_file in data_files.file_names:
                dataFile = create_reader(aname='File', app_init=app_init,
                                         file_name=data_file, scfg=dir_scfg)
                dataFile.describe()
                file_stats[data_file] = dataFile.line_count() - cli.header_size(dir_scfg)
                fcols_alt = [acols.get(x.decode(), x.decode())
                             for x in dataFile.get_header(data_file, dir_scfg)]
                f_vs_c = set(fcols_alt) - set(cfg_cols)
                c_vs_f = set(cfg_cols) - set(fcols_alt)
                f_vs_t = set(fcols_alt) - set(tcols)
                t_vs_f = set(tcols) - set(fcols_alt)
                if f_vs_c:
                    pfmtd([dict(c_vs_f=c_vs_f)], 'Config has columns missing in dump file.')
                    pfmtd([dict(f_vs_t=f_vs_t)], 'Dump file has columns missing in target table.')
                    pfmtd([dict(t_vs_f=t_vs_f)], 'Target table has columns missing in dump file.')
                    raise Exception(
                        'Dump file has columns missing in config: %s' % f_vs_c)
                if c_vs_f:
                    pfmtd([dict(f_vs_c=f_vs_c)], 'Dump file has columns missing in config.')
                    pfmtd([dict(f_vs_t=f_vs_t)], 'Dump file has columns missing in target table.')
                    pfmtd([dict(t_vs_f=t_vs_f)], 'Target table has columns missing in dump file.')
                    raise Exception(
                        'Config has columns missing in dump file: %s' % c_vs_f)
                if f_vs_t:
                    pfmtd([dict(f_vs_c=f_vs_c)], 'Dump file has columns missing in config.')
                    pfmtd([dict(c_vs_f=c_vs_f)], 'Config has columns missing in dump file.')
                    pfmtd([dict(t_vs_f=t_vs_f)], 'Target table has columns missing in dump file.')
                    raise Exception(
                        'Dump file has columns missing in target table: %s' % f_vs_t)
                if t_vs_f:
                    pfmtd([dict(f_vs_c=f_vs_c)], 'Dump file has columns missing in config.')
                    pfmtd([dict(c_vs_f=c_vs_f)], 'Config has columns missing in dump file.')
                    pfmtd([dict(f_vs_t=f_vs_t)], 'Dump file has columns missing in target table.')
                    raise Exception(
                        'Target table has columns missing in dump file: %s' % t_vs_f)

            for data_fn in [x for x in data_files.file_names]:
                dataFile = create_reader(aname="File", app_init=app_init,
                                         file_name=data_fn, scfg=dir_scfg)
                dataFile.describe()
                fileCols = [col.decode() for col in dataFile.get_header_cols()]
                tbl = tcfg["targetTable"]  #tcfg. os.path.basename(data_fn).split('.')[-2]
                assert tbl
                if 0 and tbl not in do_not_delete:
                    stmt = ('DELETE FROM %s WHERE %s in (SELECT t.%s FROM %s t)'
                            % (tbl, masterTblCol, masterTblCol, masterTbl))
                    deleted[tbl] = toDB.exec_dml(stmt, trans=to_conn, commit=False)
                    pfmt([[deleted[tbl]]], ['Deleted from %s' % tbl])
                else:
                    deleted[tbl] = -1
                if 0:
                    acols = cli.get_alt_cols(scfg)
                    dataFile.cols_alt = [acols.get(x.decode(), x.decode())
                                         for x in dataFile.cols]
                else:
                    dataFile.set_alt_cols()
                missing_cols = list(set(dataFile.cols_alt) - set(tcols))
                pfmt([(tbl, x) for x in missing_cols], ['Table', 'Missing columns'])
                schema = tcfg["targetSchema"]
                if missing_cols:
                    pfmt([[x] for x in missing_cols],
                         ['Columns in Source, but not Target'])
                    to_conn.conn.rollback()
                    toDB.desc_table(schema, tbl)
                    raise Exception('File column %s missing in table "%s".'
                                    % (missing_cols, tbl))
                if 1:
                    apx = {}
                    fmt_cols = []
                    toDB.load_file(trans=to_conn, file_obj=dataFile, schema=schema,
                                   table_name=tbl, qname='insertStmt',
                                   fmt_cols=fmt_cols, cfg=(dir_scfg, tcfg),
                                   skip=skip, apx=apx, stats=stats)
                    loaded[data_fn] = tbl
                else:
                    not_loaded[data_fn] = tbl
            else:
                # for/else: report and commit once the loop finishes without a break.
                toDB.commit_transaction(trans=to_conn)
                pfmt([[k] + [deleted[k]] + list(v)[1:]
                      for k, v in stats.items() if deleted[k] >= 0],
                     ['Table', 'Deleted', 'Accepted', 'Rejected',
                      'Line count', 'Skip', 'Diff'],
                     'Load completed (deleted)'.upper())
                pfmt([(k, v) for k, v in loaded.items()],
                     ['Loaded Files', 'Loaded Tables'])
                pfmt([(k, v) for k, v in not_loaded.items()],
                     ['Not loaded Files', 'Not loaded Tables'])
                e()
            if 0:
                #toDB.truncate_table ( table = table )
                toDB.bulk_load(trans=to_conn, file_names=data_files,
                               qname='insertStmt', cfg=(dir_scfg, tcfg),
                               out=insert_stats)
                for k in file_stats.keys():
                    assert insert_stats[k] == file_stats[k], \
                        'Insert vs file count diff: %s<>%s for file \n%s' % (
                            insert_stats[k], file_stats[k], k)
                toDB.commit_transaction(trans=to_conn)
    if 0:
        Email.send_email(**email_args)
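
# A standalone sketch of the four-way column validation above, with made-up
# column names (pure set algebra, independent of the DB helpers):
#
#   cfg_cols  = {'TradeId', 'TradeDate', 'Qty'}           # config columnMappings
#   fcols_alt = {'TradeId', 'TradeDate', 'Price'}         # dump-file header
#   tcols     = {'TradeId', 'TradeDate', 'Qty', 'Price'}  # target table
#
#   f_vs_c = fcols_alt - cfg_cols  # {'Price'}: in file, not in config -> raise
#   c_vs_f = cfg_cols - fcols_alt  # {'Qty'}:   in config, not in file -> raise
#   f_vs_t = fcols_alt - tcols     # set():     file column missing in table
#   t_vs_f = tcols - fcols_alt     # {'Qty'}:   table column missing in file -> raise
#
# Any non-empty difference aborts the run before the load, and the other three
# differences are printed alongside for diagnosis.
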
def run():
    skip = 2
    do_not_load = []
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(aname=_src_class, app_init=app_init)
        cli.set_source(_source)
        dir_scfg = cli.get_dcfg(_src_class)
        path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
        ok_files = InOut(file_names=[])
        DirReader.glob_dir(path=path, out=ok_files, ext='*.ok')
        loaded = {}
        for _trg_class, val in cli.cfg['target'][_source].items():
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)
            do_not_delete = tcfg['doNotDeleteTables']
            do_not_load = tcfg['doNotLoadTables']
            to_conn = InOut()
            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            toSchema = tcfg['targetSchema']
            stmt = 'set search_path to %s' % toSchema
            psql(stmt)
            to_conn.cur.execute(stmt)

            pkstats = {}
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File', app_init=app_init,
                                       file_name=okfn, scfg=dir_scfg)
                okdir, okname = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)
                out_files = InOut(file_names=[])
                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                #e()

                # Check whether some files are missing in config.
                ftlist = []
                for out_fn in out_files.file_names:
                    print(out_fn)
                    ftlist.append(os.path.basename(out_fn).split('.')[1])
                pfmt([[x] for x in ftlist], ['Files->Tables'])
                ctables = cli.tcfg['targetTables'].keys()
                extra_file_tables = list(set(ftlist) - set(ctables))
                pfmt([[x] for x in extra_file_tables], ['Tables not in config.'])
                extra_config_tables = list(set(ctables) - set(ftlist))
                pfmt([[x] for x in extra_config_tables],
                     ['Tables in config but not in file names.'])
                assert not extra_file_tables, \
                    'Tables %s are not listed in config["targetTables"].' % extra_file_tables

                for outfn in out_files.file_names:
                    # Master first: stage each file into its tmp_PK_* table.
                    outFile = create_reader(aname='File', app_init=app_init,
                                            file_name=outfn, scfg=dir_scfg)
                    outbn = os.path.basename(outfn)
                    tbl = outbn.split('.')[1]
                    outTbl = 'tmp_PK_%s' % tbl
                    outCols = outFile.get_header_cols()
                    apxCols = [('MartModifiedDate', 'timestamp'),
                               ('AsOfFrom', 'timestamp'),
                               ('AsOfTo', 'timestamp'),
                               ('MD5', 'char(22)')]
                    outTblCols = toDB.get_create_col_list(outCols, apx=apxCols)
                    toCols = toDB.get_col_types(toSchema, tbl)
                    pp(toCols)
                    toDB.desc_tmp_table(outTbl, outCols + apxCols)
                    do_not_delete.append(outTbl)
                    try:
                        stmt = 'drop table %s' % outTbl
                        to_conn.cur.execute(stmt)
                    except Exception as ex:
                        #raise
                        if not 'Table "%s" does not exist' % outTbl in str(ex):
                            raise
                    psql(outfn)
                    stmt = ('CREATE LOCAL TEMPORARY TABLE %s ( %s )\nON COMMIT PRESERVE ROWS'
                            % (outTbl,
                               ', \n'.join(['%s %s' % tuple(col) for col in toCols])))
                    pfmt([[stmt]], ['Create master temp PK ' + outTbl])
                    toDB.exec_ddl(stmt)

                    #//Load data into PK table
                    fmt_cols = {}
                    mmDt = okFile.get_value(coords=(0, 0), skip=skip)
                    md5val = base64.b64encode(hashlib.md5(b'test').digest())
                    apx = OrderedDict()
                    apx['MartModifiedDate'] = mmDt
                    apx['AsOfFrom'] = mmDt
                    apx['AsOfTo'] = "12/31/9999"
                    apx['MD5'] = ''  #//defined on row level
                    pk_outfn = '%s.pk' % outfn
                    colsep = dir_scfg['columnDelimiter']
                    with open(pk_outfn, 'wb') as pkfh:
                        with open(outfn, 'rb') as outfh:
                            # Header line: append the names of the apx columns.
                            line = outfh.readline().strip()
                            pkfh.write(line + colsep.join(apx.keys()).encode()
                                       + os.linesep.encode())
                            # Type line: append the types of the apx columns.
                            line = outfh.readline().strip()
                            apxTypes = colsep.join([col[1] for col in apxCols])
                            pkfh.write(line + apxTypes.encode() + os.linesep.encode())
                            # Data lines: append the apx values; MD5 is computed per
                            # row over the line with the delimiters removed.
                            line = outfh.readline().strip()
                            while line:
                                md5 = base64.b64encode(
                                    hashlib.md5(line.replace(b'|', b'')).digest())
                                apx['MD5'] = md5.decode('ascii', 'ignore').strip('=')  #// REDO
                                pkfh.write(line + colsep.join(apx.values()).encode()
                                           + os.linesep.encode())
                                line = outfh.readline().strip()
                    outPkFile = create_reader(aname='File', app_init=app_init,
                                              file_name=pk_outfn, scfg=dir_scfg)
                    outPkFile.set_alt_cols()
                    schema = tcfg['targetSchema']
                    toDB.load_grds_file(trans=to_conn, file_obj=outPkFile, schema=schema,
                                        table_name=outTbl, qname='insertStmt',
                                        fmt_cols=fmt_cols, cfg=(dir_scfg, tcfg),
                                        skip=skip, stats=pkstats)
                    loaded[outbn] = outTbl
                    #outPkFile.delete()
                #pfmtd([pkstats])
                #e()

            stats = {}
            deleted = {}
            processed = []
            not_processed = []
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File', app_init=app_init,
                                       file_name=okfn, scfg=dir_scfg)
                okdir, _ = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)
                #e()
                assert os.path.isdir(okdir)
                snap_df = cli.get_dest_folder(okdir)
                if os.path.isdir(snap_df):
                    log.warning('[%s]Destination folder exists: [%s]' % (okdir, snap_df))
                    not_processed.append(okfn)
                    continue
                out_files = InOut(file_names=[])
                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                apx = dict(MartModifiedDate=okFile.get_value(coords=(0, 0), skip=skip))
                #e()
                if 0:
                    g = input("Continue?")
                not_loaded = {}

                # Merge each tmp_PK_* staging table into its target table.
                for table_name in ftlist:
                    tmpTbl = 'tmp_PK_%s' % table_name
                    toCols = toDB.get_tab_cols(tmpTbl)
                    #pp(toCols)
                    toDB.desc_table(None, tmpTbl)
                    toDB.desc_table(toSchema, table_name)
                    #e()
                    if table_name in ['TxnLookupMap']:
                        # Three-part key plus ValidFrom/AsOfTo.
                        tmpCols = ',\n '.join(['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """
                        insert into {0} ( {1} )
                        select distinct {2}
                        from {3} tmid
                        LEFT JOIN {0} ta
                          ON ta.{4} = tmid.{4}
                         AND ta.{5} = tmid.{5}
                         AND ta.{6} = tmid.{6}
                         AND ta.ValidFrom = tmid.ValidFrom
                         AND ta.AsOfTo = tmid.AsOfTo
                        where ta.MD5 <> tmid.MD5 OR ta.{4} is NULL
                        """.format(table_name,
                                   ',\n '.join([col[0].decode() for col in toCols]),
                                   tmpCols, tmpTbl,
                                   toCols[0][0].decode(),
                                   toCols[1][0].decode(),
                                   toCols[2][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins, trans=to_conn, commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    elif table_name in ['G3Lookup', 'GCLookup', 'GISLookup', 'GPSLookup',
                                        'GPXLookup', 'GPosLookup', 'GTxLookup',
                                        'FundToBusinessUnitMap', 'TxEditReason']:
                        # Two-part key plus AsOfTo.
                        tmpCols = ',\n '.join(['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """
                        insert into {0} ( {1} )
                        select distinct {2}
                        from {3} tmid
                        LEFT JOIN {0} ta
                          ON ta.{4} = tmid.{4}
                         AND ta.{5} = tmid.{5}
                         AND ta.AsOfTo = tmid.AsOfTo
                        where ta.MD5 <> tmid.MD5 OR ta.{4} is NULL
                        """.format(table_name,
                                   ',\n '.join([col[0].decode() for col in toCols]),
                                   tmpCols, tmpTbl,
                                   toCols[0][0].decode(),
                                   toCols[1][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins, trans=to_conn, commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    else:
                        # Single-column key plus AsOfTo.
                        tmpCols = ',\n '.join(['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """
                        insert into {0} ( {1} )
                        select distinct {2}
                        from {3} tmid
                        LEFT JOIN {0} ta
                          ON ta.{4} = tmid.{4}
                         AND ta.AsOfTo = tmid.AsOfTo
                        where ta.MD5 <> tmid.MD5 OR ta.{4} is NULL
                        """.format(table_name,
                                   ',\n '.join([col[0].decode() for col in toCols]),
                                   tmpCols, tmpTbl,
                                   toCols[0][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins, trans=to_conn, commit=False)
                        pfmtd([dict(Inserted=inserted)])

                toDB.commit_transaction(trans=to_conn)
                pfmt([[k] + list(v.values())[1:] for k, v in pkstats.items()],
                     ['Table', 'Accepted', 'Rejected', 'Line count', 'Skip', 'Diff'],
                     'Load completed'.upper())
                pfmt([(k, v) for k, v in loaded.items()],
                     ['Loaded Files', 'Loaded Tables'])
                pfmt([(k, v) for k, v in not_loaded.items()],
                     ['Not loaded Files', 'Not loaded Tables'])
                assert os.path.isdir(okdir)
                if 0:
                    cli.MoveSnapFolder(okdir)
                processed.append(dict(ProcessedFile=okfn))
                #break

            if not ok_files.file_names:
                pfmtd([dict(NoFiles='No OK files at working dir: [ %s ]' % cli.pa[0])])
            pfmtd(processed)
            pfmtd(not_processed)
    if 0:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
    cli.done()
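
# Why char(22) fits the MD5 column above: an MD5 digest is 16 bytes, base64
# encodes those to 24 characters ending in '==', and stripping the '=' padding
# (as done per data row) leaves exactly 22 characters. A quick check:
#
#   import base64, hashlib
#   line = b'a1b2|2020-01-01|100.5|'
#   d = hashlib.md5(line.replace(b'|', b'')).digest()    # 16 bytes
#   s = base64.b64encode(d).decode('ascii').strip('=')   # 22-char row hash
#   assert len(s) == 22
#
# The LEFT JOIN ... WHERE ta.MD5 <> tmid.MD5 OR ta.<key> IS NULL pattern then
# inserts only rows that are new (no key match) or whose row hash changed.
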