def exec_ddl(self, ddl):
    """Echo a DDL statement and execute it on the shared cursor.

    Args:
        ddl: DDL text (e.g. CREATE/DROP TABLE) to run as-is.

    Returns:
        The return value of ``self.cur.execute(ddl)``.

    Raises:
        Whatever the underlying DB driver raises on failure.
    """
    psql(ddl)
    # The original wrapped this call in ``try: ... except Exception: raise``,
    # which is a no-op (re-raises unchanged); the wrapper has been removed.
    return self.cur.execute(ddl)
def insert_trans_data(self, trans, target, source, stmt, skip_header=0):
    """Read dict-like records from ``source.pipe`` and batch-insert them.

    Records are pulled one at a time via ``pipe.readline()`` (each record
    supports ``.update`` and key lookup, i.e. behaves like a dict), stamped
    with AccountingDate/AsOfDateTime from the CLI context, converted per the
    configured column map, and inserted in chunks of 300 via executemany.

    NOTE(review): ``cli`` below is a module-level object (``self.cli`` is
    used elsewhere in this method) — confirm they refer to the same CLI
    singleton.
    """
    pipe = source.pipe
    skip = str(skip_header).strip()
    # NOTE(review): this re-assignment repeats the line above; the None
    # check is ineffective as written.
    if skip_header is not None:
        skip = str(skip_header).strip()
    assert str(skip).strip() in [
        '0', '1'
    ], 'skip_header [%s] should be "0" or "1"' % str(skip).strip()
    if str(skip) == '1':
        # Consume and discard the header record.
        pipe.readline()
    assert pipe
    start_time = time.time()
    # columnMap: {source_key: (target_column, type, ...)}
    xref = self.cli.tcfg["columnMap"]
    cols = [x for x in xref]
    # Published for downstream query templating (INSERT column list and
    # '?' placeholder list).
    cli.to_cols = ',\n'.join(cols)
    cli.to_quotes = ','.join([x for x in '?' * len(cols)])
    assert cli.to_cols
    sql = self.get_query(target, stmt)
    cur = self.conn.cursor()
    line = pipe.readline()
    rows = []
    while line:
        # Stamp every record with the accounting date and as-of timestamp.
        line.update(
            dict(AccountingDate=self.cli.pa[1], AsOfDateTime=self.cli.asod))
        # Coerce varchar-mapped values to str; pass others through.
        rows.append([
            str(line[xref[x][0]]) if xref[x][1] in ['varchar'] else
            line[xref[x][0]] for x in xref
        ])
        line = pipe.readline()
    chunk = 300  # rows per executemany batch
    total = 0
    cid = 0  # chunk counter, used only for progress logging
    psql(sql, 'Insert')
    while total < len(rows):
        cur.fast_executemany = True
        data = rows[total:][:chunk]
        cur.executemany(sql, data)
        ins = len(data)
        total += ins
        cid += 1
        log.info('[{}] [{}] {}: Running: {:,.0f}, Rows: {:,.0f}'.format(
            self.objtype, cid, self.cln, total, ins))
    log.info(
        '[{}]: {}: Inserted: {:,.0f}, To-Schema:{}, To-Table:{}, Skipped: {}, Elapsed: {}'
        .format(self.objtype, self.cln, len(rows), target['targetSchema'],
                target["targetTable"], skip,
                round((time.time() - start_time), 2)))
    pipe.close()
def get_col_types(self, schema, tbl):
    """Fetch (column_name, data_type) pairs for a table from the Vertica catalog.

    Args:
        schema: Schema name (interpolated into the query unquoted — assumed
            to come from trusted config, not user input).
        tbl: Table name, same trust assumption.

    Returns:
        List of (column_name, data_type) tuples in ordinal position order.
    """
    stmt = """ SELECT column_name, data_type
        FROM v_catalog.columns
        WHERE table_schema='%s' AND table_name='%s'
        ORDER BY ordinal_position""" % (schema, tbl)
    self.cur.execute(stmt)
    psql(stmt)
    # The original also created an unused local ``out = []``; removed.
    return self.cur.fetchall()
def get_query_cur(self, chunk_size, source, qname, out, skip_header, terminate_line=False):
    """Prepare and execute the extractor query, returning the live cursor.

    Args:
        chunk_size: Requested fetch size; capped by ``cli.lame_duck`` when
            that test/limit mode is active.
        source: Source config object passed to ``get_query``.
        qname: Query name/key within the source config.
        out: Unused here (kept for signature compatibility with fetch_many).
        skip_header: Unused here (kept for signature compatibility).
        terminate_line: Unused here (kept for signature compatibility).

    Returns:
        The executed cursor (cached on ``self.cur``), ready for fetching.
    """
    assert chunk_size
    # lame_duck caps how many rows a test run may pull.
    chunk_size = self.cli.lame_duck if self.cli.lame_duck and chunk_size > self.cli.lame_duck else chunk_size
    assert chunk_size
    stmt = self.get_query(source, qname)
    log.debug(stmt)
    # Lazily create and cache a cursor on first use.
    if not hasattr(self, 'cur') or not self.cur:
        self.cur = self.conn.cursor()
    cur = self.cur
    # Pretty-print the statement one token per line for the log.
    psql(' \n'.join(stmt.replace(',', ', ').split()), 'Extractor cmd')
    cur.execute(stmt)
    # The original also computed unused locals (cli, tf, current_ts, id);
    # removed — they had no effect here.
    return cur
def fetch_many(self, chunk_size, source, qname, out, skip_header, terminate_line=False):
    """Generator: execute the extractor query and yield chunks of rows.

    Each yielded value is ``out`` with ``out.data`` set to a list of rows,
    where every cell has been normalized to bytes/str: None -> b'',
    date/datetime -> str encoded utf-8, int/float -> repr, str -> encoded.
    Optional "appendix" columns from ``cli.get_appendix2()`` are appended
    to every row. ``cli.lame_duck`` (when set) caps total rows fetched.

    NOTE(review): inside the per-row loop ``cols = cols + apx_cols`` grows
    ``cols`` on every row, so the 'First row' report and later chunks see
    duplicated appendix column names — looks unintended; confirm.
    NOTE(review): ``data.append(d + [''] if terminate_line else [])``
    parses as ``(d + ['']) if terminate_line else []`` — when
    terminate_line is False an EMPTY row is appended; likely meant
    ``d + ([''] if terminate_line else [])``. Confirm before relying on it.
    """
    assert chunk_size
    cli = self.cli
    # lame_duck caps how many rows a test run may pull.
    chunk_size = self.cli.lame_duck if self.cli.lame_duck and chunk_size > self.cli.lame_duck else chunk_size
    assert chunk_size
    tf = "%Y-%m-%d.%H_%M_%S"
    current_ts = time.strftime(tf)
    id = 0  # chunk id, reported on the yielded object
    stmt = self.get_query(source, qname)
    log.debug(stmt)
    if not hasattr(self, 'cur') or not self.cur:
        self.cur = self.conn.cursor()
    cur = self.cur
    psql(' \n'.join(stmt.replace(',', ', ').split()), 'Extractor cmd')
    #e()
    cur.execute(stmt)
    cols = [c[0] for c in cur.description]
    total_read = 0
    if 1:
        # apx: extra constant columns appended to every row.
        apx_cmap, apx_cols, apx = cli.get_appendix2()
        header = None
        first = True
        is_apx = []
        start_time = time.time()
        while True:
            print('Elapsed [%d] PRE fetch: %s' % (id, time.time() - start_time))
            start_time = time.time()
            out.data = []
            if self.cli.lame_duck and self.cli.lame_duck <= total_read:
                break
            #decrease chunk size
            if self.cli.lame_duck and self.cli.lame_duck - total_read < chunk_size:
                chunk_size = self.cli.lame_duck - total_read
            fetch_time = time.time()
            rows = cur.fetchmany(chunk_size)
            print('Elapsed [%d] FMANY: %s' % (id, time.time() - fetch_time))
            print(len(rows))
            #e()
            data = []
            append_time = time.time()
            if rows:
                for row in rows:
                    d = []
                    # Normalize every cell to a byte/str representation.
                    for x in row:
                        if x == None:
                            d.append(b'')
                            continue
                        if isinstance(x, datetime.date) or isinstance(
                                x, datetime.datetime):
                            d.append(str(x).encode('utf-8'))
                            continue
                        if isinstance(x, int) or isinstance(x, float):
                            d.append(repr(x))
                            continue
                        if sys.version_info[0] < 3:
                            d.append(x)
                        else:
                            d.append(x.encode())
                    if apx:
                        #pp(d)
                        #print len(d), len(d+apx.split(cli.csep)), apx
                        #e()
                        cols = cols + apx_cols
                        is_apx = ['N'] * len(d) + ['Y'] * len(apx_cols)
                        d = d + apx.split(cli.csep.decode())
                        data.append(
                            d + [''] if terminate_line else [])
                        #data.append('^'.join(str(v) for v in d+apx))
                    else:
                        if 1:
                            is_apx = ['N'] * len(d)
                            data.append(d)
                            #header = [col[:2] for ]
                        else:
                            assert 3 == 2
                            data.append('^'.join(str(v) for v in d) + os.linesep)
                    if first:
                        # One-time diagnostic dump of the first row.
                        pfmtd([
                            dict(Col=col, Row=d[i], Appendix=is_apx[i])
                            for i, col in enumerate(cols)
                        ], 'First row')
                        first = False
                        #e()
            else:
                break
            out.data = data
            print('Elapsed [%d] APPEND: %s' % (id, time.time() - append_time))
            out.chunk_id, out.current_ts, out.actor = id, current_ts, self.cln
            if not data:
                break
            print('Elapsed [%d] POST fetch: %s' % (id, time.time() - start_time))
            yield out
            id += 1
            total_read += len(data)
#e() if 1: info('Start') cursor = conn.cursor() dbdata = [] if 1: stmt = "SET TEMPORARY OPTION DATE_ORDER=MDY exec CIGActgH.spSOI_Cnsldtd_WRAPPER '223906','EOD','2019/1/30','DESK','*','DEFAULT','REGULAR','1','0','NONE','*','*','N','0','0','NONE','NONE','ALL','0','MONTH_END','N','ALL','*','0','*','N','*'" tbl = 'DY_FinancingPosition' stmt = "SET TEMPORARY OPTION DATE_ORDER=MDY exec CIGActgH.spTDPFinancialPosRptCnsldtd_WRAPPER '223907','2019/9/13','2019/9/13','EOD','EOD','*','*','*','NONE','*','NOW','0'" tbl = 'ME_FinancingPosition' stmt = "SET TEMPORARY OPTION DATE_ORDER=MDY exec CIGActgH.spTDPFinancialPosRptCnsldtd_WRAPPER '223906','2019/8/01','2019/8/31','EOD','EOD','*','*','*','NONE','*','NOW','0'" stmt = "select * from Position.ME_FinancingPosition where AccountingDate='2019-08-31' and client=223906" tbl = 'DY_FinancingPosition' stmt = "select * from Position.%s where AccountingDate='2019-09-13' and client=223906" % tbl psql(os.linesep.join(stmt.split()), tbl) cursor.execute(stmt) t = 0 i = 1 eol = '\n' colsep = '|' start_time = time.time() fn = '%s.csv' % (tbl) info(fn) with open(fn, 'w') as fh: fh.write(colsep.join([c[0] for c in cursor.description]) + eol) row = cursor.fetchone() if row: info('First row: %s' % row)
def exec_query(self, stml, show=False):
    """Run *stml* on a freshly created cursor.

    Args:
        stml: SQL text to execute.
        show: When True, echo the statement via ``psql`` before running it.

    Returns:
        The result of ``cursor.execute(stml)``.
    """
    cursor = self.conn.cursor()
    if show:
        psql(stml)
    return cursor.execute(stml)
def insert_RC_data(self, trans, target, source, stmt, insert_stats, skip_header=0):
    """Validate REST-pipe records against the column map and (dry-)insert.

    Reads dict-like records from ``source.pipe``, cross-checks their keys
    against ``columnMap`` (reporting extras on either side), then runs
    chunked executemany inserts under SERIALIZABLE isolation — each chunk
    is immediately ROLLED BACK, so as written this is a dry run.

    NOTE(review): ``e()`` after the read loop looks like a deliberate
    debug exit; with it in place the insert section below never runs.
    NOTE(review): the row-building append is commented out, so ``rows``
    stays empty and the 'No data in REST pipe.' branch would fire —
    consistent with this being work-in-progress debug code.
    """
    pipe = source.pipe
    skip = str(skip_header).strip()
    # NOTE(review): duplicate of the line above; the None check is moot.
    if skip_header is not None:
        skip = str(skip_header).strip()
    assert str(skip).strip() in [
        '0', '1'
    ], 'skip_header [%s] should be "0" or "1"' % str(skip).strip()
    if str(skip) == '1':
        pipe.readline()
    assert pipe
    start_time = time.time()
    xref = self.cli.tcfg["columnMap"]
    cols = [v[0] for _, v in xref.items()]
    cli.to_cols = ',\n'.join(cols)
    cli.to_quotes = ','.join([x for x in '?' * len(cols)])
    assert cli.to_cols
    sql = self.get_query(target, stmt)
    #cnxn = pyodbc.connect(conn_str, autocommit=True)
    # Strictest isolation; commits are explicit (and here, rolled back).
    trans.conn.set_attr(pyodbc.SQL_ATTR_TXN_ISOLATION,
                        pyodbc.SQL_TXN_SERIALIZABLE)
    trans.conn.autocommit = False
    cur = trans.conn.cursor()
    fline = line = pipe.readline()
    self.rows = rows = []
    #pp(xref)
    # Appendix columns: 3-element map entries carry a config key whose
    # parsed value is stamped onto every record.
    apx = {x[0]: x[2] for x in xref.values() if len(x) == 3}
    apx = {x: cli.get_parsed(ckey=x, cfg=apx) for x, v in apx.items()}
    #ppe(fline)
    # Columns configured but absent from the first REST record.
    ext_c = list(set(xref.keys()) - set(fline.keys()))
    if ext_c:
        log.warn('Config has extra columns missing in REST')
        #pfmtd([dict(Id=k, DB_ColName=v) for k, v in enumerate(list(sorted(ext_c)))], 'Defaulting these to nulls')
    # Columns present in REST but not configured.
    ext_l = list(set(fline.keys()) - set(xref.keys()))
    if ext_l:
        log.warn('REST has extra columns missing in DB')
        #pfmtd([dict(Id=k, RESR_Col=v) for k, v in enumerate(ext_l)], 'Extra cols in REST')
        #pp(ext_l)
    # Known REST-only columns that are deliberately not mapped.
    ignore = [u'signOffVersion', u'signOffTime', u'RBDate', u'asofDate'] + \
        [u'DataSource', u'GPOSMATTol'] + \
        [u'CCY', u'DEPolicy', u'Price', u'UnrealizedPnL', u'Fund',
         u'RawUnrealizedPnL', u'SwapType'] + \
        [u'SettlementDate'] + \
        [u'BuySell', u'IndependentAmount', u'ConfirmStatus', u'RefEntityName',
         u'ReferenceOb', u'CounterpartyRefID', u'CDSType',
         u'TerminationDateUnadjusted', u'TerminationDateAdjusted',
         u'StandardRefObligation', u'FixedRate'] + \
        [u'MaturityDate', u'StrikePrice', u'IsSpot'] + \
        [u'Symbol', u'VolatilityStrike'] + \
        [u'Direction', u'MaturityDateUnadjusted', u'TradeCurrency',
         u'ProductType', u'UnderlyingSecurity'] + \
        [u'MaturityTenor', u'PaymentDate', u'CAP_FLOOR',
         u'MaturityDateAdjusted'] + \
        [u'IsElectronicallyConfirmed', u'Classification'] + \
        [u'FloatingRateIndex'] + [u'IsTodayResetDate'] + \
        [u'FloatRateIndexRec', u'IndexTenorRec', u'IsOldED',
         u'DayCountFractionPay', u'DayCountFractionRec',
         u'PaymentFrequencyPay', u'PaymentFrequencyRec', u'RollDate'] + \
        [u'CCP', u'CCPConfirmRefId'] + \
        [u'IndexTenorPay', u'SpreadPay', u'FloatRateIndexPay'] + \
        [u'TerminationDate', u'FloatingIndex', u'StartFlow', u'CptyRefID'] + \
        [u'Country'] + \
        [u'Barrier1Strike', u'Barrier1CCYPair', u'bdi', u'Barrier2Strike',
         u'Barrier2CCYPair'] + \
        [u'PutCall', u'UnderlyingSymbol', u'OptionStyle'] + \
        [u'TerminationDateUnderlyingUnadjusted', u'CallPut', u'PayReceive'] + \
        [u'TerminationDateUnderlyingAdjusted'] + [u'ProceedsNotional'] + \
        [u'ContractType', u'ExecutingAccount'] + [u'SSGClientNote'] + \
        [u'Issuer']
    while line:
        line.update(apx)
        # Report (once) any unexpected columns, then add them to ignore.
        ext_s = set(line.keys()) - set(xref.keys())
        #pp(ext_s)
        if ext_s - set(ignore):
            pfmtd([
                dict(Id=k, REST_Col=v)
                for k, v in enumerate(list(ext_s - set(ignore)))
            ], 'Extra cols in REST/IGNORE')
            pp(list(ext_s - set(ignore)))
            ignore = ignore + list(ext_s - set(ignore))
        #rows.append([str(line[x]) if xref[x][1] in ['varchar'] else float(line[x]) if xref[x][1] in ['varchar'] else line[x] for x in xref if x not in ext])
        line = pipe.readline()
    print(123)
    e()
    chunk = 3
    total = 0
    cid = 0
    psql(sql, 'Insert')
    if not rows:
        raise Exception('No data in REST pipe.')
    else:
        ignore_cols = target["ignoreSourceColumns"]
        # Sanity check: record width must equal mapped width + ignored.
        if not len(fline) == len(rows[0]) + len(ignore_cols):
            pp(fline)
            pp(rows[0])
            raise Exception(
                'line %s <> row %s not in xref:%s, not in source:%s' %
                (len(fline), len(rows[0]),
                 set(fline.keys()) - set(xref.keys()),
                 set(xref.keys()) - set(fline.keys())))
        pfmtd([
            dict(Col=col, Row=rows[0][i])
            for i, col in enumerate([col for col in xref])
        ], 'First row')
    while total < len(rows):
        cur.fast_executemany = True
        data = rows[total:][:chunk]
        #ppe(data)
        cur.executemany(sql, data)
        # Dry-run: every chunk is rolled back immediately after insert.
        cur.execute("ROLLBACK")
        trans.conn.rollback()
        ins = len(data)
        total += ins
        cid += 1
        log.info('[{}] [{}] {}: Running: {:,.0f}, Rows: {:,.0f}'.format(
            self.objtype, cid, self.cln, total, ins))
    log.info(
        '[{}]: {}: Inserted: {:,.0f}, To-Schema:{}, To-Table:{}, Skipped: {}, Elapsed: {}'
        .format(self.objtype, self.cln, len(rows), target['targetSchema'],
                target["targetTable"], skip,
                round((time.time() - start_time), 2)))
    pipe.close()
    insert_stats.inserted_cnt = total
def bulk_load_file(self, trans, file_names, qname, cfg, out, header=None):
    """Bulk-load each file in ``file_names`` with SQL Anywhere LOAD TABLE.

    The target column list comes from *header* when given, otherwise from
    the table's own metadata (via a ``WHERE 1=2`` probe). Per-file row
    counts are recorded in ``out[path]``; everything is committed once at
    the end.

    NOTE(review): ``rowid`` and ``vals`` are initialized but never
    updated, so the 'Read/Inserted' debug and final log values for them
    are always 0 — presumably leftovers from a row-by-row variant.
    NOTE(review): ``cli`` here is a module-level object, not ``self.cli``
    (see the commented-out assignment) — confirm.
    """
    #cli=self.cli
    scfg, tcfg = cfg
    skip = scfg['writeHeader']  # 1 = skip header line in the data file
    assert skip in [0, 1]
    tbl = cli.get_parsed(ckey='targetTable', cfg=tcfg)
    sch = tcfg['targetSchema']
    assert tbl
    fnames = file_names.file_names
    start_time = time.time()
    total_ins = 0
    cur = self.conn.cursor()
    # Metadata-only probe: returns no rows but populates cur.description.
    cur.execute('SELECT * FROM %s.%s WHERE 1=2' % (sch, tbl))
    from decimal import Decimal
    #for col in cur.description:
    #    print col[0], col[1],type(col[1]), col[1] == decimal.Decimal, decimal.Decimal
    #e()
    if header:
        clist = ',\n '.join([c for c in header])
    else:
        cols = [c[:2] for c in cur.description]
        clist = ',\n '.join([c[0] for c in cur.description])
    for path in file_names.file_names:
        #pp(fnamed)
        #_, fnd = fnamed
        #pp(fnd)
        #path = fnd['path']
        assert os.path.isfile(path)
        limit = 1000
        rowid = 0
        vals = []
        start_time = time.time()
        linesep = scfg['recordDelimiter']
        colsep = scfg['columnDelimiter']
        data = []
        #print 7777777777777, path
        if 0:
            # Disabled variant with an explicit row delimiter.
            stmt = """ LOAD TABLE %s.%s (%s)
                FROM '%s'
                quotes off escapes off format ascii
                delimited by '%s' skip %d row delimited by '%s' """ % (
                sch, tbl, clist, path, colsep, skip, linesep)
        stmt = """ LOAD TABLE %s.%s (%s)
            FROM '%s'
            quotes off escapes off format ascii
            delimited by '%s' skip %d """ % (sch, tbl, clist, path, colsep,
                                             skip)
        psql(stmt)
        #e()
        try:
            cnt = cur.execute(stmt)
            total_ins += cur.rowcount
            out[path] = cur.rowcount
        except pyodbc.ProgrammingError as ex:
            log.debug(stmt)
            log.error(ex)
            self.conn.rollback()
            raise
        log.debug('Read: %d, Inserted: %d ' % (rowid, len(vals)))
    # Single commit for all files loaded above.
    self.conn.commit()
    #out[path]=total_ins
    log.info('%s: Read:%d, Inserted: %d, Elapsed: %s' %
             (self.cln, rowid, total_ins,
              round((time.time() - start_time), 2)))
def load_md5(self, trans, file_obj, table_name, qname, fmt_cols, cfg, skip=0, apx=None, stats=None):
    """COPY a delimited file into a Vertica table, with hex-format columns.

    Columns listed in *fmt_cols* get ``FORMAT 'hex'``; *apx* (optional
    OrderedDict) adds constant ``<value> AS <column>`` expressions to the
    COPY. Aborts (rolls back) on any error; asserts that accepted rows
    equal line count minus *skip*. Per-table stats are appended to *stats*.
    """
    scfg, tcfg = cfg
    file_name = file_obj.file_name
    assert os.path.isfile(file_name)
    if 1:
        colsep = scfg['columnDelimiter']
        assert colsep
        lcnt = file_obj.line_count(file_name)
    if 1:
        cols = ','.join([col[0] for col in file_obj.cols])
        trans.conn.autocommit = False
        # Hex-encoded columns (e.g. MD5) need FORMAT 'hex' in the COPY list.
        copyfmt = ',\n'.join([
            "%s FORMAT 'hex'" % col[0] if col[0] in fmt_cols else
            "%s" % col[0] for col in file_obj.cols
        ])
        assert os.path.isfile(file_obj.file_name)
        #print (table_name, apx)
        #e()
        # Leading '' yields the comma that joins apxq to the column list.
        apxq = ',\n'.join(
            [''] + ["%s AS %s" % (k, v)
                    for k, v in apx.items()]) if apx else ''
        stmt = """ COPY %s (%s %s)
            FROM LOCAL '%s'
            DELIMITER '|' ESCAPE AS '^' NULL '' SKIP %d
            ABORT ON ERROR NO COMMIT """ % (table_name, copyfmt, apxq,
                                            file_obj.file_name, skip)
        try:
            psql(stmt, 'Load')
            trans.cur.execute(stmt)
        except:
            trans.conn.rollback()
            pfmt([[stmt]])
            raise
        # Vertica session functions report the outcome of the last COPY.
        accepted, rejected = trans.cur.execute(
            'SELECT GET_NUM_ACCEPTED_ROWS(),GET_NUM_REJECTED_ROWS()'
        ).fetchall()[0]
        pfmt([[lcnt - skip, accepted, rejected]],
             ['Line count', 'Accepted', 'Rejected'], 'Load stats')
        assert lcnt - skip == accepted
        out = OrderedDict()
        out['table_name'] = table_name
        out['accepted'] = accepted
        out['rejected'] = rejected
        out['linecount'] = lcnt
        out['skip'] = skip
        out['diff'] = lcnt - skip - accepted
        stats.append(out)
def load_file(self, trans, file_obj, schema, table_name, qname, fmt_cols, cfg, skip=0, apx=None, stats=None):
    """COPY a delimited file into ``schema.table_name`` on Vertica.

    Like load_md5 but schema-qualified, driven by ``file_obj.cols_alt``,
    and recording stats keyed by table name (``stats[table_name]``).
    Aborts (rolls back) on any error; asserts accepted == lines - skip.

    NOTE(review): the non-hex branch formats ``col`` (the whole tuple),
    not ``col[0]`` — relies on cols_alt entries rendering as valid column
    expressions; confirm against set_alt_cols().
    """
    scfg, tcfg = cfg
    file_name = file_obj.file_name
    assert os.path.isfile(file_name)
    if 1:
        colsep = scfg['columnDelimiter']
        assert colsep
        lcnt = file_obj.line_count(file_name)
    if 1:
        pp(file_obj.cols)
        #cols = ','.join([col.decode() for col in file_obj.cols])
        #pp(cols)
        trans.conn.autocommit = False
        copyfmt = ',\n'.join([
            "%s FORMAT 'hex'" % col[0] if col[0] in fmt_cols else
            "%s" % col for col in file_obj.cols_alt
        ])
        assert os.path.isfile(file_obj.file_name)
        stmt = """ COPY %s.%s (%s )
            FROM LOCAL '%s'
            DELIMITER '|' ESCAPE AS '^' NULL '' SKIP %d
            ABORT ON ERROR NO COMMIT """ % (schema, table_name, copyfmt,
                                            file_obj.file_name, skip)
        try:
            # Log the target table structure before loading.
            self.desc_table(schema, table_name)
            psql(stmt, 'Load')
            trans.cur.execute(stmt)
        except:
            trans.conn.rollback()
            psql(stmt)
            raise
        accepted, rejected = trans.cur.execute(
            'SELECT GET_NUM_ACCEPTED_ROWS(),GET_NUM_REJECTED_ROWS()'
        ).fetchall()[0]
        pfmtd([
            dict(Line_count=lcnt - skip, Accepted=accepted,
                 Rejected=rejected)
        ], 'Load stats')
        assert lcnt - skip == accepted
        out = OrderedDict()
        out['table_name'] = table_name
        out['accepted'] = accepted
        out['rejected'] = rejected
        out['linecount'] = lcnt
        out['skip'] = skip
        out['diff'] = lcnt - skip - accepted
        stats[table_name] = out
def run():
    """Top-level orchestration: load dump-folder files into target tables.

    For every configured dump source, find ``*.ok`` marker files; for each
    marker's folder, load every ``*.out`` file into a temp ``tmp_PK_*``
    table (with MD5/AsOf appendix columns computed into a ``.pk`` sidecar
    file), then merge changed/new rows from the temp tables into the real
    target tables, keyed on MD5 comparison. Commits per target, reports
    stats, and finishes with ``cli.done()``.

    NOTE(review): ``ftlist`` is populated inside the first per-okfn loop
    but consumed in the second — it carries over the LAST okfn's tables;
    confirm whether that is intended for multi-okfn runs.
    """
    skip = 2  # header + type line in the dump files
    do_not_load = []
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(aname=_src_class, app_init=app_init)
        cli.set_source(_source)
        dir_scfg = cli.get_dcfg(_src_class)
        path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
        ok_files = InOut(file_names=[])
        # *.ok markers flag dump folders that are complete and loadable.
        DirReader.glob_dir(path=path, out=ok_files, ext='*.ok')
        loaded = {}
        for _trg_class, val in cli.cfg['target'][_source].items():
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)
            do_not_delete = tcfg['doNotDeleteTables']
            do_not_load = tcfg['doNotLoadTables']
            to_conn = InOut()
            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            toSchema = tcfg['targetSchema']
            stmt = 'set search_path to %s' % toSchema
            psql(stmt)
            to_conn.cur.execute(stmt)
            pkstats = {}
            # Phase 1: stage every *.out file into a tmp_PK_* temp table.
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File',
                                       app_init=app_init,
                                       file_name=okfn,
                                       scfg=dir_scfg)
                okdir, okname = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)
                out_files = InOut(file_names=[])
                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                #e()
                if 1:  # Check if some there are files missing in config
                    ftlist = []
                    for out_fn in out_files.file_names:
                        print(out_fn)
                        # File naming convention: <prefix>.<table>.<...>
                        ftlist.append(os.path.basename(out_fn).split('.')[1])
                    pfmt([[x] for x in ftlist], ['Files->Tables'])
                    ctables = cli.tcfg['targetTables'].keys()
                    extra_file_tables = list(set(ftlist) - set(ctables))
                    pfmt([[x] for x in extra_file_tables],
                         ['Tables not in config.'])
                    extra_config_tables = list(set(ctables) - set(ftlist))
                    pfmt([[x] for x in extra_config_tables],
                         ['Tables in config but not in file names.'])
                    assert not extra_file_tables, 'Tables %s are not listed in config["targetTables"].' % extra_file_tables
                for outfn in out_files.file_names:  # Master first
                    outFile = create_reader(aname='File',
                                            app_init=app_init,
                                            file_name=outfn,
                                            scfg=dir_scfg)
                    outbn = os.path.basename(outfn)
                    tbl = outbn.split('.')[1]
                    outTbl = 'tmp_PK_%s' % tbl
                    outCols = outFile.get_header_cols()
                    # Appendix columns added to every staged row.
                    apxCols = [('MartModifiedDate', 'timestamp'),
                               ('AsOfFrom', 'timestamp'),
                               ('AsOfTo', 'timestamp'), ('MD5', 'char(22)')]
                    outTblCols = toDB.get_create_col_list(outCols,
                                                          apx=apxCols)
                    toCols = toDB.get_col_types(toSchema, tbl)
                    pp(toCols)
                    toDB.desc_tmp_table(outTbl, outCols + apxCols)
                    do_not_delete.append(outTbl)
                    try:
                        stmt = 'drop table %s' % outTbl
                        to_conn.cur.execute(stmt)
                    except Exception as ex:
                        #raise
                        # Only ignore "does not exist"; re-raise the rest.
                        if not 'Table "%s" does not exist' % outTbl in str(ex):
                            raise
                    psql(outfn)
                    stmt = 'CREATE LOCAL TEMPORARY TABLE %s ( %s )\nON COMMIT PRESERVE ROWS' % (
                        outTbl, ', \n'.join(
                            ['%s %s' % tuple(col) for col in toCols]))
                    pfmt([[stmt]], ['Create master temp PK' + outTbl])
                    toDB.exec_ddl(stmt)
                    if 1:  #//Load data into PK table
                        fmt_cols = {}
                        mmDt = okFile.get_value(coords=(0, 0), skip=skip)
                        md5val = (base64.b64encode(
                            hashlib.md5(b'test').digest()))
                        apx = OrderedDict()
                        apx['MartModifiedDate'] = mmDt
                        apx['AsOfFrom'] = mmDt
                        apx['AsOfTo'] = "12/31/9999"
                        apx['MD5'] = ''  #//defined on row level
                        # Build the .pk sidecar: original lines + appendix
                        # values, with a per-row MD5 of the line content.
                        pk_outfn = '%s.pk' % outfn
                        colsep = dir_scfg['columnDelimiter']
                        with open(pk_outfn, 'wb') as pkfh:
                            with open(outfn, 'rb') as outfh:
                                line = outfh.readline().strip()
                                pkfh.write(line +
                                           colsep.join(apx.keys()).encode() +
                                           os.linesep.encode())
                                line = outfh.readline().strip()
                                apxTypes = colsep.join(
                                    [col[1] for col in apxCols])
                                pkfh.write(line + apxTypes.encode() +
                                           os.linesep.encode())
                                line = outfh.readline().strip()
                                while line:
                                    md5 = (base64.b64encode(
                                        hashlib.md5(
                                            line.replace(b'|',
                                                         b'')).digest()))
                                    apx['MD5'] = md5.decode(
                                        'ascii', 'ignore').strip('=')  #// REDO
                                    pkfh.write(
                                        line +
                                        colsep.join(apx.values()).encode() +
                                        os.linesep.encode())
                                    line = outfh.readline().strip()
                        outPkFile = create_reader(aname='File',
                                                  app_init=app_init,
                                                  file_name=pk_outfn,
                                                  scfg=dir_scfg)
                        outPkFile.set_alt_cols()
                        schema = tcfg['targetSchema']
                        toDB.load_grds_file(trans=to_conn,
                                            file_obj=outPkFile,
                                            schema=schema,
                                            table_name=outTbl,
                                            qname='insertStmt',
                                            fmt_cols=fmt_cols,
                                            cfg=(dir_scfg, tcfg),
                                            skip=skip,
                                            stats=pkstats)
                        loaded[outbn] = outTbl
                        #outPkFile.delete()
                #pfmtd([pkstats])
                #e()
            stats = {}
            deleted = {}
            processed = []
            not_processed = []
            # Phase 2: merge staged rows into the real target tables.
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File',
                                       app_init=app_init,
                                       file_name=okfn,
                                       scfg=dir_scfg)
                okdir, _ = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)
                #e()
                assert os.path.isdir(okdir)
                snap_df = cli.get_dest_folder(okdir)
                # Skip folders already snapshotted to the destination.
                if os.path.isdir(snap_df):
                    log.warning('[%s]Destination folder exists: [%s]' %
                                (okdir, snap_df))
                    not_processed.append(okfn)
                    continue
                out_files = InOut(file_names=[])
                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                apx = dict(
                    MartModifiedDate=okFile.get_value(coords=(0, 0),
                                                      skip=skip))
                #e()
                if 0:
                    g = raw_input("Continue?")
                not_loaded = {}
                for table_name in ftlist:
                    tmpTbl = 'tmp_PK_%s' % table_name
                    toCols = toDB.get_tab_cols(tmpTbl)
                    #pp(toCols)
                    toDB.desc_table(None, tmpTbl)
                    toDB.desc_table(toSchema, table_name)
                    #e()
                    # Merge key width differs per table family: 3-key +
                    # ValidFrom, 2-key, or 1-key joins — all MD5-compared.
                    if table_name in ['TxnLookupMap']:
                        tmpCols = ',\n '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ insert into {0} ( {1} )
                            select distinct {2} from {3} tmid
                            LEFT JOIN {0} ta
                            ON ta.{4} = tmid.{4} AND ta.{5} = tmid.{5} AND ta.{6} = tmid.{6} AND ta.ValidFrom = tmid.ValidFrom and ta.AsOfTo = tmid.AsOfTo
                            where ta.MD5 <> tmid.MD5 OR ta.{4} is NULL """.format(
                            table_name,
                            ',\n '.join([col[0].decode() for col in toCols]),
                            tmpCols, tmpTbl, toCols[0][0].decode(),
                            toCols[1][0].decode(), toCols[2][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    elif table_name in [
                            'G3Lookup', 'GCLookup', 'GISLookup', 'GPSLookup',
                            'GPXLookup', 'GPosLookup', 'GTxLookup',
                            'FundToBusinessUnitMap', 'TxEditReason'
                    ]:
                        tmpCols = ',\n '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ insert into {0} ( {1} )
                            select distinct {2} from {3} tmid
                            LEFT JOIN {0} ta
                            ON ta.{4} = tmid.{4} AND ta.{5} = tmid.{5} AND ta.AsOfTo = tmid.AsOfTo
                            where ta.MD5 <> tmid.MD5 OR ta.{4} is NULL """.format(
                            table_name,
                            ',\n '.join([col[0].decode() for col in toCols]),
                            tmpCols, tmpTbl, toCols[0][0].decode(),
                            toCols[1][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    else:
                        tmpCols = ',\n '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ insert into {0} ( {1} )
                            select distinct {2} from {3} tmid
                            LEFT JOIN {0} ta
                            ON ta.{4} = tmid.{4} AND ta.AsOfTo = tmid.AsOfTo
                            where ta.MD5 <> tmid.MD5 OR ta.{4} is NULL ; """.format(
                            table_name,
                            ',\n '.join([col[0].decode() for col in toCols]),
                            tmpCols, tmpTbl, toCols[0][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])
                if 1:
                    toDB.commit_transaction(trans=to_conn)
                pfmt([[k] + list(v.values())[1:] for k, v in pkstats.items()],
                     [
                         'Table', 'Accepted', 'Rejected', 'Line count',
                         'Skip', 'Diff'
                     ], 'Load completed'.upper())
                pfmt([(k, v) for k, v in loaded.items()],
                     ['Loaded Files', 'Loaded Tables'])
                pfmt([(k, v) for k, v in not_loaded.items()],
                     ['Not loaded Files', 'Not loaded Tables'])
                assert os.path.isdir(okdir)
                if 0:
                    cli.MoveSnapFolder(okdir)
                processed.append(dict(ProcessedFile=okfn))
                #break;
    if not ok_files.file_names:
        pfmtd([
            dict(NoFiles='No OK files at working dir: [ %s ]' % cli.pa[0])
        ])
    pfmtd(processed)
    pfmtd(not_processed)
    if 0:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
    cli.done()