def upload_file(fname, cmd, rmcmd, snow_conn):
    print('Starting %s' % fname)
    compress_file(fname)
    subprocess.call([cmd], shell=True)
    to_file = '%s.gz' % fname
    s3fn = InOut()
    s3fn.file_names = []
    s3fn.file_names.append(to_file)
    Snowflake.bulk_copy(trans=snow_conn, file_names=s3fn, target=cli.tcfg, qname='copyStmt')
    os.remove(to_file)
    subprocess.call([rmcmd], shell=True)
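# The compress_file() helper called above is not shown in this listing.  A minimal
# sketch, assuming it simply gzips the dump file and leaves '<fname>.gz' next to the
# original (the name upload_file() later passes to bulk_copy); the real helper may
# differ, so treat this as an illustration only.
import gzip
import shutil

def compress_file(fname):
    # Hypothetical implementation: stream-copy fname into fname.gz.
    with open(fname, 'rb') as src, gzip.open('%s.gz' % fname, 'wb') as dst:
        shutil.copyfileobj(src, dst)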
def run():
    lite_tbl = {}
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(aname=_src_class, app_init=app_init)
        if 1:
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
            out_files = InOut(file_names=[])
            print(path)
            DirReader.glob_dir(path=path, out=out_files, ext='*.out')
            pp(out_files.file_names)
        for _trg_class, val in cli.cfg['target'][_source].items():
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)
            if 1:
                toDB.begin_transaction(env=tcfg['targetDb'], out=lite_conn)
                toDB.bulk_insert(trans=lite_conn,
                                 file_names=out_files,
                                 qname='insertStmt',
                                 cfg=(dir_scfg, tcfg),
                                 create_table=True,
                                 strip_line_term=True)
                toDB.commit_transaction(trans=lite_conn)
            lite_tbl[_source] = cli.get_parsed(ckey='targetTable', cfg=tcfg)
    pp(lite_tbl)
def run():
    stats = {}
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(aname=_src_class, app_init=app_init)
        if 1:  #Get the file names
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
            #DirReader.get_files(path=path, out = data_files )
            DirReader.glob_dir(path=path, out=data_files, ext='*.csv')
        if 1:  #Load to DB
            to_conn = InOut()
            for _trg_class, val in cli.cfg['target'][_source].items() or []:
                cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                _dbname = tcfg["targetDb"]
                #pp(data_files.file_names)
                for data_file in data_files.file_names:
                    dataFile = create_reader(aname='File',
                                             app_init=app_init,
                                             file_name=data_file,
                                             scfg=dir_scfg)
                    dataFile.describe()
                if 1:
                    toDB = create_writer(aname=_trg_class, app_init=app_init)
                    toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
                    toDB.desc_table(schema=tcfg['targetSchema'],
                                    tbl=cli.get_parsed(ckey='targetTable', cfg=tcfg),
                                    col_ord=False)
                    #e()
                    toDB.bulk_load_file(trans=to_conn,
                                        file_names=data_files,
                                        qname='insertStmt',
                                        cfg=(dir_scfg, tcfg),
                                        out=insert_stats)
                    toDB.commit_transaction(trans=to_conn)
                if 0:
                    stats['Dir->%s' % (_dbname)] = st = OrderedDict()
                    st['source_cnt'] = cli.get_src_row_count(DB) if not cli.lame_duck else cli.lame_duck
                    st['total_extracted'] = insert_stats.inserted_cnt
                    st['total_inserted'] = insert_stats.inserted_cnt
    if 0:
        for k, v in stats.items():
            assert v['source_cnt'] == v['total_extracted']
            assert v['source_cnt'] == v['total_inserted']
    if 0:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
def run():
    lite_tbl = {}
    for _source, val in cli.cfg['source'].items():
        _dbname = val["sourceDb"]
        DB = create_reader(_dbname, app_init=app_init)
        FileWriter = create_writer('File', app_init=app_init)
        #data_files.file_names=[]
        if 1:
            cli.set_source(_source)
            DB.set_loader(FileWriter)
            total_ins = 0
            FileWriter.open_file(out=dump_file)
            for iq_data in DB.fetch_many(chunk_size=file_size_rows,
                                         source=cli.get_scfg(),
                                         qname='sourceStmt',
                                         out=InOut(),
                                         skip_header=0):
                if not total_ins:
                    FileWriter.create_header(file=dump_file, header=DB.get_header(), cfg=cli.get_dcfg())
                FileWriter.append_data(file=dump_file, data=iq_data, cfg=cli.get_dcfg())
                total_ins += len(iq_data.data)
            if not total_ins:  #in case there's no data
                FileWriter.create_header(file=dump_file, header=DB.get_header(), cfg=cli.get_dcfg())
            FileWriter.close_file(file=dump_file)
    if 1:
        Email.send_email(**email_args)
def fetch_stream(self, chunk_size, source, qname, out, skip_header):
    assert chunk_size
    chunk_size = self.cli.lame_duck if self.cli.lame_duck and chunk_size > self.cli.lame_duck else chunk_size
    assert chunk_size
    tf = "%Y-%m-%d.%H_%M_%S"
    current_ts = time.strftime(tf)
    id = 0
    cur = InOut()
    self.open_stream(source, qname, out=cur)
    #e()
    return None
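# The clamp at the top of fetch_stream() caps every read at the lame-duck row limit
# when one is set.  A tiny standalone illustration of the same rule; the numbers are
# placeholders, not values from a real run:
def clamp_chunk(chunk_size, lame_duck):
    # use the lame-duck limit only when it is set and smaller than the requested chunk
    return lame_duck if lame_duck and chunk_size > lame_duck else chunk_size

assert clamp_chunk(250000, 100) == 100      # limit set: shrink the chunk
assert clamp_chunk(250000, 0) == 250000     # no limit: keep the configured chunk size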
def run():
    total_ins = 0
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = cli.get_scfg(_src_class)
        _dbname = cli.scfg["sourceDb"]
        DB = create_reader(_dbname, app_init=app_init)
        FileWriter = create_writer('Dir', app_init=app_init)
        DB.set_loader(FileWriter)
        if 1:  #Extract to Dir
            for _dmp_class, val in cli.cfg['dump'][_source].items() or []:
                cli.dcfg = cli.get_dcfg(_dmp_class)
                pp(cli.dcfg)
                file_ins_cnt = 0
                FileWriter.open_file(out=dump_file)
                if 0:
                    IQ.open_stream(dbcfg=cli.scfg, qname='sourceStmt', out=IQ_cursor)
                    S3StreamLoader.load_stream(source=IQ_cursor, skip_header=0, out=s3_file_names)
                for iq_data in DB.fetch_many(chunk_size=file_size_rows,
                                             source=cli.scfg,
                                             qname='sourceStmt',
                                             out=InOut(),
                                             skip_header=0):
                    if not file_ins_cnt:
                        FileWriter.create_header(file=dump_file, header=DB.get_header(), cfg=cli.dcfg)
                    FileWriter.append_data(file=dump_file, data=iq_data, cfg=cli.dcfg)
                    file_ins_cnt += len(iq_data.data)
                if not file_ins_cnt:  #in case there's no data
                    FileWriter.create_header(file=dump_file, header=DB.get_header(), cfg=cli.dcfg)
                FileWriter.close_file(file=dump_file)
                total_ins += file_ins_cnt
    log.info('Total records saved: %d' % total_ins)
    if 1:
        Email.send_email(**email_args)
def run():
    stats = {}
    for _source, val in cli.cfg['source'].items():
        val = cli.cfg['source'][_source]
        _dbname = val["sourceDb"]
        DB = create_reader(_dbname, app_init=app_init)
        if 1:  #Load to DB
            cli.set_source(_source)
            file_scfg = cli.cfg['dump'][_source]
            if 1:
                to_conn = InOut()
                #file_stats.ins_stats[_dbname]=ins={}
                for _target, val in cli.cfg['target'][_source].items() or []:
                    tcfg = cli.cfg['target'][_source][_target]
                    _todbname = val["targetDb"]
                    toDB = create_writer(_target, app_init=app_init)
                    rec_delim = '\n'
                    skip_header = 0
                    #ins[_todbname]=manager.dict()
                    toDB.insert_files(producer=(producer, (cli, _source)),
                                      out=file_stats,
                                      skip_header=skip_header,
                                      rec_delim=rec_delim,
                                      cfg=(file_scfg, tcfg),
                                      return_dict=return_dict)
                    pp(file_stats.dump_files)
                    extracted_cnt = 0
                    for fobj in file_stats.dump_files:
                        extracted_cnt += fobj.extracted_cnt
                    print(toDB.counter.value())
                    pp(return_dict.values())
                    stats['%s->%s' % (_dbname, _todbname)] = st = OrderedDict()
                    st['source_cnt'] = insert_stats.source_cnt if not cli.lame_duck else cli.lame_duck
                    st['total_extracted'] = extracted_cnt
                    st['total_inserted'] = toDB.total_ins
    pp(stats)
    for k, v in stats.items():
        assert v['source_cnt'] == v['total_extracted']
        assert v['source_cnt'] == v['total_inserted']
    if 1:
        email_args.update(dict(cli_stats=stats))
        Email.send_email(**email_args)
def run():
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(_src_class, app_init=app_init)
        if 1:  #Get the file names
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
            DirReader.glob_dir(path=path, out=ok_files, ext='*.ok')
            for okfn in ok_files.file_names:
                okdir, _ = os.path.splitext(okfn)
                assert os.path.isdir(okdir)
                OkReader = create_reader("Dir", app_init=app_init)
                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                pp(out_files.file_names)
        if 1:
            for _trg_class, val in cli.cfg['target'][_source].items():
                cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                _dbname = tcfg["targetDb"]
                toDB = create_writer(_trg_class, app_init=app_init)
                to_conn = InOut()
                for out_fn in out_files.file_names:
                    tbl = os.path.basename(out_fn).split('.')[1]
                    print(tbl)
                    toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
                    toDB.load_file(trans=to_conn,
                                   file_name=out_fn,
                                   table_name=tbl,
                                   qname='insertStmt',
                                   cfg=(dir_scfg, tcfg),
                                   create_table=True)
                    toDB.commit_transaction(trans=to_conn)
    if 1:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
    etl.done()
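# The loaders above and below derive the target table from the dump-file name: the
# files follow a '<service>.<Table>.out' convention, so the second dot-separated
# token is the table.  A quick illustration with a made-up path (not taken from a
# real dump directory):
import os

fn = '/data/dump/gfin.Instrument.out'   # hypothetical file name
assert os.path.basename(fn).split('.')[1] == 'Instrument'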
def run():
    lite_tbl = {}
    stats = {}
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(_src_class, app_init=app_init)
        if 1:  #Get the file names
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
            DirReader.get_files(path=path, out=data_files)
        if 1:  #Load to DB
            to_conn = InOut()
            for _trg_class, val in cli.cfg['target'][_source].items() or []:
                cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                _dbname = tcfg["targetDb"]
                toDB = create_writer(_trg_class, app_init=app_init)
                toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
                toDB.bulk_load(trans=to_conn,
                               file_names=data_files,
                               qname='insertStmt',
                               cfg=(dir_scfg, tcfg),
                               out=insert_stats)
                toDB.commit_transaction(trans=to_conn)
                if 0:
                    stats['Dir->%s' % (_dbname)] = st = OrderedDict()
                    st['source_cnt'] = cli.get_src_row_count(DB) if not cli.lame_duck else cli.lame_duck
                    st['total_extracted'] = insert_stats.inserted_cnt
                    st['total_inserted'] = insert_stats.inserted_cnt
    if 0:
        for k, v in stats.items():
            assert v['source_cnt'] == v['total_extracted']
            assert v['source_cnt'] == v['total_inserted']
    if 1:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
def run():
    total_ins = 0
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = cli.get_scfg(_src_class)
        _dbname = cli.scfg["sourceDb"]
        fromDB = create_reader(_dbname, app_init=app_init)
        FileWriter = create_writer('Dir', app_init=app_init)
        fromDB.set_loader(FileWriter)
        fromDB.begin_transaction(env=cli.scfg['sourceDb'], out=from_conn)
        if 1:  #Extract to Dir
            for _dmp_class, val in cli.cfg['dump'][_source].items() or []:
                cli.dcfg = cli.get_dcfg(_dmp_class)
                file_ins_cnt = 0
                FileWriter.open_file(out=dump_file)
                for iq_data in fromDB.fetch_many(chunk_size=file_size_rows,
                                                 source=cli.scfg,
                                                 qname='sourceStmt',
                                                 out=InOut(),
                                                 skip_header=0):
                    if not file_ins_cnt:
                        FileWriter.create_header(file=dump_file, header=fromDB.get_header(), cfg=cli.dcfg)
                    FileWriter.append_data(file=dump_file, data=iq_data, cfg=cli.dcfg)
                    file_ins_cnt += len(iq_data.data)
                if not file_ins_cnt:  #in case there's no data
                    FileWriter.create_header(file=dump_file, header=fromDB.get_header(), cfg=cli.dcfg)
                FileWriter.close_file(file=dump_file)
                total_ins += file_ins_cnt
        fromDB.commit_transaction(trans=from_conn)
    log.info('Total records saved: %d' % total_ins)
    if 0:
        Email.send_email(**email_args)
def run():
    SQL.set_loader(FileWriter)
    total_ins = 0
    FileWriter.open_file(out=dump_file)
    for iq_data in SQL.fetch_many(chunk_size=file_size_rows,
                                  source=cli.scfg,
                                  qname='sourceStmt',
                                  out=InOut(),
                                  skip_header=0):
        FileWriter.append_data(file=dump_file, data=iq_data)
        total_ins += len(iq_data.data)
    FileWriter.close_file(file=dump_file)
    if 0:
        Email.send_email(**email_args)
def run():
    IQ.set_loader(SQLServer)
    total_ins = 0
    for iq_data in IQ.fetch_many(chunk_size=file_size_rows,
                                 source=cli.scfg,
                                 qname='sourceStmt',
                                 out=InOut(),
                                 skip_header=0):
        SQLServer.insert_array(trans=sql_conn, target=cli.tcfg, data=iq_data, stmt='insertStmt')
        SQLServer.commit_transaction(trans=sql_conn)
        total_ins += len(iq_data.data)
        log.info('SQLServer: Inserted:%d' % total_ins)
    SQLServer.commit_transaction(trans=sql_conn, close_conn=True)
    if 1:
        Email.send_email(**email_args)
def run():
    IQ.set_loader(FileWriter)
    total_ins = 0
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = cli.get_scfg(_src_class)
        for _dmp_class, val in cli.cfg['dump'][_source].items() or []:
            cli.dcfg = cli.get_dcfg(_dmp_class)
            pp(cli.dcfg)
            cli.exec_config()
            FileWriter.open_file(out=dump_file)
            for iq_data in IQ.fetch_many(chunk_size=file_size_rows,
                                         source=cli.scfg,
                                         qname='sourceStmt',
                                         out=InOut(),
                                         skip_header=0):
                if not total_ins:
                    FileWriter.create_header(file=dump_file, header=IQ.get_header(), cfg=cli.dcfg)
                FileWriter.append_data(file=dump_file, data=iq_data, cfg=cli.dcfg)
                total_ins += len(iq_data.data)
            if not total_ins:  #in case there's no data
                FileWriter.create_header(file=dump_file, header=IQ.get_header(), cfg=cli.dcfg)
            FileWriter.close_file(file=dump_file)
    if 0:
        Email.send_email(**email_args)
""" """ import sys import threading import subprocess from collections import OrderedDict from pprint import pprint as pp from include.utils import create_reader, create_writer, create_actor, InOut e = sys.exit cli, conn_pool = app_init Email = create_actor('Email', app_init=app_init) data_files = InOut() lite_conn = InOut() data_files.file_names = [] dump_file = InOut() file_size_rows = 250000 email_args = {'email_subject': 'Dir->IQ'} insert_stats = InOut(source_cnt=-1, inserted_cnt=-1) def run(): lite_tbl = {} stats = {} for _source, val in cli.cfg['dump'].items():
def producer(cli, _source):
    val = cli.cfg['source'][_source]
    _dbname = val["sourceDb"]
    DB = create_reader(_dbname, app_init=app_init)
    cnt = cli.get_src_row_count(DB)
    if not cli.lame_duck:
        assert cli.dop > 0
        cli.src_chunk_size = round(cnt / cli.dop) + 1
    else:
        cli.src_chunk_size = cli.lame_duck
    FileWriter = create_writer('File', app_init=app_init)
    data_files.file_names = []
    #uploaded_files.file_names=[]
    #ext_files=[]
    if 1:
        cli.set_source(_source)
        DB.set_loader(FileWriter)
        total_read = 0
        scfg = cli.get_scfg()
        source_chunk_size = int(float(cli.get_parsed(ckey='sourceChunkSize', cfg=scfg)))
        cid = 0
        skew_pct = int(float(cli.get_parsed(ckey='fileSkewPct', cfg=scfg)))
        log.debug('Skew percentile = %s' % skew_pct)
        if skew_pct and cli.dop >= 2:
            delta = source_chunk_size * (skew_pct / 100.0)
            num_of_files = cli.dop
            increment = int(delta / num_of_files)
            chunk_map = {}
            accum_skew = sum([increment * (num_of_files - i) for i in range(num_of_files)])
            for i in range(num_of_files):
                skew = ((cnt - accum_skew) / num_of_files) + increment * (num_of_files - i)
                chunk_map[i] = skew + 1 if not cli.lame_duck else cli.lame_duck
            pp(chunk_map)
            #e()
            if not cli.lame_duck:
                assert sum(chunk_map.values()) >= cnt, \
                    'Chunk map has to cover all source records [%s <> %s]' % (sum(chunk_map.values()), cnt)
            #dfiles=[]
            for iq_data in DB.fetch_many_async(chunk_map=chunk_map,
                                               counter=counter,
                                               source=scfg,
                                               qname='sourceStmt',
                                               out=InOut(),
                                               skip_header=0):
                dump_file = InOut(source_cnt=cnt)
                FileWriter.open_file(id=cid, out=dump_file)
                if 1:  #not total_ins:
                    dump_cfg = cli.get_dcfg()
                    FileWriter.create_header(file=dump_file, header=DB.get_header(), cfg=dump_cfg)
                FileWriter.append_data(file=dump_file, data=iq_data, cfg=dump_cfg)
                total_read += len(iq_data.data)
                FileWriter.close_file(file=dump_file)
                #ext_files.append(dump_file.fpath)
                #dfiles.append(dump_file)
                dump_file.extracted_cnt = total_read
                yield dump_file
                cid += 1
        else:  #lame duck
            print(source_chunk_size)
            #e()
            assert source_chunk_size
            for iq_data in DB.fetch_many(chunk_size=source_chunk_size,
                                         source=scfg,
                                         qname='sourceStmt',
                                         out=InOut(),
                                         skip_header=0):
                dump_file = InOut(source_cnt=cnt)
                FileWriter.open_file(id=cid, out=dump_file)
                if 1:  #not total_ins:
                    dump_cfg = cli.get_dcfg()
                    FileWriter.create_header(file=dump_file, header=DB.get_header(), cfg=dump_cfg)
                FileWriter.append_data(file=dump_file, data=iq_data, cfg=dump_cfg)
                total_read += len(iq_data.data)
                FileWriter.close_file(file=dump_file)
                #ext_files.append(dump_file.fpath)
                log.debug('File %d created:file: %d, %d records' % (cid, len(iq_data.data), source_chunk_size))
                cid += 1
                dump_file.extracted_cnt = total_read
                yield dump_file
    log.debug('Done extracting.....')
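# A worked example of the skew math used in producer() above, with made-up numbers
# (cnt, dop, sourceChunkSize and fileSkewPct are placeholders, not values from a real
# config).  Earlier files get larger slices, later files smaller ones, and the slices
# together still cover every source row:
cnt, dop, source_chunk_size, skew_pct = 1000, 4, 250, 20

delta = source_chunk_size * (skew_pct / 100.0)                 # 50.0 rows of total skew
increment = int(delta / dop)                                   # 12 rows between neighbouring files
accum_skew = sum(increment * (dop - i) for i in range(dop))    # 120
chunk_map = {i: ((cnt - accum_skew) / dop) + increment * (dop - i) + 1 for i in range(dop)}
# chunk_map -> {0: 269.0, 1: 257.0, 2: 245.0, 3: 233.0}; the sum (1004) covers cnt
assert sum(chunk_map.values()) >= cnt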
""" import sys from collections import OrderedDict from multiprocessing import Process, Value, Lock import multiprocessing from pprint import pprint as pp from include.utils import create_reader, create_writer, create_actor, InOut e = sys.exit cli, conn_pool = app_init Email = create_actor('Email', app_init=app_init) Dir = create_reader('Dir', app_init=app_init) file_stats = InOut(dump_files=[], ins_stats={}) data_files = InOut() data_files.file_names = [] #uploaded_files.file_names=[] email_args = {'email_subject': 'DB->file'} class Counter(object): def __init__(self, initval=0): self.val = Value('i', initval) self.cnt = Value('i', initval) self.lock = Lock() def increment(self):
def run():
    skip = 2
    serviceName = 'gfin'
    #deleted = {}
    #loaded = {}
    #not_loaded = {}
    #masterTbl = 'gtxMasterPKData'
    #do_not_delete = ['TxFinancingRateHist', masterTbl]
    do_not_load = ['TxFinancingRate', 'TxFinancingRateHist']  #'TxFinancingRate',
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(aname=_src_class, app_init=app_init)
        if 1:
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
            ok_files = InOut(file_names=[])
            DirReader.glob_dir(path=path, out=ok_files, ext='*.ok')
        if 1:
            for _trg_class, val in cli.cfg['target'][_source].items():
                cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                _dbname = tcfg["targetDb"]
                toDB = create_writer(aname=_trg_class, app_init=app_init)
                masterTabTag = tcfg['masterTableTag']
                masterTbl = tcfg['targetTables'][masterTabTag]['table_name']
                masterTblCol = tcfg['targetTables'][masterTabTag]['column_name']
                do_not_delete = tcfg['doNotDeleteTables'] + [masterTbl]
                do_not_load = tcfg['doNotLoadTables']
                to_conn = InOut()
                toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
                to_conn.cur.execute('set search_path to CIGRpt')
                if ok_files.file_names:
                    # Master first
                    try:
                        stmt = 'drop table %s' % masterTbl
                        to_conn.cur.execute(stmt)
                    except Exception as ex:
                        #raise
                        if not 'Table "%s" does not exist' % masterTbl in str(ex):
                            raise
                    stmt = 'create local temporary table %s ( %s bigint not null, MartModifiedDate timestamp)\n ON COMMIT PRESERVE ROWS' % (masterTbl, masterTblCol)
                    pfmt([[stmt]], ['Create master temp PK'])
                    to_conn.cur.execute(stmt)
                    #e()
                stats = {}
                deleted = {}
                processed = []
                not_processed = []
                for okfn in ok_files.file_names:
                    okFile = create_reader(aname='File', app_init=app_init, file_name=okfn, scfg=dir_scfg)
                    okdir, _ = os.path.splitext(okfn)
                    okbn = os.path.basename(okdir)
                    #e()
                    assert os.path.isdir(okdir)
                    snap_df = cli.get_dest_folder(okdir)
                    if os.path.isdir(snap_df):
                        log.warning('[%s]Destination folder exists: [%s]' % (okdir, snap_df))
                        not_processed.append(okfn)
                        continue
                    OkReader = create_reader(aname="Dir", app_init=app_init)
                    out_files = InOut(file_names=[])
                    DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                    apx = dict(MartModifiedDate=okFile.get_value(coords=(0, 0), skip=skip))
                    ftlist = []
                    for out_fn in out_files.file_names:
                        print(out_fn)
                        ftlist.append(os.path.basename(out_fn).split('.')[1])
                    pfmt([[x] for x in ftlist], ['Files->Tables'])
                    #e()
                    if 1:
                        ctables = cli.tcfg['targetTables'].keys()
                        extra_file_tables = list(set(ftlist) - set(ctables))
                        pfmt([[x] for x in extra_file_tables], ['Tables not in config.'])
                        extra_config_tables = list(set(ctables) - set(ftlist))
                        pfmt([[x] for x in extra_config_tables], ['Tables in config but not in file names.'])
                        assert not extra_file_tables, \
                            'Tables %s are not listed in config["targetTables"].' % extra_file_tables
                    if 0:
                        g = input("Continue?")
                    if 1:  #//create PK file
                        fromFile = create_reader(aname='File',
                                                 app_init=app_init,
                                                 file_name=os.path.join(okdir, 'gfin.Instrument.out'),
                                                 scfg=dir_scfg)
                        toFile = create_reader(aname='File',
                                               app_init=app_init,
                                               file_name=os.path.join(okdir, '%s.PK.out' % serviceName),
                                               scfg=dir_scfg,
                                               parse=False)
                        rowcnt = cli.createPrimaryKeyFile(ffObj=fromFile,
                                                          pkfn=os.path.join(okdir, '%s.PK.out' % serviceName))
                    assert masterTabTag in ftlist, '"%s" file is missing' % masterTabTag
                    if 1:
                        stmt = 'TRUNCATE TABLE %s' % (masterTbl)
                        toDB.exec_dml(stmt, trans=to_conn, commit=False)
                        deleted[masterTbl] = -1
                        #e()
                    #e()
                    loaded = {}
                    not_loaded = {}
                    if 1:
                        pkfn = [x for x in out_files.file_names
                                if os.path.basename(x).split('.')[1] in [masterTabTag]][0]
                        schema = tcfg['targetSchema']
                        outFile = create_reader(aname="File", app_init=app_init, file_name=pkfn, scfg=dir_scfg)
                        fmt_cols = tcfg['targetTables'][masterTabTag].get('formatColumns', [])
                        outFile.set_alt_cols()
                        toDB.load_gfin_file(trans=to_conn,
                                            file_obj=outFile,
                                            schema=schema,
                                            table_name=masterTbl,
                                            qname='insertStmt',
                                            fmt_cols=fmt_cols,
                                            cfg=(dir_scfg, tcfg),
                                            skip=skip,
                                            apx=apx,
                                            stats=stats)
                        loaded[out_fn] = masterTbl
                        #e()
                    if 1:
                        stmt = 'SELECT count(*) FROM %s t' % masterTbl
                        pkcnt = toDB.exec_query(stmt).fetchall()[0][0]
                        assert pkcnt == (rowcnt - skip)
                    for out_fn in [x for x in out_files.file_names
                                   if not os.path.basename(x).split('.')[1] in [masterTabTag]]:
                        outFile = create_reader(aname="File", app_init=app_init, file_name=out_fn, scfg=dir_scfg)
                        outCols = [col[0] for col in outFile.get_header_cols()]
                        tbl = os.path.basename(out_fn).split('.')[1]
                        assert tbl
                        if tbl not in [masterTabTag] + do_not_load:
                            if tbl not in do_not_delete:
                                stmt = 'DELETE FROM %s WHERE %s in (SELECT t.%s FROM %s t)' % (
                                    tbl, masterTblCol, masterTblCol, masterTbl)
                                deleted[tbl] = toDB.exec_dml(stmt, trans=to_conn, commit=False)
                                pfmt([[deleted[tbl]]], ['Deleted from %s' % tbl])
                            else:
                                deleted[tbl] = -1
                            tblCols = toDB.get_columns(tbl).values()
                            pfmt([[x] for x in list(set(tblCols) - set(outCols) - set(['MartModifiedDate']))],
                                 ['Columns in Source, but not Target'])
                            missing_cols = list(set(outCols) - set(tblCols))
                            pfmt([(tbl, x) for x in missing_cols], ['Table', 'Missing columns'])
                            if missing_cols:
                                to_conn.conn.rollback()
                                schema = tcfg["targetSchema"]
                                toDB.desc_table(schema, tbl)
                                raise Exception('File column %s missing in table "%s".' % (missing_cols, tbl))
                            if 1:
                                schema = tcfg['targetSchema']
                                fmt_cols = tcfg['targetTables'][tbl].get('formatColumns', [])
                                outFile.set_alt_cols()
                                toDB.load_gfin_file(trans=to_conn,
                                                    file_obj=outFile,
                                                    schema=schema,
                                                    table_name=tbl,
                                                    qname='insertStmt',
                                                    fmt_cols=fmt_cols,
                                                    cfg=(dir_scfg, tcfg),
                                                    skip=skip,
                                                    apx=apx,
                                                    stats=stats)
                                loaded[out_fn] = tbl
                        else:
                            not_loaded[out_fn] = tbl
                    else:
                        toDB.commit_transaction(trans=to_conn)
                    #pfmt([[k]+[deleted [k]]+list(v)[1:] for k,v in stats.items() if deleted [k]>=0], ['Table','Deleted', 'Accepted', 'Rejected','Line count','Skip', 'Diff'],'Load completed (deleted)'.upper())
                    #pfmt([(k,v) for k, v in loaded.items()], ['Loaded Files','Loaded Tables'])
                    #pfmt([(k,v) for k, v in not_loaded.items()], ['Not loaded Files','Not loaded Tables'])
                    pfmt([[k] + [deleted[k]] + list(v.values())[1:]
                          for k, v in stats.items() if deleted[k] >= 0],
                         ['Table', 'Deleted', 'Accepted', 'Rejected', 'Line count', 'Skip', 'Diff'],
                         'Load completed/deleted'.upper())
                    pfmt([(k, v) for k, v in loaded.items()], ['Loaded Files', 'Loaded Tables'])
                    pfmt([(k, v) for k, v in not_loaded.items()], ['Not loaded Files', 'Not loaded Tables'])
                    assert os.path.isdir(okdir)
                    if 0:
                        cli.MoveSnapFolder(okdir)
                    processed.append(okfn)
                    #break;
                if not ok_files.file_names:
                    counter = itertools.count(1)
                    pfmt([['No OK files at working dir: [ %s ]' % cli.pa[0]]], ['No files'])
                if processed:
                    counter = itertools.count(1)
                    pfmt([[next(counter), x] for x in processed], ['##', 'Processed'])
                if not_processed:
                    counter = itertools.count(1)
                    pfmt([[next(counter), x] for x in not_processed], ['##', 'Not processed (backup exists)'])
    if 0:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
    cli.done()
import sys
import threading
import subprocess
from collections import OrderedDict
from pprint import pprint as pp

from include.utils import create_reader, create_writer, create_actor, InOut
from include.fmt import pfmt

e = sys.exit
cli, conn_pool = app_init
Email = create_actor(aname='Email', app_init=app_init)
#ok_files = InOut()
lite_conn = InOut()
#ok_files.file_names=[]
dump_file = InOut()
file_size_rows = 250000
email_args = {'email_subject': 'GTX->Vertica'}
insert_stats = InOut(source_cnt=-1, inserted_cnt=-1)


def run():
    skip = 2
    serviceName = 'gfin'
    #deleted = {}
from include.utils import create_reader, create_writer, create_actor, InOut

cli, conn_pool = app_init
#IQ = create_reader('IQ', app_init=app_init )
#FileWriter = create_writer('File', app_init=app_init )
Email = create_actor('Email', app_init=app_init)
IQ_cursor = InOut()
s3_file_names = InOut()
dump_file = InOut()
file_size_rows = 250000
email_args = {'email_subject': 'IQ.procedure->IQ'}
from_conn = InOut()


def run():
    total_ins = 0
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = cli.get_scfg(_src_class)
        _dbname = cli.scfg["sourceDb"]
        fromDB = create_reader(_dbname, app_init=app_init)
        FileWriter = create_writer('Dir', app_init=app_init)
        fromDB.set_loader(FileWriter)
def run():
    ext_files = []
    for _source, val in cli.cfg['source'].items():
        _dbname = val["sourceDb"]
        DB = create_reader(_dbname, app_init=app_init)
        FileWriter = create_writer('File', app_init=app_init)
        data_files.file_names = []
        uploaded_files.file_names = []
        if 1:
            cli.set_source(_source)
            DB.set_loader(FileWriter)
            total_ins = 0
            scfg = cli.get_scfg()
            source_chunk_size = scfg['sourceChunkSize']  #maxRowsPerFile
            for cid, iq_data in enumerate(DB.fetch_many(chunk_size=source_chunk_size,
                                                        source=scfg,
                                                        qname='sourceStmt',
                                                        out=InOut(),
                                                        skip_header=0)):
                dump_file = InOut()
                FileWriter.open_file(id=cid, out=dump_file)
                if 1:  #not total_ins:
                    dump_cfg = cli.get_dcfg()
                    FileWriter.create_header(file=dump_file, header=DB.get_header(), cfg=dump_cfg)
                FileWriter.append_data(file=dump_file, data=iq_data, cfg=dump_cfg)
                total_ins += len(iq_data.data)
                FileWriter.close_file(file=dump_file)
                ext_files.append(dump_file.fpath)
            #if not total_ins: #in case there's no data
            #    FileWriter.create_header(file = dump_file, header = DB.get_header(), cfg = dump_cfg)
            pp(ext_files)
        if 1:  #Load to DB
            cli.set_source(_source)
            file_scfg = cli.cfg['dump'][_source]
            path = cli.get_parsed(ckey='dumpDir', cfg=file_scfg)
            Dir.get_files(path=path, out=data_files)
            pp(data_files.file_names)
            if 1:
                to_conn = InOut()
                for _target, val in cli.cfg['target'][_source].items() or []:
                    tcfg = cli.cfg['target'][_source][_target]
                    _todbname = val["targetDb"]
                    toDB = create_writer(_target, app_init=app_init)
                    #print toDB
                    #e()
                    #toDB.begin_transaction ( out = to_conn )
                    rec_delim = '\n'
                    skip_header = 0
                    #S3.upload_files ( file_names = data_files, out = uploaded_files, skip_header=skip_header, rec_delim=rec_delim)
                    toDB.insert_files(file_names=data_files,
                                      out=uploaded_files,
                                      skip_header=skip_header,
                                      rec_delim=rec_delim,
                                      cfg=(file_scfg, tcfg))
                    #trans = to_conn, file_names = data_files, qname = 'insertStmt', cfg = (file_scfg, tcfg) )
                    #toDB.commit_transaction ( trans = to_conn)
    if 0:
        Email.send_email(**email_args)
cli, conn_pool = app_init

import sys
from collections import OrderedDict
from pprint import pprint as pp

from include.utils import create_reader, create_writer, create_actor, InOut
from include.fmt import ppe

e = sys.exit
Email = create_actor(aname='Email', app_init=app_init)
insert_stats = InOut(inserted_cnt=-1)
read_stats = InOut(total_read=-1)
email_args = {'email_subject': 'IQ->REST->IQ'}
from_conn = InOut()
trans_ids = InOut()
rest_pipe = InOut()
to_conn = InOut()
import sys
from collections import OrderedDict
from pprint import pprint as pp

from include.utils import create_reader, create_writer, create_actor, InOut

e = sys.exit
cli, conn_pool = app_init
Email = create_actor(aname='Email', app_init=app_init)
Dir = create_reader(aname='Dir', app_init=app_init)
dump_file = InOut()
data_files = InOut()
data_files.file_names = []
insert_stats = InOut(inserted_cnt=-1)
file_size_rows = 250000
email_args = {'email_subject': 'IQ->file->SQL'}
from_conn = InOut()
term_line = False


def run():
    lite_tbl = {}
    stats = {}
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
def open_stream(self, dbcfg, qname, out):
    global actors
    cli = self.cli
    alt_cols = {}
    from_cols = {}
    for id, col in enumerate(cli.scfg["columnMappings"]):
        from_cols[int(id)] = col['columnName'].upper().encode()
        if col.get('altColName'):
            alt_cols[int(id)] = col['columnName'].upper().encode()
    assert hasattr(self, 'loader'), 'You must call "set_loader" first'
    if self.loader.cln not in ['Dir']:
        to_cols = self.loader.get_columns()
        assert to_cols
        #pp(to_cols)
        #e()
        assert len(from_cols) == len(to_cols), \
            'Config vs Target column count mismatch (%d != %d)' % (len(from_cols), len(to_cols))
        miss = 0
        for id, col in from_cols.items():
            #print (col, to_cols.keys())
            assert col in to_cols, 'Config column "%s" does not exists in Target table "%s"' % (
                col, cli.tcfg['targetTable'])
            if not int(id) == int(to_cols[col]):
                log.error('Config column "%s" order is wrong (Config# %d != Target# %d)' % (col, id, to_cols[col]))
                miss += 1
        assert miss == 0
    else:
        to_cols = {}
    col_map = None
    #Out = collections.namedtuple('Out','pipe actor col_map')
    cli = self.cli
    apx = self.apx
    mock_file = cli.mf
    if not self.conn:
        self.begin_transaction(env=cli.scfg['sourceDb'], out=InOut())
    assert self.conn
    stmt = self.get_query(dbcfg, qname)
    #pp(stmt)
    assert stmt
    from collections import OrderedDict
    from_cols = OrderedDict()
    if 1:
        if mock_file:
            log.info('%s: Using mock file: %s' % (self.cln, mock_file))
            assert os.path.isfile(mock_file)
            import codecs
            mfh = codecs.open(mock_file, encoding='latin-1')
            #mfh=open(mock_file,'rb')
            if 1:
                header = mfh.readline().strip().split(str(self.cli.csep.decode()))
                for id, column in enumerate(header):
                    from_cols[id] = column.encode().upper()
                    # map the uppercased header name to its position, mirroring the DB-cursor branch below
                    to_cols[from_cols[id]] = id
                #to_cols=from_cols
                #pp(from_cols)
                #e()
                col_map = self.get_col_map(from_cols, to_cols)
                pipe = FileStreamer(self.cli, fh=mfh)
        else:
            pyodbc.pooling = False
            cur = self.conn.cursor()
            start_time = time.time()
            if 1:
                if 1:
                    log.debug(stmt)
                    cur.execute(stmt)
                    for id, column in enumerate(cur.description):
                        from_cols[id] = column[0].upper().encode()
                        if self.loader.cln in ['Dir']:
                            if id in alt_cols:
                                cname = alt_cols[id]
                            else:
                                cname = column[0].upper().encode()
                            to_cols[cname] = id
                    col_map = self.get_col_map(from_cols, to_cols)
                    pipe = DbStreamer(self.cli, cur=cur, start_time=start_time)
    with StreamSlicer(cli,
                      pipe,
                      apx,
                      max_rows_to_read=self.cli.max_rows_to_read,
                      col_map=col_map,
                      stmt=stmt) as pipe:
        out.pipe, out.actor, out.col_map = pipe, self.cln, col_map
        return out
""" """ import sys from pprint import pprint as pp from include.utils import create_reader, create_writer, create_actor, InOut e = sys.exit cli, conn_pool = app_init Email = create_actor('Email', app_init=app_init) dump_file = InOut() file_size_rows = 250000 email_args = {'email_subject': 'DB->file'} def run(): lite_tbl = {} for _source, val in cli.cfg['source'].items(): _dbname = val["sourceDb"] DB = create_reader(_dbname, app_init=app_init) FileWriter = create_writer('File', app_init=app_init) #data_files.file_names=[] if 1: cli.set_source(_source) DB.set_loader(FileWriter) total_ins = 0 FileWriter.open_file(out=dump_file)
from include.utils import create_reader, create_writer, create_actor, InOut

cli, conn_pool = app_init
IQ = create_reader('IQ', app_init=app_init)
IQ_Writer = create_writer('IQ', app_init=app_init)
Email = create_actor('Email', app_init=app_init)
IQ_cursor = InOut()
s3_file_names = InOut()
snow_conn = InOut()
##
##
email_args = {'email_subject': 'IQ.procedure->IQ'}
##
##


def run():
    IQ.set_loader(IQ_Writer)
    IQ.open_stream(dbcfg=cli.scfg, qname='sourceStmt', out=IQ_cursor)
    IQ_Writer.begin_transaction(out=snow_conn)
    IQ_Writer.purge_data(trans=snow_conn, stmt='purgeStmt')
    IQ_Writer.bulk_copy(trans=snow_conn, file_names=s3_file_names, target=cli.tcfg, qname='copyStmt')
    IQ_Writer.commit_transaction(trans=snow_conn)
    IQ_Writer.delete_files(file_names=s3_file_names)
    if 0:
        Email.send_email(**email_args)
from include.utils import create_reader, create_writer, create_actor, InOut
from include.fmt import pfmtd, psql
from collections import OrderedDict

cli, conn_pool = app_init
Email = create_actor(aname='Email', app_init=app_init)
IQ_cursor = InOut()
s3_file_names = InOut()
dump_file = InOut()
file_size_rows = 25000
email_args = {'email_subject': 'IQ.procedure->IQ'}
data_files = InOut()
data_files.file_names = []
insert_stats = {}
file_stats = {}
from_conn = InOut()
to_conn = InOut()


def run():
    stats = {}
def run():
    lite_tbl = {}
    stats = {}
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)
        _dbname = cli.scfg["sourceDb"]
        fromDB = create_reader(aname=_src_class, app_init=app_init)
        #FileWriter = create_writer(aname ='File', app_init=app_init )
        data_files.file_names = []
        if 1:
            cli.set_source(_source)
            fromDB.begin_transaction(env=cli.scfg['sourceDb'], out=from_conn)
            for _dmp_class, val in cli.cfg['dump'][_source].items() or []:
                FileWriter = create_writer(aname=_dmp_class, app_init=app_init)
                fromDB.set_loader(FileWriter)
                cli.dcfg = dcfg = cli.get_dcfg(_dmp_class)
                for _trg_class, val in cli.cfg['target'][_source].items() or []:
                    cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                    file_ins_cnt = 0
                    total_ins = 0
                    FileWriter.open_file(out=dump_file)
                    print(dump_file.fpath)
                    if 1:
                        #for iq_data in DB.fetch_many ( chunk_size=file_size_rows, source = cli.get_scfg(), qname = 'sourceStmt', out=InOut(), skip_header=0 ):
                        for iq_data in fromDB.fetch_many(chunk_size=file_size_rows,
                                                         source=cli.scfg,
                                                         qname='sourceStmt',
                                                         out=InOut(),
                                                         skip_header=0,
                                                         terminate_line=term_line):
                            if not file_ins_cnt:
                                FileWriter.create_header(file=dump_file,
                                                         header=fromDB.get_header(),
                                                         cfg=cli.dcfg,
                                                         terminate_line=term_line)
                            FileWriter.append_data(file=dump_file, data=iq_data, cfg=cli.dcfg)
                            file_ins_cnt += len(iq_data.data)
                        if not file_ins_cnt:  #in case there's no data
                            FileWriter.create_header(file=dump_file,
                                                     header=fromDB.get_header(),
                                                     cfg=cli.dcfg,
                                                     terminate_line=term_line)
                        FileWriter.close_file(file=dump_file)
                        total_ins += file_ins_cnt
                    if 1:  #//check if there's data in a file
                        dataFile = create_reader(aname='File',
                                                 app_init=app_init,
                                                 file_name=dump_file.fpath,
                                                 scfg=dcfg)
                        dataFile.describe()
                        lcnt = dataFile.line_count() - cli.header_size(dcfg)
                        assert lcnt, 'Dump file is empty\n%s' % dump_file.fpath
                        #e()
                    if 1:  #Load to DB
                        cli.set_source(_source)
                        dir_scfg = cli.get_dcfg(_dmp_class)
                        path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
                        DirReader = create_reader(aname=_dmp_class, app_init=app_init)
                        DirReader.glob_dir(path=path, out=data_files, ext='*.*')
                    if 1:
                        to_conn = InOut()
                        _todbname = tcfg["targetDb"]
                        toDB = create_writer(aname=_todbname, app_init=app_init)
                        toDB.begin_transaction(env=cli.scfg['sourceDb'], out=to_conn)
                        #toDB.begin_transaction ( out = to_conn )
                        toDB.bulk_insert(trans=to_conn,
                                         file_names=data_files,
                                         qname='insertStmt',
                                         cfg=(dir_scfg, tcfg),
                                         out=insert_stats)
                        toDB.commit_transaction(trans=to_conn)
                    if 1:
                        FileWriter.delete_dump(data_files)
                    stats['%s->%s' % (_dbname, _todbname)] = st = OrderedDict()
                    st['source_cnt'] = cli.get_src_row_count(fromDB) if not cli.lame_duck else cli.lame_duck
                    st['total_extracted'] = total_ins
                    st['total_inserted'] = insert_stats.inserted_cnt
    if 1:
        for k, v in stats.items():
            assert v['source_cnt'] == v['total_extracted'], " %s <> %s" % (
                v['source_cnt'], v['total_extracted'])
            assert v['source_cnt'] == v['total_inserted']
    if 1:
        email_args.update(dict(cli_stats=stats))
        Email.send_email(**email_args)
def run():
    stats = {}
    total_ins = 0
    term_line = True
    #//validate cols
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)
        for _trg_class, val in cli.cfg['target'][_source].items() or []:
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
            if tcfg.get('accountingDate', None):
                #//set acct_year, acct_mon for new target table naming
                fmt = cli.get_parsed(ckey='accountingDateFmt', cfg=tcfg)
                cli.set_target_table(tcfg=tcfg,
                                     acct_date=cli.get_parsed(ckey='accountingDate', cfg=tcfg),
                                     fmt=fmt)
            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)
            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
            toDB.desc_table(schema=tcfg['targetSchema'], tbl=tcfg['targetTable'], col_ord=False)
            #// validate cols
            cfg_cols = [x[u'columnName'] for x in cli.scfg[u'columnMappings']]
            tcols = toDB.get_cols()
            t_vs_c = set(tcols) - set(cfg_cols)
            c_vs_t = set(cfg_cols) - set(tcols)
            if t_vs_c:
                pfmtd([dict(c_vs_t=c_vs_t)], 'Config has columns missing in target table.')
                raise Exception('Target table has columns missing in config: %s' % t_vs_c)
            if c_vs_t:
                pfmtd([dict(t_vs_c=t_vs_c)], 'Target table has columns missing in config.')
                raise Exception('Config has columns missing in target table: %s' % c_vs_t)
            toDB.commit_transaction(trans=to_conn)
    #//transfer
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)
        _dbname = cli.scfg["sourceDb"]
        #// in include/extractor
        fromDB = create_reader(aname=_src_class, app_init=app_init)
        fromDB.begin_transaction(env=cli.scfg['sourceDb'], out=from_conn)
        if 1:  #//Extract to File
            for _dmp_class, val in cli.cfg['dump'][_source].items() or []:
                FileWriter = create_writer(aname=_dmp_class, app_init=app_init)
                fromDB.set_loader(FileWriter)
                cli.dcfg = cli.get_dcfg(_dmp_class)
                for _trg_class, val in cli.cfg['target'][_source].items() or []:
                    cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                    file_ins_cnt = 0
                    FileWriter.open_file(out=dump_file)
                    start_time = time.time()
                    # //if fetch_many is not in IQ - it's in include/extractor/common/Extractor.py
                    for iq_data in fromDB.fetch_many(chunk_size=file_size_rows,
                                                     source=cli.scfg,
                                                     qname='sourceStmt',
                                                     out=InOut(),
                                                     skip_header=0,
                                                     terminate_line=term_line):
                        if 1:
                            if not file_ins_cnt:
                                FileWriter.create_header(file=dump_file,
                                                         header=fromDB.get_header(),
                                                         cfg=cli.dcfg,
                                                         terminate_line=term_line)
                            FileWriter.append_data(file=dump_file, data=iq_data, cfg=cli.dcfg)
                            file_ins_cnt += len(iq_data.data)
                            FileWriter.terminate(file=dump_file)
                            print(len(iq_data.data))
                            print('Elapsed read/write: %s' % (time.time() - start_time))
                            start_time = time.time()
                    if not file_ins_cnt:  #in case there's no data
                        FileWriter.create_header(file=dump_file,
                                                 header=fromDB.get_header(),
                                                 cfg=cli.dcfg,
                                                 terminate_line=term_line)
                    #else:
                    #    FileWriter.terminate(file = dump_file)
                    FileWriter.close_file(file=dump_file)
                    total_ins += file_ins_cnt
        fromDB.desc_cur(cur=from_conn.cur, colord=False)
        fromDB.commit_transaction(trans=from_conn)
    log.info('Total records saved: %d' % total_ins)
    #// Load to IQ
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        DirReader = create_reader(aname=_src_class, app_init=app_init)
        if 1:  #//Get the file names
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
            DirReader.glob_dir(path=path, out=data_files, ext='*.*')
        if 1:  #//Load to DB
            for _trg_class, val in cli.cfg['target'][_source].items() or []:
                cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                _dbname = tcfg["targetDb"]
                toDB = create_writer(aname=_trg_class, app_init=app_init)
                toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
                table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
                toDB.desc_table(schema=tcfg['targetSchema'], tbl=tcfg['targetTable'], col_ord=False)
                #// validate cols
                cfg_cols = [x[u'columnName'] for x in cli.scfg[u'columnMappings']]
                acols = cli.get_alt_cols(scfg)
                tcols = toDB.get_cols()
                fcols_alt = []
                for data_file in data_files.file_names:
                    dataFile = create_reader(aname='File',
                                             app_init=app_init,
                                             file_name=data_file,
                                             scfg=dir_scfg)
                    dataFile.describe()
                    file_stats[data_file] = dataFile.line_count() - cli.header_size(dir_scfg)
                    fcols_alt = [acols.get(x.decode(), x.decode())
                                 for x in dataFile.get_header(data_file, dir_scfg)]
                    f_vs_c = set(fcols_alt) - set(cfg_cols)
                    c_vs_f = set(cfg_cols) - set(fcols_alt)
                    f_vs_t = set(fcols_alt) - set(tcols)
                    t_vs_f = set(tcols) - set(fcols_alt)
                    if f_vs_c:
                        pfmtd([dict(c_vs_f=c_vs_f)], 'Config has columns missing in dump file.')
                        pfmtd([dict(f_vs_t=f_vs_t)], 'Dump file has columns missing in target table.')
                        pfmtd([dict(t_vs_f=t_vs_f)], 'Target table has columns missing in dump file.')
                        raise Exception('Target table has columns missing in config: %s' % f_vs_c)
                    if c_vs_f:
                        pfmtd([dict(f_vs_c=f_vs_c)], 'Dump file has columns missing in config.')
                        pfmtd([dict(f_vs_t=f_vs_t)], 'Dump file has columns missing in target table.')
                        pfmtd([dict(t_vs_f=t_vs_f)], 'Target table has columns missing in dump file.')
                        raise Exception('Config has columns missing in target table: %s' % c_vs_f)
                    if f_vs_t:
                        pfmtd([dict(f_vs_c=f_vs_c)], 'Dump file has columns missing in config.')
                        pfmtd([dict(c_vs_f=c_vs_f)], 'Config has columns missing in dump file.')
                        pfmtd([dict(t_vs_f=t_vs_f)], 'Target table has columns missing in dump file.')
                        raise Exception('Dump file has columns missing in target table: %s' % f_vs_t)
                    if t_vs_f:
                        pfmtd([dict(f_vs_c=f_vs_c)], 'Dump file has columns missing in config.')
                        pfmtd([dict(c_vs_f=c_vs_f)], 'Config has columns missing in dump file.')
                        pfmtd([dict(f_vs_t=f_vs_t)], 'Dump file has columns missing in target table.')
                        raise Exception('Target table has columns missing in dump file: %s' % t_vs_f)
                #toDB.truncate_table ( table = table )
                toDB.bulk_load(trans=to_conn,
                               file_names=data_files,
                               qname='insertStmt',
                               cfg=(dir_scfg, tcfg),
                               out=insert_stats,
                               header=fcols_alt)
                toDB.commit_transaction(trans=to_conn)
                for k in file_stats.keys():
                    assert file_stats[k], 'Dump file is empty'
                    assert insert_stats[k] not in [-1], 'Insert failed'
                    assert insert_stats[k] == file_stats[k], \
                        'Insert vs file count diff: %s<>%s for file \n%s' % (insert_stats[k], file_stats[k], k)
                if 1:
                    stmt = cli.get_parsed(ckey='afterCountStmt', cfg=tcfg)
                    cur = toDB.exec_query(stmt)
                    after_cnt = cur.fetchall()[0][0]
                    print(after_cnt)
                stats['%s->%s' % (_source, _trg_class)] = st = OrderedDict()
                st['source_cnt'] = total_ins
                st['total_inserted'] = sum(insert_stats.values())
                st['after_count'] = after_cnt
                st['rollback'] = cli.get_parsed(ckey='rollbackStmt', cfg=tcfg)
                st['purge'] = cli.get_parsed(ckey='purgeStmt', cfg=tcfg)
                if 1:  #//validate
                    try:
                        assert st['source_cnt'] == st['total_inserted'], \
                            "source_cnt %s <> total_inserted %s" % (st['source_cnt'], st['total_inserted'])
                        assert st['source_cnt'] == st['after_count'], \
                            "source_cnt %s <> after_count %s" % (st['source_cnt'], st['after_count'])
                    except Exception as ex:
                        del_cnt = toDB.exec_dml(dml=st['rollback'], trans=to_conn, commit=True)
                        log.info('Rolled back recs: %d' % del_cnt)
                        raise
                if 1:  #//purge
                    purge_cnt = toDB.exec_dml(dml=st['purge'], trans=to_conn, commit=True)
                    log.info('Purged old recs: %d' % purge_cnt)
                toDB.commit_transaction(trans=to_conn)
    if 0:
        Email.send_email(**email_args)
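# The load step above cross-checks three column sets (config mappings, dump-file
# header, target table) with plain set differences before bulk_load runs.  A tiny
# standalone illustration of the same check, using made-up column lists rather than
# a real config or table:
cfg_cols = ['TRADE_ID', 'ACCT', 'AMOUNT']
file_cols = ['TRADE_ID', 'ACCT', 'AMOUNT']
table_cols = ['TRADE_ID', 'ACCT', 'AMOUNT', 'LOAD_TS']

# any non-empty difference makes run() above raise before bulk_load is attempted
print(set(file_cols) - set(cfg_cols))    # set(): file header and config agree
print(set(table_cols) - set(file_cols))  # {'LOAD_TS'}: run() would report this and raise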
""" """ import sys import threading import subprocess from pprint import pprint as pp from include.utils import create_reader, create_writer, create_actor, InOut e = sys.exit cli, conn_pool = app_init Dir = create_reader('Dir', app_init=app_init) SQLite = create_writer('SQLite', app_init=app_init) Email = create_actor('Email', app_init=app_init) data_files = InOut() lite_conn = InOut() ## ## email_args = {'email_subject': 'File->SQLite'} ## ## data_files.file_names = [] def run(): Dir.get_files(out=data_files)
""" time python cli.py -nopp 1 -dcf config/db_config.json -pcf config/proc/file_s3_snow/DY_Position_SD.json --proc_params \ test.csv.gz -ld 100\ 2>&1| tee DY_Position_SD.log """ import threading import subprocess from include.utils import create_reader, create_writer, create_actor, InOut cli, conn_pool = app_init Snowflake = create_writer('Snowflake', app_init=app_init) S3 = create_writer('S3', app_init=app_init) Email = create_actor('Email', app_init=app_init) s3_file_names = InOut() snow_conn = InOut() ## ## email_args = {'email_subject': 'File->Snowflake'} ## ## threads = {} s3_file_names.file_names = [] def run(): Snowflake.begin_transaction(out=snow_conn) assert isinstance(