def get_log_stats(self, ll):
    """Scan log lines for run statistics and build an HTML summary.

    Args:
        ll: iterable of log-line strings.

    Returns:
        Tuple ``(html_body, out)`` where ``html_body`` is an HTML
        fragment summarising the run and ``out`` is an empty dict
        (kept for interface compatibility with sibling variants).
    """
    stats = {}
    # (name, pattern) pairs; each pattern captures a group named after the key.
    # Raw strings avoid py3 invalid-escape warnings; patterns unchanged.
    # NOTE(review): 'total_elapsed' and 'cli_elapsed' use the same pattern —
    # preserved as-is; confirm whether one is redundant.
    regs = [
        ('total_elapsed',
         r'Cli: Total elapsed: (?P<total_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('env', r'Env: \[(?P<env>[A-Z]+)\]'),
        ('table', r', To-Table:(?P<table>[a-zA-Z_]+)'),
        ('schema', r', To-Schema:(?P<schema>[a-zA-Z_\.]+)'),
        ('cli_elapsed',
         r'Cli: Total elapsed: (?P<cli_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('sql_ins', r'SQLServer: Read:(?P<sql_ins>[0-9,.]+),'),
        ('dump_fn', r'Dump file: (?P<dump_fn>[a-zA-Z_\.\/0-9,.]+)'),
    ]
    logdata = os.linesep.join(ll)
    for group, regexp in regs:
        m = re.search(regexp, logdata)
        try:
            stats[group] = m.group(group)
        except AttributeError:  # narrowed from bare except: m is None on no match
            stats[group] = 'n/a'

    def _section(ptitle, headers):
        # Render one formatted stats table and echo it to stdout.
        section = get_formatted(ptitle, [[stats.get(h) for h in headers]],
                                headers, join=True)
        print(section)
        return section

    _section('IQ', ['env'])
    _section('Dump file', ['env', 'dump_fn'])
    _section('SQL Server', ['env', 'sql_ins'])

    html_body = """
Parameters provided: %s<br>
Sources: %s <br>
Started On: %s<br>
Ended On: %s<br>
Records Added: %s<br>
Logs Path: %s<br>
""" % (sys.argv, cli.cfg['source'].keys(), cli.asod,
       time.strftime("%Y-%m-%d %H:%M:%S"), stats['sql_ins'], log.file_name)
    out = {}
    return html_body, out
def get_log_stats(self, ll):
    """Scan log lines for SQLServer-load statistics and build a summary.

    Args:
        ll: iterable of log-line strings.

    Returns:
        Tuple ``(html_body, sections)`` where ``sections`` maps section
        titles ('SQLServer', 'File') to formatted-table strings.
    """
    stats = {}
    # Raw strings avoid py3 invalid-escape warnings; patterns unchanged.
    regs = [
        ('total_elapsed',
         r'Cli: Total elapsed: (?P<total_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('env', r'Env: \[(?P<env>[A-Z]+)\]'),
        ('table', r', To-Table:(?P<table>[a-zA-Z_]+)'),
        ('schema', r', To-Schema:(?P<schema>[a-zA-Z_\.]+)'),
        ('cli_elapsed',
         r'Cli: Total elapsed: (?P<cli_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('sql_ins', r'SQLServer: Inserted:(?P<sql_ins>[0-9,.]+)'),
    ]
    logdata = os.linesep.join(ll)
    for group, regexp in regs:
        m = re.search(regexp, logdata)
        try:
            stats[group] = m.group(group)
        except AttributeError:  # narrowed from bare except: m is None on no match
            stats[group] = 'n/a'

    def _section(ptitle, headers):
        # Render one formatted stats table and echo it to stdout.
        section = get_formatted(ptitle, [[stats.get(h) for h in headers]],
                                headers, join=True)
        print(section)
        return section

    iq_stats = _section('SQLServer', ['env'])
    snow_stats = _section('File', ['env', 'schema', 'table', 'sql_ins'])

    html_body = """
Parameters provided: %s<br>
Source: %s - %s<br>
Target: %s - %s<br>
Filter: %s<br>
Started On: %s<br>
Ended On: %s<br>
Records Added: %s<br>
Logs Path: %s<br>
""" % (sys.argv, cli.scfg["sourceDb"], cli.pa[0], cli.tcfg["targetDb"],
       cli.pa[2], cli.pa[1], cli.asod,
       time.strftime("%Y-%m-%d %H:%M:%S"), stats['sql_ins'], log.file_name)
    return html_body, {'SQLServer': iq_stats, 'File': snow_stats}
def show_row(self, data, file_cols):
    """Pretty-print the first row of *data* next to its column metadata.

    Args:
        data: list of rows; only ``data[0]`` is shown.
        file_cols: sequence of ``(name, type)`` pairs, one per column.
    """
    from include.fmt import get_formatted
    # Fixes: removed dead `if 1:` wrapper and unused enumerate index;
    # build the name list with a comprehension instead of loop-and-append.
    names = [col[0] for col in file_cols]
    rows = [[j, names[j], file_cols[j][1], value, type(value)]
            for j, value in enumerate(data[0])]
    print(get_formatted('', rows, ['id', 'col', 'type', 'data', 'dtype'],
                        join=True))
def show_row(self, data, file_cols):
    """Display row zero of *data* alongside each column's name and type."""
    from include.fmt import get_formatted
    header = ['id', 'col', 'type', 'data', 'dtype']
    names = [c[0] for c in file_cols]
    table = []
    for idx, value in enumerate(data[0]):
        table.append([idx, names[idx], file_cols[idx][1], value, type(value)])
    print(get_formatted('', table, header, join=True))
def get_log_stats(self, ll):
    """Scan log lines for IQ/S3/Snowflake load statistics.

    Args:
        ll: iterable of log-line strings.

    Returns:
        Tuple ``(html_body, sections)`` where ``sections`` maps
        'IQ', 'S3' and 'Snowflake' to formatted-table strings.
    """
    stats = {}
    # Raw strings avoid py3 invalid-escape warnings; patterns unchanged.
    # NOTE(review): 'total_elapsed' and 'cli_elapsed' share one pattern —
    # preserved as-is; confirm whether one is redundant.
    regs = [
        ('first_row',
         r'First row elapsed: (?P<first_row>[0-9., ]+sec/[0-9., ]+min)'),
        ('s3_loaded', r'S3: Loaded:(?P<s3_loaded>[0-9,]+), '),
        ('s3_read', r'S3: Loaded:[0-9,]+, Read:(?P<s3_read>[0-9,]+), '),
        ('s3_raw', r'S3: Total:(?P<s3_raw>[0-9., A-Z]+),'),
        ('snow_del', r'Records deleted: (?P<snow_del>[0-9]+)'),
        ('snow_ins', r'Snowflake: Inserted:(?P<snow_ins>[0-9,.]+),'),
        ('s3_files', r', Files:(?P<s3_files>[0-9]+)'),
        ('s3_compressed',
         r'Compressed \(gz\)\:(?P<s3_compressed>[0-9., A-Z]+)'),
        ('total_elapsed',
         r'Cli: Total elapsed: (?P<total_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('env', r'Env: \[(?P<env>[A-Z]+)\]'),
        ('table', r', To-Table:(?P<table>[a-zA-Z_]+)'),
        ('schema', r', To-Schema:(?P<schema>[a-zA-Z_\.]+)'),
        ('cli_elapsed',
         r'Cli: Total elapsed: (?P<cli_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('proc', r'Procedure: [a-zA-Z]+\.(?P<proc>[0-9a-zA-Z]+_WRAPPER) '),
        ('client', r'ClientId: (?P<client>[0-9]+)'),
        ('asof_dt', r'AsOfDate: (?P<asof_dt>[0-9/]+)'),
    ]
    logdata = os.linesep.join(ll)
    for group, regexp in regs:
        m = re.search(regexp, logdata)
        try:
            stats[group] = m.group(group)
        except AttributeError:  # narrowed from bare except: m is None on no match
            stats[group] = 'n/a'

    def _section(ptitle, headers):
        # Render one formatted stats table and echo it to stdout.
        section = get_formatted(ptitle, [[stats.get(h) for h in headers]],
                                headers, join=True)
        print(section)
        return section

    iq_stats = _section('IQ', ['env', 'proc', 'client', 'asof_dt', 'first_row'])
    s3_stats = _section('S3', ['env', 's3_files', 's3_read', 's3_loaded',
                               's3_raw', 's3_compressed'])
    snow_stats = _section('Snowflake', ['env', 'schema', 'table',
                                        'snow_del', 'snow_ins'])

    html_body = """
Parameters provided: %s<br>
Source: %s - %s<br>
Target: %s - %s<br>
Started On: %s<br>
Ended On: %s<br>
Records Added: %s<br>
Logs Path: %s<br>
""" % (sys.argv, cli.scfg["sourceDb"], cli.scfg["procName"],
       cli.tcfg["targetDb"], cli.tcfg["targetTable"], cli.asod,
       time.strftime("%Y-%m-%d %H:%M:%S"), stats['snow_ins'], log.file_name)
    return html_body, {
        'IQ': iq_stats,
        'S3': s3_stats,
        'Snowflake': snow_stats
    }
def get_log_stats(self, ll):
    """Scan log lines for SQLServer/S3/SnowFlake load statistics.

    Args:
        ll: iterable of log-line strings.

    Returns:
        Tuple ``(iq_stats, s3_stats, snow_stats)`` of formatted-table
        strings (each is also echoed to stdout).
    """
    stats = {}
    # Raw strings avoid py3 invalid-escape warnings; patterns unchanged.
    regs = [
        ('first_row',
         r'First row elapsed: (?P<first_row>[0-9., ]+sec/[0-9., ]+min)'),
        ('s3_loaded', r'S3: Loaded:(?P<s3_loaded>[0-9,]+), '),
        ('s3_read', r'S3: Loaded:[0-9,]+, Read:(?P<s3_read>[0-9,]+), '),
        ('s3_raw', r'S3: Total:(?P<s3_raw>[0-9., A-Z]+),'),
        ('snow_del', r'Records deleted: (?P<snow_del>[0-9]+)'),
        ('snow_ins', r'Snowflake: Inserted:(?P<snow_ins>[0-9,.]+),'),
        ('s3_files', r', Files:(?P<s3_files>[0-9]+)'),
        ('s3_compressed',
         r'Compressed \(gz\)\:(?P<s3_compressed>[0-9., A-Z]+)'),
        ('total_elapsed',
         r'Cli: Total elapsed: (?P<total_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('env', r'Env: \[(?P<env>[A-Z]+)\]'),
        ('table', r', To-Table:(?P<table>[a-zA-Z_]+)'),
        ('schema', r', To-Schema:(?P<schema>[a-zA-Z_\.]+)'),
        ('cli_elapsed',
         r'Cli: Total elapsed: (?P<cli_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('proc', r'Procedure: [a-zA-Z]+\.(?P<proc>[0-9a-zA-Z]+_WRAPPER) '),
        ('client', r'ClientId: (?P<client>[0-9]+)'),
        ('asof_dt', r'AsOfDate: (?P<asof_dt>[0-9/]+)'),
    ]
    logdata = os.linesep.join(ll)
    for group, regexp in regs:
        m = re.search(regexp, logdata)
        try:
            stats[group] = m.group(group)
        except AttributeError:  # narrowed from bare except: m is None on no match
            stats[group] = 'n/a'

    def _section(ptitle, headers):
        # Render one formatted stats table and echo it to stdout.
        section = get_formatted(ptitle, [[stats.get(h) for h in headers]],
                                headers, join=True)
        print(section)
        return section

    iq_stats = _section('SQLServer',
                        ['env', 'proc', 'client', 'asof_dt', 'first_row'])
    s3_stats = _section('S3', ['env', 's3_files', 's3_read', 's3_loaded',
                               's3_raw', 's3_compressed'])
    snow_stats = _section('SnowFlake', ['env', 'schema', 'table',
                                        'snow_del', 'snow_ins'])
    # Removed unreachable `if 0:` branch that built an unused 'Cli' section.
    return iq_stats, s3_stats, snow_stats
def load_file_2(self, trans, file_obj, table_name, qname, cfg, create_table=False):
    # Load a delimited dump file into a local temporary copy of `table_name`.
    #
    # NOTE(review): this block was reconstructed from a whitespace-mangled
    # source; the nesting of the `if 0:` experiment blocks and the placement
    # of the header-skip readline() calls are best-effort — verify against VCS.
    #
    # Parameters (as used below):
    #   trans        - transaction wrapper exposing .cur (DB cursor) and .conn
    #   file_obj     - file descriptor object with .file_name and .cols
    #                  (list of (name, type) pairs)
    #   table_name   - target table; a tmp_<table_name> local temp table is created
    #   qname        - unused in this body
    #   cfg          - (scfg, tcfg) pair; scfg supplies 'columnDelimiter'
    #   create_table - when True, delegate header handling to self.create_table
    scfg, tcfg = cfg
    file_name = file_obj.file_name
    #pp(file_obj.cols)
    if 1:
        assert os.path.isfile(file_name)
        with open(file_name, 'r') as fh:
            colsep = scfg['columnDelimiter']
            assert colsep
            if create_table:
                self.create_table(fh, cfg, table_name)
            else:
                # NOTE(review): both header-line skips placed in the else
                # branch — confirm against the original layout.
                fh.readline()
                fh.readline()
            data = []
            # intdata/intcols collect only the binary(16) columns of each row,
            # apparently for debugging the GUID/hex handling below.
            intdata = [[]]
            intcols = []
            for line in [x.strip() for x in fh]:
                #print line
                #data.append([x for x in line.split(colsep)[:-1]])
                # Empty fields become None (SQL NULL); trailing field after
                # the last delimiter is dropped via [:-1].
                data.append([
                    x if x else None
                    for i, x in enumerate(line.split(colsep)[:-1])
                ])
                for i, x in enumerate(line.split(colsep)[:-1]):
                    if file_obj.cols[i][1] in ['binary(16)']:
                        intdata[0].append(x if x else None)
                        intcols.append(file_obj.cols[i])
                #break
            #print (intdata)
            #pp(data)
            #self.insert_data_byrow(trans, table_name, data, cfg, file_obj.cols)
            if 1:
                cols = ','.join([col[0] for col in file_obj.cols])
                assert len(file_obj.cols) == len(data[0])
                trans.cur.fast_executemany = True
                #trans.cur.fast_executemany = False
                qmarks = '?,' * len(file_obj.cols)
                #data[0][1]='test'
                #data[0][21]='0'
                #import uuid
                #intdata[0][0]= '12957e5c1a49d09d30c2469eb1c60400'
                #bytearray(b'12957e5c1a49d09d30c2469eb1')
                #u'00000000-0000-0000-0000-000000000000'
                #unicode(uuid.uuid4())
                #pp(data[0])
                if 1:
                    # Stage into an empty structural clone of the target table.
                    tmpTbl = 'tmp_%s' % table_name
                    stmt = 'CREATE LOCAL TEMPORARY TABLE %s AS SELECT * FROM %s WHERE 1=2' % (
                        tmpTbl, table_name)
                    print(stmt)
                    trans.cur.execute(stmt)
                    #e()
                #print 777, len(intdata)
                assert len(intdata[0]) == len(intcols)
                #pp(intcols)
                #pp(intdata)
                #e()
                if 0:
                    # Disabled: second temp table for the binary columns only.
                    intTbl = 'int_%s' % table_name
                    stmt = 'CREATE LOCAL TEMPORARY TABLE %s AS SELECT * FROM %s WHERE 1=2' % (
                        intTbl, table_name)
                    print(stmt)
                    trans.cur.execute(stmt)
                    #e()
                #e()
                # GUID columns are fed as hex text through HEX_TO_BINARY(?);
                # all other columns bind as plain placeholders.
                qmarks = ',\n'.join([
                    'HEX_TO_BINARY(?)'
                    if col[0] in ['TxMasterGUID', 'SwapEventGUID'] else '?'
                    for col in file_obj.cols
                ])
                stmt = 'INSERT INTO %s (%s) values(%s)' % (
                    tmpTbl, ',\n'.join([col[0] for col in file_obj.cols]),
                    qmarks.strip(','))
                #print stmt
                pfmt([[stmt]])
                #e()
                trans.conn.autocommit = False
                # NOTE(review): the INSERT statement above is only printed;
                # the executemany call that would run it is commented out
                # below — this function appears to be mid-debugging.
                if 0:
                    # Disabled: dump the binary-column values with their types.
                    from include.fmt import get_formatted
                    cols = []
                    for i, col in enumerate(file_obj.cols):
                        cols.append(col[0])
                    print get_formatted(
                        '', [
                            [j, cols[j], file_obj.cols[j][1], d, type(d)]
                            for j, d in enumerate(intdata[0])
                            #if file_obj.cols[j][1] in \
                            #['timestamp','integer', 'date', 'double', 'tinyint','bigint','varchar(255)','varchar(32)','varchar(64)']
                            #['binary(16)']
                        ], ['id', 'col', 'type', 'data', 'dtype'],
                        join=True)
                #self.show_row(data, file_obj.cols)
                #e()
                #trans.cur.executemany(stmt, [data[0]] )
                #pp(trans.cur.fetchall())
                if 0:
                    # Disabled experiment: bulk-load via COPY ... FROM LOCAL
                    # with a hard-coded path, using FORMAT 'hex' for GUIDs.
                    # NOTE(review): the extent of this disabled block is
                    # ambiguous in the mangled source — confirm that the
                    # execute/print/binascii tail below belongs inside it.
                    stmt = "COPY %s FROM LOCAL '/home/s_dev_rdm/ab_gtx/iris.csv' DELIMITER '|'" % tmpTbl
                    stmt = "CREATE LOCAL TEMPORARY TABLE tmp123 (%s)" % ',\n'.join(
                        ['%s varchar(100)' % col[0] for col in file_obj.cols])
                    stmt = """ CREATE LOCAL TEMPORARY TABLE tmp123 (TxMasterID varchar(100), TxMasterGUID binary(16), TxDetailVersion varchar(100), TxType varchar(100), TxTypeCode varchar(100), TxSubType varchar(100), TradeDate varchar(100), SettleDate varchar(100), DelayedDeliveryDate varchar(100), UserName varchar(100), Division varchar(100), TraderID varchar(100), TraderName varchar(100), Note varchar(100), FOTradeID varchar(100), ValidFromTime varchar(100), ValidToTime varchar(100), ParentTxMasterID varchar(100), IsNotionalSettlement varchar(100), ClearingQueueTxStatusTypeCode varchar(100), PositionQueueTxStatusTypeCode varchar(100), SwapEventGUID binary(16), Desk varchar(100), DeskCode varchar(100), ClearingMethod varchar(100), ClearingMethodID varchar(100), ClearingMethodDescription varchar(100), Price varchar(100), UnitAmount varchar(100), ExecutingAccount varchar(100), ExecutingAccountID varchar(100), ClearingAccount varchar(100), ClearingAccountID varchar(100), SubAccount varchar(100), SubAccountCode varchar(100), CashSubAccount varchar(100), CashSubAccountCode varchar(100), NetSettlement varchar(100), CPartyNetSettlement varchar(100), GrossAmount varchar(100), SettlementNetID varchar(100), SettlementAdjustedDate varchar(100), SettlementAmount varchar(100), SettlementCurrencyID varchar(100), SettlementCurrency varchar(100), SettlementCounterpartyRef varchar(100), SettlementOTCStatusCode varchar(100), SettlementOTCStatus varchar(100), SettlementStatusCode varchar(100), SettlementStatus varchar(100), SettlementMatchStatusCode varchar(100), SettlementMatchStatus varchar(100), SettlementTypeCode varchar(100), SettlementType varchar(100), SettlementFlagCode varchar(100), SettlementFlag varchar(100), SettlementModifiedBy varchar(100), SettlementModifiedDateTime varchar(100), ClientID varchar(100), GroupID varchar(100), GrossNotionalAmount varchar(100), NetNotionalSecurityQty varchar(100), Quantity varchar(100), CashQuantity varchar(100), TradeCurrencyID varchar(100), ValuationCurrencyID varchar(100), AccruedInterest varchar(100), IsShortSale varchar(100), AccrualStatus varchar(100), AccrualStatusCode varchar(100), PrimaryInstrumentID varchar(100), SwapCollectionID varchar(100), EditSourceCode varchar(100), EditSource varchar(100), EditReasonCode varchar(100), EditReason varchar(100), ExecutionTime varchar(100), CurrentFace varchar(100), TradingPlaceID varchar(100), ExecutingType varchar(100), ClearingType varchar(100), ActingAs varchar(100), FuturesTradeType varchar(100), FuturesTradeTypeCode varchar(100), TBASettlementType varchar(100), TBASettlementTypeCode varchar(100), CapitalMarketType varchar(100), CapitalMarketTypeCode varchar(100), IsDeltaAdjust varchar(100), IsCorpAct varchar(100), FxRateIsMarketConvention varchar(100), OpeningOrClosing varchar(100), SwapID varchar(100), DividendInterestLiabilityRate varchar(100), DividendInterestWithholdingRate varchar(100), DividendInterestWithholdingAmount varchar(100), DividendStatus varchar(100), DividendStatusCode varchar(100), DeclaredDividendRate varchar(100), GrossDividendRate varchar(100), ReversedTxMasterID varchar(100), ReversalTxMasterID varchar(100), RebookedTxMasterID varchar(100), HasDividendReinvestmentOption varchar(100), GlobalDividendType varchar(100), GlobalDividendTypeCode varchar(100), CorporateActionID varchar(100), HasPIKOption varchar(100), FxRate varchar(100), OtherAmount varchar(100), ExDate varchar(100), RecordDate varchar(100), FrankedPercent varchar(100), IssuingCountry varchar(100), IssuingCountryID varchar(100), ConversionType varchar(100), ConversionTypeCode varchar(100), LotID varchar(100), ExternalTxIdentifierTypeKey varchar(100), IsOptDiv varchar(100), CAEventTypeID varchar(100)) """
                    trans.cur.execute(stmt)
                    pfmt([[stmt]])
                    copyfmt = ',\n'.join([
                        "%s FORMAT 'hex'" % col[0]
                        if col[0] in ['TxMasterGUID', 'SwapEventGUID'] else
                        "%s" % col[0] for col in file_obj.cols
                    ])
                    stmt = "COPY %s (%s) FROM LOCAL'/home/s_dev_rdm/ab_gtx/iris.csv' DELIMITER '|'" % (
                        tmpTbl, copyfmt)
                    pfmt([[stmt]])
                    #e()
                    trans.cur.execute(stmt)
                    print 555, trans.cur.rowcount
                    print 88888888, table_name, trans.cur.execute(
                        'select count(*) from %s' % tmpTbl).fetchall()
                    if 1:
                        # Hex-dump the loaded GUID columns for inspection.
                        import binascii
                        for row in trans.cur.execute(
                                'select TxMasterGUID,SwapEventGUID, ExternalTxIdentifierTypeKey, IsOptDiv , CAEventTypeID from %s'
                                % tmpTbl).fetchall():
                            print row
                            print binascii.hexlify(row[0])
def pfmt(data, header=None, title=''):
    """Print *data* as a formatted table via get_formatted.

    Args:
        data: list of rows (each a list of cell values).
        header: column headers; defaults to ['Col_1']. A None sentinel
            replaces the previous mutable-list default (same effective
            value, avoids the shared-default pitfall).
        title: optional table title.
    """
    if header is None:
        header = ['Col_1']
    print(get_formatted(title, data, header, join=True))
def get_log_stats(self, ll):
    """Scan log lines for stats and build per-report diff summaries.

    Args:
        ll: iterable of log-line strings.

    Returns:
        Tuple ``(html_body, out, self.to_file)`` where ``out`` maps each
        report name in ``self.report`` to a formatted diff-count table.
    """
    cli = self
    stats = {}
    # Raw strings avoid py3 invalid-escape warnings; patterns unchanged.
    regs = [
        ('total_elapsed',
         r'Cli: Total elapsed: (?P<total_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('env', r'Env: \[(?P<env>[A-Z]+)\]'),
        ('table', r', To-Table:(?P<table>[a-zA-Z_]+)'),
        ('schema', r', To-Schema:(?P<schema>[a-zA-Z_\.]+)'),
        ('cli_elapsed',
         r'Cli: Total elapsed: (?P<cli_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('sql_ins', r'SQLServer: Inserted:(?P<sql_ins>[0-9,.]+)'),
    ]
    logdata = os.linesep.join(ll)
    for group, regexp in regs:
        m = re.search(regexp, logdata)
        try:
            stats[group] = m.group(group)
        except AttributeError:  # narrowed from bare except: m is None on no match
            stats[group] = 'n/a'

    # Built but not printed/returned in the original; kept in case
    # get_formatted has side effects relied on elsewhere.
    iq_stats = get_formatted('IQ', [[stats.get(h) for h in ['env']]],
                             ['env'], join=True)
    headers = ['env', 'schema', 'table', 'sql_ins']
    snow_stats = get_formatted('SQLServer',
                               [[stats.get(h) for h in headers]],
                               headers, join=True)

    # list() keeps py2 behavior (keys() was already a list) and makes the
    # `+` concatenations below py3-safe.
    sources = list(cli.tear['source'].keys())

    html_body = """
Parameters provided: %s<br>
Sources: %s <br>
Started On: %s<br>
Ended On: %s<br>
Records Added: %s<br>
Logs Path: %s<br>
""" % (sys.argv, sources, cli.asod,
       time.strftime("%Y-%m-%d %H:%M:%S"), stats['sql_ins'], log.file_name)

    out = {}
    for rname, rdata in self.report.items():
        # One row per report: environment, the two compared sources,
        # and the number of differences found (len of the diff rows).
        headers = ['env', 'Source 1', 'Source 2', 'NumOfDiffs']
        out[rname] = get_formatted(
            rname, [[cli.rte] + sources + [len(rdata[1])]],
            headers, join=True)
        print(out[rname])
    return html_body, out, self.to_file
def get_log_stats(self, ll):
    """Scan log lines for RefID/REST/Snowflake statistics.

    Args:
        ll: iterable of log-line strings.

    Returns:
        Tuple ``(html_body, sections)`` where ``sections`` maps
        'Ref IDs', 'Rest' and 'Snowflake' to formatted-table strings.
    """
    stats = {}
    # Raw strings avoid py3 invalid-escape warnings; patterns unchanged.
    regs = [
        ('src_first_row',
         r'\[DbStreamer\] First row elapsed: (?P<src_first_row>[0-9., ]+sec/[0-9., ]+min)'),
        ('RefIDs_count', r'REST: Read:(?P<RefIDs_count>[0-9]+)'),
        ('rest_first_row',
         r'\[DataStreamer\] First row elapsed: (?P<rest_first_row>[0-9., ]+sec)'),
        ('rest_status', r'Response received from FICC: (?P<rest_status>[0-9]+)'),
        ('rest_recs', r'Records: (?P<rest_recs>[0-9]+)'),
        ('snow_del', r'Records deleted: (?P<snow_del>[0-9]+)'),
        ('snow_ins', r'Snowflake: Inserted: (?P<snow_ins>[0-9,.]+),'),
        ('cli_elapsed',
         r'Cli: Total elapsed: (?P<cli_elapsed>[0-9., ]+sec/[0-9., ]+min)'),
        ('env', r'Env: \[(?P<env>[A-Z]+)\]'),
        ('from_table', r'From-Table:(?P<from_table>[a-zA-Z_]+)'),
        ('from_schema', r'From-Schema:(?P<from_schema>[a-zA-Z_\.]+)'),
        ('to_table', r'To-Table:(?P<to_table>[a-zA-Z_]+)'),
        ('to_schema', r'To-Schema:(?P<to_schema>[a-zA-Z_\.]+)'),
    ]
    # BUG FIX: was os.sep.join(ll), which glued lines with '/' instead of
    # newlines — every sibling variant of this method uses os.linesep.
    logdata = os.linesep.join(ll)
    for group, regexp in regs:
        m = re.search(regexp, logdata)
        try:
            stats[group] = m.group(group)
        except AttributeError:  # narrowed from bare except: m is None on no match
            stats[group] = 'n/a'

    def _section(ptitle, headers):
        # Render one formatted stats table and echo it to stdout.
        section = get_formatted(ptitle, [[stats.get(h) for h in headers]],
                                headers, join=True)
        print(section)
        return section

    refs_stats = _section('Ref IDs', ['env', 'from_schema', 'from_table',
                                      'src_first_row', 'RefIDs_count'])
    rest_stats = _section('Rest', ['env', 'rest_first_row', 'rest_status',
                                   'rest_recs'])
    snow_stats = _section('Snowflake', ['env', 'to_schema', 'to_table',
                                        'snow_del', 'snow_ins'])

    html_body = """
Parameters provided: %s<br>
Source: %s - %s<br>
REST: %s<br>
Target: %s - %s<br>
Started On: %s<br>
Ended On: %s<br>
Records Added: %s<br>
Logs Path: %s<br>
""" % (sys.argv, cli.scfg["sourceDb"], cli.scfg["sourceTable"],
       cli.rcfg["sourceUrl"], cli.tcfg["targetDb"], cli.tcfg["targetTable"],
       cli.asod, time.strftime("%Y-%m-%d %H:%M:%S"), stats['snow_ins'],
       log.file_name)
    return html_body, {
        'Ref IDs': refs_stats,
        'Rest': rest_stats,
        'Snowflake': snow_stats
    }