def connectDb(): try: print '1. Connecting to mysql ....' db = mconnect.connectMysql('', 'root', '', 'localhost') print '2. Creating db...' mconnect.checkDb('ods', db) print '3. Creating procuser...' mconnect.checkUser('procuser', db) print '4. Connecting to Mysql as procuser' db = mconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') cur = db.cursor() print '5. Show tables' cur.execute('SHOW TABLES') datList = cur.fetchall() tablenameList = {} if not datList: for dat in datList: tablenameList.append(dat[0]) print '6. Connecting to oracle.......' constr = 'ods/ods@callhomeods:1521/callhomeods' oraconn = oracon.openconnect(constr) print '7. Creating table........' checkCreatetable(cur, tablenameList, oraconn, oracon) db.close() oraconn.close() return except: print "Error reported: " + str(sys.exc_info()[1])
def firstInstall(): machine_type = raw_input('Type of machine:') if machine_type == 'datamartextract': checkmakedir('/mysql/') sh.copyfile('/etc/my.cnf', '/etc/my.cnf.bak') orgfl = open('/etc/my.cnf', 'r') readLines = orgfl.readlines() newfl = open('/etc/my.cnf.new', 'w') newlines = [] for ln in readLines: if string.find(ln, 'datadir') >= 0: newlines.append('datadir=/mysql/') else: newlines.append(string.replace(ln, '\n', '')) newfl.write(string.join(newlines, '\n')) newfl.close() sh.copyfile('/etc/my.cnf.new', '/etc/my.cnf') os.system('service mysqld stop') os.system('service mysqld start') dbname = 'ods' username = '******' passwd = 'datamart' host = 'localhost' cnx = mysqlcon.connectMysql('', 'root', '', 'localhost') mysqlcon.CheckCreateDb(dbname, cnx) mysqlcon.CheckCreateUser(dbname, username, passwd, cnx)
def setconnection(dbhost, dbtype, dbsource, dbuser, sqlstmt): try: userList = [["produser", "pr0duser"], ["datamart", "datamart"], ["dbadmin", "c@llhome"], ["datapulluser", "cAllhome"]] retArr = [] usr = [] for usr in userList: if usr[0] == dbuser: passwd = usr[1] if dbtype == 'oracle': connstr = dbuser + '/' + passwd + '@' + dbhost + '/' + dbsource + ':1521' oraconn = oracon.openconnect(connstr) dbresulSet = oracon.execSql(oraconn, sqlstmt) if dbtype == 'mysql': myconn = mysql.connectMysql(dbsource, dbuser, passwd, dbhost) cur_mysql = myconn.cursor() cur_mysql.execute(sqlstmt) dbresulSet = cur_mysql.fetchall() if dbtype == 'vertica': conn = vconn.vertica_connect(dbhost, 5433, dbuser, passwd, dbsource) vdat = vconn.vertica_sql_execute(conn, sqlstmt) dbresulSet = vdat.fetchall() retArr = redata(dbresulSet) if dbtype == 'oracle': dbresulSet.close() oraconn.close() if dbtype == 'mysql': cur_mysql.close() if dbtype == 'vertica': vdat.close() return retArr except: errlog = open('log/datamart_error.log', 'a') function = 'set connection' timestr = time.strftime('%m/%d/%Y %H:%M:%S') + ' ' + function errlog.write(timestr + '\t Error reported: ' + str(sys.exc_info()[1]) + '\n') errlog.write(timestr + '\t SQL STMT: ' + sqlstmt + '\n') errlog.close()
def splitFile(fileid, filename, filepath, datatype, inservserial, datafolder, machineid): lsconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') lscurr = lsconn.cursor() try: # Setting the path to the individual inserv splitfilepath = datafolder + '/' + str(inservserial) + '/' # statslog.logwrite(fl,'Processing file ..'+ filename); inputfile = filepath + '/' + filename # statslog.logwrite(fl,'Setup input file......'+inputfile); srcfl = open(inputfile) # statslog.logwrite(fl,'Reading file ..'+ filename); readalllines = srcfl.readlines() srcfl.close sqlstmt = 'select SPLIT_FILE_TYPE, SPLIT_FILE_SEARCH_TAG, SPLIT_FILE_SKIP_LINES, ifnull(SPLIT_FILE_END_TAG,\' \') SPLIT_FILE_END_TAG,' sqlstmt += 'SPLIT_FILE_LINE_SEPERATOR,STATSID from STAT_SPLIT_FILE_LOOKUP where datatype=\'' + datatype + '\'' # statslog.logwrite(fl,'Reading file Mysql structure for the input file...'); # statslog.logwrite(fl,sqlstmt); lscurr.execute(sqlstmt) # statslog.logwrite(fl,'Received structure...'); filenameList = filename.split('.') splitfllist = list() i = 0 for splt in filenameList: if i > 0: splitfllist.append(splt) i += 1 currlinenum = 0 readfileln = readalllines # statslog.logwrite(fl,'Reading os version...'); for ln in readfileln: if string.find(ln, 'Release version') >= 0: osver = string.strip(ln[15:len(ln)]) # statslog.logwrite(fl,'Looping through the structure...'); datrec = lscurr.fetchall() for dat in datrec: currspllist = list() readline = 0 readfileln = readalllines cpstat = 0 for ln in readfileln: if string.find(ln, dat[1]) >= 0: if readline == 0: spflnm = dat[0] + '.' + str( inservserial) + '.' + string.join( splitfllist, '.') cpstat = 1 if cpstat == 1: currspllist.append(ln) readline += 1 if string.strip(ln[0:len(dat[3])]) == string.strip(dat[3]): cpstat = 0 break if not ln: print 'ln is null' if currlinenum >= len(readfileln): cpstat = 0 break nextstr = readfileln[currlinenum + 1] print 'nexstr:' + nextstr if not nextstr or string.strip( nextstr[0:len(dat[3])]) == string.strip( dat[3]) or len( string.strip(nextstr) ) == 0 or string.find(nextstr, '<') >= 0: cpstat = 0 break currlinenum += 1 splafterskip = list() currline = 0 splafterskip.append(osver + '\n') if len(currspllist) > 0: # statslog.logwrite(fl,'Creating directory......'+splitfilepath); checkmakedir(splitfilepath, 'log/splitter_' + filename + '.log') # statslog.logwrite(fl,'Writing file ....'+splitfilepath+'/'+spflnm); spfl = open(splitfilepath + '/' + spflnm, 'w') for curr in currspllist: if currline >= dat[2] and currline < len(currspllist) - 1: if len(string.strip(curr)) > 0: if string.find(string.strip(curr), '-----') == -1: if curr: splafterskip.append(curr) currline += 1 spfl.write(string.join(splafterskip)) spfl.close() #sqlstmt='begin dataload.addsplitfile(inputfileid=>'+str(fileid)+',splitfiletype=>\''+lkp[0]+'\',splitfilename=>\''+spflnm+'\''; #sqlstmt+=',splitfilepath=>\''+splitfilepath+'\',statsid=>'+str(lkp[5])+'); end;'; sqlstmt = 'INSERT INTO STATS_SPLIT_FILES(STATS_FILEID, STATS_SPLITFILE_NAME, STATS_SPLITFILE_PATH, STATSID, STATS_SPLIT_FILE_TYPE) values ' sqlstmt += '(' + str( fileid ) + ',\'' + spflnm + '\',\'' + splitfilepath + '\',' + str( dat[5]) + ',\'' + dat[0] + '\')' # statslog.logwrite(fl,sqlstmt); lscurr.execute(sqlstmt) sqlstmt = 'Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=1 where STATS_FILEID=' + str( fileid) lscurr.execute(sqlstmt) lscurr.close() lsconn.close() #os.remove('log/splitter_'+filename+'.log'); except: fl = statslog.logcreate('log/splitter_' + filename + '.log') statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1])) sqlstmt = 'Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=3 where STATS_FILEID=' + str( fileid) lscurr.execute(sqlstmt) sqlstmt = 'delete from STATS_SPLIT_FILES where STATS_FILEID=' + str( fileid) lscurr.execute(sqlstmt) lscurr.close() lsconn.close()
def splitter(): oraconn = oracon.openconnect(constr) lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') lcurr = lconn.cursor() fl = statslog.logcreate('log/splitter.log') try: statslog.logwrite(fl, 'Getting Host IP...\n') ipadd = hostlib.retHostIP() statslog.logwrite(fl, 'Getting number of threads...\n') sqlstmt = 'select machineid,numberofthreads,dataprocessing_folder,splitterloc from statsprocessingmachine where ipaddress=\'' + ipadd + '\' and enable =1' numthreadrec = oracon.execSql(oraconn, sqlstmt) for numthread in numthreadrec: numthreads = numthread[1] machineid = numthread[0] datafolder = numthread[3] numsplitterthreads = int(numthreads / 2) statslog.logwrite(fl, 'Getting number of datatypes...\n') sqlstmt = 'select distinct datatype from stat_split_file_lookup' datatyprec = oracon.execSql(oraconn, sqlstmt) dtct = 0 for dt in datatyprec: dtct += 1 numsplitthread = int(numsplitterthreads / dtct) datatyprec = oracon.execSql(oraconn, sqlstmt) lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') lcurr = lconn.cursor() for dtype in datatyprec: sqlstmt = 'SELECT COUNT(1) FROM STATSPROCESSTRANSACT WHERE FILE_PROCESS_STATUS=0 and DATATYPE=\'' + dtype[ 0] + '\'' lcurr.execute(sqlstmt) reccount = lcurr.fetchall() numInMySql = 0 for rec in reccount: numInMySql = rec[0] numsplitterthreads = numsplitthread - numInMySql if numsplitterthreads > 0: statslog.logwrite(fl, 'Processing for... :' + dtype[0]) sqlstmt = 'SELECT STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT ' sqlstmt += 'WHERE FILE_PROCESS_STATUS=0 AND MACHINEID=' + str( machineid) sqlstmt += ' AND ROWNUM <=' + str( numsplitterthreads ) + ' AND DATATYPE= \'' + dtype[0] + '\' ORDER BY STATS_FILEID' filerec = oracon.execSql(oraconn, sqlstmt) statslog.logwrite(fl, 'Inserting data into mysql...\n') sqlstmt = 'INSERT INTO STATSPROCESSTRANSACT (STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE) VALUES ' sql = [] for flrec in filerec: sql.append('(' + str(flrec[0]) + ',\'' + flrec[1] + '\',\'' + flrec[2] + '\',\'' + flrec[3] + '\')') sqlst = 'begin Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=2 where STATS_FILEID=' + str( flrec[0]) + '; commit; end;' oracon.execSql(oraconn, sqlst) if len(sql) > 0: sqlstmt = sqlstmt + string.join(sql, ',') lcurr.execute(sqlstmt) lconn.close() oraconn.close() lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') lcurr = lconn.cursor() statslog.logwrite(fl, 'Querying mysql...') sqlstmt = 'SELECT STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0 LIMIT ' + str( numsplitthread) lcurr.execute(sqlstmt) ctr = 0 statrow = lcurr.fetchall() pythonRuns = commands.getoutput( "ps -ef | grep python | grep -v grep | wc -l") if int(pythonRuns) - 3 < numsplitthread: residual = (numsplitthread - int(pythonRuns)) + 3 sqlstmt = 'SELECT STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0 LIMIT ' + str( residual) lcurr.execute(sqlstmt) ctr = 0 statrow = lcurr.fetchall() for statrec in statrow: statslog.logwrite(fl, 'Splitting file...:' + statrec[1]) flpthlist = string.split(statrec[2], '/') inservserial = flpthlist[len(flpthlist) - 2] statslog.logwrite(fl, 'Starting process for ... ' + statrec[1]) p = Process(target=splitFile, args=( statrec[0], statrec[1], statrec[2], statrec[3], inservserial, datafolder + '/splitter', machineid, )) p.daemon = True p.start() sqlstmt = 'Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=2 where STATS_FILEID=' + str( statrec[0]) lcurr.execute(sqlstmt) ctr += 1 lcurr.close() lconn.close() os.remove('log/splitter.log') except: statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1]))
def splitter(): oraconn = oracon.openconnect(constr) lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') lcurr = lconn.cursor() try: sqlstmt = 'update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=0 where FILE_PROCESS_STATUS=2' lcurr.execute(sqlstmt) ipadd = hostlib.retHostIP() sqlstmt = 'select machineid,numberofthreads,dataprocessing_folder,splitterloc,number_of_files_per_run,delay_seconds from statsprocessingmachine where ipaddress=\'' + ipadd + '\' and enable =1' numthreadrec = oracon.execSql(oraconn, sqlstmt) for numthread in numthreadrec: numthreads = numthread[1] machineid = numthread[0] datafolder = numthread[3] files_perrun = numthread[4] delay_seconds = numthread[5] nummysqltreads = int(numthreads) numsplitterthreads = int(numthreads) sqlstmt = 'select distinct datatype from stat_split_file_lookup' datatyprec = oracon.execSql(oraconn, sqlstmt) #dtct=0; datatypes = [] for dt in datatyprec: #dtct+=1; datatypes.append(dt[0]) #numsplitthread=int(numsplitterthreads/dtct); #datatyprec=oracon.execSql(oraconn,sqlstmt); #lconn = lconnect.connectMysql('ods','procuser','c@llhome','localhost'); #lcurr=lconn.cursor(); sqlstmt = 'select count(1) from statsprocesstransact where machineid=' + str( machineid ) + ' and FILE_PROCESS_STATUS=0 and datatype in (\'' + string.join( datatypes, '\'' + ',' + '\'') + '\')' datatypct = oracon.execSql(oraconn, sqlstmt) for dtctrec in datatypct: tot = dtctrec[0] datatypelist = [] sqlstmt = 'select datatype,count(1) from statsprocesstransact where machineid=' + str( machineid ) + ' and FILE_PROCESS_STATUS=0 and datatype in (\'' + string.join( datatypes, '\'' + ',' + '\'') + '\') group by datatype' datatyprec = oracon.execSql(oraconn, sqlstmt) for dt in datatyprec: recperdatatype = int( ((dt[1] * 1.0) / (tot * 1.0)) * numsplitterthreads) datatypelist.append(dt[0] + ',' + str(recperdatatype)) for dtype in datatypelist: datatype = string.split(dtype, ',') sqlstmt = 'SELECT COUNT(1) FROM STATSPROCESSTRANSACT WHERE IFNULL(FILE_PROCESS_STATUS,0)=0 AND DATATYPE= \'' + datatype[ 0] + '\'' lcurr.execute(sqlstmt) reccount = lcurr.fetchall() numInMySql = 0 for rec in reccount: numInMySql = rec[0] numsplitterthreads = int(datatype[1]) - int(numInMySql) if numsplitterthreads > 0: sqlstmt = 'SELECT STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT ' sqlstmt += 'WHERE FILE_PROCESS_STATUS=0 AND MACHINEID=' + str( machineid) sqlstmt += ' AND ROWNUM <=' + str( numsplitterthreads) + ' AND DATATYPE= \'' + datatype[ 0] + '\' ORDER BY STATS_FILEID' filerec = oracon.execSql(oraconn, sqlstmt) sqlstmt = 'INSERT INTO STATSPROCESSTRANSACT (STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE) VALUES ' sql = [] for flrec in filerec: sql.append('(' + str(flrec[0]) + ',\'' + flrec[1] + '\',\'' + flrec[2] + '\',\'' + flrec[3] + '\')') sqlst = 'begin Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=2 where STATS_FILEID=' + str( flrec[0]) + '; commit; end;' oracon.execSql(oraconn, sqlst) if len(sql) > 0: sqlstmt = sqlstmt + string.join(sql, ',') lcurr.execute(sqlstmt) lconn.close() oraconn.close() lconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') lcurr = lconn.cursor() ctr = 0 pythonRuns = commands.getoutput( "ps -ef | grep parser | grep -v grep | wc -l") if int(pythonRuns) - 3 < nummysqltreads: residual = (nummysqltreads - int(pythonRuns)) sqlstmt = 'SELECT COUNT(1) FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0' lcurr.execute(sqlstmt) totalcurr = lcurr.fetchall() for lcur in totalcurr: total_recs = lcur[0] sqlstmt = 'SELECT DATATYPE,COUNT(1) FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0 GROUP BY DATATYPE' lcurr.execute(sqlstmt) datrec = lcurr.fetchall() for lcur in datrec: datatype = lcur[0] numrecs = int( round((lcur[1] * 1.0 / total_recs * 1.0) * residual * 1.0, 0)) sqlstmt = 'SELECT distinct STATS_FILEID,STATS_FILE_NAME,STATS_FILE_PATH,DATATYPE FROM STATSPROCESSTRANSACT where ifnull(FILE_PROCESS_STATUS,0)=0 and DATATYPE=\'' + datatype + '\' LIMIT ' + str( numrecs) lcurr.execute(sqlstmt) ctr = 0 statrow = lcurr.fetchall() for statrec in statrow: flpthlist = string.split(statrec[2], '/') inservserial = flpthlist[len(flpthlist) - 2] p = Process(target=splitFile, args=( statrec[0], statrec[1], statrec[2], statrec[3], inservserial, datafolder + '/splitter', machineid, )) p.daemon = True p.start() while ctr > files_perrun: time.sleep(delay_seconds) sqlstmt = 'SELECT COUNT(1) FROM STATSPROCESSTRANSACT WHERE FILE_PROCESS_STATUS=2' lcurr.execute(sqlstmt) rowRec = lcurr.fetchall() for rec in rowRec: ctr = rec[0] sqlstmt = 'Update STATSPROCESSTRANSACT set FILE_PROCESS_STATUS=2 where STATS_FILEID=' + str( statrec[0]) + ' and ifnull(FILE_PROCESS_STATUS,0)=0' lcurr.execute(sqlstmt) ctr += 1 lcurr.close() lconn.close() #os.remove('log/splitter.log'); except: fl = statslog.logcreate('log/splitter.log') statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1]))
def create_table(tablename, part_column, sourcedb, dbuser, dbtype, hostname): try: dbname = 'ods' username = '******' passwd = 'datamart' host = 'localhost' myconn = mysql.connectMysql(dbname, username, passwd, host) tableinsert = [] datamartlogger = open('log/datamart_extract.log', 'a') cur_mysql = myconn.cursor() columnList = [] sqlstmt = 'DROP TABLE IF EXISTS ' + tablename cur_mysql.execute(sqlstmt) timestr = time.strftime('%m/%d/%Y %H:%M:%S') datamartlogger.write(timestr + '\t' + sqlstmt + '\n') insert_stmt = 'insert into ' + tablename + '(' createstmt = 'create table ' + tablename + '(' selectstmt = 'select ' collist = [] colact = [] insertact = [] sqlstmt = 'select count(1) from ' + tablename OracleCount = setconnection(hostname, dbtype, sourcedb, dbuser, sqlstmt) for orct in OracleCount: reccount = orct[0] datamartlogger.write(timestr + '\t' + tablename + ' has ' + str(reccount) + ' rows\n') sqlstmt = 'select COLUMN_NAME, lower(DATA_TYPE),DATA_LENGTH FROM USER_TAB_COLUMNS WHERE TABLE_NAME = \'' + tablename + '\' ORDER BY COLUMN_ID' OracleColResultset = setconnection(hostname, dbtype, sourcedb, dbuser, sqlstmt) #Creating table in MySQL based on different column datatype for col in OracleColResultset: collist.append(col[0]) if col[1] == 'varchar2': #converting varchar2 to varchar datatype = 'varchar(' + str(col[2]) + ')' colact.append('chr(39)||replace(' + col[0] + ',\'|\',\' \')||chr(39)') insertact.append('none') if col[1] == 'number': #converting number to decimal(20,2) datatype = 'decimal(20,2)' colact.append('to_char(nvl(' + col[0] + ',0))') insertact.append('none') if col[1] == 'date': #converting date to datetime datatype = 'datetime' colact.append('chr(39)||to_char(' + col[0] + ',\'YYYYMMDDHH24MISS\')||chr(39)') insertact.append('str_to_date(,\'%Y%m%D%H%i%S\'') if col[1] == 'char': #converting char to varchar datatype = 'varchar(' + str(col[2]) + ')' colact.append('chr(39)||' + col[0] + '||chr(39)') insertact.append('none') columnList.append(col[0] + ' ' + datatype) selectstmt += string.join(colact, '||\'|\'||') + ' from ' + tablename partstmt = ' ' if len(string.strip(part_column)) > 0: selectstmt += ' where ' + part_column + ' =' partstmt = 'select distinct ' + part_column + ' from ' + tablename + ' order by ' + part_column insert_stmt += string.join(collist, ',') + ') values ' tableinsert.append([ tablename, selectstmt, insert_stmt, string.join(insertact, ';'), partstmt, reccount ]) sqlstmt = createstmt + string.join(columnList, ',') + ')' cur_mysql.execute(sqlstmt) cur_mysql.close() datamartlogger.close() return tableinsert #Tableinsert is the combined list of tablename,selectstmt,insertstmt except: errlog = open('log/datamart_error.log', 'a') function = 'create_table' timestr = time.strftime('%m/%d/%Y %H:%M:%S') + ' ' + function errlog.write(timestr + '\t Error reported for the table: ' + tablename + '\n') timestr = time.strftime('%m/%d/%Y %H:%M:%S') + ' ' + function #errlog.write(timestr+'\t'+col[0]+'\n'); errlog.write(timestr + '\t Error reported: ' + str(sys.exc_info()[1]) + '\n') errlog.close()
#!/usr/bin/env python import oracleconnect as oracon import mysqlconnect as mysqlconn import string import sys constr = 'ods/ods@callhomeods:1521/callhomeods' try: lc = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') curr = lc.cursor() sqlstmt = 'SELECT STATS_FILEID FROM STATSPROCESSTRANSACT WHERE IFNULL(FILE_PROCESS_STATUS,0) IN (0,2)' curr.execute(sqlstmt) fileidrec = curr.fetchall() fileidList = [] print 'Getting filelist from mysql.........' recordsprocessed = 0 totaldone = 0 for idrec in fileidrec: fileidList.append(str(idrec[0])) recordsprocessed += 1 if recordsprocessed > 999: fileids = string.join(fileidList, ',') sqlstmt = 'begin update STATSPROCESSTRANSACT SET FILE_PROCESS_STATUS=0 WHERE STATS_FILEID IN (' + fileids + '); commit; end;' oraconn = oracon.openconnect(constr) oracon.execSql(oraconn, sqlstmt) totaldone += recordsprocessed recordsprocessed = 0 print 'Total records updated...:' + str(totaldone) fileidList = [] if len(fileidList) > 0:
#!/usr/bin/env python import oracleconnect as oracon import mysqlconnect as mconnect import string import time import hostlib as hst import os import statslog import sys constr = 'ods/ods@callhomeods:1521/callhomeods' lconn = mconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') lcurr = lconn.cursor() hstname = hst.retHostName() ipadd = hst.retHostIP() SplitConStr = 'ods/ods@callhomedb03:1521/callhomedb03' ProcConStr = 'ods/ods@callhomedb04:1521/callhomedb04' sqlArr = [] MysqlArr = [] oraconn = oracon.openconnect(constr) sql = 'select purpose from statsprocessingmachine where ipaddress=\'' + ipadd + '\'' recon = oracon.execSql(oraconn, sql) for rec in recon: purpose = rec[0] oraconn.close() try:
def updateMetadata(): try: lsconn = lconnect.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') lscurr = lsconn.cursor() oraconn = oracon.openconnect(constr) sqlstmt = 'SELECT STATSID, STATS_STRUCTURE_ID, STATSNAME, STATS_VERSION, STATS_FIRST_ROW_VERSION, STATS_SINGLE_ROW, STATS_END_OF_ROW FROM STATSSTRUCTURE ORDER BY 1' statrec = oracon.execSql(oraconn, sqlstmt) sqlstmt = 'SELECT DATATYPE, SPLIT_FILE_TYPE, SPLIT_FILE_SEARCH_TAG, SPLIT_FILE_SKIP_LINES, SPLIT_FILE_END_TAG, SPLIT_FILE_LINE_SEPERATOR, STATSID FROM STAT_SPLIT_FILE_LOOKUP' splitrec = oracon.execSql(oraconn, sqlstmt) sqlstmt = 'SELECT STATS_STRUCTURE_ID, PROCESS_SEQUENCE, FUNCTIONID FROM PROCESSLOGIC' processrec = oracon.execSql(oraconn, sqlstmt) sqlstmt = 'SELECT FUNCTIONID, FUNCTIONNAME, FUNCTIONPARAMETERS, FUNCTIONDESC FROM PROCESSFUNCTION' profunc = oracon.execSql(oraconn, sqlstmt) for rec in statrec: sqlstmt = 'SELECT COUNT(1) FROM STATSSTRUCTURE WHERE STATSID=' + str( rec[0]) + ' and STATS_STRUCTURE_ID=' + str(rec[1]) lscurr.execute(sqlstmt) recData = lscurr.fetchall() recCount = 0 for recdat in recData: recCount = recdat[0] if recCount == 0: if not rec[6]: end_row = '' else: end_row = rec[6] sqlstmt = 'INSERT INTO STATSSTRUCTURE (STATSID, STATS_STRUCTURE_ID, STATSNAME, STATS_VERSION, STATS_FIRST_ROW_VERSION, STATS_SINGLE_ROW, STATS_END_OF_ROW) VALUES ' sqlstmt += ' (' + str(rec[0]) + ',' + str( rec[1] ) + ',\'' + rec[2] + '\',\'' + rec[3] + '\',\'' + rec[ 4] + '\',\'' + rec[5] + '\',\'' + end_row + '\')' lscurr.execute(sqlstmt) for rec in splitrec: sqlstmt = 'SELECT COUNT(1) FROM STAT_SPLIT_FILE_LOOKUP WHERE DATATYPE=\'' + str( rec[0]) + '\' and SPLIT_FILE_TYPE=\'' + rec[1] + '\'' lscurr.execute(sqlstmt) recData = lscurr.fetchall() recCount = 0 for recdat in recData: recCount = recdat[0] if recCount == 0: if not rec[5]: line_sep = '' else: line_sep = rec[5] sqlstmt = 'INSERT INTO STAT_SPLIT_FILE_LOOKUP (DATATYPE, SPLIT_FILE_TYPE, SPLIT_FILE_SEARCH_TAG, SPLIT_FILE_SKIP_LINES, SPLIT_FILE_END_TAG, SPLIT_FILE_LINE_SEPERATOR, STATSID) ' sqlstmt += 'VALUES (\'' + str(rec[0]) + '\',\'' + str( rec[1]) + '\',\'' + rec[2] + '\',' + str( rec[3] ) + ',\'' + rec[4] + '\',\'' + line_sep + '\',' + str( rec[6]) + ')' lscurr.execute(sqlstmt) for rec in processrec: sqlstmt = 'SELECT COUNT(1) FROM PROCESSLOGIC WHERE STATS_STRUCTURE_ID=' + str( rec[0]) + ' and PROCESS_SEQUENCE=' + str( rec[1]) + ' and FUNCTIONID=' + str(rec[2]) #sqlstmt='SELECT COUNT(1) FROM PROCESSLOGIC WHERE STATS_STRUCTURE_ID='+str(rec[0]); lscurr.execute(sqlstmt) recData = lscurr.fetchall() recCount = 0 for recdat in recData: recCount = recdat[0] if recCount == 0: sqlstmt = 'INSERT INTO PROCESSLOGIC (STATS_STRUCTURE_ID, PROCESS_SEQUENCE, FUNCTIONID) ' sqlstmt += 'VALUES (' + str(rec[0]) + ',' + str( rec[1]) + ',' + str(rec[2]) + ')' lscurr.execute(sqlstmt) #else: ##fix logic if needed # sqlstmt='select STATS_STRUCTURE_ID, PROCESS_SEQUENCE, FUNCTIONID FROM PROCESSLOGIC WHERE STATS_STRUCTURE_ID='+str(rec[0])+' and PROCESS_SEQUENCE='+str(rec[1]); # lscurr.execute(sqlstmt); # dat=lscurr.fetchall(); # for dt in dat: # if dt[2] != rec[2]: # sqlstmt='update PROCESSLOGIC SET FUNCTIONID='+str(rec[2])+' WHERE STATS_STRUCTURE_ID='+str(rec[0])+' and PROCESS_SEQUENCE='+str(rec[1]); # lscurr.execute(sqlstmt); sqlstmt = 'SELECT STATS_STRUCTURE_ID, PROCESS_SEQUENCE, FUNCTIONID FROM PROCESSLOGIC' lscurr.execute(sqlstmt) mySqlDat = lscurr.fetchall() for myRec in mySqlDat: sqlstmt = 'SELECT COUNT(1) FROM PROCESSLOGIC where STATS_STRUCTURE_ID=' + str( myRec[0]) + ' AND PROCESS_SEQUENCE=' + str( myRec[1]) + ' AND FUNCTIONID=' + str(myRec[2]) processrec = oracon.execSql(oraconn, sqlstmt) for recdat in processrec: recCount = recdat[0] if recCount == 0: sqlstmt = 'delete from PROCESSLOGIC where STATS_STRUCTURE_ID=' + str( myRec[0]) + ' and PROCESS_SEQUENCE =' + str( myRec[1]) + ' and FUNCTIONID =' + str(myRec[2]) lscurr.execute(sqlstmt) for rec in profunc: sqlstmt = 'SELECT COUNT(1) FROM PROCESSFUNCTION WHERE FUNCTIONID=' + str( rec[0]) + ' and FUNCTIONNAME=\'' + str(rec[1]) + '\'' lscurr.execute(sqlstmt) recData = lscurr.fetchall() recCount = 0 for recdat in recData: recCount = recdat[0] if recCount == 0: sqlstmt = 'INSERT INTO PROCESSFUNCTION (FUNCTIONID, FUNCTIONNAME, FUNCTIONPARAMETERS, FUNCTIONDESC) ' sqlstmt += 'VALUES (' + str(rec[0]) + ',\'' + str( rec[1]) + '\',\'' + str(rec[2]) + '\',\'' + str( rec[3]) + '\')' lscurr.execute(sqlstmt) lscurr.close() lsconn.close() oraconn.close() except: fl = statslog.logcreate('log/copymeta.log') statslog.logwrite(fl, 'Error reported: ' + str(sys.exc_info()[1]))
import requests import mysqlconnect from urllib.parse import urlencode from pyquery import PyQuery as pq import time urlSearch = 'https://www.zhihu.com/api/v4/search_v3?' headers = { 'cookie': 'd_c0="AJDAEUUImwqPTkKyoYgudon8wR6W8cgZ_gM=|1474982733"; q_c1=23b9eda7df81459c9789a908930cca72|1507096790000|1474982733000; _zap=55c589c0-280d-4fe5-8a8d-f5aaa935ad0c; _xsrf=Gs0BGPtSCbvuI78Y3ma5t01tgIhum0RR; q_c1=23b9eda7df81459c9789a908930cca72|1533896590000|1474982733000; __utma=155987696.718434429.1525358318.1533909362.1533971909.3; __gads=ID=a5be66fc4c81514b:T=1554558539:S=ALNI_MZ2Wtus8SRwdPsdt5NIdcbqpzUDcg; tgw_l7_route=4860b599c6644634a0abcd4d10d37251; capsion_ticket="2|1:0|10:1555688182|14:capsion_ticket|44:ODgwMzMxNmYwOWFkNDllOWE1NmEzMzAxNjdkMGJkNDE=|6c9064c327dd03cc62dcbed0c978b3cbcfc872a83df6efef02724e6549cb879d"; z_c0="2|1:0|10:1555688239|4:z_c0|92:Mi4xOXhCWkF3QUFBQUFBa01BUlJRaWJDaWNBQUFDRUFsVk5MM2poWEFCUmpFRl9hSU12ckJPUU13Qk53WlRFWEFvX0tR|574bfa02342878f5394354e72dfc9ab57f324c1e1893d1ab531861041f70421d"; tst=r', 'user-agent': 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36' } #拿到数据库的游标 conn = mysqlconnect.connectMysql() cur = conn.cursor() #创建数据表,如果已存在就不建 def createTables(): #存放知乎精选回答 if not mysqlconnect.tableExsist(cur, 'searchanswer'): cur.execute(''' create table searchanswer( id int unsigned primary key auto_increment, title text not null, author text, avatar text, article mediumtext )default charset=utf8mb4;
def processFile(STATS_SPLITFILE_ID, STATS_SPLITFILE_NAME, STATS_SPLITFILE_PATH, STATS_SPLITFILE_TYPE, INSERVSERIAL, DATAFOLDER, MACHINEID): try: fl = statslog.logcreate("log/" + STATS_SPLITFILE_NAME) lc = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') curr = lc.cursor() errlog = 'log/' + STATS_SPLITFILE_NAME + '.err' if versionExist(STATS_SPLITFILE_TYPE) == 1: version = prclib.getVersion(STATS_SPLITFILE_NAME, STATS_SPLITFILE_PATH) else: version = '2.3.1' sqlstmt = 'SELECT COUNT(1) FROM STATSSTRUCTURE WHERE STATS_VERSION=\'' + version + '\' and STATSNAME=\'' + STATS_SPLITFILE_TYPE + '\'' curr.execute(sqlstmt) dat = curr.fetchall() for datrec in dat: count = datrec[0] if count > 0: sqlstmt = 'SELECT STATS_STRUCTURE_ID FROM STATSSTRUCTURE WHERE STATS_VERSION=\'' + version + '\' and STATSNAME=\'' + STATS_SPLITFILE_TYPE + '\'' curr.execute(sqlstmt) verdat = curr.fetchall() STATS_STRUCTURE_ID = 0 for ver in verdat: STATS_STRUCTURE_ID = ver[0] if STATS_STRUCTURE_ID > 0: sqlstmt = 'SELECT PROCESS_SEQUENCE,FUNCTIONID,PARAMETER_VALUES FROM PROCESSLOGIC WHERE STATS_STRUCTURE_ID=' + str( STATS_STRUCTURE_ID) + ' ORDER BY PROCESS_SEQUENCE' curr.execute(sqlstmt) prcdat = curr.fetchall() linearray = prclib.readFile(STATS_SPLITFILE_NAME, STATS_SPLITFILE_PATH) for prc in prcdat: sqlstmt = 'SELECT FUNCTIONNAME FROM PROCESSFUNCTION WHERE FUNCTIONID=' + str( prc[1]) curr.execute(sqlstmt) funcNamerec = curr.fetchall() for fu in funcNamerec: functionName = fu[0] func = getattr(prclib, functionName) linearray = func(linearray) filepath = DATAFOLDER + '/' + str(INSERVSERIAL) + '/' checkmakedir(filepath, errlog) outputFl = open(filepath + STATS_SPLITFILE_NAME, 'w') outputFl.write(linearray) sqlstmt = 'insert into STATSOUTPUT(STATS_SPLITFILE_ID,STATS_OUTPUTFILE_NAME,STATS_OUTPUTFILE_PATH,STATS_FILE_CREATE_DATE,STATS_FILE_LOAD_STATUS,STATS_STRUCTURE_TYPE_ID) values ' sqlstmt += '(' + str( STATS_SPLITFILE_ID ) + ',\'' + STATS_SPLITFILE_NAME + '\',\'' + filepath + '\',sysdate(),0,' + str( STATS_STRUCTURE_ID) + ')' curr.execute(sqlstmt) sqlstmt = 'Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=1 where STATS_SPLITFILE_ID=' + str( STATS_SPLITFILE_ID) curr.execute(sqlstmt) curr.close() lc.close() else: sqlstmt = 'DELETE FROM STATS_SPLIT_FILES WHERE STATS_SPLITFILE_ID=' + str( STATS_SPLITFILE_ID) curr.execute(sqlstmt) curr.close() lc.close() else: fl = statslog.logcreate("log/" + STATS_SPLITFILE_NAME) statslog.logwrite( fl, "Error reported: Version " + version + " not yet supported") sqlstmt = 'Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=3 where STATS_SPLITFILE_ID=' + str( STATS_SPLITFILE_ID) lc = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') curr = lc.cursor() curr.execute(sqlstmt) curr.close() lc.close() except: fl = statslog.logcreate("log/" + STATS_SPLITFILE_NAME) statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1])) sqlstmt = 'Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=3 where STATS_SPLITFILE_ID=' + str( STATS_SPLITFILE_ID) lc = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') curr = lc.cursor() curr.execute(sqlstmt) curr.close() lc.close()
def procExec(): try: oraconn = oracon.openconnect(constr) lconn = mysqlconn.connectMysql('ods', 'procuser', 'c@llhome', 'localhost') lcurr = lconn.cursor() ipadd = hostlib.retHostIP() sqlstmt = 'select machineid,numberofthreads,processloc,number_of_files_per_run,delay_seconds from statsprocessingmachine where ipaddress=\'' + ipadd + '\' and enable =1' numthreadrec = oracon.execSql(oraconn, sqlstmt) for numthread in numthreadrec: numthreads = numthread[1] machineid = numthread[0] datafolder = numthread[2] files_per_run = numthread[3] delay_seconds = numthread[4] #statslog.logwrite(fl,str(numthreads)+':'+str(files_per_run)+':'+str(delay_seconds)); numthreads = int(numthreads) sqlstmt = 'select distinct statsname from STATSSTRUCTURE a,PROCESSLOGIC b where a.STATS_STRUCTURE_ID = b.STATS_STRUCTURE_ID' lcurr.execute(sqlstmt) recc = lcurr.fetchall() statList = [] for rec in recc: statList.append('\'' + rec[0] + '\'') statsStr = string.join(statList, ',') sqlstmt = 'update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=0 WHERE FILE_PROCESS_STATUS=2' lcurr.execute(sqlstmt) sqlstmt = 'select count(1) from STATS_SPLIT_FILES where ifnull(FILE_PROCESS_STATUS,0)=0 and STATS_SPLIT_FILE_TYPE in (' + statsStr + ')' lcurr.execute(sqlstmt) recc = lcurr.fetchall() numRec = 0 for rec in recc: numRec = rec[0] numthreadstocopy = numthreads - numRec #statslog.logwrite(fl,str(numthreadstocopy)); if numthreadstocopy > 0: sqlstmt = '' rows = 1000 totrows = rows while totrows <= numthreadstocopy: sqlstmt = 'select STATS_FILEID,STATS_SPLITFILE_ID,STATS_SPLITFILE_NAME,STATS_SPLITFILE_PATH,STATSID,STATS_SPLIT_FILE_TYPE from STATS_SPLIT_FILES ' sqlstmt += ' where FILE_PROCESS_STATUS=0 and MACHINEID=' + str( machineid ) + ' and ROWNUM <=' + str( rows ) + ' and STATS_SPLIT_FILE_TYPE in (' + statsStr + ') order by STATS_SPLITFILE_ID desc' #statslog.logwrite(fl,sqlstmt); FileRec = oracon.execSql(oraconn, sqlstmt) sqlstmt = 'INSERT INTO STATS_SPLIT_FILES (STATS_FILEID,STATS_SPLITFILE_ID,STATS_SPLITFILE_NAME,STATS_SPLITFILE_PATH,STATSID,STATS_SPLIT_FILE_TYPE) VALUES ' sql = [] for flrec in FileRec: sql.append('(' + str(flrec[0]) + ',' + str(flrec[1]) + ',\'' + flrec[2] + '\',\'' + flrec[3] + '\',' + str(flrec[4]) + ',\'' + flrec[5] + '\')') sqlst = 'begin Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=2 where STATS_SPLITFILE_ID=' + str( flrec[1]) + '; commit; end;' oracon.execSql(oraconn, sqlst) if len(sql) > 0: sqlstmt += string.join(sql, ',') lcurr.execute(sqlstmt) totrows = totrows + rows pythonRuns = commands.getoutput( "ps -ef | grep parsetabber | grep -v grep | wc -l") while int(pythonRuns) - 3 <= files_per_run: sqlstmt = 'SELECT a.STATS_SPLITFILE_ID,a.STATS_SPLITFILE_NAME,a.STATS_SPLITFILE_PATH,STATS_SPLIT_FILE_TYPE from STATS_SPLIT_FILES a where ifnull(FILE_PROCESS_STATUS,0)=0 and STATS_SPLIT_FILE_TYPE in (' + statsStr + ') LIMIT ' + str( files_per_run - int(pythonRuns)) #statslog.logwrite(fl,sqlstmt); if (files_per_run - int(pythonRuns)) > 0: ctr = 0 lcurr = lconn.cursor() lcurr.execute(sqlstmt) filelist = lcurr.fetchall() for flrec in filelist: flpthlist = string.split(flrec[2], '/') inservserial = flpthlist[len(flpthlist) - 2] p = Process(target=processFile, args=( flrec[0], flrec[1], flrec[2], flrec[3], inservserial, datafolder, machineid, )) p.daemon = True p.start() ctr += 1 sqlstmt = 'Update STATS_SPLIT_FILES set FILE_PROCESS_STATUS=2 where STATS_SPLITFILE_ID=' + str( flrec[0]) lcurr.execute(sqlstmt) lcurr.close() if ctr == 0: break pythonRuns = commands.getoutput( "ps -ef | grep parsetabber | grep -v grep | wc -l") while int(pythonRuns) > files_per_run: time.sleep(delay_seconds) pythonRuns = commands.getoutput( "ps -ef | grep parsetabber | grep -v grep | wc -l") pythonRuns = commands.getoutput( "ps -ef | grep parsetabber | grep -v grep | wc -l") lconn.close() except: fl = statslog.logcreate("log/parser.log") statslog.logwrite(fl, "Error reported: " + str(sys.exc_info()[1]))