def saveJobDb(self, jobResourceDic): print('>>> Saving job related cpu/memory information into sqlite3 ...') common.debug('Saving job resource info into sqlite3 ...') jobList = list(jobResourceDic.keys()) jobRangeDic = common.getJobRangeDic(jobList) keyList = ['SAMPLE_TIME', 'HOST_NAME', 'CPU', 'MEMORY'] keyString = sqlite3_common.genSqlTableKeyString(keyList) for jobRange in jobRangeDic.keys(): jobResourceSqlDic = {} dbFile = str(self.dbPath) +'/job/' + str(jobRange) + '.db' (result, dbConn) = sqlite3_common.connectDbFile(dbFile, mode='write') if result == 'passed': jobTableList = sqlite3_common.getSqlTableList(dbFile, dbConn) else: jobTableList = [] print('*Error*: Failed on connecting sqlite3 database "' + str(dbFile) + '".') continue for job in jobRangeDic[jobRange]: jobResourceSqlDic[job] = { 'drop': False, 'keyString': '', 'valueString': '', } tableName = job print(' Sampling for job "' + str(job) + '" ...') if tableName in jobTableList: returnCode = self.checkOldSqlTable(dbFile, dbConn, tableName) if returnCode == 1: jobResourceSqlDic[job]['drop'] = True jobResourceSqlDic[job]['keyString'] = keyString else: jobResourceSqlDic[job]['keyString'] = keyString valueList = [sampleTime, hostname, jobResourceDic[job]['cpu'], jobResourceDic[job]['memory']] valueString = sqlite3_common.genSqlTableValueString(valueList) jobResourceSqlDic[job]['valueString'] = valueString for job in jobResourceSqlDic.keys(): tableName = job if jobResourceSqlDic[job]['drop']: print(' Dropping table "' + str(tableName) + '" ...') sqlite3_common.dropSqlTable(dbFile, dbConn, tableName, commit=False) if jobResourceSqlDic[job]['keyString'] != '': print(' Creating table "' + str(tableName) + '" ...') sqlite3_common.createSqlTable(dbFile, dbConn, tableName, jobResourceSqlDic[job]['keyString'], commit=False) if jobResourceSqlDic[job]['valueString'] != '': print(' Updating table "' + str(tableName) + '" with content "' + str(jobResourceSqlDic[job]['valueString']) + '" ...') sqlite3_common.insertIntoSqlTable(dbFile, dbConn, tableName, jobResourceSqlDic[job]['valueString'], commit=False) if result == 'passed': dbConn.commit() dbConn.close() common.debug('Saving job resource info done.')
def connectUserDb(self, user): dbFile = str(self.dbPath) + '/user/' + str(user) + '.db' (result, dbConn) = sqlite3_common.connectDbFile(dbFile, mode='write') if result != 'passed': tableList = [] print('*Error*: Failed on connecting sqlite3 database "' + str(dbFile) + '".') else: tableList = sqlite3_common.getSqlTableList(dbFile, dbConn) return (result, dbFile, dbConn, tableList)
def __init__(self): self.user = getpass.getuser() self.queueDbFile = str(config.dbPath) + '/monitor/queue.db' (self.queueDbFileConnectResult, self.queueDbConn) = sqlite3_common.connectDbFile(self.queueDbFile) if self.queueDbFileConnectResult == 'failed': common.printWarning( '*Warning*: Failed on connectiong queue database file "' + str(self.queueDbFile) + '".') self.jobFirstLoad = True self.queueFirstLoad = True
def connectJobDb(self, job): jobRangeDic = common.getJobRangeDic([ job, ]) jobRangeList = list(jobRangeDic.keys()) jobRange = jobRangeList[0] dbFile = str(self.dbPath) + '/job/' + str(jobRange) + '.db' (result, dbConn) = sqlite3_common.connectDbFile(dbFile, mode='read') if result != 'passed': tableList = [] print('*Error*: Failed on connecting sqlite3 database "' + str(dbFile) + '".') else: tableList = sqlite3_common.getSqlTableList(dbFile, dbConn) return (result, dbFile, dbConn, tableList)
def sampleJobInfo(self): """ Sample job info, especially the memory usage info. """ self.getDateInfo() print('>>> Sampling job info ...') command = 'bjobs -u all -r -UF' bjobsDic = lsf_common.getBjobsUfInfo(command) jobList = list(bjobsDic.keys()) jobRangeDic = common.getJobRangeDic(jobList) jobSqlDic = {} keyList = ['sampleTime', 'mem'] for jobRange in jobRangeDic.keys(): jobDbFile = str(self.dbPath) + '/job/' + str(jobRange) + '.db' (result, jobDbConn) = sqlite3_common.connectDbFile(jobDbFile, mode='read') if result == 'passed': jobTableList = sqlite3_common.getSqlTableList( jobDbFile, jobDbConn) else: jobTableList = [] for job in jobRangeDic[jobRange]: jobTableName = 'job_' + str(job) print(' Sampling for job "' + str(job) + '" ...') jobSqlDic[job] = { 'drop': False, 'keyString': '', 'valueString': '', } # If job table (with old data) has been on the jobDbFile, drop it. if jobTableName in jobTableList: dataDic = sqlite3_common.getSqlTableData( jobDbFile, jobDbConn, jobTableName, ['sampleTime']) if dataDic: if len(dataDic['sampleTime']) > 0: lastSampleTime = dataDic['sampleTime'][-1] lastSeconds = int( time.mktime( datetime.datetime.strptime( str(lastSampleTime), "%Y%m%d_%H%M%S").timetuple())) if self.currentSeconds - lastSeconds > 3600: common.printWarning( ' *Warning*: table "' + str(jobTableName) + '" already existed even one hour ago, will drop it.' ) jobSqlDic[job]['drop'] = True jobTableList.remove(jobTableName) # If job table is not on the jobDbFile, create it. if jobTableName not in jobTableList: keyString = sqlite3_common.genSqlTableKeyString(keyList) jobSqlDic[job]['keyString'] = keyString # Insert sql table value. valueList = [self.sampleTime, bjobsDic[job]['mem']] valueString = sqlite3_common.genSqlTableValueString(valueList) jobSqlDic[job]['valueString'] = valueString if result == 'passed': jobDbConn.commit() jobDbConn.close() for jobRange in jobRangeDic.keys(): jobDbFile = str(self.dbPath) + '/job/' + str(jobRange) + '.db' (result, jobDbConn) = sqlite3_common.connectDbFile(jobDbFile, mode='write') if result != 'passed': return for job in jobRangeDic[jobRange]: jobTableName = 'job_' + str(job) if jobSqlDic[job]['drop']: sqlite3_common.dropSqlTable(jobDbFile, jobDbConn, jobTableName, commit=False) if jobSqlDic[job]['keyString'] != '': sqlite3_common.createSqlTable(jobDbFile, jobDbConn, jobTableName, jobSqlDic[job]['keyString'], commit=False) if jobSqlDic[job]['valueString'] != '': sqlite3_common.insertIntoSqlTable( jobDbFile, jobDbConn, jobTableName, jobSqlDic[job]['valueString'], commit=False) jobDbConn.commit() jobDbConn.close() print(' Committing the update to sqlite3 ...') print(' Done (' + str(len(jobList)) + ' jobs).')
def sampleUserInfo(self): """ Sample user info and save it into sqlite db. """ self.getDateInfo() userDbFile = str(self.dbPath) + '/user.db' (result, userDbConn) = sqlite3_common.connectDbFile(userDbFile, mode='write') if result != 'passed': return print('>>> Sampling user info into ' + str(userDbFile) + ' ...') userTableList = sqlite3_common.getSqlTableList(userDbFile, userDbConn) busersDic = lsf_common.getBusersInfo() userList = busersDic['USER/GROUP'] userSqlDic = {} keyList = ['sampleTime', 'NJOBS', 'PEND', 'RUN', 'SSUSP', 'USUSP'] for i in range(len(userList)): user = userList[i] userSqlDic[user] = { 'keyString': '', 'valueString': '', } userTableName = 'user_' + str(user) print(' Sampling for user "' + str(user) + '" ...') # Generate sql table. if userTableName not in userTableList: keyString = sqlite3_common.genSqlTableKeyString(keyList) userSqlDic[user]['keyString'] = keyString # Insert sql table value. valueList = [ self.sampleTime, busersDic['NJOBS'][i], busersDic['PEND'][i], busersDic['RUN'][i], busersDic['SSUSP'][i], busersDic['USUSP'][i] ] valueString = sqlite3_common.genSqlTableValueString(valueList) userSqlDic[user]['valueString'] = valueString for user in userList: userTableName = 'user_' + str(user) if userSqlDic[user]['keyString'] != '': sqlite3_common.createSqlTable(userDbFile, userDbConn, userTableName, userSqlDic[user]['keyString'], commit=False) if userSqlDic[user]['valueString'] != '': sqlite3_common.insertIntoSqlTable( userDbFile, userDbConn, userTableName, userSqlDic[user]['valueString'], commit=False) print(' Committing the update to sqlite3 ...') # Clean up user database, only keep 10000 items. for user in userList: userTableName = 'user_' + str(user) userTableCount = int( sqlite3_common.getSqlTableCount(userDbFile, userDbConn, userTableName)) if userTableCount != 'N/A': if int(userTableCount) > 10000: rowId = 'sampleTime' beginLine = 0 endLine = int(userTableCount) - 10000 print(' Deleting database "' + str(userDbFile) + '" table "' + str(userTableName) + '" ' + str(beginLine) + '-' + str(endLine) + ' lines to only keep 10000 items.') sqlite3_common.deleteSqlTableRows(userDbFile, userDbConn, userTableName, rowId, beginLine, endLine) userDbConn.commit() userDbConn.close()
def sampleLoadInfo(self): """ Sample host load info and save it into sqlite db. """ self.getDateInfo() loadDbFile = str(self.dbPath) + '/load.db' (result, loadDbConn) = sqlite3_common.connectDbFile(loadDbFile, mode='write') if result != 'passed': return print('>>> Sampling host load info into ' + str(loadDbFile) + ' ...') loadTableList = sqlite3_common.getSqlTableList(loadDbFile, loadDbConn) lsloadDic = lsf_common.getLsloadInfo() hostList = lsloadDic['HOST_NAME'] loadSqlDic = {} keyList = ['sampleTime', 'ut', 'tmp', 'swp', 'mem'] for i in range(len(hostList)): host = hostList[i] loadSqlDic[host] = { 'keyString': '', 'valueString': '', } loadTableName = 'load_' + str(host) print(' Sampling for host "' + str(host) + '" ...') # Generate sql table. if loadTableName not in loadTableList: keyString = sqlite3_common.genSqlTableKeyString(keyList) loadSqlDic[host]['keyString'] = keyString # Insert sql table value. valueList = [ self.sampleTime, lsloadDic['ut'][i], lsloadDic['tmp'][i], lsloadDic['swp'][i], lsloadDic['mem'][i] ] valueString = sqlite3_common.genSqlTableValueString(valueList) loadSqlDic[host]['valueString'] = valueString for host in hostList: loadTableName = 'load_' + str(host) if loadSqlDic[host]['keyString'] != '': sqlite3_common.createSqlTable(loadDbFile, loadDbConn, loadTableName, loadSqlDic[host]['keyString'], commit=False) if loadSqlDic[host]['valueString'] != '': sqlite3_common.insertIntoSqlTable( loadDbFile, loadDbConn, loadTableName, loadSqlDic[host]['valueString'], commit=False) print(' Committing the update to sqlite3 ...') # Clean up load database, only keep 10000 items. for host in hostList: loadTableName = 'load_' + str(host) loadTableCount = int( sqlite3_common.getSqlTableCount(loadDbFile, loadDbConn, loadTableName)) if loadTableCount != 'N/A': if int(loadTableCount) > 10000: rowId = 'sampleTime' beginLine = 0 endLine = int(loadTableCount) - 10000 print(' Deleting database "' + str(loadDbFile) + '" table "' + str(loadTableName) + '" ' + str(beginLine) + '-' + str(endLine) + ' lines to only keep 10000 items.') sqlite3_common.deleteSqlTableRows(loadDbFile, loadDbConn, loadTableName, rowId, beginLine, endLine) loadDbConn.commit() loadDbConn.close()
def sampleHostInfo(self): """ Sample host info and save it into sqlite db. """ self.getDateInfo() hostDbFile = str(self.dbPath) + '/host.db' (result, hostDbConn) = sqlite3_common.connectDbFile(hostDbFile, mode='write') if result != 'passed': return print('>>> Sampling host info into ' + str(hostDbFile) + ' ...') hostTableList = sqlite3_common.getSqlTableList(hostDbFile, hostDbConn) bhostsDic = lsf_common.getBhostsInfo() hostList = bhostsDic['HOST_NAME'] hostSqlDic = {} keyList = ['sampleTime', 'NJOBS', 'RUN', 'SSUSP', 'USUSP'] for i in range(len(hostList)): host = hostList[i] hostSqlDic[host] = { 'keyString': '', 'valueString': '', } hostTableName = 'host_' + str(host) print(' Sampling for host "' + str(host) + '" ...') # Generate sql table. if hostTableName not in hostTableList: keyString = sqlite3_common.genSqlTableKeyString(keyList) hostSqlDic[host]['keyString'] = keyString # Insert sql table value. valueList = [ self.sampleTime, bhostsDic['NJOBS'][i], bhostsDic['RUN'][i], bhostsDic['SSUSP'][i], bhostsDic['USUSP'][i] ] valueString = sqlite3_common.genSqlTableValueString(valueList) hostSqlDic[host]['valueString'] = valueString for host in hostList: hostTableName = 'host_' + str(host) if hostSqlDic[host]['keyString'] != '': sqlite3_common.createSqlTable(hostDbFile, hostDbConn, hostTableName, hostSqlDic[host]['keyString'], commit=False) if hostSqlDic[host]['valueString'] != '': sqlite3_common.insertIntoSqlTable( hostDbFile, hostDbConn, hostTableName, hostSqlDic[host]['valueString'], commit=False) print(' Committing the update to sqlite3 ...') # Clean up host database, only keep 10000 items. for host in hostList: hostTableName = 'host_' + str(host) hostTableCount = int( sqlite3_common.getSqlTableCount(hostDbFile, hostDbConn, hostTableName)) if hostTableCount != 'N/A': if int(hostTableCount) > 10000: rowId = 'sampleTime' beginLine = 0 endLine = int(hostTableCount) - 10000 print(' Deleting database "' + str(hostDbFile) + '" table "' + str(hostTableName) + '" ' + str(beginLine) + '-' + str(endLine) + ' lines to only keep 10000 items.') sqlite3_common.deleteSqlTableRows(hostDbFile, hostDbConn, hostTableName, rowId, beginLine, endLine) hostDbConn.commit() hostDbConn.close()
def sampleQueueInfo(self): """ Sample queue info and save it into sqlite db. """ self.getDateInfo() queueDbFile = str(self.dbPath) + '/queue.db' (result, queueDbConn) = sqlite3_common.connectDbFile(queueDbFile, mode='write') if result != 'passed': return print('>>> Sampling queue info into ' + str(queueDbFile) + ' ...') queueTableList = sqlite3_common.getSqlTableList( queueDbFile, queueDbConn) bqueuesDic = lsf_common.getBqueuesInfo() queueList = bqueuesDic['QUEUE_NAME'] queueList.append('ALL') queueSqlDic = {} keyList = ['sampleTime', 'NJOBS', 'PEND', 'RUN', 'SUSP'] for i in range(len(queueList)): queue = queueList[i] queueSqlDic[queue] = { 'keyString': '', 'valueString': '', } queueTableName = 'queue_' + str(queue) print(' Sampling for queue "' + str(queue) + '" ...') # Generate sql table. if queueTableName not in queueTableList: keyString = sqlite3_common.genSqlTableKeyString(keyList) queueSqlDic[queue]['keyString'] = keyString # Insert sql table value. if queue == 'ALL': valueList = [ self.sampleTime, sum([int(i) for i in bqueuesDic['NJOBS']]), sum([int(i) for i in bqueuesDic['PEND']]), sum([int(i) for i in bqueuesDic['RUN']]), sum([int(i) for i in bqueuesDic['SUSP']]) ] else: valueList = [ self.sampleTime, bqueuesDic['NJOBS'][i], bqueuesDic['PEND'][i], bqueuesDic['RUN'][i], bqueuesDic['SUSP'][i] ] valueString = sqlite3_common.genSqlTableValueString(valueList) queueSqlDic[queue]['valueString'] = valueString for queue in queueList: queueTableName = 'queue_' + str(queue) if queueSqlDic[queue]['keyString'] != '': sqlite3_common.createSqlTable(queueDbFile, queueDbConn, queueTableName, queueSqlDic[queue]['keyString'], commit=False) if queueSqlDic[queue]['valueString'] != '': sqlite3_common.insertIntoSqlTable( queueDbFile, queueDbConn, queueTableName, queueSqlDic[queue]['valueString'], commit=False) print(' Committing the update to sqlite3 ...') # Clean up queue database, only keep 10000 items. for queue in queueList: queueTableName = 'queue_' + str(queue) queueTableCount = int( sqlite3_common.getSqlTableCount(queueDbFile, queueDbConn, queueTableName)) if queueTableCount != 'N/A': if int(queueTableCount) > 10000: rowId = 'sampleTime' beginLine = 0 endLine = int(queueTableCount) - 10000 print(' Deleting database "' + str(queueDbFile) + '" table "' + str(queueTableName) + '" ' + str(beginLine) + '-' + str(endLine) + ' lines to only keep 10000 items.') sqlite3_common.deleteSqlTableRows(queueDbFile, queueDbConn, queueTableName, rowId, beginLine, endLine) queueDbConn.commit() queueDbConn.close()
def drawJobMemCurve(self, job): """ Draw memory usage curve for specified job. """ jobRangeDic = common.getJobRangeDic([ job, ]) jobRangeList = list(jobRangeDic.keys()) jobRange = jobRangeList[0] self.jobDbFile = str( config.dbPath) + '/monitor/job/' + str(jobRange) + '.db' (self.jobDbFileConnectResult, self.jobDbConn) = sqlite3_common.connectDbFile(self.jobDbFile) if self.jobDbFileConnectResult == 'failed': common.printWarning( '*Warning*: Failed on connectiong job database file "' + str(self.jobDbFile) + '".') return runTimeList = [] memList = [] if self.jobFirstLoad: common.printWarning( '*Warning*: It is the first time loading job database, it may cost a little time ...' ) self.jobFirstLoad = False print('Getting history of job memory usage for job "' + str(job) + '".') tableName = 'job_' + str(job) dataDic = sqlite3_common.getSqlTableData(self.jobDbFile, self.jobDbConn, tableName, ['sampleTime', 'mem']) if not dataDic: common.printWarning('*Warning*: job information is missing for "' + str(job) + '".') return else: runTimeList = dataDic['sampleTime'] memList = dataDic['mem'] realRunTimeList = [] realMemList = [] firstRunTime = datetime.datetime.strptime(str( runTimeList[0]), '%Y%m%d_%H%M%S').timestamp() for i in range(len(runTimeList)): runTime = runTimeList[i] currentRunTime = datetime.datetime.strptime( str(runTime), '%Y%m%d_%H%M%S').timestamp() realRunTime = int((currentRunTime - firstRunTime) / 60) realRunTimeList.append(realRunTime) mem = memList[i] if mem == '': mem = '0' realMem = round(int(mem) / 1024, 1) realMemList.append(realMem) memCurveFig = str(config.tmpPath) + '/' + str( self.user) + '_' + str(job) + '.png' jobNum = common.stringToInt(job) print('Save job memory curve as "' + str(memCurveFig) + '".') common.drawPlot(realRunTimeList, realMemList, 'runTime (Minitu)', 'memory (G)', yUnit='G', title='job : ' + str(job), saveName=memCurveFig, figureNum=jobNum) if self.jobDbFileConnectResult == 'passed': self.jobDbConn.close()