Beispiel #1
0
def connectDbFile(dbFile, mode='read'):
    """
    Open a sqlite3 connection on the specified database file.

    Pre-checks depend on "mode":
      * 'write' : if "<dbFile>-journal" exists, the file is treated as being
                  used by another connection and is not opened ('locked').
      * 'read'  : the database file must already exist, otherwise 'failed'.
      * others  : no pre-check, the connection is attempted directly.

    Return a (result, conn) tuple. "result" is one of 'passed', 'locked' or
    'failed'; "conn" is the sqlite3 connection when result is 'passed' and
    an empty string otherwise.
    """
    noConn = ''

    if mode == 'write':
        if os.path.exists(str(dbFile) + '-journal'):
            common.printWarning(
                '*Warning*: database file "' + str(dbFile) +
                '" is on another connection, will not connect it.')
            return ('locked', noConn)
    elif mode == 'read' and not os.path.exists(dbFile):
        common.printError('*Error*: "' + str(dbFile) +
                          '" No such database file.')
        return ('failed', noConn)

    try:
        openedConn = sqlite3.connect(dbFile)
    except Exception as error:
        common.printError('*Error*: Failed on connecting database file "' +
                          str(dbFile) + '": ' + str(error))
        return ('failed', noConn)

    return ('passed', openedConn)
Beispiel #2
0
def getQueueHostInfo():
    """
    Get hosts on (specified) queues.

    Runs "bqueues -l" and parses its "QUEUE:" and "HOSTS:" lines.

    Return a dict mapping each queue name to its list of host names.
    Entries of a "HOSTS:" line are expanded as follows:
      * "<group>/"   : replaced by the members of host group <group>
                       (dropped if the group has no members).
      * "<name>+<n>" : replaced by the members of host group <name>, or kept
                       verbatim if <name> has no members.
      * anything else is kept verbatim as a host name.
    """
    queueHostDic = {}
    # Raw strings: "\s", "\S", "\+" and "\d" are regex escapes, not valid
    # Python string escapes.
    queueCompile = re.compile(r'^QUEUE:\s*(\S+)\s*$')
    hostsCompile = re.compile(r'^HOSTS:\s*(.*?)\s*$')
    preferenceCompile = re.compile(r'^(.+)\+\d+$')
    queue = ''

    # Close the pipe as soon as the output has been read.
    with os.popen('bqueues -l') as pipe:
        lines = pipe.readlines()

    for line in lines:
        line = line.strip()

        queueMatch = queueCompile.match(line)
        if queueMatch:
            queue = queueMatch.group(1)
            queueHostDic[queue] = []

        hostsMatch = hostsCompile.match(line)
        if not hostsMatch:
            continue

        hostsString = hostsMatch.group(1)

        # NOTE(review): this is a substring search, so a HOSTS line holding
        # a host name that merely contains "all" also takes this branch.
        if re.search('all', hostsString):
            common.printWarning(
                '*Warning* (getQueueHostInfo) : queue "' + str(queue) +
                '" is not well configured, all of the hosts are on the same queue.'
            )
            queueHostDic[queue] = getHostList()
            continue

        queueHostDic.setdefault(queue, [])

        for hosts in hostsString.split():
            if re.match('.+/', hosts):
                hostGroupName = re.sub('/$', '', hosts)
                hostList = getHostGroupMembers(hostGroupName)

                if len(hostList) > 0:
                    queueHostDic[queue].extend(hostList)

                continue

            preferenceMatch = preferenceCompile.match(hosts)

            if not preferenceMatch:
                queueHostDic[queue].append(hosts)
                continue

            hostList = getHostGroupMembers(preferenceMatch.group(1))

            if len(hostList) == 0:
                # Not a known host group, keep the original token.
                queueHostDic[queue].append(hosts)
            else:
                queueHostDic[queue].extend(hostList)

    return (queueHostDic)
Beispiel #3
0
    def checkOldSqlTable(self, dbFile, orig_conn, tableName, keyList=None):
        """
        Check whether the specified sql table only holds out-of-date data.

        The 'SAMPLE_TIME' column of the table is read and its last value
        (format "%Y-%m-%d %H:%M:%S") is compared with the current time.

        "keyList" is the list of columns to read, ['SAMPLE_TIME'] when it is
        not specified.

        Return 1 if the last sample is more than one hour (3600 seconds) old,
        which means the table should be dropped, otherwise return 0.
        """
        # A None sentinel replaces the former list default, so one list
        # object is no longer shared between calls.
        if keyList is None:
            keyList = ['SAMPLE_TIME']

        dataDic = sqlite3_common.getSqlTableData(dbFile, orig_conn, tableName, keyList)

        if not dataDic:
            return(0)

        sampleTimeList = dataDic['SAMPLE_TIME']

        if len(sampleTimeList) == 0:
            return(0)

        lastSampleTime = sampleTimeList[-1]
        lastSeconds = int(time.mktime(datetime.datetime.strptime(str(lastSampleTime), "%Y-%m-%d %H:%M:%S").timetuple()))

        # NOTE(review): "currentSeconds" is not defined in this method, it is
        # presumably a module-level value -- confirm it is set before the call.
        if currentSeconds-lastSeconds > 3600:
            common.printWarning('*Warning*: table "' + str(tableName) + '" already existed even one hour ago, will drop it.')
            return(1)

        return(0)
    def __init__(self):
        """
        Record the current user, connect the queue database file and reset
        the "first load" flags.

        A failed connection is only reported with a warning, the connect
        result is kept on the instance for later checks.
        """
        self.user = getpass.getuser()
        self.queueDbFile = '/'.join([str(config.dbPath), 'monitor', 'queue.db'])

        connectResult, dbConn = sqlite3_common.connectDbFile(self.queueDbFile)
        self.queueDbFileConnectResult = connectResult
        self.queueDbConn = dbConn

        if connectResult == 'failed':
            common.printWarning(
                '*Warning*: Failed on connectiong queue database file "' +
                str(self.queueDbFile) + '".')

        # Both databases are announced as "first load" until they are read.
        self.jobFirstLoad = self.queueFirstLoad = True
Beispiel #5
0
def getCommandDict(command):
    """
    Collect (common) LSF command info into a dict.
    It only works with the Title-Item type informations.

    The first output line of "command" supplies the column titles, every
    following line supplies one whitespace-separated value per column.

    Return an OrderedDict mapping each title to the list of its column
    values (in line order). A line with fewer values than titles is reported
    with a warning and padded with empty strings; extra values are ignored.
    """
    myDic = collections.OrderedDict()
    keyList = []

    # Loop-invariant: only the lsload output needs the "*" preprocess.
    isLsload = 'lsload' in command

    # Close the pipe as soon as the output has been read.
    with os.popen(command) as pipe:
        lines = pipe.readlines()

    for i, rawLine in enumerate(lines):
        line = rawLine.strip()

        # Some special preprocess: replace "*" with a blank in lsload output.
        if isLsload:
            line = line.replace('*', ' ')

        if i == 0:
            keyList = line.split()

            for key in keyList:
                myDic[key] = []

            continue

        commandInfo = line.split()

        if len(commandInfo) < len(keyList):
            common.printWarning(
                '*Warning* (getCommandDict) : For command "' +
                str(command) +
                '", below info line is incomplate/unexpected.')
            common.printWarning('           ' + str(line))

        for j, key in enumerate(keyList):
            # Missing trailing columns are recorded as empty strings.
            value = commandInfo[j] if j < len(commandInfo) else ''
            myDic[key].append(value)

    return (myDic)
Beispiel #6
0
    def sampleJobInfo(self):
        """
        Sample job info, especially the memory usage info.

        Works in two passes over the job ranges returned by
        common.getJobRangeDic():
          1. Read pass  : open each "<dbPath>/job/<jobRange>.db" in 'read'
                          mode and plan, per job, whether its table must be
                          dropped and/or created, and which values to insert.
          2. Write pass : open each database in 'write' mode and apply the
                          planned drop/create/insert, with one commit per
                          database file.

        If a database cannot be opened in the write pass the method returns
        immediately: the remaining job ranges are not processed and the
        final summary lines are not printed.
        """
        # NOTE(review): presumably sets self.sampleTime and
        # self.currentSeconds, which are read below -- confirm.
        self.getDateInfo()

        print('>>> Sampling job info ...')

        command = 'bjobs -u all -r -UF'
        bjobsDic = lsf_common.getBjobsUfInfo(command)
        jobList = list(bjobsDic.keys())
        jobRangeDic = common.getJobRangeDic(jobList)
        # Per-job plan filled by the read pass and consumed by the write pass.
        jobSqlDic = {}

        # Columns of every job table.
        keyList = ['sampleTime', 'mem']

        # Pass 1: inspect the existing databases and build the per-job plan.
        for jobRange in jobRangeDic.keys():
            jobDbFile = str(self.dbPath) + '/job/' + str(jobRange) + '.db'
            (result, jobDbConn) = sqlite3_common.connectDbFile(jobDbFile,
                                                               mode='read')

            # Without a readable database no table is considered existing,
            # so every job of this range is planned for table creation.
            if result == 'passed':
                jobTableList = sqlite3_common.getSqlTableList(
                    jobDbFile, jobDbConn)
            else:
                jobTableList = []

            for job in jobRangeDic[jobRange]:
                jobTableName = 'job_' + str(job)

                print('    Sampling for job "' + str(job) + '" ...')

                jobSqlDic[job] = {
                    'drop': False,
                    'keyString': '',
                    'valueString': '',
                }

                # If job table (with old data) has been on the jobDbFile, drop it.
                if jobTableName in jobTableList:
                    dataDic = sqlite3_common.getSqlTableData(
                        jobDbFile, jobDbConn, jobTableName, ['sampleTime'])

                    if dataDic:
                        if len(dataDic['sampleTime']) > 0:
                            lastSampleTime = dataDic['sampleTime'][-1]
                            lastSeconds = int(
                                time.mktime(
                                    datetime.datetime.strptime(
                                        str(lastSampleTime),
                                        "%Y%m%d_%H%M%S").timetuple()))

                            # Last sample older than one hour (3600 seconds).
                            if self.currentSeconds - lastSeconds > 3600:
                                common.printWarning(
                                    '    *Warning*: table "' +
                                    str(jobTableName) +
                                    '" already existed even one hour ago, will drop it.'
                                )
                                jobSqlDic[job]['drop'] = True
                                # Removing the name makes the check below
                                # plan a fresh table after the drop.
                                jobTableList.remove(jobTableName)

                # If job table is not on the jobDbFile, create it.
                if jobTableName not in jobTableList:
                    keyString = sqlite3_common.genSqlTableKeyString(keyList)
                    jobSqlDic[job]['keyString'] = keyString

                # Insert sql table value.
                valueList = [self.sampleTime, bjobsDic[job]['mem']]
                valueString = sqlite3_common.genSqlTableValueString(valueList)
                jobSqlDic[job]['valueString'] = valueString

            if result == 'passed':
                jobDbConn.commit()
                jobDbConn.close()

        # Pass 2: apply the plan, one write connection per database file.
        for jobRange in jobRangeDic.keys():
            jobDbFile = str(self.dbPath) + '/job/' + str(jobRange) + '.db'
            (result, jobDbConn) = sqlite3_common.connectDbFile(jobDbFile,
                                                               mode='write')

            # Stops the whole method, not only this job range.
            if result != 'passed':
                return

            for job in jobRangeDic[jobRange]:
                jobTableName = 'job_' + str(job)

                # Order matters: drop the stale table, recreate it, insert.
                if jobSqlDic[job]['drop']:
                    sqlite3_common.dropSqlTable(jobDbFile,
                                                jobDbConn,
                                                jobTableName,
                                                commit=False)

                if jobSqlDic[job]['keyString'] != '':
                    sqlite3_common.createSqlTable(jobDbFile,
                                                  jobDbConn,
                                                  jobTableName,
                                                  jobSqlDic[job]['keyString'],
                                                  commit=False)

                if jobSqlDic[job]['valueString'] != '':
                    sqlite3_common.insertIntoSqlTable(
                        jobDbFile,
                        jobDbConn,
                        jobTableName,
                        jobSqlDic[job]['valueString'],
                        commit=False)

            # The helpers above are called with commit=False, so all changes
            # of this database file are committed here at once.
            jobDbConn.commit()
            jobDbConn.close()

        # Printed after the commits above have already been done.
        print('    Committing the update to sqlite3 ...')
        print('    Done (' + str(len(jobList)) + ' jobs).')
    def drawJobMemCurve(self, job):
        """
        Draw memory usage curve for specified job.

        The samples of table "job_<job>" are read from the job database file
        "<config.dbPath>/monitor/job/<jobRange>.db" and plotted as memory
        (sampled value / 1024, rounded to one decimal) against the run time
        in minutes since the first sample. The figure is saved as
        "<config.tmpPath>/<user>_<job>.png".

        Nothing is drawn (a warning is printed) when the database cannot be
        connected or when the job has no sample. The database connection is
        closed on every exit path once it has been opened.
        """
        jobRangeDic = common.getJobRangeDic([
            job,
        ])
        jobRange = list(jobRangeDic.keys())[0]
        self.jobDbFile = str(
            config.dbPath) + '/monitor/job/' + str(jobRange) + '.db'

        (self.jobDbFileConnectResult,
         self.jobDbConn) = sqlite3_common.connectDbFile(self.jobDbFile)
        if self.jobDbFileConnectResult == 'failed':
            common.printWarning(
                '*Warning*: Failed on connectiong job database file "' +
                str(self.jobDbFile) + '".')
            return

        sampleTimeFormat = '%Y%m%d_%H%M%S'

        try:
            if self.jobFirstLoad:
                common.printWarning(
                    '*Warning*: It is the first time loading job database, it may cost a little time ...'
                )
                self.jobFirstLoad = False

            print('Getting history of job memory usage for job "' + str(job) +
                  '".')
            tableName = 'job_' + str(job)
            dataDic = sqlite3_common.getSqlTableData(self.jobDbFile,
                                                     self.jobDbConn, tableName,
                                                     ['sampleTime', 'mem'])

            # An existing but empty table is treated like a missing one
            # instead of failing on the first-sample lookup below.
            if (not dataDic) or (len(dataDic['sampleTime']) == 0):
                common.printWarning(
                    '*Warning*: job information is missing for "' + str(job) +
                    '".')
                return

            runTimeList = dataDic['sampleTime']
            memList = dataDic['mem']
            realRunTimeList = []
            realMemList = []
            firstRunTime = datetime.datetime.strptime(
                str(runTimeList[0]), sampleTimeFormat).timestamp()

            for (runTime, mem) in zip(runTimeList, memList):
                currentRunTime = datetime.datetime.strptime(
                    str(runTime), sampleTimeFormat).timestamp()
                # Minutes since the first sample.
                realRunTimeList.append(
                    int((currentRunTime - firstRunTime) / 60))

                # An empty memory sample is counted as zero.
                if mem == '':
                    mem = '0'

                realMemList.append(round(int(mem) / 1024, 1))

            memCurveFig = str(config.tmpPath) + '/' + str(
                self.user) + '_' + str(job) + '.png'
            jobNum = common.stringToInt(job)

            print('Save job memory curve as "' + str(memCurveFig) + '".')
            common.drawPlot(realRunTimeList,
                            realMemList,
                            'runTime (Minitu)',
                            'memory (G)',
                            yUnit='G',
                            title='job : ' + str(job),
                            saveName=memCurveFig,
                            figureNum=jobNum)
        finally:
            # Also reached on the early return and on errors, so the
            # connection opened above is never left open.
            if self.jobDbFileConnectResult == 'passed':
                self.jobDbConn.close()
    def drawQueueJobNumCurve(self, queue):
        """
        Draw (PEND/RUN) job number curve for specified queue.

        The samples of table "queue_<queue>" are read through the queue
        database connection, grouped by date (the part of "sampleTime" in
        front of the first "_") and averaged per date (truncated to an
        integer). Only the last 15 dates are plotted. The figure is saved as
        "<config.tmpPath>/<user>_<queue>_jobNum.png".

        Every date gets exactly one PEND and one RUN average computed from
        all of its samples, so the date list and both value lists always
        have the same length.
        """
        if self.queueDbFileConnectResult == 'failed':
            common.printWarning(
                '*Warning*: Failed on connectiong queue database file "' +
                str(self.queueDbFile) + '".')
            return

        if self.queueFirstLoad:
            common.printWarning(
                '*Warning*: It is the first time loading queue database, it may cost a little time ...'
            )
            self.queueFirstLoad = False

        print('Getting history of queue PEND/RUN job number for queue "' +
              str(queue) + '".')
        tableName = 'queue_' + str(queue)
        dataDic = sqlite3_common.getSqlTableData(self.queueDbFile,
                                                 self.queueDbConn, tableName,
                                                 ['sampleTime', 'PEND', 'RUN'])

        if not dataDic:
            common.printWarning(
                '*Warning*: queue information is missing for "' + str(queue) +
                '".')
            return

        # Group the samples per date, dates are kept in first-seen order.
        dateList = []
        dateIndexDic = {}
        pendGroupList = []
        runGroupList = []

        for (sampleTime, pendNum, runNum) in zip(dataDic['sampleTime'],
                                                 dataDic['PEND'],
                                                 dataDic['RUN']):
            date = re.sub('_.*', '', sampleTime)

            if date not in dateIndexDic:
                dateIndexDic[date] = len(dateList)
                dateList.append(date)
                pendGroupList.append([])
                runGroupList.append([])

            groupIndex = dateIndexDic[date]
            pendGroupList[groupIndex].append(int(pendNum))
            runGroupList[groupIndex].append(int(runNum))

        # One average per date; every group holds at least one sample.
        pendList = [int(sum(group) / len(group)) for group in pendGroupList]
        runList = [int(sum(group) / len(group)) for group in runGroupList]

        # Cut dateList/pendList/runList, only save 15 days result.
        if len(dateList) > 15:
            dateList = dateList[-15:]
            pendList = pendList[-15:]
            runList = runList[-15:]

        if len(dateList) == 0:
            common.printWarning(
                '*Warning*: PEND/RUN job number information is missing for queue "'
                + str(queue) + '".')
            return

        queueJobNumCurveFig = str(config.tmpPath) + '/' + str(
            self.user) + '_' + str(queue) + '_jobNum.png'
        queueNum = common.stringToInt(queue)

        print('Save queue PEND/RUN job numeber curve as "' +
              str(queueJobNumCurveFig) + '".')
        common.drawPlots(dateList, [pendList, runList],
                         'DATE',
                         'NUM', ['PEND', 'RUN'],
                         xIsString=True,
                         title='queue : ' + str(queue),
                         saveName=queueJobNumCurveFig,
                         figureNum=queueNum)
Beispiel #9
0
    def genHostsTabTable(self):
        """
        Fill the hosts table with one row per host of the selected queue.

        Columns: Host, Status, Queue, Njobs, Ncpus, Ut (%), Mem (G),
        Maxmem (G), swp (G), maxswp (G). The host list is self.hostList,
        restricted to the hosts serving the queue selected in the queue
        combo box unless 'ALL' is selected.

        Numeric columns are stored as integers so they sort numerically.
        Memory/swap values given with an "M" unit are converted to G
        (divided by 1024); values with a "G" unit are used directly. A value
        that cannot be interpreted is reported with a warning and shown as 0.

        Sorting is switched off while the rows are written and switched on
        again afterwards, so that the table cannot reorder rows between two
        setItem() calls of the same host.
        """
        print('* Updating hosts information, please wait a moment ...')

        def makeIntItem(value):
            # Numeric item (DisplayRole holds an int, not a string).
            numItem = QTableWidgetItem()
            numItem.setData(Qt.DisplayRole, int(value))
            return numItem

        def checkedCount(hostName, label, text):
            # Return "text" if it only holds digits, else warn and return 0.
            if re.match(r'^[0-9]+$', text):
                return text

            common.printWarning('*Warning*: host(' + str(hostName) + ') ' +
                                label + ' info "' + str(text) +
                                '": invalid value, reset it to "0".')
            return 0

        def toGigabytes(hostName, label, text):
            # Convert an "<number>M" / "<number>G" string into a G value.
            if re.search('M', text):
                number = re.sub('M', '', text)
                divisor = 1024
            elif re.search('G', text):
                number = re.sub('G', '', text)
                divisor = 1
            else:
                common.printWarning('*Warning*: host(' + str(hostName) +
                                    ') ' + label + ' info "' + str(text) +
                                    '": unrecognized unit, reset it to "0".')
                return 0

            try:
                # float() also accepts fractional values such as "7.8".
                return float(number) / divisor
            except ValueError:
                common.printWarning('*Warning*: host(' + str(hostName) +
                                    ') ' + label + ' info "' + str(text) +
                                    '": invalid value, reset it to "0".')
                return 0

        self.hostsTabTable.setShowGrid(True)
        # Keep sorting off until every row is complete.
        self.hostsTabTable.setSortingEnabled(False)
        self.hostsTabTable.setColumnCount(10)
        self.hostsTabTable.setHorizontalHeaderLabels([
            'Host', 'Status', 'Queue', 'Njobs', 'Ncpus', 'Ut (%)', 'Mem (G)',
            'Maxmem (G)', 'swp (G)', 'maxswp (G)'
        ])

        queue = self.hostsTabQueueCombo.currentText().strip()

        bhostsDic = openlava_common.getBhostsInfo()
        lshostsDic = openlava_common.getLshostsInfo()
        lsloadDic = openlava_common.getLsloadInfo()
        hostQueueDic = openlava_common.getHostQueueInfo()

        # Get expected host list
        if queue == 'ALL':
            self.queueHostList = self.hostList
        else:
            self.queueHostList = [
                host for host in self.hostList
                if (host in hostQueueDic) and (queue in hostQueueDic[host])
            ]

        self.hostsTabTable.setRowCount(len(self.queueHostList))

        for (row, host) in enumerate(self.queueHostList):
            # Position of the host in each command output, looked up once.
            bhostsIndex = bhostsDic['HOST_NAME'].index(host)
            lshostsIndex = lshostsDic['HOST_NAME'].index(host)
            lsloadIndex = lsloadDic['HOST_NAME'].index(host)

            # Host
            self.hostsTabTable.setItem(row, 0, QTableWidgetItem(host))

            # Status, closed hosts are highlighted.
            status = bhostsDic['STATUS'][bhostsIndex]
            item = QTableWidgetItem(status)
            if str(status) == 'closed':
                item.setFont(QFont('song', 10, QFont.Bold))
                item.setForeground(QBrush(Qt.red))
            self.hostsTabTable.setItem(row, 1, item)

            # Queue, left empty for a host that serves no queue.
            if host in hostQueueDic.keys():
                queues = ' '.join(hostQueueDic[host])
                self.hostsTabTable.setItem(row, 2, QTableWidgetItem(queues))

            # Njobs
            njobs = checkedCount(host, 'NJOBS',
                                 bhostsDic['NJOBS'][bhostsIndex])
            self.hostsTabTable.setItem(row, 3, makeIntItem(njobs))

            # Ncpus
            ncpus = checkedCount(host, 'ncpus',
                                 lshostsDic['ncpus'][lshostsIndex])
            self.hostsTabTable.setItem(row, 4, makeIntItem(ncpus))

            # Ut (%), the "%" sign is removed before the check.
            ut = re.sub('%', '', lsloadDic['ut'][lsloadIndex])
            ut = checkedCount(host, 'ut', ut)
            self.hostsTabTable.setItem(row, 5, makeIntItem(ut))

            # Mem (G), Maxmem (G), swp (G), maxswp (G)
            memoryColumns = [
                (6, 'mem', lsloadDic['mem'][lsloadIndex]),
                (7, 'maxmem', lshostsDic['maxmem'][lshostsIndex]),
                (8, 'swp', lsloadDic['swp'][lsloadIndex]),
                (9, 'maxswp', lshostsDic['maxswp'][lshostsIndex]),
            ]

            for (column, label, text) in memoryColumns:
                gigabytes = toGigabytes(host, label, text)
                self.hostsTabTable.setItem(row, column,
                                           makeIntItem(gigabytes))

        self.hostsTabTable.setSortingEnabled(True)
Beispiel #10
0
 def guiWarning(self, warningMessage):
     """
     Show the specified warning message on both of command line and GUI window.

     The message is first printed with common.printWarning(), then shown in
     a QMessageBox warning dialog titled 'openlavaMonitor Warning', with
     this object passed as the first argument of the dialog call.
     """
     common.printWarning(warningMessage)
     QMessageBox.warning(self, 'openlavaMonitor Warning', warningMessage)