def parseAllQueuesInfo(queues, pbsHost=None):
    handlers = dict()

    # Reading server-level attributes once; they are shared by all queues
    if pbsHost:
        cmd = shlex.split('qstat -B -f @%s' % pbsHost)
    else:
        cmd = shlex.split('qstat -B -f')
    slh = QueueInfoHandler()
    CommonUtils.parseStream(cmd, slh)

    # Reading per-queue attributes, chaining the server-level handler
    for queue in queues:
        if pbsHost:
            cmd = shlex.split('qstat -Q -f %s@%s' % (queue, pbsHost))
        else:
            cmd = shlex.split('qstat -Q -f %s' % queue)
        logger.debug("Calling executable: " + repr(cmd))
        handlers[queue] = QueueInfoHandler(slh)
        CommonUtils.parseStream(cmd, handlers[queue])

    return handlers

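# Usage sketch (not part of the original module; queue names and host are
# hypothetical): the returned dict maps each queue name to the handler that
# parsed its 'qstat -Q -f' output.
#
#     handlers = parseAllQueuesInfo(['batch', 'long'], pbsHost='pbs.example.org')
#     for qname in handlers:
#         logger.debug("Collected attributes for queue %s" % qname)
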
def parse(resultContainer, pbsHost=None, filename=None):
    if filename:
        cmd = shlex.split('cat ' + filename)
    else:
        if pbsHost:
            cmd = shlex.split('qstat -f @%s' % pbsHost)
        else:
            cmd = shlex.split('qstat -f')
    container = PBSJobHandler(resultContainer)
    CommonUtils.parseStream(cmd, container)

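# Usage sketch (hypothetical; the expected shape of resultContainer is
# defined by PBSJobHandler, which is not shown in this excerpt):
#
#     resultContainer = {}   # assumed container shape
#     parse(resultContainer, pbsHost='pbs.example.org')
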
def parseGPUInfo(cudaHost, filename=None):
    if filename:
        cmd = shlex.split('cat ' + filename)
    else:
        # The embedded quotes make shlex keep the whole nvidia-smi
        # invocation as a single argument for the remote shell
        smi_cmd = '"nvidia-smi --query-compute-apps=gpu_uuid,pid --format=csv,noheader"'
        ssh_opts = '-o PasswordAuthentication=no'
        cmd = shlex.split('ssh %s %s %s' % (ssh_opts, cudaHost, smi_cmd))
    logger.debug("Calling executable: " + repr(cmd))
    container = GPUInfoHandler()
    CommonUtils.parseStream(cmd, container)
    return container

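def _exampleGPUProcs(cudaHost):
    # Hypothetical helper, not in the original module: a minimal sketch of
    # how the handler returned by parseGPUInfo is meant to be consumed,
    # assuming num_of_procs maps GPU UUIDs to process counts (see the
    # nvidia-smi parser below).
    container = parseGPUInfo(cudaHost)
    for gpu_uuid, nproc in container.num_of_procs.items():
        logger.debug("GPU %s runs %d compute process(es)" % (gpu_uuid, nproc))
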
def parseQueueInfo(queue, pbsHost=None, filename=None):
    if filename:
        cmd = shlex.split('cat ' + filename)
    else:
        if pbsHost:
            cmd = shlex.split('qstat -Q -f %s@%s' % (queue, pbsHost))
        else:
            cmd = shlex.split('qstat -Q -f %s' % queue)
    logger.debug("Calling executable: " + repr(cmd))
    container = QueueInfoHandler()
    CommonUtils.parseStream(cmd, container)
    return container

def parseLRMSVersion(pbsHost=None, filename=None):
    if filename:
        cmd = shlex.split('cat ' + filename)
    else:
        if pbsHost:
            cmd = shlex.split('qstat -B -f %s' % pbsHost)
        else:
            cmd = shlex.split('qstat -B -f')
    logger.debug("Calling executable: " + repr(cmd))
    container = LRMSVersionHandler()
    CommonUtils.parseStream(cmd, container)
    return container.version

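# Usage sketch (hypothetical host): LRMSVersionHandler exposes the parsed
# server version string through its 'version' attribute.
#
#     version = parseLRMSVersion(pbsHost='pbs.example.org')
#     logger.debug("Detected LRMS version: %s" % version)
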
def parseJobLimit(pbsHost=None, keyfile=None, filename=None):
    if filename:
        cmd = shlex.split('cat ' + filename)
    else:
        tmps = 'diagnose -g'
        if pbsHost:
            tmps += ' --host=%s' % pbsHost
        if keyfile:
            tmps += ' --keyfile=%s' % keyfile
        cmd = shlex.split(tmps)
    container = DiagnoseHandler()
    CommonUtils.parseStream(cmd, container)
    return container

def parseCPUInfo(pbsHost=None, filename=None):
    if filename:
        cmd = shlex.split('cat ' + filename)
    else:
        if pbsHost:
            cmd = shlex.split('pbsnodes -a -s %s' % pbsHost)
        else:
            cmd = shlex.split('pbsnodes -a')
    logger.debug("Calling executable: " + repr(cmd))
    container = CPUInfoHandler()
    CommonUtils.parseStream(cmd, container)
    return container

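def _exampleCPUSummary(pbsHost=None):
    # Hypothetical helper, not in the original module: a sketch of the
    # counters CPUInfoHandler exposes after parsing pbsnodes output
    # (freeCPU, totalCPU and errList; see the handler below).
    container = parseCPUInfo(pbsHost)
    if container.errList:
        logger.debug("pbsnodes parsing reported %d error(s)" % len(container.errList))
    logger.debug("Free/total CPUs: %d/%d" % (container.freeCPU, container.totalCPU))
    return container.freeCPU, container.totalCPU
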
def run(self):
    currState = None
    try:
        line = self.stream.readline()
        while line:
            parsed = self.pRegex.match(line)
            if parsed:
                logger.debug("Detected item: %s" % line.strip())
                if parsed.group(1) == 'state':
                    currState = parsed.group(2).strip()
                elif parsed.group(1) == 'np':
                    procNum = int(parsed.group(2).strip())
                    # Count only nodes that are usable by the batch system
                    if not ('down' in currState or 'offline' in currState
                            or 'unknown' in currState):
                        self.totalCPU += procNum
                        if currState == 'free':
                            self.freeCPU += procNum
                elif parsed.group(1) == 'jobs':
                    jobList = parsed.group(2).strip()
                    # On a free node every comma-separated job entry
                    # occupies one slot
                    if currState == 'free' and len(jobList) > 0:
                        self.freeCPU -= jobList.count(',') + 1
            line = self.stream.readline()
    except Exception:
        logger.debug("Error parsing pbsnodes output", exc_info=True)
        self.errList.append(CommonUtils.errorMsgFromTrace())

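# Worked example (illustrative): for a node section reporting
#
#     state = free
#     np = 8
#     jobs = 0/123.pbs.example.org, 1/124.pbs.example.org
#
# the loop above adds 8 to both totalCPU and freeCPU, then subtracts 2 from
# freeCPU (one per comma-separated job entry), leaving 6 free slots.
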
def run(self):
    try:
        line = self.stream.readline()
        while line:
            # Each CSV line is "gpu_uuid, pid"; count processes per GPU
            tmptuple = line.strip().split(',')
            if len(tmptuple) == 2:
                gpu_uuid = tmptuple[0].strip()
                if gpu_uuid not in self.num_of_procs:
                    self.num_of_procs[gpu_uuid] = 1
                else:
                    self.num_of_procs[gpu_uuid] += 1
            line = self.stream.readline()
    except Exception:
        logger.debug("Error parsing nvidia-smi output", exc_info=True)
        self.errList.append(CommonUtils.errorMsgFromTrace())

def run(self):
    currState = None
    currNode = None
    try:
        line = self.stream.readline()
        while line:
            parsed = self.pRegex.match(line)
            if parsed:
                logger.debug("Detected item: %s" % line.strip())
                if parsed.group(1) == 'state':
                    currState = parsed.group(2).strip()
                elif parsed.group(1) == 'np':
                    procNum = int(parsed.group(2).strip())
                    if not ('down' in currState or 'offline' in currState
                            or 'unknown' in currState):
                        self.totalCPU += procNum
                        if currState == 'free':
                            self.freeCPU += procNum
                elif parsed.group(1) == 'jobs':
                    jobList = parsed.group(2).strip()
                    if currState == 'free' and len(jobList) > 0:
                        self.freeCPU -= jobList.count(',') + 1
                elif parsed.group(1) == 'gpu_status':
                    # Per-node GPU accounting driven by the gpu_status attribute
                    gpuNodeInfo = dict()
                    gpuNodeInfo['node_state'] = currState
                    gpuNodeInfo['total_gpus'] = 0
                    gpuNodeInfo['free_gpus'] = 0
                    for gpuStats in self.gpuRegex.split(parsed.group(2).strip()):
                        gpuStats = gpuStats.strip()
                        if len(gpuStats) == 0:
                            continue
                        # Assume fully used unless both counters report 0
                        curr_gpu_use = 100
                        curr_mem_use = 100
                        for pStr in gpuStats.split(';'):
                            res = self.pRegex.match(pStr)
                            if res.group(1) == 'gpu_utilization':
                                curr_gpu_use = int(re.match(r'\d+', res.group(2)).group(0))
                            elif res.group(1) == 'gpu_memory_utilization':
                                curr_mem_use = int(re.match(r'\d+', res.group(2)).group(0))
                        if curr_gpu_use == 0 and curr_mem_use == 0:
                            gpuNodeInfo['free_gpus'] += 1
                        gpuNodeInfo['total_gpus'] += 1
                    self.gpuTable[currNode] = gpuNodeInfo
            else:
                # Unindented non-empty lines carry the node name
                tmps = line.strip()
                if len(tmps):
                    currNode = tmps
            line = self.stream.readline()
    except Exception:
        logger.debug("Error parsing pbsnodes output", exc_info=True)
        self.errList.append(CommonUtils.errorMsgFromTrace())

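# Sketch of the resulting gpuTable layout (an assumption inferred from the
# code above, not from the original documentation):
#
#     self.gpuTable = {
#         'wn-01.example.org': {'node_state': 'free',
#                               'total_gpus': 2,
#                               'free_gpus': 1},
#     }
#
# A GPU is counted as free only when both gpu_utilization and
# gpu_memory_utilization are reported as 0 in the node's gpu_status line.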