def read_jobs(self):
    """Run ``squeue`` and rebuild ``self.jobs`` from its output.

    The command is ``<self._path>/squeue`` with an output format made of
    space-separated ``Key=Value`` pairs (JobId, TimeUsed, Partition,
    JobState, ReqNodes, ReqCPUs, TimeLimit, Name, NodeList).  It is run
    through ``execute_remote`` when ``self._ssh`` is set, otherwise through
    ``excute_local``.

    Each output line is turned into a dict; ``TimeUsed`` and ``TimeLimit``
    (when present) are converted with ``SLURMInfo.as_period`` and the dict
    is stored in ``self.jobs`` under its ``JobId`` value.  Lines that raise
    ``ValueError`` while being parsed are skipped.

    Raises:
        ArcError: if the command finishes with a non-zero return code; the
            message carries the command's stderr lines.
    """
    self.jobs = {}
    # NOTE(review): `excute_local` is spelled as the original has it; confirm
    # that this matches the name of the helper that is actually in scope.
    execute = excute_local if not self._ssh else execute_remote
    handle = execute('%s/squeue -a -h -t all -o \'JobId=%%i TimeUsed=%%M Partition=%%P JobState=%%T '
                     'ReqNodes=%%D ReqCPUs=%%C TimeLimit=%%l Name=%%j NodeList=%%N\'' % (self._path))
    if handle.returncode:
        raise ArcError('squeue error: %s' % '\n'.join(handle.stderr), 'SLURMInfo')
    for line in handle.stdout:
        try:
            job = dict(item.split('=', 1) for item in LRMSInfo.split(line.strip()))
            # Membership is tested with `in`: dict.has_key() is deprecated in
            # Python 2 and does not exist at all in Python 3.
            for key in ('TimeUsed', 'TimeLimit'):
                if key in job:
                    job[key] = SLURMInfo.as_period(job[key])
            self.jobs[job['JobId']] = job
        except ValueError:
            # Couldn't split: blank line, header etc ..
            continue
def read_nodes(self):
    """Run ``scontrol show node --oneliner`` and rebuild ``self.nodes``.

    The command is run through ``execute_remote`` when ``self._ssh`` is set,
    otherwise through ``excute_local``.  Every output line is parsed as
    ``Key=Value`` pairs, and a record holding only NodeName, CPUTot,
    RealMemory, State, Sockets, OS and Arch is stored in ``self.nodes``
    under the node's name.  Lines that lack one of those attributes, or that
    cannot be split into pairs, are skipped.

    Raises:
        ArcError: if the command finishes with a non-zero return code; the
            message carries the command's stderr lines.
    """
    wanted = ('NodeName', 'CPUTot', 'RealMemory', 'State', 'Sockets', 'OS', 'Arch')
    self.nodes = {}
    # NOTE(review): `excute_local` is spelled as the original has it; confirm
    # that this matches the name of the helper that is actually in scope.
    if self._ssh:
        run = execute_remote
    else:
        run = excute_local
    result = run('%s/scontrol show node --oneliner' % (self._path))
    if result.returncode:
        raise ArcError('scontrol error: %s' % '\n'.join(result.stderr), 'SLURMInfo')
    for raw in result.stdout:
        try:
            fields = dict(pair.split('=', 1) for pair in LRMSInfo.split(raw.strip()))
            node = {}
            for key in wanted:
                node[key] = fields[key]
            # The node state can carry trailing marker symbols (unresponsive,
            # powersaving, etc.); strip them so only the bare state is kept.
            node['State'] = node['State'].rstrip('*~#+')
            self.nodes[node['NodeName']] = node
        except (KeyError, ValueError):
            # KeyError: attributes are missing, the node is probably down.
            # ValueError: couldn't split (blank line, header etc.).
            # Either way the line is skipped.
            continue
def read_partitions(self):
    """Run ``sinfo`` and rebuild ``self.partitions`` from its output.

    The command is ``<self._path>/sinfo`` with an output format made of
    ``PartitionName``, ``TotalCPUs``, ``TotalNodes`` and ``MaxTime``
    ``Key=Value`` pairs.  It is run through ``execute_remote`` when
    ``self._ssh`` is set, otherwise through ``excute_local``.

    For every parsed line:
      * trailing ``*`` characters are stripped from the partition name
        (presumably sinfo's default-partition marker -- confirm);
      * ``MaxTime`` is converted with ``SLURMInfo.as_period``;
      * the ``%C`` field ('allocated/idle/other/total') is split into
        ``AllocatedCPUs``, ``IdleCPUs``, ``OtherCPUs`` and ``TotalCPUs``,
        each converted with ``SLURMInfo.parse_number``;
      * ``TotalNodes`` is converted with ``SLURMInfo.parse_number``.
    The resulting dict is stored under the stripped partition name.  Lines
    that raise ``ValueError`` while being processed are skipped.

    Raises:
        ArcError: if the command finishes with a non-zero return code; the
            message carries the command's stderr lines.
    """
    self.partitions = {}
    # NOTE(review): `excute_local` is spelled as the original has it; confirm
    # that this matches the name of the helper that is actually in scope.
    if self._ssh:
        run = execute_remote
    else:
        run = excute_local
    command = ('%s/sinfo -a -h -o \'PartitionName=%%P TotalCPUs=%%C '
               'TotalNodes=%%D MaxTime=%%l\'' % (self._path))
    result = run(command)
    if result.returncode:
        raise ArcError('sinfo error: %s' % '\n'.join(result.stderr), 'SLURMInfo')
    for raw in result.stdout:
        try:
            info = dict(pair.split('=', 1) for pair in LRMSInfo.split(raw.strip()))
            name = info['PartitionName'].rstrip('*')
            info['PartitionName'] = name
            info['MaxTime'] = SLURMInfo.as_period(info['MaxTime'])
            # Format of '%C' is: Number of CPUs by state in the format
            # 'allocated/idle/other/total'
            cpu_counts = map(SLURMInfo.parse_number, info['TotalCPUs'].split('/'))
            (info['AllocatedCPUs'], info['IdleCPUs'],
             info['OtherCPUs'], info['TotalCPUs']) = cpu_counts
            info['TotalNodes'] = SLURMInfo.parse_number(info['TotalNodes'])
            self.partitions[name] = info
        except ValueError:
            # Couldn't split: blank line, header etc ..
            continue
def get_lrms_options_schema():
    """Return the schema built by ``LRMSInfo.get_lrms_options_schema``.

    The call is forwarded with the single keyword argument
    ``slurm_bin_path='*'``; the meaning of the ``'*'`` value is defined by
    ``LRMSInfo.get_lrms_options_schema`` and is not visible from here.
    """
    slurm_options = {'slurm_bin_path': '*'}
    return LRMSInfo.get_lrms_options_schema(**slurm_options)
def get_lrms_options_schema():
    """Return the result of ``LRMSInfo.get_lrms_options_schema()`` as is.

    No extra options are passed to the base schema call.
    """
    schema = LRMSInfo.get_lrms_options_schema()
    return schema