def obtainSystemConstants(cls):
    # expect qhost output is in the form:
    # HOSTNAME ARCH NCPU NSOC NCOR NTHR NLOAD MEMTOT MEMUSE SWAPTO SWAPUS
    # ----------------------------------------------------------------------------------------------
    # global - - - - - - - - - -
    # compute-1-1 lx-amd64 72 2 36 72 0.49 188.8G 79.6G 92.7G 19.2G
    # compute-1-10 lx-amd64 72 2 36 72 0.22 188.8G 51.1G 92.7G 2.8G
    lines = call_command(["qhost"]).strip().split('\n')
    items = lines[0].strip().split()
    num_columns = len(items)
    cpu_index = None
    mem_index = None
    for i in range(num_columns):
        if items[i] == 'NCPU':
            cpu_index = i
        elif items[i] == 'MEMTOT':
            mem_index = i
    if cpu_index is None or mem_index is None:
        raise RuntimeError('qhost command does not return NCPU or MEMTOT columns')
    maxCPU = 0
    maxMEM = MemoryString("0")
    for line in lines[2:]:
        items = line.strip().split()
        if len(items) < num_columns:
            raise RuntimeError('qhost output has a varying number of columns')
        if items[cpu_index] != '-' and int(items[cpu_index]) > maxCPU:
            maxCPU = int(items[cpu_index])
        if items[mem_index] != '-' and MemoryString(items[mem_index]) > maxMEM:
            maxMEM = MemoryString(items[mem_index])
    if maxCPU == 0 or maxMEM == MemoryString("0"):
        raise RuntimeError('qhost returned null NCPU or MEMTOT info')
    return maxCPU, maxMEM
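# A minimal, self-contained sketch of the column-index scan above, run against the
# sample qhost output quoted in the comments. `MemoryString` is the memory-unit
# wrapper used by the real code; since it is defined elsewhere, this sketch
# substitutes a hypothetical `to_bytes` helper so it runs standalone.
def _demo_qhost_parse():
    sample = (
        "HOSTNAME ARCH NCPU NSOC NCOR NTHR NLOAD MEMTOT MEMUSE SWAPTO SWAPUS\n"
        "--------------------------------------------------------------------\n"
        "global - - - - - - - - - -\n"
        "compute-1-1 lx-amd64 72 2 36 72 0.49 188.8G 79.6G 92.7G 19.2G\n"
    )

    def to_bytes(s):  # hypothetical stand-in for MemoryString
        units = {'K': 2 ** 10, 'M': 2 ** 20, 'G': 2 ** 30, 'T': 2 ** 40}
        return float(s[:-1]) * units[s[-1]] if s[-1] in units else float(s)

    lines = sample.strip().split('\n')
    header = lines[0].split()
    cpu_index, mem_index = header.index('NCPU'), header.index('MEMTOT')
    max_cpu, max_mem = 0, 0.0
    for line in lines[2:]:  # skip the header and the '----' separator row
        items = line.split()
        if items[cpu_index] != '-':
            max_cpu = max(max_cpu, int(items[cpu_index]))
        if items[mem_index] != '-':
            max_mem = max(max_mem, to_bytes(items[mem_index]))
    return max_cpu, max_mem  # -> (72, 202722456371.2), i.e. 188.8G in bytes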
def getJobExitCode(self, lsfJobID):
    # the task is set as part of the job ID if using getBatchSystemID()
    if "NOT_SUBMITTED" in lsfJobID:
        logger.error("bjobs detected job failed to submit")
        return 1

    job, task = (lsfJobID, None)
    if '.' in lsfJobID:
        job, task = lsfJobID.split('.', 1)
    self.parseMaxMem(job)
    # first try bjobs to find out job state
    if check_lsf_json_output_supported:
        args = ["bjobs", "-json", "-o",
                "user exit_code stat exit_reason pend_reason", str(job)]
        logger.debug("Checking job exit code for job via bjobs: {}".format(job))
        stdout = call_command(args)
        bjobs_records = self.parseBjobs(stdout)
        if bjobs_records:
            process_output = bjobs_records[0]
            return self.parse_bjobs_record(process_output, job)

    return self.fallbackGetJobExitCode(job)
def getJobExitCode(self, torqueJobID):
    if self._version == "pro":
        args = ["qstat", "-x", "-f", str(torqueJobID).split('.')[0]]
    elif self._version == "oss":
        args = ["qstat", "-f", str(torqueJobID).split('.')[0]]

    stdout = call_command(args)
    for line in stdout.split('\n'):
        line = line.strip()
        # Case differences due to PBSPro vs OSS Torque qstat outputs
        if (line.startswith("failed") or line.startswith("FAILED")) and int(line.split()[1]) == 1:
            return 1
        if line.startswith("exit_status") or line.startswith("Exit_status"):
            status = line.split(' = ')[1]
            logger.debug('Exit Status: ' + status)
            return int(status)
        if 'unknown job id' in line.lower():
            # Some clusters configure Torque to forget everything about just
            # finished jobs instantly, apparently for performance reasons.
            logger.debug('Batch system no longer remembers about job {}'.format(torqueJobID))
            # return assumed success; status files should reveal failure
            return 0
    return None
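# The qstat -f parsing above hinges on output lines of the form "Exit_status = <n>"
# (or lowercase "exit_status" on some Torque builds). A minimal sketch of that
# extraction on an illustrative output fragment:
def _demo_qstat_exit_status():
    fragment = "    Exit_status = 271\n    comment = Job run at ..."
    for line in fragment.split('\n'):
        line = line.strip()
        if line.startswith("exit_status") or line.startswith("Exit_status"):
            return int(line.split(' = ')[1])  # -> 271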
def _getJobDetailsFromSacct(self, slurmJobID):
    # SLURM job exit codes are obtained by running sacct.
    args = ['sacct',
            '-n',                          # no header
            '-j', str(slurmJobID),         # job
            '--format', 'State,ExitCode',  # specify output columns
            '-P',                          # separate columns with pipes
            '-S', '1970-01-01']            # override start time limit
    stdout = call_command(args)
    for line in stdout.split('\n'):
        logger.debug("%s output %s", args[0], line)
        values = line.strip().split('|')
        if len(values) < 2:
            continue
        state, exitcode = values
        logger.debug("sacct job state is %s", state)
        # ExitCode is reported as "<returncode>:<signal>".
        status, signal = [int(n) for n in exitcode.split(':')]
        if signal > 0:
            # A non-zero signal may indicate e.g. an out-of-memory killed job
            status = 128 + signal
        logger.debug("sacct exit code is %s, returning status %d", exitcode, status)
        return state, status
    logger.debug("Did not find exit code for job in sacct output")
    return None, None
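# A quick sketch of the signal-folding convention used above: sacct's ExitCode field
# is "<returncode>:<signal>", and a non-zero signal is mapped to 128 + signal,
# mirroring shell exit-status behaviour. The sample line is illustrative.
def _demo_sacct_exitcode(line="FAILED|0:9"):
    state, exitcode = line.strip().split('|')
    status, signal = [int(n) for n in exitcode.split(':')]
    if signal > 0:
        status = 128 + signal  # e.g. SIGKILL (9) -> 137, typical of an OOM kill
    return state, status  # -> ('FAILED', 137)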
def submitJob(self, subLine):
    combinedEnv = self.boss.environment
    combinedEnv.update(os.environ)
    stdout = call_command(subLine, env=combinedEnv)
    line = stdout.split('\n')[0]
    result = int(line.strip().split()[1].strip('<>'))
    logger.debug("Got the job id: {}".format(result))
    return result
def getRunningJobIDs(self):
    times = {}
    with self.runningJobsLock:
        currentjobs = dict((str(self.batchJobIDs[x][0].strip()), x) for x in self.runningJobs)
    logger.debug("getRunningJobIDs current jobs are: " + str(currentjobs))
    # Skip running qstat if we don't have any current jobs
    if not currentjobs:
        return times
    # Only query the given job IDs to avoid clogging the batch system on heavily
    # loaded clusters; plain qstat would return every running job on the system.
    jobids = sorted(list(currentjobs.keys()))
    if self._version == "pro":
        stdout = call_command(['qstat', '-x'] + jobids)
    elif self._version == "oss":
        stdout = call_command(['qstat'] + jobids)

    # qstat supports XML output, which is more comprehensive, but PBSPro does not
    # support it, so instead we stick with plain command-line qstat tabular output.
    for currline in stdout.split('\n'):
        items = currline.strip().split()
        if items:
            jobid = items[0].strip()
            if jobid in currentjobs:
                logger.debug("getRunningJobIDs job status is: " + items[4])
            if jobid in currentjobs and items[4] == 'R':
                walltime = items[3]
                logger.debug("getRunningJobIDs qstat reported walltime is: " + walltime)
                # normal qstat has a quirk with job time where it reports '0'
                # when initially running; this catches this case
                if walltime == '0':
                    walltime = time.mktime(time.strptime(walltime, "%S"))
                else:
                    walltime = time.mktime(time.strptime(walltime, "%H:%M:%S"))
                times[currentjobs[jobid]] = walltime
    logger.debug("Job times from qstat are: " + str(times))
    return times
def submitJob(self, subLine):
    try:
        output = call_command(subLine)
        # sbatch prints a line like 'Submitted batch job 2954103'
        result = int(output.strip().split()[-1])
        logger.debug("sbatch submitted job %d", result)
        return result
    except OSError as e:
        logger.error("sbatch command failed")
        raise e
def fallbackRunningJobIDs(self, currentjobs):
    times = {}
    stdout = call_command(["bjobs", "-o", "jobid stat start_time delimiter='|'"])
    for curline in stdout.split('\n'):
        items = curline.strip().split('|')
        if items[0] in currentjobs and items[1] == 'RUN':
            jobstart = parse(items[2], default=datetime.now(tzlocal()))
            times[currentjobs[items[0]]] = datetime.now(tzlocal()) - jobstart
    return times
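# `parse` above is dateutil.parser.parse; its `default` argument fills in any fields
# missing from bjobs' abbreviated start_time (the timestamp usually has no year).
# A minimal sketch of the elapsed-time computation, with an illustrative timestamp:
from datetime import datetime
from dateutil.parser import parse
from dateutil.tz import tzlocal

def _demo_elapsed(start_time="Oct 9 10:00"):
    jobstart = parse(start_time, default=datetime.now(tzlocal()))
    return datetime.now(tzlocal()) - jobstart  # a datetime.timedelta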
def _getJobDetailsFromSacct(self, job_id_list: list) -> dict:
    """
    Get SLURM job exit codes for the jobs in `job_id_list` by running `sacct`.

    :param job_id_list: list of integer batch job IDs.
    :return: dict of job statuses, where key is the job-id, and value is a tuple
             containing the job's state and exit code.
    """
    job_ids = ",".join(str(id) for id in job_id_list)
    args = ['sacct',
            '-n',                                   # no header
            '-j', job_ids,                          # jobs
            '--format', 'JobIDRaw,State,ExitCode',  # specify output columns
            '-P',                                   # separate columns with pipes
            '-S', '1970-01-01']                     # override start time limit
    stdout = call_command(args)

    # Collect the job statuses in a dict; key is the job-id, value is a tuple containing
    # job state and exit status. Initialize dict before processing output of `sacct`.
    job_statuses = {}
    for job_id in job_id_list:
        job_statuses[job_id] = (None, None)

    for line in stdout.splitlines():
        #logger.debug("%s output %s", args[0], line)
        values = line.strip().split('|')
        if len(values) < 3:
            continue
        job_id_raw, state, exitcode = values
        logger.debug("%s state of job %s is %s", args[0], job_id_raw, state)
        # JobIDRaw is in the form JobID[.JobStep]; we're not interested in job steps.
        job_id_parts = job_id_raw.split(".")
        if len(job_id_parts) > 1:
            continue
        job_id = int(job_id_parts[0])
        status, signal = [int(n) for n in exitcode.split(':')]
        if signal > 0:
            # A non-zero signal may indicate e.g. an out-of-memory killed job
            status = 128 + signal
        logger.debug("%s exit code of job %d is %s, return status %d",
                     args[0], job_id, exitcode, status)
        job_statuses[job_id] = state, status
    logger.debug("%s returning job statuses: %s", args[0], job_statuses)
    return job_statuses
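# sacct emits one row per job step in addition to the parent job, e.g. JobIDRaw
# values "123", "123.batch", "123.extern". The step-filtering rule above keeps only
# the parent row. A small sketch on illustrative rows:
def _demo_step_filter(rows=("123|COMPLETED|0:0", "123.batch|COMPLETED|0:0")):
    kept = {}
    for row in rows:
        job_id_raw, state, exitcode = row.split('|')
        if '.' in job_id_raw:  # a job step such as "batch" or "extern"; skip it
            continue
        kept[int(job_id_raw)] = (state, exitcode)
    return kept  # -> {123: ('COMPLETED', '0:0')}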
def getRunningJobIDs(self): times = {} with self.runningJobsLock: currentjobs = dict((str(self.batchJobIDs[x][0]), x) for x in self.runningJobs) stdout = call_command(["qstat"]) for currline in stdout.split('\n'): items = currline.strip().split() if items: if items[0] in currentjobs and items[4] == 'r': jobstart = " ".join(items[5:7]) jobstart = time.mktime(time.strptime(jobstart, "%m/%d/%Y %H:%M:%S")) times[currentjobs[items[0]]] = time.time() - jobstart return times
def getWaitDuration(cls):
    # Extract the relevant value from the slurm batch-system config.
    lines = call_command(['scontrol', 'show', 'config']).split('\n')
    time_value_list = []
    for line in lines:
        values = line.split()
        if len(values) > 0 and (values[0] == "SchedulerTimeSlice" or values[0] == "AcctGatherNodeFreq"):
            time_name = values[values.index('=') + 1:][1]
            time_value = int(values[values.index('=') + 1:][0])
            if time_name == 'min':
                time_value *= 60
            # Add a 20% ceiling on the wait duration relative to the scheduler update duration
            time_value_list.append(math.ceil(time_value * 1.2))
    return max(time_value_list)
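# `scontrol show config` prints lines like "SchedulerTimeSlice = 30 sec"; the loop
# above reads the value and its unit from the tokens after '='. A sketch of that
# extraction on a sample line (the value is illustrative):
import math

def _demo_config_line(line="SchedulerTimeSlice = 30 sec"):
    values = line.split()                  # ['SchedulerTimeSlice', '=', '30', 'sec']
    tail = values[values.index('=') + 1:]  # ['30', 'sec']
    time_value, time_name = int(tail[0]), tail[1]
    if time_name == 'min':
        time_value *= 60
    return math.ceil(time_value * 1.2)     # 20% headroom -> 36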
def getRunningJobIDs(self):
    times = {}
    with self.runningJobsLock:
        currentjobs = dict((str(self.batchJobIDs[x][0]), x) for x in self.runningJobs)
    stdout = call_command(["bjobs", "-o", "jobid stat start_time delimiter='|'"])
    for curline in stdout.split('\n'):
        items = curline.strip().split('|')
        if items[0] in currentjobs and items[1] == 'RUN':
            jobstart = parse(items[2], default=datetime.now(tzlocal()))
            times[currentjobs[items[0]]] = datetime.now(tzlocal()) - jobstart
    return times
def _pbsVersion(self):
    """ Determines PBS/Torque version via pbsnodes """
    try:
        out = call_command(["pbsnodes", "--version"])
        if "PBSPro" in out:
            logger.debug("PBS Pro proprietary Torque version detected")
            self._version = "pro"
        else:
            logger.debug("Torque OSS version detected")
            self._version = "oss"
    except CalledProcessErrorStderr as e:
        if e.returncode != 0:
            logger.error("Could not determine PBS/Torque version")
    return self._version
def submitJob(self, subLine):
    combinedEnv = self.boss.environment
    combinedEnv.update(os.environ)
    stdout = call_command(subLine, env=combinedEnv)
    # Example success: Job <39605914> is submitted to default queue <general>.
    # Example fail: Service class does not exist. Job not submitted.
    result_search = re.search('Job <(.*)> is submitted', stdout)
    if result_search:
        result = int(result_search.group(1))
        logger.debug("Got the job id: {}".format(result))
    else:
        logger.error("Could not submit job\nReason: {}".format(stdout))
        temp_id = randint(10000000, 99999999)
        # Flag this job to be handled by getJobExitCode
        result = "NOT_SUBMITTED_{}".format(temp_id)
    return result
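# The success-line regex above, applied to the example output quoted in the comments:
#   >>> import re
#   >>> m = re.search('Job <(.*)> is submitted',
#   ...               'Job <39605914> is submitted to default queue <general>.')
#   >>> int(m.group(1))
#   39605914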
def _getJobDetailsFromScontrol(self, slurmJobID):
    args = ['scontrol', 'show', 'job', str(slurmJobID)]
    stdout = call_command(args)
    if isinstance(stdout, str):
        values = stdout.strip().split()
    elif isinstance(stdout, bytes):
        values = stdout.decode('utf-8').strip().split()

    # If job information is not available an error is issued:
    #   slurm_load_jobs error: Invalid job id specified
    # There is no job information, so exit.
    if len(values) > 0 and values[0] == 'slurm_load_jobs':
        return (None, None)

    # Output is in the form of many key=value pairs, multiple pairs on each line
    # and multiple lines in the output. Each pair is pulled out of each line and
    # added to a dictionary.
    job = dict()
    for item in values:
        logger.debug(f"{args[0]} output {item}")
        bits = item.split('=', 1)
        if len(bits) == 2:
            job[bits[0]] = bits[1]

    state = job['JobState']
    try:
        exitcode = job['ExitCode']
        if exitcode is not None:
            status, signal = [int(n) for n in exitcode.split(':')]
            if signal > 0:
                # A non-zero signal may indicate e.g. an out-of-memory killed job
                status = 128 + signal
            logger.debug("scontrol exit code is %s, returning status %d", exitcode, status)
            rc = status
        else:
            rc = None
    except KeyError:
        rc = None
    return state, rc
def _getJobDetailsFromScontrol(self, slurmJobID):
    args = ['scontrol', 'show', 'job', str(slurmJobID)]
    stdout = call_command(args)
    if isinstance(stdout, str):
        lines = stdout.splitlines()
    elif isinstance(stdout, bytes):
        lines = stdout.decode('utf-8').splitlines()

    job = dict()
    for line in lines:
        for item in line.split():
            logger.debug(f"{args[0]} output {item}")
            # Output is in the form of many key=value pairs, multiple pairs on each line
            # and multiple lines in the output. Each pair is pulled out of each line and
            # added to a dictionary.
            # Note: In some cases, the value itself may contain white-space. So, if we find
            # a key without a value, we consider that key part of the previous value.
            bits = item.split('=', 1)
            if len(bits) == 1:
                job[key] += ' ' + bits[0]
            else:
                key = bits[0]
                job[key] = bits[1]

    state = job['JobState']
    try:
        exitcode = job['ExitCode']
        if exitcode is not None:
            status, signal = [int(n) for n in exitcode.split(':')]
            if signal > 0:
                # A non-zero signal may indicate e.g. an out-of-memory killed job
                status = 128 + signal
            logger.debug("scontrol exit code is %s, returning status %d", exitcode, status)
            rc = status
        else:
            rc = None
    except KeyError:
        rc = None
    return state, rc
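# A sketch of the continuation rule above: a token without '=' is treated as part
# of the previous value, which handles scontrol values that contain spaces. The
# sample line is illustrative, not real scontrol output.
def _demo_scontrol_pairs(line="JobState=COMPLETED Comment=some free text"):
    job, key = {}, None
    for item in line.split():
        bits = item.split('=', 1)
        if len(bits) == 1:
            job[key] += ' ' + bits[0]  # "free" and "text" extend the Comment value
        else:
            key = bits[0]
            job[key] = bits[1]
    return job  # -> {'JobState': 'COMPLETED', 'Comment': 'some free text'}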
def getRunningJobIDs(self):
    times = {}
    with self.runningJobsLock:
        currentjobs = dict((str(self.batchJobIDs[x][0]), x) for x in self.runningJobs)
    if check_lsf_json_output_supported:
        stdout = call_command(["bjobs", "-json", "-o", "jobid stat start_time"])
        bjobs_records = self.parseBjobs(stdout)
        if bjobs_records:
            for single_item in bjobs_records:
                if single_item['STAT'] == 'RUN' and single_item['JOBID'] in currentjobs:
                    jobstart = parse(single_item['START_TIME'], default=datetime.now(tzlocal()))
                    times[currentjobs[single_item['JOBID']]] = datetime.now(tzlocal()) - jobstart
    else:
        times = self.fallbackRunningJobIDs(currentjobs)
    return times
def getJobExitCodeBACCT(self, job):
    # if not found in bjobs, then try bacct (slower than bjobs)
    logger.debug("bjobs failed to detect job - trying bacct: {}".format(job))

    args = ["bacct", "-l", str(job)]
    stdout = call_command(args)
    process_output = stdout.split('\n')
    for line in process_output:
        if line.find("Completed <done>") > -1 or line.find("<DONE>") > -1:
            logger.debug("Detected job completed for job: {}".format(job))
            return 0
        elif line.find("Completed <exit>") > -1 or line.find("<EXIT>") > -1:
            logger.error("Detected job failed for job: {}".format(job))
            return 1
    logger.debug("Can't determine exit code for job or job still running: {}".format(job))
    return None
def obtainSystemConstants(cls):
    # sinfo -Ne --format '%m,%c'
    # sinfo arguments:
    # -N for node-oriented
    # -h for no header
    # -e for exact values (e.g. don't return 32+)
    # --format to get memory, cpu
    max_cpu = 0
    max_mem = MemoryString('0')
    lines = call_command(['sinfo', '-Nhe', '--format', '%m %c']).split('\n')
    for line in lines:
        logger.debug("sinfo output %s", line)
        values = line.split()
        if len(values) < 2:
            continue
        mem, cpu = values
        max_cpu = max(max_cpu, int(cpu))
        max_mem = max(max_mem, MemoryString(mem + 'M'))
    if max_cpu == 0 or max_mem.byteVal() == 0:
        raise RuntimeError('sinfo did not return memory or cpu info')
    return max_cpu, max_mem
def getRunningJobIDs(self):
    # Should return a dictionary of job IDs and number of seconds
    times = {}
    with self.runningJobsLock:
        currentjobs = dict((str(self.batchJobIDs[x][0]), x) for x in self.runningJobs)
    # currentjobs is a dictionary that maps a slurm job id (string) to our own internal job id
    # squeue arguments:
    # -h for no header
    # --format to get job id %i, state %t and elapsed time %M (days-hours:minutes:seconds)
    lines = call_command(['squeue', '-h', '--format', '%i %t %M']).split('\n')
    for line in lines:
        values = line.split()
        if len(values) < 3:
            continue
        slurm_jobid, state, elapsed_time = values
        if slurm_jobid in currentjobs and state == 'R':
            seconds_running = self.parse_elapsed(elapsed_time)
            times[currentjobs[slurm_jobid]] = seconds_running
    return times
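# `parse_elapsed` is defined elsewhere; squeue's %M field is elapsed time in the
# form [days-]hours:minutes:seconds, with leading fields omitted when zero
# (e.g. "1-02:03:04", "02:03:04", "3:04"). A hypothetical sketch of such a parser:
def _demo_parse_elapsed(elapsed="1-02:03:04"):
    days = 0
    if '-' in elapsed:
        days, elapsed = elapsed.split('-')
    parts = [int(p) for p in elapsed.split(':')]
    while len(parts) < 3:  # pad "M:S" or "S" forms up to [H, M, S]
        parts.insert(0, 0)
    hours, minutes, seconds = parts
    return int(days) * 86400 + hours * 3600 + minutes * 60 + seconds  # -> 93784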
def fallbackGetJobExitCode(self, job):
    args = ["bjobs", "-l", str(job)]
    logger.debug("Checking job exit code for job via bjobs (fallback): {}".format(job))

    stdout = call_command(args)
    output = stdout.replace("\n ", "")
    process_output = output.split('\n')
    started = 0
    for line in process_output:
        if "Done successfully" in line or "Status <DONE>" in line:
            logger.debug("bjobs detected job completed for job: {}".format(job))
            return 0
        elif "New job is waiting for scheduling" in line:
            logger.debug("bjobs detected job pending scheduling for job: {}".format(job))
            return None
        elif "PENDING REASONS" in line or "Status <PEND>" in line:
            logger.debug("bjobs detected job pending for job: {}".format(job))
            return None
        elif "Exited with exit code" in line:
            exit = int(line[line.find("Exited with exit code ") + 22:].split('.')[0])
            logger.error("bjobs detected job exit code {} for job {}".format(exit, job))
            return exit
        elif "Completed <exit>" in line:
            logger.error("bjobs detected job failed for job: {}".format(job))
            return 1
        elif line.find("Started on ") > -1 or "Status <RUN>" in line:
            started = 1
    if started == 1:
        logger.debug("bjobs detected job started but not completed: {}".format(job))
        return None

    return self.getJobExitCodeBACCT(job)
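# The slice arithmetic above: len("Exited with exit code ") == 22, so the slice
# starts right at the numeric code, and splitting on '.' drops the trailing period:
#   >>> line = "Exited with exit code 2."
#   >>> int(line[line.find("Exited with exit code ") + 22:].split('.')[0])
#   2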
def obtainSystemConstants(cls): stdout = call_command(["lshosts"]) line = stdout.split('\n')[0] items = line.strip().split() num_columns = len(items) cpu_index = None mem_index = None for i in range(num_columns): if items[i] == 'ncpus': cpu_index = i elif items[i] == 'maxmem': mem_index = i if cpu_index is None or mem_index is None: raise RuntimeError( "lshosts command does not return ncpus or maxmem columns") maxCPU = 0 maxMEM = MemoryString("0") for line in stdout.split('\n')[1:]: items = line.strip().split() if items: if len(items) < num_columns: raise RuntimeError( "lshosts output has a varying number of columns") if items[cpu_index] != '-' and int( items[cpu_index]) > int(maxCPU): maxCPU = int(items[cpu_index]) if items[mem_index] != '-' and MemoryString( items[mem_index]) > maxMEM: maxMEM = MemoryString(items[mem_index]) if maxCPU == 0 or maxMEM == MemoryString("0"): raise RuntimeError("lshosts returns null ncpus or maxmem info") logger.debug("Got the maxMEM: {}".format(maxMEM)) logger.debug("Got the maxCPU: {}".format(maxCPU)) return maxCPU, maxMEM
def submitJob(self, subLine):
    stdout = call_command(subLine)
    output = stdout.split('\n')[0].strip()
    result = int(output)
    return result
def killJob(self, jobID):
    call_command(['qdel', self.getBatchSystemID(jobID)])
def getJobExitCode(self, lsfJobID):
    # the task is set as part of the job ID if using getBatchSystemID()
    job, task = (lsfJobID, None)
    if '.' in lsfJobID:
        job, task = lsfJobID.split('.', 1)

    # first try bjobs to find out job state
    args = ["bjobs", "-l", str(job)]
    logger.debug("Checking job exit code for job via bjobs: {}".format(job))
    stdout = call_command(args)
    output = stdout.replace("\n ", "")
    process_output = output.split('\n')
    started = 0
    for line in process_output:
        if "Done successfully" in line or "Status <DONE>" in line:
            logger.debug("bjobs detected job completed for job: {}".format(job))
            return 0
        elif "New job is waiting for scheduling" in line:
            logger.debug("bjobs detected job pending scheduling for job: {}".format(job))
            return None
        elif "PENDING REASONS" in line or "Status <PEND>" in line:
            logger.debug("bjobs detected job pending for job: {}".format(job))
            return None
        elif "Exited with exit code" in line:
            exit = int(line[line.find("Exited with exit code ") + 22:].split('.')[0])
            logger.error("bjobs detected job exit code {} for job {}".format(exit, job))
            return exit
        elif "Completed <exit>" in line:
            logger.error("bjobs detected job failed for job: {}".format(job))
            return 1
        elif line.find("Started on ") > -1 or "Status <RUN>" in line:
            started = 1
    if started == 1:
        logger.debug("bjobs detected job started but not completed: {}".format(job))
        return None

    # if not found in bjobs, then try bacct (slower than bjobs)
    logger.debug("bjobs failed to detect job - trying bacct: {}".format(job))
    args = ["bacct", "-l", str(job)]
    stdout = call_command(args)
    process_output = stdout.split('\n')
    for line in process_output:
        if line.find("Completed <done>") > -1 or line.find("<DONE>") > -1:
            logger.debug("Detected job completed for job: {}".format(job))
            return 0
        elif line.find("Completed <exit>") > -1 or line.find("<EXIT>") > -1:
            logger.error("Detected job failed for job: {}".format(job))
            return 1
    logger.debug("Can't determine exit code for job or job still running: {}".format(job))
    return None
def _getJobDetailsFromScontrol(self, job_id_list: list) -> dict:
    """
    Get SLURM job exit codes for the jobs in `job_id_list` by running `scontrol`.

    :param job_id_list: list of integer batch job IDs.
    :return: dict of job statuses, where key is the job-id, and value is a tuple
             containing the job's state and exit code.
    """
    args = ['scontrol', 'show', 'job']
    # `scontrol` can only return information about a single job,
    # or all the jobs it knows about.
    if len(job_id_list) == 1:
        args.append(str(job_id_list[0]))

    stdout = call_command(args)

    # Job records are separated by a blank line.
    if isinstance(stdout, str):
        job_records = stdout.strip().split('\n\n')
    elif isinstance(stdout, bytes):
        job_records = stdout.decode('utf-8').strip().split('\n\n')

    # Collect the job statuses in a dict; key is the job-id, value is a tuple containing
    # job state and exit status. Initialize dict before processing output of `scontrol`.
    job_statuses = {}
    for job_id in job_id_list:
        job_statuses[job_id] = (None, None)

    # `scontrol` will report "No jobs in the system" if there are no jobs in the system
    # and no job-id was passed as argument to `scontrol`.
    if len(job_records) > 0 and job_records[0] == "No jobs in the system":
        return job_statuses

    for record in job_records:
        job = {}
        for line in record.splitlines():
            for item in line.split():
                #logger.debug("%s output %s", args[0], item)
                # Output is in the form of many key=value pairs, multiple pairs on each line
                # and multiple lines in the output. Each pair is pulled out of each line and
                # added to a dictionary.
                # Note: In some cases, the value itself may contain white-space. So, if we find
                # a key without a value, we consider that key part of the previous value.
                bits = item.split('=', 1)
                if len(bits) == 1:
                    job[key] += ' ' + bits[0]
                else:
                    key = bits[0]
                    job[key] = bits[1]
            # The first line of the record contains the JobId. Stop processing the remainder
            # of this record, if we're not interested in this job.
            job_id = int(job['JobId'])
            if job_id not in job_id_list:
                logger.debug("%s job %d is not in the list", args[0], job_id)
                break
        if job_id not in job_id_list:
            continue

        state = job['JobState']
        logger.debug("%s state of job %s is %s", args[0], job_id, state)
        try:
            exitcode = job['ExitCode']
            if exitcode is not None:
                status, signal = [int(n) for n in exitcode.split(':')]
                if signal > 0:
                    # A non-zero signal may indicate e.g. an out-of-memory killed job
                    status = 128 + signal
                logger.debug("%s exit code of job %d is %s, return status %d",
                             args[0], job_id, exitcode, status)
                rc = status
            else:
                rc = None
        except KeyError:
            rc = None
        job_statuses[job_id] = (state, rc)
    logger.debug("%s returning job statuses: %s", args[0], job_statuses)
    return job_statuses
def submitJob(self, subLine):
    return call_command(subLine)
def getJobExitCode(self, lsfJobID): # the task is set as part of the job ID if using getBatchSystemID() if "NOT_SUBMITTED" in lsfJobID: logger.error("bjobs detected job failed to submit") return 1 job, task = (lsfJobID, None) if '.' in lsfJobID: job, task = lsfJobID.split('.', 1) self.parseMaxMem(job) # first try bjobs to find out job state if check_lsf_json_output_supported: args = [ "bjobs", "-json", "-o", "user exit_code stat exit_reason pend_reason", str(job) ] logger.debug("Checking job exit code for job via bjobs: " "{}".format(job)) stdout = call_command(args) bjobs_records = self.parseBjobs(stdout) if bjobs_records: process_output = bjobs_records[0] if 'STAT' in process_output: process_status = process_output['STAT'] if process_status == 'DONE': logger.debug( "bjobs detected job completed for job: {}". format(job)) return 0 if process_status == 'PEND': pending_info = "" if 'PEND_REASON' in process_output: if process_output['PEND_REASON']: pending_info = "\n" + \ process_output['PEND_REASON'] logger.debug( "bjobs detected job pending with: {}\nfor job: {}" .format(pending_info, job)) return None if process_status == 'EXIT': exit_code = 1 exit_reason = "" if 'EXIT_CODE' in process_output: exit_code_str = process_output['EXIT_CODE'] if exit_code_str: exit_code = int(exit_code_str) if 'EXIT_REASON' in process_output: exit_reason = process_output['EXIT_REASON'] exit_info = "" if exit_code: exit_info = "\nexit code: {}".format(exit_code) if exit_reason: exit_info += "\nexit reason: {}".format( exit_reason) logger.error( "bjobs detected job failed with: {}\nfor job: {}" .format(exit_info, job)) if "TERM_MEMLIMIT" in exit_reason: return BatchJobExitReason.MEMLIMIT return exit_code if process_status == 'RUN': logger.debug( "bjobs detected job started but not completed for job: {}" .format(job)) return None if process_status in {'PSUSP', 'USUSP', 'SSUSP'}: logger.debug( "bjobs detected job suspended for job: {}". format(job)) return None return self.getJobExitCodeBACCT(job) else: return self.fallbackGetJobExitCode(job)
def test_call_command_err(self):
    with self.assertRaisesRegex(
            CalledProcessErrorStderr,
            "^Command '\\['cat', '/dev/Frankenheimer']' exit status 1: cat: /dev/Frankenheimer: No such file or directory\n$"):
        call_command(["cat", "/dev/Frankenheimer"])
def test_call_command_ok(self):
    o = call_command(["echo", "Fred"])
    self.assertEqual("Fred\n", o)
    self.assertTrue(isinstance(o, str), str(type(o)))