def isJobCompleted(self, jobID):
    """Poll ``qstat -f`` for a job and report whether it has completed.

    The command is retried, sleeping ``self.qstat_error_delay`` between
    attempts, until it exits with status 0 or ``self.qstat_max_tries``
    retries have been made.

    Args:
        jobID: Job identifier interpolated into ``qstat -f <jobID>``.

    Returns:
        A ``(completed, exitStatus)`` tuple. ``completed`` is True when the
        ``job_state`` reported by qstat is ``C`` (case-insensitive), and
        False otherwise, including when no ``job_state`` line is present.
        ``exitStatus`` is the integer from the ``exit_status`` line, or None
        when no such line with a purely numeric value is present.

    Raises:
        Exception: if qstat still exits with a non-zero status after the
            last retry.
    """
    count = 0
    while True:
        (stdout, stderr, exitStatus) = shellCommand("qstat -f %s" % jobID)
        # qstat appears to have worked correctly, we can stop trying.
        if exitStatus == 0 or count >= self.qstat_max_tries:
            break
        count += 1
        sleep(self.qstat_error_delay)

    if exitStatus != 0:
        raise Exception(
            "qstat -f %s returned non-zero exit status %d times, panicking"
            % (jobID, count))

    # Try to fetch the job state and the exit status of the job command
    # from the "name = value" lines in the output of qstat.
    jobState = None
    exitStatus = None
    for line in stdout.split('\n'):
        ws = line.split()
        if len(ws) != 3 or ws[1] != '=':
            continue
        if ws[0] == 'job_state':
            jobState = ws[2]
        elif ws[0] == 'exit_status' and ws[2].isdigit():
            exitStatus = int(ws[2])

    # A missing job_state line leaves jobState as None; report the job as
    # not completed instead of failing with AttributeError on None.upper().
    completed = jobState is not None and jobState.upper() == 'C'
    return (completed, exitStatus)
def jobInQstat(self, jobID):
    """Return True if ``jobID`` is the first token of any ``qstat`` output line.

    Args:
        jobID: Value compared for equality against the first
            whitespace-separated token of each output line.

    Returns:
        True when a line of ``qstat`` output starts with ``jobID``; False
        otherwise, including when the output is empty.

    Raises:
        Exception: if ``qstat`` exits with a non-zero status.
    """
    (stdout, stderr, exitStatus) = shellCommand('qstat')
    if exitStatus != 0:
        raise Exception('qstat returned a non-zero exit status:' + stdout)
    if not stdout:
        return False
    tokenised = (line.split() for line in stdout.rstrip().split('\n'))
    # Blank or whitespace-only lines split to an empty list; skip them so
    # that indexing the first token cannot raise IndexError.
    jobids = {tokens[0] for tokens in tokenised if tokens}
    return jobID in jobids
def launch(self):
    """Write ``str(self)`` to a temporary file and submit it with ``qsub``.

    Returns:
        The standard output of ``qsub`` when it exits with status 0.

    Raises:
        Exception: if ``qsub`` exits with a non-zero status.
    """
    # Text mode so that writing a str works on Python 3 as well as Python 2.
    # The context manager closes (and thereby deletes) the temporary file
    # even if shellCommand raises.
    with NamedTemporaryFile(mode='w') as script:
        script.write(str(self))
        # Flush so qsub sees the full contents while the file is still open.
        script.flush()
        command = 'qsub ' + script.name
        (stdout, stderr, returnCode) = shellCommand(command)
    if returnCode == 0:
        return stdout
    raise Exception('qsub command failed with exit status: ' + str(returnCode))
def launch(self):
    """Submit the text of ``str(self)`` to ``qsub`` via a temporary file.

    Returns:
        Whatever ``qsub`` printed on standard output, when its exit status
        is 0.

    Raises:
        Exception: if the exit status of ``qsub`` is non-zero.
    """
    # mode='w' opens the file for text so a str can be written on Python 3;
    # using ``with`` guarantees the file is closed and removed on any exit
    # path, including an exception from shellCommand.
    with NamedTemporaryFile(mode='w') as script:
        script.write(str(self))
        # The file must stay open (it is deleted on close), so flush to make
        # the contents visible to qsub.
        script.flush()
        (stdout, stderr, returnCode) = shellCommand('qsub ' + script.name)
    if returnCode != 0:
        raise Exception('qsub command failed with exit status: ' +
                        str(returnCode))
    return stdout
def isJobCompleted(jobID):
    """Check the status column of ``qstat <jobID>`` for the value ``C``.

    Args:
        jobID: Job identifier interpolated into ``qstat <jobID>``.

    Returns:
        True when ``qstat`` exits with a non-zero status, or when the fifth
        whitespace-separated field on the third output line equals ``'C'``;
        False when that field has any other value. If the field cannot be
        extracted, the string ``"bad result from qstat"`` is returned (kept
        for backward compatibility; note that it is truthy).
    """
    (stdout, stderr, returnCode) = shellCommand("qstat %s" % jobID)
    if returnCode != 0:
        # A failing qstat is reported as "completed".
        return True
    try:
        # Third line of output, fifth field.
        statusVal = stdout.split('\n')[2].split()[4]
    except Exception:
        # Best-effort parse: any malformed output yields the sentinel string,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        return "bad result from qstat"
    # Compare by value; identity comparison against a string literal is
    # implementation-dependent.
    return statusVal == 'C'
def runStage(stage, logger, options, *args):
    """Build the command for ``stage``, log it, and run it.

    The command string is produced by calling the object returned from
    ``getCommand(stage, options)`` with ``*args``. When the stage's
    'distributed' option is truthy the string is handed to
    ``distributedCommand``; otherwise it is run through ``shellCommand`` and
    a failure message is logged if the exit status is non-zero.

    Returns:
        None in every case.
    """
    build = getCommand(stage, options)
    cmdline = build(*args)
    logInfo(stage + ': ' + cmdline, logger)

    run_distributed = getStageOptions(options, stage, 'distributed')
    if run_distributed:
        distributedCommand(stage, cmdline, options)
        return

    (out, err, status) = shellCommand(cmdline)
    if status != 0:
        details = (cmdline, out, err, status)
        logInfo("Failed to run '%s'\n%s%sNon-zero exit status %s" % details,
                logger)
def runStage(stage, logger, options, *args):
    """Build the command for ``stage``, log it, run it, and return its status.

    The command string comes from calling the result of
    ``getCommand(stage, options)`` with ``*args``.

    Returns:
        The value returned by ``distributedCommand`` when the stage's
        "distributed" option is truthy; otherwise the exit status reported
        by ``shellCommand``. A non-zero local exit status is also logged,
        together with the command and its captured output.
    """
    make_command = getCommand(stage, options)
    cmdline = make_command(*args)
    logInfo(stage + ": " + cmdline, logger)

    if getStageOptions(options, stage, "distributed"):
        return distributedCommand(stage, cmdline, options)

    (out, err, status) = shellCommand(cmdline)
    if status != 0:
        report = "Failed to run '%s'\n%s%sNon-zero exit status %s" % (
            cmdline, out, err, status)
        logInfo(report, logger)
    return status
def isJobCompleted(jobID, manager='pbs'):
    """Query the scheduler for a job and report whether it has finished.

    For ``manager == 'slurm'`` the command run is
    ``sleep 5; sacct -nbXj <jobID>``; for any other value it is
    ``qstat -f <jobID>``. The command is retried, sleeping
    QSTAT_ERROR_DELAY between attempts, until it exits with status 0 or
    QSTAT_MAX_TRIES retries have been made.

    Only output lines with exactly three whitespace-separated tokens are
    inspected.

    Returns:
        A ``(completed, exitStatus)`` tuple. ``completed`` is True when the
        derived job state is ``C`` (case-insensitive). ``exitStatus`` is None
        unless an exit status was parsed from the output.

    Raises:
        Exception: if the command still exits with a non-zero status after
            the last retry.
    """
    if manager == 'slurm':
        jobcmd = "sleep 5; sacct -nbXj %s" % jobID  # sleep to let daemon catch up
    else:
        jobcmd = "qstat -f %s" % jobID
    count = 0
    while True:
        (stdout, stderr, exitStatus) = shellCommand(jobcmd)
        # qstat appears to have worked correctly, we can stop trying.
        if exitStatus == 0 or count >= QSTAT_MAX_TRIES:
            break
        count += 1
        sleep(QSTAT_ERROR_DELAY)
    if exitStatus != 0:
        #return (True, exitStatus)
        raise Exception("'%s' returned non-zero exit status %d times, panicking" % (jobcmd, count))
    else:
        # try to fetch the exit status of the job command from the output of qstat.
        jobState = 'R'  #changed default state to 'R'
        exitStatus = None
        for line in stdout.split('\n'):
            ws = line.split()
            if len(ws) == 3:
                if manager == 'slurm':
                    # Tokens are compared as: job id, state, "code:signal".
                    if ws[0] == str(jobID):
                        # all states that cause job termination (ie Completed)
                        if ws[1].strip() in ['CANCELLED', 'COMPLETED', 'FAILED', 'NODE_FAIL', 'PREEMPTED', 'SUSPENDED', 'TIMEOUT', 'CANCELLED+']:
                            jobState = 'C'
                            exitStatus = int(ws[2].split(':')[0])  # code:signal
                            # check both exit codes: fall back to the signal
                            # part when the code part is 0
                            exitStatus = int(ws[2].split(':')[1]) if \
                                exitStatus == 0 else exitStatus
                            # if cancelled, set exitStatus as 1
                            if ws[1].strip() in ['CANCELLED', 'CANCELLED+']:
                                exitStatus = 1
                        else:
                            jobState = 'R'
                else:
                    # qstat -f style "name = value" lines.
                    if ws[0] == 'job_state' and ws[1] == '=':
                        jobState = ws[2]
                    elif ws[0] == 'exit_status' and ws[1] == '=' and ws[2].isdigit():
                        exitStatus = int(ws[2])
        if jobState.upper() == 'C':
            # Job has completed.
            return (True, exitStatus)
        else:
            # Job has not completed.
            return (False, exitStatus)
def runStage(stage, *args):
    """Build, log and run the command for ``stage``; return its exit status.

    Options and logger are taken from the names ``pipeline_options`` and
    ``pipeline_logger``, which are defined outside this function. The
    command string comes from calling the result of
    ``getCommand(stage, pipeline_options)`` with ``*args``.

    Returns:
        The value returned by ``distributedCommand`` when the stage's
        'distributed' option is truthy; otherwise the exit status reported
        by ``shellCommand``. A non-zero local exit status is also logged.
    """
    make_command = getCommand(stage, pipeline_options)
    cmdline = make_command(*args)
    logInfo(stage + ': ' + cmdline, pipeline_logger)

    if getStageOptions(pipeline_options, stage, 'distributed'):
        return distributedCommand(stage, cmdline, pipeline_options)

    (out, err, status) = shellCommand(cmdline)
    if status != 0:
        details = (cmdline, out, err, status)
        logInfo("Failed to run '%s'\n%s%sNon-zero exit status %s" % details,
                pipeline_logger)
    return status
def launch(self):
    """Submit ``str(self)`` with ``qsub`` and return the parsed job id.

    ``str(self)`` is written to a temporary file whose path is passed to
    ``qsub``. The job id is the first group of ``JOBID_RE`` matched against
    the standard output of ``qsub``.

    Returns:
        ``JOBID_RE.match(stdout).group(1)`` when ``qsub`` exits with status 0.

    Raises:
        Exception: if ``qsub`` exits with a non-zero status, or if its
            standard output does not match ``JOBID_RE``.
    """
    # Text mode so that writing a str works on Python 3 as well as Python 2.
    # The context manager closes (and thereby deletes) the temporary file
    # even if shellCommand raises.
    with NamedTemporaryFile(mode='w') as script:
        script.write(str(self))
        # Flush so qsub sees the full contents while the file is still open.
        script.flush()
        (stdout, stderr, returnCode) = shellCommand('qsub ' + script.name)

    if returnCode != 0:
        raise Exception('qsub command failed with exit status: ' +
                        str(returnCode))

    # Get jobid from stdout (bio21 hpc sge)
    found = JOBID_RE.match(stdout)
    if found is None:
        # Report the offending output itself; the return code is always 0 on
        # this path and says nothing about what went wrong.
        raise Exception('qsub returned unexpected stdout:' + str(stdout))
    return found.group(1)
def chromInfo(refFile):
    """Run ``infoseq -noheading`` on ``refFile`` and collect name/length pairs.

    Args:
        refFile: Appended verbatim to the ``infoseq -noheading `` command.

    Returns:
        A list of ``(chrName, chrLen)`` tuples, one per output line that has
        at least six whitespace-separated fields; the values are the third
        and sixth fields, both left as strings. Shorter lines are skipped.

    If ``infoseq`` exits with a non-zero status, its stderr is printed and
    the process exits with that status via ``sys.exit``.
    """
    (stdout, stderr, code) = shellCommand('infoseq -noheading ' + refFile)
    if code != 0:
        print('error from infoseq: ')
        print(stderr)
        sys.exit(code)

    # extract the chromosome name and length from the output of infoseq,
    # silently skipping lines with too few fields
    rows = (text.split() for text in stdout.splitlines())
    return [(fields[2], fields[5]) for fields in rows if len(fields) >= 6]
def run_job_and_wait(self, stage, verbose=0):
    """Run ``str(self)`` through ``srun`` and return the command's exit status.

    ``str(self)`` is written to a temporary file (created with ``dir=''``)
    which is passed to ``bash`` under ``srun``. The same text, followed by
    the full ``srun`` command as a comment line, is written to
    ``<self.logDir>/<stage>.sh``.

    Args:
        stage: Stage name, used for the log file name and for the
            ``--error`` / ``--output`` file name patterns.
        verbose: When greater than 0, the stage name is printed before the
            command is run.

    Returns:
        The return code reported by ``shellCommand`` for the ``srun`` command.
    """
    logFilename = os.path.join(self.logDir, stage + '.sh')
    stderr_file = os.path.join(self.logDir, stage + '.%j.stderr')
    stdout_file = os.path.join(self.logDir, stage + '.%j.stdout')

    # Text mode so that writing a str works on Python 3 as well as Python 2.
    # The context manager closes (and thereby deletes) the temporary file on
    # every exit path, including a failure to open the log file or an
    # exception from shellCommand.
    with NamedTemporaryFile(mode='w', dir='') as script:
        script.write(str(self))
        # Flush so srun sees the full contents while the file is still open.
        script.flush()
        command = ('srun --error={stderr} --output={stdout} {memory} '
                   '{literals} {queue} {jobname} {walltime} '
                   'bash {file_name}').format(
            stderr=stderr_file,
            stdout=stdout_file,
            memory=self.mem,
            literals=self.literals,
            queue=self.queue,
            jobname=self.name,
            walltime=self.walltime,
            file_name=script.name)
        with open(logFilename, 'w') as logFile:
            logFile.write(self.__str__())
            logFile.write('\n# ' + command + '\n')
        if verbose > 0:
            print('stage = ' + stage)
        (stdout, stderr, returnCode) = shellCommand(command)
    return returnCode
def launch(self):
    """Write ``str(self)`` to a temporary file and submit it with ``qsub``.

    Returns:
        The standard output of ``qsub`` when it exits with status 0.

    Raises:
        Exception: if ``qsub`` exits with a non-zero status.
    """
    # The previous hard-coded sample PBS script assigned to a local ``cmd``
    # was never used and has been removed; the submitted text is str(self).
    #
    # Text mode so that writing a str works on Python 3 as well as Python 2.
    # The context manager closes (and thereby deletes) the temporary file
    # even if shellCommand raises.
    with NamedTemporaryFile(mode='w') as script:
        script.write(str(self))
        # Flush so qsub sees the full contents while the file is still open.
        script.flush()
        command = 'qsub ' + script.name
        (stdout, stderr, returnCode) = shellCommand(command)
    if returnCode == 0:
        return stdout
    raise Exception('qsub command failed with exit status: ' + str(returnCode))