Exemple #1
0
 def getStatusForAllJobs(self):
     """Run ``qstat`` and build a mapping of job id to job status.

     Every output line with at least five fields whose first field is
     all digits is treated as a job entry. For such a line:

     * if the status letter equals ``self.errorStatus``, the error reason
       is stored in ``self.errorReasons``, ``self.killJob`` is called and
       the job is left out of the result;
     * otherwise the letter is looked up in ``self.allStatuses``; an
       unknown letter is logged and stored as-is.

     Returns the dict, or None when ``qstat`` did not exit cleanly (so the
     caller can keep its previous view of the job statuses).
     """
     statusDict = {}
     # Close the devnull handle as soon as qstat has finished instead of
     # leaking one descriptor per poll.
     with open(os.devnull) as devnull:
         proc = subprocess.Popen([ "qstat" ], stdin=devnull, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         outMsg = proc.communicate()[0]
     # A negative return code means qstat was killed by a signal, in which
     # case its output may be truncated - treat that like any other failure.
     if proc.returncode != 0:
         # SGE unavailable for the moment, don't update the job status
         return None

     for line in outMsg.splitlines():
         words = line.split()
         if len(words) >= 5 and words[0].isdigit():
             jobId = words[0]
             statusLetter = self.getStatusLetter(words, 4)
             if statusLetter == self.errorStatus:
                 # Jobs in error state are recorded and killed, not reported
                 self.errorReasons[jobId] = self.getErrorReason(jobId)
                 self.killJob(jobId)
                 continue

             status = self.allStatuses.get(statusLetter)
             if status:
                 statusDict[jobId] = status
             else:
                 log.info("WARNING: unexpected job status " + repr(statusLetter) + " received from SGE!")
                 # Fall back to the raw letter so the job is still listed
                 statusDict[jobId] = statusLetter
     return statusDict
Exemple #2
0
 def findJobId(self, stdout):
     """Extract the job id from the text printed by qsub.

     Lines containing "has been submitted" are handed to ``self.getJobId``;
     every other line is logged as unexpected. When several lines match,
     the id from the last one is returned. Returns "" if none match.
     """
     submittedId = ""
     for outputLine in stdout.splitlines():
         if "has been submitted" not in outputLine:
             log.info("Unexpected output from qsub : " + outputLine.strip())
             continue
         submittedId = self.getJobId(outputLine)
     return submittedId
Exemple #3
0
 def findJobId(self, stdout):
     """Extract the job id from the text printed by condor_submit.

     Lines containing "submitted to cluster" are handed to
     ``self.getJobId``; the routine progress lines ("Submitting job",
     "Logging submit event") are skipped silently; anything else is logged
     as unexpected. When several lines match, the id from the last one is
     returned. Returns "" if none match.
     """
     ignorableMarkers = ("Submitting job", "Logging submit event")
     foundId = ""
     for outputLine in stdout.splitlines():
         if "submitted to cluster" in outputLine:
             foundId = self.getJobId(outputLine)
             continue
         if not any(marker in outputLine for marker in ignorableMarkers):
             log.info("Unexpected output from condor_submit : " + outputLine.strip())
     return foundId
Exemple #4
0
 def retryAccountInfo(self, jobId, maxRetries=9, initialSleep=0.5):
     """Repeatedly call ``self.getAccountInfo`` with a growing delay.

     Before each attempt the method sleeps; the delay starts at
     ``initialSleep`` seconds and is doubled after every sleep for as long
     as it is still below 5 seconds.

     jobId        -- id of the job, passed on to ``getAccountInfo`` and
                     used in the log message.
     maxRetries   -- number of attempts made here. The default is 9 rather
                     than 10 because the caller has already tried once.
     initialSleep -- delay in seconds before the first attempt.

     Returns the ``(acctOutput, acctError)`` pair from the first attempt
     whose output is not None, or ``(None, acctError)`` with the error
     from the last attempt ("" if no attempt was made) once the retries
     are used up.
     """
     sleepTime = initialSleep
     acctError = ""
     for attempt in range(maxRetries):
         # assume failure is because the job hasn't propagated yet, wait a bit
         sleep(sleepTime)
         if sleepTime < 5:
             sleepTime *= 2
         acctOutput, acctError = self.getAccountInfo(jobId)
         if acctOutput is not None:
             return acctOutput, acctError
         # Only announce a wait when another attempt really follows; the
         # message would be misleading after the final failure.
         if attempt < maxRetries - 1:
             log.info("Waiting " + str(sleepTime) + " seconds before retrying account info for job " + jobId)
     return None, acctError