def processFinishedJob(self, jobBatchSystemID, resultStatus, wallTime=None):
    """Read a finished job's jobWrapper from the job store and update its state.

    :param jobBatchSystemID: the batch system's identifier for the finished job
    :param resultStatus: exit code reported by the batch system (0 == success)
    :param wallTime: measured wall-clock runtime of the job, if known; passed
        on to the cluster scaler when one is configured
    """
    # Feed the completed job's runtime to the cluster scaler, when both the
    # wall time and a scaler are available.
    if wallTime is not None and self.clusterScaler is not None:
        issuedJob = self.jobBatchSystemIDToIssuedJob[jobBatchSystemID]
        self.clusterScaler.addCompletedJob(issuedJob, wallTime)
    jobStoreID = self.removeJobID(jobBatchSystemID)
    if self.jobStore.exists(jobStoreID):
        logger.debug("Job %s continues to exist (i.e. has more to do)" % jobStoreID)
        jobWrapper = self.jobStore.load(jobStoreID)
        # A leftover log file in the store indicates the worker recorded a failure.
        if jobWrapper.logJobStoreFileID is not None:
            logger.warn("The jobWrapper seems to have left a log file, indicating failure: %s", jobStoreID)
            with jobWrapper.getLogFileHandle(self.jobStore) as logFileStream:
                logStream(logFileStream, jobStoreID, logger.warn)
        if resultStatus != 0:
            # If the batch system returned a non-zero exit code then the worker
            # is assumed not to have captured the failure of the job, so we
            # reduce the retry count here.
            if jobWrapper.logJobStoreFileID is None:
                logger.warn("No log file is present, despite jobWrapper failing: %s", jobStoreID)
            jobWrapper.setupJobAfterFailure(self.config)
            self.jobStore.update(jobWrapper)
        elif jobStoreID in self.toilState.hasFailedSuccessors:
            # If the job has completed okay, we can remove it from the list of
            # jobs with failed successors
            self.toilState.hasFailedSuccessors.remove(jobStoreID)
        # Now we know the jobWrapper is done we can add it to the list of
        # updated jobWrapper files
        self.toilState.updatedJobs.add((jobWrapper, resultStatus))
        logger.debug("Added jobWrapper: %s to active jobs", jobStoreID)
    else:
        # The jobWrapper is done: it no longer exists in the job store.
        if resultStatus != 0:
            logger.warn("Despite the batch system claiming failure the "
                        "jobWrapper %s seems to have finished and been removed", jobStoreID)
        self._updatePredecessorStatus(jobStoreID)
def processFinishedJob(self, jobBatchSystemID, resultStatus):
    """Read a finished job's jobWrapper from the job store and update its state."""
    storeID = self.removeJob(jobBatchSystemID).jobStoreID
    if not self.jobStore.exists(storeID):
        # Already deleted from the store: the jobWrapper is entirely done.
        if resultStatus != 0:
            logger.warn("Despite the batch system claiming failure the "
                        "jobWrapper %s seems to have finished and been removed", storeID)
        self._updatePredecessorStatus(storeID)
        return
    wrapper = self.jobStore.load(storeID)
    if wrapper.logJobStoreFileID is not None:
        # A leftover log file in the store indicates the worker recorded a failure.
        logger.warn(
            "The jobWrapper seems to have left a log file, indicating failure: %s",
            storeID)
        with wrapper.getLogFileHandle(self.jobStore) as stream:
            logStream(stream, storeID, logger.warn)
    if resultStatus != 0:
        if wrapper.logJobStoreFileID is None:
            logger.warn(
                "No log file is present, despite jobWrapper failing: %s",
                storeID)
        wrapper.setupJobAfterFailure(self.config)
    # Queue the jobWrapper for the leader to reconsider now that it is done.
    self.toilState.updatedJobs.add((wrapper, resultStatus))
    logger.debug("Added jobWrapper: %s to active jobs", storeID)
def processFinishedJob(self, jobBatchSystemID, resultStatus, wallTime=None):
    """Read a finished job's jobWrapper from the job store and update its state.

    :param jobBatchSystemID: the batch system's identifier for the finished job
    :param resultStatus: exit code reported by the batch system (0 == success)
    :param wallTime: measured wall-clock runtime of the job, if known; passed
        on to the cluster scaler when one is configured
    """
    def processRemovedJob(jobStoreID):
        # Handle a job that no longer exists in the job store: warn if the
        # batch system claimed failure anyway, then notify its predecessors.
        if resultStatus != 0:
            logger.warn("Despite the batch system claiming failure the "
                        "jobWrapper %s seems to have finished and been removed", jobStoreID)
        self._updatePredecessorStatus(jobStoreID)
    # Feed the completed job's runtime to the cluster scaler, when both the
    # wall time and a scaler are available.
    if wallTime is not None and self.clusterScaler is not None:
        issuedJob = self.jobBatchSystemIDToIssuedJob[jobBatchSystemID]
        self.clusterScaler.addCompletedJob(issuedJob, wallTime)
    jobStoreID = self.removeJobID(jobBatchSystemID)
    if self.jobStore.exists(jobStoreID):
        logger.debug("Job %s continues to exist (i.e. has more to do)" % jobStoreID)
        try:
            jobWrapper = self.jobStore.load(jobStoreID)
        except NoSuchJobException:
            # Avoid importing AWSJobStore as the corresponding extra might be missing
            if self.jobStore.__class__.__name__ == 'AWSJobStore':
                # We have a ghost job - the job has been deleted but a stale read from
                # SDB gave us a false positive when we checked for its existence.
                # Process the job from here as any other job removed from the job store.
                # This is a temporary work around until https://github.com/BD2KGenomics/toil/issues/1091
                # is completed
                logger.warn('Got a stale read from SDB for job %s', jobStoreID)
                processRemovedJob(jobStoreID)
                return
            else:
                raise
        # A leftover log file in the store indicates the worker recorded a failure.
        if jobWrapper.logJobStoreFileID is not None:
            logger.warn("The jobWrapper seems to have left a log file, indicating failure: %s", jobStoreID)
            with jobWrapper.getLogFileHandle(self.jobStore) as logFileStream:
                logStream(logFileStream, jobStoreID, logger.warn)
        if resultStatus != 0:
            # If the batch system returned a non-zero exit code then the worker
            # is assumed not to have captured the failure of the job, so we
            # reduce the retry count here.
            if jobWrapper.logJobStoreFileID is None:
                logger.warn("No log file is present, despite jobWrapper failing: %s", jobStoreID)
            jobWrapper.setupJobAfterFailure(self.config)
            self.jobStore.update(jobWrapper)
        elif jobStoreID in self.toilState.hasFailedSuccessors:
            # If the job has completed okay, we can remove it from the list of
            # jobs with failed successors
            self.toilState.hasFailedSuccessors.remove(jobStoreID)
        # Now we know the jobWrapper is done we can add it to the list of
        # updated jobWrapper files
        self.toilState.updatedJobs.add((jobWrapper, resultStatus))
        logger.debug("Added jobWrapper: %s to active jobs", jobStoreID)
    else:
        # The jobWrapper is done: it no longer exists in the job store.
        processRemovedJob(jobStoreID)
def processFinishedJob(self, jobBatchSystemID, resultStatus):
    """Update leader bookkeeping for a job the batch system reports finished."""
    jobStoreID = self.removeJob(jobBatchSystemID).jobStoreID
    if self.jobStore.exists(jobStoreID):
        jobWrapper = self.jobStore.load(jobStoreID)
        hasLogFile = jobWrapper.logJobStoreFileID is not None
        if hasLogFile:
            # A leftover log file means the worker recorded a failure; replay it.
            logger.warn("The jobWrapper seems to have left a log file, indicating failure: %s", jobStoreID)
            with jobWrapper.getLogFileHandle(self.jobStore) as handle:
                logStream(handle, jobStoreID, logger.warn)
        failed = resultStatus != 0
        if failed:
            if not hasLogFile:
                logger.warn("No log file is present, despite jobWrapper failing: %s", jobStoreID)
            jobWrapper.setupJobAfterFailure(self.config)
        # Now that the jobWrapper is done, queue it among the updated jobs.
        self.toilState.updatedJobs.add((jobWrapper, resultStatus))
        logger.debug("Added jobWrapper: %s to active jobs", jobStoreID)
    else:
        # The jobWrapper has been removed from the store: it is entirely done.
        if resultStatus != 0:
            logger.warn("Despite the batch system claiming failure the "
                        "jobWrapper %s seems to have finished and been removed", jobStoreID)
        self._updatePredecessorStatus(jobStoreID)
def main():
    """Report the state of the toil workflow.

    Parses the job store location from the command line, surveys the jobs
    recorded there and prints a summary; with --verbose also streams the log
    files of failed jobs.  Exits 1 when --failIfNotComplete is set and the
    workflow still has unfinished jobs.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help=("Store in which to place job management files \
and the global accessed temporary files"
                              "(If this is a file path this needs to be globally accessible "
                              "by all machines running jobs).\n"
                              "If the store already exists and restart is false an"
                              " ExistingJobStoreException exception will be thrown."))
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                        help="Print loads of information, particularly all the log files of \
jobs that failed. default=%(default)s",
                        default=False)
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)
    parser.add_argument("--version", action="version", version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for toil")
    # Identity comparison is the correct idiom for None (was `!= None`).
    assert options.jobStore is not None

    ##########################################
    # Survey the status of the job and report.
    ##########################################
    jobStore = loadJobStore(options.jobStore)
    try:
        rootJob = Job._loadRootJob(jobStore)
    except JobException:
        # No root job: the workflow most likely finished and cleaned up.
        print("The root job of the jobStore is not present, the toil workflow has probably completed okay")
        sys.exit(0)
    toilState = ToilState(jobStore, rootJob)

    # A job is "totally failed" once its retry budget is exhausted.
    failedJobs = [job for job in toilState.updatedJobs | set(toilState.successorCounts.keys())
                  if job.remainingRetryCount == 0]

    print("There are %i active jobs, %i parent jobs with children, and \
%i totally failed jobs currently in toil workflow: %s" %
          (len(toilState.updatedJobs), len(toilState.successorCounts),
           len(failedJobs), options.jobStore))

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warning replaces the deprecated logger.warn alias.
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print("Log file for job %s is not present" % job.jobStoreID)
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Report the state of the toil workflow.

    Parses the job store location from the command line, surveys the jobs
    recorded there and prints a summary to stderr; with --verbose also streams
    the log files of failed jobs.  Exits 1 when --failIfNotComplete is set and
    the workflow still has unfinished jobs.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help=("Store in which to place job management files \
and the global accessed temporary files"
                              "(If this is a file path this needs to be globally accessible "
                              "by all machines running jobs).\n"
                              "If the store already exists and restart is false an"
                              " JobStoreCreationException exception will be thrown."))
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                        help="Print loads of information, particularly all the log files of \
jobs that failed. default=%(default)s",
                        default=False)
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None

    ##########################################
    # Survey the status of the job and report.
    ##########################################
    jobStore = Toil.loadOrCreateJobStore(options.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        # No root job: the workflow most likely finished and was cleaned up.
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)
    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobWrapper we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}
    # A job is "totally failed" once its retry budget is exhausted.
    failedJobs = [job for job in totalJobs if job.remainingRetryCount == 0]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), options.jobStore), file=sys.stderr)

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warning replaces the deprecated logger.warn alias.
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Report the state of the toil workflow.

    Parses the job store location from the command line, surveys the jobs
    recorded there and prints a summary; with --verbose also streams the log
    files of failed jobs.  Exits 1 when --failIfNotComplete is set and the
    workflow still has unfinished jobs.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help=("Store in which to place job management files \
and the global accessed temporary files"
                              "(If this is a file path this needs to be globally accessible "
                              "by all machines running jobs).\n"
                              "If the store already exists and restart is false an"
                              " ExistingJobStoreException exception will be thrown."))
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                        help="Print loads of information, particularly all the log files of \
jobs that failed. default=%(default)s",
                        default=False)
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for toil")
    # Identity comparison is the correct idiom for None (was `!= None`).
    assert options.jobStore is not None

    ##########################################
    # Survey the status of the job and report.
    ##########################################
    jobStore = loadJobStore(options.jobStore)
    try:
        rootJob = Job._loadRootJob(jobStore)
    except JobException:
        # No root job: the workflow most likely finished and cleaned up.
        print("The root job of the jobStore is not present, the toil workflow has probably completed okay")
        sys.exit(0)
    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobWrapper we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}
    # A job is "totally failed" once its retry budget is exhausted.
    failedJobs = [job for job in totalJobs if job.remainingRetryCount == 0]

    print("There are %i active jobs, %i parent jobs with children, and \
%i totally failed jobs currently in toil workflow: %s" %
          (len(toilState.updatedJobs), len(toilState.successorCounts),
           len(failedJobs), options.jobStore))

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warning replaces the deprecated logger.warn alias.
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print("Log file for job %s is not present" % job.jobStoreID)
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Report the state of the toil workflow.

    Builds a Config from the command-line options, resumes the job store,
    surveys the jobs recorded there and prints a summary to stderr; with
    --verbose also streams the log files of failed jobs.  Exits 1 when
    --failIfNotComplete is set and the workflow still has unfinished jobs.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                        help="Print loads of information, particularly all the log files of \
jobs that failed. default=%(default)s",
                        default=False)
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)

    ##########################################
    # Survey the status of the job and report.
    ##########################################
    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        # No root job: the workflow most likely finished and was cleaned up.
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)
    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobGraph we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}
    # A job is "totally failed" once its retry budget is exhausted.
    failedJobs = [job for job in totalJobs if job.remainingRetryCount == 0]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), config.jobStore), file=sys.stderr)

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warning replaces the deprecated logger.warn alias.
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Report the state of the toil workflow.

    Resumes the job store named on the command line, surveys the jobs recorded
    there and prints a summary to stderr; with --verbose also streams the log
    files of failed jobs.  Exits 1 when --failIfNotComplete is set and the
    workflow still has unfinished jobs.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                        help="Print loads of information, particularly all the log files of \
jobs that failed. default=%(default)s",
                        default=False)
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None

    ##########################################
    # Survey the status of the job and report.
    ##########################################
    jobStore = Toil.resumeJobStore(options.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        # No root job: the workflow most likely finished and was cleaned up.
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)
    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobWrapper we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}
    # A job is "totally failed" once its retry budget is exhausted.
    failedJobs = [job for job in totalJobs if job.remainingRetryCount == 0]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), options.jobStore), file=sys.stderr)

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warning replaces the deprecated logger.warn alias.
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Report the state of the toil workflow.

    Parses the job store location (option or positional argument), surveys the
    jobs recorded there and prints a summary; with --verbose also streams the
    log files of failed jobs.  Exits 1 when --failIfNotComplete is set and the
    workflow still has unfinished jobs.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [--jobStore] JOB_TREE_DIR [options]", "%prog 0.1")
    parser.add_option("--jobStore", dest="jobStore",
                      help="Job store path. Can also be specified as the single argument to the script.\
 default=%default",
                      default=os.path.abspath("./toil"))
    parser.add_option("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
jobs that failed. default=%default",
                      default=False)
    parser.add_option("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%default",
                      default=False)
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1  # Only toil may be specified as argument
    if len(args) == 1:  # Allow toil directory as arg
        options.jobStore = args[0]

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for toil")
    # Identity comparison is the correct idiom for None (was `!= None`).
    assert options.jobStore is not None

    ##########################################
    # Survey the status of the job and report.
    ##########################################
    jobStore = loadJobStore(options.jobStore)
    try:
        rootJob = Job._loadRootJob(jobStore)
    except JobException:
        # No root job: the workflow most likely finished and cleaned up.
        print("The root job of the jobStore is not present, the toil workflow has probably completed okay")
        sys.exit(0)
    toilState = ToilState(jobStore, rootJob)

    # A job is "totally failed" once its retry budget is exhausted.
    failedJobs = [job for job in toilState.updatedJobs |
                  set(toilState.successorCounts.keys())
                  if job.remainingRetryCount == 0]

    print("There are %i active jobs, %i parent jobs with children, and \
%i totally failed jobs currently in toil workflow: %s" %
          (len(toilState.updatedJobs), len(toilState.successorCounts),
           len(failedJobs), options.jobStore))

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warning replaces the deprecated logger.warn alias.
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print("Log file for job %s is not present" % job.jobStoreID)
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Report the state of the toil workflow.

    Parses the job store location (option or positional argument), surveys the
    jobs recorded there and prints a summary; with --verbose also streams the
    log files of failed jobs.  Exits 1 when --failIfNotComplete is set and the
    workflow still has unfinished jobs.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [--jobStore] JOB_TREE_DIR [options]", "%prog 0.1")
    parser.add_option("--jobStore", dest="jobStore",
                      help="Job store path. Can also be specified as the single argument to the script.\
 default=%default",
                      default=os.path.abspath("./toil"))
    parser.add_option("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
jobs that failed. default=%default",
                      default=False)
    parser.add_option("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%default",
                      default=False)
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1  # Only toil may be specified as argument
    if len(args) == 1:  # Allow toil directory as arg
        options.jobStore = args[0]

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for toil")
    # Identity comparison is the correct idiom for None (was `!= None`).
    assert options.jobStore is not None

    ##########################################
    # Survey the status of the job and report.
    ##########################################
    jobStore = loadJobStore(options.jobStore)
    try:
        rootJob = Job._loadRootJob(jobStore)
    except JobException:
        # No root job: the workflow most likely finished and cleaned up.
        print("The root job of the jobStore is not present, the toil workflow has probably completed okay")
        sys.exit(0)
    toilState = ToilState(jobStore, rootJob)

    # A job is "totally failed" once its retry budget is exhausted.
    failedJobs = [job for job in toilState.updatedJobs |
                  set(toilState.successorCounts.keys())
                  if job.remainingRetryCount == 0]

    print("There are %i active jobs, %i parent jobs with children, and \
%i totally failed jobs currently in toil workflow: %s" %
          (len(toilState.updatedJobs), len(toilState.successorCounts),
           len(failedJobs), options.jobStore))

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warning replaces the deprecated logger.warn alias.
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print("Log file for job %s is not present" % job.jobStoreID)
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Report the state of the toil workflow.

    Parses the toil store location (option or positional argument), surveys
    the batchjobs recorded there and prints a summary; with --verbose also
    streams the log files of failed batchjobs.  Exits 1 when
    --failIfNotComplete is set and the workflow still has unfinished jobs.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [--toil] JOB_TREE_DIR [options]", "%prog 0.1")
    parser.add_option("--toil", dest="toil",
                      help="Batchjob store path. Can also be specified as the single argument to the script.\
 default=%default",
                      default='./toil')
    parser.add_option("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
jobs that failed. default=%default",
                      default=False)
    parser.add_option("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%default",
                      default=False)
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1  # Only toil may be specified as argument
    if len(args) == 1:  # Allow toil directory as arg
        options.toil = args[0]

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for toil")
    # Identity comparison is the correct idiom for None (was `!= None`).
    assert options.toil is not None

    ##########################################
    # Survey the status of the batchjob and report.
    ##########################################
    jobStore = loadJobStore(options.toil)
    config = jobStore.config
    # This initialises the object toil.toilState used to track the active toil
    toilState = jobStore.loadToilState()

    # A batchjob is "totally failed" once its retry budget is exhausted.
    failedJobs = [batchjob for batchjob in toilState.updatedJobs |
                  set(toilState.childCounts.keys())
                  if batchjob.remainingRetryCount == 0]

    print("There are %i active jobs, %i parent jobs with children, \
%i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) \
currently in toil: %s" %
          (len(toilState.updatedJobs), len(toilState.childCounts),
           len(failedJobs), len(toilState.shellJobs), options.toil))

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for batchjob in failedJobs:
            if batchjob.logJobStoreFileID is not None:
                with batchjob.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warning replaces the deprecated logger.warn alias.
                    logStream(logFileHandle, batchjob.jobStoreID, logger.warning)
            else:
                print("Log file for batchjob %s is not present" % batchjob.jobStoreID)
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    if (len(toilState.updatedJobs) + len(toilState.childCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Report the state of the toil workflow.

    Parses the toil store location (option or positional argument), surveys
    the batchjobs recorded there and prints a summary; with --verbose also
    streams the log files of failed batchjobs.  Exits 1 when
    --failIfNotComplete is set and the workflow still has unfinished jobs.
    """
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = getBasicOptionParser("usage: %prog [--toil] JOB_TREE_DIR [options]", "%prog 0.1")
    parser.add_option("--toil", dest="toil",
                      help="Batchjob store path. Can also be specified as the single argument to the script.\
 default=%default",
                      default='./toil')
    parser.add_option("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
jobs that failed. default=%default",
                      default=False)
    parser.add_option("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%default",
                      default=False)
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    assert len(args) <= 1  # Only toil may be specified as argument
    if len(args) == 1:  # Allow toil directory as arg
        options.toil = args[0]

    ##########################################
    # Do some checks.
    ##########################################
    logger.info("Checking if we have files for toil")
    # Identity comparison is the correct idiom for None (was `!= None`).
    assert options.toil is not None

    ##########################################
    # Survey the status of the batchjob and report.
    ##########################################
    jobStore = loadJobStore(options.toil)
    config = jobStore.config
    # This initialises the object toil.toilState used to track the active toil
    toilState = jobStore.loadToilState()

    # A batchjob is "totally failed" once its retry budget is exhausted.
    failedJobs = [batchjob for batchjob in toilState.updatedJobs |
                  set(toilState.childCounts.keys())
                  if batchjob.remainingRetryCount == 0]

    print("There are %i active jobs, %i parent jobs with children, \
%i totally failed jobs and %i empty jobs (i.e. finished but not cleaned up) \
currently in toil: %s" %
          (len(toilState.updatedJobs), len(toilState.childCounts),
           len(failedJobs), len(toilState.shellJobs), options.toil))

    if options.verbose:  # Verbose currently means outputting the files that have failed.
        for batchjob in failedJobs:
            if batchjob.logJobStoreFileID is not None:
                with batchjob.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warning replaces the deprecated logger.warn alias.
                    logStream(logFileHandle, batchjob.jobStoreID, logger.warning)
            else:
                print("Log file for batchjob %s is not present" % batchjob.jobStoreID)
        if len(failedJobs) == 0:
            print("There are no failed jobs to report")

    if (len(toilState.updatedJobs) + len(toilState.childCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)