def test_wait(self):
    """pbs.qwait should not return before the submitted job's output exists."""
    expected_output = self.temp_output_filename

    # Remove any leftover output so a stale file cannot satisfy the check.
    if os.path.exists(expected_output):
        os.remove(expected_output)

    submitted_job = pbs.qsub(self.pbs_script_filename)
    pbs.qwait(submitted_job)

    # This triggers the panfs file system to make the file appear.
    os.system('ls > /dev/null')

    assert os.path.exists(expected_output), \
        "pbs.qwait returned, but the expected output does not yet exist."
def submit_files_until_done(filenames, wait_for_all=False, delay_check=0.5, sleep_seconds=60 * 5,
                            quiet=False, fail_when_max=False, retry_on_failure=True):
    """Submit every script in *filenames* with pbs.qsub, throttled by ``max_submissions``.

    Scripts are submitted in order.  Before each attempt the current user's
    queue is counted with ``pbs.qstat``; while that count is not below the
    module-level ``max_submissions`` the function sleeps and polls again.
    Scripts that do not exist on disk are reported (unless *quiet*) and skipped.
    The caller's sequence is never modified.

    Parameters:
        filenames: sequence of script paths to submit.
        wait_for_all: when true, call ``pbs.qwait`` on every submitted job
            before returning.
        delay_check: seconds to sleep between ``qsub`` and the confirming
            ``qstat``; a falsy value skips the sleep.
        sleep_seconds: seconds to sleep while the queue is full; half of it
            (at least one second) is slept after a failed submission.
        quiet: suppress the progress messages printed by this function.
        fail_when_max: raise ``ReachedMax`` instead of sleeping when the
            queue is full.
        retry_on_failure: when false, raise ``QSubFailure`` on the first
            ``pbs.PBSUtilError`` instead of retrying the same script.

    Returns:
        The list of job ids that were submitted, in submission order.

    Raises:
        ReachedMax: the queue is full and *fail_when_max* is true.
        QSubFailure: a submission failed and *retry_on_failure* is false.
    """
    # Local import: only needed for the fallback when USER is not exported.
    import getpass

    submitted_ids = []
    num_to_submit = len(filenames)
    # Reversed private copy: the next script is always the last element, so it
    # can be dropped in O(1) instead of re-slicing the list on every iteration.
    pending = list(reversed(filenames))

    while pending:
        current = pending[-1]
        # USER may be missing from the environment (cron, minimal containers);
        # fall back to the login name instead of dying with a KeyError.
        user = os.environ.get('USER') or getpass.getuser()
        num_submitted = len(pbs.qstat(user=user))

        if num_submitted >= max_submissions:
            if fail_when_max:
                raise ReachedMax()
            sys.stdout.write('Queue is currently full.')
            sys.stdout.flush()
            time.sleep(sleep_seconds)
            continue

        if not os.path.exists(current):
            if not quiet:
                print('ERROR: Cannot submit %s because it does not exist.' % current)
            sys.stderr.flush()
            sys.stdout.flush()
            pending.pop()
            continue

        try:
            job_id = pbs.qsub(current, verbose=not quiet)
            if delay_check:
                time.sleep(delay_check)
            pbs.qstat(job_id=job_id)  # If this doesn't throw, then it was submitted successfully
            if not quiet:
                print('Submitted %s as "%s" at %s (%s/%s left to submit)'
                      % (current, job_id, time.asctime(), len(pending) - 1, num_to_submit))
            pending.pop()
            submitted_ids.append(job_id)
            num_submitted = num_submitted + 1
            if not quiet:
                print('I think submitted %d/%d' % (num_submitted, max_submissions))
            sys.stderr.flush()
            sys.stdout.flush()
        except pbs.PBSUtilError:
            traceback.print_exc()
            if not quiet:
                print('Failed to submit %s at %s (%s left to submit)'
                      % (current, time.asctime(), len(pending) - 1))
            sys.stderr.flush()
            sys.stdout.flush()
            if not retry_on_failure:
                raise QSubFailure()
            # Maybe we saturated the queue.
            time.sleep(max(int(round(sleep_seconds/2)), 1))

    if wait_for_all:
        for job_id in submitted_ids:
            pbs.qwait(job_id)
    return submitted_ids
def wait_for_jobs(job_ids):
    """Call pbs.qwait on each id in *job_ids*, one after another, in order."""
    for pending_id in job_ids:
        pbs.qwait(job_id=pending_id)
def _option_takes_next_arg(arg):
    """Return True when option token *arg* consumes the following argv entry as its value."""
    if arg.startswith('--'):
        # '--num-nodes=4' carries its value inline, and a bare '--' is not an
        # option.  optparse also accepts unambiguous prefixes such as '--num'.
        return len(arg) > 2 and '--num-nodes'.startswith(arg)
    # Short options may be bundled ('-vo out').  The first value-taking letter
    # decides: if it is the last letter the value is the next token, otherwise
    # the rest of this token is the attached value ('-oresults.txt').
    for position, letter in enumerate(arg[1:], 1):
        if letter in 'oiwnp':
            return position == len(arg) - 1
    return False


def _find_command_start(argv):
    """Return the index in *argv* of the first word of the command (len(argv) if none)."""
    skip_value = False
    for idx, arg in enumerate(argv):
        if skip_value:
            skip_value = False
            continue
        if not arg.startswith('-'):
            return idx
        skip_value = _option_takes_next_arg(arg)
    # Every token was an option or an option's value: no command was given.
    return len(argv)


def main(argv):
    """Wrap a shell command in a PBS script, optionally submit it and show its output.

    *argv* holds this tool's options followed by the command to run.  The
    first token that is neither an option nor an option's value starts the
    command; everything from there on is joined with spaces and passed to
    ``run_command_here``.  Only the leading tokens are given to optparse, so
    the command's own flags are not interpreted here.

    Unless ``-x`` is given, the script is submitted through
    ``nice_submit.submit_files_until_done``.  With ``-W`` the function waits
    on the job with ``pbs.qwait`` and then prints the output file and, when
    present, the matching ``.err`` file.  A KeyboardInterrupt raised after
    the job id is known deletes the job with ``pbs.qdel``.

    Returns None.
    """
    parser = optparse.OptionParser()
    parser.add_option('-o', dest='output_file_name', default=None,
                      help="Destination file for the output. ")
    parser.add_option('-i', action='store', dest='input_file_name',
                      help="Write input file contents to command's stdin.")
    parser.add_option('-v', action='store_true', dest='verbose', default=False,
                      help="Display the script to be submitted.")
    parser.add_option('-q', action='store_true', dest='quiet', default=False,
                      help="Do not include information about processor nodes in the output, or the state of the job.")
    parser.add_option('-w', action='store', dest='script_name', default=None,
                      help="Save the script as this file name.")
    parser.add_option('-x', action='store_true', dest='dont_submit', default=False,
                      help="Don't submit the job. This is really only useful when combined with '-w'.")
    parser.add_option('-W', action='store_true', dest='wait', default=False,
                      help="Wait for the job to finish, and display its output.")
    parser.add_option('-n', action='store', dest='job_name', default=None,
                      help="PBS jobname")
    parser.add_option('-p', action='store', dest='numcpu', default=None,
                      help="Number of cpus to use.")
    parser.add_option('--num-nodes', action='store', dest='numnodes', default=None,
                      help="Number of nodes to use.")

    # Split argv into our own options and the command to run.
    command_start = _find_command_start(argv)
    options, _ignored_args = parser.parse_args(argv[:command_start])
    command_parts = argv[command_start:]
    command = ' '.join(command_parts)

    with tempfile_util.Session(local=True) as session:
        # Pick a file to output to
        if options.output_file_name is not None:
            output_file_name = options.output_file_name
        elif options.script_name:
            output_file_name = options.script_name + '.out'
            session.add_name(output_file_name + '.err')
        else:
            output_file_name = session.temp_file_name('.submit_command.out')
            session.add_name(output_file_name + '.err')
            if options.script_name:
                # NOTE(review): options.script_name is falsy whenever this
                # branch runs, so this message looks unreachable -- confirm
                # the intended placement against the original layout.
                print(' '.join(['Outputting to ', str(output_file_name)]))

        # Pick a script name
        if options.script_name is not None:
            script_file_name = options.script_name
        else:
            script_file_name = session.temp_file_name('.submit_command.pbs')

        # Stays None until submission succeeds, so the interrupt handler
        # knows whether there is a job to delete.
        job_id = None

        if options.job_name is not None:
            job_name = options.job_name
        elif output_file_name is not None:
            job_name = output_file_name
        else:
            job_name = ''.join(ch for ch in command_parts[0] if ch.isalpha())

        # Create the script
        run_command_here(script_file_name, command,
                         input_file_name=options.input_file_name,
                         output_file_name=output_file_name,
                         job_name=job_name,
                         verbose=not options.quiet,
                         numcpu=options.numcpu,
                         numnodes=options.numnodes)

        if options.verbose:
            with open(script_file_name) as script_file:
                print(script_file.read())

        # Maybe submit it
        if options.script_name is not None:
            print('Wrote %s.' % script_file_name)

        if options.dont_submit:
            return

        try:
            if not options.quiet:
                print('Submitting...')
            job_id, = nice_submit.submit_files_until_done([script_file_name],
                                                          wait_for_all=False,
                                                          quiet=options.quiet)
            # If they directed the output, just exit
            if not options.wait:
                return
            if not options.quiet:
                print(' '.join(['Waiting on job ', str(job_id), ' to finish running.']))
            pbs.qwait(job_id)
        except KeyboardInterrupt:
            print('^C')
            if job_id:
                print(' '.join(['Killing ', str(job_id)]))
                pbs.qdel(job_id)

        if os.path.exists(output_file_name):
            with open(output_file_name) as output_file:
                print(output_file.read())
        else:
            print("Output file doesn't seem to exist! Try:\ncat %s\n" % output_file_name)

        if os.path.exists(output_file_name + '.err'):
            with open(output_file_name + '.err') as err_file:
                print(err_file.read())