def submit_files_until_done(filenames, wait_for_all=False, delay_check=0.5, sleep_seconds=60 * 5,
                            quiet=False, fail_when_max=False, retry_on_failure=True):
    """Submit each script in ``filenames`` with ``pbs.qsub``, throttled by ``max_submissions``.

    The scripts are processed in order.  Before every attempt the number of
    jobs reported by ``pbs.qstat`` for ``os.environ['USER']`` is compared with
    the module-level ``max_submissions``; a script is only submitted while
    that count is below the limit.  The caller's list is never mutated: the
    local name is rebound to successive slices.

    Args:
        filenames: sequence of script paths to submit.
        wait_for_all: when true, call ``pbs.qwait`` on every submitted job
            before returning.
        delay_check: seconds to sleep between ``qsub`` and the confirming
            ``qstat``; a falsy value skips the sleep.
        sleep_seconds: seconds to sleep when the queue is full; roughly half
            of it (at least 1 second) is slept after a failed submission.
        quiet: suppress the progress/error ``print`` output and pass
            ``verbose=False`` to ``pbs.qsub``.  The 'Queue is currently
            full.' notice is written regardless of this flag.
        fail_when_max: raise ``ReachedMax`` instead of sleeping when the
            queue is full.
        retry_on_failure: when false, a failed submission raises
            ``QSubFailure``; otherwise the same script is retried.

    Returns:
        The list of job ids returned by ``pbs.qsub``, in submission order.

    Raises:
        ReachedMax: the queue is full and ``fail_when_max`` is set.
        QSubFailure: a submission raised ``pbs.PBSUtilError`` and
            ``retry_on_failure`` is false.
    """
    global max_submissions

    job_ids = []
    total = len(filenames)

    while filenames:
        queued = len(pbs.qstat(user=os.environ['USER']))

        # Queue saturated: either bail out or wait and poll again.
        if not (queued < max_submissions):
            if fail_when_max:
                raise ReachedMax()
            sys.stdout.write('Queue is currently full.')
            sys.stdout.flush()
            time.sleep(sleep_seconds)
            continue

        script = filenames[0]

        # A missing script is reported (unless quiet) and dropped.
        if not os.path.exists(script):
            if not quiet:
                print('ERROR: Cannot submit %s because it does not exist.' % script)
            sys.stderr.flush()
            sys.stdout.flush()
            filenames = filenames[1:]
            continue

        try:
            job_id = pbs.qsub(script, verbose=not quiet)
            if delay_check:
                time.sleep(delay_check)
            # If this lookup does not raise, the job was submitted successfully.
            pbs.qstat(job_id=job_id)
        except pbs.PBSUtilError:
            traceback.print_exc()
            if not quiet:
                print('Failed to submit %s at %s (%s left to submit)' % (script, time.asctime(), len(filenames[1:])))
            sys.stderr.flush()
            sys.stdout.flush()
            if not retry_on_failure:
                raise QSubFailure()
            # Maybe we saturated the queue; back off, then retry the same script.
            time.sleep(max(int(round(sleep_seconds/2)), 1))
            continue

        remaining = filenames[1:]
        if not quiet:
            print('Submitted %s as "%s" at %s (%s/%s left to submit)' % (script, job_id, time.asctime(), len(remaining), total))
        filenames = remaining
        job_ids.append(job_id)
        queued = queued + 1
        if not quiet:
            print('I think submitted %d/%d' % (queued, max_submissions))
        sys.stderr.flush()
        sys.stdout.flush()

    if wait_for_all:
        for job_id in job_ids:
            pbs.qwait(job_id)

    return job_ids
def __clients(self):
    """Return a list of the living pbs clients.

    Queries ``pbs.qstat`` for the jobs of ``os.environ['USER']`` and keeps
    those whose ``name`` equals ``self.map_name``.  If the query raises
    ``pbs.PBSUtilQStatError`` a warning is logged and the query is retried
    after five seconds, for as long as it keeps failing.

    Returns:
        list: the matching job entries, in the order ``pbs.qstat`` gave them.
    """
    map_name = self.map_name
    while True:
        # Start from an empty list on every attempt so that a failure part-way
        # through a pass cannot leave duplicates behind for the retry.
        client_jobs = []
        try:
            for job in pbs.qstat(user=os.environ['USER']):
                if job.name == map_name:
                    client_jobs.append(job)
        except pbs.PBSUtilQStatError as e:
            logging.warning('ERROR: Bad qstat output. %s' % e)
            time.sleep(5)
        else:
            # The visible original built this list but never returned it.
            return client_jobs
def kill_all_jobs_named(username, name):
    """Delete every job of ``username`` whose name contains ``name``.

    Jobs are taken from ``pbs.qstat(user=username)``; each match is printed
    and then passed to ``pbs.qdel``.
    """
    user_jobs = pbs.qstat(user=username)
    for candidate in user_jobs:
        # Substring match, same as ``find(name) >= 0``.
        if name not in candidate.name:
            continue
        # Same output as the Python 2 statement ``print 'Killing ', job``.
        print('%s %s' % ('Killing ', candidate))
        pbs.qdel(candidate)
def qdel_range(min_id, max_id):
    """Delete every job whose integer id lies in ``[min_id, max_id]``.

    Iterates over everything ``pbs.qstat()`` returns, converts each entry's
    ``id`` with ``int`` and, when it falls inside the inclusive range, prints
    the entry and passes it to ``pbs.qdel``.
    """
    for entry in pbs.qstat():
        numeric_id = int(entry.id)
        if not (min_id <= numeric_id <= max_id):
            continue
        print('Deleting %s ' % entry)
        pbs.qdel(entry)
def jobs_running():
    """Return True when ``pbs.qstat`` lists at least one job for ``os.environ['USER']``."""
    current_user = os.environ['USER']
    job_count = len(pbs.qstat(user=current_user))
    return job_count > 0
def test_qsub_submits(self):
    """Submit the fixture script with pbs.qsub and expect pbs.qstat to find it."""
    job_id = pbs.qsub(self.pbs_script_filename)
    job_stats = pbs.qstat(job_id=job_id)
    assert job_stats, "failed to find stats for %s which was just submitted." % job_id
def test_qstat_real(self):
    """pbs.qstat should give a truthy result for a job submitted directly with ``qsub``."""
    submit_command = ["qsub", self.pbs_script_filename]
    submitter = subprocess.Popen(submit_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    captured_stdout = submitter.communicate()[0]
    # The job id is taken as the text before the first '.' on qsub's first
    # output line (presumably "<number>.<server>" -- confirm against the
    # local qsub).
    first_line = captured_stdout.splitlines()[0]
    submitted_id = first_line.split('.')[0]
    assert pbs.qstat(job_id=submitted_id)
def kill_all_user_jobs(username):
    """Delete every job that ``pbs.qstat`` reports for ``username``.

    Each job is printed and then passed to ``pbs.qdel``.
    """
    doomed_jobs = pbs.qstat(user=username)
    for doomed in doomed_jobs:
        # Same output as the Python 2 statement ``print 'Killing ', job``.
        print('%s %s' % ('Killing ', doomed))
        pbs.qdel(doomed)