def check_qsub_job_status(job_id, desired_status="r", qstat_output=None):
    '''
    Use 'qstat' to check whether a qsub job is in a given state.

    Returns True if some line of the qstat listing contains `job_id` as a
    whole token followed later on the same line by `desired_status`
    surrounded by whitespace; returns False otherwise.

    job running: desired_status = "r"
    job waiting: desired_status = "qw"

    Parameters
    ----------
    job_id : str or int
        Job identifier to look for. It is matched literally (regex
        metacharacters are escaped) and must not be embedded in a longer
        word, so job "123" does not match the line for job "1234".
    desired_status : str
        State code to look for in the same line as the job id.
    qstat_output : str or bytes, optional
        Previously captured qstat listing. When omitted, 'qstat' is run via
        the 'sh' package. Passing it lets a caller poll many jobs with a
        single qstat call.

    NOTE: bytes output (e.g. from subprocess under Python 3) is decoded
    before matching, so line anchors keep working.
    '''
    import re

    if qstat_output is None:
        # Imported lazily so the 'sh' dependency is only needed when we
        # actually have to run qstat ourselves.
        from sh import qstat
        qstat_output = qstat()

    if isinstance(qstat_output, bytes):
        # str(bytes) would yield "b'...'" with escaped newlines and defeat
        # the per-line (re.MULTILINE) matching below.
        listing = qstat_output.decode("utf-8", errors="replace")
    else:
        listing = str(qstat_output)

    job_token = re.escape(str(job_id))
    status_token = re.escape(str(desired_status))
    # The lookarounds stop the id from matching inside a longer identifier.
    job_id_pattern = (
        r"^.*(?<!\w)" + job_token + r"(?!\w).*\s" + status_token + r"\s.*$"
    )
    return re.search(job_id_pattern, listing, re.MULTILINE) is not None
def check_qsub_job_status(job_id, desired_status="r", qstat_output=None):
    '''
    Use 'qstat' to check on the run status of a qsub job.

    Returns True when a line of the qstat listing mentions `job_id` (as a
    whole token, matched literally) and, later on that line, the
    whitespace-delimited `desired_status`; returns False otherwise.

    job running: desired_status = "r"
    job waiting: desired_status = "qw"

    Parameters
    ----------
    job_id : str or int
        Job identifier to search for. Regex metacharacters in it are
        escaped, and it will not match inside a longer word (job "123" does
        not match the line for job "1234").
    desired_status : str
        State code expected on the job's line.
    qstat_output : str or bytes, optional
        An already-captured qstat listing. If None (the default), 'qstat'
        is invoked through the 'sh' package. Supplying it allows several
        jobs to be checked against one qstat call.
    '''
    import re

    if qstat_output is None:
        # using the 'sh' package; imported only when qstat must be run here
        from sh import qstat
        qstat_output = qstat()

    if isinstance(qstat_output, bytes):
        # Decode explicitly: str() on bytes produces "b'...'" with escaped
        # newlines, which would break the line-anchored search.
        listing = qstat_output.decode("utf-8", errors="replace")
    else:
        listing = str(qstat_output)

    job_token = re.escape(str(job_id))
    status_token = re.escape(str(desired_status))
    # Lookarounds keep the id from matching as part of a longer identifier.
    job_id_pattern = (
        r"^.*(?<!\w)" + job_token + r"(?!\w).*\s" + status_token + r"\s.*$"
    )
    return re.search(job_id_pattern, listing, re.MULTILINE) is not None
def run_phil_pipeline(self, base_paramfile, opt_params, run_name):
    '''
    Submit one PHIL run through qsub, wait until it leaves the queue, and
    return where its output lives.

    A fresh temporary directory is created under
    <self.wrkdir>/phil_univax_out/<base_paramfile>/<run_name>.<random int>.
    The merged parameter file, copies of the input files named in the
    parameters, 'config.yaml' and the rendered qsub script are placed there.

    Parameters
    ----------
    base_paramfile : str
        Passed to self.read_phil_base_params_from_file to obtain the base
        parameters; also used as a path component of the output directory.
    opt_params : dict
        Overrides applied on top of the base parameters (and on top of the
        'outdir', 'event_report_file' and 'seed' values set here).
    run_name : str
        Prefix of the per-run container directory name.

    Returns
    -------
    tuple
        (tempdir, '<tempdir>/poe_output.csv')

    Raises
    ------
    Exception
        If, after the job has left the queue and all retries are exhausted,
        the lockfile still exists, the status file cannot be read, or the
        status file is non-empty (its content becomes the message).
    '''
    tempdir_container = os.path.join(
        self.wrkdir, 'phil_univax_out', base_paramfile,
        '%s.%d' % (run_name, randint(0, sys.maxsize)))
    sh.mkdir('-p', tempdir_container)
    tempdir = mkdtemp(prefix='phl-', dir=tempdir_container)
    basename = os.path.basename(tempdir)
    event_report_file = os.path.join(tempdir, 'events.json_lines')
    poe_output_file = os.path.join(tempdir, 'poe_output')
    poe_format = 'csv'
    # '-h' submits the job in a held state; the hold is released further
    # down, after the lockfile has been created.
    qsub = sh.qsub.bake(
        '-h', '-v', 'PHIL_HOME=%s,OMP_NUM_THREADS=16' % self.phil_home)

    params = self.read_phil_base_params_from_file(base_paramfile)
    params.update({
        'outdir': tempdir,
        'event_report_file': event_report_file,
        'seed': randint(1, 2147483647)
    })
    # Caller-supplied values win over everything set above.
    params.update(opt_params)

    # Write the parameter file under a context manager so the handle is
    # closed even if a write fails.
    paramfile_path = os.path.join(tempdir, 'params')
    with open(paramfile_path, 'w') as paramfile:
        for param, value in params.items():
            paramfile.write('%s = %s\n' % (param, str(value)))

    lockfile = os.path.join(tempdir, 'lockfile')
    statusfile = os.path.join(tempdir, 'statusfile')
    sh.cp(params['primary_cases_file[0]'], tempdir)
    sh.cp(params['vaccination_capacity_file'], tempdir)
    sh.cp('config.yaml', tempdir)

    qsub_template_args = dict(
        stdout=os.path.join(tempdir, 'stdout'),
        stderr=os.path.join(tempdir, 'stderr'),
        lockfile=lockfile,
        statusfile=statusfile,
        tempdir=tempdir,
        jobname=basename,
        #reservation = 'philo.0',
        paramfile=paramfile_path,
        synthetic_population=self.synthetic_population,
        event_report_file=event_report_file,
        poe_output_file=poe_output_file,
        poe_format=poe_format)

    with open(self.qsub_template_file, 'r') as f:
        qsub_template = jinja2.Template(f.read())
    qsub_file = os.path.join(tempdir, 'qsub.py')
    with open(qsub_file, 'w') as f:
        f.write(qsub_template.render(qsub_template_args))

    jobid = qsub(qsub_file).strip()
    # Symlink named after the job id, pointing at this run's directory.
    sh.ln('-s', tempdir, os.path.join(tempdir_container, jobid))
    # Create the lockfile before releasing the hold.
    # NOTE(review): presumably the rendered job script removes the lockfile
    # when it finishes -- confirm against the qsub template.
    sh.touch(lockfile)
    sh.qalter('-h', 'n', jobid)

    # Poll while qstat exits with 0; exit code 153 is also accepted by sh
    # (no exception) and ends the loop.
    # NOTE(review): 153 is presumably the scheduler's "unknown job id" code.
    while sh.qstat('-x', jobid, _ok_code=[0, 153]).exit_code == 0:
        time.sleep(randint(1, 4))

    # The lockfile/statusfile state may lag behind the queue state, so the
    # check is retried a few times before giving up.
    n_check = 3
    for _n in range(n_check + 1):
        try:
            if os.path.isfile(lockfile):
                raise Exception(
                    'Lockfile present but %s not in queue!' % jobid)
            with open(statusfile, 'r') as f:
                stat = f.read()
            if len(stat) > 0:
                raise Exception(stat)
            break
        except Exception:
            if _n == n_check:
                # Bare raise keeps the original traceback intact.
                raise
            time.sleep(randint(10, 20))

    return (tempdir, '%s.%s' % (poe_output_file, poe_format))