def check_qsub_job_status(job_id, desired_status="r", qstat_output=None):
    '''
    Use 'qstat' to check on the run status of a qsub job

    Returns True if some line of the qstat listing contains job_id as a
    whole whitespace-delimited field followed, later on the same line, by
    desired_status as a whole field; returns False otherwise.

    job running:
    desired_status = "r"
    job waiting:
    desired_status = "qw"

    Parameters:
    job_id: the job identifier; it is rendered with str() and matched
        literally (regex metacharacters are escaped)
    desired_status: the state code to look for, also matched literally
    qstat_output: optional qstat listing (str or bytes) to search; when
        None (the default) 'qstat' is run through the 'sh' package

    NOTE: bytes output is decoded before matching, so a listing captured
    with subprocess under Python 3 can be passed in as qstat_output
    '''
    import re

    if qstat_output is None:
        # imported lazily so callers that supply their own listing do not
        # need the 'sh' package or a 'qstat' executable
        from sh import qstat
        qstat_output = qstat()

    if isinstance(qstat_output, bytes):
        # str(bytes) would give "b'...'" with escaped newlines under
        # Python 3, which defeats the per-line matching below
        qstat_text = qstat_output.decode("utf-8", "replace")
    else:
        qstat_text = str(qstat_output)

    # (?<!\S) / (?!\S) pin each value to a complete field, so job 123 does
    # not match the line of job 1234 or a priority such as 0.55500
    job_id_pattern = r"^.*(?<!\S){0}(?!\S).*\s{1}(?!\S).*$".format(
        re.escape(str(job_id)), re.escape(str(desired_status)))
    return re.search(job_id_pattern, qstat_text, re.MULTILINE) is not None
Exemple #2
0
def check_qsub_job_status(job_id, desired_status = "r", qstat_output = None):
    '''
    Use 'qstat' to check on the run status of a qsub job

    Returns True when a line of the qstat listing holds job_id as a complete
    whitespace-delimited field and, further along the same line,
    desired_status as a complete field; returns False otherwise.

    job running:
    desired_status = "r"
    job waiting:
    desired_status = "qw"

    Parameters:
    job_id: the job identifier; converted with str() and matched literally
    desired_status: the state code to look for, matched literally
    qstat_output: optional qstat listing (str or bytes) to inspect; if left
        as None, 'qstat' is executed through the 'sh' package
    '''
    import re

    if qstat_output is None:
        # using the 'sh' package; imported only when qstat must actually be
        # run, so a caller-supplied listing works without 'sh' installed
        from sh import qstat
        qstat_output = qstat()

    if isinstance(qstat_output, bytes):
        # output captured with the standard subprocess package is bytes under
        # Python 3; decode it instead of calling str(), which would produce
        # "b'...'" and break line-by-line matching
        listing = qstat_output.decode("utf-8", "replace")
    else:
        listing = str(qstat_output)

    # escape both values and require them to be whole fields so that a job
    # id cannot match inside a longer id or inside another column
    job_id_pattern = r"^.*(?<!\S){0}(?!\S).*\s{1}(?!\S).*$".format(
        re.escape(str(job_id)), re.escape(str(desired_status)))
    return re.search(job_id_pattern, listing, re.MULTILINE) is not None
Exemple #3
0
    def run_phil_pipeline(self, base_paramfile, opt_params, run_name):
        '''
        Submit one pipeline run through qsub and block until it has finished.

        A fresh working directory is created under
        <self.wrkdir>/phil_univax_out/<base_paramfile>/<run_name>.<random>/,
        the merged parameters are written to a 'params' file inside it, the
        qsub script is rendered from self.qsub_template_file and submitted,
        and the method then polls qstat until the job is no longer reported.

        Parameters:
        base_paramfile: passed to self.read_phil_base_params_from_file() and
            also used as a path component of the output directory
        opt_params: mapping of parameter overrides; applied last, so it wins
            over the base parameters and over the generated 'outdir',
            'event_report_file' and 'seed' entries
        run_name: label used as the prefix of the container directory name

        Returns a tuple (tempdir, poe_output_path), where poe_output_path is
        '<tempdir>/poe_output.csv'.

        Raises Exception if, after the job has left the queue, the lockfile
        is still present or the statusfile is non-empty (the statusfile
        content becomes the message). A statusfile that cannot be opened
        raises too. Each failure is retried up to three times before it
        propagates.
        '''
        tempdir_container = os.path.join(
            self.wrkdir, 'phil_univax_out', base_paramfile,
            '%s.%d' % (run_name, randint(0,sys.maxsize)))
        sh.mkdir('-p',tempdir_container)
        tempdir = mkdtemp(prefix='phl-', dir=tempdir_container)
        basename = os.path.basename(tempdir)
        event_report_file = os.path.join(tempdir, 'events.json_lines')
        poe_output_file = os.path.join(tempdir, 'poe_output')
        poe_format = 'csv'
        # NOTE(review): '-h' presumably submits the job in a held state so it
        # cannot start before the lockfile exists; the hold is cleared with
        # qalter further down -- confirm against the scheduler in use
        qsub = sh.qsub.bake('-h','-v','PHIL_HOME=%s,OMP_NUM_THREADS=16' % self.phil_home)

        # Assemble the parameters first and only then open the file, so an
        # error while reading the base parameters cannot leak an open handle
        params = self.read_phil_base_params_from_file(base_paramfile)
        params.update({
            'outdir': tempdir,
            'event_report_file' : event_report_file,
            'seed': randint(1, 2147483647)
        })
        params.update(opt_params)

        paramfile_path = os.path.join(tempdir,'params')
        with open(paramfile_path, 'w') as paramfile:
            for param, value in params.items():
                paramfile.write('%s = %s\n' % (param, str(value)))

        lockfile = os.path.join(tempdir, 'lockfile')
        statusfile = os.path.join(tempdir, 'statusfile')

        # Copy the input files next to the generated params file
        sh.cp(params['primary_cases_file[0]'], tempdir)
        sh.cp(params['vaccination_capacity_file'], tempdir)
        sh.cp('config.yaml', tempdir)

        qsub_template_args = dict(
            stdout = os.path.join(tempdir, 'stdout'),
            stderr = os.path.join(tempdir, 'stderr'),
            lockfile = lockfile, statusfile = statusfile,
            tempdir = tempdir, jobname = basename,
            #reservation = 'philo.0',
            paramfile = paramfile_path,
            synthetic_population = self.synthetic_population,
            event_report_file = event_report_file,
            poe_output_file = poe_output_file, poe_format = poe_format)

        with open(self.qsub_template_file, 'r') as f:
            qsub_template = jinja2.Template(f.read())

        qsub_file = os.path.join(tempdir, 'qsub.py')
        with open(qsub_file, 'w') as f:
            f.write(qsub_template.render(qsub_template_args))

        jobid = qsub(qsub_file).strip()
        # Make the run directory reachable by job id as well
        sh.ln('-s', tempdir, os.path.join(tempdir_container, jobid))
        sh.touch(lockfile)
        # NOTE(review): presumably releases the hold placed at submission
        sh.qalter('-h','n', jobid)

        # Poll while qstat still exits with 0 for this job; exit code 153 is
        # whitelisted so that sh returns it instead of raising
        while sh.qstat('-x', jobid, _ok_code=[0,153]).exit_code == 0:
            time.sleep(randint(1,4))

        # The job is gone from the queue: verify that the lockfile has been
        # removed and the statusfile is empty. Retry a few times before
        # giving up (NOTE(review): presumably to tolerate file system lag).
        n_check = 3
        for _n in range(n_check+1):
            try:
                if os.path.isfile(lockfile):
                    raise Exception('Lockfile present but %s not in queue!' % jobid)

                with open(statusfile, 'r') as f:
                    stat = f.read()
                    if len(stat) > 0:
                        raise Exception(stat)
                break
            except Exception:
                if _n == n_check:
                    # last attempt: propagate with the original traceback
                    raise
                else:
                    time.sleep(randint(10,20))

        return (tempdir, '%s.%s' % (poe_output_file, poe_format))