Example #1
def train_and_parse_fold(fold_dir, dev_loc, i, label_set, no_extra_features,
                         allow_reattach, allow_unshift, allow_move_top,
                         allow_invert):
    name = fold_dir.join('name').open().read().strip()
    train_args = [
        'BASE_DIR', 'DEV_LOC', 'LABEL_SET', 'FEAT_STR', 'THRESH', 'REPAIR_STR'
    ]
    if no_extra_features:
        feat_str = '-x'
    else:
        feat_str = ''
    repair_str = []
    if allow_reattach:
        repair_str.append('-r')
    if allow_move_top:
        repair_str.append('-m')
    if allow_unshift:
        repair_str.append('-u')
    if allow_invert:
        repair_str.append('-v')
    repair_str = ' '.join(repair_str)
    thresh = 5 * i if i >= 1 else 5
    arg_vals = [fold_dir, dev_loc, label_set, feat_str, thresh, repair_str]
    env_str = ','.join('%s=%s' % (k, v) for k, v in zip(train_args, arg_vals))
    sh.qsub('pbs/train.sh',
            o=fold_dir.join('out'),
            e=fold_dir.join('err'),
            v=env_str,
            N=name)
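
A note on the call style: sh turns keyword arguments into short options, so the call above renders as an ordinary qsub command line. Because sh may place keyword options after the positional script operand, printing the baked command is a cheap way to check the ordering your qsub accepts. A minimal sketch, assuming a qsub binary on PATH; all paths and values are placeholders:

import sh

# bake() records arguments without executing them; str() of the baked
# command shows the exact command line sh would run.
train = sh.qsub.bake('pbs/train.sh',
                     o='out.log',                       # -o stdout path
                     e='err.log',                       # -e stderr path
                     v='BASE_DIR=/tmp/fold0,THRESH=5',  # -v environment list
                     N='fold0')                         # -N job name
print(train)  # full qsub command line, options and operand included
# train()    # uncomment to actually submit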
Example #2

def qsub(resource_file, exec_path):
    """Submit submit_new.sh to PBS with resources read from a JSON file.

    :param resource_file: path to the JSON resource file
    :param exec_path: directory containing submit_new.sh; "~" and
        environment variables are expanded
    """
    res = _read_resource_file(resource_file)

    path = os.path.expandvars(os.path.expanduser(exec_path))

    walltime = res["resource"]["walltime"]

    # Count the nodes and find the largest per-node CPU total.
    max_cpus = 0
    nodes = len(res["node"])
    for systems in res["node"].values():
        cpus = sum(system["cpus"] for system in systems.values())
        max_cpus = max(cpus, max_cpus)

    sh.qsub(os.path.join(path, "submit_new.sh"),
            N="lambdathesis",
            # walltime is its own -l resource, so it is comma-separated,
            # not chained onto the node spec with a colon
            l="nodes=%s:ppn=%s,walltime=%s" % (nodes, max_cpus, walltime),
            j="oe",
            o="lambda-out",
            e="lambda-err",
            m="n",
            V=True)  # a bare True makes sh emit the flag -V with no value
Example #3
    def submitJob(self, j):
        runHash = j.runHash
        prevJob = self.jobs.find_one(runHash=runHash)

        # Reuse the stored record for this run hash, or build a fresh one.
        dbJob = prevJob if prevJob is not None else dict(
            runHash=runHash, name=j.name, params=j.params)
        dbJob.update(retVal='',
                     pbsId='',
                     time=time.time(),
                     qsubParams=j.qsubParams,
                     status=JobStatus.NotSubmitted,
                     runFunc=j.runFunc)

        if prevJob is None:
            self.jobs.insert(dbJob)
        else:
            self.jobs.update(dbJob, ['id'])

        jobStr = """#!/bin/bash
#PBS -N {0}
#PBS -o {1}
#PBS -e {2}

python -m pypalmetto run '{3}'
        """.format(j.name, self.getJobOutFile(j), self.getJobErrFile(j),
                   j.runHash)
        #print("About to run qsub with:")
        #print(jobStr)
        qsubParams = j.qsubParamsRaw.copy()
        qsubParams.update({'_in': jobStr})  # _in feeds the script to qsub's stdin
        pbsId = str(sh.qsub(**qsubParams)).strip()  # qsub prints the new job id

        dbJob = self.jobs.find_one(runHash=runHash)
        dbJob.update(status=JobStatus.Queued, pbsId=pbsId)
        self.jobs.update(dbJob, ['id'])
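
The job script never touches disk here: sh's _in keyword pipes jobStr to qsub's standard input, and PBS accepts a script on stdin in place of a file. A stripped-down sketch of the same pattern; the job name and body are placeholders:

import sh

script = """#!/bin/bash
#PBS -N demo
echo "hello from $PBS_JOBID"
"""
# _in supplies the process's stdin; qsub echoes the new job id on stdout.
pbs_id = str(sh.qsub(_in=script)).strip()
print(pbs_id)  # e.g. "123456.server-name"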
Example #4
def run_nas(yagi,
            num_train,
            gpu_num,
            file_to_run,
            id,
            batch_size,
            archs_per_task=5,
            num_min_depth=20,
            num_max_depth=70):
    """Runs the influence function calculation on a specified yagi

    Arguments:
        start: int, per class test sample index at which to start
        per_class: int, how many images to process per class
        gpu_num: str, gpu id to run the influence function on. can be a single
            number or a comma seperated string of multiple ids
        file_to_run: str, filename of the script to run on yagi
        batch_size: int, reduce for small GPU mem machines
        recursion_depth: int, pass
        r_avg: int, pass"""
    print(f'running random NAS: {yagi}, num_train: {num_train}')
    time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S.%f")
    env_vars = os.environ.copy()
    env_vars["CUDA_VISIBLE_DEVICES"] = gpu_num
    sh.qsub('-q',
            f'main.q@{yagi}.vision.is.tohoku',
            '-v',
            f'time={time}',
            '-v',
            f'num_train={num_train}',
            '-v',
            f'id={id}',
            '-v',
            f'batch_size={batch_size}',
            '-v',
            f'archs_per_task={archs_per_task}',
            '-v',
            f'num_min_depth={num_min_depth}',
            '-v',
            f'num_max_depth={num_max_depth}',
            file_to_run,
            _env=env_vars)
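
Two details worth noting: sh's _env replaces the child's environment outright, which is why the code copies os.environ before adding CUDA_VISIBLE_DEVICES; and Grid Engine's -v also accepts a single comma-separated list, so the repeated flags could be collapsed. A sketch of that variant; the host, script, and values are placeholders:

import os
import sh

env_vars = os.environ.copy()             # _env replaces, not merges,
env_vars["CUDA_VISIBLE_DEVICES"] = "0"   # so start from a full copy

params = {"num_train": 100, "batch_size": 32, "id": 7}
var_list = ",".join("%s=%s" % (k, v) for k, v in params.items())

sh.qsub('-q', 'main.q@somehost',   # placeholder queue@host
        '-v', var_list,            # one comma-separated variable list
        'run_nas.sh',              # placeholder script
        _env=env_vars)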
Example #5
def main():
    args = docopt(__doc__, version='Version 1.0')

    # copy-pasted from pipeline.py :(
    cfg_f = args['--config']
    cfg_y = yaml.load(open(cfg_f))
    cfg = pipeline.Config(cfg_y)
    # it's probably better to have separate log files.
    if args['--log']:
        _log = Path(args['--log'])
        if _log.exists():
            print "Removing old log file %s" % _log
            _log.remove()
        log = open(args['--log'], 'a')
    else:
        log = sys.stdout

    sheet = open(args['<samplesheet>'])
    rows = csv.DictReader(sheet,
                          fieldnames=['read_dirs', 'control_dirs'],
                          delimiter='\t')
    rows = list(rows)
    base_out = args['--outdir'] or "."  # os.getcwd()?
    sampledir = args['--sampledir']

    def p_run(rows):
        weave = partial(weave_files, sampledir)
        fqs_and_controls = list(map(weave, rows))
        run2_func = partial(pipeline.run2, cfg, log, base_out)
        pool = multiprocessing.Pool(len(rows))
        print "Launching %d processes.\n==========================\n\n" % len(
            rows)
        pool.map(run2_func, fqs_and_controls)
        pool.close()
        pool.join()

    # p_run(rows)  # local multiprocessing run; disabled in favor of qsub
    if args['--qsub']:
        import tempfile
        import sh
        for i, row in enumerate(rows):
            #outdir = os.path.join(base_out, "sheet-sample-%d" % i)
            fastqs, controls = weave_files(sampledir, row)
            temp = tempfile.NamedTemporaryFile(prefix='pathos_sheet',
                                               suffix='qsub',
                                               delete=False)
            template = "{script} --fastq {fastqs} -c {cfg} -o {odir} --control {controls}"
            cmd = template.format(
                script='python /u/michael.panciera/CURRENT/pathos/pipeline.py',
                fastqs=' '.join(fastqs),
                controls=' '.join(controls),
                cfg=args['--config'],
                odir=base_out)
            temp.write(
                "module load mpi\nmodule load bowtie\nmodule load blast\n")
            temp.write(cmd)
            temp.close()
            script = temp.name
            #print "qsub {script} -q batch -l nodes={node}:ppn={cores}".format(script=temp.name, node=amedpbswrair007.amed.ds.army.mil, cores=4)
            #print " -q batch -l nodes={node}:ppn={cores}".format(script=temp.name, node=amedpbswrair007.amed.ds.army.mil, cores=4)
            sample_num = row['read_dirs'].split(SEP)[0]
            # qsub parses options before the script operand
            sh.qsub(
                '-N',
                "sheet-sample-%s" % sample_num,
                # "-M", "EMAIL HERE",
                '-l',
                "nodes=1:ppn=12",
                '-l',
                "mem=80514472881",
                script)
            print "Running %s" % script
    else:
        print "No --qsub flag, didn't run anything."
Example #6
def submit_jobs(jobs, args, log, timeout):
    log_message(1, ["Entering submit_jobs to schedule node tests"])
    main_dir = os.getcwd()

    for job in jobs:
        # Create job directory
        job.set_rootdir(args.path)
        cp(args.case, job.path)
        os.chdir(job.path)

        try:
            if args.batch == "LSF":
                from sh import bsub
                with open(os.path.join(job.path, "run_case.lsf"), 'r') as jf:
                    temp = bsub(_in=jf,
                                m=' '.join(job.nodes),
                                P=args.account,
                                q=args.queue,
                                _timeout=timeout)
                    job.jobid = temp.split('<')[1].split('>')[0]
                    log_message(
                        1, ["Job {} submitted with bsub".format(job.name)])
            elif args.batch == "PBS":
                from sh import qsub
                sel_hosts = "select=" + '+'.join([
                    "ncpus=36:mpiprocs=36:host={}".format(nid)
                    for nid in job.nodes
                ])

                if args.force:
                    temp = qsub("-l",
                                sel_hosts,
                                "-A",
                                args.account,
                                "-q",
                                args.queue,
                                "-h",
                                os.path.join(job.path, "run_case.pbs"),
                                _timeout=timeout)
                else:
                    temp = qsub("-l",
                                sel_hosts,
                                "-A",
                                args.account,
                                "-q",
                                args.queue,
                                os.path.join(job.path, "run_case.pbs"),
                                _timeout=timeout)

                job.jobid = temp.split('.')[0]
                log_message(1, [
                    "Job {} submitted with qsub (hold = {})".format(
                        job.name, args.force)
                ])

            log["num_active"] += 1
        except TimeoutException:
            log_message(1,
                        ["Could not submit job {}, skipping".format(job.name)])
            log["errors"].append("   submit failed   - " + job.name)
            log["num_errors"] += 1

        log["num_jobs"] += 1
        os.chdir(main_dir)

        # If it's been a while, check status
        if int(time.time() - log["last_time"]) >= 10:
            print_status(jobs, log)

        log_message(1, ["Finished submitting {} jobs".format(log["num_jobs"])])