Ejemplo n.º 1
0
def submit_job(thecommand):
    """Run ``thecommand`` through a Slurm job named "sync".

    The job requests 92 nodes and 4416 tasks on the ``skx-normal``
    partition with a six-hour time limit.
    """
    resources = {
        "nodes": 92,
        "ntasks": 4416,
        "partition": 'skx-normal',
        "time": "06:00:00",
    }
    job = Slurm("sync", resources)
    job.run(thecommand)
Ejemplo n.º 2
0
def submit_job(thecommand):
    """Run ``thecommand`` through a Slurm job named "sync".

    The job requests 81 nodes and 3888 tasks on the ``skx-normal``
    partition for twenty hours, charged to account ``TG-EAR140030``.
    """
    resources = {
        "nodes": 81,
        "ntasks": 3888,
        "partition": 'skx-normal',
        "time": "20:00:00",
        "account": "TG-EAR140030",
    }
    job = Slurm("sync", resources)
    job.run(thecommand)
Ejemplo n.º 3
0
def submit_job(thecommand):
    """Run ``thecommand`` through a Slurm job named "sync".

    The resource request is read, at call time, from the module-level
    names ``N_node``, ``ntasks``, ``partition``, ``time`` and ``account``.
    """
    resources = {
        "nodes": N_node,
        "ntasks": ntasks,
        "partition": partition,
        "time": time,
        "account": account,
    }
    job = Slurm("sync", resources)
    job.run(thecommand)
def submit_job(thecommand):
    """Run ``thecommand`` through a Slurm job named "process_sync".

    Node and task counts come from the module-level names ``N_node`` and
    ``N_cores``; the job asks for four hours, one CPU per task and 2G of
    memory per CPU.
    """
    resources = {
        "nodes": N_node,
        "ntasks": N_cores,
        "time": "04:00:00",
        "cpus-per-task": 1,
        "mem-per-cpu": "2G",
    }
    job = Slurm("process_sync", resources)
    job.run(thecommand)
Ejemplo n.º 5
0
def submit_job(thecommand):
    """Run ``thecommand`` through a Slurm job named "sync".

    The job requests 28 nodes and 1344 tasks on the ``skx-normal``
    partition for six hours, charged to account ``TG-EAR130011``.
    """
    resources = {
        "nodes": 28,
        "ntasks": 1344,
        "partition": 'skx-normal',
        "time": "06:00:00",
        "account": "TG-EAR130011",
    }
    job = Slurm("sync", resources)
    job.run(thecommand)
Ejemplo n.º 6
0
def run_parallel(ks, param_combinations, partition='low'):
    '''Run each parameter combination as its own job (requires slurmpy).

    ks: option names (without the leading ``--``), in the same order as
        the values inside every entry of ``param_combinations``.
    param_combinations: sequence of value sequences, one per job.
    partition: partition name passed to the scheduler.
    '''
    from slurmpy import Slurm

    scheduler = Slurm("fit_mog", {"partition": partition, "time": "4-0"})

    for idx, combo in enumerate(param_combinations):
        # Base command first, then one "--key value " pair per option.
        param_str = f'module load python; python3 {opj(DIR_FILE, "sim_gaussian_mixture.py")} '
        for pos, key in enumerate(ks):
            param_str += '--' + key + ' ' + str(combo[pos]) + ' '
        print(f'scheduled: {param_str}\n\t({idx}/{len(param_combinations)})')
        scheduler.run(param_str)
Ejemplo n.º 7
0
def submit_sbatch(template, conf):
    """Submit ``template`` with sbatch and block until the job leaves the queue.

    Args:
        template: batch script template handed to ``Slurm.run``.
        conf: configuration dict. Keys read here: ``"name"``,
            ``"SLURM_CONF_GLOBAL"``, ``"DRIVER"`` (including ``"program"``),
            ``"COMPUTE"`` (``"mstr_bench"``) and, optionally, ``"benchmark"``.
            ``conf["DRIVER"]["mstr_bench"]`` is overwritten with the value
            from ``conf["COMPUTE"]``.

    Once the job is no longer listed by ``squeue``, the first file under
    ``logs/`` whose name contains ``<job_id>.err`` is passed to
    ``job_status``; when a benchmark file is configured the end time and
    the result are written to it.
    """
    logging.warning(hist_fn)

    logging.info('Starting batch submission')

    if "benchmark" in conf:
        write_bench_start(conf["benchmark"])

    submit_func = "sbatch"
    rand_hash = ""  #gen_hash(template)
    # The real job id is unknown before submission, so the shell variable
    # is passed as a placeholder. The return value of configure() was never
    # used by this function, so it is no longer bound to a local.
    configure(conf, '${SLURM_JOB_ID}', rand_hash)
    s = Slurm(conf["name"], conf["SLURM_CONF_GLOBAL"])
    conf["DRIVER"]["mstr_bench"] = conf["COMPUTE"]["mstr_bench"]
    logging.info('Command to be executed: %s', conf["DRIVER"]["program"])
    job_id = str(s.run(template, cmd_kwargs=conf["DRIVER"], _cmd=submit_func))

    logging.info('Batch job ID: %s', job_id)
    time.sleep(5)

    while True:
        p = Popen(["squeue", "-j", job_id], stdout=PIPE, stderr=PIPE)
        out, _err = p.communicate()
        out = str(out, 'utf-8')

        logging.debug("Squeue output: %s", out)

        # Drop the header row, then keep the first column of every
        # non-blank row. The previous filter compared a list with '' and
        # therefore never skipped blank rows.
        rows = out.split(os.linesep)[1:]
        queue = [row.split()[0] for row in rows if row.strip()]

        if job_id not in queue:
            break
        logging.info('Job still running, sleeping for 5 mins')
        time.sleep(5 * 60)

    logging.info('Batch Job terminated')
    result = 'UNKNOWN'
    logfile = [
        op.join(d, f) for d, _dirs, lf in os.walk(op.abspath('logs'))
        for f in lf if '{}.err'.format(job_id) in f
    ]

    if logfile:
        logging.info('Driver logfile: %s', logfile[0])
        result = job_status(logfile[0])
    else:
        logging.warning('No logfile generated.')

    if "benchmark" in conf:
        write_bench_end(conf["benchmark"])
        write_bench_result(conf["benchmark"], result)
Ejemplo n.º 8
0
import itertools
from slurmpy import Slurm

partition = 'gpu_yugroup'

# Values to sweep (lambda_reg); every combination becomes one job.
params_to_vary = {
    '--reg1': [0, 1e-1, 5e-1, 1e0, 1e1, 1e2, 1e3],
    '--reg2': [0],
}

# Scheduler object shared by all submissions.
s = Slurm("decode", {"partition": partition, "time": "3-0", "gres": "gpu:1"})

# Fix the option order so names and values line up in every combination.
ks = sorted(params_to_vary.keys())
vals = [params_to_vary[k] for k in ks]
param_combinations = list(itertools.product(*vals))  # list of tuples
print(param_combinations)

# Submit one job per combination: base command plus "key value " pairs.
for combination in param_combinations:
    param_str = 'module load python; module load pytorch; python ../train.py '
    for key, value in zip(ks, combination):
        param_str += key + ' ' + str(value) + ' '
    s.run(param_str)
from slurmpy import Slurm

# Paths and constant values.
nproc_old = 441  # number of processors used in the bin files
old_mesh_dir = "/scratch/05880/tg851791/asdf_sync/model_generating/tao_1d_ref_knl/DATABASES_MPI"  # the mesh files
old_model_dir = "/scratch/05880/tg851791/work/generate_hybrid_v703/gll_work/model/s362ani_good_min_tao_smooth"  # the model files
model_tags = "vpv,vph,vsv,vsh,eta,qmu,rho"  # values to generate
# Output directory.
output_file = "/scratch/05880/tg851791/work/generate_hybrid_v703/gll_work/ppm/notopo/3d"
# Region as lon1/lat1/lon2/lat2/dep1/dep2 (e.g. with lon1=30 and lon2=20 the
# points run 30, 29, ...).
region = "74/0/175/62/0/800"
npts = "249/405/321"  # number of points, including the edge points
# Use 18*18 cores; any split works (two directions, dividing into subregions).
nproc = "18/18"

# Check that ../../specfem_gll.jl/src/program/get_ppm_model.jl really is the
# path of get_ppm_model.jl. The run is bracketed by `date` to log timing.
command = "date;" + " ".join([
    "ibrun julia ../../specfem_gll.jl/src/program/get_ppm_model.jl",
    f"--nproc_old {nproc_old}",
    f"--old_mesh_dir {old_mesh_dir}",
    f"--old_model_dir {old_model_dir}",
    f"--model_tags {model_tags}",
    f"--output_file {output_file}",
    f"--region {region}",
    f"--npts {npts}",
    f"--nproc {nproc};",
]) + "date;"

# Took 2h18min for the author's region (60d*60d, 336*336 NEX, 21*21 proc);
# it is safe to request a longer time.
s = Slurm("ppm", {
    "partition": "normal",
    "nodes": 5,
    "ntasks": 324,
    "time": "04:00:00",
    "account": "TG-EAR130011",
})

s.run(command)
Ejemplo n.º 10
0
    def wrapper(*args, **kwargs):
        """Run ``func`` directly, or wrap the CLI call in a SLURM batch script.

        Reads the ``sbatch``, ``run``, ``bb`` and ``yes`` parameters from the
        grand-parent click context and ``directory`` from the parent context.
        Without ``sbatch`` the wrapped ``func`` is invoked as-is. Otherwise a
        ``Slurm`` script is built and handed to ``sbatch`` (when ``run`` is
        set) or to ``cat`` (otherwise).

        Returns ``ReturnCodes.SUBMITTED`` when ``run`` is set, else
        ``ReturnCodes.SUCCESS``; in the non-sbatch case, whatever
        ``ctx.invoke`` returns.

        Raises ``EmmetCliError`` when the parent command has no ``directory``.
        """
        ctx = click.get_current_context()
        # Options shared by all sub-commands live two levels up.
        ctx.grand_parent = ctx.parent.parent
        if not ctx.grand_parent.params["sbatch"]:
            return ctx.invoke(func, *args, **kwargs)

        run = ctx.grand_parent.params["run"]
        if run:
            click.secho("SBATCH MODE! Submitting to SLURM queue.", fg="green")

        directory = ctx.parent.params.get("directory")
        if not directory:
            raise EmmetCliError(
                f"{ctx.parent.command_path} needs --directory option!")

        # Scripts and logs are written to a hidden folder inside `directory`;
        # it is only created when the job is really going to be submitted.
        track_dir = os.path.join(directory, ".emmet")
        if run and not os.path.exists(track_dir):
            os.mkdir(track_dir)
            logger.debug(f"{track_dir} created")

        bb = ctx.grand_parent.params["bb"]
        yes = ctx.grand_parent.params["yes"]
        if bb:
            # `bb` branch: prepend "#DW" directives that stage `directory`
            # into $DW_JOB_STRIPED (presumably burst-buffer staging -- confirm).
            if not yes:
                click.confirm("Did you run `module unload esslurm`?",
                              abort=True)
            # Last path component of `directory`; raises IndexError when
            # `directory` contains no path separator.
            subdir = directory.rsplit(os.sep, 1)[1]
            stage_in = f"#DW stage_in source={directory} "
            stage_in += f"destination=$DW_JOB_STRIPED/{subdir} type=directory"
            script = [
                "#DW jobdw capacity=10TB access_mode=striped type=scratch",
                stage_in,
                "srun hostname",
                "",
            ]

            command = "\n".join(script)
            slurm_kwargs = {
                "qos": "premium",
                "nodes": 1,
                "tasks-per-node": 1,
                "constraint": "haswell",
                "time": "48:00:00",
            }
        else:
            # Non-`bb` branch: no script prefix, "xfer" QOS.
            if not yes:
                click.confirm("Did you run `module load esslurm`?", abort=True)
            slurm_kwargs = {
                "qos": "xfer",
                "time": "48:00:00",
                "licenses": "SCRATCH",
                "mem": "30GB",
            }
            command = ""

        # Job name is the click command path with spaces replaced by dashes.
        s = Slurm(
            ctx.command_path.replace(" ", "-"),
            slurm_kwargs=slurm_kwargs,
            date_in_name=False,
            scripts_dir=track_dir,
            log_dir=track_dir,
            bash_strict=False,
        )

        # Append the CLI invocation rebuilt by reconstruct_command().
        command += reconstruct_command(sbatch=True)
        # Whatever s.run() writes to stderr is captured and logged below.
        slurmpy_stderr = io.StringIO()
        with contextlib.redirect_stderr(slurmpy_stderr):
            # NOTE(review): the "6 days" remark does not match the 48:00:00
            # time limits requested above -- confirm what it refers to.
            s.run(command, _cmd="sbatch" if run else "cat", tries=1)  # 6 days
        # Drops the first two and the last character of the captured text and
        # un-escapes the rest (presumably to unwrap a b'...' repr -- confirm).
        ret = slurmpy_stderr.getvalue()[2:-1]
        logger.info("\n" + ret.encode("utf-8").decode("unicode_escape"))
        # TODO add jobid to SUBMITTED.value
        return ReturnCodes.SUBMITTED if run else ReturnCodes.SUCCESS
Ejemplo n.º 11
0
# Models evaluated with cross-validation; each one is submitted as its own job.
# NOTE(review): `s` (the scheduler object) and `args` (parsed CLI options) are
# defined outside the visible part of this script.
models = [
    'random_forest', 'gradient_boosting', 'skope_rules', 'rulefit', 'fplasso',
    'fpskope', 'grl', 'oner', 'brs'
]

# Optional flags forwarded verbatim to every compare_models.py invocation.
extra_args = ''
if args.ignore_cache:
    extra_args += ' --ignore_cache'
if args.low_data:
    extra_args += ' --low_data'

if not args.test_only:

    # One cross-validation job per model.
    for model in models:
        s.run(f'python experiments/compare_models.py --model {model} --cv' +
              extra_args)

    # The 'brl' model is split into 26 jobs, selected by --parallel_id 0..25;
    # their job ids are collected (currently only used by the disabled
    # combine step below).
    brl_job_ids = []
    for i in range(26):
        job_id = s.run(
            f'python experiments/compare_models.py --model brl --parallel_id {i} --cv'
            + extra_args)
        brl_job_ids.append(job_id)
    # s.run(f'python experiments/combine.py --model brl', depends_on=brl_job_ids)

if not args.val_only:

    # Extend the model list with the 'stbl_*' variants for the non-validation
    # stage.
    models += [
        'stbl_l2_mm0', 'stbl_l2_mm1', 'stbl_l2_mm2', 'stbl_l1_mm0',
        'stbl_l1_mm1', 'stbl_l1_mm2'
    ]
Ejemplo n.º 12
0
from slurmpy import Slurm

# * background+tao/crust1.0 -> background+tao/crust1.0 -> background+tao/crust1.0+min/crust2.0
print("start job 4")

# Source ("old") mesh/model and target ("new") mesh/model for the interpolation.
nproc_old = 336
old_mesh_dir = "/work/05880/tg851791/stampede2/specfem/20190115/tao_h_files/DATABASES_MPI"
old_model_dir = "/scratch/05880/tg851791/binfile/perturbation/tao_perturbation_smooth"
nproc_new = 441
new_mesh_dir = "/work/05880/tg851791/stampede2/specfem/20190115/simulation_taoreg/DATABASES_MPI"
new_model_dir = "/scratch/05880/tg851791/binfile/interp/s362ani_addmin-pert"
model_tags = "vph,vpv,vsh,vsv,eta,qmu,rho"
output_dir = "/scratch/05880/tg851791/binfile/interp/s362ani_addmin_addtao-pert"

# Command line for the interpolation program, one option per list entry.
command4 = " ".join([
    "ibrun julia src/program/xsem_interp_mesh2.jl",
    f"--nproc_old {nproc_old}",
    f"--old_mesh_dir {old_mesh_dir}",
    f"--old_model_dir {old_model_dir}",
    f"--nproc_new {nproc_new}",
    f"--new_mesh_dir {new_mesh_dir}",
    f"--new_model_dir {new_model_dir}",
    f"--model_tags {model_tags}",
    f"--output_dir {output_dir}",
])

slurm_options4 = {
    "partition": "skx-normal",
    "nodes": 10,
    "ntasks": 441,
    "time": "00:60:00",
}
s4 = Slurm("bg+tao", slurm_options4)
jobid_s4 = s4.run(command4)
Ejemplo n.º 13
0
def schedule(config: dict, name_addition: str = None):
    """
    Schedules a given configuration as a new job

    The job script is assembled from ``config['before_script']``, an optional
    ``for`` loop (``config['script']['times']``), one shell variable per entry
    of ``config['script']['parameters']``, ``config['script']['body']`` and
    ``config['after_script']``. The id returned by the scheduler is stored in
    ``config['jobid']``.

    Args:
        config (dict): job configuration
        name_addition (str, optional): Defaults to None. Addition to the job
            name; also exported to the script as ``jobId`` (empty when None)

    Raises:
        RuntimeError: When requested scheduler is not available
    """
    scheduler_conf = config['scheduler']
    script_conf = config['script']

    executer = scheduler_conf['type'].lower()
    if executer not in ('slurm', 'bash'):
        logger.error("Only SLURM or bash are supported at the moment!")
        raise RuntimeError("Unsupported Job Manager!")

    # If a host entry matches replace the found parameters
    if 'host' in scheduler_conf:
        hostname = socket.getfqdn()
        logger.debug("Hostname: " + hostname)
        if hostname in scheduler_conf['host']:
            logger.debug("Found host entry for this hostname")
            # The general parameter section may be missing or empty; create
            # it so that host-specific overrides still apply.
            if scheduler_conf.get('parameters') is None:
                scheduler_conf['parameters'] = {}
            scheduler_conf['parameters'].update(
                scheduler_conf['host'][hostname]['parameters'])

    # Create Slurm job script, allow empty parameters (missing key or None)
    parameters = dict(scheduler_conf.get('parameters') or {})

    # Optional log directory and job name
    log_directory = script_conf.get('log-directory')
    job_name = scheduler_conf.get('job-name', "ace")

    job = Slurm(job_name, parameters, log_directory=log_directory)

    body = script_conf['body']

    env_vars = []
    auto_args = []

    # Add env var with job id. name_addition defaults to None, which used to
    # raise TypeError here; export an empty value in that case.
    env_vars.append("jobId=" + (name_addition or ""))

    for k, v in (script_conf.get('parameters') or {}).items():
        # Check if variable already set
        if k in os.environ:
            logger.warning(k + " environment variable already set!")

        # Set env variable. Dict entries only define the variable; plain
        # entries are additionally forwarded as --<name>=${<name>}.
        if isinstance(v, dict):
            env_vars.append(k + "=\"" + v['values'] + "\"")
        else:
            # str() so that numeric values from the config do not raise.
            env_vars.append(k + "=\"" + str(v) + "\"")
            auto_args.append("--" + k + "=${" + k + "}")

    # Create auto_args
    if 'auto_args' in os.environ:
        logger.warning("auto_args environment variable already set!")
    env_vars.append("")  # Add a new line between args and auto_args
    env_vars.append("auto_args=\"" + " ".join(auto_args) + "\"")

    # Handle times keyword: wrap variables and body in a shell for-loop
    prefix = ''
    suffix = ''
    if 'times' in script_conf:
        # str() so that an integer repeat count does not raise.
        prefix = "for run in {1.." + str(script_conf['times']) + "}\ndo\n\n\n"
        suffix = "done"

    # Optional before_script / after_script
    before_script = config.get('before_script', '')
    after_script = config.get('after_script', '')

    # Join body
    body = before_script + "\n\n" + prefix + \
        "\n".join(env_vars) + "\n\n\n" + body + \
        "\n\n\n" + suffix + "\n\n" + after_script

    # Schedule job script
    if executer == 'bash':
        config['jobid'] = job.run(body,
                                  _cmd='bash',
                                  name_addition=name_addition)
    else:
        config['jobid'] = job.run(body, name_addition=name_addition)
Ejemplo n.º 14
0
                           'work',
                           work_dir + cdn_ident)

        cmd += ["--cli", "--work_dir", work_dir]


    s = Slurm("incrementation", slurm_conf)

    if cdn["framework"] == "spark":
        cmd = " ".join(cmd)
        cmd = "\"{}\"".format(cmd)

        print("Submitting command: ", cmd)

        s.run("bash " + spark_template, cmd_kwargs={"spscript": cmd,
                                                    "parallelism": slurm_conf["cpus-per-task"]},
              _cmd=sys.argv[2])
    else:
        with open(legends[cdn["dataset"]]) as legend:
            images = legend.read().split()
            num_images = len(images)
            pn_images = num_images/num_nodes
            pn_remain = num_images % num_nodes

            count_rem = 0
            idx = 0

            for i in range(0, num_images - pn_remain, pn_images):
                files = None

                if count_rem < pn_remain:
Ejemplo n.º 15
0
from slurmpy import Slurm

# ! change setting first!!!
# per_s362ani_good + per_tao ->  per_s362ani_good_tao
nproc_old = 336
old_mesh_dir = "/scratch/05880/tg851791/work/generate_hybrid_v703/gll_work/control_file/tao"
old_model_dir = "/scratch/05880/tg851791/work/generate_hybrid_v703/gll_work/perturbation/per_tao"
nproc_new = 324
new_mesh_dir = "/scratch/05880/tg851791/work/generate_small_v703/specfem/s362ani_good/DATABASES_MPI"
new_model_dir = "/scratch/05880/tg851791/work/generate_small_v703/perturbation/per_s362ani_good"
model_tags = "vph,vpv,vsh,vsv,eta,qmu,rho"
output_dir = "/scratch/05880/tg851791/work/generate_small_v703/perturbation/per_s362ani_good_tao"

# Command line for the interpolation program, one option per list entry.
command1 = " ".join([
    "ibrun julia ../specfem_gll.jl/src/program/xsem_interp_mesh2.jl",
    f"--nproc_old {nproc_old}",
    f"--old_mesh_dir {old_mesh_dir}",
    f"--old_model_dir {old_model_dir}",
    f"--nproc_new {nproc_new}",
    f"--new_mesh_dir {new_mesh_dir}",
    f"--new_model_dir {new_model_dir}",
    f"--model_tags {model_tags}",
    f"--output_dir {output_dir}",
])

s = Slurm("interp", {
    "partition": "skx-normal",
    "nodes": 10,
    "ntasks": 324,
    "time": "00:60:00",
    "account": "TG-EAR140030",
})

# Bracket the run with `date` so the log shows start and end times.
s.run("date; " + command1 + "; date;")
Ejemplo n.º 16
0
samples = read_samplesheet(args.sample, args.project)

if args.mkref:
    jobids = []
    for sample in samples:
        fastq = samples[sample]
        run_bwa = align_and_convert(config, fastq, args, sample)
        bwa = Slurm("bwaAln-{}".format(sample), {
            "account": config["slurm"]["account"],
            "partition": "node",
            "time": config["slurm"]["time"]
        },
                    log_dir="{}/logs".format(args.out),
                    scripts_dir="{}/scripts".format(args.out))
        jobids.append(bwa.run(run_bwa))

    wcxmkref = Slurm("wcxmkref", {
        "account": config["slurm"]["account"],
        "partition": "node",
        "time": config["slurm"]["time"]
    },
                     log_dir="{}/logs".format(args.out),
                     scripts_dir="{}/scripts".format(args.out))
    wcxmkref.run(mkref(config, args), depends_on=jobids)

elif args.mkmodel:
    f = open("{}.PREFACE.config.tab".format(args.out.rstrip("/")), "w")
    f.write("ID\tfilepath\tgender\tFF\n")
    for sample in samples:
        for line in open("{}/{}/{}.AMYCNE.tab".format(args.out, sample,
Ejemplo n.º 17
0
def main():
    """Start the worker jobs and then launch the Spark driver.

    Command-line arguments: a batch script ``template``, a JSON ``params``
    file, ``-y/--yarn`` and ``-D/--no_submit``. With ``--no_submit`` the
    worker scripts are run through ``bash`` in daemon threads (one fewer
    than ``num_nodes``) and the driver is started locally with
    ``spark-submit``. Otherwise workers are submitted with ``sbatch`` and the
    driver commands are piped into the ``conf["DRIVER"]["slurm_alloc"]``
    process, whose output goes to a ``driver-*.out`` file in
    ``conf["logdir"]``.
    """
    parser = argparse.ArgumentParser(description='Pilot-Agent scheduling for SLURM')
    parser.add_argument('template', type=str, help="SLURM batch script template")
    parser.add_argument('params', type=argparse.FileType('r'), help="SLURM batch script params (JSON)")
    parser.add_argument('-y', '--yarn', action='store_true', help="Yarn scheduler will be used")
    parser.add_argument('-D', '--no_submit', action='store_true', help="Create but do not submit sbatch scripts" )
    args = parser.parse_args()

    with args.params as f:
        conf = json.load(f)

    # Truncate (or create) the Hadoop "slaves" file.
    # NOTE(review): the first branch tests for 'COMPUTE' in the environment
    # but then reads HADOOP_HOME from it, and the second branch reads
    # conf["COMPUTE"] before its default is set below -- confirm the intent.
    if args.yarn and 'COMPUTE' in os.environ:
        open(op.join(os.environ['HADOOP_HOME'], 'etc/hadoop/slaves'), 'w').close()
    elif args.yarn:
        open(op.join(conf["COMPUTE"]["HADOOP_HOME"], 'etc/hadoop/slaves'), 'w').close()

    submit_func = "bash" if args.no_submit else "sbatch"

    s = Slurm(conf["name"], conf["SLURM_CONF_GLOBAL"])

    program_start = datetime.now().strftime("%Y-%m-%d")

    rand_hash = '{0}-{1}'.format(hashlib.sha1(args.template.encode("utf-8")).hexdigest(), hashlib.md5(os.urandom(16)).hexdigest())
    # Without submission there is no SLURM job id, so the random hash is used.
    job_id = rand_hash if args.no_submit else '${SLURM_JOB_ID}'

    if "COMPUTE" not in conf:
        conf["COMPUTE"] = {}

    if "mstr_bench" not in conf["COMPUTE"]:
        conf["COMPUTE"]["mstr_bench"] = op.join(conf["logdir"], "master-{0}-benchmarks.{1}.out".format(program_start, job_id))

    if "mstr_log" not in conf["COMPUTE"]:
        conf["COMPUTE"]["mstr_log"] = op.join(conf["logdir"], "master-{0}-{1}.out".format(program_start, rand_hash))

    conf["COMPUTE"]["mstr_lock"] = op.join(conf["logdir"], "master-{0}-{1}.lock".format(program_start, rand_hash))

    conf["COMPUTE"]["logdir"] = conf["logdir"]

    # if you want to run one master and worker locally, might as well submit to local
    rm_nnodes = 1 if args.no_submit else 0
    n_workers = conf["num_nodes"] - rm_nnodes

    for _ in range(n_workers):

        # SLURM batch submit workers
        if args.no_submit:
            thread = threading.Thread(target=s.run, kwargs=dict(command=args.template, cmd_kwargs=conf["COMPUTE"], _cmd=submit_func))
            thread.daemon = True
            thread.start()
        else:
            s.run(args.template, name_addition=rand_hash, cmd_kwargs=conf["COMPUTE"], _cmd=submit_func)

    # Defined up front so the driver branches below never hit an unbound
    # name when no worker was launched.
    master_url = ""

    if n_workers > 0:
        # Wait for the master log to appear; its first line is the master URL.
        while not op.isfile(conf["COMPUTE"]["mstr_log"]):
            time.sleep(5)

        with open(conf["COMPUTE"]["mstr_log"], 'r') as f:
            master_url = f.readline().strip('\n')

    driver_out = op.join(conf["logdir"], "driver-{0}-{1}.out".format(program_start, rand_hash))

    if not args.no_submit:
        program = ("spark-submit --master {0} --executor-cores=${{SLURM_CPUS_PER_TASK}} "
                    "--executor-memory=${{SLURM_MEM_PER_NODE}}M  --driver-memory=${{SLURM_MEM_PER_NODE}}M {1}\n") \
                            .format(master_url, conf["DRIVER"]["program"])

        # Commands fed to the allocation shell, in order.
        commands = ["module load {}\n".format(module) for module in conf["DRIVER"]["modules"]]
        commands.append("echo start $(date +%s.%N)\n")
        commands.append(program)
        commands.append("echo end $(date +%s.%N)\n")
        commands.append("echo 'SUCCEEDED' >> {}\n".format(conf["COMPUTE"]["mstr_log"]))

        with open(driver_out, "wb") as fw:
            # bufsize=1 (line buffering) has no effect on a binary pipe, so
            # the default buffering is used.
            p = Popen(conf["DRIVER"]["slurm_alloc"], stdin=PIPE, stdout=fw, stderr=fw)
            p.stdin.write("".join(commands).encode('utf-8'))
            # Flush the buffered commands and signal end-of-input explicitly
            # instead of relying on garbage collection of the pipe object.
            # The child is still not waited for.
            p.stdin.close()
    else:
        # Local driver: a single node runs Spark in local mode, otherwise the
        # driver connects to the master started by the worker threads.
        master = "local[*]" if conf["num_nodes"] == 1 else master_url
        program = "spark-submit --master {0} {1}\n".format(master, conf["DRIVER"]["program"])
        p = Popen(program.split(), stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()
        print(stdout, stderr)
Ejemplo n.º 18
0
def cluster(tool, invocation, clowdrloc, dataloc, cluster, **kwargs):
    """cluster
    Launches a pipeline on a cluster through the Clowdr wrappers: one
    scheduler job is submitted per task, and each job runs
    ``clowdr run <task> -c <taskdir> --local``.

    Parameters
    ----------
    tool : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    clowdrloc : str
        Path for storing Clowdr intermediate files and outputs
    dataloc : str
        Path for accessing input data. If local, provide the hostname and
        optionally a path. If on S3, provide an S3 path.
    cluster : str
        Scheduler on the cluster being used. Currently, the only supported mode
        is slurm.
    **kwargs : dict
        Arbitrary keyword arguments. Currently supported arguments:
        - account : str
            Account for the cluster scheduler
        - jobname : str
            Base-name for the jobs as they will appear in the scheduler
            (defaults to "clowdrtask")
        - slurm_args : str
            Comma-separated ``key:value`` scheduler options; everything after
            the first colon of an entry is the value
        - simg : str
            Path to a container image; resolved with ``utils.truepath``
        - workdir : str
            Passed to ``clowdr run`` as ``-w``
        - volumes : list
            Each entry is passed to ``clowdr run`` as ``-v``
        - verbose : bool
            Toggle verbose output printing
        - dev : bool
            Toggle dev mode (only runs first execution in the specified set)

        Additionally, transfers all keyword arguments accepted by both of
        "controller.metadata.consolidateTask" and "task.processTask"

    Returns
    -------
    str
        The directory containing the task files (also the process's working
        directory after the call)
    """
    # TODO: scrub inputs
    tool = utils.truepath(tool)
    if kwargs.get("simg"):
        kwargs["simg"] = utils.truepath(kwargs["simg"])

    from slurmpy import Slurm

    if kwargs.get("verbose"):
        print("Consolidating metadata...")
    [tasks, invocs] = metadata.consolidateTask(tool, invocation, clowdrloc,
                                               dataloc, **kwargs)
    if kwargs.get("dev"):
        tasks = [tasks[0]]  # Just launch the first task in dev

    # All tasks are launched from the directory of the first task file.
    taskdir = op.dirname(utils.truepath(tasks[0]))
    try:
        os.mkdir(taskdir)
    except FileExistsError:
        pass
    os.chdir(taskdir)

    with open(tool) as fhandle:
        container = json.load(fhandle).get("container-image")
    if container:
        if kwargs.get("verbose"):
            print("Getting container...")
        outp = utils.getContainer(taskdir, container, **kwargs)
        if kwargs.get("verbose"):
            print(outp)

    jobname = kwargs.get("jobname") or "clowdrtask"
    slurm_args = {}
    if kwargs.get("slurm_args"):
        for opt in kwargs["slurm_args"].split(","):
            # Split on the first colon only so values may contain colons
            # (e.g. time limits).
            key, _, value = opt.partition(":")
            slurm_args[key] = value
    job = Slurm(jobname, slurm_args)

    # Options shared by every task. They are appended AFTER the per-task
    # template has been formatted, so braces in a workdir or volume can no
    # longer be misread as format fields.
    extra_opts = ""
    if kwargs.get("workdir"):
        extra_opts += " -w {}".format(kwargs["workdir"])
    if kwargs.get("volumes"):
        extra_opts += " ".join(
            [" -v {}".format(vol) for vol in kwargs["volumes"]])

    for task in tasks:
        job.run("clowdr run {} -c {} --local".format(task, taskdir) +
                extra_opts)

    if kwargs.get("verbose"):
        print(taskdir)
    return taskdir
Ejemplo n.º 19
0
    if cdn["filesystem"] != "mem":
        work_dir = op.join(filesystems[cdn["filesystem"]], 'work',
                           work_dir + cdn_ident)

        cmd += ["--cli", "--work_dir", work_dir]

    s = Slurm("incrementation", slurm_conf)

    if cdn["framework"] == "spark":
        cmd = " ".join(cmd)
        cmd = "\"{}\"".format(cmd)

        print("Submitting command: ", cmd)

        s.run("bash " + spark_template,
              cmd_kwargs={"spscript": cmd},
              _cmd=sys.argv[2])
    else:
        with open(legends[cdn["dataset"]]) as legend:
            images = legend.read().split()
            num_images = len(images)
            pn_images = num_images / num_nodes
            pn_remain = num_images % num_nodes

            count_rem = 0
            idx = 0

            for i in range(0, num_images - pn_remain, pn_images):
                files = None

                if count_rem < pn_remain: