def test_metadata_to_remote(self):
    [tasks, invocs] = metadata.consolidateTask(self.descriptor,
                                               self.invocation1,
                                               self.provdir,
                                               self.dataloc2,
                                               verbose=True,
                                               bids=False)
    metadata.prepareForRemote(tasks, self.provdir, self.dataloc2)
def test_metadata_directory_invocs(self):
    [tasks, invocs] = metadata.consolidateTask(self.descriptor,
                                               self.invocation4,
                                               self.provdir,
                                               self.dataloc1,
                                               verbose=True,
                                               bids=False)
    self.assertTrue(len(tasks) == len(invocs) and
                    len(tasks) == len(os.listdir(self.invocation4)))
def test_metadata_single_invoc(self):
    [tasks, invocs] = metadata.consolidateTask(self.descriptor,
                                               self.invocation1,
                                               self.provdir,
                                               self.dataloc1,
                                               verbose=True,
                                               bids=False)
    self.assertTrue(len(tasks) == len(invocs) == 1)

    [tasks, invocs] = metadata.consolidateTask(self.descriptor,
                                               self.invocation1,
                                               self.provdir,
                                               self.dataloc1,
                                               verbose=True,
                                               bids=True)
    with open(self.invocation1) as f:
        participants = len(json.load(f)["participant_label"])
    self.assertTrue(len(tasks) == len(invocs) == participants)

    [tasks, invocs] = metadata.consolidateTask(self.descriptor,
                                               self.invocation2,
                                               self.provdir,
                                               self.dataloc1,
                                               verbose=True,
                                               bids=True)
    with open(self.invocation2) as f:
        dat = json.load(f)
        total = len(dat["participant_label"]) * len(dat["session_label"])
    self.assertTrue(len(tasks) == len(invocs) == total)

    [tasks, invocs] = metadata.consolidateTask(self.descriptor,
                                               self.invocation3,
                                               self.provdir,
                                               self.dataloc1,
                                               verbose=True,
                                               bids=True)
    with open(self.invocation3) as f:
        dat = json.load(f)
        total = len(dat["session_label"])
    self.assertTrue(len(tasks) == len(invocs) == total)
def cloud(descriptor, invocation, provdir, s3, cloud, credentials, **kwargs):
    """cloud
    Launches a pipeline in the cloud at scale through Clowdr.

    Parameters
    ----------
    descriptor : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    provdir : str
        Path on S3 for storing Clowdr intermediate files and outputs
    s3 : str
        Path on S3 for accessing input data
    cloud : str
        Which endpoint to use for deployment
    credentials : str
        Credentials for Amazon with access to s3, provdir, and Batch
    **kwargs : dict
        Arbitrary keyword arguments (i.e. {'verbose': True})

    Returns
    -------
    tuple of (str, list)
        The remote task directory and the identifiers of the launched jobs
    """
    # TODO: scrub inputs better
    descriptor = descriptor.name
    provdir = provdir.strip('/')

    # Create temp dir for the provenance metadata
    tmploc = utils.truepath(tempfile.mkdtemp())

    [tasks, invocs] = metadata.consolidateTask(descriptor, invocation, tmploc,
                                               s3, **kwargs)
    metadata.prepareForRemote(tasks, tmploc, provdir)

    resource = launcher.configureResource(cloud, credentials, **kwargs)
    tasks_remote = [task
                    for task in utils.post(tmploc, provdir)
                    if "task-" in task]

    if kwargs.get("dev"):
        tasks_remote = [tasks_remote[0]]  # Just launch the first in dev mode

    jids = []
    for task in tasks_remote:
        jids += [resource.launchJob(task)]

    taskdir = op.dirname(utils.truepath(tasks_remote[0]))
    print(taskdir)
    return taskdir, jids
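# A minimal usage sketch for the cloud() entrypoint above, assuming it is
# called directly rather than through the clowdr CLI. The descriptor is
# expected to be an open file handle (its ".name" attribute is dereferenced),
# and every path below is a hypothetical placeholder, not a value from this
# repository.
#
#     with open("descriptor.json") as desc:
#         taskdir, jids = cloud(desc, "invocation.json",
#                               "s3://my-bucket/clowdr-prov",
#                               "s3://my-bucket/dataset",
#                               "aws", "credentials.csv", verbose=True)
#     print(taskdir, jids)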
def test_metadata_sweep(self):
    [tasks, invocs] = metadata.consolidateTask(
        self.descriptor, self.invocation5, self.provdir, self.dataloc1,
        verbose=True, sweep=["participant_label", "analysis_level"],
        setup=True)
    with open(self.invocation5) as fhandle:
        dat = json.load(fhandle)
        total = len(dat["participant_label"]) * len(dat["analysis_level"])
    self.assertTrue(len(tasks) == len(invocs) == total)
def cloud(tool, invocation, clowdrloc, dataloc, endpoint, auth, **kwargs):
    """cloud
    Launches a pipeline in the cloud at scale through Clowdr.

    Parameters
    ----------
    tool : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    clowdrloc : str
        Path on S3 for storing Clowdr intermediate files and outputs
    dataloc : str
        Path on S3 for accessing input data
    endpoint : str
        Which endpoint to use for deployment
    auth : str
        Credentials for Amazon with access to dataloc, clowdrloc, and Batch
    **kwargs : dict
        Arbitrary keyword arguments (i.e. {'verbose': True})

    Returns
    -------
    tuple of (str, list)
        The remote task directory and the identifiers of the launched jobs
    """
    # TODO: scrub inputs better
    clowdrloc = clowdrloc.strip('/')

    # Create temp dir for clowdrloc
    tmploc = utils.truepath(tempfile.mkdtemp())

    [tasks, invocs] = metadata.consolidateTask(tool, invocation, tmploc,
                                               dataloc, **kwargs)
    metadata.prepareForRemote(tasks, tmploc, clowdrloc)

    tasks_remote = [task
                    for task in utils.post(tmploc, clowdrloc)
                    if "task-" in task]

    resource = launcher.configureResource(endpoint, auth, **kwargs)
    jids = []
    for task in tasks_remote:
        jids += [resource.launchJob(task)]

    taskdir = op.dirname(utils.truepath(tasks_remote[0]))
    print(taskdir)
    return taskdir, jids
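# A hedged example of calling this variant of cloud() directly; the file
# names and bucket paths are placeholders, and "aws" is assumed to be an
# endpoint name accepted by launcher.configureResource.
#
#     taskdir, jids = cloud("descriptor.json", "invocation.json",
#                           "s3://my-bucket/clowdr-prov",
#                           "s3://my-bucket/dataset",
#                           "aws", "credentials.csv", verbose=True)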
def local(tool, invocation, clowdrloc, dataloc, **kwargs):
    """local
    Launches a pipeline locally through the Clowdr wrappers.

    Parameters
    ----------
    tool : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    clowdrloc : str
        Path for storing Clowdr intermediate files and outputs
    dataloc : str
        Path for accessing input data. If local, provide the hostname and
        optionally a path. If on S3, provide an S3 path.
    **kwargs : dict
        Arbitrary keyword arguments. Currently supported arguments:
        - verbose : bool
            Toggle verbose output printing
        - dev : bool
            Toggle dev mode (only runs first execution in the specified set)

        Additionally, transfers all keyword arguments accepted by
        "processTask"

    Returns
    -------
    str
        The path to the directory containing task metadata and outputs
    """
    # TODO: scrub inputs
    [tasks, invocs] = metadata.consolidateTask(tool, invocation, clowdrloc,
                                               dataloc, **kwargs)
    if kwargs.get("dev"):
        tasks = [tasks[0]]  # Just launch the first task in dev

    taskdir = op.dirname(utils.truepath(tasks[0]))
    os.chdir(taskdir)

    for task in tasks:
        processTask(task, taskdir, local=True, **kwargs)

    if kwargs.get("verbose"):
        print(taskdir)
    return taskdir
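# A minimal sketch of running a tool locally with this version of local();
# the descriptor, invocation, and data paths are hypothetical, and the
# keyword arguments shown are limited to those documented above.
#
#     taskdir = local("descriptor.json", "invocation.json",
#                     "/tmp/clowdr-prov", "localhost/data/ds001",
#                     verbose=True, dev=True)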
def cluster(tool, invocation, clowdrloc, dataloc, cluster, **kwargs):
    """cluster
    Launches a pipeline on a cluster through the Clowdr wrappers.

    Parameters
    ----------
    tool : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    clowdrloc : str
        Path for storing Clowdr intermediate files and outputs
    dataloc : str
        Path for accessing input data. If local, provide the hostname and
        optionally a path. If on S3, provide an S3 path.
    cluster : str
        Scheduler on the cluster being used. Currently, the only supported
        mode is slurm.
    **kwargs : dict
        Arbitrary keyword arguments. Currently supported arguments:
        - account : str
            Account for the cluster scheduler
        - jobname : str
            Base-name for the jobs as they will appear in the scheduler
        - verbose : bool
            Toggle verbose output printing
        - dev : bool
            Toggle dev mode (only runs first execution in the specified set)

        Additionally, transfers all keyword arguments accepted by both of
        "controller.metadata.consolidateTask" and "task.processTask"

    Returns
    -------
    str
        The path to the directory containing task metadata and outputs
    """
    # TODO: scrub inputs
    tool = utils.truepath(tool)

    if kwargs.get("simg"):
        kwargs["simg"] = utils.truepath(kwargs["simg"])

    from slurmpy import Slurm

    if kwargs.get("verbose"):
        print("Consolidating metadata...")

    [tasks, invocs] = metadata.consolidateTask(tool, invocation, clowdrloc,
                                               dataloc, **kwargs)
    if kwargs.get("dev"):
        tasks = [tasks[0]]  # Just launch the first task in dev

    taskdir = op.dirname(utils.truepath(tasks[0]))
    try:
        os.mkdir(taskdir)
    except FileExistsError:
        pass
    os.chdir(taskdir)

    with open(tool) as fhandle:
        container = json.load(fhandle).get("container-image")
    if container:
        if kwargs.get("verbose"):
            print("Getting container...")
        outp = utils.getContainer(taskdir, container, **kwargs)
        if kwargs.get("verbose"):
            print(outp)

    jobname = kwargs.get("jobname") if kwargs.get("jobname") else "clowdrtask"
    slurm_args = {}
    if kwargs.get("slurm_args"):
        for opt in kwargs.get("slurm_args").split(","):
            k, v = opt.split(":")[0], opt.split(":")[1:]
            v = ":".join(v)
            slurm_args[k] = v
    job = Slurm(jobname, slurm_args)

    script = "clowdr run {} -c {} --local"
    if kwargs.get("workdir"):
        script += " -w {}".format(kwargs["workdir"])
    if kwargs.get("volumes"):
        script += " ".join([" -v {}".format(vol)
                            for vol in kwargs.get("volumes")])

    for task in tasks:
        job.run(script.format(task, taskdir))

    if kwargs.get("verbose"):
        print(taskdir)
    return taskdir
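# A hedged sketch of a SLURM submission through cluster(); the paths and the
# slurm_args string are placeholders and depend on the local scheduler
# configuration. The "key:value,key:value" format matches how slurm_args is
# parsed above.
#
#     taskdir = cluster("descriptor.json", "invocation.json",
#                       "/scratch/clowdr-prov", "localhost/data/ds001",
#                       "slurm", slurm_args="time:4:00:00,mem:8192",
#                       jobname="clowdrtask", verbose=True)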
def local(descriptor, invocation, provdir, backoff_time=36000, sweep=[],
          verbose=False, workdir=None, simg=None, rerun=None, run_id=None,
          volumes=None, s3=None, cluster=None, jobname=None, clusterargs=None,
          dev=False, groupby=None, user=False, setup=False, **kwargs):
    """local
    Launches a pipeline locally, or on a cluster, through the Clowdr wrappers.

    Parameters
    ----------
    descriptor : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    provdir : str
        Path for storing Clowdr intermediate files and outputs
    backoff_time : int
        Time limit for wait times when resubmitting jobs to a scheduler
    sweep : list
        Invocation fields to be swept over when generating tasks
    verbose : bool
        Toggle verbose output printing
    s3 : str
        Path on S3 for accessing input data; if omitted, data is assumed to
        live on the local filesystem
    cluster : str
        Scheduler on the cluster being used. Currently, the only supported
        mode is slurm.
    jobname : str
        Base-name for the jobs as they will appear in the scheduler
    clusterargs : str
        Comma-separated "key:value" options passed to the scheduler
    dev : bool
        Toggle dev mode (only runs first execution in the specified set)
    **kwargs : dict
        Arbitrary keyword arguments. Transfers all keyword arguments accepted
        by both of "controller.metadata.consolidateTask" and
        "task.TaskHandler"

    Returns
    -------
    str
        The path to the directory containing task metadata and outputs
    """
    # TODO: scrub inputs
    descriptor = descriptor.name
    tool = utils.truepath(descriptor)
    if simg:
        simg = utils.truepath(simg)

    if verbose:
        print("Consolidating metadata...")

    dataloc = s3 if s3 else "localhost"
    if rerun:
        if not run_id:
            raise SystemExit("**Error: Option --rerun requires --run_id")
        # TODO: add option for tasks within the rerun, addition to blanket modes
        tasks = rerunner.getTasks(provdir, run_id, rerun)
        if not len(tasks):
            if verbose:
                print("No tasks to run.")
            return 0
    else:
        [tasks, invocs] = metadata.consolidateTask(descriptor, invocation,
                                                   provdir, dataloc,
                                                   sweep=sweep, **kwargs)

    taskdir = op.dirname(utils.truepath(tasks[0]))
    try:
        os.mkdir(taskdir)
    except FileExistsError:
        pass
    os.chdir(taskdir)

    if setup:
        print(taskdir)
        return taskdir

    with open(tool) as fhandle:
        container = json.load(fhandle).get("container-image")
    if container:
        if verbose:
            print("Getting container...")
        outp = utils.getContainer(taskdir, container, **kwargs)

    if cluster:
        from slurmpy import Slurm
        jobname = jobname if jobname else "clowdr"
        cargs = {}
        if clusterargs:
            for opt in clusterargs.split(","):
                k, v = opt.split(":")[0], opt.split(":")[1:]
                v = ":".join(v)
                cargs[k] = v
        job = Slurm(jobname, cargs)

        script = "clowdr task {} -p {} --local"
        if workdir:
            script += " -w {}".format(workdir)
        if volumes:
            script += " ".join([" -v {}".format(vol) for vol in volumes])
        if verbose:
            script += " -V"

    # Groups tasks into collections to be run together (default size = 1)
    gsize = groupby if groupby else 1
    taskgroups = [tasks[i:i + gsize] for i in range(0, len(tasks), gsize)]

    if dev:
        taskgroups = [taskgroups[0]]  # Just launch the first in dev mode

    if verbose:
        print("Launching tasks...")

    for taskgroup in taskgroups:
        if verbose:
            print("... Processing task(s): {}".format(", ".join(taskgroup)))
        if cluster:
            tmptaskgroup = " ".join(taskgroup)
            func = job.run
            args = [script.format(tmptaskgroup, taskdir)]
            # Submit; if submission fails, retry with fibonacci back-off
            utils.backoff(func, args, {},
                          backoff_time=backoff_time, **kwargs)
        else:
            runtask(taskgroup, provdir=taskdir, local=True, verbose=verbose,
                    workdir=workdir, volumes=volumes, user=user, **kwargs)

    if verbose:
        print(taskdir)
    return taskdir
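# A minimal sketch of calling the newer local() entrypoint above, assuming a
# file-handle descriptor (as with cloud(), ".name" is dereferenced) and
# hypothetical paths; passing cluster="slurm" would route submission through
# slurmpy instead of running task groups in-process.
#
#     with open("descriptor.json") as desc:
#         taskdir = local(desc, "invocation.json", "/tmp/clowdr-prov",
#                         verbose=True, dev=True)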