Example #1
0
    def test_metadata_to_remote(self):
        """Consolidated task metadata can be staged for a remote data location."""
        # Build the task/invocation metadata against the remote data location.
        tasks, invocs = metadata.consolidateTask(self.descriptor,
                                                 self.invocation1,
                                                 self.provdir,
                                                 self.dataloc2,
                                                 verbose=True,
                                                 bids=False)

        # Smoke-test that the generated tasks can be prepared for remote use.
        metadata.prepareForRemote(tasks, self.provdir, self.dataloc2)
Example #2
0
 def test_metadata_directory_invocs(self):
     """A directory of invocations yields one task per invocation file."""
     tasks, invocs = metadata.consolidateTask(self.descriptor,
                                              self.invocation4,
                                              self.provdir,
                                              self.dataloc1,
                                              verbose=True,
                                              bids=False)
     # assertEqual reports the mismatched values on failure, unlike a
     # compound assertTrue(... and ...) expression.
     self.assertEqual(len(tasks), len(invocs))
     self.assertEqual(len(tasks), len(os.listdir(self.invocation4)))
Example #3
0
    def test_metadata_single_invoc(self):
        """Task counts from consolidateTask match the invocation contents."""

        def consolidate(invocation, bids):
            # Thin wrapper to keep the four repeated call sites readable.
            return metadata.consolidateTask(self.descriptor,
                                            invocation,
                                            self.provdir,
                                            self.dataloc1,
                                            verbose=True,
                                            bids=bids)

        # A single non-BIDS invocation produces exactly one task.
        tasks, invocs = consolidate(self.invocation1, bids=False)
        self.assertEqual(len(tasks), len(invocs))
        self.assertEqual(len(tasks), 1)

        # BIDS mode: one task per participant.
        tasks, invocs = consolidate(self.invocation1, bids=True)
        with open(self.invocation1) as f:
            participants = len(json.load(f)["participant_label"])
        self.assertEqual(len(tasks), len(invocs))
        self.assertEqual(len(tasks), participants)

        # BIDS mode with participants and sessions: one task per pair.
        tasks, invocs = consolidate(self.invocation2, bids=True)
        with open(self.invocation2) as f:
            dat = json.load(f)
            total = len(dat["participant_label"]) * len(dat["session_label"])
        self.assertEqual(len(tasks), len(invocs))
        self.assertEqual(len(tasks), total)

        # BIDS mode with sessions only: one task per session.
        tasks, invocs = consolidate(self.invocation3, bids=True)
        with open(self.invocation3) as f:
            total = len(json.load(f)["session_label"])
        self.assertEqual(len(tasks), len(invocs))
        self.assertEqual(len(tasks), total)
Example #4
0
def cloud(descriptor, invocation, provdir, s3, cloud, credentials, **kwargs):
    """cloud
    Deploys a pipeline at scale on a cloud endpoint through Clowdr.

    Parameters
    ----------
    descriptor : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    provdir : str
        Path on S3 for storing Clowdr intermediate files and outputs
    s3 : str
        Path on S3 for accessing input data
    cloud : str
        Which endpoint to use for deployment
    credentials : str
        Credentials for Amazon with access to dataloc, clowdrloc, and Batch
    **kwargs : dict
        Arbitrary keyword arguments (i.e. {'verbose': True})

    Returns
    -------
    int
        The exit-code returned by the task being executed
    """
    # TODO: scrub inputs better
    # NOTE(review): descriptor appears to arrive as an open file object;
    # only its path is used here.
    descriptor = descriptor.name
    provdir = provdir.strip('/')

    # Stage all Clowdr metadata in a fresh temporary directory first.
    stage_dir = utils.truepath(tempfile.mkdtemp())

    tasks, invocs = metadata.consolidateTask(descriptor, invocation,
                                             stage_dir, s3, **kwargs)
    metadata.prepareForRemote(tasks, stage_dir, provdir)
    resource = launcher.configureResource(cloud, credentials, **kwargs)

    # Push staged metadata to the remote location and keep only task files.
    tasks_remote = [
        remote for remote in utils.post(stage_dir, provdir)
        if "task-" in remote
    ]

    if kwargs.get("dev"):
        # Dev mode: launch only the first task in the set.
        tasks_remote = [tasks_remote[0]]

    jids = [resource.launchJob(remote) for remote in tasks_remote]

    taskdir = op.dirname(utils.truepath(tasks_remote[0]))
    print(taskdir)
    return taskdir, jids
Example #5
0
    def test_metadata_sweep(self):
        """Sweeping two fields yields the Cartesian product of their values."""
        tasks, invocs = metadata.consolidateTask(
            self.descriptor,
            self.invocation5,
            self.provdir,
            self.dataloc1,
            verbose=True,
            sweep=["participant_label", "analysis_level"],
            setup=True)

        with open(self.invocation5) as fhandle:
            dat = json.load(fhandle)
            total = len(dat["participant_label"]) * len(dat["analysis_level"])
        # assertEqual reports the mismatched values on failure, unlike a
        # compound assertTrue(x == y == z) expression.
        self.assertEqual(len(tasks), len(invocs))
        self.assertEqual(len(tasks), total)
Example #6
0
def cloud(tool, invocation, clowdrloc, dataloc, endpoint, auth, **kwargs):
    """cloud
    Launches a pipeline at scale on a remote endpoint through Clowdr.

    Parameters
    ----------
    tool : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    clowdrloc : str
        Path on S3 for storing Clowdr intermediate files and outputs
    dataloc : str
        Path on S3 for accessing input data
    endpoint : str
        Which endpoint to use for deployment
    auth : str
        Credentials for Amazon with access to dataloc, clowdrloc, and Batch
    **kwargs : dict
        Arbitrary keyword arguments (i.e. {'verbose': True})

    Returns
    -------
    int
        The exit-code returned by the task being executed
    """
    # TODO: scrub inputs better
    clowdrloc = clowdrloc.strip('/')

    # Metadata is staged in a local temporary directory before pushing to S3.
    staging = utils.truepath(tempfile.mkdtemp())

    tasks, invocs = metadata.consolidateTask(tool, invocation, staging,
                                             dataloc, **kwargs)
    metadata.prepareForRemote(tasks, staging, clowdrloc)

    # Upload the staged metadata and keep only the task files.
    tasks_remote = [t for t in utils.post(staging, clowdrloc)
                    if "task-" in t]

    resource = launcher.configureResource(endpoint, auth, **kwargs)
    jids = [resource.launchJob(t) for t in tasks_remote]

    taskdir = op.dirname(utils.truepath(tasks_remote[0]))
    print(taskdir)
    return taskdir, jids
Example #7
0
def local(tool, invocation, clowdrloc, dataloc, **kwargs):
    """local
    Runs a pipeline on the local machine through the Clowdr wrappers.

    Parameters
    ----------
    tool : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    clowdrloc : str
        Path for storing Clowdr intermediate files and outputs
    dataloc : str
        Path for accessing input data. If local, provide the hostname and
        optionally a path. If on S3, provide an S3 path.
    **kwargs : dict
        Arbitrary keyword arguments. Currently supported arguments:
        - verbose : bool
            Toggle verbose output printing
        - dev : bool
            Toggle dev mode (only runs first execution in the specified set)

        Additionally, transfers all keyword arguments accepted by "processTask"

    Returns
    -------
    int
        The exit-code returned by the task being executed
    """
    # TODO: scrub inputs
    tasks, invocs = metadata.consolidateTask(tool, invocation, clowdrloc,
                                             dataloc, **kwargs)
    if kwargs.get("dev"):
        # Dev mode: only the first task in the set is executed.
        tasks = [tasks[0]]

    # All tasks share a directory; run from there.
    taskdir = op.dirname(utils.truepath(tasks[0]))
    os.chdir(taskdir)
    for tsk in tasks:
        processTask(tsk, taskdir, local=True, **kwargs)

    if kwargs.get("verbose"):
        print(taskdir)
    return taskdir
Example #8
0
def cluster(tool, invocation, clowdrloc, dataloc, cluster, **kwargs):
    """cluster
    Launches a pipeline on a cluster scheduler through the Clowdr wrappers.

    Parameters
    ----------
    tool : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    clowdrloc : str
        Path for storing Clowdr intermediate files and outputs
    dataloc : str
        Path for accessing input data. If local, provide the hostname and
        optionally a path. If on S3, provide an S3 path.
    cluster : str
        Scheduler on the cluster being used. Currently, the only supported mode
        is slurm.
    **kwargs : dict
        Arbitrary keyword arguments. Currently supported arguments:
        - account : str
            Account for the cluster scheduler
        - jobname : str
            Base-name for the jobs as they will appear in the scheduler
        - verbose : bool
            Toggle verbose output printing
        - dev : bool
            Toggle dev mode (only runs first execution in the specified set)

        Additionally, transfers all keyword arguments accepted by both of
        "controller.metadata.consolidateTask" and "task.processTask"

    Returns
    -------
    int
        The exit-code returned by the task being executed
    """
    # TODO: scrub inputs
    tool = utils.truepath(tool)
    if kwargs.get("simg"):
        kwargs["simg"] = utils.truepath(kwargs["simg"])

    # Imported lazily so installs without slurmpy can still use local mode.
    from slurmpy import Slurm

    if kwargs.get("verbose"):
        print("Consolidating metadata...")
    [tasks, invocs] = metadata.consolidateTask(tool, invocation, clowdrloc,
                                               dataloc, **kwargs)
    if kwargs.get("dev"):
        tasks = [tasks[0]]  # Just launch the first task in dev

    taskdir = op.dirname(utils.truepath(tasks[0]))
    try:
        os.mkdir(taskdir)
    except FileExistsError:
        pass
    os.chdir(taskdir)

    with open(tool) as fhandle:
        container = json.load(fhandle).get("container-image")
    if container:
        if kwargs.get("verbose"):
            print("Getting container...")
        outp = utils.getContainer(taskdir, container, **kwargs)
        if kwargs.get("verbose"):
            print(outp)

    # Fall back to a default job name when none (or an empty one) was given.
    jobname = kwargs.get("jobname") or "clowdrtask"
    slurm_args = {}
    if kwargs.get("slurm_args"):
        # Options arrive as comma-separated "key:value" pairs; values may
        # themselves contain ':' so only the first separator is significant.
        for opt in kwargs.get("slurm_args").split(","):
            k, _, v = opt.partition(":")
            slurm_args[k] = v
    job = Slurm(jobname, slurm_args)

    script = "clowdr run {} -c {} --local"
    if kwargs.get("workdir"):
        script += " -w {}".format(kwargs["workdir"])
    if kwargs.get("volumes"):
        script += " ".join(
            [" -v {}".format(vol) for vol in kwargs.get("volumes")])

    for task in tasks:
        job.run(script.format(task, taskdir))

    if kwargs.get("verbose"):
        print(taskdir)
    return taskdir
Example #9
0
def local(descriptor, invocation, provdir, backoff_time=36000, sweep=None,
          verbose=False, workdir=None, simg=None, rerun=None, run_id=None,
          volumes=None, s3=None, cluster=None, jobname=None, clusterargs=None,
          dev=False, groupby=None, user=False, setup=False, **kwargs):
    """local
    Launches a pipeline locally, or on a cluster, through the Clowdr wrappers.

    Parameters
    ----------
    descriptor : file
        Open handle to a boutiques descriptor for the tool to be run; only
        its ``.name`` path is used here
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    provdir : str
        Path for storing Clowdr intermediate files and outputs
    backoff_time : int
        Time limit for wait times when resubmitting jobs to a scheduler
    sweep : list of str
        Invocation fields to sweep over when generating tasks
    verbose : bool
        Toggle verbose output printing
    workdir : str
        Working directory passed through to the task wrapper
    simg : str
        Path to a Singularity image for the tool
    rerun : str
        Re-run mode for a previous execution; requires run_id
    run_id : str
        Identifier of the previous run to be re-launched
    volumes : list of str
        Volume-mount specifications passed through to the task wrapper
    s3 : str
        S3 path for input data; when absent, data is assumed to be local
    cluster : str
        Scheduler on the cluster being used. Currently, the only supported
        mode is slurm.
    jobname : str
        Base-name for the jobs as they will appear in the scheduler
    clusterargs : str
        Comma-separated "key:value" scheduler options
    dev : bool
        Toggle dev mode (only runs first execution in the specified set)
    groupby : int
        Number of tasks bundled into each submission (default 1)
    user : bool
        Toggle user-mode execution in the task wrapper
    setup : bool
        When True, stop after generating the task metadata
    **kwargs : dict
        Additionally, transfers all keyword arguments accepted by both of
        "controller.metadata.consolidateTask" and "task.TaskHandler"

    Returns
    -------
    int
        The exit-code returned by the task being executed
    """
    # A mutable default ([]) would be shared across calls; normalize here so
    # downstream still receives a list.
    sweep = [] if sweep is None else sweep

    # TODO: scrub inputs
    descriptor = descriptor.name
    tool = utils.truepath(descriptor)
    if simg:
        simg = utils.truepath(simg)

    if verbose:
        print("Consolidating metadata...")

    dataloc = s3 if s3 else "localhost"
    if rerun:
        if not run_id:
            raise SystemExit("**Error: Option --rerun requires --run_id")
        # TODO: add option for tasks within the rerun, addition to blanket modes
        tasks = rerunner.getTasks(provdir, run_id, rerun)
        if not len(tasks):
            if verbose:
                print("No tasks to run.")
            return 0

    else:
        [tasks, invocs] = metadata.consolidateTask(descriptor, invocation,
                                                   provdir, dataloc,
                                                   sweep=sweep, **kwargs)

    taskdir = op.dirname(utils.truepath(tasks[0]))
    try:
        os.mkdir(taskdir)
    except FileExistsError:
        pass
    os.chdir(taskdir)

    if setup:
        print(taskdir)
        return taskdir

    with open(tool) as fhandle:
        container = json.load(fhandle).get("container-image")

    if container:
        if verbose:
            print("Getting container...")
        # Return value intentionally unused; fetching is the side effect.
        utils.getContainer(taskdir, container, **kwargs)

    if cluster:
        from slurmpy import Slurm
        jobname = jobname or "clowdr"
        cargs = {}
        if clusterargs:
            # "key:value" pairs; only the first ':' separates key from value.
            for opt in clusterargs.split(","):
                k, _, v = opt.partition(":")
                cargs[k] = v
        job = Slurm(jobname, cargs)

        script = "clowdr task {} -p {} --local"
        if workdir:
            script += " -w {}".format(workdir)
        if volumes:
            script += " ".join([" -v {}".format(vol)
                                for vol in volumes])
        if verbose:
            script += " -V"

    # Groups tasks into collections to be run together (default size = 1)
    gsize = groupby or 1
    taskgroups = [tasks[i:i+gsize] for i in range(0, len(tasks), gsize)]

    if dev:
        taskgroups = [taskgroups[0]]  # Just launch the first in dev mode

    if verbose:
        print("Launching tasks...")

    for taskgroup in taskgroups:
        if verbose:
            print("... Processing task(s): {}".format(", ".join(taskgroup)))

        if cluster:
            tmptaskgroup = " ".join(taskgroup)
            func = job.run
            args = [script.format(tmptaskgroup, taskdir)]
            # Submit. If submission fails, retry with fibonnaci back-off
            utils.backoff(func, args, {},
                          backoff_time=backoff_time, **kwargs)
        else:
            runtask(taskgroup, provdir=taskdir, local=True, verbose=verbose,
                    workdir=workdir, volumes=volumes, user=user, **kwargs)

    if verbose:
        print(taskdir)
    return taskdir