Esempio n. 1
0
def share(provdir, **kwargs):
    """share
    Serves a lightweight dashboard exposing every run found at the clowdrloc.

    Parameters
    ----------
    provdir : str
        Path with Clowdr metadata files (returned from "local" and "deploy")
    **kwargs : dict
        Arbitrary keyword arguments (i.e. {'verbose': True})

    Returns
    -------
    None
    """
    verbose = kwargs.get("verbose")

    # Remote (S3) locations are mirrored into a local scratch directory first.
    if provdir.startswith("s3://"):
        scratch = utils.truepath(tempfile.mkdtemp())
        utils.get(provdir, scratch, **kwargs)
        provdir = op.join(scratch, utils.splitS3Path(provdir)[1])
        if verbose:
            print("Local cache of directory: {}".format(provdir))

    # Either reuse a pre-built summary file or generate one next to the runs.
    if op.isfile(provdir):
        if verbose:
            print("Summary file provided - no need to generate.")
        with open(provdir) as fhandle:
            experiment_dict = json.load(fhandle)
    else:
        summary_path = op.join(provdir, 'clowdr-summary.json')
        experiment_dict = consolidate.summary(provdir, summary_path)

    dashboard = portal.CreatePortal(experiment_dict)
    app = dashboard.launch()

    # An explicit host wins; otherwise bind on all interfaces.
    app.run_server(host=kwargs.get("host") or "0.0.0.0",
                   debug=kwargs.get("debug"))
Esempio n. 2
0
def consolidateTask(tool, invocation, clowdrloc, dataloc, **kwargs):
    """consolidate
    Builds the Clowdr task JSON files that bundle all metadata for each task.

    Parameters
    ----------
    tool : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation for the tool and parameters to be run
    clowdrloc : str
        Path for storing Clowdr intermediate files and outputs
    dataloc : str
        Path for accessing input data
    **kwargs : dict
        Arbitrary keyword arguments (i.e. {'verbose': True})

    Returns
    -------
    tuple: (list, list)
        The task dictionary JSONs, and associated Boutiques invocation files.
    """
    # Unique per-run identifier: timestamp plus a short random suffix.
    stamp = datetime.datetime.fromtimestamp(time.time())
    modif = "{}-{}".format(stamp.strftime('%Y-%m-%d_%H-%M-%S'),
                           utils.randstring(8))

    # Scrub inputs into absolute, normalized paths.
    tool = utils.truepath(tool)
    invocation = utils.truepath(invocation)
    clowdrloc = utils.truepath(clowdrloc)
    dataloc = utils.truepath(dataloc)

    # Initialize the task dictionary template shared by every task.
    with open(tool) as fhandle:
        toolname = json.load(fhandle)["name"].replace(' ', '-')
    taskloc = op.join(clowdrloc, modif, 'clowdr')
    os.makedirs(taskloc)

    taskdict = {}
    taskdict["taskloc"] = op.join(clowdrloc, modif, toolname)
    taskdict["dataloc"] = [dataloc]
    taskdict["invocation"] = utils.get(invocation, taskloc)[0]
    taskdict["tool"] = utils.get(tool, taskloc)[0]

    # Case 1: User supplies a directory of invocations — one task apiece.
    if op.isdir(invocation):
        taskdicts = []
        invocations = []
        for invoc in os.listdir(invocation):
            entry = deepcopy(taskdict)
            fetched = utils.get(op.join(invocation, invoc), taskloc)
            entry["invocation"] = utils.truepath(fetched[0])
            invocations.extend(fetched)
            taskdicts.append(entry)

    # Case 2: User supplies a single invocation file.
    else:
        if kwargs.get("bids"):
            # Case 2a: a BIDS app — expand into per-participant tasks.
            taskdicts, invocations = bidsTasks(taskloc, taskdict)
        else:
            # Case 2b: quite simply a single invocation.
            taskdicts = [taskdict]
            invocations = [taskdict["invocation"]]

    # Post-case: expand tasks across any requested parameter sweep.
    for sweep_param in kwargs.get("sweep") or []:
        taskdicts, invocations = sweepTasks(taskdicts, invocations,
                                            sweep_param)

    # Persist each task definition to disk and record its filename.
    taskdictnames = []
    for idx, entry in enumerate(taskdicts):
        taskfname = op.join(taskloc, "task-{}.json".format(idx))
        taskdictnames.append(taskfname)
        with open(taskfname, 'w') as fhandle:
            fhandle.write(json.dumps(entry, indent=4, sort_keys=True))

    return (taskdictnames, invocations)
Esempio n. 3
0
def processTask(metadata, clowdrloc=None, verbose=False, **kwargs):
    """processTask
    Fetches a Clowdr task definition, executes the described Boutiques
    task, and posts its stdout/stderr/summary records back beside the
    remote metadata.

    Parameters
    ----------
    metadata : str
        Path (local or S3) to the Clowdr task JSON file
    clowdrloc : str, optional
        Local working directory; defaults to "/clowtask/"
    verbose : bool, optional
        Toggle verbose progress printing
    **kwargs : dict
        Arbitrary keyword arguments (i.e. {'local': True})

    Returns
    -------
    None
    """
    # Get metadata
    if clowdrloc is None:
        localtaskdir = "/clowtask/"
    else:
        localtaskdir = clowdrloc

    localtaskdir = op.join(localtaskdir, "clowtask_" + utils.randstring(3))
    if not op.exists(localtaskdir):
        os.makedirs(localtaskdir)

    if verbose:
        print("Fetching metadata...")
    remotetaskdir = op.dirname(metadata)
    metadata = utils.get(metadata, localtaskdir)[0]
    task_id = metadata.split('.')[0].split('-')[-1]
    # The above grabs an ID from the form: fname-#.ext

    # Parse metadata (fix: use a context manager instead of a leaked handle)
    with open(metadata) as fhandle:
        metadata = json.load(fhandle)
    descriptor = metadata['tool']
    invocation = metadata['invocation']
    input_data = metadata['dataloc']
    output_loc = utils.truepath(metadata['taskloc'])

    if verbose:
        print("Fetching descriptor and invocation...")
    # Get descriptor and invocation
    desc_local = utils.get(descriptor, localtaskdir)[0]
    invo_local = utils.get(invocation, localtaskdir)[0]

    # Get input data, if running remotely
    if not kwargs.get("local") and \
       any(dl.startswith("s3://") for dl in input_data):
        if verbose:
            print("Fetching input data...")
        localdatadir = op.join(localtaskdir, "data")
        for dataloc in input_data:
            utils.get(dataloc, localdatadir)
        # Move to correct location
        os.chdir(localdatadir)
    else:
        if verbose:
            print("Skipping data fetch (local execution)...")
        if kwargs.get("workdir") and op.exists(kwargs.get("workdir")):
            os.chdir(kwargs["workdir"])

    if verbose:
        print("Beginning execution...")
    # Launch task
    start_time = time.time()
    if kwargs.get("volumes"):
        volumes = " ".join(kwargs.get("volumes"))
        bosh_output = bosh.execute('launch', desc_local, invo_local, '-v',
                                   volumes)
    else:
        bosh_output = bosh.execute('launch', desc_local, invo_local)
    # Fix: the verbose dump of the execution result used to live inside the
    # no-volumes branch only; it now runs regardless of volume mounts.
    if verbose:
        print(bosh_output)
    duration = time.time() - start_time

    # Determine which of the tool's declared outputs actually exist.
    # (Removed a dead pre-read of "output-files" from the descriptor: its
    # result was immediately overwritten by bosh.evaluate here.)
    outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
    outputs_present = [outfile for outfile in outputs_all.values()
                       if op.exists(outfile)]

    # Write stdout to file
    stdoutf = "stdout-{}.txt".format(task_id)
    with open(op.join(localtaskdir, stdoutf), "w") as fhandle:
        fhandle.write(bosh_output.stdout)
    utils.post(op.join(localtaskdir, stdoutf), remotetaskdir)

    # Write stderr to file
    stderrf = "stderr-{}.txt".format(task_id)
    with open(op.join(localtaskdir, stderrf), "w") as fhandle:
        fhandle.write(bosh_output.stderr)
    utils.post(op.join(localtaskdir, stderrf), remotetaskdir)

    # Write summary values to file, including:
    summary = {
        "duration": duration,
        "exitcode": bosh_output.exit_code,
        "outputs": [],
        "stdout": op.join(remotetaskdir, stdoutf),
        "stderr": op.join(remotetaskdir, stderrf)
    }

    if not kwargs.get("local"):
        if verbose:
            print("Uploading outputs...")
        # Push outputs
        for local_output in outputs_present:
            if verbose:
                print("{} --> {}".format(local_output, output_loc))
            summary["outputs"] += utils.post(local_output, output_loc)
    else:
        if verbose:
            print("Skipping uploading outputs (local execution)...")
        summary["outputs"] = outputs_present

    summarf = "summary-{}.json".format(task_id)
    with open(op.join(localtaskdir, summarf), "w") as fhandle:
        fhandle.write(json.dumps(summary) + "\n")
    utils.post(op.join(localtaskdir, summarf), remotetaskdir)
Esempio n. 4
0
    def manageTask(self, taskfile, provdir=None, verbose=False, **kwargs):
        """manageTask
        Fetches a Clowdr task definition, executes it through provLaunch,
        and posts usage/stdout/stderr/summary records next to the task file.

        Parameters
        ----------
        taskfile : str
            Path (local or S3) to the Clowdr task JSON file
        provdir : str, optional
            Local working directory; defaults to "/clowtask/"
        verbose : bool, optional
            Toggle verbose progress printing
        **kwargs : dict
            Arbitrary keyword arguments (i.e. {'local': True})

        Returns
        -------
        None
        """
        # Get metadata
        if provdir is None:
            self.localtaskdir = "/clowtask/"
        else:
            self.localtaskdir = provdir

        # The below grabs an ID from the form: /some/path/to/fname-#.ext
        self.task_id = taskfile.split('.')[0].split('-')[-1]

        self.localtaskdir = op.join(self.localtaskdir,
                                    "clowtask_" + self.task_id)
        if not op.exists(self.localtaskdir):
            os.makedirs(self.localtaskdir)

        if verbose:
            print("Fetching metadata...", flush=True)
        remotetaskdir = op.dirname(taskfile)
        taskfile = utils.get(taskfile, self.localtaskdir)[0]

        # Parse metadata (fix: context manager instead of a leaked handle)
        with open(taskfile) as fhandle:
            taskinfo = json.load(fhandle)
        descriptor = taskinfo['tool']
        invocation = taskinfo['invocation']
        input_data = taskinfo['dataloc']
        output_loc = utils.truepath(taskinfo['taskloc'])

        if verbose:
            print("Fetching descriptor and invocation...", flush=True)
        # Get descriptor and invocation
        desc_local = utils.get(descriptor, self.localtaskdir)[0]
        invo_local = utils.get(invocation, self.localtaskdir)[0]

        # Get input data, if running remotely. Fix: initialized up front so
        # the cleanup loop at the bottom can no longer hit a NameError when
        # execution is remote but no input lives on S3.
        local_input_data = []
        if not kwargs.get("local") and \
           any(dl.startswith("s3://") for dl in input_data):
            if verbose:
                print("Fetching input data...", flush=True)
            localdatadir = op.join("/data")
            for dataloc in input_data:
                local_input_data += utils.get(dataloc, localdatadir)
            # Move to correct location
            os.chdir(localdatadir)
        else:
            if verbose:
                print("Skipping data fetch (local execution)...", flush=True)
            if kwargs.get("workdir") and op.exists(kwargs.get("workdir")):
                os.chdir(kwargs["workdir"])

        if verbose:
            print("Beginning execution...", flush=True)
        # Launch task
        copts = ['launch', desc_local, invo_local]
        if kwargs.get("volumes"):
            copts += ['-v'] + kwargs.get("volumes")
        if kwargs.get("user"):
            copts += ['-u']

        start_time = time.time()
        self.provLaunch(copts, verbose=verbose, **kwargs)
        if verbose:
            print(self.output, flush=True)
        duration = time.time() - start_time

        # Determine which of the tool's declared outputs actually exist.
        # (Removed a dead pre-read of "output-files" from the descriptor:
        # its result was immediately overwritten by bosh.evaluate here.)
        outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
        outputs_present = [outfile for outfile in outputs_all.values()
                           if op.exists(outfile)]

        # Write memory/cpu stats to file
        usagef = "task-{}-usage.csv".format(self.task_id)
        self.cpu_ram_usage.to_csv(op.join(self.localtaskdir, usagef),
                                  sep=',',
                                  index=False)
        utils.post(op.join(self.localtaskdir, usagef), remotetaskdir)

        # Write stdout to file
        stdoutf = "task-{}-stdout.txt".format(self.task_id)
        with open(op.join(self.localtaskdir, stdoutf), "w") as fhandle:
            fhandle.write(self.output.stdout)
        utils.post(op.join(self.localtaskdir, stdoutf), remotetaskdir)

        # Write stderr to file
        stderrf = "task-{}-stderr.txt".format(self.task_id)
        with open(op.join(self.localtaskdir, stderrf), "w") as fhandle:
            fhandle.write(self.output.stderr)
        utils.post(op.join(self.localtaskdir, stderrf), remotetaskdir)

        start_time = datetime.fromtimestamp(mktime(localtime(start_time)))
        summary = {
            "duration": duration,
            "launchtime": str(start_time),
            "exitcode": self.output.exit_code,
            "outputs": [],
            "usage": op.join(remotetaskdir, usagef),
            "stdout": op.join(remotetaskdir, stdoutf),
            "stderr": op.join(remotetaskdir, stderrf)
        }

        if not kwargs.get("local"):
            if verbose:
                print("Uploading outputs...", flush=True)
            # Push outputs
            for local_output in outputs_present:
                if verbose:
                    print("{} --> {}".format(local_output, output_loc),
                          flush=True)
                tmpouts = utils.post(local_output, output_loc)
                # Fix: this used to print unconditionally (leftover debug
                # output); it now respects the verbose flag.
                if verbose:
                    print(tmpouts)
                summary["outputs"] += tmpouts
        else:
            if verbose:
                print("Skipping uploading outputs (local execution)...",
                      flush=True)
            summary["outputs"] = outputs_present

        summarf = "task-{}-summary.json".format(self.task_id)
        with open(op.join(self.localtaskdir, summarf), "w") as fhandle:
            fhandle.write(json.dumps(summary, indent=4, sort_keys=True) + "\n")
        utils.post(op.join(self.localtaskdir, summarf), remotetaskdir)

        # If not local, delete all: inputs, outputs, and summaries
        if not kwargs.get("local"):
            for local_output in outputs_present:
                utils.remove(local_output)
            utils.remove(self.localtaskdir)
            for local_input in local_input_data:
                utils.remove(local_input)