def share(provdir, **kwargs):
    """share

    Builds a portal from a Clowdr experiment summary and runs its web
    server, showcasing all runs found at the given location.

    Parameters
    ----------
    provdir : str
        Path with Clowdr metadata files (returned from "local" and
        "deploy"), or the path of an existing summary file. A path starting
        with "s3://" is first fetched into a local temporary directory.
    **kwargs : dict
        Arbitrary keyword arguments (i.e. {'verbose': True}). The keys read
        here are "verbose", "host" (defaults to "0.0.0.0") and "debug".

    Returns
    -------
    None
    """
    verbose = kwargs.get("verbose")

    if provdir.startswith("s3://"):
        # Remote location: mirror it into a fresh temp dir and work from there
        cache_root = utils.truepath(tempfile.mkdtemp())
        utils.get(provdir, cache_root, **kwargs)
        provdir = op.join(cache_root, utils.splitS3Path(provdir)[1])
        if verbose:
            print("Local cache of directory: {}".format(provdir))

    if not op.isfile(provdir):
        # A directory was given: generate the summary alongside the metadata
        summary_path = op.join(provdir, 'clowdr-summary.json')
        experiment_dict = consolidate.summary(provdir, summary_path)
    else:
        if verbose:
            print("Summary file provided - no need to generate.")
        with open(provdir) as summary_file:
            experiment_dict = json.load(summary_file)

    server = portal.CreatePortal(experiment_dict).launch()
    server.run_server(host=kwargs.get("host") or "0.0.0.0",
                      debug=kwargs.get("debug"))
def consolidateTask(tool, invocation, clowdrloc, dataloc, **kwargs):
    """consolidate

    Builds the Clowdr task JSON files which summarize all metadata for a
    run, and writes them into a new, uniquely named task directory.

    Parameters
    ----------
    tool : str
        Path to a boutiques descriptor for the tool to be run
    invocation : str
        Path to a boutiques invocation (or a directory of invocations) for
        the tool and parameters to be run
    clowdrloc : str
        Path for storing Clowdr intermediate files and outputs
    dataloc : str
        Path for accessing input data
    **kwargs : dict
        Arbitrary keyword arguments (i.e. {'verbose': True}). The keys read
        here are "bids" and "sweep".

    Returns
    -------
    tuple: (list, list)
        The paths of the task definition JSON files written to disk, and
        the associated Boutiques invocation files.
    """
    # Unique label for this experiment: timestamp plus a random suffix
    stamp = datetime.datetime.fromtimestamp(time.time())
    label = "{}-{}".format(stamp.strftime('%Y-%m-%d_%H-%M-%S'),
                           utils.randstring(8))

    # Scrub inputs
    tool, invocation, clowdrloc, dataloc = [
        utils.truepath(path)
        for path in (tool, invocation, clowdrloc, dataloc)
    ]

    # The tool's name (spaces replaced) labels the output location
    with open(tool) as descriptor:
        toolname = json.load(descriptor)["name"].replace(' ', '-')

    taskloc = op.join(clowdrloc, label, 'clowdr')
    os.makedirs(taskloc)

    # Template task shared by every case below
    base_task = {
        "taskloc": op.join(clowdrloc, label, toolname),
        "dataloc": [dataloc],
        "invocation": utils.get(invocation, taskloc)[0],
        "tool": utils.get(tool, taskloc)[0],
    }

    if op.isdir(invocation):
        # Case 1: User supplies directory of invocations
        taskdicts, invocations = [], []
        for entry in os.listdir(invocation):
            task = deepcopy(base_task)
            fetched = utils.get(op.join(invocation, entry), taskloc)
            task["invocation"] = utils.truepath(fetched[0])
            invocations.extend(fetched)
            taskdicts.append(task)
    elif kwargs.get("bids"):
        # Case 2a: User supplies a single invocation for a BIDS app
        taskdicts, invocations = bidsTasks(taskloc, base_task)
    else:
        # Case 2b: User is quite simply just launching a single invocation
        taskdicts = [base_task]
        invocations = [base_task["invocation"]]

    # Post-case: User is performing a parameter sweep over invocations
    for sweep_param in kwargs.get("sweep") or ():
        taskdicts, invocations = sweepTasks(taskdicts, invocations,
                                            sweep_param)

    # Store task definition files to disk
    taskdictnames = []
    for index, task in enumerate(taskdicts):
        task_path = op.join(taskloc, "task-{}.json".format(index))
        taskdictnames.append(task_path)
        with open(task_path, 'w') as out:
            out.write(json.dumps(task, indent=4, sort_keys=True))

    return taskdictnames, invocations
def processTask(metadata, clowdrloc=None, verbose=False, **kwargs):
    """processTask

    Fetches a Clowdr task definition, launches the tool it describes
    through bosh, and stores the captured stdout, stderr and a JSON summary
    next to the original task definition.

    Parameters
    ----------
    metadata : str
        Path to the task metadata JSON, named in the form "fname-#.ext".
        The file must provide the keys 'tool', 'invocation', 'dataloc' and
        'taskloc'.
    clowdrloc : str, optional
        Directory under which a "clowtask_<random>" working directory is
        created. "/clowtask/" is used when None.
    verbose : bool, optional
        Print progress messages and the bosh output. Default is False.
    **kwargs : dict
        Arbitrary keyword arguments. The keys read here are:
        "local" - when truthy, inputs are not fetched and outputs are not
        uploaded; "workdir" - directory to change into (if it exists) when
        inputs are not fetched; "volumes" - strings which are joined with
        spaces and handed to bosh after '-v'.

    Returns
    -------
    None
    """
    # Create a scratch directory for this task
    basedir = "/clowtask/" if clowdrloc is None else clowdrloc
    localtaskdir = op.join(basedir, "clowtask_" + utils.randstring(3))
    if not op.exists(localtaskdir):
        os.makedirs(localtaskdir)

    if verbose:
        print("Fetching metadata...")
    remotetaskdir = op.dirname(metadata)
    metadata_local = utils.get(metadata, localtaskdir)[0]

    # Grab the ID from a file name of the form: fname-#.ext
    # Only the basename is parsed, so that '.' or '-' characters in parent
    # directories (e.g. "~/.clowdr/") cannot corrupt the ID.
    task_id = op.basename(metadata_local).split('.')[0].split('-')[-1]

    # Parse metadata (context manager so the handle is always closed)
    with open(metadata_local) as fhandle:
        taskinfo = json.load(fhandle)
    descriptor = taskinfo['tool']
    invocation = taskinfo['invocation']
    input_data = taskinfo['dataloc']
    output_loc = utils.truepath(taskinfo['taskloc'])

    if verbose:
        print("Fetching descriptor and invocation...")
    # Get descriptor and invocation
    desc_local = utils.get(descriptor, localtaskdir)[0]
    invo_local = utils.get(invocation, localtaskdir)[0]

    # Get input data, if running remotely
    if not kwargs.get("local") and \
       any(dl.startswith("s3://") for dl in input_data):
        if verbose:
            print("Fetching input data...")
        localdatadir = op.join(localtaskdir, "data")
        for dataloc in input_data:
            utils.get(dataloc, localdatadir)
        # Move to correct location
        os.chdir(localdatadir)
    else:
        if verbose:
            print("Skipping data fetch (local execution)...")
        if kwargs.get("workdir") and op.exists(kwargs.get("workdir")):
            os.chdir(kwargs["workdir"])

    if verbose:
        print("Beginning execution...")
    # Launch task
    start_time = time.time()
    if kwargs.get("volumes"):
        volumes = " ".join(kwargs.get("volumes"))
        bosh_output = bosh.execute('launch', desc_local, invo_local,
                                   '-v', volumes)
    else:
        bosh_output = bosh.execute('launch', desc_local, invo_local)
    # Stop the clock before printing so that the reported duration covers
    # the execution only
    duration = time.time() - start_time
    if verbose:
        print(bosh_output)

    # Get list of bosh exec outputs which actually exist on disk
    outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
    outputs_present = [outfile for outfile in outputs_all.values()
                       if op.exists(outfile)]

    # Write stdout to file
    stdoutf = "stdout-{}.txt".format(task_id)
    with open(op.join(localtaskdir, stdoutf), "w") as fhandle:
        fhandle.write(bosh_output.stdout)
    utils.post(op.join(localtaskdir, stdoutf), remotetaskdir)

    # Write stderr to file
    stderrf = "stderr-{}.txt".format(task_id)
    with open(op.join(localtaskdir, stderrf), "w") as fhandle:
        fhandle.write(bosh_output.stderr)
    utils.post(op.join(localtaskdir, stderrf), remotetaskdir)

    # Write summary values to file
    summary = {
        "duration": duration,
        "exitcode": bosh_output.exit_code,
        "outputs": [],
        "stdout": op.join(remotetaskdir, stdoutf),
        "stderr": op.join(remotetaskdir, stderrf)
    }

    if not kwargs.get("local"):
        if verbose:
            print("Uploading outputs...")
        # Push outputs
        for local_output in outputs_present:
            if verbose:
                print("{} --> {}".format(local_output, output_loc))
            summary["outputs"] += utils.post(local_output, output_loc)
    else:
        if verbose:
            print("Skipping uploading outputs (local execution)...")
        summary["outputs"] = outputs_present

    summarf = "summary-{}.json".format(task_id)
    with open(op.join(localtaskdir, summarf), "w") as fhandle:
        fhandle.write(json.dumps(summary) + "\n")
    utils.post(op.join(localtaskdir, summarf), remotetaskdir)
def manageTask(self, taskfile, provdir=None, verbose=False, **kwargs):
    """manageTask

    Fetches a Clowdr task definition, launches it through
    `self.provLaunch`, and stores usage statistics, stdout, stderr and a
    JSON summary next to the original task definition. For non-local runs
    the outputs are uploaded and the local copies are removed afterwards.

    Sets `self.task_id` and `self.localtaskdir`; reads `self.output` and
    `self.cpu_ram_usage` after the launch (presumably populated by
    `self.provLaunch` - confirm against that method).

    Parameters
    ----------
    taskfile : str
        Path to the task definition JSON, named in the form "fname-#.ext".
        The file must provide the keys 'tool', 'invocation', 'dataloc' and
        'taskloc'.
    provdir : str, optional
        Directory under which the "clowtask_<id>" working directory is
        created. "/clowtask/" is used when None.
    verbose : bool, optional
        Print progress messages and the task output. Default is False.
    **kwargs : dict
        Arbitrary keyword arguments, also forwarded to `self.provLaunch`.
        The keys read here are: "local" - when truthy, inputs are not
        fetched, outputs are not uploaded and nothing is cleaned up;
        "workdir" - directory to change into (if it exists) when inputs
        are not fetched; "volumes" - list appended to the launch options
        after '-v'; "user" - when truthy, '-u' is added to the launch
        options.

    Returns
    -------
    None
    """
    # Grab the ID from a file name of the form: /some/path/to/fname-#.ext
    # Only the basename is parsed, so that '.' or '-' characters in parent
    # directories or bucket names cannot corrupt the ID.
    self.task_id = op.basename(taskfile).split('.')[0].split('-')[-1]

    basedir = "/clowtask/" if provdir is None else provdir
    self.localtaskdir = op.join(basedir, "clowtask_" + self.task_id)
    if not op.exists(self.localtaskdir):
        os.makedirs(self.localtaskdir)

    if verbose:
        print("Fetching metadata...", flush=True)
    remotetaskdir = op.dirname(taskfile)
    taskfile = utils.get(taskfile, self.localtaskdir)[0]

    # Parse metadata (context manager so the handle is always closed)
    with open(taskfile) as fhandle:
        taskinfo = json.load(fhandle)
    descriptor = taskinfo['tool']
    invocation = taskinfo['invocation']
    input_data = taskinfo['dataloc']
    output_loc = utils.truepath(taskinfo['taskloc'])

    if verbose:
        print("Fetching descriptor and invocation...", flush=True)
    # Get descriptor and invocation
    desc_local = utils.get(descriptor, self.localtaskdir)[0]
    invo_local = utils.get(invocation, self.localtaskdir)[0]

    # Always defined, because the cleanup at the end iterates over it even
    # when no input data was fetched
    local_input_data = []

    # Get input data, if running remotely
    if not kwargs.get("local") and \
       any(dl.startswith("s3://") for dl in input_data):
        if verbose:
            print("Fetching input data...", flush=True)
        localdatadir = "/data"
        for dataloc in input_data:
            local_input_data += utils.get(dataloc, localdatadir)
        # Move to correct location
        os.chdir(localdatadir)
    else:
        if verbose:
            print("Skipping data fetch (local execution)...", flush=True)
        if kwargs.get("workdir") and op.exists(kwargs.get("workdir")):
            os.chdir(kwargs["workdir"])

    if verbose:
        print("Beginning execution...", flush=True)
    # Launch task
    copts = ['launch', desc_local, invo_local]
    if kwargs.get("volumes"):
        copts += ['-v'] + kwargs.get("volumes")
    if kwargs.get("user"):
        copts += ['-u']

    start_time = time.time()
    self.provLaunch(copts, verbose=verbose, **kwargs)
    # Stop the clock before printing so that the reported duration covers
    # the launch only
    duration = time.time() - start_time
    if verbose:
        print(self.output, flush=True)

    # Get list of bosh exec outputs which actually exist on disk
    outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
    outputs_present = [outfile for outfile in outputs_all.values()
                       if op.exists(outfile)]

    # Write memory/cpu stats to file
    usagef = "task-{}-usage.csv".format(self.task_id)
    self.cpu_ram_usage.to_csv(op.join(self.localtaskdir, usagef),
                              sep=',', index=False)
    utils.post(op.join(self.localtaskdir, usagef), remotetaskdir)

    # Write stdout to file
    stdoutf = "task-{}-stdout.txt".format(self.task_id)
    with open(op.join(self.localtaskdir, stdoutf), "w") as fhandle:
        fhandle.write(self.output.stdout)
    utils.post(op.join(self.localtaskdir, stdoutf), remotetaskdir)

    # Write stderr to file
    stderrf = "task-{}-stderr.txt".format(self.task_id)
    with open(op.join(self.localtaskdir, stderrf), "w") as fhandle:
        fhandle.write(self.output.stderr)
    utils.post(op.join(self.localtaskdir, stderrf), remotetaskdir)

    # Launch time as a local datetime, truncated to whole seconds
    launch_time = datetime.fromtimestamp(mktime(localtime(start_time)))
    summary = {
        "duration": duration,
        "launchtime": str(launch_time),
        "exitcode": self.output.exit_code,
        "outputs": [],
        "usage": op.join(remotetaskdir, usagef),
        "stdout": op.join(remotetaskdir, stdoutf),
        "stderr": op.join(remotetaskdir, stderrf)
    }

    if not kwargs.get("local"):
        if verbose:
            print("Uploading outputs...", flush=True)
        # Push outputs
        for local_output in outputs_present:
            if verbose:
                print("{} --> {}".format(local_output, output_loc),
                      flush=True)
            tmpouts = utils.post(local_output, output_loc)
            # NOTE(review): printed regardless of `verbose` - confirm
            # whether this is intentional
            print(tmpouts)
            summary["outputs"] += tmpouts
    else:
        if verbose:
            print("Skipping uploading outputs (local execution)...",
                  flush=True)
        summary["outputs"] = outputs_present

    summarf = "task-{}-summary.json".format(self.task_id)
    with open(op.join(self.localtaskdir, summarf), "w") as fhandle:
        fhandle.write(json.dumps(summary, indent=4, sort_keys=True) + "\n")
    utils.post(op.join(self.localtaskdir, summarf), remotetaskdir)

    # If not local, delete all: inputs, outputs, and summaries
    if not kwargs.get("local"):
        for local_output in outputs_present:
            utils.remove(local_output)
        utils.remove(self.localtaskdir)
        for local_input in local_input_data:
            utils.remove(local_input)