def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None): """Output a CWL description with sub-workflows and steps. """ step_dir = utils.safe_makedir(os.path.join(out_dir, "steps")) sample_json, variables, keyvals = _flatten_samples(samples, out_file) file_estimates = _calc_input_estimates(keyvals, integrations) out = _cwl_workflow_template(variables) parent_wfs = [] steps, wfoutputs = workflow_fn() for cur in workflow.generate(variables, steps, wfoutputs): if cur[0] == "step": _, name, parallel, inputs, outputs, programs, disk = cur step_file = _write_tool(step_dir, name, inputs, outputs, parallel, programs, file_estimates, disk, samples) out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel)) elif cur[0] == "upload": out["outputs"] = cur[1] elif cur[0] == "wf_start": parent_wfs.append(out) out = _cwl_workflow_template(cur[1]) elif cur[0] == "wf_finish": _, name, parallel, inputs, outputs = cur wf_out_file = "wf-%s.cwl" % name with open(os.path.join(out_dir, wf_out_file), "w") as out_handle: yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False) out = parent_wfs.pop(-1) out["steps"].append(_step_template(name, wf_out_file, inputs, outputs, parallel)) else: raise ValueError("Unexpected workflow value %s" % str(cur)) with open(out_file, "w") as out_handle: yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False) return out_file, sample_json
def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None): """Output a CWL description with sub-workflows and steps. """ step_dir = utils.safe_makedir(os.path.join(out_dir, "steps")) get_retriever = GetRetriever(integrations, samples) variables, keyvals = _flatten_samples(samples, out_file, get_retriever) cur_remotes = _get_cur_remotes(keyvals) file_estimates = _calc_input_estimates(keyvals, get_retriever) out = _cwl_workflow_template(variables) parent_wfs = [] step_parallelism = {} steps, wfoutputs = workflow_fn(samples) used_inputs = set([]) for cur in workflow.generate(variables, steps, wfoutputs): if cur[0] == "step": _, name, parallel, inputs, outputs, image, programs, disk, cores = cur step_file = _write_tool(step_dir, name, inputs, outputs, parallel, image, programs, file_estimates, disk, cores, samples, cur_remotes) out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel, step_parallelism)) used_inputs |= set(x["id"] for x in inputs) elif cur[0] == "expressiontool": _, name, inputs, outputs, expression, parallel = cur step_file = _write_expressiontool(step_dir, name, inputs, outputs, expression, parallel) out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel, step_parallelism)) used_inputs |= set(x["id"] for x in inputs) elif cur[0] == "upload": for output in cur[1]: wf_output = copy.deepcopy(output) if "outputSource" not in wf_output: wf_output["outputSource"] = wf_output.pop("source") wf_output = _clean_record(wf_output) out["outputs"].append(wf_output) elif cur[0] == "wf_start": parent_wfs.append(out) out = _cwl_workflow_template(cur[1]) elif cur[0] == "wf_finish": _, name, parallel, inputs, outputs, scatter = cur wf_out_file = "wf-%s.cwl" % name with open(os.path.join(out_dir, wf_out_file), "w") as out_handle: yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False) out = parent_wfs.pop(-1) out["steps"].append(_step_template(name, wf_out_file, inputs, outputs, parallel, step_parallelism, scatter)) used_inputs |= set(x["id"] for x in inputs) else: raise ValueError("Unexpected workflow value %s" % str(cur)) step_parallelism[name] = parallel with open(out_file, "w") as out_handle: out["inputs"] = [x for x in out["inputs"] if x["id"] in used_inputs] yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False) sample_json = "%s-samples.json" % utils.splitext_plus(out_file)[0] out_clean = _clean_final_outputs(copy.deepcopy({k: v for k, v in keyvals.items() if k in used_inputs}), get_retriever) with open(sample_json, "w") as out_handle: json.dump(out_clean, out_handle, sort_keys=True, indent=4, separators=(',', ': ')) return out_file, sample_json
def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None): """Output a CWL description with sub-workflows and steps. """ step_dir = utils.safe_makedir(os.path.join(out_dir, "steps")) sample_json, variables, keyvals = _flatten_samples(samples, out_file, integrations) file_estimates = _calc_input_estimates(keyvals, integrations) out = _cwl_workflow_template(variables) parent_wfs = [] steps, wfoutputs = workflow_fn(samples) for cur in workflow.generate(variables, steps, wfoutputs): if cur[0] == "step": _, name, parallel, inputs, outputs, image, programs, disk, cores = cur step_file = _write_tool(step_dir, name, inputs, outputs, parallel, image, programs, file_estimates, disk, cores, samples) out["steps"].append( _step_template(name, step_file, inputs, outputs, parallel)) elif cur[0] == "upload": for output in cur[1]: wf_output = copy.deepcopy(output) if "outputSource" not in wf_output: wf_output["outputSource"] = wf_output.pop("source") wf_output = _clean_record(wf_output) out["outputs"].append(wf_output) elif cur[0] == "wf_start": parent_wfs.append(out) out = _cwl_workflow_template(cur[1]) elif cur[0] == "wf_finish": _, name, parallel, inputs, outputs, scatter = cur wf_out_file = "wf-%s.cwl" % name with open(os.path.join(out_dir, wf_out_file), "w") as out_handle: yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False) out = parent_wfs.pop(-1) out["steps"].append( _step_template(name, wf_out_file, inputs, outputs, parallel, scatter)) else: raise ValueError("Unexpected workflow value %s" % str(cur)) with open(out_file, "w") as out_handle: yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False) return out_file, sample_json
def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None): """Output a CWL description with sub-workflows and steps. """ step_dir = utils.safe_makedir(os.path.join(out_dir, "steps")) variables, keyvals = _flatten_samples(samples, out_file, integrations) file_estimates = _calc_input_estimates(keyvals, integrations) out = _cwl_workflow_template(variables) parent_wfs = [] steps, wfoutputs = workflow_fn(samples) used_inputs = set([]) for cur in workflow.generate(variables, steps, wfoutputs): if cur[0] == "step": _, name, parallel, inputs, outputs, image, programs, disk, cores = cur step_file = _write_tool(step_dir, name, inputs, outputs, parallel, image, programs, file_estimates, disk, cores, samples) out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel)) used_inputs |= set(x["id"] for x in inputs) elif cur[0] == "upload": for output in cur[1]: wf_output = copy.deepcopy(output) if "outputSource" not in wf_output: wf_output["outputSource"] = wf_output.pop("source") wf_output = _clean_record(wf_output) out["outputs"].append(wf_output) elif cur[0] == "wf_start": parent_wfs.append(out) out = _cwl_workflow_template(cur[1]) elif cur[0] == "wf_finish": _, name, parallel, inputs, outputs, scatter = cur wf_out_file = "wf-%s.cwl" % name with open(os.path.join(out_dir, wf_out_file), "w") as out_handle: yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False) out = parent_wfs.pop(-1) out["steps"].append(_step_template(name, wf_out_file, inputs, outputs, parallel, scatter)) used_inputs |= set(x["id"] for x in inputs) else: raise ValueError("Unexpected workflow value %s" % str(cur)) with open(out_file, "w") as out_handle: out["inputs"] = [x for x in out["inputs"] if x["id"] in used_inputs] yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False) sample_json = "%s-samples.json" % utils.splitext_plus(out_file)[0] out_clean = _clean_final_outputs(copy.deepcopy({k: v for k, v in keyvals.items() if k in used_inputs}), integrations) with open(sample_json, "w") as out_handle: json.dump(out_clean, out_handle, sort_keys=True, indent=4, separators=(',', ': ')) return out_file, sample_json
def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None, add_container_tag=None):
    """Output a CWL description with sub-workflows and steps.
    """
    # Decide how container image tags get attached to generated tools
    if add_container_tag is None:
        container_tags = None
    elif add_container_tag.lower() == "quay_lookup":
        container_tags = {}
    else:
        container_tags = collections.defaultdict(lambda: add_container_tag)
    step_dir = utils.safe_makedir(os.path.join(out_dir, "steps"))
    get_retriever = GetRetriever(integrations, samples)
    variables, keyvals = _flatten_samples(samples, out_file, get_retriever)
    cur_remotes = _get_cur_remotes(keyvals)
    file_estimates = _calc_input_estimates(keyvals, get_retriever)
    out = _cwl_workflow_template(variables)
    parent_wfs = []
    step_parallelism = {}
    steps, wfoutputs = workflow_fn(samples)
    used_inputs = set([])
    for cur in workflow.generate(variables, steps, wfoutputs):
        if cur[0] == "step":
            # Standard step: write a tool definition and reference it from the workflow
            _, name, parallel, inputs, outputs, image, programs, disk, cores, no_files = cur
            step_file = _write_tool(step_dir, name, inputs, outputs, parallel, image, programs,
                                    file_estimates, disk, cores, samples, cur_remotes, no_files,
                                    container_tags)
            out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel, step_parallelism))
            used_inputs |= set(x["id"] for x in inputs)
        elif cur[0] == "expressiontool":
            # Lightweight expression-only step
            _, name, inputs, outputs, expression, parallel = cur
            step_file = _write_expressiontool(step_dir, name, inputs, outputs, expression, parallel)
            out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel, step_parallelism))
            used_inputs |= set(x["id"] for x in inputs)
        elif cur[0] == "upload":
            # Collect final workflow outputs
            for output in cur[1]:
                wf_output = copy.deepcopy(output)
                if "outputSource" not in wf_output:
                    wf_output["outputSource"] = wf_output.pop("source")
                wf_output = _clean_record(wf_output)
                # Avoid input/output naming clashes
                if wf_output["id"] in used_inputs:
                    wf_output["id"] = "%s_out" % wf_output["id"]
                out["outputs"].append(wf_output)
        elif cur[0] == "wf_start":
            # Begin a nested sub-workflow; stash the current parent workflow
            parent_wfs.append(out)
            out = _cwl_workflow_template(cur[1])
        elif cur[0] == "wf_finish":
            # Finish the nested sub-workflow: write it out and add it as a step in the parent
            _, name, parallel, inputs, outputs, scatter = cur
            wf_out_file = "wf-%s.cwl" % name
            with open(os.path.join(out_dir, wf_out_file), "w") as out_handle:
                yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
            out = parent_wfs.pop(-1)
            out["steps"].append(_step_template(name, wf_out_file, inputs, outputs, parallel,
                                               step_parallelism, scatter))
            used_inputs |= set(x["id"] for x in inputs)
        else:
            raise ValueError("Unexpected workflow value %s" % str(cur))
        step_parallelism[name] = parallel
    # Write the top-level workflow, keeping only inputs actually referenced by steps
    with open(out_file, "w") as out_handle:
        out["inputs"] = [x for x in out["inputs"] if x["id"] in used_inputs]
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    # Write the matching sample inputs as JSON for feeding the workflow to a runner
    sample_json = "%s-samples.json" % utils.splitext_plus(out_file)[0]
    out_clean = _clean_final_outputs(copy.deepcopy({k: v for k, v in keyvals.items() if k in used_inputs}),
                                     get_retriever)
    with open(sample_json, "w") as out_handle:
        json.dump(out_clean, out_handle, sort_keys=True, indent=4, separators=(',', ': '))
    return out_file, sample_json
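# --- Hypothetical usage sketch (illustration only; not part of the original module) ---
# Shows the calling convention shared by the versions above: `workflow_fn` builds the
# (steps, outputs) pair consumed by workflow.generate (the later versions pass it the
# flattened samples), and prep_cwl returns the top-level CWL file plus a samples JSON
# that can be fed to a CWL runner. The wrapper name and output paths below are
# placeholders; `utils`, `os` and `prep_cwl` are assumed from the surrounding module.
def example_prep_cwl_call(samples, build_workflow_fn, integrations=None):
    out_dir = utils.safe_makedir("cwl_out")  # placeholder output directory
    out_file = os.path.join(out_dir, "main-run.cwl")  # placeholder top-level CWL name
    return prep_cwl(samples, build_workflow_fn, out_dir, out_file, integrations=integrations)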