Code example #1 (score: 0)
File: create.py — project: mutual-ai/bcbio-nextgen
def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None):
    """Output a CWL description with sub-workflows and steps.

    Walks the generated workflow, writing one CWL tool file per step into
    ``out_dir/steps`` and one CWL file per nested sub-workflow, then dumps
    the top-level workflow to ``out_file``.

    Returns a (workflow CWL path, flattened samples JSON path) tuple.
    """
    step_dir = utils.safe_makedir(os.path.join(out_dir, "steps"))
    sample_json, variables, keyvals = _flatten_samples(samples, out_file)
    file_estimates = _calc_input_estimates(keyvals, integrations)
    out = _cwl_workflow_template(variables)
    # Stack of enclosing workflows while we are inside a sub-workflow.
    wf_stack = []
    steps, wfoutputs = workflow_fn()
    for item in workflow.generate(variables, steps, wfoutputs):
        tag = item[0]
        if tag == "step":
            _, name, parallel, inputs, outputs, programs, disk = item
            tool_file = _write_tool(step_dir, name, inputs, outputs, parallel,
                                    programs, file_estimates, disk, samples)
            out["steps"].append(_step_template(name, tool_file, inputs, outputs, parallel))
        elif tag == "upload":
            out["outputs"] = item[1]
        elif tag == "wf_start":
            # Descend: remember the parent and start a fresh sub-workflow.
            wf_stack.append(out)
            out = _cwl_workflow_template(item[1])
        elif tag == "wf_finish":
            _, name, parallel, inputs, outputs = item
            wf_out_file = "wf-%s.cwl" % name
            with open(os.path.join(out_dir, wf_out_file), "w") as out_handle:
                yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
            # Ascend: reference the finished sub-workflow as a step of its parent.
            out = wf_stack.pop()
            out["steps"].append(_step_template(name, wf_out_file, inputs, outputs, parallel))
        else:
            raise ValueError("Unexpected workflow value %s" % str(item))

    with open(out_file, "w") as out_handle:
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    return out_file, sample_json
Code example #2 (score: 0)
def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None):
    """Output a CWL description with sub-workflows and steps.

    Writes one CWL tool per step and one CWL file per nested sub-workflow,
    dumps the top-level workflow to ``out_file``, and writes the flattened
    sample inputs -- restricted to inputs actually consumed by some step --
    to a sibling ``*-samples.json`` file.

    Returns a (workflow CWL path, samples JSON path) tuple.
    """
    step_dir = utils.safe_makedir(os.path.join(out_dir, "steps"))
    get_retriever = GetRetriever(integrations, samples)
    variables, keyvals = _flatten_samples(samples, out_file, get_retriever)
    cur_remotes = _get_cur_remotes(keyvals)
    file_estimates = _calc_input_estimates(keyvals, get_retriever)
    out = _cwl_workflow_template(variables)
    parent_wfs = []  # stack of enclosing workflows while inside sub-workflows
    step_parallelism = {}  # parallel type of each step seen so far, by name
    steps, wfoutputs = workflow_fn(samples)
    used_inputs = set()
    # BUG FIX: step_parallelism was previously updated once after the whole
    # if/elif chain, which raised NameError when the first generated item was
    # "upload" or "wf_start" (neither binds name/parallel) and re-recorded a
    # stale step after them otherwise.  Record it only in the branches that
    # actually bind name/parallel; observable behavior is otherwise identical.
    for cur in workflow.generate(variables, steps, wfoutputs):
        if cur[0] == "step":
            _, name, parallel, inputs, outputs, image, programs, disk, cores = cur
            step_file = _write_tool(step_dir, name, inputs, outputs, parallel, image, programs,
                                    file_estimates, disk, cores, samples, cur_remotes)
            out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel,
                                               step_parallelism))
            used_inputs |= set(x["id"] for x in inputs)
            step_parallelism[name] = parallel
        elif cur[0] == "expressiontool":
            _, name, inputs, outputs, expression, parallel = cur
            step_file = _write_expressiontool(step_dir, name, inputs, outputs, expression, parallel)
            out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel,
                                               step_parallelism))
            used_inputs |= set(x["id"] for x in inputs)
            step_parallelism[name] = parallel
        elif cur[0] == "upload":
            for output in cur[1]:
                wf_output = copy.deepcopy(output)
                # Normalize pre-CWL-1.0 "source" keys to "outputSource".
                if "outputSource" not in wf_output:
                    wf_output["outputSource"] = wf_output.pop("source")
                wf_output = _clean_record(wf_output)
                out["outputs"].append(wf_output)
        elif cur[0] == "wf_start":
            # Descend into a sub-workflow; remember the parent.
            parent_wfs.append(out)
            out = _cwl_workflow_template(cur[1])
        elif cur[0] == "wf_finish":
            _, name, parallel, inputs, outputs, scatter = cur
            wf_out_file = "wf-%s.cwl" % name
            with open(os.path.join(out_dir, wf_out_file), "w") as out_handle:
                yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
            # Ascend and attach the finished sub-workflow as a parent step.
            out = parent_wfs.pop(-1)
            out["steps"].append(_step_template(name, wf_out_file, inputs, outputs, parallel,
                                               step_parallelism, scatter))
            used_inputs |= set(x["id"] for x in inputs)
            step_parallelism[name] = parallel
        else:
            raise ValueError("Unexpected workflow value %s" % str(cur))

    with open(out_file, "w") as out_handle:
        # Drop declared inputs that no step ever consumed.
        out["inputs"] = [x for x in out["inputs"] if x["id"] in used_inputs]
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    sample_json = "%s-samples.json" % utils.splitext_plus(out_file)[0]
    out_clean = _clean_final_outputs(copy.deepcopy({k: v for k, v in keyvals.items()
                                                    if k in used_inputs}),
                                     get_retriever)
    with open(sample_json, "w") as out_handle:
        json.dump(out_clean, out_handle, sort_keys=True, indent=4, separators=(',', ': '))
    return out_file, sample_json
Code example #3 (score: 0)
File: create.py — project: ohofmann/bcbio-nextgen
def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None):
    """Output a CWL description with sub-workflows and steps.

    Writes per-step CWL tool files into ``out_dir/steps`` and one CWL file
    per nested sub-workflow, then dumps the top-level workflow to
    ``out_file``.

    Returns a (workflow CWL path, flattened samples JSON path) tuple.
    """
    step_dir = utils.safe_makedir(os.path.join(out_dir, "steps"))
    sample_json, variables, keyvals = _flatten_samples(samples, out_file,
                                                       integrations)
    file_estimates = _calc_input_estimates(keyvals, integrations)
    out = _cwl_workflow_template(variables)
    # Stack of enclosing workflows while we are inside a sub-workflow.
    wf_stack = []
    steps, wfoutputs = workflow_fn(samples)
    for item in workflow.generate(variables, steps, wfoutputs):
        tag = item[0]
        if tag == "step":
            _, name, parallel, inputs, outputs, image, programs, disk, cores = item
            tool_file = _write_tool(step_dir, name, inputs, outputs, parallel,
                                    image, programs, file_estimates, disk,
                                    cores, samples)
            out["steps"].append(
                _step_template(name, tool_file, inputs, outputs, parallel))
        elif tag == "upload":
            for output in item[1]:
                wf_output = copy.deepcopy(output)
                # Normalize pre-CWL-1.0 "source" keys to "outputSource".
                if "outputSource" not in wf_output:
                    wf_output["outputSource"] = wf_output.pop("source")
                out["outputs"].append(_clean_record(wf_output))
        elif tag == "wf_start":
            # Descend: remember the parent and start a fresh sub-workflow.
            wf_stack.append(out)
            out = _cwl_workflow_template(item[1])
        elif tag == "wf_finish":
            _, name, parallel, inputs, outputs, scatter = item
            wf_out_file = "wf-%s.cwl" % name
            with open(os.path.join(out_dir, wf_out_file), "w") as out_handle:
                yaml.safe_dump(out, out_handle, default_flow_style=False,
                               allow_unicode=False)
            # Ascend: attach the finished sub-workflow as a parent step.
            out = wf_stack.pop()
            out["steps"].append(
                _step_template(name, wf_out_file, inputs, outputs, parallel,
                               scatter))
        else:
            raise ValueError("Unexpected workflow value %s" % str(item))

    with open(out_file, "w") as out_handle:
        yaml.safe_dump(out, out_handle, default_flow_style=False,
                       allow_unicode=False)
    return out_file, sample_json
Code example #4 (score: 0)
File: create.py — project: biocyberman/bcbio-nextgen
def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None):
    """Output a CWL description with sub-workflows and steps.

    Writes per-step CWL tools and per-sub-workflow CWL files, dumps the
    top-level workflow to ``out_file``, and writes the flattened sample
    inputs -- restricted to inputs actually consumed by some step -- to a
    sibling ``*-samples.json`` file.

    Returns a (workflow CWL path, samples JSON path) tuple.
    """
    step_dir = utils.safe_makedir(os.path.join(out_dir, "steps"))
    variables, keyvals = _flatten_samples(samples, out_file, integrations)
    file_estimates = _calc_input_estimates(keyvals, integrations)
    out = _cwl_workflow_template(variables)
    # Stack of enclosing workflows while we are inside a sub-workflow.
    wf_stack = []
    steps, wfoutputs = workflow_fn(samples)
    used_inputs = set()
    for item in workflow.generate(variables, steps, wfoutputs):
        tag = item[0]
        if tag == "step":
            _, name, parallel, inputs, outputs, image, programs, disk, cores = item
            tool_file = _write_tool(step_dir, name, inputs, outputs, parallel, image,
                                    programs, file_estimates, disk, cores, samples)
            out["steps"].append(_step_template(name, tool_file, inputs, outputs, parallel))
            used_inputs.update(x["id"] for x in inputs)
        elif tag == "upload":
            for output in item[1]:
                wf_output = copy.deepcopy(output)
                # Normalize pre-CWL-1.0 "source" keys to "outputSource".
                if "outputSource" not in wf_output:
                    wf_output["outputSource"] = wf_output.pop("source")
                out["outputs"].append(_clean_record(wf_output))
        elif tag == "wf_start":
            # Descend: remember the parent and start a fresh sub-workflow.
            wf_stack.append(out)
            out = _cwl_workflow_template(item[1])
        elif tag == "wf_finish":
            _, name, parallel, inputs, outputs, scatter = item
            wf_out_file = "wf-%s.cwl" % name
            with open(os.path.join(out_dir, wf_out_file), "w") as out_handle:
                yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
            # Ascend: attach the finished sub-workflow as a parent step.
            out = wf_stack.pop()
            out["steps"].append(_step_template(name, wf_out_file, inputs, outputs, parallel,
                                               scatter))
            used_inputs.update(x["id"] for x in inputs)
        else:
            raise ValueError("Unexpected workflow value %s" % str(item))

    with open(out_file, "w") as out_handle:
        # Drop declared inputs that no step ever consumed.
        out["inputs"] = [x for x in out["inputs"] if x["id"] in used_inputs]
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    sample_json = "%s-samples.json" % utils.splitext_plus(out_file)[0]
    out_clean = _clean_final_outputs(copy.deepcopy({k: v for k, v in keyvals.items()
                                                    if k in used_inputs}),
                                     integrations)
    with open(sample_json, "w") as out_handle:
        json.dump(out_clean, out_handle, sort_keys=True, indent=4, separators=(',', ': '))
    return out_file, sample_json
Code example #5 (score: 0)
File: create.py — project: chapmanb/bcbio-nextgen
def prep_cwl(samples, workflow_fn, out_dir, out_file, integrations=None,
             add_container_tag=None):
    """Output a CWL description with sub-workflows and steps.

    Writes one CWL tool per step and one CWL file per nested sub-workflow,
    dumps the top-level workflow to ``out_file``, and writes the flattened
    sample inputs -- restricted to inputs actually consumed by some step --
    to a sibling ``*-samples.json`` file.

    add_container_tag: None leaves container references untouched;
        "quay_lookup" (case-insensitive) passes an empty dict so tags get
        resolved downstream; any other string is applied as the tag for
        every container.

    Returns a (workflow CWL path, samples JSON path) tuple.
    """
    if add_container_tag is None:
        container_tags = None
    elif add_container_tag.lower() == "quay_lookup":
        container_tags = {}
    else:
        # Any container name maps to the single user-supplied tag.
        container_tags = collections.defaultdict(lambda: add_container_tag)
    step_dir = utils.safe_makedir(os.path.join(out_dir, "steps"))
    get_retriever = GetRetriever(integrations, samples)
    variables, keyvals = _flatten_samples(samples, out_file, get_retriever)
    cur_remotes = _get_cur_remotes(keyvals)
    file_estimates = _calc_input_estimates(keyvals, get_retriever)
    out = _cwl_workflow_template(variables)
    parent_wfs = []  # stack of enclosing workflows while inside sub-workflows
    step_parallelism = {}  # parallel type of each step seen so far, by name
    steps, wfoutputs = workflow_fn(samples)
    used_inputs = set()
    # BUG FIX: step_parallelism was previously updated once after the whole
    # if/elif chain, which raised NameError when the first generated item was
    # "upload" or "wf_start" (neither binds name/parallel) and re-recorded a
    # stale step after them otherwise.  Record it only in the branches that
    # actually bind name/parallel; observable behavior is otherwise identical.
    for cur in workflow.generate(variables, steps, wfoutputs):
        if cur[0] == "step":
            _, name, parallel, inputs, outputs, image, programs, disk, cores, no_files = cur
            step_file = _write_tool(step_dir, name, inputs, outputs, parallel, image, programs,
                                    file_estimates, disk, cores, samples, cur_remotes, no_files,
                                    container_tags)
            out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel,
                                               step_parallelism))
            used_inputs |= set(x["id"] for x in inputs)
            step_parallelism[name] = parallel
        elif cur[0] == "expressiontool":
            _, name, inputs, outputs, expression, parallel = cur
            step_file = _write_expressiontool(step_dir, name, inputs, outputs, expression, parallel)
            out["steps"].append(_step_template(name, step_file, inputs, outputs, parallel,
                                               step_parallelism))
            used_inputs |= set(x["id"] for x in inputs)
            step_parallelism[name] = parallel
        elif cur[0] == "upload":
            for output in cur[1]:
                wf_output = copy.deepcopy(output)
                # Normalize pre-CWL-1.0 "source" keys to "outputSource".
                if "outputSource" not in wf_output:
                    wf_output["outputSource"] = wf_output.pop("source")
                wf_output = _clean_record(wf_output)
                # Avoid input/output naming clashes
                if wf_output["id"] in used_inputs:
                    wf_output["id"] = "%s_out" % wf_output["id"]
                out["outputs"].append(wf_output)
        elif cur[0] == "wf_start":
            # Descend into a sub-workflow; remember the parent.
            parent_wfs.append(out)
            out = _cwl_workflow_template(cur[1])
        elif cur[0] == "wf_finish":
            _, name, parallel, inputs, outputs, scatter = cur
            wf_out_file = "wf-%s.cwl" % name
            with open(os.path.join(out_dir, wf_out_file), "w") as out_handle:
                yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
            # Ascend and attach the finished sub-workflow as a parent step.
            out = parent_wfs.pop(-1)
            out["steps"].append(_step_template(name, wf_out_file, inputs, outputs, parallel,
                                               step_parallelism, scatter))
            used_inputs |= set(x["id"] for x in inputs)
            step_parallelism[name] = parallel
        else:
            raise ValueError("Unexpected workflow value %s" % str(cur))

    with open(out_file, "w") as out_handle:
        # Drop declared inputs that no step ever consumed.
        out["inputs"] = [x for x in out["inputs"] if x["id"] in used_inputs]
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    sample_json = "%s-samples.json" % utils.splitext_plus(out_file)[0]
    out_clean = _clean_final_outputs(copy.deepcopy({k: v for k, v in keyvals.items()
                                                    if k in used_inputs}),
                                     get_retriever)
    with open(sample_json, "w") as out_handle:
        json.dump(out_clean, out_handle, sort_keys=True, indent=4, separators=(',', ': '))
    return out_file, sample_json