Exemple #1
0
def summarize_sv(items):
    """CWL target: summarize structural variants for multiple samples.

    Each input is first run through ``vcvalidate.summarize_grading`` and
    ``utils.to_single_data``. For every sample carrying an ``sv`` entry, a
    ``<batch-or-sample>-<variantcaller>`` name is built; the first VCF seen
    for each name is copied into ``<work_dir>/sv/calls``, passed through
    ``vcfutils.bgzip_and_index`` and recorded under ``sv -> calls``.
    Afterwards ``prioritize.run`` is invoked per sample group and the
    ``sv-prioritize`` outputs are collected under ``sv -> prioritize``.

    Returns a single-element list holding the summary dictionary with the
    keys ``sv`` and ``svvalidate``.

    XXX Need to support non-VCF output as tabix indexed output
    """
    items = [utils.to_single_data(graded)
             for graded in vcvalidate.summarize_grading(items, "svvalidate")]
    call_files = []
    priority_tsv = []
    priority_raw = []
    out = {
        "sv": {
            "calls": call_files,
            "prioritize": {"tsv": priority_tsv, "raw": priority_raw},
        },
        "svvalidate": vcvalidate.combine_validations(items, "svvalidate"),
    }

    # Standard callers
    seen_names = set()
    for data in items:
        if not data.get("sv"):
            continue
        sv_info = data["sv"]
        # Prefer the first batch name; fall back to the sample name.
        batch_names = dd.get_batches(data) or [dd.get_sample_name(data)]
        cur_name = "%s-%s" % (batch_names[0], sv_info["variantcaller"])
        vrn_file = sv_info.get("vrn_file")
        if not vrn_file:
            continue
        ext = utils.splitext_plus(vrn_file)[-1]
        # Only the first VCF per batch/caller combination is kept.
        if cur_name in seen_names or not ext.startswith(".vcf"):
            continue
        seen_names.add(cur_name)
        calls_dir = utils.safe_makedir(
            os.path.join(dd.get_work_dir(data), "sv", "calls"))
        copied = os.path.join(calls_dir, "%s%s" % (cur_name, ext))
        utils.copy_plus(vrn_file, copied)
        call_files.append(vcfutils.bgzip_and_index(copied, data["config"]))

    # prioritization
    for pdata in _group_by_sample(items):
        prioritized = prioritize.run([utils.deepish_copy(pdata)])[0]
        matches = [sv_call for sv_call in prioritized.get("sv", [])
                   if sv_call["variantcaller"] == "sv-prioritize"]
        if not matches:
            continue
        first = matches[0]
        priority_tsv.append(first["vrn_file"])
        priority_raw.extend(first["raw_files"].values())
    return [out]
Exemple #2
0
def summarize_sv(items):
    """CWL target: summarize structural variants for multiple samples.

    Inputs are normalized via ``vcvalidate.summarize_grading`` and
    ``utils.to_single_data``. For each sample with an ``sv`` entry:

    - a VCF ``vrn_file`` is copied into ``<work_dir>/sv/calls`` under the
      name from ``_useful_basename``, passed through
      ``vcfutils.bgzip_and_index`` and appended to ``sv -> calls`` (only
      the first file per name is kept);
    - truthy ``supplemental`` entries are appended to ``sv -> supplemental``.

    ``prioritize.run`` is then called per sample group and its
    ``sv-prioritize`` outputs are gathered under ``sv -> prioritize``.

    Returns a single-element list holding the summary dictionary with the
    keys ``sv`` and ``svvalidate``.

    XXX Need to support non-VCF output as tabix indexed output
    """
    items = [utils.to_single_data(graded)
             for graded in vcvalidate.summarize_grading(items, "svvalidate")]
    out = {
        "sv": {
            "calls": [],
            "supplemental": [],
            "prioritize": {"tsv": [], "raw": []},
        },
        "svvalidate": vcvalidate.combine_validations(items, "svvalidate"),
    }
    sv_out = out["sv"]
    prioritized_out = sv_out["prioritize"]

    # Standard callers
    seen_names = set()
    for data in items:
        if not data.get("sv"):
            continue
        sv_info = data["sv"]

        vrn_file = sv_info.get("vrn_file")
        if vrn_file:
            ext = utils.splitext_plus(vrn_file)[-1]
            cur_name = _useful_basename(data)
            # Only the first VCF for a given basename is copied and indexed.
            if cur_name not in seen_names and ext.startswith(".vcf"):
                seen_names.add(cur_name)
                calls_dir = utils.safe_makedir(
                    os.path.join(dd.get_work_dir(data), "sv", "calls"))
                copied = os.path.join(calls_dir, "%s%s" % (cur_name, ext))
                utils.copy_plus(vrn_file, copied)
                sv_out["calls"].append(
                    vcfutils.bgzip_and_index(copied, data["config"]))

        if sv_info.get("supplemental"):
            # Drop empty/None placeholders from the supplemental list.
            kept = [extra for extra in sv_info["supplemental"] if extra]
            sv_out["supplemental"].extend(kept)

    # prioritization
    for pdata in _group_by_sample(items):
        prioritized = prioritize.run([utils.deepish_copy(pdata)])[0]
        matches = [sv_call for sv_call in prioritized.get("sv", [])
                   if sv_call["variantcaller"] == "sv-prioritize"]
        if not matches:
            continue
        first = matches[0]
        prioritized_out["tsv"].append(first["vrn_file"])
        prioritized_out["raw"].extend(first["raw_files"].values())
    return [out]