Beispiel #1
0
def run(items):
    """Run PureCN for one batch and attach the result to the tumor sample.

    Args:
        items: sample data entries for the batch; handed to
            ``vcfutils.get_paired`` to pick out the tumor (and optional normal).

    Returns:
        ``items`` untouched when ``vcfutils.get_paired`` yields nothing.
        Otherwise a new list holding the normal sample data (when present)
        followed by the tumor sample data. A non-empty PureCN result is tagged
        with ``variantcaller="purecn"`` and appended to the tumor's ``"sv"`` list.
    """
    # paired is PairedInfo of one T/N pair (or just T) - a named tuple
    paired = vcfutils.get_paired(items)
    if not paired:
        sample_names = " ".join(dd.get_sample_name(d) for d in items)
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % sample_names)
        return items

    work_dir = _sv_workdir(paired.tumor_data)
    normaldb_keys = ["algorithm", "background", "cnv_reference", "purecn_normaldb"]
    if tz.get_in(normaldb_keys, paired.tumor_config):
        # The right way of running PureCN is with a normaldb; Dx runs on top of it.
        purecn_out = _run_purecn_dx(_run_purecn_normaldb(paired, work_dir), paired)
    else:
        purecn_out = _run_purecn(paired, work_dir)

    out = [paired.normal_data] if paired.normal_data else []
    tumor = paired.tumor_data
    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            # Function-level import kept from the original
            # (presumably avoids a circular import -- TODO confirm).
            from bcbio.structural import titancna
            purecn_out["vrn_file"] = titancna.to_vcf(
                purecn_out["loh"], "PureCN", _get_header, _loh_to_vcf, tumor, sep=",")
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, tumor)
        # Create the "sv" list on first use, then record this call.
        tumor.setdefault("sv", []).append(purecn_out)
    out.append(tumor)
    return out
Beispiel #2
0
def run(items):
    """Call PureCN on a tumor (optionally tumor/normal) batch.

    Args:
        items: sample data entries for the batch, resolved into a pair through
            ``vcfutils.get_paired``.

    Returns:
        The input ``items`` as-is when no pair is found. Otherwise a list of
        the normal data (if any) and then the tumor data; when PureCN produced
        output it is labelled ``variantcaller="purecn"`` and appended under the
        tumor's ``"sv"`` key.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        batch_names = [dd.get_sample_name(d) for d in items]
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % " ".join(batch_names))
        return items

    purecn_out = _run_purecn(paired, _sv_workdir(paired.tumor_data))
    # XXX Dx calling is currently skipped: edge case failures were found and it
    # needs additional testing before _run_purecn_dx is enabled here.

    out = []
    if paired.normal_data:
        out.append(paired.normal_data)

    tumor_data = paired.tumor_data
    if not purecn_out:
        # Nothing to attach; hand back the samples unchanged.
        out.append(tumor_data)
        return out

    purecn_out["variantcaller"] = "purecn"
    if "loh" in purecn_out:
        # Function-level import kept from the original
        # (presumably avoids a circular import -- TODO confirm).
        from bcbio.structural import titancna
        loh_vcf = titancna.to_vcf(purecn_out["loh"], "PureCN", _get_header,
                                  _loh_to_vcf, tumor_data, sep=",")
        purecn_out["vrn_file"] = loh_vcf
        purecn_out["lohsummary"] = loh.summary_status(purecn_out, tumor_data)

    if "sv" not in tumor_data:
        tumor_data["sv"] = []
    tumor_data["sv"].append(purecn_out)

    out.append(tumor_data)
    return out
Beispiel #3
0
def run(items):
    """Execute PureCN for a batch and register its output on the tumor sample.

    Args:
        items: sample data entries making up the batch; ``vcfutils.get_paired``
            selects the tumor and, when available, the normal.

    Returns:
        ``items`` itself if ``vcfutils.get_paired`` returns a falsy value.
        Otherwise the normal data (only when present) followed by the tumor
        data. A truthy PureCN result gets ``variantcaller="purecn"`` and is
        appended to ``tumor_data["sv"]``.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        names = " ".join([dd.get_sample_name(sample) for sample in items])
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % names)
        return items

    work_dir = _sv_workdir(paired.tumor_data)
    call = _run_purecn(paired, work_dir)
    # XXX Dx calling (_run_purecn_dx) stays disabled: edge case failures were
    # observed and it needs additional testing.

    normal_part = [paired.normal_data] if paired.normal_data else []
    tumor = paired.tumor_data

    if call:
        call["variantcaller"] = "purecn"
        has_loh = "loh" in call
        if has_loh:
            # Function-level import kept from the original
            # (presumably avoids a circular import -- TODO confirm).
            from bcbio.structural import titancna
            call["vrn_file"] = titancna.to_vcf(call["loh"], "PureCN", _get_header,
                                               _loh_to_vcf, tumor, sep=",")
            call["lohsummary"] = loh.summary_status(call, tumor)
        # Start the per-sample list of structural calls if this is the first one.
        if "sv" not in tumor:
            tumor["sv"] = []
        tumor["sv"].append(call)

    return normal_part + [tumor]
Beispiel #4
0
def _finalize_sv(solution_file, data):
    """Add output files from TitanCNA calling optional solution.

    The first two lines of ``solution_file`` are read as a tab-separated header
    row and value row and zipped into a mapping. When that mapping has a
    non-empty ``path`` entry, purity, ploidy, cellular prevalence, existing
    plot files, the segment file and derived VCF / LOH summary are added.

    Args:
        solution_file: path to the tab-separated optimal solution file.
        data: sample data passed through to ``to_vcf`` and
            ``loh.summary_status``.

    Returns:
        A dict that always contains ``{"variantcaller": "titancna"}``. With a
        usable solution path it also carries ``purity``, ``ploidy``,
        ``cellular_prevalence``, ``plot``, ``subclones``, ``hetsummary``,
        ``vrn_file`` and ``lohsummary``.

    Raises:
        KeyError: if a solution with a ``path`` lacks the ``purity``,
            ``ploidy`` or ``cellPrev`` columns.
    """
    out = {"variantcaller": "titancna"}
    with open(solution_file) as in_handle:
        header = in_handle.readline().strip("\r\n").split("\t")
        values = in_handle.readline().strip("\r\n").split("\t")
    solution = dict(zip(header, values))

    solution_path = solution.get("path")
    if not solution_path:
        # No solution recorded (missing column, empty value or empty file).
        return out

    out["purity"] = solution["purity"]
    out["ploidy"] = solution["ploidy"]
    out["cellular_prevalence"] = [x.strip() for x in solution["cellPrev"].split(",")]

    base = os.path.basename(solution_path)
    # Suffixes appended to the solution path; only files that exist are reported.
    plot_suffixes = [
        ("rplots", ".Rplots.pdf"),
        ("cf", "/%s_CF.pdf" % base),
        ("cna", "/%s_CNA.pdf" % base),
        ("loh", "/%s_LOH.pdf" % base),
    ]
    out["plot"] = {
        name: solution_path + suffix
        for name, suffix in plot_suffixes
        if os.path.exists(solution_path + suffix)
    }

    out["subclones"] = "%s.segs.txt" % solution_path
    out["hetsummary"] = solution_file
    out["vrn_file"] = to_vcf(out["subclones"], "TitanCNA", _get_header, _seg_to_vcf, data)
    # summary_status receives the dict built so far, including vrn_file.
    out["lohsummary"] = loh.summary_status(out, data)
    return out