コード例 #1
0
def run(items):
    """Run PureCN for the tumor/normal pair in a batch.

    When ``algorithm -> background -> cnv_reference -> purecn_normaldb`` is
    set in the tumor configuration, the normal-database workflow is run and
    its output is passed on to Dx calling; otherwise ``_run_purecn`` is used.

    Returns ``items`` unchanged when no pairing is found. Otherwise returns a
    list holding the normal sample (when present) followed by the tumor
    sample; a non-empty PureCN result is appended to the tumor's "sv" list.
    """
    # PairedInfo named tuple describing one T/N pair (or tumor only).
    paired = vcfutils.get_paired(items)
    if not paired:
        batch_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % batch_names)
        return items

    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    normaldb = tz.get_in(["algorithm", "background", "cnv_reference", "purecn_normaldb"],
                         paired.tumor_config)
    # The right way of running PureCN is with a normaldb.
    if normaldb:
        purecn_out = _run_purecn_dx(_run_purecn_normaldb(paired, work_dir), paired)
    else:
        purecn_out = _run_purecn(paired, work_dir)

    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            from bcbio.structural import titancna
            purecn_out["vrn_file"] = titancna.to_vcf(
                purecn_out["loh"], "PureCN", _get_header, _loh_to_vcf, tumor, sep=",")
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, tumor)
        tumor.setdefault("sv", []).append(purecn_out)

    if paired.normal_data:
        return [paired.normal_data, tumor]
    return [tumor]
コード例 #2
0
ファイル: gatkcnv.py プロジェクト: dauss75/bcbio-nextgen
def _run_paired(paired):
    """Run the somatic copy number calling pipeline for a tumor sample.

    Models segments from the tumor's normalized depth bins, calls copy
    numbers on the resulting segments, and appends a "gatk-cnv" entry (call
    file, VCF, segments and plot) to the tumor's "sv" list.

    Returns a list with the normal sample (when present) followed by the
    tumor sample.
    """
    from bcbio.structural import titancna

    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    normalized_bins = tz.get_in(["depth", "bins", "normalized"], tumor)
    seg_files = model_segments(normalized_bins, work_dir, paired)
    call_file = call_copy_numbers(seg_files["seg"], work_dir, tumor)

    # Create the "sv" list before the VCF/plot helpers are handed the sample.
    sv_calls = tumor.setdefault("sv", [])
    vrn_file = titancna.to_vcf(call_file, "GATK4-CNV", _get_seg_header, _seg_to_vcf, tumor)
    plot = plot_model_segments(seg_files, work_dir, tumor)
    sv_calls.append({"variantcaller": "gatk-cnv",
                     "call_file": call_file,
                     "vrn_file": vrn_file,
                     "seg": seg_files["seg"],
                     "plot": plot})

    return [paired.normal_data, tumor] if paired.normal_data else [tumor]
コード例 #3
0
ファイル: purecn.py プロジェクト: chapmanb/bcbio-nextgen
def run(items):
    """Run PureCN for the tumor/normal pair in a batch.

    Returns ``items`` unchanged when no pairing is found. Otherwise returns a
    list holding the normal sample (when present) followed by the tumor
    sample; a non-empty PureCN result is appended to the tumor's "sv" list.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        batch_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % batch_names)
        return items

    tumor = paired.tumor_data
    purecn_out = _run_purecn(paired, _sv_workdir(tumor))
    # XXX Dx calling (_run_purecn_dx) is left out for now: it currently hits
    # edge case failures and needs additional testing.

    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            from bcbio.structural import titancna
            purecn_out["vrn_file"] = titancna.to_vcf(
                purecn_out["loh"], "PureCN", _get_header, _loh_to_vcf, tumor, sep=",")
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, tumor)
        tumor.setdefault("sv", []).append(purecn_out)

    if paired.normal_data:
        return [paired.normal_data, tumor]
    return [tumor]
コード例 #4
0
def run(items):
    """Run PureCN on a batch and record the result on the tumor sample.

    Batches without a tumor/normal pairing are logged and returned as-is.
    For paired batches the return value is ``[normal, tumor]`` (or just
    ``[tumor]`` when there is no normal), and any PureCN output is appended
    to the tumor's "sv" list.
    """
    paired = vcfutils.get_paired(items)
    if not paired:
        names = [dd.get_sample_name(d) for d in items]
        logger.info("Skipping PureCN; no somatic tumor calls in batch: %s" % " ".join(names))
        return items

    tumor_data = paired.tumor_data
    work_dir = _sv_workdir(tumor_data)
    purecn_out = _run_purecn(paired, work_dir)
    # XXX Dx calling (_run_purecn_dx) stays disabled: edge case failures are
    # currently being found and it needs additional testing.

    samples = [paired.normal_data] if paired.normal_data else []
    if purecn_out:
        purecn_out["variantcaller"] = "purecn"
        if "loh" in purecn_out:
            from bcbio.structural import titancna
            loh_vcf = titancna.to_vcf(purecn_out["loh"], "PureCN", _get_header,
                                      _loh_to_vcf, tumor_data, sep=",")
            purecn_out["vrn_file"] = loh_vcf
            purecn_out["lohsummary"] = loh.summary_status(purecn_out, tumor_data)
        tumor_data.setdefault("sv", []).append(purecn_out)
    samples.append(tumor_data)
    return samples
コード例 #5
0
ファイル: purple.py プロジェクト: chapmanb/bcbio-nextgen
def _run_purple(paired, het_file, depth_file, vrn_files, work_dir):
    """Run PURPLE with pre-calculated AMBER and COBALT compatible inputs.

    Args:
        paired: tumor/normal pairing exposing ``tumor_data`` and ``normal_data``.
        het_file: passed as ``-baf``; its directory is passed as ``-amber``.
        depth_file: its directory is passed as ``-cobalt``.
        vrn_files: optional list of dicts; when non-empty, the first entry's
            "vrn_file" is passed as ``-somatic_vcf``.
        work_dir: base directory; outputs are placed in its "purple" subdirectory.

    Returns:
        dict with "variantcaller", "call_file", "vrn_file", "plot" (existing
        plot images by name) and "metrics" (columns of the purity file,
        converted to float where possible).
    """
    tumor_name = dd.get_sample_name(paired.tumor_data)
    purple_dir = utils.safe_makedir(os.path.join(work_dir, "purple"))
    out_file = os.path.join(purple_dir, "%s.purple.cnv" % tumor_name)
    if not utils.file_exists(out_file):
        with file_transaction(paired.tumor_data, out_file) as tx_out_file:
            tx_dir = os.path.dirname(tx_out_file)
            cmd = ["PURPLE"] + _get_jvm_opts(tx_out_file, paired.tumor_data) + \
                  ["-amber", os.path.dirname(het_file), "-baf", het_file,
                   "-cobalt", os.path.dirname(depth_file),
                   "-gc_profile", dd.get_variation_resources(paired.tumor_data)["gc_profile"],
                   "-output_dir", tx_dir,
                   "-ref_genome", "hg38" if dd.get_genome_build(paired.tumor_data) == "hg38" else "hg19",
                   "-run_dir", work_dir,
                   "-threads", dd.get_num_cores(paired.tumor_data),
                   "-tumor_sample", tumor_name,
                   "-ref_sample", dd.get_sample_name(paired.normal_data)]
            if vrn_files:
                cmd += ["-somatic_vcf", vrn_files[0]["vrn_file"]]
            # Avoid X11 display errors when writing plots
            cmd = "unset DISPLAY && %s" % " ".join([str(x) for x in cmd])
            do.run(cmd, "PURPLE: purity and ploidy estimation")
            # Move the additional outputs out of the transaction directory;
            # the transactional output file itself is left in place.
            tx_base = os.path.basename(tx_out_file)
            for f in os.listdir(tx_dir):
                if f != tx_base:
                    shutil.move(os.path.join(tx_dir, f), os.path.join(purple_dir, f))
    out_file_export = os.path.join(purple_dir, "%s-purple-cnv.tsv" % tumor_name)
    if not utils.file_exists(out_file_export):
        utils.symlink_plus(out_file, out_file_export)
    out = {"variantcaller": "purple", "call_file": out_file_export,
           "vrn_file": titancna.to_vcf(out_file_export, "PURPLE", _get_header, _export_to_vcf,
                                       paired.tumor_data),
           "plot": {}, "metrics": {}}
    for name, ext in [("copy_number", "copyNumber"), ("minor_allele", "minor_allele"), ("variant", "variant")]:
        plot_file = os.path.join(purple_dir, "plot", "%s.%s.png" % (tumor_name, ext))
        if os.path.exists(plot_file):
            out["plot"][name] = plot_file
    purity_file = os.path.join(purple_dir, "%s.purple.purity" % tumor_name)
    with open(purity_file) as in_handle:
        # readline() keeps the line terminator; remove it so the last column's
        # name (and a non-numeric last value) does not end in a newline.
        header = in_handle.readline().rstrip("\r\n").replace("#", "").split("\t")
        vals = in_handle.readline().rstrip("\r\n").split("\t")
    for h, v in zip(header, vals):
        try:
            v = float(v)
        except ValueError:
            # Non-numeric columns are kept as their original strings.
            pass
        out["metrics"][h] = v
    return out
コード例 #6
0
ファイル: gatkcnv.py プロジェクト: chapmanb/bcbio-nextgen
def _run_paired(paired):
    """Run the somatic copy number calling pipeline on a tumor sample.

    Segments the tumor's normalized depth bins, calls copy numbers, and
    records a "gatk-cnv" entry with the call file, VCF, segments and plot in
    the tumor's "sv" list. Returns the normal sample (when present) followed
    by the tumor sample.
    """
    from bcbio.structural import titancna

    tumor_data = paired.tumor_data
    work_dir = _sv_workdir(tumor_data)
    seg_files = model_segments(
        tz.get_in(["depth", "bins", "normalized"], tumor_data), work_dir, paired)
    seg_file = seg_files["seg"]
    call_file = call_copy_numbers(seg_file, work_dir, tumor_data)

    samples = [paired.normal_data] if paired.normal_data else []
    # The "sv" list is put in place before the sample reaches the VCF and plot helpers.
    sv_calls = tumor_data.setdefault("sv", [])
    call_info = {"variantcaller": "gatk-cnv", "call_file": call_file}
    call_info["vrn_file"] = titancna.to_vcf(call_file, "GATK4-CNV", _get_seg_header,
                                            _seg_to_vcf, tumor_data)
    call_info["seg"] = seg_files["seg"]
    call_info["plot"] = plot_model_segments(seg_files, work_dir, tumor_data)
    sv_calls.append(call_info)
    samples.append(tumor_data)
    return samples
コード例 #7
0
def _run_purple(paired, het_file, depth_file, vrn_files, work_dir):
    """Run PURPLE with pre-calculated AMBER and COBALT compatible inputs.

    Args:
        paired: tumor/normal pairing exposing ``tumor_data`` and ``normal_data``.
        het_file: passed as ``-baf``; its directory is passed as ``-amber``.
        depth_file: its directory is passed as ``-cobalt``.
        vrn_files: optional list of dicts; when non-empty, the first entry's
            "vrn_file" is passed as ``-somatic_vcf``.
        work_dir: base directory; outputs are placed in its "purple" subdirectory.

    Returns:
        dict with "variantcaller", "call_file", "vrn_file", "plot" (existing
        plot images by name) and "metrics" (columns of the purity file,
        converted to float where possible).
    """
    tumor_name = dd.get_sample_name(paired.tumor_data)
    purple_dir = utils.safe_makedir(os.path.join(work_dir, "purple"))
    out_file = os.path.join(purple_dir, "%s.purple.cnv" % tumor_name)
    if not utils.file_exists(out_file):
        with file_transaction(paired.tumor_data, out_file) as tx_out_file:
            tx_dir = os.path.dirname(tx_out_file)
            cmd = ["PURPLE"] + _get_jvm_opts(tx_out_file, paired.tumor_data) + \
                  ["-amber", os.path.dirname(het_file), "-baf", het_file,
                   "-cobalt", os.path.dirname(depth_file),
                   "-gc_profile", dd.get_variation_resources(paired.tumor_data)["gc_profile"],
                   "-output_dir", tx_dir,
                   "-ref_genome", "hg38" if dd.get_genome_build(paired.tumor_data) == "hg38" else "hg19",
                   "-run_dir", work_dir,
                   "-threads", dd.get_num_cores(paired.tumor_data),
                   "-tumor_sample", tumor_name,
                   "-ref_sample", dd.get_sample_name(paired.normal_data)]
            if vrn_files:
                cmd += ["-somatic_vcf", vrn_files[0]["vrn_file"]]
            # Avoid X11 display errors when writing plots
            cmd = "unset DISPLAY && %s" % " ".join([str(x) for x in cmd])
            do.run(cmd, "PURPLE: purity and ploidy estimation")
            # Move the additional outputs out of the transaction directory;
            # the transactional output file itself is left in place.
            tx_base = os.path.basename(tx_out_file)
            for f in os.listdir(tx_dir):
                if f != tx_base:
                    shutil.move(os.path.join(tx_dir, f),
                                os.path.join(purple_dir, f))
    out_file_export = os.path.join(purple_dir,
                                   "%s-purple-cnv.tsv" % tumor_name)
    if not utils.file_exists(out_file_export):
        utils.symlink_plus(out_file, out_file_export)
    out = {
        "variantcaller": "purple",
        "call_file": out_file_export,
        "vrn_file": titancna.to_vcf(out_file_export, "PURPLE", _get_header,
                                    _export_to_vcf, paired.tumor_data),
        "plot": {},
        "metrics": {},
    }
    for name, ext in [("copy_number", "copyNumber"),
                      ("minor_allele", "minor_allele"),
                      ("variant", "variant")]:
        plot_file = os.path.join(purple_dir, "plot",
                                 "%s.%s.png" % (tumor_name, ext))
        if os.path.exists(plot_file):
            out["plot"][name] = plot_file
    purity_file = os.path.join(purple_dir, "%s.purple.purity" % tumor_name)
    with open(purity_file) as in_handle:
        # readline() keeps the line terminator; remove it so the last column's
        # name (and a non-numeric last value) does not end in a newline.
        header = in_handle.readline().rstrip("\r\n").replace("#", "").split("\t")
        vals = in_handle.readline().rstrip("\r\n").split("\t")
    for h, v in zip(header, vals):
        try:
            v = float(v)
        except ValueError:
            # Non-numeric columns are kept as their original strings.
            pass
        out["metrics"][h] = v
    return out