def _run_purecn(paired, work_dir):
    """Call copy number with the PureCN.R wrapper on CNVkit pre-segmented input.

    The normalized bin file is read from ``paired.tumor_data`` under
    "depth" -> "bins" -> "normalized". Nothing is run when
    ``utils.file_uptodate(out["rds"], cnr_file)`` is true. Otherwise the bins
    are segmented with ``cnvkit.segment_from_cnr`` and PureCN.R is run inside
    a file transaction, using the first variant file returned by
    ``heterogeneity.get_variants``.

    Returns the ``out`` mapping provided by ``_get_purecn_files``.
    """
    tumor = paired.tumor_data
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    cnr_file = tz.get_in(["depth", "bins", "normalized"], tumor)
    if utils.file_uptodate(out["rds"], cnr_file):
        return out

    cnvkit_dir = utils.safe_makedir(os.path.join(work_dir, "cnvkit"))
    cnvkit_base = os.path.join(cnvkit_dir, dd.get_sample_name(tumor))
    seg_file = cnvkit.segment_from_cnr(cnr_file, tumor, cnvkit_base)
    # Imported locally, as in the original block.
    from bcbio import heterogeneity
    vcf_file = heterogeneity.get_variants(tumor)[0]["vrn_file"]
    with file_transaction(tumor, out_base) as tx_out_base:
        cmd = ["PureCN.R", "--seed", "42"]
        cmd += ["--out", tx_out_base, "--rds", "%s.rds" % tx_out_base]
        cmd += ["--sampleid", dd.get_sample_name(tumor)]
        cmd += ["--genome", dd.get_genome_build(tumor)]
        cmd += ["--vcf", vcf_file, "--tumor", cnr_file]
        cmd += ["--segfile", seg_file, "--funsegmentation", "none"]
        do.run(cmd, "PureCN copy number calling")
        # Move each expected output from the transaction directory to the
        # directory holding the final output base.
        tx_dir = os.path.dirname(tx_out_base)
        final_dir = os.path.dirname(out_base)
        for fname in all_files:
            shutil.move(os.path.join(tx_dir, fname),
                        os.path.join(final_dir, fname))
    return out
def run(items):
    """Run TitanCNA on a tumor batch and attach the selected solution.

    ``items`` is returned unchanged when ``vcfutils.get_paired`` finds no
    pairing or when ``_should_run`` rejects the heterozygous input file.
    Otherwise TitanCNA is run for ploidies 2-4 and 1-3 clusters, a solution
    is selected, and the finalized result is appended to the tumor sample's
    "sv" list.

    Returns a list holding the normal sample (when present) followed by the
    updated tumor sample.
    """
    from bcbio import heterogeneity
    paired = vcfutils.get_paired(items)
    if not paired:
        batch_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping TitanCNA; no somatic tumor calls in batch: %s" % batch_names)
        return items

    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    cn_file = _titan_cn_file(dd.get_normalized_depth(tumor), work_dir, tumor)
    het_file = _titan_het_file(heterogeneity.get_variants(tumor), work_dir, paired)
    if not _should_run(het_file):
        batch_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping TitanCNA; not enough input data: %s" % batch_names)
        return items

    ploidy_outdirs = []
    for ploidy in [2, 3, 4]:
        for num_clusters in [1, 2, 3]:
            out_dir = _run_titancna(cn_file, het_file, ploidy, num_clusters,
                                    work_dir, tumor)
        # NOTE(review): one entry per ploidy is assumed here (append after the
        # cluster loop); confirm against the file's original indentation.
        ploidy_outdirs.append((ploidy, out_dir))
    solution_file = _run_select_solution(ploidy_outdirs, work_dir, tumor)

    out = [paired.normal_data] if paired.normal_data else []
    tumor.setdefault("sv", []).append(_finalize_sv(solution_file, tumor))
    out.append(tumor)
    return out
def _run_purecn(paired, work_dir):
    """Call copy number with PureCN.R on CNVkit or GATK4 pre-segmented input.

    The segmentation helper is chosen by ``cnvkit.bin_approach`` for the
    tumor sample. Nothing is run when
    ``utils.file_uptodate(out["rds"], cnr_file)`` is true. The first
    non-germline variant file is passed through
    ``germline.filter_to_pass_and_reject`` before being handed to PureCN.R.

    Returns the ``out`` mapping provided by ``_get_purecn_files``.
    """
    tumor = paired.tumor_data
    segment_by_approach = {"cnvkit": _segment_normalized_cnvkit,
                           "gatk-cnv": _segment_normalized_gatk}
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    cnr_file = tz.get_in(["depth", "bins", "normalized"], tumor)
    if utils.file_uptodate(out["rds"], cnr_file):
        return out

    segment = segment_by_approach[cnvkit.bin_approach(tumor)]
    cnr_file, seg_file = segment(cnr_file, work_dir, paired)
    # Imported locally, as in the original block.
    from bcbio import heterogeneity
    variants = heterogeneity.get_variants(tumor, include_germline=False)
    vcf_file = germline.filter_to_pass_and_reject(variants[0]["vrn_file"], paired,
                                                  out_dir=work_dir)
    with file_transaction(tumor, out_base) as tx_out_base:
        # Use UCSC style naming for human builds to support BSgenome
        genome = dd.get_genome_build(tumor)
        if genome in ["GRCh37", "hg19"]:
            genome = "hg19"
        cmd = ["PureCN.R", "--seed", "42"]
        cmd += ["--out", tx_out_base, "--rds", "%s.rds" % tx_out_base]
        cmd += ["--sampleid", dd.get_sample_name(tumor)]
        cmd += ["--genome", genome]
        cmd += ["--vcf", vcf_file, "--tumor", cnr_file]
        cmd += ["--segfile", seg_file, "--funsegmentation", "Hclust"]
        cmd += ["--maxnonclonal", "0.3"]
        num_cores = dd.get_num_cores(tumor)
        if num_cores > 1:
            cmd += ["--cores", str(num_cores)]
        do.run(cmd, "PureCN copy number calling")
        # Move each expected output from the transaction directory to the
        # directory holding the final output base.
        tx_dir = os.path.dirname(tx_out_base)
        final_dir = os.path.dirname(out_base)
        for fname in all_files:
            shutil.move(os.path.join(tx_dir, fname),
                        os.path.join(final_dir, fname))
    return out
def _run_purecn(paired, work_dir):
    """Run PureCN.R wrapper with pre-segmented CNVkit or GATK4 inputs.

    The run is skipped when ``utils.file_uptodate`` reports the "rds" output
    as current relative to the normalized bin file, or when an
    ``<out_base>-failed.log`` marker left by an earlier attempt exists.

    A PureCN failure accepted by ``_allowed_errors`` is logged and written to
    the marker file, and processing continues. Any other failure is logged
    with its traceback and re-raised.

    Returns:
        The output mapping from ``_get_purecn_files(..., require_exist=True)``
        when its "rds" entry exists on disk, otherwise None.

    Raises:
        subprocess.CalledProcessError: when PureCN fails with an error that
            ``_allowed_errors`` does not accept.
    """
    segfns = {"cnvkit": _segment_normalized_cnvkit,
              "gatk-cnv": _segment_normalized_gatk}
    tumor = paired.tumor_data
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    failed_file = out_base + "-failed.log"
    cnr_file = tz.get_in(["depth", "bins", "normalized"], tumor)
    if (not utils.file_uptodate(out["rds"], cnr_file)
            and not utils.file_exists(failed_file)):
        cnr_file, seg_file = segfns[cnvkit.bin_approach(tumor)](cnr_file, work_dir, paired)
        # Imported locally, as in the original block.
        from bcbio import heterogeneity
        vcf_file = heterogeneity.get_variants(tumor, include_germline=False)[0]["vrn_file"]
        vcf_file = germline.filter_to_pass_and_reject(vcf_file, paired, out_dir=work_dir)
        sample_name = dd.get_sample_name(tumor)
        with file_transaction(tumor, out_base) as tx_out_base:
            # Use UCSC style naming for human builds to support BSgenome
            genome = dd.get_genome_build(tumor)
            if genome in ["GRCh37", "hg19"]:
                genome = "hg19"
            cmd = ["PureCN.R", "--seed", "42", "--out", tx_out_base,
                   "--rds", "%s.rds" % tx_out_base,
                   "--sampleid", sample_name,
                   "--genome", genome,
                   "--vcf", vcf_file, "--tumor", cnr_file,
                   "--segfile", seg_file, "--funsegmentation", "Hclust",
                   "--maxnonclonal", "0.3"]
            num_cores = dd.get_num_cores(tumor)
            if num_cores > 1:
                cmd += ["--cores", str(num_cores)]
            try:
                # Run through a shell string so the R environment exports apply.
                shell_cmd = "export R_LIBS_USER=%s && %s && %s" % (
                    utils.R_sitelib(), utils.get_R_exports(),
                    " ".join([str(x) for x in cmd]))
                do.run(shell_cmd, "PureCN copy number calling")
            except subprocess.CalledProcessError as msg:
                if not _allowed_errors(str(msg)):
                    # Logger.exception() requires a message; the previous bare
                    # call raised inside this handler and hid the real failure.
                    logger.exception("PureCN copy number calling failed for %s" % sample_name)
                    raise
                logger.info("PureCN failed to find solution for %s: skipping" % sample_name)
                with open(failed_file, "w") as out_handle:
                    out_handle.write(str(msg))
            # Move whichever expected outputs were produced out of the
            # transaction directory; after a tolerated failure some are absent.
            tx_dir = os.path.dirname(tx_out_base)
            final_dir = os.path.dirname(out_base)
            for f in all_files:
                tx_file = os.path.join(tx_dir, f)
                if os.path.exists(tx_file):
                    shutil.move(tx_file, os.path.join(final_dir, f))
    out = _get_purecn_files(paired, work_dir, require_exist=True)[1]
    return out if (out.get("rds") and os.path.exists(out["rds"])) else None
def run(items):
    """Prepare PURPLE inputs for a tumor/normal batch.

    Logs and returns ``items`` untouched when no pairing with a normal sample
    is found. Otherwise builds the AMBER heterozygous file and the COBALT
    depth file, prints both paths, and returns ``items`` unchanged.
    """
    paired = vcfutils.get_paired(items)
    if not (paired and paired.normal_name):
        batch_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PURPLE; need tumor/normal somatic calls in batch: %s" % batch_names)
        return items

    work_dir = _sv_workdir(paired.tumor_data)
    # Imported locally, as in the original block.
    from bcbio import heterogeneity
    variants = heterogeneity.get_variants(paired.tumor_data)
    het_file = _amber_het_file(variants, work_dir, paired)
    depth_file = _run_cobalt(paired, work_dir)
    print(het_file, depth_file)
    return items
def _compatible_small_variants(data, items):
    """Retrieve small variant (SNP, indel) VCFs compatible with CNVkit.

    Germline variants are requested from ``heterogeneity.get_variants`` only
    when ``items`` has no tumor/normal pairing.

    Returns:
        A list of ``VarFile(name, sample, normal)`` named tuples, one per
        variant entry. ``name`` is the entry's "vrn_file". For paired input
        ``sample`` and ``normal`` are the tumor and normal names; otherwise
        ``sample`` is the sample name of ``data`` and ``normal`` is None.
    """
    from bcbio import heterogeneity
    VarFile = collections.namedtuple("VarFile", ["name", "sample", "normal"])
    paired = vcfutils.get_paired(items)
    # The sample labels are the same for every variant file, so pick them once.
    if paired:
        sample, normal = paired.tumor_name, paired.normal_name
    else:
        sample, normal = dd.get_sample_name(data), None
    variants = heterogeneity.get_variants(data, include_germline=not paired)
    return [VarFile(v["vrn_file"], sample, normal) for v in variants]
def _run_purecn(paired, work_dir):
    """Call copy number with the PureCN.R wrapper on CNVkit pre-segmented input.

    Nothing is run when ``utils.file_uptodate(out["rds"], cnr_file)`` is
    true. Otherwise the normalized bin file is passed through
    ``chromhacks.bed_to_standardonly`` and ``_remove_overlaps`` and then
    segmented with CNVkit. The first non-germline variant file is passed
    through ``germline.filter_to_pass_and_reject`` before PureCN.R runs
    inside a file transaction.

    Returns the ``out`` mapping provided by ``_get_purecn_files``.
    """
    tumor = paired.tumor_data
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    cnr_file = tz.get_in(["depth", "bins", "normalized"], tumor)
    if utils.file_uptodate(out["rds"], cnr_file):
        return out

    cnvkit_base = os.path.join(utils.safe_makedir(os.path.join(work_dir, "cnvkit")),
                               dd.get_sample_name(tumor))
    cnvkit_dir = os.path.dirname(cnvkit_base)
    cnr_file = chromhacks.bed_to_standardonly(cnr_file, tumor, headers="chromosome",
                                              include_sex_chroms=True,
                                              out_dir=cnvkit_dir)
    cnr_file = _remove_overlaps(cnr_file, cnvkit_dir, tumor)
    seg_file = cnvkit.segment_from_cnr(cnr_file, tumor, cnvkit_base)
    # Imported locally, as in the original block.
    from bcbio import heterogeneity
    variants = heterogeneity.get_variants(tumor, include_germline=False)
    vcf_file = germline.filter_to_pass_and_reject(variants[0]["vrn_file"], paired,
                                                  out_dir=work_dir)
    with file_transaction(tumor, out_base) as tx_out_base:
        # Use UCSC style naming for human builds to support BSgenome
        genome = dd.get_genome_build(tumor)
        if genome in ["GRCh37", "hg19"]:
            genome = "hg19"
        cmd = ["PureCN.R", "--seed", "42"]
        cmd += ["--out", tx_out_base, "--rds", "%s.rds" % tx_out_base]
        cmd += ["--sampleid", dd.get_sample_name(tumor)]
        cmd += ["--genome", genome]
        cmd += ["--vcf", vcf_file, "--tumor", cnr_file]
        cmd += ["--segfile", seg_file, "--funsegmentation", "Hclust"]
        cmd += ["--maxnonclonal", "0.3"]
        num_cores = dd.get_num_cores(tumor)
        if num_cores > 1:
            cmd += ["--cores", str(num_cores)]
        do.run(cmd, "PureCN copy number calling")
        # Move each expected output from the transaction directory to the
        # directory holding the final output base.
        tx_dir = os.path.dirname(tx_out_base)
        final_dir = os.path.dirname(out_base)
        for fname in all_files:
            shutil.move(os.path.join(tx_dir, fname),
                        os.path.join(final_dir, fname))
    return out
def _run_purecn(paired, work_dir):
    """Run PureCN.R wrapper with pre-segmented CNVkit or GATK4 inputs.

    PureCN is not run when ``utils.file_uptodate`` reports the "rds" output
    as current relative to the normalized bin file, or when the
    ``<out_base>-failed.log`` marker from a previous attempt exists.

    When PureCN exits with an error that ``_allowed_errors`` accepts, the
    error text is written to the marker file and the sample is skipped. Any
    other error is logged with its traceback and re-raised.

    Returns:
        The output mapping from ``_get_purecn_files(..., require_exist=True)``
        when its "rds" entry exists on disk, otherwise None.

    Raises:
        subprocess.CalledProcessError: when PureCN fails with an error that
            ``_allowed_errors`` does not accept.
    """
    segfns = {"cnvkit": _segment_normalized_cnvkit,
              "gatk-cnv": _segment_normalized_gatk}
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    failed_file = out_base + "-failed.log"
    cnr_file = tz.get_in(["depth", "bins", "normalized"], paired.tumor_data)
    needs_run = (not utils.file_uptodate(out["rds"], cnr_file)
                 and not utils.file_exists(failed_file))
    if needs_run:
        segment = segfns[cnvkit.bin_approach(paired.tumor_data)]
        cnr_file, seg_file = segment(cnr_file, work_dir, paired)
        # Imported locally, as in the original block.
        from bcbio import heterogeneity
        vcf_file = heterogeneity.get_variants(paired.tumor_data,
                                              include_germline=False)[0]["vrn_file"]
        vcf_file = germline.filter_to_pass_and_reject(vcf_file, paired, out_dir=work_dir)
        with file_transaction(paired.tumor_data, out_base) as tx_out_base:
            # Use UCSC style naming for human builds to support BSgenome
            build = dd.get_genome_build(paired.tumor_data)
            genome = "hg19" if build in ["GRCh37", "hg19"] else build
            cmd = ["PureCN.R", "--seed", "42", "--out", tx_out_base,
                   "--rds", "%s.rds" % tx_out_base,
                   "--sampleid", dd.get_sample_name(paired.tumor_data),
                   "--genome", genome,
                   "--vcf", vcf_file, "--tumor", cnr_file,
                   "--segfile", seg_file, "--funsegmentation", "Hclust",
                   "--maxnonclonal", "0.3"]
            cores = dd.get_num_cores(paired.tumor_data)
            if cores > 1:
                cmd += ["--cores", str(cores)]
            # Run through a shell string so the R environment exports apply.
            cmd_str = "export R_LIBS_USER=%s && %s && %s" % (
                utils.R_sitelib(), utils.get_R_exports(),
                " ".join([str(x) for x in cmd]))
            try:
                do.run(cmd_str, "PureCN copy number calling")
            except subprocess.CalledProcessError as msg:
                if _allowed_errors(str(msg)):
                    logger.info("PureCN failed to find solution for %s: skipping" %
                                dd.get_sample_name(paired.tumor_data))
                    with open(failed_file, "w") as out_handle:
                        out_handle.write(str(msg))
                else:
                    # Logger.exception() requires a message argument; the bare
                    # call failed here and masked the original PureCN error.
                    logger.exception("PureCN copy number calling failed for %s" %
                                     dd.get_sample_name(paired.tumor_data))
                    raise
            # Only move outputs that exist: a tolerated failure leaves some
            # expected files unwritten.
            for f in all_files:
                produced = os.path.join(os.path.dirname(tx_out_base), f)
                if os.path.exists(produced):
                    shutil.move(produced, os.path.join(os.path.dirname(out_base), f))
    out = _get_purecn_files(paired, work_dir, require_exist=True)[1]
    return out if (out.get("rds") and os.path.exists(out["rds"])) else None
def run(items):
    """Run PURPLE on a tumor/normal batch and attach its result.

    Logs and returns ``items`` untouched when no pairing with a normal sample
    is found. Otherwise builds the AMBER heterozygous file (called with
    "pon") and the COBALT depth file from the non-germline variant calls,
    runs PURPLE, and appends its output to the tumor sample's "sv" list.

    Returns a list holding the normal sample (when present) followed by the
    updated tumor sample.
    """
    paired = vcfutils.get_paired(items)
    if not (paired and paired.normal_name):
        batch_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PURPLE; need tumor/normal somatic calls in batch: %s" % batch_names)
        return items

    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    # Imported locally, as in the original block.
    from bcbio import heterogeneity
    vrn_files = heterogeneity.get_variants(tumor, include_germline=False)
    het_file = _amber_het_file("pon", vrn_files, work_dir, paired)
    depth_file = _run_cobalt(paired, work_dir)
    purple_out = _run_purple(paired, het_file, depth_file, vrn_files, work_dir)

    out = [paired.normal_data] if paired.normal_data else []
    tumor.setdefault("sv", []).append(purple_out)
    out.append(tumor)
    return out
def run(items):
    """Run PURPLE for a paired tumor/normal batch.

    When ``vcfutils.get_paired`` yields no pairing, or the pairing has no
    normal name, a skip message is logged and ``items`` is returned as is.
    Otherwise the AMBER ("pon") and COBALT inputs are prepared from the
    non-germline variant calls, PURPLE is run, and its output is appended to
    the tumor sample's "sv" list.

    Returns the normal sample (if any) and the updated tumor sample, in that
    order, as a list.
    """
    paired = vcfutils.get_paired(items)
    has_normal = bool(paired) and bool(paired.normal_name)
    if not has_normal:
        names = [dd.get_sample_name(d) for d in items]
        logger.info("Skipping PURPLE; need tumor/normal somatic calls in batch: %s" % " ".join(names))
        return items

    tumor_data = paired.tumor_data
    work_dir = _sv_workdir(tumor_data)
    # Imported locally, as in the original block.
    from bcbio import heterogeneity
    vrn_files = heterogeneity.get_variants(tumor_data, include_germline=False)
    het_file = _amber_het_file("pon", vrn_files, work_dir, paired)
    depth_file = _run_cobalt(paired, work_dir)
    purple_out = _run_purple(paired, het_file, depth_file, vrn_files, work_dir)

    result = []
    if paired.normal_data:
        result.append(paired.normal_data)
    sv_calls = tumor_data.setdefault("sv", [])
    sv_calls.append(purple_out)
    result.append(tumor_data)
    return result