def _run_purecn(paired, work_dir): """Run PureCN.R wrapper with pre-segmented CNVkit inputs. """ out_base, out, all_files = _get_purecn_files(paired, work_dir) cnr_file = tz.get_in(["depth", "bins", "normalized"], paired.tumor_data) if not utils.file_uptodate(out["rds"], cnr_file): cnvkit_base = os.path.join( utils.safe_makedir(os.path.join(work_dir, "cnvkit")), dd.get_sample_name(paired.tumor_data)) seg_file = cnvkit.segment_from_cnr(cnr_file, paired.tumor_data, cnvkit_base) from bcbio import heterogeneity vcf_file = heterogeneity.get_variants(paired.tumor_data)[0]["vrn_file"] with file_transaction(paired.tumor_data, out_base) as tx_out_base: cmd = [ "PureCN.R", "--seed", "42", "--out", tx_out_base, "--rds", "%s.rds" % tx_out_base, "--sampleid", dd.get_sample_name(paired.tumor_data), "--genome", dd.get_genome_build(paired.tumor_data), "--vcf", vcf_file, "--tumor", cnr_file, "--segfile", seg_file, "--funsegmentation", "none" ] do.run(cmd, "PureCN copy number calling") for f in all_files: shutil.move(os.path.join(os.path.dirname(tx_out_base), f), os.path.join(os.path.dirname(out_base), f)) return out
def _segment_normalized_cnvkit(cnr_file, work_dir, paired): """Segmentation of normalized inputs using CNVkit. """ cnvkit_base = os.path.join(utils.safe_makedir(os.path.join(work_dir, "cnvkit")), dd.get_sample_name(paired.tumor_data)) cnr_file = chromhacks.bed_to_standardonly(cnr_file, paired.tumor_data, headers="chromosome", include_sex_chroms=True, out_dir=os.path.dirname(cnvkit_base)) cnr_file = _remove_overlaps(cnr_file, os.path.dirname(cnvkit_base), paired.tumor_data) seg_file = cnvkit.segment_from_cnr(cnr_file, paired.tumor_data, cnvkit_base) return cnr_file, seg_file
def _run_purecn(paired, work_dir): """Run PureCN.R wrapper with pre-segmented CNVkit inputs. """ out_base, out, all_files = _get_purecn_files(paired, work_dir) cnr_file = tz.get_in(["depth", "bins", "normalized"], paired.tumor_data) if not utils.file_uptodate(out["rds"], cnr_file): cnvkit_base = os.path.join( utils.safe_makedir(os.path.join(work_dir, "cnvkit")), dd.get_sample_name(paired.tumor_data)) cnr_file = chromhacks.bed_to_standardonly( cnr_file, paired.tumor_data, headers="chromosome", include_sex_chroms=True, out_dir=os.path.dirname(cnvkit_base)) cnr_file = _remove_overlaps(cnr_file, os.path.dirname(cnvkit_base), paired.tumor_data) seg_file = cnvkit.segment_from_cnr(cnr_file, paired.tumor_data, cnvkit_base) from bcbio import heterogeneity vcf_file = heterogeneity.get_variants( paired.tumor_data, include_germline=False)[0]["vrn_file"] vcf_file = germline.filter_to_pass_and_reject(vcf_file, paired, out_dir=work_dir) with file_transaction(paired.tumor_data, out_base) as tx_out_base: # Use UCSC style naming for human builds to support BSgenome genome = ("hg19" if dd.get_genome_build(paired.tumor_data) in [ "GRCh37", "hg19" ] else dd.get_genome_build(paired.tumor_data)) cmd = [ "PureCN.R", "--seed", "42", "--out", tx_out_base, "--rds", "%s.rds" % tx_out_base, "--sampleid", dd.get_sample_name(paired.tumor_data), "--genome", genome, "--vcf", vcf_file, "--tumor", cnr_file, "--segfile", seg_file, "--funsegmentation", "Hclust", "--maxnonclonal", "0.3" ] if dd.get_num_cores(paired.tumor_data) > 1: cmd += ["--cores", str(dd.get_num_cores(paired.tumor_data))] do.run(cmd, "PureCN copy number calling") for f in all_files: shutil.move(os.path.join(os.path.dirname(tx_out_base), f), os.path.join(os.path.dirname(out_base), f)) return out