def _run_wham(inputs, background_bams): """Run WHAM on a defined set of inputs and targets. """ out_file = os.path.join(_sv_workdir(inputs[0]), "%s-wham.vcf.gz" % dd.get_sample_name(inputs[0])) if not utils.file_exists(out_file): with file_transaction(inputs[0], out_file) as tx_out_file: cores = dd.get_cores(inputs[0]) ref_file = dd.get_ref_file(inputs[0]) include_chroms = ",".join([c.name for c in ref.file_contigs(ref_file) if chromhacks.is_autosomal_or_x(c.name)]) all_bams = ",".join([x["align_bam"] for x in inputs] + background_bams) cmd = ("whamg -x {cores} -a {ref_file} -f {all_bams} -c {include_chroms} " "| bgzip -c > {tx_out_file}") do.run(cmd.format(**locals()), "WHAM SV caller: %s" % ", ".join(dd.get_sample_name(d) for d in inputs)) return vcfutils.bgzip_and_index(out_file, inputs[0]["config"])
def _run_titancna(cn_file, het_file, ploidy, num_clusters, work_dir, data): """Run titanCNA wrapper script on given ploidy and clusters. """ sample = dd.get_sample_name(data) cores = dd.get_num_cores(data) export_cmd = utils.get_R_exports() ploidy_dir = utils.safe_makedir(os.path.join(work_dir, "run_ploidy%s" % ploidy)) cluster_dir = "%s_cluster%02d" % (sample, num_clusters) out_dir = os.path.join(ploidy_dir, cluster_dir) if not utils.file_uptodate(out_dir + ".titan.txt", cn_file): with tx_tmpdir(data) as tmp_dir: with utils.chdir(tmp_dir): cmd = ("{export_cmd} && titanCNA.R --id {sample} --hetFile {het_file} --cnFile {cn_file} " "--numClusters {num_clusters} --ploidy {ploidy} --numCores {cores} --outDir {tmp_dir} " "--libdir None") chroms = ["'%s'" % c.name.replace("chr", "") for c in ref.file_contigs(dd.get_ref_file(data)) if chromhacks.is_autosomal_or_x(c.name)] if "'X'" not in chroms: chroms += ["'X'"] # Use UCSC style naming for human builds to support BSgenome genome_build = ("hg19" if dd.get_genome_build(data) in ["GRCh37", "hg19"] else dd.get_genome_build(data)) cmd += """ --chrs "c(%s)" """ % ",".join(chroms) cmd += " --genomeBuild {genome_build}" if data["genome_build"] in ("hg19", "hg38"): cmd += " --genomeStyle UCSC" if data["genome_build"] in ["hg38"]: data_dir = os.path.normpath(os.path.join( os.path.dirname(os.path.realpath(os.path.join( os.path.dirname(utils.Rscript_cmd()), "titanCNA.R"))), os.pardir, os.pardir, "data")) cytoband_file = os.path.join(data_dir, "cytoBand_hg38.txt") assert os.path.exists(cytoband_file), cytoband_file cmd += " --cytobandFile %s" % cytoband_file # TitanCNA's model is influenced by the variance in read coverage data # and data type: set reasonable defaults for non-WGS runs # (see https://github.com/gavinha/TitanCNA/tree/master/scripts/R_scripts) if dd.get_coverage_interval(data) != "genome": cmd += " --alphaK=2500 --alphaKHigh=2500" do.run(cmd.format(**locals()), "TitanCNA CNV detection: ploidy %s, cluster %s" % (ploidy, num_clusters)) for fname in glob.glob(os.path.join(tmp_dir, cluster_dir + "*")): shutil.move(fname, ploidy_dir) if os.path.exists(os.path.join(tmp_dir, "Rplots.pdf")): shutil.move(os.path.join(tmp_dir, "Rplots.pdf"), os.path.join(ploidy_dir, "%s.Rplots.pdf" % cluster_dir)) return ploidy_dir
def _run_wham(inputs, background_bams): """Run WHAM on a defined set of inputs and targets. """ out_file = os.path.join(_sv_workdir(inputs[0]), "%s-wham.vcf.gz" % dd.get_sample_name(inputs[0])) if not utils.file_exists(out_file): with file_transaction(inputs[0], out_file) as tx_out_file: cores = dd.get_cores(inputs[0]) ref_file = dd.get_ref_file(inputs[0]) include_chroms = ",".join([ c.name for c in ref.file_contigs(ref_file) if chromhacks.is_autosomal_or_x(c.name) ]) all_bams = ",".join([x["align_bam"] for x in inputs] + background_bams) cmd = ( "whamg -x {cores} -a {ref_file} -f {all_bams} -c {include_chroms} " "| bgzip -c > {tx_out_file}") do.run( cmd.format(**locals()), "WHAM SV caller: %s" % ", ".join(dd.get_sample_name(d) for d in inputs)) return vcfutils.bgzip_and_index(out_file, inputs[0]["config"])