def _reallocate(in_file):
    """Run ``reallocate.pl`` on *in_file* unless its result is already there.

    The script is launched through perl with the fixed trailing arguments
    ``5000 1000 b``. The path ``in_file + ".weighted-5000-1000-b"`` is used
    as the completion check: when it already exists the command is skipped.

    Returns that path in either case.
    """
    script = do.find_cmd("reallocate.pl")
    weighted_file = in_file + ".weighted-5000-1000-b"
    if utils.file_exists(weighted_file):
        return weighted_file
    command = "perl {tool} -i {in_file} 5000 1000 b".format(tool=script, in_file=in_file)
    do.run(command, "reallocate")
    return weighted_file
def _bedpe_to_vcf(bedpe_file, sconfig_file, items):
    """Convert lumpy BEDPE output into a sorted, bgzipped and indexed VCF.

    Steps, each skipped when its product already exists:

    1. run the ``bedpeToVcf`` script to produce ``<base>-raw.vcf``,
    2. sort the raw VCF with ``vcfutils.sort_by_ref``,
    3. link the sorted file to ``<base>.vcf``,
    4. bgzip and index ``<base>.vcf``.

    ``items[0]`` supplies the reference FASTA (``reference/fasta/base``)
    and the ``config`` passed to the compression step.

    Returns the value of ``vcfutils.bgzip_and_index``, or ``None`` when the
    ``bedpeToVcf`` script cannot be found.
    """
    converter = do.find_cmd("bedpeToVcf")
    if not converter:
        return None

    base = utils.splitext_plus(bedpe_file)[0]
    gz_vcf = "%s.vcf.gz" % base
    plain_vcf = gz_vcf.replace(".vcf.gz", ".vcf")
    raw_vcf = "%s-raw.vcf" % base

    if not utils.file_exists(gz_vcf):
        if not utils.file_exists(raw_vcf):
            with file_transaction(raw_vcf) as tx_raw_file:
                ref_file = tz.get_in(["reference", "fasta", "base"], items[0])
                convert_cmd = [sys.executable, converter]
                convert_cmd += ["-c", sconfig_file]
                convert_cmd += ["-f", ref_file]
                convert_cmd += ["-b", bedpe_file]
                convert_cmd += ["-o", tx_raw_file]
                do.run(convert_cmd, "Convert lumpy bedpe output to VCF")
        sorted_vcf = vcfutils.sort_by_ref(raw_vcf, items[0])
        if not utils.file_exists(plain_vcf):
            utils.symlink_plus(sorted_vcf, plain_vcf)
    return vcfutils.bgzip_and_index(plain_vcf, items[0]["config"])
def _clean(in_file):
    """Run ``TBr2_duster.pl`` on *in_file* unless its result is already there.

    The path ``in_file + ".no-dust"`` is used as the completion check; when
    it exists the perl command is not executed.

    Returns that path in either case.
    """
    duster = do.find_cmd("TBr2_duster.pl")
    dusted_file = in_file + ".no-dust"
    if utils.file_exists(dusted_file):
        return dusted_file
    command = "perl {tool} -i {in_file}".format(tool=duster, in_file=in_file)
    do.run(command, "duster")
    return dusted_file
def _call_vcf(in_bam, sample, workdir, reference, config):
    """
    Genotype calling with the BisSNP ``BisulfiteGenotyper`` walker.

    Runs inside *workdir* and asks BisSNP to write two VCF files named
    after *sample*: ``<sample>.rawcpg.vcf`` (``-vfn1``) and
    ``<sample>.rawsnp.vcf`` (``-vfn2``). Cores and JVM heap size are read
    from ``config['algorithm']`` (``cores`` default 1, ``memory`` default 4).
    The command is skipped when the ``rawcpg`` file already exists.

    Returns the tuple ``(rawcpg_vcf_path, rawsnp_vcf_path)``.
    """
    bissnp = do.find_cmd("bissnp")
    algorithm = config['algorithm']
    num_cores = algorithm.get('cores', 1)
    memory = algorithm.get('memory', 4)
    jvm_opts = "-Xms750m -Xmx%sg" % memory
    cmd = ("{bissnp} {jvm_opts} -R {reference} -I {in_bam} "
           "-T BisulfiteGenotyper "
           "-vfn1 {tx_out} "
           "-vfn2 {out_vfn2} "
           "-stand_call_conf 20 "
           "-stand_emit_conf 0 "
           "-mmq 0 "
           "-mbq 0 "
           "-nt {num_cores}")
    with chdir(workdir):
        out_vfn1 = op.join(workdir, sample + ".rawcpg.vcf")
        out_vfn2 = op.join(workdir, sample + ".rawsnp.vcf")
        if not file_exists(out_vfn1):
            # NOTE(review): only the -vfn1 output goes through the file
            # transaction; the -vfn2 output is written straight to its final
            # path and is never checked for existence.
            with file_transaction(out_vfn1) as tx_out:
                # Build the command once so the logged and executed strings
                # are guaranteed to be the same.
                full_cmd = cmd.format(
                    bissnp=bissnp,
                    jvm_opts=jvm_opts,
                    reference=reference,
                    in_bam=in_bam,
                    tx_out=tx_out,
                    out_vfn2=out_vfn2,
                    num_cores=num_cores,
                )
                log.logger.debug(full_cmd)
                do.run(full_cmd, "BisSNP genotyper in %s" % in_bam)
    return out_vfn1, out_vfn2
def _protac(in_file, reference):
    """Run ``proTRAC.pl`` once, using an empty ``protac`` file as a marker.

    The marker path is the relative name ``"protac"``, so it is resolved
    against the current working directory. When the marker is missing the
    perl command is run and the marker is then created empty.

    Returns the marker path ``"protac"``.
    """
    protrac = do.find_cmd("proTRAC.pl")
    marker = "protac"
    if utils.file_exists(marker):
        return marker
    command = "perl {tool} -genome {reference} -map {in_file} -nh -nr -rpm -distr 1-90 -pimax 32".format(
        tool=protrac, reference=reference, in_file=in_file)
    do.run(command, "protac")
    # NOTE(review): the marker is created empty; if utils.file_exists also
    # requires a non-zero size this check will never succeed -- confirm.
    with open(marker, 'w'):
        pass
    return marker
def _mapper(in_file, reference):
    """Map *in_file* against *reference* with ``sRNAmapper.pl``.

    The result is written to the relative path ``hits.eland`` through a
    file transaction; nothing is run when that file already exists.

    Returns ``"hits.eland"``.
    """
    mapper = do.find_cmd("sRNAmapper.pl")
    hits_file = "hits.eland"
    if utils.file_exists(hits_file):
        return hits_file
    with file_transaction(hits_file) as tx_out:
        command = "perl {tool} -i {in_file} -g {reference} -a best -o {tx_out}".format(
            tool=mapper, in_file=in_file, reference=reference, tx_out=tx_out)
        do.run(command, "mapper")
    return hits_file
def _collapse(in_file):
    """Collapse *in_file* with ``TBr2_collapse.pl``.

    The output name is the input's base name (directory and extension
    removed) plus ``_collapse.fastq``; it is a relative path, so it lands in
    the current working directory. The command is skipped when that file
    already exists, otherwise it runs inside a file transaction.

    Returns the output file name.
    """
    collapser = do.find_cmd("TBr2_collapse.pl")
    stem = splitext_plus(op.basename(in_file))[0]
    collapsed = stem + "_collapse.fastq"
    if utils.file_exists(collapsed):
        return collapsed
    with file_transaction(collapsed) as tx_out:
        command = "perl {tool} -i {in_file} -o {tx_out}".format(
            tool=collapser, in_file=in_file, tx_out=tx_out)
        do.run(command, "collapse")
    return collapsed
def _align(in_fastq, sample, workdir, genome_index, is_directional, bowtie2, reference, config):
    """
    Align reads with bismark, then fix read groups, reorder and index.

    The original note on this function says it is not actually used and
    that alignment lives in ngsalign.bismark.align.

    Thread counts come from the "bismark" resources when present
    (``bismark_threads`` / ``bowtie_threads``); otherwise bismark gets half
    of ``config['algorithm']['cores']`` (at least 1) and bowtie gets 1.
    Bismark runs into a temporary directory that is moved to
    ``<workdir>/<sample>`` and is skipped when ``<sample>.bam`` is already
    there. With *bowtie2* set, the reordered BAM is additionally passed
    through ``_set_quality`` and indexed again.

    Returns the path of the final ordered BAM.
    """
    bismark = do.find_cmd("bismark")
    # NOTE(review): get_resources receives only the program name here;
    # confirm its signature does not also expect the config.
    resources = config_utils.get_resources("bismark")
    num_cores = (resources and resources.get("bismark_threads")) or max(
        int(config['algorithm'].get('cores', 1) / 2), 1)
    bowtie_threads = (resources and resources.get("bowtie_threads")) or 1
    direction_flag = "" if is_directional else "--non_directional"

    if bowtie2:
        template = "{bismark} --bowtie2 --parallel {num_cores} -p {bowtie_threads} -o {tx_dir} --basename {sample} --unmapped {is_directional} {genome_index} {in_fastq}"
    else:
        template = "{bismark} -n 1 -o {tx_dir} --basename {sample} --unmapped {is_directional} {genome_index} {in_fastq}"

    out_dir = op.join(workdir, sample)
    out_bam = op.join(out_dir, sample + ".bam")
    with chdir(workdir):
        if not file_exists(out_bam):
            with tx_tmpdir() as tx_dir:
                cmd = template.format(
                    bismark=bismark,
                    num_cores=num_cores,
                    bowtie_threads=bowtie_threads,
                    tx_dir=tx_dir,
                    sample=sample,
                    is_directional=direction_flag,
                    genome_index=genome_index,
                    in_fastq=in_fastq,
                )
                log.logger.debug(cmd)
                do.run(cmd, "bismark in %s" % in_fastq)
                shutil.move(tx_dir, out_dir)

        broad_runner = broad.runner_from_config(config)
        names = {
            'rg': in_fastq,
            'library': 'BS_LIB',
            'pl': 'Illumina',
            'pu': 'R1',
            'sm': in_fastq,
            'sample': sample,
        }
        out_fix_bam = broad_runner.run_fn("picard_fix_rgs", out_bam, names)
        order_bam = splitext_plus(out_fix_bam)[0] + "_order.bam"
        broad_runner.run_fn("picard_reorder", out_fix_bam, reference, order_bam)
        index(order_bam, config)
        if bowtie2:
            order_bam = _set_quality(order_bam)
            index(order_bam, config)
    return order_bam
def _bedpe_to_vcf(bedpe_file, sconfig_file, items):
    """Convert lumpy BEDPE output into a bgzipped, indexed VCF.

    The ``bedpeToVcf`` script is run to write ``<base>.vcf`` unless that
    file or its ``.gz`` counterpart already exists; the VCF is then handed
    to ``vcfutils.bgzip_and_index``. ``items[0]`` supplies the reference
    FASTA (``reference/fasta/base``) and the ``config``.

    Returns the value of ``vcfutils.bgzip_and_index``, or ``None`` when the
    ``bedpeToVcf`` script cannot be found.
    """
    converter = do.find_cmd("bedpeToVcf")
    if not converter:
        return None

    vcf_file = "%s.vcf" % utils.splitext_plus(bedpe_file)[0]
    already_converted = utils.file_exists(vcf_file) or utils.file_exists(vcf_file + ".gz")
    if not already_converted:
        with file_transaction(vcf_file) as tx_out_file:
            ref_file = tz.get_in(["reference", "fasta", "base"], items[0])
            convert_cmd = [
                sys.executable, converter,
                "-c", sconfig_file,
                "-f", ref_file,
                "-b", bedpe_file,
                "-o", tx_out_file,
            ]
            do.run(convert_cmd, "Convert lumpy bedpe output to VCF")
    return vcfutils.bgzip_and_index(vcf_file, items[0]["config"])
def _run_report(bam_in, sample, biasm_file, workdir, config):
    """
    Build the bismark HTML report with bismark2report.

    The alignment report is expected next to *bam_in* as
    ``<sample>_SE_report.txt``; *biasm_file* is passed as the M-bias
    report. The HTML is written to ``<workdir>/<sample>/<sample>.html``
    through a file transaction and is not regenerated when it exists.
    *config* is accepted but not read here.

    Returns the sample output directory (not the HTML file).
    """
    report_tool = do.find_cmd("bismark2report")
    alignment_report = op.join(op.dirname(bam_in), sample) + '_SE_report.txt'
    out_dir = op.join(workdir, sample)
    html_file = op.join(out_dir, sample + '.html')
    with chdir(out_dir):
        if file_exists(html_file):
            return out_dir
        with file_transaction(html_file) as tx_out:
            command = "{bismark} --alignment_report {bam_report} -o {tx_out} --mbias_report {biasm_file}".format(
                bismark=report_tool,
                bam_report=alignment_report,
                tx_out=tx_out,
                biasm_file=biasm_file,
            )
            do.run(command, "bismarkr2report in %s" % bam_in)
    return out_dir
def _bedpe_to_vcf(bedpe_file, sconfig_file, items):
    """Turn a lumpy BEDPE file into a compressed, indexed VCF.

    When neither ``<base>.vcf`` nor ``<base>.vcf.gz`` exists, the
    ``bedpeToVcf`` script is executed with the current Python interpreter
    inside a file transaction. The plain VCF path is then passed to
    ``vcfutils.bgzip_and_index`` together with ``items[0]["config"]``.

    Returns whatever ``vcfutils.bgzip_and_index`` returns; returns ``None``
    if ``bedpeToVcf`` is not found.
    """
    script = do.find_cmd("bedpeToVcf")
    if not script:
        return None

    plain_vcf = "%s.vcf" % utils.splitext_plus(bedpe_file)[0]
    if not (utils.file_exists(plain_vcf) or utils.file_exists(plain_vcf + ".gz")):
        with file_transaction(plain_vcf) as tx_out_file:
            fasta = tz.get_in(["reference", "fasta", "base"], items[0])
            options = ["-c", sconfig_file, "-f", fasta, "-b", bedpe_file, "-o", tx_out_file]
            do.run([sys.executable, script] + options, "Convert lumpy bedpe output to VCF")
    compressed = vcfutils.bgzip_and_index(plain_vcf, items[0]["config"])
    return compressed
def _run_meth_extractor(bam_in, sample, workdir, config):
    """
    Run bismark_methylation_extractor on *bam_in*.

    The extractor runs inside a temporary directory, which is then moved to
    ``<workdir>/<sample>``. Work is skipped when the expected M-bias report
    (``<bam base name>.M-bias.txt`` in that directory) already exists.

    Returns the M-bias report path; an ``AssertionError`` is raised when the
    report is missing afterwards.
    """
    extractor = do.find_cmd("bismark_methylation_extractor")
    algorithm = config['algorithm']
    cores = algorithm.get('cores', 1)
    # NOTE(review): this reads the 'mem' key while other helpers in this
    # file read 'memory' -- confirm which key the config really provides.
    memory = algorithm.get('mem', 5)
    out_dir = op.join(workdir, sample)
    bam_stem = op.basename(splitext_plus(bam_in)[0])
    mbias_file = op.join(out_dir, bam_stem + '.M-bias.txt')
    if not file_exists(mbias_file):
        command = "{bismark} --no_overlap --comprehensive --multicore {cores} --buffer_size {memory}G --bedGraph --counts --gzip {bam_in}".format(
            bismark=extractor, cores=cores, memory=memory, bam_in=bam_in)
        with tx_tmpdir() as tx_dir:
            with chdir(tx_dir):
                do.run(command, "bismark_methylation_extractor in %s" % bam_in)
                shutil.move(tx_dir, out_dir)
    assert op.exists(mbias_file), "mbias report doesn't exists:%s" % mbias_file
    return mbias_file
def _bedpe_to_vcf(bedpe_file, sconfig_file, items):
    """Convert lumpy BEDPE output into a sorted, bgzipped and indexed VCF.

    The ``bedpeToVcf`` script is called with ``-t LUMPY`` and the reference
    from ``dd.get_ref_file(items[0])`` to write ``<base>-raw.vcf``. The raw
    file is sorted with ``vcfutils.sort_by_ref``, linked to ``<base>.vcf``
    and finally compressed and indexed. Conversion, sorting and linking are
    skipped once ``<base>.vcf.gz`` exists; the conversion alone is skipped
    when the raw file exists.

    Returns the value of ``vcfutils.bgzip_and_index``, or ``None`` when the
    ``bedpeToVcf`` script cannot be found.
    """
    converter = do.find_cmd("bedpeToVcf")
    if not converter:
        return None

    stem = utils.splitext_plus(bedpe_file)[0]
    gz_vcf = "%s.vcf.gz" % stem
    plain_vcf = gz_vcf.replace(".vcf.gz", ".vcf")
    raw_vcf = "%s-raw.vcf" % stem

    if not utils.file_exists(gz_vcf):
        if not utils.file_exists(raw_vcf):
            with file_transaction(items[0], raw_vcf) as tx_raw_file:
                convert_cmd = [
                    sys.executable, converter,
                    "-c", sconfig_file,
                    "-f", dd.get_ref_file(items[0]),
                    "-t", "LUMPY",
                    "-b", bedpe_file,
                    "-o", tx_raw_file,
                ]
                do.run(convert_cmd, "Convert lumpy bedpe output to VCF")
        sorted_vcf = vcfutils.sort_by_ref(raw_vcf, items[0])
        if not utils.file_exists(plain_vcf):
            utils.symlink_plus(sorted_vcf, plain_vcf)
    return vcfutils.bgzip_and_index(plain_vcf, items[0]["config"])
def _recal_BQ_score(in_bam, sample, workdir, counts_file, reference, config):
    """
    Base-quality recalibration with the BisSNP
    ``BisulfiteTableRecalibration`` walker.

    Runs inside *workdir*, reading the covariate table *counts_file* and
    writing ``<sample>_recal1.bam`` through a file transaction. The JVM
    heap size comes from ``config['algorithm']['memory']`` (default 4).
    The BisSNP run is skipped when the recalibrated BAM already exists;
    the BAM is passed to ``index`` afterwards.

    Returns the recalibrated BAM path.
    """
    bissnp = do.find_cmd("bissnp")
    heap_gb = config['algorithm'].get('memory', 4)
    jvm_opts = "-Xms750m -Xmx%sg" % heap_gb
    template = ("{bissnp} {jvm_opts} -R {reference} -I {in_bam} "
                "-T BisulfiteTableRecalibration "
                "-recalFile {counts_file} "
                "-o {tx_out} "
                "-maxQ 60 ")
    with chdir(workdir):
        out_recal = op.join(workdir, sample + "_recal1.bam")
        if not file_exists(out_recal):
            with file_transaction(out_recal) as tx_out:
                command = template.format(
                    bissnp=bissnp,
                    jvm_opts=jvm_opts,
                    reference=reference,
                    in_bam=in_bam,
                    counts_file=counts_file,
                    tx_out=tx_out,
                )
                log.logger.debug(command)
                do.run(command, "BisSNP writerecal in %s" % in_bam)
        index(out_recal, config)
    return out_recal
def _align(in_fastq, sample, workdir, genome_index, is_directional, bowtie2, reference, config):
    """
    Align reads with bismark, then fix read groups, reorder and index.

    Bismark writes into a temporary directory that is moved to
    ``<workdir>/<sample>``; the run is skipped when ``<sample>.bam`` is
    already there. In bowtie2 mode bismark is given half of
    ``config['algorithm']['cores']`` (at least 1) as ``-p``, and the
    reordered BAM is additionally passed through ``_set_quality`` and
    indexed again.

    Returns the path of the final ordered BAM.
    """
    bismark = do.find_cmd("bismark")
    half_cores = int(config['algorithm'].get('cores', 1) / 2)
    num_cores = max(half_cores, 1)
    direction_flag = "" if is_directional else "--non_directional"

    if bowtie2:
        template = "{bismark} --bowtie2 -p {num_cores} -n 1 -o {tx_dir} --basename {sample} --unmapped {is_directional} {genome_index} {in_fastq}"
    else:
        template = "{bismark} -n 1 -o {tx_dir} --basename {sample} --unmapped {is_directional} {genome_index} {in_fastq}"

    out_dir = op.join(workdir, sample)
    out_bam = op.join(out_dir, sample + ".bam")
    with chdir(workdir):
        if not file_exists(out_bam):
            with tx_tmpdir() as tx_dir:
                cmd = template.format(
                    bismark=bismark,
                    num_cores=num_cores,
                    tx_dir=tx_dir,
                    sample=sample,
                    is_directional=direction_flag,
                    genome_index=genome_index,
                    in_fastq=in_fastq,
                )
                log.logger.debug(cmd)
                do.run(cmd, "bismark in %s" % in_fastq)
                shutil.move(tx_dir, out_dir)

        broad_runner = broad.runner_from_config(config)
        names = dict(rg=in_fastq, library='RRBS_LIB', pl='Illumina',
                     pu='R1', sm=in_fastq, sample=sample)
        out_fix_bam = broad_runner.run_fn("picard_fix_rgs", out_bam, names)
        order_bam = splitext_plus(out_fix_bam)[0] + "_order.bam"
        broad_runner.run_fn("picard_reorder", out_fix_bam, reference, order_bam)
        index(order_bam, config)
        if bowtie2:
            order_bam = _set_quality(order_bam)
            index(order_bam, config)
    return order_bam
def _trimming(in_fastq, out_dir, sample, is_rrbs, is_directional):
    """
    Trimming reads using trim_galore

    trim_galore runs into a temporary directory that is then moved to
    ``<out_dir>/<sample>``. The run is skipped when the expected trimmed
    file (``<sample>_trimmed.fq``, with ``.gz`` appended when *in_fastq*
    ends in ``gz``) is already present.

    *is_rrbs* adds ``--rrbs`` when truthy and contributes nothing otherwise;
    *is_directional* adds ``--non_directional`` when falsy.

    Returns the trimmed file path; an ``AssertionError`` is raised when the
    file is missing afterwards.
    """
    trim_galore = find_cmd("trim_galore")
    # A falsy is_rrbs must become an empty string: formatting the raw value
    # would put the literal text "False" or "None" on the command line.
    rrbs_flag = "--rrbs" if is_rrbs else ""
    direction_flag = "" if is_directional else "--non_directional"
    cmd = "{trim_galore} {is_rrbs} {is_directional} --length 30 --quality 30 {in_fastq} -o {tx_dir}"
    trimming = op.join(out_dir, sample, sample + "_trimmed.fq")
    if in_fastq.endswith("gz"):
        trimming += ".gz"
    with chdir(out_dir):
        if not file_exists(trimming):
            with tx_tmpdir() as tx_dir:
                # Format once so the logged and executed commands match.
                full_cmd = cmd.format(
                    trim_galore=trim_galore,
                    is_rrbs=rrbs_flag,
                    is_directional=direction_flag,
                    in_fastq=in_fastq,
                    tx_dir=tx_dir,
                )
                logger.debug(full_cmd)
                run(full_cmd, "trim_galore in %s" % in_fastq)
                shutil.move(tx_dir, op.join(out_dir, sample))
    assert op.exists(trimming), "trimming file doesn't exists:%s" % trimming
    return trimming
def _count_covars(in_bam, sample, workdir, snp, reference, config):
    """
    Covariate counting with the BisSNP ``BisulfiteCountCovariates`` walker.

    Runs inside *workdir* with *snp* as the known-sites file and writes the
    recalibration table to ``<sample>_recal1.csv`` through a file
    transaction. Thread count and JVM heap size come from
    ``config['algorithm']`` (``cores`` default 1, ``memory`` default 4).
    Nothing is run when the table already exists.

    Returns the recalibration table path.
    """
    bissnp = do.find_cmd("bissnp")
    algorithm = config['algorithm']
    num_cores = algorithm.get('cores', 1)
    heap_gb = algorithm.get('memory', 4)
    jvm_opts = "-Xms750m -Xmx%sg" % heap_gb
    template = ("{bissnp} {jvm_opts} -R {reference} -I {in_bam} "
                "-T BisulfiteCountCovariates "
                "-knownSites {snp} "
                "-cov ReadGroupCovariate "
                "-cov QualityScoreCovariate "
                "-cov CycleCovariate "
                "-recalFile {tx_out} "
                "-nt {num_cores} ")
    with chdir(workdir):
        out_count = op.join(workdir, sample + "_recal1.csv")
        if not file_exists(out_count):
            with file_transaction(out_count) as tx_out:
                command = template.format(
                    bissnp=bissnp,
                    jvm_opts=jvm_opts,
                    reference=reference,
                    in_bam=in_bam,
                    snp=snp,
                    tx_out=tx_out,
                    num_cores=num_cores,
                )
                log.logger.debug(command)
                do.run(command, "BisSNP countcovarts in %s" % in_bam)
    return out_count