def run_gatk():
    """Call somatic variants with GATK MuTect2 on the tumor/normal pair.

    Reads the module-level settings `samtools`, `gatk`, `tumor`, `normal`,
    `ref`, `outfile`, `nthread`, `tmpdir`, `mem`, `gz` and the shared
    `params` mapping (which is updated in place).

    Steps:
      1. Write the first field of the first line of `samtools idxstats`
         for the tumor file into the interval list file.
      2. Run `gatk -T MuTect2`, restricted (`-L`) to that interval file.
      3. gzip the output file when `gz` is truthy.
    """
    # generate interval list file
    intvfile = {{job.outdir | path.join: "interval.list" | quote}}
    cmd = '{samtools} idxstats {tumor!r} | head -1 | cut -f1 > {intvfile!r}'.format(
        samtools = samtools,
        tumor    = tumor,
        intvfile = intvfile
    )
    runcmd(cmd)

    # Keep the converted memory setting under its own local name. Assigning
    # to `mem` here would make `mem` local to this function, and reading it
    # in `mem2(mem, ...)` would then raise UnboundLocalError.
    javamem = mem2(mem, 'java')

    params['I:tumor']  = tumor
    params['I:normal'] = normal
    params.R   = ref
    params.o   = outfile
    params.nct = nthread
    params.L   = intvfile

    cmd = '{gatk} -T MuTect2 {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
        gatk   = gatk,
        mem    = javamem,
        tmpdir = tmpdir,
        args   = cmdargs(params, dash = '-', equal = ' ')
    )
    runcmd(cmd)

    if gz:
        runcmd(['gzip', outfile])
def run_picard():
    """Convert/sort/dedup/index the input alignment file with Picard.

    Uses the module-level `steps` flags (`sort`, `markdup`, `rmdup`,
    `index`) to decide which Picard tools to run. When no step is
    requested the input is only converted with SamFormatConverter.
    Otherwise each enabled step writes an intermediate file under
    `joboutdir`, the previous intermediate is removed (the original job
    input is never removed), and the final file is moved to `outfile`.

    The module-level `infile` is rebound as the pipeline advances, hence
    the `global` declaration.
    """
    global infile

    mem = mem2(argsmem, '-jdict')
    mem['-Djava.io.tmpdir'] = tmpdir
    shellpicard = Shell(subcmd = True, dash = '', equal = '=').picard(**mem)

    if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
        # Nothing but a format conversion was requested.
        shellpicard.SamFormatConverter(TMP_DIR = tmpdir, I = infile, O = outfile).run()
    else:
        bamfile = outfile
        if steps.sort:
            bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
            # The Picard tool is `SortSam` (there is no `ShortSam`); SO is
            # its sort-order argument.
            shellpicard.SortSam(TMP_DIR = tmpdir, I = infile, O = bamfile, SO = sortby).run()
            # Only delete intermediates, never the original job input.
            if infile != {{i.infile | quote}}:
                shell.rm(f = True, _ = infile)
            infile = bamfile
        if steps.markdup:
            # Duplication metrics are discarded.
            mfile = "/dev/null"
            bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
            shellpicard.MarkDuplicates(
                REMOVE_DUPLICATES = 'true' if steps.rmdup else 'false',
                TMP_DIR = tmpdir,
                I = infile,
                O = bamfile,
                M = mfile
            ).run()
            if infile != {{i.infile | quote}}:
                shell.rm(f = True, _ = infile)
            infile = bamfile
        if steps.index:
            shellpicard.BuildBamIndex(TMP_DIR = tmpdir, I = infile, O = outfile + '.bai').run()
        if infile != outfile:
            # Carry an index sitting next to the intermediate file along.
            if path.exists(infile + '.bai'):
                shell.mv(infile + '.bai', outfile + '.bai')
            shell.mv(infile, outfile)
def run_samtools():
    """Convert/sort/dedup/index the input alignment file with samtools.

    Uses the module-level `steps` flags (`sort`, `markdup`, `rmdup`,
    `index`) to decide which samtools sub-commands to run. When no step
    is requested the input is only converted with `samtools view`.
    Otherwise each enabled step writes an intermediate file under
    `joboutdir`, the previous intermediate is removed (the original job
    input is never removed), and the final file is moved to `outfile`.

    The module-level `infile` is rebound as the pipeline advances, hence
    the `global` declaration.
    """
    global infile

    if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
        # Nothing but a format conversion was requested.
        subshell.samtools.view(b = True, o = outfile, O = 'bam', _ = infile).run()
    else:
        bamfile = outfile
        if steps.sort:
            mem = mem2(argsmem, 'M')
            bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
            subshell.samtools.sort(
                m = mem + 'M',
                n = sortby == 'queryname',
                o = bamfile,
                T = tmpdir,
                O = 'bam',
                _ = infile,
                **{'@': nthread}
            ).run()
            # Only delete intermediates, never the original job input.
            if infile != {{i.infile | quote}}:
                shell.rm(infile, f = True)
            infile = bamfile
        if steps.markdup or steps.rmdup:
            bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
            # `rmdup` is a samtools sub-command, so it is invoked through
            # `subshell.samtools` like view/sort/index.
            subshell.samtools.rmdup(infile, bamfile).run()
            if infile != {{i.infile | quote}}:
                shell.rm(infile, f = True)
            infile = bamfile
        if steps.index:
            # Index the file that currently holds the data (`infile`): when
            # indexing is the only step, `bamfile` is still `outfile`, which
            # has not been created yet. `.run()` executes the command, as
            # for every other samtools call in this function.
            subshell.samtools.index(infile, outfile + '.bai').run()
        if infile != outfile:
            # Carry an index sitting next to the intermediate file along.
            if path.exists(infile + '.bai'):
                shell.mv(infile + '.bai', outfile + '.bai')
            shell.mv(infile, outfile)
def run_virmid():
    """Call somatic variants with Virmid and move the result to `outfile`.

    Fills the shared `params` mapping with the reference (`R`), tumor
    (`D`), normal (`N`) and working directory (`w`), runs the virmid
    command, moves the `*.virmid.som.passed.vcf` file from `joboutdir`
    to `outfile`, and gzips it when `gz` is truthy.
    """
    params.R = ref
    params.D = tumor
    params.N = normal
    params.w = joboutdir

    cmd = '{virmid} {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
        virmid = virmid,
        mem    = mem2(mem, 'java'),
        tmpdir = tmpdir,
        args   = cmdargs(params)
    )
    # The command has to be executed before its output can be collected;
    # previously it was only built and never run.
    runcmd(cmd)

    # NOTE(review): the source path contains a glob; this relies on runcmd
    # passing list commands through a shell -- confirm.
    runcmd(['mv', path.join(joboutdir, '*.virmid.som.passed.vcf'), outfile])
    if gz:
        runcmd(['gzip', outfile])
def run_gatk():
    """Realign around indels and recalibrate base qualities with GATK.

    Runs four GATK walkers in sequence, each seeded from the matching
    sub-mapping of the shared `params` (an empty Box when absent):

      1. RealignerTargetCreator -> `<outprefix>.intervals`
      2. IndelRealigner         -> `<outprefix>.ir.bam`
      3. BaseRecalibrator       -> `<outprefix>.recaltable`
      4. PrintReads             -> `outfile`

    Intermediate files live in `joboutdir`. The realigned BAM is removed
    afterwards and the index file is moved next to `outfile`.
    """
    mem = mem2(argsmem, '-jdict')
    intfile = path.join(joboutdir, outprefix + '.intervals')
    # JVM options are handed to the wrapper as positional arguments (`_`),
    # so the temp-dir setting is stored as a key like the memory flags.
    mem['-Djava.io.tmpdir={}'.format(shell.shquote(tmpdir))] = True
    gatksh = Shell(equal=' ', dash='-').gatk

    rtcparams = params.get('RealignerTargetCreator', Box())
    rtcparams.T = 'RealignerTargetCreator'
    rtcparams.R = ref
    rtcparams.I = infile
    rtcparams.o = intfile
    rtcparams.nt = nthread
    rtcparams._ = list(mem.keys())
    gatksh(**rtcparams).run()

    bamfileir = path.join(joboutdir, outprefix + '.ir.bam')
    irparams = params.get('IndelRealigner', Box())
    irparams.T = 'IndelRealigner'
    irparams.R = ref
    irparams.I = infile
    irparams.o = bamfileir
    irparams._ = list(mem.keys())
    irparams.targetIntervals = intfile
    gatksh(**irparams).run()

    recaltable = path.join(joboutdir, outprefix + '.recaltable')
    brparams = params.get('BaseRecalibrator', Box())
    brparams.T = 'BaseRecalibrator'
    brparams.R = ref
    brparams.I = bamfileir
    brparams.o = recaltable
    brparams.nct = nthread
    brparams._ = list(mem.keys())
    brparams.knownSites = knownSites
    gatksh(**brparams).run()

    prparams = params.get('PrintReads', Box())
    prparams.T = 'PrintReads'
    prparams.R = ref
    prparams.I = bamfileir
    prparams.o = outfile
    prparams.nct = nthread
    prparams._ = list(mem.keys())
    # Apply the table computed by BaseRecalibrator; without -BQSR the
    # reads are written out unrecalibrated. A caller-supplied value wins.
    if 'BQSR' not in prparams:
        prparams.BQSR = recaltable
    gatksh(**prparams).run()

    shell.rm(bamfileir, f=True)
    # Resolve the index path against joboutdir like the other intermediates.
    # An absolute `outprefix` is left unchanged by path.join.
    # NOTE(review): assumes the index is written as `<outprefix>.bai` in
    # joboutdir -- confirm.
    shell.mv(path.join(joboutdir, outprefix + '.bai'), outfile + '.bai')
def run_gatk():
    """Call variants with GATK HaplotypeCaller.

    Fills the shared `params` mapping with the walker name, reference,
    input, output and thread count, passes the JVM options as positional
    arguments (`_`), runs GATK, and gzips `outfile` when `gz` is truthy.
    """
    gatkmem = mem2(mem, 'jdict')
    # NOTE(review): these keys are passed positionally and carry no leading
    # dash here, whereas the sibling GATK block uses '-jdict' and
    # '-Djava.io.tmpdir=...'; confirm what mem2(..., 'jdict') returns.
    gatkmem['Djava.io.tmpdir={!r}'.format(tmpdir)] = True

    # Select the gatk command on the shell wrapper; calling the bare Shell
    # instance names no executable to run.
    gatksh = Shell(equal = ' ', dash = '-').gatk

    params.T   = 'HaplotypeCaller'
    params.R   = ref
    params.I   = infile
    params.o   = outfile
    params.nct = nthread
    params._   = list(gatkmem.keys())
    gatksh(**params).run()

    if gz:
        shell.gzip(outfile)
def run_strelka():
    """Run the Strelka somatic workflow and merge its SNV/indel calls.

    First runs the configuration command with the tumor/normal BAMs, the
    reference and `joboutdir` as run directory, then executes the
    `runWorkflow.py` found in `joboutdir` locally with the configured
    memory and thread count. The SNV and indel VCFs from
    `results/variants` are merged into `outfile` via `_mergeAndAddGT`,
    and the result is gzipped when `gz` is truthy.
    """
    # Step 1: configuration.
    config = {{args.configParams | repr}}
    config.normalBam      = normal
    config.tumorBam       = tumor
    config.referenceFasta = ref
    config.runDir         = joboutdir
    configure_cmd = '{strelka} {args}'.format(
        strelka = strelka,
        args    = cmdargs(config)
    )
    runcmd(configure_cmd)

    # Step 2: run the generated workflow script.
    params.m = 'local'
    # mem2(..., 'G') result has its last character dropped before use.
    params.g = mem2(mem, 'G')[:-1]
    params.j = nthread
    workflow_cmd = '{joboutdir}/runWorkflow.py {args}'.format(
        joboutdir = joboutdir,
        args      = cmdargs(params)
    )
    runcmd(workflow_cmd)

    # Step 3: collect the results.
    variants_dir = path.join(joboutdir, 'results', 'variants')
    snvvcf = path.join(variants_dir, 'somatic.snvs.vcf.gz')
    indvcf = path.join(variants_dir, 'somatic.indels.vcf.gz')
    _mergeAndAddGT(snvvcf, indvcf, outfile)

    if gz:
        runcmd(['gzip', outfile])
def run_strelka():
    """Run the Strelka workflow on a single input and collect the VCF.

    Configures the run (input BAM, reference, `joboutdir` as run
    directory), executes the `runWorkflow.py` found in `joboutdir`
    locally with the configured thread count and memory, then moves
    `results/variants/genome.S1.vcf.gz` to `outfile + '.gz'` and
    gunzips it unless `gz` is truthy.
    """
    # Configuration step.
    cfgParams.bam            = infile
    cfgParams.referenceFasta = ref
    cfgParams.runDir         = joboutdir
    configure = Shell().strelka(**cfgParams)
    configure.run()

    # Execute the generated workflow script.
    params.m = 'local'
    params.j = nthread
    # mem2(..., 'G') result has its last character dropped before use.
    params.g = mem2(mem, 'G')[:-1]
    workflow_shell = Shell({'runWorkflow': path.join(joboutdir, 'runWorkflow.py')})
    workflow_shell.runWorkflow(**params).run()

    # Put the compressed result where the job expects it.
    compressed_out = outfile + '.gz'
    result_vcf = path.join(joboutdir, 'results', 'variants', 'genome.S1.vcf.gz')
    shell.mv(result_vcf, compressed_out)
    if not gz:
        shell.gunzip(compressed_out)
def run_biobambam():
    """Sort (and optionally dedup/index) the input with biobambam.

    Fills the shared `params` mapping with `key=value` options and runs
    the `biobambam` command once. Indexing, duplicate marking and
    duplicate removal are switched by the module-level `steps` flags.
    """
    mem = mem2(argsmem, 'M')

    if steps.index:
        params.index         = 1
        params.indexfilename = outfile + '.bai'

    params.I  = infile
    params.O  = outfile
    params.SO = sortby
    # The memory buffer option is spelled `blockmb` (was `blockme`).
    params.blockmb       = mem
    params.tmpfile       = path.join(tmpdir, 'biobambam.tmp')
    params.inputformat   = infmt
    # The output format option is spelled `outputformat` (was `outfmt`),
    # mirroring `inputformat` above.
    params.outputformat  = 'bam'
    params.inputthreads  = nthread
    params.outputthreads = nthread
    params.markduplicates = int(steps.markdup)
    params.rmdup          = int(steps.rmdup)

    Shell(dash = '', equal = '=').biobambam(**params).run()
from sys import stderr from shutil import move from pyppl import Box from bioprocs.utils import runcmd, mem2, cmdargs params = {{args.params}} try: {% case args.tool %} {% when 'trimmomatic' %} mem = mem2 ({{args.mem | quote}}, "java") minlen = str({{args.minlen}} * 2) adfile = "{{job.outdir}}/adapters.fa" with open (adfile, "w") as ad: ad.write (">TruSeq3_IndexedAdapter\n") ad.write ({{args.adapter | quote}} + "\n") params['threads'] = {{args.nthread}} cmd = '{{args.trimmomatic}} %s SE %s "{{in.fq}}" "{{out.outfq}}" ILLUMINACLIP:%s:2:30:10 LEADING:{{args.cut5}} TRAILING:{{args.cut3}} SLIDINGWINDOW:4:{{args.minq}} MINLEN:%s' % (mem, cmdargs(params, dash = '-', equal = ' '), adfile, minlen) runcmd (cmd) {% when 'cutadapt' %} params['a'] = {{args.adapter | quote}} params['u'] = "{{args.cut5}}" params['u'] = "-{{args.cut3}}" params['m'] = {{args.minlen}} params['q'] = "{{args.minq}},{{args.minq}}" params['o'] = {{out.outfq | quote}} cmd = '{{args.cutadapt}} %s "{{in.fq}}"' % cmdargs(params, dash = '-', equal = ' ') runcmd (cmd)
cmd = '{{args.biobambam}} %s' % cmdargs(params, dash = '', equal = '=') runcmd (cmd) {% when 'bedtools' %} params['i'] = infile params['fq'] = fqfile1 params['fq2'] = fqfile2 cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ') runcmd (cmd) {% when 'samtools' %} params['t'] = True params['1'] = fqfile1 params['2'] = fqfile2 cmd = '{{args.samtools}} fastq %s "%s"' % (cmdargs(params, dash = '-', equal = ' '), infile) runcmd (cmd) {% when 'picard' %} mem = mem2({{ args.mem | quote }}, 'Java') params[mem] = True params['-Djava.io.tmpdir'] = tmpdir params['TMP_DIR'] = tmpdir params['I'] = infile params['F'] = fqfile1 params['F2'] = fqfile2 cmd = '{{args.picard}} SamToFastq %s' % cmdargs(params, dash='', equal='=') runcmd (cmd) {% endcase %} {% if args.gz %} runcmd ('gzip "%s"' % (fqfile1)) runcmd ('gzip "%s"' % (fqfile2)) {% endif %} except Exception as ex:
if not rg['ID']: g = re.search (r'[^a-zA-Z0-9]+(L\\d+)[^a-zA-Z0-9]+', "{{o.outfile | fn}}") rg['ID'] = g.group(1) if g else "{{o.outfile | fn}}.L{{job.index}}" if not rg['SM']: rg['SM'] = "{{o.outfile | fn}}" tmpdir = path.join ("{{args.tmpdir}}", "{{proc.id}}.{{i.infile | fn}}.{{job.index}}") if not path.exists (tmpdir): makedirs (tmpdir) params = {{args.params}} try: {% case args.tool %} ############## picard {% when 'picard' %} mem = mem2({{ args.mem | quote }}) params['-Djava.io.tmpdir'] = tmpdir params['TMP_DIR'] = tmpdir params['I'] = {{i.infile | quote}} params['O'] = {{o.outfile | quote}} for k,v in rg.items(): params['RG' + k] = v runcmd ('{{args.picard}} AddOrReplaceReadGroups %s %s' % (mem, cmdargs(params, dash='', equal='='))) ############## bamutil {% when 'bamutil' %} params['RG'] = "@RG\\tID:%s\\t%s" % (rg['ID'], "\\t".join([k + ":" + v for k,v in rg.items() if k!='ID'])) params['in'] = {{i.infile | quote}} params['out'] = {{o.outfile | quote}}
# Job settings rendered into the script by the template engine.
tool = {{args.tool | repr}}
picard = {{args.picard | repr}}
chain = {{args.lochain | repr}}
ref = {{args.ref | repr}}
params = {{args.params | repr}}
mem = {{args.mem | repr}}
tmpdir = {{args.tmpdir | repr}}
# A chain file is mandatory; abort the job with exit status 1 without it.
if not chain:
    logger.error('Chain file (args.lochain) not provided!')
    exit(1)
# picard LiftoverVcf -Xmx4g -Xms1g I=TCGA-05-4382-10.vcf O=1.vcf CHAIN=liftovers/hg38ToHg19.over.chain.gz R=ucsc_hg19.fa REJECT=r.vcf
if tool == 'picard':
    # Picard takes KEY=value arguments; params is rendered with dash=''
    # and equal='=' below.
    params.I = infile
    params.O = outfile
    params.CHAIN = chain
    # REJECT is set to umfile, which is defined outside this fragment.
    params.REJECT = umfile
    params.R = ref
    javamem = mem2(mem, 'java')
    # Each whitespace-separated item of javamem becomes a flag-style key:
    # its first character is replaced by '-' and the value is True.
    # NOTE(review): presumably the items already start with '-' -- confirm
    # against mem2.
    for jm in javamem.split():
        params['-' + jm[1:]] = True
    params['-Djava.io.tmpdir'] = tmpdir
    cmd = '{picard} LiftoverVcf {params}'
    runcmd(
        cmd.format(picard=picard, params=cmdargs(params, equal='=', dash='')))