def run_picard():
    """Convert the input to BAM with Picard, optionally sorting, marking or
    removing duplicates, and indexing, as selected by `steps`.

    With no step enabled, the input is only converted to `outfile` with
    SamFormatConverter. Otherwise every enabled step writes an intermediate
    file under `joboutdir`, the global `infile` is advanced to that file, and
    at the end the current `infile` (and its '.bai', if one exists) is moved
    to `outfile`.
    """
    global infile

    # The mapping returned by mem2 is expanded into the picard call; the Java
    # temp-dir property is added to it first.
    mem = mem2(argsmem, '-jdict')
    mem['-Djava.io.tmpdir'] = tmpdir
    shellpicard = Shell(subcmd=True, dash='', equal='=').picard(**mem)

    if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
        # Pure format conversion: nothing else was requested.
        shellpicard.SamFormatConverter(
            TMP_DIR=tmpdir, I=infile, O=outfile
        ).run()
        return

    if steps.sort:
        bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
        # The Picard tool is named SortSam (the previous 'ShortSam' is not a
        # Picard tool name).
        shellpicard.SortSam(
            TMP_DIR=tmpdir, I=infile, O=bamfile, SO=sortby
        ).run()
        # Only intermediates are deleted, never the job's original input.
        if infile != {{i.infile | quote}}:
            shell.rm(f=True, _=infile)
        infile = bamfile

    if steps.markdup:
        # The metrics file is discarded.
        mfile = "/dev/null"
        bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
        shellpicard.MarkDuplicates(
            REMOVE_DUPLICATES='true' if steps.rmdup else 'false',
            TMP_DIR=tmpdir,
            I=infile,
            O=bamfile,
            M=mfile,
        ).run()
        if infile != {{i.infile | quote}}:
            shell.rm(f=True, _=infile)
        infile = bamfile

    if steps.index:
        shellpicard.BuildBamIndex(
            TMP_DIR=tmpdir, I=infile, O=outfile + '.bai'
        ).run()

    # Promote the last file produced (and any index sitting next to it) to
    # the final output location.
    if infile != outfile:
        if path.exists(infile + '.bai'):
            shell.mv(infile + '.bai', outfile + '.bai')
        shell.mv(infile, outfile)
def run_samtools():
    """Convert the input to BAM with samtools, optionally sorting, removing
    duplicates, and indexing, as selected by `steps`.

    With no step enabled, the input is only converted to `outfile` with
    `samtools view`. Otherwise every enabled step writes an intermediate file
    under `joboutdir`, the global `infile` is advanced to that file, and at
    the end the current `infile` (and its '.bai', if one exists) is moved to
    `outfile`.
    """
    global infile

    if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
        # Pure format conversion: nothing else was requested.
        subshell.samtools.view(b=True, o=outfile, O='bam', _=infile).run()
        return

    if steps.sort:
        mem = mem2(argsmem, 'M')
        bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
        subshell.samtools.sort(
            m=mem + 'M',
            n=sortby == 'queryname',
            o=bamfile,
            T=tmpdir,
            O='bam',
            _=infile,
            # '@' is not a valid Python keyword, hence the dict expansion.
            **{'@': nthread}
        ).run()
        # Only intermediates are deleted, never the job's original input.
        if infile != {{i.infile | quote}}:
            shell.rm(infile, f=True)
        infile = bamfile

    if steps.markdup or steps.rmdup:
        bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
        # rmdup is a samtools subcommand, so it is reached through
        # subshell.samtools like view/sort/index.
        subshell.samtools.rmdup(infile, bamfile).run()
        if infile != {{i.infile | quote}}:
            shell.rm(infile, f=True)
        infile = bamfile

    if steps.index:
        # Index the file that currently holds the data (`infile`); `outfile`
        # does not exist until the move below. `.run()` matches how every
        # other command in this script is executed.
        subshell.samtools.index(infile, outfile + '.bai').run()

    # Promote the last file produced (and any index sitting next to it) to
    # the final output location.
    if infile != outfile:
        if path.exists(infile + '.bai'):
            shell.mv(infile + '.bai', outfile + '.bai')
        shell.mv(infile, outfile)
def run_gatk():
    """Realign around indels and recalibrate base qualities with GATK.

    Runs four GATK tools in sequence, each taking its extra options from the
    matching entry of `params` (an empty Box when absent):

    1. RealignerTargetCreator: `infile` -> '<outprefix>.intervals'
    2. IndelRealigner:         `infile` + intervals -> '<outprefix>.ir.bam'
    3. BaseRecalibrator:       realigned BAM -> '<outprefix>.recaltable'
    4. PrintReads:             realigned BAM + table -> `outfile`

    The intermediate realigned BAM is removed afterwards and the index
    '<outprefix>.bai' is renamed to `outfile + '.bai'`. All intermediates
    live under `joboutdir`.
    """
    mem = mem2(argsmem, '-jdict')
    # Only the keys of `mem` are passed on (as positional arguments), so the
    # temp-dir option is stored as a ready-made key.
    mem['-Djava.io.tmpdir={}'.format(shell.shquote(tmpdir))] = True
    javaopts = list(mem.keys())
    gatksh = Shell(equal=' ', dash='-').gatk

    intfile = path.join(joboutdir, outprefix + '.intervals')
    rtcparams = params.get('RealignerTargetCreator', Box())
    rtcparams.T = 'RealignerTargetCreator'
    rtcparams.R = ref
    rtcparams.I = infile
    rtcparams.o = intfile
    rtcparams.nt = nthread
    rtcparams._ = list(javaopts)
    gatksh(**rtcparams).run()

    bamfileir = path.join(joboutdir, outprefix + '.ir.bam')
    irparams = params.get('IndelRealigner', Box())
    irparams.T = 'IndelRealigner'
    irparams.R = ref
    irparams.I = infile
    irparams.o = bamfileir
    irparams._ = list(javaopts)
    irparams.targetIntervals = intfile
    gatksh(**irparams).run()

    recaltable = path.join(joboutdir, outprefix + '.recaltable')
    brparams = params.get('BaseRecalibrator', Box())
    brparams.T = 'BaseRecalibrator'
    brparams.R = ref
    brparams.I = bamfileir
    brparams.o = recaltable
    brparams.nct = nthread
    brparams._ = list(javaopts)
    brparams.knownSites = knownSites
    gatksh(**brparams).run()

    prparams = params.get('PrintReads', Box())
    prparams.T = 'PrintReads'
    prparams.R = ref
    prparams.I = bamfileir
    prparams.o = outfile
    prparams.nct = nthread
    prparams._ = list(javaopts)
    # Apply the table computed above; without -BQSR, PrintReads writes the
    # reads with their original qualities and the recalibration is lost.
    prparams.BQSR = recaltable
    gatksh(**prparams).run()

    shell.rm(bamfileir, f=True)
    # Look for the index under joboutdir like every other artefact here
    # (an absolute outprefix still resolves to the same path as before).
    shell.mv(path.join(joboutdir, outprefix + '.bai'), outfile + '.bai')
def run_sambamba():
    """Convert the input to BAM with sambamba, optionally sorting, marking or
    removing duplicates, and indexing, as selected by `steps`.

    With no step enabled, the input is only converted to `outfile` with
    `sambamba view`. Otherwise SAM input is first converted to an
    intermediate BAM, every enabled step writes a further intermediate under
    `joboutdir`, the global `infile` is advanced to that file, and at the end
    the current `infile` (and its '.bai', if one exists) is moved to
    `outfile`.

    Note that the sort step stores its options in the shared `params` object.
    """
    global infile

    if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
        # Pure format conversion: nothing else was requested.
        subshell.sambamba.view(
            S=True, f='bam', o=outfile, t=nthread, _=infile
        ).run()
        return

    if infmt == 'sam':
        # SAM input is converted to BAM before any further step.
        bamfile = path.join(joboutdir, inprefix + '.s2b.bam')
        subshell.sambamba.view(
            S=True, f='bam', o=bamfile, t=nthread, _=infile
        ).run()
        infile = bamfile

    if steps.sort:
        if sortby == 'queryname':
            params.n = True
            params.N = True
        bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
        params.m = argsmem
        params.tmpdir = tmpdir
        params.o = bamfile
        params.t = nthread
        params._ = infile
        subshell.sambamba.sort(**params).run()
        # Only intermediates are deleted, never the job's original input.
        if infile != {{i.infile | quote}}:
            shell.rm(f=True, _=infile)
        infile = bamfile

    if steps.markdup:
        bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
        subshell.sambamba.markdup(
            r=steps.rmdup, t=nthread, tmpdir=tmpdir, _=[infile, bamfile]
        ).run()
        if infile != {{i.infile | quote}}:
            shell.rm(f=True, _=infile)
        infile = bamfile

    if steps.index:
        if path.exists(infile + '.bai'):
            # An index already sits next to the current file; reuse it.
            shell.mv(infile + '.bai', outfile + '.bai')
        else:
            # `.run()` matches how every other sambamba command in this
            # function is executed.
            subshell.sambamba.index(
                t=nthread, _=[infile, infile + '.bai']
            ).run()

    # Promote the last file produced (and any index sitting next to it) to
    # the final output location.
    if infile != outfile:
        if path.exists(infile + '.bai'):
            shell.mv(infile + '.bai', outfile + '.bai')
        shell.mv(infile, outfile)
# NOTE(review): this top-level move repeats the last statement of run_gatk
# and executes before any tool is dispatched below; it looks like a paste
# leftover -- confirm whether it is intended before removing it.
shell.mv(outprefix + '.bai', outfile + '.bai')


def run_bamutil():
    """Recalibrate base qualities with bamUtil's `recab` subcommand.

    Fills the shared `params` object with the input/output/reference options
    (plus `dbsnp` when `knownSites` is set) and runs `bamutil recab`.

    When the reference cache '<ref without extension>-bs.umfa' does not exist
    yet, the command is handed to `Poll.first` rather than run directly.
    NOTE(review): presumably so that a single job builds the cache -- confirm
    against the Poll implementation.
    """
    bush = Shell(subcmd=True, equal=' ').bamutil
    if knownSites:
        params.dbsnp = knownSites
    # 'in' is a Python keyword, so it cannot be set via attribute access.
    params['in'] = infile
    params.out = outfile
    params.refFile = ref
    params.verbose = True

    refcache = path.splitext(ref)[0] + '-bs.umfa'
    if not path.isfile(refcache):
        poll = Poll(workdir, joblen, jobindex)
        # Same subcommand as the direct branch below; bamUtil's
        # recalibration subcommand is 'recab' (the previous 'recal' does not
        # match it).
        poll.first(lambda **kwargs: bush.recab(**kwargs).run(), **params)
    else:
        bush.recab(**params).run()


# Supported recalibration back-ends, keyed by the value of `tool`.
tools = dict(gatk=run_gatk, bamutil=run_bamutil)

try:
    # Resolve the tool before running it, so that a KeyError raised inside a
    # tool function is not misreported as an unsupported tool.
    runner = tools.get(tool)
    if runner is None:
        raise KeyError('Tool {!r} not supported.'.format(tool))
    try:
        runner()
    except Exception as ex:
        stderr.write("Job failed: %s" % str(ex))
        raise
finally:
    # The temporary directory is removed whether or not the job succeeded.
    shell.rm(tmpdir, r=True, f=True)