Beispiel #1
0
def run_picard():
	"""Convert, sort, mark/remove duplicates and index alignments with Picard.

	Uses the job settings defined at module level (``steps``, ``sortby``,
	``tmpdir``, ``joboutdir``, ``inprefix``, ``outfile``, ``argsmem``). The
	global ``infile`` is rebound to the newest intermediate file after each
	step, and the final file is moved to ``outfile``.
	"""
	global infile
	# Options handed to the picard launcher itself: whatever mem2 derives from
	# argsmem, plus the Java temp directory.
	mem = mem2(argsmem, '-jdict')
	mem['-Djava.io.tmpdir'] = tmpdir
	shellpicard = Shell(subcmd = True, dash = '', equal = '=').picard(**mem)
	if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
		# No processing step requested: only convert the file format.
		shellpicard.SamFormatConverter(TMP_DIR = tmpdir, I = infile, O = outfile).run()
	else:
		bamfile = outfile
		if steps.sort:
			bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
			# Picard's sorting tool is named SortSam.
			shellpicard.SortSam(TMP_DIR = tmpdir, I = infile, O = bamfile, SO = sortby).run()
			# Delete the previous file only when it is an intermediate, i.e. it
			# differs from the path rendered for the i.infile placeholder.
			if infile != {{i.infile | quote}}:
				shell.rm(f = True, _ = infile)
			infile = bamfile
		if steps.markdup:
			# The duplication metrics file is discarded.
			mfile = "/dev/null"
			bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
			shellpicard.MarkDuplicates(REMOVE_DUPLICATES = 'true' if steps.rmdup else 'false', TMP_DIR = tmpdir, I = infile, O = bamfile, M = mfile).run()
			if infile != {{i.infile | quote}}:
				shell.rm(f = True, _ = infile)
			infile = bamfile
		if steps.index:
			shellpicard.BuildBamIndex(TMP_DIR = tmpdir, I = infile, O = outfile + '.bai').run()
		# Put the last produced file (and its index, if one sits beside it) at
		# the final output location.
		if infile != outfile:
			if path.exists(infile + '.bai'):
				shell.mv(infile + '.bai', outfile + '.bai')
			shell.mv(infile, outfile)
Beispiel #2
0
def run_samtools():
	"""Convert, sort, remove duplicates and index alignments with samtools.

	Uses the job settings defined at module level (``steps``, ``sortby``,
	``nthread``, ``tmpdir``, ``joboutdir``, ``inprefix``, ``outfile``,
	``argsmem``). The global ``infile`` is rebound to the newest intermediate
	file after each step, and the final file is moved to ``outfile``.
	"""
	global infile
	if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
		# No processing step requested: only convert to BAM.
		subshell.samtools.view(b = True, o = outfile, O = 'bam', _ = infile).run()
	else:
		bamfile = outfile
		if steps.sort:
			mem = mem2(argsmem, 'M')
			bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
			subshell.samtools.sort(
				m = mem + 'M',
				n = sortby == 'queryname',
				o = bamfile,
				T = tmpdir,
				O = 'bam',
				_ = infile,
				**{'@': nthread}
			).run()
			# Delete the previous file only when it is an intermediate, i.e. it
			# differs from the path rendered for the i.infile placeholder.
			if infile != {{i.infile | quote}}:
				shell.rm(infile, f = True)
			infile = bamfile
		if steps.markdup or steps.rmdup:
			bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
			# rmdup is a samtools subcommand, so it must be reached through
			# subshell.samtools like the other samtools calls.
			subshell.samtools.rmdup(infile, bamfile).run()
			if infile != {{i.infile | quote}}:
				shell.rm(infile, f = True)
			infile = bamfile
		if steps.index:
			# Index the file that currently exists (infile). When neither sort
			# nor dedup ran, bamfile still names the not-yet-created outfile.
			# The command is finalized with .run() like every other one here.
			subshell.samtools.index(infile, outfile + '.bai').run()
		# Put the last produced file (and its index, if one sits beside it) at
		# the final output location.
		if infile != outfile:
			if path.exists(infile + '.bai'):
				shell.mv(infile + '.bai', outfile + '.bai')
			shell.mv(infile, outfile)
Beispiel #3
0
def run_gatk():
    """Run indel realignment followed by base recalibration with GATK.

    Four GATK walkers are invoked in order: RealignerTargetCreator,
    IndelRealigner, BaseRecalibrator and PrintReads. Per-walker options are
    taken from the module-level ``params`` and completed with the job settings
    (``ref``, ``infile``, ``outfile``, ``nthread``, ``knownSites``).
    """
    mem = mem2(argsmem, '-jdict')
    intfile = path.join(joboutdir, outprefix + '.intervals')

    # The JVM options are carried as the keys of ``mem`` and handed to every
    # walker as positional arguments through ``_``.
    mem['-Djava.io.tmpdir={}'.format(shell.shquote(tmpdir))] = True
    gatksh = Shell(equal=' ', dash='-').gatk

    # Step 1: find the intervals that need realignment.
    rtcparams = params.get('RealignerTargetCreator', Box())
    rtcparams.T = 'RealignerTargetCreator'
    rtcparams.R = ref
    rtcparams.I = infile
    rtcparams.o = intfile
    rtcparams.nt = nthread
    rtcparams._ = list(mem.keys())
    gatksh(**rtcparams).run()

    # Step 2: realign reads over those intervals into an intermediate BAM.
    bamfileir = path.join(joboutdir, outprefix + '.ir.bam')
    irparams = params.get('IndelRealigner', Box())
    irparams.T = 'IndelRealigner'
    irparams.R = ref
    irparams.I = infile
    irparams.o = bamfileir
    irparams._ = list(mem.keys())

    irparams.targetIntervals = intfile
    gatksh(**irparams).run()

    # Step 3: build the recalibration table from the realigned BAM.
    recaltable = path.join(joboutdir, outprefix + '.recaltable')
    brparams = params.get('BaseRecalibrator', Box())
    brparams.T = 'BaseRecalibrator'
    brparams.R = ref
    brparams.I = bamfileir
    brparams.o = recaltable
    brparams.nct = nthread
    brparams._ = list(mem.keys())

    brparams.knownSites = knownSites
    gatksh(**brparams).run()

    # Step 4: write the final BAM. The table from step 3 has to be passed as
    # BQSR here, otherwise the recalibration is computed but never applied.
    prparams = params.get('PrintReads', Box())
    prparams.T = 'PrintReads'
    prparams.R = ref
    prparams.I = bamfileir
    prparams.o = outfile
    prparams.BQSR = recaltable
    prparams.nct = nthread
    prparams._ = list(mem.keys())
    gatksh(**prparams).run()

    # Drop the intermediate BAM and rename the index to <outfile>.bai.
    shell.rm(bamfileir, f=True)
    shell.mv(outprefix + '.bai', outfile + '.bai')
Beispiel #4
0
def run_sambamba():
	"""Convert, sort, mark/remove duplicates and index alignments with sambamba.

	Uses the job settings defined at module level (``steps``, ``sortby``,
	``infmt``, ``nthread``, ``tmpdir``, ``joboutdir``, ``inprefix``,
	``outfile``, ``argsmem``) and fills the module-level ``params`` with the
	sort options. The global ``infile`` is rebound to the newest intermediate
	file after each step, and the final file is moved to ``outfile``.
	"""
	global infile
	if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
		# No processing step requested: only convert to BAM.
		subshell.sambamba.view(S = True, f = 'bam', o = outfile, t = nthread, _ = infile).run()
	else:
		bamfile = outfile
		if infmt == 'sam':
			# SAM input is converted to BAM first; later steps read that BAM.
			bamfile = path.join(joboutdir, inprefix + '.s2b.bam')
			subshell.sambamba.view(S = True, f = 'bam', o = bamfile, t = nthread, _ = infile).run()
			infile = bamfile
		if steps.sort:
			if sortby == 'queryname':
				params.n = True
				params.N = True
			bamfile       = path.join(joboutdir, inprefix + '.sorted.bam')
			params.m      = argsmem
			params.tmpdir = tmpdir
			params.o      = bamfile
			params.t      = nthread
			params._      = infile
			subshell.sambamba.sort(**params).run()
			# Delete the previous file only when it is an intermediate, i.e. it
			# differs from the path rendered for the i.infile placeholder.
			if infile != {{i.infile | quote}}:
				shell.rm(f = True, _ = infile)
			infile = bamfile
		if steps.markdup:
			bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
			subshell.sambamba.markdup(r = steps.rmdup, t = nthread, tmpdir = tmpdir, _ = [infile, bamfile]).run()
			if infile != {{i.infile | quote}}:
				shell.rm(f = True, _ = infile)
			infile = bamfile
		if steps.index:
			if path.exists(infile + '.bai'):
				# An index already sits beside the current file: reuse it.
				shell.mv(infile + '.bai', outfile + '.bai')
			else:
				# Finalized with .run() like every other subshell command in
				# this script; without it the index command is only built.
				subshell.sambamba.index(t = nthread, _ = [infile, infile + '.bai']).run()
		# Put the last produced file (and its index, if one sits beside it) at
		# the final output location.
		if infile != outfile:
			if path.exists(infile + '.bai'):
				shell.mv(infile + '.bai', outfile + '.bai')
			shell.mv(infile, outfile)
Beispiel #5
0
    shell.mv(outprefix + '.bai', outfile + '.bai')


def run_bamutil():
    """Recalibrate base qualities with bamUtil's ``recab`` subcommand.

    Completes the module-level ``params`` with the input, output and reference
    settings (plus ``dbsnp`` when ``knownSites`` is set) and runs the command.
    """
    bush = Shell(subcmd=True, equal=' ').bamutil
    if knownSites:
        params.dbsnp = knownSites
    # 'in' is a Python keyword, so it cannot be set with attribute syntax.
    params['in'] = infile

    params.out = outfile
    params.refFile = ref
    params.verbose = True
    refcache = path.splitext(ref)[0] + '-bs.umfa'
    if not path.isfile(refcache):
        # The '-bs.umfa' file next to the reference does not exist yet, so the
        # run goes through Poll.first.
        # NOTE(review): presumably this lets a single job create the cache
        # while the others wait; confirm against Poll.
        poll = Poll(workdir, joblen, jobindex)
        # Same subcommand as the direct branch below: bamUtil's tool is
        # 'recab' ('recal' is not a bamUtil subcommand).
        poll.first(lambda **kwargs: bush.recab(**kwargs).run(), **params)
    else:
        bush.recab(**params).run()


# Map each supported tool name to the function that runs it.
tools = dict(gatk=run_gatk, bamutil=run_bamutil)
try:
    # Validate the tool name before calling anything, so that only an unknown
    # name is reported as unsupported. A KeyError raised inside a runner is a
    # job failure and must not be mislabelled as an unsupported tool.
    if tool not in tools:
        raise KeyError('Tool {!r} not supported.'.format(tool))
    try:
        tools[tool]()
    except Exception as ex:
        # Report the failure, then let it propagate to the caller.
        stderr.write("Job failed: %s" % str(ex))
        raise
finally:
    # The temp directory is removed whether the job succeeded or failed.
    shell.rm(tmpdir, r=True, f=True)