Beispiel #1
0
def run_gatk():
	"""Call variants on the tumor/normal pair with ``gatk -T MuTect2``.

	Writes an interval list derived from ``samtools idxstats`` on the tumor
	file, fills the module-level ``params`` with inputs/outputs, runs GATK and
	finally gzips ``outfile`` when ``gz`` is truthy.
	"""
	# generate interval list file
	# NOTE(review): `head -1` keeps only the first line of the idxstats output,
	# so the interval file passed as -L holds a single sequence name -- confirm
	# that this restriction is intended.
	intvfile = {{job.outdir | path.join: "interval.list" | quote}}
	cmd = '{samtools} idxstats {tumor!r} | head -1 | cut -f1 > {intvfile!r}'.format(
		samtools = samtools,
		tumor    = tumor,
		intvfile = intvfile
	)
	runcmd(cmd)

	# Use a distinct local name: assigning to `mem` inside this function would
	# make `mem` local and the read of the module-level `mem` on the right-hand
	# side would raise UnboundLocalError.
	javamem = mem2(mem, 'java')

	params['I:tumor']  = tumor
	params['I:normal'] = normal

	params.R   = ref
	params.o   = outfile
	params.nct = nthread
	params.L   = intvfile

	cmd = '{gatk} -T MuTect2 {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
		gatk   = gatk,
		mem    = javamem,
		tmpdir = tmpdir,
		args   = cmdargs(params, dash = '-', equal = ' ')
	)
	runcmd(cmd)
	if gz:
		runcmd(['gzip', outfile])
Beispiel #2
0
def run_picard():
	"""Produce ``outfile`` from the input using Picard.

	With no step enabled in ``steps`` the input is only format-converted.
	Otherwise the enabled steps run in the order sort, mark/remove
	duplicates, index; each step reads the current ``infile`` and the
	module-level ``infile`` is rebound to the step's output. Intermediate
	files (anything that is not the original job input) are deleted once the
	next file has been written, and the last file is moved to ``outfile``.
	"""
	global infile
	mem = mem2(argsmem, '-jdict')
	mem['-Djava.io.tmpdir'] = tmpdir
	shellpicard = Shell(subcmd = True, dash = '', equal = '=').picard(**mem)
	if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
		shellpicard.SamFormatConverter(TMP_DIR = tmpdir, I = infile, O = outfile).run()
	else:
		bamfile = outfile
		if steps.sort:
			bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
			# Picard's sorting tool is named SortSam.
			shellpicard.SortSam(TMP_DIR = tmpdir, I = infile, O = bamfile, SO = sortby).run()
			# Never delete the original job input, only intermediates.
			if infile != {{i.infile | quote}}:
				shell.rm(f = True, _ = infile)
			infile = bamfile
		if steps.markdup:
			# Duplication metrics are discarded.
			mfile = "/dev/null"
			bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
			shellpicard.MarkDuplicates(REMOVE_DUPLICATES = 'true' if steps.rmdup else 'false', TMP_DIR = tmpdir, I = infile, O = bamfile, M = mfile).run()
			if infile != {{i.infile | quote}}:
				shell.rm(f = True, _ = infile)
			infile = bamfile
		if steps.index:
			shellpicard.BuildBamIndex(TMP_DIR = tmpdir, I = infile, O = outfile + '.bai').run()
		if infile != outfile:
			# Move the final file (and an index sitting next to it) into place.
			if path.exists(infile + '.bai'):
				shell.mv(infile + '.bai', outfile + '.bai')
			shell.mv(infile, outfile)
Beispiel #3
0
def run_samtools():
	"""Produce ``outfile`` from the input using samtools.

	With no step enabled in ``steps`` the input is only converted with
	``samtools view``. Otherwise the enabled steps run in the order sort,
	duplicate removal, index; the module-level ``infile`` is rebound to each
	step's output, intermediates other than the original job input are
	deleted, and the last file is moved to ``outfile``.
	"""
	global infile
	if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
		subshell.samtools.view(b = True, o = outfile, O = 'bam', _ = infile).run()
		return

	if steps.sort:
		mem = mem2(argsmem, 'M')
		bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
		subshell.samtools.sort(
			m = mem + 'M',
			n = sortby == 'queryname',
			o = bamfile,
			T = tmpdir,
			O = 'bam',
			_ = infile,
			**{'@': nthread}
		).run()
		# Never delete the original job input, only intermediates.
		if infile != {{i.infile | quote}}:
			shell.rm(infile, f = True)
		infile = bamfile
	if steps.markdup or steps.rmdup:
		bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
		# rmdup is dispatched through `subshell.samtools` like view, sort and
		# index so that it runs as a samtools sub-command.
		subshell.samtools.rmdup(infile, bamfile).run()
		if infile != {{i.infile | quote}}:
			shell.rm(infile, f = True)
		infile = bamfile
	if steps.index:
		# Index the file produced so far (`infile`); `outfile` does not exist
		# until the move below. The command must be run explicitly, as with
		# the other samtools invocations in this function.
		subshell.samtools.index(infile, outfile + '.bai').run()
	if infile != outfile:
		if path.exists(infile + '.bai'):
			shell.mv(infile + '.bai', outfile + '.bai')
		shell.mv(infile, outfile)
Beispiel #4
0
def run_virmid():
	"""Run virmid on the tumor/normal pair and move its passed calls to ``outfile``.

	Sets reference, tumor (``D``), normal (``N``) and working directory on
	the module-level ``params``, executes virmid, moves the
	``*.virmid.som.passed.vcf`` result from ``joboutdir`` to ``outfile`` and
	gzips it when ``gz`` is truthy.
	"""
	params.R = ref
	params.D = tumor
	params.N = normal
	params.w = joboutdir
	cmd = '{virmid} {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
		virmid = virmid,
		mem    = mem2(mem, 'java'),
		tmpdir = tmpdir,
		args   = cmdargs(params)
	)
	# Execute virmid; without this call there is no result file to move.
	runcmd(cmd)
	# NOTE(review): the source path contains a `*` wildcard, so this relies on
	# runcmd passing the list form through something that expands it -- confirm.
	runcmd(['mv', path.join(joboutdir, '*.virmid.som.passed.vcf'), outfile])
	if gz:
		runcmd(['gzip', outfile])
Beispiel #5
0
def run_gatk():
    """Realign around indels and recalibrate base qualities with GATK.

    Runs four GATK walkers in sequence: RealignerTargetCreator,
    IndelRealigner, BaseRecalibrator and PrintReads. Per-walker options are
    taken from the matching key of the module-level ``params`` and then
    overridden with the inputs/outputs set here. The realigned intermediate
    BAM is removed at the end and the index written next to the output is
    renamed to ``outfile + '.bai'``.
    """
    mem = mem2(argsmem, '-jdict')
    intfile = path.join(joboutdir, outprefix + '.intervals')

    # JVM options are handed to the wrapper as positional arguments.
    mem['-Djava.io.tmpdir={}'.format(shell.shquote(tmpdir))] = True
    jvmopts = list(mem.keys())
    gatksh = Shell(equal=' ', dash='-').gatk

    rtcparams = params.get('RealignerTargetCreator', Box())
    rtcparams.T = 'RealignerTargetCreator'
    rtcparams.R = ref
    rtcparams.I = infile
    rtcparams.o = intfile
    rtcparams.nt = nthread
    rtcparams._ = list(jvmopts)
    gatksh(**rtcparams).run()

    bamfileir = path.join(joboutdir, outprefix + '.ir.bam')
    irparams = params.get('IndelRealigner', Box())
    irparams.T = 'IndelRealigner'
    irparams.R = ref
    irparams.I = infile
    irparams.o = bamfileir
    irparams._ = list(jvmopts)

    irparams.targetIntervals = intfile
    gatksh(**irparams).run()

    recaltable = path.join(joboutdir, outprefix + '.recaltable')
    brparams = params.get('BaseRecalibrator', Box())
    brparams.T = 'BaseRecalibrator'
    brparams.R = ref
    brparams.I = bamfileir
    brparams.o = recaltable
    brparams.nct = nthread
    brparams._ = list(jvmopts)

    brparams.knownSites = knownSites
    gatksh(**brparams).run()

    prparams = params.get('PrintReads', Box())
    prparams.T = 'PrintReads'
    prparams.R = ref
    prparams.I = bamfileir
    prparams.o = outfile
    prparams.nct = nthread
    # Apply the table computed by BaseRecalibrator; without -BQSR PrintReads
    # writes the reads with their original qualities.
    prparams.BQSR = recaltable
    prparams._ = list(jvmopts)
    gatksh(**prparams).run()

    shell.rm(bamfileir, f=True)
    # Resolve the index inside the job output directory like every other
    # intermediate; path.join leaves an absolute outprefix untouched.
    shell.mv(path.join(joboutdir, outprefix + '.bai'), outfile + '.bai')
Beispiel #6
0
def run_gatk():
	"""Call variants on ``infile`` with GATK HaplotypeCaller.

	Fills the module-level ``params`` with walker, reference, input, output
	and thread count, passes the JVM options as positional arguments, runs
	GATK and gzips ``outfile`` when ``gz`` is truthy.
	"""
	gatkmem = mem2(mem, 'jdict')
	gatkmem['Djava.io.tmpdir={!r}'.format(tmpdir)] = True

	# Select the gatk tool on the Shell object; the Shell instance itself is
	# only a command factory.
	gatksh     = Shell(equal = ' ', dash = '-').gatk
	params.T   = 'HaplotypeCaller'
	params.R   = ref
	params.I   = infile
	params.o   = outfile
	params.nct = nthread
	params._   = list(gatkmem.keys())

	gatksh(**params).run()
	if gz:
		shell.gzip(outfile)
Beispiel #7
0
def run_strelka():
	"""Configure and run the Strelka workflow on the tumor/normal pair.

	The configuration command is run first with the tumor/normal inputs, then
	the generated ``runWorkflow.py`` in ``joboutdir``. The SNV and indel
	result files are merged into ``outfile``, which is gzipped when ``gz``
	is truthy.
	"""
	# --- configuration step
	cparams                = {{args.configParams | repr}}
	cparams.normalBam      = normal
	cparams.tumorBam       = tumor
	cparams.referenceFasta = ref
	cparams.runDir         = joboutdir
	configure_cmd = '%s %s' % (strelka, cmdargs(cparams))
	runcmd(configure_cmd)

	# --- workflow step, executed locally
	params.m = 'local'
	params.g = mem2(mem, 'G')[:-1]  # drop the last character of the converted value
	params.j = nthread
	workflow_cmd = '%s/runWorkflow.py %s' % (joboutdir, cmdargs(params))
	runcmd(workflow_cmd)

	# --- collect results
	variants_dir = path.join(joboutdir, 'results', 'variants')
	snv_calls    = path.join(variants_dir, 'somatic.snvs.vcf.gz')
	indel_calls  = path.join(variants_dir, 'somatic.indels.vcf.gz')
	_mergeAndAddGT(snv_calls, indel_calls, outfile)
	if gz:
		runcmd(['gzip', outfile])
Beispiel #8
0
def run_strelka():
	"""Configure and run the Strelka workflow on a single input file.

	After the workflow finishes, ``genome.S1.vcf.gz`` from the results
	directory is moved to ``outfile + '.gz'`` and decompressed unless ``gz``
	is truthy.
	"""
	# Step 1: write the workflow configuration into joboutdir.
	cfgParams.bam            = infile
	cfgParams.referenceFasta = ref
	cfgParams.runDir         = joboutdir
	configure = Shell().strelka(**cfgParams)
	configure.run()

	# Step 2: execute the generated workflow script locally.
	params.m = 'local'
	params.j = nthread
	params.g = mem2(mem, 'G')[:-1]
	workflow_script = path.join(joboutdir, 'runWorkflow.py')
	workflow = Shell({'runWorkflow': workflow_script}).runWorkflow(**params)
	workflow.run()

	# Step 3: put the result where the caller expects it.
	result  = path.join(joboutdir, 'results', 'variants', 'genome.S1.vcf.gz')
	gzipped = outfile + '.gz'
	shell.mv(result, gzipped)
	if not gz:
		shell.gunzip(gzipped)
Beispiel #9
0
def run_biobambam():
	"""Sort the input with biobambam, optionally marking/removing duplicates and indexing.

	All options are set on the module-level ``params`` and passed to the
	tool as ``key=value`` arguments.
	"""
	mem = mem2(argsmem, 'M')
	if steps.index:
		params.index         = 1
		params.indexfilename = outfile + '.bai'

	params.I              = infile
	params.O              = outfile
	params.SO             = sortby
	# The memory buffer option is spelled `blockmb`.
	# NOTE(review): confirm that the value returned by mem2 is in a form the
	# tool accepts for this option.
	params.blockmb        = mem
	params.tmpfile        = path.join(tmpdir, 'biobambam.tmp')
	params.inputformat    = infmt
	# Counterpart of `inputformat`; the tool has no `outfmt` key.
	params.outputformat   = 'bam'
	params.inputthreads   = nthread
	params.outputthreads  = nthread
	params.markduplicates = int(steps.markdup)
	params.rmdup          = int(steps.rmdup)

	Shell(dash = '', equal = '=').biobambam(**params).run()
Beispiel #10
0
from sys import stderr
from shutil import move

from pyppl import Box
from bioprocs.utils import runcmd, mem2, cmdargs

# Extra tool options rendered from the template; each branch below adds its
# own keys before the command line is built with cmdargs.
params = {{args.params}}
try:
{% case args.tool %}
	{% when 'trimmomatic' %}
	# Trimmomatic, single-end mode.
	mem    = mem2 ({{args.mem | quote}}, "java")
	# NOTE(review): MINLEN is passed as twice the configured minlen -- confirm why.
	minlen = str({{args.minlen}} * 2)
	# The adapter sequence is written to a FASTA file in the job output
	# directory and referenced by the ILLUMINACLIP step.
	adfile = "{{job.outdir}}/adapters.fa"
	with open (adfile, "w") as ad:
		ad.write (">TruSeq3_IndexedAdapter\n")
		ad.write ({{args.adapter | quote}} + "\n")

	params['threads'] = {{args.nthread}}
	cmd    = '{{args.trimmomatic}} %s SE %s "{{in.fq}}" "{{out.outfq}}" ILLUMINACLIP:%s:2:30:10 LEADING:{{args.cut5}} TRAILING:{{args.cut3}} SLIDINGWINDOW:4:{{args.minq}} MINLEN:%s' % (mem, cmdargs(params, dash = '-', equal = ' '), adfile, minlen)
	runcmd (cmd)

	{% when 'cutadapt' %}
	# cutadapt: options are collected in params and rendered as "-key value".
	params['a'] = {{args.adapter | quote}}
	# NOTE(review): 'u' is assigned twice, so the cut5 value set on the next
	# line is overwritten by the negative cut3 value -- confirm how both
	# should be passed.
	params['u'] = "{{args.cut5}}"
	params['u'] = "-{{args.cut3}}"
	params['m'] = {{args.minlen}}
	params['q'] = "{{args.minq}},{{args.minq}}"
	params['o'] = {{out.outfq | quote}}
	cmd = '{{args.cutadapt}} %s "{{in.fq}}"' % cmdargs(params, dash = '-', equal = ' ')
	runcmd (cmd)
Beispiel #11
0
	# biobambam: options are rendered as "key=value".
	cmd = '{{args.biobambam}} %s' % cmdargs(params, dash = '', equal = '=')
	runcmd (cmd)
{% when 'bedtools' %}
	# bedtools bamtofastq: input file plus the two fastq outputs.
	params['i']   = infile
	params['fq']  = fqfile1
	params['fq2'] = fqfile2
	cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ')
	runcmd (cmd)
{% when 'samtools' %}
	# samtools fastq: the input file is appended as a quoted positional argument.
	params['t'] = True
	params['1'] = fqfile1
	params['2'] = fqfile2
	cmd = '{{args.samtools}} fastq %s "%s"' % (cmdargs(params, dash = '-', equal = ' '), infile)
	runcmd (cmd)
{% when 'picard' %}
	# picard SamToFastq: the value returned by mem2 is used as a flag key.
	# NOTE(review): the unit is spelled 'Java' here -- confirm mem2 accepts
	# this casing.
	mem = mem2({{ args.mem | quote }}, 'Java')
	params[mem]                = True
	params['-Djava.io.tmpdir'] = tmpdir
	params['TMP_DIR']          = tmpdir
	params['I']                = infile
	params['F']                = fqfile1
	params['F2']               = fqfile2
	cmd = '{{args.picard}} SamToFastq %s' % cmdargs(params, dash='', equal='=')
	runcmd (cmd)
{% endcase %}

{% if args.gz %}
	# Compress both fastq outputs when gzip output is requested.
	runcmd ('gzip "%s"' % (fqfile1))
	runcmd ('gzip "%s"' % (fqfile2))
{% endif %}
except Exception as ex:
Beispiel #12
0
# Fill in read-group defaults: an ID taken from an "L<digits>" token in the
# output file name (falling back to "<name>.L<job index>") and a sample name
# equal to the output file name.
if not rg['ID']:
	# NOTE(review): inside a raw string `\\d` matches a literal backslash
	# followed by "d" unless the template renderer collapses the doubled
	# backslash before Python sees it -- confirm.
	g = re.search (r'[^a-zA-Z0-9]+(L\\d+)[^a-zA-Z0-9]+', "{{o.outfile | fn}}")
	rg['ID'] = g.group(1) if g else "{{o.outfile | fn}}.L{{job.index}}"
if not rg['SM']:
	rg['SM'] = "{{o.outfile | fn}}"

# Per-job temporary directory, created on demand.
tmpdir    = path.join ("{{args.tmpdir}}", "{{proc.id}}.{{i.infile | fn}}.{{job.index}}")
if not path.exists (tmpdir):
	makedirs (tmpdir)

# Extra tool options rendered from the template.
params = {{args.params}}
try:
{% case args.tool %}
	############## picard
	{% when 'picard' %}
	mem = mem2({{ args.mem | quote }})
	params['-Djava.io.tmpdir'] = tmpdir
	params['TMP_DIR'] = tmpdir
	params['I'] = {{i.infile | quote}}
	params['O'] = {{o.outfile | quote}}
	# Each read-group field is passed as an "RG<key>" option.
	for k,v in rg.items():
		params['RG' + k] = v

	runcmd ('{{args.picard}} AddOrReplaceReadGroups %s %s' % (mem, cmdargs(params, dash='', equal='=')))

	############## bamutil
	{% when 'bamutil' %}
	# The header line is built with ID first, then the remaining fields; the
	# doubled backslashes keep the separators as backslash-t text rather than
	# tab characters (assuming the template renderer leaves them unchanged).
	params['RG'] = "@RG\\tID:%s\\t%s" % (rg['ID'], "\\t".join([k + ":" + v for k,v in rg.items() if k!='ID']))
	params['in'] = {{i.infile | quote}}
	params['out'] = {{o.outfile | quote}}
Beispiel #13
0
# Settings rendered from the process arguments.
tool = {{args.tool | repr}}
picard = {{args.picard | repr}}
chain = {{args.lochain | repr}}
ref = {{args.ref | repr}}
params = {{args.params | repr}}
mem = {{args.mem | repr}}
tmpdir = {{args.tmpdir | repr}}

# Abort early when no chain file was configured.
if not chain:
    logger.error('Chain file (args.lochain) not provided!')
    exit(1)

# Example of the command line this script builds:
# picard LiftoverVcf -Xmx4g -Xms1g  I=TCGA-05-4382-10.vcf O=1.vcf CHAIN=liftovers/hg38ToHg19.over.chain.gz R=ucsc_hg19.fa REJECT=r.vcf

if tool == 'picard':

    # Inputs, outputs and reference for LiftoverVcf.
    params.I = infile
    params.O = outfile
    params.CHAIN = chain
    params.REJECT = umfile
    params.R = ref

    # Every whitespace-separated JVM memory option becomes a bare switch.
    for jvmflag in mem2(mem, 'java').split():
        params['-' + jvmflag[1:]] = True

    params['-Djava.io.tmpdir'] = tmpdir

    liftover_args = cmdargs(params, equal='=', dash='')
    runcmd('{picard} LiftoverVcf {params}'.format(picard=picard, params=liftover_args))