Ejemplo n.º 1
0
def run_somaticsniper():
	"""
	Build and run the `ssniper` command for the tumor/normal pair.

	Uses names from the enclosing script: `params`, `ref`, `tumor`, `normal`,
	`outfile`, `ssniper`, `gz`, `cmdargs` and `runcmd`. The output file is
	gzipped afterwards when `gz` is truthy.
	"""
	params['f'] = ref
	params['F'] = 'vcf'
	# the empty key carries the bare arguments (presumably rendered by
	# `cmdargs` without a flag -- confirm against its implementation)
	params[''] = [tumor, normal, outfile]
	runcmd('{ssniper} {args}'.format(args = cmdargs(params), ssniper = ssniper))
	if gz:
		runcmd(['gzip', outfile])
Ejemplo n.º 2
0
def run_gatk():
	"""
	Run `gatk -T MuTect2` on the tumor/normal pair and write to `outfile`.

	Uses names from the enclosing script: `samtools`, `tumor`, `normal`,
	`ref`, `outfile`, `nthread`, `mem`, `tmpdir`, `gatk`, `params`, `gz`,
	`mem2`, `cmdargs` and `runcmd`.

	Steps:
	  1. write an interval list file holding the first field of the first
	     line printed by `samtools idxstats` for the tumor file;
	  2. run MuTect2 with that interval list passed as `-L`;
	  3. gzip the output file when `gz` is truthy.
	"""
	# generate interval list file
	intvfile = {{job.outdir | path.join: "interval.list" | quote}}
	cmd = '{samtools} idxstats {tumor!r} | head -1 | cut -f1 > {intvfile!r}'.format(
		samtools = samtools,
		tumor    = tumor,
		intvfile = intvfile
	)
	runcmd(cmd)

	# Bind the converted value to a new local name. Assigning to `mem` here
	# would make `mem` local to this function, and reading it on the
	# right-hand side would raise UnboundLocalError.
	javamem = mem2(mem, 'java')

	params['I:tumor']  = tumor
	params['I:normal'] = normal

	params.R   = ref
	params.o   = outfile
	params.nct = nthread
	params.L   = intvfile

	cmd = '{gatk} -T MuTect2 {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
		gatk   = gatk,
		mem    = javamem,
		tmpdir = tmpdir,
		args   = cmdargs(params, dash = '-', equal = ' ')
	)
	runcmd(cmd)
	if gz: runcmd(['gzip', outfile])
Ejemplo n.º 3
0
def run_strelka():
	"""
	Configure and run the strelka workflow, then merge its two result VCFs.

	Uses names from the enclosing script: `normal`, `tumor`, `ref`,
	`joboutdir`, `strelka`, `params`, `mem`, `nthread`, `outfile`, `gz`,
	`mem2`, `cmdargs`, `runcmd`, `path` and `_mergeAndAddGT`.
	"""
	# step 1: run the configuration command with the run directory set to
	# `joboutdir`
	cparams                = {{args.configParams | repr}}
	cparams.normalBam      = normal
	cparams.tumorBam       = tumor
	cparams.referenceFasta = ref
	cparams.runDir         = joboutdir
	configcmd = '{strelka} {args}'.format(args = cmdargs(cparams), strelka = strelka)
	runcmd(configcmd)

	# step 2: execute runWorkflow.py from `joboutdir`
	params['m'] = 'local'
	# drop the last character of the converted memory string
	params['g'] = mem2(mem, 'G')[:-1]
	params['j'] = nthread
	workflowcmd = '{joboutdir}/runWorkflow.py {args}'.format(
		args      = cmdargs(params),
		joboutdir = joboutdir
	)
	runcmd(workflowcmd)

	# step 3: merge the snv and indel calls into `outfile`
	vardir = path.join(joboutdir, 'results', 'variants')
	_mergeAndAddGT(
		path.join(vardir, 'somatic.snvs.vcf.gz'),
		path.join(vardir, 'somatic.indels.vcf.gz'),
		outfile
	)
	if gz:
		runcmd(['gzip', outfile])
Ejemplo n.º 4
0
def run_vardict():
	"""
	Build and run the `vardict` command, redirecting its stdout to `outfile`.

	Uses names from the enclosing script: `params`, `ref`, `tumor`, `normal`,
	`vardict`, `outfile`, `gz`, `cmdargs` and `runcmd`. The output file is
	gzipped afterwards when `gz` is truthy.
	"""
	params['v'] = True
	params['G'] = ref
	# tumor and normal are passed together, separated by a pipe character
	params['b'] = '{}|{}'.format(tumor, normal)
	runcmd('{vardict} {args} > {outfile!r}'.format(
		outfile = outfile,
		args    = cmdargs(params),
		vardict = vardict
	))
	if gz:
		runcmd(['gzip', outfile])
Ejemplo n.º 5
0
def run_virmid():
	"""
	Run `virmid` on the tumor/normal pair and move its result to `outfile`.

	Uses names from the enclosing script: `params`, `ref`, `tumor`, `normal`,
	`joboutdir`, `virmid`, `mem`, `tmpdir`, `outfile`, `gz`, `mem2`,
	`cmdargs`, `runcmd` and `path`. The output file is gzipped afterwards
	when `gz` is truthy.
	"""
	params.R = ref
	params.D = tumor
	params.N = normal
	params.w = joboutdir
	cmd = '{virmid} {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
		virmid = virmid,
		mem    = mem2(mem, 'java'),
		tmpdir = tmpdir,
		args   = cmdargs(params)
	)
	# Execute the command. It used to be built but never run, so the `mv`
	# below had no result file to move.
	runcmd(cmd)
	# NOTE(review): the source path contains a `*` wildcard inside a list
	# command; whether it gets expanded depends on how `runcmd` executes
	# list commands -- confirm.
	runcmd(['mv', path.join(joboutdir, '*.virmid.som.passed.vcf'), outfile])
	if gz: runcmd(['gzip', outfile])
Ejemplo n.º 6
0
def getAlleleCount(bamfile, snpfile, outfile):
	"""
	Run `bamrc` on `bamfile` for the sites in `snpfile` and write one row of
	per-base counts for each matched site to `outfile`.

	The raw `bamrc` output is redirected to `outfile + '.tmp'`, then re-read
	and matched against the rows of `snpfile`. The temporary file is not
	removed here and neither reader is closed explicitly.

	@params:
		bamfile: the input file handed to `bamrc` as its bare argument
		snpfile: header-less table of sites; the code reads column 0 as the
			chromosome, column 2 as the position, column 6 as the reference
			allele and column 7 as the mutant allele
		outfile: the output table with columns
			Chrm, pos, A, C, G, T, Total, refCount, mutCount
	"""
	brcparams   = Box()
	brcparams.f = ref
	brcparams.w = 0
	brcparams.l = snpfile

	brcparams[''] = bamfile
	cmd = '{bamrc} {args} > {outfile!r}'.format(
		bamrc = bamrc, args = cmdargs(brcparams, equal = ' '), outfile = outfile + '.tmp')
	runcmd(cmd)

	# reformat the raw output into the desired table
	reader = TsvReader(outfile + '.tmp', cnames = False)
	snper  = TsvReader(snpfile, cnames = False)
	# example row of the raw output (columns 5-8 hold the A, C, G and T fields):
	#chr1	564773	C	14	=:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00	A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00	C:14:...	G:0:...	T:0:...	N:0:...
	writer = TsvWriter(outfile)
	writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']

	# Both files are walked in step: for every count row `r`, snps are pulled
	# from `snper` until one matches. Snps skipped along the way are never
	# revisited.
	for r in reader:
		while True:
			try:
				snp   = next(snper)
			except StopIteration:
				# snp file exhausted: leave the inner loop (every later `r`
				# ends up here immediately as well)
				break
			# use the end position, in case it's 0-based
			if snp[0] == r[0] and snp[2] == r[1]:
				# the count is the second ':'-separated field of each base column
				counts = dict(
					A = r[5].split(':', 2)[1],
					C = r[6].split(':', 2)[1],
					G = r[7].split(':', 2)[1],
					T = r[8].split(':', 2)[1]
				)
				rec    = TsvRecord()
				rec.Chrm  = r[0]
				rec.pos   = r[1]
				rec.Total = r[3]
				rec.A = counts['A']
				rec.C = counts['C']
				rec.G = counts['G']
				rec.T = counts['T']
				# if reference allele is unknown, assuming all are ref alleles
				rec.refCount = counts.get(snp[6].upper(), r[3])
				# if mut allele is unknown, assuming no mutations happened
				rec.mutCount = counts.get(snp[7].upper(), 0)
				writer.write(rec)
				# matched: stop scanning snps and move on to the next count row
				break
			else:
				# no match: keep scanning, i.e. fetch the next snp for the same row
				continue
	writer.close()
Ejemplo n.º 7
0
        def runChrom(file1, file2, chrom):
            """
            Subtract `file2` from `file1` for a single chromosome.

            Each input is first extracted for `chrom` with `tabix -h` into a
            temporary per-chromosome file, `bedtools subtract` is run on the
            two extracts, and the extracts are removed afterwards.

            Args:
                file1: path of the first input; its last 3 characters are
                    dropped before the output names are derived
                file2: path of the second input, treated the same way
                chrom: the chromosome name handed to tabix

            Returns:
                The path of the subtracted file,
                `<stem of file1>-<chrom>.subtracted.<ext>`.
            """
            def _chromParts(infile):
                # split "<stem>.<ext>" (after dropping the last 3 characters)
                # into "<stem>-<chrom>" and ".<ext>"
                stem, dot, ext = infile[:-3].rpartition('.')
                return stem + '-' + chrom, dot + ext

            head1, tail1 = _chromParts(file1)
            head2, tail2 = _chromParts(file2)
            outfile1 = head1 + tail1
            outfile2 = head2 + tail2
            outfile = head1 + '.subtracted' + tail1

            # extract the records of this chromosome, keeping the header
            extract = '{} -h {!r} {!r} > {!r}'
            for source, target in ((file1, outfile1), (file2, outfile2)):
                runcmd(extract.format(tabix, source, chrom, target))

            subargs = {'a': outfile1, 'b': outfile2}
            if rmany:
                subargs['A'] = True

            runcmd('{bedtools} subtract {params} > {outfile!r}'.format(
                bedtools=bedtools,
                params=cmdargs(subargs, dash='-', equal=' '),
                outfile=outfile))

            # the per-chromosome extracts are no longer needed
            for extracted in (outfile1, outfile2):
                remove(extracted)
            return outfile
Ejemplo n.º 8
0
params = {{args.params}}
fq1 = {{o.fq1 | quote}}
fq2 = {{o.fq2 | quote}}
try:
{% case args.tool %}
	{% when 'wgsim' %}
	{% if args.gz %}
	fq1 = "{{o.fq1 | [:-3]}}"
	fq2 = "{{o.fq2 | [:-3]}}"
	{% endif %}

	params['N'] = {{args.num}}
	params['1'] = {{args.len1}}
	params['2'] = {{args.len2}}
	params['S'] = {{i.seed | lambda x: -1 if x is None else x}}
	cmd = '{{args.wgsim}} %s "%s" "%s" "%s"' % (cmdargs(params), ref, fq1, fq2)
	runcmd (cmd)

	{% if args.gz %}
	runcmd ('gzip "%s"' % fq1)
	runcmd ('gzip "%s"' % fq2)
	{% endif %}
	
	{% when 'dwgsim' %}
	prefix = {{o.fq1 | [:-8] | quote}}
	{% if args.gz %}
	fq1 = "{{o.fq1 | [:-3]}}"
	fq2 = "{{o.fq2 | [:-3]}}"
	prefix = "{{o.fq1 | [:-11]}}"
	{% endif %}
Ejemplo n.º 9
0
# values rendered by the template engine
outfile = {{o.outfile | quote}}
argssql = {{args.sql | quote}}
inopts  = {{args.inopts | repr}}
outopts = {{args.outopts | repr}}

# The SQL statement comes from `sqlfile` when one is given, otherwise from
# `args.sql`.
if sqlfile:
	with open(sqlfile) as f:
		# lines keep their own newline and are additionally joined by a space
		sql = ' '.join(f.readlines()).strip()
	if argssql:
		logger.warning('`args.sql` is ignored, as `i.sqlfile` is provided.')
else:
	sql = argssql

if not sql:
	# message typo fixed: "requied" -> "required"
	raise ValueError('One of `i.sqlfile` and `args.sql` is required.')

# Options for the `q` command; each output option falls back to the
# corresponding input option when it is None.
params = {
	'H': inopts.cnames,
	'd': inopts.delimit,
	'e': inopts.encoding,
	# 'auto' turns the flag on only for inputs ending in '.gz'
	'z': (inopts.gz == 'auto' and infile.endswith('.gz')) or inopts.gz is True,
	'D': outopts.delimit if outopts.delimit is not None else inopts.delimit,
	'O': outopts.cnames if outopts.cnames is not None else inopts.cnames,
	'E': outopts.encoding if outopts.encoding is not None else inopts.encoding
}
# The input is piped through `cat` into `q`, whose stdout goes to `outfile`.
# NOTE(review): `cmdargs({' ': sql})[2:]` drops the first two characters of
# the rendered argument -- presumably a prefix emitted for the blank key;
# confirm against `cmdargs`.
c = cmd.Cmd(['cat', infile]).pipe('q {} {} > {!r}'.format(
	cmdargs(params), 
	cmdargs({' ': sql})[2:],
	outfile
), shell = True).run()
logger.info('Done: %s', c.cmd)
Ejemplo n.º 10
0
from pyppl import Box
from bioprocs.utils import runcmd, cmdargs

# extra command-line options, rendered from `args.params`
params = {{args.params}}

####### htseq
# everything below is only rendered when `args.tool` is 'htseq'
{% if args.tool == 'htseq' %}

# for an input file name ending in '.bam', add the option f = 'bam'
{% if i.infile.endswith('.bam') %}
params['f'] = 'bam'
{% endif %}

# run the tool on the input file and `args.refgene`; stdout is redirected to
# the output file
cmd = '{{args.htseq}} %s "{{i.infile}}" "{{args.refgene}}" > "{{o.outfile}}"' % (cmdargs(params))
runcmd (cmd)

{% endif %}
Ejemplo n.º 11
0
	mem    = mem2 ({{args.mem | quote}}, 'java')
	minlen = str({{args.minlen}} * 2)
	adfile = "{{job.outdir}}/adapters.fa"
	with open (adfile, "w") as ad:
		ad.write (">PE1\n")
		ad.write (seqrev({{args.adapter1 | quote}}) + "\n")
		ad.write (">PE1_rc\n")
		ad.write ({{args.adapter1 | quote}} + "\n")
		ad.write (">PE2\n")
		ad.write (seqrev({{args.adapter2 | quote}}) + "\n")
		ad.write (">PE2_rc\n")
		ad.write ({{args.adapter2 | quote}} + "\n")

	params['threads'] = {{args.nthread}}
	cmd = '{{args.trimmomatic}} %s PE %s "{{i.fq1}}" "{{i.fq2}}" "{{o.outfq1}}" /dev/null "{{o.outfq2}}" /dev/null ILLUMINACLIP:%s:2:30:10 LEADING:{{args.cut5}} TRAILING:{{args.cut3}} SLIDINGWINDOW:4:{{args.minq}} MINLEN:%s' % (mem, cmdargs(params, dash = '-', equal = ' '), adfile, minlen)
	runcmd (cmd)

	{% when 'cutadapt' %}
	params['a'] = {{args.adapter1 | quote}}
	params['A'] = {{args.adapter2 | quote}}
	params['u'] = "{{args.cut5}}"
	params['u'] = "-{{args.cut3}}"
	params['U'] = "{{args.cut5}}"
	params['U'] = "-{{args.cut3}}"
	params['m'] = {{args.minlen}}
	params['q'] = "{{args.minq}},{{args.minq}}"
	params['o'] = {{o.outfq1 | quote}}
	params['p'] = {{o.outfq2 | quote}}
	cmd = '{{args.cutadapt}} %s {{ i.fq1 | quote }} {{ i.fq2 | quote }}' % cmdargs(params, dash = '-', equal = ' ')
	runcmd (cmd)
Ejemplo n.º 12
0
from os import path
from pyppl import Box
from bioprocs.utils import runcmd, cmdargs

params = {}
region = {{i.region | quote}}
outfile = {{o.outfile | quote}}

# `region` is either the path of a regions file or a literal region string.
# Pick the command template for the case at hand; None means there is nothing
# to query.
isregfile = path.isfile(region)
if isregfile and path.getsize(region) > 0:
    # non-empty regions file: hand it over with the R option
    params['R'] = region
    template = '{{args.tabix}} %s "{{i.infile}}" > "{{o.outfile}}"'
elif not isregfile and region:
    # literal region: appended to the command line as is
    template = '{{args.tabix}} %s "{{i.infile}}" {{i.region}} > "{{o.outfile}}"'
else:
    template = None

if template is None:
    # empty regions file or empty region string: just create an empty output
    open(outfile, 'w').close()
else:
    params.update({{args.params}})
    cmd = template % cmdargs(params, equal=' ')
    runcmd(cmd)
Ejemplo n.º 13
0
from pyppl import Box
from bioprocs.utils import runcmd, mem2, cmdargs

params = {{args.params}}
try:
{% case args.tool %}
	{% when 'trimmomatic' %}
	mem    = mem2 ({{args.mem | quote}}, "java")
	minlen = str({{args.minlen}} * 2)
	adfile = "{{job.outdir}}/adapters.fa"
	with open (adfile, "w") as ad:
		ad.write (">TruSeq3_IndexedAdapter\n")
		ad.write ({{args.adapter | quote}} + "\n")

	params['threads'] = {{args.nthread}}
	cmd    = '{{args.trimmomatic}} %s SE %s "{{in.fq}}" "{{out.outfq}}" ILLUMINACLIP:%s:2:30:10 LEADING:{{args.cut5}} TRAILING:{{args.cut3}} SLIDINGWINDOW:4:{{args.minq}} MINLEN:%s' % (mem, cmdargs(params, dash = '-', equal = ' '), adfile, minlen)
	runcmd (cmd)

	{% when 'cutadapt' %}
	params['a'] = {{args.adapter | quote}}
	params['u'] = "{{args.cut5}}"
	params['u'] = "-{{args.cut3}}"
	params['m'] = {{args.minlen}}
	params['q'] = "{{args.minq}},{{args.minq}}"
	params['o'] = {{out.outfq | quote}}
	cmd = '{{args.cutadapt}} %s "{{in.fq}}"' % cmdargs(params, dash = '-', equal = ' ')
	runcmd (cmd)

	{% when 'skewer' %}
	params['m'] = 'any'
	params['t'] = {{args.nthread}}
Ejemplo n.º 14
0
        "1": ref + ' ' + alt,
        "2": alt + ' ' + alt
    }
    return compGTs.get(gt, '0 0')


logger.info('Writing tped file ...')
tpedWriter = TsvWriter(tpedfile)
for r in inreader:
    (chrom, pos, _, ref, alt) = r[0].split('_')
    if chrom.startswith('chr'):
        chrom = chrom[3:]
    chrom = chrmaps.get(chrom, chrom)
    tpedWriter.write([chrom, r[0], 0, pos] +
                     [getCompondGT(gt, ref, alt) for gt in r.values()[:]])
tpedWriter.close()

logger.info("Converting using plink ...")
cmd = '{} {}'.format(
    plink,
    cmdargs({
        'tfile': prefix,
        'make-bed': True,
        'out': prefix
    }, equal=' '))
runcmd(cmd)

if not keeptxt:
    remove(tpedfile)
    remove(tfamfile)
Ejemplo n.º 15
0
	############# picard
	{% when 'picard' %}
	mem = mem2({{ args.mem | quote }}, 'java')
	infiles = {{ i.infiles }}
	for i, infile in enumerate(infiles):
		params['I' + ' ' * i] = infile
	{% if args.nthread > 1 %}
	params['USE_THREADING'] = 'true'
	{% else %}
	params['USE_THREADING'] = 'false'
	{% endif %}
	params['TMP_DIR'] = tmpdir
	params['O']       = {{o.outfile | quote}}
	params['AS']      = 'true'

	cmd = '{{args.picard}} MergeSamFiles %s -Djava.io.tmpdir="%s" %s' % (mem, tmpdir, cmdargs(params, dash = '', equal = '='))
	runcmd (cmd)

	############# bamutil
	{% when 'bamutil' %}
	infiles = {{ i.infiles }}
	for i, infile in enumerate(infiles):
		params['i' + ' ' * i] = infile
	params['o'] = {{o.outfile | quote}}

	cmd = '{{args.bamutil}} mergeBam %s' % cmdargs(params)
	runcmd (cmd)

	############# samtools
	{% when 'samtools' %}
	inlist = path.join({{job.outdir | quote}}, 'bamlist.txt')
Ejemplo n.º 16
0
tmpfile  = outfile + '.tmp'
skip     = {{args.inopts | lambda x: x.get('skip', 0)}}
delimit  = {{args.inopts | lambda x: x.get('delimit', '\t') | quote}}
comment  = {{args.inopts | lambda x: x.get('comment', '#') | quote}}

if not skip and not comment:
	tmpfile = infile
else:
	with open(infile) as readerSkip, open(outfile, 'w') as writerSkip:
		for i, line in enumerate(readerSkip):
			if i >= skip: break
			writerSkip.write(line)

	readerTmp = TsvReader(infile, delimit = delimit, comment = comment, skip = skip, ftype = 'nometa', head = False)
	#readerTmp.autoMeta()
	writerTmp = TsvWriter(tmpfile, delimit = delimit, ftype = 'nometa')
	#writerTmp.meta.update(readerTmp.meta)
	for r in readerTmp:
		writerTmp.write(r)
	writerTmp.close()
	
{% if args.case %}
case = "LANG=C"
{% else %}
case = "LANG=en_US.UTF-8"
{% endif %}

cmd = '%s sort %s "%s" >> {{o.outfile | quote}}' % (case, cmdargs(params), tmpfile)
{% endif %}

runcmd(cmd)
Ejemplo n.º 17
0
	makedirs (tmpdir)

params = {{args.params}}
try:
{% case args.tool %}
	############## picard
	{% when 'picard' %}
	mem = mem2({{ args.mem | quote }})
	params['-Djava.io.tmpdir'] = tmpdir
	params['TMP_DIR'] = tmpdir
	params['I'] = {{i.infile | quote}}
	params['O'] = {{o.outfile | quote}}
	for k,v in rg.items():
		params['RG' + k] = v

	runcmd ('{{args.picard}} AddOrReplaceReadGroups %s %s' % (mem, cmdargs(params, dash='', equal='=')))

	############## bamutil
	{% when 'bamutil' %}
	params['RG'] = "@RG\\tID:%s\\t%s" % (rg['ID'], "\\t".join([k + ":" + v for k,v in rg.items() if k!='ID']))
	params['in'] = {{i.infile | quote}}
	params['out'] = {{o.outfile | quote}}

	runcmd ('{{args.bamutil}} polishBam %s' % cmdargs(params, equal = ' '))

{% endcase %}
except Exception as ex:
	stderr.write ("Job failed: %s" % str(ex))
	raise
finally:
	rmtree (tmpdir)
Ejemplo n.º 18
0
	runcmd(cmd)
	if gz: runcmd(['gzip', outfile])

def run_snvsniffer():
	# generate a header file
	theader = {{job.outdir | path.join: bn(i.tumor)  | @append: '.header' | quote}}
	nheader = {{job.outdir | path.join: bn(i.normal) | @append: '.header' | quote}}
	cmd = '{samtools} view -H {infile!r} > {hfile!r}'
	runcmd(cmd.format(samtools = samtools, infile = tumor, hfile = theader))
	runcmd(cmd.format(samtools = samtools, infile = normal, hfile = nheader))

	params.g = ref
	params.o = outfile

	params[''] = [theader, nheader, tumor, normal]
	cmd = '{ssniffer} somatic {args}'.format(ssniffer, cmdargs(params))
	runcmd(cmd)
	if gz: runcmd(['gzip', outfile])

def _mergeAndAddGT(snvvcf, indvcf, outfile):
	from pysam import VariantFile
	snv = VariantFile(snvvcf)
	ind = VariantFile(indvcf)
	
	snv.header.info.add('TYPE', 1, 'String', 'Type of somatic mutation')
	ind.header.info.add('TYPE', 1, 'String', 'Type of somatic mutation')
	snv.header.info.add('QSI', 1, 'Integer', 'Quality score for any somatic variant, ie. for the ALT haplotype to be present at a significantly different frequency in the tumor and normal')
	snv.header.info.add('TQSI', 1, 'Integer', 'Data tier used to compute QSI')
	snv.header.info.add('QSI_NT', 1, 'Integer', 'Quality score reflecting the joint probability of a somatic variant and NT')
	snv.header.info.add('TQSI_NT', 1, 'Integer', 'Data tier used to compute QSI_NT')
	snv.header.info.add('IC', 1, 'Integer', 'Number of times RU repeats in the indel allele')
Ejemplo n.º 19
0
# values rendered by the template engine
infile = {{i.infile | quote}}
outfile = {{o.outfile | quote}}

gz = {{args.gz | repr}}
# NOTE(review): `params` is assigned here but not used by the statements
# visible below -- confirm it is needed.
params = {{args.params | repr}}
tabix = {{args.tabix | quote}}

# when compressed output is requested, work with the output path minus its
# last 3 characters (the '.gz' suffix, presumably); '.gz' is re-appended below
if gz: outfile = outfile[:-3]

gztype = gzip_type(infile)

# Produce the output in the requested form, depending on how the input is
# compressed.
# NOTE(review): the standard-library `sys` module has no `symlink` function;
# unless `sys` is a project helper imported above this fragment, these calls
# should probably be `os.symlink` -- confirm.
if gztype == 'gzip':
    # gzip input: decompress first, then bgzip if compressed output is wanted
    gunzip(infile, outfile)
    if gz:
        bgzip(outfile)
elif gztype == 'bgzip':
    # bgzip input: link it for compressed output, otherwise decompress it
    if gz:
        sys.symlink(infile, outfile + '.gz')
    else:
        gunzip(infile, outfile)
else:
    # any other input: bgzip it for compressed output, otherwise link it
    if gz:
        bgzip(infile, outfile + '.gz')
    else:
        sys.symlink(infile, outfile)

# index the resulting file with tabix, using the option p = 'vcf'
cmd = '{} {} {!r}'.format(tabix, cmdargs({'p': 'vcf'}),
                          outfile + '.gz' if gz else outfile)
runcmd(cmd)
Ejemplo n.º 20
0
# run gdc-client to download the data
gdc = '{} download '.format(gdc_client)
args = Box({
    'm': infile,
    'n': nthread,
    'd': outdir,
    'retry-amount': '3',
    'debug': True,
    'log-file': path.join(outdir, 'gdc-client.log')
})
# the token option is only added when a token is given
if token:
    args.t = token

# user-supplied `params` override the defaults above
args.update(params)
cmd2run = gdc + cmdargs(args, equal=' ')
# first attempt: the return value of `system` is not checked here; missing
# downloads are detected below
system(cmd2run)

# check if all the data were successfully downloaded
# collect the first whitespace-separated field of every non-blank line of
# `infile`, skipping lines that start with 'id'
with open(infile) as fin:
    ids = [
        line.split()[0] for line in fin
        if line.strip() and not line.startswith('id')
    ]

# retry each missing id individually: the `m` option is dropped and the id is
# appended to the command line instead
del args['m']
for i in ids:
    # an id counts as downloaded when a directory named after it exists
    # under `outdir`
    if not path.isdir(path.join(outdir, i)):
        logger.warning('File failed to download: {}'.format(i))
        cmd2run = gdc + cmdargs(args, equal=' ') + ' ' + i
        runcmd(cmd2run)
Ejemplo n.º 21
0
from bioprocs.utils import runcmd, cmdargs

infile = {{i.infile | quote}}
outfile = {{o.outfile | quote}}
header = {{args.header | repr}}
by = {{args.by | quote}}
tool = {{args.tool | quote}}

if header:
    # Copy the lines matching '^#' from the input to the (new) output file;
    # gzipped inputs are streamed through zcat first.
    params = cmdargs({'e': '^#'}, dash='-', equal=' ')
    cmd = ('zcat {infile} | grep {cmdargs} > {outfile}'
           if infile.endswith('.gz')
           else 'grep {cmdargs} {infile} > {outfile}')
    runcmd(cmd.format(infile=infile, outfile=outfile, cmdargs=params))

if tool == 'sort':
    # Only the non-header lines are sorted and appended to the output, so
    # lines starting with '#' are excluded with `grep -v` in BOTH branches.
    # The gz branch used to run `grep "^#"`, which kept only the header lines
    # and dropped every record.
    if infile.endswith('.gz'):
        cmd = 'zcat {infile} | grep -v "^#" | sort {cmdargs} >> {outfile}'
    else:
        cmd = 'grep -v "^#" {infile} | sort {cmdargs} >> {outfile}'

    if by.lower().startswith('coord'):
        params = cmdargs({'k#1': '1,1', 'k#2': '2,2n'}, dash='-', equal=' ')
    else:
        params = cmdargs({
            'k#1': '3,3',
            'k#2': '1,1',
Ejemplo n.º 22
0
fqfile1 = fqfile1[:-3]
fqfile2 = fqfile2[:-3]
{% endif %}

params  = {{args.params}}
try:
{% case args.tool %}
{% when 'biobambam' %}
	params['gz'] = 0
	params['F']  = fqfile1
	params['F2'] = fqfile2
	params['T']  = path.join(tmpdir, infile + '.tmp')
	params['filename'] = infile
	if infile.endswith('.sam'):
		params['inputformat'] = 'sam'
	cmd = '{{args.biobambam}} %s' % cmdargs(params, dash = '', equal = '=')
	runcmd (cmd)
{% when 'bedtools' %}
	params['i']   = infile
	params['fq']  = fqfile1
	params['fq2'] = fqfile2
	cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ')
	runcmd (cmd)
{% when 'samtools' %}
	params['t'] = True
	params['1'] = fqfile1
	params['2'] = fqfile2
	cmd = '{{args.samtools}} fastq %s "%s"' % (cmdargs(params, dash = '-', equal = ' '), infile)
	runcmd (cmd)
{% when 'picard' %}
	mem = mem2({{ args.mem | quote }}, 'Java')
Ejemplo n.º 23
0
indir = {{i.indir | quote}}
outfile = {{o.outfile | quote}}
plink = {{args.plink | quote}}
samid = {{args.samid | quote}}
snpid = {{args.snpid | quote}}
addchr = {{args.addchr | repr}}
nors = {{args.nors | quote}}
chroms = {{args.chroms | repr}}

bedfile = glob(path.join(indir, '*.bed'))[0]
input = path.splitext(bedfile)[0]
output = path.splitext(outfile)[0]

params = {'bfile': input, 'recode': 'A-transpose', 'out': output}

cmd = '%s %s 1>&2' % (plink, cmdargs(params, equal=' '))
runcmd(cmd)

fams = TsvReader(input + '.fam', ftype='nometa', delimit=' ', head=False)
if samid == 'fid':
    header = "\t" + "\t".join(fams.dump(0)) + "\n"
elif samid == 'iid':
    header = "\t" + "\t".join(fams.dump(1)) + "\n"
else:
    header = "\t" + "\t".join(r[0] + '_' + r[1] for r in fams) + "\n"
fams.close()

gts = TsvReader(output + '.traw', ftype='nometa', skip=1, head=False)

with open(outfile, 'w') as fout:
    fout.write(header)
Ejemplo n.º 24
0
from sys import stderr
from pyppl import Box
from bioprocs.utils import runcmd, cmdargs

fq   = {{i.fq | quote}}
params = {{args.params}}
try:
	{% if args.tool == 'fastqc' %}
	params['o'] = {{o.outdir | quote}}
	cmd = '{{args.fastqc}} %s "{{i.fq}}"' % cmdargs(params)
	runcmd(cmd)

	{% else %}
	raise Exception('Tool {{args.tool}} %s not supported.')

	{% endif %}
except Exception as ex:
	stderr.write ("Job failed: %s" % str(ex))
	raise
Ejemplo n.º 25
0
shell.TOOLS['cnvkit'] = cnvkit
envs = dict(OPENBLAS_NUM_THREADS=str(nthread),
            OMP_NUM_THREADS=str(nthread),
            NUMEXPR_NUM_THREADS=str(nthread),
            MKL_NUM_THREADS=str(nthread))
ckshell = shell.Shell(subcmd=True, equal=' ', envs=envs, cwd=outdir).cnvkit

# generate target file
params_t = params.target
params_t.o = path.join(outdir, prefix + '.bed')
ckshell.target(exbaits, **params_t).run()

# generate access file
if not accfile:
    accfile = path.join(outdir, prefix + '.access.bed')
    params_a = params.access
    params_a.o = accfile
    ckshell.access(ref, **params_a).run()

# autobin
params_b = params.autobin
params_b.t = params_t.o
params_b.g = accfile
params_b[''] = infiles
runcmd('cd {wdir}; {cnvkit} autobin {args}'.format(
    wdir=shell.shquote(outdir),
    cnvkit=shell.shquote(cnvkit),
    args=cmdargs(params_b, equal=' ')),
       env=envs)
Ejemplo n.º 26
0
accessfile    = "{workdir}/1/output/cnvkit_access.bed".format(workdir = workdir)
targetfile    = "{workdir}/1/output/cnvkit_targets.bed".format(workdir = workdir)
refcnn        = "{workdir}/1/output/reference.cnn".format(workdir = workdir)
fixedCnr      = "{outdir}/{infn}.cnr".format(outdir = outdir, infn = infn)
segfile       = "{outdir}/{infn}.cns".format(outdir = outdir, infn = infn)
callfile      = "{outdir}/{infn}.call.cns".format(outdir = outdir, infn = infn)
# report files
breaksfile    = "{outdir}/{infn}.breaks.txt".format(outdir = outdir, infn = infn)
gainlossfile  = "{outdir}/{infn}.gainloss.txt".format(outdir = outdir, infn = infn)
metricsfile   = "{outdir}/{infn}.metrics.txt".format(outdir = outdir, infn = infn)
segmetricsfile= "{outdir}/{infn}.segmetrics.txt".format(outdir = outdir, infn = infn)
openblas_nthr = "export OPENBLAS_NUM_THREADS={nthread}; export OMP_NUM_THREADS={nthread}; export NUMEXPR_NUM_THREADS={nthread}; export MKL_NUM_THREADS={nthread}; ".format(nthread = nthread)

cnvkitAccessParams      = params.access
cnvkitAccessParams['o'] = accessfile
cmd1 = openblas_nthr + '{cnvkit} access {ref} {args}'.format(cnvkit = cnvkit, ref = repr(ref), args = cmdargs(cnvkitAccessParams))

cnvkitTargetParams = params.target
cnvkitTargetParams['o'] = targetfile
cmd2 = '{cnvkit} target {accessfile} {args}'.format(cnvkit = cnvkit, accessfile = repr(accessfile), args = cmdargs(cnvkitTargetParams))

log2log('CNVkit: Run access and target at job #0 ...')
poll.first(cmd1 + '; ' + cmd2, lockfile = 'access.poll.lock')
log2log('CNVkit: Run access and target at job #0 ... done')

cnvkitCoverageParams = params.coverage
cnvkitCoverageParams['p'] = nthread
cnvkitCoverageParams['o'] = targetCov
cmd = openblas_nthr + '{cnvkit} coverage {infile} {targetfile} {args}'.format(cnvkit = cnvkit, infile = repr(infile), targetfile = repr(targetfile), args = cmdargs(cnvkitCoverageParams))

log2log('CNVkit: Run coverage at all jobs ...')
Ejemplo n.º 27
0
params['input-vcf']  = {{i.infile | quote}}
params['output-maf'] = {{o.outfile | quote}}
params['vep-data']   = {{args.vepDb | quote}}
params['vep-forks']  = {{args.nthread}}
params['filter-vcf'] = {{args.filtervcf | quote}}
params['ref-fasta']  = {{args.ref | quote}}
params['vep-path']   = path.dirname(vep)
{% if args.tumor1st %}
params['tumor-id']   = samples.pop(0)
params['normal-id']  = samples[0] if samples else 'NORMAL'
{% else %}
params['normal-id']  = samples.pop(0)
params['tumor-id']   = samples[0] if samples else 'NORMAL'
{% endif %}

cmd = '{{args.vcf2maf}} %s' % (cmdargs(params, equal=' '))
runcmd(cmd)

{% 	else %}
cmds = []
for sample in samples:
	vtparams = {}
	vtparams['a'] = True
	vtparams['c'] = sample
	vtparams['e'] = True
	samplevcf     = "{{job.outdir}}/{{i.infile | fn}}-%s.vcf" % sample
	cmd = '{{args.vcftools}} %s {{i.infile | quote}} > "%s"' % (cmdargs(vtparams), samplevcf)

	# vcf2maf.pl --input-vcf ZYYP-ZYYB.vcf  --output-maf ZYYP-ZYYB.snpEff.maf --tumor-id ZXLT-ZXLB_TUMOR --normal-id ZXLT-ZXLB_NORMAL --vep-data /path/to/vep/cache/ --filter-vcf /path/to/vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz --ref-fasta /path/to/hs37d5/phase2_reference_assembly_sequence/hs37d5.fa --vep-path /path/to/miniconda2/bin
	params['input-vcf']  = samplevcf
	params['output-maf'] = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample
Ejemplo n.º 28
0
from os import path
from pyppl import Box
from vcf import Reader as Vcf
from bioprocs.utils import runcmd, cmdargs, logger

infile = {{i.infile | quote}}
# the index is expected next to the input, with '.tbi' appended to its name
idxfile = infile + '.tbi'
indexed = path.isfile(idxfile)
if not indexed:
    raise ValueError('Vcf file needs to be indexed')

outdir = {{o.outdir | quote}}
plink = {{args.plink | repr}}
params = {{args.params | repr}}

# add the input, the make-bed flag and the output prefix to the options
params['vcf'] = infile
params['make-bed'] = True
params['out'] = path.join(outdir, {{i.infile | fn2 | quote}})

# run plink with its stdout redirected to stderr
args = cmdargs(params, equal=' ')
cmd = '%s %s 1>&2' % (plink, args)
runcmd(cmd)
Ejemplo n.º 29
0
        ocdir = path.join(outdir, 'query-{}.tomtom'.format(i + 1))
        ocdirs.append(ocdir)
        writer = MemeWriter(qfile)
        writer.meta = reader.meta
        writer.writeMeta()
        for _ in range(joblist[i]):
            try:
                writer.write(reader.next())
            except StopIteration:
                break
        writer.close()
        thparams = params.copy()
        thparams[""] = [qfile, mfile2]
        thparams.thresh = qval
        thparams.oc = ocdir
        cmdps.append((tomtom, cmdargs(thparams, dash='-', equal=' ')))
    reader.close()
    Parallel(nthread, raiseExc=True).run('{} {}', cmdps)

    writer = TsvWriter(outfile)
    reader = TsvReader(path.join(ocdirs[0], 'tomtom.txt'),
                       comment='##',
                       cnames=lambda header: header[1:].strip().split("\t"))
    writer.cnames = reader.cnames
    writer.writeHead(lambda cnames: "#" + "\t".join(cnames))
    reader.close()
    for ocdir in ocdirs:
        reader = TsvReader(
            path.join(ocdir, 'tomtom.txt'),
            comment='##',
            cnames=lambda header: header[1:].strip().split("\t"))
Ejemplo n.º 30
0
{% endif %}

params  = {{args.params}}
try:
{% case args.tool %}
	############# biobambam
	{% when 'biobambam' %}
	params['gz']       = 0
	#bug
	#params['S']        = fqfile
	params['filename'] = infile
	params['T']        = path.join(tmpdir, infile + '.tmp')
	if infile.endswith('.sam'):
		params['inputformat'] = 'sam'

	cmd = '{{args.biobambam}} %s > "%s"' % (cmdargs(params, dash = '', equal = '='), fqfile)
	runcmd (cmd)

	############# bedtools
	{% when 'bedtools' %}
	params['i']  = infile
	params['fq'] = fqfile

	cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ')
	runcmd (cmd)

	############# samtools
	{% when 'samtools' %}
	params['t'] = True
	params['s'] = fqfile