Esempio n. 1
0
	def all(self, todo, *args, **kwargs):
		"""
		Run `todo` in the current job, drop this job's flag file, then pass
		through the lock of every other job.

		@params:
			`todo`: A callable, or a command template that is filled with
				`todo.format(*args, **kwargs)` and handed to `runcmd`.
			`*args`, `**kwargs`: Forwarded to `todo`. The keyword `lockfile`
				is consumed here and used as the flag file basename
				(default: `poll.all.lock`).
		"""
		basename  = kwargs.pop('lockfile', 'poll.all.lock')
		# one flag file per job: <workdir>/<1-based job number>/output/<basename>
		flagpaths = [
			path.join(self.workdir, str(jobno), 'output', basename)
			for jobno in range(1, self.joblen + 1)
		]
		locks = [HardFileLock(flagpath) for flagpath in flagpaths]

		try:
			if callable(todo):
				todo(*args, **kwargs)
			else:
				runcmd(todo.format(*args, **kwargs))
		finally:
			# the flag file is created whether or not `todo` succeeded
			with open(flagpaths[self.jobindex], 'w'):
				pass

		# enter and immediately leave the lock of each of the other jobs
		# NOTE(review): presumably this blocks until that job is done -- confirm
		# against HardFileLock
		for index, lock in enumerate(locks):
			if index != self.jobindex:
				with lock:
					pass
Esempio n. 2
0
def run_somaticsniper():
	"""
	Run the somatic sniper executable (`ssniper`) on the tumor/normal pair
	and gzip the result when `gz` is set.
	"""
	# reference, output format and the positional arguments
	params.f   = ref
	params.F   = 'vcf'
	params[''] = [tumor, normal, outfile]
	runcmd('{ssniper} {args}'.format(ssniper = ssniper, args = cmdargs(params)))
	if gz:
		runcmd(['gzip', outfile])
Esempio n. 3
0
	def first(self, todo, *args, **kwargs):
		"""
		Run `todo` in the first job only; every other job just passes through
		the lock that belongs to the first job.

		@params:
			`todo`: A callable, or a command template that is filled with
				`todo.format(*args, **kwargs)` and handed to `runcmd`.
			`*args`, `**kwargs`: Forwarded to `todo`. The keyword `lockfile`
				is consumed here and used as the flag file basename
				(default: `poll.first.lock`).
		"""
		basename = kwargs.pop('lockfile', 'poll.first.lock')
		# kept in the output directory of job #1 so it is cleaned on job reset
		flagpath = path.join(self.workdir, '1', 'output', basename)
		lock     = HardFileLock(flagpath)

		if self.jobindex != 0:
			# NOTE(review): presumably blocks until job #0 is done -- confirm
			# against HardFileLock
			with lock:
				pass
			return

		try:
			if callable(todo):
				todo(*args, **kwargs)
			else:
				runcmd(todo.format(*args, **kwargs))
		finally:
			# the flag file is created whether or not `todo` succeeded
			with open(flagpath, 'w'):
				pass
Esempio n. 4
0
	def non1st(self, todo, *args, **kwargs):
		"""
		Run `todo` in every job except the first one; the first job passes
		through the locks of all the other jobs instead.

		@params:
			`todo`: A callable, or a command template that is filled with
				`todo.format(*args, **kwargs)` and handed to `runcmd`.
			`*args`, `**kwargs`: Forwarded to `todo`. The keyword `lockfile`
				is consumed here and used as the flag file basename
				(default: `poll.non1st.lock`).
		"""
		lockfilename  = kwargs.pop('lockfile', 'poll.non1st.lock')
		# one flag file per job: <workdir>/<1-based job number>/output/<basename>
		lockfilenames = [
			path.join(self.workdir, str(jobindex + 1), 'output', lockfilename)
			for jobindex in range(self.joblen)
		]
		lockfiles = [HardFileLock(f) for f in lockfilenames]
		if self.jobindex == 0:
			# NOTE(review): presumably blocks until each job is done -- confirm
			# against HardFileLock
			for lockfile in lockfiles[1:]:
				with lockfile: pass
		else:
			try:
				if callable(todo):
					todo(*args, **kwargs)
				else:
					runcmd(todo.format(*args, **kwargs))
			finally:
				# Index the list of paths, not the basename string: indexing
				# the string picked a single character (or raised IndexError)
				# and the flag file of this job was never created.
				open(lockfilenames[self.jobindex], 'w').close()
Esempio n. 5
0
def gunzip(fn, outfn=None):
    """Decompress `fn` with gunzip.

    Args:
        fn: Path of the gzipped file.
        outfn: Optional output path. When given, the decompressed data is
            written there through `-c` and shell redirection; otherwise
            gunzip is run on `fn` without redirection.
    """
    args = {'f': True, '1': True}
    # Render the enabled switches as short flags. Formatting the dict itself
    # put its repr into the shell command instead of the flags.
    flags = ' '.join('-' + key for key, enabled in args.items() if enabled)
    if not outfn:
        cmd = 'gunzip {!r} {}'.format(fn, flags)
    else:
        cmd = 'gunzip {!r} {} -c > {!r}'.format(fn, flags, outfn)
    runcmd(cmd)
Esempio n. 6
0
def run_vardict():
	"""
	Run `vardict` on the tumor/normal pair, redirect its standard output to
	`outfile` and gzip the result when `gz` is set.
	"""
	params.v = True
	params.G = ref
	# both bam files go into one option, separated by '|'
	params.b = '{}|{}'.format(tumor, normal)
	template = '{vardict} {args} > {outfile!r}'
	runcmd(template.format(vardict = vardict, args = cmdargs(params), outfile = outfile))
	if gz:
		runcmd(['gzip', outfile])
Esempio n. 7
0
def run_virmid():
	"""
	Run `virmid` on the tumor/normal pair with `joboutdir` as its `w`
	option, then move the `*.virmid.som.passed.vcf` file from `joboutdir`
	to `outfile` and gzip it when `gz` is set.
	"""
	params.R = ref
	params.D = tumor
	params.N = normal
	params.w = joboutdir
	cmd = '{virmid} {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
		virmid = virmid,
		mem    = mem2(mem, 'java'),
		tmpdir = tmpdir,
		args   = cmdargs(params)
	)
	# The command was assembled but never executed, so there was nothing to
	# move afterwards.
	runcmd(cmd)
	# NOTE(review): the wildcard is only expanded if runcmd runs list commands
	# through a shell -- confirm.
	runcmd(['mv', path.join(joboutdir, '*.virmid.som.passed.vcf'), outfile])
	if gz: runcmd(['gzip', outfile])
Esempio n. 8
0
def getAlleleCount(bamfile, snpfile, outfile):
	"""
	Run the `bamrc` executable on `bamfile` for the sites in `snpfile` and
	reformat its output into a table of per-base counts.

	@params:
		`bamfile`: The bam file handed to `bamrc` as positional argument.
		`snpfile`: The site list (option `l`); it is also read here to look
			up the alleles of each site (columns 7 and 8).
		`outfile`: The table to write. The raw `bamrc` output is kept next
			to it as `<outfile>.tmp`.
	"""
	rawfile = outfile + '.tmp'

	brcparams     = Box()
	brcparams.f   = ref
	brcparams.w   = 0
	brcparams.l   = snpfile
	brcparams[''] = bamfile
	runcmd('{bamrc} {args} > {outfile!r}'.format(
		bamrc   = bamrc,
		args    = cmdargs(brcparams, equal = ' '),
		outfile = rawfile
	))

	# reformat the raw output to the desired format, each raw row looks like:
	#chr1	564773	C	14	=:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00	A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00	C:14:...	G:0:...	T:0:...	N:0:...
	reader = TsvReader(rawfile, cnames = False)
	snper  = TsvReader(snpfile, cnames = False)
	writer = TsvWriter(outfile)
	writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']

	for r in reader:
		# advance through the snp list until the site of this row shows up
		while True:
			try:
				snp = next(snper)
			except StopIteration:
				break
			# use the end position, in case it's 0-based
			if snp[0] != r[0] or snp[2] != r[1]:
				continue

			# columns 6-9 hold "<base>:<count>:..." for A, C, G and T
			counts = {
				base: r[column].split(':', 2)[1]
				for base, column in zip('ACGT', (5, 6, 7, 8))
			}
			rec       = TsvRecord()
			rec.Chrm  = r[0]
			rec.pos   = r[1]
			rec.Total = r[3]
			rec.A     = counts['A']
			rec.C     = counts['C']
			rec.G     = counts['G']
			rec.T     = counts['T']
			# if reference allele is unknown, assuming all are ref alleles
			rec.refCount = counts.get(snp[6].upper(), r[3])
			# if mut allele is unknown, assuming no mutations happened
			rec.mutCount = counts.get(snp[7].upper(), 0)
			writer.write(rec)
			# site handled, move on to the next row
			break
	writer.close()
Esempio n. 9
0
def run_gatk():
	"""
	Run GATK MuTect2 on the tumor/normal pair, restricted to the interval
	list generated below, and gzip `outfile` when `gz` is set.
	"""
	# generate interval list file
	intvfile = {{job.outdir | path.join: "interval.list" | quote}}
	# the list holds the first reference name reported by samtools idxstats
	cmd = '{samtools} idxstats {tumor!r} | head -1 | cut -f1 > {intvfile!r}'.format(
		samtools = samtools,
		tumor    = tumor,
		intvfile = intvfile
	)
	runcmd(cmd)

	# Use a separate local name: assigning to `mem` inside the function made
	# it a local variable, so reading it on the right-hand side raised
	# UnboundLocalError.
	javamem = mem2(mem, 'java')

	params['I:tumor']  = tumor
	params['I:normal'] = normal

	params.R   = ref
	params.o   = outfile
	params.nct = nthread
	params.L   = intvfile

	cmd = '{gatk} -T MuTect2 {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
		gatk   = gatk,
		mem    = javamem,
		tmpdir = tmpdir,
		args   = cmdargs(params, dash = '-', equal = ' ')
	)
	runcmd(cmd)
	if gz: runcmd(['gzip', outfile])
Esempio n. 10
0
        def tabindex(vcf, outdir):
            """Make a gzipped, tabix-indexed copy/link of `vcf` in `outdir`.

            A plain file is compressed with bgzip and indexed. A `.gz` file
            is symlinked; its index is looked for by following the chain of
            symlinks starting at `vcf`, and it is only built with tabix when
            none was found.

            Returns:
                The path of the gzipped file inside `outdir`.
            """
            gzfile = path.join(outdir, path.basename(vcf))
            if not vcf.endswith('.gz'):
                gzfile += '.gz'
                runcmd('bgzip {!r} -c > {!r}'.format(vcf, gzfile))
                runcmd('{} {!r}'.format(tabix, gzfile))
                return gzfile

            # it is gzipped, try to find the index file (.tbi)
            idxfile = gzfile + '.tbi'
            symlink(vcf, gzfile)
            target = vcf
            while True:
                try:
                    # readlink raises OSError once `target` is not a symlink
                    target = readlink(target)
                    found = target + '.tbi'
                    if path.isfile(found):
                        symlink(found, idxfile)
                        break
                except OSError:
                    break

            if not path.isfile(idxfile):
                runcmd('{} {!r}'.format(tabix, gzfile))
            return gzfile
Esempio n. 11
0
def run_strelka():
	"""
	Configure a strelka run in `joboutdir`, execute the generated
	`runWorkflow.py`, merge the snv and indel calls into `outfile` and gzip
	it when `gz` is set.
	"""
	# step 1: configuration
	confparams                = {{args.configParams | repr}}
	confparams.normalBam      = normal
	confparams.tumorBam       = tumor
	confparams.referenceFasta = ref
	confparams.runDir         = joboutdir
	confcmd = '{strelka} {args}'.format(strelka = strelka, args = cmdargs(confparams))
	runcmd(confcmd)

	# step 2: run the workflow locally
	params.m = 'local'
	# the last character of the mem2 result is dropped
	params.g = mem2(mem, 'G')[:-1]
	params.j = nthread
	workcmd = '{joboutdir}/runWorkflow.py {args}'.format(joboutdir = joboutdir, args = cmdargs(params))
	runcmd(workcmd)

	# step 3: collect the results
	vardir = path.join(joboutdir, 'results', 'variants')
	snvvcf = path.join(vardir, 'somatic.snvs.vcf.gz')
	indvcf = path.join(vardir, 'somatic.indels.vcf.gz')
	_mergeAndAddGT(snvvcf, indvcf, outfile)
	if gz:
		runcmd(['gzip', outfile])
Esempio n. 12
0
        def runChrom(file1, file2, chrom):
            """Subtract `file2` from `file1` for one chromosome.

            Both files are queried with tabix for `chrom`; the two extracts
            are subtracted with bedtools and removed afterwards.

            Returns:
                The path of the file holding the subtraction result.
            """

            def tagged(gzname, extra=''):
                # drop the last 3 characters, then put "-<chrom>" (and
                # `extra`) in front of the last extension
                stem, dot, ext = gzname[:-3].rpartition('.')
                return ''.join((stem, '-' + chrom, extra, dot, ext))

            outfile1 = tagged(file1)
            outfile2 = tagged(file2)
            outfile = tagged(file1, '.subtracted')

            # extract the records of this chromosome (with header) from each file
            for source, extract in ((file1, outfile1), (file2, outfile2)):
                runcmd('{} -h {!r} {!r} > {!r}'.format(tabix, source, chrom,
                                                       extract))

            params = {'a': outfile1, 'b': outfile2}
            if rmany:
                params['A'] = True
            runcmd('{bedtools} subtract {params} > {outfile!r}'.format(
                bedtools=bedtools,
                params=cmdargs(params, dash='-', equal=' '),
                outfile=outfile))

            # the per-chromosome extracts are not needed anymore
            remove(outfile1)
            remove(outfile2)
            return outfile
Esempio n. 13
0
fqfile2 = fqfile2[:-3]
{% endif %}

# tool arguments rendered from args.params, extended below for the selected tool
params  = {{args.params}}
try:
{% case args.tool %}
{% when 'biobambam' %}
	# paired-end output goes to F / F2; arguments are rendered as key=value
	params['gz'] = 0
	params['F']  = fqfile1
	params['F2'] = fqfile2
	params['T']  = path.join(tmpdir, infile + '.tmp')
	params['filename'] = infile
	# the input format is only set explicitly for sam files
	if infile.endswith('.sam'):
		params['inputformat'] = 'sam'
	cmd = '{{args.biobambam}} %s' % cmdargs(params, dash = '', equal = '=')
	runcmd (cmd)
{% when 'bedtools' %}
	# arguments are rendered as "-key value"
	params['i']   = infile
	params['fq']  = fqfile1
	params['fq2'] = fqfile2
	cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ')
	runcmd (cmd)
{% when 'samtools' %}
	# the input file is appended as positional argument after the options
	params['t'] = True
	params['1'] = fqfile1
	params['2'] = fqfile2
	cmd = '{{args.samtools}} fastq %s "%s"' % (cmdargs(params, dash = '-', equal = ' '), infile)
	runcmd (cmd)
{% when 'picard' %}
	mem = mem2({{ args.mem | quote }}, 'Java')
	params[mem]                = True
Esempio n. 14
0
# output fastq files as declared by the process
fq1 = {{o.fq1 | quote}}
fq2 = {{o.fq2 | quote}}
try:
{% case args.tool %}
	{% when 'wgsim' %}
	{% if args.gz %}
	# drop the last 3 characters of the output names; the files written
	# under these names are gzipped after the simulation (see below)
	fq1 = "{{o.fq1 | [:-3]}}"
	fq2 = "{{o.fq2 | [:-3]}}"
	{% endif %}

	params['N'] = {{args.num}}
	params['1'] = {{args.len1}}
	params['2'] = {{args.len2}}
	# a missing seed (None) is passed on as -1
	params['S'] = {{i.seed | lambda x: -1 if x is None else x}}
	# positional arguments after the options: reference, then the two fastq files
	cmd = '{{args.wgsim}} %s "%s" "%s" "%s"' % (cmdargs(params), ref, fq1, fq2)
	runcmd (cmd)

	{% if args.gz %}
	runcmd ('gzip "%s"' % fq1)
	runcmd ('gzip "%s"' % fq2)
	{% endif %}
	
	{% when 'dwgsim' %}
	# the prefix is the first output name without its last 8 characters
	# (11 when gzipped) -- NOTE(review): presumably the suffix dwgsim appends
	# to its outputs; confirm
	prefix = {{o.fq1 | [:-8] | quote}}
	{% if args.gz %}
	fq1 = "{{o.fq1 | [:-3]}}"
	fq2 = "{{o.fq2 | [:-3]}}"
	prefix = "{{o.fq1 | [:-11]}}"
	{% endif %}

	params['N'] = {{args.num}}
Esempio n. 15
0
	# Build the awk command for this sample. The script path has to be passed
	# under the name the placeholder uses (awkfile); it used to be passed as
	# `awk`, which left the awkfile placeholder unfilled and raised KeyError.
	cmd = '{{args.awk}} -v sample="{sample}" index={index} -f {awkfile} {infile}'.format(
		sample  = sample,
		index   = 10 + i,
		awkfile = str(repr(awkfile)),
		infile  = str(repr(infile))
	)
	cmds.append(cmd)

########### gatk
{% elif args.tool == 'gatk' %}
# one SelectVariants command per sample, each writing <outdir>/<infile stem>-<sample>.vcf
for sample in samples:
	params                       = {}
	params['R']                  = {{args.ref | quote}}
	params['V']                  = {{i.infile | quote}}
	params['o']                  = "{{o.outdir}}/{{i.infile | fn}}-%s.vcf" % sample
	params['sample_name']        = sample
	params['excludeFiltered']    = True
	params['excludeNonVariants'] = True
	# user-supplied arguments are applied last, so they override the defaults above
	params.update({{args.params}})
	cmd = '{{args.gatk}} -T SelectVariants %s' % (cmdargs(params, equal=' '))
	cmds.append(cmd)

{% endif %}

# run the collected commands one by one, or through Parallel when more than
# one thread is requested
{% if args.nthread == 1 %}
for cmd in cmds: runcmd(cmd)
{% else %}
p = Parallel({{args.nthread}})
p.run('{}', [(cmd,) for cmd in cmds])
{% endif %}
Esempio n. 16
0
infile = {{i.infile | quote}}
outfile = {{o.outfile | quote}}
header = {{args.header | repr}}
by = {{args.by | quote}}
tool = {{args.tool | quote}}

if header:
    # start outfile with the '#'-prefixed lines of infile; gzipped input is
    # piped through zcat first
    if infile.endswith('.gz'):
        cmd = 'zcat {infile} | grep {cmdargs} > {outfile}'
    else:
        cmd = 'grep {cmdargs} {infile} > {outfile}'
    params = cmdargs(dict(e='^#'), dash='-', equal=' ')
    runcmd(cmd.format(cmdargs=params, infile=infile, outfile=outfile))

if tool == 'sort':
    # Sort the records only: both variants have to drop the '#' header lines
    # (grep -v). The gzipped variant used to select the header lines instead,
    # so the records were never sorted or written.
    if infile.endswith('.gz'):
        cmd = 'zcat {infile} | grep -v "^#" | sort {cmdargs} >> {outfile}'
    else:
        cmd = 'grep -v "^#" {infile} | sort {cmdargs} >> {outfile}'

    if by.lower().startswith('coord'):
        params = cmdargs({'k#1': '1,1', 'k#2': '2,2n'}, dash='-', equal=' ')
    else:
        params = cmdargs({
            'k#1': '3,3',
            'k#2': '1,1',
            'k#3': '2,2n'
        },
Esempio n. 17
0
def sam2bam(samfile, bamfile):
	"""
	Convert `samfile` to `bamfile` with `samtools view -Sb` and remove the
	sam file once the conversion succeeded.

	@params:
		`samfile`: The sam file to convert; deleted on success.
		`bamfile`: The bam file to write.
	"""
	logger.info('Converting sam to bam: ')
	logger.info('- %s', samfile)
	logger.info('- %s', bamfile)
	# Chain with && rather than ';': with ';' the sam file was removed even
	# when samtools failed, and the exit status of rm hid the failure.
	cmd = '%s view -Sb "%s" > "%s" && rm -f "%s"' % (samtools, samfile, bamfile, samfile)
	runcmd(cmd)
Esempio n. 18
0
from os import makedirs, path
from shutil import rmtree
from pyppl import Box
from bioprocs.utils import cmdargs, runcmd, mem2

# `stderr` is written to in the error handler below but was never imported,
# so a failing job raised NameError instead of reporting the real error.
from sys import stderr

# job-specific temporary directory, removed again at the end
tmpdir = path.join("{{args.tmpdir}}",
                   "{{proc.id}}.{{i.infile | fn}}.{{job.index}}")
if not path.exists(tmpdir): makedirs(tmpdir)

mem = mem2({{args.mem | quote}}, 'java')
ref = {{args.ref | quote}}
params = {{args.params}}
try:
    # picard arguments are rendered as KEY=value (no dash)
    params['TMP_DIR'] = tmpdir
    params['I'] = {{i.infile | quote}}
    params['O'] = {{o.outfile | quote}}
    params['R'] = ref
    runcmd('{{args.picard}} ReorderSam %s -Djava.io.tmpdir="%s" %s' %
           (mem, tmpdir, cmdargs(params, dash='', equal='=')))
except Exception as ex:
    stderr.write("Job failed: %s" % str(ex))
    raise
finally:
    # clean up the temporary directory whether or not the job succeeded
    rmtree(tmpdir)
Esempio n. 19
0
		descs[key] = desc_prefix + builtin_descs[fname](ffunc)
	else:
		realfilters[fname] = ffunc if callable(ffunc) else eval(ffunc)
		descs[fname] = desc_prefix + fname

# register every filter in the header of the output file
reader = vcf.Reader(filename=infile)
for fname, fdesc in descs.items():
	reader.filters[fname] = vcf.parser._Filter(id = fname, desc = fdesc)
writer = vcf.Writer(open(outfile, 'w'), reader)

while True:
	try:
		# the builtin next() works with both the python2 `.next()` and the
		# python3 `__next__()` iterator protocol
		record = next(reader)
		# a filter function returning a true value marks the record as failed
		for fname, ffunc in realfilters.items():
			if ffunc(record, record.samples):
				record.FILTER = record.FILTER or []
				record.FILTER.append(fname)
		# failed records are only written when `keep` is set
		if keep or not record.FILTER:
			writer.write_record(record)
	except StopIteration:
		break
	except Exception:
		# best effort: skip records that cannot be parsed or filtered, but
		# let KeyboardInterrupt/SystemExit through (a bare except hid them)
		continue

writer.close()

if gz:
	runcmd(['bgzip', outfile])


Esempio n. 20
0
# tool arguments rendered from args.params, extended below for the selected tool
params  = {{args.params}}
try:
{% case args.tool %}
	############# biobambam
	{% when 'biobambam' %}
	params['gz']       = 0
	#bug
	#params['S']        = fqfile
	params['filename'] = infile
	params['T']        = path.join(tmpdir, infile + '.tmp')
	# the input format is only set explicitly for sam files
	if infile.endswith('.sam'):
		params['inputformat'] = 'sam'

	# the fastq file is taken from standard output (the S option is disabled above)
	cmd = '{{args.biobambam}} %s > "%s"' % (cmdargs(params, dash = '', equal = '='), fqfile)
	runcmd (cmd)

	############# bedtools
	{% when 'bedtools' %}
	params['i']  = infile
	params['fq'] = fqfile

	cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ')
	runcmd (cmd)

	############# samtools
	{% when 'samtools' %}
	params['t'] = True
	params['s'] = fqfile

	# NOTE(review): cmdargs is called with its defaults here, unlike the
	# bedtools branch above -- confirm the rendered options are as intended
	cmd = '{{args.samtools}} fastq %s "%s"' % (cmdargs(params), infile)
Esempio n. 21
0
	makedirs (tmpdir)

# tool arguments rendered from args.params, extended below for the selected tool
params = {{args.params}}
try:
{% case args.tool %}
	############## picard
	{% when 'picard' %}
	mem = mem2({{ args.mem | quote }})
	params['-Djava.io.tmpdir'] = tmpdir
	params['TMP_DIR'] = tmpdir
	params['I'] = {{i.infile | quote}}
	params['O'] = {{o.outfile | quote}}
	# every read group field becomes an RG<key> argument
	for k,v in rg.items():
		params['RG' + k] = v

	# arguments are rendered as KEY=value (no dash)
	runcmd ('{{args.picard}} AddOrReplaceReadGroups %s %s' % (mem, cmdargs(params, dash='', equal='=')))

	############## bamutil
	{% when 'bamutil' %}
	# read group header line: the ID field first, then the remaining fields as key:value
	params['RG'] = "@RG\\tID:%s\\t%s" % (rg['ID'], "\\t".join([k + ":" + v for k,v in rg.items() if k!='ID']))
	params['in'] = {{i.infile | quote}}
	params['out'] = {{o.outfile | quote}}

	runcmd ('{{args.bamutil}} polishBam %s' % cmdargs(params, equal = ' '))

{% endcase %}
except Exception as ex:
	stderr.write ("Job failed: %s" % str(ex))
	raise
finally:
	# the temporary directory is removed whether or not the job succeeded
	rmtree (tmpdir)
Esempio n. 22
0
        openfunc = gzip.open if infile1.endswith('.gz') else open
        with openfunc(infile1) as fin, open(outfile, 'w') as fout:
            for line in fin:
                if not line.startswith('#'):
                    break
                fout.write(line)

    # go directly
    if not bychrom:
        cmd = '{bedtools} subtract {params} >> {outfile}'
        params = {'a': infile1, 'b': infile2}
        if rmany:
            params['A'] = True

        params = cmdargs(params, dash='-', equal=' ')
        runcmd(cmd.format(bedtools=bedtools, params=params, outfile=outfile))
    else:
        # check if infile1 is tabix indexed
        def tabindex(vcf, outdir):
            if not vcf.endswith('.gz'):
                gzfile = path.join(outdir, path.basename(vcf) + '.gz')
                bgzip_cmd = 'bgzip {!r} -c > {!r}'.format(vcf, gzfile)
                runcmd(bgzip_cmd)
                runcmd('{} {!r}'.format(tabix, gzfile))
            else:
                gzfile = path.join(outdir, path.basename(vcf))
                # it is gzipped, try to find the index file (.tbi)
                idxfile = path.join(outdir, path.basename(vcf) + '.tbi')
                symlink(vcf, gzfile)
                while True:
                    try:
Esempio n. 23
0
	if gz: runcmd(['gzip', outfile])

def run_somaticsniper():
	"""
	Run the somatic sniper executable (`ssniper`) on the tumor/normal pair
	and gzip the result when `gz` is set.
	"""
	# reference, output format and the positional arguments
	params.f   = ref
	params.F   = 'vcf'
	params[''] = [tumor, normal, outfile]
	runcmd('{ssniper} {args}'.format(ssniper = ssniper, args = cmdargs(params)))
	if gz:
		runcmd(['gzip', outfile])

def run_snvsniffer():
	# generate a header file
	theader = {{job.outdir | path.join: bn(i.tumor)  | @append: '.header' | quote}}
	nheader = {{job.outdir | path.join: bn(i.normal) | @append: '.header' | quote}}
	cmd = '{samtools} view -H {infile!r} > {hfile!r}'
	runcmd(cmd.format(samtools = samtools, infile = tumor, hfile = theader))
	runcmd(cmd.format(samtools = samtools, infile = normal, hfile = nheader))

	params.g = ref
	params.o = outfile

	params[''] = [theader, nheader, tumor, normal]
	cmd = '{ssniffer} somatic {args}'.format(ssniffer, cmdargs(params))
	runcmd(cmd)
	if gz: runcmd(['gzip', outfile])

def _mergeAndAddGT(snvvcf, indvcf, outfile):
	from pysam import VariantFile
	snv = VariantFile(snvvcf)
	ind = VariantFile(indvcf)
	
Esempio n. 24
0
def bgzip(fn, outfn=None):
    """Compress `fn` with bgzip.

    Args:
        fn: Path of the file to compress.
        outfn: Optional output path. When given, the compressed data is
            written there through `-c` and shell redirection; otherwise
            bgzip is run on `fn` without redirection.
    """
    if outfn:
        runcmd('bgzip {!r} -c > {!r}'.format(fn, outfn))
        return
    runcmd('bgzip {!r}'.format(fn))
Esempio n. 25
0
# register the cnvkit executable and pin the thread count of the numeric
# libraries to nthread through environment variables
shell.TOOLS['cnvkit'] = cnvkit
envs = dict(OPENBLAS_NUM_THREADS=str(nthread),
            OMP_NUM_THREADS=str(nthread),
            NUMEXPR_NUM_THREADS=str(nthread),
            MKL_NUM_THREADS=str(nthread))
# cnvkit commands run with outdir as working directory
ckshell = shell.Shell(subcmd=True, equal=' ', envs=envs, cwd=outdir).cnvkit

# generate target file
params_t = params.target
params_t.o = path.join(outdir, prefix + '.bed')
ckshell.target(exbaits, **params_t).run()

# generate access file
# (only when none was given)
if not accfile:
    accfile = path.join(outdir, prefix + '.access.bed')
    params_a = params.access
    params_a.o = accfile
    ckshell.access(ref, **params_a).run()

# autobin
# uses the target and access files from above; the input files are passed as
# positional arguments and the command is run through runcmd, not ckshell
params_b = params.autobin
params_b.t = params_t.o
params_b.g = accfile
params_b[''] = infiles
runcmd('cd {wdir}; {cnvkit} autobin {args}'.format(
    wdir=shell.shquote(outdir),
    cnvkit=shell.shquote(cnvkit),
    args=cmdargs(params_b, equal=' ')),
       env=envs)
Esempio n. 26
0
tool = {{args.tool | repr}}
picard = {{args.picard | repr}}
chain = {{args.lochain | repr}}
ref = {{args.ref | repr}}
params = {{args.params | repr}}
mem = {{args.mem | repr}}
tmpdir = {{args.tmpdir | repr}}

# the chain file is mandatory
if not chain:
    logger.error('Chain file (args.lochain) not provided!')
    exit(1)

# picard LiftoverVcf -Xmx4g -Xms1g  I=TCGA-05-4382-10.vcf O=1.vcf CHAIN=liftovers/hg38ToHg19.over.chain.gz R=ucsc_hg19.fa REJECT=r.vcf

if tool == 'picard':

    # LiftoverVcf arguments; umfile is passed as the REJECT file
    params.I = infile
    params.O = outfile
    params.CHAIN = chain
    params.REJECT = umfile
    params.R = ref

    # each memory option is added as a boolean switch: its first character is
    # replaced by '-' and cmdargs is called with an empty dash below
    javamem = mem2(mem, 'java')
    for jm in javamem.split():
        params['-' + jm[1:]] = True

    params['-Djava.io.tmpdir'] = tmpdir

    # arguments are rendered as KEY=value
    cmd = '{picard} LiftoverVcf {params}'
    runcmd(
        cmd.format(picard=picard, params=cmdargs(params, equal='=', dash='')))
Esempio n. 27
0
params['output-maf'] = {{o.outfile | quote}}
params['vep-data']   = {{args.vepDb | quote}}
params['vep-forks']  = {{args.nthread}}
params['filter-vcf'] = {{args.filtervcf | quote}}
params['ref-fasta']  = {{args.ref | quote}}
params['vep-path']   = path.dirname(vep)
{% if args.tumor1st %}
params['tumor-id']   = samples.pop(0)
params['normal-id']  = samples[0] if samples else 'NORMAL'
{% else %}
params['normal-id']  = samples.pop(0)
params['tumor-id']   = samples[0] if samples else 'NORMAL'
{% endif %}

cmd = '{{args.vcf2maf}} %s' % (cmdargs(params, equal=' '))
runcmd(cmd)

{% 	else %}
cmds = []
for sample in samples:
	vtparams = {}
	vtparams['a'] = True
	vtparams['c'] = sample
	vtparams['e'] = True
	samplevcf     = "{{job.outdir}}/{{i.infile | fn}}-%s.vcf" % sample
	cmd = '{{args.vcftools}} %s {{i.infile | quote}} > "%s"' % (cmdargs(vtparams), samplevcf)

	# vcf2maf.pl --input-vcf ZYYP-ZYYB.vcf  --output-maf ZYYP-ZYYB.snpEff.maf --tumor-id ZXLT-ZXLB_TUMOR --normal-id ZXLT-ZXLB_NORMAL --vep-data /path/to/vep/cache/ --filter-vcf /path/to/vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz --ref-fasta /path/to/hs37d5/phase2_reference_assembly_sequence/hs37d5.fa --vep-path /path/to/miniconda2/bin
	params['input-vcf']  = samplevcf
	params['output-maf'] = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample
	params['vep-data']   = {{args.vepDb | quote}}
Esempio n. 28
0
# build the reference from the target coverage files of all jobs
cnvkitReferenceParams['f'] = ref
cmd = openblas_nthr + '{cnvkit} reference {workdir}/*/output/*/*.targetcov.cnn {args}'.format(cnvkit = cnvkit, workdir = repr(workdir), args = cmdargs(cnvkitReferenceParams))

# the reference command is handed to poll.first with its own lock file
log2log('CNVkit: Run reference at job #0 ...')
poll.first(cmd, lockfile = 'reference.poll.lock')
log2log('CNVkit: Run reference at job #0 ... done')

# an empty file is passed to "fix" as its second positional argument
mtfile = "{outdir}/cnvkit_mt".format(outdir = outdir)
open(mtfile, 'w').close()

cnvkitFixParams      = params.fix
cnvkitFixParams['o'] = fixedCnr
cmd = openblas_nthr + '{cnvkit} fix {targetCov} {mtfile} {refcnn} {args}'.format(cnvkit = cnvkit, targetCov = repr(targetCov), mtfile = repr(mtfile), refcnn = repr(refcnn), args = cmdargs(cnvkitFixParams))

log2log('CNVkit: Run fix at all jobs ...')
runcmd (cmd)
log2log('CNVkit: Run fix at all jobs ... done')

# a fixed file smaller than 60 bytes is not segmented: only a header line is
# written to segfile -- NOTE(review): presumably such a file holds no records; confirm
if path.getsize(fixedCnr) < 60:
	open(segfile, 'w').write('chromosome	start	end	gene	log2	depth	probes	weight\\n')
else:
	cnvkitSegmentParams = params.segment
	cnvkitSegmentParams['o'] = segfile
	cnvkitSegmentParams['p'] = nthread

	cmd = openblas_nthr + '{cnvkit} segment {args} {fixedCnr}'.format(cnvkit = cnvkit, args = cmdargs(cnvkitSegmentParams), fixedCnr = repr(fixedCnr))
	log2log('CNVkit: Run segment at all jobs ...')
	runcmd (cmd)
	log2log('CNVkit: Run segment at all jobs ... done')

if path.getsize(segfile) < 60:
Esempio n. 29
0
        cmdps.append((tomtom, cmdargs(thparams, dash='-', equal=' ')))
    reader.close()
    Parallel(nthread, raiseExc=True).run('{} {}', cmdps)

    # merge the tomtom.txt of every output directory into outfile; the column
    # names are taken from the first one (its header line without the
    # leading character)
    writer = TsvWriter(outfile)
    reader = TsvReader(path.join(ocdirs[0], 'tomtom.txt'),
                       comment='##',
                       cnames=lambda header: header[1:].strip().split("\t"))
    writer.cnames = reader.cnames
    # the header is written back with a leading '#'
    writer.writeHead(lambda cnames: "#" + "\t".join(cnames))
    reader.close()
    for ocdir in ocdirs:
        reader = TsvReader(
            path.join(ocdir, 'tomtom.txt'),
            comment='##',
            cnames=lambda header: header[1:].strip().split("\t"))
        for r in reader:
            writer.write(r)
        reader.close()
    writer.close()
else:
    # single run: both motif files are positional arguments and tomtom writes
    # into outdir (oc option)
    params[""] = [mfile1, mfile2]
    params.thresh = qval
    params.oc = outdir

    cmd = '{tomtom} {params}'.format(tomtom=tomtom,
                                     params=cmdargs(params,
                                                    dash='-',
                                                    equal=' '))
    runcmd(cmd)
Esempio n. 30
0
    'debug': True,
    'log-file': path.join(outdir, 'gdc-client.log')
})
# the token is only passed when one was given
if token:
    args.t = token

# user-supplied params override the defaults
args.update(params)
cmd2run = gdc + cmdargs(args, equal=' ')
# NOTE(review): run through system(), so a failure of this first attempt does
# not raise here; missing files are retried one by one below -- confirm intent
system(cmd2run)

# check if all the data sucessfully downloaded
# ids are the first column of every non-empty line that does not start with 'id'
with open(infile) as fin:
    ids = [
        line.split()[0] for line in fin
        if line.strip() and not line.startswith('id')
    ]

# the retries pass a single id on the command line instead of the m option
del args['m']
for i in ids:
    if not path.isdir(path.join(outdir, i)):
        logger.warning('File failed to download: {}'.format(i))
        cmd2run = gdc + cmdargs(args, equal=' ') + ' ' + i
        runcmd(cmd2run)
        # the retried download is moved from the nested directory to outdir/<id>
        rename(path.join(outdir, 'https:', 'api.gdc.cancer.gov', 'data', i),
               path.join(outdir, i))

# remove the nested directories, innermost first (rmdir requires them to be empty)
if path.isdir(path.join(outdir, 'https:')):
    rmdir(path.join(outdir, 'https:', 'api.gdc.cancer.gov', 'data'))
    rmdir(path.join(outdir, 'https:', 'api.gdc.cancer.gov'))
    rmdir(path.join(outdir, 'https:'))