Ejemplo n.º 1
0
		en.plot(plotfile, res = devpars.res, width = devpars.width, height = devpars.height)
	# Optional pathway rendering: when `pathview` is set and `db` contains 'KEGG',
	# write an R driver script into its own directory and run it for the first
	# `top` enrichment results.
	if pathview and 'KEGG' in db:
		pathviewRDir  = path.join(outdir, prefix + '.' + db + '.pathview')
		pathviewRfile = path.join(pathviewRDir, 'pathview.R')
		shell.mkdir(pathviewRDir)
		# The R source is completed by str.format below (rimport, pathviewRDir,
		# infile, genecol). The doubled braces and the raw/endraw markers look like
		# template-engine syntax resolved before this Python runs - TODO confirm.
		with open(pathviewRfile, 'w') as f:
			f.write("""
			{rimport}('__init__.r')
			library(pathview)
			args = commandArgs(trailingOnly = TRUE)
			setwd({pathviewRDir!r})
			inopts = {{args.inopts | R}}
			inopts$rnames = FALSE
			indata = read.table.inopts({infile!r}, inopts)
			genes  = as.vector(indata[, {genecol}, drop = TRUE])
			pvargs = {{args.pathview | R}}
			{% raw %}
			if (!is.null(pvargs$fccol)) {{
				fcdata = as.vector(indata[, pvargs$fccol, drop = TRUE])
				names(fcdata) = genes
				genes = fcdata
			}}
			{% endraw %}
			pathview(gene.data = genes, pathway.id = args[1], species = 'hsa', gene.idtype="SYMBOL")
			""".format(
				# An integer genecol is shifted by one (presumably a 0-based index
				# converted to R's 1-based columns - confirm); other values pass through.
				rimport = rimport, genecol = genecol + 1 if isinstance(genecol, int) else genecol, 
				infile = infile, pathviewRDir = pathviewRDir)
			)
		# One task per result: the script path plus the text after the last '_' of
		# the term name (presumably handed to the script as args[1], which the R
		# code uses as the pathway id - verify against runPathview).
		para.run(runPathview, [(pathviewRfile, term.Term.split('_')[-1]) for term in en.results[:top]])
		
Ejemplo n.º 2
0
	# Per-sample work: first write a VCF for this sample with vcftools (vtparams is
	# built outside this excerpt), then convert that VCF to MAF with vcf2maf.
	samplevcf     = "{{job.outdir}}/{{i.infile | fn}}-%s.vcf" % sample
	cmd = '{{args.vcftools}} %s {{i.infile | quote}} > "%s"' % (cmdargs(vtparams), samplevcf)

	# Example invocation of vcf2maf for reference:
	# vcf2maf.pl --input-vcf ZYYP-ZYYB.vcf  --output-maf ZYYP-ZYYB.snpEff.maf --tumor-id ZXLT-ZXLB_TUMOR --normal-id ZXLT-ZXLB_NORMAL --vep-data /path/to/vep/cache/ --filter-vcf /path/to/vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz --ref-fasta /path/to/hs37d5/phase2_reference_assembly_sequence/hs37d5.fa --vep-path /path/to/miniconda2/bin
	params['input-vcf']  = samplevcf
	params['output-maf'] = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample
	params['vep-data']   = {{args.vepDb | quote}}
	params['vep-forks']  = {{args.nthread}}
	params['filter-vcf'] = {{args.filtervcf | quote}}
	params['ref-fasta']  = {{args.ref | quote}}
	# vcf2maf is pointed at the directory that contains the vep executable.
	params['vep-path']   = path.dirname(vep)

	# Chain both steps into one shell command line (separated by ';') and queue it;
	# the sample name is used as the tumor id.
	cmd = cmd + '; {{args.vcf2maf}} --tumor-id %s %s' % (sample, cmdargs(params, equal=' '))
	cmds.append(cmd)

# Execute the queued per-sample commands. The template branch decides at render
# time whether they run one after another (nthread equal to 1) or through Parallel.
{% 		if args.nthread == 1 %}
for cmd in cmds: runcmd(cmd)
{% 		else %}
# NOTE: the worker threads may hang at this point.
p = Parallel({{args.nthread}})
# Each task is a one-element tuple holding the full command line; the first
# argument is presumably a command template filled from that tuple - confirm.
p.run('{}', [(cmd,) for cmd in cmds])
{% 		endif %}

# Concatenate the per-sample MAF files into the single output file, in the order
# of `samples`.
for i, sample in enumerate(samples):
	singlemaf = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample
	if i == 0:
		# The first file is copied whole, so its header lines are kept once.
		runcmd('cat "%s" > {{o.outfile | quote}}' % singlemaf)
	else:
		# Later files are appended without lines starting with '#' or 'Hugo_Symbol'.
		runcmd('egrep -v "^#|^Hugo_Symbol" "%s" >> {{o.outfile | quote}}' % singlemaf)
{% 	endif %}
{% endif %}
Ejemplo n.º 3
0
# Job settings. The right-hand sides are template placeholders, presumably
# substituted with Python literals before this script is executed.
invcfs = {{i.infiles | repr}}
outfile = {{o.outfile | quote}}
nthread = {{args.nthread | int}}
joboutdir = {{job.outdir | quote}}
vcftools = {{args.vcftools | quote}}
gatk = {{args.gatk | quote}}
tabix = {{args.tabix | quote}}
ref = {{args.ref | quote}}
params = {{args.params | repr}}
tool = {{args.tool | quote}}

# Register the configured executables on the shell helper.
shell.TOOLS.vcftools = vcftools
shell.TOOLS.gatk = gatk

# Run vcfIndex on every input VCF in parallel, passing (vcf, tabix) to each call;
# the values it returns replace the original input list.
para = Parallel(nthread, raiseExc=True)
invcfs = para.run(vcfIndex, [(vcf, tabix) for vcf in invcfs])


def run_vcftools():
    """Run the ``vcftools`` shell command over the indexed input VCFs.

    The shared ``params`` object is updated in place: ``d`` and ``t`` keep
    their existing value or default to ``True``, ``_`` receives ``invcfs``
    and ``_stdout`` receives ``outfile`` (presumably the positional
    arguments and the stdout redirection of the shell helper -- confirm).
    The resulting parameters are passed as keyword arguments to the
    ``vcftools`` command of a ``shell.Shell`` created with ``equal=' '``.
    """
    # Keep caller-supplied values for the two flags; otherwise switch them on.
    for flag in ('d', 't'):
        setattr(params, flag, params.get(flag, True))
    params._ = invcfs
    params._stdout = outfile
    runner = shell.Shell(equal=' ')
    runner.vcftools(**params).run()


def run_gatk():
    params.T = 'CombineVariants'
    params.o = outfile
    params.R = ref
    params.nt = nthread
Ejemplo n.º 4
0
# Build two lookups keyed by metadata file name: the file extension (keeping the
# inner extension of gzipped files, e.g. ".vcf.gz") and the sample id, which is
# the first 15 characters of the first associated entity's submitter id.
exts = {}
for sam in sam_meta:
	fname = sam['file_name']
	parts = fname.split('.')
	ext = '.' + parts[-2] + '.gz' if parts[-1] == 'gz' else '.' + parts[-1]
	exts[fname] = ext
	submitter_id = sam['associated_entities'][0]['entity_submitter_id']
	sample_ids[fname] = submitter_id[:15]

# Gather candidate files for every distinct extension, looking directly in indir
# and one directory level below it (the layout of a direct TCGA download).
samfiles = []
for ext in set(exts.values()):
	indir_abs = os.path.abspath(indir)
	for pattern in (
		os.path.join(indir_abs, "*" + ext),
		os.path.join(indir_abs, "*", "*" + ext),
	):
		samfiles.extend(glob.glob(pattern))

lock = Lock()
def single(samfile):
	"""Link or copy one downloaded file into ``outdir`` under its sample id.

	Files whose base name is not a key of ``sample_ids`` are ignored. The
	target is named ``sample_ids[basename] + exts[basename]`` and anything
	already at that path is removed first. ``method`` selects the action: a
	value containing 'link' creates a symlink to ``samfile``, the value
	'copy' copies it, and any other value only removes an existing target.
	"""
	bn = os.path.basename(samfile)
	if bn not in sample_ids:
		return
	newfile = os.path.join(outdir, sample_ids[bn] + exts[bn])
	# Remove-then-create is done under the lock, presumably because several
	# inputs can map to the same target name when run in parallel.
	with lock:
		# lexists instead of exists: exists() follows symlinks and reports False
		# for a dangling link, which would then make os.symlink fail because the
		# path is still occupied.
		if os.path.lexists(newfile):
			os.remove(newfile)
		if 'link' in method:
			os.symlink(samfile, newfile)
		elif method == 'copy':
			copyfile(samfile, newfile)

# Process every collected file with `single` on a thread-backed Parallel runner;
# each task is a one-element argument tuple. raiseExc = True presumably makes
# worker exceptions propagate - confirm against the Parallel helper.
p = Parallel(nthread = nthread, backend = 'threading', raiseExc = True)
p.run(single, [(samfile,) for samfile in samfiles])
Ejemplo n.º 5
0
            runcmd(
                cmd.format(bedtools=bedtools, params=params, outfile=outfile))
            remove(outfile1)
            remove(outfile2)
            return outfile

        # Index both inputs (tabindex also receives outdir), then list the
        # sequence names of the first input with `tabix -l`.
        infile1 = tabindex(infile1, outdir)
        infile2 = tabindex(infile2, outdir)
        # NOTE(review): on Python 3 check_output returns bytes unless text mode
        # is requested - confirm the names are str where they are compared with
        # file-name parts below.
        chroms = [
            name.strip()
            for name in check_output([tabix, '-l', infile1]).splitlines()
        ]

        if nthread > 1:
            p = Parallel(nthread, raiseExc=True)
            outfiles = p.run(runChrom,
                             [(infile1, infile2, chrom) for chrom in chroms])
            # Restore chromosome order: the second-to-last dot-separated part
            # of each output file name is looked up in chroms.
            outfiles = sorted(outfiles,
                              key=lambda x: chroms.index(x.split('.')[-2]))
        else:
            outfiles = [runChrom(infile1, infile2, chrom) for chrom in chroms]

        # Append the per-chromosome results to the final output, streaming line
        # by line so that no whole file has to be held in memory.
        with open(outfile, 'a+') as fout:
            for of in outfiles:
                with open(of) as f:
                    for line in f:
                        fout.write(line)

elif tool == 'pyvcf':
    import vcf
Ejemplo n.º 6
0
		makedirs(thdir)
	
	# Split the SNP records into one chunk file per entry of dists: chunk i
	# receives the next dists[i] records pulled from `reader` (opened outside this
	# excerpt). Chunk files are named after the part of the affysnps base name
	# before its first dot.
	asbname = path.basename(affysnps).split('.')[0]
	for i, dist in enumerate(dists):
		writer = TsvWriter(path.join(thdir, '{bname}.thread{i}.snp'.format(
			bname = asbname, i = i
		)))
		for _ in range(dist):
			writer.write(next(reader))
		writer.close()
	
	# Run getAlleleCount in parallel, one task per thread index, with the arguments
	# (tumor BAM, SNP chunk file i, output .bamrc file i).
	# NOTE(review): chunks were written per entry of dists but tasks are built per
	# range(nthread) - this assumes dists has exactly nthread entries; confirm.
	para   = Parallel(nthread, raiseExc = True)
	para.run(getAlleleCount, [
		(tumbam, path.join(
			thdir, '{bname}.thread{i}.snp'.format(bname = asbname, i = i)
		), path.join(
			thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i)
		)) for i in range(nthread)
	])
	# Merge the per-thread .bamrc files into tumsnp, in thread order, under one
	# header line made of '#' followed by the tab-joined column names.
	writer = TsvWriter(tumsnp)
	writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']
	writer.writeHead(lambda cn: "#" + "\t".join(cn))
	for i in range(nthread):
		subrc = path.join(
			thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i)
		)
		# cnames = False: presumably the per-thread files carry no header row -
		# confirm against TsvReader.
		reader = TsvReader(subrc, cnames = False)
		for r in reader:
			writer.write(r.values())
		reader.close()