コード例 #1
0
pathview = {{args.pathview | repr}}

# Make the configured Rscript path available on shell.TOOLS
# (presumably what shell.Shell().Rscript resolves against -- confirm).
shell.TOOLS.Rscript = Rscript

# When the cutoff is given as a dict, expand the short 'by' selectors
# ('p', 'q') to their long names ('Pval', 'AdjPval'). The aliases are
# checked in this order, one after the other.
if isinstance(cutoff, dict):
	for alias, longname in (('p', 'Pval'), ('q', 'AdjPval')):
		if cutoff['by'] == alias:
			cutoff['by'] = longname

# Read the input table and collect the value in column `genecol` of every row.
reader = TsvReader(infile, **inopts)
genes  = [r[genecol] for r in reader]

# Build the Enrichr helper with the cutoff/top settings and add the gene
# list to it, using the input file's base name as the description.
en = Enrichr(cutoff = cutoff, top = top, Rscript = Rscript)
en.addList(genes, description = path.basename(infile))

# Parallel runner configured with `nthread` workers.
para = Parallel(nthread = nthread)
# Helper that invokes Rscript through shell.Shell() with the two positional
# arguments `r` and `hsa` and runs the resulting command.
# NOTE(review): `r` is presumably the pathview R script and `hsa` a KEGG
# pathway id -- confirm against the call site.
runPathview = lambda r, hsa: shell.Shell().Rscript(r, hsa).run()
for db in dbs:
	outfile = path.join(outdir, prefix + '.' + db + '.txt')
	en.enrich(db)
	en.export(outfile, top = 100)
	if plot:
		plotfile = path.join(outdir, prefix + '.' + db + '.png')
		en.plot(plotfile, res = devpars.res, width = devpars.width, height = devpars.height)
	if pathview and 'KEGG' in db:
		pathviewRDir  = path.join(outdir, prefix + '.' + db + '.pathview')
		pathviewRfile = path.join(pathviewRDir, 'pathview.R')
		shell.mkdir(pathviewRDir)
		with open(pathviewRfile, 'w') as f:
			f.write("""
			{rimport}('__init__.r')
コード例 #2
0
        writer = MemeWriter(qfile)
        writer.meta = reader.meta
        writer.writeMeta()
        for _ in range(joblist[i]):
            try:
                writer.write(reader.next())
            except StopIteration:
                break
        writer.close()
        thparams = params.copy()
        thparams[""] = [qfile, mfile2]
        thparams.thresh = qval
        thparams.oc = ocdir
        cmdps.append((tomtom, cmdargs(thparams, dash='-', equal=' ')))
    reader.close()
    Parallel(nthread, raiseExc=True).run('{} {}', cmdps)

    writer = TsvWriter(outfile)
    reader = TsvReader(path.join(ocdirs[0], 'tomtom.txt'),
                       comment='##',
                       cnames=lambda header: header[1:].strip().split("\t"))
    writer.cnames = reader.cnames
    writer.writeHead(lambda cnames: "#" + "\t".join(cnames))
    reader.close()
    for ocdir in ocdirs:
        reader = TsvReader(
            path.join(ocdir, 'tomtom.txt'),
            comment='##',
            cnames=lambda header: header[1:].strip().split("\t"))
        for r in reader:
            writer.write(r)
コード例 #3
0
ファイル: pVcf2Maf.py プロジェクト: LeaveYeah/bioprocs
	samplevcf     = "{{job.outdir}}/{{i.infile | fn}}-%s.vcf" % sample
	cmd = '{{args.vcftools}} %s {{i.infile | quote}} > "%s"' % (cmdargs(vtparams), samplevcf)

	# vcf2maf.pl --input-vcf ZYYP-ZYYB.vcf  --output-maf ZYYP-ZYYB.snpEff.maf --tumor-id ZXLT-ZXLB_TUMOR --normal-id ZXLT-ZXLB_NORMAL --vep-data /path/to/vep/cache/ --filter-vcf /path/to/vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz --ref-fasta /path/to/hs37d5/phase2_reference_assembly_sequence/hs37d5.fa --vep-path /path/to/miniconda2/bin
	params['input-vcf']  = samplevcf
	params['output-maf'] = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample
	params['vep-data']   = {{args.vepDb | quote}}
	params['vep-forks']  = {{args.nthread}}
	params['filter-vcf'] = {{args.filtervcf | quote}}
	params['ref-fasta']  = {{args.ref | quote}}
	params['vep-path']   = path.dirname(vep)

	cmd = cmd + '; {{args.vcf2maf}} --tumor-id %s %s' % (sample, cmdargs(params, equal=' '))
	cmds.append(cmd)

{% 		if args.nthread == 1 %}
for cmd in cmds: runcmd(cmd)
{% 		else %}
# Note the threads may be hanging on here.
p = Parallel({{args.nthread}})
p.run('{}', [(cmd,) for cmd in cmds])
{% 		endif %}

# Concatenate the per-sample MAF files into the single output file.
for i, sample in enumerate(samples):
	# Same per-sample path pattern as the 'output-maf' parameter set above.
	singlemaf = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample
	if i == 0:
		# First sample: copy the whole file (comment and header lines
		# included), overwriting any existing output.
		runcmd('cat "%s" > {{o.outfile | quote}}' % singlemaf)
	else:
		# Later samples: append only the lines that do not start with '#'
		# or 'Hugo_Symbol', so the header is not repeated.
		runcmd('egrep -v "^#|^Hugo_Symbol" "%s" >> {{o.outfile | quote}}' % singlemaf)
{% 	endif %}
{% endif %}
コード例 #4
0
ファイル: pVcfMerge.py プロジェクト: LeaveYeah/bioprocs
# Job inputs, outputs and arguments; the template engine fills these in
# before the script is executed.
invcfs = {{i.infiles | repr}}
outfile = {{o.outfile | quote}}
nthread = {{args.nthread | int}}
joboutdir = {{job.outdir | quote}}
vcftools = {{args.vcftools | quote}}
gatk = {{args.gatk | quote}}
tabix = {{args.tabix | quote}}
ref = {{args.ref | quote}}
params = {{args.params | repr}}
tool = {{args.tool | quote}}

# Make the configured executables available on shell.TOOLS
# (shell.Shell(...).vcftools is used by run_vcftools below).
shell.TOOLS.vcftools = vcftools
shell.TOOLS.gatk = gatk

# Call vcfIndex(vcf, tabix) for every input VCF in parallel (exceptions are
# re-raised) and rebind invcfs to the collected results.
# NOTE(review): presumably these are the paths of the indexed VCFs --
# confirm against vcfIndex.
para = Parallel(nthread, raiseExc=True)
invcfs = para.run(vcfIndex, [(vcf, tabix) for vcf in invcfs])


def run_vcftools():
    """Merge the input VCFs with the configured vcftools executable.

    Fills in the module-level ``params`` in place: the ``d`` and ``t``
    switches default to True unless already present, the input VCFs become
    the positional arguments (``_``) and ``outfile`` is set as ``_stdout``.
    The command is then run through ``shell.Shell`` with a space between
    option names and values.

    NOTE(review): ``d``/``t`` presumably map to vcf-merge's
    --remove-duplicates / --trim-ALTs -- confirm against the tool.
    """
    # Switch on each flag unless the caller already supplied a value.
    for flag in ('d', 't'):
        setattr(params, flag, params.get(flag, True))
    params._ = invcfs
    params._stdout = outfile
    runner = shell.Shell(equal=' ')
    runner.vcftools(**params).run()


def run_gatk():
    params.T = 'CombineVariants'
    params.o = outfile
    params.R = ref
コード例 #5
0
# Map every file name in the metadata to its extension and to a sample id.
exts = dict()
for sam in sam_meta:
	parts = sam['file_name'].split('.')
	ext = '.' + parts[-1]
	# For gzipped files keep the suffix before ".gz" too, e.g. ".vcf.gz".
	if ext == '.gz':
		ext = '.' + parts[-2] + ext
	exts      [sam['file_name']] = ext
	# Sample id: the first 15 characters of the first associated entity's
	# submitter id.
	sample_ids[sam['file_name']] = sam['associated_entities'][0]['entity_submitter_id'][:15]

# Collect candidate files under indir for every distinct extension.
samfiles = []
for ext in set(exts.values()):
	# Files located directly in indir ...
	samfiles += glob.glob (os.path.join(os.path.abspath(indir), "*" + ext))
	# ... or one directory level down, as laid out by a direct TCGA download.
	samfiles += glob.glob (os.path.join(os.path.abspath(indir), "*", "*" + ext))

lock = Lock()
def single(samfile):
	"""Link or copy one input file into outdir under its sample id.

	The target is named ``sample_ids[basename] + exts[basename]``. Files
	whose base name is not a key of ``sample_ids`` are ignored. Any existing
	target, including a dangling symlink left by an earlier run, is removed
	before the new link or copy is created.

	Args:
		samfile: Path of the source file.

	Returns:
		None.
	"""
	bn = os.path.basename(samfile)
	if bn not in sample_ids:
		return
	newfile = os.path.join(outdir, sample_ids[bn] + exts[bn])
	# Hold the lock so the remove-then-create sequence is not interleaved
	# with another worker's.
	with lock:
		# lexists() is also true for a broken symlink; exists() is not, which
		# would make os.symlink() fail with "file exists" and copyfile()
		# write through the stale link.
		if os.path.lexists(newfile):
			os.remove(newfile)
		if 'link' in method:
			os.symlink(samfile, newfile)
		elif method == 'copy':
			copyfile(samfile, newfile)

# Process every collected file with single() on a thread-backed Parallel
# runner; each argument tuple holds one file path, and raiseExc = True is
# passed so failures are not silently dropped.
p = Parallel(nthread = nthread, backend = 'threading', raiseExc = True)
p.run(single, [(samfile,) for samfile in samfiles])
コード例 #6
0
            params = cmdargs(params, dash='-', equal=' ')
            runcmd(
                cmd.format(bedtools=bedtools, params=params, outfile=outfile))
            remove(outfile1)
            remove(outfile2)
            return outfile

        infile1 = tabindex(infile1, outdir)
        infile2 = tabindex(infile2, outdir)
        chroms = [
            chr.strip()
            for chr in check_output([tabix, '-l', infile1]).splitlines()
        ]

        if nthread > 1:
            p = Parallel(nthread, raiseExc=True)
            outfiles = p.run(runChrom,
                             [(infile1, infile2, chrom) for chrom in chroms])
            # make sure it's in the right order
            outfiles = sorted(outfiles,
                              key=lambda x: chroms.index(x.split('.')[-2]))
        else:
            outfiles = []
            for chrom in chroms:
                outfiles.append(runChrom(infile1, infile2, chrom))

        with open(outfile, 'a+') as fout:
            for of in outfiles:
                with open(of) as f:
                    fout.write(f.read())
コード例 #7
0
	reader = TsvReader(affysnps, cnames = False)
	# dir to save the split file and result file
	thdir  = path.join(outdir, 'bamrc.nthreads')
	if not path.exists(thdir):
		makedirs(thdir)
	
	asbname = path.basename(affysnps).split('.')[0]
	for i, dist in enumerate(dists):
		writer = TsvWriter(path.join(thdir, '{bname}.thread{i}.snp'.format(
			bname = asbname, i = i
		)))
		for _ in range(dist):
			writer.write(next(reader))
		writer.close()
	
	para   = Parallel(nthread, raiseExc = True)
	para.run(getAlleleCount, [
		(tumbam, path.join(
			thdir, '{bname}.thread{i}.snp'.format(bname = asbname, i = i)
		), path.join(
			thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i)
		)) for i in range(nthread)
	])
	# merge to tumsnp
	writer = TsvWriter(tumsnp)
	writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']
	writer.writeHead(lambda cn: "#" + "\t".join(cn))
	for i in range(nthread):
		subrc = path.join(
			thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i)
		)