Beispiel #1
0
# save the data file
# expfile
"""
	S1	S2	..	Sn
G1	...
G2	...
"""
expreader  = TsvReader(expfile)
expdata    = [r for r in expreader if r[0] in genes or r[0] in tfs]
expreader.close()
datawriter = TsvWriter(outdata)
for i, cname in enumerate(expreader.cnames):
	if i == 0:
		# genes + tfs
		datawriter.cnames = [r[0] for r in expdata]
		datawriter.writeHead()
	else:
		datawriter.write([cname] + [r[i] for r in expdata])
datawriter.close()
del expdata
genes = [g for g in genes if g in datawriter.cnames]
tfs   = [g for g in tfs if g in datawriter.cnames]

genetfs = {g: [tf for tf in gtfs if tf in tfs] for g, gtfs in genetfs.items() if g in genes}

# save the group file
# mutfile
"""
	S1	S2	..	Sn
M1	... (0/1/2/NA)
M2	...
Beispiel #2
0
indata1 = TsvReader(infile1, **inopts1)
indata2 = TsvReader(infile2, **inopts2)

cnames1 = indata1.meta if not rnames1 else indata1.meta[1:]
cnames2 = indata2.meta if not rnames2 else indata2.meta[1:]
paired  = list(set(cnames1) & set(cnames2))
cnames1 = cnames2 = paired

if rnames1:
	cnames1 = [indata1.meta[0]] + cnames1
if rnames2:
	cnames2 = [indata2.meta[0]] + cnames2

cindex1 = [indata1.meta.index(c) for c in cnames1]
cindex2 = [indata2.meta.index(c) for c in cnames2]

outdata1 = TsvWriter(outfile1)
outdata2 = TsvWriter(outfile2)
outdata1.meta = cnames1
outdata2.meta = cnames2
outdata1.writeHead()
outdata2.writeHead()

for r1 in indata1:
	outdata1.write(r1[i] for i in cindex1)
outdata1.close()

for r2 in indata2:
	outdata2.write(r2[i] for i in cindex2)
outdata2.close()
Beispiel #3
0
	cmdparams        = []
	params.thresh    = pval
	params.verbosity = 4
	for motif, name in motifs.items():
		params.oc    = path.join(outdir, name + '.' + re.sub(r'[^\w_]', '', motif))
		params.motif = motif
		params[""]   = [tfmotifs, sfile]
		cmdparams.append((meme, cmdargs(params, dash = '--', equal = ' ')))
	Parallel(nthread, raiseExc = True).run('{} {}', cmdparams)

	writer = TsvWriter(outfile)
	writer.cnames = [
		"CHR", "START", "END", "NAME", "SCORE", "STRAND", "MOTIF", "SEQ", "STARTONSEQ",
		"STOPONSEQ", "RAWSCORE", "PVAL", "QVAL", "MATCHEDSEQ", "UCSCLINK"
	]
	writer.writeHead(callback = lambda cnames: "#" + "\t".join(cnames))

	def rowfactory(r):
		r.PVAL       = float(r['p-value'])
		if r.PVAL >= pval:
			return None
		r.RAWSCORE = r.score
		try:
			r.SCORE = int(float(r.score) * 10)
		except TypeError:
			r.SCORE = 0
		r.STRAND   = r.strand
		r.MOTIF    = r.motif_id
		# split motif_alt_id
		# GENE or GENE::chr1:111-222 or ::chr1:111-222 or chr1:111-222
		r.SEQ = r.sequence_name
Beispiel #4
0
outfile = {{ o.outfile | quote}}
outdir  = {{ o.outdir | quote}}
params = {{ args.params | repr}}
idxfile = {{ args.idxfile | quote}}
kallisto = {{ args.kallisto | quote}}
nthread = {{ args.nthread | repr}}

shell.TOOLS.kallisto = kallisto
params.i = idxfile
params.o = outdir
params.t = nthread
params._ = [fq1, fq2]

kallisto = shell.Shell(subcmd = True).kallisto
kallisto.quant(**params).run()

imfile        = path.join(outdir, 'abundance.tsv')
reader        = TsvReader(imfile)
writer        = TsvWriter(outfile)
writer.cnames = ['target_id', 'est_counts']
writer.writeHead()

for r in reader:
	r.target_id = r.target_id.split('::')[0]
	try:
		r.est_counts = int(round(float(r.est_counts)))
	except TypeError:
		r.est_counts = 0
	writer.write(r)
writer.close()
Beispiel #5
0
		for _ in range(dist):
			writer.write(next(reader))
		writer.close()
	
	para   = Parallel(nthread, raiseExc = True)
	para.run(getAlleleCount, [
		(tumbam, path.join(
			thdir, '{bname}.thread{i}.snp'.format(bname = asbname, i = i)
		), path.join(
			thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i)
		)) for i in range(nthread)
	])
	# merge to tumsnp
	writer = TsvWriter(tumsnp)
	writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']
	writer.writeHead(lambda cn: "#" + "\t".join(cn))
	for i in range(nthread):
		subrc = path.join(
			thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i)
		)
		reader = TsvReader(subrc, cnames = False)
		for r in reader:
			writer.write(r.values())
		reader.close()
	writer.close()

	# normal
	para.run(getAlleleCount, [
		(normbam, path.join(
			thdir, '{bname}.thread{i}.snp'.format(bname = asbname, i = i)
		), path.join(