# save the data file
# expfile
"""
S1 S2 .. Sn
G1 ...
G2 ...
"""
# Keep only the rows whose first field (the gene/TF name) is needed.
# The names are collected into a set once so each row costs one O(1) lookup.
wanted = set(genes) | set(tfs)
expreader = TsvReader(expfile)
expdata = [r for r in expreader if r[0] in wanted]
expreader.close()

# Write the matrix transposed: the kept row names become the header, and
# every remaining input column becomes one output row.
datawriter = TsvWriter(outdata)
for i, cname in enumerate(expreader.cnames):
    if i == 0:
        # genes + tfs
        datawriter.cnames = [r[0] for r in expdata]
        datawriter.writeHead()
    else:
        datawriter.write([cname] + [r[i] for r in expdata])
datawriter.close()
del expdata

# Drop genes/TFs that have no expression data. Sets replace the repeated
# list scans of the original; the resulting lists and dict are the same.
written = set(datawriter.cnames)
genes = [g for g in genes if g in written]
tfs = [g for g in tfs if g in written]
geneset = set(genes)
tfset = set(tfs)
genetfs = {
    g: [tf for tf in gtfs if tf in tfset]
    for g, gtfs in genetfs.items()
    if g in geneset
}
# save the group file # mutfile """ S1 S2 .. Sn M1 ... (0/1/2/NA) M2 ...
# Restrict two TSV files to the columns they have in common and write both
# out with those columns in the same order.
indata1 = TsvReader(infile1, **inopts1)
indata2 = TsvReader(infile2, **inopts2)

# With row names, the first header field labels them and is not a data column.
cnames1 = indata1.meta if not rnames1 else indata1.meta[1:]
cnames2 = indata2.meta if not rnames2 else indata2.meta[1:]

# Shared columns, listed in the order they appear in the first file.
# Iterating a set directly would give an arbitrary order that can change
# from run to run, making the output files non-reproducible.
shared = set(cnames1) & set(cnames2)
paired = []
for cname in cnames1:
    if cname in shared:
        shared.discard(cname)  # keep each shared column only once
        paired.append(cname)

cnames1 = [indata1.meta[0]] + paired if rnames1 else list(paired)
cnames2 = [indata2.meta[0]] + paired if rnames2 else list(paired)

# Positions of the selected columns in each input record.
cindex1 = [indata1.meta.index(c) for c in cnames1]
cindex2 = [indata2.meta.index(c) for c in cnames2]

outdata1 = TsvWriter(outfile1)
outdata2 = TsvWriter(outfile2)
outdata1.meta = cnames1
outdata2.meta = cnames2
outdata1.writeHead()
outdata2.writeHead()

for r1 in indata1:
    outdata1.write([r1[i] for i in cindex1])
outdata1.close()
indata1.close()

for r2 in indata2:
    outdata2.write([r2[i] for i in cindex2])
outdata2.close()
indata2.close()
# Build one command line per motif; each motif gets its own output directory
# named "<name>.<motif with non-word characters removed>".
cmdparams = []
params.thresh = pval
params.verbosity = 4
for motif, name in motifs.items():
    params.oc = path.join(outdir, name + '.' + re.sub(r'[^\w_]', '', motif))
    params.motif = motif
    params[""] = [tfmotifs, sfile]
    cmdparams.append((meme, cmdargs(params, dash = '--', equal = ' ')))
Parallel(nthread, raiseExc = True).run('{} {}', cmdparams)

writer = TsvWriter(outfile)
writer.cnames = [
    "CHR", "START", "END", "NAME", "SCORE", "STRAND", "MOTIF", "SEQ",
    "STARTONSEQ", "STOPONSEQ", "RAWSCORE", "PVAL", "QVAL", "MATCHEDSEQ",
    "UCSCLINK"
]
# the header line is written as a "#"-prefixed comment line
writer.writeHead(callback = lambda cnames: "#" + "\t".join(cnames))

def rowfactory(r):
    # Fill the output fields of one result row; rows whose p-value is not
    # below the threshold are dropped by returning None.
    r.PVAL = float(r['p-value'])
    if r.PVAL >= pval:
        return None
    r.RAWSCORE = r.score
    try:
        r.SCORE = int(float(r.score) * 10)
    except (TypeError, ValueError, OverflowError):
        # float() raises ValueError (not TypeError) on non-numeric text, and
        # int() raises ValueError/OverflowError on nan/inf; all fall back to 0
        r.SCORE = 0
    r.STRAND = r.strand
    r.MOTIF = r.motif_id
    # split motif_alt_id
    # GENE or GENE::chr1:111-222 or ::chr1:111-222 or chr1:111-222
    r.SEQ = r.sequence_name
# Values filled in by the template engine before the script runs.
outfile = {{ o.outfile | quote}}
outdir = {{ o.outdir | quote}}
params = {{ args.params | repr}}
idxfile = {{ args.idxfile | quote}}
kallisto = {{ args.kallisto | quote}}
nthread = {{ args.nthread | repr}}

# Run `kallisto quant` with the index, output directory, thread count and
# the two fastq files as positional arguments.
shell.TOOLS.kallisto = kallisto
params.i = idxfile
params.o = outdir
params.t = nthread
params._ = [fq1, fq2]
kallisto = shell.Shell(subcmd = True).kallisto
kallisto.quant(**params).run()

# Reduce abundance.tsv to two columns: the target id (text before the first
# "::") and the estimated count rounded to an integer.
imfile = path.join(outdir, 'abundance.tsv')
reader = TsvReader(imfile)
writer = TsvWriter(outfile)
writer.cnames = ['target_id', 'est_counts']
writer.writeHead()
for r in reader:
    r.target_id = r.target_id.split('::')[0]
    try:
        r.est_counts = int(round(float(r.est_counts)))
    except (TypeError, ValueError, OverflowError):
        # float() raises ValueError (not TypeError) on non-numeric text, and
        # round() raises ValueError/OverflowError on nan/inf; all count as 0
        r.est_counts = 0
    writer.write(r)
writer.close()
reader.close()
for _ in range(dist): writer.write(next(reader)) writer.close() para = Parallel(nthread, raiseExc = True) para.run(getAlleleCount, [ (tumbam, path.join( thdir, '{bname}.thread{i}.snp'.format(bname = asbname, i = i) ), path.join( thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i) )) for i in range(nthread) ]) # merge to tumsnp writer = TsvWriter(tumsnp) writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount'] writer.writeHead(lambda cn: "#" + "\t".join(cn)) for i in range(nthread): subrc = path.join( thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i) ) reader = TsvReader(subrc, cnames = False) for r in reader: writer.write(r.values()) reader.close() writer.close() # normal para.run(getAlleleCount, [ (normbam, path.join( thdir, '{bname}.thread{i}.snp'.format(bname = asbname, i = i) ), path.join(