def Main():
    ''' IO TEMPLATE '''
    global args, out
    args = ParseArg()
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    G2RFP = defaultdict(init)
    ''' END OF IO TEMPLATE '''
    # 200 bins cover positions -100..+99 relative to each database read start
    m = [0.0 for i in xrange(200)]
    for b in TableIO.parse(IO.fopen(args.db, "r"), "bed6"):
        G2RFP[b.chr].append(b)
    total_reads = 0
    for i0, i in enumerate(TableIO.parse(fin, "bed6")):
        spectral = [0 for j in xrange(200)]
        for j in G2RFP[i.chr]:
            dis = i.start - j.start
            if -100 <= dis < 100:
                spectral[dis + 100] += j.score
        spectral = norm(spectral)
        total_reads += i.score
        m = [a * i.score + b for a, b in itertools.izip(spectral, m)]
        if i0 % 100 == 0:
            # end="" keeps the carriage return effective as a progress line
            print("{} processed\r".format(i0), end="", file=sys.stderr)
    print("pos\tvalue", file=out)
    for i, x in enumerate(m):
        print("{}\t{}".format(i, float(x) / total_reads), file=out)
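# `norm` is defined elsewhere in this module; below is only a minimal sketch of
# a sum-to-one normalization consistent with how Main() uses it (hypothetical
# helper, not the actual bam2x implementation):
def norm(v):
    """Scale a list of counts so it sums to 1; return it unchanged if all zero."""
    s = float(sum(v))
    if s == 0.0:
        return v
    return [x / s for x in v]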
def run(args):
    schema_template = schema_templates[args.input_format]
    SQL_template = insert_templates[args.input_format]
    db_filename = args.db
    out = IO.fopen(args.output, "w")
    if db_filename == "guess":
        # str.strip(".gz") removes characters, not the suffix; trim explicitly
        name = args.input[:-3] if args.input.endswith(".gz") else args.input
        db_filename = name + ".db"
    db_is_new = not os.path.exists(db_filename)
    print("Database file : %s" % db_filename, file=out)
    with sqlite3.connect(db_filename) as conn:
        cursor = conn.cursor()
        S = schema_template.substitute({"table_name": args.table_name})
        if db_is_new:
            print('Creating table %s if not exists\n________________________________' % args.table_name, file=out)
            print(S, file=out)
            print("_______________________________", file=out)
        cursor.execute(S)
        S1 = SQL_template.substitute({"table_name": args.table_name})
        print(S1, file=out)
        s = TableIO.parse(args.input, "simple")
        cursor.executemany(S1, s)
        conn.commit()
        print("loaded", file=out)
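# schema_templates and insert_templates above are dicts of string.Template
# objects keyed by input format. A self-contained sketch of what one entry
# might look like (table layout and names are illustrative, not the actual
# bam2x templates):
from string import Template

_schema_example = Template("CREATE TABLE IF NOT EXISTS ${table_name} "
                           "(chr TEXT, start INTEGER, stop INTEGER)")
_insert_example = Template("INSERT INTO ${table_name} VALUES (?,?,?)")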
def run(args):
    bedformat = "bed" + str(args.bed_column_number)
    dbi = DBI.init(args.genome, "genome")
    out = IO.fopen(args.output, "w")
    for i in TableIO.parse(IO.fopen(args.input, "r"), bedformat):
        # ">{}" avoids the stray space a comma-separated print would put in the FASTA header
        print(">{}".format(i.id + "_" + args.method), file=out)
        print(seq_wrapper(dbi.query(i, method=args.method)), file=out)
def run(args):
    dbi = DBI.init(args.bw, "bigwig")
    out = IO.fopen(args.output, "w")
    for i in TableIO.parse(IO.fopen(args.input, "r"), args.format):
        ht = [j for j in dbi.query(i, method=args.method)]
        print("QR", i, file=out)
        print("HT", ht, file=out)
def run(args):
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    r = []
    m = 0
    ideograms = []
    qr = ""
    for i in TableIO.parse(fin, sep=","):
        if len(i) == 1:
            # header line: "...\tchromosome_name" starts a new ideogram
            a = i[0].split("\t")
            if len(a) == 2:
                if args.query == "all" or args.query == qr:
                    if m > 0:
                        ideograms.append({"id": qr, "length": m})
                qr = a[1].strip()
        else:
            # data line: (start, value, length)
            if args.query == "all" or args.query == qr:
                r.append({"chr": qr, "start": i[0], "length": i[2], "value": i[1]})
                m = int(i[0]) + int(i[2])
    if args.query == "all" or args.query == qr:
        if m > 0:
            ideograms.append({"id": qr, "length": m})
    j = {"ideograms": ideograms,
         "tracks": [{"name": args.input,
                     "type": "bedgraph",
                     "values": r}]}
    print(json.dumps(j, indent=4), file=out)
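# For reference, the JSON written above has this shape (values illustrative;
# start/length/value stay strings because they are taken from the parsed
# fields unchanged):
# {
#     "ideograms": [ {"id": "chr1", "length": 249250621} ],
#     "tracks": [
#         {"name": "input.csv", "type": "bedgraph",
#          "values": [ {"chr": "chr1", "start": "0", "length": "100", "value": "1.5"} ]}
#     ]
# }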
def run(args):
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    if args.format == "guess":
        args.format = IO.guess_format(args.input)
    for i in TableIO.parse(fin, args.format):
        print(i, file=out)
def parse_bed(f):
    '''read bed12 records into a list of field-name -> value dicts'''
    a = []
    for i in TableIO.parse(f, "bed12"):
        h = {}
        for v, key in izip(i, hclass["bed12"]._fields):
            h[key] = v
        a.append(h)
    return a
def run(local_args):
    ''' IO TEMPLATE '''
    global args, out
    args = local_args
    out = IO.fopen(args.output, "w")
    fin = IO.fopen(args.input, "r")
    print("# This data was generated by program ", sys.argv[0], " (version: %s)" % VERSION, file=out)
    print("# in bam2x ( https://github.com/nimezhu/bam2x )", file=out)
    print("# Date: ", time.asctime(), file=out)
    print("# The command line is :", file=out)
    print("#\t", " ".join(sys.argv), file=out)
    gene = DBI.init(args.genetab, "binindex", cls="bed12")
    upstream_list = []
    downstream_list = []
    exons_list = []
    introns_list = []
    utr3_list = []
    utr5_list = []
    for g in gene:
        upstream_list.append(g.upstream(args.upstream))
        downstream_list.append(g.downstream(args.downstream))
        for e in g.Exons():
            exons_list.append(e)
        for i in g.Introns():
            introns_list.append(i)
        if g.utr3() is not None:
            utr3_list.append(g.utr3())
        if g.utr5() is not None:
            utr5_list.append(g.utr5())
    upstream = DBI.init(upstream_list, "binindex", cls="bed6")
    downstream = DBI.init(downstream_list, "binindex", cls="bed6")
    exons = DBI.init(exons_list, "binindex", cls="bed6")
    introns = DBI.init(introns_list, "binindex", cls="bed6")
    utr3 = DBI.init(utr3_list, "binindex", cls="bed6")
    utr5 = DBI.init(utr5_list, "binindex", cls="bed6")
    if args.format == "guess":
        args.format = IO.guess_format(args.input)
    for i0, i in enumerate(TableIO.parse(fin, args.format)):
        if i0 == 0:
            if isinstance(i, Bed12):
                print("#chr\tstart\tend\tname\tscore\tstrand\tthick_start\tthick_end\titem_rgb\tblock_count\tblock_sizes\tblock_starts\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5", file=out)
            else:
                print("#chr\tstart\tend\tname\tscore\tstrand\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5", file=out)
        print(i, file=out, end="")
        # "\t{}" keeps the output strictly tab-separated (no stray spaces)
        print("\t{}".format(toIDs(gene.query(i))), file=out, end="")
        print("\t{}".format(toIDs(upstream.query(i))), file=out, end="")
        print("\t{}".format(toIDs(downstream.query(i))), file=out, end="")
        print("\t{}".format(toIDs(exons.query(i))), file=out, end="")
        print("\t{}".format(toIDs(introns.query(i))), file=out, end="")
        print("\t{}".format(toIDs(utr3.query(i))), file=out, end="")
        print("\t{}".format(toIDs(utr5.query(i))), file=out)
def run(args):
    bedformat = "bed" + str(args.bed_column_number)
    dbi = DBI.init(args.bam, "bam")
    out = IO.fopen(args.output, "w")
    for i in TableIO.parse(IO.fopen(args.input, "r"), bedformat):
        print("QR", i, file=out)
        for j in dbi.query(i, method=args.method):
            print("HT", j, file=out)
        print("", file=out)
def parse_orthologs(f):
    ortholog_fields = ["lnc", "lncGeneSymbol", "ortholog", "orthologGeneSymbol",
                       "alignNo", "exonID", "locusID", "indelRate",
                       "lncExonsAligned", "orthExonsAligned", "category"]
    a = []
    for i in TableIO.parse(f):
        h = {}
        for v, key in izip(i, ortholog_fields):
            h[key] = v
        a.append(h)
    return a
def _generate_db(filename, db_filename, table_name):
    with sqlite3.connect(db_filename) as conn:
        cursor = conn.cursor()
        S = schema_t.substitute({"table_name": table_name})
        cursor.execute(S)
        LOAD_S = insert_t.substitute({"table_name": table_name})
        s = TableIO.parse(IO.fopen(filename, "r"), "simple")
        cursor.executemany(LOAD_S, s)
        conn.commit()
def run(args):
    out = IO.fopen(args.output, "w")
    if args.annotation == "exon":
        for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
            for j in i.Exons():
                print(j, file=out)
    elif args.annotation == "intron":
        for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
            for j in i.Introns():
                print(j, file=out)
    elif args.annotation == "cds":
        for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
            j = i.cds()
            if j is not None and j.cdna_length() > 0:
                print(j, file=out)
    elif args.annotation == "utr5":
        for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
            j = i.utr5()
            if j is not None and j.cdna_length() > 0:
                print(j, file=out)
    elif args.annotation == "utr3":
        for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
            j = i.utr3()
            if j is not None and j.cdna_length() > 0:
                print(j, file=out)
    elif args.annotation == "utr":
        for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
            j = i.utr5()
            if j is not None and j.cdna_length() > 0:
                print(j, file=out)
            j = i.utr3()
            if j is not None and j.cdna_length() > 0:
                print(j, file=out)
    elif args.annotation == "upstream":
        for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
            print(i.upstream(args.bp), file=out)
    elif args.annotation == "downstream":
        for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
            print(i.downstream(args.bp), file=out)
def run(args):
    bedformat = "bed" + str(args.bed_column_number)
    dbi = DBI.init(args.genome, "genome")
    out = IO.fopen(args.output, "w")
    for i in TableIO.parse(IO.fopen(args.input, "r"), bedformat):
        seq = dbi.query(i, method=args.method)
        if len(seq) == 0:
            continue
        print(">{}".format(i.id + "_" + args.method), file=out)
        print(seq_wrapper(seq), file=out, end="")
def run(args):
    # logging.basicConfig(level=logging.DEBUG)
    dbi = DBI.init(args.bam, "bam")
    out = IO.fopen(args.output, "w")
    for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
        print("QR\t", i, file=out)
        for j in dbi.query(i, method="bam1", strand=args.strand):
            if compatible_with_transcript(j, i):
                print("HT\t{}".format(_translate_to_meta(i, j)), file=out)
            elif not args.hit:
                print("OP\t{}".format(j), file=out)
        print("", file=out)
def run(args):
    out = IO.fopen(args.output, "w")
    if args.type in hclass:
        dbi = DBI.init(args.db, "tabix", cls=hclass[args.type])
    else:
        dbi = DBI.init(args.db, "tabix")
    for i in TableIO.parse(IO.fopen(args.input, "r"), args.format):
        print("QR", i, file=out)
        for j, ht in enumerate(dbi.query(i)):
            print("HT_{k}\t{ht}".format(k=j + 1, ht=ht), file=out)
def run(args):
    # prefer the tabix index when one exists next to the bed file
    if os.path.isfile(args.bed + ".tbi"):
        dbi = DBI.init(args.bed, "tabix", cls=BED12)
    else:
        dbi = DBI.init(args.bed, "binindex", cls=BED12)
    out = IO.fopen(args.output, "w")
    for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
        print("QR\t", i, file=out)
        for j in dbi.query(i):
            if compatible_with_transcript(j, i):
                print("HT\t{}".format(_translate_to_meta(i, j)), file=out)
            elif not args.hit:
                print("OP\t{}".format(j), file=out)
        print("", file=out)
def run(args):
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    beds = [i for i in TableIO.parse(fin, "bed12")]
    beds.sort()
    for i, x in enumerate(iter_cluster(beds)):
        id = find_prefix_consensus([i0.id for i0 in x[1]])
        strand = find_consensus_strand([i0.strand for i0 in x[1]])
        print("REGION\tCL_{index}\t{chr}\t{start}\t{end}\t{id}\t{score}\t{strand}".format(
            strand=strand, score=len(x[1]), chr=x[1][0].chr, start=x[1][0].start,
            end=x[0], index=str(i + 1), id=id), file=out)
        for j, y in enumerate(greedy_iter_compatible_group(x[1])):
            print("\tGROUP{j}\t{bed}".format(
                j=j + 1, bed=merge_beds(y, id="CL.{i}_GP.{j}".format(i=i + 1, j=j + 1))), file=out)
            for k, z in enumerate(sorted(y, key=lambda x0: x0.cdna_length(), reverse=True)):
                print("\t\tCL.{i}_GP.{j}_TR.{k}\t{l}\t{z}".format(
                    i=i + 1, j=j + 1, k=k + 1, l=z.cdna_length(), z=z), file=out)
def run(args):
    fin = IO.fopen(args.input, "r")
    outfile = args.input
    if not args.sorted:
        l = [i for i in TableIO.parse(fin, args.format)]
        l.sort()
        name = splitext(args.input)
        outfile = "{name[0]}.sorted{name[1]}".format(name=name)
        out = IO.fopen(outfile, "w")
        for i in l:
            print(i, file=out)
        out.close()
    # drop digits for the tabix preset: e.g. "bed12" -> "bed"
    format = args.format.translate(None, digits)
    tabix_index(outfile, preset=format)
def __init__(self, bamfiles, **dict):
    '''accept a list of bam filenames, a file listing bam filenames,
    or already-opened pysam.Samfile handles'''
    if isinstance(bamfiles, str):
        filename = bamfiles
        bamfiles = []
        for i in TableIO.parse(filename, "simple"):
            bamfiles.append(i[0])
    self.bamfiles = []
    for bamfile in bamfiles:
        if isinstance(bamfile, str):
            try:
                bamfile = pysam.Samfile(bamfile, "rb")
            except:
                print >>sys.stderr, "WARNING: Can't init the bam file", bamfile
                continue
        self.bamfiles.append(bamfile)
def run(args):
    logging.basicConfig(level=logging.DEBUG)
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    bam = DBI.init(args.bam, "bam")
    beds = [i for i in TableIO.parse(fin, "bed12")]
    beds.sort()
    bp = args.bp
    print("mapped:{}".format(bam.mapped))
    print("unmapped:{}".format(bam.unmapped))
    data = {}
    for i, x in enumerate(iter_cluster(beds)):
        print("{}\t{}:{}-{}".format(i + 1, x["chr"], x["start"] + 1, x["stop"]))
        '''
        cds=[z.cds() for z in x["beds"] if z.cds()]
        utr3=[z.utr3() for z in x["beds"] if z.utr3()]
        utr5=[z.utr5() for z in x["beds"] if z.utr5()]
        '''
        coords = [up_down_coordinate(gene, args.bp, args.bp) for gene in x["beds"]]
        for j, y in enumerate(coords):
            data[y.id] = {}
            data[y.id]["coord"] = y
            data[y.id]["values"] = [0.0 for l in range(y.cdna_length())]
        coord_beds = [_translate(coord, bed) for coord, bed in itertools.izip(coords, x["beds"])]
        for j, read in enumerate(bam.query(method="bam1", chr=x["chr"], start=x["start"] - args.bp,
                                           stop=x["stop"] + args.bp, strand=args.strand)):
            NM = getNM(read)  # number of hits
            NC = 0  # number of compatible coordinates
            c_coords = []
            for k, coord in enumerate(coords):
                if overlap(read, coord) and compatible(read, coord):
                    # don't consider the reads that extend out of coords.
                    NC += 1
                    c_coords.append(k)
            for k, c in enumerate(c_coords):
                coord = coords[c]
                if read.start < coord.start or read.stop > coord.stop:
                    start = max(read.start, coord.start)
                    stop = min(read.stop, coord.stop)
                    read = read._slice(start, stop)
                read_in_coord = _translate(coord, read)
                for l in xrange(read_in_coord.start, read_in_coord.stop):
                    data[coord.id]["values"][l] += 1.0 / NC / NM
        for j, y in enumerate(coords):
            print(data[y.id]["coord"])
            print(data[y.id]["values"])
def run(args):
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    if args.format == "guess":
        args.format = IO.guess_format(args.input)
    s = TableIO.parse(fin, args.format)
    l = []
    for i, x in enumerate(s):
        if i % 10000 == 0:
            logging.info("reading %s entries in %s", i, args.input)
        l.append(x)
    logging.info("begin sorting")
    l.sort()
    logging.info("sorting done")
    for i in l:
        print(i, file=out)
    logging.info("completed")
def run(args):
    logging.basicConfig(level=logging.INFO)
    global bam, out
    bam = DBI.init(args.bam, "bam")
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    p = mp.Pool(processes=args.num_cpus)
    # split the genes round-robin into one work list per CPU
    beds_list = [[] for i in xrange(args.num_cpus)]
    for i0, bed in enumerate(TableIO.parse(fin, "bed12")):
        beds_list[i0 % args.num_cpus].append(bed)
    gene_num = i0 + 1
    print("bin_id\tmean\tentropy\treverse_strand_mean\treverse_strand_entropy", file=out)
    up_results = p.map(count_flank_star, itertools.izip(beds_list, itertools.repeat(args.bp),
                                                        itertools.repeat(args.strand), itertools.repeat(True)))
    output(up_results, args.bp, gene_num, "UP")
    results = p.map(count_list_star, itertools.izip(beds_list, itertools.repeat(args.bin_num),
                                                    itertools.repeat(args.strand)))
    output(results, args.bin_num, gene_num, "TR")
    down_results = p.map(count_flank_star, itertools.izip(beds_list, itertools.repeat(args.bp),
                                                          itertools.repeat(args.strand), itertools.repeat(False)))
    output(down_results, args.bp, gene_num, "DN")
def run(local_args):
    logging.basicConfig(level=logging.WARNING)
    global args, out, dbi_bam, g, MIN_INTRON_LENGTH, MIN_SPLICING_SITES_SCORE, MIN_FPK_RATIO, query_num
    MIN_INTRON_LENGTH = 10
    MIN_SPLICING_SITES_SCORE = 2
    ''' IO TEMPLATE '''
    args = local_args
    MIN_FPK_RATIO = args.min_uniq_fpk_increase  # TO TEST
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    ''' END OF IO TEMPLATE '''
    print >>out, "# This data was generated by program ", sys.argv[0], " (version: %s)" % VERSION,
    print >>out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out, "# Date: ", time.asctime()
    print >>out, "# The command line is :"
    print >>out, "#\t", " ".join(sys.argv)
    reader = TableIO.parse(fin, args.format)
    # split the queries round-robin into one work list per CPU
    query_lists = [[] for i in range(args.num_cpus)]
    query_num = 0
    for i, x in enumerate(reader):
        query_lists[i % args.num_cpus].append(x)
        query_num = i + 1
    pool = Pool(processes=args.num_cpus)
    results = pool.map(querys, query_lists)
    output(results)
def run(args):
    logging.basicConfig(level=logging.DEBUG)
    db_filename = args.translator
    t_name, t_ext = splitext(args.translator)
    # use an existing .db companion file, or generate one if it doesn't exist
    if t_ext != ".db":  # splitext keeps the dot in the extension
        possible_db = args.translator + ".db"
        print(possible_db)
        if not os.path.exists(possible_db):
            _generate_db(args.translator, possible_db, args.table_name)
        db_filename = possible_db
    # query the db file
    out = IO.fopen(args.output, "w")
    with sqlite3.connect(db_filename) as conn:
        conn.row_factory = lambda conn, x: Bed12._make(Bed12._types(x[1:]))
        cursor = conn.cursor()
        for i in TableIO.parse(IO.fopen(args.input, "r"), "bed"):
            s = template.substitute({"table_name": args.table_name, "name": i.chr.strip()})
            print(s)
            cursor.execute(s)
            try:
                gene = cursor.fetchone()
                logging.debug(i)
                logging.debug(i.cdna_length())
                logging.debug(gene)
                logging.debug(gene.cdna_length())
            except:
                logging.warning("can't find gene %s" % i.chr)
                continue
            assert gene.cdna_length() > i.cdna_length()
            print(reverse_translate(gene, i), file=out)
def iterate(fin):
    '''yield (query, hits, overlaps) tuples from a QR/HT/OP report'''
    buf = fin.next()
    x = buf.split("\t")[1:]
    qr = BED12._make(BED12._types(x))
    hits = []
    overlap = []
    i = 0
    for x in TableIO.parse(fin):
        if x[0] == "QR":
            if i % 100 == 0:
                logging.info("processing " + str(i) + " genes")
            i += 1
            yield qr, hits, overlap
            qr = BED12._make(BED12._types(x[1:]))
            hits = []
            overlap = []
        elif x[0] == "HT":
            hits.append(BED12._make(BED12._types(x[1:])))
        elif x[0] == "OP":
            overlap.append(BED12._make(BED12._types(x[1:])))
    yield qr, hits, overlap
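# Sketch of consuming iterate() on the QR/HT/OP report written by the bam and
# bed query tools above (file name is hypothetical):
# with open("query_report.txt") as fin:
#     for qr, hits, overlap in iterate(fin):
#         print(qr.id, len(hits), len(overlap))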
def run(args):
    # logging.basicConfig(level=logging.DEBUG)
    dbi = DBI.init(args.bam, "bam")
    mapped = dbi.mapped
    out = IO.fopen(args.output, "w")
    print("Gene\tRPKM", file=out)
    for i in TableIO.parse(IO.fopen(args.input, "r"), "bed12"):
        print("{}\t".format(i.id), end="", file=out)
        s = 0.0
        l = i.cdna_length()
        if args.uniq:
            # uniquely mapped reads each count as one
            for j in dbi.query(i, method="bam1", strand=args.strand, uniq=args.uniq):
                if compatible_with_transcript(j, i):
                    s += 1.0
        else:
            # multireads are down-weighted by their hit number (NH, stored in itemRgb)
            for j in dbi.query(i, method="bam1", strand=args.strand, uniq=args.uniq):
                if compatible_with_transcript(j, i):
                    (nh, _, _) = j.itemRgb.split(",")
                    s += 1.0 / int(nh)
        rpkm = float(s) * (1000000.0 / mapped) * (1000.0 / float(l))
        print(rpkm, file=out)
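# The RPKM formula above is reads * (1e6 / mapped) * (1e3 / length).
# Worked example with illustrative numbers: 200 compatible reads on a 2 kb
# transcript from a library of 20 million mapped reads:
#   200 * (1e6 / 2e7) * (1e3 / 2e3) = 200 * 0.05 * 0.5 = 5.0 RPKM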
def test():
    if len(sys.argv) == 1:
        print >>sys.stderr, "Usage: binindex.py file.bed"
        exit()
    a = TableIO.parse(sys.argv[1], 'bed12')
    data = binindex(a)
    data2 = binindex()
    bed = Bed("chr1", 100000, 2000000, ".", ".", ".")
    for i in data.query(bed):
        print "before remove:", len(data)
        data.remove(i)
        print "after remove:", len(data)
        data2.append(i)
    print data2
    for i in data2:
        print i
    print "data finalize:"
    data.merge(data2)
    print data
    print data + data2
    print data
    print data.uniq()
    print data
def run(args):
    logging.basicConfig(level=logging.INFO)
    up = args.up
    down = args.down
    bp_num = up + down
    offset = -up
    bam = DBI.init(args.bam, "bam")
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    bin_sum = [0 for i in xrange(bp_num)]
    bin_e = [0.0 for i in xrange(bp_num)]
    bin_dis = [[] for i in xrange(bp_num)]
    for i0, bed in enumerate(TableIO.parse(fin, args.format)):
        bed_bin = [0 for i in xrange(bp_num)]
        if args.tts:
            pos = bed.tts()
        else:
            pos = bed.tss()
        pos_flank = get_flank_region(pos, up, down)
        for read in bam.query(pos_flank, "bam1", strand="read1"):
            a = translate_coordinates(pos, read)
            for e in a.Exons():
                start = e.start - offset
                end = e.stop - offset
                if start < 0:
                    start = 0
                if end > bp_num:
                    end = bp_num
                for j in xrange(start, end):
                    bed_bin[j] += 1
        for i in xrange(bp_num):
            bin_sum[i] += bed_bin[i]
            bin_dis[i].append(bed_bin[i])
    bed_num = i0 + 1
    for i in xrange(bp_num):
        bin_e[i] = gini_coefficient(bin_dis[i])
    if args.tts:
        print("pos_to_tts\taggregation_mean\tgini_coefficient", file=out)
    else:
        print("pos_to_tss\taggregation_mean\tgini_coefficient", file=out)
    for i in xrange(bp_num):
        print("{bin}\t{aggregation}\t{E}".format(bin=i + offset,
                                                 aggregation=float(bin_sum[i]) / bed_num,
                                                 E=bin_e[i]), file=out)
    try:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        matplotlib.rcParams.update({'font.size': 9})
        ax1 = plt.subplot2grid((7, 1), (6, 0))
        plt.ylabel('gini coefficient')
        plt.fill_between(range(-up, down), bin_e, color="r", alpha=0.2, y2=0)
        ax1.set_ylim(0, 1)
        ax1.set_xlim(-up, down)
        ax1.axes.get_xaxis().set_visible(False)
        plt.axvline(x=0, linewidth=1, color='y')
        ax2 = plt.subplot2grid((7, 1), (0, 0), rowspan=5)
        ax2.set_xlim(-up, down)
        plt.plot(range(-up, down), [float(i) / bed_num for i in bin_sum])
        plt.ylabel('mean coverage')
        if args.tts:
            plt.xlabel('pos to tts (bp)')
        else:
            plt.xlabel('pos to tss (bp)')
        plt.axvline(x=0, linewidth=1, color='y')
        plt.grid(True)
        plt.savefig(args.output + ".png")
    except:
        pass
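# gini_coefficient is provided elsewhere in bam2x; below is only a minimal
# sketch consistent with how it is used here (0 = perfectly even values,
# values near 1 = mass concentrated in a few entries). Hypothetical helper;
# assumes non-negative inputs:
def _gini_sketch(values):
    v = sorted(values)
    n = len(v)
    total = float(sum(v))
    if n == 0 or total == 0.0:
        return 0.0
    # standard formulation on sorted values: G = 2*sum(i*x_i)/(n*sum(x)) - (n+1)/n
    cum = sum((i + 1) * x for i, x in enumerate(v))
    return 2.0 * cum / (n * total) - float(n + 1) / n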
def run(args):
    logging.basicConfig(level=logging.INFO)

    def process():
        '''turn the buffered read group into a record; return 1 if one was added'''
        if len(buff) == 1:
            return 0
        total_score = 0.0
        e = []
        for i in buff:
            total_score += i.score
            e.append(i.score)
        e = [i / total_score for i in e]
        gini = gini_coefficient(e)
        if total_score < args.min_reads_number:
            return 0
        record = {}
        meta = BED6(buff[0].chr, buff[0].start, buff[-1].stop,
                    args.prefix + "." + str(group_id), total_score, buff[0].strand)
        peak = max(buff, key=lambda x: x.score)
        record["peak"] = peak._replace(score=peak.score / total_score)
        record["meta"] = meta._replace(strand=peak.strand)
        record["gini"] = gini
        records.append(record)
        return 1

    def simple_output():
        print("# formats: bayes_prob_model2, gini, [ region bed, score is total reads ], [ peak bed, score is proportion ]", file=out)
        for i, x in enumerate(records):
            print("{p2}\t{gini}\t{meta}\t".format(p2=p2[i], meta=x["meta"], gini=x["gini"]), end="", file=out)
            print(x["peak"], file=out)

    def bed12_output():
        print("# formats: bed12 , [R,G,B] are corresponding to [ TTS_GINI_PVALUE*200, TSS_GINI_PVALUE*200, PROPORTION_OF_PEAK*200 ]", file=out)
        for i, x in enumerate(records):
            if args.tts:
                g = 0
                r = int(p2[i] * 200)
            else:
                g = int(p2[i] * 200)
                r = 0
            b = int(x["gini"] * 200)
            if p2[i] > 0.5:
                meta = x["meta"]._replace(id=x["meta"].id + ".end")
            else:
                meta = x["meta"]
            rgb = "{r},{g},{b}".format(r=r, g=g, b=b)
            print("{bed6}\t{thickStart}\t{thickEnd}\t{itemRgb}\t{blockCount}\t{blockSizes}\t{blockStarts}".format(
                bed6=meta, thickStart=x["peak"].start, thickEnd=x["peak"].end, itemRgb=rgb,
                blockSizes=x["meta"].stop - x["meta"].start, blockCount=1, blockStarts=0), file=out)

    records = []
    GAP = args.gap
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    iterator = TableIO.parse(fin, "bed6")
    last = iterator.next()
    last_stop = last.stop
    group_id = 0
    buff = [last]
    last_chr = last.chr
    for x, i in enumerate(iterator):
        if x % 10000 == 0:
            logging.info("processing {x} reads".format(x=x))
        if i.chr != last_chr or i.start - last_stop > GAP:
            group_id += process()
            buff = [i]
            last_chr = i.chr
            last_stop = i.stop
        else:
            buff.append(i)
            if i.stop > last_stop:
                last_stop = i.stop
    process()
    gini = array([i["gini"] for i in records])
    model = fit_two_peaks_EM(gini)
    p2 = bayes_p2(gini, model)
    print("# Date: ", time.asctime(), file=out)
    print("# Program Version ", VERSION, file=out)
    print("# The command line is :", file=out)
    print("#\t", " ".join(sys.argv), file=out)
    print("# learning model:", file=out)
    print("#", model_str(model), file=out)
    # simple_output()
    bed12_output()
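# fit_two_peaks_EM and bayes_p2 come from bam2x; in outline, the posterior
# used above is the responsibility of the second (high-gini) component under
# a two-Gaussian mixture. A hedged sketch of that posterior (parameter layout
# and names hypothetical):
# def bayes_p2_sketch(g, model):
#     from math import exp, sqrt, pi
#     w1, m1, s1, w2, m2, s2 = model
#     n1 = w1 * exp(-(g - m1) ** 2 / (2 * s1 ** 2)) / (s1 * sqrt(2 * pi))
#     n2 = w2 * exp(-(g - m2) ** 2 / (2 * s2 ** 2)) / (s2 * sqrt(2 * pi))
#     return n2 / (n1 + n2)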
import pyjsonrpc
from bam2x.Annotation import BED12, BED6
import sqlite3
from bam2x.Struct import binindex
from bam2x.DBI.Templates import select_template as template
from bam2x.DBI.Templates import factories
from bam2x import TableIO, Tools

conn = sqlite3.connect("./jsonrpc/data/hg19_one_tr_per_gene.bed.db")
conn.row_factory = factories["bed12"]
cursor = conn.cursor()
s = "select * from gene"
cursor.execute(s)
r = cursor.fetchall()
h = {}
DataS3 = binindex(TableIO.parse("./jsonrpc/data/DataS3.uniq.bed.gz", "bed6"))
genelist = [i.id for i in r]
print(genelist)
for i in r:
    h[i.id] = i

class RequestHandler(pyjsonrpc.HttpRequestHandler):
    @pyjsonrpc.rpcmethod
    def add(self, a, b):
        """Test method"""
        return a + b

    @pyjsonrpc.rpcmethod
    def branch(self, seq):
        x = branchpoint_predict(seq)
        return x
def run(args):
    fin = IO.fopen(args.input, "r")
    out = IO.fopen(args.output, "w")
    for i in TableIO.parse(fin, "bed12"):
        print(remove_small_introns(i, args.cutoff), file=out)
def toBed12(self, chr="unknown_chr", strand="read2", **dict):
    from bam2x import TableIO
    x = list()
    for i in TableIO.parse(self.reads, "bam2bed12", references=chr, strand=strand, **dict):
        x.append(i)
    return x