def __init__(self, file, **kwargs):
    """Build the bin index held in ``self.data``.

    Wrapped in xplib.DBI.init().

    Args:
        file: Either a list (or list subclass), which is indexed as-is, or
            any source accepted by ``TableIO.parse``.
        **kwargs: Must contain ``format`` when ``file`` is not a list; its
            value is passed to ``TableIO.parse`` as the format argument.
            Other keywords are ignored.

    Raises:
        KeyError: If ``file`` is not a list and ``format`` was not supplied.
    """
    # isinstance() instead of comparing type objects, so list subclasses
    # are also treated as ready-made record collections.
    if isinstance(file, list):
        records = file
    else:
        records = TableIO.parse(file, kwargs['format'])
    self.data = binindex(records)
def Main():
    """Merge several annotation databases into one sorted, de-duplicated list.

    Every file in ``args.db`` is read with ``TableIO.parse``. A record is
    kept only if none of the candidates returned by ``data.query(record)``
    duplicates it. With ``args.pos`` set, a duplicate is a candidate with the
    same ``start`` and ``stop``; otherwise candidates are compared with
    ``==``. The kept records are sorted and printed to ``args.output`` (or
    stdout) after a ``#`` header.

    Side effects: binds the module globals ``args`` and ``out``, and
    rewrites ``sys.argv[0]`` to its basename.
    """
    global args, out
    args = ParseArg()
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    try:
        # sys.argv is edited in place so the header shows the bare program name.
        argv = sys.argv
        argv[0] = argv[0].split("/")[-1]
        # The trailing comma keeps the next print on the same header line.
        print >> out, "# This data was generated by program ", argv[0], "(version %s)" % VERSION,
        print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
        print >> out, "# Date: ", time.asctime()
        print >> out, "# The command line is :\n#\t", " ".join(argv)

        db_format = args.db_format
        if len(db_format) == 1:
            # A single format is applied to every database file.
            db_format = [db_format[0]] * len(args.db)

        data = binindex()
        for idx, db_file in enumerate(args.db):
            for item in TableIO.parse(db_file, db_format[idx]):
                duplicate = False
                for feat in data.query(item):
                    if args.pos:
                        duplicate = (feat.start == item.start
                                     and feat.stop == item.stop)
                    else:
                        duplicate = feat == item  # defined in Class.__cmp__
                    if duplicate:
                        # One match is enough; skip the remaining candidates.
                        break
                if not duplicate:
                    data.append(item)

        for record in sorted(data):
            print >> out, record
    finally:
        # Close the output file we opened; never close the shared stdout.
        if out is not sys.stdout:
            out.close()
def Main():
    """Merge several annotation databases into one sorted, de-duplicated list.

    Every file in ``args.db`` is read with ``TableIO.parse``. A record is
    kept only if none of the candidates returned by ``data.query(record)``
    duplicates it. With ``args.pos`` set, a duplicate is a candidate with the
    same ``start`` and ``stop``; otherwise candidates are compared with
    ``==``. The kept records are sorted and printed to ``args.output`` (or
    stdout) after a ``#`` header.

    Side effects: binds the module globals ``args`` and ``out``, and
    rewrites ``sys.argv[0]`` to its basename.
    """
    global args, out
    args = ParseArg()
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >>sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    try:
        # sys.argv is edited in place so the header shows the bare program name.
        argv = sys.argv
        argv[0] = argv[0].split("/")[-1]
        # The trailing comma keeps the next print on the same header line.
        print >>out, "# This data was generated by program ", argv[0], "(version %s)" % VERSION,
        print >>out, "in bam2x ( https://github.com/nimezhu/bam2x )"
        print >>out, "# Date: ", time.asctime()
        print >>out, "# The command line is :\n#\t", " ".join(argv)

        db_format = args.db_format
        if len(db_format) == 1:
            # A single format is applied to every database file.
            db_format = [db_format[0]] * len(args.db)

        data = binindex()
        for idx, db_file in enumerate(args.db):
            for item in TableIO.parse(db_file, db_format[idx]):
                duplicate = False
                for feat in data.query(item):
                    if args.pos:
                        duplicate = (feat.start == item.start
                                     and feat.stop == item.stop)
                    else:
                        duplicate = feat == item  # defined in Class.__cmp__
                    if duplicate:
                        # One match is enough; skip the remaining candidates.
                        break
                if not duplicate:
                    data.append(item)

        for record in sorted(data):
            print >>out, record
    finally:
        # Close the output file we opened; never close the shared stdout.
        if out is not sys.stdout:
            out.close()
def Main():
    """Plot nucleosome-level chromatin states for a region, or export BED9.

    Reads an emission matrix through R, counts how much of the segmentation
    in ``args.segment`` falls in each chromatin state, and parses the
    nucleosome table in ``args.input``.

    If ``args.bed`` is set, one BED9 line per nucleosome per population is
    written to ``<input basename>_State_browser.bed`` and the process exits.
    Otherwise a figure for ``args.region`` (``chr:start-end``) is drawn with
    a gene track, a nucleosome track, one chromatin-state track per
    population, histone-pattern heatmaps for at most 18 nucleosomes and a
    state legend, and saved to ``args.output``.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-flattened source - confirm loop boundaries against the
    original file.
    """
    args=ParseArg()
    hist_n=args.hist_n
    clu_n=args.clu_n
    File=args.input
    # Read the emission matrix into the embedded R session.
    print "#Reading emission matrix from"
    emission=args.emission
    print '\t'+emission
    robjects.r("emission=read.table('"+emission+"',header=T,sep='\t')")
    # Reorder the rows and keep only the seven listed histone-mark columns.
    robjects.r("emission=emission[c(12,11,13,8,7,10,6,9,4,5,2,1,3,15,14),match(c('H3K4me3','H3K4me2','H3K4me1','H3K27me3','H3K36me3','H3K27ac','H2AZ'),colnames(emission))]")
    state_n=robjects.r("dim(emission)[1]")[0] # number of chromatin states
    # One display colour per chromatin state, looked up as color_state[state-1].
    color_state=['red','pink','purple','DarkOrange','Orange','Gold','yellow','DeepSkyBlue','ForestGreen','Green','Lime','GreenYellow','LightCyan','white','white']
    # Find the overall distribution of all chromatin states.
    print "Counting distribution of chromatin states..."
    chromHMM_segment = TableIO.parse(args.segment,'bed')
    # count accumulates, per state, the segment length divided by 200.
    count=np.zeros(state_n)
    num=0
    for segment in chromHMM_segment:
        num=num+1
        # The state number is segment.id with its first character dropped.
        i=int(segment.id[1:])
        # NOTE(review): 200 is presumably the segmentation bin size, and under
        # Python 2 this is integer division if start/stop are ints - confirm.
        count[i-1]+=(segment.stop-segment.start)/200
        # '\r' plus the trailing comma rewrites the same console line.
        print 'Reading %d segments... [for distribution of chromatin states]'%(num),'\r',
    print
    # Parse histone pattern data for single nucleosomes in all populations.
    print "Indexing histone pattern data for single nucleosomes in all populations..."
    data=TableIO.parse(File,'metabed',header=True)
    # Optional export: BED file of chromatin states for the UCSC genome browser.
    if args.bed:
        name=os.path.basename(File).split('.')[0]
        outbed=open(name+"_State_browser.bed",'w')
        print "## Start generate BED9 file for uploading..."
        print >>outbed,'track name="ChromatinState" description="'+name+'" visibility=2 itemRgb="On"'
        for n,i in enumerate(data):
            # Fields 8 .. 8+hist_n*clu_n of the tab-joined record, reshaped
            # column-major: row = histone, column = population.
            matrix=np.array(str(i).split('\t')[8:(8+hist_n*clu_n)],dtype="int").reshape(hist_n,clu_n,order="F") # matrix of histone patterns, row: histone, column: population
            if n % 50000 == 0:
                print "\tWriting %dth nucleosomes into BED9 file,\r"%(n),
            # `line` is only built up here; it is never written out.
            line='\t'.join (str(f) for f in [i.chr,i.start,i.stop])
            for k in range(clu_n):
                state=histone2state(matrix.T[k],count)
                # Colour name -> "R,G,B" string with components scaled to 0-255.
                color_code=','.join (str(int(f)) for f in np.array(matplotlib.colors.colorConverter.to_rgb(color_state[state-1]))*255)
                print >>outbed,'\t'.join (str(f) for f in [i.chr,i.start,i.stop,'P_%d_%d'%(k+1,state),0,'.',i.start,i.stop,color_code])
                line=line+'\t%d'%(state)
        outbed.close()
        # NOTE(review): exits with status 1 even though the export completed -
        # confirm whether a non-zero status is intended.
        sys.exit(1)
    # Parse the "chr:start-end" region string.
    region=args.region
    chro=region.split(":")[0]
    start=int(region.split(":")[1].split("-")[0])
    end=int(region.split(":")[1].split("-")[1])
    print "#Query region:["+chro+": %d-%d]"%(start,end)
    y_nucle=0.47 # y location of the nucleosome line
    # Index the nucleosome records and query the region.
    dbi=binindex(data)
    query=dbi.query(Bed([chro,start,end]))
    # Initialise the figure; extra room is left of `start` for track labels.
    fig=plt.figure(figsize=(10,6))
    ax = plt.subplot(111,frameon=False,yticks=[])
    ax.set_xlim(start-(end-start)/6,end)
    n=0
    print "##Start draw nucleosomes:"
    #################################################
    ## draw genes from y = y_nucle+0.04*(clu_n+1)
    #### index the gene.tab file
    print " ## drawing gene track ..."
    print " ## Indexing gene.tab ..."
    gene_dbi=DBI.init(args.genetab,'genebed')
    print " ## query regions from gene.tab"
    query_gene=gene_dbi.query(Bed([chro,start,end]))
    #### determine height of gene track
    # bottoms[k] holds the stop of the last gene placed in lane k; each gene
    # goes into the first lane whose last stop lies before the gene's start.
    # NOTE(review): only 100 lanes exist; a gene that fits none of them would
    # index past the end of the list.
    bottoms=[0 for i in range(100)]
    max_index=0
    for i in query_gene:
        index=0
        while(1):
            if i.start > bottoms[index]:
                bottoms[index]=i.stop
                if max_index<index: max_index=index
                break
            else:
                index+=1
    gene_track_number=max_index+1
    gene_track_height=0.03*gene_track_number+0.02
    ax.set_ylim(0.05,1+gene_track_height+0.01)
    print " ## start draw gene track"
    # add frame for gene track
    rect=matplotlib.patches.Rectangle((start,y_nucle+0.04),end-start, gene_track_height, edgecolor='black',fill=False)
    ax.add_patch(rect)
    # Second pass over the same query: repeat the lane assignment and draw.
    bottoms=[0 for i in range(100)]
    for i in gene_dbi.query(Bed([chro,start,end])):
        index=0
        while(1):
            if i.start > bottoms[index]:
                addGeneToFig(i,ax,start,end,1,0.03*index+y_nucle+0.05)
                bottoms[index]=i.stop
                break
            index+=1
    #################################################
    top_heatmap_y = 0.71+gene_track_height # the y axis value for bottom of top heatmaps
    print "## Draw nucleosome tracks..."
    for i in query:
        n=n+1
        print " Nucleosome %d\t at "%(n)+chro+": %d-%d"%(i.start,i.stop)
        matrix=np.array(str(i).split('\t')[8:(8+hist_n*clu_n)],dtype="int").reshape(hist_n,clu_n,order="F") # matrix of histone patterns, row: histone, column: population
        # Every field after the pattern matrix is read as a float; it is
        # compared against 0.6 below to flag low-confidence marks.
        prob=np.array(str(i).split('\t')[(8+hist_n*clu_n):],dtype=float)
        ax.plot([i.smt_pos,i.smt_pos],[y_nucle+0.03,y_nucle],color='r') # red nucleosome midpoint
        rect=matplotlib.patches.Rectangle((i.start,y_nucle), i.stop-i.start, 0.03, color='#EB70AA') # pink nucleosome region
        ax.add_patch(rect)
        # One state-coloured box per population, stacked above the gene track.
        for j in range(clu_n):
            state=histone2state(matrix.T[j],count)
            state_rect=matplotlib.patches.Rectangle((i.start,y_nucle+0.04*(j+1)+gene_track_height+0.01), i.stop-i.start, 0.03, color=color_state[state-1])
            ax.add_patch(state_rect)
        im = OffsetImage(matrix, interpolation='nearest',zoom=10/(1+gene_track_height+0.01),cmap=plt.cm.binary,alpha=0.5)
        # Nucleosomes 1-9 get a heatmap in the top row, 10-18 in the bottom
        # row; from the 19th on the loop stops, so later nucleosomes are not
        # drawn at all.
        if n<=9:
            xybox=((n+0.5)/10.0,top_heatmap_y)
            xy = [i.smt_pos,y_nucle+0.04*clu_n+0.03+gene_track_height+0.01]
            xytext=((n+0.7)/10.0,top_heatmap_y)
            c_style="bar,angle=180,fraction=-0.1"
        elif n<=18:
            xybox=((n-9+0.5)/10.0,0.2)
            xy = [i.smt_pos,y_nucle]
            xytext = ((n-9+0.7)/10.0,0.40)
            c_style="bar,angle=180,fraction=-0.1"
        else:
            print "WARN: nucleosome number larger than 18 in this region, only plot the pattern for first 18 nucleosomes"
            break
        ab = AnnotationBbox(im, xy, xybox=xybox, xycoords='data', boxcoords=("axes fraction", "data"), box_alignment=(0.,0.), pad=0.1)
        # Empty-text annotation: only its arrow is drawn.
        ax.annotate("",xy, xytext=xytext, xycoords='data', textcoords=("axes fraction", "data"), arrowprops=dict(arrowstyle="->",connectionstyle=c_style))
        ax.add_artist(ab)
        # Mark histone rows whose value in prob is below 0.6 with a red star.
        # Note this loop rebinds `i`, which until here was the nucleosome record.
        for i in range(hist_n):
            if prob[i]<0.6:
                xy_star=tuple(map(sum,zip(xybox,(0.065,0.03*(hist_n-1-i)-0.01))))
                ax.annotate("*",xy=xy_star,xycoords=("axes fraction", "data"),color='red')
    # Track labels in the margin left of the region.
    ax.annotate('Nucleosome:', xy=(start-(end-start)/6, y_nucle), xycoords='data',size=12)
    ax.annotate('Epigenetic Pattern:', xy=(start-(end-start)/6, 0.23+top_heatmap_y), xycoords='data',size=12)
    ax.annotate(chro, xy=(start-(end-start)/6, 0.1), xycoords='data',size=12)
    # Histone labels come from fields 8 .. 8+hist_n of the input's first line,
    # cut at the first underscore.
    # NOTE(review): this file handle is never explicitly closed.
    name=open(File).readline().split('\t')[8:(8+hist_n)]
    for n,i in enumerate(name):
        ax.annotate(i.split("_")[0],xy=(start-(end-start)/8, top_heatmap_y+0.03*(hist_n-1-n)),xycoords='data',size=10)
        ax.annotate(i.split("_")[0],xy=(start-(end-start)/8, 0.2+0.03*(hist_n-1-n)),xycoords='data',size=10)
    # Frames for the nucleosome and chromatin-state tracks.
    rect=matplotlib.patches.Rectangle((start,y_nucle),end-start, 0.03, edgecolor='black',fill=False)
    ax.add_patch(rect)
    for k in range(clu_n):
        rect=matplotlib.patches.Rectangle((start,y_nucle+0.04*(k+1)+gene_track_height+0.01),end-start, 0.03, edgecolor='grey',fill=False)
        ax.add_patch(rect)
        ax.annotate('Population%d'%(k+1),xy=(start-(end-start)/6, y_nucle+0.04*(k+1)+gene_track_height+0.01),xycoords='data',size=12)
    # Chromatin state legend: one coloured box and number per state.
    for s in range(state_n):
        dist=(end-start)*1.0/state_n
        length=dist*0.75
        rect=matplotlib.patches.Rectangle((start+dist*s,0.1), length, 0.03, color=color_state[s])
        ax.add_patch(rect)
        ax.annotate(s+1,xy=(start+dist*s+length/3,0.075),xycoords='data',size=10)
    ax.annotate("Chromatin states:",xy=(start,0.14),xycoords='data',size=12)
    # NOTE(review): `length` is only bound inside the loop above, so this line
    # would fail if state_n were 0.
    ax.add_patch(matplotlib.patches.Rectangle((start-length/6,0.07),end-start, 0.1, edgecolor='grey',fill=False))
    plt.title("Region: ["+chro+": %d-%d]"%(start,end),size=14)
    plt.savefig(args.output)
    plt.close()