Ejemplo n.º 1
0
Archivo: DB.py Proyecto: yu68/bam2x
 def __init__(self, file, **kwargs):
     '''
     Wrapped in xplib.DBI.init()

     Build ``self.data`` as a ``binindex`` over the supplied features.

     file   -- either a list (used as the feature collection as is), or
               any other value, which is handed to ``TableIO.parse``
               together with the ``format`` keyword.
     format -- keyword argument, required whenever ``file`` is not a
               list; passed through to ``TableIO.parse`` unchanged.

     Raises KeyError when ``file`` is not a list and no ``format``
     keyword was given.
     '''
     # isinstance (rather than an exact type comparison) also accepts
     # list subclasses as ready-made feature collections.
     if isinstance(file, list):
         features = file
     else:
         features = TableIO.parse(file, kwargs['format'])
     self.data = binindex(features)
Ejemplo n.º 2
0
 def __init__(self, file, **kwargs):
     '''
     Wrapped in xplib.DBI.init()

     Build ``self.data`` as a ``binindex`` over the supplied features.

     file   -- either a list (used as the feature collection as is), or
               any other value, which is handed to ``TableIO.parse``
               together with the ``format`` keyword.
     format -- keyword argument, required whenever ``file`` is not a
               list; passed through to ``TableIO.parse`` unchanged.

     Raises KeyError when ``file`` is not a list and no ``format``
     keyword was given.
     '''
     # isinstance (rather than an exact type comparison) also accepts
     # list subclasses as ready-made feature collections.
     if isinstance(file, list):
         features = file
     else:
         features = TableIO.parse(file, kwargs['format'])
     self.data = binindex(features)
Ejemplo n.º 3
0
def Main():
    """Merge the input annotation files into one sorted, duplicate-free listing.

    Command-line options come from ``ParseArg()``.  Every file in
    ``args.db`` is parsed with ``TableIO.parse`` using the matching entry
    of ``args.db_format`` (a single format is applied to all files).  An
    item is kept only if the index does not already hold an equivalent
    one: with ``args.pos`` set, "equivalent" means same start and stop;
    otherwise the items' own equality comparison decides.

    Side effects: sets the module globals ``args`` and ``out``, rewrites
    ``sys.argv[0]`` to its last path component, and writes a commented
    header followed by the sorted items to ``out``.  A file opened here
    for output is closed before returning.
    """
    global args, out
    args = ParseArg()
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    try:
        argv = sys.argv
        argv[0] = argv[0].split("/")[-1]
        print >> out, "# This data was generated by program ", argv[
            0], "(version %s)" % VERSION,
        print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
        print >> out, "# Date: ", time.asctime()
        print >> out, "# The command line is :\n#\t", " ".join(argv)

        # A single format applies to every input file.
        db_format = args.db_format
        if len(db_format) == 1:
            db_format = [db_format[0]] * len(args.db)

        data = binindex()
        for index, path in enumerate(args.db):
            for item in TableIO.parse(path, db_format[index]):
                candidates = data.query(item)
                # any() stops at the first match instead of scanning
                # every remaining candidate.
                if args.pos:
                    seen = any(feat.start == item.start and
                               feat.stop == item.stop
                               for feat in candidates)
                else:
                    # equality is defined in Class.__cmp__
                    seen = any(feat == item for feat in candidates)
                if not seen:
                    data.append(item)

        for entry in sorted(data):
            print >> out, entry
    finally:
        # Only close what was opened here; never close the real stdout.
        if out is not sys.stdout:
            out.close()
Ejemplo n.º 4
0
def Main():
    """Merge the input annotation files into one sorted, duplicate-free listing.

    Command-line options come from ``ParseArg()``.  Every file in
    ``args.db`` is parsed with ``TableIO.parse`` using the matching entry
    of ``args.db_format`` (a single format is applied to all files).  An
    item is kept only if the index does not already hold an equivalent
    one: with ``args.pos`` set, "equivalent" means same start and stop;
    otherwise the items' own equality comparison decides.

    Side effects: sets the module globals ``args`` and ``out``, rewrites
    ``sys.argv[0]`` to its last path component, and writes a commented
    header followed by the sorted items to ``out``.  A file opened here
    for output is closed before returning.
    """
    global args, out
    args = ParseArg()
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >>sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout
    try:
        argv = sys.argv
        argv[0] = argv[0].split("/")[-1]
        print >>out, "# This data was generated by program ", argv[0], "(version %s)" % VERSION,
        print >>out, "in bam2x ( https://github.com/nimezhu/bam2x )"
        print >>out, "# Date: ", time.asctime()
        print >>out, "# The command line is :\n#\t", " ".join(argv)

        # A single format applies to every input file.
        db_format = args.db_format
        if len(db_format) == 1:
            db_format = [db_format[0]] * len(args.db)

        data = binindex()
        for index, path in enumerate(args.db):
            for item in TableIO.parse(path, db_format[index]):
                candidates = data.query(item)
                # any() stops at the first match instead of scanning
                # every remaining candidate.
                if args.pos:
                    seen = any(feat.start == item.start and
                               feat.stop == item.stop
                               for feat in candidates)
                else:
                    # equality is defined in Class.__cmp__
                    seen = any(feat == item for feat in candidates)
                if not seen:
                    data.append(item)

        for entry in sorted(data):
            print >>out, entry
    finally:
        # Only close what was opened here; never close the real stdout.
        if out is not sys.stdout:
            out.close()
def Main():
    args=ParseArg()

    hist_n=args.hist_n
    clu_n=args.clu_n
    File=args.input


    #read emission matrix and store in Rpy2
    print "#Reading emission matrix from"
    emission=args.emission
    print '\t'+emission
    robjects.r("emission=read.table('"+emission+"',header=T,sep='\t')")
    robjects.r("emission=emission[c(12,11,13,8,7,10,6,9,4,5,2,1,3,15,14),match(c('H3K4me3','H3K4me2','H3K4me1','H3K27me3','H3K36me3','H3K27ac','H2AZ'),colnames(emission))]")
    state_n=robjects.r("dim(emission)[1]")[0] # number of chromatin state
    
    color_state=['red','pink','purple','DarkOrange','Orange','Gold','yellow','DeepSkyBlue','ForestGreen','Green','Lime','GreenYellow','LightCyan','white','white']


    #Find overall distribution of all chromatin states
    print "Counting distribution of chromatin states..."
    chromHMM_segment = TableIO.parse(args.segment,'bed')
    #count represent overall probability distribution of all chromatin states
    count=np.zeros(state_n)
    num=0
    for segment in chromHMM_segment:
        num=num+1
        i=int(segment.id[1:])
        count[i-1]+=(segment.stop-segment.start)/200
        print 'Reading %d segments... [for distribution of chromatin states]'%(num),'\r',
    print


    ## read and index histone pattern data for single nucleosomes in all populations
    print "Indexing histone pattern data for single nucleosomes in all populations..."
    data=TableIO.parse(File,'metabed',header=True)


    ## generate bed file for chromatin states in nucleosomes to be uploaded in UCSC genome browser
    if args.bed:
        name=os.path.basename(File).split('.')[0]
        outbed=open(name+"_State_browser.bed",'w')
        print "## Start generate BED9 file for uploading..."
        print >>outbed,'track name="ChromatinState" description="'+name+'" visibility=2 itemRgb="On"'
        #print >>outbed,'chr\tstart\tend\t'+'\t'.join('P_%d'%(s+1) for s in range(clu_n))

        for n,i in enumerate(data):
            matrix=np.array(str(i).split('\t')[8:(8+hist_n*clu_n)],dtype="int").reshape(hist_n,clu_n,order="F")  # matrix of histone patterns, row: histone, column: population
            if n % 50000 == 0:
                print "\tWriting %dth nucleosomes into BED9 file,\r"%(n),
            line='\t'.join (str(f) for f in [i.chr,i.start,i.stop])
            for k in range(clu_n):
                state=histone2state(matrix.T[k],count)
                color_code=','.join (str(int(f)) for f in np.array(matplotlib.colors.colorConverter.to_rgb(color_state[state-1]))*255)
                print >>outbed,'\t'.join (str(f) for f in [i.chr,i.start,i.stop,'P_%d_%d'%(k+1,state),0,'.',i.start,i.stop,color_code])
                line=line+'\t%d'%(state)
            #print >>outbed,line
        outbed.close()
        sys.exit(1)


    # read region information
    region=args.region
    chro=region.split(":")[0]
    start=int(region.split(":")[1].split("-")[0])
    end=int(region.split(":")[1].split("-")[1])
    print "#Query region:["+chro+": %d-%d]"%(start,end)


    y_nucle=0.47 #location of nucleosome line

    
    ## query data in region
    dbi=binindex(data)
    query=dbi.query(Bed([chro,start,end]))

    ## initialize figure 
    fig=plt.figure(figsize=(10,6))

    ax = plt.subplot(111,frameon=False,yticks=[])
    ax.set_xlim(start-(end-start)/6,end)
    n=0
    print "##Start draw nucleosomes:"

    #################################################
    ## draw genes from y = y_nucle+0.04*(clu_n+1) 
    
    #### index the gene.tab file

    print "  ## drawing gene track ..."
    print "    ## Indexing gene.tab ..."
    gene_dbi=DBI.init(args.genetab,'genebed')


    print "    ## query regions from gene.tab"
    query_gene=gene_dbi.query(Bed([chro,start,end]))
    #### determine height of gene track    
    bottoms=[0 for i in range(100)]
    max_index=0
    for i in query_gene:
        index=0
        while(1):
            if i.start > bottoms[index]:
                bottoms[index]=i.stop
                if max_index<index: max_index=index
                break
            else:
                index+=1
    gene_track_number=max_index+1
    gene_track_height=0.03*gene_track_number+0.02
    ax.set_ylim(0.05,1+gene_track_height+0.01) 
    
    print "    ## start draw gene track"
    # add frame for gene track
    rect=matplotlib.patches.Rectangle((start,y_nucle+0.04),end-start, gene_track_height, edgecolor='black',fill=False)
    ax.add_patch(rect)
    
    bottoms=[0 for i in range(100)]
    for i in gene_dbi.query(Bed([chro,start,end])):
        index=0
        while(1):
            if i.start > bottoms[index]:
                addGeneToFig(i,ax,start,end,1,0.03*index+y_nucle+0.05)
                bottoms[index]=i.stop
                break
            index+=1

 
    ################################################# 
    
    top_heatmap_y = 0.71+gene_track_height # the y axis value for bottom of top heatmaps 

    print "##  Draw nucleosome tracks..."
    for i in query:
        n=n+1
        print "  Nucleosome %d\t at "%(n)+chro+": %d-%d"%(i.start,i.stop)
        matrix=np.array(str(i).split('\t')[8:(8+hist_n*clu_n)],dtype="int").reshape(hist_n,clu_n,order="F")  # matrix of histone patterns, row: histone, column: population
        prob=np.array(str(i).split('\t')[(8+hist_n*clu_n):],dtype=float)

        ax.plot([i.smt_pos,i.smt_pos],[y_nucle+0.03,y_nucle],color='r') #red nucleosome midpoint
        rect=matplotlib.patches.Rectangle((i.start,y_nucle), i.stop-i.start, 0.03, color='#EB70AA') #pink nucleosome region
        ax.add_patch(rect)

        for j in range(clu_n):
            state=histone2state(matrix.T[j],count)
            state_rect=matplotlib.patches.Rectangle((i.start,y_nucle+0.04*(j+1)+gene_track_height+0.01), i.stop-i.start, 0.03, color=color_state[state-1])
            ax.add_patch(state_rect)

    
        im = OffsetImage(matrix, interpolation='nearest',zoom=10/(1+gene_track_height+0.01),cmap=plt.cm.binary,alpha=0.5)
        

        if n<=9:
            xybox=((n+0.5)/10.0,top_heatmap_y)
            xy = [i.smt_pos,y_nucle+0.04*clu_n+0.03+gene_track_height+0.01]
            xytext=((n+0.7)/10.0,top_heatmap_y)
            c_style="bar,angle=180,fraction=-0.1"
        elif n<=18:
            xybox=((n-9+0.5)/10.0,0.2)
            xy = [i.smt_pos,y_nucle]
            xytext = ((n-9+0.7)/10.0,0.40)
            c_style="bar,angle=180,fraction=-0.1"
        else:
            print "WARN: nucleosome number larger than 18 in this region, only plot the pattern for first 18 nucleosomes"
            break

        ab = AnnotationBbox(im, xy,
                            xybox=xybox,
                            xycoords='data',
                            boxcoords=("axes fraction", "data"),
                            box_alignment=(0.,0.),
                            pad=0.1)
        ax.annotate("",xy,
                    xytext=xytext,
                    xycoords='data',
                    textcoords=("axes fraction", "data"),
                    arrowprops=dict(arrowstyle="->",connectionstyle=c_style))
                        #arrowprops=None)
    
        ax.add_artist(ab)
        
        # add mark for histone mark and regions with low confidence
        for i in range(hist_n):
            if prob[i]<0.6:
                xy_star=tuple(map(sum,zip(xybox,(0.065,0.03*(hist_n-1-i)-0.01))))
                ax.annotate("*",xy=xy_star,xycoords=("axes fraction", "data"),color='red')


    ax.annotate('Nucleosome:', xy=(start-(end-start)/6, y_nucle),  xycoords='data',size=12)
    ax.annotate('Epigenetic Pattern:', xy=(start-(end-start)/6, 0.23+top_heatmap_y),  xycoords='data',size=12)
    ax.annotate(chro, xy=(start-(end-start)/6, 0.1),  xycoords='data',size=12)

    name=open(File).readline().split('\t')[8:(8+hist_n)]
    for n,i in enumerate(name):
        ax.annotate(i.split("_")[0],xy=(start-(end-start)/8, top_heatmap_y+0.03*(hist_n-1-n)),xycoords='data',size=10)
        ax.annotate(i.split("_")[0],xy=(start-(end-start)/8, 0.2+0.03*(hist_n-1-n)),xycoords='data',size=10)

    # flame for nucleosome and chromatin state tracks
    rect=matplotlib.patches.Rectangle((start,y_nucle),end-start, 0.03, edgecolor='black',fill=False)
    ax.add_patch(rect)    
    for k in range(clu_n):
        rect=matplotlib.patches.Rectangle((start,y_nucle+0.04*(k+1)+gene_track_height+0.01),end-start, 0.03, edgecolor='grey',fill=False)
        ax.add_patch(rect)
        ax.annotate('Population%d'%(k+1),xy=(start-(end-start)/6, y_nucle+0.04*(k+1)+gene_track_height+0.01),xycoords='data',size=12)

    # chromatin state legend
    for s in range(state_n):
        dist=(end-start)*1.0/state_n 
        length=dist*0.75
        rect=matplotlib.patches.Rectangle((start+dist*s,0.1), length, 0.03, color=color_state[s])
        ax.add_patch(rect)
        ax.annotate(s+1,xy=(start+dist*s+length/3,0.075),xycoords='data',size=10) 
    ax.annotate("Chromatin states:",xy=(start,0.14),xycoords='data',size=12)      
    ax.add_patch(matplotlib.patches.Rectangle((start-length/6,0.07),end-start, 0.1, edgecolor='grey',fill=False))

    plt.title("Region: ["+chro+": %d-%d]"%(start,end),size=14)
    plt.savefig(args.output)
    plt.close()