예제 #1
0
파일: violinplot.py 프로젝트: mortonjt/Boa
def countMidpoints(operon,fastaindex):
    """Return the midpoint offset of every gene after the first in an operon.

    The operon is passed through ``interval_filter.overlaps`` (with
    ``backtran=False``) and ordered by start coordinate.  The start of the
    first gene in that order is the reference point ("front").  Every later
    gene contributes one ``(function, midpoint - front)`` pair; the first
    gene itself is not reported.

    Parameters:
        operon: gene records handed to ``interval_filter.overlaps``.  Each
            record coming back from the filter must be indexable, with a
            label of the form ``"<function>.<rest>"`` at index 1 and
            int-convertible start/end coordinates at indexes 5 and 6.
        fastaindex: forwarded unchanged to ``interval_filter.overlaps``.

    Returns:
        A list of ``(function, offset)`` tuples, where ``function`` is the
        label text before the first ``'.'`` and ``offset`` is the integer
        midpoint of the gene minus the front coordinate.  The list is empty
        when fewer than two genes survive the filter.
    """
    genes = interval_filter.overlaps(operon, fastaindex, backtran=False)
    # Order by the numeric start: the coordinates are converted with int()
    # below, so comparing the raw fields could sort "1000" before "200".
    genes = sorted(genes, key=lambda g: int(g[5]))
    if not genes:
        # Nothing survived the filter, so there is no reference gene.
        return []

    front = int(genes[0][5])
    intervals = []
    for gene in genes[1:]:
        function = gene[1].split('.')[0]
        st, end = map(int, gene[5:7])
        # Floor division keeps the midpoint an integer on Python 2 and 3.
        mid = (st + end) // 2 - front
        intervals.append((function, mid))
    return intervals
예제 #2
0
파일: violinplot.py 프로젝트: mortonjt/Boa
def listIntervals(operon,fastaindex):
    """Expand every gene of an operon into per-position (function, offset) pairs.

    The operon is passed through ``interval_filter.overlaps`` and ordered by
    start coordinate.  The start of the first gene in that order is the
    reference point ("front"); it is taken from the raw start field, before
    any start/end swap.  Each gene is then expanded into one tuple per
    integer position of its half-open span ``[start - front, end - front)``.

    Parameters:
        operon: gene records handed to ``interval_filter.overlaps``.  Each
            record coming back from the filter must be indexable, with a
            label of the form ``"<function>.<rest>"`` at index 1 and
            int-convertible start/end coordinates at indexes 5 and 6.
        fastaindex: forwarded unchanged to ``interval_filter.overlaps``.

    Returns:
        A list of ``(function, offset)`` tuples, grouped gene by gene in
        sorted order.  The list is empty when no gene survives the filter.

    Side effects:
        Prints the list of gene functions to standard output.
    """
    genes = interval_filter.overlaps(operon, fastaindex, False)
    # Order by the numeric start: the coordinates are converted with int()
    # below, so comparing the raw fields could sort "1000" before "200".
    genes = sorted(genes, key=lambda g: int(g[5]))
    functions = [g[1].split('.')[0] for g in genes]
    # Single pre-formatted argument: emits the same text as the Python 2
    # statement form while remaining valid on Python 3.
    print("Functions %s" % (functions,))
    if not genes:
        # Nothing survived the filter, so there is no reference gene.
        return []

    front = int(genes[0][5])
    ints = []
    for function, gene in zip(functions, genes):
        st, end = map(int, gene[5:7])
        if st > end:
            # Normalise so the span always runs low -> high.
            st, end = end, st
        ints.extend((function, pos) for pos in range(st - front, end - front))
    return ints