def countMidpoints(operon, fastaindex):
    """Return the midpoint offset of every gene after the first in an operon.

    The operon is filtered through ``interval_filter.overlaps`` (called with
    ``backtran=False``) and the remaining records are ordered by field 5,
    which this function treats as the gene's start coordinate.  Field 5 of
    the first record in that order is the reference point; every later
    record yields one ``(function, mid)`` pair.

    Args:
        operon: Gene records, passed straight to ``interval_filter.overlaps``.
            Each record it returns is read positionally: field 1 is a
            dot-separated cluster label, fields 5 and 6 are the two
            coordinates (anything ``int()`` accepts).
        fastaindex: Passed through unchanged to ``interval_filter.overlaps``.

    Returns:
        A list of ``(function, mid)`` tuples, where ``function`` is the part
        of field 1 before the first ``'.'`` and ``mid`` is the floored
        average of fields 5 and 6 minus the reference point.  The list is
        empty when fewer than two records survive the filter.
    """
    # Compare coordinates numerically: they are converted with int() below,
    # so they may arrive as strings, and a plain string sort would put
    # "1000" ahead of "200".
    genes = sorted(
        interval_filter.overlaps(operon, fastaindex, backtran=False),
        key=lambda rec: int(rec[5]),
    )
    if not genes:
        # Nothing survived the filter, so there is no reference gene.
        return []

    # Offsets are measured from field 5 of the first gene in sorted order.
    front = int(genes[0][5])

    intervals = []
    for gene in genes[1:]:
        function = gene[1].split('.')[0]
        st, end = map(int, gene[5:7])
        # Floor division keeps the integer result on Python 2 and Python 3.
        mid = (st + end) // 2 - front
        intervals.append((function, mid))
    return intervals
def listIntervals(operon, fastaindex):
    """Expand every gene of an operon into per-position ``(function, offset)`` pairs.

    The operon is filtered through ``interval_filter.overlaps`` (third
    argument ``False``) and the remaining records are ordered by field 5,
    which this function treats as the gene's start coordinate.  Field 5 of
    the first record in that order is the reference point.  For each record
    the two coordinates (fields 5 and 6) are put in ascending order, shifted
    by the reference point, and every integer position in the resulting
    half-open range ``[st, end)`` is emitted together with the gene's
    function label.

    As a side effect, one diagnostic line listing the function labels of the
    sorted records is printed to standard output.

    Args:
        operon: Gene records, passed straight to ``interval_filter.overlaps``.
            Each record it returns is read positionally: field 1 is a
            dot-separated cluster label, fields 5 and 6 are the two
            coordinates (anything ``int()`` accepts).
        fastaindex: Passed through unchanged to ``interval_filter.overlaps``.

    Returns:
        A list of ``(function, offset)`` tuples, one per covered position,
        grouped by gene in sorted order.  ``function`` is the part of field 1
        before the first ``'.'``.  The list is empty when no record survives
        the filter.
    """
    # Compare coordinates numerically: they are converted with int() below,
    # so they may arrive as strings, and a plain string sort would put
    # "1000" ahead of "200".
    genes = sorted(
        interval_filter.overlaps(operon, fastaindex, False),
        key=lambda rec: int(rec[5]),
    )
    if not genes:
        # Nothing survived the filter, so there is no reference gene.
        return []

    functions = [gene[1].split('.')[0] for gene in genes]
    # Single-argument form prints the same text on Python 2 and Python 3.
    print("Functions %s" % (functions,))

    # NOTE(review): the reference point is field 5 of the first gene taken
    # as-is.  If that gene has st > end, its own interval starts at a
    # negative offset -- confirm this is intended.
    front = int(genes[0][5])

    ints = []
    for function, gene in zip(functions, genes):
        st, end = map(int, gene[5:7])
        if st > end:
            st, end = end, st
        ints.extend((function, pos) for pos in range(st - front, end - front))
    return ints