def averagemotifs(motifs, ovlp=2, template=None, DFUNC=negcommonbitsrange, VERBOSE=1, prop=''):
    """Place all motifs on a common frame and return their summed motif.

    What the code does, step by step:

    1. If ``template`` is falsy, the template becomes
       ``motifs[centroididx(computeDmat(motifs))]``.
    2. For every motif, ``minshortestoverhangdiff(template, m,
       OVLP(template, m), 'want_offset', DFUNC=DFUNC)`` is called and the
       pair it returns is stored on the motif as ``m.offset`` and ``m.rc``.
       The input motifs are therefore modified in place.
    3. Motifs whose ``m.rc`` is true are replaced by ``m.revcomp()``; each
       motif is then sliced with ``[-leftpad, width + rightpad]`` so that
       all of them span from the smallest offset to the largest
       ``offset + width``.
    4. The padded motifs are combined with ``MotifTools.sum(pmotifs, [])``.

    Parameters:
        motifs   -- sequence of motif objects (needs ``width``, ``revcomp()``,
                    ``oneletter`` and tuple indexing).
        ovlp     -- not used by this function; the overlap handed to
                    ``minshortestoverhangdiff`` comes from ``OVLP()``.
                    Kept so existing callers keep working.
        template -- motif to align against; chosen automatically when falsy.
        DFUNC    -- forwarded unchanged to ``minshortestoverhangdiff``.
        VERBOSE  -- when true, print one line per padded motif (its
                    ``oneletter`` text, its value from
                    ``minshortestoverhangdiff`` against the result, and
                    optionally ``key`` / ``prop`` attributes) followed by a
                    line of dashes.
        prop     -- name of an extra instance attribute to print in verbose
                    mode when the motif has it.

    Returns:
        The motif produced by ``MotifTools.sum``.
    """
    if not template:
        Dmat = computeDmat(motifs)
        template = motifs[centroididx(Dmat)]

    # Annotate each input motif with its placement relative to the template.
    for m in motifs:
        m.offset, m.rc = minshortestoverhangdiff(
            template, m, OVLP(template, m), 'want_offset', DFUNC=DFUNC)

    # Shift the frame so the most negative offset becomes column 0; maxpos is
    # then the total width every padded motif must have.
    minpos = -min(m.offset for m in motifs)
    maxpos = max(m.offset + m.width for m in motifs) + minpos

    pmotifs = []
    for m in motifs:
        oriented = m.revcomp() if m.rc else m
        leftpad = minpos + m.offset
        rightpad = maxpos - (leftpad + m.width)
        # NOTE(review): presumably the tuple index extends the motif past its
        # ends with padding columns -- confirm against MotifTools.Motif.
        pmotifs.append(oriented[-leftpad, oriented.width + rightpad])

    AVE = MotifTools.sum(pmotifs, [])

    if VERBOSE:
        for padded in pmotifs:
            d = minshortestoverhangdiff(AVE, padded, OVLP(AVE, padded), DFUNC=DFUNC)
            # Collect the space-separated fields and emit them with a single
            # print call; this form runs unchanged on Python 2 and 3.
            fields = ['%s %5.3f' % (padded.oneletter, d)]
            if 'key' in padded.__dict__:
                fields.append('%s' % (padded.key,))
            if prop and prop in padded.__dict__:
                fields.append('%s' % (padded.__dict__[prop],))
            print(' '.join(fields))
        # NOTE(review): indentation was lost in SOURCE; the separator is
        # assumed to be printed once, after the per-motif lines -- confirm.
        # The original took the width from the leaked loop variable, i.e. the
        # last padded motif.
        print('-' * pmotifs[-1].width)
    return AVE
def alignAndCombineMotifs(motifs, weights):
    """Align `motifs` and sum them into a single motif using `weights`.

    Each motif is paired with the weight at the same position.  The pairs
    are ordered by decreasing absolute weight, the motifs are aligned with
    ``alignSimilarMotifs(..., minoverlap=4)`` in that order, and the aligned
    motifs are summed with ``MotifTools.sum`` using the negated weights.

    Returns the motif produced by ``MotifTools.sum``.
    """
    # Ascending sort followed by a reversal (not reverse=True) so that pairs
    # with equal |weight| end up in the same relative order as before.
    ranked = sorted(zip(motifs, weights), key=lambda pair: abs(pair[1]))
    ranked.reverse()

    ordered_motifs = [pair[0] for pair in ranked]
    aligned = alignSimilarMotifs(ordered_motifs, minoverlap=4)

    negated_weights = [-pair[1] for pair in ranked]
    return MotifTools.sum(aligned, negated_weights)
def averagemotifs(motifs, ovlp=2, template=None, DFUNC=negcommonbitsrange, VERBOSE=1, prop=''):
    """Place all motifs on a common frame and return their summed motif.

    What the code does, step by step:

    1. If ``template`` is falsy, the template becomes
       ``motifs[centroididx(computeDmat(motifs))]``.
    2. For every motif, ``minshortestoverhangdiff(template, m,
       OVLP(template, m), 'want_offset', DFUNC=DFUNC)`` is called and the
       pair it returns is stored on the motif as ``m.offset`` and ``m.rc``.
       The input motifs are therefore modified in place.
    3. Motifs whose ``m.rc`` is true are replaced by ``m.revcomp()``; each
       motif is then sliced with ``[-leftpad, width + rightpad]`` so that
       all of them span from the smallest offset to the largest
       ``offset + width``.
    4. The padded motifs are combined with ``MotifTools.sum(pmotifs, [])``.

    Parameters:
        motifs   -- sequence of motif objects (needs ``width``, ``revcomp()``,
                    ``oneletter`` and tuple indexing).
        ovlp     -- not used by this function; the overlap handed to
                    ``minshortestoverhangdiff`` comes from ``OVLP()``.
                    Kept so existing callers keep working.
        template -- motif to align against; chosen automatically when falsy.
        DFUNC    -- forwarded unchanged to ``minshortestoverhangdiff``.
        VERBOSE  -- when true, print one line per padded motif (its
                    ``oneletter`` text, its value from
                    ``minshortestoverhangdiff`` against the result, and
                    optionally ``key`` / ``prop`` attributes) followed by a
                    line of dashes.
        prop     -- name of an extra instance attribute to print in verbose
                    mode when the motif has it.

    Returns:
        The motif produced by ``MotifTools.sum``.
    """
    if not template:
        Dmat = computeDmat(motifs)
        template = motifs[centroididx(Dmat)]

    # Annotate each input motif with its placement relative to the template.
    for m in motifs:
        m.offset, m.rc = minshortestoverhangdiff(
            template, m, OVLP(template, m), 'want_offset', DFUNC=DFUNC)

    # Shift the frame so the most negative offset becomes column 0; maxpos is
    # then the total width every padded motif must have.
    minpos = -min(m.offset for m in motifs)
    maxpos = max(m.offset + m.width for m in motifs) + minpos

    pmotifs = []
    for m in motifs:
        oriented = m.revcomp() if m.rc else m
        leftpad = minpos + m.offset
        rightpad = maxpos - (leftpad + m.width)
        # NOTE(review): presumably the tuple index extends the motif past its
        # ends with padding columns -- confirm against MotifTools.Motif.
        pmotifs.append(oriented[-leftpad, oriented.width + rightpad])

    AVE = MotifTools.sum(pmotifs, [])

    if VERBOSE:
        for padded in pmotifs:
            d = minshortestoverhangdiff(AVE, padded, OVLP(AVE, padded), DFUNC=DFUNC)
            # Collect the space-separated fields and emit them with a single
            # print call; this form runs unchanged on Python 2 and 3.
            fields = ['%s %5.3f' % (padded.oneletter, d)]
            if 'key' in padded.__dict__:
                fields.append('%s' % (padded.key,))
            if prop and prop in padded.__dict__:
                fields.append('%s' % (padded.__dict__[prop],))
            print(' '.join(fields))
        # NOTE(review): indentation was lost in SOURCE; the separator is
        # assumed to be printed once, after the per-motif lines -- confirm.
        # The original took the width from the leaked loop variable, i.e. the
        # last padded motif.
        print('-' * pmotifs[-1].width)
    return AVE
# TAMOify kmers and logify pVals: every (kmer, pVal) entry becomes
# (Motif(kmer), log10(pVal)).  Slice assignment keeps the same list object.
testMotifs[:] = [(Motif(entry[0]), numpy.log10(float(entry[1])))
                 for entry in testMotifs]

# Sort on log'd pVals (ascending, so the smallest values come first).
testMotifs.sort(key=lambda x: x[1])

comboMotifs = []
# Only the first 20% of the sorted entries are used as seeds.
for i in range(int(len(testMotifs) * 0.2)):
    simMotifs = getKmersWithOneMisMtch(testMotifs[i][0], testMotifs)
    alndMotifs = alignSimilarMotifs([x[0] for x in simMotifs])
    # -x[1] to convert neg logs to pos weights
    comboMotifs.append(MotifTools.sum(alndMotifs, [-x[1] for x in simMotifs]))

print(len(comboMotifs))

t2 = time.time()
oFile = '/Users/biggus/Documents/James/Collaborations/Campbell/data/Results_HyperGeoScreen/masked/Results_gGEMS/CCupAt4Days.gte2x.5-16mers.shfSeq.3.gGEMS.tmo'
pFile = '/Users/biggus/Documents/James/Collaborations/Campbell/data/Results_HyperGeoScreen/masked/Results_gGEMS/CCupAt4Days.gte2x.5-16mers.shfSeq.3.gGEMS.pkl'

MotifTools.save_motifs(comboMotifs, oFile, kmer_count=60)

# Pickle through a context manager so the handle is flushed and closed even
# if dump() raises (the handle used to be left open), and open in binary
# mode as the pickle documentation recommends.  pFile stays the path string.
with open(pFile, 'wb') as pklHandle:
    cPickle.dump(comboMotifs, pklHandle)

t3 = time.time()
# t1 is set earlier in the script, outside this excerpt.
print('Calculations took %.3f min.\nWriting/Pickling took %.3f min.' % ((float(t2)-t1)/60, (float(t3)-t2)/60))
from TAMO import MotifTools

# Demo: sum the same five 7-mers with and without per-motif weights and
# print both results.
motifs = [
    'TGATACA',
    'TGATAAA',
    'TGACAAA',
    'AGATACA',
    'AGATACG',
]
# Turn each text k-mer into a TAMO motif object.
motifs = [MotifTools.Motif_from_text(text) for text in motifs]

# One weight per motif, in the same order as the list above.
weights = [3, 7, 2, 2, 1]

noWeight = MotifTools.sum(motifs)
withWeights = MotifTools.sum(motifs, weights)

# Single-argument print calls behave the same as the print statement here.
print("nW:\n%s\n" % noWeight.printlogo())
print("wW:\n%s" % withWeights.printlogo())

x = 1
# NOTE(review): the statements down to `return alignedMotifs` are the tail of
# a function whose `def` line lies outside this excerpt.  SOURCE had lost its
# indentation; the nesting below is reconstructed from the syntax (a `return`
# needs an enclosing function, `mtf` needs a binding loop) -- confirm against
# the full file.
        # Remember the largest motif width seen so far.
        if mtf.width > lMotifLen:
            lMotifLen = mtf.width
    # Replace every motif narrower than lMotifLen by its [0, lMotifLen] slice.
    # NOTE(review): presumably this pads the motif on the right up to the
    # common width -- confirm against MotifTools.Motif indexing.
    for i in range(len(alignedMotifs)):
        if alignedMotifs[i].width < lMotifLen:
            alignedMotifs[i] = alignedMotifs[i][0,lMotifLen]
    return alignedMotifs

# --- top-level script ---
# Load motifs from a saved .tmo file; `w` holds one hand-entered weight per
# loaded motif position (six values).
m = MotifTools.load('/Users/biggus/Documents/James/Collaborations/Campbell/data/Results_HyperGeoScreen/masked/Results_gGEMS/CCupAt4Days.6-8mers.gGEMS.top6.motifs.stdThresh.tmo')
w = [5.8952, 5.6523, 5.0585, 4.9788, 4.9678, 4.7688]
# Entries 0, 1 and 4 of the motifs and of the weights, stored as
# [selected motifs, selected weights].
twoFive = [[m[0],m[1],m[4]],[w[0],w[1],w[4]]]
alndMotifs = alignSimilarMotifs(twoFive[0], minoverlap=4)
# NOTE: this loop rebinds `m`, so the loaded motif list is no longer
# reachable through that name afterwards.
for m in alndMotifs:
    print m.oneletter
# Unweighted sum: the selected weights in twoFive[1] are not passed here.
sumdMotif = MotifTools.sum(alndMotifs)
#bKmers = sumdMotif.bogus_kmers()
#for k in bKmers:
    #print k
None