Esempio n. 1
0
def averagemotifs(motifs,ovlp=2,template=None,DFUNC=negcommonbitsrange,VERBOSE=1,prop=''):
    """Align every motif to a template, pad them to one width, and sum them.

    Arguments:
    motifs   -- sequence of motif objects.  Each one is MODIFIED in place:
                it gains an 'offset' and an 'rc' attribute holding the pair
                returned by minshortestoverhangdiff(..., 'want_offset').
    ovlp     -- accepted for interface compatibility but not used; the
                overlap handed to minshortestoverhangdiff always comes from
                OVLP(a, b).
    template -- motif to align against.  When falsy, the motif at index
                centroididx(computeDmat(motifs)) is used instead.
    DFUNC    -- forwarded unchanged to minshortestoverhangdiff.
    VERBOSE  -- when truthy, print one line per padded motif (its oneletter
                string, the value minshortestoverhangdiff returns against
                the result, and optional attributes), then a dashed rule.
    prop     -- name of an extra instance attribute to print in VERBOSE
                mode when the padded motif carries it.

    Returns MotifTools.sum() of the padded motifs.
    """
    if not template:
        Dmat = computeDmat(motifs)
        idx  = centroididx(Dmat)
        template = motifs[idx]

    # Store each motif's alignment against the template on the motif itself.
    for m in motifs:
        off, rc = minshortestoverhangdiff(template,m,OVLP(template,m),'want_offset',DFUNC=DFUNC)
        m.offset = off
        m.rc     = rc

    # Shift everything so the most negative offset sits at position 0;
    # maxpos is then the right-most end of any motif after that shift.
    minpos = -min(m.offset for m in motifs)
    maxpos = max(m.offset + m.width for m in motifs) + minpos

    pmotifs = []
    for m in motifs:
        # A truthy rc flag means the reverse complement is the one to pad.
        if m.rc: _m = m.revcomp()
        else   : _m = m
        leftpad  = minpos + m.offset
        rightpad = maxpos - (leftpad + m.width)
        # NOTE(review): tuple indexing presumably extends the motif beyond
        # [0, width) on both sides -- confirm against MotifTools.Motif.
        padded   = _m[-leftpad,_m.width+rightpad]
        pmotifs.append(padded)

    # NOTE(review): the empty second argument is presumably "no weights" --
    # confirm against MotifTools.sum.
    AVE = MotifTools.sum(pmotifs,[])

    if VERBOSE:
        for pm in pmotifs:
            d = minshortestoverhangdiff(AVE,pm,OVLP(AVE,pm),DFUNC=DFUNC)
            # Collect the fields and emit them space-separated on one line;
            # a single-argument print(...) behaves the same on Python 2 and 3.
            fields = ['%s   %5.3f'%(pm.oneletter,d)]
            if 'key' in pm.__dict__: fields.append('%s'%(pm.key,))
            if prop and prop in pm.__dict__: fields.append('%s'%(pm.__dict__[prop],))
            print(' '.join(fields))
        # Rule as wide as the last padded motif printed.
        print('-'*pm.width)
    return AVE
Esempio n. 2
0
def alignAndCombineMotifs(motifs, weights):
    """Order motifs by descending |weight|, align them, and sum them.

    motifs and weights are paired positionally (zip stops at the shorter of
    the two).  The pairs are ordered by absolute weight, largest first, the
    motifs are aligned with alignSimilarMotifs(..., minoverlap=4), and the
    aligned motifs are combined with MotifTools.sum using the NEGATED
    weights in the same order.

    NOTE(review): the negation presumably turns negative log p-values into
    positive weights, and the pairing of aligned motifs with weights assumes
    alignSimilarMotifs keeps its input order -- confirm both with the caller.

    Returns the motif produced by MotifTools.sum.
    """
    # sorted() instead of zip(...).sort(): zip only yields a list on
    # Python 2.  Ascending sort followed by reverse() (rather than
    # reverse=True) keeps the original ordering of equal-|weight| pairs.
    simMotifs = sorted(zip(motifs, weights), key=lambda x: abs(x[1]))
    simMotifs.reverse()

    aligned = alignSimilarMotifs([x[0] for x in simMotifs], minoverlap=4)

    comboMotif = MotifTools.sum(aligned, [-x[1] for x in simMotifs])
    return comboMotif
Esempio n. 3
0
def averagemotifs(motifs,
                  ovlp=2,
                  template=None,
                  DFUNC=negcommonbitsrange,
                  VERBOSE=1,
                  prop=''):
    """Build a combined motif from motifs aligned to a common template.

    Each motif is aligned to the template, flipped to its reverse complement
    when the alignment says so, padded so that all motifs span the same
    columns, and the padded motifs are passed to MotifTools.sum.

    Arguments:
    motifs   -- sequence of motif objects; each is mutated by having
                'offset' and 'rc' attributes assigned from the result of
                minshortestoverhangdiff(..., 'want_offset').
    ovlp     -- unused; kept so existing callers keep working.  The overlap
                actually used is OVLP(a, b).
    template -- alignment reference.  If falsy, motifs[centroididx(
                computeDmat(motifs))] is chosen.
    DFUNC    -- passed straight through to minshortestoverhangdiff.
    VERBOSE  -- if truthy, report every padded motif on stdout followed by
                a dashed line.
    prop     -- optional name of an instance attribute to include in the
                VERBOSE report when present on the padded motif.

    Returns the result of MotifTools.sum(padded_motifs, []).
    """
    if not template:
        Dmat = computeDmat(motifs)
        idx = centroididx(Dmat)
        template = motifs[idx]

    # Annotate every motif with its offset / reverse-complement flag.
    for m in motifs:
        off, rc = minshortestoverhangdiff(template,
                                          m,
                                          OVLP(template, m),
                                          'want_offset',
                                          DFUNC=DFUNC)
        m.offset = off
        m.rc = rc

    # minpos moves the most negative offset to column 0; maxpos is the
    # furthest right edge of any motif once that shift is applied.
    minpos = -min(m.offset for m in motifs)
    maxpos = max(m.offset + m.width for m in motifs) + minpos

    pmotifs = []
    for m in motifs:
        if m.rc:
            _m = m.revcomp()
        else:
            _m = m
        leftpad = minpos + m.offset
        rightpad = maxpos - (leftpad + m.width)
        # NOTE(review): indexing with a (start, stop) tuple outside the
        # motif's own range presumably pads it -- verify in MotifTools.
        padded = _m[-leftpad, _m.width + rightpad]
        pmotifs.append(padded)

    # NOTE(review): [] is presumably interpreted as "unweighted" by
    # MotifTools.sum -- verify.
    AVE = MotifTools.sum(pmotifs, [])

    if VERBOSE:
        for pm in pmotifs:
            d = minshortestoverhangdiff(AVE, pm, OVLP(AVE, pm), DFUNC=DFUNC)
            # One space-separated line per motif; print(<one string>) gives
            # the same output under Python 2 and Python 3.
            fields = ['%s   %5.3f' % (pm.oneletter, d)]
            if 'key' in pm.__dict__:
                fields.append('%s' % (pm.key,))
            if prop and prop in pm.__dict__:
                fields.append('%s' % (pm.__dict__[prop],))
            print(' '.join(fields))
        # Closing rule, sized to the last padded motif reported.
        print('-' * pm.width)
    return AVE
Esempio n. 4
0
# TAMOify kmers and logify pVals:
# every entry is replaced in place by (Motif(kmer), log10(pVal)).
for i, entry in enumerate(testMotifs):
    testMotifs[i] = (Motif(entry[0]),numpy.log10(float(entry[1])))

# Sort on log'd pVals (ascending, so the smallest p-values come first)
testMotifs.sort(key=lambda x: x[1])

comboMotifs = []

# Use each of the first 20% of the sorted entries as a seed: gather the
# entries getKmersWithOneMisMtch returns for it, align them, and sum them.
seedCount = int(len(testMotifs)*0.2)
for i in range(seedCount):
    simMotifs  = getKmersWithOneMisMtch(testMotifs[i][0],testMotifs)
    alndMotifs = alignSimilarMotifs([x[0] for x in simMotifs])
    comboMotifs.append(MotifTools.sum(alndMotifs,[-x[1] for x in simMotifs])) # -x[1] to convert neg logs to pos weights
    print(len(comboMotifs))  # progress counter

t2 = time.time()

oFile = '/Users/biggus/Documents/James/Collaborations/Campbell/data/Results_HyperGeoScreen/masked/Results_gGEMS/CCupAt4Days.gte2x.5-16mers.shfSeq.3.gGEMS.tmo'
pFile = '/Users/biggus/Documents/James/Collaborations/Campbell/data/Results_HyperGeoScreen/masked/Results_gGEMS/CCupAt4Days.gte2x.5-16mers.shfSeq.3.gGEMS.pkl'
MotifTools.save_motifs(comboMotifs,oFile,kmer_count=60)

# Close the pickle file deterministically (the handle used to be leaked, and
# pFile was rebound from the path to the file object).
# NOTE(review): text mode 'w' is kept as-is; it is only safe with the ASCII
# pickle protocol 0 -- switch to 'wb' if a binary protocol is ever used.
with open(pFile, 'w') as pHandle:
    cPickle.dump(comboMotifs,pHandle)
t3 = time.time()
# t1 is expected to have been taken before this section started.
print('Calculations took %.3f min.\nWriting/Pickling took %.3f min.' % ((float(t2)-t1)/60, (float(t3)-t2)/60))
    

Esempio n. 5
0
from TAMO import MotifTools

# Demo: sum the same five motifs once without and once with per-motif
# weights, and print the logo of each result.
motifs = [MotifTools.Motif_from_text(text)
          for text in ('TGATACA',
                       'TGATAAA',
                       'TGACAAA',
                       'AGATACA',
                       'AGATACG')]

# One weight per motif, in the same order as the list above.
weights = [3, 7, 2, 2, 1]

noWeight = MotifTools.sum(motifs)
withWeights = MotifTools.sum(motifs, weights)

print("nW:\n%s\n" % (noWeight.printlogo()))
print("wW:\n%s" % (withWeights.printlogo()))
# NOTE(review): no visible use of x -- presumably a debugger breakpoint anchor.
x = 1
Esempio n. 6
0
        if mtf.width > lMotifLen:
            lMotifLen = mtf.width
    for i in range(len(alignedMotifs)):
        if alignedMotifs[i].width < lMotifLen:
            alignedMotifs[i] = alignedMotifs[i][0,lMotifLen]
        
        
    
    return alignedMotifs


# Load the saved motifs; w holds one hand-entered weight per motif index.
m = MotifTools.load('/Users/biggus/Documents/James/Collaborations/Campbell/data/Results_HyperGeoScreen/masked/Results_gGEMS/CCupAt4Days.6-8mers.gGEMS.top6.motifs.stdThresh.tmo')
w = [5.8952,
     5.6523,
     5.0585,
     4.9788,
     4.9678,
     4.7688]

# Motifs 0, 1 and 4 together with their weights: [motifs, weights].
twoFive = [[m[0],m[1],m[4]],[w[0],w[1],w[4]]]

alndMotifs = alignSimilarMotifs(twoFive[0], minoverlap=4)
# A separate loop name is used so the loaded motif collection `m` is not
# overwritten by the last aligned motif.
for aligned in alndMotifs:
    print(aligned.oneletter)

# NOTE(review): the weights in twoFive[1] are never passed to
# MotifTools.sum here -- confirm the unweighted sum is intended.
sumdMotif = MotifTools.sum(alndMotifs)

#bKmers = sumdMotif.bogus_kmers()
#for k in bKmers:
    #print k
None