Ejemplo n.º 1
0
    def __init__(self,seed_seqs, all_seqs, width = 6, verbose = ''):
        """Store the input sequences and set default parameters.

        seed_seqs -- sequences to be scanned for seeds
        all_seqs  -- stored unchanged as self.seqs
        width     -- width passed to MotifTools.top_nmers; a false value
                     (e.g. 0) pairs each seed sequence with its index instead
        verbose   -- stored unchanged as self.verbose
        """
        self.seed_seqs  = seed_seqs # Sequences to be scanned for seeds
        self.seqs       = all_seqs
        self.candidates = []
        self.models     = []      # Set directly or computed from seed_seqs
        self.width      = width
        self.verbose    = verbose
        if width:
            # Take the seed n-mers of the requested width from the seed sequences.
            self.goodwmersT = MotifTools.top_nmers(self.width,self.seed_seqs,1,"")
        else:
            # No width given: use each seed sequence itself, paired with its index.
            self.goodwmersT = zip(self.seed_seqs,range(len(self.seed_seqs)))
        # Default per-base background probabilities; replaced below when a
        # global Markov background is available.
        self.bgprob     = {'A': 0.31, 'C': .19, 'G': .19, 'T': .31}
        # NOTE(review): the meaning of the numeric defaults below is not
        # visible in this block -- confirm against the code that reads them.
        self.beta       = 0.001
        self.deltamin   = 1e-3
        self.probes     = []
        self.method     = "ZOOPS" # Either "OOPS" or "ZOOPS"
        self.param      = {}
        self.gapflank   = 0
        self.gapweight  = 0.2
        self.seedbeta   = 0.02
        self.joint      = 1

        # Prefer the zeroth-order frequencies of the module-level Markov
        # background over the hard-coded defaults when one has been set.
        global theMarkovBackground
        if theMarkovBackground:
            self.bgprob = theMarkovBackground.zeroth()

        '''DELETE
Ejemplo n.º 2
0
def main():
    """Print pseudocounted n-mer frequencies for the FASTA file in sys.argv[1].

    For each width 1..6 every n-mer returned by permute() starts with a
    pseudocount of 1.  Each observed (nmer, count) pair from
    MotifTools.top_nmers is then added to both the n-mer and its reverse
    complement.  A header line and one "nmer frequency" line per n-mer
    (in sorted order) are written to stdout.
    """
    fasta_path = sys.argv[1]
    sequences = Fasta.load(fasta_path).values()
    for width in range(1, 7):
        every_nmer = permute(width)
        observed = MotifTools.top_nmers(width, sequences, 'with counts',
                                        'purge Ns')
        tallies = {}
        grand_total = 0
        for nmer in every_nmer:
            tallies[nmer] = 1  # pseudocount
            grand_total += 1
        for nmer, count in observed:
            try:
                partner = MotifTools.revcomplement(nmer)
                tallies[nmer] += count
                tallies[partner] += count
                grand_total += 2 * count
            except KeyError:
                # Observed n-mer (or its reverse complement) is not one of
                # the n-mers from permute(); leave it out.
                pass
        print("# freq in %s (total %d with pseudocounts)" % (fasta_path,
                                                             grand_total))
        for nmer in sorted(tallies):
            print("%-7s %20.17f" % (nmer, float(tallies[nmer]) / grand_total))
        sys.stdout.flush()
Ejemplo n.º 3
0
def main():
    """Report n-mer frequencies (widths 1-6) for the FASTA file in sys.argv[1].

    Every n-mer from permute() is seeded with a pseudocount of 1; observed
    counts from MotifTools.top_nmers are credited to the n-mer and to its
    reverse complement.  Results go to stdout, one sorted table per width.
    """
    source = sys.argv[1]
    seqs = Fasta.load(source).values()
    for w in range(1, 7):
        universe = permute(w)
        counted = MotifTools.top_nmers(w, seqs, 'with counts', 'purge Ns')

        freq_table = {}
        denominator = 0
        for word in universe:
            freq_table[word] = 1  # pseudocount
            denominator += 1

        for word, hits in counted:
            try:
                mirror = MotifTools.revcomplement(word)
                freq_table[word] += hits
                freq_table[mirror] += hits
                denominator += 2 * hits
            except KeyError:
                # Word is not in the table built from permute(); ignore it.
                pass

        print("# freq in %s (total %d with pseudocounts)" % (source, denominator))
        for word in sorted(freq_table):
            print("%-7s %20.17f" % (word, float(freq_table[word]) / denominator))
        sys.stdout.flush()
Ejemplo n.º 4
0
def info2seeds(N,infofile,probefile,species='YEAST'):
    """Build seed motif models from the best-scoring n-mers in infofile.

    N         -- n-mer width; a false value (0/None) scores the sequences
                 from infofile directly instead of extracting n-mers
    infofile  -- file read with Fasta.seqs
    probefile -- file of probe ids, read with ProbeSet.ids_from_file
    species   -- passed to the ProbeSet constructor

    Candidates are scored with ProbeSet.p_value against the ids from
    probefile and sorted by ascending p-value.  The (candidate, p) pairs of
    the first 40 are printed, and a list of MotifTools.Motif objects built
    from the first 20 (or fewer) is returned.
    """
    G    = ProbeSet(species)
    IDs  = G.ids_from_file(probefile)
    Q    = EM.theMarkovBackground.zeroth()

    seqs = Fasta.seqs(infofile)

    if not N:
        nmers = seqs
    else:
        # Keep at most the first 1000 n-mers to bound the scoring work.
        nmers = MotifTools.top_nmers(N,seqs)[0:1000]

    # Fixed: the original called len(nmers,infofile), which raises TypeError.
    print("Scoring enrichment of %d nmers from %s"%(len(nmers),infofile))
    sys.stdout.flush()

    nmers_scoresT = []
    for nmer in nmers:
        if nmer.isalpha():
            p = G.p_value(nmer,IDs,'') #'verbose'
            nmers_scoresT.append((nmer,p))
    # Ascending by p-value; a key sort is stable like the old cmp-based sort.
    nmers_scoresT.sort(key=lambda pair: pair[1])

    models = []
    for seq, _p in nmers_scoresT[0:20]:
        m = MotifTools.Motif('',Q)
        m.compute_from_text(seq,0.1)
        models.append(m)
    for tup in nmers_scoresT[0:40]:
        print(tup)
    return models
Ejemplo n.º 5
0
def main(fastafile, outDirectory):  # !! 1/2/09 AD added 'fastafile' var and changed 'if __name__' as way to call this from script.
    """Write pseudocounted n-mer frequencies for fastafile to a .freq file.

    fastafile    -- path of the FASTA file, loaded with Fasta.load
    outDirectory -- directory that receives '<basename of fastafile>.freq'

    For each width 1..6 every n-mer from permute() starts with a pseudocount
    of 1, and each observed count from MotifTools.top_nmers is added to the
    n-mer and to its reverse complement.  The file holds, per width, a
    header line followed by one "nmer frequency" line per n-mer in sorted
    order.
    """
    seqsD = Fasta.load(fastafile)
    seqs  = seqsD.values()
    basename = fastafile.split('/')[-1]

    output = []
    for w in range(1,7):
        allnmers = permute(w)
        nmersT = MotifTools.top_nmers(w,seqs,'with counts','purge Ns')
        nmersD = {}
        total = 0
        for nmer in allnmers:
            nmersD[nmer] = 1 #Pseudo count
            total = total + 1
        for nmer,count in nmersT:
            try:
                rc = MotifTools.revcomplement(nmer)
                nmersD[nmer] = nmersD[nmer] + count
                nmersD[rc]   = nmersD[rc]   + count
                total = total + 2*count
            except KeyError:
                # Observed n-mer is not in the table built from permute().
                pass
        output.append("# freq in %s (total %d with pseudocounts)\n"%(basename,total))  # AD 02-27-09 added a '\n' to make file look right
        for nmer in sorted(nmersD):
            output.append( "%-7s %20.17f\n"%(nmer,float(nmersD[nmer]) / total))  # AD 02-27-09 added a '\n' to make file look right

    # Write the accumulated lines once, after all widths are processed.  The
    # original reopened and rewrote the file on every pass of the loop and
    # never closed the handle; the final file content is the same.
    outPath = '%s/%s.freq' % (outDirectory, basename)
    outFile = open(outPath, 'w')
    try:
        outFile.writelines(output)
    finally:
        outFile.close()
Ejemplo n.º 6
0
def Reduce_Nmers(Info):
    print 'COMPUTING Nmers ....'
    mseqs = ReduceInfo2seqs(Info,70, lambda L: MotifTools.top_nmers(6,L)[0:3])
    print "Combining representative sequences...: "
    for i in range(len(mseqs)):
        i = i + 1
        print '\t%s'%mseqs[i-1],
        if (i%5 == 0): print
    print 

    top_seq_pairs = MotifTools.top_nmers(5,mseqs,1)
    total_nmers = 0
    for (mner,count) in top_seq_pairs:
        total_nmers = total_nmers + count
    for (nmer,count) in top_seq_pairs[0:8]:
        print "RESULT: %s\t%2d (%5.2f%%) occurences:  "%(nmer,count,
                                                         100*float(count)/total_nmers),
        for bsite in Info.query['bsites']:
            seq = bsite.cleantxt()
            (max,s1,s2) = MotifTools.compare_seqs(nmer,seq)
            print '   %s vs %s %4.2f correct'%(s1,s2,max)
Ejemplo n.º 7
0
 def freq_from_seqs_old(self,seqs):
     self.highestorder = 4
     for depth in range(1,6):
         nmersT = MotifTools.top_nmers(depth, seqs, "TUPLES")
         self.nmers_by_size[depth] = map(lambda x:x[0],nmersT)
         total = 0
         for nmer,count in nmersT:
             total = total + count
         for nmer,count in nmersT:
             rc = MotifTools.revcomplement(nmer)
             if nmer == rc:                       #correct top_nmers 
                 f   = float(count)/total         #palindrome count
             else:
                 f   = float(count)/total/2
             self.F[nmer] = f
             self.F[rc]   = f
     for depth in range(0):                       #For debugging
         total = 0
         for k in self.F.keys():
             if len(k) == depth:
                 total = total + self.F[k]
                 print k, self.F[k]
         print depth,total
Ejemplo n.º 8
0
 def study_seqs(self,seqs):
     for depth in range(1,6):
         nmersT = MotifTools.top_nmers(depth, seqs, "TUPLES")
         total = 0
         for nmer,count in nmersT:
             total = total + count
             rc = MotifTools.revcomplement(nmer)
         for nmer,count in nmersT:
             f   = math.log(float(count)/total)/math.log(2)
             f_2 = math.log(0.5 * float(count)/total)/math.log(2)
             rc = MotifTools.revcomplement(nmer)
             if rc != nmer:
                 self.D[nmer] = f_2
                 self.D[rc]   = f_2
             else:
                 self.D[nmer] = f
     for depth in range(0):
         total = 0
         for k in self.D.keys():
             if len(k) == depth:
                 total = total + pow(2,self.D[k])
                 print k, pow(2,self.D[k])
         print depth,total
     self.highestorder = 5
Ejemplo n.º 9
0
 def freq_from_seqs(self,seqs):
    """Fill self.F and self.nmers_by_size with pseudocounted frequencies.

    For each width 1..6 every n-mer from permute() starts with a
    pseudocount of 1, and each observed count from MotifTools.top_nmers is
    added to the n-mer and to its reverse complement.  Each n-mer's share
    of the width's total is stored in self.F under the n-mer and under its
    reverse complement.  Also sets self.highestorder to 6.
    """
    self.highestorder = 6
    for w in range(1,7):
        allnmers = permute(w)
        nmersT = MotifTools.top_nmers(w,seqs,'with counts','purge Ns')
        self.nmers_by_size[w] = allnmers[:]

        tallies = {}
        total = 0.0                       # float so the division below is exact
        for nmer in allnmers:
            tallies[nmer] = 1             # pseudocount
            total += 1

        for nmer,count in nmersT:
            try:
                rc = MotifTools.revcomplement(nmer)
                tallies[nmer] += count
                tallies[rc]   += count
                total += 2*count
            except KeyError:
                # Observed n-mer is not in the table built from permute().
                pass

        for nmer, tally in tallies.items():
            rc = MotifTools.revcomplement(nmer)
            freq = tally/total
            self.F[nmer] = freq
            self.F[rc]   = freq