Exemple #1
0
 def __init__(self,seq=''):
     """Initialise the instance, optionally around the sequence `seq`.

     `info` starts as an empty string, `logP` as 0 and `c_wmerbgs` as
     None.  When `seq` is non-empty it is also converted with
     MDsupport.seq2int (stored as `c_intA`) and compute_logP() is
     called.  An empty `seq` skips both steps, so `c_intA` is not
     assigned in that case.
     """
     self.seq = seq
     self.info, self.logP = '', 0
     self.c_wmerbgs = None

     # Nothing further to derive without a sequence.
     if not seq:
         return

     self.c_intA = MDsupport.seq2int(seq)
     self.compute_logP()
Exemple #2
0
    def EM_Cstart(self):
        verbose = self.verbose
        if verbose:
            print "Seeding models..."
            sys.stdout.flush()
        self.seed_models()

        #Initialize parameters
        if not self.param.has_key('gamma'): self.param['gamma'] = 0.2
        timings = {'Probes':0, 'Background':0, 'C EM':0, 'Post':0}
        _time = time.time()

        for seq in self.seqs:
            P = Probe(seq)
            self.probes.append(P)


        _time2 = time.time(); timings['Probes'] = _time2-_time; _time = _time2

        if verbose: print "Optimizing candidates by EM."
        if verbose: sys.stdout.flush()

        c_logZ_sets = {}
        for Model,i in zip(self.models,range(len(self.models))):
            width = Model.width

            self.calcmask(width)
              
            if not c_logZ_sets.has_key(width):
                c_logZs_set = []
                if verbose: print "#%s   |%s|"%(' '*28,'-'*len(self.seqs))
                if verbose: sys.stdout.flush()
                if verbose: print "Computing background (width %2d)  "%width,
                for P in self.probes:
                    if verbose: sys.stdout.write('.')
                    if verbose: sys.stdout.flush()
                    logZs = self.all_Wmers(width,P)
                    c_logZs = MDsupport.list2double(logZs)
                    c_logZs_set.append(c_logZs)
                    #P.c_wmerbgs = MDsupport.list2double(map(lambda x: x.logQtot, Wlist))
                c_logZ_sets[width] = c_logZs_set
                if verbose: print

            c_logZ_set = c_logZ_sets[width]
            for P,c_logZs in zip(self.probes,c_logZ_set):
                P.c_wmerbgs = c_logZs
                
            _time2 = time.time()
            timings['Background'] = timings['Background'] +_time2-_time
            _time = _time2


            '''Perform EM'''
            _time  = time.time()
            newModel = self.EM_C(Model, self.probes)
            _time2 = time.time(); timings['C EM'] = timings['C EM'] + _time2-_time; _time = _time2

            #print "cLL: ",newModel.joint
            #print "pLL: ",self.compute_joint(newModel,Wmers_by_seq)

            '''Was there a problem?'''
            if newModel == None:
                continue


            '''Set various things in PSSM'''
            #Distance(s)
            seeddist = MotifTools.infomaskdiff(newModel,Model)
            print '%s ----> %s'%(Model,newModel)
            print "Seed %2d: %s  -->  %s  mask:%9.5f  infoMask:%9.5f d:%9.5f"%(
                i, Model, newModel,
                MotifTools.maskdiff(newModel,Model),
                MotifTools.infomaskdiff(newModel,Model), #order is important
                Model-newModel)
            #Seed
            if Model.seedtxt: newModel.seedtxt = Model.seedtxt
            if Model.source:  newModel.source  = Model.source
            
            #newModel.denoise()
            newModel.seeddist = seeddist
            newModel.seednum  = i
            print newModel
            newModel._print_p()
            newModel._print_ll()

            '''Set various things in Candidate (like a wrapper for PSSM)'''
            C = MotifCandidate()
            C.pssm = newModel.copy()
            #C.wmers = self.best_by_Z(Wmers_by_seq)
            C.wmers  = [newModel.emit() for junk in range(20)]
            #C._update()  #MAJOR REMOVAL????????? DBG 10-14-03
            #C.MAPpurge()
            C.pssm = newModel.copy()  
            self.candidates.append(C)
            _time2 = time.time(); timings['Post'] = timings['Post']+_time2-_time;_time = _time2

        '''Print Timing Information'''
        if verbose:
            print "# Timing Information"
            _t = 0
            for timing in timings.keys():
                _t = _t + timings[timing]
            for timing in timings.keys():
                print "# %12s %f  %f%%"%(timing,timings[timing],timings[timing]*100/_t)