def __init__(self, seq=''):
    """Initialize the instance from the sequence ``seq`` (empty by default).

    ``seq`` is stored as given; ``info``, ``logP`` and ``c_wmerbgs`` start
    out as ``''``, ``0`` and ``None``.  For a non-empty ``seq`` the sequence
    is additionally converted with ``MDsupport.seq2int`` and stored as
    ``c_intA``, after which ``compute_logP()`` is called.
    """
    self.seq = seq
    self.info = ''
    self.logP = 0
    self.c_wmerbgs = None

    if not seq:
        # Nothing to encode or score; ``c_intA`` is left unset in this case.
        return

    self.c_intA = MDsupport.seq2int(seq)
    self.compute_logP()
def EM_Cstart(self):
    """Seed the models, then refine every seed with ``self.EM_C``.

    Steps, in order:

    1. Call ``self.seed_models()`` and default ``self.param['gamma']`` to
       0.2 when the key is absent.
    2. Wrap every entry of ``self.seqs`` in a ``Probe`` and append it to
       ``self.probes`` (appended, not replaced).
    3. For each model in ``self.models``: call ``self.calcmask(width)``,
       build the per-probe background arrays once per distinct width via
       ``self.all_Wmers`` and ``MDsupport.list2double``, attach them to
       the probes as ``c_wmerbgs``, and run ``self.EM_C``.
    4. For every model where ``EM_C`` did not return ``None``, copy the
       seed bookkeeping onto the result and append a ``MotifCandidate``
       wrapping it to ``self.candidates``.

    Progress and timing output is printed only when ``self.verbose`` is
    true; the per-seed summary lines are always printed.

    Returns:
        None.  Results are left in ``self.probes`` and ``self.candidates``.
    """
    verbose = self.verbose
    if verbose:
        print("Seeding models...")
        sys.stdout.flush()
    self.seed_models()

    # Initialize parameters
    if 'gamma' not in self.param:
        self.param['gamma'] = 0.2

    # Wall-clock seconds accumulated per phase.
    timings = {'Probes': 0, 'Background': 0, 'C EM': 0, 'Post': 0}
    _time = time.time()
    for seq in self.seqs:
        self.probes.append(Probe(seq))
    _time2 = time.time()
    timings['Probes'] = _time2 - _time
    _time = _time2

    if verbose:
        print("Optimizing candidates by EM.")
        sys.stdout.flush()

    # width -> list of background arrays, one per probe, so that models
    # sharing a width reuse the same background computation.
    c_logZ_sets = {}
    for i, Model in enumerate(self.models):
        width = Model.width
        self.calcmask(width)
        if width not in c_logZ_sets:
            c_logZs_set = []
            if verbose:
                print("#%s |%s|" % (' ' * 28, '-' * len(self.seqs)))
                sys.stdout.flush()
                # Written without a newline so the progress dots follow
                # on the same line.
                sys.stdout.write("Computing background (width %2d) " % width)
            for P in self.probes:
                if verbose:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                logZs = self.all_Wmers(width, P)
                c_logZs_set.append(MDsupport.list2double(logZs))
            c_logZ_sets[width] = c_logZs_set
            if verbose:
                sys.stdout.write('\n')  # terminate the progress line

        for P, c_logZs in zip(self.probes, c_logZ_sets[width]):
            P.c_wmerbgs = c_logZs
        _time2 = time.time()
        timings['Background'] = timings['Background'] + _time2 - _time

        # Perform EM
        _time = time.time()
        newModel = self.EM_C(Model, self.probes)
        _time2 = time.time()
        timings['C EM'] = timings['C EM'] + _time2 - _time
        _time = _time2

        # Was there a problem?  Identity test, so a model-defined
        # comparison operator is never invoked against None.
        if newModel is None:
            continue

        # Set various things in PSSM.
        # Distance to the seed; argument order is important.
        seeddist = MotifTools.infomaskdiff(newModel, Model)
        print('%s ----> %s' % (Model, newModel))
        print("Seed %2d: %s --> %s mask:%9.5f infoMask:%9.5f d:%9.5f" % (
            i, Model, newModel,
            MotifTools.maskdiff(newModel, Model),
            seeddist,
            Model - newModel))

        # Carry the seed's provenance over to the refined model.
        if Model.seedtxt:
            newModel.seedtxt = Model.seedtxt
        if Model.source:
            newModel.source = Model.source
        newModel.seeddist = seeddist
        newModel.seednum = i
        print(newModel)
        newModel._print_p()
        newModel._print_ll()

        # Set various things in Candidate (like a wrapper for PSSM)
        C = MotifCandidate()
        C.pssm = newModel.copy()
        C.wmers = [newModel.emit() for _ in range(20)]
        # NOTE(review): pssm is assigned a fresh copy a second time; this
        # looks like a leftover from the removed C._update() call
        # (DBG 10-14-03) -- confirm before dropping either assignment.
        C.pssm = newModel.copy()
        self.candidates.append(C)
        _time2 = time.time()
        timings['Post'] = timings['Post'] + _time2 - _time
        _time = _time2

    # Print Timing Information
    if verbose:
        print("# Timing Information")
        _t = 0
        for timing in timings:
            _t = _t + timings[timing]
        for timing in timings:
            elapsed = timings[timing]
            # The total can be exactly zero when there was nothing to do
            # (or the clock is coarse); report 0% instead of dividing by 0.
            if _t:
                percent = elapsed * 100 / _t
            else:
                percent = 0.0
            print("# %12s %f %f%%" % (timing, elapsed, percent))