class Organism(object):
    """Emulate an ESTReMo-style organism.

    An organism holds a motif (a list of site strings), one target value per
    site, a recognizer exposing ``binding_energy(site)``, and a background
    string whose contribution to the partition function is estimated by
    sampling.
    """

    def __init__(self, paragraph_text, background_string, overshooting=True,
                 background_scaling_factor=100, ncr_size=32):
        """Initialize an organism from a paragraph and a background.

        paragraph_text -- newline-separated text handed to ``Paragraph``.
        background_string -- sequence used to estimate the background
            partition function.
        overshooting -- if False, ``site_fitness`` caps the fitness of a
            site expressed above its optimum at the optimum's fitness.
        background_scaling_factor -- samples * scaling factor = effective
            background; the number of samples drawn is
            ``len(background) // background_scaling_factor``.
        ncr_size -- length each site is padded to in ``serialize``.
        """
        paragraph_lines = paragraph_text.split("\n")
        self.paragraph = Paragraph(paragraph_lines)
        self.targets = [seq.tProb for seq in self.paragraph.seq_data]
        self.motif = self.paragraph.motif()
        self.original_motif = self.motif[:]  # copy for backup against mutations
        self.recognizer = self.paragraph.recognizer
        self.background = background_string
        self.memo_background_Z = None  # filled lazily by background_Z()
        self.overshooting = overshooting
        self.background_scaling_factor = background_scaling_factor
        # Honour the caller's value; this used to be hard-coded to 32.
        self.ncr_size = ncr_size

    def fitness(self):
        """Return the summed site fitness of the current motif.

        Each site's occupancy is its Boltzmann weight divided by the total
        partition function (all motif sites plus the sampled background).
        Prints the occupancies as a side effect.
        """
        site_energies = [self.recognizer.binding_energy(site)
                         for site in self.motif]
        # Compute each Boltzmann weight once and reuse it for both the
        # partition function and the occupancies.
        site_weights = [exp(-beta * energy) for energy in site_energies]
        Z = sum(site_weights) + self.background_Z()
        site_occupancies = [weight / Z for weight in site_weights]
        print("site occupancies: %s" % (site_occupancies,))
        return sum(self.site_fitness(occ, target)
                   for (occ, target) in zip(site_occupancies, self.targets))

    def background_Z(self):
        """Return the (memoized) sampled background partition function.

        On the first call, draws ``len(background) // background_scaling_factor``
        random substrings of the motif width, sums their Boltzmann weights,
        and scales the sum back up by ``background_scaling_factor``.  Later
        calls return the cached value, so the estimate is fixed for the
        lifetime of the organism.
        """
        if self.memo_background_Z is not None:
            return self.memo_background_Z
        w = len(self.motif[0])
        n = len(self.background)
        # Floor division keeps the sample count an integer for range().
        samples = n // self.background_scaling_factor
        print("samples: %s" % samples)
        be = self.recognizer.binding_energy
        bkgd_energies = [be(random_substring(self.background, w))
                         for _ in range(samples)]
        sampled_Z = sum(exp(-beta * energy) for energy in bkgd_energies)
        self.memo_background_Z = sampled_Z * self.background_scaling_factor
        return self.memo_background_Z

    def site_fitness(self, occupancy, target):
        """Return the fitness contribution of one site.

        occupancy -- occupancy of the site (see ``fitness``).
        target -- target value for the site (from ``self.targets``).

        When ``self.overshooting`` is False, a site whose expression level
        exceeds the optimum is given the optimum's fitness instead of being
        penalized.

        Raises ZeroDivisionError when ``occupancy == 1`` or when
        ``(delta / eta) * (1 - target)**2 == 1``.
        """
        # Params come from config file
        delta = 0.17
        eta = 0.02
        M = 1.8
        Ky = 0.4
        Z = occupancy * M
        L = Ky / ((delta / eta) * (1 - target) ** 2 - 1)
        g = delta * ((Z * L) / (Ky + L)) - eta * (Z / (1 - (Z / M)))
        if not self.overshooting:  # i.e. if overshooting is not penalized...
            # compute optimum expression level
            Zopt = M * (1 - sqrt((eta / delta) * ((L + Ky) / L)))
            # and fitness corresponding to optimum expression
            gopt = (delta * ((Zopt * L) / (Ky + L))
                    - eta * (Zopt / (1 - (Zopt / M))))
            if Z > Zopt:
                return gopt
        return g

    def mutate_site(self, site_number, position, base):
        """Replace the base at ``position`` of site ``site_number`` in place."""
        site = self.motif[site_number]
        self.motif[site_number] = string_replace(site, position, base)

    def reset_motif(self):
        """Restore the motif from a fresh copy of ``original_motif``."""
        self.motif = self.original_motif[:]

    def explore_site_mutations(self):
        """Evaluate every single-base substitution of the current motif.

        Snapshots the current motif as ``original_motif``, then for each
        site, position and alternative base: applies the mutation, computes
        the fitness, prints a report line, and resets the motif.

        Returns a list of ``(site_number, position, base, fitness)`` tuples.
        """
        print(self.motif)
        self.original_motif = self.motif[:]
        fit = self.fitness()
        mutations = []
        site_length = len(self.motif[0])
        for site_number in range(len(self.motif)):
            for position in range(site_length):
                original_base = self.motif[site_number][position]
                for base in "ACTG":
                    if original_base == base:
                        continue  # don't recompute original fitness
                    self.mutate_site(site_number, position, base)
                    fit_prime = self.fitness()
                    report_string = "Improvement" if fit_prime > fit else ""
                    # Relative change; undefined when the baseline is zero.
                    diff = (fit_prime - fit) / fit if fit else float("nan")
                    mutations.append((site_number, position, base, fit_prime))
                    # Report fit_prime directly rather than calling
                    # fitness() a second time for the same motif.
                    report_fields = (site_number, position, base, fit_prime,
                                     diff, report_string)
                    print(" ".join(str(field) for field in report_fields))
                    self.reset_motif()
        return mutations

    def grad_descent(self):
        """Repeatedly apply the single-base mutation with the highest fitness.

        Each iteration explores all single-base mutations, applies the one
        with maximal fitness to the motif, and prints a progress line.
        """
        # NOTE(review): the loop condition tests the absolute fitness of the
        # mutants (> 0), not whether any mutant improves on the current
        # motif, so this may never terminate while fitness stays positive.
        # Confirm the intended stopping rule.
        iteration = 0
        mutations = None
        while (mutations is None
               or any(fit > 0 for (site, pos, base, fit) in mutations)):
            mutations = self.explore_site_mutations()
            # max by fitness
            site_idx, pos, base, fit = max(mutations, key=lambda tup: tup[3])
            self.motif[site_idx] = string_replace(self.motif[site_idx],
                                                  pos, base)
            progress_fields = ("iteration:", iteration, site_idx, pos, base,
                               fit)
            print(" ".join(str(field) for field in progress_fields))
            iteration += 1

    def serialize(self, population_size):
        """Print self out per ESTReMo's population serialization format.

        Emits ``population_size`` identical copies of this organism: for
        each copy, its index, the number of sites, every site padded with
        random bases to ``ncr_size``, a recognizer type code, and the
        flattened recognizer weights.
        """
        print(population_size)
        for i in range(population_size):
            print(i)
            print(len(self.motif))
            for site in self.motif:
                # TODO we should probably define the organism to
                # include the ncr.  For now, randomize the rest of the
                # ncr
                print(site + random_site(self.ncr_size - len(site)))
            # Exact type check kept on purpose: 1 for SLP, 2 for anything else.
            print(1 if type(self.recognizer) is SLP else 2)
            # TODO needs to be generalized to MLPs!
            weights = concat([list(row) for row
                              in transpose(self.recognizer.input_layer)])
            print(len(weights))
            for weight in weights:
                print(weight)