class Population(object):
    """The local shard of a Population of Individuals.

    Individuals are held in a dict keyed by ``indiv.key``.  When a hash ring
    has been configured through ``set_ring``, newly generated Individuals
    whose key maps to another shard are forwarded to that shard over REST
    instead of being stored locally.
    """

    def __init__(self, indiv_instance, ff_name, prefix="/tmp/exelixi"):
        """set up an empty shard

        indiv_instance -- an instance whose class is used to construct new Individuals
        ff_name -- name handed to instantiate_class() to build the feature factory
        prefix -- path prefix used for storage paths and for REST calls
        """
        self.indiv_class = indiv_instance.__class__
        self.feature_factory = instantiate_class(ff_name)

        self.prefix = prefix
        self._shard_id = None
        self._exe_dict = None
        self._hash_ring = None

        # tuning parameters are all read from the feature factory
        self.n_pop = self.feature_factory.n_pop
        self._total_indiv = 0
        self._term_limit = self.feature_factory.term_limit
        self._hist_granularity = self.feature_factory.hist_granularity
        self._selection_rate = self.feature_factory.selection_rate
        self._mutation_rate = self.feature_factory.mutation_rate

        # live Individuals of this shard, keyed by indiv.key
        self._shard = {}
        # remembers every key ever reified locally, so duplicates are rejected
        self._bf = BloomFilter(num_bytes=125, num_probes=14, iterable=[])

    def set_ring(self, shard_id, exe_dict):
        """initialize the HashRing

        shard_id -- identifier of this shard
        exe_dict -- maps shard id to the URI of the executor serving that shard
        """
        self._shard_id = shard_id
        self._exe_dict = exe_dict
        # pass a real list, so the ring does not depend on a live dict view
        self._hash_ring = HashRing(list(exe_dict.keys()))

    ######################################################################
    ## Individual lifecycle within the local subset of the Population

    def populate(self, current_gen):
        """initialize the population with n_pop freshly generated Individuals"""
        for _ in range(self.n_pop):
            # constructor pattern
            indiv = self.indiv_class()
            indiv.populate(current_gen, self.feature_factory.generate_features())

            # add the generated Individual to the Population
            # failure semantics: must filter nulls from initial population
            self.reify(indiv)

    def reify(self, indiv):
        """test/add a newly generated Individual into the Population (birth)

        Returns the result of the local insert, or False when the Individual
        was forwarded to another shard.
        """
        neighbor_shard_id = None
        exe_uri = None

        if self._hash_ring:
            neighbor_shard_id = self._hash_ring.get_node(indiv.key)

            if neighbor_shard_id != self._shard_id:
                exe_uri = self._exe_dict[neighbor_shard_id]

        if not exe_uri:
            return self._reify_locally(indiv)

        # distribute this operation over the hash ring, through a remote queue
        msg = {
            "key": indiv.key,
            "gen": indiv.gen,
            "feature_set": loads(indiv.get_json_feature_set()),
        }
        post_exe_rest(self.prefix, neighbor_shard_id, exe_uri, "pop/reify", msg)
        return False

    def receive_reify(self, key, gen, feature_set):
        """test/add a received reify request

        The key argument is accepted but not used; the Individual is rebuilt
        from gen and feature_set.
        """
        indiv = self.indiv_class()
        indiv.populate(gen, feature_set)
        self._reify_locally(indiv)

    def _reify_locally(self, indiv):
        """test/add a newly generated Individual into the Population locally (birth)

        Returns True when the Individual was added, False when its key was
        already present in the Bloom filter.
        """
        if indiv.key in self._bf:
            return False

        self._bf.update([indiv.key])
        self._total_indiv += 1

        # potentially the most expensive operation, deferred until remote reification
        indiv.get_fitness(self.feature_factory, force=True)
        self._shard[indiv.key] = indiv
        return True

    def evict(self, indiv):
        """remove an Individual from the Population (death)"""
        if indiv.key in self._shard:
            # Individual only needs to be removed locally
            del self._shard[indiv.key]
            # TODO: serialize to disk (write-behind) at self._get_storage_path(indiv)

    def get_part_hist(self):
        """tally counts for the partial histogram of the fitness distribution

        Returns a list of (rounded fitness, count) pairs, highest fitness first.
        """
        tally = Counter(
            round(indiv.get_fitness(self.feature_factory, force=False), self._hist_granularity)
            for indiv in self._shard.values()
        )
        return sorted(tally.items(), reverse=True)

    def get_fitness_cutoff(self, hist):
        """determine fitness cutoff (bin lower bounds) for the parent selection filter

        hist -- dict mapping fitness bin to the count of Individuals in that bin

        Bins are walked from the highest fitness down until the cumulative
        share of Individuals reaches the selection rate; the bin following
        that one (or the last bin, if there is none) is returned.

        Returns 0.0 when the histogram holds no Individuals; this assumes
        fitness values are non-negative, so nothing gets filtered -- TODO confirm.
        """
        bins = sorted(hist.items(), reverse=True)
        logging.debug("fit: %s", bins)

        n_indiv = sum(count for _, count in bins)

        if not n_indiv:
            # empty histogram (or all-zero counts): there is no percentile to compute
            return 0.0

        part_sum = 0
        break_next = False

        for fitness_bin, count in bins:
            if break_next:
                break

            part_sum += count
            percentile = part_sum / float(n_indiv)
            break_next = percentile >= self._selection_rate

            logging.debug("fit: percentile %f part_sum %d n_indiv %d bin %f", percentile, part_sum, n_indiv, fitness_bin)

        return fitness_bin

    def _get_storage_path(self, indiv):
        """create a path for durable storage of an Individual"""
        return self.prefix + "/" + indiv.key

    def _boost_diversity(self, current_gen, indiv):
        """either mutate the given Individual, to promote genetic diversity, or evict it

        With probability given by the mutation rate the Individual is mutated;
        otherwise it is evicted, provided the shard holds at least three Individuals.
        """
        if self._mutation_rate > random():
            indiv.mutate(self, current_gen, self.feature_factory)
        elif len(self._shard) >= 3:
            # ensure that there are at least three parents
            self.evict(indiv)

    def _select_parents(self, current_gen, fitness_cutoff):
        """select the parents for the next generation

        Returns a list snapshot of the Individuals left in the shard.
        """
        # snapshot first: mutation/eviction below modifies self._shard
        poor_fit = [
            indiv
            for indiv in list(self._shard.values())
            if not round(indiv.get_fitness(), self._hist_granularity) >= fitness_cutoff
        ]

        # randomly select other individuals to promote genetic diversity, while removing the remnant
        for indiv in poor_fit:
            self._boost_diversity(current_gen, indiv)

        return list(self._shard.values())

    def next_generation(self, current_gen, fitness_cutoff):
        """select/mutate/crossover parents to produce a new generation"""
        parents = self._select_parents(current_gen, fitness_cutoff)

        # crossover needs two distinct parents; with fewer, rely on the backfill below
        if len(parents) >= 2:
            for _ in range(self.n_pop - len(parents)):
                f, m = sample(parents, 2)
                f.breed(self, current_gen, m, self.feature_factory)

        # backfill to avoid the dreaded Population collapse
        for _ in range(self.n_pop - len(self._shard)):
            # constructor pattern
            indiv = self.indiv_class()
            indiv.populate(current_gen, self.feature_factory.generate_features())
            self.reify(indiv)

        logging.info("gen: %d shard %s size %d total %d", current_gen, self._shard_id, len(self._shard), self._total_indiv)

    def test_termination(self, current_gen, hist):
        """evaluate the terminating condition for this generation and report progress"""
        return self.feature_factory.test_termination(current_gen, self._term_limit, hist)

    def enum(self, fitness_cutoff):
        """enum all Individuals whose fitness is at least the given fitness cutoff

        Returns a list of [formatted fitness, generation as str, JSON feature set].
        """
        return [
            ["%0.4f" % indiv.get_fitness(), str(indiv.gen), indiv.get_json_feature_set()]
            for indiv in self._shard.values()
            if indiv.get_fitness() >= fitness_cutoff
        ]

    def report_summary(self):
        """report a summary of the evolution, best fitness first, on stdout"""
        ranked = sorted(self._shard.values(), key=lambda x: x.get_fitness(), reverse=True)

        for indiv in ranked:
            # single-argument print() behaves the same as the print statement
            print(self._get_storage_path(indiv))
            print("\t".join(["%0.4f" % indiv.get_fitness(), "%d" % indiv.gen, indiv.get_json_feature_set()]))