Esempio n. 1
0
File: ga.py Progetto: smorin/exelixi
class Population (object):
    def __init__ (self, indiv_instance, ff_name, prefix="/tmp/exelixi"):
        self.indiv_class = indiv_instance.__class__
        self.feature_factory = instantiate_class(ff_name)

        self.prefix = prefix
        self._shard_id = None
        self._exe_dict = None
        self._hash_ring = None

        self.n_pop = self.feature_factory.n_pop
        self._total_indiv = 0
        self._term_limit = self.feature_factory.term_limit
        self._hist_granularity = self.feature_factory.hist_granularity

        self._selection_rate = self.feature_factory.selection_rate
        self._mutation_rate = self.feature_factory.mutation_rate

        self._shard = {}
        self._bf = BloomFilter(num_bytes=125, num_probes=14, iterable=[])


    def set_ring (self, shard_id, exe_dict):
        """initialize the HashRing"""
        self._shard_id = shard_id
        self._exe_dict = exe_dict
        self._hash_ring = HashRing(exe_dict.keys())


    ######################################################################
    ## Individual lifecycle within the local subset of the Population

    def populate (self, current_gen):
        """initialize the population"""
        for _ in xrange(self.n_pop):
            # constructor pattern
            indiv = self.indiv_class()
            indiv.populate(current_gen, self.feature_factory.generate_features())

            # add the generated Individual to the Population
            # failure semantics: must filter nulls from initial population
            self.reify(indiv)


    def reify (self, indiv):
        """test/add a newly generated Individual into the Population (birth)"""
        neighbor_shard_id = None
        exe_uri = None

        if self._hash_ring:
            neighbor_shard_id = self._hash_ring.get_node(indiv.key)

            if neighbor_shard_id != self._shard_id:
                exe_uri = self._exe_dict[neighbor_shard_id]

        # distribute this operation over the hash ring, through a remote queue
        if exe_uri:
            msg = { "key": indiv.key, "gen": indiv.gen, "feature_set": loads(indiv.get_json_feature_set()) }
            lines = post_exe_rest(self.prefix, neighbor_shard_id, exe_uri, "pop/reify", msg)
            return False
        else:
            return self._reify_locally(indiv)


    def receive_reify (self, key, gen, feature_set):
        """test/add a received reify request """
        indiv = self.indiv_class()
        indiv.populate(gen, feature_set)
        self._reify_locally(indiv)


    def _reify_locally (self, indiv):
        """test/add a newly generated Individual into the Population locally (birth)"""
        if not indiv.key in self._bf:
            self._bf.update([indiv.key])
            self._total_indiv += 1

            # potentially the most expensive operation, deferred until remote reification
            indiv.get_fitness(self.feature_factory, force=True)
            self._shard[indiv.key] = indiv

            return True
        else:
            return False


    def evict (self, indiv):
        """remove an Individual from the Population (death)"""
        if indiv.key in self._shard:
            # Individual only needs to be removed locally
            del self._shard[indiv.key]

            # NB: serialize to disk (write behinds)
            url = self._get_storage_path(indiv)


    def get_part_hist (self):
        """tally counts for the partial histogram of the fitness distribution"""
        d = (Counter([ round(indiv.get_fitness(self.feature_factory, force=False), self._hist_granularity) for indiv in self._shard.values() ])).items()
        d.sort(reverse=True)
        return d


    def get_fitness_cutoff (self, hist):
        """determine fitness cutoff (bin lower bounds) for the parent selection filter"""
        h = hist.items()
        h.sort(reverse=True)
        logging.debug("fit: %s", h)

        n_indiv = sum([ count for bin, count in h ])
        part_sum = 0
        break_next = False

        for bin, count in h:
            if break_next:
                break

            part_sum += count
            percentile = part_sum / float(n_indiv)
            break_next = percentile >= self._selection_rate

        logging.debug("fit: percentile %f part_sum %d n_indiv %d bin %f", percentile, part_sum, n_indiv, bin)
        return bin


    def _get_storage_path (self, indiv):
        """create a path for durable storage of an Individual"""
        return self.prefix + "/" + indiv.key


    def _boost_diversity (self, current_gen, indiv):
        """randomly select other individuals and mutate them, to promote genetic diversity"""
        if self._mutation_rate > random():
            indiv.mutate(self, current_gen, self.feature_factory)
        elif len(self._shard.values()) >= 3:
            # ensure that there are at least three parents
            self.evict(indiv)


    def _select_parents (self, current_gen, fitness_cutoff):
        """select the parents for the next generation"""
        partition = map(lambda x: (round(x.get_fitness(), self._hist_granularity) >= fitness_cutoff, x), self._shard.values())
        good_fit = map(lambda x: x[1], filter(lambda x: x[0], partition))
        poor_fit = map(lambda x: x[1], filter(lambda x: not x[0], partition))

        # randomly select other individuals to promote genetic diversity, while removing the remnant
        for indiv in poor_fit:
            self._boost_diversity(current_gen, indiv)

        return self._shard.values()


    def next_generation (self, current_gen, fitness_cutoff):
        """select/mutate/crossover parents to produce a new generation"""
        parents = self._select_parents(current_gen, fitness_cutoff)

        for _ in xrange(self.n_pop - len(parents)):
            f, m = sample(parents, 2) 
            success = f.breed(self, current_gen, m, self.feature_factory)

        # backfill to avoid the dreaded Population collapse
        for _ in xrange(self.n_pop - len(self._shard.values())):
            # constructor pattern
            indiv = self.indiv_class()
            indiv.populate(current_gen, self.feature_factory.generate_features())
            self.reify(indiv)

        logging.info("gen: %d shard %s size %d total %d", current_gen, self._shard_id, len(self._shard.values()), self._total_indiv)


    def test_termination (self, current_gen, hist):
        """evaluate the terminating condition for this generation and report progress"""
        return self.feature_factory.test_termination(current_gen, self._term_limit, hist)


    def enum (self, fitness_cutoff):
        """enum all Individuals that exceed the given fitness cutoff"""
        return [[ "%0.4f" % indiv.get_fitness(), str(indiv.gen), indiv.get_json_feature_set() ]
                for indiv in filter(lambda x: x.get_fitness() >= fitness_cutoff, self._shard.values()) ]


    def report_summary (self):
        """report a summary of the evolution"""
        for indiv in sorted(self._shard.values(), key=lambda x: x.get_fitness(), reverse=True):
            print self._get_storage_path(indiv)
            print "\t".join(["%0.4f" % indiv.get_fitness(), "%d" % indiv.gen, indiv.get_json_feature_set()])