Ejemplo n.º 1
0
    def test_get_hypothesis_from_debug_string(self):
        """Smoke-test rebuilding a hypothesis from a logged text dump.

        Initialises the ``french_two_rules`` simulation, parses the dump below
        with ``get_hypothesis_from_log_string``, logs the resulting hypothesis
        and prints its energy and recent energy signature. Nothing is
        asserted; the test only fails if one of those calls raises.
        """
        from simulations import french_two_rules as simulation
        self.initialise_simulation(simulation)

        # Verbatim hypothesis dump (HMM description, rule set, energy line).
        # It is input data for the parser, so its text and layout must not be
        # reformatted. NOTE(review): presumably copied from a log_hypothesis
        # output of an earlier run -- confirm.
        hypothesis_string = """
        HMM: states ['q1', 'q2'], transitions {'q1': ['q2'], 'q2': ['qf'], 'q0': ['q1']}, emissions {'q1': ['arb', 'tab', 'kup', 'yrl', 'purp', 'filt', 'romp', 'byl', 'dart', 'mord', 'kuverk', 'prut', 'kylt', 'amur', 'klop', 'film', 'kurb', 'kapt', 'tabl', 'krab', 'karaf', 'parl', 'provok', 'filtr', 'klad', 'kuverkl', 'purpr', 'odor', 'arbr', 'furyr', 'burk', 'kupl', 'batir', 'rompr', 'mordr', 'dartr'], 'q2': ['mal', 'iv', 'ε', 'fad', 'puri', 'byvab', 'kif', 'timid', 'abil']}
HMM:
q0: ['q1']
q1: ['q2'], ['amur', 'arb', 'arbr', 'batir', 'burk', 'byl', 'dart', 'dartr', 'film', 'filt', 'filtr', 'furyr', 'kapt', 'karaf', 'klad', 'klop', 'krab', 'kup', 'kupl', 'kurb', 'kuverk', 'kuverkl', 'kylt', 'mord', 'mordr', 'odor', 'parl', 'provok', 'prut', 'purp', 'purpr', 'romp', 'rompr', 'tab', 'tabl', 'yrl']
q2: ['qf'], ['abil', 'byvab', 'fad', 'iv', 'kif', 'mal', 'puri', 'timid', 'ε']
q0->q1->q2->qf
Rule Set:
transducer_generated:
[] --> [{'back': '+', 'center': '+', 'high': '-', 'low': '-'}] / [{'cons': '+'}, {'strident': '-'}, {'MB': True}]__[{'cons': '+'}] obligatory: False | ε --> ['e'] / [['p', 'd', 'v', 't', 'k', 'l', 'r', 'm', 'f', 'b'], ['p', 'm', 'd', 'e', 't', 'k', 'a', 'i', 'l', 'r', 'u', 'o', 'y', 'b'], ['B']]__[['p', 'd', 'v', 't', 'k', 'l', 'r', 'm', 'f', 'b']] obligatory: False
[] --> [{'back': '-', 'liquid': '+', 'strident': '-', 'voice': '+'}] / [{'cons': '-', 'lateral': '-'}, {'back': '-', 'center': '-', 'liquid': '-', 'voice': '+'}, {'MB': True}]__[] obligatory: False | ε --> ['l', 'r'] / [['e', 'a', 'i', 'u', 'o', 'y'], ['d', 'b', 'i', 'm', 'y', 'v'], ['B']]__[] obligatory: False
[] --> [{'lateral': '+'}] / [{'MB': True}]__[{'MB': True}, {'lateral': '-', 'voice': '+'}, {'voice': '+'}] obligatory: False | ε --> ['l'] / [['B']]__[['B'], ['o', 'd', 'b', 'e', 'a', 'i', 'r', 'u', 'm', 'y', 'v'], ['m', 'd', 'v', 'e', 'a', 'i', 'l', 'r', 'u', 'o', 'y', 'b']] obligatory: False
not transducer_generated:
Energy: 250,819.10989435515 (data_by_grammar: 230,883.2703113556, hmm: 19,624.83958299955, rule_set: 311.0)
        
        """

        # Parse the dump back into a hypothesis object, then echo it so the
        # round trip can be inspected by eye.
        hypothesis = get_hypothesis_from_log_string(hypothesis_string)
        log_hypothesis(hypothesis)
        print(hypothesis.get_energy())
        print(hypothesis.get_recent_energy_signature())
 def test_get_random_hypothesis(self):
     """Smoke-test random hypothesis generation on a four-word corpus.

     Switches on the ``EVOLVE_HMM`` and ``EVOLVE_RULES`` configuration
     flags, loads the plural-English segment table, asks ``Hypothesis`` for
     a random hypothesis over the corpus and logs it. Nothing is asserted.
     """
     for flag in ("EVOLVE_HMM", "EVOLVE_RULES"):
         self.configurations[flag] = True
     self.initialise_segment_table("plural_english_segment_table.txt")

     corpus = ['kats', 'dogz', 'kat', 'dog']
     log_hypothesis(Hypothesis.get_random_hypothesis(corpus))
    def run(self):
        """Run the evolutionary loop and publish this island's best hypothesis.

        The initial population is evaluated, the elite is kept, and the hall
        of fame is updated and reported. Then, for
        ``ga_config.TOTAL_GENERATIONS`` iterations, a new generation is made,
        the generation counter is advanced, the generation is reported, and
        the crossover/mutation probabilities are multiplied by their cooling
        rates. Finally the top hall-of-fame entry is logged and, if a result
        queue is set, put on it without blocking; a full queue is logged as
        an error rather than raised. The log is finalized at the end.
        """
        # Bootstrap: score and report the starting population.
        self.evaluate_population()
        self.keep_elite()
        self.hall_of_fame.update(self.population)
        self.debug_generation()

        for _generation in range(ga_config.TOTAL_GENERATIONS):
            self.make_generation()
            self.generation += 1
            self.debug_generation()

            # Cool down both operator probabilities after every generation.
            self.cxpb *= ga_config.CROSSOVER_COOLING_RATE
            self.mutpb *= ga_config.MUTATION_COOLING_RATE

        champion = self.hall_of_fame[0]
        headline = "** {} final best hypothesis: ".format(self.island_name)
        self.logger.info(headline)
        log_hypothesis(champion, self.logger.info)

        queue = self.result_queue
        if queue:
            try:
                queue.put((self.island_name, champion), block=False)
            except Full:
                self.logger.error(
                    '{}: Result queue full'.format(self.island_name))

        self.logger.finalize_log()
Ejemplo n.º 4
0
    def collect_all_island_results(self):
        """Pick the lowest-energy hypothesis among the collected island results.

        Flushes the queue, then walks ``self.final_queue`` (pairs of island
        name and hypothesis), logging each hypothesis and remembering the one
        with the smallest ``get_energy()``. The winner and its energy are
        stored on ``self.best_hypothesis`` / ``self.best_energy`` (``None`` and
        ``inf`` when there were no results). Expected island names
        ``island_<first_island_idx>`` .. ``island_<last_island_idx>`` that
        never reported are logged as missing.

        An island that reports more than once, or under a name outside the
        expected range, is tolerated: it still competes for the best energy
        and simply does not affect the missing list.
        """
        best_energy = float("inf")
        best_hypothesis = None
        self.flush_queue()
        self.logger.info("Looking at {}/{} results".format(
            len(self.final_queue), self.local_simulation_num_islands))
        expected_islands = [
            f'island_{i}'
            for i in range(self.first_island_idx, self.last_island_idx + 1)
        ]
        # Track who reported in a set instead of removing from the expected
        # list: list.remove() raises ValueError on a duplicate or unexpected
        # name, which would abort result collection.
        reported_islands = set()
        for island, hypothesis in self.final_queue:
            self.logger.info('{} best hypothesis:'.format(island))
            reported_islands.add(island)
            log_hypothesis(hypothesis, self.logger.info)

            energy = hypothesis.get_energy()
            if energy < best_energy:
                best_energy = energy
                best_hypothesis = hypothesis

        self.best_energy = best_energy
        self.best_hypothesis = best_hypothesis
        if self.best_hypothesis:
            self.logger.info('*Best hypothesis from all islands:*')
            log_hypothesis(self.best_hypothesis, self.logger.info)

        # Keep the expected order so the message reads island_0, island_1, ...
        missing_islands = [
            name for name in expected_islands if name not in reported_islands
        ]
        if missing_islands:
            self.logger.info(
                f'{len(missing_islands)} missing: {missing_islands}')
    def debug_generation(self):
        """Record statistics for the current generation and run periodic chores.

        Updates the hall of fame, compiles population statistics (an empty
        record when ``self.stats`` is falsy), appends them to the logbook and
        logs the logbook stream. The record, extended with the generation
        number and the string form of the top hall-of-fame hypothesis, is
        passed to the logger's stats writer. Depending on the generation
        number and ``ga_config`` intervals, the rule-set cache is cleared,
        the population is dumped, and the top hall-of-fame hypotheses are
        written to the debug log.
        """
        self.hall_of_fame.update(self.population)

        if self.stats:
            stats_record = self.stats.compile(self.population)
        else:
            stats_record = {}
        self.logbook.record(gen=self.generation,
                            nevals=self.new_individuals_in_generation,
                            **stats_record)
        self.logger.info(self.logbook.stream)

        best_as_text = hypothesis_to_string(self.hall_of_fame[0])
        stats_record.update(generation=self.generation,
                            best_hypothesis=best_as_text)
        self.logger.log_stats_record(stats_record, self.island_number)

        # Generation 0 never triggers cache clearing.
        if self.generation > 0:
            if self.generation % ga_config.CLEAR_RULE_SET_CACHE_INTERVAL == 0:
                RuleSet.clear_caching()

        # A non-positive interval disables population dumps; the check comes
        # first so the modulo is never taken with a zero interval.
        dump_interval = ga_config.DUMP_ALL_POPULATION_EVERY_N_GENERATIONS
        if dump_interval > 0 and self.generation % dump_interval == 0 and self.generation > 0:
            self.dump_population()

        if self.generation % ga_config.HALL_OF_FAME_DEBUG_INTERVAL != 0:
            return

        top_count = ga_config.HALL_OF_FAME_HYPOTHESES
        self.logger.debug('\n\n**** {} top {} hypothesis:****\n'.format(
            self.island_name, top_count))
        for rank in range(1, top_count + 1):
            try:
                hypo = self.hall_of_fame[rank - 1]
                self.logger.debug('** #{} **'.format(rank))
                log_hypothesis(hypo, self.logger.debug)
                self.logger.debug('\n')
            except IndexError:
                # Hall of fame holds fewer entries than requested.
                break
 def log_all_population(self):
     """Write every hypothesis of the current population to the debug log.

     Emits a header carrying the current generation number, then for each
     population member an index line, the hypothesis itself (via
     ``log_hypothesis``) and a blank separator, and finally a footer line.
     """
     self.logger.debug(
         "\n\n**** Population dump: Generation {} ****\n".format(
             self.generation))
     for index, hypo in enumerate(self.population):
         self.logger.debug('Hypothesis {}\n'.format(index))
         log_hypothesis(hypo, self.logger.debug)
         self.logger.debug('\n')
     # The footer has no placeholder, so it is logged as a plain literal
     # (the former .format(self.generation) call had no effect).
     self.logger.debug("**** End population dump ****")
    def finalize_simulation(self):
        """Dump the population, report the best hypothesis and close the log.

        The top hall-of-fame hypothesis is logged at info level. It is put
        on ``self.result_queue`` (non-blocking) only when the generation
        counter has reached ``self.simulation_total_generations`` and a
        result queue is set. The logger is then finalized for this island.
        """
        self.dump_population()

        champion = self.hall_of_fame[0]
        headline = "{} best hypothesis: ".format(self.island_name)
        self.logger.info(headline)
        log_hypothesis(champion, self.logger.info)

        # Only a run that reached its last generation publishes a result.
        finished = self.generation >= self.simulation_total_generations
        if finished and self.result_queue:
            self.result_queue.put((self.island_name, champion), block=False)

        self.logger.finalize_log(self.island_number)
 def init_target_hypothesis(self):
     """Build, log and return the simulation's target hypothesis.

     Reads ``self.simulation.target_tuple``: element 1 is loaded as a rule
     set and a deep copy of element 0 is wrapped in an ``HMM``. The energy
     of the resulting hypothesis is computed, and the hypothesis, its
     energy and its recent energy signature are logged at info level.

     Returns:
         A ``(target_hypothesis, target_energy)`` pair.
     """
     target = self.simulation.target_tuple
     rule_set = RuleSet.load_form_flat_list(target[1])
     # Deep-copy so building the HMM cannot alter the simulation's own data.
     hmm = HMM(deepcopy(target[0]))
     target_hypothesis = Hypothesis.create_hypothesis(hmm, rule_set)
     target_energy = target_hypothesis.get_energy()

     log_info = self.logger.info
     log_info('Target hypothesis:')
     log_hypothesis(target_hypothesis, self.logger.info)
     log_info('Target energy: {}'.format(target_energy))
     signature = target_hypothesis.get_recent_energy_signature()
     log_info('Target hypothesis energy signature: {}'.format(signature))
     return target_hypothesis, target_energy
 def test_representative_random_hypothesis_rate(self):
     """Count random hypotheses drawn until one has non-infinite energy.

     Keeps generating random hypotheses over a four-word corpus, logging
     each one and printing the running total, and stops after the first
     hypothesis whose energy is not infinite. That hypothesis is logged
     once more after the loop. Nothing is asserted.
     """
     self.initialise_segment_table("plural_english_segment_table.txt")
     data = ['kats', 'dogz', 'kat', 'dog']
     infinity = float("inf")
     attempts = 0
     while True:
         rand_hypothesis = Hypothesis.get_random_hypothesis(data)
         energy = rand_hypothesis.get_energy()
         log_hypothesis(rand_hypothesis)
         attempts += 1
         print("Total hypotheses generated: ", attempts)
         print()
         if energy != infinity:
             break
     log_hypothesis(rand_hypothesis)
Ejemplo n.º 10
0
    def collect_all_island_results(self):
        """Wait for every island's result and keep the lowest-energy hypothesis.

        Performs ``self.local_simulation_num_islands`` blocking reads from
        ``self.result_queue``; each item is an ``(island, hypothesis)`` pair
        that is logged at info level. The hypothesis with the smallest
        ``get_energy()`` and that energy are stored on
        ``self.best_hypothesis`` / ``self.best_energy`` (``None`` and ``inf``
        when nothing was read), and a truthy winner is logged again as the
        overall best.
        """
        lowest_energy = float("inf")
        winner = None
        log_info = self.logger.info

        for _ in range(self.local_simulation_num_islands):
            island, hypothesis = self.result_queue.get(block=True)
            log_info('{} best hypothesis:'.format(island))
            log_hypothesis(hypothesis, self.logger.info)

            energy = hypothesis.get_energy()
            if energy < lowest_energy:
                lowest_energy, winner = energy, hypothesis

        self.best_energy = lowest_energy
        self.best_hypothesis = winner
        if winner:
            self.logger.info('*Best hypothesis from all islands:*')
            log_hypothesis(winner, self.logger.info)
    def debug_generation(self):
        """Record statistics for the current generation and run periodic chores.

        Updates the hall of fame, compiles population statistics (an empty
        record when ``self.stats`` is falsy), appends them to the logbook and
        logs the logbook stream. The record, extended with the generation
        number and the string form of the top hall-of-fame hypothesis, is
        passed to the logger's stats writer. Depending on the generation
        number and ``ga_config`` intervals, the population is dumped, the
        key-value cache is flushed, the rule-set and bracket-rule-transducer
        caches are cleared, the whole population is logged, and the top
        hall-of-fame hypotheses are written to the debug log.
        """
        self.hall_of_fame.update(self.population)

        if self.stats:
            stats_record = self.stats.compile(self.population)
        else:
            stats_record = {}
        self.logbook.record(gen=self.generation,
                            nevals=self.new_individuals_in_generation,
                            **stats_record)
        self.logger.info(self.logbook.stream)

        best_as_text = hypothesis_to_string(self.hall_of_fame[0])
        stats_record.update(generation=self.generation,
                            best_hypothesis=best_as_text)
        self.logger.log_stats_record(stats_record, self.island_number)

        # No dump on the generation the run started from.
        if self.generation != self.initial_generation:
            if self.generation % ga_config.DUMP_POPULATION_INTERVAL == 0:
                self.dump_population()

        if self.generation > 0:
            if self.generation % ga_config.CLEAR_KEY_VALUE_CACHE_INTERVAL == 0:
                self.cache.flush()

        if self.generation > 0:
            if self.generation % ga_config.CLEAR_TRANSDUCERS_CACHE_INTERVAL == 0:
                from bracket_rule_transducer import BracketRuleTransducer
                self.logger.info("Clearing rule set cache...")
                RuleSet.clear_caching()
                self.logger.info("Clearing bracket rule transducers cache...")
                BracketRuleTransducer.clear_caching()

        # A non-positive interval disables population logging; the check
        # comes first so the modulo is never taken with a zero interval.
        log_interval = ga_config.LOG_POPULATION_INTERVAL
        if log_interval > 0 and self.generation % log_interval == 0 and self.generation > 0:
            self.log_all_population()

        if self.generation % ga_config.HALL_OF_FAME_DEBUG_INTERVAL != 0:
            return

        top_count = ga_config.HALL_OF_FAME_HYPOTHESES
        self.logger.debug('\n\n**** {} top {} hypothesis:****\n'.format(
            self.island_name, top_count))
        for rank in range(1, top_count + 1):
            try:
                hypo = self.hall_of_fame[rank - 1]
                self.logger.debug('** #{} **'.format(rank))
                log_hypothesis(hypo, self.logger.debug)
                self.logger.debug('\n')
            except IndexError:
                # Hall of fame holds fewer entries than requested.
                break
Ejemplo n.º 12
0
    def start(self):
        """Run one simulation per parameter combination and log the outcomes.

        For every combination in ``self.all_combinations`` the i-th value is
        assigned to the ``ga_config`` module attribute named by
        ``GRID[i][0]``, and the simulation is run. A truthy final hypothesis
        is logged and recorded in ``self.results`` as
        ``[params, energy]``; a falsy one is reported as an error and the
        search moves on. The accumulated results are logged as JSON at the
        end.
        """
        logger.info('GRID SEARCH ')
        for run_number, combination in enumerate(self.all_combinations, start=1):
            # Patch the live ga_config module with this combination's values.
            current_params = {}
            for position, value in enumerate(combination):
                name = GRID[position][0]
                setattr(modules['ga_config'], name, value)
                current_params[name] = value

            logger.info('GRID SEARCH Starting simulation {}/{} with params:\n{}'.format(
                run_number, len(self.all_combinations), str(current_params)))
            final_hypothesis = self.run_simulation()
            if not final_hypothesis:
                logger.error('GRID SEARCH failed - simulation failed with current params '
                             '(probably param combination is invalid). Trying next combination on grid...')
                continue

            self.results.append([current_params, final_hypothesis.get_energy()])
            logger.info('GRID SEARCH Ended simulation with final hypothesis:')
            log_hypothesis(final_hypothesis, logger.info)

        logger.info('GRID SEARCH Final results:\n{}'.format(json.dumps(self.results)))
Ejemplo n.º 13
0
    def test_crossover_random(self):
        """Smoke-test crossover between two freshly created GA hypotheses.

        Loads the segment table, enables rule and HMM evolution with at most
        three rules, creates two ``GAHypothesis`` objects over the same
        sixteen-word corpus, and prints and logs both parents and the two
        offspring returned by ``GeneticAlgorithm._crossover``. Nothing is
        asserted.
        """
        self.initialise_segment_table("dag_zook_segments_new.txt")
        data = [
            'kat', 'dot', 'dag', 'kod',
            'katso', 'dotso', 'dagzo', 'kodzo',
            'katko', 'dotko', 'daggo', 'kodgo',
            'katto', 'dotto', 'dagdo', 'koddo',
        ]

        settings = (
            ('EVOLVE_RULES', True),
            ('EVOLVE_HMM', True),
            ('MAX_NUMBER_OF_RULES', 3),
        )
        for key, value in settings:
            config[key] = value

        # Create both parents before any of them is printed.
        h1 = GAHypothesis(data)
        h2 = GAHypothesis(data)
        for title, parent in (('\nH1\n', h1), ('\nH2\n', h2)):
            print(title)
            log_hypothesis(parent)

        offspring_1, offspring_2 = GeneticAlgorithm._crossover(h1, h2)
        for title, child in (('\nOFFSPRING 1\n', offspring_1),
                             ('\nOFFSPRING 2\n', offspring_2)):
            print(title)
            log_hypothesis(child)