def setUp(self):
    """Configure underspecification-related settings, load the segment table,
    and build fixture data; also exercises a voice-underspecification rule
    directly (prints its segment representation and the outputs for 'daTB')."""
    configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 25
    configurations["MORPHEME_BOUNDARY_FLAG"] = True
    configurations["UNDERSPECIFICATION_FLAG"] = True
    self.initialise_segment_table("underspecification_segment_table.txt")
    self.data = ['dat', 'tat', 'da', 'ta']
    # NOTE(review): `hmm` is built but never stored on self or used below —
    # possibly leftover fixture code; confirm before removing.
    hmm = HMM({
        'q0': ['q1'],
        'q1': (['q2', 'qf'], [
            'dag', 'kat', 'dot', 'kod', 'gas', 'toz', 'kta', 'dgo',
            'skoz', 'gdas'
        ]),
        'q2': (['qf'], ['zook', 'gos', 'dod', 'sad'])
    })
    # "voice": "0" presumably targets segments underspecified for voicing,
    # filled in as [-voice] before a morpheme boundary ("bound": "+") —
    # TODO confirm against Rule semantics.
    rule = Rule([{"voice": "0"}], [{"voice": "-"}], [], [{"bound": "+"}],
                True)
    rule.get_transducer()
    print(rule.get_segment_representation())
    rule_set = RuleSet([rule])
    print(rule_set.get_outputs_of_word('daTB'))
def get_random_hypothesis_by_mutations(cls, data, fixed_hmm=None, fixed_rules=None):
    """Create an initial hypothesis and warm it up with random mutations.

    The starting rule set / HMM are fresh unless evolution of that component
    is disabled and a fixed one was supplied. Then RANDOM_INIT_WARMUP_STEPS
    mutation attempts are made; an improving mutation is always kept, a
    worsening one is kept with probability ACCEPT_WORSE_PROBAB.
    """
    # Rule set: only use the fixed rules when rules are not being evolved.
    if not configurations["EVOLVE_RULES"] and fixed_rules:
        initial_rule_set = RuleSet.load_from_flat_list(fixed_rules)
    else:
        initial_rule_set = RuleSet()

    # HMM: only use the fixed HMM when the HMM is not being evolved.
    if not configurations["EVOLVE_HMM"] and fixed_hmm:
        initial_hmm = HMM(deepcopy(fixed_hmm))
    else:
        initial_hmm = None

    hypothesis = Hypothesis.create_initial_hypothesis(
        data, initial_hmm=initial_hmm, initial_rule_set=initial_rule_set)

    for _ in range(ga_config.RANDOM_INIT_WARMUP_STEPS):
        candidate = deepcopy(hypothesis)
        mutated = candidate.grammar.make_mutation()
        energy_before = hypothesis.get_energy()
        energy_after = candidate.get_energy()
        if mutated and not isinf(energy_after):
            # Short-circuit keeps the RNG draw conditional, as before.
            if energy_after < energy_before or random.random() < ga_config.ACCEPT_WORSE_PROBAB:
                hypothesis = candidate
    return hypothesis
def test_unequal_rules_crossover(self):
    """Pivot crossover between rule sets of unequal size (1 rule vs 2).

    The RNG is pre-seeded so the crossover pivots are deterministic:
    offspring 1 should come out as [r1, r3] and offspring 2 as [r2].
    """
    self.configurations['RULE_SET_CROSSOVER_METHOD'] = 'pivot'
    self.initialise_segment_table("plural_english_segment_table.txt")
    r1 = Rule([{"velar": "+"}], [{"velar": "-"}], [{"cons": "+"}],
              [{"cons": "+"}], obligatory=True)
    r2 = Rule([{"voice": "+"}], [{"voice": "-"}], [{"velar": "+"}],
              [{"cons": "+"}], obligatory=True)
    r3 = Rule([{"voice": "-"}], [{"cons": "-"}], [{"cons": "+"}],
              [{"voice": "+"}], obligatory=False)
    rule_1 = RuleSet([r1])
    rule_2 = RuleSet([r2, r3])
    # Pin the sequence of RNG draws crossover() will make: one
    # randrange(1) call followed by five randint(0, 1) calls — order
    # must match the implementation of RuleSet.crossover.
    self._seed_me_multiple(
        methods=[random.randrange] + [random.randint for _ in range(5)],
        argss=[[1]] + [[0, 1] for _ in range(5)],
        expecteds=[0, 0, 0, 0, 1, 0])
    offspring_1, offspring_2 = RuleSet.crossover(rule_1, rule_2)
    self.assertEqual(str(offspring_1), str(RuleSet([r1, r3])))
    self.assertEqual(str(offspring_2), str(RuleSet([r2])))
    log_rule_set(rule_1)
    print()
    log_rule_set(rule_2)
    print()
    log_rule_set(offspring_1)
    print()
    log_rule_set(offspring_2)
def debug_generation(self):
    """Per-generation bookkeeping for the GA island: update the hall of
    fame, record/log stats, periodically clear the rule-set cache, dump
    the population, and log the current top hypotheses."""
    self.hall_of_fame.update(self.population)
    record = self.stats.compile(self.population) if self.stats else {}
    self.logbook.record(gen=self.generation,
                        nevals=self.new_individuals_in_generation,
                        **record)
    self.logger.info(self.logbook.stream)
    best_hypothesis_str = hypothesis_to_string(self.hall_of_fame[0])
    record.update({
        'generation': self.generation,
        'best_hypothesis': best_hypothesis_str
    })
    self.logger.log_stats_record(record, self.island_number)
    # Periodic cache clear (skipped at generation 0); presumably bounds
    # memory growth of RuleSet's caching — confirm.
    if self.generation > 0 and self.generation % ga_config.CLEAR_RULE_SET_CACHE_INTERVAL == 0:
        RuleSet.clear_caching()
    # Optional full-population dump; disabled when the interval is <= 0,
    # and never performed at generation 0.
    if ga_config.DUMP_ALL_POPULATION_EVERY_N_GENERATIONS > 0 and self.generation % ga_config.DUMP_ALL_POPULATION_EVERY_N_GENERATIONS == 0 and self.generation > 0:
        self.dump_population()
    if self.generation % ga_config.HALL_OF_FAME_DEBUG_INTERVAL == 0:
        self.logger.debug('\n\n**** {} top {} hypothesis:****\n'.format(
            self.island_name, ga_config.HALL_OF_FAME_HYPOTHESES))
        for i in range(ga_config.HALL_OF_FAME_HYPOTHESES):
            try:
                hypo = self.hall_of_fame[i]
                self.logger.debug('** #{} **'.format(i + 1))
                log_hypothesis(hypo, self.logger.debug)
                self.logger.debug('\n')
            except IndexError:
                # Hall of fame holds fewer entries than requested.
                break
def test_crossover(self):
    """Cross over two single-rule rule sets and log parents and offspring."""
    self.initialise_segment_table("plural_english_segment_table.txt")
    devoicing = Rule([{"voice": "+"}], [{"voice": "-"}], [{"velar": "+"}],
                     [{"cons": "+"}], obligatory=True)
    lowering = Rule([{"low": "-"}], [{"low": "+"}], [{"cont": "+"}],
                    [{"voice": "-"}], obligatory=True)
    parent_1 = RuleSet([devoicing])
    parent_2 = RuleSet([lowering])
    offspring_1, offspring_2 = RuleSet.crossover(parent_1, parent_2)
    log_rule_set(parent_1)
    print()
    log_rule_set(parent_2)
    print()
    log_rule_set(offspring_1)
    print()
    log_rule_set(offspring_2)
def test_insertion_with_right_context_only2(self):
    """Epenthesize a vowel before each labial + non-labial consonant pair."""
    configurations["SINGLE_CONTEXT_TRANSDUCER_FLAG"] = True
    self.initialise_segment_table("abd_segment_table.txt")
    right_context = [{"cons": "+", "labial": "+"},
                     {"cons": "+", "labial": "-"}]
    epenthesis = Rule([], [{"cons": "-"}], [], right_context,
                      obligatory=True)
    rule_set = RuleSet([epenthesis])
    self.assertCountEqual(rule_set.get_outputs_of_word('bdbd'), ['abdabd'])
def test_make_mutation__remove_rule(self):
    """With REMOVE_RULE forced, a mutation should empty a one-rule set."""
    self.configurations['UNDERSPECIFICATION_FLAG'] = 0
    self.configurations['REMOVE_RULE'] = 1
    self.initialise_segment_table("ab_segment_table.txt")
    epenthesis = Rule([], [{"cons": "-"}], [{"cons": "+"}], [{"cons": "+"}],
                      obligatory=True)
    rule_set = RuleSet([epenthesis])
    rule_set.make_mutation()
    self.assertEqual(str(RuleSet()), str(rule_set))
def test_kleene_star(self):
    """A Kleene-starred context element should match any number of
    consonants between the trigger vowel and the target vowel."""
    self.initialise_segment_table("plural_english_segment_table.txt")
    self.configurations["CHANGE_KLEENE_VALUE"] = True
    left_context = [{"cons": "-"}, {"cons": "+", "kleene": True}]
    lowering = Rule([{"cons": "-"}], [{"low": "+"}], left_context, [],
                    obligatory=True)
    rule_set = RuleSet([lowering])
    self.assertCountEqual(rule_set.get_outputs_of_word("ato"), ['ata'])
    self.assertCountEqual(rule_set.get_outputs_of_word("attto"), ['attta'])
def test_crossover(self):
    """Cross over two complete hypotheses (HMM + rule set), then cross over
    the resulting offspring again, logging each generation."""
    self.initialise_segment_table("dag_zook_segments_new.txt")
    rule_set_1 = RuleSet([
        Rule(*[[{"cons": "+"}], [{"voice": "-"}], [{"low": "+"}],
               [{"cont": "-"}], True])
    ])
    rule_set_2 = RuleSet([
        Rule(*[[{"cons": "+"}], [{"low": "-"}], [{"voice": "-"}], [],
               False])
    ])
    plural_english_data = 1 * ['kats', 'dogz', 'kat', 'dog']
    hmm_1 = HMM({
        INITIAL_STATE: ['q1'],
        'q1': (['q2', FINAL_STATE], ['dag', 'kot']),
        'q2': ([FINAL_STATE], ['z'])
    })
    # Deliberately different topology (three emitting states vs two).
    hmm_2 = HMM({
        INITIAL_STATE: ['q1'],
        'q1': (['q2'], ['dog', 'kat']),
        'q2': (['q3'], ['s']),
        'q3': ([FINAL_STATE], ['z'])
    })
    grammar_1 = Grammar(hmm_1, rule_set_1)
    grammar_2 = Grammar(hmm_2, rule_set_2)
    hypothesis_1 = Hypothesis(grammar_1, plural_english_data)
    hypothesis_2 = Hypothesis(grammar_2, plural_english_data)
    offspring_1, offspring_2 = GeneticAlgorithm.crossover(
        hypothesis_1, hypothesis_2)
    print("*** Parents:\n")
    GeneticAlgorithm.log_hypothesis(hypothesis_1)
    GeneticAlgorithm.log_hypothesis(hypothesis_2)
    print("\n\n*** Offspring:\n")
    GeneticAlgorithm.log_hypothesis(offspring_1)
    GeneticAlgorithm.log_hypothesis(offspring_2)
    offspring_3, offspring_4 = GeneticAlgorithm.crossover(
        offspring_1, offspring_2)
    print("\n\n*** 2nd gen offspring:\n")
    GeneticAlgorithm.log_hypothesis(offspring_3)
    GeneticAlgorithm.log_hypothesis(offspring_4)
def test_vicky(self):
    """Insert a voiceless segment after a voiceless one; print outputs."""
    self.initialise_segment_table("plural_english_segment_table.txt")
    insertion = Rule([], [{"voice": "-"}], [{"voice": "-"}], [],
                     obligatory=True)
    rule_set = RuleSet([insertion])
    print(rule_set.get_outputs_of_word("dot"))
def test_rule_application_direction(self):
    """Test whether rules are applied recursively once the environment
    changes (fronting of a vowel after a front vowel should feed itself)."""
    self.initialise_segment_table("turkish_segment_table.txt")
    rule = Rule([{"syll": "+"}], [{"back": "-"}],
                [{"syll": "+", "back": "-"}], [], obligatory=True)
    rule_set = RuleSet([rule])
    # TODO: this should be replaced with:
    # self.assertEqual(rule_set.get_outputs_of_word("i1a"), ['iia'])
    # I have no idea why `iie` returns here as well, but this is a bug.
    self.assertIn('iia', rule_set.get_outputs_of_word("i1a"))
def test_make_mutation__change_rule(self):
    """With CHANGE_RULE and MUTATE_OBLIGATORY forced, a mutation should
    flip only the obligatory flag of the single rule."""
    self.configurations['UNDERSPECIFICATION_FLAG'] = 0
    self.configurations['CHANGE_RULE'] = 1
    self.configurations['MUTATE_OBLIGATORY'] = 1
    self.initialise_segment_table("ab_segment_table.txt")
    obligatory_rule = Rule([], [{"cons": "-"}], [{"cons": "+"}],
                           [{"cons": "+"}], obligatory=True)
    optional_rule = Rule([], [{"cons": "-"}], [{"cons": "+"}],
                         [{"cons": "+"}], obligatory=False)
    rule_set = RuleSet([obligatory_rule])
    rule_set.make_mutation()
    self.assertEqual(str(RuleSet([optional_rule])), str(rule_set))
def test_assimilation(self):
    """Devoice a consonant after a voiceless segment; print the outputs."""
    self.initialise_segment_table("plural_english_segment_table.txt")
    devoicing = Rule([{"cons": "+"}], [{"voice": "-"}], [{"voice": "-"}],
                     [], obligatory=True)
    rule_set = RuleSet([devoicing])
    print(rule_set.get_outputs_of_word("tz"))
def __init__(self, hmm, rule_set=None):
    """Wrap an HMM (or a raw transition dict) with an optional rule set.

    Also derives the number of bits needed to encode one segment symbol;
    the +1 leaves room for the delimiter symbol.
    """
    self.hmm = hmm if isinstance(hmm, HMM) else HMM(hmm)
    segment_table = SegmentTable()
    # + 1 for the delimiter
    self.segment_symbol_length = ceil(log(len(segment_table) + 1, 2))
    self.rule_set = rule_set if rule_set else RuleSet()
def test_abnese_insertion(self):
    """Insert a vowel between two consonants; print outputs for 'aabb'."""
    self.initialise_segment_table("ab_segment_table.txt")
    epenthesis = Rule([], [{"cons": "-"}], [{"cons": "+"}], [{"cons": "+"}],
                      obligatory=True)
    rule_set = RuleSet([epenthesis])
    print(rule_set.get_outputs_of_word("aabb"))
def test_crossover(self):
    """Hand-rolled rule crossover: swap the context and change bundles
    between two rules (targets stay put), then compare the energies of
    hypotheses built from each crossed rule."""
    from copy import deepcopy
    rule_1 = Rule.load([[{'cont': '+'}], [{'coronal': '-'}],
                        [{'coronal': '-'}], [], True])
    rule_2 = Rule.load([[{'cons': '+', 'low': '-'}], [{'voice': '-'}],
                        [{'voice': '-'}], [], True])
    crossover_rule_1 = deepcopy(rule_1)
    crossover_rule_2 = deepcopy(rule_2)
    # Each crossed rule keeps its own target but takes the other rule's
    # contexts and change.
    crossover_rule_1.left_context_feature_bundle_list = rule_2.left_context_feature_bundle_list
    crossover_rule_1.right_context_feature_bundle_list = rule_2.right_context_feature_bundle_list
    crossover_rule_1.change_feature_bundle_list = rule_2.change_feature_bundle_list
    crossover_rule_2.left_context_feature_bundle_list = rule_1.left_context_feature_bundle_list
    crossover_rule_2.right_context_feature_bundle_list = rule_1.right_context_feature_bundle_list
    crossover_rule_2.change_feature_bundle_list = rule_1.change_feature_bundle_list
    rule_set_1 = RuleSet([crossover_rule_1])
    rule_set_2 = RuleSet([crossover_rule_2])
    print(rule_set_1)
    print(rule_set_2)
    hmm = HMM({
        'q0': ['q1'],
        'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod']),
        'q2': (['qf'], ['zo', 'go', 'do'])
    })
    grammar_1 = Grammar(hmm, rule_set_1)
    grammar_2 = Grammar(hmm, rule_set_2)
    data = ['kat', 'dot', 'dag', 'kod'] + \
           ['katso', 'dotso', 'dagzo', 'kodzo'] + \
           ['katko', 'dotko', 'daggo', 'kodgo'] + \
           ['katto', 'dotto', 'dagdo', 'koddo']
    hypothesis_1 = Hypothesis(grammar_1, data)
    hypothesis_2 = Hypothesis(grammar_2, data)
    print(hypothesis_1.get_energy())
    print(hypothesis_2.get_energy())
def play(hand: Hand, deck: Deck, player_hand: Hand):
    """Dealer's turn: keep drawing until reaching 21, busting, or matching/
    beating the player's total. If the player has already busted, the
    dealer does not draw at all."""
    if RuleSet.is_bust(player_hand):
        # Player already lost — nothing to do.
        return
    while deck.count() > 0:
        hand.add(deck.draw())
        # Stop on 21, on bust, or once the player's total is matched or
        # beaten (the computer "cheats" by looking at the player's cards).
        stop = (RuleSet.sums_to_21(hand)
                or RuleSet.is_bust(hand)
                or hand.sum() >= player_hand.sum())
        if stop:
            return
def test_insertion_with_right_context_only_2(self):
    """Insert a vowel before every two-consonant cluster; print outputs."""
    configurations["SINGLE_CONTEXT_TRANSDUCER_FLAG"] = True
    self.initialise_segment_table("ab_segment_table.txt")
    two_consonants = [{"cons": "+"}, {"cons": "+"}]
    epenthesis = Rule([], [{"cons": "-"}], [], two_consonants,
                      obligatory=True)
    rule_set = RuleSet([epenthesis])
    print(rule_set.get_outputs_of_word('bbbb'))
def test_make_mutation(self):
    """Apply one random mutation to a single-rule set and print the result."""
    self.initialise_segment_table("ab_segment_table.txt")
    epenthesis = Rule([], [{"cons": "-"}], [{"cons": "+"}], [{"cons": "+"}],
                      obligatory=True)
    rule_set = RuleSet([epenthesis])
    rule_set.make_mutation()
    print(rule_set)
def test_rule_application_direction(self):
    """Check whether rules reapply recursively once the environment
    changes (vowel fronting feeding itself)."""
    self.initialise_segment_table("turkish_segment_table.txt")
    fronting = Rule([{"cons": "-"}], [{"back": "-"}],
                    [{"cons": "-", "back": "-"}], [], obligatory=True)
    rule_set = RuleSet([fronting])
    print(rule_set.get_outputs_of_word("i1a"))  # -> iia
def test_kleene_star(self):
    """Kleene-starred consonant context: lowering applies across any
    number of intervening consonants; print the outputs."""
    self.initialise_segment_table("plural_english_segment_table.txt")
    left_context = [{"cons": "-"}, {"cons": "+", "kleene": True}]
    lowering = Rule([{"cons": "-"}], [{"low": "+"}], left_context, [],
                    obligatory=True)
    rule_set = RuleSet([lowering])
    print(rule_set.get_outputs_of_word("ato"))     # -> ata
    print(rule_set.get_outputs_of_word("atttto"))  # -> atttta
def test_parser_kleene(self):
    """Build an HMM transducer and a grammar NFA with a Kleene-starred
    assimilation rule, writing both to dot files for manual inspection."""
    hmm = HMM({
        INITIAL_STATE: ['q1'],
        'q1': (['q2', FINAL_STATE], ['at', 'attstktttt', 'st']),
        'q2': ([FINAL_STATE], ['o'])
    })
    hmm_transducer = hmm.get_transducer()
    self.write_to_dot_to_file(hmm_transducer, "test_hmm_transducer_kleene")
    # Lower a vowel preceded by a vowel plus any number of consonants.
    assimilation_rule_with_kleene = Rule(
        [{"cons": "-"}], [{"low": "+"}],
        [{"cons": "-"}, {"cons": "+", "kleene": True}], [],
        obligatory=True)
    rule_set_with_kleene = RuleSet([assimilation_rule_with_kleene])
    grammar = Grammar(hmm, rule_set_with_kleene)
    nfa = grammar.get_nfa()
    self.write_to_dot_to_file(nfa, "test_parser_nfa_kleene")
def get_energy(self, hmm, rule_set_list, case_name):
    """Build a hypothesis from `hmm` and a list of rules, print its
    energy signature labelled with `case_name`, and return the energy."""
    hypothesis = Hypothesis(Grammar(hmm, RuleSet(rule_set_list)),
                            self.data)
    energy = hypothesis.get_energy()
    print("{}: {}".format(case_name,
                          hypothesis.get_recent_energy_signature()))
    return energy
def test_turkish_blah(self):
    """Build a Turkish vowel-harmony hypothesis (backness harmony with a
    Kleene consonant skip). NOTE(review): the energy assertion is
    commented out, so this test currently asserts nothing."""
    self.initialise_simulation(turkish_vowel_harmony_new_weights)
    Q2s = [
        'in', 'ler', 'siz', 'i', 'ten', 'sel', 'lik', 'li', 'e', EPSILON
    ]
    hmm_dict = {
        'q0': ['q1'],
        'q1': (['q2'], [
            'el', 'j1l', 'ek', 'ip', 'renk', 'son', 'et', 'josun', 'kedi',
            'kent', 'k0j', 'k0k', 'sokak', 'tuz', 'dal', 'gyn', 'kirpi',
            'k1z', 's1rtlan', 'g0z', 'kurt', 'aj', 'arp'
        ]),
        'q2': (['qf'], Q2s),
    }
    some_hmm = HMM(deepcopy(hmm_dict))
    # Vowel becomes [+back] after a [+back] continuant, skipping any
    # number of non-syllabic segments.
    some_rules = RuleSet([
        Rule([{"syll": "+"}], [{"back": "+"}],
             [{"cont": "+", "back": "+"}, {"syll": "-", "kleene": True}],
             [], True)
    ])
    some_hypo = Hypothesis(Grammar(some_hmm, some_rules))
    # self.assert_equal_no_infs(self.get_target_hypo().get_energy(), some_hypo.get_energy())
def get_energy(self, simulation_case):
    """Compute and print the energy of one simulation case.

    Accepts either ready-made HMM / RuleSet objects or their flat
    dict/list representations, writes the HMM and the grammar NFA to dot
    files, prints the energy signature (including the distance from
    self.target_energy when one is set), and returns the energy.
    """
    case_name = simulation_case.case_name
    configuration.configurations_dict["case_name"] = case_name
    # The case's hmm_dict / flat_rule_set_list may already be constructed
    # objects rather than raw representations.
    if isinstance(simulation_case.hmm_dict, HMM):
        hmm = simulation_case.hmm_dict
    else:
        hmm = HMM(simulation_case.hmm_dict)
    if isinstance(simulation_case.flat_rule_set_list, RuleSet):
        rule_set = simulation_case.flat_rule_set_list
    else:
        rule_set_list = []
        for flat_rule in simulation_case.flat_rule_set_list:
            rule_set_list.append(Rule(*flat_rule))
        rule_set = RuleSet(rule_set_list)
    grammar = Grammar(hmm, rule_set)
    self.write_to_dot_to_file(hmm, "hmm_" + case_name)
    self.write_to_dot_to_file(grammar.get_nfa(),
                              "grammar_nfa_" + case_name)
    hypothesis = Hypothesis(grammar, self.data)
    energy = hypothesis.get_energy()
    if self.target_energy:
        print("{}: {} distance from target: {}".format(
            case_name, hypothesis.get_recent_energy_signature(),
            energy - self.target_energy))
    else:
        print("{}: {}".format(case_name,
                              hypothesis.get_recent_energy_signature()))
    return energy
def test_phi_ro_identity(self):
    """Identity rule ([-cons] -> [-cons]) between consonants: output
    should equal the input. The prints document a suspected bug — see
    the inline comments for expected vs. observed values."""
    self.initialise_segment_table("ab_segment_table.txt")
    rule = Rule([{"cons": "-"}], [{"cons": "-"}], [{"cons": "+"}],
                [{"cons": "+"}], obligatory=True)
    rule_set = RuleSet([rule])
    print(rule_set.get_outputs_of_word("bb"))  # should be bb , instead []
    print(rule_set.get_outputs_of_word(
        "bab"))  # should be 'bab' instead [u'bab', u'bab']
def test_simulated_annealing_runtime(self):
    """Profile SimulatedAnnealing.make_step on the Turkish vowel-harmony
    simulation after a 500-step warm-up, so the measured step reflects a
    realistic mid-run annealing state rather than the initial hypothesis."""
    import simulations.turkish_vowel_harmony as current_simulation
    configurations.load_configurations_from_dict(
        current_simulation.configurations_dict)
    self.initialise_segment_table('turkish_segment_table.txt')
    initial_hmm = None
    initial_rule_set = None
    initial_hypothesis = Hypothesis.create_initial_hypothesis(
        current_simulation.data, initial_hmm, initial_rule_set)
    target_tuple = current_simulation.target_tuple
    data = current_simulation.data
    # FIX: was `load_form_flat_list` — a typo; the loader used elsewhere
    # in this file is `RuleSet.load_from_flat_list`.
    target_rule_set = RuleSet.load_from_flat_list(target_tuple[1])
    target_hypothesis = Hypothesis.create_hypothesis(
        HMM(target_tuple[0]), target_rule_set, data)
    target_energy = target_hypothesis.get_energy()
    simulated_annealing = SimulatedAnnealing(initial_hypothesis,
                                             target_energy)
    simulated_annealing.before_loop()

    # Mutate the hypothesis for some time before measuring steps.
    for _ in range(500):
        simulated_annealing.make_step()

    @timeit_best_of_N
    def make_step_profiled():
        simulated_annealing.make_step()

    make_step_profiled()
def test_abadnese(self):
    """Energy of a labial-assimilation grammar over the abadnese corpus
    should be exactly 245."""
    self.initialise_segment_table("abd_segment_table.txt")
    data = ['bbabbba', 'baabbba', 'babbbba',
            'bbabadba', 'baabadba', 'babbadba',
            'bbabbad', 'baabbad', 'babbbad',
            'bbabadad', 'baabadad', 'babbadad',
            'bbabbab', 'baabbab', 'babbbab',
            'bbabadab', 'baabadab', 'babbadab']
    hmm = HMM({
        'q0': ['q1'],
        'q1': (['q2', 'qf'],
               ['bbab', 'baab', 'babb', 'bbaba', 'baaba', 'babba']),
        'q2': (['qf'], ['dba', 'dad', 'dab'])
    })
    labial_assimilation = Rule.load([[{"cons": "+"}], [{"labial": "+"}],
                                     [{"labial": "+"}], [], True])
    grammar = Grammar(hmm, RuleSet([labial_assimilation]))
    hypothesis = Hypothesis(grammar, data)
    self.assertEqual(hypothesis.get_energy(), 245)
def test_get_parsing_results(self):
    """Exercise SimulatedAnnealing._get_parsing_results on an abnese
    lengthening grammar (two rules: one boundary-conditioned, one
    inter-consonantal epenthesis)."""
    self.initialise_segment_table("abnese_lengthening_segment_table.txt")
    configurations["MORPHEME_BOUNDARY_FLAG"] = True
    configurations["LENGTHENING_FLAG"] = True
    configurations["HMM_ENCODING_LENGTH_MULTIPLIER"] = 100
    configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 20
    hmm = HMM({
        'q0': ['q1'],
        'q1': (['qf'], ['aabb', 'abb', 'bbaabb', 'aba', 'aaba', 'bbaa'])
    })
    # Presumably marks a segment [+long] before a morpheme boundary —
    # TODO confirm against the LENGTHENING_FLAG semantics.
    rule1 = Rule([], [{"long": "+"}], [], [{}, {"bound": "+"}],
                 obligatory=True)
    # Epenthesize a syllabic segment between two consonants.
    rule2 = Rule([], [{"syll": "+"}], [{"cons": "+"}], [{"cons": "+"}],
                 obligatory=True)
    rule_set = RuleSet([rule1, rule2])
    grammar = Grammar(hmm, rule_set)
    data = [
        u'baba:a', u'babaab:ab', u'ab:a', u'aab:a', u'aab:ab', u'ab:ab'
    ]
    hypothesis = Hypothesis(grammar, data)
    simulated_annealing = SimulatedAnnealing(hypothesis, 0)
    print(simulated_annealing._get_parsing_results())
def test_opacity_two_hypotheses(self):
    """Print the energy of a grammar whose assimilation and epenthesis
    rules are ordered so they can interact opaquely."""
    from simulations import dag_zook_opacity as simulation
    self.initialise_simulation(simulation)
    hmm = HMM({
        'q0': ['q1'],
        'q1': (['q2', 'q3'], [
            'daot', 'dkoz', 'dog', 'dok', 'gdaas', 'gkas', 'kaos', 'kat',
            'kood', 'ksoag', 'ogtd', 'oktdo', 'skaz', 'tak', 'tso'
        ]),
        'q2': (['qf'], ['go', 'kazka', 'soka', 'ta', EPSILON]),
        'q3': (['qf'], ['da', 'saat', 'tsk', 'zoka'])
    })
    # Insert a low segment between two coronals.
    epenthesis_rule = Rule([], [{'low': '+'}], [{'coronal': '+'}],
                           [{'coronal': '+'}], True)
    # Devoice a consonant after a voiceless segment.
    assimilation_rule = Rule([{'cons': '+'}], [{'voice': '-'}],
                             [{'voice': '-'}], [], True)
    # Assimilation is ordered before epenthesis in the rule set.
    rule_set = RuleSet([assimilation_rule, epenthesis_rule])
    grammar = Grammar(hmm, rule_set)
    hypothesis = Hypothesis(grammar)
    print(hypothesis.get_energy())