    def test_crossover(self):
        from copy import deepcopy
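        # Two parent rules: [+cont] -> [-coronal] / [-coronal] _
        # and [+cons, -low] -> [-voice] / [-voice] _ .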

        rule_1 = Rule.load([[{
            'cont': '+'
        }], [{
            'coronal': '-'
        }], [{
            'coronal': '-'
        }], [], True])
        rule_2 = Rule.load([[{
            'cons': '+',
            'low': '-'
        }], [{
            'voice': '-'
        }], [{
            'voice': '-'
        }], [], True])

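        # Crossover: each offspring keeps its own target bundle but takes
        # the other parent's change and context feature bundles.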
        crossover_rule_1 = deepcopy(rule_1)
        crossover_rule_2 = deepcopy(rule_2)
        crossover_rule_1.left_context_feature_bundle_list = rule_2.left_context_feature_bundle_list
        crossover_rule_1.right_context_feature_bundle_list = rule_2.right_context_feature_bundle_list
        crossover_rule_1.change_feature_bundle_list = rule_2.change_feature_bundle_list

        crossover_rule_2.left_context_feature_bundle_list = rule_1.left_context_feature_bundle_list
        crossover_rule_2.right_context_feature_bundle_list = rule_1.right_context_feature_bundle_list
        crossover_rule_2.change_feature_bundle_list = rule_1.change_feature_bundle_list

        rule_set_1 = RuleSet([crossover_rule_1])
        rule_set_2 = RuleSet([crossover_rule_2])
        print(rule_set_1)
        print(rule_set_2)

        hmm = HMM({
            'q0': ['q1'],
            'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod']),
            'q2': (['qf'], ['zo', 'go', 'do'])
        })
        grammar_1 = Grammar(hmm, rule_set_1)
        grammar_2 = Grammar(hmm, rule_set_2)

        data = ['kat', 'dot',     'dag', 'kod'] + \
               ['katso', 'dotso', 'dagzo', 'kodzo'] + \
               ['katko', 'dotko', 'daggo', 'kodgo'] + \
               ['katto', 'dotto', 'dagdo', 'koddo']

        hypothesis_1 = Hypothesis(grammar_1, data)
        hypothesis_2 = Hypothesis(grammar_2, data)

        print(hypothesis_1.get_energy())
        print(hypothesis_2.get_energy())
# Example #2
    def get_energy(self, hmm, rule_set_list, case_name):
        grammar = Grammar(hmm, RuleSet(rule_set_list))
        hypothesis = Hypothesis(grammar, self.data)
        energy = hypothesis.get_energy()
        print("{}: {}".format(case_name,
                              hypothesis.get_recent_energy_signature()))
        return energy
    def test_opacity_two_hypotheses(self):
        from simulations import dag_zook_opacity as simulation
        self.initialise_simulation(simulation)
        hmm = HMM({
            'q0': ['q1'],
            'q1': (['q2', 'q3'], [
                'daot', 'dkoz', 'dog', 'dok', 'gdaas', 'gkas', 'kaos', 'kat',
                'kood', 'ksoag', 'ogtd', 'oktdo', 'skaz', 'tak', 'tso'
            ]),
            'q2': (['qf'], ['go', 'kazka', 'soka', 'ta', EPSILON]),
            'q3': (['qf'], ['da', 'saat', 'tsk', 'zoka'])
        })

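        # Epenthesis inserts a [+low] segment between two [+coronal]
        # segments; assimilation devoices a [+cons] segment after a
        # [-voice] segment.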
        epenthesis_rule = Rule([], [{
            'low': '+'
        }], [{
            'coronal': '+'
        }], [{
            'coronal': '+'
        }], True)
        assimilation_rule = Rule([{
            'cons': '+'
        }], [{
            'voice': '-'
        }], [{
            'voice': '-'
        }], [], True)

        rule_set = RuleSet([assimilation_rule, epenthesis_rule])
        grammar = Grammar(hmm, rule_set)
        hypothesis = Hypothesis(grammar)
        print(hypothesis.get_energy())
    def get_energy(self, simulation_case):
        case_name = simulation_case.case_name
        configuration.configurations_dict["case_name"] = case_name
        # Accept either a ready-made HMM or a dict describing one.
        if isinstance(simulation_case.hmm_dict, HMM):
            hmm = simulation_case.hmm_dict
        else:
            hmm = HMM(simulation_case.hmm_dict)
        # Likewise, accept a RuleSet or a list of flat rule descriptions.
        if isinstance(simulation_case.flat_rule_set_list, RuleSet):
            rule_set = simulation_case.flat_rule_set_list
        else:
            rule_set_list = []
            for flat_rule in simulation_case.flat_rule_set_list:
                rule_set_list.append(Rule(*flat_rule))
            rule_set = RuleSet(rule_set_list)
        grammar = Grammar(hmm, rule_set)
        self.write_to_dot_to_file(hmm, "hmm_" + case_name)
        self.write_to_dot_to_file(grammar.get_nfa(),
                                  "grammar_nfa_" + case_name)
        hypothesis = Hypothesis(grammar, self.data)
        energy = hypothesis.get_energy()
        if self.target_energy:
            print("{}: {} distance from target: {}".format(
                case_name, hypothesis.get_recent_energy_signature(),
                energy - self.target_energy))
        else:
            print("{}: {}".format(case_name,
                                  hypothesis.get_recent_energy_signature()))
        return energy
    def test_turkish_blah(self):
        self.initialise_simulation(turkish_vowel_harmony_new_weights)
        Q2s = [
            'in', 'ler', 'siz', 'i', 'ten', 'sel', 'lik', 'li', 'e', EPSILON
        ]
        hmm_dict = {
            'q0': ['q1'],
            'q1': (['q2'], [
                'el', 'j1l', 'ek', 'ip', 'renk', 'son', 'et', 'josun', 'kedi',
                'kent', 'k0j', 'k0k', 'sokak', 'tuz', 'dal', 'gyn', 'kirpi',
                'k1z', 's1rtlan', 'g0z', 'kurt', 'aj', 'arp'
            ]),
            'q2': (['qf'], Q2s),
        }
        some_hmm = HMM(deepcopy(hmm_dict))
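        # Backness harmony: [+syll] -> [+back] after a [+cont, +back]
        # segment, across any run of [-syll] segments (kleene star).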
        some_rules = RuleSet([
            Rule([{
                "syll": "+"
            }], [{
                "back": "+"
            }], [{
                "cont": "+",
                "back": "+"
            }, {
                "syll": "-",
                "kleene": True
            }], [], True)
        ])

        some_hypo = Hypothesis(Grammar(some_hmm, some_rules))

        self.assert_equal_no_infs(self.get_target_hypo().get_energy(),
                                  some_hypo.get_energy())
    def test_abadnese(self):
        self.initialise_segment_table("abd_segment_table.txt")
        data = [
            'bbabbba', 'baabbba', 'babbbba', 'bbabadba', 'baabadba',
            'babbadba', 'bbabbad', 'baabbad', 'babbbad', 'bbabadad',
            'baabadad', 'babbadad', 'bbabbab', 'baabbab', 'babbbab',
            'bbabadab', 'baabadab', 'babbadab'
        ]

        hmm = HMM({
            'q0': ['q1'],
            'q1': (['q2',
                    'qf'], ['bbab', 'baab', 'babb', 'bbaba', 'baaba',
                            'babba']),
            'q2': (['qf'], ['dba', 'dad', 'dab'])
        })
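        # Labial assimilation: [+cons] -> [+labial] / [+labial] _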
        rule = Rule.load([[{
            "cons": "+"
        }], [{
            "labial": "+"
        }], [{
            "labial": "+"
        }], [], True])
        rule_set = RuleSet([rule])

        grammar = Grammar(hmm, rule_set)
        hypothesis = Hypothesis(grammar, data)
        self.assertEqual(hypothesis.get_energy(), 245)
    def test_turkish__only_syll_is_the_correct_context(self):
        self.initialise_simulation(turkish_vowel_harmony_new_weights)

        # +syll --> +back
        hmm_dict = {
            'q0': ['q1'],
            'q1': (['q2'], [
                'el', 'j1l', 'ek', 'ip', 'renk', 'son', 'et', 'josun', 'kedi',
                'kent', 'k0j', 'k0k', 'sokak', 'tuz', 'dal', 'gyn', 'kirpi',
                'k1z', 's1rtlan', 'g0z', 'kurt', 'aj', 'arp'
            ]),
            'q2': (['qf'], [
                'in', 'ler', 'siz', 'i', 'ten', 'sel', 'lik', 'li', 'e',
                EPSILON
            ]),
        }
        rule_change = ([{"syll": "+"}], [{"back": "+"}])

        # +syll --> -back
        hmm_dict2 = {
            'q0': ['q1'],
            'q1': (['q2'], [
                'el', 'j1l', 'ek', 'ip', 'renk', 'son', 'et', 'josun', 'kedi',
                'kent', 'k0j', 'k0k', 'sokak', 'tuz', 'dal', 'gyn', 'kirpi',
                'k1z', 's1rtlan', 'g0z', 'kurt', 'aj', 'arp'
            ]),
            'q2': (['qf'], [
                '1n', 'lar', 's1z', '1', 'tan', 'sal', 'l1k', 'l1', 'a',
                EPSILON
            ]),
        }
        rule_change2 = ([{"syll": "+"}], [{"back": "-"}])

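        # Sweep every feature/value pair as the kleene-starred context
        # segment; only ('syll', '-'), skipped below, should do as well as
        # the target, so any other pair reaching target energy is collected.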
        target_energy = self.get_target_hypo().get_energy()
        unexpected_context = []
        for feat in 'syll,back,round,high,voice,cont,lateral,son'.split(','):
            for val in ['+', '-']:
                if (feat, val) == ('syll', '-'):
                    continue
                for r, change in enumerate([rule_change, rule_change2],
                                           start=1):
                    for h, hmm in enumerate([hmm_dict, hmm_dict2], start=1):
                        some_hmm = HMM(deepcopy(hmm))
                        rule = change + ([{
                            "syll": "+",
                            "back": change[1][0]['back']
                        }, {
                            feat: val,
                            "kleene": True
                        }], [], True)
                        some_rules = RuleSet([Rule(*rule)])
                        some_hypo = Hypothesis(Grammar(some_hmm, some_rules))
                        if some_hypo.get_energy() <= target_energy:
                            unexpected_context.append(
                                {f"hmm{h} rule {r}": {feat: val}})

        assert unexpected_context == [], f"Unexpected kleene context for rule: {unexpected_context}"
    def get_energy(self, hmm, rule_set_list, case_name):
        grammar = Grammar(hmm, RuleSet(rule_set_list))
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa_" + case_name)
        hypothesis = Hypothesis(grammar, self.data)
        energy = hypothesis.get_energy()
        print("{}: {}".format(case_name,
                              hypothesis.get_recent_energy_signature()))
        return energy
    def test_plural_english_hypothesis(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        self.rule_set = self.get_rule_set("plural_english_rule_set.json")
        plural_english_data = 1 * ['kats', 'dogz', 'kat', 'dog']
        hmm = HMM({INITIAL_STATE: ['q1'],
                 'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
                 'q2': ([FINAL_STATE], ['z'])})

        grammar = Grammar(hmm, self.rule_set)
        self.write_to_dot_file(self.rule_set.rules[0].get_transducer(), "plural_english_rule")
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
        self.configurations.simulation_data = plural_english_data
        hypothesis = Hypothesis(grammar)
        self.assertEqual(int(hypothesis.get_energy()), 117)
    def test_abadnese_no_rule(self):
        self.initialise_segment_table("abd_segment_table.txt")
        data = ['bbabbba', 'baabbba', 'babbbba', 'bbabadba', 'baabadba',
                'babbadba', 'bbabbad', 'baabbad', 'babbbad', 'bbabadad',
                'baabadad', 'babbadad', 'bbabbab', 'baabbab', 'babbbab',
                'bbabadab', 'baabadab', 'babbadab']

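        # With no rule, the suffix state must list the assimilated
        # 'b'-initial variants alongside the 'd'-initial ones.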
        hmm = HMM({'q0': ['q1'],
              'q1': (['q2'], ['bbab', 'baab', 'babb', 'bbaba', 'baaba', 'babba']),
              'q2': (['qf'], ['dba', 'dad', 'dab', 'bba', 'bad', 'bab'])})

        grammar = Grammar(hmm, [])
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)
        self.assertEqual(int(hypothesis.get_energy()), 243)
    def test_assimilation_no_rule(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        data = ['kat', 'dot',     'dag', 'kod'] + \
               ['katso', 'dotso', 'dagzo', 'kodzo'] + \
               ['katko', 'dotko', 'daggo', 'kodgo'] + \
               ['katto', 'dotto', 'dagdo', 'koddo']

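        # With no rule, q2 must list both voiced and devoiced suffix
        # variants ('zo'/'so', 'go'/'ko', 'do'/'to') directly.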
        hmm = HMM({'q0': ['q1'],
              'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod']),
              'q2': (['qf'], ['zo', 'go', 'do', 'to', 'so', 'ko'])
               })

        grammar = Grammar(hmm, [])

        hypothesis = Hypothesis(grammar)
        self.configurations.simulation_data = data
        self.assertEqual(int(hypothesis.get_energy()), 230)
    def test_assimilation2(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        self.rule_set = self.get_rule_set("plural_english_rule_set.json")
        data = ['kat', 'dot',     'dag', 'kod'] + \
               ['katso', 'dotso', 'dagzo', 'kodzo'] + \
               ['katko', 'dotko', 'daggo', 'kodgo'] + \
               ['katto', 'dotto', 'dagdo', 'koddo']

        hmm = HMM({'q0': ['q1'],
              'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod']),
              'q2': (['qf'], ['zo', 'go', 'do'])
               })

        grammar = Grammar(hmm, self.rule_set)
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)
        for _ in range(10):  # 1.4
            energy = hypothesis.get_energy()
    def test_abadnese_no_rule(self):
        self.initialise_segment_table("abd_segment_table.txt")
        data = [
            'bbabbba', 'baabbba', 'babbbba', 'bbabadba', 'baabadba',
            'babbadba', 'bbabbad', 'baabbad', 'babbbad', 'bbabadad',
            'baabadad', 'babbadad', 'bbabbab', 'baabbab', 'babbbab',
            'bbabadab', 'baabadab', 'babbadab'
        ]

        hmm = HMM({
            'q0': ['q1'],
            'q1': (['q2',
                    'qf'], ['bbab', 'baab', 'babb', 'bbaba', 'baaba',
                            'babba']),
            'q2': (['qf'], ['dba', 'dad', 'dab', 'bba', 'bad', 'bab'])
        })

        grammar = Grammar(hmm, [])
        hypothesis = Hypothesis(grammar, data)
        self.assertEqual(hypothesis.get_energy(), 252)
    def test_abnese(self):
        self.initialise_segment_table("ab_segment_table.txt")
        self.configurations["BRACKET_TRANSDUCER"] = True
        data = ['bab', 'aabab']

        hmm = HMM({'q0': ['q1'],
                   'q1': (['qf'], ['bb', 'aabb'])
                   })
        rule = Rule([], [{"cons": "-"}], [{"cons": "+"}], [{"cons": "+"}], False)  # e->a / b_b
        rule_set = RuleSet([rule])

        print(rule_set.get_outputs_of_word("bb"))

        grammar = Grammar(hmm, rule_set)
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)

        print(hypothesis.get_energy())
        print(hypothesis.get_recent_energy_signature())
    def test_katso_two_rule(self):
        #configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 25
        self.initialise_segment_table("plural_english_segment_table.txt")
        data = ['kat', 'dot',     'dag', 'kod',     'gas', 'toz'] + \
               ['katso', 'dotso', 'dagzo', 'kodzo', 'gasazo', 'tozazo'] + \
               ['katko', 'dotko', 'daggo', 'kodgo', 'gasko', 'tozgo'] + \
               ['katto', 'dotto', 'dagdo', 'koddo', 'gasto', 'tozdo']

        hmm = {'q0': ['q1'],
              'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod', 'gas', 'toz']),
              'q2': (['qf'], ['zo', 'go', 'do'])}

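        # Epenthesis inserts a [-cons, +low] vowel between two [+cons,
        # +cont] segments (e.g. 'gas' + 'zo' -> 'gasazo'); assimilation
        # then devoices a [+cons] segment after a [-voice] segment.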
        epenthesis_rule = Rule.load([[], [{"cons": "-", "low": "+"}], [{"cons": "+", "cont": "+"}], [{"cons": "+", "cont": "+"}], True])
        assimilation_rule = Rule.load([[{"cons": "+"}], [{"voice": "-"}], [{"voice": "-"}], [], True])
        rule_set = RuleSet([epenthesis_rule, assimilation_rule])
        hmm = HMM(hmm)
        grammar = Grammar(hmm, rule_set)
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)
        self.assertEqual(int(hypothesis.get_energy()), 364)
target_hmm_transitions = {
    INITIAL_STATE: ['q1'],
    'q1': ['q2'],
    'q2': [FINAL_STATE]
}
target_hmm_emissions = {
    'q1': [prefix],
    'q2': replace_bab_with_bb_for_every_word(target_stems)
}
target_hmm_inner_states = ['q1', 'q2']
target_hmm = HMM(target_hmm_transitions, target_hmm_emissions,
                 target_hmm_inner_states)
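# Target grammar: q1 emits the prefix, q2 emits the stems with each
# surface 'bab' replaced by underlying 'bb'.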
# dot(target_hmm, 'complex_morphology_hmm')
target_lexicon = Lexicon([], max_word_length_in_data, initial_hmm=target_hmm)
dot(target_hmm, 'abnese_target')
target_grammar = Grammar(target_constraint_set, target_lexicon)
target_hypothesis = Hypothesis(target_grammar, data)
target_energy = target_hypothesis.get_energy()
print(f"target hypothesis: {target_hypothesis.get_recent_energy_signature()}")
print_empty_line()

# words initial hypothesis

words_initial_lexicon = Lexicon(data,
                                max_word_length_in_data,
                                alphabet_or_words="words")
dot(words_initial_lexicon.hmm, 'abnese_initial_hmm')
words_initial_grammar = Grammar(faith_constraint_set, words_initial_lexicon)
words_initial_hypothesis = Hypothesis(words_initial_grammar, data)
words_initial_energy = words_initial_hypothesis.get_energy()
print(
    f"words initial hypothesis: {words_initial_hypothesis.get_recent_energy_signature()}"
)
# Example #17
target_stems_and_suffixes = get_target_stems_and_suffixes(target_stems, surface_suffixes)


corpus = target_stems + target_stems_and_suffixes
corpus = Corpus(corpus)


data = corpus.get_words()
max_word_length_in_data = max([len(word) for word in data])
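# Longest word in the corpus; passed to Lexicon, presumably as an upper
# bound on hypothesized word length.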

# initial hypothesis

initial_lexicon = Lexicon(data, max_word_length_in_data)
initial_grammar = Grammar(initial_constraint_set, initial_lexicon)
initial_hypothesis = Hypothesis(initial_grammar, data)
initial_energy = initial_hypothesis.get_energy()
print(f"initial hypothesis: {initial_hypothesis.get_recent_energy_signature()}")
print(f"initial energy: {initial_energy}")
print_empty_line()


target_hmm_transitions = {INITIAL_STATE: ["q1"],
                          "q1": ["q2", FINAL_STATE],
                          "q2": [FINAL_STATE]}

target_hmm_emissions = {"q1": target_stems,
                        "q2": ["kun"]}

target_hmm_inner_states = ["q1", "q2"]
target_hmm = HMM(target_hmm_transitions, target_hmm_emissions, target_hmm_inner_states)
target_lexicon = Lexicon([], max_word_length_in_data, initial_hmm=target_hmm)