def test_parser_kleene(self):
    """Build a grammar around a rule containing a ``"kleene": True`` segment and dump its automata.

    The HMM transducer and the grammar NFA are written to dot files through
    ``self.write_to_dot_to_file``; the test makes no assertions.
    """
    kleene_hmm = HMM({
        INITIAL_STATE: ['q1'],
        'q1': (['q2', FINAL_STATE], ['at', 'attstktttt', 'st']),
        'q2': ([FINAL_STATE], ['o']),
    })
    self.write_to_dot_to_file(kleene_hmm.get_transducer(), "test_hmm_transducer_kleene")

    # The third argument holds the segment flagged with "kleene": True.
    kleene_rule = Rule(
        [{"cons": "-"}],
        [{"low": "+"}],
        [{"cons": "-"}, {"cons": "+", "kleene": True}],
        [],
        obligatory=True,
    )
    kleene_grammar = Grammar(kleene_hmm, RuleSet([kleene_rule]))
    self.write_to_dot_to_file(kleene_grammar.get_nfa(), "test_parser_nfa_kleene")
def get_energy(self, simulation_case):
    """Compute and print the energy of the hypothesis described by ``simulation_case``.

    ``simulation_case.hmm_dict`` is used directly when it is already an ``HMM``
    and is otherwise passed to ``HMM(...)``. Likewise
    ``simulation_case.flat_rule_set_list`` is used directly when it is a
    ``RuleSet``; otherwise each entry is unpacked into ``Rule(*entry)``.

    Side effects: stores the case name in ``configuration.configurations_dict``,
    writes dot files for the HMM and the grammar NFA, and prints the energy
    signature (with the distance from ``self.target_energy`` when that is truthy).

    Returns:
        The value of ``Hypothesis(grammar, self.data).get_energy()``.
    """
    case_name = simulation_case.case_name
    configuration.configurations_dict["case_name"] = case_name

    hmm_source = simulation_case.hmm_dict
    hmm = hmm_source if isinstance(hmm_source, HMM) else HMM(hmm_source)

    rules_source = simulation_case.flat_rule_set_list
    if isinstance(rules_source, RuleSet):
        rule_set = rules_source
    else:
        rule_set = RuleSet([Rule(*flat_rule) for flat_rule in rules_source])

    grammar = Grammar(hmm, rule_set)
    self.write_to_dot_to_file(hmm, "hmm_" + case_name)
    self.write_to_dot_to_file(grammar.get_nfa(), "grammar_nfa_" + case_name)

    hypothesis = Hypothesis(grammar, self.data)
    energy = hypothesis.get_energy()

    if not self.target_energy:
        report = "{}: {}".format(case_name, hypothesis.get_recent_energy_signature())
    else:
        report = "{}: {} distance from target: {}".format(
            case_name,
            hypothesis.get_recent_energy_signature(),
            energy - self.target_energy,
        )
    print(report)
    return energy
def get_energy(self, hmm, rule_set_list, case_name):
    """Return the energy of the hypothesis built from ``hmm`` and ``rule_set_list``.

    Wraps ``rule_set_list`` in a ``RuleSet``, writes the grammar NFA to the
    "grammar_nfa" dot file, and prints ``case_name`` together with the
    hypothesis' most recent energy signature.

    Returns:
        The value of ``Hypothesis(grammar, self.data).get_energy()``.
    """
    rule_set = RuleSet(rule_set_list)
    grammar = Grammar(hmm, rule_set)
    grammar_nfa = grammar.get_nfa()
    self.write_to_dot_file(grammar_nfa, "grammar_nfa")

    hypothesis = Hypothesis(grammar, self.data)
    energy = hypothesis.get_energy()
    signature = hypothesis.get_recent_energy_signature()
    print("{}: {}".format(case_name, signature))
    return energy
def test_parser2(self):
    """Smoke test: building the grammar NFA for a two-state HMM must not raise.

    Uses ``self.plural_english_rule_set`` as the rule set; the resulting NFA is
    not inspected.
    """
    two_state_hmm = HMM({
        INITIAL_STATE: ['q1'],
        'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
        'q2': ([FINAL_STATE], ['z']),
    })
    # Only the construction is exercised; the NFA itself is discarded.
    Grammar(two_state_hmm, self.plural_english_rule_set).get_nfa()
def test_parser(self):
    """Parse 'dogz' with the grammar NFA of a multi-path HMM and print the result.

    Prints the two values returned by ``nfa_parser`` and writes the NFA to the
    "test_parser_nfa" dot file. The test makes no assertions.
    """
    transitions = {
        INITIAL_STATE: ['q1', 'q3'],
        'q1': (['q2', FINAL_STATE], ['dog', 'kat', 'kats', 'dogz']),
        'q2': ([FINAL_STATE], ['z']),
        'q3': (['q3', FINAL_STATE], self.plural_english_segments),
    }
    grammar = Grammar(HMM(transitions), self.plural_english_rule_set)

    # The hypothesis object is never used afterwards; it is still constructed
    # so that whatever its constructor does keeps happening.
    Hypothesis(grammar, ['dogz'])

    parses, outputs = nfa_parser(grammar.get_nfa(), 'dogz')
    print(parses)
    print(outputs)

    # The NFA is requested a second time for the dot dump, as before.
    self.write_to_dot_to_file(grammar.get_nfa(), "test_parser_nfa")
def test_uniform_encoding_length(self):
    """Check that the parse of 'dogz' has an encoding length of exactly 4.0.

    The grammar is built with ``None`` as its rule set. The parse path and
    output are printed, and the NFA is written to the "test_parser_nfa" dot
    file before the encoding length is computed and asserted.
    """
    transitions = {
        INITIAL_STATE: ['q1', 'q3'],
        'q1': (['q2', FINAL_STATE], ['dog', 'kat', 'kats', 'dogz']),
        'q2': ([FINAL_STATE], ['z']),
        'q3': (['q3', FINAL_STATE], self.plural_english_segments),
    }
    grammar = Grammar(HMM(transitions), None)

    parse_path, output = nfa_parser(grammar.get_nfa(), 'dogz')
    print(parse_path)
    print(output)

    # A freshly requested NFA is used for both the dump and the length computation.
    dumped_nfa = grammar.get_nfa()
    self.write_to_dot_to_file(dumped_nfa, "test_parser_nfa")

    encoding_length = get_encoding_length(dumped_nfa, parse_path)
    print(encoding_length)
    assert encoding_length == 4.0
def test_plural_english_hypothesis(self):
    """Assert that the plural-English hypothesis has an integer-truncated energy of 117.

    Loads the segment table and rule set from their fixture files, dumps the
    first rule's transducer and the grammar NFA to dot files, and installs the
    data on ``self.configurations.simulation_data`` before building the hypothesis.
    """
    self.initialise_segment_table("plural_english_segment_table.txt")
    self.rule_set = self.get_rule_set("plural_english_rule_set.json")

    repetitions = 1
    plural_english_data = repetitions * ['kats', 'dogz', 'kat', 'dog']

    transitions = {
        INITIAL_STATE: ['q1'],
        'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
        'q2': ([FINAL_STATE], ['z']),
    }
    grammar = Grammar(HMM(transitions), self.rule_set)

    first_rule = self.rule_set.rules[0]
    self.write_to_dot_file(first_rule.get_transducer(), "plural_english_rule")
    self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")

    # The hypothesis is built without explicit data, so the data is set here first.
    self.configurations.simulation_data = plural_english_data
    energy = Hypothesis(grammar).get_energy()
    self.assertEqual(int(energy), 117)
def test_parse_uniform_encoding(self):
    """Check that 'dogz' has exactly three parses through the grammar NFA.

    The grammar is built with ``None`` as its rule set. The NFA is written to
    the "test_parser_nfa_uniform_encoding" dot file, and the parses, the
    encoding length of the third parse and the shortest encoding length are
    printed.

    The parse count is asserted *before* ``parse_paths[2]`` is indexed, so a
    wrong number of parses is reported as an assertion failure rather than as
    an unrelated ``IndexError``.
    """
    hmm_multiple_paths = HMM({
        INITIAL_STATE: ['q1', 'q3'],
        'q1': (['q2', FINAL_STATE], ['dog', 'kat', 'kats', 'dogz']),
        'q2': ([FINAL_STATE], ['z']),
        'q3': (['q3', FINAL_STATE], self.plural_english_segments),
    })
    grammar = Grammar(hmm_multiple_paths, None)
    nfa = grammar.get_nfa()
    # The transducer is not used below; the call is kept so that its
    # construction is still exercised by this test.
    grammar.get_transducer()
    self.write_to_dot_to_file(nfa, "test_parser_nfa_uniform_encoding")

    parse_paths = nfa_parser_get_all_parses(nfa, 'dogz')
    print(parse_paths)
    assert len(parse_paths) == 3, "expected 3 parses, got {}".format(len(parse_paths))

    print(parse_paths[2], get_encoding_length(nfa, parse_paths[2]))
    print(get_shortest_encoding_length(nfa, parse_paths))
def test_abnese(self):
    """Print the energy and energy signature of a one-rule hypothesis over an a/b alphabet.

    Enables the "BRACKET_TRANSDUCER" configuration, prints the rule set's
    outputs for the word "bb", and writes the grammar NFA to the "grammar_nfa"
    dot file. The test makes no assertions.
    """
    self.initialise_segment_table("ab_segment_table.txt")
    self.configurations["BRACKET_TRANSDUCER"] = True

    observed_words = ['bab', 'aabab']
    lexicon_hmm = HMM({
        'q0': ['q1'],
        'q1': (['qf'], ['bb', 'aabb']),
    })

    # e->a / b_b
    insertion_rule = Rule(
        [],
        [{"cons": "-"}],
        [{"cons": "+"}],
        [{"cons": "+"}],
        False,
    )
    rule_set = RuleSet([insertion_rule])
    print(rule_set.get_outputs_of_word("bb"))

    grammar = Grammar(lexicon_hmm, rule_set)
    self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")

    # The hypothesis is built without explicit data, so the data is set here first.
    self.configurations.simulation_data = observed_words
    hypothesis = Hypothesis(grammar)
    print(hypothesis.get_energy())
    print(hypothesis.get_recent_energy_signature())
def test_katso_two_rule(self):
    """Assert that the two-rule hypothesis has an integer-truncated energy of 364.

    Loads the plural-English segment table, builds a grammar from a two-state
    HMM and a rule set of two loaded rules, writes the grammar NFA to the
    "grammar_nfa" dot file, and installs the data on
    ``self.configurations.simulation_data`` before building the hypothesis.
    """
    # Previously toggled setting, left disabled:
    # configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 25
    self.initialise_segment_table("plural_english_segment_table.txt")

    data = [
        'kat', 'dot', 'dag', 'kod', 'gas', 'toz',
        'katso', 'dotso', 'dagzo', 'kodzo', 'gasazo', 'tozazo',
        'katko', 'dotko', 'daggo', 'kodgo', 'gasko', 'tozgo',
        'katto', 'dotto', 'dagdo', 'koddo', 'gasto', 'tozdo',
    ]

    hmm_transitions = {
        'q0': ['q1'],
        'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod', 'gas', 'toz']),
        'q2': (['qf'], ['zo', 'go', 'do']),
    }

    epenthesis_rule = Rule.load([
        [],
        [{"cons": "-", "low": "+"}],
        [{"cons": "+", "cont": "+"}],
        [{"cons": "+", "cont": "+"}],
        True,
    ])
    assimilation_rule = Rule.load([
        [{"cons": "+"}],
        [{"voice": "-"}],
        [{"voice": "-"}],
        [],
        True,
    ])
    rule_set = RuleSet([epenthesis_rule, assimilation_rule])

    grammar = Grammar(HMM(hmm_transitions), rule_set)
    self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")

    # The hypothesis is built without explicit data, so the data is set here first.
    self.configurations.simulation_data = data
    energy = Hypothesis(grammar).get_energy()
    self.assertEqual(int(energy), 364)