def test_parser_kleene(self):
        """Smoke test: build an NFA for a grammar whose rule uses a kleene-star context."""
        # Emissions on q1 include long consonant runs so the [+cons]* part of
        # the rule context below actually gets exercised.
        machine = HMM({
            INITIAL_STATE: ['q1'],
            'q1': (['q2', FINAL_STATE], ['at', 'attstktttt', 'st']),
            'q2': ([FINAL_STATE], ['o'])
        })

        self.write_to_dot_to_file(machine.get_transducer(),
                                  "test_hmm_transducer_kleene")

        # [-cons] -> [+low] / [-cons] [+cons]* _   (obligatory)
        kleene_rule = Rule(
            [{"cons": "-"}],
            [{"low": "+"}],
            [{"cons": "-"}, {"cons": "+", "kleene": True}],
            [],
            obligatory=True)

        grammar = Grammar(machine, RuleSet([kleene_rule]))

        self.write_to_dot_to_file(grammar.get_nfa(), "test_parser_nfa_kleene")
 def get_energy(self, simulation_case):
     """Build a grammar for *simulation_case*, dump its dot files, and
     return the energy of the resulting hypothesis over ``self.data``.

     Accepts either ready-made ``HMM``/``RuleSet`` objects or their flat
     (dict / list-of-specs) representations and wraps them as needed.
     """
     case_name = simulation_case.case_name
     configuration.configurations_dict["case_name"] = case_name

     # Accept a ready HMM instance, or build one from its dict form.
     if isinstance(simulation_case.hmm_dict, HMM):
         hmm = simulation_case.hmm_dict
     else:
         hmm = HMM(simulation_case.hmm_dict)

     # Likewise for the rule set: wrap flat rule specs when necessary.
     if isinstance(simulation_case.flat_rule_set_list, RuleSet):
         rule_set = simulation_case.flat_rule_set_list
     else:
         rule_set = RuleSet([Rule(*flat_spec)
                             for flat_spec in simulation_case.flat_rule_set_list])

     grammar = Grammar(hmm, rule_set)
     self.write_to_dot_to_file(hmm, "hmm_" + case_name)
     self.write_to_dot_to_file(grammar.get_nfa(),
                               "grammar_nfa_" + case_name)

     hypothesis = Hypothesis(grammar, self.data)
     energy = hypothesis.get_energy()
     if self.target_energy:
         print("{}: {} distance from target: {}".format(
             case_name, hypothesis.get_recent_energy_signature(),
             energy - self.target_energy))
     else:
         print("{}: {}".format(case_name,
                               hypothesis.get_recent_energy_signature()))
     return energy
 def get_energy(self, hmm, rule_set_list, case_name):
     """Compute, print, and return the energy of Grammar(hmm, rules) over self.data."""
     grammar = Grammar(hmm, RuleSet(rule_set_list))
     self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
     hypothesis = Hypothesis(grammar, self.data)
     result = hypothesis.get_energy()
     print("{}: {}".format(case_name, hypothesis.get_recent_energy_signature()))
     return result
 def test_parser2(self):
     """Smoke test: building the NFA for a minimal stem + 'z' suffix HMM."""
     transitions = {
         INITIAL_STATE: ['q1'],
         'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
         'q2': ([FINAL_STATE], ['z'])
     }
     grammar = Grammar(HMM(transitions), self.plural_english_rule_set)
     # Only constructing the NFA is exercised here; the result is unused.
     nfa = grammar.get_nfa()
    def test_parser(self):
        """Smoke test: parse 'dogz' against an HMM with multiple paths."""
        multi_path_hmm = HMM({
            INITIAL_STATE: ['q1', 'q3'],
            'q1': (['q2', FINAL_STATE], ['dog', 'kat', 'kats', 'dogz']),
            'q2': ([FINAL_STATE], ['z']),
            'q3': (['q3', FINAL_STATE], self.plural_english_segments)
        })

        grammar = Grammar(multi_path_hmm, self.plural_english_rule_set)
        # NOTE(review): hypothesis is never used below — kept as in original
        # in case its construction has side effects; confirm and remove.
        hypothesis = Hypothesis(grammar, ['dogz'])
        nfa = grammar.get_nfa()
        parses, outputs = nfa_parser(nfa, 'dogz')
        print(parses)
        print(outputs)

        # A fresh NFA is fetched before dumping — presumably because the
        # parser call may mutate/consume the first one (confirm).
        nfa = grammar.get_nfa()

        self.write_to_dot_to_file(nfa, "test_parser_nfa")
    def test_uniform_encoding_length(self):
        """The encoding length of the parse of 'dogz' must be exactly 4.0."""
        multi_path_hmm = HMM({
            INITIAL_STATE: ['q1', 'q3'],
            'q1': (['q2', FINAL_STATE], ['dog', 'kat', 'kats', 'dogz']),
            'q2': ([FINAL_STATE], ['z']),
            'q3': (['q3', FINAL_STATE], self.plural_english_segments)
        })

        grammar = Grammar(multi_path_hmm, None)  # no rule set
        nfa = grammar.get_nfa()
        parse_path, output = nfa_parser(nfa, 'dogz')
        print(parse_path)
        print(output)

        # Re-fetch the NFA before dumping/measuring — presumably the parser
        # call may have mutated the first one (NOTE(review): confirm).
        nfa = grammar.get_nfa()
        self.write_to_dot_to_file(nfa, "test_parser_nfa")
        encoding_length = get_encoding_length(nfa, parse_path)
        print(encoding_length)
        assert encoding_length == 4.0
    def test_plural_english_hypothesis(self):
        """English plural data with the voicing rule set: energy should be 117."""
        self.initialise_segment_table("plural_english_segment_table.txt")
        self.rule_set = self.get_rule_set("plural_english_rule_set.json")
        data = ['kats', 'dogz', 'kat', 'dog']

        hmm = HMM({
            INITIAL_STATE: ['q1'],
            'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
            'q2': ([FINAL_STATE], ['z'])
        })

        grammar = Grammar(hmm, self.rule_set)
        self.write_to_dot_file(self.rule_set.rules[0].get_transducer(),
                               "plural_english_rule")
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)
        self.assertEqual(int(hypothesis.get_energy()), 117)
    def test_parse_uniform_encoding(self):
        """'dogz' must have exactly three distinct parses in the rule-less grammar."""
        multi_path_hmm = HMM({
            INITIAL_STATE: ['q1', 'q3'],
            'q1': (['q2', FINAL_STATE], ['dog', 'kat', 'kats', 'dogz']),
            'q2': ([FINAL_STATE], ['z']),
            'q3': (['q3', FINAL_STATE], self.plural_english_segments)
        })

        grammar = Grammar(multi_path_hmm, None)  # no rule set
        nfa = grammar.get_nfa()
        transducer = grammar.get_transducer()
        self.write_to_dot_to_file(nfa, "test_parser_nfa_uniform_encoding")

        parse_paths = nfa_parser_get_all_parses(nfa, 'dogz')
        print(parse_paths)
        print(parse_paths[2], get_encoding_length(nfa, parse_paths[2]))
        print(get_shortest_encoding_length(nfa, parse_paths))

        assert len(parse_paths) == 3
    def test_abnese(self):
        """Abnese epenthesis: print energy for 'bab'/'aabab' data (no assertion)."""
        self.initialise_segment_table("ab_segment_table.txt")
        self.configurations["BRACKET_TRANSDUCER"] = True
        data = ['bab', 'aabab']

        hmm = HMM({'q0': ['q1'],
                   'q1': (['qf'], ['bb', 'aabb'])})
        # Epenthesis: e -> a / b _ b  (optional rule)
        epenthesis_rule = Rule([], [{"cons": "-"}], [{"cons": "+"}],
                               [{"cons": "+"}], False)
        rules = RuleSet([epenthesis_rule])

        print(rules.get_outputs_of_word("bb"))

        grammar = Grammar(hmm, rules)
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)

        print(hypothesis.get_energy())
        print(hypothesis.get_recent_energy_signature())
    def test_katso_two_rule(self):
        """Epenthesis + voicing assimilation over the katso data: energy 364."""
        self.initialise_segment_table("plural_english_segment_table.txt")
        data = (['kat', 'dot', 'dag', 'kod', 'gas', 'toz'] +
                ['katso', 'dotso', 'dagzo', 'kodzo', 'gasazo', 'tozazo'] +
                ['katko', 'dotko', 'daggo', 'kodgo', 'gasko', 'tozgo'] +
                ['katto', 'dotto', 'dagdo', 'koddo', 'gasto', 'tozdo'])

        hmm = HMM({'q0': ['q1'],
                   'q1': (['q2', 'qf'],
                          ['dag', 'kat', 'dot', 'kod', 'gas', 'toz']),
                   'q2': (['qf'], ['zo', 'go', 'do'])})

        # e -> [-cons,+low] / [+cons,+cont] _ [+cons,+cont]  (obligatory)
        epenthesis_rule = Rule.load([[], [{"cons": "-", "low": "+"}],
                                     [{"cons": "+", "cont": "+"}],
                                     [{"cons": "+", "cont": "+"}], True])
        # [+cons] -> [-voice] / [-voice] _  (obligatory)
        assimilation_rule = Rule.load([[{"cons": "+"}], [{"voice": "-"}],
                                       [{"voice": "-"}], [], True])
        rule_set = RuleSet([epenthesis_rule, assimilation_rule])

        grammar = Grammar(hmm, rule_set)
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)
        self.assertEqual(int(hypothesis.get_energy()), 364)