def test_bracket_markings(self):
        """Print the bracket-marked and the replaced forms of the word 'zt'.

        The prologued word is optionally passed through two obligatory
        filters (only when ``rule.obligatory`` is truthy), then composed with
        the rule's right-context and left-context transducers, and finally
        with its replace transducer.  Up to 10 outputs are printed after the
        context stage and again after the replace stage.  Nothing is
        asserted; the test is a visual smoke check.
        """
        self.initialise_segment_table("plural_english_segment_table.txt")
        from bracket_rule_transducer import (BracketRuleTransducer, LEFT_BRACKETS, LEFT_IDENTITY_BRACKET,
                                             RIGHT_BRACKETS, RIGHT_IDENTITY_BRACKET)

        # NOTE(review): the trailing False is presumably the "obligatory" flag - confirm against Rule.
        rule = Rule([{"cons": "+"}], [{"voice": "-"}], [], [{"voice": "-"}], False)
        current = get_prologued_word('zt')

        rule.extract_data_from_feature_bundle_lists()
        factory = BracketRuleTransducer(rule)

        if rule.obligatory:
            # Apply the two obligatory filters one after the other.
            for first_brackets, second_brackets in (([LEFT_IDENTITY_BRACKET], RIGHT_BRACKETS),
                                                    ([RIGHT_IDENTITY_BRACKET], LEFT_BRACKETS)):
                obligatory_filter = pyfst_from_dfa(factory._get_obligatory_dfa(first_brackets, second_brackets))
                current = safe_compose(current, obligatory_filter)

        # Context marking: right context first, then left context.
        marked = safe_compose(current, factory._get_right_context_dfa())
        marked = safe_compose(marked, factory._get_left_context_dfa())
        print(get_transducer_outputs(marked, limit=10))

        replaced = safe_compose(marked, factory.get_replace_transducer())
        print(get_transducer_outputs(replaced, limit=10))
Example #2
0
    def get_left_to_right_application(self):
        """Compose and return the left-to-right application transducer of this rule.

        Composition order:

        1. the prologue transducer;
        2. an obligatory filter over ``LEFT_IDENTITY_BRACKET`` /
           ``RIGHT_IDENTITY_BRACKET`` when the rule is obligatory or
           ``_should_fix_transducer()`` is true;
        3. right context, weighted replace and left context transducers;
        4. an extra obligatory filter for insertion rules;
        5. the "remove multiple paths" filters selected by transformation type;
        6. the inverse prologue transducer.

        When the rule is not obligatory and ``_should_fix_transducer()`` is
        false, the final result is additionally passed through
        ``uniform_encoding.get_weighted_rule_transducer``.
        """
        pipeline = get_prologue_transducer()
        if self.obligatory or self._should_fix_transducer():
            identity_filter = pyfst_from_dfa(
                self._get_obligatory_dfa([LEFT_IDENTITY_BRACKET], [RIGHT_IDENTITY_BRACKET]))
            pipeline = safe_compose(pipeline, identity_filter)

        right_context = self._get_right_context_dfa()
        weighted_replace = uniform_encoding.get_weighted_replace_transducer(
            self.get_replace_transducer(), self)
        left_context = self._get_left_context_dfa()
        prologue_inverse = get_prologue_inverse_transducer()

        pipeline = chain_safe_compose(pipeline, right_context, weighted_replace, left_context)

        if self.transformation_type == INSERTION:
            insertion_filter = pyfst_from_dfa(
                self._get_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_IDENTITY_BRACKET]))
            pipeline = safe_compose(pipeline, insertion_filter)

        # Remove multiple paths.
        if self.transformation_type in (ASSIMILATION, INSERTION):
            application_identity_filter = pyfst_from_dfa(
                self._get_custom_obligatory_dfa([RIGHT_APPLICATION_BRACKET], [LEFT_IDENTITY_BRACKET]))
            pipeline = safe_compose(pipeline, application_identity_filter)
        if self.transformation_type == DELETION:
            identity_application_filter = pyfst_from_dfa(
                self._get_custom_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_APPLICATION_BRACKET]))
            pipeline = safe_compose(pipeline, identity_application_filter)

        pipeline = safe_compose(pipeline, prologue_inverse)

        # Obligatory rules should be weighted only in the replace level, and
        # only non-right-context rules should be weighted this way.
        if not (self.obligatory or self._should_fix_transducer()):
            pipeline = uniform_encoding.get_weighted_rule_transducer(pipeline, self)
        return pipeline
Example #3
0
def get_ignore_dfa(sigma, language_dfa, ignored_set):
    """Return *language_dfa* composed with the intro transducer, as a DFA.

    The language DFA is converted to a transducer, composed with
    ``get_intro_transducer(sigma, ignored_set)`` and converted back to a DFA
    over the union alphabet ``sigma | ignored_set``.
    NOTE(review): presumably this lets symbols of ``ignored_set`` occur
    freely inside words of the language - confirm against get_intro_transducer.
    """
    extended_sigma = sigma | ignored_set
    intro = get_intro_transducer(sigma, ignored_set)
    composed = safe_compose(pyfst_from_dfa(language_dfa), intro)
    return pyfst_to_dfa(composed, extended_sigma)
def get_prologued_word(word):
    """Return the acceptor of *word* composed with the prologue transducer.

    Epsilons are removed in place from the composed transducer before it is
    returned.
    """
    # Imported locally, as in the rest of this module's helpers.
    from bracket_rule_transducer import get_prologue_transducer

    acceptor = get_transducer_acceptor(word)
    prologued = safe_compose(acceptor, get_prologue_transducer())
    prologued.remove_epsilon()
    return prologued
Example #5
0
    def get_left_to_right_application(self):
        """Compose and return the left-to-right application transducer of this rule.

        Composition order:

        1. the prologue transducer;
        2. for obligatory rules, an obligatory filter over
           ``LEFT_IDENTITY_BRACKET`` / ``RIGHT_IDENTITY_BRACKET``;
        3. right context, replace and left context transducers;
        4. an extra obligatory filter for insertion rules;
        5. the "remove multiple paths" filters selected by transformation type;
        6. the inverse prologue transducer.
        """
        pipeline = get_prologue_transducer()
        if self.obligatory:
            identity_filter = pyfst_from_dfa(
                self._get_obligatory_dfa([LEFT_IDENTITY_BRACKET], [RIGHT_IDENTITY_BRACKET]))
            pipeline = safe_compose(pipeline, identity_filter)

        right_context = self._get_right_context_dfa()
        replace = self.get_replace_transducer()
        left_context = self._get_left_context_dfa()
        prologue_inverse = get_prologue_inverse_transducer()

        pipeline = chain_safe_compose(pipeline, right_context, replace, left_context)

        if self.transformation_type == INSERTION:
            insertion_filter = pyfst_from_dfa(
                self._get_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_IDENTITY_BRACKET]))
            pipeline = safe_compose(pipeline, insertion_filter)

        # Remove multiple paths.
        if self.transformation_type in (ASSIMILATION, INSERTION):
            application_identity_filter = pyfst_from_dfa(
                self._get_custom_obligatory_dfa([RIGHT_APPLICATION_BRACKET], [LEFT_IDENTITY_BRACKET]))
            pipeline = safe_compose(pipeline, application_identity_filter)
        if self.transformation_type == DELETION:
            identity_application_filter = pyfst_from_dfa(
                self._get_custom_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_APPLICATION_BRACKET]))
            pipeline = safe_compose(pipeline, identity_application_filter)

        return safe_compose(pipeline, prologue_inverse)
Example #6
0
    def get_outputs_of_word(self, word):
        """Return the outputs produced by applying the rule set to *word*.

        When ``self.get_transducer()`` is falsy, the outputs of the plain
        word acceptor are returned.  Otherwise the word acceptor is composed
        with the rule set transducer and epsilons are removed; an empty
        composition yields ``[]``.  The intermediate transducers are handed
        to ``dot`` under the names "word_transducer", "rule_set_transducer"
        and "word_rule_set_transducer".

        Raises:
            Exception: whatever ``get_transducer_outputs`` raises on the
                composed transducer.  The failing word is printed first and
                the original exception is then re-raised, instead of being
                swallowed and masked by an ``UnboundLocalError`` on return.
        """
        word_transducer = get_transducer_acceptor(word)
        rule_set_transducer = self.get_transducer()
        if not rule_set_transducer:
            return get_transducer_outputs(word_transducer)

        dot(word_transducer, "word_transducer")
        dot(rule_set_transducer, "rule_set_transducer")
        word_rule_set_transducer = safe_compose(word_transducer,
                                                rule_set_transducer)
        word_rule_set_transducer.remove_epsilon()
        dot(word_rule_set_transducer, "word_rule_set_transducer")

        # An empty composition means the rule set accepts nothing for this word.
        if not len(word_rule_set_transducer):
            return []

        try:
            return get_transducer_outputs(word_rule_set_transducer)
        except Exception:
            # Keep the diagnostic, but surface the real error to the caller.
            print("get_outputs_of_word failed with word: {}".format(word))
            raise
    def _rule_construction_helper(self, rule, word):
        """Apply *rule* to *word* stage by stage and return up to 10 outputs.

        The prologued word is optionally filtered by the obligatory
        transducers (when ``rule.obligatory`` is truthy), then composed with
        the right-context, replace and left-context transducers, optionally
        with the "remove multiple paths" filters, and finally with the
        inverse prologue transducer.  The intermediate outputs are printed
        twice before the inverse prologue and the final outputs once after
        it.
        """
        from bracket_rule_transducer import (BracketRuleTransducer, LEFT_APPLICATION_BRACKET,
                                             LEFT_IDENTITY_BRACKET, RIGHT_APPLICATION_BRACKET,
                                             RIGHT_IDENTITY_BRACKET, get_prologue_inverse_transducer)

        # Hard-coded switches for experimenting with the construction.
        remove_multiple_paths = False
        second_obligatory = True

        current = get_prologued_word(word)

        rule.extract_data_from_feature_bundle_lists()
        factory = BracketRuleTransducer(rule)

        if rule.obligatory:
            first_filter = pyfst_from_dfa(
                factory._get_obligatory_dfa([LEFT_IDENTITY_BRACKET], [RIGHT_IDENTITY_BRACKET]))
            current = safe_compose(current, first_filter)
            if second_obligatory:
                second_filter = pyfst_from_dfa(
                    factory._get_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_IDENTITY_BRACKET]))
                current = safe_compose(current, second_filter)

        # Right context, then replace, then left context - each stage is
        # built just before it is composed in.
        for build_stage in (factory._get_right_context_dfa,
                            factory.get_replace_transducer,
                            factory._get_left_context_dfa):
            current = safe_compose(current, build_stage())
        print(get_transducer_outputs(current, limit=10))

        # remove_multiple_paths
        if remove_multiple_paths:
            for first_brackets, second_brackets in (([RIGHT_APPLICATION_BRACKET], [LEFT_IDENTITY_BRACKET]),
                                                    ([RIGHT_IDENTITY_BRACKET], [LEFT_APPLICATION_BRACKET])):
                path_filter = pyfst_from_dfa(
                    factory._get_custom_obligatory_dfa(first_brackets, second_brackets))
                current = safe_compose(current, path_filter)

        print(get_transducer_outputs(current, limit=10))

        unprologued = safe_compose(current, get_prologue_inverse_transducer())
        unprologued.remove_epsilon()
        transducer_outputs = get_transducer_outputs(unprologued, limit=10)
        print(transducer_outputs)

        return transducer_outputs