コード例 #1
0
    def test_bracket_markings(self):
        """Print the bracket-marked outputs for the word 'zt' under one rule.

        The prologued word is composed, in order, with the obligatory
        filters (only when ``rule.obligatory`` is truthy), the right-context
        transducer and the left-context transducer; the outputs are printed.
        The result is then composed with the replace transducer and printed
        again. Nothing is asserted.
        """
        self.initialise_segment_table("plural_english_segment_table.txt")
        from bracket_rule_transducer import (
            BracketRuleTransducer,
            LEFT_IDENTITY_BRACKET,
            RIGHT_BRACKETS,
            LEFT_BRACKETS,
            RIGHT_IDENTITY_BRACKET,
        )
        rule = Rule([{"cons": "+"}], [{"voice": "-"}], [], [{"voice": "-"}], False)
        word = 'zt'

        marked = get_prologued_word(word)

        rule.extract_data_from_feature_bundle_lists()
        factory = BracketRuleTransducer(rule)

        if rule.obligatory:
            # Two obligatory filters are applied one after the other; each
            # pair holds the two bracket collections handed to the factory.
            obligatory_bracket_pairs = (
                ([LEFT_IDENTITY_BRACKET], RIGHT_BRACKETS),
                ([RIGHT_IDENTITY_BRACKET], LEFT_BRACKETS),
            )
            for first_brackets, second_brackets in obligatory_bracket_pairs:
                filter_dfa = factory._get_obligatory_dfa(first_brackets, second_brackets)
                marked = safe_compose(marked, pyfst_from_dfa(filter_dfa))

        right_context = factory._get_right_context_dfa()
        marked = safe_compose(marked, right_context)

        left_context = factory._get_left_context_dfa()
        marked = safe_compose(marked, left_context)
        print(get_transducer_outputs(marked, limit=10))

        replace = factory.get_replace_transducer()
        replaced = safe_compose(marked, replace)
        print(get_transducer_outputs(replaced, limit=10))
コード例 #2
0
    def get_left_to_right_application(self):
        """Compose every stage of the rule into one weighted transducer.

        Stages, in composition order: prologue, optional identity-bracket
        obligatory filter, right context, weighted replace, left context,
        transformation-specific filters, and finally the prologue inverse.

        Returns:
            The composed transducer. It is additionally passed through
            ``uniform_encoding.get_weighted_rule_transducer`` only when the
            rule is not obligatory and ``_should_fix_transducer()`` is falsy.
        """
        pipeline = get_prologue_transducer()
        if self.obligatory or self._should_fix_transducer():
            identity_filter_dfa = self._get_obligatory_dfa(
                [LEFT_IDENTITY_BRACKET], [RIGHT_IDENTITY_BRACKET])
            pipeline = safe_compose(pipeline,
                                    pyfst_from_dfa(identity_filter_dfa))

        # Build the individual stages in the same order as before composing.
        right_context = self._get_right_context_dfa()
        plain_replace = self.get_replace_transducer()
        weighted_replace = uniform_encoding.get_weighted_replace_transducer(
            plain_replace, self)
        left_context = self._get_left_context_dfa()
        prologue_inverse = get_prologue_inverse_transducer()

        pipeline = chain_safe_compose(pipeline, right_context,
                                      weighted_replace, left_context)

        kind = self.transformation_type

        if kind == INSERTION:
            insertion_filter_dfa = self._get_obligatory_dfa(
                [RIGHT_IDENTITY_BRACKET], [LEFT_IDENTITY_BRACKET])
            pipeline = safe_compose(pipeline,
                                    pyfst_from_dfa(insertion_filter_dfa))

        # remove multiple paths
        if kind in (ASSIMILATION, INSERTION):
            ri_filter_dfa = self._get_custom_obligatory_dfa(
                [RIGHT_APPLICATION_BRACKET], [LEFT_IDENTITY_BRACKET])
            pipeline = safe_compose(pipeline, pyfst_from_dfa(ri_filter_dfa))
        if kind == DELETION:
            jl_filter_dfa = self._get_custom_obligatory_dfa(
                [RIGHT_IDENTITY_BRACKET], [LEFT_APPLICATION_BRACKET])
            pipeline = safe_compose(pipeline, pyfst_from_dfa(jl_filter_dfa))

        pipeline = safe_compose(pipeline, prologue_inverse)

        # Obligatory rules are weighted only at the replace level, and only
        # non-right-context rules are weighted at the whole-rule level.
        if self.obligatory or self._should_fix_transducer():
            return pipeline
        return uniform_encoding.get_weighted_rule_transducer(pipeline, self)
コード例 #3
0
def get_sigma_transducer_for_intro(sigma):
    """Return the cached transducer built from the symbols of ``sigma``.

    The transducer is created from the regex obtained by joining the symbols
    with ``+`` inside parentheses, and is memoized in the module-level
    ``sigma_transducer_dict``.

    The cache is keyed by ``frozenset(sigma)``. A key made by concatenating
    the sorted symbols is ambiguous when symbols are longer than one
    character (``{"ab", "c"}`` and ``{"a", "bc"}`` both yield ``"abc"``),
    which could return a transducer built for a different alphabet.

    Args:
        sigma: iterable of hashable symbols (strings in practice).

    Returns:
        The transducer produced by ``pyfst_from_dfa`` for this symbol set.
    """
    sigma_key = frozenset(sigma)
    if sigma_key not in sigma_transducer_dict:
        sigma_regex = "({})".format("+".join(sigma))
        sigma_dfa = get_dfa_from_regex(sigma_regex, sigma=sigma)
        sigma_transducer_dict[sigma_key] = pyfst_from_dfa(sigma_dfa)
    return sigma_transducer_dict[sigma_key]
コード例 #4
0
def get_ignore_dfa(sigma, language_dfa, ignored_set):
    """Compose ``language_dfa`` with the intro transducer and return a DFA.

    Args:
        sigma: alphabet passed to ``get_intro_transducer``.
        language_dfa: DFA of the language to transform.
        ignored_set: symbols passed to ``get_intro_transducer`` as the
            ignored set (presumably symbols that may then appear anywhere
            in the language -- confirm against ``get_intro_transducer``).

    Returns:
        The composition converted back to a DFA over ``sigma | ignored_set``.
    """
    widened_alphabet = sigma | ignored_set
    intro = get_intro_transducer(sigma, ignored_set)
    composed = safe_compose(pyfst_from_dfa(language_dfa), intro)
    return pyfst_to_dfa(composed, widened_alphabet)
コード例 #5
0
    def get_replace_transducer(self):
        """Build the replace transducer for this rule.

        A two-state inner transducer maps each (segment1, segment2) pair of
        ``self.target_change_tuples_list`` on an arc from state 0 to the
        final state 1, with center brackets looping on both states. It is
        wrapped between the left and right application-bracket transducers
        and passed to ``add_opt``. That part is appended to a sigma-star
        acceptor that also admits the identity brackets, and the closure of
        the concatenation is returned.
        """
        symbols = SegmentTable().transducer_symbol_table
        change_fst = fst.Transducer(isyms=symbols, osyms=symbols)
        for source_segment, target_segment in self.target_change_tuples_list:
            change_fst.add_arc(0, 1, source_segment, target_segment)
        change_fst[1].final = True

        # Center brackets pass through unchanged before and after the change.
        for center_bracket in (LEFT_CENTER_BRACKET, RIGHT_CENTER_BRACKET):
            change_fst.add_arc(0, 0, center_bracket, center_bracket)
            change_fst.add_arc(1, 1, center_bracket, center_bracket)

        bracketed_change = (left_bracket_transducer + change_fst +
                            right_bracket_transducer)
        add_opt(bracketed_change)

        identity_brackets = {LEFT_IDENTITY_BRACKET, RIGHT_IDENTITY_BRACKET}
        any_segments_regex = "({})*".format("+".join(self.alphabet))
        any_segments_dfa = get_dfa_from_regex(any_segments_regex,
                                              sigma=self.alphabet)
        any_segments_with_identity = get_ignore_dfa(
            self.alphabet | identity_brackets,
            any_segments_dfa,
            identity_brackets)
        identity_part = pyfst_from_dfa(any_segments_with_identity)

        return (identity_part + bracketed_change).closure()
コード例 #6
0
    def get_left_to_right_application(self):
        """Compose every stage of the rule into a single transducer.

        Composition order: prologue, identity-bracket obligatory filter (only
        for obligatory rules), right context, replace, left context, the
        filters selected by ``self.transformation_type``, and last the
        prologue inverse.

        Returns:
            The fully composed transducer.
        """
        result = get_prologue_transducer()
        if self.obligatory:
            obligatory_dfa = self._get_obligatory_dfa(
                [LEFT_IDENTITY_BRACKET], [RIGHT_IDENTITY_BRACKET])
            result = safe_compose(result, pyfst_from_dfa(obligatory_dfa))

        # Stages are constructed first, in this order, then chained.
        right_context = self._get_right_context_dfa()
        replace = self.get_replace_transducer()
        left_context = self._get_left_context_dfa()
        prologue_inverse = get_prologue_inverse_transducer()

        result = chain_safe_compose(result, right_context, replace,
                                    left_context)

        kind = self.transformation_type

        if kind == INSERTION:
            insertion_dfa = self._get_obligatory_dfa(
                [RIGHT_IDENTITY_BRACKET], [LEFT_IDENTITY_BRACKET])
            result = safe_compose(result, pyfst_from_dfa(insertion_dfa))

        # remove multiple paths
        if kind in (ASSIMILATION, INSERTION):
            ri_dfa = self._get_custom_obligatory_dfa(
                [RIGHT_APPLICATION_BRACKET], [LEFT_IDENTITY_BRACKET])
            result = safe_compose(result, pyfst_from_dfa(ri_dfa))
        if kind == DELETION:
            jl_dfa = self._get_custom_obligatory_dfa(
                [RIGHT_IDENTITY_BRACKET], [LEFT_APPLICATION_BRACKET])
            result = safe_compose(result, pyfst_from_dfa(jl_dfa))

        return safe_compose(result, prologue_inverse)
コード例 #7
0
    def _get_left_context_dfa(self):
        """Return the left-context transducer for this rule, memoized.

        Results are cached in the module-level ``left_context_dfas`` under
        the string form of ``self.left_context_feature_bundle_list``.
        Despite the name, the returned (and cached) object is the output of
        ``pyfst_from_dfa``.
        """
        cache_key = str(self.left_context_feature_bundle_list)
        if cache_key in left_context_dfas:
            return left_context_dfas[cache_key]

        segments = self.alphabet
        any_string = sigma_star_dfa_for_left_context
        left_brackets = set(LEFT_BRACKETS)

        if not self.left_context_feature_bundle_list:
            # No left context: every string qualifies.
            any_string_then_context = any_string
        else:
            context_regex = get_context_regex(
                self.left_context_feature_bundle_list)
            if configurations["LENGTHENING_FLAG"]:
                context_regex = context_regex + "(Y)*"
            context_dfa = str2regexp(context_regex, sigma=segments).toDFA()
            context_with_left_brackets = get_ignore_dfa(
                segments | left_brackets, context_dfa, left_brackets)
            any_string_then_context = any_string.concat(
                context_with_left_brackets)

        bracket_regex = "({})".format("+".join(LEFT_BRACKETS))
        one_left_bracket = get_dfa_from_regex(bracket_regex,
                                              sigma=LEFT_BRACKETS)

        # Context-ending strings, minus those that end in a left bracket.
        ends_with_bracket = any_string.concat(one_left_bracket)
        context_not_bracket_final = any_string_then_context & ~ends_with_bracket

        starts_with_bracket = one_left_bracket.concat(any_string)

        p_iff_s = get_p_iff_s_dfa(context_not_bracket_final,
                                  starts_with_bracket)

        with_right_brackets = get_ignore_dfa(segments | set(BRACKETS),
                                             p_iff_s,
                                             set(RIGHT_BRACKETS))

        left_context_transducer = pyfst_from_dfa(with_right_brackets)
        left_context_dfas[cache_key] = left_context_transducer
        return left_context_transducer
コード例 #8
0
    def _get_right_context_dfa(self):
        """Return the right-context transducer for this rule, memoized.

        Results are cached in the module-level ``right_context_dfas`` under
        the string form of ``self.right_context_feature_bundle_list``.
        Despite the name, the returned (and cached) object is the output of
        ``pyfst_from_dfa``.
        """
        cache_key = str(self.right_context_feature_bundle_list)
        if cache_key in right_context_dfas:
            return right_context_dfas[cache_key]

        segments = self.alphabet
        any_string = sigma_star_dfa_for_right_context
        right_brackets = set(RIGHT_BRACKETS)

        if not self.right_context_feature_bundle_list:
            # No right context: every string qualifies.
            context_then_any_string = any_string
        else:
            context_dfa = get_context_dfa(
                self.right_context_feature_bundle_list)
            context_with_right_brackets = get_ignore_dfa(
                segments | right_brackets, context_dfa, right_brackets)
            context_then_any_string = context_with_right_brackets.concat(
                any_string)

        bracket_regex = "({})".format("+".join(RIGHT_BRACKETS))
        one_right_bracket = get_dfa_from_regex(bracket_regex,
                                               sigma=RIGHT_BRACKETS)
        ends_with_bracket = any_string.concat(one_right_bracket)

        # Context-initial strings, minus those that start with a right bracket.
        starts_with_bracket = one_right_bracket.concat(any_string)
        context_not_bracket_initial = (context_then_any_string
                                       & ~starts_with_bracket)

        p_iff_s = get_p_iff_s_dfa(ends_with_bracket,
                                  context_not_bracket_initial)

        with_left_brackets = get_ignore_dfa(segments | set(BRACKETS),
                                            p_iff_s,
                                            set(LEFT_BRACKETS))

        right_context_transducer = pyfst_from_dfa(with_left_brackets)
        right_context_dfas[cache_key] = right_context_transducer
        return right_context_transducer
コード例 #9
0
# Memoization table for get_sigma_transducer_for_intro, keyed by the
# frozenset of symbols the transducer was built for.
sigma_transducer_dict = dict()


def get_sigma_transducer_for_intro(sigma):
    """Return the cached transducer built from the symbols of ``sigma``.

    The transducer is created from the regex obtained by joining the symbols
    with ``+`` inside parentheses, and is memoized in
    ``sigma_transducer_dict``.

    The cache is keyed by ``frozenset(sigma)``. A key made by concatenating
    the sorted symbols is ambiguous when symbols are longer than one
    character (``{"ab", "c"}`` and ``{"a", "bc"}`` both yield ``"abc"``),
    which could return a transducer built for a different alphabet.

    Args:
        sigma: iterable of hashable symbols (strings in practice).

    Returns:
        The transducer produced by ``pyfst_from_dfa`` for this symbol set.
    """
    sigma_key = frozenset(sigma)
    if sigma_key not in sigma_transducer_dict:
        sigma_regex = "({})".format("+".join(sigma))
        sigma_dfa = get_dfa_from_regex(sigma_regex, sigma=sigma)
        sigma_transducer_dict[sigma_key] = pyfst_from_dfa(sigma_dfa)
    return sigma_transducer_dict[sigma_key]


# Module-level automata shared by the rule-transducer code. These statements
# run once, when the module is imported, using the segment table available
# at that moment.

# Set of segment symbols reported by the segment table.
alphabet = set(SegmentTable().get_segments_symbols())

# Kleene star over the alphabet: "(s1+s2+...)*".
m_sigma_star_regex = "({})*".format("+".join(alphabet))
m_sigma_star_dfa = get_dfa_from_regex(m_sigma_star_regex, sigma=alphabet)

# Variants of the sigma-star DFA passed through get_ignore_dfa with the left
# brackets, the right brackets, and all brackets as the ignored set.
sigma_star_dfa_for_left_context = get_ignore_dfa(alphabet | set(LEFT_BRACKETS),
                                                 m_sigma_star_dfa,
                                                 set(LEFT_BRACKETS))
sigma_star_dfa_for_right_context = get_ignore_dfa(
    alphabet | set(RIGHT_BRACKETS), m_sigma_star_dfa, set(RIGHT_BRACKETS))
sigma_star_dfa_for_obligatory = get_ignore_dfa(alphabet | set(BRACKETS),
                                               m_sigma_star_dfa, set(BRACKETS))

# Transducers built from the single left / right application bracket symbol.
# NOTE(review): unlike the calls above, get_dfa_from_regex is called here
# without ``sigma`` and its result is converted with .toDFA() -- confirm this
# is intended.
left_bracket_transducer = pyfst_from_dfa(
    get_dfa_from_regex(LEFT_APPLICATION_BRACKET).toDFA())
right_bracket_transducer = pyfst_from_dfa(
    get_dfa_from_regex(RIGHT_APPLICATION_BRACKET).toDFA())
コード例 #10
0
 def right_bracket_transducer(self):
     """Return the right application-bracket transducer, building it once.

     The transducer is stored on ``self.RIGHT_BRACKET_TRANSDUCER`` the first
     time it is requested (while that attribute is ``None``) and reused on
     later calls.
     """
     if self.RIGHT_BRACKET_TRANSDUCER is not None:
         return self.RIGHT_BRACKET_TRANSDUCER

     bracket_dfa = get_dfa_from_regex(RIGHT_APPLICATION_BRACKET).toDFA()
     self.RIGHT_BRACKET_TRANSDUCER = pyfst_from_dfa(bracket_dfa)
     return self.RIGHT_BRACKET_TRANSDUCER
コード例 #11
0
    def _rule_construction_helper(self, rule, word):
        """Apply ``rule`` to ``word`` stage by stage, printing as it goes.

        The prologued word is composed with the obligatory filters (only when
        ``rule.obligatory`` is truthy), then the right-context, replace and
        left-context transducers. Intermediate outputs are printed twice
        (before and after the optional multiple-path filters), and the final
        outputs are printed after composing with the prologue inverse and
        removing epsilons.

        Args:
            rule: rule object; ``extract_data_from_feature_bundle_lists`` is
                called on it before the factory is built.
            word: word passed to ``get_prologued_word``.

        Returns:
            The result of ``get_transducer_outputs`` (limit 10) on the final
            transducer.
        """
        from bracket_rule_transducer import (
            get_prologue_inverse_transducer,
            BracketRuleTransducer,
            LEFT_IDENTITY_BRACKET,
            RIGHT_IDENTITY_BRACKET,
            RIGHT_APPLICATION_BRACKET,
            LEFT_APPLICATION_BRACKET,
        )
        # Local toggles for the optional stages below.
        remove_multiple_paths = False
        second_obligatory = True

        staged = get_prologued_word(word)

        rule.extract_data_from_feature_bundle_lists()
        factory = BracketRuleTransducer(rule)

        if rule.obligatory:
            obligatory_pairs = [([LEFT_IDENTITY_BRACKET], [RIGHT_IDENTITY_BRACKET])]
            if second_obligatory:
                obligatory_pairs.append(([RIGHT_IDENTITY_BRACKET], [LEFT_IDENTITY_BRACKET]))
            for first_brackets, second_brackets in obligatory_pairs:
                filter_dfa = factory._get_obligatory_dfa(first_brackets, second_brackets)
                staged = safe_compose(staged, pyfst_from_dfa(filter_dfa))

        right_context = factory._get_right_context_dfa()
        staged = safe_compose(staged, right_context)

        replace = factory.get_replace_transducer()
        staged = safe_compose(staged, replace)

        left_context = factory._get_left_context_dfa()
        staged = safe_compose(staged, left_context)
        print(get_transducer_outputs(staged, limit=10))

        # remove_multiple_paths
        if remove_multiple_paths:
            path_filter_pairs = (
                ([RIGHT_APPLICATION_BRACKET], [LEFT_IDENTITY_BRACKET]),
                ([RIGHT_IDENTITY_BRACKET], [LEFT_APPLICATION_BRACKET]),
            )
            for first_brackets, second_brackets in path_filter_pairs:
                filter_dfa = factory._get_custom_obligatory_dfa(first_brackets, second_brackets)
                staged = safe_compose(staged, pyfst_from_dfa(filter_dfa))

        print(get_transducer_outputs(staged, limit=10))

        prologue_inverse = get_prologue_inverse_transducer()
        final_transducer = safe_compose(staged, prologue_inverse)
        final_transducer.remove_epsilon()
        outputs = get_transducer_outputs(final_transducer, limit=10)
        print(outputs)

        return outputs