def test_bracket_markings(self):
    """Print the bracket-marked forms of the word 'zt' for a sample rule.

    Nothing is asserted: this is a debugging aid. The prologued word is
    piped through the obligatory filters (only when ``rule.obligatory`` is
    truthy), then the right- and left-context transducers, and finally the
    replace transducer. Outputs (``limit=10``) are printed after the
    context stage and again after the replace stage.
    """
    self.initialise_segment_table("plural_english_segment_table.txt")
    # Imported only after the segment table is initialised, as in the
    # original statement order.
    from bracket_rule_transducer import BracketRuleTransducer
    from bracket_rule_transducer import LEFT_IDENTITY_BRACKET, RIGHT_BRACKETS, LEFT_BRACKETS, RIGHT_IDENTITY_BRACKET

    # NOTE(review): the trailing False is presumably the rule's
    # "obligatory" flag -- confirm against Rule's constructor.
    rule = Rule([{"cons": "+"}], [{"voice": "-"}], [], [{"voice": "-"}], False)
    word = 'zt'

    marked = get_prologued_word(word)
    rule.extract_data_from_feature_bundle_lists()
    factory = BracketRuleTransducer(rule)

    if rule.obligatory:
        # Two obligatory filters, applied one after the other.
        bracket_pairs = (
            ([LEFT_IDENTITY_BRACKET], RIGHT_BRACKETS),
            ([RIGHT_IDENTITY_BRACKET], LEFT_BRACKETS),
        )
        for first_brackets, second_brackets in bracket_pairs:
            obligatory_filter = pyfst_from_dfa(
                factory._get_obligatory_dfa(first_brackets, second_brackets))
            marked = safe_compose(marked, obligatory_filter)

    right_context = factory._get_right_context_dfa()
    marked_right = safe_compose(marked, right_context)

    left_context = factory._get_left_context_dfa()
    marked_right_left = safe_compose(marked_right, left_context)
    print(get_transducer_outputs(marked_right_left, limit=10))

    replace = factory.get_replace_transducer()
    marked_replaced = safe_compose(marked_right_left, replace)
    print(get_transducer_outputs(marked_replaced, limit=10))
def get_left_to_right_application(self):
    """Build the transducer that applies this rule left to right.

    Stages are composed in this order: prologue, obligatory filter (when
    the rule is obligatory or ``_should_fix_transducer()`` is truthy),
    right context, weighted replace, left context, the filters specific to
    the transformation type, and the prologue inverse. Optional rules for
    which ``_should_fix_transducer()`` is falsy are additionally weighted
    at the rule level.

    Returns:
        The composed (and possibly weighted) transducer.
    """
    pipeline = get_prologue_transducer()
    if self.obligatory or self._should_fix_transducer():
        obligatory_filter = pyfst_from_dfa(
            self._get_obligatory_dfa([LEFT_IDENTITY_BRACKET], [RIGHT_IDENTITY_BRACKET]))
        pipeline = safe_compose(pipeline, obligatory_filter)

    right_context = self._get_right_context_dfa()
    replace = self.get_replace_transducer()
    replace = uniform_encoding.get_weighted_replace_transducer(replace, self)
    left_context = self._get_left_context_dfa()
    prologue_inverse = get_prologue_inverse_transducer()

    pipeline = chain_safe_compose(pipeline, right_context, replace, left_context)

    kind = self.transformation_type
    if kind == INSERTION:
        insertion_filter = pyfst_from_dfa(
            self._get_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_IDENTITY_BRACKET]))
        pipeline = safe_compose(pipeline, insertion_filter)

    # remove multiple paths
    if kind in (ASSIMILATION, INSERTION):
        ri_filter = pyfst_from_dfa(
            self._get_custom_obligatory_dfa([RIGHT_APPLICATION_BRACKET], [LEFT_IDENTITY_BRACKET]))
        pipeline = safe_compose(pipeline, ri_filter)
    if kind == DELETION:
        jl_filter = pyfst_from_dfa(
            self._get_custom_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_APPLICATION_BRACKET]))
        pipeline = safe_compose(pipeline, jl_filter)

    pipeline = safe_compose(pipeline, prologue_inverse)

    # Obligatory rules should be weighted only in the replace level, and
    # only non-right-context rules should be weighted this way.
    if not (self.obligatory or self._should_fix_transducer()):
        pipeline = uniform_encoding.get_weighted_rule_transducer(pipeline, self)
    return pipeline
def get_ignore_dfa(sigma, language_dfa, ignored_set):
    """Compose *language_dfa* with the intro transducer and return a DFA.

    The language DFA is converted to a transducer, composed with
    ``get_intro_transducer(sigma, ignored_set)``, and converted back to a
    DFA over the alphabet ``sigma | ignored_set``.

    NOTE(review): presumably the result accepts the language's strings
    with ``ignored_set`` symbols interspersed -- confirm against
    ``get_intro_transducer``.
    """
    extended_sigma = sigma | ignored_set
    intro = get_intro_transducer(sigma, ignored_set)
    language = pyfst_from_dfa(language_dfa)
    with_ignored = safe_compose(language, intro)
    return pyfst_to_dfa(with_ignored, extended_sigma)
def get_prologued_word(word):
    """Return the acceptor for *word* composed with the prologue transducer.

    Epsilon transitions are removed from the composition before it is
    returned.
    """
    # Function-scope import, kept as in the original.
    from bracket_rule_transducer import get_prologue_transducer

    acceptor = get_transducer_acceptor(word)
    prologued = safe_compose(acceptor, get_prologue_transducer())
    prologued.remove_epsilon()
    return prologued
def get_left_to_right_application(self):
    """Build the transducer that applies this rule left to right.

    Stages are composed in this order: prologue, obligatory filter (only
    for obligatory rules), right context, replace, left context, the
    filters specific to the transformation type, and the prologue inverse.

    Returns:
        The fully composed transducer.
    """
    stages = get_prologue_transducer()
    if self.obligatory:
        obligatory_filter = pyfst_from_dfa(
            self._get_obligatory_dfa([LEFT_IDENTITY_BRACKET], [RIGHT_IDENTITY_BRACKET]))
        stages = safe_compose(stages, obligatory_filter)

    right_context = self._get_right_context_dfa()
    replace = self.get_replace_transducer()
    left_context = self._get_left_context_dfa()
    prologue_inverse = get_prologue_inverse_transducer()

    stages = chain_safe_compose(stages, right_context, replace, left_context)

    kind = self.transformation_type
    if kind == INSERTION:
        insertion_filter = pyfst_from_dfa(
            self._get_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_IDENTITY_BRACKET]))
        stages = safe_compose(stages, insertion_filter)

    # remove multiple paths
    if kind in (ASSIMILATION, INSERTION):
        ri_filter = pyfst_from_dfa(
            self._get_custom_obligatory_dfa([RIGHT_APPLICATION_BRACKET], [LEFT_IDENTITY_BRACKET]))
        stages = safe_compose(stages, ri_filter)
    if kind == DELETION:
        jl_filter = pyfst_from_dfa(
            self._get_custom_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_APPLICATION_BRACKET]))
        stages = safe_compose(stages, jl_filter)

    return safe_compose(stages, prologue_inverse)
def get_outputs_of_word(self, word):
    """Return the outputs produced for *word* by this object's transducer.

    When ``self.get_transducer()`` is falsy the outputs of the plain word
    acceptor are returned. Otherwise the word acceptor is composed with
    the rule-set transducer, epsilons are removed, and the outputs of the
    composition are returned (``[]`` when the composition is empty).
    Intermediate transducers are passed to ``dot`` under fixed names.

    Raises:
        Exception: whatever ``get_transducer_outputs`` raises on the
            composed transducer, re-raised after a diagnostic line naming
            the word is printed.
    """
    word_transducer = get_transducer_acceptor(word)
    rule_set_transducer = self.get_transducer()
    if rule_set_transducer:
        dot(word_transducer, "word_transducer")
        dot(rule_set_transducer, "rule_set_transducer")
        word_rule_set_transducer = safe_compose(word_transducer, rule_set_transducer)
        word_rule_set_transducer.remove_epsilon()
        dot(word_rule_set_transducer, "word_rule_set_transducer")
        if len(word_rule_set_transducer):
            try:
                outputs = get_transducer_outputs(word_rule_set_transducer)
            except Exception:
                # The previous bare ``except`` printed this line and then
                # fell through to ``return outputs`` with ``outputs``
                # unbound, replacing the real error with an
                # UnboundLocalError (and also trapping KeyboardInterrupt).
                # Keep the diagnostic, but surface the real failure.
                print("get_outputs_of_word failed with word: {}".format(
                    word))
                raise
        else:
            outputs = []
    else:
        outputs = get_transducer_outputs(word_transducer)
    return outputs
def _rule_construction_helper(self, rule, word):
    """Apply *rule* to *word* stage by stage, printing intermediate outputs.

    The prologued word is composed, in order, with the obligatory
    filter(s) (only when ``rule.obligatory`` is truthy), the right-context
    transducer, the replace transducer, the left-context transducer and
    finally the prologue inverse. Outputs are printed with ``limit=10``
    along the way.

    Returns:
        The outputs (``limit=10``) of the final, epsilon-free transducer.
    """
    from bracket_rule_transducer import get_prologue_inverse_transducer
    from bracket_rule_transducer import BracketRuleTransducer
    from bracket_rule_transducer import LEFT_IDENTITY_BRACKET, RIGHT_IDENTITY_BRACKET, RIGHT_APPLICATION_BRACKET, LEFT_APPLICATION_BRACKET
    # Hard-coded switches for the optional stages below.
    remove_multiple_paths = False
    second_obligatory = True
    prologued_word_transducer = get_prologued_word(word)
    rule.extract_data_from_feature_bundle_lists()
    bracket_rule_transducer_factory = BracketRuleTransducer(rule)
    if rule.obligatory:
        obligatory_dfa = bracket_rule_transducer_factory._get_obligatory_dfa([LEFT_IDENTITY_BRACKET], [RIGHT_IDENTITY_BRACKET])
        obligatory_transducer = pyfst_from_dfa(obligatory_dfa)
        prologued_obligatory = safe_compose(prologued_word_transducer, obligatory_transducer)
        if second_obligatory:
            # Second obligatory filter, with the two bracket arguments swapped.
            custom_obligatory_transducer = pyfst_from_dfa(
                bracket_rule_transducer_factory._get_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_IDENTITY_BRACKET]))
            prologued_obligatory = safe_compose(prologued_obligatory, custom_obligatory_transducer)
    else:
        # Optional rule: no obligatory filtering at all.
        prologued_obligatory = prologued_word_transducer
    right_context_transducer = bracket_rule_transducer_factory._get_right_context_dfa()
    prologued_obligatory_right = safe_compose(prologued_obligatory, right_context_transducer)
    replace_transducer = bracket_rule_transducer_factory.get_replace_transducer()
    prologued_obligatory_right_replace = safe_compose(prologued_obligatory_right, replace_transducer)
    left_context_transducer = bracket_rule_transducer_factory._get_left_context_dfa()
    prologued_obligatory_right_replace_left = safe_compose(prologued_obligatory_right_replace, left_context_transducer)
    print(get_transducer_outputs(prologued_obligatory_right_replace_left, limit=10))
    # Optional stage that removes multiple paths (disabled by the flag above).
    # NOTE(review): indentation was lost in this file; both filters and the
    # print below are assumed to belong to this branch -- confirm.
    if remove_multiple_paths:
        custom_obligatory_transducer = pyfst_from_dfa(
            bracket_rule_transducer_factory._get_custom_obligatory_dfa([RIGHT_APPLICATION_BRACKET], [LEFT_IDENTITY_BRACKET]))
        prologued_obligatory_right_replace_left = safe_compose(prologued_obligatory_right_replace_left, custom_obligatory_transducer)
        custom_obligatory_transducer = pyfst_from_dfa(
            bracket_rule_transducer_factory._get_custom_obligatory_dfa([RIGHT_IDENTITY_BRACKET], [LEFT_APPLICATION_BRACKET]))
        prologued_obligatory_right_replace_left = safe_compose(prologued_obligatory_right_replace_left, custom_obligatory_transducer)
        print(get_transducer_outputs(prologued_obligatory_right_replace_left, limit=10))
    prologue_inverse_transducer = get_prologue_inverse_transducer()
    prologued_obligatory_right_replace_left_inverse = safe_compose(prologued_obligatory_right_replace_left, prologue_inverse_transducer)
    prologued_obligatory_right_replace_left_inverse.remove_epsilon()
    transducer_outputs = get_transducer_outputs(prologued_obligatory_right_replace_left_inverse, limit=10)
    print(transducer_outputs)
    return transducer_outputs