def min_consonant_count_transducer(min_consonant_count=3, add_meta_arc=True):
    """Build a transducer accepting strings with enough consonants.

    States are numbered ``0 .. min_consonant_count``. Every state loops on
    every symbol of ``pt.abc.ALL_SYMS``, and each consonant in
    ``pt.abc.CONSONANTS`` additionally links a state to its successor.
    Only state ``min_consonant_count`` is final, so an accepting path has to
    take that many advancing consonant arcs.

    Args:
        min_consonant_count: Index of the single final state.
        add_meta_arc: When true, ``pt.AddPassThroughArcs`` is applied to the
            finished transducer.

    Returns:
        The constructed ``pt.Transducer``.
    """
    counter = pt.Transducer()
    for state in range(min_consonant_count + 1):
        # Any symbol may be consumed without advancing the count.
        for symbol in pt.abc.ALL_SYMS:
            counter.add_arc(state, state, symbol, symbol)
        if state == 0:
            # The start state has no predecessor to advance from.
            continue
        previous = state - 1
        for consonant in pt.abc.CONSONANTS:
            counter.add_arc(previous, state, consonant, consonant)
    counter[min_consonant_count].final = True
    if add_meta_arc:
        pt.AddPassThroughArcs(counter)
    return counter
def ApplyLoanwords(self, ar_vocab_groups, loanwords_transducer,
                   sw_pre_transducer, add_meta_arc, with_syllabification):
    """Build ``self.t_all`` and ``self.t_correct``, printing stage timings.

    Stages, in order:
      1. Union ``self.sw_pron_list`` into an SW word transducer (optionally
         with pass-through and syllabification arcs).
      2. Union ``self.ar_word_list`` into an AR word transducer.
      3. Chain ``sw_pre_transducer >> SW words``, then
         ``loanwords_transducer >>`` that result.
      4. Union ``group >> combined`` over every entry of ``ar_vocab_groups``
         into ``self.t_all``.
      5. Chain ``AR words >> self.t_all`` into ``self.t_correct``.

    Args:
        ar_vocab_groups: Iterable of transducers, each chained in front of
            the combined loanword/SW transducer.
        loanwords_transducer: Transducer chained in front of the SW
            vocabulary.
        sw_pre_transducer: Transducer chained in front of the SW word
            transducer.
        add_meta_arc: When true, ``pt.AddPassThroughArcs`` is applied to the
            SW word transducer.
        with_syllabification: When true, ``pt.AddSyllabificationArcs`` is
            applied to the SW word transducer.

    Returns:
        None. Results are stored on ``self.t_all`` and ``self.t_correct``.
    """

    def report(label, begin, finish):
        # Uniform "<label> <seconds> sec" progress line.
        print(label, finish - begin, "sec")

    started = time.time()

    sw_words = pt.UnionLinearChains(self.sw_pron_list)
    if add_meta_arc:
        pt.AddPassThroughArcs(sw_words)
    if with_syllabification:
        pt.AddSyllabificationArcs(sw_words)
    sw_words.arc_sort_input()
    sw_built = time.time()
    report(" building SW transducer took:", started, sw_built)

    ar_words = pt.UnionLinearChains(self.ar_word_list)
    ar_words.arc_sort_output()
    ar_built = time.time()
    report(" building AR transducer took:", sw_built, ar_built)

    print(" sw_pre_transducer")
    sw_vocab = sw_pre_transducer >> sw_words
    sw_vocab.arc_sort_input()
    pre_applied = time.time()
    report(" applying sw_pre_transducer took:", ar_built, pre_applied)

    print(" loanwords")
    combined = loanwords_transducer >> sw_vocab
    combined.arc_sort_input()
    loanwords_applied = time.time()
    report(" applying loanwords took:", pre_applied, loanwords_applied)

    print(" ar_vocab")
    self.t_all = pt.Transducer()
    for group in ar_vocab_groups:
        # One progress dot per vocabulary group, flushed immediately.
        print(".", sep="", end="")
        sys.stdout.flush()
        self.t_all.set_union(group >> combined)
    print()
    self.t_all.arc_sort_input()
    vocab_applied = time.time()
    report(" ar_vocab >> combined took:", loanwords_applied, vocab_applied)

    print(" t_correct")
    self.t_correct = ar_words >> self.t_all
    self.t_correct.arc_sort_output()
    # t_all is re-sorted by output only after it has been used above.
    self.t_all.arc_sort_output()
    finished = time.time()
    report(" building t_correct took:", vocab_applied, finished)
    report(" total ApplyLoanwords took:", started, finished)
def ar_morphology_transducer(add_meta_arc=True, with_syllabification=False):
    """Build the AR morphology transducer (optional prefix/suffix removal).

    The result is the concatenation of three parts: ``strip_transducer``
    over ``morphemes.AR_PREFIXES``, ``pt.accept_all_transducer()``, and
    ``strip_transducer`` over ``morphemes.AR_SUFFIXES``. It is returned
    without arc sorting.

    Args:
        add_meta_arc: When true, no operation weight is passed to
            ``strip_transducer`` and ``pt.AddPassThroughArcs`` is applied to
            the result; otherwise the weight is looked up in
            ``pt.abc.OT_CONSTRAINTS`` under ``"<<IT_MORPH>>"``.
        with_syllabification: When true, ``pt.AddSyllabificationArcs`` is
            applied to the result.

    Returns:
        The concatenated transducer.
    """
    rule_name = "<<IT_MORPH>>"
    operation_weight = (
        None if add_meta_arc else pt.abc.OT_CONSTRAINTS[rule_name]
    )

    morphology = strip_transducer(
        morphemes.AR_PREFIXES, operation_weight, add_meta_arc, rule_name)
    morphology.concatenate(pt.accept_all_transducer())
    suffix_part = strip_transducer(
        morphemes.AR_SUFFIXES, operation_weight, add_meta_arc, rule_name)
    morphology.concatenate(suffix_part)

    if add_meta_arc:
        pt.AddPassThroughArcs(morphology)
    if with_syllabification:
        pt.AddSyllabificationArcs(morphology)
    return morphology
def sw_morphology_transducer(add_meta_arc=True, with_syllabification=False):
    """Build the SW morphology transducer (optional prefix/suffix appending).

    The result is the concatenation of three parts: ``append_transducer``
    over ``morphemes.SW_PREFIXES``, ``pt.accept_all_transducer()``, and
    ``append_transducer`` over ``morphemes.SW_SUFFIXES``. It is returned
    without arc sorting.

    Args:
        add_meta_arc: When true, no operation weight is passed to
            ``append_transducer`` and ``pt.AddPassThroughArcs`` is applied to
            the result; otherwise the weight is looked up in
            ``pt.abc.OT_CONSTRAINTS`` under ``"<<MT_MORPH>>"``.
        with_syllabification: When true, ``pt.AddSyllabificationArcs`` is
            applied to the result.

    Returns:
        The concatenated transducer.
    """
    rule_name = "<<MT_MORPH>>"
    operation_weight = (
        None if add_meta_arc else pt.abc.OT_CONSTRAINTS[rule_name]
    )

    morphology = append_transducer(
        morphemes.SW_PREFIXES, operation_weight, add_meta_arc, rule_name)
    morphology.concatenate(pt.accept_all_transducer())
    suffix_part = append_transducer(
        morphemes.SW_SUFFIXES, operation_weight, add_meta_arc, rule_name)
    morphology.concatenate(suffix_part)

    if add_meta_arc:
        pt.AddPassThroughArcs(morphology)
    if with_syllabification:
        pt.AddSyllabificationArcs(morphology)
    return morphology
def vowel_deletion_transducer(add_meta_arc=True):
    """Build a transducer that may delete vowels.

    State 0 is the only final state and loops on every symbol of
    ``pt.abc.ALL_SYMS``. Each vowel in ``pt.abc.VOWELS`` gets its own
    intermediate state (numbered from 1): the arc into it reads the vowel
    and emits ``pt.abc.EPSILON``, and the arc back to state 0 records the
    ``"<<MAX-V>>"`` rule.

    Args:
        add_meta_arc: When true, the return arc emits the rule name as its
            output symbol and ``pt.AddPassThroughArcs`` is applied at the
            end. Otherwise the return arc is epsilon:epsilon and carries the
            rule's entry from ``pt.abc.OT_CONSTRAINTS`` as a fifth argument.

    Returns:
        The constructed ``pt.Transducer``.
    """
    rule_name = "<<MAX-V>>"
    epsilon = pt.abc.EPSILON

    deleter = pt.Transducer()
    for symbol in pt.abc.ALL_SYMS:
        deleter.add_arc(0, 0, symbol, symbol)

    for node, vowel in enumerate(pt.abc.VOWELS, start=1):
        # Consume the vowel, emitting nothing.
        deleter.add_arc(0, node, vowel, epsilon)
        # Return to the hub state, recording the rule on the way back.
        if add_meta_arc:
            deleter.add_arc(node, 0, epsilon, rule_name)
        else:
            deleter.add_arc(node, 0, epsilon, epsilon,
                            pt.abc.OT_CONSTRAINTS[rule_name])

    deleter[0].final = True
    if add_meta_arc:
        pt.AddPassThroughArcs(deleter)
    return deleter
def AssertReachable(ar_str, sw_str, ar_post_transducer, loanwords_transducer,
                    sw_pre_transducer, add_meta_arc=True,
                    with_syllabification=False):
    """Print whether ``sw_str`` can be reached from ``ar_str``.

    Three chained transducers are built and each is checked for emptiness
    with ``len(...) == 0``: loanwords with the output side, the input side
    with loanwords, and the input side with the first result. Diagnostics
    go to stdout only; nothing is raised or returned.

    Args:
        ar_str: Source word, turned into a linear chain and chained with
            ``ar_post_transducer``.
        sw_str: Target word, turned into a linear chain and chained behind
            ``sw_pre_transducer``.
        ar_post_transducer: Transducer applied after the input chain.
        loanwords_transducer: Transducer linking the input and output sides.
        sw_pre_transducer: Transducer applied in front of the output chain.
        add_meta_arc: When true, pass-through arcs are added to the output
            chain and the output side is additionally chained with
            ``pt.weights_transducer()``.
        with_syllabification: When true, syllabification arcs are added to
            the output chain.

    Returns:
        None.
    """
    print("Constructing input transducer for word:", ar_str)
    source_side = pt.linear_chain(ar_str)
    source_side = source_side >> ar_post_transducer

    print("Constructing output transducer for word:", sw_str)
    target_side = pt.linear_chain(sw_str)
    if add_meta_arc:
        pt.AddPassThroughArcs(target_side)
    if with_syllabification:
        pt.AddSyllabificationArcs(target_side)
    target_side = sw_pre_transducer >> target_side
    if add_meta_arc:
        target_side = target_side >> pt.weights_transducer()

    target_side.arc_sort_input()
    source_side.arc_sort_output()

    print("Combining loanwords with output")
    loan_to_target = loanwords_transducer >> target_side
    if len(loan_to_target) == 0:
        print("NOT REACHABLE: loanwords_transducer >> out_t")

    print("Testing input with loanwords")
    source_to_loan = source_side >> loanwords_transducer
    if len(source_to_loan) == 0:
        print("NOT REACHABLE: in_t >> loanwords_transducer")

    print("Combining with input")
    end_to_end = source_side >> loan_to_target
    if len(end_to_end) == 0:
        print("NOT REACHABLE: in_t >> test_t1")
    else:
        print("Reachable!")
        print("Printing full paths")
        pt.PrintFullPaths(end_to_end, num_shortest=1)
    print("AssertReachable done.")