def syllabify(word):
    '''Return the syllabified form of the given word.

    The word is first passed through replace_umlauts(), then through the
    rules T1, T2, T4, T5, T6 and T7 in that order, and finally through
    replace_umlauts(put_back=True). The leading element of that final
    result is sliced off before returning.
    '''
    rule_chain = (
        apply_T1,
        apply_T2,
        apply_T4,
        apply_T5,
        apply_T6,
        apply_T7,
    )

    syllabified = replace_umlauts(word)

    # each rule consumes the output of the previous one
    for apply_rule in rule_chain:
        syllabified = apply_rule(syllabified)

    restored = replace_umlauts(syllabified, put_back=True)

    # FENCEPOST: drop the first element of the restored result
    return restored[1:]
def _syllabify(word):
    '''Syllabify the given word and collect the rules that applied.

    Returns a (word, applied_rules) pair. apply_T1() supplies the initial
    rule record together with two flags; the first flag gates the T2/T4
    group and the second gates the T5/T6/T7 group.
    '''
    # NOTE(review): the two flag checks are laid out as independent
    # siblings here -- confirm this matches the intended structure.
    text, run_vv_rules, run_vvv_rules, rules = apply_T1(replace_umlauts(word))

    if run_vv_rules:
        text, t2 = apply_T2(text)
        text, t4 = apply_T4(text)
        rules += t2 + t4

    if run_vvv_rules:
        text, t5 = apply_T5(text)
        text, t6 = apply_T6(text)
        text, t7 = apply_T7(text)
        rules += t5 + t6 + t7

    return replace_umlauts(text, put_back=True), rules
def _syllabify(word):
    '''Syllabify the given word and collect the rules that applied.

    Returns a (word, applied_rules) pair. T1 always runs. T2, T8 and T4
    run when the word contains two adjacent characters from the class
    [ieAyOauo]; T5, T6 and T7 run when it (still) contains three.
    '''
    text = replace_umlauts(word)
    text, rules = apply_T1(text)

    # at least two adjacent characters from [ieAyOauo]
    has_pair = re.search(
        r'[^ieAyOauo]*([ieAyOauo]{2})[^ieAyOauo]*', text) is not None
    if has_pair:
        text, t2 = apply_T2(text)
        text, t8 = apply_T8(text)
        text, t4 = apply_T4(text)
        rules += t2 + t8 + t4

    # at least three adjacent characters from [ieAyOauo], checked on the
    # word as it stands after the rules above
    has_triple = re.search(r'[ieAyOauo]{3}', text) is not None
    if has_triple:
        text, t5 = apply_T5(text)
        text, t6 = apply_T6(text)
        text, t7 = apply_T7(text)
        rules += t5 + t6 + t7

    return replace_umlauts(text, put_back=True), rules
def syllabify(word):
    '''Yield ranked (syllabification, rules) pairs for the given word.

    A word containing a hyphen, a space or an equals sign is handed to
    _syllabify_compound(); any other word goes to _syllabify_simplex().
    The results are collected into a list, ordered by rank(), and
    post-processed before being yielded.
    '''
    if re.search(r'(-| |=)', word):
        splitter = _syllabify_compound
    else:
        splitter = _syllabify_simplex

    # rank() receives a fully materialised list of candidates
    candidates = list(splitter(word))

    for form, applied in rank(candidates):
        # post-process: run replace_umlauts(put_back=True) on the word
        # and drop the first element of the rules
        yield str(replace_umlauts(form, put_back=True)), applied[1:]
def _syllabify(word, T4=True, T1E=True):
    '''Syllabify the given word and collect the rules that applied.

    Returns a (word, rules) pair. T1E is forwarded to apply_T1(). When T4
    is falsy, apply_T4() is skipped and contributes an empty label. If no
    rule label was collected at all, rules is ' T0'.
    '''
    text = replace_umlauts(word)
    text, rules = apply_T1(text, T1E=T1E)

    # at least two adjacent characters from [ieAyOauo]
    if re.search(r'[^ieAyOauo]*([ieAyOauo]{2})[^ieAyOauo]*', text) is not None:
        text, t2 = apply_T2(text)
        text, t8 = apply_T8(text)
        text, t9 = apply_T9(text)

        if T4:
            text, t4 = apply_T4(text)
        else:
            t4 = ''

        rules += t2 + t8 + t9 + t4

    # at least three adjacent characters from [ieAyOauo], checked on the
    # word as it stands after the rules above
    if re.search(r'[ieAyOauo]{3}', text) is not None:
        text, t6 = apply_T6(text)
        text, t5 = apply_T5(text)
        text, t7 = apply_T7(text)
        text, t2 = apply_T2(text)

        # labels are recorded as T5, T6, T7, T2 although T6 runs before T5
        rules += t5 + t6 + t7 + t2

    text = replace_umlauts(text, put_back=True)

    if not rules:
        rules = ' T0'  # T0 means no rules have applied

    return text, rules