コード例 #1
0
ファイル: v01.py プロジェクト: tsnaomi/finnsyll
def syllabify(word):
    '''Return the syllabified form of `word`.

    The word is first passed through `replace_umlauts`, then through the
    rules T1, T2, T4, T5, T6 and T7 in that order, each rule receiving the
    output of the previous one. Finally `replace_umlauts` is called again
    with `put_back=True` and the first character of its result is dropped.
    '''
    current = replace_umlauts(word)

    # Each rule takes the word and returns the rewritten word.
    pipeline = (apply_T1, apply_T2, apply_T4, apply_T5, apply_T6, apply_T7)

    for rule in pipeline:
        current = rule(current)

    restored = replace_umlauts(current, put_back=True)

    # FENCEPOST: the leading character is discarded -- presumably a boundary
    # marker inserted by the rules; confirm against the apply_T* helpers.
    return restored[1:]
コード例 #2
0
def _syllabify(word):
    '''Syllabify `word` and report which rules were applied.

    `apply_T1` returns the rewritten word, two flags and the initial
    rule record. When the first flag is truthy, T2 and T4 are applied; when
    the second flag is truthy, T5, T6 and T7 are applied. What each rule
    reports is appended to the rule record with `+=`.

    Returns a `(word, applied_rules)` pair, where the word has been passed
    back through `replace_umlauts(..., put_back=True)`.
    '''
    normalized = replace_umlauts(word)
    current, run_vv_rules, run_vvv_rules, applied_rules = apply_T1(normalized)

    if run_vv_rules:
        current, t2_rules = apply_T2(current)
        current, t4_rules = apply_T4(current)
        applied_rules += t2_rules + t4_rules

    if run_vvv_rules:
        current, t5_rules = apply_T5(current)
        current, t6_rules = apply_T6(current)
        current, t7_rules = apply_T7(current)
        applied_rules += t5_rules + t6_rules + t7_rules

    return replace_umlauts(current, put_back=True), applied_rules
コード例 #3
0
def _syllabify(word):
    '''Syllabify `word` and report which rules were applied.

    After `replace_umlauts` and T1, the remaining rules are gated on the
    shape of the word: T2, T8 and T4 run only if the word contains two
    adjacent characters from the class `ieAyOauo` (presumably the vowels
    after umlaut replacement -- confirm), and T5, T6 and T7 run only if it
    contains three such characters in a row. The second check is made on
    the word as rewritten by the first group of rules.

    Returns a `(word, applied_rules)` pair, where the word has been passed
    back through `replace_umlauts(..., put_back=True)`.
    '''
    two_in_a_row = r'[^ieAyOauo]*([ieAyOauo]{2})[^ieAyOauo]*'
    three_in_a_row = r'[ieAyOauo]{3}'

    current, applied_rules = apply_T1(replace_umlauts(word))

    if re.search(two_in_a_row, current):
        current, t2_rules = apply_T2(current)
        current, t8_rules = apply_T8(current)
        current, t4_rules = apply_T4(current)
        applied_rules += t2_rules + t8_rules + t4_rules

    if re.search(three_in_a_row, current):
        current, t5_rules = apply_T5(current)
        current, t6_rules = apply_T6(current)
        current, t7_rules = apply_T7(current)
        applied_rules += t5_rules + t6_rules + t7_rules

    return replace_umlauts(current, put_back=True), applied_rules
コード例 #4
0
def syllabify(word):
    '''Yield ranked syllabifications of `word`, simplex or complex.

    A word containing a hyphen, a space or an equals sign is handed to
    `_syllabify_compound`; any other word goes to `_syllabify_simplex`.
    The results are collected into a list and passed to `rank`.

    Yields `(syllabified_word, rules)` pairs in the order `rank` produces
    them. Each word is passed through `replace_umlauts(..., put_back=True)`
    and converted with `str`; the first element of each `rules` value is
    dropped.
    '''
    if re.search(r'(-| |=)', word):
        splitter = _syllabify_compound
    else:
        splitter = _syllabify_simplex

    candidates = list(splitter(word))

    for variant, applied in rank(candidates):
        # post-process: restore umlauts and strip the leading rule element
        yield str(replace_umlauts(variant, put_back=True)), applied[1:]
コード例 #5
0
def _syllabify(word, T4=True, T1E=True):
    '''Syllabify `word` and report which rules were applied.

    Parameters:
        word: the word to syllabify.
        T4: when falsy, `apply_T4` is skipped and contributes an empty
            string to the rule record.
        T1E: forwarded unchanged to `apply_T1` as its `T1E` keyword.

    T2, T8, T9 and (optionally) T4 run only if the word contains two
    adjacent characters from the class `ieAyOauo`; T6, T5, T7 and a second
    pass of T2 run only if the word, as rewritten so far, contains three
    such characters in a row.

    Returns a `(word, rules)` pair, where the word has been passed back
    through `replace_umlauts(..., put_back=True)` and `rules` is `' T0'`
    if the accumulated rule record is falsy.
    '''
    two_in_a_row = r'[^ieAyOauo]*([ieAyOauo]{2})[^ieAyOauo]*'
    three_in_a_row = r'[ieAyOauo]{3}'

    current, rules = apply_T1(replace_umlauts(word), T1E=T1E)

    if re.search(two_in_a_row, current):
        current, t2 = apply_T2(current)
        current, t8 = apply_T8(current)
        current, t9 = apply_T9(current)

        if T4:
            current, t4 = apply_T4(current)
        else:
            t4 = ''

        rules += t2 + t8 + t9 + t4

    if re.search(three_in_a_row, current):
        # T6 is applied before T5, but the labels are recorded in the order
        # T5, T6, T7, T2.
        current, t6 = apply_T6(current)
        current, t5 = apply_T5(current)
        current, t7 = apply_T7(current)
        current, t2 = apply_T2(current)
        rules += t5 + t6 + t7 + t2

    current = replace_umlauts(current, put_back=True)

    if not rules:
        rules = ' T0'  # T0 means no rules have applied

    return current, rules