Example #1
File: tokenise.py Project: edycop/brat
def jp_token_boundary_gen(text):
    # TODO: consider honoring WHITESPACE_TOKENIZATION for japanese also
    if TOKENIZATION is not None and TOKENIZATION != JAPANESE_TOKENIZATION:
        from message import Messager
        Messager.warning('Ignoring unexpected TOKENIZATION '
                'specification for Japanese.')
    from mecab import token_offsets_gen
    for o in token_offsets_gen(text):
        yield o
Example #2
def jp_token_boundary_gen(text):
    try:
        from mecab import token_offsets_gen
        for o in token_offsets_gen(text):
            yield o
    except ImportError:
        from message import Messager
        Messager.error('Failed to import MeCab, '
                       'falling back on whitespace tokenization. '
                       'Please check configuration and/or server setup.')
        for o in whitespace_token_boundary_gen(text):
            yield o
Example #3
File: tokenise.py Project: 52nlp/brat
def jp_token_boundary_gen(text):
    try:
        from mecab import token_offsets_gen
        for o in token_offsets_gen(text):
            yield o
    except ImportError:
        from message import Messager
        Messager.error('Failed to import MeCab, '
                       'falling back on whitespace tokenization. '
                       'Please check configuration and/or server setup.')
        for o in whitespace_token_boundary_gen(text):
            yield o
Example #4
def jp_token_boundary_gen(text):
    from mecab import token_offsets_gen
    for o in token_offsets_gen(text):
        yield o
Example #5
File: tokenise.py Project: omarghf1/c4v-py
def jp_token_boundary_gen(text):
    from mecab import token_offsets_gen
    for o in token_offsets_gen(text):
        yield o