コード例 #1
0
def get_iterator(lang):
    """Return the word-break iterator cached for ``lang``.

    When the ``_iterators`` cache holds no entry for ``lang``, a new
    ``_icu.BreakIterator`` of kind ``_icu.UBRK_WORD`` is created, stored in
    the cache under ``lang`` and returned, so later calls with the same
    ``lang`` get the same object back.

    NOTE(review): unlike the text-processing helpers that use ``_lock``, this
    function does not take any lock itself -- confirm callers handle that.
    """
    cached = _iterators.get(lang)
    if cached is not None:
        return cached
    # Use the code produced by lang_as_iso639_1(); if that result is falsy,
    # pass the caller's string through unchanged.
    breaker = _icu.BreakIterator(
        _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
    _iterators[lang] = breaker
    return breaker
コード例 #2
0
def count_words(text, lang='en'):
    """Return how many entries the word-break iterator yields for ``text``.

    The iterator for ``lang`` is taken from the ``_iterators`` cache, or
    created with ``_icu.UBRK_WORD`` and cached on first use. The result is
    the length of whatever ``split2()`` returns after ``set_text(text)``
    (presumably one entry per word -- confirm against the ``_icu`` module).

    All work happens while holding ``_lock``, since the cached iterator is
    shared between calls and ``set_text`` changes the text it operates on.
    """
    with _lock:
        breaker = _iterators.get(lang)
        if breaker is None:
            # Fall back to the raw ``lang`` when no converted code is returned.
            breaker = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = breaker
        breaker.set_text(text)
        pieces = breaker.split2()
        return len(pieces)
コード例 #3
0
def index_of(needle, haystack, lang='en'):
    """Return the iterator's ``index(needle)`` result for ``haystack``.

    The word-break iterator for ``lang`` is fetched from the ``_iterators``
    cache (created with ``_icu.UBRK_WORD`` and cached if missing), loaded
    with ``haystack`` via ``set_text`` and then asked for ``index(needle)``.
    NOTE(review): presumably this is the position of ``needle`` as a whole
    word in ``haystack``; the exact return convention (e.g. value when not
    found) is defined by the ``_icu`` module -- confirm there.

    All work happens while holding ``_lock``, since the cached iterator is
    shared between calls and ``set_text`` changes the text it operates on.
    """
    with _lock:
        breaker = _iterators.get(lang)
        if breaker is None:
            # Fall back to the raw ``lang`` when no converted code is returned.
            breaker = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = breaker
        breaker.set_text(haystack)
        position = breaker.index(needle)
        return position
コード例 #4
0
def split_into_words_and_positions(text, lang='en'):
    """Return the word-break iterator's ``split2()`` result for ``text``.

    The iterator for ``lang`` is fetched from the ``_iterators`` cache
    (created with ``_icu.UBRK_WORD`` and cached if missing) and loaded with
    ``text`` via ``set_text``. The value of ``split2()`` is returned as-is.
    NOTE(review): judging by the function name this describes words together
    with their positions in ``text`` -- confirm the exact shape against the
    ``_icu`` module.

    All work happens while holding ``_lock``, since the cached iterator is
    shared between calls and ``set_text`` changes the text it operates on.
    """
    with _lock:
        breaker = _iterators.get(lang)
        if breaker is None:
            # Fall back to the raw ``lang`` when no converted code is returned.
            breaker = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = breaker
        breaker.set_text(text)
        pieces = breaker.split2()
        return pieces