Esempio n. 1
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals

from copy import deepcopy
from itertools import chain
import os

from pypinyin.compat import text_type, callable_check
from pypinyin.constants import (PHRASES_DICT, PINYIN_DICT, RE_HANS, TONE,
                                NORMAL)
from pypinyin.utils import simple_seg, _replace_tone2_style_dict_to_default
from pypinyin.style import auto_discover, convert

auto_discover()


def seg(hans):
    if getattr(seg, 'no_jieba', None):
        ret = hans
        return simple_seg(ret)

    if seg.jieba is None:
        try:
            import jieba
            seg.jieba = jieba
        except ImportError:
            seg.no_jieba = True
        return seg(hans)
    else:
Esempio n. 2
0
from __future__ import unicode_literals

from copy import deepcopy
from itertools import chain

from pypinyin.compat import text_type, callable_check
from pypinyin.constants import (
    PHRASES_DICT, PINYIN_DICT,
    RE_HANS, Style
)
from pypinyin.contrib import mmseg
from pypinyin.utils import simple_seg, _replace_tone2_style_dict_to_default
from pypinyin.style import auto_discover, convert as convert_style

auto_discover()


def seg(hans):
    hans = simple_seg(hans)
    ret = []
    for x in hans:
        if not RE_HANS.match(x):   # 没有拼音的字符,不再参与二次分词
            ret.append(x)
        elif PHRASES_DICT:
            ret.extend(list(mmseg.seg.cut(x)))
        else:   # 禁用了词语库,不分词
            ret.append(x)
    return ret