Beispiel #1
0
def get_pinyin():
    """Build sets of converted pinyin readings from pypinyin's dictionary.

    Every comma-separated, non-blank reading found in the values of
    ``PINYIN_DICT`` is collected, then converted three times with the
    style codes ``8``, ``3`` and ``9`` (always with ``True`` as the third
    argument).  The visible part of this function only builds the sets; it
    does not return them.
    """
    from pypinyin.constants import PINYIN_DICT
    from pypinyin.style import convert

    # Gather every distinct reading, dropping whitespace-only fragments.
    readings = set()
    for entry in PINYIN_DICT.values():
        for reading in entry.split(","):
            if reading.strip():
                readings.add(reading)

    def _converted(style_code):
        # Convert every collected reading with one style code.
        return {convert(reading, style_code, True) for reading in readings}

    # NOTE(review): 8 / 3 / 9 look like the integer values of Style.TONE3,
    # Style.INITIALS and Style.FINALS_TONE3 -- confirm against pypinyin.
    pinyin_set = _converted(8)
    pin_set = _converted(3)
    yin_set = _converted(9)
Beispiel #2
0
def get_pinyin_for_match(pinyin_tone_list):
    """Map each pinyin to its ``(initial, final)`` pair.

    For every item of ``pinyin_tone_list`` the initial and the final are
    obtained from ``convert`` (``Style.INITIALS`` / ``Style.FINALS``, both
    with ``strict=True``).  The dictionary key is the initial and final
    concatenated; if both are empty the original item is used as the key.
    Only the first pair seen for a given key is kept.

    :param pinyin_tone_list: iterable of pinyin strings
    :return: dict mapping key -> ``(initial, final)``
    """
    matches = {}
    for pinyin_tone in pinyin_tone_list:
        initial = convert(pinyin_tone, style=Style.INITIALS, strict=True)
        final = convert(pinyin_tone, style=Style.FINALS, strict=True)

        # Fall back to the raw input when neither part could be extracted.
        key = f"{initial}{final}" if (initial or final) else pinyin_tone

        # setdefault keeps the first pair recorded for a key.
        matches.setdefault(key, (initial, final))

    return matches
Beispiel #3
0
def to_fixed(pinyin, style, strict=True):
    """Format a tone-marked pinyin according to the given pinyin style.

    :param pinyin: a single pinyin
    :param style: the pinyin style
    :param strict: whether to strictly follow the Hanyu Pinyin scheme when
                   handling initials and finals
    :return: the pinyin string formatted according to the style
    :rtype: unicode
    """
    # The input itself is handed to ``convert`` as its ``default`` value.
    formatted = convert(pinyin, style=style, strict=strict, default=pinyin)
    return formatted
Beispiel #4
0
def to_fixed(pinyin, style, strict=True):
    """Format a tone-marked pinyin according to the given pinyin style.

    :param pinyin: a single pinyin
    :param style: the pinyin style
    :param strict: whether to strictly follow the Hanyu Pinyin scheme when
                   handling initials and finals
    :return: the pinyin string formatted according to the style
    :rtype: unicode
    """
    # The input itself is handed to ``convert`` as its ``default`` value.
    formatted = convert(pinyin, style=style, strict=strict, default=pinyin)
    return formatted
Beispiel #5
0
def create_poly_dic():
    """Convert ``polyphones.txt`` into ``polyphones.json``.

    Each non-blank line of ``polyphones.txt`` (read as UTF-8) is split on
    whitespace.  The second field is a comma-separated list of readings and
    the last field is used as the dictionary key.  Every reading is passed
    through ``style.convert(..., style=8, strict=False)``; a trailing ``5``
    is appended when the converted reading does not end in a tone digit
    1-4.  The resulting key -> list-of-readings mapping is written to
    ``polyphones.json`` as indented JSON.

    The output file is written explicitly as UTF-8: the JSON is produced
    with ``ensure_ascii=False``, so it contains raw non-ASCII characters and
    must not depend on the platform's default encoding.
    """
    poly_dict = defaultdict(list)
    with codecs.open("polyphones.txt", 'r', encoding='utf-8') as f:
        lines = f.readlines()

    for item in lines:
        words = item.split()
        # Skip blank lines, including whitespace-only and '\r\n' lines.
        if not words:
            continue
        for p in words[1].split(','):
            # An empty fragment (e.g. from a trailing comma) has no reading.
            if not p:
                continue
            tmp_p = style.convert(p, style=8, strict=False)
            # Note: the source file gives the neutral tone no tone number,
            # but our dataset marks the neutral tone with 5.
            if tmp_p[-1] not in ('1', '2', '3', '4'):
                tmp_p += '5'
            poly_dict[words[-1]].append(tmp_p)

    json_str = json.dumps(poly_dict, ensure_ascii=False, indent=2)
    with open('polyphones.json', "w", encoding='utf-8') as json_file:
        json_file.write(json_str)
Beispiel #6
0
def test_finals_tone3_no_final():
    """Check ``Style.FINALS_TONE3`` conversion of 'ń' with both flag values."""
    # With True as the third argument the expected result is empty.
    flag_on_result = convert('ń', Style.FINALS_TONE3, True, None)
    assert flag_on_result == ''

    # With False as the third argument the expected result is 'n2'.
    flag_off_result = convert('ń', Style.FINALS_TONE3, False, None)
    assert flag_off_result == 'n2'
Beispiel #7
0
def test_finals_tone3_no_final():
    """Check ``Style.FINALS_TONE3`` conversion of 'ń' with the flag set."""
    # With True as the third argument the expected result is 'n2'.
    flag_on_result = convert('ń', Style.FINALS_TONE3, True, None)
    assert flag_on_result == 'n2'