Exemple #1
0
def fill_lu(item: CharPhoneTable, transformer: Dict[str,
                                                    str]) -> CharPhoneTable:
    """Set ``item.lu`` from the split form of ``item.full``.

    ``item.full`` is passed through ``split_sy``; the first two components of
    the result are each looked up in ``transformer`` and the two mapped values
    are concatenated into ``item.lu``.

    Args:
        item: Record whose ``full`` attribute is read and whose ``lu``
            attribute is overwritten in place.
        transformer: Lookup table keyed by the components ``split_sy`` yields.

    Returns:
        The same ``item`` object, after mutation.

    Raises:
        RuntimeError: If either of the two components has no entry in
            ``transformer``.
    """
    parts = split_sy(item.full)
    # Both components must be known before anything is written to the item.
    if not (parts[0] in transformer and parts[1] in transformer):
        raise RuntimeError(f"{parts} not in transformer")
    head_code = transformer[parts[0]]
    tail_code = transformer[parts[1]]
    item.lu = head_code + tail_code
    return item
Exemple #2
0
def fill_zrm(item: CharPhoneTable,
             transformer: Dict[str, str]) -> Tuple[CharPhoneTable, bool]:
    """Set ``item.zrm`` from the split form of ``item.full``, if possible.

    ``item.full`` is passed through ``split_sy``; when the first two
    components of the result are both keys of ``transformer``, their mapped
    values are concatenated into ``item.zrm``. Otherwise the item is left
    unchanged and a diagnostic line is written to stderr.

    Args:
        item: Record whose ``full`` attribute is read and whose ``zrm``
            attribute may be overwritten in place.
        transformer: Lookup table keyed by the components ``split_sy`` yields.

    Returns:
        ``(item, True)`` when ``item.zrm`` was filled, ``(item, False)`` when
        a component was missing from ``transformer``.
    """
    parts = split_sy(item.full)
    if parts[0] in transformer and parts[1] in transformer:
        item.zrm = transformer[parts[0]] + transformer[parts[1]]
        return item, True

    # Unlike fill_lu, a missing component is reported rather than raised.
    print(f"{parts} not in transformer", file=sys.stderr)
    return item, False
Exemple #3
0
def fix_diff_s_same_y_full(item: CharPhoneTable) -> CharPhoneTable:
    """Rebuild ``item.full`` using a leading part derived from ``item.phones``.

    The first element of ``item.phones`` becomes the new leading part of
    ``item.full``, with ``"u"``, ``"i"`` and ``"v"`` expanded to ``"sh"``,
    ``"ch"`` and ``"zh"`` respectively (any other value is used as-is). The
    trailing part is the second component ``split_sy`` returns for the
    current ``item.full``.

    Args:
        item: Record whose ``full`` and ``phones`` attributes are read and
            whose ``full`` attribute is overwritten in place.

    Returns:
        The same ``item`` object, after mutation.
    """
    # NOTE(review): the keys look like double-pinyin codes for the two-letter
    # initials -- confirm against the scheme table used by the project.
    expansions = {"u": "sh", "i": "ch", "v": "zh"}

    split_full = split_sy(item.full)
    leading = item.phones[0]
    leading = expansions.get(leading, leading)
    item.full = leading + split_full[1]
    return item
Exemple #4
0
                                     map(lambda e: e.word), set)

    with open(words_path, "r", encoding='utf8') as fin:

        #FIXME: bug to fix, we have more phone type now.
        ft_dict = get_double_dict()

        to_add_words = pipe(
            fin,
            map(lambda e: e.strip().split('\t')),
            filter(lambda e: len(e) in (1, 2)),
            filter(lambda e: len(e[0]) <= 5),
            filter(lambda e: not contain_alpha(e[0]) and not contain_symbols(e[
                0])),
            filter(lambda e: e[0] not in exist_words),
            map(cols_to_item),
            map(lambda e:
                (e, map(lambda e: split_sy(e), lazy_pinyin(e.word)))),
            map(lambda e: attr.evolve(
                e[0], phones=''.join(full_to_double(e[1], ft_dict)))),
            map(lambda e: WordPhoneTable(word=e.word,
                                         phones=e.phones,
                                         priority=e.priority,
                                         updatedt=datetime.now())),
        )

        with db.atomic():
            WordPhoneTable.bulk_create(to_add_words, batch_size=100)

    print('done')
Exemple #5
0
def full_to_double(pinyin, full_to_two):
    """Map each split syllable to the concatenation of its two mapped parts.

    Args:
        pinyin: Iterable of indexable items; elements ``[0]`` and ``[1]`` of
            each item are used as keys into ``full_to_two``.
        full_to_two: Mapping from a syllable part to its replacement.

    Returns:
        A list with one entry per item of ``pinyin``: the mapped value of the
        first part followed by the mapped value of the second part.

    Raises:
        KeyError: If a part is not present in ``full_to_two``.
    """
    converted = []
    for syllable in pinyin:
        first_code = full_to_two[syllable[0]]
        second_code = full_to_two[syllable[1]]
        converted.append(first_code + second_code)
    return converted


def get_double_dict():
    """Build a ``full -> two`` dict from every row of ``FullToTwoTable``.

    Returns:
        A dict mapping each row's ``full`` attribute to its ``two`` attribute.

    Note:
        If two rows share the same ``full`` value, an error line is printed
        and the process exits with status 1 via ``sys.exit``.
    """
    mapping = {}
    for row in FullToTwoTable.select():
        key = row.full
        if key in mapping:
            # A repeated key makes the table ambiguous; abort immediately.
            print(f"ERROR in {key}")
            sys.exit(1)
        mapping[key] = row.two
    return mapping


if __name__ == "__main__":
    # Recompute the phones string of every stored word from its pinyin and
    # delete any row whose stored phones differ from the recomputed value.
    lookup = get_double_dict()

    for record in WordPhoneTable.select():
        text = record.word
        stored = record.phones
        syllables = [split_sy(py) for py in lazy_pinyin(text)]
        recomputed = ''.join(full_to_double(syllables, lookup))
        if stored == recomputed:
            continue
        # Report the mismatch before removing the row.
        print(f"diff in {record.id}, {text}, {stored}, {recomputed}")
        record.delete_instance()

    print("done")