for item in top_single_chars_items.items():
            fout.write(f"{item[0]}#序{item[1]}\n")

    sys_single_char_data = f"{output_dir}/sys_single_char_data.txt"
    with open(sys_single_char_data, 'w', encoding='utf8') as fout:
        fout.write("---config@码表分类=主码-系统码表\n")
        fout.write("---config@允许编辑=否\n")
        fout.write(f"---config@码表别名=系统单字\n")
        pipe(
            CharPhoneTable.select().order_by(CharPhoneTable.priority.desc()),
            filter(lambda e: e.char in char_to_shape),
            map(lambda e: f"{e.char}\t{e.zrm+char_to_shape[e.char]}#序40000"),
            for_each(lambda e: fout.write(e + '\n')),
        )

    del_words = pipe(DelWordTable.select(), map(lambda e: e.word), set)
    sys_word_data = f"{output_dir}/sys_word_data.txt"
    with open(sys_word_data, 'w', encoding='utf8') as fout:
        fout.write("---config@码表分类=主码-2\n")
        fout.write("---config@允许编辑=否\n")
        fout.write(f"---config@码表别名=系统词组\n")
        pipe(
            WordPhoneTable.select().order_by(fn.LENGTH(WordPhoneTable.word),
                                             WordPhoneTable.priority.desc()),
            filter(lambda e: e.word not in del_words),
            map(lambda e: (f'{e.word}\t{e.zrm}', e.word[0], e.word[-1])),
            filter(lambda e: e[1] in char_to_shape and e[2] in char_to_shape),
            map(lambda e:
                f'{e[0]}{char_to_shape[e[1]][0]}{char_to_shape[e[2]][-1]}#序20000'
                ), for_each(lambda e: fout.write(e + '\n')))
Exemple #2
0
if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(f"USAGE: python3 {sys.argv[0]} words.txt", file=sys.stderr)
        print("words format:word prioroty w1_yin w2_yin ...")
        sys.exit(1)

    _, words_path = sys.argv

    exist_words = set()
    exist_words = pipe(WordPhoneTable.select(),
                       map(lambda e: e.word),
                       set
                       )

    exist_words = exist_words | pipe(DelWordTable.select(),
                                     map(lambda e: e.word),
                                     set
                                     )

    xhe_transformer = get_full_to_xhe_transformer();
    zrm_transformer = get_full_to_zrm_transformmer();
    lu_transformer = get_full_to_lu_transformmer();

    with open(words_path, "r", encoding='utf8') as fin:
        to_add_words = pipe(fin,
                            map(lambda e: e.strip().split(' ')),
                            # filter(lambda e: len(e) in (1, 2)),
                            filter(lambda e: len(e[0]) <= 5),
                            filter(lambda e: not contain_alpha(
                                e[0]) and not contain_symbols(e[0])),
Exemple #3
0
    sys_single_char_data = f"{output_dir}/sys_single_char_data.txt"
    with open(sys_single_char_data, 'w', encoding='utf8') as fout:
        fout.write("---config@码表分类=主码-系统码表\n")
        fout.write("---config@允许编辑=否\n")
        fout.write(f"---config@码表别名=系统单字\n")
        pipe(
            CharPhoneTable.select().order_by(CharPhoneTable.priority.desc()),
            filter(lambda e: e.char in char_to_shape),
            map(
                lambda e: f"{e.char}\t{e.zrm+char_to_shape[e.char]}#序40000"),
            for_each(lambda e: fout.write(e+'\n')),
        )

    del_words = pipe(
        DelWordTable.select(),
        map(lambda e: e.word),
        set
    )
    sys_word_data = f"{output_dir}/sys_word_data.txt"
    with open(sys_word_data, 'w', encoding='utf8') as fout:
        fout.write("---config@码表分类=主码-2\n")
        fout.write("---config@允许编辑=否\n")
        fout.write(f"---config@码表别名=系统词组\n")
        pipe(
            WordPhoneTable.select().order_by(fn.LENGTH(WordPhoneTable.word),
                                             WordPhoneTable.priority.desc()),
            filter(lambda e: e.word not in del_words),
            map(lambda e: (f'{e.word}\t{e.zrm}', e.word[0], e.word[-1])),
            filter(lambda e: e[1] in char_to_shape and e[2] in char_to_shape),
            map(lambda e: f'{e[0]}{char_to_shape[e[1]][0]}{char_to_shape[e[2]][-1]}#序20000'),
Exemple #4
0
#encoding=utf8
import os, sys
from datetime import datetime
from tables import db, DelWordTable
from toolz.curried import map, filter, pipe, groupby, keymap

if __name__ == "__main__":
    # Register every word listed in the given file (one word per line) in
    # DelWordTable, skipping blank lines, repeated lines and words that the
    # table already contains.
    if len(sys.argv) != 2:
        # Usage errors go to stderr so they never mix with regular output.
        print("USAGE: python3 add_del_words.py words.txt", file=sys.stderr)
        sys.exit(1)

    _, words_path = sys.argv

    # Words already stored in the table; used to avoid inserting them again.
    exist_wordphones = {row.word for row in DelWordTable.select()}

    # Read and de-duplicate the candidates, closing the file before the
    # database is touched. dict.fromkeys drops repeats while keeping the
    # first-seen order of the input lines.
    with open(words_path, "r", encoding='utf8') as fin:
        stripped = (line.strip() for line in fin)
        new_words = dict.fromkeys(
            word for word in stripped
            if word != '' and word not in exist_wordphones
        )

    # De-duplication happens on the plain strings above, so the rows can be a
    # simple list and model instances never have to serve as dict keys.
    to_add_words = [
        DelWordTable(word=word, updatedt=datetime.now()) for word in new_words
    ]

    # Insert all rows in batches of 100 inside a single db.atomic() block.
    with db.atomic():
        DelWordTable.bulk_create(to_add_words, batch_size=100)