예제 #1
0
def get_char_to_lu_phones() -> Dict[str, List[str]]:
    """Group the ``lu`` codes stored in ``CharPhoneTable`` by character.

    Rows whose ``char`` or ``lu`` value equals the empty string are skipped.

    Returns:
        A dict mapping each character to the list of its ``lu`` values, in
        the order the rows were returned by the query.
    """
    lu_by_char: Dict[str, List[str]] = {}
    for row in CharPhoneTable.select():
        char, lu = row.char, row.lu
        if char == '' or lu == '':
            # Incomplete rows carry no usable mapping.
            continue
        lu_by_char.setdefault(char, []).append(lu)
    return lu_by_char
예제 #2
0
def load_char_phones() -> Dict[str, List[str]]:
    """Collect the ``full`` value of every ``CharPhoneTable`` row per character.

    Returns:
        A dict mapping each character to the list of ``full`` values found
        for it, in query order (duplicates are kept).
    """
    phones_by_char: Dict[str, List[str]] = {}
    for row in CharPhoneTable.select():
        phones_by_char.setdefault(row.char, []).append(row.full)
    return phones_by_char
예제 #3
0
def check_chars_pinyin(transformer: Dict[str, str], schema: ShuangPinSchema):
    """Recompute the shuangpin code of every character row and fix mismatches.

    For each row of ``CharPhoneTable`` the ``full`` value is split with
    ``split_sy`` into two parts, each part is looked up in ``transformer``,
    and the concatenation is compared with the column that belongs to
    ``schema``. Rows whose stored code differs are updated in bulk.

    Args:
        transformer: Lookup table applied to both parts returned by
            ``split_sy``; a missing part raises ``KeyError``.
        schema: One of ``XHE_SP_SCHEMA``, ``LU_SP_SCHEMA``,
            ``ZRM_SP_SCHEMA`` or ``BINGJI_SP_SCHEMA``.

    Raises:
        RuntimeError: If ``schema`` is not one of the supported schemas.
            This is checked before any row is read.
    """
    # Resolve the target column once instead of repeating the schema
    # dispatch for reading, writing and bulk-updating.
    schema_columns = (
        (XHE_SP_SCHEMA, 'xhe'),
        (LU_SP_SCHEMA, 'lu'),
        (ZRM_SP_SCHEMA, 'zrm'),
        (BINGJI_SP_SCHEMA, 'bingji'),
    )
    for known_schema, column in schema_columns:
        if schema == known_schema:
            field_name = column
            break
    else:
        raise RuntimeError(f"unknown schema {schema}")

    to_update_items = []
    for item in CharPhoneTable.select():
        s, y = split_sy(item.full)
        sp = transformer[s] + transformer[y]
        if getattr(item, field_name) != sp:
            setattr(item, field_name, sp)
            to_update_items.append(item)

    # Write all corrections in a single transaction.
    with db.atomic():
        CharPhoneTable.bulk_update(to_update_items,
                                   fields=[field_name],
                                   batch_size=100)

    print(to_update_items)
    print(f'update {len(to_update_items)} char items')
예제 #4
0
def get_exists_chars() -> Set[str]:
    """Return the set of distinct ``char`` values in ``CharPhoneTable``."""
    return {row.char for row in CharPhoneTable.select()}
    if len(sys.argv) != 1:
        print("USAGE: python3 generate_dd_txt.py ")
        sys.exit(1)

    fname, output_dir = sys.argv[0], "zrm_phone_xhe_shape"

    if not Path(output_dir).exists():
        os.makedirs(output_dir)

    char_to_shape = pipe(CharShapeTable.select(),
                         map(lambda e: (e.char, e.shapes)),
                         reduceby(lambda e: e[0], lambda e1, e2: e1),
                         valmap(lambda e: e[1]), dict)
    print(f"total {len(char_to_shape)} char shapes")

    char_to_phones = pipe(CharPhoneTable.select(),
                          map(lambda e: (e.char, e.zrm)),
                          groupby(lambda e: e[0]),
                          valmap(lambda phones: [e[1] for e in phones]), dict)
    print(f"total {len(char_to_phones)} char phones")

    one_hit_char_items = generate_one_hit_char(60000)
    top_single_chars_items = generate_topest_char(char_to_phones, 60000)
    sys_top_chars_data = f"{output_dir}/sys_top_chars_data.txt"
    with open(sys_top_chars_data, 'w', encoding='utf8') as fout:
        fout.write("---config@码表分类=主码-1\n")
        fout.write("---config@允许编辑=否\n")
        fout.write(f"---config@码表别名=简码单字\n")
        for item in one_hit_char_items.items():
            fout.write(f"{item[0]}#序{item[1]}\n")
        for item in top_single_chars_items.items():
예제 #6
0
再进入“高级设置→管理个性短语→导入个性短语”导入“xiaolu_word_for_baidu.ini”文件即可。
        ''')
        sys.exit(1)

    fname, output_dir = sys.argv[0], "baidu_mobile_ini"

    if not Path(output_dir).exists():
        os.makedirs(output_dir)

    char_to_shape = pipe(CharShapeTable.select(),
                         map(lambda e: (e.char, e.shapes)),
                         reduceby(lambda e: e[0], lambda e1, e2: e1),
                         valmap(lambda e: e[1]), dict)
    print(f"total {len(char_to_shape)} char shapes")

    char_to_phones = pipe(CharPhoneTable.select(),
                          map(lambda e: (e.char, e.xhe)),
                          groupby(lambda e: e[0]),
                          valmap(lambda phones: [e[1] for e in phones]), dict)
    print(f"total {len(char_to_phones)} char phones")

    all_items = []
    #单字部分
    all_items.extend(
        [tuple(e.split("\t")) for e in generate_one_hit_char(60000).keys()])
    all_items.extend([
        tuple(e.split("\t"))
        for e in generate_topest_char(char_to_phones, 60000)
    ])

    #系统单字部分
예제 #7
0
import sys, os
from collections import defaultdict
from datetime import datetime
from tables import db, WordPhoneTable, CharPhoneTable

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("USAGE: python3 dump_word_nophones.py word.txt")
        sys.exit(1)

    _, word_txt_path = sys.argv

    char_to_phones = defaultdict(list)
    for item in CharPhoneTable.select():
        if item.phones not in char_to_phones[item.char]:
            char_to_phones[item.char].append(item.phones)
    
    with open(word_txt_path, 'r', encoding='utf8') as fin:

        word_phones = []
        for line in fin:
            line = line.strip()
            if line == "" or line.startswith(";"):
                continue
            word = "".join([e for e in line if e not in "abcdefghijklmnopqrstuvwxyz"])
            if len(word) > 3: 
                continue
            
            phones = []
            for char in word:
                if char not in char_to_phones:
예제 #8
0

def fill_zrm(item: CharPhoneTable,
             transformer: Dict[str, str]) -> Tuple[CharPhoneTable, bool]:
    """Set ``item.zrm`` from the two parts of ``item.full``.

    Args:
        item: Row whose ``full`` value is split with ``split_sy``.
        transformer: Lookup table applied to both parts of the split.

    Returns:
        ``(item, True)`` when both parts were found in ``transformer`` and
        ``item.zrm`` was assigned; otherwise ``(item, False)`` after a
        message has been written to stderr and ``item`` left unchanged.
    """
    sy = split_sy(item.full)
    if sy[0] in transformer and sy[1] in transformer:
        item.zrm = transformer[sy[0]] + transformer[sy[1]]
        return item, True
    print(f"{sy} not in transformer", file=sys.stderr)
    return item, False


if __name__ == "__main__":

    null_phones_items = pipe(
        CharPhoneTable.select().where(CharPhoneTable.phones == ''),
        list,
    )
    if len(null_phones_items) != 0:
        pipe(
            null_phones_items,
            for_each(lambda e: print(e)),
        )
        print(f"null phones item is: {len(null_phones_items)}")
        sys.exit(1)

    del null_phones_items

    null_full_items = pipe(
        CharPhoneTable.select().where(CharPhoneTable.full == ''),
        list,
예제 #9
0
import sys, os
from datetime import datetime
from tables import db, CharPhoneTable

if __name__ == "__main__":
    # Import tab-separated "<char>\t<phones>" lines into CharPhoneTable,
    # skipping malformed lines and pairs that are already stored.
    if len(sys.argv) != 2:
        print("USAGE: python3 dump_char_phone_table.py char_phone.txt")
        sys.exit(1)

    char_phone_path = sys.argv[1]
    with open(char_phone_path, 'r', encoding='utf8') as fin:
        for raw_line in fin:
            line = raw_line.strip()
            fields = line.split('\t')
            if len(fields) != 2:
                print(f"ERROR line {line} in file {char_phone_path}")
                continue

            char, phones = (field.strip() for field in fields)
            already_stored = CharPhoneTable.select().where(
                CharPhoneTable.char == char,
                CharPhoneTable.phones == phones).count() > 0
            if already_stored:
                print(f"WARNING: char phone already exists, {line}")
                continue

            CharPhoneTable(char=char,
                           phones=phones,
                           priority=1,
                           updatedt=datetime.now()).save()
    print('done')

예제 #10
0
        raise RuntimeError(f"{sy} not in transformer")
    item.zrm = transformer[sy[0]] + transformer[sy[1]]
    return item


def fill_lu(item: CharPhoneTable, transformer: Dict[str, str]) -> CharPhoneTable:
    """Set ``item.lu`` from the two parts of ``item.full``.

    Args:
        item: Row whose ``full`` value is split with ``split_sy``.
        transformer: Lookup table applied to both parts of the split.

    Returns:
        The same ``item`` with ``lu`` assigned.

    Raises:
        RuntimeError: If either part of the split is not a key of
            ``transformer``.
    """
    sy = split_sy(item.full)
    if sy[0] in transformer and sy[1] in transformer:
        item.lu = transformer[sy[0]] + transformer[sy[1]]
        return item
    raise RuntimeError(f"{sy} not in transformer")


# Script entry point: report rows with an empty ``xhe`` value, then try to
# fill in empty ``full`` values and report any that remain empty.
if __name__ == "__main__":

    # Rows with an empty xhe code are only reported, not repaired here.
    null_xhe_count = CharPhoneTable.select().where(CharPhoneTable.xhe == '').count()
    if null_xhe_count != 0:
        print(f'{null_xhe_count} null xhe phones, please check manually.')

    null_full_count = CharPhoneTable.select().where(CharPhoneTable.full == '').count()
    if null_full_count != 0:
        # For every row with an empty ``full``: join the result of
        # get_full(char) into one string and hand it to update_full.
        # NOTE(review): pipe/map look like toolz's curried helpers, which
        # would make this a lazy iterator consumed by bulk_update — confirm.
        # NOTE(review): update_full presumably sets item.full and returns the
        # item; verify against its definition.
        to_update_full_items = pipe(CharPhoneTable.select().where(CharPhoneTable.full == ''),
             map(lambda e: (e, ''.join(get_full(e.char)))),
             map(lambda e: update_full(e[0], e[1])),
             )
        # Persist only the ``full`` column, 100 rows per batch, in one
        # transaction.
        with db.atomic():
            CharPhoneTable.bulk_update(to_update_full_items, fields=['full'], batch_size=100)
        del to_update_full_items
        # Re-count after the update so rows that are still empty get reported.
        null_full_count = CharPhoneTable.select().where(CharPhoneTable.full == '').count()
        if null_full_count != 0:
            print(f'{null_full_count} null full full phones, please check manually.')
예제 #11
0
    item.zrm = transformer[sy[0]] + transformer[sy[1]]
    return item


def fill_lu(item: CharPhoneTable,
            transformer: Dict[str, str]) -> CharPhoneTable:
    """Assign ``item.lu`` by translating both parts of ``item.full``.

    Args:
        item: Row whose ``full`` value is split with ``split_sy``.
        transformer: Lookup table applied to both parts of the split.

    Returns:
        The same ``item`` with ``lu`` assigned.

    Raises:
        RuntimeError: If either part of the split is not a key of
            ``transformer``.
    """
    sy = split_sy(item.full)
    both_known = sy[0] in transformer and sy[1] in transformer
    if not both_known:
        raise RuntimeError(f"{sy} not in transformer")
    item.lu = transformer[sy[0]] + transformer[sy[1]]
    return item


# Script entry point: report rows with an empty ``xhe`` value, then try to
# fill in empty ``full`` values.
if __name__ == "__main__":

    # Rows with an empty xhe code are only reported, not repaired here.
    null_xhe_count = CharPhoneTable.select().where(
        CharPhoneTable.xhe == '').count()
    if null_xhe_count != 0:
        print(f'{null_xhe_count} null xhe phones, please check manually.')

    null_full_count = CharPhoneTable.select().where(
        CharPhoneTable.full == '').count()
    if null_full_count != 0:
        # For every row with an empty ``full``: join the result of
        # get_full(char) into one string and hand it to update_full.
        # NOTE(review): pipe/map look like toolz's curried helpers, which
        # would make this a lazy iterator consumed by bulk_update — confirm.
        # NOTE(review): update_full presumably sets item.full and returns the
        # item; verify against its definition.
        to_update_full_items = pipe(
            CharPhoneTable.select().where(CharPhoneTable.full == ''),
            map(lambda e: (e, ''.join(get_full(e.char)))),
            map(lambda e: update_full(e[0], e[1])),
        )
        # Persist only the ``full`` column, 100 rows per batch, in one
        # transaction.
        with db.atomic():
            CharPhoneTable.bulk_update(to_update_full_items,
                                       fields=['full'],
                                       batch_size=100)