def __init__(self, w, h, x, y, font=None, callback=None):
    """Set up the text box geometry, drawing resources and pinyin-input state.

    :param w: width of the text box
    :param h: height of the text box
    :param x: text box coordinate (x)
    :param y: text box coordinate (y)
    :param font: font used inside the text box; a system font is created
        when this is None
    :param callback: function called after Enter is pressed in the text box
    """
    # Geometry, content and callback.
    self.width, self.height = w, h
    self.x, self.y = x, y
    self.text = ""  # text box content
    self.callback = callback

    # Background surface.
    self.__surface = pygame.Surface((w, h))

    # The fallback font may not look great; passing a font explicitly is
    # recommended because it is easier to tune.
    self.font = (
        pygame.font.SysFont('microsoftyaheimicrosoftyaheiui', 16)
        if font is None
        else font
    )

    # Pinyin input-method state.
    self.dagparams = DefaultDagParams()
    self.state = 0  # 0: initial state, 1: entering pinyin
    self.page = 1  # current candidate page
    self.limit = 5  # number of Chinese characters shown
    self.pinyin = ''
    self.word_list = []  # candidate words
    self.word_list_surf = None  # surface for the candidate words
    self.buffer_text = ''  # association buffer string
def genarate_word_error(sents):
    """Build (sentence, corrupted sentence) pairs by swapping in a homophone.

    For each sentence one multi-character word (as segmented by ``jieba``)
    is chosen at random, converted to pinyin with ``lazy_pinyin`` and
    converted back with ``dag``. A conversion that differs from the chosen
    word replaces its first occurrence in the sentence.

    :param sents: iterable of sentence strings
    :return: list of ``(sent, err_sent)`` tuples; a sentence is omitted when
        it has no multi-character word, when ``dag`` raises ``KeyError``, or
        when no differing replacement is available
    """
    ans = []
    dagparams = DefaultDagParams()
    for sent in sents:
        # Only multi-character words are eligible. Filtering up front avoids
        # looping forever (or sampling from an empty list) when the sentence
        # has no such word.
        candidates = [word for word in jieba.cut(sent) if len(word) > 1]
        if not candidates:
            continue
        select_word = random.choice(candidates)

        # Pinyin of the selected word.
        pinyin_list = lazy_pinyin(select_word)
        try:
            result2 = dag(dagparams, pinyin_list, path_num=5, log=True)
        except KeyError:
            continue

        error_word = select_word
        if len(result2) > 1:
            # Choose among the conversions that actually differ from the
            # original word, so there is nothing to retry indefinitely.
            joined = (''.join(item.path) for item in result2)
            alternatives = [text for text in joined if text != select_word]
            if alternatives:
                error_word = random.choice(alternatives)

        # Replace the first occurrence of the selected word.
        word_index = sent.find(select_word)
        tail_start = word_index + len(select_word)
        err_sent = sent[:word_index] + error_word + sent[tail_start:]
        if err_sent != sent:
            ans.append((sent, err_sent))
    return ans
def pinyin_2_hanzi(pinyinList):
    """Convert a pinyin list and return the first piece of the first candidate.

    :param pinyinList: list of pinyin syllables passed to ``dag``
    :return: element 0 of the ``path`` of the first candidate returned by ``dag``
    """
    from Pinyin2Hanzi import DefaultDagParams
    from Pinyin2Hanzi import dag

    params = DefaultDagParams()
    candidates = dag(params, pinyinList, path_num=10, log=True)
    # Take the first value only.
    first_candidate = candidates[0]
    return first_candidate.path[0]
def pinyin_2_hanzi(pinyinList):
    """Print the score and joined text of each conversion candidate.

    :param pinyinList: list of pinyin syllables passed to ``dag``
    :return: None; results are printed
    """
    from Pinyin2Hanzi import DefaultDagParams
    from Pinyin2Hanzi import dag

    params = DefaultDagParams()
    # path_num=10 is the number of candidates requested.
    for candidate in dag(params, pinyinList, path_num=10, log=True):
        score = candidate.score
        text = ''.join(candidate.path)  # conversion result
        print(score, text)
def pinyin_to_chinese(self, data):
    """Print each conversion candidate for the given pinyin data.

    :param data: pinyin data passed to ``dag``
    :return: None; every candidate is printed as ``<score>: <path>``
    """
    params = DefaultDagParams()
    candidates = dag(params, data, path_num=10, log=True)
    for candidate in candidates:
        label = str(candidate.score) + ":"
        print(label, candidate.path)
def pinyin_2_hanzi(self, pinyinList):
    """Combine the phrase candidates of every syllable into all concatenations.

    Each syllable is looked up on its own with ``get_phrase``; the results
    are combined left to right, so the output holds every concatenation of
    one candidate per syllable.

    :param pinyinList: iterable of pinyin syllables
    :return: list of concatenated candidates, or None when ``pinyinList``
        is empty
    """
    from Pinyin2Hanzi import DefaultDagParams

    params = DefaultDagParams()
    combined = None
    for syllable in pinyinList:
        phrases = params.get_phrase([syllable], 10000)
        options = [entry[0] for entry in phrases]
        if combined is None:
            # First syllable: its candidates seed the combinations.
            combined = options
            continue
        extended = []
        for prefix in combined:
            for suffix in options:
                extended.append(prefix + suffix)
        combined = extended
    return combined
def pinyin_2_hanzi(pinyin_str):
    """Convert a whitespace-separated pinyin string to Chinese text.

    :param pinyin_str: pinyin syllables separated by whitespace
    :return: joined path of the single requested candidate, or None (after
        logging) when ``dag`` returns nothing
    """
    syllables = pinyin_str.split()
    params = DefaultDagParams()
    # Request one candidate only.
    candidates = dag(params, syllables, path_num=1, log=True)
    if not candidates:
        logger.info("转化有误:" + pinyin_str)
        return None
    # Conversion result.
    return ''.join(candidates[0].path)
def pinyin_to_hanzi(pinyin, Topk=5):
    """Print the top conversion candidates for a pinyin sequence.

    Chinese characters are ambiguous, so there is no one-to-one mapping from
    pinyin; only the ``Topk`` candidates requested from ``dag`` are printed.

    Works on both Python 2 and Python 3: output goes through a single
    formatted string instead of the Python 2 ``print`` statement, and path
    pieces are decoded only when they are byte strings.

    :param pinyin: pinyin sequence passed to ``dag``
    :param Topk: number of candidates requested (``path_num``)
    :return: None; each candidate is printed as ``<score> <text>``
    """
    translator = DefaultDagParams()
    result = dag(translator, pinyin, path_num=Topk, log=True)
    for item in result:
        score = item.score  # score
        # Conversion result. Byte strings are decoded as UTF-8; text strings
        # have no usable ``decode`` on Python 3 and are kept as-is.
        pieces = [
            one.decode('utf-8') if isinstance(one, bytes) else one
            for one in item.path
        ]
        print(u"%s %s" % (score, u''.join(pieces)))
def pinyin_2_hanzi(sentences):
    """Round-trip text through pinyin and print the conversion candidates.

    The input is converted to pinyin with ``lazy_pinyin``, the pinyin list
    is printed, and then the score and path of each ``dag`` candidate.

    :param sentences: text passed to ``lazy_pinyin``
    :return: None; results are printed
    """
    from Pinyin2Hanzi import DefaultDagParams
    from Pinyin2Hanzi import dag

    params = DefaultDagParams()
    syllables = lazy_pinyin(sentences)
    print(syllables)
    # path_num is the number of candidates requested.
    for candidate in dag(params, syllables, path_num=3):
        score = candidate.score
        path = candidate.path  # conversion result
        print(score, path)
def pinyin_2_hanzi(word):
    """Return the first path piece of each candidate for a Chinese word.

    :param word: text checked with ``Pinyin2Hanzi.is_chinese``
    :return: list holding ``path[0]`` of every ``dag`` candidate, or the
        string "Null" when the check fails
    """
    if not Pinyin2Hanzi.is_chinese(word):
        return "Null"

    syllables = lazy_pinyin(word)
    params = DefaultDagParams()
    candidates = dag(params, syllables, path_num=3, log=True)
    return [candidate.path[0] for candidate in candidates]
def pinyin_2_hanzi(self, pinyinList):
    """Return the path of the highest-scoring conversion candidate.

    :param pinyinList: list of pinyin syllables passed to ``dag``
    :return: ``path`` of the candidate with the highest score; None when
        picking it raises (the error and a hint are printed instead)
    """
    params = DefaultDagParams()
    # path_num=10 is the number of candidates requested.
    candidates = dag(params, pinyinList, path_num=10, log=True)
    try:
        scored = [[candidate.score, candidate.path] for candidate in candidates]
        # Highest score first; the sort is stable, so ties keep their order.
        scored.sort(key=itemgetter(0), reverse=True)
        return scored[0][1]
    except Exception as e:
        print(e)
        print("输入异常,请重新输入拼音")
def pinyin2hanzi(pinyin_list):
    """Collect the conversion paths that consist of at most one piece.

    :param pinyin_list: iterable of pinyin lists, each passed to ``dag``
    :return: list of paths; paths with more than one element are dropped
    """
    params = DefaultDagParams()
    entities = []
    for line in pinyin_list:
        candidates = dag(params, line, path_num=5, log=True)
        paths = (candidate.path for candidate in candidates)  # conversion results
        entities.extend(path for path in paths if len(path) <= 1)
    return entities
def pinyin_to_hanzi(pinyin, Topk=5, Log=True):
    """Convert pinyin to Chinese candidates.

    Chinese characters are ambiguous, so there is no one-to-one mapping from
    pinyin; the ``Topk`` candidates requested from ``dag`` are returned.

    :param pinyin: pinyin sequence passed to ``dag`` (it is also printed)
    :param Topk: number of candidates requested (``path_num``)
    :param Log: forwarded to ``dag`` as ``log``
    :return: the candidate list produced by ``dag``
    """
    print(pinyin)
    return dag(DefaultDagParams(), pinyin, path_num=Topk, log=Log)
def pinyin_2_hanzi(pinyin_str):
    """Convert a whitespace-separated pinyin string to Chinese text.

    Example:
        zhao qing shi ding hu qu fang di chan xie hui --- 肇庆市鼎湖区房地产协会

    :param pinyin_str: pinyin syllables separated by whitespace
    :return: joined path of the single requested candidate, or ``''`` when
        ``dag`` returns nothing (the failure is logged first)
    """
    pinyin_list = pinyin_str.split()
    dagParams = DefaultDagParams()
    # Request one candidate only.
    result = dag(dagParams, pinyin_list, path_num=1, log=True)
    if not result:
        # Log before returning: the original placed this call after the
        # return statement, where it could never run.
        logger.info("转化有误:" + pinyin_str)
        return ''
    # Conversion result.
    return ''.join(result[0].path)
def __init__(self):
    """Create the conversion parameters and the pinyin spelling tables.

    Sets up the default HMM and DAG parameter objects, an empty result
    string, the list of initials (``shengmu``) and one list of finals per
    initial (``ym_<initial>``), plus ``ym``, which gathers those lists.
    """
    # Initialization
    self.hmmparams = DefaultHmmParams()
    self.dagparams = DefaultDagParams()
    self.result = ''
    # Pinyin initials; two-letter initials come last.
    self.shengmu = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'w', 'x', 'y', 'z', 'ch', 'sh', 'zh']
    # NOTE(review): presumably the syllables that can stand without an initial - confirm.
    self.yy = ['a', 'ai', 'an', 'ang', 'ao', 'e', 'en', 'eng', 'er', 'o', 'ou', 'ong']
    # One list per initial; presumably the finals allowed after that initial - confirm against usage.
    self.ym_b = ["a", "ai", "an", "ang", "ao", "ei", "en", "eng", "i", "ian", "iao", "ie", "in", "ing", "o", "u"]
    self.ym_c = ["a", "ai", "an", "ang", "ao", "e", "en", "eng", "i", "ong", "ou", "u", "uan", "ui", "un", "uo"]
    self.ym_d = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ia", "ian", "iao", "ie", "ing", "iu", "ong", "ou", "u", "uan", "ui", "un", "uo"]
    self.ym_f = ["a", "an", "ang", "ei", "en", "eng", "iao", "o", "ou", "u"]
    self.ym_g = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "ong", "ou", "u", "uai", "uan", "uang", "ui", "un", "uo"]
    self.ym_h = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "ong", "ou", "u", "ua", "uai", "uan", "uang", "ui", "un", "uo"]
    self.ym_j = ["i", "ia", "ian", "iang", "iao", "ie", "in", "ing", "iong", "iu", "u", "uan", "ue", "un"]
    self.ym_k = ["a", "ai", "an", "ang", "ao", "e", "en", "eng", "ong", "ou", "u", "ui", "un", "uo"]
    self.ym_l = ["a", "ai", "an", "ang", "ao", "e", "ei", "eng", "i", "ia", "ian", "iao", "ie", "in", "ing", "iu", "o", "ong", "ou", "u", "uan", "un", "uo", "v", "ve"]
    self.ym_m = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ian", "iao", "ie", "in", "ing", "iu", "o", "ou", "u"]
    self.ym_n = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ian", "iang", "iao", "ie", "in", "ing", "iu", "ong", "ou", "u", "uan", "un", "uo", "v", "ve"]
    self.ym_p = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ian", "iao", "ie", "in", "ing", "o", "ou", "u"]
    self.ym_q = ["i", "ia", "ian", "iang", "iao", "ie", "in", "ing", "iong", "iu", "u", "uan", "ue", "un"]
    self.ym_r = ["an", "ang", "ao", "e", "en", "eng", "i", "ong", "ou", "u", "ua", "uan", "ui", "un", "uo"]
    self.ym_s = ["a", "ai", "an", "ang", "ao", "e", "en", "eng", "i", "ong", "ou", "u", "uan", "ui", "un", "uo"]
    self.ym_t = ["a", "ai", "an", "ang", "ao", "e", "ei", "eng", "i", "ian", "iao", "ie", "ing", "ong", "ou", "u", "uan", "ui", "un", "uo"]
    self.ym_w = ["a", "ai", "an", "ang", "ei", "en", "eng", "o", "u"]
    self.ym_x = ["i", "ia", "ian", "iang", "iao", "ie", "in", "ing", "iong", "iu", "u", "uan", "ue", "un"]
    self.ym_y = ["a", "an", "ang", "ao", "e", "i", "in", "ing", "o", "ong", "ou", "u", "uan", "ue", "un"]
    self.ym_z = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ong", "ou", "u", "uan", "ui", "un", "uo"]
    self.ym_ch = ["a", "ai", "an", "ang", "ao", "e", "en", "eng", "i", "ong", "ou", "u", "ua", "uai", "uan", "uang", "ui", "un", "uo"]
    self.ym_sh = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ou", "u", "ua", "uai", "uan", "uang", "ui", "un", "uo"]
    self.ym_zh = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ong", "ou", "u", "ua", "uai", "uan", "uang", "ui", "un", "uo"]
    # Index 0 is self.yy; the remaining entries follow the order of
    # self.shengmu, so self.ym[i + 1] belongs to self.shengmu[i].
    self.ym = [self.yy, self.ym_b, self.ym_c, self.ym_d, self.ym_f, self.ym_g, self.ym_h, self.ym_j, self.ym_k, self.ym_l, self.ym_m, self.ym_n, self.ym_p, self.ym_q, self.ym_r, self.ym_s, self.ym_t, self.ym_w, self.ym_x, self.ym_y, self.ym_z, self.ym_ch, self.ym_sh, self.ym_zh ]
def get_chengyu(word):
    """Pick a random idiom found for the homophone candidates of ``word``.

    The word is converted to pinyin, converted back with ``dag``, and
    ``find_chengyu`` is called on the first path piece of every candidate.
    One entry is drawn at random from the combined results.

    :param word: text checked with ``Pinyin2Hanzi.is_chinese``
    :return: one randomly chosen entry from the combined ``find_chengyu``
        results, or the string "Null" when ``word`` fails the check or when
        nothing was found
    """
    if not Pinyin2Hanzi.is_chinese(word):
        return "Null"

    word_pinyin = lazy_pinyin(word)
    dagParams = DefaultDagParams()
    result = dag(dagParams, word_pinyin, path_num=3, log=True)
    word_list = [item.path[0] for item in result]

    # Flatten the per-candidate results into one pool.
    # NOTE(review): assumes find_chengyu returns an iterable of idioms - confirm.
    pool = list(chain.from_iterable(find_chengyu(avg_word) for avg_word in word_list))
    if not pool:
        # Previously random.randint(0, -1) raised ValueError here; report
        # "no result" with the same sentinel as the non-Chinese branch.
        return "Null"
    return random.choice(pool)
def __init__(self, w, h, x, y, callback=None):
    """Set up the text box geometry, drawing resources and pinyin-input state.

    :param w: width of the text box
    :param h: height of the text box
    :param x: text box coordinate (x)
    :param y: text box coordinate (y)
    :param callback: stored on the instance as ``self.callback``
    """
    self.font = pygame.font.SysFont('microsoftyaheimicrosoftyaheiui', 16)

    # Geometry, content and callback.
    self.width, self.height = w, h
    self.x, self.y = x, y
    self.text = ""  # text box content
    self.callback = callback

    # Background surface, filled with a near-white colour.
    background = pygame.Surface((w, h))
    background.fill((250, 250, 250))
    self.__surface = background

    # Pinyin input-method state.
    self.dagparams = DefaultDagParams()
    self.state = 0  # 0: initial state, 1: entering pinyin
    self.page = 1  # current candidate page
    self.limit = 5  # number of Chinese characters shown
    self.pinyin = ''
    self.word_list = []  # candidate words
    self.word_list_surf = None  # surface for the candidate words
    self.buffer_text = ''  # association buffer string
# coding: utf-8 from __future__ import (print_function, unicode_literals) import sys sys.path.append('..') from Pinyin2Hanzi import DefaultDagParams from Pinyin2Hanzi import dag dagparams = DefaultDagParams() result = dag(dagparams, ['wo']) for item in result: print(item.score, '/'.join(item.path)) print(20 * '*') result = dag(dagparams, ['ni', 'hao']) for item in result: print(item.score, '/'.join(item.path)) print(20 * '*') result = dag(dagparams, ['ni', 'bu', 'zhi', 'dao', 'de', 'shi']) for item in result: print(item.score, '/'.join(item.path)) print(20 * '*') result = dag(dagparams, ['ni', 'bu', 'zhi', 'dao', 'de', 'shi'], path_num=2,
def __init__(self):
    """Create the default DAG parameters and keep them on the instance."""
    default_params = DefaultDagParams()
    self._dagparams = default_params
#!/usr/bin/env python # -*- coding:utf-8 -*- # @FileName :input.py # @Time :2022/1/4 21:04 # @Author :russionbear import pygame from Pinyin2Hanzi import is_pinyin, dag, DefaultDagParams from .. import Pen import ctypes __param = DefaultDagParams() PATH_NUM = 20 def get_pinyin(s0): """ 将一段拼音,分解成一个个拼音 :param s0: 匹配的字符串 :return: 匹配到的拼音列表 """ for i1, i in enumerate(reversed(s0)): if i < 'a' or i > 'z': s0 = s0[-i1 + 1:] result = [] if not s0: return result max_len = 6 # 拼音最长为6
def pinyin_2_hanzi(pinyinList):
    """Return the joined text of every conversion candidate.

    :param pinyinList: list of pinyin syllables passed to ``dag``
    :return: list of strings, one per candidate, each the concatenation of
        that candidate's path
    """
    from Pinyin2Hanzi import DefaultDagParams
    from Pinyin2Hanzi import dag

    params = DefaultDagParams()
    # path_num is the number of candidates requested.
    candidates = dag(params, pinyinList, path_num=500000, log=True)
    sentences = []
    for candidate in candidates:
        sentences.append(''.join(candidate.path))
    return sentences