def name_tran(str):
    """Convert a Chinese personal name into a Western-order pinyin name.

    The first character of ``str`` is treated as the family name and the
    remaining characters as the given name.  The given-name syllables are
    concatenated, both parts are title-cased, and the given name is placed
    in front of the family name.

    :param str: Chinese name, family-name character first.  The parameter
        shadows the ``str`` builtin; the name is kept so existing keyword
        callers keep working.
    :return: ``u'<Given> <Family>'``.
    :raises IndexError: if ``str`` is empty (there is no first character).
    """
    test = PinYin()
    test.load_word()
    # Only the first syllable returned for the family-name character is used.
    family = test.hanzi2pinyin(string=str[0])[0]
    # Debug trace of the given-name part, kept from the original.
    print(str[1:])
    # join() instead of repeated ``last = last + word`` concatenation.
    last = u''.join(test.hanzi2pinyin(string=str[1:]))
    name_en = last.title() + u' ' + family.title()
    return name_en
def zcf(namelist):
    """Find the names whose pinyin initials spell a keyword typed by the user.

    The keyword is read interactively with ``raw_input``.  A name matches
    when the keyword has exactly as many characters as the name and every
    keyword character equals the first letter of the corresponding pinyin
    syllable.

    :param namelist: sequence of names; each entry is decoded as UTF-8 to
        count its characters.
    :return: list of indexes into ``namelist`` that matched, or ``None``
        when nothing matched.
    """
    converter = PinYin()
    converter.load_word()
    keyword = raw_input("关键词 : ")

    matched = []
    for index, name in enumerate(namelist):
        syllables = converter.hanzi2pinyin(str(name))
        length = len(list(name.decode('utf-8')))
        if len(keyword) != length:
            continue
        # A list (not a generator) so every position is compared, exactly
        # as the original accumulating loop did.
        hits = [syllables[pos][0] == keyword[pos] for pos in range(length)]
        if all(hits):
            matched.append(index)

    if not matched:
        return None
    return matched
def writeCityName():
    """Create ``cityName.csv`` from a web page of city names, if missing.

    When ``cityName.csv`` already exists nothing is downloaded.  Otherwise
    the page is fetched and decoded as UTF-8, and every run of 2-5 CJK
    characters followed by the character for "city" is collected.  For each
    hit one row is written: the name without that trailing character, and
    the last pinyin syllable of that name.  A UTF-8 BOM is written at the
    start of the file.

    Errors while writing the file are printed, not raised (best effort,
    as in the original).
    """
    if os.path.exists('cityName.csv'):
        print('cityName.csv detected')
        return

    url = "http://www.zxinc.org/gb2260.htm"
    print('start reading ...')
    response = urllib.urlopen(url)
    try:
        page = response.read().decode('utf8')
    finally:
        # The original never closed the HTTP response.
        response.close()
    print('reading done...')

    # Same pattern value as the original ur'' literal; u'' also parses on
    # Python 3.
    pattern = re.compile(u'([\u4e00-\u9fa5]{2,5}市)')
    match = pattern.findall(page)
    if not match:
        return

    try:
        # ``with`` closes the file by itself.  The original's extra
        # ``finally: csvfile.close()`` raised NameError whenever open()
        # failed, because ``csvfile`` was never bound in that case.
        with open('cityName.csv', 'wb') as csvfile:
            csvWrite = csv.writer(csvfile, delimiter=' ', quotechar='|',
                                  quoting=csv.QUOTE_MINIMAL)
            csvfile.write(codecs.BOM_UTF8)
            test = PinYin()
            test.load_word()
            for result in match:
                result = result.encode('utf8')
                # Drop the last 3 bytes: the UTF-8 encoding of the trailing
                # "city" character matched by the pattern.
                city = result[:-3]
                py = test.hanzi2pinyin(string=city)
                csvWrite.writerow([city, py[-1]])
            print('write done!')
    except Exception as e:
        print(e)
class rhyRobot:
    """Produce candidate rhyming pinyin for a Chinese phrase.

    Each syllable of the phrase is split into (initial, final).  Candidates
    are built by combining every initial in ``shengMu`` with the original
    finals and keeping only combinations whose every syllable is a key of
    the module-level ``possibleDict``.
    """
    # If baidu does not work, try using a proxy.

    def __init__(self):
        """Load the pinyin converter and the initial / whole-syllable tables."""
        self.pinYinRobot = PinYin()
        self.pinYinRobot.load_word()
        # Initials tried as replacements when building candidates.
        self.shengMu = [
            "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q",
            "x", "zh", "ch", "sh", "r", "z", "c", "s", "y", "w"
        ]
        # Syllables treated as indivisible wholes; a phrase containing one
        # of them is rejected by findRhyForWords.
        self.zhengTi = [
            "zhi", "chi", "shi", "ri", "zi", "ci", "si", "yu", "ye", "yue",
            "yuan", "yin", "yun", "ying"
        ]
        print("pinYinRobot is loaded")

    def findRhyForWords(self, chinese):
        """Print every candidate rhyme for ``chinese``.

        If any syllable of the phrase is listed in ``zhengTi`` a message is
        printed and nothing else happens.  Always returns ``None``.
        """
        pinYinList = self.pinYinRobot.hanzi2pinyin(chinese)
        for singleWord in pinYinList:
            if singleWord in self.zhengTi:
                print(singleWord + " is whole,cant rhy")
                return
        pinYinTuple = self.__findPinYinTuple(pinYinList)
        allPossibleWord = self.__findAllPosiblePinYin(pinYinTuple)
        print(allPossibleWord)

    def __getResultFromBaidu(self, allPossibleWord):
        """Placeholder; not implemented."""
        pass

    def __getResultFromLocal(self, allPossibleWord):
        """Placeholder; not implemented."""
        pass

    def __findAllPosiblePinYin(self, pinYinTuple):
        """Return ``[(candidate, 0), ...]`` for every valid initial swap.

        ``candidate`` is the space-terminated concatenation of the new
        syllables (each syllable is followed by one space).

        :param pinYinTuple: list of (initial, final) pairs, one per syllable.
        """
        myLoopMachine = LoopMachine(len(pinYinTuple), len(self.shengMu))
        allPossibleWord = []
        # NOTE(review): the loop continues while shouldStop() is truthy,
        # which reads inverted -- confirm against LoopMachine.
        while myLoopMachine.shouldStop():
            loopIndex = myLoopMachine.getLoopIndex()
            syllables = []
            for i in range(len(loopIndex)):
                wordToAppend = self.shengMu[loopIndex[i]] + pinYinTuple[i][1]
                # ``in`` replaces dict.has_key(), which Python 3 removed.
                if wordToAppend not in possibleDict:
                    break
                syllables.append(wordToAppend)
            else:
                # Reached only when no syllable was rejected above.
                newWord = ''.join(s + ' ' for s in syllables)
                allPossibleWord.append((newWord, 0))
            myLoopMachine.incr()
        return allPossibleWord

    def __findPinYinTuple(self, pinYinList):
        """Split each syllable into an (initial, final) pair.

        The initial is the first two letters for ``zh``/``ch``/``sh`` and
        the first letter otherwise; the final is whatever remains.
        """
        pinYinTuple = []
        for item in pinYinList:
            cut = 2 if item[:2] in ("zh", "ch", "sh") else 1
            pinYinTuple.append((item[:cut], item[cut:]))
        return pinYinTuple
def post(self):
    """Store a submitted phone-book entry and render the success page.

    Reads ``name``, ``phone`` and ``department`` from the request,
    HTML-escapes them, derives pinyin strings for the name and department,
    stamps today's date as the hire date, saves the ``Phone`` record and
    writes the rendered ``templates/success.html`` to the response.
    """
    base_dir = os.path.dirname(__file__)
    dict_path = os.path.join(base_dir, 'libs', 'pinyin', 'word.data')
    converter = PinYin(dict_file=dict_path)
    converter.load_word()

    request = self.request
    phone = Phone()
    phone.name = cgi.escape(request.get("name"))
    phone.phone = int(cgi.escape(request.get("phone")))
    phone.department = cgi.escape(request.get("department"))
    # The pinyin fields are derived from the values as stored on the record.
    phone.name_pinyin = ''.join(converter.hanzi2pinyin(string=phone.name))
    phone.department_pinyin = ''.join(
        converter.hanzi2pinyin(string=phone.department))
    phone.hire_date = datetime.datetime.now().date()
    phone.put()

    success_page = os.path.join(base_dir, 'templates', 'success.html')
    html = template.render(success_page, {})
    self.response.out.write(html)
# t2(): appears to be a scratch/debug routine; it takes no arguments and only prints.
#   1. Builds a PinYin converter and loads its dictionary.
#   2. Assigns `name` four times in a row; only the last hard-coded literal takes effect.
#   3. Walks `name` one character at a time and appends u" " after a character whenever
#      is_hz_py(name[j], name[j + 1]) is falsy (is_hz_py is defined elsewhere), then
#      re-joins the pieces into `name`.
#   4. Extracts every run of characters in U+4E00..U+9FA5 with the compiled pattern `p`,
#      concatenates the runs into `cnames`, and passes test.hanzi2pinyin(cnames) through
#      Cartesian_product (defined elsewhere) to get `pys`.
#   5. For each element of `pys`, locates each run inside `cnames` with re.search, slices
#      the whitespace-split element by the match offsets, and substitutes the first
#      occurrence of the run in a copy of `name` with the space-joined slice.
# `p_eng`, `tmp`, `ll` and `mydict` are assigned but never read in this block; the loop
# `for p in pys` rebinds `p`, which held the compiled regex until then.
# NOTE(review): the original line breaks and indentation were lost, so the nesting of the
# final `print tmp2` (inner vs. outer loop) cannot be determined here -- restore the
# formatting from version control before changing this function.
def t2(): test = PinYin() test.load_word() #string = u"Kottlers古玩城" #string = u"Head 2 Toe发型店" #string = u"蓝" #print string #print test.hanzi2pinyin(string=string) #print Cartesian_product(test.hanzi2pinyin(string=string)) name = u"普季(商城)" name = u"Kottlers古玩城" name = u"hello 艾压(重庆店)山" name = u"库兰达(库兰达热带雨林)" #name = u"盛文甘hello店(店)" #name = u"义乌三期市场(原篁园市场)" print name p = re.compile(u'[\u4e00-\u9fa5]+') p_eng = re.compile(u'[a-zA-Z]+') j = 0 strs = [] while (j < len(name)): #for j in xrange(len(name)): # if j if j + 1 == len(name): strs.append(name[j]) else: print(name[j], name[j + 1]), is_hz_py(name[j], name[j + 1]) if not is_hz_py(name[j], name[j + 1]): print name[j], j strs.append(name[j] + u" ") else: strs.append(name[j]) j += 1 name = "".join(strs) ch_names = p.findall(name) tmp = name ll = [] mydict = {} cnames = "".join([ch_name for ch_name in ch_names]) #pys = test.hanzi2pinyin(string=cnames) pys = Cartesian_product(test.hanzi2pinyin(string=cnames)) print cnames, pys, ch_names for p in pys: tmp2 = name for ch_name in ch_names: m = re.search(ch_name, cnames) _start = m.start() _end = m.end() replace = " ".join([k for k in p.split()[_start:_end]]) print _start, _end, replace, tmp2 tmp2 = re.sub(ch_name, replace, tmp2, 1) print tmp2
# t2(): appears to be a scratch/debug routine; it takes no arguments and only prints.
#   1. Builds a PinYin converter and loads its dictionary.
#   2. Assigns `name` four times in a row; only the last hard-coded literal takes effect.
#   3. Walks `name` one character at a time and appends u" " after a character whenever
#      is_hz_py(name[j], name[j+1]) is falsy (is_hz_py is defined elsewhere), then
#      re-joins the pieces into `name`.
#   4. Extracts every run of characters in U+4E00..U+9FA5 with the compiled pattern `p`,
#      concatenates the runs into `cnames`, and passes test.hanzi2pinyin(cnames) through
#      Cartesian_product (defined elsewhere) to get `pys`.
#   5. For each element of `pys`, locates each run inside `cnames` with re.search, slices
#      the whitespace-split element by the match offsets, and substitutes the first
#      occurrence of the run in a copy of `name` with the space-joined slice.
# `p_eng`, `tmp`, `ll` and `mydict` are assigned but never read in this block; the loop
# `for p in pys` rebinds `p`, which held the compiled regex until then.
# NOTE(review): the original line breaks and indentation were lost, so the nesting of the
# final `print tmp2` (inner vs. outer loop) cannot be determined here -- restore the
# formatting from version control before changing this function.
def t2(): test = PinYin() test.load_word() #string = u"Kottlers古玩城" #string = u"Head 2 Toe发型店" #string = u"蓝" #print string #print test.hanzi2pinyin(string=string) #print Cartesian_product(test.hanzi2pinyin(string=string)) name = u"普季(商城)" name = u"Kottlers古玩城" name = u"hello 艾压(重庆店)山" name = u"库兰达(库兰达热带雨林)" #name = u"盛文甘hello店(店)" #name = u"义乌三期市场(原篁园市场)" print name p = re.compile(u'[\u4e00-\u9fa5]+') p_eng = re.compile(u'[a-zA-Z]+') j = 0 strs = [] while (j<len(name)): #for j in xrange(len(name)): # if j if j+1 == len(name): strs.append(name[j]) else: print (name[j], name[j+1]), is_hz_py(name[j], name[j+1]) if not is_hz_py(name[j], name[j+1]): print name[j], j strs.append(name[j]+u" ") else: strs.append(name[j]) j += 1 name = "".join(strs) ch_names = p.findall(name) tmp = name ll = [] mydict = {} cnames = "".join([ch_name for ch_name in ch_names]) #pys = test.hanzi2pinyin(string=cnames) pys = Cartesian_product(test.hanzi2pinyin(string=cnames)) print cnames, pys, ch_names for p in pys: tmp2 = name for ch_name in ch_names: m = re.search(ch_name, cnames) _start = m.start() _end = m.end() replace = " ".join([k for k in p.split()[_start:_end]]) print _start, _end, replace, tmp2 tmp2 = re.sub(ch_name, replace, tmp2, 1) print tmp2
def Convert(self):
    """Write ``ok_<filename>``: the input file plus phonetic name fields.

    Every line of ``self.filename`` is copied to the output unchanged,
    except lines containing an ``N:`` field with four ``;``-terminated
    components.  Such a line is replaced by three lines:

    * a rebuilt ``N:<xing>;<ming>;;;`` line,
    * ``X-PHONETIC-LAST-NAME:`` followed by the capitalised pinyin of xing,
    * ``X-PHONETIC-FIRST-NAME:`` followed by the capitalised pinyin of ming.

    When the first component is longer than 3 units, only its first 3 units
    are kept as xing and the remainder is prepended to ming.
    """
    py_engine = PinYin()
    py_engine.load_word()

    def phonetic_line(label, text):
        # One "<label><Capitalised syllables>\n" line; empty syllables are
        # skipped, as in the original loop.
        syllables = py_engine.hanzi2pinyin(string=text)
        return label + "".join(s.capitalize() for s in syllables if s != '') + "\n"

    # The original pattern spelled this with "\N", "\:" and "\;".  Python 2
    # treats "\N" as a literal N, but Python 3's re rejects it, so the
    # equivalent unescaped form is used.
    name_field = re.compile(r"(N:[^;]*;[^;]*;[^;]*;[^;]*;)")

    contact = []
    # A single handle, closed by ``with``.  The original opened the file
    # twice (iterating one handle while calling readline() on the other)
    # and closed neither.
    with open(self.filename, 'r') as f:
        for line in f:
            k = name_field.findall(line)
            if not k:
                contact.append(line)
                continue
            field = k[0]
            first_sep = field.find(';')
            second_sep = field.find(';', first_sep + 1)
            # field[2:] skips the "N:" prefix.
            # NOTE(review): the 3-unit cut presumably assumes one CJK
            # character is 3 bytes of UTF-8 in a Python 2 byte string --
            # confirm before running on unicode text.
            if first_sep - 2 > 3:
                xing = field[2:5]
                ming = field[5:first_sep] + field[first_sep + 1:second_sep]
            else:
                xing = field[2:first_sep]
                ming = field[first_sep + 1:second_sep]
            contact.append('N:' + xing + ';' + ming + ';' + ";;\n")
            contact.append(phonetic_line("X-PHONETIC-LAST-NAME:", xing))
            contact.append(phonetic_line("X-PHONETIC-FIRST-NAME:", ming))

    # ``with`` guarantees the output is flushed and closed; the original
    # left that to garbage collection.
    with open("ok_" + self.filename, 'w') as fout:
        fout.writelines(contact)
def idiomFind(x):
    """Return a random line of ``idiom.txt`` whose first character reads ``x``.

    The lines of ``idiom.txt`` are shuffled, then scanned in order.  For
    each line the first 3 bytes are decoded as UTF-8 and converted to
    pinyin; the first line whose leading syllable equals ``x`` is returned
    (including its line ending).

    :param x: pinyin syllable to look for.
    :return: the matching line, or ``None`` when no line matches.
    :raises ValueError: if ``x`` is ``None`` (ValueError is a subclass of
        ``Exception``, which the original raised, so existing handlers
        still catch it).
    """
    if x is None:
        raise ValueError("idiomFind() requires a pinyin syllable, got None")

    with open('idiom.txt', 'r') as f:
        base = f.readlines()
    random.shuffle(base)

    # Build the converter once; the original rebuilt it and reloaded its
    # dictionary for every line of the file.
    test = PinYin()
    test.load_word()

    for i in base:
        if len(i) <= 1:
            # Blank line (at most a newline): nothing to convert.
            continue
        try:
            # NOTE(review): 3 bytes presumably equals one CJK character in
            # UTF-8 -- confirm for the data file.
            c = i[:3].decode('utf8')
            py = test.hanzi2pinyin(c)[0]
        except Exception:
            # Best effort: skip lines that cannot be decoded or converted.
            # Unlike the original bare ``except``, KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            continue
        if py == x:
            return i
    return None
def Convert(self):
    """Copy ``self.filename`` and add a phonetic line after each ``N:`` line.

    Every input line is kept.  After a line containing an ``N:...;`` field,
    an ``X-PHONETIC-LAST-NAME:`` line is inserted with the capitalised,
    concatenated pinyin of that whole matched field.  The result is written
    to ``filename``.
    """
    py_engine = PinYin()
    py_engine.load_word()

    # The original pattern spelled this with "\N", "\:" and "\;".  Python 2
    # treats "\N" as a literal N, but Python 3's re rejects it, so the
    # equivalent unescaped form is used.
    name_field = re.compile(r"(N:[^;]*;)")

    contact = []
    # A single handle, closed by ``with``.  The original opened the file
    # twice (iterating one handle while calling readline() on the other)
    # and closed neither.
    with open(self.filename, 'r') as f:
        for line in f:
            contact.append(line)
            k = name_field.findall(line)
            if k:
                phones = py_engine.hanzi2pinyin(string=k[0])
                contact.append(
                    "X-PHONETIC-LAST-NAME:"
                    + "".join(item.capitalize() for item in phones if item != '')
                    + "\n")

    # NOTE(review): ``filename`` is a bare name, not ``self.filename``.
    # Unless a module-level ``filename`` exists this raises NameError --
    # confirm the intended output path.
    with open(filename, 'w') as fout:
        fout.writelines(contact)
def main(args):
    """Fill in ``roominfo.py_name`` for every row where it is still NULL.

    The stored value is the full pinyin of the first character of ``rname``
    followed by the first letter of each remaining syllable.  All updates
    are committed together at the end.

    :param args: unused.
    """
    test = PinYin()
    test.load_word()
    conn = getconn()
    try:
        cursor = conn.cursor()
        # The same job was previously run against area_name_map, keyed on
        # (cname, area), per the original commented-out statements.
        cursor.execute('select rname,rid from roominfo where py_name is null')
        rows = cursor.fetchall()
        for row in rows:
            myword = row[0].encode("utf8")
            pylist = test.hanzi2pinyin(string=myword)
            if not pylist:
                # No pinyin for this name; the original crashed with
                # IndexError here.  Leave py_name NULL instead.
                continue
            # w[:1] tolerates an empty syllable where w[0] would raise.
            pystr = pylist[0] + ''.join(w[:1] for w in pylist[1:])
            cursor.execute('update roominfo set py_name=? where rid=?',
                           (pystr, row[1]))
        conn.commit()
    finally:
        # Always release the connection, also when a statement fails.
        conn.close()
array3 = lists[2] array4 = lists[3] with open('dict.txt', 'r') as userDict: for i in userDict: i.strip('\r\n') f.write(array1 + i.strip() + '\n') f.write(array1 + array2 + array3[0:1] + i.strip() + '\n') f.write(array1 + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n') f.write(array1[0:1] + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n') f.write(array1.capitalize() + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n') f.write(array1.capitalize()[0:1] + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n') #f.close() print array1 + array2 + array3[0:1] + array4[0:1] print array1 + array2[0:1] + array3[0:1] + array4[0:1] print array1[0:1] + array2[0:1] + array3[0:1] + array4[0:1] print array1.capitalize() + array2[0:1] + array3[0:1] + array4[0:1] print array1.capitalize()[0:1] + array2[0:1] + array3[0:1] + array4[0:1] with open('username.txt', 'r') as un: for userinfo in un: lists = test.hanzi2pinyin(string=userinfo.strip()) if len(lists) == 2: listsTwo(lists) elif len(lists) == 3: listsThree(lists) elif len(lists) == 4: listsFour(lists) else: print 'you chinese name maybe ==1 or > 4 ,please current' #un.close() f.close()
f.write(array1 + array2 + array3[0:1] + i.strip() + '\n') f.write(array1 + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n') f.write(array1[0:1] + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n') f.write(array1.capitalize() + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n') f.write(array1.capitalize()[0:1] + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n') #f.close() print array1 + array2 + array3[0:1] + array4[0:1] print array1 + array2[0:1] + array3[0:1] + array4[0:1] print array1[0:1] + array2[0:1] + array3[0:1] + array4[0:1] print array1.capitalize() + array2[0:1] + array3[0:1] + array4[0:1] print array1.capitalize()[0:1] + array2[0:1] + array3[0:1] + array4[0:1] with open('username.txt', 'r') as un: for userinfo in un: lists = test.hanzi2pinyin(string=userinfo.strip()) if len(lists) == 2: listsTwo(lists) elif len(lists) == 3: listsThree(lists) elif len(lists) == 4: listsFour(lists) else: print 'you chinese name maybe ==1 or > 4 ,please current' #un.close() f.close()
'my puzzle of PYthon'
# Pair every digit with every letter: 1a, 1b, 2a, 2b.
print(["{}{}".format(str(a), str(b)) for a in ["1", "2"] for b in ["a", "b"]])

from pinyin import PinYin

# Convert a sample sentence to pinyin and show the result.
test = PinYin()
test.load_word()
a = test.hanzi2pinyin(string='钓鱼岛是中国的')
print(a)

# Python 3 renamed the unicode type to str
unicode_or_str = "钓鱼岛是中国的"
print()
'my puzzle of PYthon'
# Pair every digit with every letter: 1a, 1b, 2a, 2b.
print(["{}{}".format(str(a), str(b)) for a in ["1", "2"] for b in ["a", "b"]])

from pinyin import PinYin

# Convert a sample sentence to pinyin and show the result.
test = PinYin()
test.load_word()
a = test.hanzi2pinyin(string='钓鱼岛是中国的')
print(a)

# Python 3 renamed the unicode type to str
unicode_or_str = "钓鱼岛是中国的"
print()
# -*- coding:utf-8 -*-
from pinyin import PinYin
import sys

# Reads the word list named by argv[1] (one entry per line) and writes two
# lookup files under dict/, named after the input file's last path part:
#   dict/short_<name>  "<first letter of every syllable> <entry>"
#   dict/full_<name>   "<all syllables concatenated> <entry>"
if __name__ == "__main__":
    test = PinYin()
    test.load_word()
    src_path = sys.argv[1]
    base_name = src_path.split("/")[-1]
    # ``with`` closes all three files even when a conversion fails; the
    # original never closed the input and leaked the outputs on error.
    with open(src_path, 'r') as f, \
            open("dict/" + "short_" + base_name, 'w') as f2, \
            open("dict/" + "full_" + base_name, 'w') as f3:
        for i in f:
            ch_str = i.strip()
            print("\"" + ch_str + "\"")
            short_arr = test.hanzi2pinyin(string=ch_str)
            short_str = ""
            for x in short_arr:
                try:
                    print(x)
                    short_str += x[0]
                except Exception:
                    # Best effort: skip a syllable that cannot be printed
                    # or is empty.  Unlike the original bare ``except``,
                    # Ctrl-C is no longer swallowed.
                    continue
            print(short_str)
            f2.write(short_str + " " + ch_str + "\n")
            f3.write("".join(short_arr) + " " + ch_str + "\n")
def getPinYin(hanzi):
    """Return the result of ``PinYin.hanzi2pinyin`` for ``hanzi``.

    A new converter is created and its dictionary loaded on every call.

    :param hanzi: text passed to the converter as ``string``.
    """
    converter = PinYin()
    converter.load_word()
    pinyin = converter.hanzi2pinyin(string=hanzi)
    return pinyin
# -*- coding: utf-8 -*-
from pinyin import PinYin
import json


def load(file):
    """Parse the JSON document stored at path ``file`` and return it."""
    with open(file) as json_file:
        # json.load parses an open file; json.loads would parse a string.
        return json.load(json_file)


# This program reads the word-segmentation result (out.json), converts each
# entry to pinyin and writes the list to out_pinyin.json.
input = load('out.json')
print(len(input))

test = PinYin()
test.load_word()
# Each entry is stored as the str() of the value hanzi2pinyin returns.
st = [str(test.hanzi2pinyin(string=i)) for i in input]
print(len(st))

json_dic = json.dumps(st)
# ``with`` closes the output file even if the write fails; the original
# closed it only on the success path.
with open("out_pinyin.json", "wb") as fo:
    fo.write(json_dic)