예제 #1
0
def name_tran(str):
    """Romanise a Chinese name as "<given> <family>".

    The first character of ``str`` is converted on its own and the first
    entry of that result is used as the family name.  The remaining
    characters are converted together and their results concatenated with
    no separator to form the given name.  Both parts are title-cased.  The
    given-name characters are echoed to stdout before conversion.
    """
    converter = PinYin()
    converter.load_word()

    surname_char, given_chars = str[0], str[1:]
    family = converter.hanzi2pinyin(string=surname_char)[0]

    print(given_chars)
    given = u''.join(converter.hanzi2pinyin(string=given_chars))

    return given.title() + u' ' + family.title()
예제 #2
0
파일: zcf520.py 프로젝트: Yuntong/script
def zcf(namelist):
    """Find the names whose pinyin initials spell a keyword typed by the user.

    Prompts on stdin for a keyword.  An entry matches when its character
    count (after UTF-8 decoding) equals the keyword length and, position by
    position, the first letter of its pinyin equals the keyword letter.

    :param namelist: indexable collection of UTF-8 encoded names.
    :return: list of matching indexes into ``namelist``, or ``None`` when no
        entry matches.
    """
    converter = PinYin()
    converter.load_word()
    key = raw_input("关键词  :  ")

    matched = []
    for index, name in enumerate(namelist):
        syllables = converter.hanzi2pinyin(str(name))
        char_count = len(list(name.decode('utf-8')))
        if len(key) != char_count:
            continue
        # Build the full list first so every position is compared, exactly
        # like the non-short-circuiting accumulation this replaces.
        hits = [syllables[pos][0] == key[pos] for pos in range(char_count)]
        if all(hits):
            matched.append(index)

    return matched if matched else None
예제 #3
0
def writeCityName():
    if not os.path.exists('cityName.csv'):
        url = "http://www.zxinc.org/gb2260.htm"
        print 'start reading ...'
        response = urllib.urlopen(url)
        page = response.read()
        page = page.decode('utf8')
        print 'reading done...'
        pattern = re.compile(ur'([\u4e00-\u9fa5]{2,5}市)')
        match = pattern.findall(page)
        if match:
            try:
                with open('cityName.csv', 'wb') as csvfile:
                    csvWrite = csv.writer(csvfile,
                                          delimiter=' ',
                                          quotechar='|',
                                          quoting=csv.QUOTE_MINIMAL)
                    csvfile.write(codecs.BOM_UTF8)
                    test = PinYin()
                    test.load_word()
                    for result in match:
                        result = result.encode('utf8')
                        py = test.hanzi2pinyin(string=result[:-3])
                        csvWrite.writerow([result[:-3], py[-1]])
                print 'write done!'
            except Exception as e:
                print e
            finally:
                csvfile.close()
    else:
        print 'cityName.csv detected'
예제 #4
0
class rhyRobot:
    """Build candidate syllable sequences that keep the finals of a phrase."""

    # If Baidu does not work, try using a proxy.
    def __init__(self):
        """Load the pinyin converter and the initial / whole-syllable tables."""
        self.pinYinRobot = PinYin()
        self.pinYinRobot.load_word()
        # Initials that are substituted in front of each final.
        self.shengMu = [
            "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "j", "q",
            "x", "zh", "ch", "sh", "r", "z", "c", "s", "y", "w"
        ]
        # Syllables treated as a whole; a phrase containing one is rejected.
        self.zhengTi = [
            "zhi", "chi", "shi", "ri", "zi", "ci", "si", "yu", "ye", "yue",
            "yuan", "yin", "yun", "ying"
        ]
        print("pinYinRobot is loaded")

    def findRhyForWords(self, chinese):
        """Print the candidate syllable sequences for ``chinese``.

        If any syllable of the phrase is listed in ``self.zhengTi`` a notice
        is printed and nothing else happens.
        """
        pinYinList = self.pinYinRobot.hanzi2pinyin(chinese)
        for syllable in pinYinList:
            if syllable in self.zhengTi:
                print(syllable + " is whole,cant rhy")
                return
        splitSyllables = self.__findPinYinTuple(pinYinList)
        print(self.__findAllPosiblePinYin(splitSyllables))

    def __getResultFromBaidu(self, allPossibleWord):
        """Placeholder; not implemented."""
        pass

    def __getResultFromLocal(self, allPossibleWord):
        """Placeholder; not implemented."""
        pass

    def __findAllPosiblePinYin(self, pinYinTuple):
        """Return ``(text, 0)`` tuples for every accepted initial combination.

        For each index combination produced by ``LoopMachine`` the chosen
        initials are put in front of the finals in ``pinYinTuple``.  A
        combination is kept only when every resulting syllable is a key of
        ``possibleDict``; ``text`` is those syllables, each followed by a
        space.
        """
        machine = LoopMachine(len(pinYinTuple), len(self.shengMu))
        candidates = []
        # NOTE(review): the loop runs while shouldStop() is truthy; the name
        # suggests the opposite -- confirm against LoopMachine.
        while machine.shouldStop():
            indexes = machine.getLoopIndex()
            pieces = []
            for position in range(len(indexes)):
                syllable = (self.shengMu[indexes[position]] +
                            pinYinTuple[position][1])
                if not possibleDict.has_key(syllable):
                    break
                pieces.append(syllable + ' ')
            else:
                # Reached only when no syllable was rejected.
                candidates.append((''.join(pieces), 0))
            machine.incr()
        return candidates

    def __findPinYinTuple(self, pinYinList):
        """Split each syllable into an ``(initial, final)`` tuple.

        The initial is the first two letters for "zh", "ch" and "sh", and the
        first letter otherwise.
        """
        pairs = []
        for syllable in pinYinList:
            cut = 2 if syllable[:2] in ("zh", "ch", "sh") else 1
            pairs.append((syllable[:cut], syllable[cut:]))
        return pairs
예제 #5
0
파일: index.py 프로젝트: binbin/phone_book
  def post(self):
    """Store a phone-book entry built from the submitted form fields.

    Reads ``name``, ``phone`` and ``department`` from the request, derives
    pinyin strings for the name and department, sets today's date as the
    hire date, saves the record and renders ``templates/success.html``.
    """
    here = os.path.dirname(__file__)
    converter = PinYin(
        dict_file=os.path.join(here, 'libs', 'pinyin', 'word.data'))
    converter.load_word()

    entry = Phone()
    entry.name = cgi.escape(self.request.get("name"))
    entry.phone = int(cgi.escape(self.request.get("phone")))
    entry.department = cgi.escape(self.request.get("department"))
    entry.name_pinyin = ''.join(converter.hanzi2pinyin(string=entry.name))
    entry.department_pinyin = ''.join(
        converter.hanzi2pinyin(string=entry.department))
    entry.hire_date = datetime.datetime.now().date()
    entry.put()

    success_page = os.path.join(here, 'templates', 'success.html')
    self.response.out.write(template.render(success_page, {}))
예제 #6
0
파일: cn2pinyin.py 프로젝트: ddmkchan/Utils
def t2():
    test = PinYin()
    test.load_word()
    #string = u"Kottlers古玩城"
    #string = u"Head 2 Toe发型店"
    #string = u"蓝"
    #print string
    #print test.hanzi2pinyin(string=string)
    #print Cartesian_product(test.hanzi2pinyin(string=string))

    name = u"普季(商城)"
    name = u"Kottlers古玩城"
    name = u"hello 艾压(重庆店)山"
    name = u"库兰达(库兰达热带雨林)"
    #name = u"盛文甘hello店(店)"
    #name = u"义乌三期市场(原篁园市场)"
    print name
    p = re.compile(u'[\u4e00-\u9fa5]+')
    p_eng = re.compile(u'[a-zA-Z]+')
    j = 0
    strs = []
    while (j < len(name)):

        #for j in xrange(len(name)):
        #    if j

        if j + 1 == len(name):
            strs.append(name[j])
        else:
            print(name[j], name[j + 1]), is_hz_py(name[j], name[j + 1])
            if not is_hz_py(name[j], name[j + 1]):
                print name[j], j
                strs.append(name[j] + u" ")
            else:
                strs.append(name[j])
        j += 1
    name = "".join(strs)
    ch_names = p.findall(name)
    tmp = name
    ll = []
    mydict = {}
    cnames = "".join([ch_name for ch_name in ch_names])
    #pys = test.hanzi2pinyin(string=cnames)
    pys = Cartesian_product(test.hanzi2pinyin(string=cnames))
    print cnames, pys, ch_names
    for p in pys:
        tmp2 = name
        for ch_name in ch_names:
            m = re.search(ch_name, cnames)
            _start = m.start()
            _end = m.end()
            replace = " ".join([k for k in p.split()[_start:_end]])
            print _start, _end, replace, tmp2
            tmp2 = re.sub(ch_name, replace, tmp2, 1)
        print tmp2
예제 #7
0
파일: cn2pinyin.py 프로젝트: ddmkchan/Utils
def t2():
    test = PinYin()
    test.load_word()
    #string = u"Kottlers古玩城"
    #string = u"Head 2 Toe发型店"
    #string = u"蓝"
    #print string
    #print test.hanzi2pinyin(string=string)
    #print Cartesian_product(test.hanzi2pinyin(string=string))

    name = u"普季(商城)"
    name = u"Kottlers古玩城"
    name = u"hello 艾压(重庆店)山"
    name = u"库兰达(库兰达热带雨林)"
    #name = u"盛文甘hello店(店)"
    #name = u"义乌三期市场(原篁园市场)"
    print name
    p = re.compile(u'[\u4e00-\u9fa5]+')
    p_eng = re.compile(u'[a-zA-Z]+')
    j = 0
    strs = []
    while (j<len(name)):
        
    #for j in xrange(len(name)):
    #    if j 
        
        if j+1 == len(name):
            strs.append(name[j])
        else:
            print (name[j], name[j+1]), is_hz_py(name[j], name[j+1])
            if not is_hz_py(name[j], name[j+1]):
                print name[j], j
                strs.append(name[j]+u" ")
            else:
                strs.append(name[j])
        j += 1
    name  = "".join(strs)
    ch_names =  p.findall(name)
    tmp = name
    ll = []
    mydict = {}
    cnames = "".join([ch_name for ch_name in ch_names])
    #pys = test.hanzi2pinyin(string=cnames)
    pys = Cartesian_product(test.hanzi2pinyin(string=cnames))
    print cnames, pys, ch_names
    for p in pys:
        tmp2 = name
        for ch_name in ch_names:
            m = re.search(ch_name, cnames)
            _start = m.start()
            _end = m.end()
            replace = " ".join([k for k in p.split()[_start:_end]])
            print _start, _end, replace, tmp2
            tmp2 = re.sub(ch_name, replace, tmp2, 1)
        print tmp2
예제 #8
0
    def Convert(self):
        """Rewrite the file ``self.filename`` with phonetic name lines added.

        A line containing ``N:`` followed by four ``;``-terminated fields is
        replaced by a normalised ``N:<family>;<given>;;;`` line plus
        ``X-PHONETIC-LAST-NAME:`` and ``X-PHONETIC-FIRST-NAME:`` lines made
        of the capitalised pinyin syllables.  Every other line is copied
        unchanged.  The result is written to ``"ok_" + self.filename``.
        """
        py_engine = PinYin()
        py_engine.load_word()

        def phonetic_line(label, text):
            # Join the capitalised, non-empty syllables behind the label.
            syllables = py_engine.hanzi2pinyin(string=text)
            joined = "".join(s.capitalize() for s in syllables if s != '')
            return label + joined + "\n"

        # Plain "N:" -- the previous "\N" is an invalid escape on Python 3
        # and only matched a literal "N" on Python 2.
        name_entry = re.compile(r"(N:[^;]*;[^;]*;[^;]*;[^;]*;)")

        contact = []
        # One handle, closed by the with-block; the file used to be opened
        # twice and never closed.
        with open(self.filename, 'r') as f:
            for line in f:
                k = name_entry.findall(line)
                if not k:
                    contact.append(line)
                    continue

                entry = k[0]
                first_sep = entry.find(';')
                second_sep = entry.find(';', first_sep + 1)
                if first_sep - 2 > 3:
                    # The first field is longer than three units: keep three
                    # as the family name and move the rest to the given name.
                    # NOTE(review): presumably one 3-byte UTF-8 character --
                    # confirm the file is read as UTF-8 bytes.
                    xing = entry[2:5]
                    ming = entry[5:first_sep] + entry[first_sep + 1:second_sep]
                else:
                    xing = entry[2:first_sep]
                    ming = entry[first_sep + 1:second_sep]

                contact.append('N:' + xing + ';' + ming + ';' + ";;\n")
                contact.append(phonetic_line("X-PHONETIC-LAST-NAME:", xing))
                contact.append(phonetic_line("X-PHONETIC-FIRST-NAME:", ming))

        with open("ok_" + self.filename, 'w') as fout:
            fout.writelines(contact)
예제 #9
0
def idiomFind(x):
    """Return a random line of ``idiom.txt`` whose first character reads ``x``.

    The lines are shuffled and the first one whose leading character
    converts to the pinyin ``x`` is returned as read, line ending included.
    Lines of length one or less, and lines whose leading character cannot
    be decoded or converted, are skipped.

    :param x: pinyin syllable to look for.
    :return: the matching line, or ``None`` when no line matches.
    :raises ValueError: if ``x`` is ``None``.
    """
    if x is None:
        raise ValueError("x must not be None")

    with open('idiom.txt', 'r') as f:
        base = f.readlines()
    random.shuffle(base)

    # Load the dictionary once instead of once per candidate line.  A load
    # failure now propagates instead of being swallowed for every line.
    test = PinYin()
    test.load_word()

    for i in base:
        if len(i) <= 1:
            continue
        try:
            # assumes byte strings where the first character takes three
            # UTF-8 bytes -- TODO confirm
            c = i[:3].decode('utf8')
            py = test.hanzi2pinyin(c)[0]
        except Exception:
            # Best effort: skip the line, but no longer swallow
            # KeyboardInterrupt or SystemExit as the bare except did.
            continue
        if py == x:
            return i
    return None
    def Convert(self):
        """Add a phonetic last-name line after each ``N:`` line of a file.

        Every line of ``self.filename`` is copied.  When a line contains
        ``N:`` followed by one ``;``-terminated field, the matched text is
        converted to pinyin and an ``X-PHONETIC-LAST-NAME:`` line made of the
        capitalised syllables is inserted after it.
        """
        py_engine = PinYin()
        py_engine.load_word()

        # Plain "N:" -- the previous "\N" is an invalid escape on Python 3
        # and only matched a literal "N" on Python 2.
        name_entry = re.compile(r"(N:[^;]*;)")

        contact = []
        # One handle, closed by the with-block; the file used to be opened
        # twice and never closed.
        with open(self.filename, 'r') as f:
            for line in f:
                contact.append(line)
                k = name_entry.findall(line)
                if k:
                    phones = py_engine.hanzi2pinyin(string=k[0])
                    spelled = "".join(
                        item.capitalize() for item in phones if item != '')
                    contact.append("X-PHONETIC-LAST-NAME:" + spelled + "\n")

        # NOTE(review): ``filename`` is not defined in this method; unless it
        # is a module-level global this raises NameError.  Confirm whether
        # ``self.filename`` or a separate output path was intended.
        with open(filename, 'w') as fout:
            fout.writelines(contact)
예제 #11
0
def main(args):
    """Fill in ``roominfo.py_name`` for every row where it is NULL.

    The stored value is the first pinyin syllable of ``rname`` followed by
    the first letter of each following syllable.  Rows whose name yields no
    pinyin are left untouched.  Each update is committed on its own.

    :param args: unused.
    """
    test = PinYin()
    test.load_word()

    conn = getconn()
    try:
        cursor = conn.cursor()
        cursor.execute('select rname,rid from roominfo where py_name is null')
        rows = cursor.fetchall()

        for row in rows:
            myword = row[0].encode("utf8")
            pylist = test.hanzi2pinyin(string=myword)
            if not pylist:
                # Nothing to abbreviate; pylist[0] used to raise IndexError.
                continue
            # w[:1] is "" for an empty syllable, where w[0] raised.
            pystr = pylist[0] + ''.join(w[:1] for w in pylist[1:])
            cursor.execute('update roominfo set py_name=? where rid=?',
                           (pystr, row[1]))
            conn.commit()
    finally:
        # Release the connection even when a query or conversion fails.
        conn.close()
예제 #12
0
	array3 = lists[2]
	array4 = lists[3]
	with open('dict.txt', 'r') as userDict:
		for i in userDict:
			i.strip('\r\n')
			f.write(array1 + i.strip() + '\n')
			f.write(array1 + array2 + array3[0:1] + i.strip() + '\n')
			f.write(array1 + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n')
			f.write(array1[0:1] + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n')
			f.write(array1.capitalize() + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n')
			f.write(array1.capitalize()[0:1] + array2[0:1] + array3[0:1] + array4[0:1] + i.strip() + '\n')
	#f.close()
	print array1 + array2 + array3[0:1] + array4[0:1]
	print array1 + array2[0:1] + array3[0:1] + array4[0:1]
	print array1[0:1] + array2[0:1] + array3[0:1] + array4[0:1]
	print array1.capitalize() + array2[0:1] + array3[0:1] + array4[0:1]
	print array1.capitalize()[0:1] + array2[0:1] + array3[0:1] + array4[0:1]
# Send each name in username.txt to the generator that matches the number of
# pinyin entries returned for it; other lengths only produce a notice.
handlers = {2: listsTwo, 3: listsThree, 4: listsFour}
with open('username.txt', 'r') as un:
	for userinfo in un:
		lists = test.hanzi2pinyin(string=userinfo.strip())
		handler = handlers.get(len(lists))
		if handler is None:
			print('you chinese name maybe ==1 or > 4 ,please current')
		else:
			handler(lists)
# ``un`` is closed by the with-block; ``f`` is opened outside this part of
# the script and closed here.
f.close()
예제 #13
0
            f.write(array1 + array2 + array3[0:1] + i.strip() + '\n')
            f.write(array1 + array2[0:1] + array3[0:1] + array4[0:1] +
                    i.strip() + '\n')
            f.write(array1[0:1] + array2[0:1] + array3[0:1] + array4[0:1] +
                    i.strip() + '\n')
            f.write(array1.capitalize() + array2[0:1] + array3[0:1] +
                    array4[0:1] + i.strip() + '\n')
            f.write(array1.capitalize()[0:1] + array2[0:1] + array3[0:1] +
                    array4[0:1] + i.strip() + '\n')
    #f.close()
    print array1 + array2 + array3[0:1] + array4[0:1]
    print array1 + array2[0:1] + array3[0:1] + array4[0:1]
    print array1[0:1] + array2[0:1] + array3[0:1] + array4[0:1]
    print array1.capitalize() + array2[0:1] + array3[0:1] + array4[0:1]
    print array1.capitalize()[0:1] + array2[0:1] + array3[0:1] + array4[0:1]


# Send each name in username.txt to the generator that matches the number of
# pinyin entries returned for it; other lengths only produce a notice.
handlers = {2: listsTwo, 3: listsThree, 4: listsFour}
with open('username.txt', 'r') as un:
    for userinfo in un:
        lists = test.hanzi2pinyin(string=userinfo.strip())
        handler = handlers.get(len(lists))
        if handler is None:
            print('you chinese name maybe ==1 or > 4 ,please current')
        else:
            handler(lists)
# ``un`` is closed by the with-block; ``f`` is opened outside this part of
# the script and closed here.
f.close()
예제 #14
0
"""my puzzle of PYthon"""

# Nested comprehension: the outer loop runs over the digits and the inner
# loop over the letters, so the output starts with "1a".
digits = ["1", "2"]
letters = ["a", "b"]
print([str(a) + str(b) for a in digits for b in letters])

from pinyin import PinYin

# Convert a sample sentence and show what the converter returns.
test = PinYin()
test.load_word()
a = test.hanzi2pinyin(string='钓鱼岛是中国的')
print(a)

# Python 3 renamed the unicode type to str.
unicode_or_str = "钓鱼岛是中国的"

print()
예제 #15
0
"""my puzzle of PYthon"""

# Nested comprehension: the outer loop runs over the digits and the inner
# loop over the letters, so the output starts with "1a".
digits = ["1", "2"]
letters = ["a", "b"]
print([str(a) + str(b) for a in digits for b in letters])

from pinyin import PinYin

# Convert a sample sentence and show what the converter returns.
test = PinYin()
test.load_word()
a = test.hanzi2pinyin(string='钓鱼岛是中国的')
print(a)

# Python 3 renamed the unicode type to str.
unicode_or_str = "钓鱼岛是中国的"

print()
예제 #16
0
# -*- coding:utf-8 -*-
from pinyin import PinYin
import sys

if __name__ == "__main__":
  # For every line of the file named by argv[1], write two records:
  #   dict/short_<name>: "<first letter of each syllable> <line>"
  #   dict/full_<name>:  "<all syllables joined> <line>"
  test = PinYin()
  test.load_word()
  source_path = sys.argv[1]
  base_name = source_path.split("/")[-1]
  # The with-block closes all three files even when a conversion fails; the
  # input handle used to be left open.
  with open(source_path, 'r') as f, \
       open("dict/" + "short_" + base_name, 'w') as f2, \
       open("dict/" + "full_" + base_name, 'w') as f3:
    for i in f:
      ch_str = i.strip()
      print("\"" + ch_str + "\"")
      short_arr = test.hanzi2pinyin(string=ch_str)
      short_str = ""
      for x in short_arr:
        print(x)
        # x[:1] is "" for an empty syllable, so such entries add nothing,
        # which is what the bare try/except around x[0] achieved.
        short_str += x[:1]

      print(short_str)
      f2.write(short_str + " " + ch_str + "\n")
      f3.write("".join(short_arr) + " " + ch_str + "\n")
예제 #17
0
def getPinYin(hanzi):
    """Return the result of ``PinYin.hanzi2pinyin`` for ``hanzi``.

    A new converter is created and its word table loaded on every call.
    """
    converter = PinYin()
    converter.load_word()
    pinyin = converter.hanzi2pinyin(string=hanzi)
    return pinyin
예제 #18
0
# -*- coding: utf-8 -*-
from pinyin import PinYin
import json


def load(file):
    """Parse the JSON document stored at path ``file`` and return it.

    ``json.load`` reads from an open file object; ``json.loads`` is the
    variant that takes a string.
    """
    with open(file) as handle:
        return json.load(handle)


#这个程序的作用是,读取分好词的结果,转化成拼音
# Read the word-segmentation result and convert every entry to pinyin.
input = load('out.json')
print(len(input))

test = PinYin()
test.load_word()
# Each entry is stored as the string form of the value returned for it.
st = [str(test.hanzi2pinyin(string=i)) for i in input]

print(len(st))

json_dic = json.dumps(st)
# Text mode: json.dumps returns text, which "wb" rejects on Python 3.  The
# with-block closes the file even if the write fails.
with open("out_pinyin.json", "w") as fo:
    fo.write(json_dic)