Python PinYin.hanzi2pinyin_split Beispiele, pinyin.PinYin.hanzi2pinyin_split Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: main.py Projekt: HiddenStrawberry/OPAC-LibraryKit

def get_item(marc_no, status=0):
    """Scrape the detail page of one catalogue record into a dict.

    Downloads ``item.php?marc_no=<marc_no>`` below the module-level ``ourl``,
    takes the ``book_info`` section of the page and turns every
    ``<dl class="booklist">`` entry into one ``label -> value`` pair.

    Args:
        marc_no: Record identifier; it is stringified into the query string.
        status: When equal to 1, each raw label and its value are printed.

    Returns:
        A dict keyed by ``hanzi2pinyin_split(label, split="", firstcode=True)``
        with every "/" removed; the value is the text of the first link in
        the entry's ``<dd>``, or the whole ``<dd>`` content if it has no link.

    Raises:
        IndexError: If the page has no ``book_info`` section, or an entry
            lacks its ``<dt>`` or ``<dd>`` element.
    """
    converter = PinYin()
    converter.load_word('word.data')

    url = ourl + 'item.php?marc_no=' + str(marc_no)
    raw = requests.get(url).text.encode(encoder).decode('utf8')
    # Drop non-breaking spaces before the remaining entities are unescaped.
    page = HTMLParser.HTMLParser().unescape(raw.replace('&nbsp;', ''))

    book_info = re.findall(
        '<div id="book_info">(.*?)<div class="clear"></div>', page, re.S)[0]

    result = {}
    for entry in re.findall('<dl class="booklist">(.*?)</dl>', book_info,
                            re.S):
        label = re.findall('<dt>(.*?)</dt>', entry, re.S)[0]
        if label == '':
            continue
        value = re.findall('<dd>(.*?)</dd>', entry, re.S)[0]
        # Prefer the link text when the value is wrapped in an anchor.
        links = re.findall('>(.*?)</a>', value, re.S)
        if links:
            value = links[0]
        key = converter.hanzi2pinyin_split(string=label, split="",
                                           firstcode=True).replace('/', '')
        result[key] = value
        if status == 1:
            print('%s %s' % (label, value))
    return result

Beispiel #2

0

Datei anzeigen

Datei: fuckvideodb.py Projekt: fengxuangit/MyScript

def main():
    """Load ``video.json`` and insert its records into the database.

    For every record one ``resource`` row is inserted, then the record's
    ``tag1`` is linked through ``category`` / ``category_mapping`` (the
    category row is created when the lookup finds none), and finally one
    ``screenshots`` row is inserted per entry of ``record['screen']``.

    NOTE(review): every statement is assembled with ``str.format`` from the
    JSON content, so a quote inside a value breaks (or alters) the SQL.
    Switch to bound parameters if ``MySQLHander`` supports them.
    """
    db = MySQLHander()
    converter = PinYin()
    converter.load_word()
    with open('video.json') as source:
        records = json.load(source)

    for record in records:
        # Dict literals evaluate in source order, so the random/time calls
        # happen in the same sequence as the placeholders below.
        fields = {
            'title': record['name'].encode('utf-8', 'ignore'),
            'desc': record['name'].encode('utf-8', 'ignore'),
            'thumb': record['thumb'],
            'url': record['url'],
            'duration': random.randint(80, 120),
            'vister': random.randint(4500, 9999),
            'likes': random.randint(500, 2000),
            'creat_time': time.time(),
            'up_time': time.strftime('%Y-%m-%d %H:%M:%S',
                                     time.localtime(time.time())),
        }
        rid = db.insert(
            "INSERT INTO resource VALUES (null, '', '{title}', '{desc}', '{thumb}', '{url}',{duration}, {vister}, {likes},{creat_time}, '{up_time}')".format(**fields))

        # xrange(1, 2) yields only 1, i.e. only "tag1" is processed.
        for tag in xrange(1, 2):
            tag_name = record["tag{0}".format(tag)].encode('utf-8', 'ignore')
            db.query(
                "SELECT id from category WHERE cname='{0}'".format(tag_name))
            row = db.fetchOneRow()
            if row:
                category_id = row[0]
            else:
                ename = converter.hanzi2pinyin_split(
                    string=tag_name, split="-").replace('-', '')
                category_id = db.insert(
                    "INSERT INTO category values(null, '{ename}', '{cname}', {time})".format(
                        ename=ename, cname=tag_name, time=int(time.time())))
            # The mapping row is written whether or not the category existed.
            db.insert(
                "INSERT INTO category_mapping values(null, {cid}, {rid}, {time})".format(
                    cid=category_id, rid=rid, time=int(time.time())))

        for shot in record['screen']:
            db.insert(
                "INSERT INTO screenshots values(null, {rid}, '{pic}', {time})".format(
                    rid=rid, pic=shot, time=int(time.time())))

        print("{0} done".format(record['thumb']))

    db.close()

Beispiel #3

0

Datei anzeigen

Datei: start_crawl.py Projekt: jryyufeng/zhihu_spider

 def getciyun(self):
     """Generate the three word clouds for the current topic.

     The topic (``self.aa.topic``) is converted with
     ``hanzi2pinyin_split(..., split="-")`` and used as the prefix of the
     second argument passed to ``cloud.ciyun1`` for each input file.
     """
     converter = PinYin()
     converter.load_word()
     prefix = str(converter.hanzi2pinyin_split(string=str(self.aa.topic),
                                               split="-"))
     # (input file, suffix) in the order the clouds are produced:
     # answerer information, question information, locations.
     jobs = (
         ('F:/zhihu/answer/people_qb.txt', 'people'),
         ('F:/zhihu/answer/question_top10.txt', 'question'),
         ('F:/zhihu/answer/p_location.txt', 'slocation'),
     )
     for source_path, suffix in jobs:
         cloud.ciyun1(source_path, prefix + suffix)

Beispiel #4

0

Datei anzeigen

def draw_frame(faces, img, gray, move):
    """Draw face boxes, recognised names and the FPS counter, then show the frame.

    Args:
        faces: Iterable of ``(x, y, w, h)`` rectangles, one per detected face.
        img: Frame that is drawn on and finally passed to ``cv2.imshow``.
        gray: Frame the face crops are taken from; assumed to be an array
            indexed ``[row, column]`` like any OpenCV/NumPy image.
        move: When equal to 2, ``steering_control(faces, img)`` runs first.

    Side effects:
        Increments the module-level ``fps`` frame counter and, for every
        recognised face, calls ``change_identity`` with the matched name.
    """
    global fps

    if move == 2:
        steering_control(faces, img)

    # Created on first use so the word table is loaded at most once per frame
    # instead of once per recognised face.
    pyin = None

    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), (200, 255, 0), 2)
        # Rows are the vertical axis: the crop is [y:y+h, x:x+w], not
        # [x:x+w, y:y+h].
        roi = gray[y:y + h, x:x + w]
        try:
            roi = cv2.resize(roi, (200, 200), interpolation=cv2.INTER_LINEAR)
            params = model.predict(roi)
            if params[1] < 500.0:
                if pyin is None:
                    pyin = PinYin()
                    pyin.load_word()
                pname = names[params[0]]
                change_identity(pname)
                label = ''.join(
                    pyin.hanzi2pinyin_split(string=pname, split=''))
                sign = "%s %.2f" % (label, params[1])
                cv2.putText(img, sign, (x, y - 2), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, (0, 0, 255), 2)
        except Exception:
            # Best effort: a face that cannot be processed is left unlabelled,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            continue

    # Average frame rate since ``t_start``.
    fps = fps + 1
    sfps = fps / (time.time() - t_start)
    cv2.putText(img, "FPS : " + str(int(sfps)), (10, 15),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    cv2.imshow("recognize-face", img)

Beispiel #5

0

Datei anzeigen

Datei: mall.py Projekt: ChenShuyan/pyb2c

def mallSearch():
    """Serve the mall search page and record statistics for the keyword.

    Without a ``kw`` request parameter the client is redirected to ``index``.
    Otherwise the malls whose keyword contains ``kw`` are looked up, the
    matching ``Keyword`` row is updated (or created), the session is
    committed, and ``mall/mallSearch.html`` is rendered together with up to
    ten keywords that contain ``kw``.
    """
    kw = MyRequest.getParams('kw')
    if not kw:
        return redirect(url_for('index'))

    kw = addslashes(kw)
    malls = Mall.query.filter(Mall.keyword.like('%' + kw + '%')).all()
    keyword = Keyword.query.filter_by(keyword=kw).first()

    if not keyword:
        # First search for this keyword: create the row with all counters at 1.
        keyword = Keyword()
        keyword.keyword = kw
        keyword.items = len(malls)
        keyword.updatetime = g.siteTime
        for counter in ('month_search', 'week_search', 'today_search',
                        'total_search'):
            setattr(keyword, counter, 1)
        word_file = os.path.sep.join([g.rootpath, 'assets', 'word.data'])
        py = PinYin(word_file)
        py.load_word()
        keyword.letter = py.hanzi2pinyin_split(string=kw, split=" ")
        db.session.add(keyword)
    else:
        if len(malls) > keyword.items:
            keyword.items = len(malls)
        # Each counter keeps growing while the last update and the current
        # site time map to the same ``timetodate`` value for its format code;
        # otherwise it restarts at 1.
        for fmt_code, counter in ((0, 'month_search'),
                                  (8, 'week_search'),
                                  (3, 'today_search')):
            last_seen = timetodate(keyword.updatetime, fmt_code)
            current = timetodate(g.siteTime, fmt_code)
            if last_seen == current:
                setattr(keyword, counter, getattr(keyword, counter) + 1)
            else:
                setattr(keyword, counter, 1)
        keyword.total_search += 1
        keyword.updatetime = g.siteTime
    db.session.commit()

    rewords = Keyword.query.filter(
        Keyword.keyword.like('%' + kw + '%')).limit(10).all()

    return render_template('mall/mallSearch.html', malls=malls, g=g, kw=kw,
                           rewords=rewords)

Beispiel #6

0

Datei anzeigen

Datei: ChangeToPinyin.py Projekt: SundongCandy/Whatever

class changetopinyin:
    """Group word/frequency records by the pinyin of their word.

    ``change`` reads a "word frequency" file, buckets every record under the
    ``hanzi2pinyin_split(word, '_')`` key and writes the buckets as JSON.
    """

    # Retained so that code reading ``changetopinyin.wf_dict`` keeps working;
    # every instance gets its own mapping in ``__init__``.
    wf_dict = {}

    def __init__(self):
        self.test = PinYin()
        self.test.load_word()
        # Per-instance state: a class-level dict would be shared (and keep
        # growing) across all instances.
        self.wf_dict = {}

    def change(self, filename):
        """Read ``filename``, bucket its records and save them to 'pinyin_dict2'.

        Each line is expected to look like ``<word> <frequency>``, separated
        by single spaces.
        """
        with open(filename, 'r') as ff:
            for line in ff:
                fields = line.split(' ')
                self.addtodict(word_fre(fields[0], int(fields[1])))
        # Replace the record objects by plain (word, frequency) pairs so the
        # buckets can be serialised.
        for bucket in self.wf_dict.values():
            for index, entry in enumerate(bucket):
                try:
                    bucket[index] = (entry.word, entry.fre)
                except AttributeError as e:
                    print(e)
        self.save('pinyin_dict2')

    def addtodict(self, wf):
        """Add ``wf`` to the heap stored under the pinyin key of ``wf.word``.

        NOTE(review): the ``<= 5`` bound lets a bucket grow to six entries
        before the smallest one starts being evicted; confirm whether five
        was intended.
        """
        pp = self.test.hanzi2pinyin_split(wf.word, '_')
        bucket = self.wf_dict.setdefault(pp, [])
        if len(bucket) <= 5:
            heapq.heappush(bucket, wf)
        else:
            # Bucket is full: insert and drop the smallest entry.
            heapq.heappushpop(bucket, wf)

    def save(self, filename):
        """Write ``self.wf_dict`` to ``filename`` as JSON.

        The data is serialised before the file is opened, so a serialisation
        error is reported without truncating an existing file.
        """
        try:
            payload = json.dumps(self.wf_dict)
        except (TypeError, ValueError) as e:
            print(e)
            return
        with open(filename, 'w') as wff:
            wff.write(payload)

Beispiel #7

0

Datei anzeigen

class changetopinyin:
    """Bucket word/frequency records by pinyin key and dump them as JSON.

    ``change`` parses a "word frequency" file, stores every record under the
    key returned by ``hanzi2pinyin_split(word, '_')`` and saves the result.
    """

    # Kept for callers that read the class attribute; instances use their own
    # dict, assigned in ``__init__``.
    wf_dict = {}

    def __init__(self):
        self.test = PinYin()
        self.test.load_word()
        # A class-level dict would be shared by every instance, so each
        # instance starts with a fresh mapping.
        self.wf_dict = {}

    def change(self, filename):
        """Process ``filename`` and write the buckets to 'pinyin_dict2'.

        Every line is split on single spaces; field 0 is the word and
        field 1 its integer frequency.
        """
        with open(filename, 'r') as ff:
            for line in ff:
                fields = line.split(' ')
                self.addtodict(word_fre(fields[0], int(fields[1])))
        # Turn the stored records into (word, frequency) tuples in place so
        # that they can be serialised.
        for bucket in self.wf_dict.values():
            for index, entry in enumerate(bucket):
                try:
                    bucket[index] = (entry.word, entry.fre)
                except AttributeError as e:
                    print(e)
        self.save('pinyin_dict2')

    def addtodict(self, wf):
        """Push ``wf`` onto the heap kept for the pinyin key of ``wf.word``.

        NOTE(review): because of the ``<= 5`` test a bucket holds up to six
        entries before the smallest is evicted; confirm the intended size.
        """
        pp = self.test.hanzi2pinyin_split(wf.word, '_')
        bucket = self.wf_dict.setdefault(pp, [])
        if len(bucket) <= 5:
            heapq.heappush(bucket, wf)
        else:
            # Full bucket: insert the new record and discard the smallest.
            heapq.heappushpop(bucket, wf)

    def save(self, filename):
        """Serialise ``self.wf_dict`` to JSON and write it to ``filename``.

        Serialisation happens first; if it fails the error is printed and
        the target file is left untouched.
        """
        try:
            payload = json.dumps(self.wf_dict)
        except (TypeError, ValueError) as e:
            print(e)
            return
        with open(filename, 'w') as wff:
            wff.write(payload)

Beispiel #8

0

Datei anzeigen

Datei: correct.py Projekt: SundongCandy/Whatever

class correction:
    """Suggest replacement phrases for a segmented query via pinyin lookup.

    ``jj_dict`` is read from the first line of the file 'pinyin_dict' and is
    used as a mapping from a ``hanzi2pinyin_split(term, '_')`` key to a list
    of ``(term, weight)`` candidates.
    """

    def __init__(self):
        self.pp = PinYin()
        self.pp.load_word()
        # The with-block closes the file; no explicit close() is needed.
        with open('pinyin_dict', 'r') as ff:
            self.jj_dict = json.loads(ff.readline())

    def correct(self, phrase_list):
        """Return up to five suggested phrases for ``phrase_list``.

        Args:
            phrase_list: List of strings, the segments of the query.

        Returns:
            Suggestions ordered from highest to lowest score.  The list is
            empty when the best suggestion equals the joined input (or when
            nothing could be generated); otherwise the joined input itself is
            removed from the suggestions.
        """
        import itertools

        termlist = []
        for nnlist in self.recompose(phrase_list):
            # One candidate list per segment: the dictionary entries for the
            # segment's pinyin, or the segment itself with weight 1.
            tmp_correct = []
            for item in nnlist:
                py = self.pp.hanzi2pinyin_split(item, '_')
                if py in self.jj_dict:
                    tmp_correct.append(list(self.jj_dict[py]))
                else:
                    tmp_correct.append([(item, 1)])

            # Enumerate every combination with the FIRST segment varying
            # fastest (product varies its last argument fastest, hence the
            # double reversal).  Empty inputs simply yield no extra work.
            for combo in itertools.product(*reversed(tmp_correct)):
                tmpstr = ''
                score = 0
                for candidate in reversed(combo):
                    tmps = candidate[0]
                    tmpstr += tmps
                    score += int(candidate[1]) * len(tmps) ** 7
                termlist.append(tup(tmpstr, score))

        result_list = self.sscore(termlist)
        comstr = ''.join(phrase_list)
        if not result_list or result_list[0] == comstr:
            return []
        if comstr in result_list:
            result_list.remove(comstr)
        return result_list

    def sscore(self, termlist):
        """Return the ``.term`` of the five largest items, largest first."""
        heap = list(termlist)
        heapq.heapify(heap)
        # Discard the smallest items until at most five remain.
        while len(heap) > 5:
            heapq.heappop(heap)
        result_list = []
        while heap:
            result_list.append(heapq.heappop(heap).term)
        # Items were popped in ascending order.
        result_list.reverse()
        return result_list

    def recompose(self, phrase_list):
        """Return the distinct segmentations obtained by merging 1-length items.

        Every item of length 1 is tried in three modes: merged into its left
        neighbour (0), merged into its right neighbour (1), or left alone
        (2).  All mode combinations are enumerated; duplicates are dropped.
        If there is no item of length 1 the result is ``[phrase_list]``
        (the same list object, not a copy).
        """
        position = [index for index, item in enumerate(phrase_list)
                    if len(item) == 1]
        if not position:
            return [phrase_list]

        attach = dict.fromkeys(position, 0)  # original index -> current mode
        length = len(position)
        cpl = []  # the resulting list of segmentations
        notend = True
        while notend:
            gap = 0  # number of items already removed from tmp_list
            tmp_list = copy.deepcopy(phrase_list)
            tmp_position = copy.deepcopy(position)
            pi = 0
            while pi < len(tmp_position):
                item2 = tmp_position[pi]
                mode = attach[item2]
                if mode == 0 and item2 - 1 - gap >= 0:
                    # Merge into the left neighbour.
                    tmp_list[item2 - 1 - gap] += tmp_list[item2 - gap]
                    tmp_position.remove(item2)
                    tmp_list.pop(item2 - gap)
                    gap += 1
                elif mode == 1 and item2 + 1 - gap < len(tmp_list):
                    # Merge into the right neighbour.
                    tmp_list[item2 + 1 - gap] = (tmp_list[item2 - gap] +
                                                 tmp_list[item2 + 1 - gap])
                    tmp_position.remove(item2)
                    tmp_list.pop(item2 - gap)
                    gap += 1
                    # The neighbour is no longer a single-character item.
                    if item2 + 1 in tmp_position:
                        tmp_position.remove(item2 + 1)
                else:
                    pi += 1

            if tmp_list not in cpl:
                cpl.append(tmp_list)

            # Advance the mode counters like an odometer, one step per pass.
            attach[position[0]] += 1
            i = 0
            while attach[position[i]] >= 3:
                attach[position[i]] = 0
                if i < length - 1:
                    attach[position[i + 1]] += 1
                    i += 1
                else:
                    notend = False
        return cpl

Beispiel #9

0

Datei anzeigen

Datei: main.py Projekt: dezechristophe/Crotal

#!/usr/bin/env python
# -*- coding:utf-8 -*-

from pinyin import PinYin

# Demo: convert a sample sentence and print it joined with "-".
test = PinYin()
test.load_word()
string = "钓鱼岛是中国的"
converted = test.hanzi2pinyin_split(string=string, split="-")
print("out: %s" % converted)

Beispiel #10

0

Datei anzeigen

    u"爱奇艺",
    u"腾讯视频",
    u"qq",
    u"熊猫tv",
    u"快递",
    u'4399',
}


# Module-level converter shared by the code below; its word table is loaded
# once, when the module is executed.
word2pinyin = PinYin()
word2pinyin.load_word()
# Lookup table of the 26 lowercase ASCII letters (every value is 1); only
# membership is tested by the code that uses it.
alphabet = dict.fromkeys('abcdefghijklmnopqrstuvwxyz', 1)


def hanzi2pinyi(word):
    """Convert ``word`` element by element and join the pieces.

    ASCII letters are lower-cased and kept; every other element is passed to
    ``word2pinyin.hanzi2pinyin`` and its return value is used as the piece.
    """
    pieces = []
    for char in word:
        if char.lower() in alphabet:
            piece = char.lower()
        else:
            piece = word2pinyin.hanzi2pinyin(char)
        pieces.append(piece)
    return ''.join(pieces)


if __name__ == '__main__':
    # Print the conversion of every white-listed word, one per line.
    for word in white_list:
        converted = word2pinyin.hanzi2pinyin_split(word)
        print(converted)

Beispiel #11

0

Datei anzeigen

Datei: word.py Projekt: githubfisher/youhot_website

#!/usr/bin/env python
# -*- coding:utf-8 -*-

from pinyin import PinYin
import sys

# Usage: word.py <text>
# Prints the conversion of <text> with the pieces joined by "_".
if len(sys.argv) < 2:
    # Exit with a readable message instead of an IndexError traceback.
    sys.exit('usage: %s <text>' % sys.argv[0])

test = PinYin('application/libraries/pinyin.py/word.data')
test.load_word()

print(test.hanzi2pinyin_split(string=sys.argv[1], split='_'))

Beispiel #12

0

Datei anzeigen

Datei: correct.py Projekt: SundongCandy/Whatever

class correction:
    """Propose corrected phrases for a segmented query using pinyin keys.

    ``jj_dict`` is loaded from the first line of the file 'pinyin_dict'; it
    is used as a mapping from a ``hanzi2pinyin_split(term, '_')`` key to a
    list of ``(term, weight)`` candidates.
    """

    def __init__(self):
        self.pp = PinYin()
        self.pp.load_word()
        # Leaving the with-block closes the file; close() is not needed.
        with open('pinyin_dict', 'r') as ff:
            self.jj_dict = json.loads(ff.readline())

    def correct(self, phrase_list):
        """Return up to five suggestions for ``phrase_list``, best first.

        Args:
            phrase_list: List of strings, the segments of the query.

        Returns:
            An empty list when the top suggestion equals the joined input
            (or when no suggestion could be produced); otherwise the
            suggestions without the joined input itself.
        """
        import itertools

        termlist = []
        for nnlist in self.recompose(phrase_list):
            # Candidates per segment: dictionary entries for its pinyin key,
            # or the segment itself with weight 1 when the key is unknown.
            tmp_correct = []
            for item in nnlist:
                py = self.pp.hanzi2pinyin_split(item, '_')
                if py in self.jj_dict:
                    tmp_correct.append(list(self.jj_dict[py]))
                else:
                    tmp_correct.append([(item, 1)])

            # itertools.product varies its last argument fastest; reversing
            # twice makes the first segment vary fastest.  An empty segment
            # list or an empty candidate list no longer raises IndexError.
            for combo in itertools.product(*reversed(tmp_correct)):
                tmpstr = ''
                score = 0
                for candidate in reversed(combo):
                    tmps = candidate[0]
                    tmpstr += tmps
                    score += int(candidate[1]) * len(tmps) ** 7
                termlist.append(tup(tmpstr, score))

        result_list = self.sscore(termlist)
        comstr = ''.join(phrase_list)
        if not result_list or result_list[0] == comstr:
            return []
        if comstr in result_list:
            result_list.remove(comstr)
        return result_list

    def sscore(self, termlist):
        """Return the ``.term`` values of the five largest items, largest first."""
        heap = list(termlist)
        heapq.heapify(heap)
        # Pop the smallest items until no more than five are left.
        while len(heap) > 5:
            heapq.heappop(heap)
        result_list = []
        while heap:
            result_list.append(heapq.heappop(heap).term)
        # The heap yields ascending order; callers want descending.
        result_list.reverse()
        return result_list

    def recompose(self, phrase_list):
        """Enumerate segmentations produced by merging items of length 1.

        Each item of length 1 cycles through three modes: 0 merges it into
        the item on its left, 1 merges it into the item on its right and 2
        keeps it.  Every combination of modes is applied to a copy of
        ``phrase_list``; distinct outcomes are collected in order.  When no
        item has length 1 the result is ``[phrase_list]`` (same list object).
        """
        position = [index for index, item in enumerate(phrase_list)
                    if len(item) == 1]
        if not position:
            return [phrase_list]

        attach = dict.fromkeys(position, 0)  # original index -> current mode
        length = len(position)
        cpl = []  # collected segmentations (list of lists)
        notend = True
        while notend:
            gap = 0  # how many items have been removed from tmp_list so far
            tmp_list = copy.deepcopy(phrase_list)
            tmp_position = copy.deepcopy(position)
            pi = 0
            while pi < len(tmp_position):
                item2 = tmp_position[pi]
                mode = attach[item2]
                if mode == 0 and item2 - 1 - gap >= 0:
                    # Append to the left neighbour.
                    tmp_list[item2 - 1 - gap] += tmp_list[item2 - gap]
                    tmp_position.remove(item2)
                    tmp_list.pop(item2 - gap)
                    gap += 1
                elif mode == 1 and item2 + 1 - gap < len(tmp_list):
                    # Prepend to the right neighbour.
                    tmp_list[item2 + 1 - gap] = (tmp_list[item2 - gap] +
                                                 tmp_list[item2 + 1 - gap])
                    tmp_position.remove(item2)
                    tmp_list.pop(item2 - gap)
                    gap += 1
                    # That neighbour now has more than one character.
                    if item2 + 1 in tmp_position:
                        tmp_position.remove(item2 + 1)
                else:
                    pi += 1

            if tmp_list not in cpl:
                cpl.append(tmp_list)

            # Step the mode counters like an odometer, one change per pass.
            attach[position[0]] += 1
            i = 0
            while attach[position[i]] >= 3:
                attach[position[i]] = 0
                if i < length - 1:
                    attach[position[i + 1]] += 1
                    i += 1
                else:
                    notend = False
        return cpl

Beispiel #13

0

Datei anzeigen

Datei: import.py Projekt: simon1024/crm

	positionTypes[name] = nid

# parse file and import data to db
fd = open(employee_file, 'r')
for line in fd.readlines():
	value = {}
	line = line.strip()
	items = line.split('\t')

	name = items[1]
	no = items[2]

	if no in nos.keys():
		continue

	username = h2p.hanzi2pinyin_split(string=name, split='')
	i = 0
	while username in userNames.keys():
		i = i + 1
		username = username + str(i)

	departmentName = items[3]
	if departmentName not in departmentTypes.keys():
		cursor.execute("insert into DeptType (name) values(%s)", (departmentName))
		cursor.execute("select id from DeptType where name=%s", (departmentName))
		id = cursor.fetchone()[0]
		departmentTypes[departmentName] = id
	department = departmentTypes[departmentName]

	positionName = items[4]
	if positionName not in positionTypes.keys():

Beispiel #14

0

Datei anzeigen

Datei: zhuan.py Projekt: 252319634/ajax_search

# -*- coding: utf-8 -*-
# from models import Cnword
from pinyin import PinYin
# Demo: print both conversions of the same sample sentence.
test = PinYin()
test.load_word()
sample = '钓鱼岛是中国的'
print(test.hanzi2pinyin(string=sample))
print(test.hanzi2pinyin_split(string=sample))

Beispiel #15

0

Datei anzeigen

Datei: getCheckInDataFromAPI.py Projekt: malan1991/GetCheckInDataFromWeibo

def hanziToPinyin(hanzi):
    """Return ``hanzi`` converted by ``PinYin.hanzi2pinyin_split`` with "_" as separator.

    A new ``PinYin`` object is created and ``load_word()`` is called on
    every invocation.
    """
    converter = PinYin()
    converter.load_word()
    converted = converter.hanzi2pinyin_split(string=hanzi, split="_")
    return converted