Ejemplo n.º 1
0
    def udngram(self):
        """Split ``self.uni`` into words and store the result on the instance.

        A ``bngram.wordspliting`` worker is created for ``self.uni`` with the
        key ``"query"``, started, and waited on until it finishes.  The
        finished worker is then handed (as a one-element list) to
        ``self.dq.tailobject``, whose two return values are stored in
        ``self.worddic`` and ``self.wordlist``.
        """
        # NOTE(review): start()/join() suggests wordspliting is a
        # threading.Thread subclass -- confirm against bngram.
        splitter = bngram.wordspliting(self.uni, "query")
        splitter.start()
        # Block until the worker is done; its results are read right after.
        splitter.join()
        self.worddic, self.wordlist = self.dq.tailobject([splitter])
Ejemplo n.º 2
0
def bn_query(q, strin):
    """Split ``strin`` into words and post the four result sets on ``q``.

    A ``bngram.wordspliting`` object is built for ``strin`` with the key
    ``'sd'`` and its ``run()`` method is invoked directly, so the work is
    done in the calling thread before this function returns.

    The single item put on ``q`` is the tuple
    ``(dicword, companword, ascword, gramword)`` read from the splitter.
    """
    splitter = bngram.wordspliting(strin, 'sd')
    splitter.run()
    result = (
        splitter.dicword,
        splitter.companword,
        splitter.ascword,
        splitter.gramword,
    )
    q.put(result)
Ejemplo n.º 3
0
def linesplitinster(md5urllist):
    """Word-split the stored content of every URL key and record the words.

    For each key of ``md5urllist`` the content lines are fetched through a
    ``Purecontent("r")`` handle, split by ``bngram.wordspliting`` workers,
    and the resulting words are written through ``DataInsert`` and
    ``TextInsert``.  Progress is reported on ``stderr``: ``"."`` per key,
    ``"dbsync"`` on every page sync and ``"+"`` when ``reloadxmlrpcd()``
    returns a true value.

    The ``TextInsert`` and ``Purecontent`` handles are closed even when
    processing raises; the exception is then propagated to the caller.

    :param md5urllist: mapping whose keys identify the documents to process
        (only ``.keys()`` is used).
    """
    sync_interval = 8192  # a sync is forced once more than this many keys were handled
    purei = Purecontent("r")
    try:
        wordi = TextInsert()
        try:
            pending = 0
            for md5url in md5urllist.keys():
                _linesplit_one(purei, wordi, md5url)
                pending += 1
                if pending > sync_interval:
                    _linesplit_sync(wordi)
                    pending = 0
                stderr.write(".")
            # Final sync so the keys handled since the last periodic sync
            # are written too.
            _linesplit_sync(wordi)
        finally:
            wordi.closedicdb()
    finally:
        purei.close()


def _linesplit_sync(wordi):
    """Sync the word pages of ``wordi`` and report an xmlrpcd reload."""
    stderr.write("dbsync")
    wordi.sync_wpage()
    if reloadxmlrpcd():
        stderr.write("+")


def _linesplit_one(purei, wordi, md5url):
    """Split every content line of one document and store its words."""
    pureserial = purei.queryserial(md5url)
    # Query the count once; a false value means there is nothing to split.
    rawcount = purei.querycontentcount(pureserial)
    purecount = int(rawcount) + 1 if rawcount else 0

    workers = []
    for seri in xrange(purecount):
        querykey = pureserial + contentprocess.lintoascii(seri)
        # Throttle: wait while the number of active workers is at the limit.
        while count_active(workers) >= config.splitercpu:
            time.sleep(0.5)
        worker = bngram.wordspliting(purei.querycontentinline(querykey), querykey)
        workers.append(worker)
        worker.start()

    dba = DataInsert()
    dba.outdicdbinit()  # open the database of words that are not in the dictionary
    reload_needed = False
    try:
        dba.companwordcount = 0
        for worker in workers:
            # NOTE(review): join() with a timeout may return while the worker
            # is still running, in which case the word lists read below could
            # be incomplete -- confirm this is acceptable.
            worker.join(config.splitertimeout)
            dba.wordlist = worker.companword
            if dba.wordlist:
                dba.anuworddb()
                reload_needed = True
    finally:
        dba.outdicdbclose()

    if reload_needed:
        wordi.anureload()

    for worker in workers:
        if worker.dicword:
            wordi.getdicdb = 1
            wordi.dicword = worker.dicword
            wordi.tempwurl(worker.querykey)
        if worker.companword:
            wordi.getdicdb = 2
            wordi.dicword = worker.companword
            wordi.tempwurl(worker.querykey)