def query(self, qstring):
    """Find the URLs whose index contains every known word of *qstring*.

    The query string is segmented with ``fenci.solve``. Each resulting word
    is looked up in the ``wordlist`` table; words that are not present there
    are ignored. One ``wordlocation`` alias (``w0``, ``w1``, ...) is joined
    per known word, with all aliases constrained to the same ``urlid``.

    ``self.con`` is presumably a sqlite3 connection (the code relies on
    ``rowid`` and on connection-level ``execute``) -- TODO confirm. The word
    lookup uses qmark-style bound parameters.

    Args:
        qstring: the raw query text handed to ``fenci.solve``.

    Returns:
        ``None`` when segmentation yields no words. Otherwise a tuple
        ``(rows, wordids)``: each row is ``(urlid, location0, location1,
        ...)`` with one location per known word, and ``wordids`` holds the
        ``wordlist`` rowids of the known words in lookup order. Both are
        empty when none of the words is known.
    """
    words = fenci.solve(qstring)
    if not words:
        return None

    # Resolve each word to its rowid. The word is bound as a parameter so
    # that quotes inside the query text cannot alter the statement.
    wordids = []
    for word in words:
        if isinstance(word, bytes):
            # Bind text rather than raw bytes; assumes the segmenter emits
            # UTF-8 (getwords encodes to utf8 before segmenting) -- TODO confirm.
            word = word.decode('utf-8')
        row = self.con.execute(
            'select rowid from wordlist where word=?', (word,)
        ).fetchone()
        if row is not None:
            wordids.append(row[0])

    # With no known word there is no table to select from; building the
    # statement anyway would yield "select w0.urlid from  where ".
    if not wordids:
        return [], wordids

    # One wordlocation alias per known word, chained on urlid.
    fields = ['w0.urlid']
    tables = []
    clauses = []
    for n, wordid in enumerate(wordids):
        if n > 0:
            clauses.append('w%d.urlid=w%d.urlid' % (n - 1, n))
        fields.append('w%d.location' % n)
        tables.append('wordlocation w%d' % n)
        # wordid is a rowid read back from the database, not user input.
        clauses.append('w%d.wordid=%d' % (n, wordid))

    fullsql = 'select %s from %s where %s' % (
        ','.join(fields), ','.join(tables), ' and '.join(clauses))
    # Single formatted string: same output as the comma-separated print
    # statement (hence the two spaces) and valid on Python 2 and 3 alike.
    print('SQL:  %s' % fullsql)

    rows = list(self.con.execute(fullsql))
    return rows, wordids
def getwords():
    """Collect word counts from the entries of every feed in ``feedlist``.

    Each item of the module-level ``feedlist`` is passed to ``fp.parse``.
    For every entry whose title has not been seen before, the UTF-8 encoded
    title and description are concatenated and segmented with
    ``fenci.solve(..., fenci.mmseg)``. Counts go up by one for each item
    produced by iterating the segmentation result.

    Returns:
        A tuple ``(allwords, articlewords, articletitles)``:
        ``allwords`` maps word -> count over all kept entries,
        ``articlewords`` is a list with one word -> count dict per kept
        entry, and ``articletitles`` lists the kept titles in the same
        order as ``articlewords``.
    """
    totals = {}
    per_article = []
    titles = []

    for source in feedlist:
        parsed = fp.parse(source)
        for entry in parsed.entries:
            # Entries are de-duplicated by title.
            if entry.title in titles:
                continue

            text = entry.title.encode('utf8') + entry.description.encode('utf8')
            tokens = fenci.solve(text, fenci.mmseg)

            titles.append(entry.title)
            counts = {}
            per_article.append(counts)
            for token in tokens:
                totals[token] = totals.get(token, 0) + 1
                counts[token] = counts.get(token, 0) + 1

    return totals, per_article, titles
def segment(doc):
    """Return the keys of the mapping that ``fenci.solve`` produces for *doc*."""
    return fenci.solve(doc).keys()