def __init__ (self, config): PYSQLiteDB.__init__ (self, user_db = "user_zhengju.db") #~ print "db init" #~ Init caches self.select_cache = Cache () self.select_all_cache = Cache() self.longest_length_cache = Cache() self.load_config(config)
class ZhengJuDB (PYSQLiteDB): """phrase database that contains all phrases and phrases' pinyin""" def __init__ (self, config): PYSQLiteDB.__init__ (self, user_db = "user_zhengju.db") #~ print "db init" #~ Init caches self.select_cache = Cache () self.select_all_cache = Cache() self.longest_length_cache = Cache() self.load_config(config) def load_config(self, config): self.large_charset = config.read ("/IMEngine/Python/ZhengJu/LargeCharset", False) def clear_cache(self): #~ pass self.select_cache.clear() self.select_all_cache.clear() self.longest_length_cache.clear() def add_phrase (self, pinyin, freq, database = "user_db", user_freq = 32): """ add phrase to database""" sqlstring = """INSERT INTO %s.py_phrase (ylen, y0, y1, y2, y3, yx, s0, s1, s2, s3, phrase, freq, user_freq) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" phrase = u"".join([i.char for i in pinyin]) #~ print "commit", phrase record = [None, None, None, None, None, None, None, None, None, None, None, 0, None] record [YLEN] = len (pinyin) i = 0 for p in pinyin[:4]: record[Y0 + i] = p.get_pinyin_id () record[S0 + i] = p.get_sheng_mu_id () i += 1 if len(pinyin) > 4: record[YX] = "'".join (map (str, pinyin[4:])) record[PHRASE] = phrase record[FREQ]= freq record[USER_FREQ] = user_freq self.db.execute (sqlstring % (database, ), record) self.db.commit () del self.select_cache[u"'".join (map (str, pinyin))] def clean_useless_phrase (self): "Remove phrase from user database" #~ print "remove", record[6] sql = "DELETE FROM user_db.py_phrase WHERE user_freq = 0" self.db.execute (sql) self.db.commit () self.clear_cache() def remove_phrase (self, record, database = "user_db"): "Remove phrase from user database" #~ print "remove", record[6] sql = "DELETE FROM user_db.py_phrase WHERE ylen = ? AND y0 = ? AND phrase = ?" self.db.execute (sql, (record[YLEN], record[Y0], record[6])) self.db.commit () self.clear_cache() def build_pinyin_condition(self, pys, length = None): sql_conditions = [] i = 0 for py in pys[:4]: sql_conditions.append ("s%d = %d" % (i, py.get_sheng_mu_id ())) if py.is_complete (): sql_conditions.append ("y%d = %d" % (i, py.get_pinyin_id ())) i += 1 if len (pys) > 4: pp = lambda (x): x.get_pinyin() if x.is_complete () else x.get_pinyin() + "%" pattern = "'".join (map (pp, pys[4:])) if length == None: sql_conditions.append ("yx LIKE \"" + pattern + "%\"") else: sql_conditions.append ("yx LIKE \"" + pattern + "\"") return sql_conditions def get_longest_phrase_length(self , pylist): """return the longest word starting with first two pinyin of pylist""" if len(pylist)<5: return len(pylist) pys=pylist[:4] pinyin_string = u"'".join (map (str, pys)) result = self.longest_length_cache [pinyin_string] if result != None: return result candidates = self.select_words_by_pinyin_list_all(pys) if candidates: result = max([i[YLEN] for i in candidates]) else: result = 3 #~ tables_union = """( SELECT ylen FROM main.py_phrase WHERE %(conditions)s UNION ALL #~ SELECT ylen FROM user_db.py_phrase WHERE %(conditions)s )""" #~ sql_conditions = self.build_pinyin_condition(pys) #~ tables_union = tables_union % { "conditions" : " AND ".join (sql_conditions) } #~ sql_string = "SELECT Max(ylen) FROM " + tables_union + " ;" #~ print "start", time.time() #~ result = list (self.db.execute (sql_string).fetchall ()) #~ if not result[0][0]: #~ result = 1 #~ else: #~ result = result[0][0] self.longest_length_cache[pinyin_string] = result #~ print "longest",pinyin_string, result, time.time() if result > len(pylist): return len(pylist) return result def select_words_by_phrase (self, pys, database = None): return self.select_words_by_pinyin(pys, phrase=u"".join([i.char for i in pys]),database=database) def select_phrase (self, phrase, database = None): sql_conditions = "ylen =%d AND phrase = \"%s\"" % (len(phrase), phrase) if database: tables_union = "(SELECT ylen, y0, y1, y2, y3, yx, phrase, freq, user_freq FROM " + database+ ".py_phrase WHERE %(conditions)s) " else: tables_union = """ (SELECT ylen, y0, y1, y2, y3, yx, phrase, freq, user_freq FROM user_db.py_phrase WHERE %(conditions)s UNION ALL SELECT ylen, y0, y1, y2, y3, yx, phrase, freq, user_freq FROM main.py_phrase WHERE %(conditions)s) """ sql_prefix = "SELECT * FROM " + tables_union % { "conditions" : sql_conditions } sql_string = sql_prefix + " GROUP BY phrase ORDER BY freq;" result = list (self.db.execute (sql_string).fetchall ()) return result def select_words_by_pinyin (self, pys, length=None, phrase=None, database = None): sql_conditions = [] if length: sql_conditions = ["ylen = %d" % length ] if not self.large_charset: sql_conditions.append("freq > 0") if phrase: sql_conditions.append("phrase = \"%s" % phrase +"\"") if database: tables_union = "( SELECT ylen, y0, y1, y2, y3, yx, phrase, freq, user_freq FROM " + database+ ".py_phrase WHERE %(conditions)s )" else: tables_union = """( SELECT ylen, y0, y1, y2, y3, yx, phrase, freq, user_freq FROM user_db.py_phrase WHERE %(conditions)s UNION ALL SELECT ylen, y0, y1, y2, y3, yx, phrase, freq, user_freq FROM main.py_phrase WHERE %(conditions)s )""" sql_conditions.extend (self.build_pinyin_condition (pys, length)) tables_union = tables_union % { "conditions" : " AND ".join (sql_conditions) } sql_prefix = "SELECT ylen, y0, y1, y2, y3, yx, phrase, MAX(freq*ifnull(user_freq,1)) as adj_freq FROM " + tables_union sql_string = sql_prefix + " GROUP BY phrase ORDER BY adj_freq DESC;" #~ print "by pinyin" result = list (self.db.execute (sql_string).fetchall ()) #~ print result return result def select_words_by_pinyin_list_all (self, pys, length = None): """select words from database by list that contains PYDict.PinYinWord objects""" pinyin_string = u"'".join (map (str, pys)) result = self.select_all_cache [pinyin_string] if result == None: result = self.select_words_by_pinyin(pys) self.select_all_cache[pinyin_string] = result #~ print "list_all",pinyin_string return result def remove_all_user_words(self): sql = "DELETE FROM user_db.py_phrase WHERE freq = " + str(USER_WORD) self.db.execute (sql) self.db.commit () def remove_all_user_phrase(self): sql = "DELETE FROM user_db.py_phrase WHERE freq = " + str(USER_PHRASE) self.db.execute (sql) self.db.commit () def remvoe_all_user_freq(self): sql = "DELETE FROM user_db.py_phrase WHERE freq != %d and freq != %d" (USER_PHRASE, USER_WORD) self.db.execute (sql) self.db.commit () def select_words_by_pinyin_list (self, pys, database = None): """select words from database by list that contains PYDict.PinYinWord objects""" pinyin_string = u"'".join (map (str, pys)) result = self.select_cache [pinyin_string] if result == None: result = self.select_words_by_pinyin(pys, len(pys), database) self.select_cache[pinyin_string] = result #~ print "list",pinyin_string return result def adjust_phrase_freq (self, pylist): """this function adjusts frequence of phrase in user database.""" p = self.select_words_by_phrase(pylist,"user_db") if len(p)>0: q = self.select_words_by_pinyin(pylist, length = len(pylist), database = "main") if len(q) == 0 or p[0][7] > q[0][7]: sql_conditions = ["ylen = %d" % len(pylist) ] sql_conditions.extend (self.build_pinyin_condition (pylist, len(pylist))) sql_conditions.append ( "phrase != ?" ) sql_conditions.append ( "user_freq * freq > " + str(p[0][7]) ) sql = "UPDATE user_db.py_phrase SET user_freq = user_freq / 2 WHERE %(conditions)s" % { "conditions" : " AND ".join (sql_conditions) } self.db.execute (sql, [u"".join([i.char for i in pylist])]) self.db.commit () sql = "UPDATE user_db.py_phrase SET user_freq = user_freq * 2 WHERE phrase = ?" self.db.execute (sql,[u"".join([i.char for i in pylist])]) self.db.commit () else: p = self.select_words_by_phrase(pylist,"main") self.add_phrase(pylist,p[0][7],user_freq = 10)