def _load_term_to_db(fileName, startLine=3225):
    """Load whitespace-separated terms from a GBK-encoded file into the term store.

    Every processed line is decoded as GBK and split on whitespace. Terms
    whose length is <= 1 are ignored. For each remaining term, a record is
    looked up by ``_id``: if one exists its ``accFrequence`` is incremented
    by one, otherwise a new record is inserted with the term's pinyin,
    ``accFrequence`` 1 and ``queryFrequence`` 0.

    Lines that cannot be decoded as GBK are reported on stdout and skipped.

    Args:
        fileName: Path of the file to read.
        startLine: 1-based number of the first line to process; all earlier
            lines are skipped. Defaults to 3225, the value that used to be
            hard-coded in the loop.
    """
    p = Pinyin()
    # Read raw bytes: every line is decoded explicitly as GBK below.
    with open(fileName, "rb") as fp:
        for count, line in enumerate(fp):
            # Progress output: 0-based index of the line just read.
            print(count)
            if count + 1 < startLine:
                continue
            try:
                line = line.decode('gbk')
            except UnicodeDecodeError as err:
                # Only a failed decode is expected here; any other error
                # should surface instead of being silently skipped.
                # repr keeps the undecodable bytes printable.
                print("%r %s" % (line, err))
                continue
            for term in line.split():
                if len(term) <= 1:
                    continue
                cond = {"_id": term}
                if db.get_term(cond):
                    db.update_term(cond, {"$inc": {"accFrequence": 1}})
                else:
                    # The pinyin is only stored on insert, so compute it
                    # here rather than for every term.
                    db.insert_term({
                        "_id": term,
                        "pinyin": p.get_pinyin(term, ""),
                        "accFrequence": 1,
                        "queryFrequence": 0,
                    })
def _load_term_to_db(fileName, startLine=3225):
    """Load whitespace-separated terms from a GBK-encoded file into the term store.

    Every processed line is decoded as GBK and split on whitespace. Terms
    whose length is <= 1 are ignored. For each remaining term, a record is
    looked up by ``_id``: if one exists its ``accFrequence`` is incremented
    by one, otherwise a new record is inserted with the term's pinyin,
    ``accFrequence`` 1 and ``queryFrequence`` 0.

    Lines that cannot be decoded as GBK are reported on stdout and skipped.

    Args:
        fileName: Path of the file to read.
        startLine: 1-based number of the first line to process; all earlier
            lines are skipped. Defaults to 3225, the value that used to be
            hard-coded in the loop.
    """
    p = Pinyin()
    # Read raw bytes: every line is decoded explicitly as GBK below.
    with open(fileName, "rb") as fp:
        for count, line in enumerate(fp):
            # Progress output: 0-based index of the line just read.
            print(count)
            if count + 1 < startLine:
                continue
            try:
                line = line.decode('gbk')
            except UnicodeDecodeError as err:
                # Only a failed decode is expected here; any other error
                # should surface instead of being silently skipped.
                # repr keeps the undecodable bytes printable.
                print("%r %s" % (line, err))
                continue
            for term in line.split():
                if len(term) <= 1:
                    continue
                cond = {"_id": term}
                if db.get_term(cond):
                    db.update_term(cond, {"$inc": {"accFrequence": 1}})
                else:
                    # The pinyin is only stored on insert, so compute it
                    # here rather than for every term.
                    db.insert_term({
                        "_id": term,
                        "pinyin": p.get_pinyin(term, ""),
                        "accFrequence": 1,
                        "queryFrequence": 0,
                    })
def update_query(query):
    """Record one occurrence of *query* and return its related terms.

    A non-unicode *query* is decoded as UTF-8 first. If a record with
    ``_id`` equal to the query already exists, its ``queryFrequence`` is
    incremented by one; when the value read before that increment is
    exactly 2, the query's pinyin is also inserted into ``pinyinTree``.
    If no record exists, one is inserted with ``accFrequence`` 0 and
    ``queryFrequence`` 1.

    Returns whatever ``_get_related(query)`` returns.
    """
    if not isinstance(query, unicode):
        query = query.decode("utf-8")

    selector = {"_id": query}
    spelling = p.get_pinyin(query, "")
    existing = db.get_term(selector)

    if not existing:
        # First sighting of this query: create its record.
        db.insert_term({
            "_id": query,
            "pinyin": spelling,
            "accFrequence": 0,
            "queryFrequence": 1,
        })
    else:
        db.update_term(selector, {"$inc": {"queryFrequence": 1}})
        # `existing` holds the value from before the increment above.
        if existing[u"queryFrequence"] == 2:
            pinyinTree.insert_pinyin(spelling)

    return _get_related(query)
def update_query(query):
    """Record one occurrence of *query* and return its related terms.

    A non-unicode *query* is decoded as UTF-8 first. If a record with
    ``_id`` equal to the query already exists, its ``queryFrequence`` is
    incremented by one; when the value read before that increment is
    exactly 2, the query's pinyin is also inserted into ``pinyinTree``.
    If no record exists, one is inserted with ``accFrequence`` 0 and
    ``queryFrequence`` 1.

    Returns whatever ``_get_related(query)`` returns.
    """
    if not isinstance(query, unicode):
        query = query.decode('utf-8')

    selector = {"_id": query}
    spelling = p.get_pinyin(query, "")
    existing = db.get_term(selector)

    if not existing:
        # First sighting of this query: create its record.
        db.insert_term({
            "_id": query,
            "pinyin": spelling,
            "accFrequence": 0,
            "queryFrequence": 1,
        })
    else:
        db.update_term(selector, {"$inc": {"queryFrequence": 1}})
        # `existing` holds the value from before the increment above.
        if existing[u'queryFrequence'] == 2:
            pinyinTree.insert_pinyin(spelling)

    return _get_related(query)