Beispiel #1
0
def _load_term_to_db(fileName):
    p = Pinyin()
    count = 0
    with open(fileName) as fp:
        for line in fp:
            print count
            count += 1
            if(count < 3225):
                continue
            try:
                line = line.decode('gbk')
            except Exception, err:
                print line, err
                continue
            terms = line.split()
            for term in terms:
                if len(term) <= 1:
                    continue
                pinyin = p.get_pinyin(term, "")
                cond = {"_id": term}
                if db.get_term(cond):
                    to = {"$inc": {"accFrequence": 1}}
                    db.update_term(cond, to)
                else:
                    data = {"_id": term,
                            "pinyin": pinyin,
                            "accFrequence": 1,
                            "queryFrequence": 0}
                    db.insert_term(data)
Beispiel #2
0
def _load_term_to_db(fileName):
    p = Pinyin()
    count = 0
    with open(fileName) as fp:
        for line in fp:
            print count
            count += 1
            if (count < 3225):
                continue
            try:
                line = line.decode('gbk')
            except Exception, err:
                print line, err
                continue
            terms = line.split()
            for term in terms:
                if len(term) <= 1:
                    continue
                pinyin = p.get_pinyin(term, "")
                cond = {"_id": term}
                if db.get_term(cond):
                    to = {"$inc": {"accFrequence": 1}}
                    db.update_term(cond, to)
                else:
                    data = {
                        "_id": term,
                        "pinyin": pinyin,
                        "accFrequence": 1,
                        "queryFrequence": 0
                    }
                    db.insert_term(data)
def update_query(query):
    """Record one occurrence of *query* and return its related entries.

    A non-``unicode`` *query* is first decoded as UTF-8.  If no record
    with ``_id`` equal to the query exists, one is inserted with
    ``accFrequence`` 0 and ``queryFrequence`` 1.  Otherwise the record's
    ``queryFrequence`` is incremented, and when the value read *before*
    the increment was exactly 2 the query's pinyin is added to
    ``pinyinTree``.

    Returns:
        Whatever ``_get_related(query)`` returns for the (decoded) query.
    """
    if not isinstance(query, unicode):
        query = query.decode("utf-8")
    pinyin = p.get_pinyin(query, "")
    selector = {"_id": query}
    existing = db.get_term(selector)
    if not existing:
        # First time this query is seen: create its record.
        db.insert_term({"_id": query, "pinyin": pinyin, "accFrequence": 0, "queryFrequence": 1})
    else:
        db.update_term(selector, {"$inc": {"queryFrequence": 1}})
        # `existing` was fetched before the increment was issued, so this
        # compares against the previous queryFrequence value.
        if existing[u"queryFrequence"] == 2:
            pinyinTree.insert_pinyin(pinyin)

    return _get_related(query)
Beispiel #4
0
def update_query(query):
    """Record one occurrence of *query* and return its related entries.

    A non-``unicode`` *query* is first decoded as UTF-8.  If no record
    with ``_id`` equal to the query exists, one is inserted with
    ``accFrequence`` 0 and ``queryFrequence`` 1.  Otherwise the record's
    ``queryFrequence`` is incremented, and when the value read *before*
    the increment was exactly 2 the query's pinyin is added to
    ``pinyinTree``.

    Returns:
        Whatever ``_get_related(query)`` returns for the (decoded) query.
    """
    if not isinstance(query, unicode):
        query = query.decode('utf-8')
    pinyin = p.get_pinyin(query, "")
    selector = {"_id": query}
    existing = db.get_term(selector)
    if not existing:
        # First time this query is seen: create its record.
        db.insert_term({
            "_id": query,
            "pinyin": pinyin,
            "accFrequence": 0,
            "queryFrequence": 1
        })
    else:
        db.update_term(selector, {"$inc": {"queryFrequence": 1}})
        # `existing` was fetched before the increment was issued, so this
        # compares against the previous queryFrequence value.
        if existing[u'queryFrequence'] == 2:
            pinyinTree.insert_pinyin(pinyin)

    return _get_related(query)