Ejemplo n.º 1
0
def _init():
    """Fill ``pinyinTree`` from stored terms, then call ``_load_dir_post()``.

    Terms are fetched through ``db.get_terms`` with an ``$or`` filter:
    ``accFrequence`` greater than 200, or ``queryFrequence`` greater than 3.
    The ``pinyin`` field of every returned term is inserted into the tree.
    """
    # An earlier filter used only accFrequence > 300; the broader one below
    # replaced it.
    selection = {
        "$or": [
            {"accFrequence": {"$gt": 200}},
            {"queryFrequence": {"$gt": 3}},
        ]
    }
    for entry in db.get_terms(selection):
        pinyinTree.insert_pinyin(entry["pinyin"])
    _load_dir_post()
Ejemplo n.º 2
0
def _init():
    """Seed ``pinyinTree`` with term pinyin and then run ``_load_dir_post()``.

    The terms come from ``db.get_terms`` using an ``$or`` of two conditions:
    ``accFrequence`` above 200, or ``queryFrequence`` above 3. Each term's
    ``pinyin`` value is passed to ``pinyinTree.insert_pinyin``.
    """
    # A previous version selected on accFrequence > 300 alone.
    by_acc_frequency = {"accFrequence": {"$gt": 200}}
    by_query_frequency = {"queryFrequence": {"$gt": 3}}
    selected = db.get_terms({"$or": [by_acc_frequency, by_query_frequency]})
    for record in selected:
        pinyinTree.insert_pinyin(record['pinyin'])
    _load_dir_post()
Ejemplo n.º 3
0
def _get_related(query):
    """Return up to ten terms related to ``query``, highest score first.

    Candidate terms are fetched with ``queryFrequence`` greater than 3. A
    term is related when it contains ``query`` or is contained in it, and is
    not equal to it.

    Each related term gets two ranks, both divided by the number of related
    terms:

    * the position of its ``_min_dist(query, term)`` value among all
      distances sorted in descending order (weight 0.4);
    * the position of its ``queryFrequence`` among all frequencies sorted in
      ascending order (weight 0.6).

    Equal values share the lowest position they occupy.

    Returns:
        A list of ``(term, score)`` tuples sorted by descending score, at
        most ten long; an empty list when nothing is related.
    """
    terms = db.get_terms({"queryFrequence": {"$gt": 3}})
    if not terms:
        return []

    # One (term, distance, frequency) tuple per related term.
    candidates = []
    for term in terms:
        t = term['_id']
        if (t.find(query) != -1 or query.find(t) != -1) and t != query:
            frequency = term['queryFrequence']
            distance = _min_dist(query, t)
            candidates.append((t, distance, frequency))

    if not candidates:
        return []

    # Precompute the first position of every distinct value once, instead of
    # calling list.index() for each candidate (which made scoring quadratic).
    # setdefault keeps the first position, matching what list.index() returns.
    # Assumes distances and frequencies are hashable numbers.
    distance_rank = {}
    ordered_distances = sorted((c[1] for c in candidates), reverse=True)
    for position, distance in enumerate(ordered_distances):
        distance_rank.setdefault(distance, position)

    frequency_rank = {}
    ordered_frequencies = sorted(c[2] for c in candidates)
    for position, frequency in enumerate(ordered_frequencies):
        frequency_rank.setdefault(frequency, position)

    total = float(len(candidates))
    related = {}
    for t, distance, frequency in candidates:
        score_of_dist = distance_rank[distance] / total
        score_of_fre = frequency_rank[frequency] / total
        related[t] = score_of_dist * 0.4 + score_of_fre * 0.6

    results = sorted(related.items(),
                     key=lambda item: item[1],
                     reverse=True)
    # Slicing already clamps to the list length.
    return results[:10]
Ejemplo n.º 4
0
def _get_related(query):
    """Return up to ten terms related to ``query``, highest score first.

    Candidate terms are fetched with ``queryFrequence`` greater than 3. A
    term is related when it contains ``query`` or is contained in it, and is
    not equal to it.

    Each related term gets two ranks, both divided by the number of related
    terms:

    * the position of its ``_min_dist(query, term)`` value among all
      distances sorted in descending order (weight 0.4);
    * the position of its ``queryFrequence`` among all frequencies sorted in
      ascending order (weight 0.6).

    Equal values share the lowest position they occupy.

    Returns:
        A list of ``(term, score)`` tuples sorted by descending score, at
        most ten long; an empty list when nothing is related.
    """
    terms = db.get_terms({"queryFrequence": {"$gt": 3}})
    if not terms:
        return []

    # One (term, distance, frequency) tuple per related term.
    candidates = []
    for term in terms:
        t = term["_id"]
        if (t.find(query) != -1 or query.find(t) != -1) and t != query:
            frequency = term["queryFrequence"]
            distance = _min_dist(query, t)
            candidates.append((t, distance, frequency))

    if not candidates:
        return []

    # Precompute the first position of every distinct value once, instead of
    # calling list.index() for each candidate (which made scoring quadratic).
    # setdefault keeps the first position, matching what list.index() returns.
    # Assumes distances and frequencies are hashable numbers.
    distance_rank = {}
    ordered_distances = sorted((c[1] for c in candidates), reverse=True)
    for position, distance in enumerate(ordered_distances):
        distance_rank.setdefault(distance, position)

    frequency_rank = {}
    ordered_frequencies = sorted(c[2] for c in candidates)
    for position, frequency in enumerate(ordered_frequencies):
        frequency_rank.setdefault(frequency, position)

    total = float(len(candidates))
    related = {}
    for t, distance, frequency in candidates:
        score_of_dist = distance_rank[distance] / total
        score_of_fre = frequency_rank[frequency] / total
        related[t] = score_of_dist * 0.4 + score_of_fre * 0.6

    results = sorted(related.items(), key=lambda item: item[1], reverse=True)
    # Slicing already clamps to the list length.
    return results[:10]
Ejemplo n.º 5
0
def get_matches(query):
    """Return up to 20 ``(term, score)`` pairs for ``query``, best first.

    ``query`` is decoded from UTF-8 when it is not already ``unicode``. Its
    pinyin string (``p.get_pinyin(query, "")``) is looked up in
    ``pinyinTree``; for every match, the terms returned by
    ``db.get_terms({"pinyin": match})`` are kept when ``_is_match`` accepts
    them and scored as ``accFrequence * 0.05 + queryFrequence * 0.95``.

    Returns:
        A list of ``(term_id, score)`` tuples sorted by descending score, at
        most 20 long, or ``None`` when the pinyin tree yields no match.
    """
    text = query if isinstance(query, unicode) else query.decode("utf-8")

    pinyin_string = p.get_pinyin(text, "")
    candidates = pinyinTree.get_match(pinyin_string)
    if not candidates:
        return None

    scores = {}
    for candidate in candidates:
        # An earlier query also required accFrequence > 5; it now filters on
        # pinyin only.
        for term in db.get_terms({"pinyin": candidate}):
            if not _is_match(text, term["_id"], pinyin_string, term["pinyin"]):
                continue
            weighted = term["accFrequence"] * 0.05 + term["queryFrequence"] * 0.95
            scores[term["_id"]] = weighted

    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:20]
Ejemplo n.º 6
0
def get_matches(query):
    """Look up stored terms matching ``query`` and rank them by score.

    A non-``unicode`` ``query`` is first decoded as UTF-8. The query's pinyin
    string, obtained from ``p.get_pinyin(query, "")``, is matched against
    ``pinyinTree``. Terms fetched per match via
    ``db.get_terms({"pinyin": match})`` are retained only if ``_is_match``
    accepts them; each retained term scores
    ``accFrequence * 0.05 + queryFrequence * 0.95``.

    Returns:
        ``None`` when ``pinyinTree`` has no match; otherwise a list of at
        most 20 ``(term_id, score)`` tuples in descending score order.
    """
    if not isinstance(query, unicode):
        query = query.decode('utf-8')

    query_pinyin = p.get_pinyin(query, "")
    tree_hits = pinyinTree.get_match(query_pinyin)
    if not tree_hits:
        return None

    score_by_term = {}
    for hit in tree_hits:
        # A former version of this lookup also demanded accFrequence > 5.
        for record in db.get_terms({"pinyin": hit}):
            accepted = _is_match(query, record['_id'], query_pinyin,
                                 record['pinyin'])
            if accepted:
                from_acc = record['accFrequence'] * 0.05
                from_query = record['queryFrequence'] * 0.95
                score_by_term[record['_id']] = from_acc + from_query

    ordered = list(score_by_term.items())
    ordered.sort(key=lambda entry: entry[1], reverse=True)

    return ordered[:20]