def _init():
    """Seed ``pinyinTree`` from stored terms, then run ``_load_dir_post()``.

    Fetches every term whose ``accFrequence`` is greater than 200 or whose
    ``queryFrequence`` is greater than 3, and inserts the ``pinyin`` value of
    each fetched term into ``pinyinTree``.
    """
    frequent_enough = {
        "$or": [
            {"accFrequence": {"$gt": 200}},
            {"queryFrequence": {"$gt": 3}},
        ]
    }
    for entry in db.get_terms(frequent_enough):
        pinyinTree.insert_pinyin(entry["pinyin"])
    _load_dir_post()
def _init():
    """Fill ``pinyinTree`` with the pinyin of selected terms.

    A term is selected when its ``accFrequence`` exceeds 200 or its
    ``queryFrequence`` exceeds 3. After every selected term's ``pinyin`` has
    been inserted, ``_load_dir_post()`` is called.
    """
    by_accumulated = {"accFrequence": {"$gt": 200}}
    by_queries = {"queryFrequence": {"$gt": 3}}
    selected = db.get_terms({"$or": [by_accumulated, by_queries]})
    for record in selected:
        pinyinTree.insert_pinyin(record["pinyin"])
    _load_dir_post()
def _first_index_ranks(ordered):
    """Map each value of *ordered* to the index of its first occurrence."""
    ranks = {}
    for position, value in enumerate(ordered):
        ranks.setdefault(value, position)
    return ranks


def _get_related(query):
    """Return up to ten stored terms related to *query*, best first.

    Candidates are the stored terms with ``queryFrequence`` greater than 3.
    A candidate is related when it contains *query* or is contained in it,
    and is not equal to *query*. Each related term is scored from two rank
    positions, both divided by the number of related terms:

    * the position of its ``_min_dist(query, term)`` value in the distances
      sorted in descending order (weight 0.4);
    * the position of its ``queryFrequence`` in the frequencies sorted in
      ascending order (weight 0.6).

    Equal values share the position of their first occurrence.

    Returns:
        A list of ``(term, score)`` tuples sorted by descending score, at
        most ten entries long; an empty list when nothing is related.
    """
    terms = db.get_terms({"queryFrequence": {"$gt": 3}})
    if not terms:
        return []

    candidates = []
    for term in terms:
        text = term["_id"]
        if text != query and (query in text or text in query):
            frequency = term["queryFrequence"]
            candidates.append((text, _min_dist(query, text), frequency))
    if not candidates:
        return []

    # Build the rank lookups once instead of calling list.index() for every
    # candidate, which rescanned the sorted lists on each iteration.
    # NOTE(review): assumes distances and frequencies are hashable numbers.
    dist_rank = _first_index_ranks(
        sorted((dist for _, dist, _ in candidates), reverse=True)
    )
    freq_rank = _first_index_ranks(sorted(freq for _, _, freq in candidates))

    total = float(len(candidates))
    scores = {}
    for text, dist, freq in candidates:
        score_of_dist = dist_rank[dist] / total
        score_of_freq = freq_rank[freq] / total
        scores[text] = score_of_dist * 0.4 + score_of_freq * 0.6

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:10]
def _get_related(query):
    """Rank stored terms that overlap with *query* and return the top ten.

    Only terms with ``queryFrequence`` greater than 3 are fetched. A fetched
    term is kept when it differs from *query* and one of the two strings is
    a substring of the other. The score of a kept term is
    ``0.4 * dist_position / n + 0.6 * freq_position / n`` where ``n`` is the
    number of kept terms, ``dist_position`` is the first index of the term's
    ``_min_dist`` value among all distances sorted in descending order, and
    ``freq_position`` is the first index of its ``queryFrequence`` among all
    frequencies sorted in ascending order.

    Returns:
        A list of at most ten ``(term, score)`` tuples in descending score
        order, or an empty list when no term is kept.
    """
    terms = db.get_terms({"queryFrequence": {"$gt": 3}})
    if not terms:
        return []

    kept = []
    for term in terms:
        name = term["_id"]
        if name == query:
            continue
        if name in query or query in name:
            frequency = term["queryFrequence"]
            kept.append((name, _min_dist(query, name), frequency))
    if not kept:
        return []

    # First-occurrence positions are computed once per sorted list; the
    # previous list.index() call per term made the scoring loop quadratic.
    # NOTE(review): assumes distances and frequencies are hashable numbers.
    dist_position = {}
    for index, dist in enumerate(sorted((k[1] for k in kept), reverse=True)):
        dist_position.setdefault(dist, index)
    freq_position = {}
    for index, freq in enumerate(sorted(k[2] for k in kept)):
        freq_position.setdefault(freq, index)

    count = float(len(kept))
    related = {}
    for name, dist, freq in kept:
        related[name] = (
            dist_position[dist] / count * 0.4 + freq_position[freq] / count * 0.6
        )

    results = sorted(related.items(), key=lambda pair: pair[1], reverse=True)
    return results[:10]
def get_matches(query):
    """Return up to twenty stored terms matching *query*, best first.

    *query* is decoded as UTF-8 when it is a byte string, converted with
    ``p.get_pinyin(query, "")`` and looked up in ``pinyinTree``. For every
    match returned by the tree, terms are fetched with the filter
    ``{"pinyin": match}``; those accepted by ``_is_match`` are scored as
    ``accFrequence * 0.05 + queryFrequence * 0.95``.

    Returns:
        ``None`` when the pinyin tree yields no match; otherwise a list of
        ``(term, score)`` tuples sorted by descending score, at most twenty
        entries long (possibly empty).
    """
    # ``bytes`` is an alias of ``str`` on Python 2 and the byte-string type
    # on Python 3, so this decodes byte strings on both; the ``unicode``
    # name used previously does not exist on Python 3.
    if isinstance(query, bytes):
        query = query.decode("utf-8")

    string = p.get_pinyin(query, "")
    matches = pinyinTree.get_match(string)
    if not matches:
        return None

    scores = {}
    for match in matches:
        for term in db.get_terms({"pinyin": match}):
            if _is_match(query, term["_id"], string, term["pinyin"]):
                scores[term["_id"]] = (
                    term["accFrequence"] * 0.05 + term["queryFrequence"] * 0.95
                )

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:20]
def get_matches(query):
    """Look up stored terms whose pinyin matches *query*.

    A byte-string *query* is first decoded as UTF-8. Its pinyin string comes
    from ``p.get_pinyin(query, "")`` and is passed to
    ``pinyinTree.get_match``. Terms fetched for each tree match (filter
    ``{"pinyin": match}``) are kept when ``_is_match`` accepts them, with the
    score ``accFrequence * 0.05 + queryFrequence * 0.95``.

    Returns:
        ``None`` if the tree returns no match. Otherwise the kept terms as
        ``(term, score)`` tuples, highest score first, truncated to twenty.
    """
    # Test for ``bytes`` rather than "not ``unicode``": the check behaves the
    # same for byte strings on Python 2 (where ``bytes is str``) and does not
    # raise NameError on Python 3, which has no ``unicode`` builtin.
    if isinstance(query, bytes):
        query = query.decode('utf-8')

    query_pinyin = p.get_pinyin(query, "")
    tree_matches = pinyinTree.get_match(query_pinyin)
    if not tree_matches:
        return None

    scored = {}
    for candidate in tree_matches:
        for term in db.get_terms({"pinyin": candidate}):
            term_id = term['_id']
            if not _is_match(query, term_id, query_pinyin, term['pinyin']):
                continue
            weighted_acc = term['accFrequence'] * 0.05
            scored[term_id] = weighted_acc + term['queryFrequence'] * 0.95

    ordered = sorted(scored.items(), key=lambda pair: pair[1], reverse=True)
    return ordered[:20]