コード例 #1
0
ファイル: locql.py プロジェクト: Discoverful/LOCQL-Research
def create_question(question):
    """Store ``question`` unless it is falsy or its id is already taken.

    The question's ``terms`` are derived from its title via
    ``extract_terms`` and extended with the result of
    ``generate_local_terms`` for the question's ``place_ids``.  When any
    terms result, ``update_termstats`` is called with a mapping of each
    term to 1 before the question is written with ``db.put``.

    Returns:
        True if the question was written, False if it was falsy or a
        ``Question`` with the same ``question_id`` was found.
    """
    if not question:
        return False

    # Keys-only lookup: only the existence of a match is needed here.
    duplicate_lookup = Question.all(keys_only=True)
    duplicate_lookup.filter("question_id =", question.question_id)
    already_stored = duplicate_lookup.get()
    if already_stored:
        return False

    logging.info("Create a new question")

    question.terms = extract_terms(question.title)
    question.terms += generate_local_terms(question.terms, question.place_ids)
    if question.terms:
        # Each term is passed with the value 1 for this question.
        update_termstats(dict.fromkeys(question.terms, 1))

    db.put(question)
    return True
コード例 #2
0
ファイル: locql.py プロジェクト: Discoverful/LOCQL-Research
def find_relevant_questions(query, place_ids=None, max_num=10):
    """Return up to ``max_num`` questions matching ``query``, best score first.

    The query is split into terms with ``extract_terms`` and extended with
    ``generate_local_terms`` for ``place_ids``.  Terms that have a
    ``TermStat`` entry are ordered by ascending ``docfreq``; the first one
    is always used, and those among the next four whose ``docfreq`` is at
    most 20 are used as well.  Questions containing any selected term are
    fetched newest first (``max_num * 10`` candidates) and then re-ranked
    with ``question_score``.

    Args:
        query: Search string; surrounding whitespace is stripped.
        place_ids: Optional place ids forwarded to ``generate_local_terms``.
            Defaults to a fresh empty list on every call.
        max_num: Maximum number of questions to return.

    Returns:
        A list of at most ``max_num`` questions; empty when the query
        yields no terms or none of them has a ``TermStat`` entry.
    """
    # A ``[]`` default would be one list shared by every call and handed to
    # an external helper; build a fresh one per call instead.
    if place_ids is None:
        place_ids = []

    max_ranked_terms = 5    # only the five lowest-docfreq terms are considered
    max_extra_docfreq = 20  # docfreq ceiling for terms beyond the first
    candidate_factor = 10   # candidates fetched per requested result

    query = query.strip()
    query_terms = extract_terms(query)
    query_terms += generate_local_terms(query_terms, place_ids)
    if not query_terms:
        return []

    # NOTE: an earlier version matched every term at once with a GQL query
    # over the ``terms`` list property (merge-join), but that led to the
    # exploding-index problem when len(query_terms) >= 2.  Hence the
    # "IN on a few selected terms, then rank in memory" approach below.
    termstats = TermStat.get_by_key_name(query_terms)
    term_dict = dict((termstat.key().name(), termstat.docfreq)
                     for termstat in termstats if termstat)
    terms = sorted(term_dict, key=term_dict.get)

    # The lowest-docfreq term is always kept; later ones must also be
    # within the docfreq ceiling.
    best_terms = terms[:1] + [term for term in terms[1:max_ranked_terms]
                              if term_dict[term] <= max_extra_docfreq]
    if not best_terms:
        return []

    question_query = Question.all()
    question_query.filter("terms IN", best_terms)
    question_query.order("-create_time")
    # Over-fetch so the in-memory ranking has candidates to choose from.
    questions = question_query.fetch(max_num * candidate_factor)
    if questions:
        questions.sort(key=lambda question: question_score(question, term_dict),
                       reverse=True)
    return questions[:max_num]