コード例 #1
0
ファイル: run_query.py プロジェクト: prdx/LearningToRank
def run_built_in():
    """Run every query through the built-in model and write the ranked hits.

    For each entry in ``query_list`` the query is sent to ``BuiltInModel``;
    the hits found under ``['hits']['hits']`` of the response are written
    with ``write_output`` in the order returned, with ranks starting at 1.
    """
    print("Processing: built in model")
    built_in = BuiltInModel()
    for key in query_list:
        results = built_in.query(query_list[key])['hits']['hits']
        # Iterate over the hits: the original referenced ``result`` without
        # ever binding it, so no hit could be written.
        for rank, result in enumerate(results, start=1):
            write_output(model='es',
                         query_no=str(key),
                         doc_no=result['_id'],
                         rank=str(rank),
                         score=str(result['_score']))
コード例 #2
0
ファイル: run_query.py プロジェクト: prdx/LearningToRank
def run_pseudo_feedback():
    """Write ranked hits of the pseudo relevance feedback model per query.

    Each query in ``query_list`` is passed, together with ``total_length``,
    to a ``PseudoRelevanceFeedbackModel``; the hits under
    ``['hits']['hits']`` are written in the order returned, ranked from 1.
    """
    print("Processing: Pseudo Relevance Feedback model")
    model = PseudoRelevanceFeedbackModel(document_statistics)
    for q_no in query_list:
        response = model.query(query_list[q_no], total_length)
        hits = response['hits']['hits']
        for position, hit in enumerate(hits, start=1):
            write_output(
                model='pseudo_feedback',
                query_no=str(q_no),
                doc_no=hit['_id'],
                rank=str(position),
                score=str(hit['_score']),
            )
    print("Pseudo feedback done")
コード例 #3
0
ファイル: run_query.py プロジェクト: prdx/LearningToRank
def run_bm25():
    """Score every query with Okapi BM25 and write all results by rank.

    The model returns a mapping of document id to score. Entries are
    written in descending order of ``(score, document id)``, ranked from 1.
    No cap on the number of written rows is applied here.
    """
    print("Processing: Okapi BM25 model")
    scorer = OkapiBM25(document_statistics)
    for q_no in query_list:
        scores = scorer.query(query_list[q_no], wfd_collection,
                              tf_for_queries[q_no])
        # Highest score first; ties are broken by the larger document id.
        ordered = sorted(scores.items(),
                         key=lambda item: (item[1], item[0]),
                         reverse=True)
        for position, (doc_id, score) in enumerate(ordered, start=1):
            write_output(
                model='bm25',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("BM25 Done")
コード例 #4
0
ファイル: run_query.py プロジェクト: prdx/LearningToRank
def run_tf_idf():
    """Score every query with the TF-IDF model and write all results.

    The model returns a mapping of document id to score. Entries are
    written in descending order of ``(score, document id)``, ranked from 1.
    No cap on the number of written rows is applied here.
    """
    print("Processing: TF-IDF model")
    scorer = TFIDFModel(document_statistics)
    for q_no in query_list:
        scores = scorer.query(query_list[q_no], wfd_collection,
                              tf_for_queries[q_no])
        # Highest score first; ties are broken by the larger document id.
        ordered = sorted(scores.items(),
                         key=lambda item: (item[1], item[0]),
                         reverse=True)
        for position, (doc_id, score) in enumerate(ordered, start=1):
            write_output(
                model='tfidf',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("TF-IDF Done")
コード例 #5
0
ファイル: run_query.py プロジェクト: prdx/LearningToRank
def run_okapi_tf():
    """Score every query with the Okapi TF model and write all results.

    The model returns a mapping of document id to score. Entries are
    written in descending order of ``(score, document id)``, ranked from 1.
    No cap on the number of written rows is applied here.
    """
    print("Processing: Okapi TF model")
    scorer = OkapiTFModel(document_statistics)
    for q_no in query_list:
        scores = scorer.query(query_list[q_no], tf_for_queries[q_no])
        # Highest score first; ties are broken by the larger document id.
        ordered = sorted(scores.items(),
                         key=lambda item: (item[1], item[0]),
                         reverse=True)
        for position, (doc_id, score) in enumerate(ordered, start=1):
            write_output(
                model='okapi_tf',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("Okapi TF Done")
コード例 #6
0
ファイル: run_query.py プロジェクト: prdx/LearningToRank
def run_laplace_unigram():
    """Score every query with the Laplace unigram LM and write all results.

    The model returns a mapping of document id to score. Entries are
    written in descending order of ``(score, document id)``, ranked from 1.
    No cap on the number of written rows is applied here.
    """
    print("Processing: Unigram LM with Laplace model")
    scorer = LaplaceUnigramLMModel(document_statistics)
    for q_no in query_list:
        scores = scorer.query(query_list[q_no], tf_for_queries[q_no])
        # Highest score first; ties are broken by the larger document id.
        ordered = sorted(scores.items(),
                         key=lambda item: (item[1], item[0]),
                         reverse=True)
        for position, (doc_id, score) in enumerate(ordered, start=1):
            write_output(
                model='laplace_unigram',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("Unigram LM with Laplace done")
コード例 #7
0
ファイル: run_query.py プロジェクト: prdx/LearningToRank
def run_jelmer_unigram():
    """Score every query with the Jelinek-Mercer unigram LM and write results.

    The model is queried with the query, its entry in ``tf_for_queries``,
    its entry in ``total_tf_wd`` and ``total_length``. It returns a mapping
    of document id to score, written in descending order of
    ``(score, document id)``, ranked from 1. No cap on the number of
    written rows is applied here.
    """
    print("Processing: Unigram LM with Jelinek-Mercer model")
    scorer = JelinekMercerUnigramLMModel(document_statistics)
    for q_no in query_list:
        scores = scorer.query(query_list[q_no], tf_for_queries[q_no],
                              total_tf_wd[q_no], total_length)
        # Highest score first; ties are broken by the larger document id.
        ordered = sorted(scores.items(),
                         key=lambda item: (item[1], item[0]),
                         reverse=True)
        for position, (doc_id, score) in enumerate(ordered, start=1):
            write_output(
                model='jelmer_unigram',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("Unigram LM with Jelinek Mercer done")
コード例 #8
0
def run_okapi_tf():
    """Score every query with the Okapi TF model and write the top results.

    The model returns a mapping of document id to score. Entries are
    visited in descending order of ``(score, document id)``. Entries whose
    score equals 0 are skipped without consuming a rank, and writing stops
    once ``Constants.MAX_OUTPUT`` rows have been written for a query.
    """
    # print() calls, dict.items() and an index-based sort key replace the
    # Python 2-only print statement, iteritems() and tuple-parameter lambda,
    # so the function runs on Python 3 as well.
    print("Processing: Okapi TF model")
    okapi_tf = OkapiTFModel(document_statistics)
    for q_no in query_list:
        query = query_list[q_no]
        results = okapi_tf.query(query, tf_for_queries[q_no])
        rank = 1
        for key, value in sorted(results.items(),
                                 key=lambda k_v: (k_v[1], k_v[0]),
                                 reverse=True):
            if rank > Constants.MAX_OUTPUT:
                break
            # The rank advances only for rows that are actually written.
            if value != 0:
                write_output(model='okapi_tf',
                             query_no=str(q_no),
                             doc_no=str(key),
                             rank=str(rank),
                             score=str(value))
                rank += 1
    print("Okapi TF Done")
コード例 #9
0
def run_bm25_with_ps():
    """Combine BM25 scores with proximity-search scores and write the result.

    For each query, every document in ``bm25_result[q_no]`` gets its BM25
    score plus its proximity-search score from ``ps_result[q_no]`` when one
    exists; otherwise the BM25 score is used alone. Documents are written
    in descending order of ``(score, document id)``. Zero scores are
    skipped without consuming a rank, and writing stops once
    ``Constants.MAX_OUTPUT`` rows have been written for a query.
    """
    print("Processing: BM25 model with ps")
    for q_no in query_list:
        bm25_scores = bm25_result[q_no]
        # ps_result is keyed by query number first, so the document lookup
        # must use this query's inner dict (the original tested
        # ``doc in ps_result``, i.e. against query numbers).
        ps_scores = ps_result.get(q_no, {})
        results = {}
        for doc, bm25_score in bm25_scores.items():
            if doc in ps_scores:
                results[doc] = bm25_score + ps_scores[doc]
            else:
                results[doc] = bm25_score

        rank = 1
        for key, value in sorted(results.items(),
                                 key=lambda k_v3: (k_v3[1], k_v3[0]),
                                 reverse=True):
            if rank > Constants.MAX_OUTPUT:
                break
            # The rank advances only for rows that are actually written.
            if value != 0:
                write_output(model='bm25_ps',
                             query_no=str(q_no),
                             doc_no=str(key),
                             rank=str(rank),
                             score=str(value))
                rank += 1
コード例 #10
0
def run_proximity_search():
    """Run the proximity-search model per query, recording and writing scores.

    For each query in ``query_list_for_ps`` the model returns a mapping of
    document id to score. The entries are visited in descending order of
    ``(score, document id)``; at most ``Constants.MAX_OUTPUT`` of them are
    stored in ``ps_result[q_no][doc_id]`` and written with ``write_output``.
    """
    print("Processing: Proximity Search")
    searcher = ProximitySearchModel(document_statistics)
    for q_no in query_list_for_ps:
        scores = searcher.query(query_list_for_ps[q_no],
                                term_maps_collection, wfd_collection,
                                tf_for_queries[q_no])
        ordered = sorted(scores.items(),
                         key=lambda item: (item[1], item[0]),
                         reverse=True)
        for position, (doc_id, score) in enumerate(ordered, start=1):
            if position > Constants.MAX_OUTPUT:
                break
            # Create this query's bucket on first use, then record the score.
            ps_result.setdefault(q_no, {})[doc_id] = score
            write_output(
                model='ps',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("Proximity Search done")
コード例 #11
0
def run_laplace_unigram_with_ps():
    """Combine Laplace unigram LM scores with proximity-search scores.

    For each query, every document in ``laplace_result[q_no]`` gets its
    Laplace score multiplied by its proximity-search score from
    ``ps_result[q_no]`` when one exists; otherwise the Laplace score is
    used alone. Documents are written in descending order of
    ``(score, document id)``. Zero scores are skipped without consuming a
    rank, and writing stops once ``Constants.MAX_OUTPUT`` rows have been
    written for a query.
    """
    print("Processing: Unigram LM with Laplace model with ps")
    for q_no in query_list:
        laplace_scores = laplace_result[q_no]
        # ps_result is keyed by query number first, so the document lookup
        # must use this query's inner dict (the original tested
        # ``doc in ps_result``, i.e. against query numbers).
        ps_scores = ps_result.get(q_no, {})
        results = {}
        for doc, laplace_score in laplace_scores.items():
            if doc in ps_scores:
                results[doc] = laplace_score * ps_scores[doc]
            else:
                results[doc] = laplace_score

        rank = 1
        for key, value in sorted(results.items(),
                                 key=lambda k_v3: (k_v3[1], k_v3[0]),
                                 reverse=True):
            if rank > Constants.MAX_OUTPUT:
                break
            # The rank advances only for rows that are actually written.
            if value != 0:
                write_output(model='laplace_unigram_ps',
                             query_no=str(q_no),
                             doc_no=str(key),
                             rank=str(rank),
                             score=str(value))
                rank += 1