def run_built_in():
    """Run every query through the built-in model and write its ranked hits.

    For each key in ``query_list`` the query is passed to
    ``BuiltInModel.query`` and every entry under ``['hits']['hits']`` of the
    response is written with ``write_output`` under the model name ``'es'``.
    Ranks start at 1 and follow the order in which the hits are returned.
    """
    print("Processing: built in model")
    built_in = BuiltInModel()
    for key in query_list:
        results = built_in.query(query_list[key])['hits']['hits']
        # Iterate the hits so that each one gets its own output row and rank.
        for rank, result in enumerate(results, start=1):
            write_output(model='es', query_no=str(key), doc_no=result['_id'],
                         rank=str(rank), score=str(result['_score']))
def run_pseudo_feedback():
    """Query the pseudo relevance feedback model and write its hits.

    Each query in ``query_list`` is sent to
    ``PseudoRelevanceFeedbackModel.query`` together with ``total_length``.
    The entries under ``['hits']['hits']`` are written in returned order
    with ranks counted from 1, under the model name ``'pseudo_feedback'``.
    """
    print("Processing: Pseudo Relevance Feedback model")
    model = PseudoRelevanceFeedbackModel(document_statistics)
    for q_no in query_list:
        response = model.query(query_list[q_no], total_length)
        hits = response['hits']['hits']
        for position, hit in enumerate(hits, start=1):
            write_output(
                model='pseudo_feedback',
                query_no=str(q_no),
                doc_no=hit['_id'],
                rank=str(position),
                score=str(hit['_score']),
            )
    print("Pseudo feedback done")
def run_bm25():
    """Score every query with Okapi BM25 and write the full ranking.

    Documents are ordered by ``(score, doc id)`` descending and ranked from 1.
    No cutoff is applied: every document returned by the model is written
    under the model name ``'bm25'``.
    """
    print("Processing: Okapi BM25 model")
    scorer = OkapiBM25(document_statistics)
    for q_no in query_list:
        scores = scorer.query(query_list[q_no], wfd_collection, tf_for_queries[q_no])
        # Ties on score are broken by document id, also descending.
        ranking = sorted(scores.items(), key=lambda pair: (pair[1], pair[0]), reverse=True)
        for position, (doc_id, score) in enumerate(ranking, start=1):
            write_output(
                model='bm25',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("BM25 Done")
def run_tf_idf():
    """Score every query with the TF-IDF model and write the full ranking.

    Documents are ordered by ``(score, doc id)`` descending and ranked from 1.
    No cutoff is applied: every document returned by the model is written
    under the model name ``'tfidf'``.
    """
    print("Processing: TF-IDF model")
    scorer = TFIDFModel(document_statistics)
    for q_no in query_list:
        scores = scorer.query(query_list[q_no], wfd_collection, tf_for_queries[q_no])
        # Ties on score are broken by document id, also descending.
        ranking = sorted(scores.items(), key=lambda pair: (pair[1], pair[0]), reverse=True)
        for position, (doc_id, score) in enumerate(ranking, start=1):
            write_output(
                model='tfidf',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("TF-IDF Done")
def run_okapi_tf():
    """Score every query with the Okapi TF model and write the full ranking.

    Documents are ordered by ``(score, doc id)`` descending and ranked from 1.
    No cutoff is applied: every document returned by the model is written
    under the model name ``'okapi_tf'``.

    NOTE(review): a second ``run_okapi_tf`` appears later in this file; if
    both live in the same module, the later definition replaces this one.
    """
    print("Processing: Okapi TF model")
    scorer = OkapiTFModel(document_statistics)
    for q_no in query_list:
        scores = scorer.query(query_list[q_no], tf_for_queries[q_no])
        # Ties on score are broken by document id, also descending.
        ranking = sorted(scores.items(), key=lambda pair: (pair[1], pair[0]), reverse=True)
        for position, (doc_id, score) in enumerate(ranking, start=1):
            write_output(
                model='okapi_tf',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("Okapi TF Done")
def run_laplace_unigram():
    """Score every query with the Laplace unigram LM and write the full ranking.

    Documents are ordered by ``(score, doc id)`` descending and ranked from 1.
    No cutoff is applied: every document returned by the model is written
    under the model name ``'laplace_unigram'``.
    """
    print("Processing: Unigram LM with Laplace model")
    scorer = LaplaceUnigramLMModel(document_statistics)
    for q_no in query_list:
        scores = scorer.query(query_list[q_no], tf_for_queries[q_no])
        # Ties on score are broken by document id, also descending.
        ranking = sorted(scores.items(), key=lambda pair: (pair[1], pair[0]), reverse=True)
        for position, (doc_id, score) in enumerate(ranking, start=1):
            write_output(
                model='laplace_unigram',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("Unigram LM with Laplace done")
def run_jelmer_unigram():
    """Score every query with the Jelinek-Mercer unigram LM and write the ranking.

    The model is queried with the per-query term frequencies, the per-query
    ``total_tf_wd`` entry and ``total_length``. Documents are ordered by
    ``(score, doc id)`` descending and ranked from 1. No cutoff is applied:
    every document returned is written under the model name
    ``'jelmer_unigram'``.
    """
    print("Processing: Unigram LM with Jelinek-Mercer model")
    scorer = JelinekMercerUnigramLMModel(document_statistics)
    for q_no in query_list:
        scores = scorer.query(
            query_list[q_no],
            tf_for_queries[q_no],
            total_tf_wd[q_no],
            total_length,
        )
        # Ties on score are broken by document id, also descending.
        ranking = sorted(scores.items(), key=lambda pair: (pair[1], pair[0]), reverse=True)
        for position, (doc_id, score) in enumerate(ranking, start=1):
            write_output(
                model='jelmer_unigram',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("Unigram LM with Jelinek Mercer done")
def run_okapi_tf():
    """Score every query with the Okapi TF model and write the top results.

    Documents are ordered by ``(score, doc id)`` descending. Documents whose
    score is exactly 0 are not written, and output stops once the rank
    exceeds ``Constants.MAX_OUTPUT``. Rows are written under the model name
    ``'okapi_tf'``.

    NOTE(review): an earlier ``run_okapi_tf`` exists in this file; if both
    live in the same module, this later definition is the one in effect.
    """
    print("Processing: Okapi TF model")
    okapi_tf = OkapiTFModel(document_statistics)
    for q_no in query_list:
        query = query_list[q_no]
        results = okapi_tf.query(query, tf_for_queries[q_no])
        # Python 3 has no tuple-parameter lambdas or dict.iteritems(), so the
        # (key, value) pair is indexed explicitly.
        ranked = sorted(results.items(), key=lambda k_v: (k_v[1], k_v[0]), reverse=True)
        rank = 1
        for key, value in ranked:
            if rank > Constants.MAX_OUTPUT:
                break
            if value != 0:
                write_output(model='okapi_tf', query_no=str(q_no), doc_no=str(key),
                             rank=str(rank), score=str(value))
                # Advance only for rows actually written so ranks stay contiguous.
                rank += 1
    print("Okapi TF Done")
def run_bm25_with_ps():
    """Combine stored BM25 scores with proximity-search scores and write them.

    For each query, every document in ``bm25_result[q_no]`` keeps its BM25
    score, plus the proximity score from ``ps_result[q_no]`` when that
    document has one. The combined scores are ordered by ``(score, doc id)``
    descending. Documents whose combined score is exactly 0 are not written,
    and output stops once the rank exceeds ``Constants.MAX_OUTPUT``. Rows are
    written under the model name ``'bm25_ps'``.
    """
    print("Processing: BM25 model with ps")
    for q_no in query_list:
        bm25_scores = bm25_result[q_no]
        # ps_result is keyed by query number first, then by document, so the
        # document must be looked up inside this query's entry. A query with
        # no proximity results simply contributes nothing.
        ps_scores = ps_result.get(q_no, {})
        results = {}
        for doc in bm25_scores:
            if doc in ps_scores:
                results[doc] = bm25_scores[doc] + ps_scores[doc]
            else:
                results[doc] = bm25_scores[doc]

        ranked = sorted(results.items(), key=lambda k_v: (k_v[1], k_v[0]), reverse=True)
        rank = 1
        for key, value in ranked:
            if rank > Constants.MAX_OUTPUT:
                break
            if value != 0:
                write_output(model='bm25_ps', query_no=str(q_no), doc_no=str(key),
                             rank=str(rank), score=str(value))
                # Advance only for rows actually written so ranks stay contiguous.
                rank += 1
def run_proximity_search():
    """Run the proximity search model, record its scores and write them.

    Each query in ``query_list_for_ps`` is scored with
    ``ProximitySearchModel.query``. Documents are ordered by
    ``(score, doc id)`` descending, and at most ``Constants.MAX_OUTPUT`` of
    them are processed per query. Every processed document is stored in
    ``ps_result[q_no][doc]`` and written under the model name ``'ps'``.
    """
    print("Processing: Proximity Search")
    model = ProximitySearchModel(document_statistics)
    for q_no in query_list_for_ps:
        scores = model.query(
            query_list_for_ps[q_no],
            term_maps_collection,
            wfd_collection,
            tf_for_queries[q_no],
        )
        ranking = sorted(scores.items(), key=lambda pair: (pair[1], pair[0]), reverse=True)
        for position, (doc_id, score) in enumerate(ranking, start=1):
            if position > Constants.MAX_OUTPUT:
                break
            # The per-query dict is created on first use, so a query with no
            # results gets no entry in ps_result.
            ps_result.setdefault(q_no, {})[doc_id] = score
            write_output(
                model='ps',
                query_no=str(q_no),
                doc_no=str(doc_id),
                rank=str(position),
                score=str(score),
            )
    print("Proximity Search done")
def run_laplace_unigram_with_ps():
    """Combine stored Laplace unigram scores with proximity scores and write them.

    For each query, every document in ``laplace_result[q_no]`` keeps its
    Laplace score, multiplied by the proximity score from
    ``ps_result[q_no]`` when that document has one. The combined scores are
    ordered by ``(score, doc id)`` descending. Documents whose combined score
    is exactly 0 are not written, and output stops once the rank exceeds
    ``Constants.MAX_OUTPUT``. Rows are written under the model name
    ``'laplace_unigram_ps'``.
    """
    print("Processing: Unigram LM with Laplace model with ps")
    for q_no in query_list:
        laplace_scores = laplace_result[q_no]
        # ps_result is keyed by query number first, then by document, so the
        # document must be looked up inside this query's entry. A query with
        # no proximity results leaves the Laplace scores unchanged.
        ps_scores = ps_result.get(q_no, {})
        results = {}
        for doc in laplace_scores:
            if doc in ps_scores:
                results[doc] = laplace_scores[doc] * ps_scores[doc]
            else:
                results[doc] = laplace_scores[doc]

        ranked = sorted(results.items(), key=lambda k_v: (k_v[1], k_v[0]), reverse=True)
        rank = 1
        for key, value in ranked:
            if rank > Constants.MAX_OUTPUT:
                break
            if value != 0:
                write_output(model='laplace_unigram_ps', query_no=str(q_no), doc_no=str(key),
                             rank=str(rank), score=str(value))
                # Advance only for rows actually written so ranks stay contiguous.
                rank += 1