def retrieve_docs():
    """Run cosine-similarity retrieval with query expansion and save the run.

    The queries in ``query_dict`` are ranked once; each query is then
    rewritten by ``relevance_feedback_query`` using its own first-pass
    results, and the rewritten queries are ranked a second time. Every
    second-pass result is written to ``query_expansion_table`` as a CSV row
    ``(query_id, "Q0", score[0], rank, score[1], "query_expansion")``, where
    ``rank`` counts from 1 in the order the model returned the results.

    For the first result of each query, the original (unexpanded) query, the
    result's ``score[0]`` and a generated snippet are printed to stdout.

    Relies on module-level state: ``document_tokens``, ``stop_words_path``,
    ``N``, ``inverted_index``, ``query_dict`` and ``query_expansion_table``.
    """
    snippet_generator = Snippet.SnippetGenerator(document_tokens, stop_words_path)
    model = RetrievalModel.CosineSimilarity(N, inverted_index, document_tokens)

    # Both retrieval passes run before the output file is opened, so a failure
    # while ranking does not leave a truncated, empty results file behind.
    ranked_list = model.cosine_similarity_list(query_dict)
    updated_query_dict = {
        query_id: relevance_feedback_query(query_dict[query_id], scores)
        for query_id, scores in ranked_list.items()
    }
    updated_list = model.cosine_similarity_list(updated_query_dict)

    # newline="" is what the csv module requires for writer targets; without
    # it, platforms that translate "\n" end up with "\r\r\n" row endings.
    with open(query_expansion_table, "w", newline="") as file:
        csv_writer = csv.writer(file)
        for query_id, scores in updated_list.items():
            # NOTE(review): each entry looks like (document id, score) and the
            # list is presumably sorted best-first -- confirm in RetrievalModel.
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1], "query_expansion"))
                if rank == 1:
                    # The snippet is built from the original query text, not
                    # the expanded one.
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" + snippet_generator.generate_snippet(score[0], query))
def retrieve_docs():
    """Run cosine-similarity retrieval with stop words applied and save the run.

    Stop words are obtained from ``retrieve_stop_words()`` and passed to
    ``update_docs`` and ``update_queries``; the documents and queries those
    helpers return are what the cosine-similarity model ranks. Every result
    is written to ``stop_table`` as a CSV row
    ``(query_id, "Q0", score[0], rank, score[1], "using_stop_words")``, where
    ``rank`` counts from 1 in the order the model returned the results.

    For the first result of each query, the original query from
    ``query_dict``, the result's ``score[0]`` and a generated snippet are
    printed to stdout.

    Relies on module-level state: ``document_tokens``, ``N``,
    ``inverted_index``, ``query_dict`` and ``stop_table``.
    """
    # NOTE(review): the snippet generator receives an empty stop-words path
    # and the unmodified document tokens -- confirm this is intentional.
    snippet_generator = Snippet.SnippetGenerator(document_tokens, "")

    stop_words = retrieve_stop_words()
    updated_document_tokens = update_docs(stop_words)
    updated_query_dict = update_queries(stop_words)

    model = RetrievalModel.CosineSimilarity(N, inverted_index, updated_document_tokens)
    ranked_list = model.cosine_similarity_list(updated_query_dict)

    # newline="" is what the csv module requires for writer targets; without
    # it, platforms that translate "\n" end up with "\r\r\n" row endings.
    with open(stop_table, "w", newline="") as file:
        csv_writer = csv.writer(file)
        for query_id, scores in ranked_list.items():
            # NOTE(review): each entry looks like (document id, score) and the
            # list is presumably sorted best-first -- confirm in RetrievalModel.
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1], "using_stop_words"))
                if rank == 1:
                    # Display uses the original query text rather than the
                    # version produced by update_queries.
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" + snippet_generator.generate_snippet(score[0], query))
def retrieve_relevant_documents():
    """Run the cosine-similarity, tf-idf and BM25 retrieval passes in turn.

    One snippet generator is created from the module-level
    ``document_tokens`` and ``stop_words_path`` and handed to each pass, so
    all three share the same instance.
    """
    shared_snippets = Snippet.SnippetGenerator(document_tokens, stop_words_path)

    # Order is kept as cosine similarity, then tf-idf, then BM25.
    retrieve_cosine_sim_docs(shared_snippets)
    retrieve_tf_idf__docs(shared_snippets)
    retrieve_bm_25_docs(shared_snippets)