def search_and_rank_query(query, docs, k, stemming, output_path):
    """
    Run a single query through the retrieval pipeline and return the top hits.

    The query has its stopwords removed, is expanded through WordNet, and is
    then parsed into terms and entities. Matching documents are looked up via
    the Searcher, ranked by its ranker, and cut down to the best ``k``.

    :param query: the raw query text
    :param docs: passed straight to ``Searcher`` as its first argument
    :param k: number of results requested from ``retrieve_top_k``
    :param stemming: forwarded to ``Parse`` (presumably toggles stemming)
    :param output_path: passed to ``Searcher`` as its second argument
    :return: whatever ``searcher.ranker.retrieve_top_k`` yields for ``k``
    """
    parser = Parse(stemming)
    expander = WordNet()

    # Stopwords are stripped before expansion so only the remaining
    # text is handed to the WordNet expander.
    stripped_query = parser.remove_stopwords(query)
    expanded_query = expander.expand_query(stripped_query)
    terms, entities = parser.parse_query(expanded_query)

    searcher = Searcher(docs, output_path)
    candidates = searcher.relevant_docs_from_posting(terms, entities)
    ranking = searcher.ranker.rank_relevant_docs(candidates)
    return searcher.ranker.retrieve_top_k(ranking, k)
def search_and_rank_query(query, inverted_index, k, number_of_documents,
                          inverted_documents_dict, load_path):
    """
    Parse a query, collect its candidate documents and return the top ``k``.

    :param query: the raw query text
    :param inverted_index: first argument handed to ``Searcher``
    :param k: number of results requested from ``retrieve_top_k``
    :param number_of_documents: second argument handed to ``Searcher``
    :param inverted_documents_dict: forwarded to the ranker's
        ``rank_relevant_doc`` call
    :param load_path: third argument handed to ``Searcher``
    :return: whatever ``searcher.ranker.retrieve_top_k`` yields for ``k``
    """
    parser = Parse()
    parsed = parser.parse_query(query)

    engine = Searcher(inverted_index, number_of_documents, load_path)
    candidates = engine.relevant_docs_from_posting(parsed)
    # The normalized form of the query is computed after the candidate
    # lookup and is only consumed by the ranking step.
    query_weights = engine.normalized_query(parsed)

    ranking = engine.ranker.rank_relevant_doc(
        candidates,
        query_weights,
        inverted_documents_dict,
    )
    return engine.ranker.retrieve_top_k(ranking, k)
def search_and_rank_query(corpus_path, queries_list, inverted_index,
                          num_docs_to_retrieve, stemming, word2vec,
                          output_path):
    """
    Answer a batch of queries and collect the top results for each one.

    :param corpus_path: used to build the ``ConfigClass`` instance
    :param queries_list: iterable of raw queries, processed in order
    :param inverted_index: forwarded to every ``Searcher`` that is created
    :param num_docs_to_retrieve: result count passed to ``retrieve_top_k``
    :param stemming: forwarded to ``Parse``, ``Searcher`` and the
        search/rank calls
    :param word2vec: forwarded to ``Searcher`` and to the ranking call
    :param output_path: forwarded to the search and ranking calls
    :return: a ``defaultdict(list)`` mapping each query's zero-based position
        in ``queries_list`` to the result of ``retrieve_top_k`` for it
    """
    configuration = ConfigClass(corpus_path)
    parser = Parse(stemming)
    results_by_query = defaultdict(list)

    for position, raw_query in enumerate(queries_list):
        parsed = parser.parse_query(raw_query)
        # A new Searcher is built for every query, exactly as before.
        engine = Searcher(inverted_index, stemming, word2vec)
        candidates = engine.relevant_docs_from_posting(
            parsed, stemming, configuration, output_path
        )
        ranking = engine.ranker.rank_relevant_doc(
            candidates, parsed, word2vec, stemming, output_path
        )
        results_by_query[position] = engine.ranker.retrieve_top_k(
            ranking, num_docs_to_retrieve
        )

    return results_by_query
def search_and_rank_query(query, inverted_index, k, config):
    """
    Parse a query into tokens, search for relevant documents and rank them.

    Ranking is delegated to ``searcher.ranker.rank_relevant_doc`` (described
    by the original author as tf-idf cosine similarity).

    :param query: string that contains a query
    :param inverted_index: the inverted index for the corpus
    :param k: number of documents to retrieve
    :param config: configuration class; ``config.toStem`` selects stemming for
        the parser, and the object is also forwarded to the search and
        ranking calls
    :return: the ``k`` most relevant tweets for the query, as produced by
        ``searcher.ranker.retrieve_top_k``
    """
    p = Parse(config.toStem)
    query_as_list = p.parse_query(query)

    searcher = Searcher(inverted_index)
    relevant_docs = searcher.relevant_docs_from_posting(query_as_list, config)
    ranked_docs = searcher.ranker.rank_relevant_doc(
        relevant_docs, config, query_as_list
    )
    return searcher.ranker.retrieve_top_k(ranked_docs, k)