def search_and_rank_query(query, inverted_index, num_docs_to_retrieve):
    """Run ``query`` against the index and return the top-ranked documents.

    The query is tokenised with ``Parse.tokenSplit``, one posting entry per
    distinct query term is read from the MapReduce store under
    ``MapReduceData/``, and the matching documents are ranked by the
    searcher's ranker. Progress messages are printed to stdout at each stage.

    NOTE(review): this file contains a later definition with the same name;
    if both live at module level the later one replaces this one - confirm
    which version is meant to be kept.

    Args:
        query: Query text handed to ``Parse.tokenSplit``.
        inverted_index: Index object used to construct the ``Searcher``.
        num_docs_to_retrieve: Forwarded to the ranker both when ranking and
            when selecting the top results.

    Returns:
        Whatever ``searcher.ranker.retrieve_top_k`` returns for the ranked
        documents.
    """
    separator = '-------------------------------------'

    # tokenSplit fills the dict in place; its keys are the query terms.
    query_terms = {}
    Parse().tokenSplit(query, query_terms)
    terms = list(query_terms)
    searcher = Searcher(inverted_index)

    print(separator)
    print('Start import mapReduce')
    store = MapReduce.import_map_reduce('MapReduceData/')
    print('Done importing mapReduce')

    print(separator)
    print('Start build posting file')
    postings_by_term = {term: store.read_from(term) for term in terms}
    print('Done building posting file')

    print(separator)
    print('Get relevant Doc')
    candidates = searcher.relevant_docs_from_posting(terms, postings_by_term)
    print('Done getting relevant Doc')

    print(separator)
    print('Start ranking docs')
    ranker = searcher.ranker
    ranked = ranker.rank_relevant_doc(
        candidates, query_terms, postings_by_term, num_docs_to_retrieve)
    print('Done ranking docs')

    return ranker.retrieve_top_k(ranked, num_docs_to_retrieve)
def _select_map_reduce(term, map_reduce_ag, map_reduce_hq, map_reduce_rz,
                       map_reduce_other):
    """Return the MapReduce store that matches the first letter of ``term``."""
    # Slice instead of indexing so an empty term yields '' (falling through to
    # the "other" store) rather than raising IndexError.
    first_letter = term[:1].lower()
    if 'a' <= first_letter <= 'g':
        return map_reduce_ag
    if 'h' <= first_letter <= 'q':
        return map_reduce_hq
    if 'r' <= first_letter <= 'z':
        return map_reduce_rz
    return map_reduce_other


def search_and_rank_query(query, inverted_index, num_docs_to_retrieve):
    """Run ``query`` against the index and return the top-ranked documents.

    The query is tokenised with ``Parse.tokenSplit``. For every distinct
    query term, the posting entry is read (by the lower-cased term) from the
    MapReduce store selected by the term's first letter: ``AG``, ``HQ``,
    ``Rz``, or ``Others`` for anything outside a-z. The matching documents
    are then ranked by the searcher's ranker. Progress messages are printed
    to stdout at each stage.

    Args:
        query: Query text handed to ``Parse.tokenSplit``.
        inverted_index: Index object used to construct the ``Searcher``.
        num_docs_to_retrieve: Forwarded to the ranker both when ranking and
            when selecting the top results.

    Returns:
        Whatever ``searcher.ranker.retrieve_top_k`` returns for the ranked
        documents.
    """
    separator = '-------------------------------------'

    p = Parse()
    dictFromQuery = {}
    # tokenSplit fills the dict in place; its keys are the query terms.
    p.tokenSplit(query, dictFromQuery)
    # Case-insensitive ordering; this same ordered list is what the searcher
    # receives below.
    query_as_list = sorted(dictFromQuery, key=str.lower)
    searcher = Searcher(inverted_index)

    # Load the stores between the two progress messages so the log reflects
    # when the (potentially slow) imports actually happen.
    print(separator)
    print('Start import mapReduce')
    map_reduce_ag = MapReduce.import_map_reduce('MapReduceData/AG/')
    map_reduce_hq = MapReduce.import_map_reduce('MapReduceData/HQ/')
    map_reduce_rz = MapReduce.import_map_reduce('MapReduceData/Rz/')
    map_reduce_other = MapReduce.import_map_reduce('MapReduceData/Others/')
    # NOTE(review): this store is loaded but never used in this function.
    # It is kept in case import_map_reduce has side effects something else
    # relies on - confirm, and drop the load if it does not.
    map_reduce_doc = MapReduce.import_map_reduce('MapReduceData/Document/')
    print('Done importing mapReduce')

    print(separator)
    print('Start build posting file')
    posting = {}
    for term in query_as_list:
        current_map = _select_map_reduce(
            term, map_reduce_ag, map_reduce_hq, map_reduce_rz,
            map_reduce_other)
        # The dict is keyed by the term as it appeared in the query, while
        # the store is queried with the lower-cased term.
        posting[term] = current_map.read_from(term.lower())
    print('Done building posting file')

    print(separator)
    print('Get relevant Doc')
    relevant_docs = searcher.relevant_docs_from_posting(query_as_list, posting)
    print('Done getting relevant Doc')

    print(separator)
    print('Start ranking docs')
    ranked_docs = searcher.ranker.rank_relevant_doc(
        relevant_docs, dictFromQuery, posting,
        map_reduce_ag, map_reduce_hq, map_reduce_rz, map_reduce_other,
        num_docs_to_retrieve)
    print('Done ranking docs')

    return searcher.ranker.retrieve_top_k(ranked_docs, num_docs_to_retrieve)