def generate_snippet(file_name):
    """Interactively generate snippets for one query's 'bms25' results.

    Reads the relevance file 'cacm.rel' and the query file
    'cacm.query.txt' from the current working directory, runs every raw
    query through ``Retriever.process_query``, prompts on stdin for a
    query number, prints the processed query, and hands the query and its
    results to ``SnippetGen.get_snippet``.

    Args:
        file_name: passed unchanged to the ``SnippetGen`` constructor.

    Returns:
        None. The return value of ``get_snippet`` is discarded.

    Raises:
        KeyError: if the entered query number is not one of the parsed
            query numbers.
        IOError: if either input file cannot be opened.
    """
    # NOTE(review): the relevance map is not consumed anywhere below.  The
    # load is kept so that a missing or malformed 'cacm.rel' still fails
    # here as it did before; drop the call if that is not required.
    _load_relevant_docs('cacm.rel')

    rawquery_dict = _load_raw_queries('cacm.query.txt')

    r = Retriever("", "")
    query_dict = {
        query_no: r.process_query(raw_query, True)
        for query_no, raw_query in rawquery_dict.items()
    }

    print("Enter the query no")
    # raw_input() already returns a string, so no conversion is needed.
    no = raw_input()
    query = query_dict[no]
    print(query)

    s = SnippetGen(file_name)
    query_results = s.get_queryresults(name='bms25')
    results = query_results[no]
    s.get_snippet(query, results)


def _load_relevant_docs(path):
    """Parse a relevance file into ``{query id: [document id, ...]}``.

    Each non-blank line is split on whitespace.  The first field is used
    as the query id.  The third field has its first five characters
    removed (presumably a 'CACM-' prefix -- confirm against the data) and
    the remainder is left-padded with zeros to four characters and
    prefixed with 'CACM-'.
    """
    relevant = {}
    # The context manager closes the file even if a line fails to parse.
    with open(path, 'r') as rel_file:
        for line in rel_file:
            fields = line.split()
            if not fields:
                # Blank line: nothing to record.
                continue
            doc_no = fields[2][5:]
            doc = 'CACM-' + doc_no.rjust(4, '0')
            relevant.setdefault(fields[0], []).append(doc)
    return relevant


def _load_raw_queries(path, query_count=64):
    """Read ``query_count`` queries from a markup query file.

    Returns a dict mapping the text of each <docno> element (UTF-8
    encoded, surrounding spaces stripped) to the UTF-8 encoded text of
    the <doc> element found next, taken after its <docno> was removed.

    An AttributeError is raised if the file holds fewer than
    ``query_count`` <docno>/<doc> elements.
    """
    with open(path, 'r') as query_file:
        soup = BeautifulSoup(query_file.read(), 'html.parser')

    raw_queries = {}
    for _ in range(query_count):
        # Consume the tree front to back: take the first <docno>, drop it,
        # then take the first remaining <doc> and drop that as well so the
        # next iteration sees the following query.
        docno_tag = soup.find('docno')
        query_no = docno_tag.text.encode('utf-8')
        docno_tag.decompose()

        doc_tag = soup.find('doc')
        query = doc_tag.text.encode('utf-8')
        doc_tag.decompose()

        raw_queries[query_no.strip(" ")] = query
    return raw_queries
# Get query_dict from the file cacm.query.txt
#
# Top-level script: parses the query file, processes every query with
# Retriever, prompts for a query number, prints the processed query and
# generates snippets for its 'bms25' results.
# NOTE(review): this repeats the second half of generate_snippet();
# confirm whether both copies are still needed.
with open('cacm.query.txt', 'r') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')

rawquery_dict = {}
for i in range(64):
    docno_tag = soup.find('docno')
    query_no = docno_tag.text.encode('utf-8')
    docno_tag.decompose()

    doc_tag = soup.find('doc')
    query = doc_tag.text.encode('utf-8')
    # decompose must be *called*.  A bare attribute reference leaves the
    # <doc> element in the tree, so every later iteration would read the
    # first <doc> again and all query numbers would share one query text.
    doc_tag.decompose()

    query_no = query_no.strip(" ")
    rawquery_dict[query_no] = query

r = Retriever("", "")
query_dict = {}
for query_no, raw_query in rawquery_dict.items():
    query_dict[query_no] = r.process_query(raw_query, True)

print("Enter the query no")
# raw_input() already returns a string, so no conversion is needed.
no = raw_input()
query = query_dict[no]
print(query)

# NOTE(review): generate_snippet() constructs SnippetGen(file_name);
# confirm that the no-argument form used here is valid.
s = SnippetGen()
query_results = s.get_queryresults(name='bms25')
results = query_results[no]
s.get_snippet(query, results)