Ejemplo n.º 1
0
def generate_snippet(file_name):
    """Prompt for a query number and generate snippets for its results.

    Reads ``cacm.rel`` and ``cacm.query.txt`` from the current working
    directory, runs every raw query through ``Retriever.process_query``,
    asks for a query number on stdin, prints the processed query and then
    calls ``SnippetGen.get_snippet`` with that query and its results.

    Args:
        file_name: Passed unchanged to the ``SnippetGen`` constructor.

    Raises:
        IOError: If ``cacm.rel`` or ``cacm.query.txt`` cannot be opened.
        KeyError: If the entered query number is not among the parsed
            queries.
    """
    # Map each query id (first column of cacm.rel) to the document ids
    # listed for it.
    # NOTE(review): this mapping is built but not consulted again in this
    # function -- confirm whether it is still needed.
    query_relevant = {}
    with open('cacm.rel', 'r') as relevant_list:
        for line in relevant_list:
            words = line.split()
            if not words:
                # Skip blank lines instead of failing on words[0].
                continue
            # Third column without its first five characters, left-padded
            # with zeros to a width of four.
            doc_no = words[2][5:]
            doc = 'CACM-' + doc_no.rjust(4, '0')
            query_relevant.setdefault(words[0], []).append(doc)

    # Get query_dict from the file cacm.query.txt
    with open('cacm.query.txt', 'r') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    # The file is expected to hold 64 <doc> entries.  Each pass consumes the
    # first remaining <docno> and then the first remaining <doc>, removing
    # both from the tree so the next pass sees the following entry.
    rawquery_dict = {}
    for _ in range(64):
        docno_tag = soup.find('docno')
        query_no = docno_tag.text.encode('utf-8')
        docno_tag.decompose()

        doc_tag = soup.find('doc')
        query = doc_tag.text.encode('utf-8')
        doc_tag.decompose()

        rawquery_dict[query_no.strip(" ")] = query

    r = Retriever("", "")
    query_dict = {
        query_no: r.process_query(raw_query, True)
        for query_no, raw_query in rawquery_dict.items()
    }

    print("Enter the query no")
    no = str(raw_input())
    query = query_dict[no]
    print(query)

    s = SnippetGen(file_name)
    query_results = s.get_queryresults(name='bms25')
    results = query_results[no]
    s.get_snippet(query, results)
Ejemplo n.º 2
0
# Get query_dict from the file cacm.query.txt
with open('cacm.query.txt', 'r') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')

# The file is expected to hold 64 <doc> entries.  Each pass consumes the
# first remaining <docno> and then the first remaining <doc>; both must be
# removed from the tree so that the next pass sees the following entry.
rawquery_dict = {}
for i in range(64):
    docno_tag = soup.find('docno')
    query_no = docno_tag.text.encode('utf-8')
    docno_tag.decompose()

    doc_tag = soup.find('doc')
    query = doc_tag.text.encode('utf-8')
    # decompose must actually be called: a bare attribute reference leaves
    # the element in place and every later pass would re-read the first
    # <doc>.
    doc_tag.decompose()

    query_no = query_no.strip(" ")
    rawquery_dict[query_no] = query

# Run every raw query through the retriever's query processing.
r = Retriever("", "")
query_dict = {}
for query_no, raw_query in rawquery_dict.viewitems():
    query_dict[query_no] = r.process_query(raw_query, True)

# Ask which query to show, then generate snippets for its results.
print("Enter the query no")
no = str(raw_input())
query = query_dict[no]
print(query)
s = SnippetGen()
query_results = s.get_queryresults(name='bms25')
results = query_results[no]
s.get_snippet(query, results)