def read_queries(filename):
    """Run every query listed in *filename* and write one report per query.

    Each non-blank line of the file is expected to look like
    ``<target_doc_id> <query text>``: everything before the first space is
    parsed as an integer document id, everything after it is passed to
    ``Query``.

    For each line the query is run through ``IR().search`` and a report is
    written to ``../tests/<line>.txt`` containing:

    * the 1-based position of the target document in the search results
      (``0`` when the target document is not among them), and
    * the id and ``fancy_str()`` of the first 15 returned documents.

    Blank lines (for example the empty string produced by a trailing newline)
    are skipped instead of aborting the whole run.

    Args:
        filename: path of the text file holding one query per line.

    Raises:
        ValueError: if a non-blank line contains no space or its first token
            is not an integer.
    """
    divider = '\n==============================================================================\n'

    # Read everything up front so the input handle is released before the
    # (potentially slow) searches start, and is closed even on errors.
    with open(filename, 'r') as in_:
        queries = in_.read().split("\n")

    ir = IR()
    for query in queries:
        # A trailing newline yields an empty last element; skip such lines
        # rather than failing on the missing separator.
        if not query.strip():
            continue

        separator = query.index(' ')
        target_doc_id = int(query[:separator])
        q = Query(query[separator + 1:])

        documents = ir.search(q)

        # Position of the target document in the full result list; stays -1
        # (reported as rank 0) when it was not returned at all.
        rank = -1
        for position, doc in enumerate(documents):
            if doc.id == target_doc_id:
                rank = position
                break

        # Only the first 15 results are included in the report.
        top_documents = documents[0:15]
        result = (
            'target document is rank ' + str(rank + 1) + divider +
            divider.join([
                'DOC ID:\n\t' + str(doc.id) + '\n' + doc.fancy_str()
                for doc in top_documents
            ])
        )
        result += '\n\n\n'

        # The report file is named after the whole input line (id + query).
        with open('../tests/' + query + '.txt', 'w') as out_:
            out_.write(result)
def read_queries(filename):
    """Run every query listed in *filename* and write one report per query.

    Each non-blank line of the file is expected to look like
    ``<target_doc_id> <query text>``: everything before the first space is
    parsed as an integer document id, everything after it is passed to
    ``Query``.

    For each line the query is run through ``IR().search`` and a report is
    written to ``../tests/<line>.txt`` containing:

    * the 1-based position of the target document in the search results
      (``0`` when the target document is not among them), and
    * the id and ``fancy_str()`` of the first 15 returned documents.

    Blank lines (for example the empty string produced by a trailing newline)
    are skipped instead of aborting the whole run.

    Args:
        filename: path of the text file holding one query per line.

    Raises:
        ValueError: if a non-blank line contains no space or its first token
            is not an integer.
    """
    divider = '\n==============================================================================\n'

    # Read everything up front so the input handle is released before the
    # (potentially slow) searches start, and is closed even on errors.
    with open(filename, 'r') as in_:
        queries = in_.read().split("\n")

    ir = IR()
    for query in queries:
        # A trailing newline yields an empty last element; skip such lines
        # rather than failing on the missing separator.
        if not query.strip():
            continue

        separator = query.index(' ')
        target_doc_id = int(query[:separator])
        q = Query(query[separator + 1:])

        documents = ir.search(q)

        # Position of the target document in the full result list; stays -1
        # (reported as rank 0) when it was not returned at all.
        rank = -1
        for position, doc in enumerate(documents):
            if doc.id == target_doc_id:
                rank = position
                break

        # Only the first 15 results are included in the report.
        top_documents = documents[0:15]
        result = (
            'target document is rank ' + str(rank + 1) + divider +
            divider.join([
                'DOC ID:\n\t' + str(doc.id) + '\n' + doc.fancy_str()
                for doc in top_documents
            ])
        )
        result += '\n\n\n'

        # The report file is named after the whole input line (id + query).
        with open('../tests/' + query + '.txt', 'w') as out_:
            out_.write(result)
def main():
    """Run the interactive query console.

    Builds the index by constructing ``IR()`` (reporting how long that took),
    prints the banner, then loops:

    1. Read a query with ``raw_input`` (``'q'`` quits, ``'h'`` calls
       ``_help()``).
    2. Run it through ``ir.search`` and report the number of results and the
       elapsed time, or say that nothing was found.
    3. When there are results, show them via ``_print(k, documents)`` and
       offer a sub-menu: ``'+'`` advances ``k`` by 10 and prints again,
       a number prints that document's ``fancy_str()``, ``'e'`` dumps every
       result to a file under ``../dump/``, ``'r'`` returns to the query
       prompt, ``'q'`` quits and ``'h'`` calls ``_help()``.

    This is Python 2 code (it relies on ``raw_input``); the single-argument
    ``print(...)`` calls behave exactly like the print statement there.
    """
    divider = '\n==============================================================================\n'
    menu = (
        "============================================================\n" +
        "Type:\n" +
        " '+' -> more results\n" +
        " <DOC#> -> print content\n" +
        " 'e' -> export all results to a file\n" +
        " 'r' -> query again\n" +
        " 'q' -> quit\n" +
        " 'h' -> help\n" +
        "============================================================\n"
    )

    print("Loading index...")
    start = time.time()
    ir = IR()
    elapsed = time.time() - start
    print("Index loaded in " + str(elapsed) + " seconds")

    print("\n============================================================")
    print("======================== IR Machine ========================")
    print("============================================================")
    print("= A IR tool to query over the Reuters database. =")
    print("= More details about the database at http://bit.ly/1F8AFcO =")
    print("= Source code avaliable at http://bit.ly/1mezIcN =")
    print("= Authors: =")
    print("= @Joao Gabriel Santiago Mauricio de Abreu =")
    print("= @Natalia Paola de Vasconcelos Cometti =")
    print("= @Victor Felix Pimenta =")
    print("= Since: 12/11/2015 =")
    print("============================================================\n")

    # Named so it does not shadow the ``quit`` builtin.
    quit_requested = False
    while not quit_requested:
        # Offset handed to _print; reset for every new query.
        k = 0
        input_ = raw_input("Type your query ('q' to quit, 'h' to help): ")
        if input_ == 'q':
            break
        elif input_ == 'h':
            _help()
            continue

        query = Query(input_)
        start = time.time()
        documents = ir.search(query)
        elapsed = time.time() - start

        if len(documents) == 0:
            print("No results found for '" + input_ + "'")
            continue

        print("\n" + str(len(documents)) + " results found in " +
              str(elapsed) + " seconds:\n")
        _print(k, documents)

        while True:
            opt = raw_input(menu)
            if opt == '+':
                k += 10
                _print(k, documents)
            elif opt == 'e':
                # File name is "<query> (<timestamp>).txt".
                stamp = datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d %H:%M:%S')
                result = divider.join([doc.fancy_str() for doc in documents])
                result += '\n\n\n'
                # ``with`` guarantees the dump file is closed even if the
                # write fails.
                with open("../dump/" + input_ + " (" + stamp + ").txt",
                          'w') as f:
                    f.write(result)
            elif opt == 'r':
                break
            elif opt == 'q':
                quit_requested = True
                break
            elif opt == 'h':
                _help()
            else:
                # Anything else is treated as a 1-based document number.
                try:
                    idx = int(opt)
                except ValueError:
                    print("Error: Invalid input!")
                else:
                    # Accept only numbers up to k + 10 that also exist in the
                    # result list; without the len() bound a short result
                    # list raised IndexError here.
                    # NOTE(review): k + 10 presumably matches the last entry
                    # _print has shown so far; confirm against _print.
                    if 0 < idx <= min(k + 10, len(documents)):
                        print(documents[idx - 1].fancy_str())
                    else:
                        print("Error: Document number out of bounds!")
def main():
    """Run the interactive query console.

    Builds the index by constructing ``IR()`` (reporting how long that took),
    prints the banner, then loops:

    1. Read a query with ``raw_input`` (``'q'`` quits, ``'h'`` calls
       ``_help()``).
    2. Run it through ``ir.search`` and report the number of results and the
       elapsed time, or say that nothing was found.
    3. When there are results, show them via ``_print(k, documents)`` and
       offer a sub-menu: ``'+'`` advances ``k`` by 10 and prints again,
       a number prints that document's ``fancy_str()``, ``'e'`` dumps every
       result to a file under ``../dump/``, ``'r'`` returns to the query
       prompt, ``'q'`` quits and ``'h'`` calls ``_help()``.

    This is Python 2 code (it relies on ``raw_input``); the single-argument
    ``print(...)`` calls behave exactly like the print statement there.
    """
    divider = '\n==============================================================================\n'
    menu = (
        "============================================================\n"
        + "Type:\n"
        + " '+' -> more results\n"
        + " <DOC#> -> print content\n"
        + " 'e' -> export all results to a file\n"
        + " 'r' -> query again\n"
        + " 'q' -> quit\n"
        + " 'h' -> help\n"
        + "============================================================\n"
    )

    print("Loading index...")
    start = time.time()
    ir = IR()
    elapsed = time.time() - start
    print("Index loaded in " + str(elapsed) + " seconds")

    print("\n============================================================")
    print("======================== IR Machine ========================")
    print("============================================================")
    print("= A IR tool to query over the Reuters database. =")
    print("= More details about the database at http://bit.ly/1F8AFcO =")
    print("= Source code avaliable at http://bit.ly/1mezIcN =")
    print("= Authors: =")
    print("= @Joao Gabriel Santiago Mauricio de Abreu =")
    print("= @Natalia Paola de Vasconcelos Cometti =")
    print("= @Victor Felix Pimenta =")
    print("= Since: 12/11/2015 =")
    print("============================================================\n")

    # Named so it does not shadow the ``quit`` builtin.
    quit_requested = False
    while not quit_requested:
        # Offset handed to _print; reset for every new query.
        k = 0
        input_ = raw_input("Type your query ('q' to quit, 'h' to help): ")
        if input_ == 'q':
            break
        elif input_ == 'h':
            _help()
            continue

        query = Query(input_)
        start = time.time()
        documents = ir.search(query)
        elapsed = time.time() - start

        if len(documents) == 0:
            print("No results found for '" + input_ + "'")
            continue

        print("\n" + str(len(documents)) + " results found in "
              + str(elapsed) + " seconds:\n")
        _print(k, documents)

        while True:
            opt = raw_input(menu)
            if opt == '+':
                k += 10
                _print(k, documents)
            elif opt == 'e':
                # File name is "<query> (<timestamp>).txt".
                stamp = datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d %H:%M:%S')
                result = divider.join([doc.fancy_str() for doc in documents])
                result += '\n\n\n'
                # ``with`` guarantees the dump file is closed even if the
                # write fails.
                with open("../dump/" + input_ + " (" + stamp + ").txt",
                          'w') as f:
                    f.write(result)
            elif opt == 'r':
                break
            elif opt == 'q':
                quit_requested = True
                break
            elif opt == 'h':
                _help()
            else:
                # Anything else is treated as a 1-based document number.
                try:
                    idx = int(opt)
                except ValueError:
                    print("Error: Invalid input!")
                else:
                    # Accept only numbers up to k + 10 that also exist in the
                    # result list; without the len() bound a short result
                    # list raised IndexError here.
                    # NOTE(review): k + 10 presumably matches the last entry
                    # _print has shown so far; confirm against _print.
                    if 0 < idx <= min(k + 10, len(documents)):
                        print(documents[idx - 1].fancy_str())
                    else:
                        print("Error: Document number out of bounds!")