# Close the two output handles before the per-query pass; the loop below
# re-opens the per-device output files in append mode for every query.
output_desktop.close()
output_mobile.close()

import time

t0 = time.time()
for query in queries:
    # Progress trace: the query record followed by seconds elapsed so far.
    print("%s %s" % (query, time.time() - t0))

    query_id = query['id']
    query_str = query['query']

    # Every query gets its own scratch directory below base_tmp, which is
    # handed to the search through the shared `ini` settings.
    tmp = "%s/%s" % (base_tmp, query_id)
    os.mkdir(tmp)
    ini['tmp_folder'] = tmp

    # Register the pages of queries that are not yet stored for 'NTCIR'.
    if web_search.find_query(cursor, query_str, 'NTCIR') is None:
        register_query_pages(query_id, query_str, cursor,
                             ntcir_urls_folder, ntcir_htmls_folder,
                             cache_folder)
        # NOTE(review): commit kept together with the registration it
        # persists -- confirm against the original layout.
        conn.commit()

    # presumably (size budget, label) pairs -- TODO confirm against
    # one_click_search.
    results, html_urls = one_click_search(
        ini, query_str, [(1000, 'DESKTOP'), (280, 'MOBILE')])

    for output_file, result in [(output_file_desktop, results['DESKTOP']),
                                (output_file_mobile, results['MOBILE'])]:
        output = open(output_file, 'a')
        # result[0] is written as a single line: embedded newlines become
        # spaces so the tab-separated record stays on one row.
        output.write('%s\tOUT\t%s\n'
                     % (query_id, re.sub('\n', ' ', result[0])))
        printed = set()
        for evidence in result[1]:
            # TODO evidence id to page
            if evidence not in printed:
                # Keep only the part of the URL after the last '/'.
                url = re.sub('.*/', '', html_urls[int(evidence)])
                output.write('%s\tSOURCE\t%s\n' % (query_id, url))
                printed.add(evidence)
import web_search

if __name__ == "__main__":
    # Command line: <cache_folder> <search_engine> <html_file> <query words...>
    cache_folder = sys.argv[1]
    search_engine = sys.argv[2]
    html_file = sys.argv[3]
    # All remaining arguments are joined with spaces into one query string.
    query_str = " ".join(sys.argv[4:])
    print("cache folder %s" % (cache_folder,))
    print("search engine %s" % (search_engine,))
    print("urls %s" % (html_file,))
    print("query %s" % (query_str,))

    (conn, cursor) = web_search.open_db(cache_folder)

    # A query already stored for this engine must not be registered twice.
    if web_search.find_query(cursor, query_str, search_engine) is not None:
        sys.exit("Query already in index")

    cache_files_folder = "%s/pages" % (cache_folder,)
    print("page cache folder %s" % (cache_files_folder,))
    try:
        os.mkdir(cache_files_folder)
    except OSError as exc:
        # An already existing folder is the expected case and stays silent.
        # Any other failure (permissions, missing parent, ...) is reported
        # but deliberately not fatal.  OSError is caught directly because an
        # `except object` clause can never match a raised exception, which
        # previously left this warning unreachable.
        if not os.path.isdir(cache_files_folder):
            print("(warning) problem creating %s %s"
                  % (cache_files_folder, exc))
    else:
        # Only announce the creation when mkdir actually succeeded.
        print("creating page cache folder %s" % (cache_files_folder,))

    qid = web_search.add_query(cursor, query_str, search_engine)
    rank = 0
import web_search

if __name__ == '__main__':
    # Usage: <cache_folder> <search_engine> <html_file> <query words...>
    cache_folder = sys.argv[1]
    search_engine = sys.argv[2]
    html_file = sys.argv[3]
    # Everything after the third argument forms the query, space-separated.
    query_str = " ".join(sys.argv[4:])
    print("cache folder %s" % (cache_folder,))
    print("search engine %s" % (search_engine,))
    print("urls %s" % (html_file,))
    print("query %s" % (query_str,))

    (conn, cursor) = web_search.open_db(cache_folder)

    # Stop early when this query is already stored for the given engine.
    if web_search.find_query(cursor, query_str, search_engine) is not None:
        sys.exit("Query already in index")

    cache_files_folder = "%s/pages" % (cache_folder,)
    print("page cache folder %s" % (cache_files_folder,))
    try:
        os.mkdir(cache_files_folder)
    except OSError as exc:
        # Stay quiet when the folder is simply already there; warn (without
        # aborting) on every other mkdir failure.  Catching OSError here
        # replaces an `except object` clause that could never match and so
        # hid real errors behind a silent `pass`.
        if not os.path.isdir(cache_files_folder):
            print("(warning) problem creating %s %s"
                  % (cache_files_folder, exc))
    else:
        # Reached only when the directory was really created just now.
        print("creating page cache folder %s" % (cache_files_folder,))

    qid = web_search.add_query(cursor, query_str, search_engine)
    rank = 0
# Close both output handles first; each query below appends to the
# per-device output files by re-opening them.
output_desktop.close()
output_mobile.close()

import time

t0 = time.time()
for query in queries:
    # Log the query record together with the seconds elapsed since t0.
    print("%s %s" % (query, time.time() - t0))

    query_id = query['id']
    query_str = query['query']

    # A fresh scratch directory per query, published through `ini`.
    tmp = "%s/%s" % (base_tmp, query_id)
    os.mkdir(tmp)
    ini['tmp_folder'] = tmp

    # Queries unknown under the 'NTCIR' engine have their pages registered.
    if web_search.find_query(cursor, query_str, 'NTCIR') is None:
        register_query_pages(query_id, query_str, cursor,
                             ntcir_urls_folder, ntcir_htmls_folder,
                             cache_folder)
        # NOTE(review): commit kept next to the registration it persists --
        # confirm against the original layout.
        conn.commit()

    # presumably (size budget, label) pairs -- TODO confirm against
    # one_click_search.
    results, html_urls = one_click_search(
        ini, query_str, [(1000, 'DESKTOP'), (280, 'MOBILE')])

    for output_file, result in [(output_file_desktop, results['DESKTOP']),
                                (output_file_mobile, results['MOBILE'])]:
        output = open(output_file, 'a')
        # Newlines inside result[0] are replaced by spaces so the record
        # occupies exactly one tab-separated line.
        output.write('%s\tOUT\t%s\n'
                     % (query_id, re.sub('\n', ' ', result[0])))
        printed = set()