# NOTE(review): this chunk was a whitespace-mangled paste collapsed onto one
# physical line, which turned everything after the first `#` into dead comment
# text. Reconstructed below from the token stream; confirm against the
# original file.

# Directory for the Whoosh-backed query-log index.
whoosh_dir = os.path.join(os.getcwd(), "mase/query_logs/index")

# Service configuration. `log_dir` is expected to be defined earlier in the
# file (not visible in this chunk) — TODO confirm.
config = {
    # "proxyhost": "http://wwwcache.gla.ac.uk:8080",  # <-- remove if not UGLW
    "log_dir": log_dir,
    # Obtain key from https://datamarket.azure.com/dataset/5BA839F1-12CE-4CCE-BF57-A49D98D29A44
    "bing_api_key": "",
}

# create a ServiceManager
service = ServiceManager(config)

# create a Web SearchService backed by Bing (4 results per page)
web_search_service = SearchService(service, "web_search")
web_search_service.search_engine = BingV3(web_search_service, source='Web', resultsPerPage=4)
# Add Web SearchService to ServiceManager
service.add_search_service(web_search_service)

# Create a blacklist filter to block queries containing the terms below
query_black_list = BlackListModifier(terms="bad worse nasty filthy")
# Add our blacklist filter to the web search service
web_search_service.add_query_modifier(query_black_list)

# create a file based QUERY LOGGER
web_search_service.query_logger = QueryLogger(web_search_service, log_mode=0)
web_search_service.postLogging = True

# create a index based QUERY LOGGER
# NOTE(review): the statement that followed this comment (presumably the
# WhooshQueryLogger construction, as in the sibling chunks) was truncated from
# this paste — TODO restore:
# whoosh_query_logger = WhooshQueryLogger(whoosh_query_index_dir=whoosh_dir, unique=True)
# NOTE(review): this chunk was a whitespace-mangled paste collapsed onto one
# physical line; reconstructed below from the token stream. Confirm against
# the original file.

# Service configuration. `log_dir` is expected to be defined earlier in the
# file (not visible in this chunk) — TODO confirm.
config = {
    "proxyhost": "http://wwwcache.gla.ac.uk:8080",  # <-- remove if not UGLW
    "log_dir": log_dir,
    # SECURITY(review): a Bing API key was committed here in plain text.
    # It has been removed — REVOKE the leaked key and supply a fresh one via
    # the BING_API_KEY environment variable instead.
    # Obtain key from https://datamarket.azure.com/dataset/5BA839F1-12CE-4CCE-BF57-A49D98D29A44
    "bing_api_key": os.environ.get("BING_API_KEY", ""),
}

# create a ServiceManager
# (the original pasted chunk constructed ServiceManager twice back-to-back;
# the redundant duplicate has been removed)
service = ServiceManager(config)

# create a Web SearchService backed by Bing (4 results per page)
web_search_service = SearchService(service, "web_search")
web_search_service.search_engine = BingV3(web_search_service, source="Web", resultsPerPage=4)
# Add Web SearchService to ServiceManager
service.add_search_service(web_search_service)

# Create a blacklist filter to block queries containing the terms below
query_black_list = BlackListModifier(terms="bad worse nasty filthy")
# Add our blacklist filter to the web search service
web_search_service.add_query_modifier(query_black_list)

# create a file based QUERY LOGGER
web_search_service.query_logger = QueryLogger(web_search_service, log_mode=0)
web_search_service.postLogging = True

# create a index based QUERY LOGGER
# (`whoosh_dir` is expected to be defined earlier in the file — TODO confirm)
whoosh_query_logger = WhooshQueryLogger(whoosh_query_index_dir=whoosh_dir, unique=True)
# NOTE(review): this chunk is a whitespace-mangled paste of two unrelated
# pieces: (1) the tail of a Whoosh search method — its `def` and `try:` lie
# outside the visible source, so the enclosing definition below is a
# best-guess reconstruction (confirm signature against the original file) —
# and (2) a small script configuring a TREC search service.

def search(self, query, query_terms, page=1, pagelen=10):
    """Run a paged Whoosh search and return parsed TREC results.

    NOTE(review): reconstructed signature — only the body was visible in the
    mangled paste. `query_terms` is the parsed Whoosh query object; `query`
    carries the raw `search_terms` string used for result parsing.
    Returns the parsed response, or None if the search fails (best-effort).
    """
    try:
        # print("page len" + str(pagelen))
        results = []
        response = []  # (the original also had a dead, misspelled `reponse = []`)
        with self.docIndex.searcher() as searcher:
            results = searcher.search_page(query_terms, page, pagelen=pagelen)
            # results = searcher.search(query_terms)
            # Configure snippet highlighting for the result summaries.
            results.fragmenter = highlight.ContextFragmenter(maxchars=300, surround=300)
            results.formatter = highlight.HtmlFormatter()
            results.fragmenter.charlimit = 100000
            # print statements parenthesized for Python 2/3 compatibility
            print("WhooshTRECNewsEngine found: " + str(len(results)) + " results")
            print("Page %d of %d - PageLength of %d" % (results.pagenum, results.pagecount, results.pagelen))
            response = parse_whoosh_trec('WhooshTRECNewsEngine', query.search_terms, results)
        return response
    except:
        # NOTE(review): bare except kept from the original (best-effort
        # search); consider narrowing and logging the actual exception.
        print("Error in Search Service: Whoosh TREC News search failed")


config = {
    "log_dir": "treconomics/query_logs",
}

# create a ServiceManager
service = ServiceManager(config)

# create a SearchService, choose search engine and enable query logging
# (`my_whoosh_doc_index_dir` is expected to be defined earlier — TODO confirm)
trec_search_service = SearchService(service, "trec_search")
trec_search_service.search_engine = WhooshTRECNewsEngine(trec_search_service, my_whoosh_doc_index_dir)

# add SearchService to ServiceManager
service.add_search_service(trec_search_service)
# NOTE(review): this chunk was a whitespace-mangled paste collapsed onto one
# physical line. It opens mid `if`/`else` — the condition and any other
# statements of the first branch are outside the visible source; the
# placeholder condition below MUST be confirmed against the original file.

_on_dolleman = False  # TODO(review): original condition lost in the paste
if _on_dolleman:
    # On the Dolleman host, logs live under DOLLEMANPATH (defined earlier).
    log_dir = os.path.join(DOLLEMANPATH, log_dir)
else:
    whoosh_dir = os.path.join(os.getcwd(), "mase/query_logs/index")

config = {
    # "proxyhost": "http://wwwcache.gla.ac.uk:8080",  # <-- remove if not UGLW
    "log_dir": log_dir,
    # Obtain key from https://datamarket.azure.com/dataset/5BA839F1-12CE-4CCE-BF57-A49D98D29A44
    "bing_api_key": "",
}

# create a ServiceManager
service = ServiceManager(config)

# create a Web SearchService backed by Bing (4 results per page)
web_search_service = SearchService(service, "web_search")
web_search_service.search_engine = BingV3(web_search_service, source='Web', resultsPerPage=4)
# Add Web SearchService to ServiceManager
service.add_search_service(web_search_service)

# Create a blacklist filter to block queries containing the terms below
query_black_list = BlackListModifier(terms="bad worse nasty filthy")
# Add our blacklist filter to the web search service
web_search_service.add_query_modifier(query_black_list)

# create a file based QUERY LOGGER
web_search_service.query_logger = QueryLogger(web_search_service, log_mode=0)
web_search_service.postLogging = True

# create a index based QUERY LOGGER
whoosh_query_logger = WhooshQueryLogger(whoosh_query_index_dir=whoosh_dir, unique=True)
# NOTE(review): whitespace-mangled paste — the newlines of this chunk were
# collapsed onto one physical line, so nothing here parses as intended. It
# appears to duplicate the tail of the Whoosh search method plus the TREC
# service setup seen elsewhere in this file; the enclosing `def`/`try:` (the
# `except:` below has no visible `try:`) lie outside the visible source, and
# the `print` statements are Python-2-only. Left byte-identical pending
# recovery of the original formatting — do not attempt to run as-is.
results.fragmenter = highlight.ContextFragmenter(maxchars=300, surround=300) results.formatter = highlight.HtmlFormatter() results.fragmenter.charlimit = 100000 print "WhooshTRECNewsEngine found: " + str( len(results)) + " results" print "Page %d of %d - PageLength of %d" % ( results.pagenum, results.pagecount, results.pagelen) response = parse_whoosh_trec('WhooshTRECNewsEngine', query.search_terms, results) return response except: print "Error in Search Service: Whoosh TREC News search failed" config = { "log_dir": "treconomics/query_logs", } # create a ServiceManager service = ServiceManager(config) # create a SearchService, choose search engine and enable query logging trec_search_service = SearchService(service, "trec_search") trec_search_service.search_engine = WhooshTRECNewsEngine( trec_search_service, my_whoosh_doc_index_dir) # add SearchService to ServiceManager service.add_search_service(trec_search_service)