def __init__(self, con, cf, name=""):
    """Build a search service from the configuration stored in a database.

    Parameters:
    * con: database connection; a cursor obtained from it runs the queries
    * cf: configuration handed to ServiceManager
    * name: name given to the created SearchService

    The search engine, query filter and result filter class names read
    from the database are resolved through this module's globals().
    Filter names that cannot be resolved are reported and skipped.
    """
    cur = con.cursor()

    # create a ServiceManager
    self.sm = ServiceManager(cf)
    # create SearchServices
    self.search_service = SearchService(self.sm, name)
    # Add SearchServices to ServiceManager
    self.sm.add_search_service(self.search_service)

    # searching search engine in the database
    cur.execute(
        "SELECT configuration_searchengine.searchEngine "
        "FROM configuration_searchengineused,configuration_searchengine "
        "WHERE searchEngine_id = configuration_searchengine.id")
    entries = cur.fetchall()
    entry = entries[0]  # we only take the first search engine
    print("Search engine " + entry[0])

    # NOTE(review): the engine parameters are always read for
    # searchEngineUsed_id=1, not for the id of the row selected above.
    cur.execute(
        "SELECT key, value FROM configuration_parameters "
        "WHERE searchEngineUsed_id=1")
    dictPar = dict((row[0], row[1]) for row in cur.fetchall())

    # Assign SearchEngine to SearchServices
    self.search_service.search_engine = globals()[entry[0]](
        self.search_service, **dictPar)

    # searching the QueryFilters
    cur.execute(
        "SELECT numOrder, configuration_queryfilter.queryFilter, "
        "configuration_queryfilterorder.id "
        "FROM configuration_queryfilterorder,configuration_queryfilter "
        "WHERE queryFilter_id = configuration_queryfilter.id")
    entries = cur.fetchall()
    for entry in entries:
        try:
            ptr_func = globals()[entry[1]]
        except KeyError:
            print("Error: " + entry[1] + " not found")
            continue
        # entry[2] is an id read from the database itself, not user input
        cur.execute(
            "SELECT key, value FROM configuration_parameterq "
            "WHERE queryFilterOrder_id=" + str(entry[2]))
        dictPar = dict((row[0], row[1]) for row in cur.fetchall())
        print("Query filter " + str(entry[0]) + " " + entry[1] + " " + " "
              + str(dictPar))
        # A class deriving from both bases is registered in both roles,
        # each time with its own instance.
        if issubclass(ptr_func, QueryModifier):
            self.search_service.add_query_modifier(ptr_func(**dictPar))
            print("QueryModifier")
        if issubclass(ptr_func, QueryFilter):
            self.search_service.add_query_filter(ptr_func(**dictPar))
            print("QueryFilter")

    # searching the ResultFilters
    cur.execute(
        "SELECT numOrder, configuration_resultfilter.resultFilter, "
        "configuration_resultfilterorder.id "
        "FROM configuration_resultfilterorder,configuration_resultfilter "
        "WHERE resultFilter_id = configuration_resultfilter.id")
    entries = cur.fetchall()
    for entry in entries:
        try:
            ptr_func = globals()[entry[1]]
        except KeyError:
            print("Error: " + entry[1] + " not found")
            continue
        cur.execute(
            "SELECT key, value FROM configuration_parameterr "
            "WHERE resultFilterOrder_id=" + str(entry[2]))
        dictPar = dict((row[0], row[1]) for row in cur.fetchall())
        print("Result filter " + str(entry[0]) + " " + entry[1] + " "
              + str(ptr_func.__bases__) + " " + str(dictPar))
        if issubclass(ptr_func, ResultModifier):
            self.search_service.add_result_modifier(ptr_func(**dictPar))
            print("ResultModifier")
        if issubclass(ptr_func, ResultFilter):
            # The stored parameters are passed here as well, like for
            # every other filter kind.
            self.search_service.add_result_filter(ptr_func(**dictPar))
            print("ResultFilter")
# Whoosh index directory, resolved against the current working directory.
whoosh_dir = os.path.join(os.getcwd(), "mase/query_logs/index")

# NOTE(review): the Bing API key below is committed in source. Consider
# loading it from the environment or an untracked settings file, and
# rotating the exposed key.
config = {
    "proxyhost": "http://wwwcache.gla.ac.uk:8080",  # <-- remove if not UGLW
    "log_dir": log_dir,
    # Obtain key from https://datamarket.azure.com/dataset/5BA839F1-12CE-4CCE-BF57-A49D98D29A44
    "bing_api_key": "/aROdM5Ck7fKHR4ge30r8W/K/D84GJkcl42lL8eNMSc=",
}

# create a single ServiceManager
service = ServiceManager(config)

# create a Web SearchService
web_search_service = SearchService(service, "web_search")
web_search_service.search_engine = BingV3(web_search_service, source="Web",
                                          resultsPerPage=4)

# Add Web SearchService to ServiceManager
service.add_search_service(web_search_service)

# Create a blacklist filter to block queries containing the terms below
query_black_list = BlackListModifier(terms="bad worse nasty filthy")

# Add our blacklist filter to the web search service
web_search_service.add_query_modifier(query_black_list)

# create a file based QUERY LOGGER
web_search_service.query_logger = QueryLogger(web_search_service, log_mode=0)
web_search_service.postLogging = True

# create a index based QUERY LOGGER
# NOTE(review): this fragment starts inside the body of an if/else whose
# header is not visible here; the original indentation has been lost.
# Visible behaviour: the if-branch joins log_dir onto DOLLEMANPATH and the
# else-branch points whoosh_dir at "mase/query_logs/index" under the current
# working directory. The code then builds a config dict (log_dir plus an empty
# "bing_api_key"; the "proxyhost" entry is commented out), creates a
# ServiceManager from it, registers a "web_search" SearchService backed by
# BingV3 (source='Web', resultsPerPage=4), adds a BlackListModifier for the
# listed terms, attaches a QueryLogger with log_mode=0 and sets postLogging
# to True.
log_dir = os.path.join(DOLLEMANPATH, log_dir) else: whoosh_dir = os.path.join(os.getcwd(), "mase/query_logs/index") config = { #"proxyhost": "http://wwwcache.gla.ac.uk:8080", # <-- remove if not UGLW "log_dir": log_dir, "bing_api_key": "", # Obtain key from https://datamarket.azure.com/dataset/5BA839F1-12CE-4CCE-BF57-A49D98D29A44 } # create a ServiceManager service = ServiceManager(config) # create a Web SearchService web_search_service = SearchService(service, "web_search") web_search_service.search_engine = BingV3(web_search_service, source='Web', resultsPerPage=4) service.add_search_service(web_search_service) # Create a blacklist filter to block queries containing the terms below query_black_list = BlackListModifier(terms="bad worse nasty filthy") #Add our blacklist filter to the web search service web_search_service.add_query_modifier( query_black_list) # Add Web SearchService to ServiceManager # create a file based QUERY LOGGER web_search_service.query_logger = QueryLogger(web_search_service, log_mode=0) web_search_service.postLogging = True
def __init__(self, cf, name="", dictParS=None):
    """Constructor for the service.

    Parameters:
    * cf: dictionary with some environment values (see ServiceManager)
    * name: the name for the created search service (see SearchService)
    * dictParS: extra parameters for the search engine, as a dictionary
      parameterName:value. Parameters stored in the database override
      entries with the same name. The dictionary passed in is not modified.

    Raises NameError when the configured search engine class cannot be
    found in this module's globals().
    """
    # if we are outside Django, we need to define this here, not in the
    # general settings.py as in a Django app.
    if not hasattr(settings, 'DATABASE_NAME'):
        settings.DATABASES['default']['NAME'] = os.path.join(
            APP_DIR, 'puppy/interface/interface.db')
    else:
        settings.DATABASE_NAME = 'puppy/interface/interface.db'

    # create a ServiceManager
    self.sm = ServiceManager(cf)
    # create SearchServices
    self.search_service = SearchService(self.sm, name)
    # Add SearchServices to ServiceManager
    self.sm.add_search_service(self.search_service)

    # searching search engine in the database
    entries = SearchEngineUsed.objects.all()
    entry = entries[0]  # we only take the first search engine
    engine_name = entry.searchEngine.searchEngine
    print(engine_name)
    try:
        ptr_func = globals()[engine_name]
    except KeyError:
        print("Error: " + str(entry) + " not found")
        # Without the engine class nothing below can work. Fail here with
        # a clear message instead of hitting an unbound ptr_func later
        # (UnboundLocalError is itself a NameError subclass).
        raise NameError(
            "search engine class %r is not defined" % (engine_name,))

    # Work on a private copy so neither a shared default nor the caller's
    # dictionary accumulates the database parameters.
    engine_params = dict(dictParS) if dictParS is not None else {}
    for parameter in ParameterS.objects.filter(searchEngineUsed=entry):
        print(parameter)
        engine_params[parameter.key] = parameter.value
    self.search_service.search_engine = ptr_func(
        self.search_service, **engine_params)

    # searching the QueryFilters
    entries = QueryFilterOrder.objects.all()
    for entry in entries:
        try:
            ptr_func = globals()[entry.queryFilter.queryFilter]
        except KeyError:
            print("Error: " + str(entry) + " not found")
            continue
        dictPar = {}
        for parameter in ParameterQ.objects.filter(queryFilterOrder=entry):
            print(parameter)
            dictPar[parameter.key] = parameter.value
        print("Query filter " + str(entry.numOrder) + " "
              + entry.queryFilter.queryFilter + " " + " " + str(dictPar))
        # A class deriving from both bases is registered in both roles,
        # each time with its own instance.
        if issubclass(ptr_func, QueryModifier):
            self.search_service.add_query_modifier(
                ptr_func(order=entry.numOrder, **dictPar))
            print("QueryModifier")
        if issubclass(ptr_func, QueryFilter):
            self.search_service.add_query_filter(
                ptr_func(order=entry.numOrder, **dictPar))
            print("QueryFilter")

    # searching the ResultFilters
    entries = ResultFilterOrder.objects.all()
    for entry in entries:
        try:
            ptr_func = globals()[entry.resultFilter.resultFilter]
        except KeyError:
            print("Error: " + str(entry) + " not found")
            continue
        dictPar = {}
        for parameter in ParameterR.objects.filter(resultFilterOrder=entry):
            print(parameter)
            dictPar[parameter.key] = parameter.value
        print("Result filter " + str(entry.numOrder) + " "
              + entry.resultFilter.resultFilter + " " + " " + str(dictPar))
        if issubclass(ptr_func, ResultModifier):
            self.search_service.add_result_modifier(
                ptr_func(order=entry.numOrder, **dictPar))
            print("ResultModifier")
        if issubclass(ptr_func, ResultFilter):
            self.search_service.add_result_filter(
                ptr_func(order=entry.numOrder, **dictPar))
            print("ResultFilter")
def __init__(self, cf, name="", dictParS=None):
    """Constructor for the service.

    Parameters:
    * cf: dictionary with some environment values (see ServiceManager)
    * name: the name for the created search service (see SearchService)
    * dictParS: extra parameters for the search engine, as a dictionary
      parameterName:value. Parameters stored in the database override
      entries with the same name. The dictionary passed in is not modified.

    Raises NameError when the configured search engine class cannot be
    found in this module's globals().
    """
    # if we are outside Django, we need to define this here, not in the
    # general settings.py as in a Django app.
    if not hasattr(settings, 'DATABASE_NAME'):
        settings.DATABASES['default']['NAME'] = os.path.join(
            APP_DIR, 'puppy/interface/interface.db')
    else:
        settings.DATABASE_NAME = 'puppy/interface/interface.db'

    # create a ServiceManager
    self.sm = ServiceManager(cf)
    # create SearchServices
    self.search_service = SearchService(self.sm, name)
    # Add SearchServices to ServiceManager
    self.sm.add_search_service(self.search_service)

    # searching search engine in the database
    entries = SearchEngineUsed.objects.all()
    entry = entries[0]  # we only take the first search engine
    engine_name = entry.searchEngine.searchEngine
    print(engine_name)
    try:
        ptr_func = globals()[engine_name]
    except KeyError:
        print("Error: " + str(entry) + " not found")
        # Without the engine class nothing below can work. Fail here with
        # a clear message instead of hitting an unbound ptr_func later
        # (UnboundLocalError is itself a NameError subclass).
        raise NameError(
            "search engine class %r is not defined" % (engine_name,))

    # Work on a private copy so neither a shared default nor the caller's
    # dictionary accumulates the database parameters.
    engine_params = dict(dictParS) if dictParS is not None else {}
    for parameter in ParameterS.objects.filter(searchEngineUsed=entry):
        print(parameter)
        engine_params[parameter.key] = parameter.value
    self.search_service.search_engine = ptr_func(
        self.search_service, **engine_params)

    # searching the QueryFilters
    entries = QueryFilterOrder.objects.all()
    for entry in entries:
        try:
            ptr_func = globals()[entry.queryFilter.queryFilter]
        except KeyError:
            print("Error: " + str(entry) + " not found")
            continue
        dictPar = {}
        for parameter in ParameterQ.objects.filter(queryFilterOrder=entry):
            print(parameter)
            dictPar[parameter.key] = parameter.value
        print("Query filter " + str(entry.numOrder) + " "
              + entry.queryFilter.queryFilter + " " + " " + str(dictPar))
        # A class deriving from both bases is registered in both roles,
        # each time with its own instance.
        if issubclass(ptr_func, QueryModifier):
            self.search_service.add_query_modifier(
                ptr_func(order=entry.numOrder, **dictPar))
            print("QueryModifier")
        if issubclass(ptr_func, QueryFilter):
            self.search_service.add_query_filter(
                ptr_func(order=entry.numOrder, **dictPar))
            print("QueryFilter")

    # searching the ResultFilters
    entries = ResultFilterOrder.objects.all()
    for entry in entries:
        try:
            ptr_func = globals()[entry.resultFilter.resultFilter]
        except KeyError:
            print("Error: " + str(entry) + " not found")
            continue
        dictPar = {}
        for parameter in ParameterR.objects.filter(resultFilterOrder=entry):
            print(parameter)
            dictPar[parameter.key] = parameter.value
        print("Result filter " + str(entry.numOrder) + " "
              + entry.resultFilter.resultFilter + " " + " " + str(dictPar))
        if issubclass(ptr_func, ResultModifier):
            self.search_service.add_result_modifier(
                ptr_func(order=entry.numOrder, **dictPar))
            print("ResultModifier")
        if issubclass(ptr_func, ResultFilter):
            self.search_service.add_result_filter(
                ptr_func(order=entry.numOrder, **dictPar))
            print("ResultFilter")
def __init__(self, con, cf, name=""):
    """Build a search service from the configuration stored in a database.

    Parameters:
    * con: database connection; a cursor obtained from it runs the queries
    * cf: configuration handed to ServiceManager
    * name: name given to the created SearchService

    The search engine, query filter and result filter class names read
    from the database are resolved through this module's globals().
    Filter names that cannot be resolved are reported and skipped.
    """
    cur = con.cursor()

    # create a ServiceManager
    self.sm = ServiceManager(cf)
    # create SearchServices
    self.search_service = SearchService(self.sm, name)
    # Add SearchServices to ServiceManager
    self.sm.add_search_service(self.search_service)

    # searching search engine in the database
    cur.execute(
        "SELECT configuration_searchengine.searchEngine "
        "FROM configuration_searchengineused,configuration_searchengine "
        "WHERE searchEngine_id = configuration_searchengine.id")
    entries = cur.fetchall()
    entry = entries[0]  # we only take the first search engine
    print("Search engine " + entry[0])

    # NOTE(review): the engine parameters are always read for
    # searchEngineUsed_id=1, not for the id of the row selected above.
    cur.execute(
        "SELECT key, value FROM configuration_parameters "
        "WHERE searchEngineUsed_id=1")
    dictPar = dict((row[0], row[1]) for row in cur.fetchall())

    # Assign SearchEngine to SearchServices
    self.search_service.search_engine = globals()[entry[0]](
        self.search_service, **dictPar)

    # searching the QueryFilters
    cur.execute(
        "SELECT numOrder, configuration_queryfilter.queryFilter, "
        "configuration_queryfilterorder.id "
        "FROM configuration_queryfilterorder,configuration_queryfilter "
        "WHERE queryFilter_id = configuration_queryfilter.id")
    entries = cur.fetchall()
    for entry in entries:
        try:
            ptr_func = globals()[entry[1]]
        except KeyError:
            print("Error: " + entry[1] + " not found")
            continue
        # entry[2] is an id read from the database itself, not user input
        cur.execute(
            "SELECT key, value FROM configuration_parameterq "
            "WHERE queryFilterOrder_id=" + str(entry[2]))
        dictPar = dict((row[0], row[1]) for row in cur.fetchall())
        print("Query filter " + str(entry[0]) + " " + entry[1] + " " + " "
              + str(dictPar))
        # A class deriving from both bases is registered in both roles,
        # each time with its own instance.
        if issubclass(ptr_func, QueryModifier):
            self.search_service.add_query_modifier(ptr_func(**dictPar))
            print("QueryModifier")
        if issubclass(ptr_func, QueryFilter):
            self.search_service.add_query_filter(ptr_func(**dictPar))
            print("QueryFilter")

    # searching the ResultFilters
    cur.execute(
        "SELECT numOrder, configuration_resultfilter.resultFilter, "
        "configuration_resultfilterorder.id "
        "FROM configuration_resultfilterorder,configuration_resultfilter "
        "WHERE resultFilter_id = configuration_resultfilter.id")
    entries = cur.fetchall()
    for entry in entries:
        try:
            ptr_func = globals()[entry[1]]
        except KeyError:
            print("Error: " + entry[1] + " not found")
            continue
        cur.execute(
            "SELECT key, value FROM configuration_parameterr "
            "WHERE resultFilterOrder_id=" + str(entry[2]))
        dictPar = dict((row[0], row[1]) for row in cur.fetchall())
        print("Result filter " + str(entry[0]) + " " + entry[1] + " "
              + str(ptr_func.__bases__) + " " + str(dictPar))
        if issubclass(ptr_func, ResultModifier):
            self.search_service.add_result_modifier(ptr_func(**dictPar))
            print("ResultModifier")
        if issubclass(ptr_func, ResultFilter):
            # The stored parameters are passed here as well, like for
            # every other filter kind.
            self.search_service.add_result_filter(ptr_func(**dictPar))
            print("ResultFilter")
# NOTE(review): this fragment starts inside a method whose def/try header is
# not visible here; the original indentation has been lost.
# Visible behaviour of the method tail: it opens a searcher on self.docIndex,
# fetches one page of results with search_page(query_terms, page,
# pagelen=pagelen), configures a ContextFragmenter and an HtmlFormatter for
# highlighting, prints the result and page counts, and returns the value of
# parse_whoosh_trec(...). The bare except only prints an error message, so no
# explicit value is returned on failure.
# NOTE(review): "reponse" looks like a misspelling of "response" and is never
# read in the visible code - confirm and remove.
# The statements after the method build a config dict, create a ServiceManager
# and register a "trec_search" SearchService whose engine is a
# WhooshTRECNewsEngine on my_whoosh_doc_index_dir. No query logger is attached
# in the visible code, although the inline comment mentions query logging.
#print "page len" + str(pagelen) results = [] reponse = [] with self.docIndex.searcher() as searcher: results = searcher.search_page( query_terms, page, pagelen=pagelen ) # results = searcher.search( query_terms ) results.fragmenter = highlight.ContextFragmenter(maxchars=300, surround=300) results.formatter = highlight.HtmlFormatter() results.fragmenter.charlimit = 100000 print "WhooshTRECNewsEngine found: " + str(len(results)) + " results" print "Page %d of %d - PageLength of %d" % (results.pagenum, results.pagecount, results.pagelen) response = parse_whoosh_trec('WhooshTRECNewsEngine', query.search_terms, results) return response except: print "Error in Search Service: Whoosh TREC News search failed" config = { "log_dir": "treconomics/query_logs", } # create a ServiceManager service = ServiceManager(config) # create a SearchService, choose search engine and enable query logging trec_search_service = SearchService(service, "trec_search") trec_search_service.search_engine = WhooshTRECNewsEngine(trec_search_service, my_whoosh_doc_index_dir) # add SearchService to ServiceManager service.add_search_service(trec_search_service)
# NOTE(review): this fragment starts inside a method whose def/try header and
# the statement that produces "results" are not visible here; the original
# indentation has been lost.
# Visible behaviour of the method tail: it configures a ContextFragmenter and
# an HtmlFormatter on results for highlighting, prints the result and page
# counts, and returns the value of parse_whoosh_trec(...). The bare except
# only prints an error message, so no explicit value is returned on failure.
# The statements after the method build a config dict, create a ServiceManager
# and register a "trec_search" SearchService whose engine is a
# WhooshTRECNewsEngine on my_whoosh_doc_index_dir. No query logger is attached
# in the visible code, although the inline comment mentions query logging.
results.fragmenter = highlight.ContextFragmenter(maxchars=300, surround=300) results.formatter = highlight.HtmlFormatter() results.fragmenter.charlimit = 100000 print "WhooshTRECNewsEngine found: " + str( len(results)) + " results" print "Page %d of %d - PageLength of %d" % ( results.pagenum, results.pagecount, results.pagelen) response = parse_whoosh_trec('WhooshTRECNewsEngine', query.search_terms, results) return response except: print "Error in Search Service: Whoosh TREC News search failed" config = { "log_dir": "treconomics/query_logs", } # create a ServiceManager service = ServiceManager(config) # create a SearchService, choose search engine and enable query logging trec_search_service = SearchService(service, "trec_search") trec_search_service.search_engine = WhooshTRECNewsEngine( trec_search_service, my_whoosh_doc_index_dir) # add SearchService to ServiceManager service.add_search_service(trec_search_service)