def __init__(self, con, cf, name=""):
        """Build a search service configured from the database behind *con*.

        Creates a ServiceManager from *cf*, registers one SearchService
        called *name* with it, then reads the search engine, the query
        filters/modifiers and the result filters/modifiers from the
        ``configuration_*`` tables and attaches them to the service.
        Classes are resolved by name through this module's ``globals()``.

        Parameters:

        * con: database connection; only ``con.cursor()`` is used
        * cf: configuration handed to ServiceManager
        * name: name handed to SearchService

        Raises:

        * IndexError: the search engine query returns no row
        * KeyError: the configured search engine class is not in globals()
        """
        cur = con.cursor()

        # create a ServiceManager
        self.sm = ServiceManager(cf)

        # create SearchServices
        self.search_service = SearchService(self.sm, name)

        # Add SearchServices to ServiceManager
        self.sm.add_search_service(self.search_service)

        # searching search engine in the database
        cur.execute(
            "SELECT configuration_searchengine.searchEngine FROM configuration_searchengineused,configuration_searchengine  WHERE searchEngine_id = configuration_searchengine.id"
        )
        entries = cur.fetchall()
        entry = entries[0]  # we only take the first search engine

        print("Search engine " + entry[0])

        # NOTE(review): the id is hard-coded to 1 instead of being taken from
        # the row selected above -- confirm this is intended.
        cur.execute(
            "SELECT key, value FROM configuration_parameters WHERE searchEngineUsed_id=1"
        )
        # Collect the engine parameters as key -> value.  (The previous code
        # assigned into a misspelled, undefined name and raised NameError.)
        dictPar = dict((row[0], row[1]) for row in cur.fetchall())

        # Assign SearchEngine to SearchServices
        self.search_service.search_engine = globals()[entry[0]](
            self.search_service, **dictPar)

        # searching the QueryFilters
        cur.execute(
            "SELECT numOrder, configuration_queryfilter.queryFilter, configuration_queryfilterorder.id  FROM configuration_queryfilterorder,configuration_queryfilter  WHERE queryFilter_id = configuration_queryfilter.id"
        )
        entries = cur.fetchall()

        for entry in entries:
            try:
                ptr_func = globals()[entry[1]]
            except KeyError:
                # Unknown class name: report it and skip this row.
                print("Error: " + str(entry[1]) + " not found")
                continue

            # entry[2] is the order-row id read from the database above.
            cur.execute(
                "SELECT key, value FROM configuration_parameterq WHERE queryFilterOrder_id="
                + str(entry[2]))
            dictPar = dict((row[0], row[1]) for row in cur.fetchall())

            print("Query filter " + str(
                entry[0]) + " " + entry[1] + " " + " " + str(dictPar))

            # A class may derive from both bases, hence two independent checks.
            if issubclass(ptr_func, QueryModifier):
                self.search_service.add_query_modifier(ptr_func(**dictPar))
                print("QueryModifier")
            if issubclass(ptr_func, QueryFilter):
                self.search_service.add_query_filter(ptr_func(**dictPar))
                print("QueryFilter")

        # searching the ResultFilters
        cur.execute(
            "SELECT numOrder, configuration_resultfilter.resultFilter, configuration_resultfilterorder.id FROM configuration_resultfilterorder,configuration_resultfilter  WHERE resultFilter_id = configuration_resultfilter.id"
        )
        entries = cur.fetchall()

        for entry in entries:
            try:
                ptr_func = globals()[entry[1]]
            except KeyError:
                # Unknown class name: report it and skip this row.
                print("Error: " + str(entry[1]) + " not found")
                continue

            cur.execute(
                "SELECT key, value FROM configuration_parameterr WHERE resultFilterOrder_id="
                + str(entry[2]))
            dictPar = dict((row[0], row[1]) for row in cur.fetchall())

            print("Result filter " + str(
                entry[0]) + " " + entry[1] + " " + str(
                    ptr_func.__bases__) + " " + str(dictPar))

            if issubclass(ptr_func, ResultModifier):
                self.search_service.add_result_modifier(ptr_func(**dictPar))
                print("ResultModifier")
            if issubclass(ptr_func, ResultFilter):
                # NOTE(review): result filters are built without dictPar,
                # unlike the three other cases -- confirm this is intended.
                self.search_service.add_result_filter(ptr_func())
                print("ResultFilter")
Exemple #2
0
    whoosh_dir = os.path.join(os.getcwd(), "mase/query_logs/index")

# Settings handed to ServiceManager.
config = {
    "proxyhost": "http://wwwcache.gla.ac.uk:8080",  # <-- remove if not UGLW
    "log_dir": log_dir,
    # Obtain key from https://datamarket.azure.com/dataset/5BA839F1-12CE-4CCE-BF57-A49D98D29A44
    # NOTE(review): an API key is committed in source.  BING_API_KEY in the
    # environment now takes precedence; the literal is kept only as a
    # fallback so existing deployments keep working -- rotate and remove it.
    "bing_api_key": os.environ.get(
        "BING_API_KEY", "/aROdM5Ck7fKHR4ge30r8W/K/D84GJkcl42lL8eNMSc="),
}

# create a ServiceManager (it was previously constructed twice in a row and
# the first instance was discarded)
service = ServiceManager(config)

# create a Web SearchService
web_search_service = SearchService(service, "web_search")
web_search_service.search_engine = BingV3(web_search_service, source="Web", resultsPerPage=4)
# Add Web SearchService to ServiceManager
service.add_search_service(web_search_service)

# Create a blacklist filter to block queries containing the terms below
query_black_list = BlackListModifier(terms="bad worse nasty filthy")

# Add our blacklist filter to the web search service
web_search_service.add_query_modifier(query_black_list)


# create a file based QUERY LOGGER
web_search_service.query_logger = QueryLogger(web_search_service, log_mode=0)
web_search_service.postLogging = True

# create an index based QUERY LOGGER
Exemple #3
0
    log_dir = os.path.join(DOLLEMANPATH, log_dir)
else:
    whoosh_dir = os.path.join(os.getcwd(), "mase/query_logs/index")

# Settings handed to ServiceManager below.
config = {
    #"proxyhost": "http://wwwcache.gla.ac.uk:8080", # <-- remove if not UGLW
    "log_dir": log_dir,
    "bing_api_key":
    "",  # Obtain key from https://datamarket.azure.com/dataset/5BA839F1-12CE-4CCE-BF57-A49D98D29A44
}

# create a ServiceManager
service = ServiceManager(config)

# create a Web SearchService backed by BingV3
web_search_service = SearchService(service, "web_search")
web_search_service.search_engine = BingV3(web_search_service,
                                          source='Web',
                                          resultsPerPage=4)
# Add Web SearchService to ServiceManager
service.add_search_service(web_search_service)

# Create a blacklist filter to block queries containing the terms below
query_black_list = BlackListModifier(terms="bad worse nasty filthy")

# Add our blacklist filter to the web search service
web_search_service.add_query_modifier(
    query_black_list)

# create a file based QUERY LOGGER
# NOTE(review): log_mode=0 presumably selects file logging -- confirm against QueryLogger
web_search_service.query_logger = QueryLogger(web_search_service, log_mode=0)
web_search_service.postLogging = True
Exemple #4
0
    def __init__(self, cf, name="", dictParS=None):
        """Constructor for the service.

        Builds a ServiceManager and one SearchService, then attaches the
        search engine, query filters/modifiers and result filters/modifiers
        described by the Django configuration models.  Classes are resolved
        by name through this module's ``globals()``.

        Parameters:

        * cf: dictionary with some environment values (see ServiceManager)
        * name: the name for the created search service (see SearchService)
        * dictParS: extra parameters for the search engine, as a dictionary
          parameterName:value.  Values stored in the database for the
          selected engine override entries given here.  The dictionary is
          copied, never modified.

        Raises:

        * IndexError: no SearchEngineUsed row exists
        * NameError: the configured search engine class is not defined in
          this module
        """
        # if we are outside Django, we need to define this here, not in the
        # general settings.py as in a Django app.
        if not hasattr(settings, 'DATABASE_NAME'):
            settings.DATABASES['default']['NAME'] = os.path.join(APP_DIR, 'puppy/interface/interface.db')
        else:
            settings.DATABASE_NAME = 'puppy/interface/interface.db'

        # create a ServiceManager
        self.sm = ServiceManager(cf)

        # create SearchServices
        self.search_service = SearchService(self.sm, name)

        # Add SearchServices to ServiceManager
        self.sm.add_search_service(self.search_service)

        # searching search engine in the database
        entries = SearchEngineUsed.objects.all()
        entry = entries[0]  # we only take the first search engine
        engine_name = entry.searchEngine.searchEngine

        print(engine_name)

        try:
            ptr_func = globals()[engine_name]
        except KeyError:
            print("Error: " + str(entry) + " not found")
            # Without an engine class nothing below can work; fail here with
            # a clear message instead of an unbound-variable error later.
            raise NameError(
                "search engine class %r is not defined" % (engine_name,))

        # Work on a private copy: writing into the argument (or into a shared
        # default dict) would leak parameters between instances.
        engine_params = dict(dictParS) if dictParS else {}
        for parameter in ParameterS.objects.filter(searchEngineUsed=entry):
            print(parameter)
            engine_params[parameter.key] = parameter.value

        self.search_service.search_engine = ptr_func(self.search_service, **engine_params)

        # searching the QueryFilters
        entries = QueryFilterOrder.objects.all()

        for entry in entries:
            try:
                ptr_func = globals()[entry.queryFilter.queryFilter]
            except Exception:
                # Best effort: an entry that cannot be resolved is skipped.
                print("Error: " + str(entry) + " not found")
                continue

            dictPar = {}
            for parameter in ParameterQ.objects.filter(queryFilterOrder=entry):
                print(parameter)
                dictPar[parameter.key] = parameter.value

            print("Query filter " + str(entry.numOrder) + " " + entry.queryFilter.queryFilter + " " + " " + str(dictPar))

            # A class may derive from both bases, hence two independent checks.
            if issubclass(ptr_func, QueryModifier):
                self.search_service.add_query_modifier(ptr_func(order=entry.numOrder, **dictPar))
                print("QueryModifier")
            if issubclass(ptr_func, QueryFilter):
                self.search_service.add_query_filter(ptr_func(order=entry.numOrder, **dictPar))
                print("QueryFilter")

        # searching the ResultFilters
        entries = ResultFilterOrder.objects.all()

        for entry in entries:
            try:
                ptr_func = globals()[entry.resultFilter.resultFilter]
            except Exception:
                # Best effort: an entry that cannot be resolved is skipped.
                print("Error: " + str(entry) + " not found")
                continue

            dictPar = {}
            for parameter in ParameterR.objects.filter(resultFilterOrder=entry):
                print(parameter)
                dictPar[parameter.key] = parameter.value

            print("Result filter " + str(entry.numOrder) + " " + entry.resultFilter.resultFilter + " " + " " + str(dictPar))

            if issubclass(ptr_func, ResultModifier):
                self.search_service.add_result_modifier(ptr_func(order=entry.numOrder, **dictPar))
                print("ResultModifier")
            if issubclass(ptr_func, ResultFilter):
                self.search_service.add_result_filter(ptr_func(order=entry.numOrder, **dictPar))
                print("ResultFilter")
    def __init__(self, cf, name="", dictParS=None):
        """Constructor for the service.

        Builds a ServiceManager and one SearchService, then attaches the
        search engine, query filters/modifiers and result filters/modifiers
        described by the Django configuration models.  Classes are resolved
        by name through this module's ``globals()``.

        Parameters:

        * cf: dictionary with some environment values (see ServiceManager)
        * name: the name for the created search service (see SearchService)
        * dictParS: extra parameters for the search engine, as a dictionary
          parameterName:value.  Values stored in the database for the
          selected engine override entries given here.  The dictionary is
          copied, never modified.

        Raises:

        * IndexError: no SearchEngineUsed row exists
        * NameError: the configured search engine class is not defined in
          this module
        """
        # if we are outside Django, we need to define this here, not in the
        # general settings.py as in a Django app.
        if not hasattr(settings, 'DATABASE_NAME'):
            settings.DATABASES['default']['NAME'] = os.path.join(
                APP_DIR, 'puppy/interface/interface.db')
        else:
            settings.DATABASE_NAME = 'puppy/interface/interface.db'

        # create a ServiceManager
        self.sm = ServiceManager(cf)

        # create SearchServices
        self.search_service = SearchService(self.sm, name)

        # Add SearchServices to ServiceManager
        self.sm.add_search_service(self.search_service)

        # searching search engine in the database
        entries = SearchEngineUsed.objects.all()
        entry = entries[0]  # we only take the first search engine
        engine_name = entry.searchEngine.searchEngine

        print(engine_name)

        try:
            ptr_func = globals()[engine_name]
        except KeyError:
            print("Error: " + str(entry) + " not found")
            # Without an engine class nothing below can work; fail here with
            # a clear message instead of an unbound-variable error later.
            raise NameError(
                "search engine class %r is not defined" % (engine_name,))

        # Work on a private copy: writing into the argument (or into a shared
        # default dict) would leak parameters between instances.
        engine_params = dict(dictParS) if dictParS else {}
        for parameter in ParameterS.objects.filter(searchEngineUsed=entry):
            print(parameter)
            engine_params[parameter.key] = parameter.value

        self.search_service.search_engine = ptr_func(self.search_service,
                                                     **engine_params)

        # searching the QueryFilters
        entries = QueryFilterOrder.objects.all()

        for entry in entries:
            try:
                ptr_func = globals()[entry.queryFilter.queryFilter]
            except Exception:
                # Best effort: an entry that cannot be resolved is skipped.
                print("Error: " + str(entry) + " not found")
                continue

            dictPar = {}
            for parameter in ParameterQ.objects.filter(queryFilterOrder=entry):
                print(parameter)
                dictPar[parameter.key] = parameter.value

            print("Query filter " + str(
                entry.numOrder
            ) + " " + entry.queryFilter.queryFilter + " " + " " + str(dictPar))

            # A class may derive from both bases, hence two independent checks.
            if issubclass(ptr_func, QueryModifier):
                self.search_service.add_query_modifier(
                    ptr_func(order=entry.numOrder, **dictPar))
                print("QueryModifier")
            if issubclass(ptr_func, QueryFilter):
                self.search_service.add_query_filter(
                    ptr_func(order=entry.numOrder, **dictPar))
                print("QueryFilter")

        # searching the ResultFilters
        entries = ResultFilterOrder.objects.all()

        for entry in entries:
            try:
                ptr_func = globals()[entry.resultFilter.resultFilter]
            except Exception:
                # Best effort: an entry that cannot be resolved is skipped.
                print("Error: " + str(entry) + " not found")
                continue

            dictPar = {}
            for parameter in ParameterR.objects.filter(
                    resultFilterOrder=entry):
                print(parameter)
                dictPar[parameter.key] = parameter.value

            print("Result filter " + str(
                entry.numOrder
            ) + " " + entry.resultFilter.resultFilter + " " + " " + str(
                dictPar))

            if issubclass(ptr_func, ResultModifier):
                self.search_service.add_result_modifier(
                    ptr_func(order=entry.numOrder, **dictPar))
                print("ResultModifier")
            if issubclass(ptr_func, ResultFilter):
                self.search_service.add_result_filter(
                    ptr_func(order=entry.numOrder, **dictPar))
                print("ResultFilter")
    def __init__(self, con, cf, name=""):
        """Build a search service configured from the database behind *con*.

        Creates a ServiceManager from *cf*, registers one SearchService
        called *name* with it, then reads the search engine, the query
        filters/modifiers and the result filters/modifiers from the
        ``configuration_*`` tables and attaches them to the service.
        Classes are resolved by name through this module's ``globals()``.

        Parameters:

        * con: database connection; only ``con.cursor()`` is used
        * cf: configuration handed to ServiceManager
        * name: name handed to SearchService

        Raises:

        * IndexError: the search engine query returns no row
        * KeyError: the configured search engine class is not in globals()
        """
        cur = con.cursor()

        # create a ServiceManager
        self.sm = ServiceManager(cf)

        # create SearchServices
        self.search_service = SearchService(self.sm, name)

        # Add SearchServices to ServiceManager
        self.sm.add_search_service(self.search_service)

        # searching search engine in the database
        cur.execute("SELECT configuration_searchengine.searchEngine FROM configuration_searchengineused,configuration_searchengine  WHERE searchEngine_id = configuration_searchengine.id")
        entries = cur.fetchall()
        entry = entries[0]  # we only take the first search engine

        print("Search engine " + entry[0])

        # NOTE(review): the id is hard-coded to 1 instead of being taken from
        # the row selected above -- confirm this is intended.
        cur.execute("SELECT key, value FROM configuration_parameters WHERE searchEngineUsed_id=1")
        # Collect the engine parameters as key -> value.  (The previous code
        # assigned into a misspelled, undefined name and raised NameError.)
        dictPar = dict((row[0], row[1]) for row in cur.fetchall())

        # Assign SearchEngine to SearchServices
        self.search_service.search_engine = globals()[entry[0]](self.search_service, **dictPar)

        # searching the QueryFilters
        cur.execute("SELECT numOrder, configuration_queryfilter.queryFilter, configuration_queryfilterorder.id  FROM configuration_queryfilterorder,configuration_queryfilter  WHERE queryFilter_id = configuration_queryfilter.id")
        entries = cur.fetchall()

        for entry in entries:
            try:
                ptr_func = globals()[entry[1]]
            except KeyError:
                # Unknown class name: report it and skip this row.
                print("Error: " + str(entry[1]) + " not found")
                continue

            # entry[2] is the order-row id read from the database above.
            cur.execute("SELECT key, value FROM configuration_parameterq WHERE queryFilterOrder_id=" + str(entry[2]))
            dictPar = dict((row[0], row[1]) for row in cur.fetchall())

            print("Query filter " + str(entry[0]) + " " + entry[1] + " " + " " + str(dictPar))

            # A class may derive from both bases, hence two independent checks.
            if issubclass(ptr_func, QueryModifier):
                self.search_service.add_query_modifier(ptr_func(**dictPar))
                print("QueryModifier")
            if issubclass(ptr_func, QueryFilter):
                self.search_service.add_query_filter(ptr_func(**dictPar))
                print("QueryFilter")

        # searching the ResultFilters
        cur.execute("SELECT numOrder, configuration_resultfilter.resultFilter, configuration_resultfilterorder.id FROM configuration_resultfilterorder,configuration_resultfilter  WHERE resultFilter_id = configuration_resultfilter.id")
        entries = cur.fetchall()

        for entry in entries:
            try:
                ptr_func = globals()[entry[1]]
            except KeyError:
                # Unknown class name: report it and skip this row.
                print("Error: " + str(entry[1]) + " not found")
                continue

            cur.execute("SELECT key, value FROM configuration_parameterr WHERE resultFilterOrder_id=" + str(entry[2]))
            dictPar = dict((row[0], row[1]) for row in cur.fetchall())

            print("Result filter " + str(entry[0]) + " " + entry[1] + " " + str(ptr_func.__bases__) + " " + str(dictPar))

            if issubclass(ptr_func, ResultModifier):
                self.search_service.add_result_modifier(ptr_func(**dictPar))
                print("ResultModifier")
            if issubclass(ptr_func, ResultFilter):
                # NOTE(review): result filters are built without dictPar,
                # unlike the three other cases -- confirm this is intended.
                self.search_service.add_result_filter(ptr_func())
                print("ResultFilter")
Exemple #7
0
            #print "page len" + str(pagelen)
            results = []
            reponse = []
            with self.docIndex.searcher() as searcher:
                results = searcher.search_page( query_terms, page, pagelen=pagelen )
  #             results = searcher.search( query_terms )

                results.fragmenter = highlight.ContextFragmenter(maxchars=300, surround=300)
                results.formatter = highlight.HtmlFormatter()
                results.fragmenter.charlimit = 100000

                print "WhooshTRECNewsEngine found: " + str(len(results)) + " results"
                print  "Page %d of %d - PageLength of %d" % (results.pagenum, results.pagecount, results.pagelen)
                response = parse_whoosh_trec('WhooshTRECNewsEngine', query.search_terms, results)
            return response
        except:
            print "Error in Search Service: Whoosh TREC News search failed"



# Settings handed to ServiceManager below; only the log directory is set.
config = { "log_dir": "treconomics/query_logs",  }

# create a ServiceManager
service = ServiceManager(config)

# create a SearchService named "trec_search"
trec_search_service = SearchService(service, "trec_search")

# choose the search engine: WhooshTRECNewsEngine over my_whoosh_doc_index_dir
# NOTE(review): no query logger is assigned in this snippet -- confirm where
# query logging is enabled.
trec_search_service.search_engine = WhooshTRECNewsEngine(trec_search_service, my_whoosh_doc_index_dir)
# add SearchService to ServiceManager
service.add_search_service(trec_search_service)
Exemple #8
0
                results.fragmenter = highlight.ContextFragmenter(maxchars=300,
                                                                 surround=300)
                results.formatter = highlight.HtmlFormatter()
                results.fragmenter.charlimit = 100000

                print "WhooshTRECNewsEngine found: " + str(
                    len(results)) + " results"
                print "Page %d of %d - PageLength of %d" % (
                    results.pagenum, results.pagecount, results.pagelen)
                response = parse_whoosh_trec('WhooshTRECNewsEngine',
                                             query.search_terms, results)
            return response
        except:
            print "Error in Search Service: Whoosh TREC News search failed"


# Settings handed to ServiceManager below; only the log directory is set.
config = {
    "log_dir": "treconomics/query_logs",
}

# create a ServiceManager
service = ServiceManager(config)

# create a SearchService named "trec_search"
trec_search_service = SearchService(service, "trec_search")

# choose the search engine: WhooshTRECNewsEngine over my_whoosh_doc_index_dir
# NOTE(review): no query logger is assigned in this snippet -- confirm where
# query logging is enabled.
trec_search_service.search_engine = WhooshTRECNewsEngine(
    trec_search_service, my_whoosh_doc_index_dir)
# add SearchService to ServiceManager
service.add_search_service(trec_search_service)