def __init__(self, api_key='', username='', google_api_key='', **kwargs):
    """
    Neutrinogeoaddress engine constructor.

    Kwargs:
        api_key (str): API key needed to access the search api; required.
        username (str): username associated with the service; required.
        google_api_key (str): Google API key which has map API permissions, used for
            generating the iframe url for embedded maps.
        country_code (str): optional, stored as self.country_code; defaults to 'GB'.
        language_code (str): optional, stored as self.language_code; defaults to ''.
        See Engine.

    Raises:
        EngineAPIKeyException: if api_key or username is empty.

    Usage:
        engine = EngineFactory('Neutrinogeoaddress', api_key='etc123456etc123456etc123456',
                               username='******', google_api_key='12313414412')

    """
    Engine.__init__(self, **kwargs)

    self.api_key, self.username, self.google_api_key = api_key, username, google_api_key

    # Optional locale settings; the country code falls back to GB if not found.
    self.country_code = kwargs.get('country_code', 'GB')
    self.language_code = kwargs.get('language_code', '')

    # Both credentials are mandatory; the API key is checked first.
    if not api_key:
        raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")
    if not username:
        raise EngineAPIKeyException(self.name, "'username=' keyword argument not specified")
def __init__(self, api_key='', cx='', **kwargs):
    """
    Google engine constructor.

    Kwargs:
        api_key (str): api key needed to access google custom search api; required.
        cx (str): the cx parameter needed to access google custom search api; required.
        default_result_type (str): optional default result type; DEFAULT_RESULT_TYPE is
            used when it is missing or empty.
        See Engine.

    Raises:
        EngineAPIKeyException: if api_key or cx is empty.

    Usage:
        engine = EngineFactory('GoogleCSE', api_key='etc123456etc123456etc123456', cx='abc123abc123abc123')

    """
    Engine.__init__(self, **kwargs)

    self.api_key = api_key
    self.cx = cx

    if not api_key:
        raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")
    if not cx:
        raise EngineAPIKeyException(self.name, "'cx=' keyword argument not specified")

    # A missing key and a falsy value (empty strings and such) both fall back to the default.
    self.default_result_type = kwargs.get('default_result_type') or DEFAULT_RESULT_TYPE
def __init__(self, api_key='', **kwargs):
    """
    Googleplus engine constructor.

    Kwargs:
        api_key (str): api key needed to access the search api; required.
        default_result_type (str): optional default result type; DEFAULT_RESULT_TYPE is
            used when it is missing or empty.
        See Engine.

    Raises:
        EngineAPIKeyException: if api_key is empty.

    Usage:
        engine = EngineFactory('Googleplus', api_key='etc123456etc123456etc123456')

    """
    Engine.__init__(self, **kwargs)

    self.api_key = api_key
    if not api_key:
        raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")

    # Catch empty strings and such: anything falsy is replaced by the default.
    requested_type = kwargs.get('default_result_type')
    self.default_result_type = requested_type if requested_type else DEFAULT_RESULT_TYPE
def __init__(self, whoosh_index_dir='', stopwords_file='', model=1, implicit_or=False, **kwargs):
    """
    Whoosh engine constructor.

    Kwargs:
        whoosh_index_dir (str): path of the Whoosh index directory to open; required.
        stopwords_file (str): optional path of a stopwords file; when given it is read
            into a ListReader stored as self.stopwords.
        model (int): identifier handed to self.set_model().
        implicit_or (bool): when true the query parser groups terms with OrGroup,
            otherwise with AndGroup.
        See Engine.

    Raises:
        EngineConnectionException: if whoosh_index_dir is empty, or if opening the index
            or configuring the parser, fragmenter or model fails.

    Usage:
        See EngineFactory.

    """
    Engine.__init__(self, **kwargs)

    self.whoosh_index_dir = whoosh_index_dir
    if not self.whoosh_index_dir:
        raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

    self.stopwords_file = stopwords_file
    if self.stopwords_file:
        self.stopwords = ListReader(self.stopwords_file)  # Open the stopwords file, read into a ListReader

    self.snippet_size = 3
    self.implicit_or = implicit_or

    try:
        # This creates a static docIndex for ALL instance of WhooshTrec.
        # This will not work if you want indexes from multiple sources.
        # As this currently is not the case, this is a suitable fix.
        if not hasattr(Whooshtrec, 'docIndex'):
            Whooshtrec.docIndex = open_dir(whoosh_index_dir)

        log.debug("Whoosh Document index open: {0}".format(whoosh_index_dir))
        log.debug("Documents in index: {0}".format(self.docIndex.doc_count()))

        # Prefer the 'alltext' field when the schema provides one.
        self._field = 'content'
        if 'alltext' in self.docIndex.schema:
            self._field = 'alltext'
            log.debug("Using all text field")

        if self.implicit_or:
            self.parser = QueryParser(self._field, self.docIndex.schema, group=OrGroup)
            log.debug("OR Query parser created")
        else:
            self.parser = QueryParser(self._field, self.docIndex.schema, group=AndGroup)
            log.debug("AND Query parser created")

        self.analyzer = self.docIndex.schema[self.parser.fieldname].analyzer
        self.set_fragmenter()
        self.set_model(model)
    except Exception:
        # Catch Exception rather than using a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into a connection error.
        msg = "Could not open Whoosh index at: " + whoosh_index_dir
        raise EngineConnectionException(self.name, msg)
def __init__(self, **kwargs):
    """
    Wikipedia engine constructor.

    All keyword arguments are forwarded unchanged to Engine.__init__; this
    constructor adds no state of its own.

    Kwargs:
        See Engine.

    Usage:
        See EngineFactory.

    """
    Engine.__init__(self, **kwargs)
def __init__(self, **kwargs):
    """
    GOV.uk engine constructor.

    All keyword arguments are forwarded unchanged to Engine.__init__; this
    constructor adds no state of its own.

    Kwargs:
        See Engine.

    Usage:
        See EngineFactory.

    """
    Engine.__init__(self, **kwargs)
def __init__(self, whoosh_index_dir="", use_cache=True, cache_host="localhost", cache_port=6379, **kwargs):
    """
    Constructor for the engine.

    Kwargs:
        whoosh_index_dir (str): path of the Whoosh index directory to open; required.
        use_cache (bool): when true, connect to the Redis cache and create the
            page cache controller.
        cache_host (str): host passed to RedisConn.
        cache_port (int): port passed to RedisConn.
        See Engine.

    Raises:
        EngineConnectionException: if whoosh_index_dir is empty, or if setting up the
            index raises EmptyIndexError or OSError.

    """
    Engine.__init__(self, **kwargs)

    self.whoosh_index_dir = whoosh_index_dir
    if not whoosh_index_dir:
        raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

    # Only put PL2 in for now (for more, add the model parameter to the constructor to specify!)
    self.scoring_model_identifier = 1
    self.scoring_model = scoring.PL2(c=10.0)

    try:
        index = open_dir(self.whoosh_index_dir)
        self.doc_index = index
        self.reader = index.reader()

        # By default, we use AND grouping.
        # Use the grouping parameter and specify whoosh.qparser.OrGroup, etc...
        self.parser = QueryParser("content", index.schema)

        # Objects required for document snippet generation
        self.analyzer = index.schema[self.parser.fieldname].analyzer
        self.fragmenter = ContextFragmenter(maxchars=200, surround=40)
        self.formatter = HtmlFormatter()
    except EmptyIndexError:
        raise EngineConnectionException(
            self.name,
            "Could not open Whoosh index at '{0}'".format(self.whoosh_index_dir))
    except OSError:
        raise EngineConnectionException(
            self.name,
            "Could not open Whoosh index at '{0}' - directory does not exist".format(self.whoosh_index_dir))

    self.use_cache = use_cache
    if not use_cache:
        # Without a cache there is nothing further to set up.
        return

    redis_cache = RedisConn(host=cache_host, port=cache_port)
    self.cache = redis_cache
    redis_cache.connect()

    self.page_cache_forward_look = 40  # How many additional pages to cache when required.
    self.page_cache_when = 4  # When the user is x pages away from the end of the page cache, cache more pages.

    self.page_cache_controller = PageCacheController(
        cache_host=self.cache.host,
        cache_port=self.cache.port,
        whoosh_index=self.doc_index,
        scoring_model_identifier=self.scoring_model_identifier,
        parser=self.parser,
        analyzer=self.analyzer,
        fragmenter=self.fragmenter,
        formatter=self.formatter,
        cache_forward_look=self.page_cache_forward_look,
    )
def __init__(self, whoosh_index_dir='', model=1, implicit_or=False, use_cache=False,
             interleave=False, interleave_continuous=False, **kwargs):
    """
    Whoosh engine constructor.

    Kwargs:
        whoosh_index_dir (str): path of the Whoosh index directory to open; required.
        model (int): scoring model selector. 0 selects TF_IDF, 2 selects PL2 with
            default values, 3 selects BM25F(B=1) (BM11); any other value, including
            the default 1, selects BM25F(B=0.25).
        implicit_or (bool): do we implicitly join terms together with ORs?
        use_cache (bool): stored as self.use_cache. A redis.StrictRedis client for
            localhost:6379 (db 0) is created regardless of this flag.
        interleave: should we interleave results, and how often?
        interleave_continuous (bool): do we continue to interleave after the initial loop?
        See Engine.

    Raises:
        EngineConnectionException: if whoosh_index_dir is empty, or if opening the
            index or building the query parser fails.

    Usage:
        See EngineFactory.

    """
    Engine.__init__(self, **kwargs)

    self.whoosh_index_dir = whoosh_index_dir
    if not self.whoosh_index_dir:
        raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

    self.use_cache = use_cache
    self.cache = redis.StrictRedis(host='localhost', port=6379, db=0)

    self.interleave = interleave  # Should we interleave results, and how often?
    self.interleave_continuous = interleave_continuous  # Do we continue to interleave after the initial loop?

    self.implicit_or = implicit_or  # Do we implicitly join terms together with ORs?

    if model == 0:
        self.scoring_model = scoring.TF_IDF()  # Use the TFIDF scoring module
    elif model == 2:
        self.scoring_model = scoring.PL2()  # Use PL2 with default values
    elif model == 3:
        self.scoring_model = scoring.BM25F(B=1)  # BM11
    else:
        self.scoring_model = scoring.BM25F(B=0.25)  # Use the BM25F scoring module (B=0.75 is default for Whoosh)

    try:
        # This creates a static docIndex for ALL instance of WhooshTrecNews.
        # This will not work if you want indexes from multiple sources.
        # As this currently is not the case, this is a suitable fix.
        if not hasattr(WhooshTrecNews, 'docIndex'):
            WhooshTrecNews.docIndex = open_dir(whoosh_index_dir)

        # Single-argument print calls parse on both Python 2 and Python 3; the double
        # space reproduces the separator the old print statement emitted.
        print("Whoosh Document index open:  %s" % (whoosh_index_dir,))
        print("Documents in index:  %s" % (self.docIndex.doc_count(),))

        self.parser = QueryParser("content", self.docIndex.schema)
    except Exception:
        # Catch Exception rather than using a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into a connection error.
        msg = "Could not open Whoosh index at: " + whoosh_index_dir
        raise EngineConnectionException(self.name, msg)
def __init__(self, **kwargs):
    """
    Twitter engine constructor.

    Kwargs:
        See Engine.

    Raises:
        EngineAPIKeyException: if any of CONSUMER_KEY, CONSUMER_SECRET,
            ACCESS_TOKEN_KEY or ACCESS_TOKEN_SECRET is empty.

    Usage:
        engine = EngineFactory('twitter')

    """
    Engine.__init__(self, **kwargs)

    # All four OAuth values must be set before the engine can be used.
    oauth_configured = CONSUMER_KEY and CONSUMER_SECRET and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET
    if not oauth_configured:
        raise EngineAPIKeyException(self.name, 'OAuth details not supplied')
def __init__(self, api_key='', **kwargs):
    """
    Pipl engine constructor.

    Kwargs:
        api_key (str): api key stored as self.api_key; required.
        See Engine.

    Raises:
        EngineAPIKeyException: if api_key is empty.

    Usage:
        engine = EngineFactory('Pipl', api_key='etc123456etc123456etc123456')

    """
    Engine.__init__(self, **kwargs)

    self.api_key = api_key
    if not api_key:
        missing_key_message = "'api_key=' keyword argument not specified"
        raise EngineAPIKeyException(self.name, missing_key_message)
def __init__(self, api_key='', **kwargs):
    """
    Bing engine constructor.

    Kwargs:
        api_key (str): api key needed to access bing search api; required.
        See Engine.

    Raises:
        EngineAPIKeyException: if api_key is empty.

    Usage:
        engine = EngineFactory('bing', api_key='etc123456etc123456etc123456')

    """
    Engine.__init__(self, **kwargs)

    self.api_key = api_key

    # The search api cannot be used without a key, so fail at construction time.
    key_supplied = bool(api_key)
    if not key_supplied:
        raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")
def __init__(self, api_key='', **kwargs):
    """
    Facebook engine constructor.

    Kwargs:
        api_key (str): api key stored as self.api_key; required.
        See Engine.

    Raises:
        EngineAPIKeyException: if api_key is empty.

    Usage:
        engine = EngineFactory('Facebook', api_key='etc123456etc123456etc123456')

    """
    Engine.__init__(self, **kwargs)

    self.api_key = api_key
    if not api_key:
        raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")
def __init__(self, api_key='', **kwargs):
    """
    Bing engine constructor.

    Kwargs:
        api_key (str): api key needed to access bing search api; required.
        See Engine.

    Raises:
        EngineAPIKeyException: if api_key is empty.

    Usage:
        engine = EngineFactory('bing', api_key='etc123456etc123456etc123456')

    """
    Engine.__init__(self, **kwargs)

    self.api_key = api_key
    if not api_key:
        error_text = "'api_key=' keyword argument not specified"
        raise EngineAPIKeyException(self.name, error_text)
def __init__(self, whoosh_index_dir='', stopwords_file='', cache_host='localhost', cache_port=6379, **kwargs):
    """
    Constructor for the engine.

    Kwargs:
        whoosh_index_dir (str): path of the Whoosh index directory to open; required.
        stopwords_file (str): path of the stopwords file, read into a ListReader; required.
        cache_host (str): host passed to RedisConn.
        cache_port (int): port passed to RedisConn.
        See Engine.

    Raises:
        EngineConnectionException: if whoosh_index_dir or stopwords_file is empty, or
            if setting up the index raises EmptyIndexError or OSError.

    """
    Engine.__init__(self, **kwargs)

    self.whoosh_index_dir = whoosh_index_dir
    if not whoosh_index_dir:
        raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

    self.stopwords_file = stopwords_file
    if not stopwords_file:
        raise EngineConnectionException(self.name, "'stopwords_file=' keyword argument not specified")
    self.stopwords = ListReader(stopwords_file)  # Open the stopwords file, read into a ListReader

    self.scoring_model_identifier = 1
    self.scoring_model = scoring.PL2(c=10.0)

    self.__verbose = False

    try:
        index = open_dir(self.whoosh_index_dir)
        self.doc_index = index
        self.reader = index.reader()

        # By default, we use AND grouping.
        # Use the grouping parameter and specify whoosh.qparser.OrGroup, etc...
        self.parser = QueryParser('content', index.schema)

        # Objects required for document snippet generation
        self.analyzer = index.schema[self.parser.fieldname].analyzer
        self.fragmenter = ContextFragmenter(maxchars=200, surround=40)
        self.formatter = HtmlFormatter()
    except EmptyIndexError:
        raise EngineConnectionException(
            self.name,
            "Could not open Whoosh index at '{0}'".format(self.whoosh_index_dir))
    except OSError:
        raise EngineConnectionException(
            self.name,
            "Could not open Whoosh index at '{0}' - directory does not exist".format(self.whoosh_index_dir))

    # Attempt to connect to the specified Redis cache.
    redis_cache = RedisConn(host=cache_host, port=cache_port)
    self.cache = redis_cache
    redis_cache.connect()
def __init__(self, **kwargs):
    """
    Twitter engine constructor.

    Kwargs:
        default_result_type (str): optional default result type; DEFAULT_RESULT_TYPE is
            used when it is missing or empty.
        See Engine.

    Raises:
        EngineAPIKeyException: if any of CONSUMER_KEY, CONSUMER_SECRET,
            ACCESS_TOKEN_KEY or ACCESS_TOKEN_SECRET is empty.

    Usage:
        engine = EngineFactory('twitter')

    """
    Engine.__init__(self, **kwargs)

    # All four OAuth values must be set before the engine can be used.
    if not (CONSUMER_KEY and CONSUMER_SECRET and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET):
        raise EngineAPIKeyException(self.name, 'OAuth details not supplied')

    # Catch empty strings and such: anything falsy is replaced by the default.
    self.default_result_type = kwargs.get('default_result_type') or DEFAULT_RESULT_TYPE
def __init__(self, whoosh_index_dir='', use_cache=True, cache_host='localhost', cache_port=6379, **kwargs):
    """
    Constructor for the engine.

    Kwargs:
        whoosh_index_dir (str): path of the Whoosh index directory to open; required.
        use_cache (bool): when true, connect to the Redis cache and create the
            page cache controller.
        cache_host (str): host passed to RedisConn.
        cache_port (int): port passed to RedisConn.
        See Engine.

    Raises:
        EngineConnectionException: if whoosh_index_dir is empty, or if setting up the
            index raises EmptyIndexError or OSError.

    """
    Engine.__init__(self, **kwargs)

    self.whoosh_index_dir = whoosh_index_dir
    if not whoosh_index_dir:
        raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

    # Only put PL2 in for now (for more, add the model parameter to the constructor to specify!)
    self.scoring_model_identifier = 1
    self.scoring_model = scoring.PL2(c=10.0)

    try:
        self.doc_index = open_dir(self.whoosh_index_dir)
        self.reader = self.doc_index.reader()
        schema = self.doc_index.schema

        # By default, we use AND grouping.
        # Use the grouping parameter and specify whoosh.qparser.OrGroup, etc...
        self.parser = QueryParser('content', schema)

        # Objects required for document snippet generation
        self.analyzer = schema[self.parser.fieldname].analyzer
        self.fragmenter = ContextFragmenter(maxchars=200, surround=40)
        self.formatter = HtmlFormatter()
    except EmptyIndexError:
        failure = "Could not open Whoosh index at '{0}'".format(self.whoosh_index_dir)
        raise EngineConnectionException(self.name, failure)
    except OSError:
        failure = "Could not open Whoosh index at '{0}' - directory does not exist".format(self.whoosh_index_dir)
        raise EngineConnectionException(self.name, failure)

    self.use_cache = use_cache
    if self.use_cache:
        self.cache = RedisConn(host=cache_host, port=cache_port)
        self.cache.connect()

        self.page_cache_forward_look = 40  # How many additional pages to cache when required.
        self.page_cache_when = 4  # When the user is x pages away from the end of the page cache, cache more pages.

        # Everything the controller needs to build pages of results on its own.
        controller_options = {
            'cache_host': self.cache.host,
            'cache_port': self.cache.port,
            'whoosh_index': self.doc_index,
            'scoring_model_identifier': self.scoring_model_identifier,
            'parser': self.parser,
            'analyzer': self.analyzer,
            'fragmenter': self.fragmenter,
            'formatter': self.formatter,
            'cache_forward_look': self.page_cache_forward_look,
        }
        self.page_cache_controller = PageCacheController(**controller_options)
def __init__(self, **kwargs):
    """
    Engine constructor.

    All keyword arguments are forwarded unchanged to Engine.__init__; this
    constructor adds no state of its own.

    Kwargs:
        See Engine.

    """
    Engine.__init__(self, **kwargs)