Beispiel #1
0
    def __init__(self, api_key='', username='', google_api_key='', **kwargs):
        """
        Neutrinogeoaddress engine constructor.

        Kwargs:
            api_key (str): API key needed to access the search api; must be non-empty.
            username (str): username associated with the service; must be non-empty.
            google_api_key (str): Google API key which has map API permissions, used for generating the
                iframe url for embedded maps.
            country_code (str): optional, read from kwargs; defaults to 'GB'.
            language_code (str): optional, read from kwargs; defaults to ''.
            See Engine.

        Raises:
            EngineAPIKeyException: if api_key or username is empty.

        Usage:
            engine = EngineFactory('Neutrinogeoaddress', api_key='etc123456etc123456etc123456',
                                   username='******', google_api_key='12313414412')

        """
        Engine.__init__(self, **kwargs)

        self.api_key, self.username, self.google_api_key = api_key, username, google_api_key

        # Locale settings are optional extras carried in kwargs; the country falls back to GB.
        self.country_code = kwargs.get('country_code', 'GB')
        self.language_code = kwargs.get('language_code', '')

        # The API key is checked before the username, so a missing key is reported first.
        if not self.api_key:
            raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")
        if not self.username:
            raise EngineAPIKeyException(self.name, "'username=' keyword argument not specified")
Beispiel #2
0
    def __init__(self, api_key='', cx='', **kwargs):
        """
        Google custom search engine constructor.

        Kwargs:
            api_key (str): API key needed to access google custom search api; must be non-empty.
            cx (str): the cx parameter needed to access google custom search api; must be non-empty.
            default_result_type (str): optional, read from kwargs; a missing or empty value falls back
                to DEFAULT_RESULT_TYPE.
            See Engine.

        Raises:
            EngineAPIKeyException: if api_key or cx is empty.

        Usage:
            engine = EngineFactory('GoogleCSE', api_key='etc123456etc123456etc123456', cx='abc123abc123abc123')

        """
        Engine.__init__(self, **kwargs)
        self.api_key, self.cx = api_key, cx

        if not self.api_key:
            raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")
        if not self.cx:
            raise EngineAPIKeyException(self.name, "'cx=' keyword argument not specified")

        # `or` covers both an absent key and falsy values such as the empty string.
        self.default_result_type = kwargs.get('default_result_type') or DEFAULT_RESULT_TYPE
Beispiel #3
0
    def __init__(self, api_key='', **kwargs):
        """
        Googleplus engine constructor.

        Kwargs:
            api_key (str): API key needed to access the search api; must be non-empty.
            default_result_type (str): Optionally provide a default result type; a missing or empty
                value falls back to DEFAULT_RESULT_TYPE.
            See Engine.

        Raises:
            EngineAPIKeyException: if api_key is empty.

        Usage:
            engine = EngineFactory('Googleplus', api_key='etc123456etc123456etc123456')

        """
        Engine.__init__(self, **kwargs)

        self.api_key = api_key
        if not self.api_key:
            raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")

        # `or` covers both an absent key and falsy values such as the empty string.
        self.default_result_type = kwargs.get('default_result_type') or DEFAULT_RESULT_TYPE
Beispiel #4
0
    def __init__(self, whoosh_index_dir='', stopwords_file='', model=1, implicit_or=False, **kwargs):
        """
        Whoosh engine constructor.

        Kwargs:
            whoosh_index_dir (str): directory of the Whoosh index to open; must be non-empty.
            stopwords_file (str): optional path of a stopwords file; when given it is read into a
                ListReader stored as self.stopwords.
            model: identifier passed on to self.set_model().
            implicit_or (bool): if true the query parser groups terms with OrGroup, otherwise AndGroup.
            See Engine.

        Raises:
            EngineConnectionException: if whoosh_index_dir is empty, or if opening the index or
                configuring the parser/analyzer/model fails.

        Usage:
            See EngineFactory.

        """
        Engine.__init__(self, **kwargs)
        self.whoosh_index_dir = whoosh_index_dir
        if not self.whoosh_index_dir:
            raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

        self.stopwords_file = stopwords_file
        if self.stopwords_file:
            self.stopwords = ListReader(self.stopwords_file)  # Open the stopwords file, read into a ListReader

        self.snippet_size = 3

        self.implicit_or = implicit_or

        try:
            # This creates a static docIndex for ALL instances of Whooshtrec.
            # This will not work if you want indexes from multiple sources.
            # As this currently is not the case, this is a suitable fix.
            if not hasattr(Whooshtrec, 'docIndex'):
                Whooshtrec.docIndex = open_dir(whoosh_index_dir)

            log.debug("Whoosh Document index open: {0}".format(whoosh_index_dir))
            log.debug("Documents in index: {0}".format(self.docIndex.doc_count()))

            # Prefer the 'alltext' field when the schema provides it, otherwise search 'content'.
            self._field = 'content'
            if 'alltext' in self.docIndex.schema:
                self._field = 'alltext'
                log.debug("Using all text field")

            if self.implicit_or:
                self.parser = QueryParser(self._field, self.docIndex.schema, group=OrGroup)
                log.debug("OR Query parser created")
            else:
                self.parser = QueryParser(self._field, self.docIndex.schema, group=AndGroup)
                log.debug("AND Query parser created")

            self.analyzer = self.docIndex.schema[self.parser.fieldname].analyzer

            self.set_fragmenter()

            self.set_model(model)

        except Exception as error:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must propagate instead of being
            # reported as an index failure. The underlying cause is logged because the message raised
            # below is the same regardless of which step failed.
            log.debug("Whoosh engine initialisation failed: {0!r}".format(error))
            msg = "Could not open Whoosh index at: " + whoosh_index_dir
            raise EngineConnectionException(self.name, msg)
Beispiel #5
0
    def __init__(self, **kwargs):
        """
        Wikipedia engine constructor.

        Performs no engine-specific setup: every keyword argument is forwarded unchanged to
        Engine.__init__.

        Kwargs:
            See Engine.

        Usage:
            See EngineFactory.

        """
        Engine.__init__(self, **kwargs)
Beispiel #6
0
    def __init__(self, **kwargs):
        """
        Wikipedia engine constructor.

        Performs no engine-specific setup: every keyword argument is forwarded unchanged to
        Engine.__init__.

        Kwargs:
            See Engine.

        Usage:
            See EngineFactory.

        """
        Engine.__init__(self, **kwargs)
Beispiel #7
0
    def __init__(self, **kwargs):
        """
        GOV.uk engine constructor.

        Performs no engine-specific setup: every keyword argument is forwarded unchanged to
        Engine.__init__.

        Kwargs:
            See Engine.

        Usage:
            See EngineFactory.

        """
        Engine.__init__(self, **kwargs)
    def __init__(self, whoosh_index_dir="", use_cache=True, cache_host="localhost", cache_port=6379, **kwargs):
        """
        Constructor for the engine.

        Opens the Whoosh index, prepares the query parser and snippet helpers and, when caching is
        enabled, connects to the cache and builds a PageCacheController.

        Kwargs:
            whoosh_index_dir (str): directory of the Whoosh index to open; must be non-empty.
            use_cache (bool): whether to set up the cache connection and page cache controller.
            cache_host (str): host passed to RedisConn.
            cache_port (int): port passed to RedisConn.
            See Engine.

        Raises:
            EngineConnectionException: if whoosh_index_dir is empty, or if opening the index raises
                EmptyIndexError or OSError.
        """
        Engine.__init__(self, **kwargs)

        self.whoosh_index_dir = whoosh_index_dir
        if not self.whoosh_index_dir:
            raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

        # Only PL2 is wired in for now; to support more, add a model parameter to the constructor.
        self.scoring_model_identifier = 1
        self.scoring_model = scoring.PL2(c=10.0)

        try:
            self.doc_index = open_dir(self.whoosh_index_dir)
            self.reader = self.doc_index.reader()

            # By default, we use AND grouping.
            # Use the grouping parameter and specify whoosh.qparser.OrGroup, etc...
            self.parser = QueryParser("content", self.doc_index.schema)

            # Objects required for document snippet generation.
            parsed_field = self.doc_index.schema[self.parser.fieldname]
            self.analyzer = parsed_field.analyzer
            self.fragmenter = ContextFragmenter(maxchars=200, surround=40)
            self.formatter = HtmlFormatter()
        except EmptyIndexError:
            raise EngineConnectionException(
                self.name,
                "Could not open Whoosh index at '{0}'".format(self.whoosh_index_dir))
        except OSError:
            raise EngineConnectionException(
                self.name,
                "Could not open Whoosh index at '{0}' - directory does not exist".format(self.whoosh_index_dir))

        self.use_cache = use_cache
        if not self.use_cache:
            # No cache requested: the cache-related attributes below are not created.
            return

        self.cache = RedisConn(host=cache_host, port=cache_port)
        self.cache.connect()

        self.page_cache_forward_look = 40  # How many additional pages to cache when required.
        self.page_cache_when = 4  # When the user is x pages away from the end of the page cache, cache more pages.

        controller_options = dict(
            cache_host=self.cache.host,
            cache_port=self.cache.port,
            whoosh_index=self.doc_index,
            scoring_model_identifier=self.scoring_model_identifier,
            parser=self.parser,
            analyzer=self.analyzer,
            fragmenter=self.fragmenter,
            formatter=self.formatter,
            cache_forward_look=self.page_cache_forward_look,
        )
        self.page_cache_controller = PageCacheController(**controller_options)
Beispiel #9
0
    def __init__(self, whoosh_index_dir='', model=1, implicit_or=False, use_cache=False, interleave=False, interleave_continuous=False, **kwargs):
        """
        Whoosh engine constructor.

        Kwargs:
            whoosh_index_dir (str): directory of the Whoosh index to open; must be non-empty.
            model (int): scoring model selector: 0 = TF_IDF, 2 = PL2 with default values,
                3 = BM25F(B=1) (BM11); any other value uses BM25F(B=0.25).
            implicit_or (bool): do we implicitly join terms together with ORs?
            use_cache (bool): stored on the instance as self.use_cache.
            interleave: should we interleave results, and how often?
            interleave_continuous (bool): do we continue to interleave after the initial loop?
            See Engine.

        Raises:
            EngineConnectionException: if whoosh_index_dir is empty, or if opening the index or
                creating the query parser fails.

        Usage:
            See EngineFactory.

        """
        Engine.__init__(self, **kwargs)
        self.whoosh_index_dir = whoosh_index_dir
        if not self.whoosh_index_dir:
            raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

        self.use_cache = use_cache
        # NOTE: the client object is created unconditionally, independent of use_cache.
        self.cache = redis.StrictRedis(host='localhost', port=6379, db=0)
        self.interleave = interleave  # Should we interleave results, and how often?
        self.interleave_continuous = interleave_continuous  # Do we continue to interleave after the initial loop?
        self.implicit_or = implicit_or  # Do we implicitly join terms together with ORs?

        # The model values are mutually exclusive, so a single chain replaces the independent ifs.
        if model == 0:
            self.scoring_model = scoring.TF_IDF()  # Use the TFIDF scoring module
        elif model == 2:
            self.scoring_model = scoring.PL2()  # Use PL2 with default values
        elif model == 3:
            self.scoring_model = scoring.BM25F(B=1)  # BM11
        else:
            self.scoring_model = scoring.BM25F(B=0.25)  # Use the BM25F scoring module (B=0.75 is default for Whoosh)

        try:
            # This creates a static docIndex for ALL instances of WhooshTrecNews.
            # This will not work if you want indexes from multiple sources.
            # As this currently is not the case, this is a suitable fix.
            if not hasattr(WhooshTrecNews, 'docIndex'):
                WhooshTrecNews.docIndex = open_dir(whoosh_index_dir)

            # Single-argument print works as a statement (Python 2) and as a function (Python 3);
            # the double space reproduces the output of the former comma-separated print statements.
            print("Whoosh Document index open:  %s" % (whoosh_index_dir,))
            print("Documents in index:  %s" % (self.docIndex.doc_count(),))
            self.parser = QueryParser("content", self.docIndex.schema)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must propagate instead of being
            # reported as an index failure.
            msg = "Could not open Whoosh index at: " + whoosh_index_dir
            raise EngineConnectionException(self.name, msg)
Beispiel #10
0
    def __init__(self, **kwargs):
        """
        Twitter engine constructor.

        The OAuth settings CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET
        (defined outside this constructor) must all be non-empty.

        Kwargs:
            See Engine.

        Raises:
            EngineAPIKeyException: if any of the four OAuth settings is empty.

        Usage:
            engine = EngineFactory('twitter')

        """
        Engine.__init__(self, **kwargs)

        oauth_complete = CONSUMER_KEY and CONSUMER_SECRET and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET
        if not oauth_complete:
            raise EngineAPIKeyException(self.name, 'OAuth details not supplied')
Beispiel #11
0
    def __init__(self, api_key='', **kwargs):
        """
        Pipl engine constructor.

        Kwargs:
            api_key (str): API key stored on the engine; must be non-empty.
            See Engine.

        Raises:
            EngineAPIKeyException: if api_key is empty.

        Usage:
            engine = EngineFactory('Pipl', api_key='etc123456etc123456etc123456')

        """
        Engine.__init__(self, **kwargs)

        self.api_key = api_key
        if self.api_key:
            return

        # An empty key is rejected at construction time.
        raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")
Beispiel #12
0
    def __init__(self, api_key='', **kwargs):
        """
        Bing engine constructor.

        Kwargs:
            api_key (str): API key needed to access bing search api; must be non-empty.
            See Engine.

        Raises:
            EngineAPIKeyException: if api_key is empty.

        Usage:
            engine = EngineFactory('bing', api_key='etc123456etc123456etc123456')

        """
        Engine.__init__(self, **kwargs)

        self.api_key = api_key
        if not self.api_key:
            missing_key_message = "'api_key=' keyword argument not specified"
            raise EngineAPIKeyException(self.name, missing_key_message)
Beispiel #13
0
    def __init__(self, api_key='', **kwargs):
        """
        Facebook engine constructor.

        Kwargs:
            api_key (str): API key stored on the engine; must be non-empty.
            See Engine.

        Raises:
            EngineAPIKeyException: if api_key is empty.

        Usage:
            engine = EngineFactory('Facebook', api_key='etc123456etc123456etc123456')

        """
        Engine.__init__(self, **kwargs)

        self.api_key = api_key
        if self.api_key:
            return

        # An empty key is rejected at construction time.
        raise EngineAPIKeyException(self.name, "'api_key=' keyword argument not specified")
Beispiel #14
0
    def __init__(self, api_key='', **kwargs):
        """
        Bing engine constructor.

        Kwargs:
            api_key (str): API key needed to access bing search api; must be non-empty.
            See Engine.

        Raises:
            EngineAPIKeyException: if api_key is empty.

        Usage:
            engine = EngineFactory('bing', api_key='etc123456etc123456etc123456')

        """
        Engine.__init__(self, **kwargs)

        self.api_key = api_key
        if not self.api_key:
            missing_key_message = "'api_key=' keyword argument not specified"
            raise EngineAPIKeyException(self.name, missing_key_message)
Beispiel #15
0
    def __init__(self, whoosh_index_dir='', stopwords_file='', cache_host='localhost', cache_port=6379, **kwargs):
        """
        Constructor for the engine.

        Opens the Whoosh index, loads the stopwords list, prepares the query parser and snippet
        helpers, and connects to the cache.

        Kwargs:
            whoosh_index_dir (str): directory of the Whoosh index to open; must be non-empty.
            stopwords_file (str): path of the stopwords file read into a ListReader; must be non-empty.
            cache_host (str): host passed to RedisConn.
            cache_port (int): port passed to RedisConn.
            See Engine.

        Raises:
            EngineConnectionException: if whoosh_index_dir or stopwords_file is empty, or if opening
                the index raises EmptyIndexError or OSError.
        """
        Engine.__init__(self, **kwargs)

        self.whoosh_index_dir = whoosh_index_dir
        if not self.whoosh_index_dir:
            raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

        self.stopwords_file = stopwords_file
        if not self.stopwords_file:
            raise EngineConnectionException(self.name, "'stopwords_file=' keyword argument not specified")
        # Open the stopwords file, read into a ListReader.
        self.stopwords = ListReader(self.stopwords_file)

        self.scoring_model_identifier = 1
        self.scoring_model = scoring.PL2(c=10.0)

        self.__verbose = False

        try:
            self.doc_index = open_dir(self.whoosh_index_dir)
            self.reader = self.doc_index.reader()

            # By default, we use AND grouping.
            # Use the grouping parameter and specify whoosh.qparser.OrGroup, etc...
            self.parser = QueryParser('content', self.doc_index.schema)

            # Objects required for document snippet generation.
            parsed_field = self.doc_index.schema[self.parser.fieldname]
            self.analyzer = parsed_field.analyzer
            self.fragmenter = ContextFragmenter(maxchars=200, surround=40)
            self.formatter = HtmlFormatter()
        except EmptyIndexError:
            raise EngineConnectionException(
                self.name,
                "Could not open Whoosh index at '{0}'".format(self.whoosh_index_dir))
        except OSError:
            raise EngineConnectionException(
                self.name,
                "Could not open Whoosh index at '{0}' - directory does not exist".format(self.whoosh_index_dir))

        # Attempt to connect to the specified cache.
        cache_connection = RedisConn(host=cache_host, port=cache_port)
        self.cache = cache_connection
        self.cache.connect()
Beispiel #16
0
    def __init__(self, **kwargs):
        """
        Twitter engine constructor.

        The OAuth settings CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET
        (defined outside this constructor) must all be non-empty.

        Kwargs:
            default_result_type (str): Optionally provide a default result type; a missing or empty
                value falls back to DEFAULT_RESULT_TYPE.
            See Engine.

        Raises:
            EngineAPIKeyException: if any of the four OAuth settings is empty.

        Usage:
            engine = EngineFactory('twitter')

        """
        Engine.__init__(self, **kwargs)

        oauth_complete = CONSUMER_KEY and CONSUMER_SECRET and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET
        if not oauth_complete:
            raise EngineAPIKeyException(self.name, 'OAuth details not supplied')

        # `or` covers both an absent key and falsy values such as the empty string.
        self.default_result_type = kwargs.get('default_result_type') or DEFAULT_RESULT_TYPE
Beispiel #17
0
    def __init__(self,
                 whoosh_index_dir='',
                 use_cache=True,
                 cache_host='localhost',
                 cache_port=6379,
                 **kwargs):
        """
        Constructor for the engine.

        Opens the Whoosh index, prepares the query parser and snippet helpers and, when caching is
        enabled, connects to the cache and builds a PageCacheController.

        Kwargs:
            whoosh_index_dir (str): directory of the Whoosh index to open; must be non-empty.
            use_cache (bool): whether to set up the cache connection and page cache controller.
            cache_host (str): host passed to RedisConn.
            cache_port (int): port passed to RedisConn.
            See Engine.

        Raises:
            EngineConnectionException: if whoosh_index_dir is empty, or if opening the index raises
                EmptyIndexError or OSError.
        """
        Engine.__init__(self, **kwargs)

        self.whoosh_index_dir = whoosh_index_dir
        if not self.whoosh_index_dir:
            raise EngineConnectionException(self.name, "'whoosh_index_dir=' keyword argument not specified")

        # Only PL2 is wired in for now; to support more, add a model parameter to the constructor.
        self.scoring_model_identifier = 1
        self.scoring_model = scoring.PL2(c=10.0)

        try:
            self.doc_index = open_dir(self.whoosh_index_dir)
            self.reader = self.doc_index.reader()

            # By default, we use AND grouping.
            # Use the grouping parameter and specify whoosh.qparser.OrGroup, etc...
            self.parser = QueryParser('content', self.doc_index.schema)

            # Objects required for document snippet generation.
            parsed_field = self.doc_index.schema[self.parser.fieldname]
            self.analyzer = parsed_field.analyzer
            self.fragmenter = ContextFragmenter(maxchars=200, surround=40)
            self.formatter = HtmlFormatter()
        except EmptyIndexError:
            index_error_text = "Could not open Whoosh index at '{0}'".format(self.whoosh_index_dir)
            raise EngineConnectionException(self.name, index_error_text)
        except OSError:
            index_error_text = "Could not open Whoosh index at '{0}' - directory does not exist".format(self.whoosh_index_dir)
            raise EngineConnectionException(self.name, index_error_text)

        self.use_cache = use_cache
        if not self.use_cache:
            # No cache requested: the cache-related attributes below are not created.
            return

        self.cache = RedisConn(host=cache_host, port=cache_port)
        self.cache.connect()

        self.page_cache_forward_look = 40  # How many additional pages to cache when required.
        self.page_cache_when = 4  # When the user is x pages away from the end of the page cache, cache more pages.

        controller_options = dict(
            cache_host=self.cache.host,
            cache_port=self.cache.port,
            whoosh_index=self.doc_index,
            scoring_model_identifier=self.scoring_model_identifier,
            parser=self.parser,
            analyzer=self.analyzer,
            fragmenter=self.fragmenter,
            formatter=self.formatter,
            cache_forward_look=self.page_cache_forward_look,
        )
        self.page_cache_controller = PageCacheController(**controller_options)
Beispiel #18
0
 def __init__(self, **kwargs):
     """Engine constructor; forwards every keyword argument unchanged to Engine.__init__."""
     Engine.__init__(self, **kwargs)
Beispiel #19
0
 def __init__(self, **kwargs):
     """Engine constructor; forwards every keyword argument unchanged to Engine.__init__."""
     Engine.__init__(self, **kwargs)