Beispiel #1
0
def test_indexer_process(es_client):
    """Send one page through the indexer: index it, search it, delete it.

    The test registers a ``page`` type with a localized title, queues an
    ``index`` task followed by a ``delete`` task and checks after each step
    what elasticsearch returns for the index the document is stored in.
    """
    mappings = TypeMappingRegistry()
    mappings.register_type('page', {
        'title': {'type': 'localized'},
    })

    # the index all searches of this test are run against
    index = "foo_bar-my_schema-en-page"
    indexer = Indexer(
        mappings, Queue(), hostname='foo.bar', es_client=es_client)

    indexer.queue.put({
        'action': 'index',
        'schema': 'my-schema',
        'type_name': 'page',
        'id': 1,
        'language': 'en',
        'properties': {
            'title': 'Go ahead and jump',
            'es_public': True
        }
    })

    # the first call handles the queued task, the second has nothing to do
    assert indexer.process() == 1
    assert indexer.process() == 0
    es_client.indices.refresh(index=index)

    search = es_client.search(index=index)
    assert search['hits']['total'] == 1

    hit = search['hits']['hits'][0]
    assert hit['_id'] == '1'
    assert hit['_source'] == {
        'title': 'Go ahead and jump',
        'es_public': True
    }
    assert hit['_type'] == 'page'

    # check if the analyzer was applied correctly (stopword removal)
    search = es_client.search(
        index=index, body={'query': {'match': {'title': 'and'}}})

    assert search['hits']['total'] == 0

    search = es_client.search(
        index=index, body={'query': {'match': {'title': 'go jump'}}})

    assert search['hits']['total'] == 1

    # delete the document again
    indexer.queue.put({
        'action': 'delete',
        'schema': 'my-schema',
        'type_name': 'page',
        'id': 1
    })

    assert indexer.process() == 1
    assert indexer.process() == 0
    es_client.indices.refresh(index=index)

    # the result has to be fetched again, otherwise the assertion would
    # look at the stale pre-delete result and could never fail
    search = es_client.search(index=index)
    assert search['hits']['total'] == 0
Beispiel #2
0
def test_orm_event_queue_overflow(capturelog):
    """Check that an error is logged once the translator queue is full.

    The translator is created with room for three tasks. The first three
    events must not log anything, the fourth one has to produce exactly one
    error record on the ``onegov.search`` logger.
    """

    capturelog.setLevel(logging.ERROR, logger='onegov.search')

    class Tweet(Searchable):

        es_id = 'id'
        es_type_name = 'tweet'
        es_language = 'en'
        es_public = True
        es_properties = {}

        def __init__(self, tweet_id):
            self.id = tweet_id

        @property
        def es_suggestion(self):
            return self.id

    mappings = TypeMappingRegistry()
    mappings.register_type('tweet', {})

    translator = ORMEventTranslator(mappings, max_queue_size=3)

    # one event of each kind, filling the queue up to its limit
    events = (
        translator.on_insert,
        translator.on_update,
        translator.on_delete,
    )

    for tweet_id, emit in enumerate(events, start=1):
        emit('foobar', Tweet(tweet_id))

    assert len(capturelog.records()) == 0

    # this event no longer fits into the queue
    translator.on_insert('foobar', Tweet(4))

    records = capturelog.records()
    assert len(records) == 1
    assert records[0].message == 'The orm event translator queue is full!'
Beispiel #3
0
def test_elasticsearch_outage(es_client, es_url):
    """Check that tasks survive a period in which indexing fails.

    The outage is simulated by replacing ``es_client.index`` with a mock
    raising ``TransportError``. While it lasts, ``process`` reports zero
    handled tasks and the indexer keeps a failed task around. Once a working
    client is put in place, both queued documents end up in elasticsearch.
    """

    def page_task(page_id, title):
        # an index task for a public english page
        return {
            'action': 'index',
            'schema': 'my-schema',
            'type_name': 'page',
            'id': page_id,
            'language': 'en',
            'properties': {
                'title': title,
                'es_public': True
            }
        }

    def total_hits():
        # refresh first, so the search sees everything indexed so far
        indexer.es_client.indices.refresh(index='_all')
        return indexer.es_client.search(index='_all')['hits']['total']

    mappings = TypeMappingRegistry()
    mappings.register_type('page', {
        'title': {'type': 'localized'},
    })

    indexer = Indexer(
        mappings, Queue(), hostname='foo.bar', es_client=es_client)

    indexer.queue.put(page_task(1, 'Foo'))

    # from here on every attempt to index a document fails
    indexer.es_client.index = Mock(side_effect=TransportError)

    for _ in range(2):
        assert indexer.process() == 0
        assert indexer.queue.empty()
        assert indexer.failed_task is not None

    indexer.queue.put(page_task(2, 'Bar'))

    # the second task stays in the queue while the first one keeps failing
    for _ in range(2):
        assert indexer.process() == 0
        assert not indexer.queue.empty()
        assert indexer.failed_task is not None

    # end of the outage: switch to a client without the mocked method
    indexer.es_client = Elasticsearch(es_url)

    assert total_hits() == 0

    assert indexer.process() == 2
    assert indexer.failed_task is None

    assert total_hits() == 2
Beispiel #4
0
def test_orm_event_translator_delete():
    """Check the task the translator queues for a deleted model.

    Deleting a ``Page`` with id 123 in schema ``foobar`` has to result in
    exactly one ``delete`` task carrying the schema, type name and id.
    """

    class Page(Searchable):

        es_id = 'id'
        es_type_name = 'page'

        def __init__(self, id):
            self.id = id

    mappings = TypeMappingRegistry()
    mappings.register_type('page', {})

    translator = ORMEventTranslator(mappings)
    translator.on_delete('foobar', Page(123))

    queued_task = translator.queue.get()
    expected_task = {
        'action': 'delete',
        'schema': 'foobar',
        'type_name': 'page',
        'id': 123
    }

    assert queued_task == expected_task

    # nothing besides the delete task may have been queued
    assert translator.queue.empty()
Beispiel #5
0
def test_type_mapping_registry():
    """Check type registration and the rejection of duplicate type names.

    Iterating the registry has to yield the registered types. Registering
    the same type name a second time has to raise an ``AssertionError``.
    """

    registry = TypeMappingRegistry()

    type_definitions = (
        ('page', {'title': {'type': 'string'}}),
        ('comment', {'comment': {'type': 'string'}}),
    )

    for type_name, properties in type_definitions:
        registry.register_type(type_name, properties)

    registered_names = {mapping.name for mapping in registry}
    assert registered_names == {'page', 'comment'}

    # 'page' is already taken
    with pytest.raises(AssertionError):
        registry.register_type('page', {})
Beispiel #6
0
    def configure_search(self, **cfg):
        """ Configures the elasticsearch client, leaving it as a property
        on the class::

            app.es_client

        The following configuration options are accepted:

        :enable_elasticsearch:
            If True, elasticsearch is enabled (defaults to True). If False,
            ``app.es_client`` is set to None and nothing else is configured.

        :elasticsearch_hosts:
            A list of elasticsearch clusters, including username, password,
            protocol and port.

            For example: ``https://user:secret@localhost:443``

            By default the client connects to the localhost on port 9200
            (the default), and on port 19200 (the default of boxen).

            At least one host in the list of servers must be up at startup.

        :elasticsearch_max_queue_size:
            The maximum queue size reserved for documents to be indexed. This
            queue is filling up if the elasticsearch cluster cannot be reached.

            Once the queue is full, warnings are emitted.

            Defaults to 10'000

            The misspelled ``elasticsarch_max_queue_size`` is still read as
            a fallback, to not break existing configurations.

        :elasticsearch_verify_certs:
            If true, the elasticsearch client verifies the certificates of
            the ssl connection. Defaults to true. Do not disable, unless you
            are in testing!

        """

        if not cfg.get('enable_elasticsearch', True):
            self.es_client = None
            return

        hosts = cfg.get('elasticsearch_hosts', (
            'http://localhost:9200',
            'http://localhost:19200'
        ))

        # older versions only looked up the misspelled key, which meant that
        # a correctly spelled setting was silently ignored
        max_queue_size = int(cfg.get(
            'elasticsearch_max_queue_size',
            cfg.get('elasticsarch_max_queue_size', '10000')
        ))
        verify_certs = cfg.get('elasticsearch_verify_certs', True)

        client_options = {'sniff_on_connection_fail': True, 'timeout': 5}

        if verify_certs:
            client_options['verify_certs'] = True
            client_options['ca_certs'] = certifi.where()

        self.es_client = Elasticsearch(hosts, **client_options)

        # the orm integration is only set up if there is a database
        if self.has_database_connection:
            self.es_mappings = TypeMappingRegistry()

            for base in self.session_manager.bases:
                self.es_mappings.register_orm_base(base)

            self.es_orm_events = ORMEventTranslator(
                self.es_mappings,
                max_queue_size=max_queue_size
            )

            # the indexer consumes the queue filled by the orm events
            self.es_indexer = Indexer(
                self.es_mappings,
                self.es_orm_events.queue,
                es_client=self.es_client
            )

            self.session_manager.on_insert.connect(
                self.es_orm_events.on_insert)
            self.session_manager.on_update.connect(
                self.es_orm_events.on_update)
            self.session_manager.on_delete.connect(
                self.es_orm_events.on_delete)
Beispiel #7
0
class ElasticsearchApp(morepath.App):
    """ Provides elasticsearch integration for
    :class:`onegov.core.framework.Framework` based applications.

    The application must be connected to a database.

    Usage::

        from onegov.core import Framework

        class MyApp(Framework, ElasticsearchApp):
            pass

    """

    def configure_search(self, **cfg):
        """ Configures the elasticsearch client, leaving it as a property
        on the class::

            app.es_client

        The following configuration options are accepted:

        :enable_elasticsearch:
            If True, elasticsearch is enabled (defaults to True). If False,
            ``app.es_client`` is set to None and nothing else is configured.

        :elasticsearch_hosts:
            A list of elasticsearch clusters, including username, password,
            protocol and port.

            For example: ``https://user:secret@localhost:443``

            By default the client connects to the localhost on port 9200
            (the default), and on port 19200 (the default of boxen).

            At least one host in the list of servers must be up at startup.

        :elasticsearch_max_queue_size:
            The maximum queue size reserved for documents to be indexed. This
            queue is filling up if the elasticsearch cluster cannot be reached.

            Once the queue is full, warnings are emitted.

            Defaults to 10'000

            The misspelled ``elasticsarch_max_queue_size`` is still read as
            a fallback, to not break existing configurations.

        :elasticsearch_verify_certs:
            If true, the elasticsearch client verifies the certificates of
            the ssl connection. Defaults to true. Do not disable, unless you
            are in testing!

        """

        if not cfg.get('enable_elasticsearch', True):
            self.es_client = None
            return

        hosts = cfg.get('elasticsearch_hosts', (
            'http://localhost:9200',
            'http://localhost:19200'
        ))

        # older versions only looked up the misspelled key, which meant that
        # a correctly spelled setting was silently ignored
        max_queue_size = int(cfg.get(
            'elasticsearch_max_queue_size',
            cfg.get('elasticsarch_max_queue_size', '10000')
        ))
        verify_certs = cfg.get('elasticsearch_verify_certs', True)

        client_options = {'sniff_on_connection_fail': True, 'timeout': 5}

        if verify_certs:
            client_options['verify_certs'] = True
            client_options['ca_certs'] = certifi.where()

        self.es_client = Elasticsearch(hosts, **client_options)

        # the orm integration is only set up if there is a database
        if self.has_database_connection:
            self.es_mappings = TypeMappingRegistry()

            for base in self.session_manager.bases:
                self.es_mappings.register_orm_base(base)

            self.es_orm_events = ORMEventTranslator(
                self.es_mappings,
                max_queue_size=max_queue_size
            )

            # the indexer consumes the queue filled by the orm events
            self.es_indexer = Indexer(
                self.es_mappings,
                self.es_orm_events.queue,
                es_client=self.es_client
            )

            self.session_manager.on_insert.connect(
                self.es_orm_events.on_insert)
            self.session_manager.on_update.connect(
                self.es_orm_events.on_update)
            self.session_manager.on_delete.connect(
                self.es_orm_events.on_delete)

    def es_search(self, languages='*', types='*', include_private=False,
                  explain=False):
        """ Returns a search scoped to the current application, with the
        given languages, types and private documents excluded by default.

        :languages:
            The languages passed on to :meth:`es_indices`.

        :types:
            The types passed on to :meth:`es_indices`.

        :include_private:
            If False (the default), the search is limited to documents
            whose ``es_public`` term is True.

        :explain:
            Passed to the search as the ``explain`` extra.

        """

        search = Search(
            session=self.session(),
            mappings=self.es_mappings,
            using=self.es_client,
            index=self.es_indices(languages, types),
            extra=dict(explain=explain)
        )

        if not include_private:
            search = search.filter("term", es_public=True)

        # by default, do not include any fields (this will still include
        # the id and the type, which is enough for the orm querying)
        search = search.fields([])

        return search

    def es_indices(self, languages='*', types='*'):
        """ Returns the external index names of the application's schema
        for the given languages and types, as provided by the index manager
        of the indexer.

        """
        return self.es_indexer.ixmgr.get_external_index_names(
            schema=self.schema,
            languages=languages,
            types=types
        )

    def es_search_by_request(self, request, types='*', explain=False):
        """ Takes the current :class:`~onegov.core.request.CoreRequest` and
        returns an elastic search scoped to the current application, the
        request's language and its access rights.

        """

        return self.es_search(
            # the language is the part of the locale before the underscore
            languages=[request.locale.split('_')[0]],
            types=types,
            include_private=self.es_may_use_private_search(request),
            explain=explain
        )

    def es_suggestions(self, query, languages='*', types='*',
                       include_private=False):
        """ Returns suggestions for the given query.

        The result is a list with the text of every option elasticsearch
        offers. An empty query results in an empty list, without
        elasticsearch being contacted.

        """

        if not query:
            return []

        if include_private:
            context = ['public', 'private']
        else:
            context = ['public']

        result = self.es_client.suggest(
            index=self.es_indices(languages=languages, types=types),
            body={
                'suggestions': {
                    'text': query,
                    'completion': {
                        'field': 'es_suggestion',
                        'context': {
                            'es_public_categories': context
                        }
                    },
                }
            }
        )

        return [
            option['text']
            for suggestion in result.get('suggestions', [])
            for option in suggestion['options']
        ]

    def es_suggestions_by_request(self, request, query, types='*'):
        """ Returns suggestions for the given query, scoped to the language
        and the login status of the given request.

        """
        return self.es_suggestions(
            query,
            # the language is the part of the locale before the underscore
            languages=[request.locale.split('_')[0]],
            types=types,
            include_private=self.es_may_use_private_search(request)
        )

    def es_may_use_private_search(self, request):
        """ Returns True if the given request is allowed to access private
        search results. By default every logged in user has access to those.

        This method may be overwritten if this is not desired.

        """
        return request.is_logged_in
Beispiel #8
0
def test_orm_event_translator_properties():
    """Check the index task the translator queues for inserts and updates.

    Both events are fed with an identical ``Page`` and have to result in
    the same ``index`` task: the declared properties (with the date as
    ISO string), plus the ``es_public``, ``es_public_categories`` and
    ``es_suggestion`` entries.
    """

    class Page(Searchable):

        es_id = 'id'
        es_type_name = 'page'
        es_properties = {
            'title': {'type': 'localized'},
            'body': {'type': 'localized'},
            'tags': {'type': 'string'},
            'date': {'type': 'date'},
            'published': {'type': 'boolean'},
            'likes': {'type': 'long'}
        }

        def __init__(self, id, **kwargs):
            self.id = id
            self.language = kwargs.pop('language', 'en')
            self.public = kwargs.pop('public', True)

            # whatever is left becomes a plain attribute of the page
            for name in kwargs:
                setattr(self, name, kwargs[name])

        @property
        def es_language(self):
            return self.language

        @property
        def es_public(self):
            return self.public

        # NOTE(review): this is named es_suggest, while the expected task
        # below carries an es_suggestion entry - confirm which attribute
        # the translator actually reads
        @property
        def es_suggest(self):
            return self.title

    mappings = TypeMappingRegistry()
    mappings.register_type('page', Page.es_properties)

    translator = ORMEventTranslator(mappings)

    expected_task = {
        'action': 'index',
        'schema': 'my-schema',
        'type_name': 'page',
        'id': 1,
        'language': 'en',
        'properties': {
            'title': 'About',
            'body': 'We are Pied Piper',
            'tags': ['aboutus', 'company'],
            'date': '2015-09-11T00:00:00',
            'likes': 1000,
            'published': True,
            'es_public': True,
            'es_public_categories': ['public'],
            'es_suggestion': {
                'input': ['About'],
                'output': 'About'
            }
        }
    }

    for on_event in (translator.on_insert, translator.on_update):
        page = Page(
            id=1,
            title='About',
            body='We are Pied Piper',
            tags=['aboutus', 'company'],
            date=datetime(2015, 9, 11),
            published=True,
            likes=1000
        )
        on_event('my-schema', page)

        assert translator.queue.get() == expected_task

        # each event may only queue a single task
        assert translator.queue.empty()