예제 #1
0
 def setUp(self):
     """Prepare the catalog index and three sample documents.

     Creates the ``testing_plone`` index through the query client,
     registers an ``ElasticSearchProxyIndex`` named ``espi`` on the
     portal catalog, gives the test user the Manager role, logs in and
     creates the documents ``d1``, ``d2`` and ``d3`` with a rich-text
     body each.
     """
     from collective.es.index.esproxyindex import ElasticSearchProxyIndex
     from plone.app.textfield.value import RichTextValue
     from collective.es.index.utils import get_query_client

     self.catalog = self.layer['portal']['portal_catalog']

     # The ES index has to exist before the proxy index is installed.
     get_query_client().indices.create('testing_plone')
     proxy_index = ElasticSearchProxyIndex('espi', caller=self.catalog)
     self.catalog.addIndex('espi', proxy_index)

     portal = self.layer['portal']
     setRoles(portal, TEST_USER_ID, ['Manager'])
     login(portal, TEST_USER_NAME)

     # (id, title, body text) of every document the tests rely on.
     samples = (
         ('d1', 'Test one', 'Blah Blah Blah'),
         ('d2', 'Test two', 'Yada Yada Yada'),
         ('d3', 'Test three', 'Something completely different'),
     )
     for doc_id, title, body in samples:
         portal.invokeFactory('Document', doc_id, title=title)
         getattr(portal, doc_id).text = RichTextValue(
             body, 'text/plain', 'text/html')

     # ElasticSearch indexes asynchronously; wait before tests query it.
     time.sleep(2)
예제 #2
0
 def numObjects(self):
     """Return the number of indexed objects.

     Asks ElasticSearch for the hit count of an unfiltered search on
     the index named by ``index_name()`` instead of downloading every
     document just to count them.

     Returns the count on success. If the query raises, the error is
     logged and a human-readable error string is returned instead.
     """
     es = get_query_client()
     search = Search(using=es, index=index_name())
     try:
         # count() issues a single count request; no documents are
         # transferred, unlike materialising search.scan().
         return search.count()
     except Exception:
         logger.exception('ElasticSearch "count" query failed')
         return 'Problem getting all documents count from ElasticSearch!'
    def _apply_index(self, request):
        """Apply the index to query parameters given in 'request'.

        The argument should be a mapping object.

        Returns ``None`` when queries are currently blocked
        (``query_blocker.blocked``) or when the request carries no keys
        for this index.

        Otherwise the keys are rendered into a query body via
        ``self._apply_template`` and sent to ElasticSearch with a scroll
        context. All batches are collected and a two-tuple is returned:
        an ``IIBTree`` mapping each hit's ``rid`` to its score scaled by
        10000 and truncated to an integer, and a tuple containing the id
        of this index.
        """
        if query_blocker.blocked:
            return None
        record = parseIndexRequest(request, self.id)
        if record.keys is None:
            return None
        query_body = self._apply_template({'keys': record.keys})
        logger.info(query_body)
        es = get_query_client()
        result = es.search(
            index=index_name(),
            body=query_body,
            size=BATCH_SIZE,
            scroll='1m',
            _source_include=['rid'],
        )

        def score(hit):
            # IIBTree values must be integers, so keep four decimal
            # places of the float score by scaling.
            return int(10000 * float(hit['_score']))

        retval = IIBTree()
        for hit in result['hits']['hits']:
            retval[hit['_source']['rid']] = score(hit)

        # NOTE(review): assumes result['hits']['total'] is a plain
        # integer - confirm for the ElasticSearch version in use.
        total = result['hits']['total']
        fetched = BATCH_SIZE
        while fetched < total:
            # Always continue with the scroll id of the latest response;
            # ElasticSearch may hand out a new id with every batch.
            result = es.scroll(scroll_id=result['_scroll_id'], scroll='1m')
            hits = result['hits']['hits']
            if not hits:
                # Scroll exhausted earlier than the reported total.
                break
            for hit in hits:
                retval[hit['_source']['rid']] = score(hit)
            fetched += BATCH_SIZE
        return retval, (self.id,)
 def numObjects(self):
     """Report how many documents the ElasticSearch index holds.

     Runs a ``match_all`` count query against the index named by
     ``index_name()``. Returns the ``count`` value of the response; if
     the query raises, the error is logged and an error string is
     returned instead.
     """
     target_index = index_name()
     match_everything = {'query': {'match_all': {}}}
     client = get_query_client()
     try:
         response = client.count(index=target_index, body=match_everything)
         return response['count']
     except Exception:
         logger.exception('ElasticSearch "count" query failed')
         return 'Problem getting all documents count from ElasticSearch!'
예제 #5
0
 def setUp(self):
     """Prepare the catalog index and three empty sample documents.

     Creates the ``testing_plone`` index through the query client,
     registers an ``ElasticSearchProxyIndex`` named ``espi`` on the
     portal catalog, gives the test user the Manager role, logs in and
     creates the documents ``d1``, ``d2`` and ``d3``.
     """
     from collective.es.index.esproxyindex import ElasticSearchProxyIndex
     from collective.es.index.utils import get_query_client

     self.catalog = self.layer['portal']['portal_catalog']

     # The ES index has to exist before the proxy index is installed.
     get_query_client().indices.create('testing_plone')
     proxy_index = ElasticSearchProxyIndex('espi', caller=self.catalog)
     self.catalog.addIndex('espi', proxy_index)

     portal = self.layer['portal']
     setRoles(portal, TEST_USER_ID, ['Manager'])
     login(portal, TEST_USER_NAME)

     # (id, title) of every document the tests rely on.
     samples = (
         ('d1', 'Test one'),
         ('d2', 'Test two'),
         ('d3', 'Test three'),
     )
     for doc_id, title in samples:
         portal.invokeFactory('Document', doc_id, title=title)

     # ElasticSearch indexes asynchronously; wait before tests query it.
     time.sleep(2)
예제 #6
0
    def _apply_index(self, request):
        """Apply the index to query parameters given in 'request'.

        The argument should be a mapping object.

        Returns ``None`` when queries are currently blocked
        (``query_blocker.blocked``) or when the request carries no keys
        for this index.

        Otherwise the first key is sent to ElasticSearch as a
        ``simple_query_string`` query over the configured search fields
        and the hits are fetched in batches of ``BATCH_SIZE`` using
        ``search_after`` paging. A two-tuple is returned: an ``IIBTree``
        mapping each hit's ``rid`` to its score scaled by 10000 and
        truncated to an integer, and a tuple containing the id of this
        index.

        Highlight fragments of all hits are stored in the annotations of
        ``self.REQUEST`` under ``HIGHLIGHT_KEY``, keyed by the
        ElasticSearch document id.

        If ElasticSearch raises a ``TransportError`` while counting or
        fetching, the error is logged and an empty result is returned.
        """
        config = get_configuration()
        timeout = getattr(config, 'request_timeout', 20)
        search_fields = getattr(config, 'search_fields', None)
        if not search_fields:
            search_fields = SEARCH_FIELDS
        search_fields = search_fields.split()
        logger.info(search_fields)
        if query_blocker.blocked:
            return None
        record = parseIndexRequest(request, self.id)
        if record.keys is None:
            return None
        es = get_query_client()
        search = Search(using=es, index=index_name())
        search = search.params(request_timeout=timeout)
        # A stable total order is required for search_after paging.
        search = search.sort('rid', '_id')
        search = search.source(include='rid')
        query_string = record.keys[0]
        if isinstance(query_string, bytes):
            # Only byte strings need decoding. Calling decode() on a
            # value that is already unicode makes Python 2 ASCII-encode
            # it first, which fails on non-ASCII input.
            query_string = query_string.decode('utf8')
        logger.info(query_string)
        # Wildcards are replaced by blanks before querying.
        query_string = query_string.replace('*', ' ').strip()
        search = search.query('simple_query_string',
                              query=query_string,
                              fields=search_fields)
        try:
            results_count = search.count()
        except TransportError:
            # No es client, return empty results
            logger.exception('ElasticSearch client not available.')
            return IIBTree(), (self.id, )
        search = search.params(request_timeout=timeout,
                               size=BATCH_SIZE,
                               track_scores=True)
        # setup highlighting
        for field in search_fields:
            # strip a boost suffix such as "^2" from the field name
            name = field.split('^')[0]
            if name == 'title':
                # title shows up in results anyway
                continue
            search = search.highlight(name, fragment_size=FRAGMENT_SIZE)

        # initial return value, other batches to be applied
        retval = IIBTree()
        highlights = OOBTree()
        last_seen = None
        # ceiling division: a partially filled last batch counts too
        batch_count = (results_count + BATCH_SIZE - 1) // BATCH_SIZE
        for _ in xrange(batch_count):
            if last_seen is not None:
                # continue right after the last hit of the previous batch
                search = search.update_from_dict({'search_after': last_seen})
            try:
                results = search.execute(ignore_cache=True)
            except TransportError:
                # No es client, return empty results
                logger.exception('ElasticSearch client not available.')
                return IIBTree(), (self.id, )

            for r in results:
                rid = getattr(r, 'rid', None)
                if rid is not None:
                    retval[rid] = int(10000 * float(r.meta.score))
                    # Index query returns only rids, so we need
                    # to save highlights for later use
                    highlight_list = []
                    if getattr(r.meta, 'highlight', None) is not None:
                        for key in dir(r.meta.highlight):
                            highlight_list.extend(r.meta.highlight[key])
                    highlights[r.meta.id] = highlight_list
                # sort values of this hit, in the order given to sort()
                last_seen = [rid, r.meta.id]

        # store highlights
        try:
            annotations = IAnnotations(self.REQUEST)
            annotations[HIGHLIGHT_KEY] = highlights
        except TypeError:
            # maybe we are in a test
            pass

        return retval, (self.id, )
예제 #7
0
    def _apply_index(self, request):
        """Apply the index to query parameters given in 'request'.

        The argument should be a mapping object.

        Returns ``None`` when queries are currently blocked
        (``query_blocker.blocked``) or when the request carries no keys
        for this index.

        Otherwise the first key is sent to ElasticSearch as a
        ``simple_query_string`` query over the configured search fields
        and all hits are read with ``Search.scan()``. A two-tuple is
        returned: an ``IIBTree`` mapping each hit's ``rid`` to its score
        scaled by 10000 and truncated to an integer, and a tuple
        containing the id of this index.

        Highlight fragments of all hits are stored in the annotations of
        ``self.REQUEST`` under ``HIGHLIGHT_KEY``, keyed by the
        ElasticSearch document id.

        If ElasticSearch raises a ``TransportError`` while the hits are
        read, the error is logged and an empty result is returned.
        """
        config = get_configuration()
        timeout = getattr(config, 'request_timeout', 20)
        search_fields = getattr(config, 'search_fields', None)
        if not search_fields:
            search_fields = SEARCH_FIELDS
        search_fields = search_fields.split()
        if query_blocker.blocked:
            return None
        record = parseIndexRequest(request, self.id)
        if record.keys is None:
            return None
        es = get_query_client()
        search = Search(using=es, index=index_name())
        search = search.params(
            request_timeout=timeout,
            size=BATCH_SIZE,
            preserve_order=True,
        )
        search = search.source(include='rid')
        query_string = record.keys[0]
        if query_string and query_string.startswith('*'):
            # plone.app.querystring contains op sends a leading *, remove it
            query_string = query_string[1:]
        search = search.query('simple_query_string',
                              query=query_string,
                              fields=search_fields)
        # setup highlighting
        for field in search_fields:
            # strip a boost suffix such as "^2" from the field name
            name = field.split('^')[0]
            if name == 'title':
                # title shows up in results anyway
                continue
            search = search.highlight(name, fragment_size=FRAGMENT_SIZE)

        retval = IIBTree()
        highlights = OOBTree()
        try:
            # scan() is lazy: requests are only sent while iterating, so
            # the whole loop has to sit inside the try block for the
            # TransportError handler to be effective.
            for r in search.scan():
                if getattr(r, 'rid', None) is None:
                    # something was indexed with no rid. Ignore for now.
                    # this is only for highlights, so no big deal if we
                    # skip one
                    continue
                retval[r.rid] = int(10000 * float(r.meta.score))
                # Index query returns only rids, so we need
                # to save highlights for later use
                highlight_list = []
                if getattr(r.meta, 'highlight', None) is not None:
                    for key in dir(r.meta.highlight):
                        highlight_list.extend(r.meta.highlight[key])
                highlights[r.meta.id] = highlight_list
        except TransportError:
            # No es client, return empty results
            logger.exception('ElasticSearch client not available.')
            return IIBTree(), (self.id, )

        # store highlights
        try:
            annotations = IAnnotations(self.REQUEST)
            annotations[HIGHLIGHT_KEY] = highlights
        except TypeError:
            # maybe we are in a test
            pass

        return retval, (self.id, )
예제 #8
0
 def test_get_query_client(self):
     """Check that the query client provides IElasticSearchClient."""
     from collective.es.index.interfaces import IElasticSearchClient
     from collective.es.index.utils import get_query_client

     query_client = get_query_client()
     assert IElasticSearchClient.providedBy(query_client)
예제 #9
0
 def search(self):
     """Build the search object used for this faceted query.

     The search targets the index named by ``index_name()``, is limited
     to ``self.doc_types``, requests ``BATCH_SIZE`` hits and wraps its
     responses in ``FacetedResponse``.
     """
     client = get_query_client()
     base = Search(
         doc_type=self.doc_types,
         index=index_name(),
         using=client,
     )
     return base.params(size=BATCH_SIZE).response_class(FacetedResponse)