def setUp(self):
    """Build the shared fixture used by the tests that follow.

    Creates the ``testing_plone`` index, registers an
    ``ElasticSearchProxyIndex`` called ``espi`` on the portal catalog and,
    acting as a Manager, adds three documents with a rich-text body each.
    """
    portal = self.layer['portal']
    self.catalog = portal['portal_catalog']

    # install index
    from collective.es.index.esproxyindex import ElasticSearchProxyIndex
    from plone.app.textfield.value import RichTextValue
    from collective.es.index.utils import get_query_client

    es_client = get_query_client()
    es_client.indices.create('testing_plone')

    proxy_index = ElasticSearchProxyIndex('espi', caller=self.catalog)
    self.catalog.addIndex('espi', proxy_index)

    setRoles(portal, TEST_USER_ID, ['Manager'])
    login(portal, TEST_USER_NAME)

    # (id, title, body text) of every document in the fixture
    fixture_documents = (
        ('d1', 'Test one', 'Blah Blah Blah'),
        ('d2', 'Test two', 'Yada Yada Yada'),
        ('d3', 'Test three', 'Something completely different'),
    )
    for doc_id, doc_title, body_text in fixture_documents:
        portal.invokeFactory('Document', doc_id, title=doc_title)
        getattr(portal, doc_id).text = RichTextValue(
            body_text,
            'text/plain',
            'text/html',
        )

    # give es time to index documents
    time.sleep(2)
def numObjects(self):
    """Return the number of indexed objects.

    The number is obtained with a count request, so no documents are
    transferred from ElasticSearch.

    Returns the document count on success. If the request fails for any
    reason the error is logged and a human-readable message string is
    returned instead, so that callers displaying the value keep working.
    """
    es = get_query_client()
    search = Search(using=es, index=index_name())
    try:
        # A Search without a query matches all documents, so count() is
        # the size of the whole index.
        return search.count()
    except Exception:
        logger.exception('ElasticSearch "count" query failed')
        return 'Problem getting all documents count from ElasticSearch!'
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    Returns ``None`` when queries are blocked (``query_blocker.blocked``)
    or when the request carries no keys for this index.

    Otherwise returns a two-tuple: an ``IIBTree`` mapping the ``rid`` of
    every hit to its score (the ElasticSearch ``_score`` multiplied by
    10000 and truncated to an int), and a tuple holding this index's id.

    Results are fetched in pages of ``BATCH_SIZE`` through the scroll API.
    """
    if query_blocker.blocked:
        return None
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None

    query_body = self._apply_template({'keys': record.keys})
    logger.info(query_body)

    es = get_query_client()
    result = es.search(
        index=index_name(),
        body=query_body,
        size=BATCH_SIZE,
        scroll='1m',
        _source_include=['rid'],
    )

    def score(hit):
        return int(10000 * float(hit['_score']))

    retval = IIBTree()
    # The search above always opens a scroll context, even when everything
    # fits into the first page, so remember the id to release it later.
    sid = result.get('_scroll_id')
    try:
        # initial return value, other batches to be applied
        for hit in result['hits']['hits']:
            retval[hit['_source']['rid']] = score(hit)

        total = result['hits']['total']
        counter = BATCH_SIZE
        while counter < total:
            result = es.scroll(scroll_id=sid, scroll='1m')
            # The scroll id may change between pages; always continue with
            # the most recently received one.
            sid = result.get('_scroll_id', sid)
            hits = result['hits']['hits']
            if not hits:
                # Fewer documents than announced; nothing more to fetch.
                break
            for hit in hits:
                retval[hit['_source']['rid']] = score(hit)
            counter += BATCH_SIZE
    finally:
        if sid is not None:
            # Release the server-side scroll context right away instead of
            # leaving it open until the one minute keep-alive expires.
            try:
                es.clear_scroll(scroll_id=sid)
            except Exception:
                # Best effort only: the context expires by itself.
                logger.exception('ElasticSearch "clear_scroll" failed')
    return retval, (self.id,)
def numObjects(self):
    """Report how many documents the ElasticSearch index contains.

    Issues a ``match_all`` count request. If that request (or reading its
    result) fails, the exception is logged and a message string is
    returned in place of the number.
    """
    target_index = index_name()
    match_everything = {'query': {'match_all': {}}}
    client = get_query_client()
    try:
        response = client.count(index=target_index, body=match_everything)
        return response['count']
    except Exception:
        logger.exception('ElasticSearch "count" query failed')
    return 'Problem getting all documents count from ElasticSearch!'
def setUp(self):
    """Build the shared fixture used by the tests that follow.

    Creates the ``testing_plone`` index, registers an
    ``ElasticSearchProxyIndex`` called ``espi`` on the portal catalog and,
    acting as a Manager, adds three documents.
    """
    portal = self.layer['portal']
    self.catalog = portal['portal_catalog']

    from collective.es.index.esproxyindex import ElasticSearchProxyIndex
    from collective.es.index.utils import get_query_client

    es_client = get_query_client()
    es_client.indices.create('testing_plone')

    proxy_index = ElasticSearchProxyIndex('espi', caller=self.catalog)
    self.catalog.addIndex('espi', proxy_index)

    setRoles(portal, TEST_USER_ID, ['Manager'])
    login(portal, TEST_USER_NAME)

    # (id, title) of every document in the fixture
    fixture_documents = (
        ('d1', 'Test one'),
        ('d2', 'Test two'),
        ('d3', 'Test three'),
    )
    for doc_id, doc_title in fixture_documents:
        portal.invokeFactory('Document', doc_id, title=doc_title)

    # give es time to index documents
    time.sleep(2)
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    Returns ``None`` when queries are blocked (``query_blocker.blocked``)
    or when the request carries no keys for this index.

    Otherwise returns a two-tuple: an ``IIBTree`` mapping the ``rid`` of
    every hit to its score (the ElasticSearch score multiplied by 10000
    and truncated to an int), and a tuple holding this index's id. When
    ElasticSearch raises ``TransportError`` the error is logged and an
    empty ``IIBTree`` is returned in its place.

    Hits are fetched in pages of ``BATCH_SIZE`` using ``search_after`` on
    the ``(rid, _id)`` sort order. As a side effect the highlight
    fragments of all hits are stored in the annotations of
    ``self.REQUEST`` under ``HIGHLIGHT_KEY``, keyed by document id.
    """
    config = get_configuration()
    timeout = getattr(config, 'request_timeout', 20)
    search_fields = getattr(config, 'search_fields', None)
    if not search_fields:
        search_fields = SEARCH_FIELDS
    search_fields = search_fields.split()
    logger.info(search_fields)
    if query_blocker.blocked:
        return None
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None

    es = get_query_client()
    search = Search(using=es, index=index_name())
    search = search.params(request_timeout=timeout)
    search = search.sort('rid', '_id')
    search = search.source(include='rid')

    query_string = record.keys[0].decode('utf8')
    logger.info(query_string)
    if '*' in query_string:
        query_string = query_string.replace('*', ' ')
    query_string = query_string.strip()
    search = search.query(
        'simple_query_string',
        query=query_string,
        fields=search_fields,
    )

    # count() already talks to ElasticSearch, so it needs the same
    # protection as the page requests further down.
    try:
        results_count = search.count()
    except TransportError:
        # No es client, return empty results
        logger.exception('ElasticSearch client not available.')
        return IIBTree(), (self.id, )

    search = search.params(
        request_timeout=timeout,
        size=BATCH_SIZE,
        track_scores=True,
    )

    # setup highlighting
    for field in search_fields:
        name = field.split('^')[0]
        if name == 'title':
            # title shows up in results anyway
            continue
        search = search.highlight(name, fragment_size=FRAGMENT_SIZE)

    # initial return value, other batches to be applied
    retval = IIBTree()
    highlights = OOBTree()
    last_seen = None
    # Number of pages needed for all hits, rounded up.
    batch_count = (results_count + BATCH_SIZE - 1) // BATCH_SIZE
    for i in xrange(batch_count):
        if last_seen is not None:
            search = search.update_from_dict({'search_after': last_seen})
        try:
            results = search.execute(ignore_cache=True)
        except TransportError:
            # No es client, return empty results
            logger.exception('ElasticSearch client not available.')
            return IIBTree(), (self.id, )

        page_size = 0
        for r in results:
            page_size += 1
            rid = getattr(r, 'rid', None)
            if rid is not None:
                retval[rid] = int(10000 * float(r.meta.score))
                # Index query returns only rids, so we need
                # to save highlights for later use
                highlight_list = []
                if getattr(r.meta, 'highlight', None) is not None:
                    for key in dir(r.meta.highlight):
                        highlight_list.extend(r.meta.highlight[key])
                highlights[r.meta.id] = highlight_list
            # The next page starts after the sort values of this hit.
            last_seen = [rid, r.meta.id]

        if not page_size:
            # The index shrank since count(); asking again with the same
            # search_after value would only return another empty page.
            break

    # store highlights
    try:
        annotations = IAnnotations(self.REQUEST)
        annotations[HIGHLIGHT_KEY] = highlights
    except TypeError:
        # maybe we are in a test
        pass

    return retval, (self.id, )
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    Returns ``None`` when queries are blocked (``query_blocker.blocked``)
    or when the request carries no keys for this index.

    Otherwise returns a two-tuple: an ``IIBTree`` mapping the ``rid`` of
    every hit to its score (the ElasticSearch score multiplied by 10000
    and truncated to an int), and a tuple holding this index's id. When
    ElasticSearch raises ``TransportError`` the error is logged and an
    empty ``IIBTree`` is returned in its place.

    As a side effect the highlight fragments of all hits are stored in
    the annotations of ``self.REQUEST`` under ``HIGHLIGHT_KEY``, keyed by
    document id.
    """
    config = get_configuration()
    timeout = getattr(config, 'request_timeout', 20)
    search_fields = getattr(config, 'search_fields', None)
    if not search_fields:
        search_fields = SEARCH_FIELDS
    search_fields = search_fields.split()
    if query_blocker.blocked:
        return None
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None

    es = get_query_client()
    search = Search(using=es, index=index_name())
    search = search.params(
        request_timeout=timeout,
        size=BATCH_SIZE,
        preserve_order=True,
    )
    search = search.source(include='rid')

    query_string = record.keys[0]
    if query_string and query_string.startswith('*'):
        # plone.app.querystring contains op sends a leading *, remove it
        query_string = query_string[1:]
    search = search.query(
        'simple_query_string',
        query=query_string,
        fields=search_fields,
    )

    # setup highlighting
    for field in search_fields:
        name = field.split('^')[0]
        if name == 'title':
            # title shows up in results anyway
            continue
        search = search.highlight(name, fragment_size=FRAGMENT_SIZE)

    # initial return value, other batches to be applied
    retval = IIBTree()
    highlights = OOBTree()
    try:
        # scan() is lazy: requests are only sent while iterating, so the
        # whole loop has to sit inside the TransportError guard.
        for r in search.scan():
            if getattr(r, 'rid', None) is None:
                # something was indexed with no rid. Ignore for now.
                # this is only for highlights, so no big deal if we
                # skip one
                continue
            retval[r.rid] = int(10000 * float(r.meta.score))
            # Index query returns only rids, so we need
            # to save highlights for later use
            highlight_list = []
            if getattr(r.meta, 'highlight', None) is not None:
                for key in dir(r.meta.highlight):
                    highlight_list.extend(r.meta.highlight[key])
            highlights[r.meta.id] = highlight_list
    except TransportError:
        # No es client, return empty results
        logger.exception('ElasticSearch client not available.')
        return IIBTree(), (self.id, )

    # store highlights
    try:
        annotations = IAnnotations(self.REQUEST)
        annotations[HIGHLIGHT_KEY] = highlights
    except TypeError:
        # maybe we are in a test
        pass

    return retval, (self.id, )
def test_get_query_client(self):
    """Test that the client is found via the utility.

    The object returned by ``get_query_client`` has to provide
    ``IElasticSearchClient``.
    """
    from collective.es.index.interfaces import IElasticSearchClient
    from collective.es.index.utils import get_query_client

    query_client = get_query_client()
    provides_interface = IElasticSearchClient.providedBy(query_client)
    assert provides_interface
def search(self):
    """Return the base ``Search`` object for this instance.

    The search is bound to the query client, restricted to
    ``self.doc_types`` and the configured index, limited to
    ``BATCH_SIZE`` hits and set up to answer with ``FacetedResponse``.
    """
    client = get_query_client()
    base_search = Search(
        doc_type=self.doc_types,
        index=index_name(),
        using=client,
    )
    sized_search = base_search.params(size=BATCH_SIZE)
    return sized_search.response_class(FacetedResponse)