def test_fallback_search_solr(self):
    """Should work as test_fallback_search, but based on the native
    solr search utility
    """
    pc = api.portal.get_tool("portal_catalog")
    # Fake a solr response whose result set is the catalog brains
    # below the /plone/en-de subtree.
    mock_results = SolrResponse()
    mock_results.response = pc({"path": {"query": "/plone/en-de"}})
    mock_search = MagicMock(return_value=mock_results)
    mock_search.getManager = lambda: SolrConnectionManager(active=True)
    from zope.interface import alsoProvides
    from plone.indexer.interfaces import IIndexableObject
    alsoProvides(mock_search, IIndexableObject)
    # Swap the registered search utilities for the mocked ones so the
    # view under test talks to our fake solr.
    sm = self.portal.getSiteManager()
    sm.unregisterUtility(provided=ISearch)
    sm.unregisterUtility(provided=ISolrConnectionConfig)
    sm.registerUtility(component=SolrConnectionConfig(),
                       provided=ISolrConnectionConfig)
    sm.registerUtility(component=mock_search, provided=ISearch)
    lf_search_view = self.portal.restrictedTraverse(
        "@@language-fallback-search")
    results = lf_search_view.search_solr("path_parents:/plone/events")
    self.assertEqual(set([x.getPath() for x in results]),
                     set(['/plone/en-de',
                          '/plone/en-de/en-event',
                          '/plone/en/notrans-event']))
    # The view must have extended the query with the language fallback.
    mock_search.search.assert_called_with(
        "path_parents:/plone/events +Language:en OR all OR de")
def test_fallback_search_solr(self):
    """Should work as test_fallback_search, but based on the native
    solr search utility
    """
    pc = api.portal.get_tool("portal_catalog")
    # Fake a solr response whose result set is the catalog brains
    # below the /plone/en-de subtree.
    mock_results = SolrResponse()
    mock_results.response = pc({"path": {"query": "/plone/en-de"}})
    mock_search = MagicMock(return_value=mock_results)
    mock_search.getManager = lambda: SolrConnectionManager(active=True)
    from zope.interface import alsoProvides
    from plone.indexer.interfaces import IIndexableObject
    alsoProvides(mock_search, IIndexableObject)
    # Swap the registered search utilities for the mocked ones so the
    # view under test talks to our fake solr.
    sm = self.portal.getSiteManager()
    sm.unregisterUtility(provided=ISearch)
    sm.unregisterUtility(provided=ISolrConnectionConfig)
    sm.registerUtility(component=SolrConnectionConfig(),
                       provided=ISolrConnectionConfig)
    sm.registerUtility(component=mock_search, provided=ISearch)
    lf_search_view = self.portal.restrictedTraverse(
        "@@language-fallback-search")
    results = lf_search_view.search_solr("path_parents:/plone/events")
    self.assertEqual(
        set([x.getPath() for x in results]),
        set([
            '/plone/en-de',
            '/plone/en-de/en-event',
            '/plone/en/notrans-event'
        ]))
    # The view must have extended the query with the language fallback.
    mock_search.search.assert_called_with(
        "path_parents:/plone/events +Language:en OR all OR de")
def testParseResultsActualResultCount(self):
    """actual_result_count is parsed from the response but can be
    overridden without touching the parsed result set itself."""
    parsed = SolrResponse(getData("complex_xml_response.txt"))
    self.assertEqual(parsed.actual_result_count, 2)
    # Overriding the count leaves the underlying results untouched.
    parsed.actual_result_count = 1
    self.assertEqual(parsed.actual_result_count, 1)
    self.assertEqual(len(parsed.response), 2)
def cleanup(self, batch=1000):
    """ remove entries from solr that don't have a corresponding Zope
        object or have a different UID than the real object"""
    manager = queryUtility(ISolrConnectionManager)
    proc = SolrIndexProcessor(manager)
    conn = manager.getConnection()
    log = self.mklog(use_std_log=True)
    log('cleaning up solr index...\n')
    key = manager.getSchema().uniqueKey
    start = 0
    resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
    res = resp.results()
    log('%s items in solr catalog\n' % resp.response.numFound)
    deleted = 0
    reindexed = 0
    # walk the whole solr index in batches of `batch` flares
    while len(res) > 0:
        for flare in res:
            try:
                ob = PloneFlare(flare).getObject()
            except Exception as err:
                # flare points at something that cannot be resolved
                log('Error getting object, removing: %s (%s)\n' % (
                    flare['path_string'], err))
                conn.delete(flare[key])
                deleted += 1
                continue
            # getObject() may return None for stale entries; without
            # this guard ob.getPhysicalPath() below would raise an
            # AttributeError on None.
            if ob is None:
                log('Object not found, removing: %s\n' %
                    flare['path_string'])
                conn.delete(flare[key])
                deleted += 1
                continue
            if not IUUIDAware.providedBy(ob):
                log('Object %s of type %s does not support uuids, skipping.\n' %
                    ('/'.join(ob.getPhysicalPath()), ob.meta_type))
                continue
            uuid = IUUID(ob)
            if uuid != flare[key]:
                # stale entry indexed under an old UID: remove it and
                # reindex the real object if solr has no sane entry
                log('indexed under wrong UID, removing: %s\n' %
                    flare['path_string'])
                conn.delete(flare[key])
                deleted += 1
                realob_res = SolrResponse(conn.search(q='%s:%s' % (
                    key, uuid))).results()
                if len(realob_res) == 0:
                    log('no sane entry for last object, reindexing\n')
                    data, missing = proc.getData(ob)
                    prepareData(data)
                    if not missing:
                        boost = boost_values(ob, data)
                        conn.add(boost_values=boost, **data)
                        reindexed += 1
                    else:
                        log(' missing data, cannot index.\n')
        log('handled batch of %d items, committing\n' % len(res))
        conn.commit()
        start += batch
        resp = SolrResponse(conn.search(q='*:*', rows=batch, start=start))
        res = resp.results()
    msg = 'solr cleanup finished, %s item(s) removed, %s item(s) reindexed\n' \
        % (deleted, reindexed)
    log(msg)
    logger.info(msg)
def testParseComplexSearchResults(self):
    """Parsing a multi-document response yields typed field values and
    the full response header."""
    complex_xml_response = getData('complex_xml_response.txt')
    response = SolrResponse(complex_xml_response)
    results = response.response  # the result set is named 'response'
    # numFound/start come back as strings; len() gives the batch size
    self.assertEqual(results.numFound, '2')
    self.assertEqual(results.start, '0')
    self.assertEqual(len(results), 2)
    first = results[0]
    self.assertEqual(first.cat, ['software', 'search'])
    self.assertEqual(len(first.features), 7)
    # NOTE: the `unicode` type name means this variant targets Python 2
    self.assertEqual([type(x).__name__ for x in first.features],
                     ['str'] * 6 + ['unicode'])
    self.assertEqual(first.id, 'SOLR1000')
    self.assertEqual(first.inStock, True)
    self.assertEqual(first.incubationdate_dt, DateTime('2006/01/17 GMT'))
    self.assertEqual(first.manu, 'Apache Software Foundation')
    self.assertEqual(first.popularity, 10)
    self.assertEqual(first.price, 0.0)
    headers = response.responseHeader
    self.assertEqual(headers['status'], 0)
    self.assertEqual(headers['QTime'], 0)
    self.assertEqual(headers['params']['indent'], 'on')
    self.assertEqual(headers['params']['rows'], '10')
    self.assertEqual(headers['params']['start'], '0')
    self.assertEqual(headers['params']['q'], 'id:[* TO *]')
    self.assertEqual(headers['params']['version'], '2.2')
def testConvertFacetResponse(self):
    """Facet counts from a solr response are converted into the
    list-of-dicts structure expected by the facets view."""
    parsed = SolrResponse(getData("facet_xml_response.txt"))
    facet_fields = parsed.facet_counts["facet_fields"]
    dummy = DummyView(request=TestRequest())
    info = convertFacets(facet_fields, view=dummy)
    # two facet dicts, each carrying `counts`, `name` and `title` keys
    self.assertEqual([sorted(entry) for entry in info],
                     [["counts", "name", "title"]] * 2)
    # field names appear in response order
    self.assertEqual([entry["title"] for entry in info],
                     ["cat", "inStock"])
    cat, in_stock = info
    self.assertEqual(cat["title"], "cat")
    expected_cat = [
        ("search", "Title of Search", 1),
        ("software", "Title of Software", 1),
        ("electronics", "Title of Electronics", 0),
        ("monitor", "Title of Monitor", 0),
    ]
    self.assertEqual(
        [(c["name"], c["title"], c["count"]) for c in cat["counts"]],
        expected_cat)
    self.assertEqual(in_stock["title"], "inStock")
    self.assertEqual(
        [(c["name"], c["count"]) for c in in_stock["counts"]],
        [("true", 1)])
def testParseDateFacetSearchResults(self):
    """Date facet counts of a faceted search response are parsed into
    plain dictionaries keyed by timestamp."""
    facet_xml_response = getData("date_facet_xml_response.txt").decode("utf-8")
    response = SolrResponse(facet_xml_response)
    results = response.response  # the result set is named 'response'
    self.assertEqual(results.numFound, "42")
    self.assertEqual(results.start, "0")
    # rows=0 was requested (see params below), so no documents come back
    self.assertEqual(len(results), 0)
    headers = response.responseHeader
    self.assertEqual(type(headers), type({}))
    self.assertEqual(headers["status"], 0)
    self.assertEqual(headers["QTime"], 5)
    self.assertEqual(headers["params"]["facet.date"], "timestamp")
    self.assertEqual(headers["params"]["facet.date.start"], "NOW/DAY-5DAYS")
    self.assertEqual(headers["params"]["facet.date.end"], "NOW/DAY+1DAY")
    self.assertEqual(headers["params"]["facet.date.gap"], "+1DAY")
    self.assertEqual(headers["params"]["rows"], "0")
    self.assertEqual(headers["params"]["facet"], "true")
    self.assertEqual(headers["params"]["indent"], "true")
    self.assertEqual(headers["params"]["q"], "*:*")
    counts = response.facet_counts
    self.assertEqual(type(counts), type({}))
    self.assertEqual(counts["facet_queries"], {})
    self.assertEqual(counts["facet_fields"], {})
    timestamps = counts["facet_dates"]["timestamp"]
    self.assertEqual(timestamps["2007-08-11T00:00:00.000Z"], 1)
    self.assertEqual(timestamps["2007-08-12T00:00:00.000Z"], 5)
    self.assertEqual(timestamps["2007-08-13T00:00:00.000Z"], 3)
    self.assertEqual(timestamps["2007-08-14T00:00:00.000Z"], 7)
    self.assertEqual(timestamps["2007-08-15T00:00:00.000Z"], 2)
    self.assertEqual(timestamps["2007-08-16T00:00:00.000Z"], 16)
    self.assertEqual(timestamps["gap"], "+1DAY")
    # compare ISO strings to sidestep timezone normalisation issues
    self.assertEqual(
        timestamps["end"].ISO8601(), DateTime("2007-08-17 GMT").ISO8601()
    )
def testParseFacetSearchResults(self):
    """Facet fields and queries of a faceted search response are
    parsed into plain dictionaries."""
    raw = getData("facet_xml_response.txt").decode("utf-8")
    parsed = SolrResponse(raw)
    result_set = parsed.response  # the result set is named 'response'
    self.assertEqual(result_set.numFound, "1")
    self.assertEqual(result_set.start, "0")
    self.assertEqual(len(result_set), 0)
    headers = parsed.responseHeader
    self.assertEqual(type(headers), type({}))
    self.assertEqual(headers["status"], 0)
    self.assertEqual(headers["QTime"], 1)
    for param, expected in [
        ("facet.limit", "-1"),
        ("rows", "0"),
        ("facet", "true"),
        ("facet.field", ["cat", "inStock"]),
        ("indent", "10"),
        ("q", "solr"),
    ]:
        self.assertEqual(headers["params"][param], expected)
    counts = parsed.facet_counts
    self.assertEqual(type(counts), type({}))
    self.assertEqual(counts["facet_queries"], {})
    for category, expected in [("electronics", 0), ("monitor", 0),
                               ("search", 1), ("software", 1)]:
        self.assertEqual(counts["facet_fields"]["cat"][category], expected)
    self.assertEqual(counts["facet_fields"]["inStock"]["true"], 1)
def testParseComplexSearchResults(self):
    """Parsing a multi-document response yields typed field values and
    the full response header (Python 2/3 compatible via six)."""
    complex_xml_response = getData("complex_xml_response.txt")
    response = SolrResponse(complex_xml_response)
    results = response.response  # the result set is named 'response'
    self.assertEqual(results.numFound, "2")
    self.assertEqual(response.actual_result_count, 2)
    self.assertEqual(results.start, "0")
    self.assertEqual(len(results), 2)
    first = results[0]
    self.assertEqual(first.cat, ["software", "search"])
    self.assertEqual(len(first.features), 7)
    # the last feature is the text type (six.text_type)
    self.assertEqual(
        [type(x).__name__ for x in first.features],
        ["str"] * 6 + [six.text_type.__name__],
    )
    self.assertEqual(first.id, "SOLR1000")
    self.assertEqual(first.inStock, True)
    # compare ISO strings to sidestep timezone normalisation issues
    self.assertEqual(
        first.incubationdate_dt.ISO8601(), DateTime("2006/01/17 GMT").ISO8601()
    )
    self.assertEqual(first.manu, "Apache Software Foundation")
    self.assertEqual(first.popularity, 10)
    self.assertEqual(first.price, 0.0)
    headers = response.responseHeader
    self.assertEqual(headers["status"], 0)
    self.assertEqual(headers["QTime"], 0)
    self.assertEqual(headers["params"]["indent"], "on")
    self.assertEqual(headers["params"]["rows"], "10")
    self.assertEqual(headers["params"]["start"], "0")
    self.assertEqual(headers["params"]["q"], "id:[* TO *]")
    self.assertEqual(headers["params"]["version"], "2.2")
def testParseFacetSearchResults(self):
    """Facet fields and queries of a faceted search response are
    parsed into plain dictionaries."""
    parsed = SolrResponse(getData('facet_xml_response.txt'))
    result_set = parsed.response  # the result set is named 'response'
    self.assertEqual(result_set.numFound, '1')
    self.assertEqual(result_set.start, '0')
    self.assertEqual(len(result_set), 0)
    headers = parsed.responseHeader
    self.assertEqual(type(headers), type({}))
    self.assertEqual(headers['status'], 0)
    self.assertEqual(headers['QTime'], 1)
    for param, expected in [
        ('facet.limit', '-1'),
        ('rows', '0'),
        ('facet', 'true'),
        ('facet.field', ['cat', 'inStock']),
        ('indent', '10'),
        ('q', 'solr'),
    ]:
        self.assertEqual(headers['params'][param], expected)
    counts = parsed.facet_counts
    self.assertEqual(type(counts), type({}))
    self.assertEqual(counts['facet_queries'], {})
    for category, expected in [('electronics', 0), ('monitor', 0),
                               ('search', 1), ('software', 1)]:
        self.assertEqual(counts['facet_fields']['cat'][category], expected)
    self.assertEqual(counts['facet_fields']['inStock']['true'], 1)
def testParseDateFacetSearchResults(self):
    """Date facet counts of a faceted search response are parsed into
    a plain dictionary keyed by timestamp."""
    parsed = SolrResponse(getData('date_facet_xml_response.txt'))
    result_set = parsed.response  # the result set is named 'response'
    self.assertEqual(result_set.numFound, '42')
    self.assertEqual(result_set.start, '0')
    self.assertEqual(len(result_set), 0)
    headers = parsed.responseHeader
    self.assertEqual(type(headers), type({}))
    self.assertEqual(headers['status'], 0)
    self.assertEqual(headers['QTime'], 5)
    for param, expected in [
        ('facet.date', 'timestamp'),
        ('facet.date.start', 'NOW/DAY-5DAYS'),
        ('facet.date.end', 'NOW/DAY+1DAY'),
        ('facet.date.gap', '+1DAY'),
        ('rows', '0'),
        ('facet', 'true'),
        ('indent', 'true'),
        ('q', '*:*'),
    ]:
        self.assertEqual(headers['params'][param], expected)
    counts = parsed.facet_counts
    self.assertEqual(type(counts), type({}))
    self.assertEqual(counts['facet_queries'], {})
    self.assertEqual(counts['facet_fields'], {})
    timestamps = counts['facet_dates']['timestamp']
    expected_counts = {
        '2007-08-11T00:00:00.000Z': 1,
        '2007-08-12T00:00:00.000Z': 5,
        '2007-08-13T00:00:00.000Z': 3,
        '2007-08-14T00:00:00.000Z': 7,
        '2007-08-15T00:00:00.000Z': 2,
        '2007-08-16T00:00:00.000Z': 16,
    }
    for stamp, count in expected_counts.items():
        self.assertEqual(timestamps[stamp], count)
    self.assertEqual(timestamps['gap'], '+1DAY')
    self.assertEqual(timestamps['end'], DateTime('2007-08-17 GMT'))
def test_suggestions_querystring_with_list_parameter(self):
    """Spellcheck suggestions must keep list-valued request parameters
    (repeated `facet.field`) in the generated query string."""
    portal = self.layer['portal']
    request = self.layer['request']
    # Setup browser layers
    notify(BeforeTraverseEvent(portal, request))
    request.form.update({'SearchableText': 'bidlung',
                         'facet.field': ['portal_type', 'review_state']})
    view = getMultiAdapter((portal, request), name=u'search')
    # Inject a canned spellcheck payload instead of talking to solr.
    view.solr_response = SolrResponse()
    view.solr_response.spellcheck = {}
    view.solr_response.spellcheck['suggestions'] = {
        'bidlung': {'endOffset': 246,
                    'numFound': 5,
                    'origFreq': 1,
                    'startOffset': 239,
                    'suggestion': [{'freq': 2704, 'word': 'bildung'},
                                   {'freq': 1, 'word': 'bidlungs'},
                                   {'freq': 1, 'word': 'bidung'},
                                   {'freq': 561, 'word': 'bildungs'},
                                   {'freq': 233, 'word': 'bislang'}]},
        'correctlySpelled': False,
    }
    suggestions = view.suggestions()
    # Best suggestion first; both facet fields survive in the URL.
    self.assertEquals('bildung', suggestions[0][0])
    self.assertIn('&facet.field=portal_type&facet.field=review_state',
                  suggestions[0][1])
    self.assertIn('&SearchableText=bildung', suggestions[0][1])
def search(self, query, **parameters):
    """ perform a search with the given querystring and parameters """
    start = time()
    config = queryUtility(ISolrConnectionConfig)
    manager = self.getManager()
    manager.setSearchTimeout()
    connection = manager.getConnection()
    if connection is None:
        raise SolrInactiveException
    # PEP 8: use `not in` instead of `not ... in`
    if 'rows' not in parameters:
        # fall back to the configured maximum result size
        parameters['rows'] = config.max_results or ''
        logger.info(
            'falling back to "max_results" (%d) without a "rows" '
            'parameter: %r (%r)', config.max_results, query, parameters)
    if isinstance(query, dict):
        query = ' '.join(query.values())
    logger.debug('searching for %r (%r)', query, parameters)
    if 'sort' in parameters:    # issue warning for unknown sort indices
        index, order = parameters['sort'].split()
        schema = manager.getSchema() or {}
        field = schema.get(index, None)
        if field is None or not field.stored:
            logger.warning('sorting on non-stored attribute "%s"', index)
    response = connection.search(q=query, **parameters)
    results = SolrResponse(response)
    response.close()
    manager.setTimeout(None)
    elapsed = (time() - start) * 1000
    slow = config.slow_query_threshold
    if slow and elapsed >= slow:
        logger.info('slow query: %d/%d ms for %r (%r)',
                    results.responseHeader['QTime'], elapsed,
                    query, parameters)
    return results
def search(self, query, **parameters):
    """ perform a search with the given querystring and parameters """
    start = time()
    config = self.getConfig()
    manager = self.getManager()
    manager.setSearchTimeout()
    connection = manager.getConnection()
    if connection is None:
        raise SolrInactiveException
    if 'rows' not in parameters:
        parameters['rows'] = config.max_results or 10000000
        # Check if rows param is 0 for backwards compatibility. Before
        # Solr 4 'rows = 0' meant that there is no limitation. Solr 4
        # always expects a rows param > 0 though:
        # http://wiki.apache.org/solr/CommonQueryParameters#rows
        if parameters['rows'] == 0:
            parameters['rows'] = 10000000
        logger.debug(
            'falling back to "max_results" (%d) without a "rows" '
            'parameter: %r (%r)', config.max_results, query, parameters)
    if getattr(config, 'highlight_fields', None):
        # enable highlighting only when requested ('hl' == 'true') and
        # 'hl.fl' was not configured explicitly by the caller
        if parameters.get('hl', 'false') == 'true'\
                and 'hl.fl' not in parameters:
            parameters['hl'] = 'true'
            parameters['hl.fl'] = config.highlight_fields or []
            parameters['hl.simple.pre'] =\
                config.highlight_formatter_pre or ' '
            parameters['hl.simple.post'] =\
                config.highlight_formatter_post or ' '
            parameters['hl.fragsize'] =\
                getattr(config, 'highlight_fragsize', None) or 100
    if 'fl' not in parameters:
        # default field list: configured fields or everything + score
        if config.field_list:
            parameters['fl'] = ' '.join(config.field_list)
        else:
            parameters['fl'] = '* score'
    if isinstance(query, dict):
        query = u' '.join([safe_unicode(val)
                           for val in query.values()]).encode('utf-8')
    logger.debug('searching for %r (%r)', query, parameters)
    if 'sort' in parameters:    # issue warning for unknown sort indices
        index, order = parameters['sort'].split()
        schema = manager.getSchema() or {}
        field = schema.get(index, None)
        if field is None or not field.stored:
            logger.warning('sorting on non-stored attribute "%s"', index)
    response = connection.search(q=query, **parameters)
    results = SolrResponse(response)
    response.close()
    manager.setTimeout(None)
    elapsed = (time() - start) * 1000
    slow = config.slow_query_threshold
    if slow and elapsed >= slow:
        logger.info('slow query: %d/%d ms for %r (%r)',
                    results.responseHeader['QTime'], elapsed, query,
                    parameters)
    logger.debug('highlighting info: %s'
                 % getattr(results, 'highlighting', {}))
    return results
def solrSearchResults(request=None, **keywords):
    """ perform a query using solr after translating the passed in
        parameters with portal catalog semantics """
    site = getSite()
    search = queryUtility(ISearch, context=site)
    config = queryUtility(ISolrConnectionConfig, context=site)
    if request is None:
        # try to get a request instance, so that flares can be adapted
        # to ploneflares and urls can be converted into absolute ones
        # etc; however, in this case any arguments from the request are
        # ignored
        args = deepcopy(keywords)
        request = getattr(site, 'REQUEST', None)
    elif IHTTPRequest.providedBy(request):
        args = deepcopy(request.form)
        args.update(keywords)       # keywords take precedence
    else:
        assert isinstance(request, dict), request
        args = deepcopy(request)
        args.update(keywords)       # keywords take precedence
        # if request is a dict, we need the real request in order to
        # be able to adapt to plone flares
        request = getattr(site, 'REQUEST', args)
    if 'path' in args and 'navtree' in args['path']:
        raise FallBackException     # we can't handle navtree queries yet
    use_solr = args.get('use_solr', False)  # A special key to force Solr
    if not use_solr and config.required:
        # fall back to the catalog unless at least one required and
        # non-empty argument is present
        required = set(config.required).intersection(args)
        if required:
            for key in required:
                if not args[key]:
                    raise FallBackException
        else:
            raise FallBackException
    query, params = search.buildQueryAndParameters(**args)
    if query != {}:
        __traceback_info__ = (query, params, args)
        response = search(query, **params)
    else:
        # nothing to search for: return an empty response
        return SolrResponse()

    def wrap(flare):
        """ wrap a flare object with a helper class """
        adapter = queryMultiAdapter((flare, request), IFlare)
        return adapter is not None and adapter or flare
    schema = search.getManager().getSchema() or {}
    results = response.results()
    for idx, flare in enumerate(results):
        flare = wrap(flare)
        # fill in missing values for stored schema fields so flares
        # behave like catalog brains
        for missing in set(schema.stored).difference(flare):
            flare[missing] = MV
        results[idx] = wrap(flare)
    padResults(results, **params)           # pad the batch
    return response
def search(self, query, **parameters):
    """ perform a search with the given querystring and parameters """
    start = time()
    config = queryUtility(ISolrConnectionConfig)
    manager = self.getManager()
    manager.setSearchTimeout()
    connection = manager.getConnection()
    if connection is None:
        raise SolrInactiveException
    # PEP 8: use `not in` instead of `not ... in` (three occurrences)
    if 'rows' not in parameters:
        # fall back to the configured maximum result size
        parameters['rows'] = config.max_results or ''
        logger.info(
            'falling back to "max_results" (%d) without a "rows" '
            'parameter: %r (%r)', config.max_results, query, parameters)
    if getattr(config, 'highlight_fields', None):
        # enable highlighting only when requested and 'hl.fl' was not
        # configured explicitly by the caller
        if parameters.get('hl', 'false') == 'true' and 'hl.fl' not in parameters:
            parameters['hl'] = 'true'
            parameters['hl.fl'] = config.highlight_fields or []
            parameters[
                'hl.simple.pre'] = config.highlight_formatter_pre or ' '
            parameters[
                'hl.simple.post'] = config.highlight_formatter_post or ' '
            parameters['hl.fragsize'] = getattr(
                config, 'highlight_fragsize', None) or 100
    if 'fl' not in parameters:
        # default field list: configured fields or everything + score
        if config.field_list:
            parameters['fl'] = ' '.join(config.field_list)
        else:
            parameters['fl'] = '* score'
    if isinstance(query, dict):
        query = ' '.join(query.values())
    logger.debug('searching for %r (%r)', query, parameters)
    if 'sort' in parameters:    # issue warning for unknown sort indices
        index, order = parameters['sort'].split()
        schema = manager.getSchema() or {}
        field = schema.get(index, None)
        if field is None or not field.stored:
            logger.warning('sorting on non-stored attribute "%s"', index)
    response = connection.search(q=query, **parameters)
    results = SolrResponse(response)
    response.close()
    manager.setTimeout(None)
    elapsed = (time() - start) * 1000
    slow = config.slow_query_threshold
    if slow and elapsed >= slow:
        logger.info('slow query: %d/%d ms for %r (%r)',
                    results.responseHeader['QTime'], elapsed, query,
                    parameters)
    logger.debug('highlighting info: %s'
                 % getattr(results, 'highlighting', {}))
    return results
def testConvertFacetResponse(self):
    """convertFacets turns solr facet counts into display info dicts."""
    parsed = SolrResponse(getData('facet_xml_response.txt'))
    facet_fields = parsed.facet_counts['facet_fields']
    info = convertFacets(facet_fields, request=TestRequest())
    # two facet dicts, each carrying `counts` and `title` keys
    self.assertEqual([sorted(entry) for entry in info],
                     [['counts', 'title']] * 2)
    # field names appear in response order
    self.assertEqual([entry['title'] for entry in info],
                     ['cat', 'inStock'])
    cat, in_stock = info
    self.assertEqual(cat['title'], 'cat')
    expected_cat = [('search', 'Title of Search', 1),
                    ('software', 'Title of Software', 1),
                    ('electronics', 'Title of Electronics', 0),
                    ('monitor', 'Title of Monitor', 0)]
    self.assertEqual(
        [(c['name'], c['title'], c['count']) for c in cat['counts']],
        expected_cat)
    self.assertEqual(in_stock['title'], 'inStock')
    self.assertEqual(
        [(c['name'], c['count']) for c in in_stock['counts']],
        [('true', 1)])
def test_facets_order(self):
    """The order of the facets returned by the view must follow the
    order configured on the solr connection config."""
    portal = self.layer['portal']
    request = self.layer['request']
    request.form.update({'facet_field': ['type', 'section', 'topics']})
    response = SolrResponse(getData('facets_response.xml'))
    view = SearchFacetsView(portal, request)
    view.kw = dict(results=response)
    config = queryUtility(ISolrConnectionConfig)
    # Whatever order is configured must be reflected in the output.
    # (assertEqual is used instead of the deprecated assertEquals alias.)
    for order in (['type', 'section', 'topics'],
                  ['section', 'topics', 'type']):
        config.facets = order
        facets = view.facets()
        self.assertEqual(order,
                         [facet['title'] for facet in facets[:3]],
                         msg='Wrong facet order.')
def testParseSimpleSearchResults(self):
    """A plain search response parses into one match plus the usual
    response headers."""
    raw = getData('search_response.txt')
    # strip the HTTP headers in front of the XML body
    parsed = SolrResponse(raw.split('\n\n', 1)[1])
    result_set = parsed.response  # the result set is named 'response'
    self.assertEqual(result_set.numFound, '1')
    self.assertEqual(result_set.start, '0')
    self.assertEqual(len(result_set), 1)
    match = result_set[0]
    self.assertEqual(match.id, '500')
    self.assertEqual(match.name, 'python test doc')
    self.assertEqual(match.popularity, 0)
    self.assertEqual(match.sku, '500')
    self.assertEqual(match.timestamp,
                     DateTime('2008-02-29 16:11:46.998 GMT'))
    headers = parsed.responseHeader
    self.assertEqual(headers['status'], 0)
    self.assertEqual(headers['QTime'], 0)
    for param, expected in [('wt', 'xml'), ('indent', 'on'),
                            ('rows', '10'), ('q', 'id:[* TO *]')]:
        self.assertEqual(headers['params'][param], expected)
def testParseSimpleSearchResults(self):
    """A plain search response parses into one match plus the usual
    response headers."""
    raw = getData("search_response.txt").decode("utf-8")
    # strip the HTTP headers in front of the XML body
    parsed = SolrResponse(raw.split("\n\n", 1)[1])
    result_set = parsed.response  # the result set is named 'response'
    self.assertEqual(result_set.numFound, "1")
    self.assertEqual(result_set.start, "0")
    self.assertEqual(parsed.actual_result_count, 1)
    self.assertEqual(len(result_set), 1)
    match = result_set[0]
    self.assertEqual(match.id, "500")
    self.assertEqual(match.name, "python test doc")
    self.assertEqual(match.popularity, 0)
    self.assertEqual(match.sku, "500")
    # compare ISO strings to sidestep timezone normalisation issues
    self.assertEqual(
        match.timestamp.ISO8601(),
        DateTime("2008-02-29 16:11:46.998 GMT").ISO8601())
    headers = parsed.responseHeader
    self.assertEqual(headers["status"], 0)
    self.assertEqual(headers["QTime"], 0)
    for param, expected in [("wt", "xml"), ("indent", "on"),
                            ("rows", "10"), ("q", "id:[* TO *]")]:
        self.assertEqual(headers["params"][param], expected)
def test_suggestions(self):
    """Spellcheck suggestions for a misspelled term are returned with a
    corrected query string."""
    portal = self.layer['portal']
    request = self.layer['request']
    # Setup browser layers
    notify(BeforeTraverseEvent(portal, request))
    request.form.update({'SearchableText': 'bidlung', })
    view = getMultiAdapter((portal, request), name=u'search')
    # Inject a canned spellcheck payload instead of talking to solr.
    view.solr_response = SolrResponse()
    view.solr_response.spellcheck = {}
    view.solr_response.spellcheck['suggestions'] = {
        'bidlung': {'endOffset': 246,
                    'numFound': 5,
                    'origFreq': 1,
                    'startOffset': 239,
                    'suggestion': [{'freq': 2704, 'word': 'bildung'},
                                   {'freq': 1, 'word': 'bidlungs'},
                                   {'freq': 1, 'word': 'bidung'},
                                   {'freq': 561, 'word': 'bildungs'},
                                   {'freq': 233, 'word': 'bislang'}]},
        'platform': {'endOffset': 336,
                     'numFound': 5,
                     'origFreq': 9,
                     'startOffset': 328,
                     'suggestion': [{'freq': 557, 'word': 'plattform'},
                                    {'freq': 2, 'word': 'platforma'},
                                    {'freq': 2, 'word': 'platforme'},
                                    {'freq': 2, 'word': 'platforms'},
                                    {'freq': 7, 'word': 'plateforme'}]},
        'correctlySpelled': False,
    }
    suggestions = view.suggestions()
    # Best suggestion first, with the corrected query string.
    self.assertEquals(suggestions[0][0], 'bildung')
    self.assertEquals(suggestions[0][1], '&SearchableText=bildung')
def sync(self, batch=1000):
    """Sync the Solr index with the portal catalog. Records contained
    in the catalog but not in Solr will be indexed and records not
    contained in the catalog will be removed.
    """
    manager = queryUtility(ISolrConnectionManager)
    proc = SolrIndexProcessor(manager)
    conn = manager.getConnection()
    key = queryUtility(ISolrConnectionManager).getSchema().uniqueKey
    zodb_conn = self.context._p_jar
    catalog = getToolByName(self.context, 'portal_catalog')
    getIndex = catalog._catalog.getIndex
    modified_index = getIndex('modified')
    uid_index = getIndex(key)
    log = self.mklog()
    real = timer()          # real time
    lap = timer()           # real lap time (for intermediate commits)
    cpu = timer(clock)      # cpu time
    # get Solr status
    query = '+%s:[* TO *]' % key
    response = conn.search(q=query, rows=MAX_ROWS, fl='%s modified' % key)
    # avoid creating DateTime instances
    simple_unmarshallers = unmarshallers.copy()
    simple_unmarshallers['date'] = parse_date_as_datetime
    flares = SolrResponse(response, simple_unmarshallers)
    response.close()
    solr_results = {}
    solr_uids = set()

    def _utc_convert(value):
        # fold a utc time tuple into a single comparable integer
        # (minute resolution)
        t_tup = value.utctimetuple()
        return ((((t_tup[0] * 12 + t_tup[1]) * 31 +
                  t_tup[2]) * 24 + t_tup[3]) * 60 + t_tup[4])
    for flare in flares:
        uid = flare[key]
        solr_uids.add(uid)
        solr_results[uid] = _utc_convert(flare['modified'])
    # get catalog status
    cat_results = {}
    cat_uids = set()
    for uid, rid in uid_index._index.items():
        cat_uids.add(uid)
        cat_results[uid] = rid
    # differences: index what only the catalog has, unindex what only
    # solr has
    index = cat_uids.difference(solr_uids)
    solr_uids.difference_update(cat_uids)
    unindex = solr_uids
    processed = 0
    flush = notimeout(lambda: conn.flush())

    def checkPoint():
        # periodic flush + ZODB cache garbage collection
        msg = 'intermediate commit (%d items processed, ' \
              'last batch in %s)...\n' % (processed, lap.next())
        log(msg)
        logger.info(msg)
        flush()
        zodb_conn.cacheGC()
    cpi = checkpointIterator(checkPoint, batch)
    # Look up objects
    uid_rid_get = cat_results.get
    rid_path_get = catalog._catalog.paths.get
    catalog_traverse = catalog.unrestrictedTraverse

    def lookup(uid, rid=None,
               uid_rid_get=uid_rid_get,
               rid_path_get=rid_path_get,
               catalog_traverse=catalog_traverse):
        # resolve a uid (or rid) to the actual object, or None
        if rid is None:
            rid = uid_rid_get(uid)
        if not rid:
            return None
        if not isinstance(rid, int):
            rid = tuple(rid)[0]
        path = rid_path_get(rid)
        if not path:
            return None
        try:
            obj = catalog_traverse(path)
        except AttributeError:
            return None
        return obj
    log('processing %d "unindex" operations next...\n' % len(unindex))
    op = notimeout(lambda uid: conn.delete(id=uid))
    for uid in unindex:
        obj = lookup(uid)
        if obj is None:
            op(uid)
            processed += 1
            cpi.next()
        else:
            log('not unindexing existing object %r.\n' % uid)
    log('processing %d "index" operations next...\n' % len(index))
    op = notimeout(lambda obj: proc.index(obj))
    for uid in index:
        obj = lookup(uid)
        if indexable(obj):
            op(obj)
            processed += 1
            cpi.next()
        else:
            log('not indexing unindexable object %r.\n' % uid)
        if obj is not None:
            obj._p_deactivate()
    log('processing "reindex" operations next...\n')
    op = notimeout(lambda obj: proc.reindex(obj))
    cat_mod_get = modified_index._unindex.get
    solr_mod_get = solr_results.get
    done = unindex.union(index)
    # reindex everything whose catalog modification time disagrees
    # with the one stored in solr
    for uid, rid in cat_results.items():
        if uid in done:
            continue
        if isinstance(rid, IITreeSet):
            rid = rid.keys()[0]
        if cat_mod_get(rid) != solr_mod_get(uid):
            obj = lookup(uid, rid=rid)
            if indexable(obj):
                op(obj)
                processed += 1
                cpi.next()
            else:
                log('not reindexing unindexable object %r.\n' % uid)
            if obj is not None:
                obj._p_deactivate()
    conn.commit()
    log('solr index synced.\n')
    msg = 'processed %d object(s) in %s (%s cpu time).'
    msg = msg % (processed, real.next(), cpu.next())
    log(msg)
    logger.info(msg)
def search(self, query, wt="xml", sow="true", lowercase_operator="true",
           default_operator="AND", **parameters):
    """ perform a search with the given querystring and parameters """
    start = time()
    config = self.getConfig()
    manager = self.getManager()
    manager.setSearchTimeout()
    connection = manager.getConnection()
    if connection is None:
        raise SolrInactiveException
    parameters["wt"] = wt
    parameters["sow"] = sow  # split on whitespace
    parameters["lowercaseOperators"] = lowercase_operator
    parameters["q.op"] = default_operator
    if "rows" not in parameters:
        parameters["rows"] = config.max_results or 10000000
        # Check if rows param is 0 for backwards compatibility. Before
        # Solr 4 'rows = 0' meant that there is no limitation. Solr 4
        # always expects a rows param > 0 though:
        # http://wiki.apache.org/solr/CommonQueryParameters#rows
        if parameters["rows"] == 0:
            parameters["rows"] = 10000000
        logger.debug(
            'falling back to "max_results" (%d) without a "rows" '
            "parameter: %r (%r)",
            config.max_results,
            query,
            parameters,
        )
    if getattr(config, "highlight_fields", None):
        # enable highlighting only when requested and 'hl.fl' was not
        # configured explicitly by the caller
        if parameters.get("hl", "false") == "true" and "hl.fl" not in parameters:
            parameters["hl"] = "true"
            parameters["hl.fl"] = config.highlight_fields or []
            parameters[
                "hl.simple.pre"] = config.highlight_formatter_pre or " "
            parameters[
                "hl.simple.post"] = config.highlight_formatter_post or " "
            parameters["hl.fragsize"] = (getattr(
                config, "highlight_fragsize", None) or 100)
    if "fl" not in parameters:
        # default field list: configured fields or everything + score
        if config.field_list:
            parameters["fl"] = " ".join(config.field_list)
        else:
            parameters["fl"] = "* score"
    if isinstance(query, dict):
        query = u" ".join([safe_unicode(val) for val in query.values()])
    logger.debug("searching for %r (%r)", query, parameters)
    if "sort" in parameters:  # issue warning for unknown sort indices
        index, order = parameters["sort"].split()
        schema = manager.getSchema() or {}
        field = schema.get(index, None)
        if field is None or not field.stored:
            logger.warning('sorting on non-stored attribute "%s"', index)
    response = connection.search(q=query, **parameters)
    results = SolrResponse(response)
    response.close()
    manager.setTimeout(None)
    elapsed = (time() - start) * 1000
    slow = config.slow_query_threshold
    if slow and elapsed >= slow:
        logger.info(
            "slow query: %d/%d ms for %r (%r)",
            results.responseHeader["QTime"],
            elapsed,
            query,
            parameters,
        )
    logger.debug("highlighting info: %s" % getattr(results, "highlighting", {}))
    return results
def cleanup(self, batch=1000):
    """remove entries from solr that don't have a corresponding Zope
    object or have a different UID than the real object"""
    manager = queryUtility(ISolrConnectionManager)
    proc = SolrIndexProcessor(manager)
    conn = manager.getConnection()
    log = self.mklog(use_std_log=True)
    log("cleaning up solr index...\n")
    key = manager.getSchema().uniqueKey
    start = 0
    resp = SolrResponse(conn.search(q="*:*", rows=batch, start=start))
    res = resp.results()
    log("%s items in solr catalog\n" % resp.response.numFound)
    deleted = 0
    reindexed = 0
    # walk the whole solr index in batches of `batch` flares
    while len(res) > 0:
        for flare in res:
            try:
                ob = PloneFlare(flare).getObject()
            except Exception as err:
                # flare points at something that cannot be resolved
                log("Error getting object, removing: %s (%s)\n"
                    % (flare["path_string"], err))
                conn.delete(flare[key])
                deleted += 1
                continue
            # getObject() may also return None for stale entries
            if ob is None:
                log("Object not found, removing: %s\n"
                    % (flare["path_string"]))
                conn.delete(flare[key])
                deleted += 1
                continue
            if not IUUIDAware.providedBy(ob):
                no_skipping_msg = ("Object %s of type %s does not " +
                                   "support uuids, skipping.\n")
                log(no_skipping_msg
                    % ("/".join(ob.getPhysicalPath()), ob.meta_type))
                continue
            uuid = IUUID(ob)
            if uuid != flare[key]:
                # stale entry indexed under an old UID: remove it and
                # reindex the real object if solr has no sane entry
                log("indexed under wrong UID, removing: %s\n"
                    % flare["path_string"])
                conn.delete(flare[key])
                deleted += 1
                realob_res = SolrResponse(
                    conn.search(q="%s:%s" % (key, uuid))).results()
                if len(realob_res) == 0:
                    log("no sane entry for last object, reindexing\n")
                    data, missing = proc.getData(ob)
                    prepareData(data)
                    if not missing:
                        boost = boost_values(ob, data)
                        conn.add(boost_values=boost, **data)
                        reindexed += 1
                    else:
                        log(" missing data, cannot index.\n")
        log("handled batch of %d items, committing\n" % len(res))
        conn.commit()
        start += batch
        resp = SolrResponse(conn.search(q="*:*", rows=batch, start=start))
        res = resp.results()
    finished_msg = ("solr cleanup finished, %s item(s) removed, " +
                    "%s item(s) reindexed\n")
    msg = finished_msg % (deleted, reindexed)
    log(msg)
    logger.info(msg)
def test5(self):
    """Parsing the sample data must not raise."""
    SolrResponse(self.data)
def testParseQuirkyResponse(self):
    """The quirky sample response parses without producing flares
    that carry an empty UID."""
    raw = getData("quirky_response.txt").decode("utf-8")
    result_set = SolrResponse(raw).response  # named 'response'
    with_empty_uid = [r for r in result_set if r.UID == ""]
    self.assertEqual(with_empty_uid, [])
def results(self):
    """Parse the quirky sample response and return its result set."""
    parsed = SolrResponse(getData('quirky_response.txt'))
    return parsed.response  # the result set is named 'response'
def testParseQuirkyResponse(self):
    """The quirky sample response parses without producing flares
    that carry an empty UID."""
    raw = getData('quirky_response.txt')
    result_set = SolrResponse(raw).response  # named 'response'
    with_empty_uid = [r for r in result_set if r.UID == '']
    self.assertEqual(with_empty_uid, [])