def test_search_fl(self):
    """
    The ``fl`` parameter selects which fields solr returns for each result.
    """
    # no fl: UID and Title are both returned
    unrestricted = search(query={'*': '*'})
    self.assertEqual(unrestricted.hits, 3)
    for item in unrestricted.docs:
        returned_fields = list(item.keys())
        self.assertIn('UID', returned_fields)
        self.assertIn('Title', returned_fields)

    # fl limited to UID: Title is left out
    uid_only = search(query={'*': '*'}, fl='UID')
    self.assertEqual(uid_only.hits, 3)
    for item in uid_only.docs:
        returned_fields = list(item.keys())
        self.assertIn('UID', returned_fields)
        self.assertNotIn('Title', returned_fields)

    # fl with two fields: Subject is returned only for contents that
    # actually have a Subject set
    uid_and_subject = search(query={'*': '*'}, fl='UID Subject')
    self.assertEqual(uid_and_subject.hits, 3)
    for item in uid_and_subject.docs:
        returned_fields = list(item.keys())
        self.assertIn('UID', returned_fields)
        self.assertNotIn('Title', returned_fields)
        content = api.content.get(UID=item['UID'])
        if content.Subject():
            self.assertIn('Subject', returned_fields)
        else:
            self.assertNotIn('Subject', returned_fields)
def test_maintenance_reindex_with_unwanted_types(self):
    """
    Check the hit count on solr when the reindex view runs after
    ``enabled_types`` has been narrowed.
    """

    def count_hits(fields):
        # match-all query, restricted to the given fl value
        return search(query={'*': '*', 'b_size': 100000}, fl=fields).hits

    self.reindex_view()
    self.assertEqual(count_hits('UID'), 1)

    set_registry_record(
        'enabled_types',
        ['Document', 'News Item'],
        interface=IRerSolrpushSettings,
    )
    commit()
    for content in (self.news, self.unpublished_doc):
        api.content.transition(obj=content, transition='publish')
        commit()
    self.assertEqual(count_hits('UID,portal_type'), 3)

    set_registry_record(
        'enabled_types', ['Document'], interface=IRerSolrpushSettings
    )
    self.reindex_view()
    # News isn't removed because the reindex view only reindexes values
    # from Plone
    self.assertEqual(count_hits('UID'), 3)
def test_push_to_solr(self):
    """
    Push four contents in turn and check that only the published document
    is ever found on solr.
    """

    def fetch_all():
        return search(query={'*': '*', 'b_size': 100000}, fl='UID')

    self.assertEqual(fetch_all().hits, 0)

    pushed_in_order = (
        # an indexable and published content
        self.published_doc,
        # an indexable and private content
        self.unpublished_doc,
        # a non indexable published content
        self.published_news,
        # a non indexable private content
        self.unpublished_news,
    )
    for content in pushed_in_order:
        push_to_solr(content)
        indexed = fetch_all()
        self.assertEqual(indexed.hits, 1)
        self.assertEqual(
            indexed.docs[0]['UID'], self.published_doc.UID()
        )
def test_search_q(self):
    """
    Full-text searches on ``SearchableText`` return the expected contents.
    """
    # (search text, expected hits, ordered (attribute name, expected in
    # results) checks)
    scenarios = (
        (
            'Document',
            2,
            (
                ('doc1', True),
                ('doc2', True),
                ('published_news', False),
                ('unpublished_doc', False),
                ('unpublished_news', False),
                ('event', False),
            ),
        ),
        ('lorem ipsum', 2, (('doc1', True), ('doc2', True))),
        ('lorem amet', 1, (('doc1', False), ('doc2', True))),
        ('lorem OR amet', 2, (('doc1', True), ('doc2', True))),
    )
    for text, expected_hits, checks in scenarios:
        found = search(query={'SearchableText': text})
        self.assertEqual(found.hits, expected_hits)
        uids = [item['UID'] for item in found.docs]
        for attr_name, should_match in checks:
            content_uid = getattr(self, attr_name).UID()
            if should_match:
                self.assertIn(content_uid, uids)
            else:
                self.assertNotIn(content_uid, uids)
def cleanupSolrIndex(self):
    """
    Remove from SOLR every document whose UID is not among the Plone
    contents that should be indexed.

    The reference UIDs come from the catalog: contents of the enabled
    portal types when ``self.solr_utility.enabled_types`` is set, the
    whole catalog otherwise. Progress is tracked in the status mapping
    returned by ``self.setupAnnotations``.

    Nothing is done when there is no solr utility, when solr is not
    active, or when the SOLR search reports an error.
    """
    if not self.solr_utility:
        return
    if not is_solr_active():
        logger.warning("Trying to cleanup but solr is not set as active")
        return
    elapsed = timer()
    enabled_types = self.solr_utility.enabled_types
    if enabled_types:
        brains_to_sync = api.content.find(portal_type=enabled_types)
    else:
        # the catalog tool is only needed when no type filter is set
        pc = api.portal.get_tool(name="portal_catalog")
        brains_to_sync = pc()
    # a set keeps the membership test below O(1) per SOLR document
    good_uids = {x.UID for x in brains_to_sync}
    solr_results = search(query={"*": "*", "b_size": 100000}, fl="UID")
    # search() may hand back a dict carrying an "error" key instead of a
    # result object; such a dict has no ``docs`` attribute
    if isinstance(solr_results, dict) and solr_results.get("error", False):
        logger.error(
            "Unable to cleanup SOLR, search returned an error: %s",
            solr_results.get("error"),
        )
        return
    uids_to_cleanup = [
        x["UID"] for x in solr_results.docs if x["UID"] not in good_uids
    ]
    status = self.setupAnnotations(
        items_len=len(uids_to_cleanup), message="Cleanup items on SOLR"
    )
    logger.info("##### SOLR CLEANUP STARTED #####")
    logger.info(" - First of all, cleanup items on SOLR")
    for uid in uids_to_cleanup:
        remove_from_solr(uid)
        status["counter"] = status["counter"] + 1
        # commit after every removal so the updated counter is persisted
        commit()
    status["in_progress"] = False
    elapsed_time = next(elapsed)
    logger.info("SOLR indexes cleanup completed in %s", elapsed_time)
def test_reset_solr(self):
    """
    After ``reset_solr()`` a match-all search returns no hits.
    """

    def fetch_all():
        return search(query={'*': '*', 'b_size': 100000}, fl='UID')

    self.assertEqual(fetch_all().hits, 0)

    api.content.transition(obj=self.unpublished_doc, transition='publish')
    commit()

    # push two indexable and published contents
    for content in (self.published_doc, self.unpublished_doc):
        push_to_solr(content)
    indexed = fetch_all()
    self.assertEqual(indexed.hits, 2)
    self.assertEqual(indexed.docs[0]['UID'], self.published_doc.UID())

    # cleanup catalog
    reset_solr()
    self.assertEqual(fetch_all().hits, 0)
def test_maintenance_sync(self):
    """
    After narrowing ``enabled_types`` and running the sync view, the hit
    count on solr goes from 3 to 2.
    """

    def count_hits():
        return search(query={'*': '*', 'b_size': 100000}, fl='UID').hits

    for content in (self.news, self.unpublished_doc):
        api.content.transition(obj=content, transition='publish')
    commit()

    set_registry_record(
        'enabled_types',
        ['Document', 'News Item'],
        interface=IRerSolrpushSettings,
    )
    self.reindex_view()
    self.assertEqual(count_hits(), 3)

    set_registry_record(
        'enabled_types', ['Document'], interface=IRerSolrpushSettings
    )
    self.sync_view()
    self.assertEqual(count_hits(), 2)
def test_update_content(self):
    """
    Pushing a content again after setting its description makes the
    ``Description`` field show up on its solr document.
    """

    def fetch_all():
        return search(
            query={'*': '*', 'b_size': 100000}, fl='UID Description'
        )

    push_to_solr(self.published_doc)
    before = fetch_all()
    self.assertEqual(before.hits, 1)
    first_doc = before.docs[0]
    self.assertEqual(first_doc['UID'], self.published_doc.UID())
    self.assertNotIn('Description', first_doc)

    self.published_doc.description = 'foo description'
    push_to_solr(self.published_doc)
    after = fetch_all()
    self.assertEqual(after.hits, 1)
    first_doc = after.docs[0]
    self.assertEqual(first_doc['UID'], self.published_doc.UID())
    self.assertIn('Description', first_doc)
    self.assertEqual(first_doc['Description'], 'foo description')
def test_search_all(self):
    """
    A match-all search returns the three expected contents and none of the
    others.
    """
    everything = search(query={'*': '*'})
    # only published and indexable contents are on solr
    self.assertEqual(everything.hits, 3)
    found_uids = [item['UID'] for item in everything.docs]

    for content in (self.doc1, self.doc2, self.published_news):
        self.assertIn(content.UID(), found_uids)
    for content in (self.unpublished_doc, self.unpublished_news, self.event):
        self.assertNotIn(content.UID(), found_uids)
def test_search_sort_on(self):
    """
    ``sort_on`` orders the results and ``sort_order='reverse'`` flips them.
    """
    # update modification date
    self.doc2.reindexObject()
    commit()

    ascending = search(
        query={'portal_type': 'Document', 'sort_on': 'modified'}
    )
    self.assertEqual(ascending.hits, 2)
    for position, content in enumerate((self.doc1, self.doc2)):
        self.assertEqual(ascending.docs[position]['UID'], content.UID())

    descending = search(
        query={
            'portal_type': 'Document',
            'sort_on': 'modified',
            'sort_order': 'reverse',
        }
    )
    self.assertEqual(descending.hits, 2)
    for position, content in enumerate((self.doc2, self.doc1)):
        self.assertEqual(descending.docs[position]['UID'], content.UID())
def test_maintenance_reindex(self):
    """
    Running the reindex view pushes to solr the contents that were
    published while the indexer was switched off.
    """

    def fetch_all():
        return search(query={'*': '*', 'b_size': 100000}, fl='UID')

    self.assertEqual(fetch_all().hits, 0)

    self.reindex_view()
    indexed = fetch_all()
    self.assertEqual(indexed.hits, 1)
    self.assertEqual(indexed.docs[0]['UID'], self.published_doc.UID())

    # now, disable solr indexer and publish other two items
    set_registry_record('active', False, interface=IRerSolrpushSettings)
    for content in (self.news, self.unpublished_doc):
        api.content.transition(obj=content, transition='publish')
    commit()
    set_registry_record('active', True, interface=IRerSolrpushSettings)

    self.reindex_view()
    indexed = fetch_all()
    self.assertEqual(indexed.hits, 2)
    self.assertEqual(indexed.docs[0]['UID'], self.published_doc.UID())
    # news items are not enabled
    self.assertEqual(indexed.docs[1]['UID'], self.unpublished_doc.UID())
def terms(self):
    """
    Build one ``SimpleTerm`` per key found in the ``site_name`` facet of a
    match-all solr search.

    Returns an empty list when the search reports an error or when there
    are no ``site_name`` facets.
    """
    response = search(
        query={'*': '*'}, fl='UID', facets=True, facet_fields='site_name'
    )
    search_failed = isinstance(response, dict) and response.get(
        'error', False
    )
    if search_failed:
        return []

    site_facets = response.facets['facet_fields'].get('site_name', [])
    if not site_facets:
        return []

    return [
        SimpleTerm(value=name, token=name.encode('utf-8'), title=name)
        for entry in site_facets
        for name in entry.keys()
    ]
def test_items_are_unindexed_when_set_false(self):
    """
    With ``showinsearch`` set to False on the document and a reindex, a
    match-all search on solr returns a single hit.
    """
    document = self.document
    document.showinsearch = False
    document.reindexObject()
    commit()

    remaining = search(query={'*': '*', 'b_size': 100000}, fl='UID')
    self.assertEqual(remaining.hits, 1)
def test_items_are_indexed_by_default(self):
    """
    Without touching any content, a match-all search on solr returns two
    hits.
    """
    hit_count = search(query={'*': '*', 'b_size': 100000}, fl='UID').hits
    self.assertEqual(hit_count, 2)
def test_search_fq(self):
    """
    Searches filtered by ``portal_type`` or ``Subject`` return exactly the
    expected contents.
    """
    # attributes of the test case holding the contents to verify; each
    # scenario checks all of them, in this order
    candidates = (
        'doc1',
        'doc2',
        'published_news',
        'unpublished_doc',
        'unpublished_news',
        'event',
    )
    # (query, expected hits, names of the contents that must be returned)
    scenarios = (
        # same result if we search by portal_type
        ({'portal_type': 'Document'}, 2, ('doc1', 'doc2')),
        ({'portal_type': 'News Item'}, 1, ('published_news',)),
        ({'Subject': 'foo'}, 2, ('doc1', 'published_news')),
        ({'Subject': 'bar'}, 1, ('doc1',)),
        ({'Subject': ['foo', 'bar']}, 2, ('doc1', 'published_news')),
        ({'Subject': ['news category']}, 1, ('published_news',)),
    )
    for query, expected_hits, matching in scenarios:
        found = search(query=query)
        self.assertEqual(found.hits, expected_hits)
        uids = [item['UID'] for item in found.docs]
        for attr_name in candidates:
            content_uid = getattr(self, attr_name).UID()
            if attr_name in matching:
                self.assertIn(content_uid, uids)
            else:
                self.assertNotIn(content_uid, uids)