def convert(self):
    if self.request.method == 'POST':
        authenticator = getMultiAdapter(
            (self.context, self.request), name=u'authenticator')
        if not authenticator.verify():
            raise Unauthorized
        es = ElasticSearchCatalog(self.context)
        es.convertToElastic()
    site = aq_parent(self.context)
    self.request.response.redirect(
        '%s/@@elastic-controlpanel' % site.absolute_url())
class BaseTest(unittest.TestCase):

    layer = ElasticSearch_INTEGRATION_TESTING

    def setUp(self):
        super(BaseTest, self).setUp()
        self.portal = self.layer['portal']
        self.request = self.layer['request']
        self.request.environ['testing'] = True
        self.app = self.layer['app']
        registry = getUtility(IRegistry)
        settings = registry.forInterface(IElasticSettings)
        settings.enabled = True
        self.catalog = getToolByName(self.portal, 'portal_catalog')
        self.catalog._elasticcustomindex = 'plone-test-index'
        self.es = ElasticSearchCatalog(self.catalog)
        self.es.convertToElastic()
        self.catalog.manage_catalogRebuild()
        # need to commit here so all tests start with a baseline
        # of elastic enabled
        self.commit()

    def commit(self):
        transaction.commit()
        # for some reason, commit() resets the site
        setSite(self.portal)

    def clearTransactionEntries(self):
        _hook = hook.getHook(self.es)
        _hook.remove = []
        _hook.index = {}

    def tearDown(self):
        super(BaseTest, self).tearDown()
        self.es.connection.indices.delete(index=self.es.index_name)
        self.clearTransactionEntries()
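# A minimal sketch of a concrete test built on BaseTest above. The
# document id, title and result count are illustrative only, not taken
# from the package's own test suite.
class TestIndexing(BaseTest):

    def test_new_document_is_searchable(self):
        self.portal.invokeFactory('Document', 'page1', title='Foobar page')
        # committing fires the transaction hook that pushes the change
        # to elasticsearch (see index_batch below)
        self.commit()
        results = self.catalog(SearchableText='Foobar')
        self.assertEqual(len(results), 1)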
class ElasticControlPanelFormWrapper(ControlPanelFormWrapper):
    index = ViewPageTemplateFile('controlpanel_layout.pt')

    def __init__(self, *args, **kwargs):
        super(ElasticControlPanelFormWrapper, self).__init__(*args, **kwargs)
        self.portal_catalog = getToolByName(self.context, 'portal_catalog')
        self.es = ElasticSearchCatalog(self.portal_catalog)

    @property
    def connection_status(self):
        try:
            return self.es.connection.status()['ok']
        except AttributeError:
            try:
                health_status = self.es.connection.cluster.health()['status']
                return health_status in ('green', 'yellow')
            except Exception:
                return False
        except Exception:
            return False

    @property
    def es_info(self):
        try:
            info = self.es.connection.info()
            try:
                stats = self.es.connection.indices.stats(
                    index=self.es.real_index_name
                )['indices'][self.es.real_index_name]['primaries']
                size_in_mb = stats['store']['size_in_bytes'] / 1024.0 / 1024.0
                return [
                    ('Cluster Name', info.get('name')),
                    ('Elastic Search Version', info['version']['number']),
                    ('Number of docs', stats['docs']['count']),
                    ('Deleted docs', stats['docs']['deleted']),
                    ('Size', str(int(math.ceil(size_in_mb))) + 'MB'),
                    ('Query Count', stats['search']['query_total'])
                ]
            except KeyError:
                return [
                    ('Cluster Name', info.get('name')),
                    ('Elastic Search Version', info['version']['number'])
                ]
        except Exception:
            logger.warning('Error getting stats', exc_info=True)
            return []

    @property
    def active(self):
        return self.es.get_setting('enabled')
class ElasticControlPanelFormWrapper(ControlPanelFormWrapper):
    index = ViewPageTemplateFile("controlpanel_layout.pt")

    def __init__(self, *args, **kwargs):
        super(ElasticControlPanelFormWrapper, self).__init__(*args, **kwargs)
        self.portal_catalog = getToolByName(self.context, "portal_catalog")
        self.es = ElasticSearchCatalog(self.portal_catalog)

    @property
    def connection_status(self):
        try:
            return self.es.connection.status()["ok"]
        except AttributeError:
            try:
                health = self.es.connection.cluster.health()["status"]
                return health in ("green", "yellow")
            except Exception:
                return False
        except Exception:
            return False

    @property
    def es_info(self):
        try:
            info = self.es.connection.info()
            stats = self.es.connection.indices.stats(
                index=self.es.real_index_name
            )["indices"][self.es.real_index_name]["total"]
            size_in_mb = stats["store"]["size_in_bytes"] / 1024.0 / 1024.0
            return [
                ("Cluster Name", info.get("name")),
                ("Elastic Search Version", info["version"]["number"]),
                ("Number of docs", stats["docs"]["count"]),
                ("Deleted docs", stats["docs"]["deleted"]),
                ("Size", str(int(math.ceil(size_in_mb))) + "MB"),
            ]
        except Exception:
            return []

    @property
    def active(self):
        return self.es.get_setting("enabled")
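# Why connection_status nests two try/excepts: an older client API
# exposed a status() helper, while current elasticsearch-py clients
# only provide cluster.health(), so the AttributeError branch is the
# modern path. A standalone version of the same probe might look like
# this (the host list is an assumption):
from elasticsearch import Elasticsearch


def cluster_is_up(hosts=('localhost:9200',)):
    """Return True when the cluster reports green or yellow health."""
    try:
        client = Elasticsearch(list(hosts))
        return client.cluster.health()['status'] in ('green', 'yellow')
    except Exception:
        return False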
def unrestrictedSearchResults(self, REQUEST=None, **kw):
    es = ElasticSearchCatalog(self)
    return es.searchResults(REQUEST, check_perms=False, **kw)
def catalog_object(self, object, uid=None, idxs=None,
                   update_metadata=1, pghandler=None):
    # avoid the mutable default argument ([]) of the original signature
    if idxs is None:
        idxs = []
    es = ElasticSearchCatalog(self)
    return es.catalog_object(object, uid, idxs, update_metadata, pghandler)
def uncatalog_object(self, uid, obj=None, *args, **kwargs):
    es = ElasticSearchCatalog(self)
    return es.uncatalog_object(uid, obj, *args, **kwargs)
def manage_catalogClear(self, *args, **kwargs):
    """needs to be publishable"""
    es = ElasticSearchCatalog(self)
    return es.manage_catalogClear(*args, **kwargs)
def safeSearchResults(self, REQUEST=None, **kw):
    es = ElasticSearchCatalog(self)
    return es.searchResults(REQUEST, check_perms=True, **kw)
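# The five delegating functions above are meant to replace the stock
# CatalogTool methods so all catalog traffic is routed through
# ElasticSearchCatalog. A rough sketch of how such a monkey patch is
# typically applied (the exact assignments are illustrative, not a
# verbatim copy of the package's patches module):
from Products.CMFPlone.CatalogTool import CatalogTool

CatalogTool.searchResults = safeSearchResults
CatalogTool.__call__ = safeSearchResults
CatalogTool.unrestrictedSearchResults = unrestrictedSearchResults
CatalogTool.catalog_object = catalog_object
CatalogTool.uncatalog_object = uncatalog_object
CatalogTool.manage_catalogClear = manage_catalogClear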
def index_batch(remove, index, positions, es=None):
    if es is None:
        from collective.elasticsearch.es import ElasticSearchCatalog
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))
        setSite(api.portal.get())

    conn = es.connection
    bulk_size = es.get_setting('bulk_size', 50)

    if len(remove) > 0:
        bulk_data = []
        for uid in remove:
            bulk_data.append({
                'delete': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            })
        conn.bulk(index=es.index_name, doc_type=es.doc_type, body=bulk_data)

    if len(index) > 0:
        if isinstance(index, (list, tuple, set)):
            # does not contain objects, must be async, convert to dict
            index = {k: None for k in index}
        bulk_data = []
        for uid, obj in index.items():
            if obj is None:
                obj = uuidToObject(uid)
                if obj is None:
                    continue
            bulk_data.extend([{
                'index': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            }, get_index_data(obj, es)])
            if len(bulk_data) % bulk_size == 0:
                conn.bulk(index=es.index_name, doc_type=es.doc_type,
                          body=bulk_data)
                bulk_data = []
        if len(bulk_data) > 0:
            conn.bulk(index=es.index_name, doc_type=es.doc_type,
                      body=bulk_data)

    if len(positions) > 0:
        bulk_data = []
        index = getIndex(es.catalogtool._catalog, 'getObjPositionInParent')
        for uid, ids in positions.items():
            if uid == '/':
                parent = getSite()
            else:
                parent = uuidToObject(uid)
            if parent is None:
                logger.warning('could not find object to index positions')
                continue
            for _id in ids:
                ob = parent[_id]
                wrapped_object = get_wrapped_object(ob, es)
                try:
                    value = index.get_value(wrapped_object)
                except Exception:
                    continue
                bulk_data.extend([{
                    'update': {
                        '_index': es.index_name,
                        '_type': es.doc_type,
                        '_id': IUUID(ob)
                    }
                }, {
                    'doc': {
                        'getObjPositionInParent': value
                    }
                }])
                if len(bulk_data) % bulk_size == 0:
                    conn.bulk(index=es.index_name, doc_type=es.doc_type,
                              body=bulk_data)
                    bulk_data = []
        if len(bulk_data) > 0:
            conn.bulk(index=es.index_name, doc_type=es.doc_type,
                      body=bulk_data)
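# Hypothetical direct call to index_batch; in normal operation the
# transaction hook assembles these arguments. The uid strings are
# placeholders, not real values.
index_batch(
    remove=['<uid-of-deleted-object>'],       # uids to delete from the index
    index={'<uid-of-changed-object>': None},  # None values are resolved
                                              # via uuidToObject
    positions={},                             # parent uid -> child ids whose
                                              # getObjPositionInParent changed
)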
def index_batch(remove, index, positions, es=None):
    if es is None:
        from collective.elasticsearch.es import ElasticSearchCatalog
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))

    conn = es.connection
    bulk_size = es.get_setting('bulk_size', 50)

    if len(remove) > 0:
        bulk_data = []
        for uid in remove:
            bulk_data.append({
                'delete': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            })
        conn.bulk(index=es.index_name, doc_type=es.doc_type, body=bulk_data)

    if len(index) > 0:
        if isinstance(index, (list, tuple, set)):
            # does not contain objects, must be async, convert to dict
            index = {k: None for k in index}
        bulk_data = []
        for uid, obj in index.items():
            if obj is None:
                obj = uuidToObject(uid)
                if obj is None:
                    continue
            bulk_data.extend([{
                'index': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            }, get_index_data(uid, obj, es)])
            if len(bulk_data) % bulk_size == 0:
                conn.bulk(index=es.index_name, doc_type=es.doc_type,
                          body=bulk_data)
                bulk_data = []
        if len(bulk_data) > 0:
            conn.bulk(index=es.index_name, doc_type=es.doc_type,
                      body=bulk_data)

    if len(positions) > 0:
        bulk_data = []
        index = getIndex(es.catalogtool._catalog, 'getObjPositionInParent')
        for uid, ids in positions.items():
            if uid == '/':
                parent = getSite()
            else:
                parent = uuidToObject(uid)
            if parent is None:
                logger.warning('could not find object to index positions')
                continue
            for _id in ids:
                ob = parent[_id]
                wrapped_object = get_wrapped_object(ob, es)
                try:
                    value = index.get_value(wrapped_object)
                except Exception:
                    continue
                bulk_data.extend([{
                    'update': {
                        '_index': es.index_name,
                        '_type': es.doc_type,
                        '_id': IUUID(ob)
                    }
                }, {
                    'doc': {
                        'getObjPositionInParent': value
                    }
                }])
                if len(bulk_data) % bulk_size == 0:
                    conn.bulk(index=es.index_name, doc_type=es.doc_type,
                              body=bulk_data)
                    bulk_data = []
        if len(bulk_data) > 0:
            conn.bulk(index=es.index_name, doc_type=es.doc_type,
                      body=bulk_data)
def get_es_catalog(self):
    """Return the ElasticSearchCatalog wrapping portal_catalog."""
    return ElasticSearchCatalog(api.portal.get_tool("portal_catalog"))
def check_site(site):
    # XXX will store when last check was so we always only look back
    # to previous check time
    setSite(site)
    catalog = api.portal.get_tool('portal_catalog')
    es = ElasticSearchCatalog(catalog)
    if not es.enabled:
        return

    index_name = audit.get_index_name()
    # re-bind ``es`` to a raw elasticsearch connection for querying
    # the audit index directly
    es = ESConnectionFactoryFactory()()
    sannotations = IAnnotations(site)
    last_checked = sannotations.get(LAST_CHECKED_KEY)
    if last_checked is None:
        last_checked = DateTime() - 30
    filters = [{
        'term': {
            'type': 'workflow'
        }
    }, {
        'range': {
            'date': {
                'gt': last_checked.ISO8601()
            }
        }
    }]
    if len(filters) > 1:
        qfilter = {'and': filters}
    else:
        qfilter = filters[0]
    query = {
        'query': {
            'filtered': {
                'filter': qfilter,
                'query': {
                    'match_all': {}
                }
            }
        }
    }
    results = es.search(index=index_name, doc_type=audit.es_doc_type,
                        body=query, sort='date:desc', size=1000)
    hits = results['hits']['hits']
    workflow = api.portal.get_tool('portal_workflow')
    forced = []
    checked = []
    for hit in hits:
        hit = hit['_source']
        if hit['object'] in checked:
            continue
        try:
            ob = uuidToObject(hit['object'])
            checked.append(hit['object'])
        except Exception:
            continue
        try:
            review_history = workflow.getInfoFor(ob, 'review_history')
            if not review_history:
                continue
            for r in reversed(review_history):
                if (not r['action'] or
                        r['review_state'] != 'published' or
                        not r.get('comments', '').startswith('OVERRIDE:')):
                    continue
                if r['time'] < last_checked:
                    # just quit now, we're getting to older history
                    # that we don't care about
                    break
                forced.append({'ob': ob, 'history_entry': r})
        except WorkflowException:
            continue

    if len(forced) > 0:
        # send out email to admins
        site_url = site.absolute_url()
        registry = getUtility(IRegistry)
        public_url = registry.get('plone.public_url')
        if not public_url:
            public_url = site_url
        email_html = EMAIL_BODY + '<ul>'
        for item in forced:
            ob = item['ob']
            wf_entry = item['history_entry']
            try:
                user = api.user.get(wf_entry['actor'])
                user_name = user.getProperty('fullname') or user.getId()
            except Exception:
                user_name = wf_entry['actor']
            email_html += EMAIL_BODY_ITEM.format(
                content_url=ob.absolute_url().replace(site_url, public_url),
                content_title=ob.Title(),
                user_name=user_name,
                comments=wf_entry.get('comments', ''))
        email_html += '</ul>'
        email_subject = 'Forced content publication update (Site: %s)' % (
            api.portal.get_registry_record('plone.site_title'))
        for user in api.user.get_users():
            user_roles = api.user.get_roles(user=user)
            email = user.getProperty('email')
            if (('Manager' not in user_roles and
                    'Site Administrator' not in user_roles) or not email):
                continue
            utils.send_email(email, email_subject, html=email_html)

    site._p_jar.sync()
    sannotations[LAST_CHECKED_KEY] = DateTime()
    transaction.commit()
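# Note on the query body built in check_site: the 'filtered' query (and
# the {'and': ...} filter) are Elasticsearch 1.x syntax, deprecated in
# 2.0 and removed in 5.0. Against a newer cluster the equivalent shape
# (an assumption about the target version) would be:
query = {
    'query': {
        'bool': {
            'filter': filters,
            'must': {'match_all': {}},
        }
    }
}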
def index_batch(remove, index, positions, es=None):  # noqa: C901
    if es is None:
        from collective.elasticsearch.es import ElasticSearchCatalog
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))
        setSite(api.portal.get())

    conn = es.connection
    bulk_size = es.get_setting('bulk_size', 50)

    if len(remove) > 0:
        bulk_data = []
        for uid in remove:
            bulk_data.append({
                'delete': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            })
        result = conn.bulk(index=es.index_name, doc_type=es.doc_type,
                           body=bulk_data)
        if result.get('errors'):
            logger.error('Error in bulk indexing removal: %s' % result)

    if len(index) > 0:
        if isinstance(index, (list, tuple, set)):
            # does not contain objects, must be async, convert to dict
            index = {k: None for k in index}
        bulk_data = []
        for uid, obj in index.items():
            # If content has been moved (e.g. by a content rule) then the
            # object passed here is the original object, not the moved one.
            # So if there is a uuid, use it to look up the correct object.
            # See https://github.com/collective/collective.elasticsearch/issues/65  # noqa
            if uid is not None:
                obj = uuidToObject(uid)
            if obj is None:
                continue
            bulk_data.extend([{
                'index': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            }, get_index_data(obj, es)])
            if len(bulk_data) % bulk_size == 0:
                result = conn.bulk(index=es.index_name,
                                   doc_type=es.doc_type, body=bulk_data)
                if result.get('errors'):
                    logger.error('Error in bulk indexing: %s' % result)
                bulk_data = []
        if len(bulk_data) > 0:
            result = conn.bulk(index=es.index_name, doc_type=es.doc_type,
                               body=bulk_data)
            if result.get('errors'):
                logger.error('Error in bulk indexing: %s' % result)

    if len(positions) > 0:
        bulk_data = []
        index = getIndex(es.catalogtool._catalog, 'getObjPositionInParent')
        for uid, ids in positions.items():
            if uid == '/':
                parent = getSite()
            else:
                parent = uuidToObject(uid)
            if parent is None:
                logger.warning('could not find object to index positions')
                continue
            for _id in ids:
                ob = parent[_id]
                wrapped_object = get_wrapped_object(ob, es)
                try:
                    value = index.get_value(wrapped_object)
                except Exception:
                    continue
                bulk_data.extend([{
                    'update': {
                        '_index': es.index_name,
                        '_type': es.doc_type,
                        '_id': IUUID(ob)
                    }
                }, {
                    'doc': {
                        'getObjPositionInParent': value
                    }
                }])
                if len(bulk_data) % bulk_size == 0:
                    conn.bulk(index=es.index_name, doc_type=es.doc_type,
                              body=bulk_data)
                    bulk_data = []
        if len(bulk_data) > 0:
            conn.bulk(index=es.index_name, doc_type=es.doc_type,
                      body=bulk_data)
class TestES(unittest.TestCase):

    layer = CASTLE_PLONE_INTEGRATION_TESTING

    def setUp(self):
        self.portal = self.layer['portal']
        self.request = self.layer['request']
        login(self.portal, TEST_USER_NAME)
        setRoles(self.portal, TEST_USER_ID, ('Member', 'Manager'))
        transaction.begin()
        self.folder = api.content.create(
            type='Folder', id='esfolder1', container=self.portal,
            title='Foobar folder')
        self.folder2 = api.content.create(
            type='Folder', id='esfolder2', container=self.folder,
            title='Foobar subfolder')
        self.esdoc1 = api.content.create(
            type='Document', id='esdoc1', container=self.folder,
            title='Foobar one')
        self.esdoc2 = api.content.create(
            type='Document', id='esdoc2', container=self.folder,
            subject=('foobar', ), title='Foobar two')
        self.esdoc3 = api.content.create(
            type='Document', id='esdoc3', container=self.folder,
            title='Foobar three')
        self.esdoc4 = api.content.create(
            type='Document', id='esdoc4', container=self.folder,
            title='Foobar four')
        self.esdoc5 = api.content.create(
            type='Document', id='esdoc5', container=self.folder2,
            title='Foobar five')
        self.esdoc6 = api.content.create(
            type='Document', id='esdoc6', container=self.folder,
            title='Foobar six', exclude_from_search=True)
        ann = IAnnotations(self.esdoc2)
        ann[COUNT_ANNOTATION_KEY] = {
            'twitter_matomo': 5,
            'facebook': 5,
        }
        for item in [self.folder, self.esdoc1, self.esdoc2,
                     self.esdoc3, self.esdoc5]:
            api.content.transition(obj=item, to_state='published')
            item.reindexObject()
        self._es_update()
        transaction.commit()
        url = 'http://{}:9200/plone-test-index/_flush'.format(host)
        requests.post(url)

    def _es_update(self):
        registry = getUtility(IRegistry)
        settings = registry.forInterface(IElasticSettings)
        settings.enabled = True
        settings.sniffer_timeout = 1.0
        self.catalog = getToolByName(self.portal, 'portal_catalog')
        self.catalog._elasticcustomindex = 'plone-test-index'
        self.es = ElasticSearchCatalog(self.catalog)
        self.es.recreateCatalog()
        self.catalog.manage_catalogRebuild()

    def tearDown(self):
        transaction.begin()
        api.content.delete(self.portal.esfolder1)
        transaction.commit()

    def _test_ajax_search_rank_social(self):
        # disabled test (note the leading underscore)
        self.request.form.update({
            'SearchableText': 'Foobar',
            'portal_type': 'Document'
        })
        view = SearchAjax(self.portal, self.request)
        result = json.loads(view())
        self.assertEqual(result['count'], 3)
        self.assertEqual(result['results'][0]['path'], '/esfolder1/esdoc2')

    def test_ajax_search_pt(self):
        self.request.form.update({
            'SearchableText': 'Foobar',
            'portal_type': 'Folder'
        })
        view = SearchAjax(self.portal, self.request)
        result = json.loads(view())
        self.assertEqual(result['count'], 1)
        self.assertEqual(result['results'][0]['path'], '/esfolder1')

    def test_ajax_search_subject(self):
        self.request.form.update({
            'SearchableText': 'Foobar',
            'Subject': 'foobar'
        })
        view = SearchAjax(self.portal, self.request)
        result = json.loads(view())
        self.assertEqual(result['count'], 1)
        self.assertEqual(result['results'][0]['path'], '/esfolder1/esdoc2')

    def test_es_querylisting_unicode_issue(self):
        tile = get_tile(self.request, self.portal,
                        'castle.cms.querylisting', {})
        # should not cause errors...
        self.request.form.update({'Title': 'ma\xf1on'})
        self.assertTrue(tile.filter_pattern_config != '{}')
        tile()

    def test_ajax_search_with_private_parents(self):
        self.request.form.update({
            'SearchableText': 'Foobar',
            # 'Subject': 'foobar'
        })
        view_1 = SearchAjax(self.portal, self.request)
        result_1 = json.loads(view_1())
        self.assertEqual(result_1['count'], 4)
        api.portal.set_registry_record(
            'plone.allow_public_in_private_container', True)
        view_2 = SearchAjax(self.portal, self.request)
        result_2 = json.loads(view_2())
        self.assertEqual(result_2['count'], 5)
def options(self):
    search_types = [{
        'id': 'images',
        'label': 'Image',
        'query': {
            'portal_type': 'Image'
        }
    }, {
        'id': 'page',
        'label': 'Page',
        'query': {
            'portal_type': ['Document', 'Folder']
        }
    }]
    ptypes = api.portal.get_tool('portal_types')
    allow_anyway = ['Audio']
    for type_id in ptypes.objectIds():
        if type_id in ('Link', 'Document', 'Folder'):
            continue
        _type = ptypes[type_id]
        if not _type.global_allow and type_id not in allow_anyway:
            continue
        search_types.append({
            'id': type_id.lower(),
            'label': _type.title,
            'query': {
                'portal_type': type_id
            }
        })
    # search_types.append({
    #     'id': 'audio',
    #     'label': 'Audio',
    #     'query': {
    #         'portal_type': 'Audio'
    #     }
    # })
    # search_types.sort(key=lambda type: type['label'])

    additional_sites = []
    registry = getUtility(IRegistry)
    settings = registry.forInterface(ICrawlerConfiguration, prefix='castle')
    if settings.crawler_active and settings.crawler_site_maps:
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))
        if es.enabled:
            query = {
                'size': 0,
                'aggregations': {
                    'totals': {
                        'terms': {
                            'field': 'domain'
                        }
                    }
                }
            }
            try:
                result = es.connection.search(
                    index=es.index_name, body=query)
                for res in result['aggregations']['totals']['buckets']:
                    site_name = res.get('key')
                    if '.' not in site_name or 'amazon' in site_name:
                        continue
                    additional_sites.append(site_name)
            except TransportError:
                pass

    parsed = urlparse(get_public_url())
    return json.dumps({
        'searchTypes': sorted(search_types, key=lambda st: st['label']),
        'additionalSites': sorted(additional_sites),
        'currentSiteLabel': parsed.netloc,
        'searchHelpText': api.portal.get_registry_record(
            'castle.search_page_help_text', None),
    })
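# Illustrative shape of the JSON string the options property returns;
# the values below are made up and depend on the site's installed
# types and crawler settings:
#
# {
#     "searchTypes": [
#         {"id": "images", "label": "Image",
#          "query": {"portal_type": "Image"}},
#         {"id": "page", "label": "Page",
#          "query": {"portal_type": ["Document", "Folder"]}}
#     ],
#     "additionalSites": ["example.org"],
#     "currentSiteLabel": "www.example.com",
#     "searchHelpText": null
# }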