예제 #1
0
    def __call__(self, dquery):
        """Build an Elasticsearch query body from a query dict.

        Keys of ``dquery`` that are neither catalog indexes nor one of the
        text keys (``SearchableText``, ``Title``, ``Description``) are
        ignored.  Every remaining key is converted with the ``get_query``
        method of its index wrapper.  Results from wrappers flagged with
        ``filter_query`` are collected as filters; any other result replaces
        the main query (the last one wins).

        :param dquery: mapping of index name to query value.
        :returns: the main query when no filter was collected, otherwise a
            ``filtered`` dict combining the query with the AND-ed filters.
        """
        text_keys = ('SearchableText', 'Title', 'Description')
        filters = []
        catalog = self.catalogtool._catalog
        idxs = catalog.indexes.keys()
        query = {'match_all': {}}
        for key, value in dquery.items():
            if key not in idxs and key not in text_keys:
                continue

            index = getIndex(catalog, key)
            if index is None:
                if key not in text_keys:
                    # No wrapper could be resolved for this catalog index;
                    # skip it instead of failing on ``None.get_query``.
                    continue
                # deleted index for plone performance but still need on ES
                index = EZCTextIndex(catalog, key)

            qq = index.get_query(key, value)
            if qq is None:
                continue

            if index.filter_query:
                filters.append(qq)
            else:
                query = qq

        if not filters:
            return query
        return {
            'filtered': {
                'filter': {
                    'and': filters
                },
                'query': query
            }
        }
예제 #2
0
    def __call__(self):
        """Return the Elasticsearch mapping for the catalog indexes.

        Starts from a copy of ``self._default_mapping`` and adds a mapping
        for every catalog index that is not already present.  When the index
        named ``self.es.index_name`` does not exist, a versioned index
        (``<name>_<version>``) and an alias pointing at it are created as
        needed.  Finally every property listed in ``self._search_attributes``
        gets ``store`` set to True.

        :returns: ``{'properties': <mapping dict>}``
        :raises Exception: when no index wrapper can be located for a
            catalog index.
        """
        mapping_props = self._default_mapping.copy()
        for index_id in self.catalog.indexes.keys():
            catalog_index = getIndex(self.catalog, index_id)
            if catalog_index is None:
                raise Exception('Can not locate index for %s' % (
                    index_id))
            # prevent create default mapping analyzers
            if index_id not in mapping_props:
                mapping_props[index_id] = catalog_index.create_mapping(
                    index_id)

        client = self.es.connection
        alias = self.es.index_name
        # An index that already exists under the plain name was created
        # BEFORE aliases to versioned indexes were introduced; nothing can be
        # done for it here besides trying to update it.
        if not client.indices.exists(alias):
            if not self.es.index_version:
                # need to initialize version value
                self.es.bump_index_version()
            versioned_name = '%s_%i' % (alias, self.es.index_version)
            if not client.indices.exists(versioned_name):
                client.indices.create(
                    versioned_name,
                    body=self.get_index_creation_body())
            if not client.indices.exists_alias(name=alias):
                client.indices.put_alias(index=versioned_name, name=alias)

        for field, field_mapping in mapping_props.items():
            if field in self._search_attributes:
                field_mapping['store'] = True

        return {'properties': mapping_props}
예제 #3
0
    def convertToElastic(self):
        """Flag the catalog tool as converted and push mappings to ES.

        Builds one mapping entry per catalog index plus a stored,
        not-analyzed ``_metadata`` string field, creates the index named
        ``self.catalogsid`` (an already existing index is tolerated) and puts
        the mappings for both ``self.catalogtype`` and
        ``self.trns_catalogtype``.

        :raises Exception: when no index wrapper can be located for a
            catalog index.
        """
        setattr(self.catalogtool, CONVERTED_ATTR, True)
        self.catalogtool._p_changed = True

        fields = {}
        for index_id in self.catalog.indexes.keys():
            catalog_index = getIndex(self.catalog, index_id)
            if catalog_index is None:
                raise Exception("Can not locate index for %s" % (index_id))
            fields[index_id] = catalog_index.create_mapping(index_id)

        # XXX extra field dedicated to metadata: none of the other index
        # data is stored, and this value will be json encoded.
        fields['_metadata'] = dict(
            type='string',
            index='not_analyzed',
            store=True,
        )

        connection = self.conn
        try:
            connection.create_index(self.catalogsid)
        except IndexAlreadyExistsException:
            # The index is already there; only the mappings need updating.
            pass

        connection.indices.put_mapping(
            doc_type=self.catalogtype,
            mapping={'properties': fields},
            indices=[self.catalogsid])
        connection.indices.put_mapping(
            doc_type=self.trns_catalogtype,
            mapping=self.trns_mapping,
            indices=[self.catalogsid])
예제 #4
0
    def __call__(self, dquery):
        """Convert a query dict into a query object for Elasticsearch.

        Only keys that are catalog indexes with a resolvable index wrapper
        are used.  ``get_query`` of a wrapper may return either a bare
        object, which is treated as a filter, or a ``(object, is_query)``
        tuple; when ``is_query`` is true the object replaces the main query
        (the last one wins), otherwise it is added to the filters.

        :param dquery: mapping of index name to query value.
        :returns: the (possibly filtered) query, wrapped in a ``Search`` with
            ``SearchableText`` highlighting when ``dquery`` contains the
            ``SearchableText`` key.
        """
        filters = []
        catalog = self.catalogtool._catalog
        idxs = catalog.indexes.keys()
        query = MatchAllQuery()
        for key, value in dquery.items():
            if key not in idxs:
                continue
            index = getIndex(catalog, key)
            if index is None:
                continue
            qq = index.get_query(key, value)
            if qq is None:
                continue
            # isinstance also accepts tuple subclasses, unlike ``type() ==``.
            if isinstance(qq, tuple):
                qq, is_query = qq
            else:
                is_query = False
            if is_query:
                query = qq
            else:
                filters.append(qq)
        if filters:
            query = FilteredQuery(query, ANDFilter(filters))

        if 'SearchableText' in dquery:
            hl = HighLighter(pre_tags=['<b>'], post_tags=['</b>'])
            hl.add_field('SearchableText')
            return Search(query, highlight=hl)
        return query
    def __call__(self):
        """Return the Elasticsearch mapping for the catalog indexes.

        Starts from a copy of ``self._default_mapping`` and sets a mapping
        for every catalog index (replacing a default entry of the same
        name).  When the index named ``self.es.index_name`` does not exist, a
        versioned index (``<name>_<version>``) and an alias pointing at it
        are created as needed.  Finally every property listed in
        ``self._search_attributes`` gets ``store`` set to True.

        :returns: ``{'properties': <mapping dict>}``
        :raises Exception: when no index wrapper can be located for a
            catalog index.
        """
        mapping_props = self._default_mapping.copy()
        for index_id in self.catalog.indexes.keys():
            catalog_index = getIndex(self.catalog, index_id)
            if catalog_index is None:
                raise Exception('Can not locate index for %s' % (
                    index_id))
            mapping_props[index_id] = catalog_index.create_mapping(index_id)

        client = self.es.connection
        alias = self.es.index_name
        # An index that already exists under the plain name was created
        # BEFORE aliases to versioned indexes were introduced; nothing can be
        # done for it here besides trying to update it.
        if not client.indices.exists(alias):
            if not self.es.index_version:
                # need to initialize version value
                self.es.bump_index_version()
            versioned_name = '%s_%i' % (alias, self.es.index_version)
            if not client.indices.exists(versioned_name):
                client.indices.create(
                    versioned_name,
                    body=self.get_index_creation_body())
            if not client.indices.exists_alias(name=alias):
                client.indices.put_alias(index=versioned_name, name=alias)

        for field, field_mapping in mapping_props.items():
            if field in self._search_attributes:
                field_mapping['store'] = True

        return {'properties': mapping_props}
예제 #6
0
 def __call__(self, dquery):
     """Convert a query dict into a query object for Elasticsearch.

     Only keys that are catalog indexes with a resolvable index wrapper are
     used.  ``get_query`` of a wrapper may return either a bare object,
     which is treated as a filter, or a ``(object, is_query)`` tuple; when
     ``is_query`` is true the object replaces the main query (the last one
     wins), otherwise it is added to the filters.

     :param dquery: mapping of index name to query value.
     :returns: the main query when no filter was collected, otherwise a
         ``FilteredQuery`` AND-ing the filters around it.
     """
     filters = []
     catalog = self.catalogtool._catalog
     idxs = catalog.indexes.keys()
     query = MatchAllQuery()
     for key, value in dquery.items():
         if key not in idxs:
             continue
         index = getIndex(catalog, key)
         if index is None:
             continue
         qq = index.get_query(key, value)
         if qq is None:
             continue
         # isinstance also accepts tuple subclasses, unlike ``type() ==``.
         if isinstance(qq, tuple):
             qq, is_query = qq
         else:
             is_query = False
         if is_query:
             query = qq
         else:
             filters.append(qq)
     if not filters:
         return query
     return FilteredQuery(query, ANDFilter(filters))
예제 #7
0
 def __call__(self, dquery):
     """Convert a query dict into a query object for Elasticsearch.

     Only keys that are catalog indexes with a resolvable index wrapper are
     used.  ``get_query`` of a wrapper may return either a bare object,
     which is treated as a filter, or a ``(object, is_query)`` tuple; when
     ``is_query`` is true the object replaces the main query (the last one
     wins), otherwise it is added to the filters.

     :param dquery: mapping of index name to query value.
     :returns: the main query when no filter was collected, otherwise a
         ``FilteredQuery`` AND-ing the filters around it.
     """
     filters = []
     catalog = self.catalogtool._catalog
     idxs = catalog.indexes.keys()
     query = MatchAllQuery()
     for key, value in dquery.items():
         if key not in idxs:
             continue
         index = getIndex(catalog, key)
         if index is None:
             continue
         qq = index.get_query(key, value)
         if qq is None:
             continue
         # isinstance also accepts tuple subclasses, unlike ``type() ==``.
         if isinstance(qq, tuple):
             qq, is_query = qq
         else:
             is_query = False
         if is_query:
             query = qq
         else:
             filters.append(qq)
     if not filters:
         return query
     return FilteredQuery(query, ANDFilter(filters))
예제 #8
0
def get_index_data(obj, es):  # noqa: C901
    """Collect the values to send to Elasticsearch for ``obj``.

    Values come from three sources, in order:

    1. every catalog index for which ``getIndex`` resolves a wrapper;
    2. the names returned by ``getESOnlyIndexes()`` that are not already
       present, read through a named ``IIndexer`` adapter or, failing that,
       through an attribute (called when callable) of ``obj``;
    3. every ``IAdditionalIndexDataProvider`` adapter registered for ``obj``.

    Errors raised while reading a value are logged rather than propagated.

    :param obj: the content object being indexed.
    :param es: object exposing ``catalogtool``.
    :returns: dict mapping index name to value.
    """
    def _as_text(raw):
        # Ignore errors in converting to unicode, so json.dumps
        # does not barf when we're trying to send data to ES.
        if six.PY2:
            if isinstance(raw, str):
                return six.text_type(raw, 'utf-8', 'ignore')
        elif isinstance(raw, bytes):
            return raw.decode('utf-8', 'ignore')
        return raw

    catalog = es.catalogtool._catalog

    wrapped_object = get_wrapped_object(obj, es)
    index_data = {}
    for index_name in catalog.indexes.keys():
        index = getIndex(catalog, index_name)
        if index is not None:
            try:
                value = index.get_value(wrapped_object)
            except Exception:
                logger.error('Error indexing value: %s: %s\n%s' %
                             ('/'.join(obj.getPhysicalPath()), index_name,
                              traceback.format_exc()))
                value = None
            if value in (None, 'None'):
                # yes, we'll index null data...
                value = None

            index_data[index_name] = _as_text(value)

    # in case these indexes are deleted
    # (to increase performance and improve ram usage)
    for name in getESOnlyIndexes():
        if name in index_data:
            continue
        indexer = queryMultiAdapter((obj, es.catalogtool), IIndexer, name=name)
        if indexer is not None:
            try:
                # Decode the indexer's own result; the previous code tested
                # and decoded the stale ``value`` left over from the loop
                # above, so ``val`` was never converted.
                index_data[name] = _as_text(indexer())
            except Exception:
                logger.error('Error indexing value: %s: %s\n%s' % ('/'.join(
                    obj.getPhysicalPath()), name, traceback.format_exc()))
        else:
            val = getattr(obj, name, None)
            if callable(val):
                val = val()
            index_data[name] = val

    for _, adapter in getAdapters((obj, ), IAdditionalIndexDataProvider):
        index_data.update(adapter(es, index_data))

    return index_data
예제 #9
0
    def catalog_object(self,
                       obj,
                       uid=None,
                       idxs=[],
                       update_metadata=1,
                       pghandler=None):
        """Index ``obj`` in Elasticsearch (and/or the patched catalog).

        In ``DISABLE_MODE`` and ``DUAL_MODE`` the call is first forwarded to
        ``self.patched.catalog_object``; in ``DISABLE_MODE`` its result is
        returned and nothing is sent to Elasticsearch.

        :param obj: the object to index.
        :param uid: only forwarded to the patched catalog; the document id
            used for Elasticsearch always comes from ``getUID(obj)``.
        :param idxs: names of the indexes to update; an empty value (``[]``,
            ``()`` or ``None``) means all catalog indexes.  The default list
            is never mutated.
        :param update_metadata: when true, catalog metadata columns plus the
            physical path are stored, serialized, under ``_metadata``.
        :param pghandler: only forwarded to the patched catalog.
        :returns: the patched catalog result in ``DISABLE_MODE``, otherwise
            ``None``.
        """
        mode = self.mode
        if mode in (DISABLE_MODE, DUAL_MODE):
            result = self.patched.catalog_object(obj, uid, idxs,
                                                 update_metadata, pghandler)
            if mode == DISABLE_MODE:
                return result

        wrapped_object = obj
        if not IIndexableObject.providedBy(obj):
            # This is the CMF 2.2 compatible approach, which should be used
            # going forward
            wrapper = queryMultiAdapter((obj, self.catalogtool),
                                        IIndexableObject)
            if wrapper is not None:
                wrapped_object = wrapper

        conn = self.conn
        catalog = self.catalog
        # ``not idxs`` also covers None and empty tuples, which the former
        # ``idxs == []`` comparison let through (None then failed in the
        # loop below).
        if not idxs:
            idxs = catalog.indexes.keys()
        index_data = {}
        for index_name in idxs:
            index = getIndex(catalog, index_name)
            if index is not None:
                value = index.get_value(wrapped_object)
                if value in (None, 'None'):
                    # yes, we'll index null data...
                    value = None
                index_data[index_name] = value
        if update_metadata:
            metadata = {}
            for meta_name in catalog.names:
                attr = getattr(wrapped_object, meta_name, MV)
                if (attr is not MV and safe_callable(attr)):
                    attr = attr()
                metadata[meta_name] = attr
            # XXX Also, always index path so we can use it with the brain
            # to make urls
            metadata['_path'] = wrapped_object.getPhysicalPath()
            index_data['_metadata'] = dumps(metadata)

        uid = getUID(obj)
        try:
            doc = conn.get(self.catalogsid, self.catalogtype, uid)
            self.registerInTransaction(uid, td.Actions.modify, doc)
        except NotFoundException:
            self.registerInTransaction(uid, td.Actions.add)
        conn.index(index_data, self.catalogsid, self.catalogtype, uid)
        if self.registry.auto_flush:
            conn.refresh()
예제 #10
0
def get_index_data(obj, es):
    """Collect the values to send to Elasticsearch for ``obj``.

    Values come from three sources, in order:

    1. every catalog index for which ``getIndex`` resolves a wrapper;
    2. the names returned by ``getESOnlyIndexes()`` that are not already
       present, read through a named ``IIndexer`` adapter or, failing that,
       through an attribute (called when callable) of ``obj``;
    3. every ``IAdditionalIndexDataProvider`` adapter registered for ``obj``.

    Errors raised while reading a value are logged rather than propagated.

    :param obj: the content object being indexed.
    :param es: object exposing ``catalogtool``.
    :returns: dict mapping index name to value.
    """
    catalog = es.catalogtool._catalog

    wrapped_object = get_wrapped_object(obj, es)
    index_data = {}
    for index_name in catalog.indexes.keys():
        index = getIndex(catalog, index_name)
        if index is not None:
            try:
                value = index.get_value(wrapped_object)
            except Exception:
                logger.error('Error indexing value: %s: %s\n%s' % (
                    '/'.join(obj.getPhysicalPath()),
                    index_name,
                    traceback.format_exc()))
                value = None
            if value in (None, 'None'):
                # yes, we'll index null data...
                value = None

            # Ignore errors in converting to unicode, so json.dumps
            # does not barf when we're trying to send data to ES.
            if isinstance(value, str):
                value = unicode(value, 'utf-8', 'ignore')

            index_data[index_name] = value

    # in case these indexes are deleted
    # (to increase performance and improve ram usage)
    for name in getESOnlyIndexes():
        if name in index_data:
            continue
        indexer = queryMultiAdapter((obj, es.catalogtool), IIndexer, name=name)
        if indexer is not None:
            try:
                val = indexer()
                # Test the indexer's own result; the previous code tested
                # the stale ``value`` left over from the loop above.
                if isinstance(val, str):
                    val = unicode(val, 'utf-8', 'ignore')
                index_data[name] = val
            except Exception:
                logger.error('Error indexing value: %s: %s\n%s' % (
                    '/'.join(obj.getPhysicalPath()),
                    name,
                    traceback.format_exc()))
        else:
            val = getattr(obj, name, None)
            if callable(val):
                val = val()
            index_data[name] = val

    for _, adapter in getAdapters((obj,), IAdditionalIndexDataProvider):
        index_data.update(adapter(es, index_data))

    return index_data
예제 #11
0
    def catalog_object(self, obj, uid=None, idxs=[],
                       update_metadata=1, pghandler=None):
        """Index ``obj`` in Elasticsearch (and/or the patched catalog).

        In ``DISABLE_MODE`` and ``DUAL_MODE`` the call is first forwarded to
        ``self.patched.catalog_object``; in ``DISABLE_MODE`` its result is
        returned and nothing is sent to Elasticsearch.

        :param obj: the object to index.
        :param uid: only forwarded to the patched catalog; the document id
            used for Elasticsearch always comes from ``getUID(obj)``.
        :param idxs: names of the indexes to update; an empty value (``[]``,
            ``()`` or ``None``) means all catalog indexes.  The default list
            is never mutated.
        :param update_metadata: when true, catalog metadata columns plus the
            physical path are stored, serialized, under ``_metadata``.
        :param pghandler: only forwarded to the patched catalog.
        :returns: the patched catalog result in ``DISABLE_MODE``, otherwise
            ``None``.
        """
        mode = self.mode
        if mode in (DISABLE_MODE, DUAL_MODE):
            result = self.patched.catalog_object(
                obj, uid, idxs, update_metadata, pghandler)
            if mode == DISABLE_MODE:
                return result

        wrapped_object = obj
        if not IIndexableObject.providedBy(obj):
            # This is the CMF 2.2 compatible approach, which should be used
            # going forward
            wrapper = queryMultiAdapter((obj, self.catalogtool), IIndexableObject)
            if wrapper is not None:
                wrapped_object = wrapper

        conn = self.conn
        catalog = self.catalog
        # ``not idxs`` also covers None and empty tuples, which the former
        # ``idxs == []`` comparison let through (None then failed in the
        # loop below).
        if not idxs:
            idxs = catalog.indexes.keys()
        index_data = {}
        for index_name in idxs:
            index = getIndex(catalog, index_name)
            if index is not None:
                value = index.get_value(wrapped_object)
                if value in (None, 'None'):
                    # yes, we'll index null data...
                    value = None
                index_data[index_name] = value
        if update_metadata:
            metadata = {}
            for meta_name in catalog.names:
                attr = getattr(wrapped_object, meta_name, MV)
                if (attr is not MV and safe_callable(attr)):
                    attr = attr()
                metadata[meta_name] = attr
            # XXX Also, always index path so we can use it with the brain
            # to make urls
            metadata['_path'] = wrapped_object.getPhysicalPath()
            index_data['_metadata'] = dumps(metadata)

        uid = getUID(obj)
        try:
            doc = conn.get(self.catalogsid, self.catalogtype, uid)
            self.registerInTransaction(uid, td.Actions.modify, doc)
        except NotFoundException:
            self.registerInTransaction(uid, td.Actions.add)
        conn.index(index_data, self.catalogsid, self.catalogtype, uid)
        if self.registry.auto_flush:
            conn.refresh()
예제 #12
0
    def __call__(self, dquery):
        """Build an Elasticsearch ``bool`` query from a query dict.

        Keys of ``dquery`` that are neither catalog indexes nor listed by
        ``getESOnlyIndexes()`` are ignored.  Every remaining key is converted
        with the ``get_query`` method of its index wrapper, which may return
        a single clause or a list of clauses.  Clauses from wrappers flagged
        with ``filter_query`` go to ``filter``; all others go to ``should``
        with ``minimum_should_match`` set to 1.

        :param dquery: mapping of index name to query value.
        :returns: ``{'match_all': {}}`` when no clause was produced,
            otherwise a ``{'bool': {...}}`` dict holding only the non-empty
            clause groups.
        """
        filters = []
        matches = []
        catalog = self.catalogtool._catalog
        idxs = catalog.indexes.keys()
        es_only_indexes = getESOnlyIndexes()
        for key, value in dquery.items():
            if key not in idxs and key not in es_only_indexes:
                continue

            index = getIndex(catalog, key)
            if index is None:
                if key not in es_only_indexes:
                    # No wrapper could be resolved for this catalog index;
                    # skip it instead of failing on ``None.get_query``.
                    continue
                # deleted index for plone performance but still need on ES
                index = EZCTextIndex(catalog, key)

            qq = index.get_query(key, value)
            if qq is None:
                continue

            target = filters if index.filter_query else matches
            if isinstance(qq, list):
                target.extend(qq)
            else:
                target.append(qq)

        if not filters and not matches:
            return {'match_all': {}}

        query = {'bool': dict()}
        if filters:
            query['bool']['filter'] = filters
        if matches:
            query['bool']['should'] = matches
            query['bool']['minimum_should_match'] = 1
        return query
예제 #13
0
    def __call__(self, dquery):
        """Build an Elasticsearch ``bool`` query from a query dict.

        Keys of ``dquery`` that are neither catalog indexes nor listed by
        ``getESOnlyIndexes()`` are ignored.  Every remaining key is converted
        with the ``get_query`` method of its index wrapper, which may return
        a single clause or a list of clauses.  Clauses from wrappers flagged
        with ``filter_query`` go to ``filter``; all others go to ``should``.

        :param dquery: mapping of index name to query value.
        :returns: ``{'match_all': {}}`` when no clause was produced,
            otherwise a ``{'bool': {...}}`` dict that always carries
            ``filter`` and carries ``should``/``minimum_should_match`` only
            when there is at least one match clause.
        """
        filters = []
        matches = []
        catalog = self.catalogtool._catalog
        idxs = catalog.indexes.keys()
        es_only_indexes = getESOnlyIndexes()
        for key, value in dquery.items():
            if key not in idxs and key not in es_only_indexes:
                continue

            index = getIndex(catalog, key)
            if index is None:
                if key not in es_only_indexes:
                    # No wrapper could be resolved for this catalog index;
                    # skip it instead of failing on ``None.get_query``.
                    continue
                # deleted index for plone performance but still need on ES
                index = EZCTextIndex(catalog, key)

            qq = index.get_query(key, value)
            if qq is None:
                continue

            target = filters if index.filter_query else matches
            if isinstance(qq, list):
                target.extend(qq)
            else:
                target.append(qq)

        if not filters and not matches:
            return {'match_all': {}}

        bool_query = {'filter': filters}
        if matches:
            # Requiring one ``should`` match while the ``should`` list is
            # empty can make a filter-only search return nothing.
            # NOTE(review): depends on the Elasticsearch version - confirm.
            bool_query['should'] = matches
            bool_query['minimum_should_match'] = 1
        return {'bool': bool_query}
예제 #14
0
    def convertToElastic(self):
        """Flag the catalog tool as converted and push mappings to ES.

        Builds one mapping entry per catalog index plus a stored,
        not-analyzed ``_metadata`` string field, creates the index named
        ``self.catalogsid`` (an already existing index is tolerated) and puts
        the mappings for both ``self.catalogtype`` and
        ``self.trns_catalogtype``.

        :raises Exception: when no index wrapper can be located for a
            catalog index.
        """
        setattr(self.catalogtool, CONVERTED_ATTR, True)
        self.catalogtool._p_changed = True

        fields = {}
        for index_id in self.catalog.indexes.keys():
            catalog_index = getIndex(self.catalog, index_id)
            if catalog_index is None:
                raise Exception("Can not locate index for %s" % (
                    index_id))
            fields[index_id] = catalog_index.create_mapping(index_id)

        # XXX extra field dedicated to metadata: none of the other index
        # data is stored, and this value will be json encoded.
        fields['_metadata'] = dict(
            type='string',
            index='not_analyzed',
            store=True,
        )

        connection = self.conn
        try:
            connection.create_index(self.catalogsid)
        except IndexAlreadyExistsException:
            # The index is already there; only the mappings need updating.
            pass

        connection.indices.put_mapping(
            doc_type=self.catalogtype,
            mapping={'properties': fields},
            indices=[self.catalogsid])
        connection.indices.put_mapping(
            doc_type=self.trns_catalogtype,
            mapping=self.trns_mapping,
            indices=[self.catalogsid])
예제 #15
0
def index_batch(remove, index, positions, es=None):
    """Send deletions, (re)indexing and position updates to ES in bulk.

    :param remove: collection of document ids to delete.
    :param index: mapping of uid to the object to index; a list, tuple or
        set of uids is also accepted, in which case every object is looked
        up with ``uuidToObject``.  Uids that cannot be resolved are skipped.
    :param positions: mapping of parent uid (``'/'`` for the object returned
        by ``getSite()``) to the ids of the children whose
        ``getObjPositionInParent`` value must be updated.
    :param es: object exposing ``connection``, ``index_name``, ``doc_type``,
        ``catalogtool`` and ``get_setting``; an ``ElasticSearchCatalog`` for
        ``portal_catalog`` is created when omitted.
    """
    if es is None:
        from collective.elasticsearch.es import ElasticSearchCatalog
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))

    setSite(api.portal.get())
    conn = es.connection
    bulk_size = es.get_setting('bulk_size', 50)

    def send(actions):
        # Single place issuing the bulk request for all three phases.
        conn.bulk(index=es.index_name,
                  doc_type=es.doc_type,
                  body=actions)

    if len(remove) > 0:
        send([{
            'delete': {
                '_index': es.index_name,
                '_type': es.doc_type,
                '_id': uid
            }
        } for uid in remove])

    if len(index) > 0:
        if isinstance(index, (list, tuple, set)):
            # does not contain objects, must be async, convert to dict
            index = dict.fromkeys(index)
        bulk_data = []

        for uid, obj in index.items():
            if obj is None:
                obj = uuidToObject(uid)
                if obj is None:
                    continue
            bulk_data.extend([{
                'index': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            }, get_index_data(obj, es)])
            if len(bulk_data) % bulk_size == 0:
                send(bulk_data)
                bulk_data = []

        if len(bulk_data) > 0:
            send(bulk_data)

    if len(positions) > 0:
        bulk_data = []
        # Named distinctly so the ``index`` argument is not rebound.
        position_index = getIndex(es.catalogtool._catalog,
                                  'getObjPositionInParent')
        for uid, ids in positions.items():
            if uid == '/':
                parent = getSite()
            else:
                parent = uuidToObject(uid)
            if parent is None:
                # ``Logger.warn`` is a deprecated alias of ``warning``.
                logger.warning('could not find object to index positions')
                continue
            for _id in ids:
                ob = parent[_id]
                wrapped_object = get_wrapped_object(ob, es)
                try:
                    value = position_index.get_value(wrapped_object)
                except Exception:
                    # Best effort: an object whose position cannot be read
                    # is left out of the update.
                    continue
                bulk_data.extend([{
                    'update': {
                        '_index': es.index_name,
                        '_type': es.doc_type,
                        '_id': IUUID(ob)
                    }
                }, {
                    'doc': {
                        'getObjPositionInParent': value
                    }
                }])
                if len(bulk_data) % bulk_size == 0:
                    send(bulk_data)
                    bulk_data = []

        if len(bulk_data) > 0:
            send(bulk_data)
예제 #16
0
def index_batch(remove, index, positions, es=None):
    """Send deletions, (re)indexing and position updates to ES in bulk.

    :param remove: collection of document ids to delete.
    :param index: mapping of uid to the object to index; a list, tuple or
        set of uids is also accepted, in which case every object is looked
        up with ``uuidToObject``.  Uids that cannot be resolved are skipped.
    :param positions: mapping of parent uid (``'/'`` for the object returned
        by ``getSite()``) to the ids of the children whose
        ``getObjPositionInParent`` value must be updated.
    :param es: object exposing ``connection``, ``index_name``, ``doc_type``,
        ``catalogtool`` and ``get_setting``; an ``ElasticSearchCatalog`` for
        ``portal_catalog`` is created when omitted.
    """
    if es is None:
        from collective.elasticsearch.es import ElasticSearchCatalog
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))
    conn = es.connection
    bulk_size = es.get_setting('bulk_size', 50)

    def send(actions):
        # Single place issuing the bulk request for all three phases.
        conn.bulk(index=es.index_name, doc_type=es.doc_type, body=actions)

    if len(remove) > 0:
        send([{
            'delete': {
                '_index': es.index_name,
                '_type': es.doc_type,
                '_id': uid
            }
        } for uid in remove])

    if len(index) > 0:
        if isinstance(index, (list, tuple, set)):
            # does not contain objects, must be async, convert to dict
            index = dict.fromkeys(index)
        bulk_data = []

        for uid, obj in index.items():
            if obj is None:
                obj = uuidToObject(uid)
                if obj is None:
                    continue
            # NOTE(review): get_index_data is called with three arguments
            # here - confirm this matches the helper's signature in use.
            bulk_data.extend([{
                'index': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            }, get_index_data(uid, obj, es)])
            if len(bulk_data) % bulk_size == 0:
                send(bulk_data)
                bulk_data = []

        if len(bulk_data) > 0:
            send(bulk_data)

    if len(positions) > 0:
        bulk_data = []
        # Named distinctly so the ``index`` argument is not rebound.
        position_index = getIndex(es.catalogtool._catalog,
                                  'getObjPositionInParent')
        for uid, ids in positions.items():
            if uid == '/':
                parent = getSite()
            else:
                parent = uuidToObject(uid)
            if parent is None:
                # ``Logger.warn`` is a deprecated alias of ``warning``.
                logger.warning('could not find object to index positions')
                continue
            for _id in ids:
                ob = parent[_id]
                wrapped_object = get_wrapped_object(ob, es)
                try:
                    value = position_index.get_value(wrapped_object)
                except Exception:
                    # Best effort: an object whose position cannot be read
                    # is left out of the update.  ``Exception`` (not a bare
                    # except) lets KeyboardInterrupt/SystemExit propagate.
                    continue
                bulk_data.extend([{
                    'update': {
                        '_index': es.index_name,
                        '_type': es.doc_type,
                        '_id': IUUID(ob)
                    }
                }, {
                    'doc': {
                        'getObjPositionInParent': value
                    }
                }])
                if len(bulk_data) % bulk_size == 0:
                    send(bulk_data)
                    bulk_data = []

        if len(bulk_data) > 0:
            send(bulk_data)
예제 #17
0
def index_batch(remove, index, positions, es=None):  # noqa: C901
    """Send deletions, (re)indexing and position updates to ES in bulk.

    Every bulk response is inspected and logged as an error when its
    ``errors`` entry is ``True``.

    :param remove: collection of document ids to delete.
    :param index: mapping of uid to the object to index; a list, tuple or
        set of uids is also accepted.  Whenever the uid is not None the
        object is resolved again through ``uuidToObject``; entries without
        a resolvable object are skipped.
    :param positions: mapping of parent uid (``'/'`` for the object returned
        by ``getSite()``) to the ids of the children whose
        ``getObjPositionInParent`` value must be updated.
    :param es: object exposing ``connection``, ``index_name``, ``doc_type``,
        ``catalogtool`` and ``get_setting``; an ``ElasticSearchCatalog`` for
        ``portal_catalog`` is created when omitted.
    """
    if es is None:
        from collective.elasticsearch.es import ElasticSearchCatalog
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))

    setSite(api.portal.get())
    conn = es.connection
    bulk_size = es.get_setting('bulk_size', 50)

    def send(actions, failure_message):
        # Issue one bulk request and report failures flagged in the response.
        result = conn.bulk(index=es.index_name,
                           doc_type=es.doc_type,
                           body=actions)
        if "errors" in result and result["errors"] is True:
            logger.error(failure_message % result)

    if len(remove) > 0:
        send([{
            'delete': {
                '_index': es.index_name,
                '_type': es.doc_type,
                '_id': uid
            }
        } for uid in remove], "Error in bulk indexing removal: %s")

    if len(index) > 0:
        if isinstance(index, (list, tuple, set)):
            # does not contain objects, must be async, convert to dict
            index = dict.fromkeys(index)
        bulk_data = []

        for uid, obj in index.items():
            # If content has been moved (ie by a contentrule) then the object
            # passed here is the original object, not the moved one.
            # So if there is a uuid, we use this to get the correct object.
            # See https://github.com/collective/collective.elasticsearch/issues/65 # noqa
            if uid is not None:
                obj = uuidToObject(uid)

            # A single lookup is enough: repeating it with the same uid
            # cannot give a different answer.
            if obj is None:
                continue
            bulk_data.extend([{
                'index': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            }, get_index_data(obj, es)])
            if len(bulk_data) % bulk_size == 0:
                send(bulk_data, "Error in bulk indexing: %s")
                bulk_data = []

        if len(bulk_data) > 0:
            send(bulk_data, "Error in bulk indexing: %s")

    if len(positions) > 0:
        bulk_data = []
        # Named distinctly so the ``index`` argument is not rebound.
        position_index = getIndex(es.catalogtool._catalog,
                                  'getObjPositionInParent')
        for uid, ids in positions.items():
            if uid == '/':
                parent = getSite()
            else:
                parent = uuidToObject(uid)
            if parent is None:
                # ``Logger.warn`` is a deprecated alias of ``warning``.
                logger.warning('could not find object to index positions')
                continue
            for _id in ids:
                ob = parent[_id]
                wrapped_object = get_wrapped_object(ob, es)
                try:
                    value = position_index.get_value(wrapped_object)
                except Exception:
                    # Best effort: an object whose position cannot be read
                    # is left out of the update.
                    continue
                bulk_data.extend([{
                    'update': {
                        '_index': es.index_name,
                        '_type': es.doc_type,
                        '_id': IUUID(ob)
                    }
                }, {
                    'doc': {
                        'getObjPositionInParent': value
                    }
                }])
                if len(bulk_data) % bulk_size == 0:
                    # Position updates are now checked for errors like the
                    # removal and indexing phases.
                    send(bulk_data, "Error in bulk indexing positions: %s")
                    bulk_data = []

        if len(bulk_data) > 0:
            send(bulk_data, "Error in bulk indexing positions: %s")