def find_by_ace(ace, models, count=False):
    """ Find documents of models that include ace.

    Look into ACLEncoderMixin.stringify_acl for details on the ace
    format.

    :param ace: Stringified ACL entry (ACE) to match against.
    :param models: List of document classes, objects of which should
        be found.
    :param count: Boolean. When True, the objects count is returned.
    :returns: Number of matching documents when count=True, or the
        documents otherwise.
    :raises ValueError: If no es-based models are passed.
    """
    es_types = _get_es_types(models)
    if not es_types:
        raise ValueError('No es-based models passed')

    params = {'body': _get_es_body(ace)}
    if count:
        params['_count'] = True

    documents = ES(es_types).get_collection(**params)
    docs_count = (documents if isinstance(documents, int)
                  else len(documents))
    log.info('Found {} documents that match ACE {}.'.format(
        docs_count, str(ace)))
    return documents
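# A minimal usage sketch for `find_by_ace`. `User` and `Story` are
# hypothetical es-based document classes, and the ACE dict below is only
# illustrative; the authoritative shape is whatever
# ACLEncoderMixin.stringify_acl produces.
ace = {'action': 'allow', 'principal': 'g:admin', 'permission': 'view'}
docs = find_by_ace(ace, [User, Story])               # matching documents
total = find_by_ace(ace, [User, Story], count=True)  # just the count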
def getitem_es(self, key):
    es = ES(self.item_model.__name__)
    obj = es.get_item(id=key)
    obj.__acl__ = self.item_acl(obj)
    obj.__parent__ = self
    obj.__name__ = key
    return obj
def run(self):
    ES.setup(self.settings)
    model_names = split_strip(self.options.models)

    for model_name in model_names:
        self.log.info('Processing model `{}`'.format(model_name))
        model = engine.get_document_cls(model_name)

        params = self.options.params or ''
        params = {k: v[0] for k, v in
                  urllib.parse.parse_qs(params).items()}
        params.setdefault('_limit', 10000)
        chunk_size = self.options.chunk or params['_limit']

        es = ES(source=model_name, index_name=self.options.index,
                chunk_size=chunk_size)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)

        if self.options.force:
            self.log.info('Recreating `{}` ES mapping'.format(model_name))
            es.delete_mapping()
            es.put_mapping(body=model.get_es_mapping())
            self.log.info('Indexing all `{}` documents'.format(model_name))
            es.index(documents)
        else:
            self.log.info(
                'Indexing missing `{}` documents'.format(model_name))
            es.index_missing_documents(documents)

    return 0
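# The `params` handling above flattens the output of
# `urllib.parse.parse_qs`, which maps each key to a *list* of values,
# keeping only the first value per key. A stdlib-only illustration:
import urllib.parse

parsed = urllib.parse.parse_qs('_limit=500&_sort=id')
assert parsed == {'_limit': ['500'], '_sort': ['id']}
flat = {k: v[0] for k, v in parsed.items()}
assert flat == {'_limit': '500', '_sort': 'id'}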
def on_after_update(mapper, connection, target):
    from .documents import BaseDocument

    # Reindex old one-to-one related object
    committed_state = attributes.instance_state(target).committed_state
    columns = set()
    for field, value in committed_state.items():
        if isinstance(value, BaseDocument):
            obj_session = object_session(value)
            # Make sure the object is not updated yet
            if not obj_session.is_modified(value):
                obj_session.expire(value)
            index_object(value, with_refs=False)
        else:
            id_pos = field.rfind('_id')
            if id_pos >= 0:
                rel_name = field[:id_pos]
                rel = mapper.relationships.get(rel_name, False)
                if rel and any(c.name == field for c in rel.local_columns):
                    columns.add(rel_name)

    # Reload `target` to get access to processed fields values
    columns = columns.union(
        [c.name for c in class_mapper(target.__class__).columns])
    object_session(target).expire(target, attribute_names=columns)
    index_object(target, with_refs=False, nested_only=True)

    # Reindex the item's parents. This must be done after the child
    # has been processed.
    for parent, children_field in target.get_parent_documents(
            nested_only=True):
        columns = [c.name for c in class_mapper(parent.__class__).columns]
        object_session(parent).expire(parent, attribute_names=columns)
        ES(parent.__class__.__name__).index_nested_document(
            parent, children_field, target)
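# `on_after_update` has the (mapper, connection, target) signature of a
# SQLAlchemy mapper-level event handler. A wiring sketch using standard
# SQLAlchemy APIs; `Profile` is a hypothetical model used only for
# illustration, not part of the original code.
from sqlalchemy import Column, Integer, String, event
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Profile(Base):  # hypothetical model
    __tablename__ = 'profiles'
    id = Column(Integer, primary_key=True)
    name = Column(String)

# Reindex in ES every time a Profile row is updated.
event.listen(Profile, 'after_update', on_after_update)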
def reindextask(model, boxsize=5000):
    """ Index model by small chunks (i.e. a box, with a reasonable size). """
    global log
    mcls = engine.get_document_cls(model)

    # Proceed by chunks of `boxsize`.
    count = mcls.get_collection(_count=True)
    if count < 1:
        # Nothing in the DB.
        return

    boxes = count // boxsize
    rest = count % boxsize

    es = ES(source=model)  # Get a connector.
    log.info('Processing model `{}` with {} documents in {} boxes'.format(
        model, count, boxes))

    # Dump by boxes; add one box for the rest (if rest == 0 the last
    # box will simply be empty).
    for n in range(boxes + 1):
        log.info('Indexing missing `{}` documents (box: {}/{})'.format(
            model, n, boxes + 1))
        # Don't forget the sort: pagination is unstable without it.
        query_set = mcls.get_collection(
            _limit=boxsize, _page=n, _sort=mcls.pk_field())
        documents = to_dicts(query_set)
        log.debug('---> from db {} documents; send to ES'.format(
            len(documents)))
        # TODO: add a control? The last box size should equal `rest`.
        es.index(documents)
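# The box arithmetic above in numbers: count=12001 and boxsize=5000 give
# boxes=2 and rest=2001, and the loop runs boxes + 1 = 3 times, pulling
# 5000, 5000 and finally 2001 documents. A stdlib-only check:
count, boxsize = 12001, 5000
boxes, rest = divmod(count, boxsize)
assert (boxes, rest) == (2, 2001)
sizes = [min(boxsize, count - n * boxsize) for n in range(boxes + 1)]
assert sizes == [5000, 5000, 2001]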
def run(self, quiet=False):
    from nefertari.elasticsearch import ES
    ES.setup(self.settings)
    models_paths = split_strip(self.options.models)

    for path in models_paths:
        model = resolve(path)
        model_name = path.split('.')[-1]

        params = self.options.params or ''
        params = {k: v[0] for k, v in
                  urlparse.parse_qs(params).items()}
        params.setdefault('_limit', 10000)
        chunk_size = self.options.chunk or params['_limit']

        es = ES(source=model_name, index_name=self.options.index)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)

        if self.options.force:
            es.index(documents, chunk_size=chunk_size)
        else:
            es.index_missing(documents, chunk_size=chunk_size)

    return 0
def on_after_delete(mapper, connection, target):
    from nefertari.elasticsearch import ES
    request = getattr(target, '_request', None)
    model_cls = target.__class__
    es = ES(model_cls.__name__)
    obj_id = getattr(target, model_cls.pk_field())
    es.delete(obj_id, request=request)
    es.index_relations(target, request=request)
def on_after_delete(mapper, connection, target):
    from nefertari.elasticsearch import ES
    model_cls = target.__class__
    es = ES(model_cls.__name__)
    obj_id = getattr(target, model_cls.pk_field())
    es.delete(obj_id)
    target.expire_parents()
    es.index_relations(target)
def get_collection_es(self):
    """ Query the ES collection and return results.

    This is the default implementation of querying an ES collection
    with `self._query_params`. It must return the found ES collection
    results for the default response renderers to work properly.
    """
    from nefertari.elasticsearch import ES
    return ES(self.Model.__name__).get_collection(**self._query_params)
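# A minimal sketch of how `get_collection_es` could be used, assuming a
# nefertari-style view where the framework sets `Model` and fills
# `_query_params` from the request query string; `StoriesView` and
# `Story` are hypothetical names used only for illustration.
from nefertari.view import BaseView

class StoriesView(BaseView):
    Model = Story  # hypothetical es-indexed model

    def index(self):
        # Default collection GET: delegate to the ES-backed query.
        return self.get_collection_es()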
def recreate_mapping(model):
    """ Delete and put back the mapping of a model.

    Side effect: deletes all documents linked to this mapping in the
    working index.
    """
    global log
    mcls = engine.get_document_cls(model)
    es = ES(model)
    # Delete: works with elasticsearch==1.7.0.
    es.api.indices.delete_mapping(es.index_name, doc_type=model)
    # Put back the good old mapping.
    es.put_mapping(body=mcls.get_es_mapping())
def aggregate(self):
    """ Perform aggregation and return the response. """
    from nefertari.elasticsearch import ES
    aggregations_params = self.pop_aggregations_params()
    if self.view._auth_enabled:
        self.check_aggregations_privacy(aggregations_params)
    self.stub_wrappers()
    return ES(self.view.Model.__name__).aggregate(
        _aggregations_params=aggregations_params,
        **self._query_params)
def on_bulk_delete(model_cls, objects, request):
    if not getattr(model_cls, '_index_enabled', False):
        return

    pk_field = model_cls.pk_field()
    ids = [getattr(obj, pk_field) for obj in objects]

    from nefertari.elasticsearch import ES
    es = ES(source=model_cls.__name__)
    es.delete(ids, request=request)
    # Reindex relationships
    es.bulk_index_relations(objects, request=request)
def on_bulk_update(update_context):
    model_cls = update_context.mapper.entity
    if not getattr(model_cls, '_index_enabled', False):
        return

    objects = update_context.query.all()
    if not objects:
        return

    from nefertari.elasticsearch import ES
    es = ES(source=model_cls.__name__)
    es.index(objects)
    # Reindex relationships
    es.bulk_index_relations(objects, nested_only=True)
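# `on_bulk_update` takes the single `update_context` argument that
# SQLAlchemy passes to session-level bulk events, so it can be attached
# to 'after_bulk_update' directly. This wiring is a sketch using
# standard SQLAlchemy APIs, not necessarily how the original project
# registers the handler.
from sqlalchemy import event
from sqlalchemy.orm import Session

event.listen(Session, 'after_bulk_update', on_bulk_update)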
def index_models(self, model_names):
    self.log.info('Indexing models documents')

    params = self.options.params or ''
    params = {k: v[0] for k, v in
              urllib.parse.parse_qs(params).items()}
    params.setdefault('_limit', 10000)
    chunk_size = self.options.chunk or params['_limit']

    for model_name in model_names:
        self.log.info('Processing model `{}`'.format(model_name))
        model = engine.get_document_cls(model_name)
        es = ES(source=model_name, index_name=self.options.index,
                chunk_size=chunk_size)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)
        self.log.info('Indexing missing `{}` documents'.format(model_name))
        es.index_missing_documents(documents)
def on_pre_save(sender, document, **kw):
    from nefertari.elasticsearch import ES
    # Reindex only existing documents that actually changed; creation
    # is handled by `on_post_save`.
    if not kw.get('created', False) and document._get_changed_fields():
        ES(document.__class__.__name__).index(document.to_dict())
def on_delete(sender, document, **kw):
    from nefertari.elasticsearch import ES
    ES(document.__class__.__name__).delete(document.id)
def on_post_save(sender, document, **kw):
    from nefertari.elasticsearch import ES
    # Index the document only on creation; updates are handled by
    # `on_pre_save`.
    if kw.get('created', False):
        ES(document.__class__.__name__).index(document.to_dict())
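# The three handlers above match mongoengine's signal handler shape
# (sender, document, **kw). A wiring sketch for a hypothetical
# mongoengine Document `Story`, using the standard
# `signals.<name>.connect` mechanism from mongoengine/blinker.
from mongoengine import signals

signals.pre_save.connect(on_pre_save, sender=Story)    # updates
signals.post_save.connect(on_post_save, sender=Story)  # creations
signals.post_delete.connect(on_delete, sender=Story)   # deletions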
def index_object(obj, with_refs=True, **kwargs):
    es = ES(obj.__class__.__name__)
    es.index(obj, **kwargs)
    if with_refs:
        es.index_relations(obj, **kwargs)
def index_object(obj, with_refs=True, **kwargs):
    from nefertari.elasticsearch import ES
    es = ES(obj.__class__.__name__)
    es.index(obj.to_dict(), **kwargs)
    if with_refs:
        es.index_relations(obj, **kwargs)
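# Usage sketch for `index_object`; `story` stands for a hypothetical
# saved document instance.
index_object(story)                   # index the document and its relations
index_object(story, with_refs=False)  # index only the document itself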