def run(self, quiet=False):
    """Index the documents of every configured model into Elasticsearch.

    Reads ``self.options.models`` (model paths), ``self.options.params``
    (a URL query string forwarded to ``get_collection``),
    ``self.options.chunk``, ``self.options.index`` and
    ``self.options.force``.

    :param quiet: accepted for interface compatibility; not used here.
    :returns: ``0`` once all models have been processed.
    """
    from nefertari.elasticsearch import ES
    ES.setup(self.settings)

    for path in split_strip(self.options.models):
        model = resolve(path)
        # The ES source name is the last component of the dotted path.
        model_name = path.split('.')[-1]

        # parse_qs maps each key to a list of values; keep the first one.
        query = urlparse.parse_qs(self.options.params or '')
        params = {key: values[0] for key, values in query.items()}
        params.setdefault('_limit', 10000)

        # A user-supplied `_limit` arrives as a string from parse_qs, while
        # the default is an int; normalise so chunk_size is always an int.
        chunk_size = int(self.options.chunk or params['_limit'])

        es = ES(source=model_name, index_name=self.options.index)
        documents = to_dicts(model.get_collection(**params))

        if self.options.force:
            es.index(documents, chunk_size=chunk_size)
        else:
            es.index_missing(documents, chunk_size=chunk_size)

    return 0
def run(self, quiet=False):
    """Push documents of each model listed in the options to Elasticsearch.

    For every model path in ``self.options.models`` the collection is
    fetched with the parameters parsed from ``self.options.params`` and
    sent to ES: all documents when ``self.options.force`` is set,
    otherwise through ``index_missing``.

    :param quiet: unused; kept so existing callers keep working.
    :returns: ``0`` after the last model has been handled.
    """
    from nefertari.elasticsearch import ES
    ES.setup(self.settings)

    models_paths = split_strip(self.options.models)
    for path in models_paths:
        model = resolve(path)
        model_name = path.split('.')[-1]

        raw_params = self.options.params or ''
        # Each parse_qs value is a list; only its first element is used.
        params = {
            name: values[0]
            for name, values in urlparse.parse_qs(raw_params).items()
        }
        params.setdefault('_limit', 10000)

        # `_limit` is a str when it came from the query string; coerce so
        # the chunk size handed to ES is an int in every case.
        chunk_size = int(self.options.chunk or params['_limit'])

        es = ES(source=model_name, index_name=self.options.index)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)

        if self.options.force:
            es.index(documents, chunk_size=chunk_size)
        else:
            es.index_missing(documents, chunk_size=chunk_size)

    return 0
def run(self):
    """Index the documents of every configured model into Elasticsearch.

    Reads ``self.options.models`` (model names), ``self.options.params``
    (a URL query string forwarded to ``get_collection``),
    ``self.options.chunk``, ``self.options.index`` and
    ``self.options.force``. With ``force`` the ES mapping of the model is
    deleted and re-created before all documents are indexed; otherwise
    ``index_missing_documents`` is used.

    :returns: ``0`` once all models have been processed.
    """
    ES.setup(self.settings)

    for model_name in split_strip(self.options.models):
        self.log.info('Processing model `{}`'.format(model_name))
        model = engine.get_document_cls(model_name)

        # parse_qs maps each key to a list of values; keep the first one.
        query = urllib.parse.parse_qs(self.options.params or '')
        params = {key: values[0] for key, values in query.items()}
        params.setdefault('_limit', 10000)

        # A user-supplied `_limit` arrives as a string from parse_qs, while
        # the default is an int; normalise so chunk_size is always an int.
        chunk_size = int(self.options.chunk or params['_limit'])

        es = ES(source=model_name, index_name=self.options.index,
                chunk_size=chunk_size)
        documents = to_dicts(model.get_collection(**params))

        if self.options.force:
            self.log.info('Recreating `{}` ES mapping'.format(model_name))
            es.delete_mapping()
            es.put_mapping(body=model.get_es_mapping())
            self.log.info('Indexing all `{}` documents'.format(model_name))
            es.index(documents)
        else:
            self.log.info(
                'Indexing missing `{}` documents'.format(model_name))
            es.index_missing_documents(documents)

    return 0
def on_post_bulk_insert(sender, documents, **kw):
    """Index freshly bulk-inserted documents.

    Does nothing when ``documents`` is empty. Otherwise the ES source
    name is taken from the class of the first document and all documents
    are converted with ``to_dicts`` before being indexed.

    :param sender: not used by this handler.
    :param documents: sequence of inserted documents.
    """
    if documents:
        from nefertari.elasticsearch import ES

        source_name = documents[0].__class__.__name__
        indexer = ES(source=source_name)
        payload = to_dicts(documents)
        indexer.index(payload)
def run(self):
    """Push documents of each model listed in the options to Elasticsearch.

    For every model name in ``self.options.models`` the collection is
    fetched with the parameters parsed from ``self.options.params``.
    When ``self.options.force`` is set the model's ES mapping is deleted
    and put again and every document is indexed; otherwise the documents
    go through ``index_missing_documents``.

    :returns: ``0`` after the last model has been handled.
    """
    ES.setup(self.settings)

    model_names = split_strip(self.options.models)
    for model_name in model_names:
        self.log.info('Processing model `{}`'.format(model_name))
        model = engine.get_document_cls(model_name)

        raw_params = self.options.params or ''
        # Each parse_qs value is a list; only its first element is used.
        params = {
            name: values[0]
            for name, values in urllib.parse.parse_qs(raw_params).items()
        }
        params.setdefault('_limit', 10000)

        # `_limit` is a str when it came from the query string; coerce so
        # the chunk size handed to ES is an int in every case.
        chunk_size = int(self.options.chunk or params['_limit'])

        es = ES(source=model_name, index_name=self.options.index,
                chunk_size=chunk_size)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)

        if self.options.force:
            self.log.info('Recreating `{}` ES mapping'.format(model_name))
            es.delete_mapping()
            es.put_mapping(body=model.get_es_mapping())
            self.log.info('Indexing all `{}` documents'.format(
                model_name))
            es.index(documents)
        else:
            self.log.info('Indexing missing `{}` documents'.format(
                model_name))
            es.index_missing_documents(documents)

    return 0
def reindextask(model, boxsize=5000):
    """Index all documents of ``model`` in boxes of ``boxsize`` documents.

    The collection is counted first; nothing happens when it is empty.
    Otherwise the documents are fetched page by page, sorted by the
    model's primary-key field so that pages are stable, converted with
    ``to_dicts`` and sent to ES.

    :param model: model name, resolved with ``engine.get_document_cls``
        and also used as the ES source.
    :param boxsize: number of documents fetched and indexed per box.
    """
    mcls = engine.get_document_cls(model)

    count = mcls.get_collection(_count=True)
    if count < 1:
        # Nothing in the database for this model.
        return

    # Ceiling division: a partial trailing box counts as one box, and no
    # extra (empty) box is requested when count is a multiple of boxsize.
    boxes = -(-count // boxsize)

    es = ES(source=model)
    log.info('Processing model `{}` with {} documents in {} boxes'.format(
        model, count, boxes))

    sort_field = mcls.pk_field()
    for n in range(boxes):
        # Report a 1-based position so the last box reads "boxes/boxes".
        log.info('Indexing missing `{}` documents (box: {}/{})'.format(
            model, n + 1, boxes))
        # Sorting is required for the pages to partition the collection.
        query_set = mcls.get_collection(
            _limit=boxsize, _page=n, _sort=sort_field)
        documents = to_dicts(query_set)
        log.debug('---> from db {} documents ; send to ES'.format(
            len(documents)))
        # TODO: verify that the last box holds exactly count % boxsize
        # documents (or boxsize when that remainder is 0).
        es.index(documents)
def on_post_save(sender, document, **kw):
    """Index a saved document, and its relations when it was updated.

    A newly created document (``kw['created']`` truthy) is indexed on its
    own. An existing document is indexed only when
    ``_get_changed_fields()`` reports changes, and in that case its
    relations are re-indexed too with ``nested_only=True``. The
    document's ``_request`` attribute, if any, is forwarded as
    ``request``.

    :param sender: not used by this handler.
    :param document: the saved document.
    """
    from nefertari.elasticsearch import ES

    request = getattr(document, '_request', None)
    is_new = kw.get('created', False)

    # Existing documents without changes need no ES round trip.
    if not is_new and not document._get_changed_fields():
        return

    indexer = ES(document.__class__.__name__)
    indexer.index(document.to_dict(), request=request)
    if not is_new:
        indexer.index_relations(document, nested_only=True, request=request)
def on_bulk_update(model_cls, objects, request):
    """Re-index bulk-updated objects and their nested relations.

    Returns without touching ES when ``model_cls`` has no truthy
    ``_index_enabled`` attribute or when ``objects`` is empty.

    :param model_cls: class of the updated objects; its ``__name__`` is
        used as the ES source.
    :param objects: the updated objects.
    :param request: forwarded to the ES calls as ``request``.
    """
    index_enabled = getattr(model_cls, '_index_enabled', False)
    if not index_enabled or not objects:
        return

    from nefertari.elasticsearch import ES

    indexer = ES(source=model_cls.__name__)
    indexer.index(to_dicts(objects), request=request)
    # Relationships are refreshed as well, nested ones only.
    indexer.bulk_index_relations(objects, request=request, nested_only=True)
def on_bulk_update(update_context):
    """Re-index the objects matched by a bulk update query.

    The model class is read from ``update_context.mapper.entity``. When
    it has a truthy ``_index_enabled`` attribute, the objects returned by
    ``update_context.query.all()`` are indexed and their nested relations
    re-indexed. Nothing is sent to ES when the query returns no objects.

    :param update_context: object exposing ``mapper.entity`` and
        ``query``.
    """
    entity = update_context.mapper.entity
    if getattr(entity, '_index_enabled', False):
        updated = update_context.query.all()
        if updated:
            from nefertari.elasticsearch import ES

            indexer = ES(source=entity.__name__)
            indexer.index(updated)
            # Relationships are refreshed as well, nested ones only.
            indexer.bulk_index_relations(updated, nested_only=True)
def on_bulk_update(update_context):
    """Re-index the objects matched by a bulk update query.

    The request is taken from the ``_request`` attribute of
    ``update_context.query`` (``None`` when absent) and forwarded to the
    ES calls. The model class is read from
    ``update_context.mapper.entity``; indexing only happens when it has a
    truthy ``_index_enabled`` attribute and the query returns objects.

    :param update_context: object exposing ``mapper.entity`` and
        ``query``.
    """
    request = getattr(update_context.query, '_request', None)
    entity = update_context.mapper.entity

    if getattr(entity, '_index_enabled', False):
        updated = update_context.query.all()
        if updated:
            from nefertari.elasticsearch import ES

            indexer = ES(source=entity.__name__)
            indexer.index(updated, request=request)
            # Relationships are refreshed as well, nested ones only.
            indexer.bulk_index_relations(
                updated, request=request, nested_only=True)
def index_object(obj, with_refs=True, **kwargs):
    """Index ``obj`` in ES, optionally together with its relations.

    The ES source name is the class name of ``obj`` and the indexed
    payload is ``obj.to_dict()``.

    :param obj: object to index.
    :param with_refs: when truthy, ``index_relations`` is called for
        ``obj`` after it has been indexed.
    :param kwargs: forwarded unchanged to both ES calls.
    """
    from nefertari.elasticsearch import ES

    source_name = obj.__class__.__name__
    indexer = ES(source_name)
    payload = obj.to_dict()
    indexer.index(payload, **kwargs)

    if not with_refs:
        return
    indexer.index_relations(obj, **kwargs)
def index_object(obj, with_refs=True, **kwargs):
    """Index ``obj`` in ES, optionally together with its relations.

    The ES source name is the class name of ``obj``; the object itself
    is handed to ``ES.index``.

    :param obj: object to index.
    :param with_refs: when truthy, ``index_relations`` is called for
        ``obj`` after it has been indexed.
    :param kwargs: forwarded unchanged to both ES calls.
    """
    source_name = obj.__class__.__name__
    indexer = ES(source_name)
    indexer.index(obj, **kwargs)

    if not with_refs:
        return
    indexer.index_relations(obj, **kwargs)