def test_chunked(self):
    """chunked() splits a list into tuples of at most n items each."""
    cases = [
        (([], 1), []),                       # chunking nothing yields nothing
        (([1], 10), [(1,)]),                 # len(list) < n: one short chunk
        (([1, 2], 2), [(1, 2)]),             # len(list) == n: one full chunk
        (([1, 2, 3, 4, 5], 2),
         [(1, 2), (3, 4), (5,)]),            # len(list) > n: short tail chunk
    ]
    for args, expected in cases:
        eq_(list(chunked(*args)), expected)
def test_chunked(self):
    """Verify chunked() yields tuples of at most n items."""
    # chunking nothing yields nothing.
    eq_(list(chunked([], 1)), [])
    # chunking list where len(list) < n
    eq_(list(chunked([1], 10)), [(1, )])
    # chunking a list where len(list) == n
    eq_(list(chunked([1, 2], 2)), [(1, 2)])
    # chunking list where len(list) > n
    eq_(list(chunked([1, 2, 3, 4, 5], 2)), [(1, 2), (3, 4), (5, )])
def index_objects(mapping_type, ids, chunk_size=100, es=None, index=None):
    """Index documents of a specified mapping type.

    This indexes synchronously (it does NOT allow for asynchronous
    indexing).  If a mapping_type extends Indexable, you can add a
    ``post_save`` hook for the model that it's based on like this::

        @receiver(dbsignals.post_save, sender=MyModel)
        def update_in_index(sender, instance, **kw):
            index_objects(MyMappingType, [instance.id])

    :arg mapping_type: the mapping type for these ids
    :arg ids: the list of ids of things to index
    :arg chunk_size: the size of the chunk for bulk indexing

    .. Note::

       The default chunk_size is 100. The number of documents you can
       bulk index at once depends on the size of the documents.

    :arg es: The `Elasticsearch` to use. If you don't specify an
        `Elasticsearch`, it'll use `mapping_type.get_es()`.
    :arg index: The name of the index to use. If you don't specify one
        it'll use `mapping_type.get_index()`.

    """
    if settings.ES_DISABLED:
        return

    if not ids:
        log.warning("0 objects being indexed")
    else:
        log.debug('Indexing objects {0}-{1}. [{2}]'.format(
            ids[0], ids[-1], len(ids)))

    # Get the model this mapping type is based on.
    model = mapping_type.get_model()

    # Retrieve all the objects that we're going to index and do it in
    # bulk.
    for id_list in chunked(ids, chunk_size):
        documents = []

        for obj in model.objects.filter(id__in=id_list):
            try:
                documents.append(mapping_type.extract_document(obj.id, obj))
            # StandardError was removed in Python 3; Exception is the
            # equivalent broad catch and matches the sibling variants.
            except Exception as exc:
                log.exception('Unable to extract document {0}: {1}'.format(
                    obj, repr(exc)))

        if documents:
            mapping_type.bulk_index(documents, id_field='id', es=es,
                                    index=index)
def index_objects(mapping_type, ids, chunk_size=100, es=None, index=None):
    """Index documents of a specified mapping type.

    This allows for asynchronous indexing.

    If a mapping_type extends Indexable, you can add a ``post_save``
    hook for the model that it's based on like this::

        @receiver(dbsignals.post_save, sender=MyModel)
        def update_in_index(sender, instance, **kw):
            from elasticutils.contrib.django import tasks
            tasks.index_objects.delay(MyMappingType, [instance.id])

    :arg mapping_type: the mapping type for these ids
    :arg ids: the list of ids of things to index
    :arg chunk_size: the size of the chunk for bulk indexing

    .. Note::

       The default chunk_size is 100. The number of documents you can
       bulk index at once depends on the size of the documents.

    :arg es: The `Elasticsearch` to use. If you don't specify an
        `Elasticsearch`, it'll use `mapping_type.get_es()`.
    :arg index: The name of the index to use. If you don't specify one
        it'll use `mapping_type.get_index()`.

    """
    if settings.ES_DISABLED:
        return

    # Guard against an empty id list: ids[0] / ids[-1] below would
    # raise IndexError otherwise.
    if not ids:
        log.warning("0 objects being indexed")
    else:
        log.debug('Indexing objects {0}-{1}. [{2}]'.format(
            ids[0], ids[-1], len(ids)))

    # Get the model this mapping type is based on.
    model = mapping_type.get_model()

    # Retrieve all the objects that we're going to index and do it in
    # bulk.
    for id_list in chunked(ids, chunk_size):
        documents = []

        for obj in model.objects.filter(id__in=id_list):
            try:
                documents.append(mapping_type.extract_document(obj.id, obj))
            except Exception as exc:
                log.exception('Unable to extract document {0}: {1}'.format(
                    obj, repr(exc)))

        if documents:
            mapping_type.bulk_index(documents, id_field='id', es=es,
                                    index=index)
def index_objects(mapping_type, ids, chunk_size=100, public_index=False,
                  **kwargs):
    """Bulk-index model instances of the given mapping type.

    :arg mapping_type: the mapping type for these ids
    :arg ids: the list of ids of things to index
    :arg chunk_size: the size of the chunk for bulk indexing
    :arg public_index: if True, index into the public index and
        restrict the queryset to publicly indexable objects
        (``public_indexable().privacy_level(PUBLIC)``)

    Extra keyword arguments are accepted (and ignored) for
    compatibility with task invocation signatures.
    """
    if getattr(settings, 'ES_DISABLED', False):
        return

    es = get_es()
    model = mapping_type.get_model()
    # The target index depends only on public_index, so compute it once
    # instead of once per chunk.
    index = mapping_type.get_index(public_index)

    for id_list in chunked(ids, chunk_size):
        qs = model.objects.filter(id__in=id_list)
        if public_index:
            # Only expose objects that are indexable and publicly visible.
            qs = qs.public_indexable().privacy_level(PUBLIC)

        documents = [mapping_type.extract_document(item.id, item)
                     for item in qs]

        # Skip the bulk call when the chunk produced no documents,
        # matching the sibling index_objects variants.
        if documents:
            mapping_type.bulk_index(documents, id_field='id', es=es,
                                    index=index)
        mapping_type.refresh_index(es)
def index_objects(mapping_type, ids, chunk_size=100, es=None, index=None):
    """Bulk-index documents of the given mapping type.

    :arg mapping_type: the mapping type for these ids
    :arg ids: the list of ids of things to index
    :arg chunk_size: the size of the chunk for bulk indexing
    :arg es: the `Elasticsearch` to use (None lets bulk_index decide)
    :arg index: the index name to use (None lets bulk_index decide)
    """
    if settings.ES_DISABLED:
        return

    # Get the model this mapping type is based on.
    model = mapping_type.get_model()

    # Retrieve all the objects that we're going to index and do it in
    # bulk.
    for id_list in chunked(ids, chunk_size):
        documents = []

        for obj in model.objects.filter(id__in=id_list):
            try:
                doc = mapping_type.extract_document(obj.id, obj)
                # extract_document may return a single document or a
                # list of documents; flatten lists into the batch.
                if isinstance(doc, list):
                    documents.extend(doc)
                else:
                    documents.append(doc)
            # StandardError and the print statement are Python 2 only;
            # use Exception and the print() function for Python 3.
            except Exception as exc:
                print('Unable to extract document {0}: {1}'.format(
                    obj, repr(exc)))

        if documents:
            mapping_type.bulk_index(documents, id_field='id', es=es,
                                    index=index)