Example #1
File: bulk.py Project: pudo/aleph
def bulk_write(collection, items, merge=True, unsafe=False):
    """Write a set of entities - given as dicts - to the index in bulk
    mode. This will perform validation but is dangerous as it means the
    application has no control over key generation and a few other aspects
    of building the entity.
    """
    namespace = Namespace(collection.foreign_id)
    entities = {}
    for item in items:
        if not is_mapping(item):
            raise InvalidData("Failed to read input data", errors=item)

        entity = model.get_proxy(item)
        if not unsafe:
            # safe mode: sign the entity ID into the collection's namespace and
            # strip checksum values the client should not be able to set
            entity = namespace.apply(entity)
            entity = remove_checksums(entity)
        entity.context = {'bulk': True, 'collection_id': collection.id}
        if entity.id is None:
            raise InvalidData("No ID for entity", errors=item)

        if entity.id in entities:
            # repeated IDs are merged into a single entity, not overwritten
            entities[entity.id].merge(entity)
        else:
            entities[entity.id] = entity

        if len(entities) >= BULK_PAGE:
            # flush a full page of entities to the index
            index.index_bulk(collection.id, entities, merge=merge)
            entities = {}

    # index whatever remains from the last, partial page
    if len(entities):
        index.index_bulk(collection.id, entities, merge=merge)

    refresh_collection(collection)
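
The snippets on this page all consume the same input shape: plain dicts with `id`, `schema` and `properties` keys, as understood by the followthemoney model that aleph is built on. Below is a minimal sketch of that shape and of the in-place merge that deduplicates repeated IDs. It assumes only the followthemoney package; the literal values are invented for illustration.

from followthemoney import model

# Two input dicts sharing an ID, as bulk_write would receive them.
items = [
    {"id": "acme-1", "schema": "Company", "properties": {"name": ["ACME, Inc."]}},
    {"id": "acme-1", "schema": "Company", "properties": {"country": ["us"]}},
]

# bulk_write merges proxies with the same ID in place; the same effect:
merged = model.get_proxy(items[0])
for item in items[1:]:
    merged.merge(model.get_proxy(item))

print(merged.to_dict())  # one Company carrying both the name and the country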
Example #2
def _generate():
    for data in entities:
        if not is_mapping(data):
            raise InvalidData("Failed to read input data", errors=data)
        entity = model.get_proxy(data)
        if not unsafe:
            entity = remove_checksums(entity)
        yield _process_entity(entity)
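
Example #2 is a closure: `entities`, `unsafe` and `_process_entity` come from an enclosing function that is not shown, and `remove_checksums` is an aleph helper. Below is a self-contained sketch of the same pattern; `make_generator`, the `_process_entity` stub and the checksum-stripping loop are illustrative stand-ins, not aleph's actual code.

from banal import is_mapping
from followthemoney import model
from followthemoney.exc import InvalidData

def make_generator(entities, unsafe=False):
    def _process_entity(proxy):
        # hypothetical stand-in: turn the proxy into an indexable document
        return proxy.to_dict()

    def _remove_checksums(proxy):
        # rough approximation of aleph's remove_checksums: drop checksum-typed
        # properties (file content hashes) from the entity
        for prop in list(proxy.iterprops()):
            if prop.type.name == "checksum":
                proxy.pop(prop)
        return proxy

    def _generate():
        for data in entities:
            if not is_mapping(data):
                raise InvalidData("Failed to read input data", errors=data)
            proxy = model.get_proxy(data)
            if not unsafe:
                proxy = _remove_checksums(proxy)
            yield _process_entity(proxy)

    return _generate()

# Usage: the caller gets a lazy iterator, so large inputs are processed one
# entity at a time rather than being loaded into memory up front.
docs = make_generator([{"schema": "Person", "properties": {"name": ["Jane Doe"]}}])
for doc in docs:
    print(doc)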
Example #3
def bulk_write(collection, iterable, job_id=None, unsafe=False):
    """Write a set of entities - given as dicts - to the index in bulk
    mode. This will perform validation but is dangerous as it means the
    application has no control over key generation and a few other aspects
    of building the entity.
    """
    namespace = Namespace(collection.foreign_id)
    # look up the task stage through which indexing is queued for this job
    stage = get_stage(collection, OP_INDEX, job_id=job_id)
    entities = []
    for item in iterable:
        if not is_mapping(item):
            raise InvalidData("Failed to read input data", errors=item)
        entity = model.get_proxy(item)
        entity = namespace.apply(entity)
        if not unsafe:
            entity = remove_checksums(entity)
        entities.append(entity)
    index_entities(stage, collection, entities)
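
Unlike example #1, this variant does not page the input itself: it materialises every proxy in a list and hands the whole batch to `index_entities`, which pushes it through the task stage. A hedged usage sketch follows; the line-delimited JSON file, its name and the job ID are assumptions for illustration, and the commented-out call shows how the function would be invoked inside the application.

import json

def iter_entity_dicts(path):
    # one JSON object per line: {"id": ..., "schema": ..., "properties": {...}}
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            yield json.loads(line)

# Inside aleph, `collection` would be a Collection record from the database and
# `job_id` would come from the task queue; both are assumed here.
# bulk_write(collection, iter_entity_dicts("entities.json"), job_id="load-1")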