Beispiel #1
0
def reingest_collection(collection, job_id=None, index=False, flush=True):
    """Queue every document of the collection for ingest again.

    A random job id is generated when the caller passes none (or a falsy
    one). With ``flush`` set, ``ingest_flush`` is run on the collection
    before any document is queued. ``index`` is forwarded unchanged to
    ``ingest_entity``.
    """
    if not job_id:
        job_id = Job.random_id()
    if flush:
        ingest_flush(collection)
    documents = Document.by_collection(collection.id)
    for doc in documents:
        entity = doc.to_proxy(ns=collection.ns)
        ingest_entity(collection, entity, job_id=job_id, index=index)
Beispiel #2
0
def ingest_upload(collection_id):
    """
    ---
    post:
      summary: Upload a document to a collection
      description: Upload a document to a collection with id `collection_id`
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      requestBody:
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  type: string
                  format: binary
                  description: The document to upload
                meta:
                  $ref: '#/components/schemas/DocumentIngest'
      responses:
        '201':
          description: Created
          content:
            application/json:
              schema:
                properties:
                  id:
                    description: id of the uploaded document
                    type: integer
                  status:
                    type: string
                type: object
      tags:
      - Ingest
      - Collection
    """
    # The API spec above is kept as the whole docstring; prose notes live in
    # comments so the YAML stays untouched apart from the response code, which
    # now matches the 201 actually returned at the end of this handler.
    # NOTE(review): the spec declares `id` as integer while the handler returns
    # `proxy.id` -- confirm the declared type against the entity id format.

    # Resolve the target collection; WRITE is passed as the required
    # permission level.
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    sync = get_flag("sync", default=False)
    index = get_flag("index", default=True)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    # Scratch directory for the uploaded files; removed in `finally` below
    # whether or not the request succeeds.
    upload_dir = ensure_path(mkdtemp(prefix="aleph.upload."))
    try:
        # Stays None when the request carries no file part.
        content_hash = None
        # Every uploaded file is saved and archived, but only the hash of the
        # last one is attached to the document created below.
        for storage in request.files.values():
            file_name = safe_filename(storage.filename, default="upload")
            path = upload_dir.joinpath(file_name)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(
            collection=collection,
            parent=parent,
            foreign_id=foreign_id,
            content_hash=content_hash,
            meta=meta,
            role_id=request.authz.id,
        )
        collection.touch()
        db.session.commit()
        proxy = document.to_proxy(ns=collection.ns)
        # Folders are indexed right here, but only when the caller asked for
        # both synchronous handling and indexing.
        if proxy.schema.is_a(Document.SCHEMA_FOLDER) and sync and index:
            index_proxy(collection, proxy, sync=sync)
        # Flush for this entity first, then queue it for ingest.
        ingest_flush(collection, entity_id=proxy.id)
        ingest_entity(collection, proxy, job_id=job_id, index=index)
        _notify(collection, proxy.id)
        return jsonify({"status": "ok", "id": proxy.id}, status=201)
    finally:
        shutil.rmtree(upload_dir)