Example #1
def ingest_upload(collection_id):
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    sync = get_flag('sync', default=False)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        content_hash = None
        for storage in request.files.values():
            # Write each upload into the temp dir and archive it; note
            # that only the last file's content hash is kept.
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        collection.touch()
        db.session.commit()
        proxy = document.to_proxy()
        if proxy.schema.is_a(Document.SCHEMA_FOLDER) and sync:
            index_proxy(collection, proxy, sync=sync)
        ingest_entity(collection, proxy, job_id=job_id, sync=sync)
        document_id = collection.ns.sign(document.id)
        _notify(collection, document_id)
    finally:
        shutil.rmtree(upload_dir)

    return jsonify({'status': 'ok', 'id': document_id}, status=201)
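The Flask routing and authentication wiring for this handler live elsewhere in the codebase. For context, a client could drive the endpoint roughly as below; the base URL, route, API-key header and meta fields are assumptions made for this sketch, not taken from the example.

import json
import os
import requests

# Hypothetical Aleph instance and credentials, for illustration only.
BASE_URL = 'https://aleph.example.org'
API_KEY = 'secret-api-key'

def upload_document(collection_id, file_path):
    # The handler reads files from request.files; posting metadata as a
    # JSON-encoded 'meta' form field is an assumption here, since
    # _load_metadata is not shown. 'sync' is a query flag.
    meta = {'file_name': os.path.basename(file_path)}
    url = '%s/api/2/collections/%s/ingest' % (BASE_URL, collection_id)
    with open(file_path, 'rb') as fh:
        res = requests.post(url,
                            params={'sync': 'true'},
                            headers={'Authorization': 'ApiKey %s' % API_KEY},
                            data={'meta': json.dumps(meta)},
                            files={'file': fh})
    res.raise_for_status()
    return res.json()['id']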
Example #2
def ingest_upload(collection_id):
    collection = get_db_collection(collection_id, request.authz.WRITE)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        content_hash = None
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        db.session.commit()
        proxy = document.to_proxy()
        ingest_entity(collection, proxy)
    finally:
        shutil.rmtree(upload_dir)

    return jsonify({
        'status': 'ok',
        'id': stringify(document.id)
    }, status=201)
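Both variants delegate request parsing to _load_metadata, which is not shown in these examples. A plausible reconstruction, with the form field name and error handling assumed, could look like this:

import json

from flask import request
from werkzeug.exceptions import BadRequest

def _load_metadata():
    # Hypothetical sketch only; the real helper is not part of these
    # examples. Metadata is assumed to arrive as a JSON-encoded 'meta'
    # form field posted alongside the uploaded files.
    try:
        meta = json.loads(request.form.get('meta', '{}'))
    except ValueError:
        raise BadRequest('Invalid metadata format.')
    return meta, meta.get('foreign_id')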
Example #3
def crawl_directory(collection, path, parent=None):
    """Crawl the contents of the given path."""
    content_hash = None
    if not path.is_dir():
        content_hash = archive.archive_file(path)
    foreign_id = path.name
    if parent is not None:
        foreign_id = os.path.join(parent.foreign_id, foreign_id)
    meta = {'file_name': path.name}
    document = Document.save(collection,
                             parent=parent,
                             foreign_id=foreign_id,
                             content_hash=content_hash,
                             meta=meta)
    db.session.commit()
    ingest_entity(collection, document.to_proxy())
    log.info("Crawl [%s]: %s -> %s", collection.id, path, document.id)
    if path.is_dir():
        for child in path.iterdir():
            crawl_directory(collection, child, document)
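crawl_directory leans on pathlib semantics (path.is_dir(), path.iterdir(), path.name), so callers hand it a pathlib.Path rather than a plain string. A minimal driver might look like the following; the Collection.by_foreign_id lookup is assumed here purely for illustration.

from pathlib import Path

# Hypothetical entry point: crawl a local folder into a collection.
collection = Collection.by_foreign_id('sample_collection')
crawl_directory(collection, Path('/data/documents'))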
Example #4
def crawl_directory(collection, path, parent=None, job_id=None):
    """Crawl the contents of the given path."""
    try:
        content_hash = None
        if not path.is_dir():
            content_hash = archive.archive_file(path)
        foreign_id = path.name
        if parent is not None:
            foreign_id = os.path.join(parent.foreign_id, foreign_id)

        # If job_id is not yet set and path.is_dir(), this is the first
        # iteration: skip creating an initial root folder as parent, to
        # stay consistent with the behaviour of alephclient.
        if path.is_dir() and job_id is None:
            document = None
            job_id = Job.random_id()
        else:
            meta = {"file_name": path.name}
            document = Document.save(
                collection,
                parent=parent,
                foreign_id=foreign_id,
                content_hash=content_hash,
                meta=meta,
            )
            db.session.commit()
            job_id = job_id or Job.random_id()
            proxy = document.to_proxy()
            ingest_flush(collection, entity_id=proxy.id)
            ingest_entity(collection, proxy, job_id=job_id)
            log.info("Crawl [%s]: %s -> %s", collection.id, path, document.id)

        if path.is_dir():
            for child in path.iterdir():
                crawl_directory(collection, child, document, job_id)
    except OSError:
        log.exception("Cannot crawl directory: %s", path)
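The effect of the job_id guard is that the top-level directory is never saved as a Document of its own, while every file and subfolder beneath it shares a single ingest job. The stub below reproduces only that control flow, with the Aleph calls swapped for a print, to make the behaviour easy to trace:

import os
import uuid
from pathlib import Path

def walk(path, parent=None, job_id=None):
    # Same guard as above: the first call on a directory only mints the
    # shared job id and recurses; no node is recorded for the root itself.
    if path.is_dir() and job_id is None:
        node = None
        job_id = uuid.uuid4().hex
    else:
        node = path.name
        if parent is not None:
            node = os.path.join(parent, node)
        job_id = job_id or uuid.uuid4().hex
        print('job=%s node=%s' % (job_id, node))
    if path.is_dir():
        for child in path.iterdir():
            walk(child, node, job_id)

walk(Path('.'))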
Example #5
def ingest_upload(collection_id):
    """
    ---
    post:
      summary: Upload a document to a collection
      description: Upload a document to a collection with id `collection_id`
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      requestBody:
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  type: string
                  format: binary
                  description: The document to upload
                meta:
                  $ref: '#/components/schemas/DocumentIngest'
      responses:
        '201':
          description: Created
          content:
            application/json:
              schema:
                properties:
                  id:
                    description: id of the uploaded document
                    type: string
                  status:
                    type: string
                type: object
      tags:
      - Ingest
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    sync = get_flag('sync', default=False)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        content_hash = None
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        collection.touch()
        db.session.commit()
        proxy = document.to_proxy()
        if proxy.schema.is_a(Document.SCHEMA_FOLDER) and sync:
            index_proxy(collection, proxy, sync=sync)
        ingest_entity(collection, proxy, job_id=job_id, sync=sync)
        document_id = collection.ns.sign(document.id)
        _notify(collection, document_id)
    finally:
        shutil.rmtree(upload_dir)

    return jsonify({'status': 'ok', 'id': document_id}, status=201)
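Given the response contract in the docstring (the handler returns a 201 with a signed string id), a handler like this can be exercised end-to-end with Flask's test client. The route, fixture name and authentication setup below are assumptions for illustration, not taken from the examples.

import io
import json

def test_ingest_upload(client):
    # Hypothetical pytest-style check; 'client' is assumed to be a Flask
    # test client already authenticated with write access to collection 1.
    data = {
        'meta': json.dumps({'file_name': 'hello.txt'}),
        'file': (io.BytesIO(b'hello world'), 'hello.txt'),
    }
    res = client.post('/api/2/collections/1/ingest',
                      data=data, content_type='multipart/form-data')
    assert res.status_code == 201
    assert res.get_json()['status'] == 'ok'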