예제 #1
0
def ingest_upload(collection_id):
    """Store an upload (or an empty folder) as a single document.

    Write access on ``collection_id`` is required. Every file in the
    request is saved into a temporary directory, but only the path and
    checksum of the last one are passed on, so one document is produced
    per request. When the request carries no file, the document gets the
    folder schema and is ingested without a path.

    Returns a JSON body with the document id and HTTP status 201.
    """
    authz = request.authz
    require(authz.can(collection_id, authz.WRITE))
    sync = get_flag('sync')
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection_id, meta)
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        file_path, content_hash = None, None
        for upload in request.files.values():
            file_name = safe_filename(upload.filename, default='upload')
            file_path = os.path.join(upload_dir, file_name)
            upload.save(file_path)
            content_hash = checksum(file_path)

        document = Document.by_keys(
            collection_id=collection_id,
            parent_id=parent_id,
            foreign_id=foreign_id,
            content_hash=content_hash,
        )
        document.update(meta)
        # Without a checksum nothing was uploaded: treat it as a folder.
        is_folder = content_hash is None
        document.schema = (Document.SCHEMA_FOLDER if is_folder
                           else Document.SCHEMA)
        ingest_document(
            document,
            file_path,
            role_id=authz.id,
            content_hash=content_hash,
        )
    finally:
        # The temporary upload directory is removed on success and failure.
        shutil.rmtree(upload_dir)

    # Make sure collection counts are always accurate.
    update_document(document, shallow=True, sync=sync)
    payload = {
        'status': 'ok',
        'id': stringify(document.id)
    }
    return jsonify(payload, status=201)
예제 #2
0
 def test_basic_archive(self):
     # The archive must return the file's checksum, and archiving the
     # same file a second time must return the same value.
     expected = storagelayer.checksum(self.file)
     assert expected is not None, expected
     first = self.archive.archive_file(self.file)
     assert expected == first, (expected, first)
     second = self.archive.archive_file(self.file)
     assert first == second, (first, second)
예제 #3
0
def ingest_upload(id):
    """Ingest every uploaded file into the collection ``id``.

    The caller needs write access to the collection. Optional document
    metadata is read as JSON from the ``meta`` form field and validated
    against ``DocumentSchema``. Each uploaded file is saved into
    ``upload_folder``, checksummed, attached to a document resolved via
    ``Document.by_keys`` and handed to ``ingest_document``.

    Raises ``BadRequest`` when the ``meta`` field cannot be parsed.
    Returns a JSON body listing the serialized documents.
    """
    collection = obj_or_404(Collection.by_id(id))
    require(request.authz.can_write(collection.id))

    try:
        meta = json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        # Surface malformed metadata to the client as a 400 response.
        raise BadRequest(unicode(ex))
    validate_data(meta, DocumentSchema)

    documents = []
    for storage in request.files.values():
        sec_fn = os.path.join(upload_folder, secure_filename(storage.filename))
        try:
            storage.save(sec_fn)
            content_hash = checksum(sec_fn)
            document = Document.by_keys(collection=collection,
                                        content_hash=content_hash)
            document.mime_type = storage.mimetype
            document.file_name = storage.filename
            document.update(meta)
            ingest_document(document, sec_fn, role_id=request.authz.id)
        finally:
            # Always drop the temporary copy, including when saving or
            # ingesting raised; otherwise failed uploads pile up in
            # upload_folder. The isfile() guard keeps a failed save from
            # masking the original error with a missing-file error.
            if os.path.isfile(sec_fn):
                os.unlink(sec_fn)
        documents.append(document)

    return jsonify({
        'status':
        'ok',
        'documents': [DocumentSchema().dump(d).data for d in documents]
    })
예제 #4
0
def ingest_upload(id):
    """Ingest uploaded files, or create an empty folder, in a collection.

    The collection is loaded with write permission. Each uploaded file is
    saved into a temporary directory, checksummed, attached to a document
    resolved via ``Document.by_keys`` and passed to ``ingest_document``.
    A request without files produces one folder-schema document instead.

    For casefile collections an ``INGEST_DOCUMENT`` event is published per
    document. If a parent document is set, it is queued for re-indexing.
    Returns a JSON body listing the serialized documents.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    meta, foreign_id = _load_metadata(collection)
    parent_id = _load_parent(collection, meta)
    upload_dir = mkdtemp(prefix='aleph.upload.')
    documents = []
    # Lookup keys shared by every document created for this request.
    base_keys = {
        'collection': collection,
        'parent_id': parent_id,
        'foreign_id': foreign_id,
    }
    try:
        for upload in request.files.values():
            file_name = safe_filename(upload.filename, default='upload')
            file_path = os.path.join(upload_dir, file_name)
            upload.save(file_path)
            document = Document.by_keys(content_hash=checksum(file_path),
                                        **base_keys)
            document.update(meta)
            document.uploader_id = request.authz.id
            ingest_document(document, file_path)
            documents.append(document)

        if len(request.files) == 0:
            # If there is no files uploaded, try to create an empty
            # directory instead. Maybe this should be more explicit,
            # but it seemed like the most simple way of fitting it
            # into the API.
            folder = Document.by_keys(**base_keys)
            folder.schema = Document.SCHEMA_FOLDER
            folder.update(meta)
            folder.uploader_id = request.authz.id
            ingest_document(folder, None)
            documents.append(folder)
    finally:
        # The temporary upload directory is removed on success and failure.
        shutil.rmtree(upload_dir)

    if collection.casefile:
        for document in documents:
            publish(Events.INGEST_DOCUMENT,
                    actor_id=document.uploader_id,
                    params={'document': document, 'collection': collection})

    # Update child counts in index.
    if parent_id is not None:
        index_document_id.apply_async([parent_id], priority=1)

    refresh_index(index=entities_index())
    serialized = [CombinedSchema().dump(doc).data for doc in documents]
    return jsonify({'status': 'ok', 'documents': serialized})
예제 #5
0
파일: ingest_api.py 프로젝트: nt0z/aleph
def ingest_upload(id):
    """Ingest uploaded files, or create an empty folder, in a collection.

    The collection is loaded with write permission. Each uploaded file is
    saved into a temporary directory, checksummed, attached to a document
    resolved via ``Document.by_keys`` and passed to ``ingest_document``
    together with the acting role and the checksum. A request without
    files produces one folder-schema document instead.

    If a parent document is set, it is queued for re-indexing. When the
    ``sync`` flag is set, every document is updated synchronously before
    the response is built. Returns a JSON body listing the documents.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    meta, foreign_id = _load_metadata(collection)
    parent_id = _load_parent(collection, meta)
    upload_dir = mkdtemp(prefix='aleph.upload.')
    documents = []
    # Lookup keys shared by every document created for this request.
    base_keys = {
        'collection_id': collection.id,
        'parent_id': parent_id,
        'foreign_id': foreign_id,
    }
    try:
        for upload in request.files.values():
            file_name = safe_filename(upload.filename, default='upload')
            file_path = os.path.join(upload_dir, file_name)
            upload.save(file_path)
            content_hash = checksum(file_path)
            document = Document.by_keys(content_hash=content_hash,
                                        **base_keys)
            document.update(meta)
            document.schema = Document.SCHEMA
            ingest_document(document,
                            file_path,
                            role_id=request.authz.id,
                            content_hash=content_hash)
            documents.append(document)

        if len(request.files) == 0:
            # If there is no files uploaded, try to create an empty
            # directory instead. Maybe this should be more explicit,
            # but it seemed like the most simple way of fitting it
            # into the API.
            folder = Document.by_keys(**base_keys)
            folder.update(meta)
            folder.schema = Document.SCHEMA_FOLDER
            ingest_document(folder, None, role_id=request.authz.id)
            documents.append(folder)
    finally:
        # The temporary upload directory is removed on success and failure.
        shutil.rmtree(upload_dir)

    # Update child counts in index.
    if parent_id is not None:
        index_document_id.apply_async([parent_id], priority=1)

    # Make sure collection counts are always accurate.
    if get_flag('sync'):
        for document in documents:
            update_document(document, shallow=True, sync=True)

    serialized = [CombinedSchema().dump(doc).data for doc in documents]
    return jsonify({'status': 'ok', 'documents': serialized})
예제 #6
0
def ingest_upload(id):
    """Ingest uploaded files, or an empty directory, into a collection.

    The caller needs write access to the collection ``id``. Each uploaded
    file is saved into a temporary directory, checksummed, attached to a
    document resolved via ``Document.by_keys`` and passed to
    ``ingest_document``. A request without files ingests the (empty)
    temporary directory itself.

    If a parent document is set, it is queued for re-indexing. Returns a
    JSON body listing the serialized documents.
    """
    collection = obj_or_404(Collection.by_id(id))
    require(request.authz.can_write(collection.id))
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection, meta)
    upload_dir = mkdtemp()
    try:
        documents = []
        for storage in request.files.values():
            # Uploads may arrive without a usable file name (the check on
            # storage.filename below expects that). Fall back to a fixed
            # name so the join and save below get a real file path.
            # NOTE(review): assumes safe_filename returns a falsy value
            # for such names - confirm against the installed version.
            path = safe_filename(storage.filename) or 'upload'
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
            document = Document.by_keys(collection=collection,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id,
                                        content_hash=content_hash)
            document.mime_type = storage.mimetype
            if storage.filename:
                document.file_name = os.path.basename(storage.filename)
            document.update(meta)
            ingest_document(document, path,
                            role_id=request.authz.id)
            documents.append(document)

        if not request.files:
            # If there is no files uploaded, try to create an empty
            # directory instead. Maybe this should be more explicit,
            # but it seemed like the most simple way of fitting it
            # into the API.
            document = Document.by_keys(collection=collection,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id)
            document.update(meta)
            ingest_document(document, upload_dir,
                            role_id=request.authz.id)
            documents.append(document)
    finally:
        # The temporary upload directory is removed on success and failure.
        shutil.rmtree(upload_dir)

    # Update child counts in index.
    if parent_id is not None:
        index_document_id.delay(parent_id)

    return jsonify({
        'status': 'ok',
        'documents': [DocumentSchema().dump(d).data for d in documents]
    })
예제 #7
0
파일: manager.py 프로젝트: kaue-cauin/aleph
    def handle_child(self,
                     parent,
                     file_path,
                     title=None,
                     mime_type=None,
                     id=None,
                     file_name=None):
        """Ingest ``file_path`` as a child document of ``parent``.

        The document is resolved via ``Document.by_keys`` from the parent
        document's id and collection, the given ``id`` (as foreign id) and
        the file checksum (``None`` for directories). ``title``,
        ``file_name`` and ``mime_type`` fall back to the values stored in
        the document's ``meta`` when not supplied. ``file_name`` defaults
        to the base name of ``file_path``.
        """
        file_path = decode_path(file_path)
        file_name = decode_path(file_name) or os.path.basename(file_path)

        # Only regular paths are checksummed; directories get no hash.
        if os.path.isdir(file_path):
            content_hash = None
        else:
            content_hash = checksum(file_path)

        parent_document = parent.document
        document = Document.by_keys(
            parent_id=parent_document.id,
            collection=parent_document.collection,
            foreign_id=id,
            content_hash=content_hash,
        )

        # Fall back to previously stored metadata for unset values.
        if not title:
            title = document.meta.get('title')
        document.title = title
        if not file_name:
            file_name = document.meta.get('file_name')
        document.file_name = file_name
        if not mime_type:
            mime_type = document.meta.get('mime_type')
        document.mime_type = mime_type

        from aleph.ingest import ingest_document
        ingest_document(document, file_path, role_id=parent.role_id)