def ingest_upload(collection_id):
    """Upload a single file into a collection, or create an empty folder.

    Requires write access on the collection. The uploaded payload is staged
    in a scratch directory, ingested, and the scratch directory is removed
    unconditionally. Returns a 201 response with the new document id.
    """
    require(request.authz.can(collection_id, request.authz.WRITE))
    sync = get_flag('sync')
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection_id, meta)
    scratch_dir = mkdtemp(prefix='aleph.upload.')
    try:
        file_path = None
        digest = None
        # Stage the request payload on disk. Only the last file in the
        # request survives this loop — this endpoint handles one file.
        for upload in request.files.values():
            file_path = os.path.join(
                scratch_dir, safe_filename(upload.filename, default='upload'))
            upload.save(file_path)
            digest = checksum(file_path)
        document = Document.by_keys(collection_id=collection_id,
                                    parent_id=parent_id,
                                    foreign_id=foreign_id,
                                    content_hash=digest)
        document.update(meta)
        # No file content at all means the caller asked for a folder.
        if digest is None:
            document.schema = Document.SCHEMA_FOLDER
        else:
            document.schema = Document.SCHEMA
        ingest_document(document, file_path,
                        role_id=request.authz.id,
                        content_hash=digest)
    finally:
        shutil.rmtree(scratch_dir)
    # Make sure collection counts are always accurate.
    update_document(document, shallow=True, sync=sync)
    return jsonify({
        'status': 'ok',
        'id': stringify(document.id)
    }, status=201)
def test_basic_archive(self):
    """Archiving a file returns its checksum, and doing so is idempotent."""
    digest = storagelayer.checksum(self.file)
    assert digest is not None, digest
    first = self.archive.archive_file(self.file)
    assert digest == first, (digest, first)
    # A second archive call for identical content must yield the same key.
    second = self.archive.archive_file(self.file)
    assert first == second, (first, second)
def ingest_upload(id):
    """Upload one or more files into a collection (legacy endpoint).

    Parses JSON metadata from the form field ``meta``, validates it against
    ``DocumentSchema``, then saves, hashes and ingests each uploaded file.
    Raises ``BadRequest`` on malformed metadata. Returns the created
    documents serialized via ``DocumentSchema``.

    Note: ``id`` shadows the ``id`` builtin but is kept for interface
    compatibility with the URL rule that binds it.
    """
    collection = obj_or_404(Collection.by_id(id))
    require(request.authz.can_write(collection.id))
    try:
        meta = json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        raise BadRequest(unicode(ex))
    validate_data(meta, DocumentSchema)
    documents = []
    for storage in request.files.values():
        sec_fn = os.path.join(upload_folder,
                              secure_filename(storage.filename))
        storage.save(sec_fn)
        try:
            content_hash = checksum(sec_fn)
            document = Document.by_keys(collection=collection,
                                        content_hash=content_hash)
            document.mime_type = storage.mimetype
            document.file_name = storage.filename
            document.update(meta)
            ingest_document(document, sec_fn, role_id=request.authz.id)
        finally:
            # BUG FIX: previously the staged file was only unlinked on the
            # success path, leaking files in upload_folder whenever
            # ingest_document raised. Clean up unconditionally.
            os.unlink(sec_fn)
        documents.append(document)
    return jsonify({
        'status': 'ok',
        'documents': [DocumentSchema().dump(d).data for d in documents]
    })
def ingest_upload(id):
    """Upload files into a collection, or create an empty folder.

    For each file in the request: stage it to a temp directory, compute its
    checksum, upsert a Document keyed on (collection, parent, foreign_id,
    content_hash) and ingest it. With no files at all, an empty folder
    document is created instead. Afterwards, casefile collections publish an
    INGEST_DOCUMENT event per document, the parent's child count is
    re-indexed, and the entities index is refreshed.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    meta, foreign_id = _load_metadata(collection)
    parent_id = _load_parent(collection, meta)
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        documents = []
        for storage in request.files.values():
            # safe_filename guards against path traversal in the client-
            # supplied name; 'upload' is used when no name is given.
            path = safe_filename(storage.filename, default='upload')
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
            document = Document.by_keys(collection=collection,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id,
                                        content_hash=content_hash)
            document.update(meta)
            document.uploader_id = request.authz.id
            ingest_document(document, path)
            documents.append(document)
        if not len(request.files):
            # If there is no files uploaded, try to create an empty
            # directory instead. Maybe this should be more explicit,
            # but it seemed like the most simple way of fitting it
            # into the API.
            document = Document.by_keys(collection=collection,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id)
            document.schema = Document.SCHEMA_FOLDER
            document.update(meta)
            document.uploader_id = request.authz.id
            ingest_document(document, None)
            documents.append(document)
    finally:
        # Scratch directory is removed even when ingestion fails.
        shutil.rmtree(upload_dir)
    if collection.casefile:
        # Notify subscribers about each newly ingested casefile document.
        for document in documents:
            params = {'document': document, 'collection': collection}
            publish(Events.INGEST_DOCUMENT,
                    actor_id=document.uploader_id,
                    params=params)
    # Update child counts in index.
    if parent_id is not None:
        index_document_id.apply_async([parent_id], priority=1)
    refresh_index(index=entities_index())
    return jsonify({
        'status': 'ok',
        'documents': [CombinedSchema().dump(d).data for d in documents]
    })
def ingest_upload(id):
    """Upload files into a collection, or create an empty folder.

    Stages each uploaded file in a scratch directory, upserts a Document
    keyed on (collection, parent, foreign_id, content_hash) and ingests it.
    A request with no files creates a folder document instead. When the
    'sync' flag is set, the index is updated synchronously for each
    document so collection counts stay accurate.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    meta, foreign_id = _load_metadata(collection)
    parent_id = _load_parent(collection, meta)
    temp_dir = mkdtemp(prefix='aleph.upload.')
    try:
        documents = []
        for upload in request.files.values():
            target = os.path.join(
                temp_dir, safe_filename(upload.filename, default='upload'))
            upload.save(target)
            digest = checksum(target)
            document = Document.by_keys(collection_id=collection.id,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id,
                                        content_hash=digest)
            document.update(meta)
            document.schema = Document.SCHEMA
            ingest_document(document, target,
                            role_id=request.authz.id,
                            content_hash=digest)
            documents.append(document)
        if not len(request.files):
            # No files at all: treat the request as a folder creation.
            # (Kept implicit to fit the existing upload API shape.)
            document = Document.by_keys(collection_id=collection.id,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id)
            document.update(meta)
            document.schema = Document.SCHEMA_FOLDER
            ingest_document(document, None, role_id=request.authz.id)
            documents.append(document)
    finally:
        shutil.rmtree(temp_dir)
    # Update child counts in index.
    if parent_id is not None:
        index_document_id.apply_async([parent_id], priority=1)
    # Make sure collection counts are always accurate.
    if get_flag('sync'):
        for document in documents:
            update_document(document, shallow=True, sync=True)
    return jsonify({
        'status': 'ok',
        'documents': [CombinedSchema().dump(d).data for d in documents]
    })
def ingest_upload(id):
    """Upload files into a collection, or create an empty folder.

    Each uploaded file is staged to a temp directory, checksummed, upserted
    as a Document and handed to the ingest pipeline. With no files present,
    a folder document is ingested from the (empty) staging directory.
    """
    collection = obj_or_404(Collection.by_id(id))
    require(request.authz.can_write(collection.id))
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection, meta)
    staging = mkdtemp()
    try:
        documents = []
        for upload in request.files.values():
            dest = os.path.join(staging, safe_filename(upload.filename))
            upload.save(dest)
            digest = checksum(dest)
            document = Document.by_keys(collection=collection,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id,
                                        content_hash=digest)
            document.mime_type = upload.mimetype
            if upload.filename:
                document.file_name = os.path.basename(upload.filename)
            document.update(meta)
            ingest_document(document, dest, role_id=request.authz.id)
            documents.append(document)
        if not len(request.files):
            # No files at all: create an empty directory document instead.
            # NOTE(review): the staging dir handed to ingest_document is
            # removed in the finally below — presumably ingestion copies it
            # synchronously; confirm before reordering.
            document = Document.by_keys(collection=collection,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id)
            document.update(meta)
            ingest_document(document, staging, role_id=request.authz.id)
            documents.append(document)
    finally:
        shutil.rmtree(staging)
    # Update child counts in index.
    if parent_id is not None:
        index_document_id.delay(parent_id)
    return jsonify({
        'status': 'ok',
        'documents': [DocumentSchema().dump(d).data for d in documents]
    })
def handle_child(self, parent, file_path, title=None, mime_type=None,
                 id=None, file_name=None):
    """Register and ingest a child document extracted from *parent*.

    Upserts a Document keyed on the parent's id/collection, the given
    foreign id and the file's checksum, then hands it to the ingest
    pipeline under the parent's role.

    ``id`` shadows the builtin but is the caller-facing foreign-id
    parameter; kept for interface compatibility.
    """
    file_path = decode_path(file_path)
    # Fall back to the basename when no explicit file name was supplied.
    file_name = decode_path(file_name) or os.path.basename(file_path)
    # Directories carry no content hash; only regular files are checksummed.
    content_hash = None
    if not os.path.isdir(file_path):
        content_hash = checksum(file_path)
    document = Document.by_keys(parent_id=parent.document.id,
                                collection=parent.document.collection,
                                foreign_id=id,
                                content_hash=content_hash)
    # Explicit arguments win; otherwise keep whatever the existing
    # document metadata already recorded.
    document.title = title or document.meta.get('title')
    document.file_name = file_name or document.meta.get('file_name')
    document.mime_type = mime_type or document.meta.get('mime_type')
    # Imported locally — presumably to avoid a circular import between
    # this module and aleph.ingest; confirm before moving to file top.
    from aleph.ingest import ingest_document
    ingest_document(document, file_path, role_id=parent.role_id)