def update(entity_id):
    """Apply the posted changes to a database entity and return it serialized.

    The entity is fetched with the WRITE access level. When the ``merge``
    flag is set, the posted properties are combined with the entity's
    current data through ``merge_data`` before the update is applied.
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=entity.collection_id)
    payload = parse_request(EntityUpdateSchema)
    if get_flag('merge'):
        payload['properties'] = merge_data(payload.get('properties'),
                                           entity.data)
    entity.update(payload)
    db.session.commit()
    # The sync flag defaults to True for this endpoint.
    sync = get_flag('sync', True)
    refreshed = update_entity(entity, sync=sync)
    return EntitySerializer.jsonify(refreshed)
def create():
    """
    ---
    post:
      summary: Create an entity in a collection
      description: >-
        Create an entity in a collection with a given schema and a set of given
        properties in the database. This is not the API you want to be using to
        load bulk data, but only for interactive entity manipulation in the UI.
        Always use the `bulk` API for loading source datasets, no exceptions.
      parameters:
      - in: query
        name: sign
        description: Sign entity IDs referenced in nested properties.
        required: false
        schema:
          type: boolean
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityCreate'
      responses:
        '200':
          description: Returns the created entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    data = parse_request("EntityCreate")
    collection = get_nested_collection(data, request.authz.WRITE)
    # Any client-supplied ID is discarded before the upsert.
    data.pop("id", None)
    # Validation is opt-in through the `validate` flag.
    if get_flag("validate", default=False):
        validate_entity(data)
    entity_id = upsert_entity(
        data,
        collection,
        authz=request.authz,
        sync=True,
        sign=get_flag("sign", default=False),
        job_id=get_session_id(),
    )
    db.session.commit()
    tag_request(entity_id=entity_id, collection_id=collection.id)
    # Read the entity back from the index (READ access) for the response.
    entity = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(entity)
def bulk(collection_id):
    """Bulk-write the posted entities into a collection.

    The collection is fetched with the WRITE access level and the caller
    must also pass ``request.authz.can_bulk_import()``. The request body is
    parsed as JSON (``force=True``) and passed through ``ensure_list``.

    Returns an empty body with status 204.
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    require(request.authz.can_bulk_import())
    merge = get_flag('merge', default=False)

    # This will disable certain security measures in order to allow bulk
    # loading of document data. The flag is only honoured for admins.
    unsafe = get_flag('unsafe', default=False)
    unsafe = unsafe and request.authz.is_admin

    entities = ensure_list(request.get_json(force=True))
    bulk_write(collection, entities, merge=merge, unsafe=unsafe)
    # Pass the route argument: a bare `id` in this scope is the Python
    # builtin function, not the collection ID.
    refresh_collection(collection_id)
    return ('', 204)
def create():
    """Create a collection for the logged-in user and return it serialized.

    The role is looked up from ``request.authz.id`` and handed to
    ``create_collection`` together with the ``sync`` flag.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionCreateSchema)
    owner = Role.by_id(request.authz.id)
    created = create_collection(payload, role=owner, sync=get_flag('sync'))
    return CollectionSerializer.jsonify(created)
def ingest_upload(collection_id):
    """Save the uploaded file(s) as a document in a collection and ingest it.

    The collection is fetched with the WRITE access level. Uploaded files are
    written to a temporary directory, archived, and the resulting content
    hash is stored on a new ``Document``. The temporary directory is removed
    in all cases.

    Returns a JSON body with ``status`` and the signed document ``id``,
    using status 201.
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    sync = get_flag('sync', default=False)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    # Scratch directory for the upload; removed in the `finally` below.
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        # Stays None when the request carries no files.
        content_hash = None
        # NOTE(review): each iteration overwrites content_hash, so only the
        # last uploaded file's hash reaches Document.save — confirm intended.
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        collection.touch()
        db.session.commit()
        proxy = document.to_proxy()
        # Folders are indexed right away, but only for synchronous requests.
        if proxy.schema.is_a(Document.SCHEMA_FOLDER) and sync:
            index_proxy(collection, proxy, sync=sync)
        ingest_entity(collection, proxy, job_id=job_id, sync=sync)
        # The ID returned to the client is signed by the collection namespace.
        document_id = collection.ns.sign(document.id)
        _notify(collection, document_id)
    finally:
        shutil.rmtree(upload_dir)

    return jsonify({'status': 'ok', 'id': document_id}, status=201)
def delete(entity_id):
    """
    ---
    delete:
      summary: Delete an entity
      description: Delete the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '204':
          description: No Content
      tags:
      - Entity
    """
    entity = get_index_entity(entity_id, request.authz.WRITE)
    # WRITE access is checked on the owning collection as well.
    owner_id = entity.get("collection_id")
    collection = get_db_collection(owner_id, request.authz.WRITE)
    tag_request(collection_id=collection.id)
    delete_entity(
        collection,
        entity,
        sync=get_flag("sync", default=True),
        job_id=get_session_id(),
    )
    return ("", 204)
def ingest_upload(collection_id):
    """Save the uploaded file (or a folder) as a document and ingest it.

    Requires WRITE access on ``collection_id``. Uploaded files are written to
    a temporary directory and checksummed; the document is looked up via
    ``Document.by_keys`` and updated with the request metadata. When no file
    produced a content hash, the document schema is ``SCHEMA_FOLDER``.

    Returns a JSON body with ``status`` and the stringified document ``id``,
    using status 201.
    """
    require(request.authz.can(collection_id, request.authz.WRITE))
    sync = get_flag('sync')
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection_id, meta)
    # Scratch directory for the upload; removed in the `finally` below.
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        # Both stay None when the request carries no files.
        path = None
        content_hash = None
        # NOTE(review): each iteration overwrites path/content_hash, so only
        # the last uploaded file is ingested — confirm intended.
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
        document = Document.by_keys(collection_id=collection_id,
                                    parent_id=parent_id,
                                    foreign_id=foreign_id,
                                    content_hash=content_hash)
        document.update(meta)
        document.schema = Document.SCHEMA
        # No content hash means no file was saved: store a folder instead.
        if content_hash is None:
            document.schema = Document.SCHEMA_FOLDER
        ingest_document(document, path,
                        role_id=request.authz.id,
                        content_hash=content_hash)
    finally:
        shutil.rmtree(upload_dir)

    if document.collection.casefile:
        # Make sure collection counts are always accurate.
        update_document(document, sync=sync)
    return jsonify({
        'status': 'ok',
        'id': stringify(document.id)
    }, status=201)
def create():
    """
    ---
    post:
      summary: Create an entity in a collection
      description: >-
        Create an entity in a collection with a given schema and a set of given
        properties in the database. This is not the API you want to be using to
        load bulk data, but only for interactive entity manipulation in the UI.
        Always use the `bulk` API for loading source datasets, no exceptions.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityCreate'
      responses:
        '200':
          description: Returns the created entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    data = parse_request('EntityCreate')
    collection = get_nested_collection(data, request.authz.WRITE)
    # Any client-supplied ID is discarded before the upsert.
    data.pop('id', None)
    # Validation is opt-in and is delegated to upsert_entity.
    validate = get_flag('validate', default=False)
    entity_id = upsert_entity(data, collection, sync=True, validate=validate)
    tag_request(entity_id=entity_id, collection_id=str(collection.id))
    # Read the entity back from the index (READ access) for the response.
    entity = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(entity)
def reingest(collection_id):
    """
    ---
    post:
      summary: Re-ingest a collection
      description: >
        Trigger a process to re-parse the content of all documents stored
        in the collection with id `collection_id`.
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - in: query
        name: index
        description: Index documents while they're being processed.
        schema:
          type: boolean
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    session_job = get_session_id()
    # The work is only queued here, hence the 202 response.
    options = {"index": get_flag("index", False)}
    queue_task(collection, OP_REINGEST, job_id=session_job, payload=options)
    return ("", 202)
def ingest_upload(collection_id):
    """Save the uploaded file (or a folder) as a document and ingest it.

    Requires WRITE access on ``collection_id``. Uploaded files are written to
    a temporary directory and checksummed; the document is looked up via
    ``Document.by_keys`` and updated with the request metadata. When no file
    produced a content hash, the document schema is ``SCHEMA_FOLDER``.

    Returns a JSON body with ``status`` and the stringified document ``id``,
    using status 201.
    """
    require(request.authz.can(collection_id, request.authz.WRITE))
    sync = get_flag('sync')
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection_id, meta)
    # Scratch directory for the upload; removed in the `finally` below.
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        # Both stay None when the request carries no files.
        path = None
        content_hash = None
        # NOTE(review): each iteration overwrites path/content_hash, so only
        # the last uploaded file is ingested — confirm intended.
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
        document = Document.by_keys(collection_id=collection_id,
                                    parent_id=parent_id,
                                    foreign_id=foreign_id,
                                    content_hash=content_hash)
        document.update(meta)
        document.schema = Document.SCHEMA
        # No content hash means no file was saved: store a folder instead.
        if content_hash is None:
            document.schema = Document.SCHEMA_FOLDER
        ingest_document(document, path,
                        role_id=request.authz.id,
                        content_hash=content_hash)
    finally:
        shutil.rmtree(upload_dir)

    # Make sure collection counts are always accurate.
    update_document(document, shallow=True, sync=sync)
    return jsonify({'status': 'ok', 'id': stringify(document.id)}, status=201)
def create():
    """
    ---
    post:
      summary: Create a collection
      description: Create a collection with the given metadata
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CollectionCreate'
      tags:
        - Collection
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Collection'
    """
    # Only logged-in users may create collections.
    require(request.authz.logged_in)
    payload = parse_request('CollectionCreate')
    run_sync = get_flag('sync')
    created = create_collection(payload, request.authz, sync=run_sync)
    return CollectionSerializer.jsonify(created)
def view(collection_id):
    """
    ---
    get:
      summary: Get a collection
      description: Return the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CollectionFull'
      tags:
      - Collection
    """
    data = get_index_collection(collection_id)
    cobj = get_db_collection(collection_id)
    # Optionally recompute the "schema" statistics before reading them.
    if get_flag("refresh", False):
        update_collection_stats(collection_id, ["schema"])
    # Build the extra fields first, then merge them in a single step.
    extras = {
        "statistics": get_collection_stats(cobj.id),
        "status": get_status(cobj),
        "shallow": False,
    }
    data.update(extras)
    return CollectionSerializer.jsonify(data)
def create():
    """Create a collection for the logged-in user and return it serialized.

    The request is parsed and the result serialized with ``CollectionSchema``.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionSchema)
    creator = Role.by_id(request.authz.id)
    created = create_collection(payload, role=creator, sync=get_flag('sync'))
    return serialize_data(created, CollectionSchema)
def update(document_id):
    """Apply the posted changes to a document and return its view response.

    The document is fetched with the WRITE access level; after committing,
    it is updated with ``shallow=True`` and the ``sync`` flag (default True).
    """
    document = get_db_document(document_id, request.authz.WRITE)
    changes = parse_request(DocumentUpdateSchema)
    document.update(changes)
    db.session.commit()
    run_sync = get_flag('sync', True)
    update_document(document, shallow=True, sync=run_sync)
    # Respond with the same payload the document view endpoint produces.
    return view(document_id)
def delete(entity_id):
    """
    ---
    delete:
      summary: Delete an entity
      description: Delete the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '204':
          description: No Content
      tags:
      - Entity
    """
    entity = get_index_entity(entity_id, request.authz.WRITE)
    # WRITE access is checked on the owning collection as well.
    owner_id = entity.get('collection_id')
    collection = get_db_collection(owner_id, request.authz.WRITE)
    tag_request(collection_id=collection.id)
    run_sync = get_flag('sync', True)
    delete_entity(collection, entity, sync=run_sync)
    db.session.commit()
    return ('', 204)
def reindex(collection_id):
    """
    ---
    post:
      summary: Re-index a collection
      description: >
        Re-index the entities in the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - in: query
        description: Delete the index before re-generating it.
        name: flush
        schema:
          type: boolean
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    session_job = get_session_id()
    # The work is only queued here, hence the 202 response.
    options = {"flush": get_flag("flush", False)}
    queue_task(collection, OP_REINDEX, job_id=session_job, payload=options)
    return ("", 202)
def create():
    """
    ---
    post:
      summary: Create a collection
      description: Create a collection with the given metadata
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CollectionCreate'
      tags:
        - Collection
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Collection'
    """
    # Only logged-in users may create collections.
    require(request.authz.logged_in)
    payload = parse_request("CollectionCreate")
    # The sync flag defaults to True for this endpoint.
    run_sync = get_flag("sync", True)
    created = create_collection(payload, request.authz, sync=run_sync)
    new_id = created.get("id")
    # Respond with the same payload the collection view endpoint produces.
    return view(new_id)
def update(entity_id):
    """
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
          format: entity_id
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    payload = parse_request("EntityUpdate")
    try:
        existing = get_index_entity(entity_id, request.authz.WRITE)
        require(check_write_entity(existing, request.authz))
        owner_id = existing.get("collection_id")
        collection = get_db_collection(owner_id, request.authz.WRITE)
    except NotFound:
        # Nothing found for this ID: take the collection from the request
        # body instead, so the upsert below can still proceed.
        collection = get_nested_collection(payload, request.authz.WRITE)
    tag_request(collection_id=collection.id)
    payload["id"] = entity_id
    if get_flag("validate", default=False):
        validate_entity(payload)
    run_sync = get_flag("sync", default=True)
    saved_id = upsert_entity(
        payload, collection, authz=request.authz, sync=run_sync
    )
    db.session.commit()
    # Respond with the same payload the entity view endpoint produces.
    return view(saved_id)
def update(id):
    """Apply the posted changes to a collection and return it serialized.

    The collection is fetched with the WRITE access level; the request is
    parsed and the result serialized with ``CollectionSchema``.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    changes = parse_request(CollectionSchema)
    run_sync = get_flag('sync')
    collection.update(changes)
    db.session.commit()
    refreshed = update_collection(collection, sync=run_sync)
    return serialize_data(refreshed, CollectionSchema)
def update(entity_id):
    """
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
          format: entity_id
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    payload = parse_request('EntityUpdate')
    try:
        existing = get_index_entity(entity_id, request.authz.WRITE)
        owner_id = existing.get('collection_id')
        collection = get_db_collection(owner_id, request.authz.WRITE)
    except NotFound:
        # Nothing found for this ID: take the collection from the request
        # body instead, so the upsert below can still proceed.
        collection = get_nested_collection(payload, request.authz.WRITE)
    tag_request(collection_id=collection.id)
    payload['id'] = entity_id
    run_sync = get_flag('sync', default=True)
    should_validate = get_flag('validate', default=False)
    saved_id = upsert_entity(payload, collection,
                             validate=should_validate,
                             sync=run_sync)
    db.session.commit()
    # Read the entity back from the index (READ access) for the response.
    result = get_index_entity(saved_id, request.authz.READ)
    return EntitySerializer.jsonify(result)
def update(collection_id):
    """Apply the posted changes to a collection and return it serialized.

    The collection is fetched with the WRITE access level and the request
    body is parsed with ``CollectionUpdateSchema``.
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    changes = parse_request(CollectionUpdateSchema)
    run_sync = get_flag('sync')
    collection.update(changes)
    db.session.commit()
    refreshed = update_collection(collection, sync=run_sync)
    return CollectionSerializer.jsonify(refreshed)
def update(collection_id):
    """Apply the posted changes to a collection and return it serialized.

    The collection is fetched with the WRITE access level; the caller's
    ``request.authz`` is passed along to ``collection.update``.
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    changes = parse_request(CollectionUpdateSchema)
    run_sync = get_flag('sync')
    collection.update(changes, request.authz)
    db.session.commit()
    refreshed = update_collection(collection, sync=run_sync)
    return CollectionSerializer.jsonify(refreshed)
def bulk(id):
    """Bulk-write the posted entities into a collection.

    The collection is fetched with the WRITE access level and the caller
    must also pass ``request.authz.can_bulk_import()``. Returns an empty
    body with status 204.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    require(request.authz.can_bulk_import())
    should_merge = get_flag('merge', default=False)
    # force=True parses the body as JSON regardless of the content type.
    body = request.get_json(force=True)
    records = ensure_list(body)
    bulk_write(collection, records, merge=should_merge)
    refresh_collection(id)
    return ('', 204)
def process(collection_id):
    """Queue an ``OP_PROCESS`` task for a collection.

    The collection is fetched with the WRITE access level. The task payload
    carries the ``reset`` flag (default True). Returns an empty body with
    status 202.
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    # re-process the documents
    options = {'reset': get_flag('reset', True)}
    session_job = get_session_id()
    queue_task(collection, OP_PROCESS, job_id=session_job, payload=options)
    collection.touch()
    db.session.commit()
    refresh_collection(collection_id)
    return ('', 202)
def update(entity_id):
    """
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    db_entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=db_entity.collection_id)
    payload = parse_request('EntityUpdate')
    if get_flag('merge'):
        # Combine the posted properties with the entity's current data.
        payload['properties'] = merge_data(payload.get('properties'),
                                           db_entity.data)
    db_entity.update(payload)
    db.session.commit()
    run_sync = get_flag('sync', True)
    update_entity(db_entity, sync=run_sync)
    # Read the entity back from the index (READ access) for the response.
    indexed = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(indexed)
def ingest_upload(id):
    """Ingest every uploaded file as a document, or create an empty folder.

    The collection is fetched with the WRITE access level. Each uploaded file
    is saved to a temporary directory, checksummed and ingested as its own
    document. When the request carries no files, a single document with
    ``Document.SCHEMA_FOLDER`` is ingested instead. The temporary directory
    is removed in all cases.

    Returns a JSON body with ``status`` and the serialized ``documents``.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    meta, foreign_id = _load_metadata(collection)
    parent_id = _load_parent(collection, meta)
    # Scratch directory for the upload; removed in the `finally` below.
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        documents = []
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
            document = Document.by_keys(collection_id=collection.id,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id,
                                        content_hash=content_hash)
            document.update(meta)
            document.schema = Document.SCHEMA
            ingest_document(document, path,
                            role_id=request.authz.id,
                            content_hash=content_hash)
            documents.append(document)

        if not request.files:
            # If there is no files uploaded, try to create an empty
            # directory instead. Maybe this should be more explicit,
            # but it seemed like the most simple way of fitting it
            # into the API.
            document = Document.by_keys(collection_id=collection.id,
                                        parent_id=parent_id,
                                        foreign_id=foreign_id)
            document.update(meta)
            document.schema = Document.SCHEMA_FOLDER
            ingest_document(document, None,
                            role_id=request.authz.id)
            documents.append(document)
    finally:
        shutil.rmtree(upload_dir)

    # Update child counts in index.
    if parent_id is not None:
        index_document_id.apply_async([parent_id], priority=1)

    # Make sure collection counts are always accurate.
    if get_flag('sync'):
        for document in documents:
            update_document(document, shallow=True, sync=True)

    return jsonify({
        'status': 'ok',
        'documents': [CombinedSchema().dump(d).data for d in documents]
    })
def merge(id, other_id):
    """Merge the entity ``other_id`` into the entity ``id``.

    Both entities are fetched with the WRITE access level. After the merge
    is committed, both are passed to ``update_entity``; the data returned
    for the first entity is serialized with ``CombinedSchema``.

    Raises:
        BadRequest: if ``entity.merge`` raises ``ValueError``.
    """
    entity = get_db_entity(id, request.authz.WRITE)
    other = get_db_entity(other_id, request.authz.WRITE)
    sync = get_flag('sync')
    try:
        entity.merge(other)
    except ValueError as ve:
        # Exceptions have no `.message` attribute on Python 3; str() yields
        # the message text on both Python 2 and 3.
        raise BadRequest(str(ve))
    db.session.commit()
    data = update_entity(entity, sync=sync)
    update_entity(other, sync=sync)
    return serialize_data(data, CombinedSchema)
def merge(id, other_id):
    """Merge the entity ``other_id`` into the entity ``id``.

    Both entities are fetched with the WRITE access level. After the merge
    is committed, both are passed to ``update_entity`` (``sync`` defaults to
    True); the data returned for the first entity is sent as the response.

    Raises:
        BadRequest: if ``entity.merge`` raises ``ValueError``.
    """
    entity = get_db_entity(id, request.authz.WRITE)
    other = get_db_entity(other_id, request.authz.WRITE)

    try:
        entity.merge(other)
    except ValueError as ve:
        # Exceptions have no `.message` attribute on Python 3; str() yields
        # the message text on both Python 2 and 3.
        raise BadRequest(str(ve))

    db.session.commit()
    sync = get_flag('sync', True)
    data = update_entity(entity, sync=sync)
    update_entity(other, sync=sync)
    return EntitySerializer.jsonify(data)
def bulk(collection_id):
    """
    ---
    post:
      summary: Load entities into a collection
      description: >
        Bulk load entities into the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - description: >-
          This will disable checksum security measures in order to allow bulk
          loading of document data.
        in: query
        name: unsafe
        schema:
          type: boolean
      requestBody:
        description: Entities to be loaded.
        content:
          application/json:
            schema:
              type: array
              items:
                $ref: '#/components/schemas/EntityUpdate'
      responses:
        '204':
          description: No Content
      tags:
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    require(request.authz.can_bulk_import())
    session_job = get_session_id()

    # This will disable checksum security measures in order to allow bulk
    # loading of document data. The flag is only honoured for admins.
    requested_unsafe = get_flag('unsafe', default=False)
    allow_unsafe = requested_unsafe and request.authz.is_admin

    # force=True parses the body as JSON regardless of the content type.
    body = request.get_json(force=True)
    records = ensure_list(body)
    bulk_write(collection, records, job_id=session_job, unsafe=allow_unsafe)
    collection.touch()
    db.session.commit()
    return ('', 204)
def merge(entity_id, other_id):
    """Merge the entity ``other_id`` into the entity ``entity_id``.

    Both entities are fetched with the WRITE access level. After the merge
    is committed, both are passed to ``update_entity`` (``sync`` defaults to
    True); the data returned for the first entity is sent as the response.

    Raises:
        BadRequest: if ``entity.merge`` raises ``ValueError``.
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    other = get_db_entity(other_id, request.authz.WRITE)
    tag_request(collection_id=entity.collection_id)

    try:
        entity.merge(other)
    except ValueError as ve:
        # Exceptions have no `.message` attribute on Python 3; str() yields
        # the message text on both Python 2 and 3.
        raise BadRequest(str(ve))

    db.session.commit()
    sync = get_flag('sync', True)
    data = update_entity(entity, sync=sync)
    update_entity(other, sync=sync)
    return EntitySerializer.jsonify(data)
def bulk(collection_id):
    """Bulk-write the posted entities into a collection.

    The collection is fetched with the WRITE access level and the caller
    must also pass ``request.authz.can_bulk_import()``. After writing, the
    collection is touched, the session committed and the collection
    refreshed. Returns an empty body with status 204.
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    require(request.authz.can_bulk_import())
    session_job = get_session_id()

    # This will disable checksum security measures in order to allow bulk
    # loading of document data. The flag is only honoured for admins.
    requested_unsafe = get_flag('unsafe', default=False)
    allow_unsafe = requested_unsafe and request.authz.is_admin

    # force=True parses the body as JSON regardless of the content type.
    body = request.get_json(force=True)
    records = ensure_list(body)
    bulk_write(collection, records, job_id=session_job, unsafe=allow_unsafe)
    collection.touch()
    db.session.commit()
    refresh_collection(collection_id)
    return ('', 204)
def update(collection_id):
    """
    ---
    post:
      summary: Update a collection
      description: >
        Change collection metadata and update statistics.
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CollectionUpdate'
      tags:
        - Collection
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Collection'
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    changes = parse_request("CollectionUpdate")
    run_sync = get_flag("sync")
    # The caller's authz object is passed along to the model update.
    collection.update(changes, request.authz)
    db.session.commit()
    refreshed = update_collection(collection, sync=run_sync)
    return CollectionSerializer.jsonify(refreshed)
def process(collection_id):
    """
    ---
    post:
      summary: Process a collection
      description: Start processing the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - in: query
        name: ingest
        schema:
          type: boolean
      - in: query
        name: reset
        schema:
          type: boolean
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    # re-process the documents
    # NOTE(review): the spec above advertises an `ingest` query parameter,
    # but only `reset` is read in this function — confirm whether `ingest`
    # is handled elsewhere or is stale.
    options = {'reset': get_flag('reset', True)}
    session_job = get_session_id()
    queue_task(collection, OP_PROCESS, job_id=session_job, payload=options)
    collection.touch()
    db.session.commit()
    refresh_collection(collection_id)
    return ('', 202)
def delete(collection_id):
    """
    ---
    delete:
      summary: Delete a collection
      description: Delete the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      responses:
        '204':
          description: No Content
      tags:
        - Collection
    """
    target = get_db_collection(collection_id, request.authz.WRITE)
    # The sync flag defaults to True for this endpoint.
    delete_collection(target, sync=get_flag('sync', default=True))
    return ('', 204)
def create():
    """Create an entity in the collection named by the request body.

    The collection is taken from the ``collection_id`` field of the parsed
    request and fetched with the WRITE access level. The data returned by
    ``create_entity`` is sent as the response.
    """
    payload = parse_request(EntityCreateSchema)
    collection = get_db_collection(payload['collection_id'],
                                   request.authz.WRITE)
    run_sync = get_flag('sync', True)
    created = create_entity(payload, collection, sync=run_sync)
    tag_request(entity_id=created.get('id'),
                collection_id=str(collection.id))
    return EntitySerializer.jsonify(created)
def delete(collection_id):
    """Delete a collection and return an empty 204 response.

    The collection is fetched with the WRITE access level; the ``sync``
    flag defaults to True.
    """
    target = get_db_collection(collection_id, request.authz.WRITE)
    delete_collection(target, sync=get_flag('sync', default=True))
    return ('', 204)