def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete the contents of a collection and, optionally, the collection.

    The queue is cancelled, the aggregator dropped, and notifications, index
    entities and xref results are removed before the database rows go.
    When ``keep_metadata`` is true the linkages, permissions and the
    collection row itself are left in place, and the collection index entry
    and the ``Authz`` cache are not touched.
    """
    cancel_queue(collection)

    store = get_aggregator(collection)
    try:
        store.drop()
    finally:
        # Close the aggregator even if dropping it fails.
        store.close()

    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)

    # Re-use an existing deletion timestamp so repeated calls agree.
    stamp = collection.deleted_at or datetime.utcnow()
    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=stamp)
    Document.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        # Linkages are treated as metadata for now; this might be wrong.
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id, deleted_at=stamp)
        collection.delete(deleted_at=stamp)
    db.session.commit()

    if drop_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
def prune_entity(collection, entity_id=None, job_id=None):
    """Fully delete an entity outside of the HTTP request cycle.

    Besides the entity itself this removes related records: adjacent
    entities, notifications, entity set items, mappings keyed on the entity,
    xref results and the aggregator copy. The session is committed at the end.
    """
    # Deletion is recursive: every entity that references this one is
    # deleted as well. Usually these are child documents, or directorships
    # referencing a person. It is a pretty dangerous operation, though.
    log.info("[%s] Prune entity: %s", collection, entity_id)
    for neighbour in index.iter_adjacent(collection.id, entity_id):
        log.warning("Recursive delete: %s", neighbour.get("id"))
        delete_entity(collection, neighbour, job_id=job_id)

    flush_notifications(entity_id, clazz=Entity)

    # The ID is looked up as both an Entity and a Document.
    entity_row = Entity.by_id(entity_id, collection=collection)
    if entity_row is not None:
        entity_row.delete()
    document_row = Document.by_id(entity_id, collection=collection)
    if document_row is not None:
        document_row.delete()

    EntitySetItem.delete_by_entity(entity_id)
    Mapping.delete_by_table(entity_id)
    xref_index.delete_xref(collection, entity_id=entity_id)
    get_aggregator(collection).delete(entity_id=entity_id)
    refresh_entity(collection, entity_id)

    collection.touch()
    db.session.commit()
def cleanup_deleted():
    """Call ``cleanup_deleted()`` on each model in a fixed order and commit."""
    # Imported inside the function, as before, rather than at module level.
    from aleph.model import (
        Alert,
        Collection,
        Diagram,
        Document,
        Entity,
        Mapping,
        Permission,
        Role,
    )

    # The sequence is kept exactly as it was; it may matter for references
    # between tables (not verifiable from here).
    cleanup_order = (
        Mapping,
        Diagram,
        Document,
        Alert,
        Permission,
        Entity,
        Collection,
        Role,
    )
    for model in cleanup_order:
        model.cleanup_deleted()
    db.session.commit()
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete the contents of a collection and, optionally, the collection.

    The collection is reset first (always with ``sync=False``), then its
    entities, mappings, diagrams and documents are deleted. Unless
    ``keep_metadata`` is set, permissions, the collection row and the
    collection index entry are removed too; ``sync`` is passed to the
    index deletion.
    """
    reset_collection(collection, sync=False)

    # Re-use an existing deletion timestamp so repeated calls agree.
    stamp = collection.deleted_at or datetime.utcnow()
    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=stamp)
    Document.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        Permission.delete_by_collection(collection.id, deleted_at=stamp)
        collection.delete(deleted_at=stamp)
    db.session.commit()

    if drop_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
def flush(collection_id, mapping_id):
    """Flush all entities loaded by mapping with id `mapping_id`.
    ---
    post:
      summary: Flush entities loaded by a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: No Content
      tags:
      - Collection
    """
    # The collection is resolved with the WRITE action; a missing mapping
    # is handled by obj_or_404.
    collection = get_db_collection(collection_id, request.authz.WRITE)
    found = obj_or_404(Mapping.by_id(mapping_id))

    # The flush is queued as a task; the response does not wait for it.
    session_job = get_session_id()
    task_payload = {'mapping_id': found.id}
    queue_task(
        collection,
        OP_FLUSH_MAPPING,
        job_id=session_job,
        payload=task_payload,
    )
    return '', 202
def delete(collection_id, mapping_id):
    """Delete a mapping.
    ---
    delete:
      summary: Delete a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '204':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    # Called only for its check; the returned collection is not used.
    get_db_collection(collection_id, request.authz.WRITE)

    doomed = obj_or_404(Mapping.by_id(mapping_id))
    doomed.delete()
    db.session.commit()
    return '', 204
def view(collection_id, mapping_id):
    """Return the mapping with id `mapping_id`.
    ---
    get:
      summary: Fetch a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
    """
    # Reading a mapping also passes the WRITE action for the collection.
    get_db_collection(collection_id, request.authz.WRITE)

    candidate = Mapping.by_id(mapping_id)
    return MappingSerializer.jsonify(obj_or_404(candidate))
def load_mapping(collection, mapping_id, sync=False):
    """Flush and reload all entities generated by a mapping.

    Entities previously written under the mapping's origin are removed from
    the aggregator and the index first. If the mapping is disabled nothing
    is reloaded. Otherwise the mapping is run into the aggregator and
    indexed, and the outcome is stored on the mapping as its status.

    A failure while loading is logged with its traceback and recorded on
    the mapping; it is not re-raised. The aggregator is closed on every
    path out of the function once it has been opened.

    :param collection: the collection the mapping belongs to.
    :param mapping_id: ID of the mapping to (re)load.
    :param sync: passed to ``index_aggregator``.
    :return: the result of the log call on the early-exit paths (missing
        or disabled mapping), otherwise ``None``.
    """
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        return log.error("Could not find mapping: %s", mapping_id)

    origin = mapping_origin(mapping.id)
    aggregator = get_aggregator(collection)
    try:
        # Flush whatever an earlier run of this mapping produced.
        aggregator.delete(origin=origin)
        delete_entities(collection.id, origin=origin, sync=True)
        if mapping.disabled:
            return log.info("Mapping is disabled: %s", mapping_id)

        publish(
            Events.LOAD_MAPPING,
            params={"collection": collection, "table": mapping.table_id},
            channels=[collection, mapping.role],
            actor_id=mapping.role_id,
        )
        try:
            map_to_aggregator(collection, mapping, aggregator)
            aggregate_model(collection, aggregator)
            index_aggregator(collection, aggregator, sync=sync)
            mapping.set_status(status=Status.SUCCESS)
            db.session.commit()
        except Exception as exc:
            # Keep the traceback in the logs; the status only gets str(exc).
            log.exception("Failed to load mapping: %s", mapping_id)
            mapping.set_status(status=Status.FAILED, error=str(exc))
            db.session.commit()
            # Remove partial output written under this origin.
            aggregator.delete(origin=origin)
    finally:
        # Covers the disabled early return and errors raised before loading.
        aggregator.close()
def get_deep_collection(collection):
    """Build the "deep" (non-shallow) detail dictionary for a collection.

    The result has the keys ``statistics``, ``counts`` (mapping and entity
    set counts), ``status`` and ``shallow`` (always ``False``).
    """
    mapping_total = Mapping.by_collection(collection.id).count()
    entityset_totals = EntitySet.type_counts(collection_id=collection.id)

    details = {"statistics": index.get_collection_stats(collection.id)}
    details["counts"] = {
        "mappings": mapping_total,
        "entitysets": entityset_totals,
    }
    details["status"] = get_status(collection)
    details["shallow"] = False
    return details
def delete_collection(collection, keep_metadata=False, sync=False, reset_sync=False):
    """Delete the contents of a collection and, optionally, the collection.

    ``reset_sync`` is passed to ``reset_collection`` and ``sync`` to the
    collection index deletion. When ``keep_metadata`` is true the linkages,
    permissions, the collection row and its index entry are all kept.
    """
    reset_collection(collection, sync=reset_sync)

    # Re-use an existing deletion timestamp so repeated calls agree.
    stamp = collection.deleted_at or datetime.utcnow()
    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=stamp)
    Document.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        # Linkages are treated as metadata for now; this might be wrong.
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id, deleted_at=stamp)
        collection.delete(deleted_at=stamp)
    db.session.commit()

    if drop_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
def load_mapping(stage, collection, mapping_id):
    """Flush and reload all entities generated by a mapping.

    Records from the mapping source are mapped to entities and written to
    the aggregator in bulk. After every 500 records an ``OP_INDEX`` task is
    queued for the entity IDs collected so far; one more is queued for the
    remainder at the end.

    A failure while loading is logged with its traceback and recorded on
    the mapping as its status; it is not re-raised. The aggregator is
    always closed.

    :param stage: job stage; provides the job ID and progress reporting.
    :param collection: the collection the mapping belongs to.
    :param mapping_id: ID of the mapping to load.
    :return: the result of the log call if the mapping does not exist,
        otherwise ``None``.
    """
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        return log.error("Could not find mapping: %s", mapping_id)

    flush_mapping(stage, collection, mapping_id)
    publish(Events.LOAD_MAPPING,
            params={'collection': collection, 'table': mapping.table_id},
            channels=[collection, mapping.role],
            actor_id=mapping.role_id)
    mapper = make_mapper(collection, mapping)
    aggregator = get_aggregator(collection)
    try:
        writer = aggregator.bulk()
        entities_count = 0
        entity_ids = set()
        for idx, record in enumerate(mapper.source.records, 1):
            for entity in mapper.map(record).values():
                if entity.schema.is_a('Thing'):
                    entity.add('proof', mapping.table_id)
                entity = collection.ns.apply(entity)
                entity_ids.add(entity.id)
                entities_count += 1
                fragment = '%s-%s' % (mapping.id, idx)
                writer.put(entity, fragment=fragment)

            # idx starts at 1, so this fires after each full batch of 500.
            if idx % 500 == 0:
                payload = {
                    'entity_ids': entity_ids,
                    'mapping_id': mapping.id
                }
                queue_task(collection, OP_INDEX,
                           job_id=stage.job.id,
                           payload=payload)
                # Start a fresh set; the queued payload keeps the old one.
                entity_ids = set()
                stage.report_finished(500)
                log.info("[%s] Loaded %s records, %s entities...",
                         collection.foreign_id, idx, entities_count)

        writer.flush()
        # Queue indexing for the IDs collected since the last full batch.
        payload = {
            'entity_ids': entity_ids,
            'mapping_id': mapping.id
        }
        queue_task(collection, OP_INDEX,
                   job_id=stage.job.id,
                   payload=payload)
        mapping.set_status(status=Mapping.SUCCESS)
        log.info("[%s] Mapping done (%s entities)",
                 mapping.id, entities_count)
    except Exception as exc:
        # Keep the traceback in the logs; the status only gets str(exc).
        log.exception("[%s] Mapping failed", mapping.id)
        mapping.set_status(status=Mapping.FAILED, error=str(exc))
    finally:
        aggregator.close()
def trigger(collection_id, mapping_id):
    """Load entities by running the mapping with id `mapping_id`. Flushes
    previously loaded entities before loading new entities.
    ---
    post:
      summary: Load entities from a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)

    # Re-enable the mapping and mark it pending before queueing the load.
    pending = obj_or_404(Mapping.by_id(mapping_id))
    pending.disabled = False
    pending.set_status(Status.PENDING)
    db.session.commit()

    session_job = get_session_id()
    queue_task(
        collection,
        OP_LOAD_MAPPING,
        job_id=session_job,
        mapping_id=pending.id,
    )

    # Look the mapping up again so the response is built from a fresh fetch.
    current = obj_or_404(Mapping.by_id(mapping_id))
    return MappingSerializer.jsonify(current, status=202)
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete the contents of a collection and, optionally, the collection.

    The queue is cancelled, the aggregator dropped, and notifications, index
    entities and xref results are removed. Mappings, entity sets, entities
    and documents are then deleted. Unless ``keep_metadata`` is set,
    permissions, the collection row and its index entry are removed too.
    """
    cancel_queue(collection)
    get_aggregator(collection).drop()

    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)

    # Only the entity sets and the collection row receive the timestamp.
    stamp = collection.deleted_at or datetime.utcnow()
    Mapping.delete_by_collection(collection.id)
    EntitySet.delete_by_collection(collection.id, stamp)
    Entity.delete_by_collection(collection.id)
    Document.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        Permission.delete_by_collection(collection.id)
        collection.delete(deleted_at=stamp)
    db.session.commit()

    if drop_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id)
def delete_entity(collection, entity, deleted_at=None, sync=False):
    """Delete an entity together with everything that references it.

    ``entity`` is read via ``entity.get("id")``; its ID is signed with the
    collection namespace before being used for the deletions below.
    """
    # Deletion is recursive: every entity that references this one is
    # deleted as well. Usually these are child documents, or directorships
    # referencing a person. It is a pretty dangerous operation, though.
    signed_id = collection.ns.sign(entity.get("id"))
    for neighbour in index.iter_adjacent(entity):
        log.warning("Recursive delete: %r", neighbour)
        delete_entity(collection, neighbour, deleted_at=deleted_at, sync=sync)

    flush_notifications(signed_id, clazz=Entity)

    # The ID is looked up as both an Entity and a Document.
    entity_row = Entity.by_id(signed_id, collection=collection)
    if entity_row is not None:
        entity_row.delete()
    document_row = Document.by_id(signed_id, collection=collection)
    if document_row is not None:
        document_row.delete()

    index.delete_entity(signed_id, sync=sync)
    EntitySetItem.delete_by_entity(signed_id)
    Mapping.delete_by_table(signed_id)
    xref_index.delete_xref(collection, entity_id=signed_id, sync=sync)
    delete_aggregator_entity(collection, signed_id)
    refresh_entity(collection, signed_id)
def update(collection_id, mapping_id):
    """Update the mapping with id `mapping_id`.
    ---
    post:
      summary: Update a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    # Called only for its check; the returned collection is not used.
    get_db_collection(collection_id, request.authz.WRITE)

    target = obj_or_404(Mapping.by_id(mapping_id))
    body = parse_request("MappingCreate")

    # Resolved in the same order in which they are passed to update().
    new_query = load_query()
    new_table_id = get_table_id(body)
    new_entityset_id = get_entityset_id(body)
    target.update(
        query=new_query,
        table_id=new_table_id,
        entityset_id=new_entityset_id,
    )
    db.session.commit()
    return MappingSerializer.jsonify(target)
def update(collection_id, mapping_id):
    """Update the mapping with id `mapping_id`.
    ---
    post:
      summary: Update a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
    """
    # Called only for its check; the returned collection is not used.
    get_db_collection(collection_id, request.authz.WRITE)

    target = obj_or_404(Mapping.by_id(mapping_id))
    body = parse_request('MappingCreate')
    requested_table = body.get('table_id')
    new_query = load_query()

    # The table entity is fetched with the READ action and its own ``id``
    # is what gets stored on the mapping.
    table = get_index_entity(requested_table, request.authz.READ)
    target.update(query=new_query, table_id=table.get('id'))
    return MappingSerializer.jsonify(target)
def index(collection_id):
    """Returns a list of mappings for the collection and table.
    ---
    get:
      summary: List mappings
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - description: The table id.
        in: query
        name: table
        schema:
          type: string
      requestBody:
      responses:
        '200':
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id)
    parser = QueryParser(request.args, request.authz)

    # Only the first "table" filter value is used.
    table_filter = first(parser.filters.get("table"))
    mappings = Mapping.by_collection(collection.id, table_id=table_filter)
    page = DatabaseQueryResult(request, mappings, parser=parser)
    return MappingSerializer.jsonify_result(page)
def flush(collection_id, mapping_id):
    """Flush all entities loaded by mapping with id `mapping_id`.
    ---
    post:
      summary: Flush entities loaded by a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    target = obj_or_404(Mapping.by_id(mapping_id))

    # Disable the mapping and clear what the last run recorded.
    target.disabled = True
    target.last_run_status = target.last_run_err_msg = None
    db.session.add(target)
    db.session.commit()

    # The flush is queued as a task; the response does not wait for it.
    session_job = get_session_id()
    queue_task(
        collection,
        OP_FLUSH_MAPPING,
        job_id=session_job,
        mapping_id=mapping_id,
    )
    return "", 202
def create(collection_id):
    """Create a mapping.
    ---
    post:
      summary: Create a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    body = parse_request("MappingCreate")

    # Resolved in the same order in which they are passed to create().
    new_query = load_query()
    new_table_id = get_table_id(body)
    creator_id = request.authz.id
    new_entityset_id = get_entityset_id(body)

    created = Mapping.create(
        new_query,
        new_table_id,
        collection,
        creator_id,
        entityset_id=new_entityset_id,
    )
    db.session.commit()
    return MappingSerializer.jsonify(created)
def delete(collection_id, mapping_id):
    """Delete a mapping.
    ---
    delete:
      summary: Delete a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '204':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)

    doomed = obj_or_404(Mapping.by_id(mapping_id))
    doomed.delete()
    db.session.commit()

    # A flush task is queued after the delete, keyed on the mapping ID
    # taken from the request.
    flush_payload = {"mapping_id": mapping_id}
    queue_task(
        collection,
        OP_FLUSH_MAPPING,
        job_id=get_session_id(),
        payload=flush_payload,
    )
    return "", 204
def create(collection_id):
    """Create a mapping.
    ---
    post:
      summary: Create a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/MappingCreate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    body = parse_request('MappingCreate')
    requested_table = body.get('table_id')
    new_query = load_query()

    # The table entity is fetched with the READ action and its own ``id``
    # is what gets stored on the new mapping.
    table = get_index_entity(requested_table, request.authz.READ)
    created = Mapping.create(
        new_query,
        table.get('id'),
        collection,
        request.authz.id,
    )
    return MappingSerializer.jsonify(created)
def trigger(collection_id, mapping_id):
    """Load entities by running the mapping with id `mapping_id`. Flushes
    previously loaded entities before loading new entities.
    ---
    post:
      summary: Load entities from a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    found = obj_or_404(Mapping.by_id(mapping_id))

    # The load is queued as a task; the response does not wait for it.
    queue_task(
        collection,
        OP_LOAD_MAPPING,
        job_id=get_session_id(),
        payload={'mapping_id': found.id},
    )

    collection.touch()
    db.session.commit()
    return '', 202