Example #1
def upsert_entity(data,
                  collection,
                  authz=None,
                  sync=False,
                  sign=False,
                  job_id=None):
    """Create or update an entity in the database. This has a side effect  of migrating
    entities created via the _bulk API or a mapper to a database entity in the event
    that it gets edited by the user.
    """
    from aleph.logic.profiles import profile_fragments

    entity = None
    entity_id = collection.ns.sign(data.get("id"))
    if entity_id is not None:
        entity = Entity.by_id(entity_id, collection=collection)
    if entity is None:
        role_id = authz.id if authz is not None else None
        entity = Entity.create(data, collection, sign=sign, role_id=role_id)
    else:
        entity.update(data, collection, sign=sign)
    collection.touch()

    proxy = entity.to_proxy()
    aggregator = get_aggregator(collection)
    aggregator.delete(entity_id=proxy.id)
    aggregator.put(proxy, origin=MODEL_ORIGIN)
    profile_fragments(collection, aggregator, entity_id=proxy.id)

    index.index_proxy(collection, proxy, sync=sync)
    refresh_entity(collection, proxy.id)
    queue_task(collection, OP_UPDATE_ENTITY, job_id=job_id, entity_id=proxy.id)
    return entity.id
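
A minimal sketch of how this variant might be wired into an API view, in the style of the upload view in Examples #3 and #7. `get_db_collection`, `get_flag`, `get_session_id`, and `jsonify` appear in those examples; the route, the `parse_request()` call, and the schema name 'EntityUpdate' are assumptions, not taken from the examples above:

# Hypothetical caller for Example #1; helper names other than those
# visible in Examples #3/#7 are assumptions.
def entity_update(collection_id):
    collection = get_db_collection(collection_id, request.authz.WRITE)
    data = parse_request('EntityUpdate')
    entity_id = upsert_entity(data,
                              collection,
                              authz=request.authz,
                              sync=get_flag('sync', default=True),
                              sign=True,
                              job_id=get_session_id())
    db.session.commit()
    return jsonify({'status': 'ok', 'id': entity_id})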
Example #2
def upsert_entity(data, collection, authz=None, sync=False):
    """Create or update an entity in the database. This has a side hustle
    of migrating entities created via the _bulk API or a mapper to a
    database entity in the event that it gets edited by the user.
    """
    entity = None
    entity_id = collection.ns.sign(data.get("id"))
    if entity_id is not None:
        entity = Entity.by_id(entity_id, collection=collection)
    if entity is None:
        role_id = authz.id if authz is not None else None
        entity = Entity.create(data, collection, role_id=role_id)
    else:
        entity.update(data, collection)

    # Inline name properties from adjacent entities. See the
    # docstring on `inline_names` for a more detailed discussion.
    proxy = entity.to_proxy()
    entity_ids = proxy.get_type_values(registry.entity)
    for rel in index.entities_by_ids(entity_ids):
        inline_names(proxy, model.get_proxy(rel))
    entity.data = proxy.properties
    db.session.add(entity)

    delete_aggregator_entity(collection, entity.id)
    index.index_proxy(collection, proxy, sync=sync)
    refresh_entity(collection, entity.id)
    return entity.id
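
The inlining loop above leans on followthemoney's type registry to collect the ids of adjacent entities. A small standalone illustration of that lookup, using the public followthemoney API (the passport entity is made up):

from followthemoney import model
from followthemoney.types import registry

# A Passport that references its holder by entity id.
passport = model.get_proxy({
    'id': 'passport-1',
    'schema': 'Passport',
    'properties': {'number': ['CJ7A12'], 'holder': ['person-1']},
})

# get_type_values(registry.entity) collects the ids of every referenced
# entity, regardless of which property holds them -- the same set that
# Example #2 loads from the index before calling inline_names().
assert passport.get_type_values(registry.entity) == ['person-1']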
Example #3
def ingest_upload(collection_id):
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    sync = get_flag('sync', default=False)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        content_hash = None
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        collection.touch()
        db.session.commit()
        proxy = document.to_proxy()
        if proxy.schema.is_a(Document.SCHEMA_FOLDER) and sync:
            index_proxy(collection, proxy, sync=sync)
        ingest_entity(collection, proxy, job_id=job_id, sync=sync)
        document_id = collection.ns.sign(document.id)
        _notify(collection, document_id)
    finally:
        shutil.rmtree(upload_dir)

    return jsonify({'status': 'ok', 'id': document_id}, status=201)
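
A hedged sketch of exercising this endpoint from a test case, following the client and fixture helpers visible in Examples #5 and #6; the URL and metadata fields are assumptions:

import json

# Hypothetical test for the upload view. self.client, self.headers,
# and get_fixture_path mirror the helpers in Examples #5/#6; the URL
# is an assumption.
def test_ingest_upload(self):
    url = '/api/2/collections/%s/ingest' % self.col.id
    path = self.get_fixture_path('experts.csv')
    data = {
        'file': (open(path, 'rb'), 'experts.csv'),
        'meta': json.dumps({'file_name': 'experts.csv'}),
    }
    res = self.client.post(url,
                           data=data,
                           headers=self.headers,
                           content_type='multipart/form-data')
    assert res.status_code == 201, res
    assert res.json['status'] == 'ok', res.json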
Example #4
def ingest_entity(collection, proxy, job_id=None, sync=False):
    """Send the given FtM entity proxy to the ingest-file service."""
    if proxy.schema.is_a(Document.SCHEMA_FOLDER):
        index_proxy(collection, proxy, sync=sync)
    log.debug("Ingest entity [%s]: %s", proxy.id, proxy.caption)
    stage = get_stage(collection, OP_INGEST, job_id=job_id)
    from aleph.logic.aggregator import get_aggregator_name
    context = {
        'languages': collection.languages,
        'balkhash_name': get_aggregator_name(collection),
        'next_stage': OP_INDEX,
        'sync': sync
    }
    stage.queue(proxy.to_dict(), context)
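
For context, a hedged sketch of a bulk caller that re-queues every document in a collection through this function. `Document.by_collection` is assumed to exist as a query helper; only the `ingest_entity()` call itself is taken from the example:

# Illustrative only: re-ingest all documents of a collection under one
# job id. Document.by_collection() is an assumed iteration helper.
def reingest_collection(collection, job_id=None):
    for document in Document.by_collection(collection.id):
        proxy = document.to_proxy()
        ingest_entity(collection, proxy, job_id=job_id, sync=False)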
Example #5
def setUp(self):
    super(MappingAPITest, self).setUp()
    self.col = self.create_collection(foreign_id="map1")
    aggregator = get_aggregator(self.col)
    aggregator.delete()
    _, self.headers = self.login(is_admin=True)
    self.rolex = self.create_user(foreign_id="user_3")
    _, self.headers_x = self.login(foreign_id="user_3")
    self.fixture = self.get_fixture_path("experts.csv")
    self.content_hash = archive.archive_file(self.fixture)
    data = {
        "id": "foo",
        "schema": "Table",
        "properties": {
            "csvHash": self.content_hash,
            "contentHash": self.content_hash,
            "mimeType": "text/csv",
            "fileName": "experts.csv",
            "name": "experts.csv",
        },
    }
    self.ent = EntityProxy.from_dict(model, data, cleaned=False)
    self.ent.id = self.col.ns.sign(self.ent.id)
    index_proxy(self.col, self.ent)
    data = {
        "id": "foo2",
        "schema": "Table",
        "properties": {
            "csvHash": self.content_hash,
            "contentHash": self.content_hash,
            "mimeType": "text/csv",
            "fileName": "experts.csv",
            "name": "experts.csv",
        },
    }
    self.ent2 = EntityProxy.from_dict(model, data, cleaned=False)
    self.ent2.id = self.col.ns.sign(self.ent2.id)
    index_proxy(self.col, self.ent2)
    data = {
        "id": "bar",
        "schema": "LegalEntity",
        "properties": {
            "name": "John Doe"
        },
    }
    ent = EntityProxy.from_dict(model, data, cleaned=False)
    ent.id = self.col.ns.sign(ent.id)
    index_proxy(self.col, ent)
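
The repeated `self.col.ns.sign(...)` calls come from followthemoney's namespace support, which HMAC-signs entity ids with a per-collection key so ids from different collections cannot collide or be spoofed. A standalone illustration (the key is made up):

from followthemoney.namespace import Namespace

ns = Namespace('collection-signing-key')  # made-up key
signed = ns.sign('foo')
# The signed id keeps the plain id as a dot-separated prefix,
# e.g. 'foo.1c52a9...', and verify() checks the signature half.
assert signed.startswith('foo.')
assert ns.verify(signed)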
Example #6
def setUp(self):
    super(MappingAPITest, self).setUp()
    self.col = self.create_collection(data={'foreign_id': 'map1'})
    _, self.headers = self.login(is_admin=True)
    self.rolex = self.create_user(foreign_id='user_3')
    _, self.headers_x = self.login(foreign_id='user_3')
    self.fixture = self.get_fixture_path('experts.csv')
    self.content_hash = archive.archive_file(self.fixture)
    data = {
        'id': 'foo',
        'schema': 'Table',
        'properties': {
            'csvHash': self.content_hash,
            'contentHash': self.content_hash,
            'mimeType': 'text/csv',
            'fileName': 'experts.csv',
            'name': 'experts.csv'
        }
    }
    self.ent = EntityProxy.from_dict(model, data)
    self.ent.id = self.col.ns.sign(self.ent.id)
    index_proxy(self.col, self.ent)
    data = {
        'id': 'foo2',
        'schema': 'Table',
        'properties': {
            'csvHash': self.content_hash,
            'contentHash': self.content_hash,
            'mimeType': 'text/csv',
            'fileName': 'experts.csv',
            'name': 'experts.csv'
        }
    }
    self.ent2 = EntityProxy.from_dict(model, data)
    self.ent2.id = self.col.ns.sign(self.ent2.id)
    index_proxy(self.col, self.ent2)
    data = {
        'id': 'bar',
        'schema': 'LegalEntity',
        'properties': {
            'name': 'John Doe'
        }
    }
    ent = EntityProxy.from_dict(model, data)
    ent.id = self.col.ns.sign(ent.id)
    index_proxy(self.col, ent)
Example #7
def ingest_upload(collection_id):
    """
    ---
    post:
      summary: Upload a document to a collection
      description: Upload a document to a collection with id `collection_id`
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      requestBody:
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  type: string
                  format: binary
                  description: The document to upload
                meta:
                  $ref: '#/components/schemas/DocumentIngest'
      responses:
        '201':
          description: Created
          content:
            application/json:
              schema:
                properties:
                  id:
                    description: signed id of the uploaded document
                    type: string
                  status:
                    type: string
                type: object
      tags:
      - Ingest
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    sync = get_flag('sync', default=False)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        content_hash = None
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        collection.touch()
        db.session.commit()
        proxy = document.to_proxy()
        if proxy.schema.is_a(Document.SCHEMA_FOLDER) and sync:
            index_proxy(collection, proxy, sync=sync)
        ingest_entity(collection, proxy, job_id=job_id, sync=sync)
        document_id = collection.ns.sign(document.id)
        _notify(collection, document_id)
    finally:
        shutil.rmtree(upload_dir)

    return jsonify({'status': 'ok', 'id': document_id}, status=201)
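
Matching the request body documented above, a hedged client-side sketch using `requests`; the host, API key, and metadata fields are placeholders, and `meta` is assumed to follow the DocumentIngest schema referenced in the spec:

import json
import requests

# Placeholder host and credentials; the endpoint shape follows the
# OpenAPI block above.
url = 'https://aleph.example.org/api/2/collections/17/ingest'
headers = {'Authorization': 'ApiKey 1234567890abcdef'}
meta = {'file_name': 'report.pdf', 'languages': ['en']}
with open('report.pdf', 'rb') as fh:
    res = requests.post(url,
                        headers=headers,
                        params={'sync': 'true'},
                        data={'meta': json.dumps(meta)},
                        files={'file': ('report.pdf', fh, 'application/pdf')})
res.raise_for_status()
print(res.json())  # {'status': 'ok', 'id': '...'}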