Beispiel #1
0
def view(entity_id):
    """Serve one entity together with a sanitized copy of its HTML body.
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    enable_cache()
    # These field patterns are handed to the lookup as ``excludes``.
    data = get_index_entity(
        entity_id,
        request.authz.READ,
        excludes=["text", "numeric.*"],
    )
    tag_request(collection_id=data.get("collection_id"))
    proxy = model.get_proxy(data)
    # Read the three properties in the same order as before; ``quiet=True``
    # is passed through on every lookup.
    body_html, source_url, encoding = (
        proxy.first(prop, quiet=True)
        for prop in ("bodyHtml", "sourceUrl", "encoding")
    )
    data["safeHtml"] = sanitize_html(body_html, source_url, encoding=encoding)
    data["shallow"] = False
    return EntitySerializer.jsonify(data)
Beispiel #2
0
def similar(entity_id):
    """Run a ``MatchQuery`` for the entity with id `entity_id`.

    The entity is fetched with ``get_index_entity`` using
    ``request.authz.READ``, wrapped via ``model.get_proxy`` and passed to
    ``MatchQuery.handle``; the result goes through
    ``EntitySerializer.jsonify_result``.
    """
    enable_cache()
    data = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=data.get('collection_id'))
    proxy = model.get_proxy(data)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
Beispiel #3
0
def entities_index(entityset_id):
    """Search entities in the entity set with id `entityset_id`.
    ---
    get:
      summary: Search entities in the entity set with id `entityset_id`
      description: >
        Supports all query filters and arguments present in the normal
        entity search API, but all resulting entities will be members of
        the set.
      parameters:
      - description: The entityset id.
        in: path
        name: entityset_id
        required: true
        schema:
          type: string
        example: 3a0d91ece2dce88ad3259594c7b642485235a048
      responses:
        '200':
          description: Returns a list of entities in result
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      tags:
      - EntitySet
    """
    # The entity set is looked up with READ authz before anything else runs.
    entityset = get_entityset(entityset_id, request.authz.READ)
    parser = SearchQueryParser(request.args, request.authz)
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitySetItemsQuery.handle(
        request,
        parser=parser,
        entityset=entityset,
    )
    return EntitySerializer.jsonify_result(result)
Beispiel #4
0
def view(entity_id):
    """Serve the entity with id `entity_id`.
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    enable_cache()
    action = request.authz.READ
    data = get_index_entity(entity_id, action)
    # Tag the request with the collection the entity reports.
    tag_request(collection_id=data.get('collection_id'))
    return EntitySerializer.jsonify(data)
Beispiel #5
0
def similar(entity_id):
    """Run a ``MatchQuery`` for the entity with id `entity_id`.

    An ``Audit.ACT_ENTITY`` audit record carrying the entity id is written
    after the entity has been loaded and before the query is executed.
    """
    enable_cache()
    data = get_index_entity(entity_id, request.authz.READ)
    proxy = model.get_proxy(data)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
Beispiel #6
0
def create():
    """Create an entity from the request body and return it.
    ---
    post:
      summary: Create an entity in a collection
      description: >-
        Create an entity in a collection with a given schema and a set of given
        properties in the database. This is not the API you want to be using to
        load bulk data, but only for interactive entity manipulation in the UI.
        Always use the `bulk` API for loading source datasets, no
        exceptions.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityCreate'
      responses:
        '200':
          description: Returns the created entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
        - Entity
    """
    data = parse_request('EntityCreate')
    collection = get_nested_collection(data, request.authz.WRITE)
    # Any client-supplied id is dropped before the upsert.
    data.pop('id', None)
    validate = get_flag('validate', default=False)
    entity_id = upsert_entity(data, collection, sync=True, validate=validate)
    tag_request(entity_id=entity_id, collection_id=str(collection.id))
    # Re-read the entity (with READ authz) so the response reflects what
    # the lookup returns rather than the submitted payload.
    entity = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(entity)
Beispiel #7
0
def create():
    """Create an entity from an ``EntityCreateSchema`` payload.

    The target collection is resolved from ``collection_id`` in the payload
    using ``request.authz.WRITE``; the created entity is then fetched with
    ``get_index_entity`` and serialized.
    """
    payload = parse_request(EntityCreateSchema)
    collection = get_db_collection(
        payload['collection_id'],
        request.authz.WRITE,
    )
    new_id = create_entity(payload, collection, sync=True)
    tag_request(entity_id=new_id, collection_id=str(collection.id))
    created = get_index_entity(new_id, request.authz.READ)
    return EntitySerializer.jsonify(created)
Beispiel #8
0
def match():
    """Run a ``MatchQuery`` for the entity described in the request body.

    The body is parsed with ``EntityUpdateSchema``; ``collection_ids`` is
    read from the query string and forwarded to the query.
    """
    payload = parse_request(EntityUpdateSchema)
    proxy = model.get_proxy(payload)
    tag_request(schema=proxy.schema.name, caption=proxy.caption)
    wanted_collections = request.args.getlist('collection_ids')
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=wanted_collections,
    )
    return EntitySerializer.jsonify_result(matches)
Beispiel #9
0
def match():
    """Run a ``MatchQuery`` for the entity described in the request body.

    An ``Audit.ACT_MATCH`` audit record is written with the parsed payload
    before it is wrapped via ``model.get_proxy``.
    """
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    wanted_collections = request.args.getlist('collection_ids')
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=wanted_collections,
    )
    return EntitySerializer.jsonify_result(matches)
Beispiel #10
0
def similar(entity_id):
    """Run a ``MatchQuery`` for the entity with id `entity_id`.

    The request is tagged with the entity's ``collection_id`` and an
    ``Audit.ACT_ENTITY`` audit record is written before the query runs.
    """
    enable_cache()
    data = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=data.get('collection_id'))
    proxy = model.get_proxy(data)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
Beispiel #11
0
def match():
    """Run a ``MatchQuery`` for the entity described in the request body.

    The parsed payload is audited as ``Audit.ACT_MATCH``, wrapped via
    ``model.get_proxy`` and the request is tagged with the proxy's schema
    name and caption before the query runs.
    """
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    tag_request(schema=proxy.schema.name, caption=proxy.caption)
    wanted_collections = request.args.getlist('collection_ids')
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=wanted_collections,
    )
    return EntitySerializer.jsonify_result(matches)
Beispiel #12
0
def update(entity_id):
    """Apply an ``EntityUpdateSchema`` payload to the entity `entity_id`.

    When the ``merge`` flag is set, the submitted properties are combined
    with ``entity.data`` through ``merge_data`` before the update. The
    ``sync`` flag (default ``True``) is forwarded to ``update_entity``.
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    payload = parse_request(EntityUpdateSchema)
    if get_flag('merge'):
        payload['properties'] = merge_data(
            payload.get('properties'),
            entity.data,
        )
    entity.update(payload)
    db.session.commit()
    sync = get_flag('sync', True)
    updated = update_entity(entity, sync=sync)
    return EntitySerializer.jsonify(updated)
Beispiel #13
0
def update(entity_id):
    """Apply an ``EntityUpdateSchema`` payload to the entity `entity_id`.

    The request is tagged with the entity's ``collection_id`` before the
    body is parsed. When the ``merge`` flag is set, the submitted
    properties are combined with ``entity.data`` through ``merge_data``.
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=entity.collection_id)
    payload = parse_request(EntityUpdateSchema)
    if get_flag('merge'):
        payload['properties'] = merge_data(
            payload.get('properties'),
            entity.data,
        )
    entity.update(payload)
    db.session.commit()
    sync = get_flag('sync', True)
    updated = update_entity(entity, sync=sync)
    return EntitySerializer.jsonify(updated)
Beispiel #14
0
def index():
    """Run an ``EntitiesQuery`` for the current request.

    An ``export`` link is added to the response extras when the user is
    logged in and the result total does not exceed ``EXPORT_MAX``.
    """
    # Caching stays switched off here: enable_cache(vary_user=True)
    parser = SearchQueryParser(request.args, request.authz)
    result = EntitiesQuery.handle(request, parser=parser)
    links = {}
    exportable = request.authz.logged_in and result.total <= EXPORT_MAX
    if exportable:
        export_query = list(request.args.items(multi=True))
        links['export'] = url_for(
            'entities_api.export',
            format='excel',
            _authorize=True,
            _query=export_query,
        )
    return EntitySerializer.jsonify_result(result, extra={'links': links})
Beispiel #15
0
def index():
    """Run an ``EntitiesQuery`` for the current request.

    The request is tagged with the parser's text and prefix. An ``export``
    link is added to the response extras when the user is logged in and
    the result total does not exceed ``EXPORT_MAX``.
    """
    # Caching stays switched off here: enable_cache(vary_user=True)
    parser = SearchQueryParser(request.args, request.authz)
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    links = {}
    exportable = request.authz.logged_in and result.total <= EXPORT_MAX
    if exportable:
        export_query = list(request.args.items(multi=True))
        links['export'] = url_for(
            'entities_api.export',
            format='excel',
            _authorize=True,
            _query=export_query,
        )
    return EntitySerializer.jsonify_result(result, extra={'links': links})
Beispiel #16
0
def create():
    """Create an entity from the request body and return it.
    ---
    post:
      summary: Create an entity in a collection
      description: >-
        Create an entity in a collection with a given schema and a set of given
        properties in the database. This is not the API you want to be using to
        load bulk data, but only for interactive entity manipulation in the UI.
        Always use the `bulk` API for loading source datasets, no
        exceptions.
      parameters:
      - in: query
        name: sign
        description: Sign entity IDs referenced in nested properties.
        required: false
        schema:
          type: boolean
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityCreate'
      responses:
        '200':
          description: Returns the created entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
        - Entity
    """
    data = parse_request("EntityCreate")
    collection = get_nested_collection(data, request.authz.WRITE)
    # Any client-supplied id is dropped before the upsert.
    data.pop("id", None)
    # Validation is opt-in through the ``validate`` flag.
    if get_flag("validate", default=False):
        validate_entity(data)
    entity_id = upsert_entity(
        data,
        collection,
        authz=request.authz,
        sync=True,
        sign=get_flag("sign", default=False),
        job_id=get_session_id(),
    )
    db.session.commit()
    tag_request(entity_id=entity_id, collection_id=collection.id)
    # Re-read the entity (with READ authz) so the response reflects what
    # the lookup returns rather than the submitted payload.
    entity = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(entity)
Beispiel #17
0
def merge(id, other_id):
    """Merge two entities by calling ``merge`` on the first with the second.

    Both entities are loaded with ``request.authz.WRITE``. After the
    session is committed, ``update_entity`` is called for both of them with
    the ``sync`` flag (default ``True``), and the result for the first
    entity is serialized.

    The parameter name ``id`` shadows the builtin; it is kept because
    renaming a parameter would change the function's interface.

    Raises:
        BadRequest: if ``entity.merge`` raises ``ValueError``; the error
            text is passed on as the description.
    """
    entity = get_db_entity(id, request.authz.WRITE)
    other = get_db_entity(other_id, request.authz.WRITE)

    try:
        entity.merge(other)
    except ValueError as ve:
        # Exceptions have no ``message`` attribute on Python 3, so reading
        # it would raise AttributeError instead of the intended BadRequest.
        raise BadRequest(str(ve))

    db.session.commit()
    sync = get_flag('sync', True)
    data = update_entity(entity, sync=sync)
    update_entity(other, sync=sync)
    return EntitySerializer.jsonify(data)
Beispiel #18
0
def merge(entity_id, other_id):
    """Merge two entities by calling ``merge`` on the first with the second.

    Both entities are loaded with ``request.authz.WRITE`` and the request
    is tagged with the first entity's ``collection_id``. After the session
    is committed, ``update_entity`` is called for both entities with the
    ``sync`` flag (default ``True``), and the result for the first entity
    is serialized.

    Raises:
        BadRequest: if ``entity.merge`` raises ``ValueError``; the error
            text is passed on as the description.
    """
    entity = get_db_entity(entity_id, request.authz.WRITE)
    other = get_db_entity(other_id, request.authz.WRITE)
    tag_request(collection_id=entity.collection_id)

    try:
        entity.merge(other)
    except ValueError as ve:
        # Exceptions have no ``message`` attribute on Python 3, so reading
        # it would raise AttributeError instead of the intended BadRequest.
        raise BadRequest(str(ve))

    db.session.commit()
    sync = get_flag('sync', True)
    data = update_entity(entity, sync=sync)
    update_entity(other, sync=sync)
    return EntitySerializer.jsonify(data)
Beispiel #19
0
def index():
    """Run an ``EntitiesQuery`` for the current request.

    A non-empty query text is saved through ``QueryLog.save`` and committed
    before the search runs. An ``export`` link is added to the response
    extras when the user is logged in and the result total does not exceed
    ``MAX_PAGE``.
    """
    # Caching stays switched off here: enable_cache(vary_user=True)
    parser = SearchQueryParser(request.args, request.authz)
    if parser.text:
        QueryLog.save(
            request.authz.id,
            request._session_id,
            parser.text,
        )
        db.session.commit()
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    links = {}
    exportable = request.authz.logged_in and result.total <= MAX_PAGE
    if exportable:
        export_query = list(request.args.items(multi=True))
        links['export'] = url_for(
            'entities_api.export',
            _authorize=True,
            _query=export_query,
        )
    return EntitySerializer.jsonify_result(result, extra={'links': links})
Beispiel #20
0
def update(entity_id):
    """Upsert the entity with id `entity_id` from the request body.
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
          format: entity_id
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    payload = parse_request('EntityUpdate')
    try:
        existing = get_index_entity(entity_id, request.authz.WRITE)
        collection = get_db_collection(
            existing.get('collection_id'),
            request.authz.WRITE,
        )
    except NotFound:
        # Either lookup failed with NotFound: take the collection from the
        # request payload instead.
        collection = get_nested_collection(payload, request.authz.WRITE)
    tag_request(collection_id=collection.id)
    payload['id'] = entity_id
    sync = get_flag('sync', default=True)
    validate = get_flag('validate', default=False)
    stored_id = upsert_entity(
        payload,
        collection,
        validate=validate,
        sync=sync,
    )
    db.session.commit()
    refreshed = get_index_entity(stored_id, request.authz.READ)
    return EntitySerializer.jsonify(refreshed)
Beispiel #21
0
def similar(entity_id):
    """Run a ``MatchQuery`` for the entity with id `entity_id`.
    ---
    get:
      summary: Get similar entities
      description: >
        Get a list of similar entities to the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      - in: query
        name: 'filter:schema'
        schema:
          items:
            type: string
          type: array
      - in: query
        name: 'filter:schemata'
        schema:
          items:
            type: string
          type: array
      responses:
        '200':
          description: Returns a list of entities
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      tags:
      - Entity
    """
    enable_cache()
    data = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=data.get('collection_id'))
    proxy = model.get_proxy(data)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
Beispiel #22
0
def update(entity_id):
    """Apply the request body to the entity `entity_id` and return it.
    ---
    post:
      summary: Update an entity
      description: >
        Update the entity with id `entity_id`. This only applies to
        entities which are backed by a database row, i.e. not any
        entities resulting from a mapping or bulk load.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    db_entity = get_db_entity(entity_id, request.authz.WRITE)
    tag_request(collection_id=db_entity.collection_id)
    payload = parse_request('EntityUpdate')
    if get_flag('merge'):
        # Combine the submitted properties with the stored entity data.
        payload['properties'] = merge_data(
            payload.get('properties'),
            db_entity.data,
        )
    db_entity.update(payload)
    db.session.commit()
    sync = get_flag('sync', True)
    update_entity(db_entity, sync=sync)
    # The response is built from a fresh lookup, not from the DB object.
    refreshed = get_index_entity(entity_id, request.authz.READ)
    return EntitySerializer.jsonify(refreshed)
Beispiel #23
0
def match():
    """Run a ``MatchQuery`` for the entity described in the request body.
    ---
    post:
      summary: Query for similar entities
      description: >-
        Query for similar entities matching a given entity inside a given list
        of collections.
      parameters:
      - in: query
        name: collection_ids
        schema:
          type: array
          items:
            type: string
      responses:
        '200':
          description: Returns a list of entities in result
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      tags:
      - Entity
    """
    payload = parse_request("EntityUpdate")
    # The proxy is built with ``cleaned=False``.
    proxy = model.get_proxy(payload, cleaned=False)
    tag_request(schema=proxy.schema.name, caption=proxy.caption)
    wanted_collections = request.args.getlist("collection_ids")
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=wanted_collections,
    )
    return EntitySerializer.jsonify_result(matches)
Beispiel #24
0
def view(entity_id):
    """Serve the entity with id `entity_id`.

    An ``Audit.ACT_ENTITY`` audit record is written and the request is
    tagged with the entity's ``collection_id`` before serialization.
    """
    enable_cache()
    action = request.authz.READ
    data = get_index_entity(entity_id, action)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    tag_request(collection_id=data.get('collection_id'))
    return EntitySerializer.jsonify(data)
Beispiel #25
0
def create():
    """Create an entity from an ``EntityCreateSchema`` payload.

    The ``sync`` flag (default ``True``) is forwarded to ``create_entity``,
    whose return value is tagged by its ``id`` and serialized directly.
    """
    payload = parse_request(EntityCreateSchema)
    collection = get_db_collection(
        payload['collection_id'],
        request.authz.WRITE,
    )
    sync = get_flag('sync', True)
    created = create_entity(payload, collection, sync=sync)
    tag_request(
        entity_id=created.get('id'),
        collection_id=str(collection.id),
    )
    return EntitySerializer.jsonify(created)
Beispiel #26
0
def view(entity_id):
    """Serve the entity with id `entity_id`.

    The lookup uses ``request.authz.READ``. Afterwards the access is
    recorded as ``Audit.ACT_ENTITY`` and the request is tagged with the
    ``collection_id`` found on the entity.
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    collection_id = found.get('collection_id')
    tag_request(collection_id=collection_id)
    return EntitySerializer.jsonify(found)
Beispiel #27
0
def create():
    """Create an entity in the collection named by the request payload.

    The payload is parsed with ``EntityCreateSchema`` and the collection is
    loaded with ``request.authz.WRITE``. Whatever ``create_entity`` returns
    is used both for request tagging and as the response body.
    """
    body = parse_request(EntityCreateSchema)
    target = get_db_collection(body['collection_id'], request.authz.WRITE)
    want_sync = get_flag('sync', True)
    result = create_entity(body, target, sync=want_sync)
    new_id = result.get('id')
    tag_request(entity_id=new_id, collection_id=str(target.id))
    return EntitySerializer.jsonify(result)
Beispiel #28
0
def index():
    """Run an ``EntitiesQuery`` for the current request.
    ---
    get:
      summary: Search entities
      description: >
        Returns a list of entities matching the given search criteria.

        A filter can be applied to show only results from a particular
        collection: `?filter:collection_id={collection_id}`.

        If you know you only want to search documents (unstructured, ingested
        data) or entities (structured data which may have been extracted from
        a dataset, or entered by a human) you can use these arguments with the
        `/documents` or `/entities` endpoints.
      parameters:
      - description: >-
          A query string in ElasticSearch query syntax. Can include field
          searches, such as `title:penguin`
        in: query
        name: q
        schema:
          type: string
      - description: >-
          Return facet values for the given metadata field, such as
          `languages`, `countries`, `mime_type` or `extension`. This can be
          specified multiple times for more than one facet to be added.
        in: query
        name: facet
        schema:
          type: string
      - description: >
          Filter the results by the given field. This is useful when used in
          conjunction with facet to create a drill-down mechanism. Useful
          fields are:

          - `collection_id`, documents belonging to a particular collection.

          - `title`, of the document.

          - `file_name`, of the source file.

          - `source_url`, URL of the source file.

          - `extension`, file extension of the source file.

          - `languages`, in the document.

          - `countries`, associated with the document.

          - `keywords`, from the document.

          - `emails`, email addresses mentioned in the document.

          - `domains`, websites mentioned in the document.

          - `phones`, mentioned in the document.

          - `dates`, in any of the following formats: yyyy-MM-dd, yyyy-MM,
          yyyy-MM-d, yyyy-M, yyyy

          - `mime_type`, of the source file.

          - `author`, according to the source file's metadata.

          - `summary`, of the document.

          - `text`, entire text extracted from the document.

          - `created_at`, when the document was added to aleph
          (yyyy-mm-ddThh:ii:ss.uuuuuu).

          - `updated_at`, when the document was modified in aleph
          (yyyy-mm-ddThh:ii:ss.uuuuuu).
        in: query
        name: 'filter:{field_name}'
        schema:
          type: string
      - description: 'The number of results to return, max. 10,000.'
        in: query
        name: limit
        schema:
          type: integer
      - description: >
            The number of results to skip at the beginning of the result set.
        in: query
        name: offset
        schema:
          type: integer
      responses:
        '200':
          description: Returns a list of entities in result
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      tags:
      - Entity
    """
    # enable_cache(vary_user=True)
    parser = SearchQueryParser(request.values, request.authz)
    result = EntitiesQuery.handle(request, parser=parser)
    # Tagging happens after the search because it uses the text form of
    # the query held by the result.
    tag_request(query=result.query.to_text(), prefix=parser.prefix)
    links = {}
    # Only offer an export link to logged-in users, and only while the
    # result total does not exceed MAX_PAGE.
    if request.authz.logged_in and result.total <= MAX_PAGE:
        query = list(request.args.items(multi=True))
        links["export"] = url_for("entities_api.export", _query=query)
    return EntitySerializer.jsonify_result(result, extra={"links": links})
Beispiel #29
0
def index():
    """Run an ``EntitiesQuery`` for the current request and serialize it.

    The query parser is built from ``request.args`` and ``request.authz``.
    """
    # Caching stays switched off here: enable_cache(vary_user=True)
    query_parser = SearchQueryParser(request.args, request.authz)
    found = EntitiesQuery.handle(request, parser=query_parser)
    return EntitySerializer.jsonify_result(found)