Ejemplo n.º 1
0
def map_to_aggregator(collection, mapping, aggregator):
    """Apply an entity mapping to its source table and write the output.

    The table is resolved via ``get_entity`` and, if that yields nothing,
    via ``aggregator.get``. Anything previously stored in the aggregator
    under this mapping's origin is deleted before the freshly mapped
    entities are written in bulk. When the mapping has an entity set, each
    generated entity is also saved as an item of that set.

    Raises:
        RuntimeError: if the table is found in neither place.
    """
    table = get_entity(mapping.table_id)
    if table is None:
        # Fall back to the aggregator when the first lookup finds nothing.
        table = aggregator.get(mapping.table_id)
        if table is None:
            raise RuntimeError("Table cannot be found: %s" % mapping.table_id)

    settings = {
        "csv_url": _get_table_csv_link(table),
        "entities": mapping.query,
    }
    mapper = model.make_mapping(settings, key_prefix=collection.foreign_id)

    # Clear out whatever an earlier run stored under the same origin.
    origin = mapping_origin(mapping.id)
    aggregator.delete(origin=origin)
    writer = aggregator.bulk()

    # Pre-set so the final log line works when the source has no records.
    row_count = 0
    for row_count, record in enumerate(mapper.source.records, start=1):
        if row_count > 0 and not row_count % 1000:
            log.info("[%s] Mapped %s rows ...", mapping.id, row_count)
        for proxy in mapper.map(record).values():
            proxy.context = mapping.get_proxy_context()
            if proxy.schema.is_a("Thing"):
                # Record the source table on the generated entity.
                proxy.add("proof", mapping.table_id)
            proxy = remove_checksums(collection.ns.apply(proxy))
            # The row number is used as the fragment for this write.
            writer.put(proxy, fragment=row_count, origin=origin)
            if mapping.entityset is None:
                continue
            save_entityset_item(
                mapping.entityset,
                collection,
                proxy.id,
                added_by_id=mapping.role_id,
            )
    writer.flush()
    log.info("[%s] Mapping done (%s rows)", mapping.id, row_count)
Ejemplo n.º 2
0
def item_update(entityset_id):
    """Add an item to the entity set with id `entityset_id`, or change
    the items judgement.

    To delete an item from the entity set, apply the judgement: `no_judgement`.
    ---
    post:
      summary: Add item to an entityset
      parameters:
      - description: The entityset id.
        in: path
        name: entityset_id
        required: true
        schema:
          type: string
        example: 3a0d91ece2dce88ad3259594c7b642485235a048
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntitySetItemUpdate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitySetItem'
          description: OK
        '204':
          description: Item removed
      tags:
      - EntitySetItem
    """
    # NOTE: the docstring above is an OpenAPI operation description; keep its
    # YAML part intact when editing.
    entityset = get_entityset(entityset_id, request.authz.WRITE)
    payload = parse_request("EntitySetItemUpdate")

    # The entity may be referenced by a nested object or by a bare ID; an
    # explicit `entity_id` takes precedence over the nested object's `id`.
    nested = payload.pop("entity", {})
    entity_id = payload.pop("entity_id", nested.get("id"))
    entity = get_index_entity(entity_id, request.authz.READ)
    collection = get_db_collection(entity["collection_id"])

    # Whatever remains in the payload is forwarded as keyword arguments.
    payload["added_by_id"] = request.authz.id
    payload.pop("collection", None)
    item = save_entityset_item(entityset, collection, entity_id, **payload)
    db.session.commit()
    queue_task(
        collection,
        OP_UPDATE_ENTITY,
        job_id=get_session_id(),
        entity_id=entity_id,
    )

    if item is None:
        # No item came back: answer with a placeholder carrying NO_JUDGEMENT.
        placeholder = {
            "id": "$".join((entityset_id, entity_id)),
            "entityset_id": entityset_id,
            "entityset_collection_id": entityset.collection_id,
            "entity_id": entity_id,
            "collection_id": entity["collection_id"],
            "judgement": Judgement.NO_JUDGEMENT,
        }
        return EntitySetItemSerializer.jsonify(placeholder)

    # The entityset is needed to check if the item is writeable in the serializer:
    return EntitySetItemSerializer.jsonify(item.to_dict(entityset=entityset))
Ejemplo n.º 3
0
def decide_pairwise(collection, entity, match_collection, match, judgement,
                    authz):
    """Store user feedback from a pairwise judgement in a profile-type EntitySet.

    The difficulty is that a single pair-wise user judgement has to be
    translated into a merge or split judgement about a whole cluster of
    entities.

    This works for most cases, with the exception that a profile, once
    established, cannot be split in a way that preserves which entities
    were originally linked to which other entities.

    Returns the entity set of the saved item, or ``None`` when saving the
    match produced no item.
    """
    # Accept either a Judgement member or a raw value.
    judgement = (
        judgement if isinstance(judgement, Judgement) else Judgement(judgement)
    )

    # This will raise a InvalidData error if the two types are not compatible
    model.common_schema(entity.get("schema"), match.get("schema"))

    entity_id = entity.get("id")
    existing = EntitySet.by_entity_id(
        entity_id,
        collection_ids=[collection.id],
        types=[EntitySet.PROFILE],
        judgements=[Judgement.POSITIVE],
    )
    profile = existing.first()
    if profile is None:
        # No profile holds this entity positively yet: create one and seed
        # it with the entity itself.
        profile = EntitySet.create(
            {"type": EntitySet.PROFILE, "label": "profile"},
            collection,
            authz,
        )
        save_entityset_item(
            profile,
            collection,
            entity_id,
            judgement=Judgement.POSITIVE,
            added_by_id=authz.id,
        )

    # Record the judgement about the match, relative to the entity.
    item = save_entityset_item(
        profile,
        match_collection,
        match.get("id"),
        judgement=judgement,
        compared_to_entity_id=entity_id,
        added_by_id=authz.id,
    )
    db.session.commit()

    return None if item is None else item.entityset
Ejemplo n.º 4
0
 def test_profile_similar(self):
     """Exercise the `similar` endpoint of a profile."""
     endpoint = "/api/2/profiles/%s/similar" % self.profile.id

     # Without credentials the request is refused.
     anonymous = self.client.get(endpoint)
     assert anonymous.status_code == 403, anonymous.json

     # Logged in, the endpoint answers but reports nothing yet.
     _, auth_headers = self.login(foreign_id="rolex")
     before = self.client.get(endpoint, headers=auth_headers)
     assert before.status_code == 200, before.json
     assert before.json["total"] == 0, before.json

     # Add an item without a judgement from the third collection, then
     # publish that collection.
     save_entityset_item(
         self.profile,
         self.col3,
         self.ent3.id,
         judgement=Judgement.NO_JUDGEMENT,
     )
     db.session.commit()
     self.grant_publish(self.col3)

     # Now exactly one result is reported.
     after = self.client.get(endpoint, headers=auth_headers)
     assert after.status_code == 200, after.json
     assert after.json["total"] == 1, after.json
Ejemplo n.º 5
0
def bulk(collection_id):
    """
    ---
    post:
      summary: Load entities into a collection
      description: >
        Bulk load entities into the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - description: >-
          Set to false to disable checksum security measures in order to
          allow bulk loading of document data. Defaults to true, and is
          always treated as true for non-admin users.
        in: query
        name: safe
        schema:
          type: boolean
      - description: >-
          Let UI tools change the entities created by this request.
          Defaults to false.
        in: query
        name: mutable
        schema:
          type: boolean
      - description: >-
          ID of an entity set, writeable by the user, to which every loaded
          entity is added.
        in: query
        name: entityset_id
        schema:
          type: string
      requestBody:
        description: Entities to be loaded.
        content:
          application/json:
            schema:
              type: array
              items:
                $ref: '#/components/schemas/EntityUpdate'
      responses:
        '204':
          description: No Content
      tags:
      - Collection
    """
    # NOTE: the docstring above is an OpenAPI operation description. The
    # query flag is named `safe` to match what the code below actually reads.
    collection = get_db_collection(collection_id, request.authz.WRITE)
    require(request.authz.can_bulk_import())
    job_id = get_session_id()

    # Optionally add everything that gets loaded to an entity set.
    entityset = request.args.get("entityset_id")
    if entityset is not None:
        entityset = get_entityset(entityset, request.authz.WRITE)

    # Passing safe=false disables checksum security measures in order to
    # allow bulk loading of document data:
    safe = get_flag("safe", default=True)
    # Flag is only available for admins:
    if not request.authz.is_admin:
        safe = True

    # Let UI tools change the entities created by this:
    mutable = get_flag("mutable", default=False)
    entities = ensure_list(request.get_json(force=True))
    entity_ids = []
    for entity_id in bulk_write(
        collection, entities, safe=safe, mutable=mutable, role_id=request.authz.id
    ):
        entity_ids.append(entity_id)
        if entityset is not None:
            save_entityset_item(
                entityset,
                collection,
                entity_id,
                added_by_id=request.authz.id,
            )
    collection.touch()
    db.session.commit()
    # Queue the indexing task only after the commit.
    queue_task(collection, OP_INDEX, job_id=job_id, entity_ids=entity_ids)
    return ("", 204)
Ejemplo n.º 6
0
def entities_update(entityset_id):
    """
    ---
    post:
      summary: Update an entity and add it to the entity set.
      description: >
        Update the entity with id `entity_id`. If it does not exist it will be
        created. If the user cannot edit the given entity, it is merely added
        to the entity set. New entities are always created in the collection of
        the entity set.

        Aside from these idiosyncracies, this is the same as `/api/2/entities/<id>`,
        but handles entity set membership transparently.
      parameters:
      - description: The entityset id.
        in: path
        name: entityset_id
        required: true
        schema:
          type: string
        example: 3a0d91ece2dce88ad3259594c7b642485235a048
      - in: query
        name: sign
        description: Sign entity IDs referenced in nested properties.
        required: false
        schema:
          type: boolean
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntityUpdate'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    # NOTE: the docstring above is an OpenAPI operation description; keep its
    # YAML intact when editing.
    entityset = get_entityset(entityset_id, request.authz.WRITE)
    payload = parse_request("EntityUpdate")
    # A fresh ID is used when the payload does not carry one.
    entity_id = payload.get("id", make_textid())

    try:
        existing = get_index_entity(entity_id, request.authz.READ)
        collection = get_db_collection(
            existing.get("collection_id"), request.authz.READ
        )
    except NotFound:
        # Unknown entity: it will be created in the entity set's collection.
        existing, collection = None, entityset.collection

    tag_request(collection_id=entityset.collection_id)

    # Only new entities, or ones the user may write, are upserted. Anything
    # else is merely added to the set below, unchanged.
    may_upsert = existing is None or check_write_entity(existing, request.authz)
    if may_upsert:
        if get_flag("validate", default=False):
            validate_entity(payload)
        entity_id = upsert_entity(
            payload,
            collection,
            authz=request.authz,
            sync=get_flag("sync", default=True),
            sign=get_flag("sign", default=False),
            job_id=get_session_id(),
        )

    save_entityset_item(
        entityset, collection, entity_id, added_by_id=request.authz.id
    )
    db.session.commit()
    return entity_view(entity_id)