Exemple #1
0
def cleanup_deleted():
    """Run the ``cleanup_deleted`` hook of several models, then commit.

    The models are imported locally and handled in a fixed order: entity
    set items, entity sets, collections and finally roles.
    """
    from aleph.model import Collection, EntitySet, EntitySetItem, Role

    # Keep this sequence: items are cleaned up before the sets that own them.
    for model_cls in (EntitySetItem, EntitySet, Collection, Role):
        model_cls.cleanup_deleted()
    db.session.commit()
Exemple #2
0
def decide_xref(xref, judgement, authz):
    """Record a user's verdict on an Xref result in a profile-type EntitySet.

    A single pair-wise judgement has to be translated into a merge or split
    decision about a whole cluster of entities. That works for most cases,
    with the caveat that a profile, once established, cannot be split in a
    way that preserves which entities were originally linked to which.

    Returns the profile that was created, extended or merged. Returns
    ``None`` without writing anything when the judgement is
    ``NO_JUDGEMENT`` and at least one of the two entities has no profile.
    """
    if not isinstance(judgement, Judgement):
        judgement = Judgement(judgement)

    collection = Collection.by_id(xref.get("collection_id"))
    entity_id = xref.get("entity_id")
    match_id = xref.get("match_id")
    match_collection_id = xref.get("match_collection_id")

    def _positive_profile(member_id):
        # First profile in `collection` holding `member_id` as a positive item.
        return EntitySet.by_entity_id(
            member_id,
            judgements=[Judgement.POSITIVE],
            collection_ids=[collection.id],
            types=[EntitySet.PROFILE],
        ).first()

    entity_profile = _positive_profile(entity_id)
    match_profile = _positive_profile(match_id)

    # If we are undecided, and we stay undecided, not much to change.
    lacks_profile = entity_profile is None or match_profile is None
    if lacks_profile and judgement == Judgement.NO_JUDGEMENT:
        return None

    if entity_profile is None:
        # Seed a fresh profile with the source entity as a positive member.
        entity_profile = create_profile(collection, authz)
        profile_add_entities(
            entity_profile,
            entity_id,
            collection.id,
            None,
            Judgement.POSITIVE,
            authz,
        )

    should_merge = judgement is Judgement.POSITIVE and match_profile is not None
    if not should_merge:
        # Any judgement other than "positive with two existing profiles".
        # NOTE: NEGATIVE judgements could trigger a `split_profile` here, but
        # that is not implemented so that judgements are not lost. It should
        # be strongly considered as a way to reverse profile mergers; the
        # open question is what to do with old judgements on a pair.
        profile_add_entities(
            entity_profile,
            match_id,
            match_collection_id,
            entity_id,
            judgement,
            authz,
        )
    else:
        # Both entities already have profiles and the match is positive.
        entity_profile = entity_profile.merge(match_profile, authz.id)

    db.session.commit()
    return entity_profile
Exemple #3
0
def create_entityset(collection, data, authz):
    """Create an entity set along with any entities shipped in ``data``.

    Entities under ``data["entities"]`` are popped from ``data`` and passed
    to ``upsert_entity``. The IDs it returns replace the submitted ones in
    the layout and are saved as items of the new set. A ``CREATE_ENTITYSET``
    event is published before the entity set is returned.
    """
    id_map = {}
    upserted_ids = []
    for submitted in data.pop("entities", []):
        # Read the submitted ID before the upsert call sees the entity.
        submitted_id = submitted.get("id")
        stored_id = upsert_entity(submitted, collection, sync=True)
        id_map[submitted_id] = stored_id
        upserted_ids.append(stored_id)

    data["layout"] = replace_layout_ids(data.get("layout", {}), id_map)
    entityset = EntitySet.create(data, collection, authz)
    for stored_id in upserted_ids:
        save_entityset_item(entityset, collection, stored_id)

    event_params = {"collection": collection, "entityset": entityset}
    publish(
        Events.CREATE_ENTITYSET,
        params=event_params,
        channels=[collection, authz.role],
        actor_id=authz.id,
    )
    return entityset
 def test_bulk_entitysets_api(self):
     """Bulk-post two Person entities into a list entity set and count them."""
     role, headers = self.login(is_admin=True)
     authz = Authz.from_role(role)
     eset = EntitySet.create(
         {"type": EntitySet.LIST, "label": "Foo"}, self.col, authz
     )
     db.session.commit()
     eset_id = eset.id

     # Two entities that differ only in their IDs.
     payload = json.dumps([
         {
             "id": raw_id,
             "schema": "Person",
             "properties": {"name": "Osama bin Laden"},
         }
         for raw_id in ("4345800498380953840", "7598743983789743598")
     ])
     bulk_url = "/api/2/collections/%s/_bulk?entityset_id=%s" % (
         self.col.id,
         eset_id,
     )
     res = self.client.post(bulk_url, headers=headers, data=payload)
     assert res.status_code == 204, res

     # Both entities should now be listed under the entity set.
     listing_url = "/api/2/entitysets/%s/entities?filter:schema=Person" % eset_id
     res = self.client.get(listing_url, headers=headers)
     assert res.json["total"] == 2, res.json
Exemple #5
0
def get_deep_collection(collection):
    """Gather the "deep" (non-shallow) details of a collection.

    The result holds index statistics, mapping and entity set counts, and
    the collection status, and is flagged with ``"shallow": False``.
    """
    collection_id = collection.id
    counts = {
        "mappings": Mapping.by_collection(collection_id).count(),
        "entitysets": EntitySet.type_counts(collection_id=collection_id),
    }
    return {
        "statistics": index.get_collection_stats(collection_id),
        "counts": counts,
        "status": get_status(collection),
        "shallow": False,
    }
Exemple #6
0
def decide_pairwise(
    collection, entity, match_collection, match, judgement, authz
):
    """Record a pair-wise judgement between two entities in a profile.

    A single pair-wise user judgement has to be translated into a merge or
    split decision about a whole cluster of entities. That works for most
    cases, with the caveat that a profile, once established, cannot be split
    in a way that preserves which entities were originally linked to which.

    Returns the entity set of the saved match item, or ``None`` when
    ``save_entityset_item`` returns no item.
    """
    if not isinstance(judgement, Judgement):
        judgement = Judgement(judgement)

    entity_id = entity.get("id")

    # This will raise a InvalidData error if the two types are not compatible
    model.common_schema(entity.get("schema"), match.get("schema"))

    profile = EntitySet.by_entity_id(
        entity_id,
        collection_ids=[collection.id],
        types=[EntitySet.PROFILE],
        judgements=[Judgement.POSITIVE],
    ).first()
    if profile is None:
        # No profile yet: create one and seed it with the source entity.
        profile = EntitySet.create(
            {"type": EntitySet.PROFILE, "label": "profile"},
            collection,
            authz,
        )
        save_entityset_item(
            profile,
            collection,
            entity_id,
            judgement=Judgement.POSITIVE,
            added_by_id=authz.id,
        )

    match_item = save_entityset_item(
        profile,
        match_collection,
        match.get("id"),
        judgement=judgement,
        compared_to_entity_id=entity_id,
        added_by_id=authz.id,
    )
    db.session.commit()

    return None if match_item is None else match_item.entityset
Exemple #7
0
def collection_profiles(collection_id, judgements=None, deleted=False):
    """Yield ``(entity_set, items)`` for each non-empty profile of a collection.

    ``judgements``, when given, are converted to ``Judgement`` values and
    passed with ``deleted`` to each profile's item query. Profiles whose
    query returns no items are skipped.
    """
    if judgements is not None:
        judgements = [Judgement(value) for value in judgements]
    profiles = EntitySet.by_collection_id(
        collection_id, types=[EntitySet.PROFILE]
    )
    for profile in profiles:
        members = profile.profile(judgements=judgements, deleted=deleted).all()
        if not members:
            continue
        yield (profile, members)
Exemple #8
0
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete the contents of a collection and, by default, the collection.

    The collection's queue is cancelled, its aggregator dropped, its
    notifications flushed, and its entities and xref data removed from the
    index. Mappings, entity sets, entities and documents belonging to the
    collection are then deleted and the session is committed.

    :param collection: the collection to delete.
    :param keep_metadata: when true, the permissions, the collection record
        and the collection's index entry are left in place.
    :param sync: forwarded to the notification flush and to the entity and
        xref index deletions.
    """
    cancel_queue(collection)
    aggregator = get_aggregator(collection)
    aggregator.drop()
    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)
    # Reuse an existing deletion timestamp if the collection already has one.
    deleted_at = collection.deleted_at or datetime.utcnow()
    Mapping.delete_by_collection(collection.id)
    EntitySet.delete_by_collection(collection.id, deleted_at)
    Entity.delete_by_collection(collection.id)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        Permission.delete_by_collection(collection.id)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    # Index and authz cleanup happens only after the commit above.
    if not keep_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id)
Exemple #9
0
def index():
    """Returns a list of entitysets for the role
    ---
    get:
      summary: List entitysets
      parameters:
      - description: The collection id.
        in: query
        name: 'filter:collection_id'
        required: true
        schema:
          minimum: 1
          type: integer
      - description: The type of the entity set
        in: query
        name: 'filter:type'
        required: false
        schema:
          type: string
      - description: Query string for searches
        in: query
        name: 'prefix'
        required: false
        schema:
          type: string
      responses:
        '200':
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/EntitySet'
          description: OK
      tags:
        - EntitySet
    """
    parser = QueryParser(request.args, request.authz)
    types = parser.filters.get("type")
    # Start from the entity sets visible to the requesting role, newest first.
    q = EntitySet.by_authz(request.authz, types=types, prefix=parser.prefix)
    q = q.order_by(EntitySet.updated_at.desc())
    # NOTE(review): the spec above marks `filter:collection_id` as required,
    # but the filter is only applied when at least one ID was supplied.
    collection_ids = ensure_list(parser.filters.get("collection_id"))
    if collection_ids:
        q = q.filter(EntitySet.collection_id.in_(collection_ids))
    result = DatabaseQueryResult(request, q, parser=parser)
    return EntitySetSerializer.jsonify_result(result)
Exemple #10
0
def dump_profiles(outfile, foreign_id=None):
    """Write profile entity set items to ``outfile``, one JSON object per line.

    When ``foreign_id`` is given, only profiles of the collection it
    resolves to are exported. Each record carries the expanded entity and
    the expanded entity it was compared to.
    """
    profiles = EntitySet.by_type(EntitySet.PROFILE)
    if foreign_id is not None:
        collection = get_collection(foreign_id)
        profiles = profiles.filter(EntitySet.collection_id == collection.id)

    encoder = JSONEncoder(sort_keys=True)
    for profile in profiles:
        for item in profile.items():
            record = item.to_dict(entityset=profile)
            entity_id = record.get("entity_id")
            record["entity"] = get_expanded_entity(entity_id)
            compared_id = record.get("compared_to_entity_id")
            record["compared_to_entity"] = get_expanded_entity(compared_id)
            outfile.write(encoder.encode(record) + "\n")
Exemple #11
0
def get_deep_role(role):
    """Gather the "deep" (non-shallow) details of a role.

    The result holds the role's alert, export, casefile and entity set
    counts and is flagged with ``"shallow": False``.
    """
    authz = Authz.from_role(role)
    role_id = role.id
    alert_count = Alert.by_role_id(role_id).count()
    export_count = Export.by_role_id(role_id).count()
    casefile_count = Collection.all_casefiles(authz=authz).count()
    entityset_counts = EntitySet.type_counts(authz=authz)
    counts = {
        "alerts": alert_count,
        "entitysets": entityset_counts,
        "casefiles": casefile_count,
        "exports": export_count,
    }
    return {"counts": counts, "shallow": False}
Exemple #12
0
def get_entitysets_by_entity(
    entity_id, collection_ids=None, judgements=None, types=None, labels=None
):
    """Return the ``EntitySet.by_entity_id`` query for an entity.

    ``judgements``, when given, are converted to ``Judgement`` values first.
    All other filters are passed through unchanged.
    """
    if judgements is not None:
        judgements = [Judgement(value) for value in judgements]
    return EntitySet.by_entity_id(
        entity_id,
        collection_ids=collection_ids,
        judgements=judgements,
        types=types,
        labels=labels,
    )
Exemple #13
0
def profile_fragments(collection, aggregator, entity_id=None):
    """Write stub entities carrying a ``profile_id`` into the aggregator.

    To make the profile_id visible on entities in a collection, stub
    entities containing only a context are generated in the FtM store.
    Fragments previously written under ``ORIGIN`` are deleted first.

    Returns the profile ID of the last stub written, or ``None`` if there
    were no profile memberships to write.
    """
    aggregator.delete(origin=ORIGIN)
    writer = aggregator.bulk()
    last_profile_id = None
    memberships = EntitySet.all_profiles(collection.id, entity_id=entity_id)
    for last_profile_id, member_id in memberships:
        stub = {
            "id": member_id,
            "schema": Entity.THING,
            "profile_id": last_profile_id,
        }
        writer.put(model.get_proxy(stub), origin=ORIGIN)
    writer.flush()
    return last_profile_id
Exemple #14
0
def _query_item(entity, entitysets=True):
    """Cross-reference an entity or document, given as an indexed document.

    Yields ``(score, entity, collection_id, match, entityset_ids)`` for each
    search hit that can be unpacked. Nothing is yielded when the match query
    for the entity equals the "none" query.
    """
    query_clause = match_query(entity)
    if query_clause == none_query():
        return

    log.debug("Candidate [%s]: %s", entity.schema.name, entity.caption)
    entityset_ids = []
    if entitysets:
        entityset_ids = EntitySet.entity_entitysets(entity.id)

    body = {"query": query_clause, "size": 50, "_source": ENTITY_SOURCE}
    schemata = list(entity.schema.matchable_schemata)
    read_index = entities_read_index(schema=schemata)
    response = es.search(index=read_index, body=body)
    for hit in response.get("hits").get("hits"):
        doc = unpack_result(hit)
        if doc is None:
            continue
        candidate = model.get_proxy(doc)
        score = compare(model, entity, candidate)
        log.debug(
            "Match: %s <[%.2f]> %s", entity.caption, score, candidate.caption
        )
        yield score, entity, doc.get("collection_id"), candidate, entityset_ids
Exemple #15
0
    def setUp(self):
        """Create one profile with items drawn from three collections.

        Three entities are saved with the default judgement (one per
        collection) and a fourth, in the second collection, is saved with a
        NEGATIVE judgement. All entities are indexed before being added.
        """
        super(ProfilesApiTestCase, self).setUp()
        self.rolex = self.create_user(foreign_id="rolex")
        self.col1 = self.create_collection()
        self.grant(self.col1, self.rolex, True, True)
        owner_authz = Authz.from_role(self.rolex)
        profile_data = {"label": "x", "type": EntitySet.PROFILE}
        self.profile = EntitySet.create(profile_data, self.col1, owner_authz)

        # First collection: entity saved with the default judgement.
        self.ent1 = self.create_entity(
            {
                "schema": "LegalEntity",
                "properties": {
                    "name": "Donald Trump",
                    "address": "721 Fifth Avenue, New York, NY",
                    "phone": "+12024561414",
                },
            },
            self.col1,
        )
        index_entity(self.ent1)
        EntitySetItem.save(
            self.profile, self.ent1.id, collection_id=self.col1.id
        )

        # Second collection, granted via `grant_publish`.
        self.col2 = self.create_collection()
        self.grant_publish(self.col2)
        self.ent2 = self.create_entity(
            {
                "schema": "Person",
                "properties": {
                    "name": "Donald J. Trump",
                    "position": "45th President of the US",
                    "phone": "+12024561414",
                },
            },
            self.col2,
        )
        index_entity(self.ent2)
        EntitySetItem.save(
            self.profile, self.ent2.id, collection_id=self.col2.id
        )

        # Same collection: an entity explicitly judged NEGATIVE.
        self.ent_false = self.create_entity(
            {
                "schema": "LegalEntity",
                "properties": {
                    "name": "Donald Trump, Jr",
                    "email": "*****@*****.**",
                },
            },
            self.col2,
        )
        index_entity(self.ent_false)
        EntitySetItem.save(
            self.profile,
            self.ent_false.id,
            collection_id=self.col2.id,
            judgement=Judgement.NEGATIVE,
        )

        # Third collection: no grant call is made for it here.
        self.col3 = self.create_collection()
        self.ent3 = self.create_entity(
            {
                "schema": "LegalEntity",
                "properties": {
                    "name": "Donald John Trump",
                    "birthDate": "1964",
                },
            },
            self.col3,
        )
        index_entity(self.ent3)
        EntitySetItem.save(
            self.profile, self.ent3.id, collection_id=self.col3.id
        )
        db.session.commit()
Exemple #16
0
def create_profile(collection, authz):
    """Create a profile-type EntitySet labelled "profile" in the collection."""
    return EntitySet.create(
        {"type": EntitySet.PROFILE, "label": "profile"},
        collection,
        authz,
    )
Exemple #17
0
def get_entityset(entityset_id):
    """Return the result of ``EntitySet.by_id`` for the given ID."""
    entityset = EntitySet.by_id(entityset_id)
    return entityset
Exemple #18
0
def get_entityset(entityset_id, action=Authz.READ):
    """Load an entity set and enforce ``action`` access on its collection.

    The lookup result is passed through ``obj_or_404``. The request's authz
    must then permit ``action`` on the entity set's collection.
    """
    entityset = obj_or_404(EntitySet.by_id(entityset_id))
    permitted = request.authz.can(entityset.collection_id, action)
    require(permitted)
    return entityset