Python entities_by_ids Exemples, aleph.index.entities.entities_by_ids Python Exemples

Exemple #1

0

Afficher le fichier

def _iter_match_batch(stub, sheet, batch):
    """Append one row to ``sheet`` for every fully resolvable match in ``batch``.

    Each item of ``batch`` is read with ``.get`` for the keys ``entity_id``,
    ``match_id``, ``match_collection_id`` and ``score``. An item is skipped
    when its entity, its matched entity or its collection cannot be found.
    """
    schema_names = [schema.name for schema in model if schema.matchable]

    # Collect all referenced entity IDs and queue the matched collections on
    # the resolver, so both can be fetched with one call each.
    wanted_ids = set()
    for item in batch:
        wanted_ids.update((item.get("entity_id"), item.get("match_id")))
        resolver.queue(stub, Collection, item.get("match_collection_id"))
    resolver.resolve(stub)

    by_id = {}
    for data in entities_by_ids(list(wanted_ids), schemata=schema_names):
        by_id[data.get("id")] = data

    for item in batch:
        entity_data = by_id.get(str(item.get("entity_id")))
        match_data = by_id.get(str(item.get("match_id")))
        collection = resolver.get(stub, Collection, item.get("match_collection_id"))
        if any(part is None for part in (entity_data, match_data, collection)):
            continue

        entity = model.get_proxy(entity_data)
        match = model.get_proxy(match_data)
        row = [
            item.get("score"),
            entity.caption,
            _format_date(entity),
            _format_country(entity),
            collection.get("label"),
            match.caption,
            _format_date(match),
            _format_country(match),
            entity_url(entity.id),
            entity_url(match.id),
        ]
        sheet.append(row)

Exemple #2

0

Afficher le fichier

Fichier : entities.py Projet : wayne9qiu/aleph

def upsert_entity(data, collection, authz=None, sync=False):
    """Create or update a database entity, re-index it and return its ID.

    The ID from ``data`` is signed with the collection namespace and looked
    up in the database. If no entity exists yet, one is created (attributed
    to ``authz.id`` when ``authz`` is given); otherwise the existing one is
    updated. As a side effect, this migrates entities that were created via
    the _bulk API or a mapper into database entities once a user edits them.
    """
    entity_id = collection.ns.sign(data.get("id"))
    entity = None
    if entity_id is not None:
        entity = Entity.by_id(entity_id, collection=collection)

    if entity is not None:
        entity.update(data, collection)
    else:
        role_id = None if authz is None else authz.id
        entity = Entity.create(data, collection, role_id=role_id)

    # Copy name properties from adjacent entities onto this one; the
    # docstring of `inline_names` discusses why this is done.
    proxy = entity.to_proxy()
    adjacent_ids = proxy.get_type_values(registry.entity)
    for adjacent in index.entities_by_ids(adjacent_ids):
        inline_names(proxy, model.get_proxy(adjacent))
    entity.data = proxy.properties
    db.session.add(entity)

    delete_aggregator_entity(collection, entity.id)
    index.index_proxy(collection, proxy, sync=sync)
    refresh_entity(collection, entity.id)
    return entity.id

Exemple #3

0

Afficher le fichier

Fichier : xref.py Projet : pudo/aleph

def _iter_match_batch(batch, authz):
    """Yield one result row per match in ``batch`` that can be fully resolved.

    A match is skipped when ``authz`` may not read the matched collection, or
    when the entity, the matched entity or the collection is not found. Each
    row is a tuple: the score scaled by 100 and truncated to an int, then
    caption, ``_format_date`` and ``_format_country`` output for the entity,
    the collection label, the same three fields for the match, and finally
    the URLs of both entities.
    """
    schema_names = [schema.name for schema in model if schema.matchable]

    # Fetch every entity referenced by the batch with one entities_by_ids call.
    wanted_ids = set()
    for item in batch:
        wanted_ids.update((item.entity_id, item.match_id))
    by_id = {}
    for data in entities_by_ids(list(wanted_ids), schemata=schema_names):
        by_id[data.get('id')] = data

    for item in batch:
        if not authz.can(item.match_collection_id, authz.READ):
            continue
        entity_data = by_id.get(str(item.entity_id))
        match_data = by_id.get(str(item.match_id))
        collection = get_collection(item.match_collection_id)
        if any(part is None for part in (entity_data, match_data, collection)):
            continue

        entity = model.get_proxy(entity_data)
        match = model.get_proxy(match_data)
        row = (
            int(item.score * 100),
            entity.caption,
            _format_date(entity),
            _format_country(entity),
            collection.get('label'),
            match.caption,
            _format_date(match),
            _format_country(match),
            entity_url(entity.id),
            entity_url(match.id),
        )
        yield row

Exemple #4

0

Afficher le fichier

Fichier : resolver.py Projet : modulexcite/aleph

def resolve(stub):
    """Fill ``stub._rx_cache`` for every queued ``(class, key)`` pair.

    Pairs already present in ``stub._rx_cache`` are left alone. The rest are
    looked up in the cache in one call; on a miss the class-specific loader
    from ``LOADERS`` is used if there is one. Missing ``Entity`` keys are
    additionally grouped by their queued schema and fetched in bulk through
    ``entities_by_ids``.
    """
    _instrument_stub(stub)

    # Map cache IDs back to the (class, key) pair and schema they stand for.
    pending = {}
    schema_by_cid = {}
    for clazz, key, schema in stub._rx_queue:
        if (clazz, key) not in stub._rx_cache:
            cid = cache.object_key(clazz, key)
            pending[cid] = (clazz, key)
            schema_by_cid[cid] = schema

    entity_ids_by_schema = defaultdict(list)
    for cid, value in cache.get_many_complex(list(pending.keys())):
        clazz, key = pending.get(cid)
        if value is None:
            log.info("MISS [%s]: %s", clazz.__name__, key)
            if clazz == Entity:
                entity_ids_by_schema[schema_by_cid.get(cid)].append(key)
            loader = LOADERS.get(clazz)
            if loader is not None:
                value = loader(key)
        # Stored even when still None; entity misses may be overwritten below.
        stub._rx_cache[(clazz, key)] = value

    for schema, ids in entity_ids_by_schema.items():
        for data in entities_by_ids(ids, schemata=schema, cached=True):
            stub._rx_cache[(Entity, data.get('id'))] = data

Exemple #5

0

Afficher le fichier

Fichier : resolver.py Projet : pudo/aleph

def resolve(stub):
    """Populate ``stub._rx_cache`` for all queued ``(class, key)`` pairs.

    Pairs that are already in ``stub._rx_cache`` are skipped. Remaining pairs
    are fetched from the cache in one call; a miss falls back to the matching
    loader in ``LOADERS`` when one exists. ``Entity`` misses are also grouped
    by their queued schema and loaded in bulk via ``entities_by_ids``.
    """
    _instrument_stub(stub)

    # cache ID -> (class, key), and cache ID -> schema, for unresolved items.
    unresolved = {}
    schema_for = {}
    for clazz, key, schema in stub._rx_queue:
        if (clazz, key) not in stub._rx_cache:
            cid = cache.object_key(clazz, key)
            unresolved[cid] = (clazz, key)
            schema_for[cid] = schema

    bulk_entity_ids = defaultdict(list)
    for cid, value in cache.get_many_complex(list(unresolved.keys())):
        clazz, key = unresolved.get(cid)
        if value is None:
            if clazz == Entity:
                bulk_entity_ids[schema_for.get(cid)].append(key)
            loader = LOADERS.get(clazz)
            if loader is not None:
                value = loader(key)
        # A value of None is recorded too; the bulk entity fetch below may
        # replace it for entities.
        stub._rx_cache[(clazz, key)] = value

    for schema, ids in bulk_entity_ids.items():
        for data in entities_by_ids(ids, schemata=schema, cached=True):
            stub._rx_cache[(Entity, data.get('id'))] = data

Exemple #6

0

Afficher le fichier

def xref_item(stage, collection, entity_id=None, against_collection_ids=None):
    """Cross-reference an entity against others to generate potential matches.

    Existing ``Match`` rows for each processed entity are deleted and replaced
    by the results of ``xref_query_item``; everything is committed at the end.
    """
    # This runs as a background job. Rather than handling entities one by
    # one, the given entity is processed together with up to 100 further
    # queued tasks (101 at a time). This avoids sending redundant queries to
    # the database and elasticsearch, making cross-ref much faster.
    entity_ids = [entity_id]
    entity_ids.extend(
        task.payload.get('entity_id') for task in stage.get_tasks(limit=100)
    )
    # Only the extra tasks pulled from the stage are marked done here.
    stage.mark_done(len(entity_ids) - 1)

    for data in entities_by_ids(entity_ids, includes=['schema', 'properties']):
        proxy = model.get_proxy(data)

        # Drop previously stored matches for this entity before re-computing.
        stale = db.session.query(Match).filter(Match.entity_id == proxy.id)
        stale.delete()

        results = xref_query_item(proxy, collection_ids=against_collection_ids)
        for score, other_id, other in results:
            log.info("Xref [%.3f]: %s <=> %s", score, proxy, other)
            match = Match()
            match.entity_id = proxy.id
            match.collection_id = collection.id
            match.match_id = other.id
            match.match_collection_id = other_id
            match.score = score
            db.session.add(match)
    db.session.commit()

Exemple #7

0

Afficher le fichier

def _iter_match_batch(batch, authz):
    """Generate an export row (tuple) for each usable match in ``batch``.

    Matches whose collection ``authz`` cannot read are skipped, as are
    matches where the entity, the matched entity or the collection cannot be
    found. The score is emitted scaled by 100 and truncated to an int.
    """
    matchable_schemata = [schema.name for schema in model if schema.matchable]

    # Gather both sides of every match so they can be loaded together.
    referenced = set()
    for record in batch:
        referenced.add(record.entity_id)
        referenced.add(record.match_id)
    found = entities_by_ids(list(referenced), schemata=matchable_schemata)
    lookup = dict((data.get('id'), data) for data in found)

    for record in batch:
        if not authz.can(record.match_collection_id, authz.READ):
            continue
        left = lookup.get(str(record.entity_id))
        right = lookup.get(str(record.match_id))
        collection = get_collection(record.match_collection_id)
        if left is None or right is None or collection is None:
            continue

        left_proxy = model.get_proxy(left)
        right_proxy = model.get_proxy(right)
        yield (
            int(record.score * 100),
            left_proxy.caption,
            _format_date(left_proxy),
            _format_country(left_proxy),
            collection.get('label'),
            right_proxy.caption,
            _format_date(right_proxy),
            _format_country(right_proxy),
            entity_url(left_proxy.id),
            entity_url(right_proxy.id),
        )

Exemple #8

0

Afficher le fichier

Fichier : xref.py Projet : nt0z/aleph

def _iter_match_batch(batch, authz):
    """Yield an export row (tuple) for every resolvable match in ``batch``.

    Collections and entities are both loaded with ``authz`` passed through
    to the lookup. A match is skipped when its entity, its matched entity or
    its collection label is missing from the results (or the label is None).
    """
    entity_ids = set()
    collection_ids = set()
    for record in batch:
        entity_ids.update((record.entity_id, record.match_id))
        collection_ids.add(record.match_collection_id)

    # collection ID -> label, restricted to what the lookup returned.
    labels = {}
    for coll in Collection.all_by_ids(collection_ids, authz=authz):
        labels[coll.id] = coll.label

    # entity ID -> raw entity data.
    by_id = {}
    for data in entities_by_ids(list(entity_ids), authz=authz):
        by_id[data.get('id')] = data

    for record in batch:
        entity_data = by_id.get(str(record.entity_id))
        match_data = by_id.get(str(record.match_id))
        label = labels.get(record.match_collection_id)
        if any(part is None for part in (entity_data, match_data, label)):
            continue

        entity = model.get_proxy(entity_data)
        match = model.get_proxy(match_data)
        row = (
            int(record.score * 100),
            entity.caption,
            _format_date(entity),
            _format_country(entity),
            label,
            match.caption,
            _format_date(match),
            _format_country(match),
            entity_url(entity.id),
            entity_url(match.id),
        )
        yield row

Exemple #9

0

Afficher le fichier

Fichier : entities.py Projet : catskillmarina/aleph

def update_entity(collection, entity_id=None):
    """Refresh xref, aggregator and index data after an entity was edited."""
    # Imported inside the function, as in the original code (presumably to
    # avoid a circular import -- confirm before moving to module level).
    from aleph.logic.xref import xref_entity
    from aleph.logic.profiles import profile_fragments

    log.info("[%s] Update entity: %s", collection, entity_id)
    proxy = model.get_proxy(index.get_entity(entity_id))
    if collection.casefile:
        xref_entity(collection, proxy)

    aggregator = get_aggregator(collection, origin=MODEL_ORIGIN)
    profile_fragments(collection, aggregator, entity_id=entity_id)

    # Copy the names of adjacent entities into a "names" fragment; the
    # docstring of `inline_names` discusses the motivation in more detail.
    names_prop = proxy.schema.get("namesMentioned")
    if names_prop is not None:
        adjacent_ids = proxy.get_type_values(registry.entity)
        mentioned = set()
        for data in index.entities_by_ids(adjacent_ids):
            adjacent = model.get_proxy(data)
            mentioned.update(adjacent.get_type_values(registry.name))

        if mentioned:
            name_proxy = model.make_entity(proxy.schema)
            name_proxy.id = proxy.id
            name_proxy.add(names_prop, mentioned)
            aggregator.put(name_proxy, fragment="names")

    index_aggregator(collection, aggregator, entity_ids=[entity_id])
    refresh_entity(collection, proxy.id)

Exemple #10

0

Afficher le fichier

Fichier : expand.py Projet : nt0z/aleph

 def _resolve_entities(self, cache):
     """Fill ``cache`` with entity data for every ``(Entity, id)`` key it holds.

     Keys of other types are ignored. Nothing is fetched when ``cache`` has
     no entity keys.
     """
     entity_ids = {id_ for (type_, id_) in cache.keys() if type_ == Entity}
     if not entity_ids:
         return
     for data in entities_by_ids(list(entity_ids)):
         cache[(Entity, data.get('id'))] = data

Exemple #11

0

Afficher le fichier

Fichier : entities_api.py Projet : ponycalypsenow/aleph

def content(entity_id):
    """
    ---
    get:
      summary: Get the content of an entity
      description: >
        Return the text and/or html content of the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          content:
            application/json:
              schema:
                properties:
                  headers:
                    type: object
                  html:
                    type: string
                  text:
                    type: string
                type: object
          description: OK
        '404':
          description: Not Found
      tags:
      - Entity
    """
    enable_cache()
    # Access check and collection tagging use the index copy of the entity.
    indexed = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=indexed.get('collection_id'))

    # Re-fetch the entity with the `text` field excluded; only the first
    # result is used.
    results = entities_by_ids(
        [entity_id],
        schemata=indexed.get('schema'),
        excludes=['text'],
    )
    for data in results:
        proxy = model.get_proxy(data)
        html = sanitize_html(
            proxy.first('bodyHtml', quiet=True),
            proxy.first('sourceUrl', quiet=True),
            encoding=proxy.first('encoding', quiet=True),
        )
        headers = registry.json.unpack(proxy.first('headers', quiet=True))
        payload = {
            'headers': headers,
            'text': proxy.first('bodyText', quiet=True),
            'html': html,
        }
        return jsonify(payload)
    return ('', 404)

Exemple #12

0

Afficher le fichier

Fichier : entities_api.py Projet : seekersapp2013/aleph

def content(entity_id):
    """Return headers, text and sanitized HTML of an entity as JSON.

    Responds with an empty body and status 404 when the second entity lookup
    yields no result.
    """
    enable_cache()
    # Access check and collection tagging use the index copy of the entity.
    indexed = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=indexed.get('collection_id'))

    # Re-fetch the entity with the `text` field excluded; only the first
    # result is used.
    results = entities_by_ids(
        [entity_id],
        schemata=indexed.get('schema'),
        excludes=['text'],
    )
    for data in results:
        proxy = model.get_proxy(data)
        raw_html = proxy.first('bodyHtml', quiet=True)
        source_url = proxy.first('sourceUrl', quiet=True)
        html = sanitize_html(raw_html, source_url)
        packed_headers = proxy.first('headers', quiet=True)
        payload = {
            'headers': registry.json.unpack(packed_headers),
            'text': proxy.first('bodyText', quiet=True),
            'html': html,
        }
        return jsonify(payload)
    return ('', 404)

Exemple #13

0

Afficher le fichier

Fichier : entities_api.py Projet : pudo/aleph

def content(entity_id):
    """Return headers, text and sanitized HTML of an entity as JSON.

    An audit record is written for the access. Responds with an empty body
    and status 404 when the second entity lookup yields no result.
    """
    enable_cache()
    # Access check and collection tagging use the index copy of the entity.
    indexed = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=indexed.get('collection_id'))

    # Re-fetch the entity with the `text` field excluded; only the first
    # result is used.
    results = entities_by_ids(
        [entity_id],
        schemata=indexed.get('schema'),
        excludes=['text'],
    )
    for data in results:
        proxy = model.get_proxy(data)
        record_audit(Audit.ACT_ENTITY, id=entity_id)
        raw_html = proxy.first('bodyHtml', quiet=True)
        source_url = proxy.first('sourceUrl', quiet=True)
        html = sanitize_html(raw_html, source_url)
        packed_headers = proxy.first('headers', quiet=True)
        payload = {
            'headers': registry.json.unpack(packed_headers),
            'text': proxy.first('bodyText', quiet=True),
            'html': html,
        }
        return jsonify(payload)
    return ('', 404)

Exemple #14

0

Afficher le fichier

def render_diagram(entityset):
    """Render the ``diagram.html`` template for the given entity set.

    The entities of the set are loaded and every key that is not listed in
    ``FIELDS`` is removed from each entity (in place) before rendering.
    """
    entities = []
    for entity in entities_by_ids(entityset.entities, cached=True):
        # Decide what to drop first, so the dict is not changed while its
        # keys are being iterated.
        unwanted = [field for field in entity.keys() if field not in FIELDS]
        for field in unwanted:
            entity.pop(field)
        entities.append(entity)

    # TODO: add viewport
    data = {
        "entities": entities,
        "layout": entityset.layout,
        "viewport": {"center": {"x": 0, "y": 0}},
    }
    return render_template(
        "diagram.html",
        data=data,
        entityset=entityset,
        settings=settings,
    )

Exemple #15

0

Afficher le fichier

Fichier : entities.py Projet : moreymat/aleph

def inline_names(aggregator, proxy):
    """Store the names of adjacent entities on ``proxy`` as a "names" fragment.

    This is a partial work-around for a UI problem. Imagine a list of
    payments between a sender and a beneficiary: searching for a term from
    the sender or recipient name finds nothing, because the name is indexed
    only with the parties and not with the payment. Questionable in theory,
    but very useful in practice.

    Does nothing when the schema of ``proxy`` has no ``namesMentioned``
    property, or when the adjacent entities provide no names.
    """
    names_prop = proxy.schema.get("namesMentioned")
    if names_prop is None:
        return

    adjacent_ids = proxy.get_type_values(registry.entity)
    collected = set()
    for data in index.entities_by_ids(adjacent_ids):
        adjacent = model.get_proxy(data)
        collected.update(adjacent.get_type_values(registry.name))
    if not collected:
        return

    name_proxy = model.make_entity(proxy.schema)
    name_proxy.id = proxy.id
    name_proxy.add(names_prop, collected)
    aggregator.put(name_proxy, fragment="names")

Exemple #16

0

Afficher le fichier

def _query_matches(collection, entity_ids):
    """Yield everything ``_query_item`` produces for each of the given entities.

    The entities are loaded by ID and wrapped in proxies before being handed
    to ``_query_item`` together with ``collection``.
    """
    proxies = (model.get_proxy(data) for data in entities_by_ids(entity_ids))
    for proxy in proxies:
        yield from _query_item(collection, proxy)