コード例 #1
0
def xref_collection(stage, collection):
    """Cross-reference all the entities and documents in a collection."""
    index.delete_xref(collection)
    matchable = [s.name for s in model if s.matchable]
    entities = iter_entities(collection_id=collection.id, schemata=matchable)
    for entity in entities:
        queue_task(collection, OP_XREF_ITEM, job_id=stage.job.id,
                   payload={'entity_id': entity.get('id')})
コード例 #2
0
def xref_collection(collection_id, other_id=None):
    """Cross-reference all the entities and documents in a collection."""
    matchable = [s.name for s in model if s.matchable]
    entities = iter_entities(collection_id=collection_id,
                             schemata=matchable,
                             excludes=['text', 'roles', 'properties.*'])
    for entity in entities:
        _xref_item(entity, collection_id=other_id)
コード例 #3
0
def xref_collection(stage, collection, against_collection_ids=None):
    """Cross-reference all the entities and documents in a collection."""
    matchable = [s.name for s in model if s.matchable]
    entities = iter_entities(collection_id=collection.id, schemata=matchable)
    for entity in entities:
        payload = {
            'entity_id': entity.get('id'),
            'against_collection_ids': against_collection_ids
        }
        queue_task(collection,
                   OP_XREF_ITEM,
                   job_id=stage.job.id,
                   payload=payload)
コード例 #4
0
def entities(collection_id=None):
    require(request.authz.can_export())
    schemata = ensure_list(request.args.getlist('schema'))
    excludes = ['text', 'roles', 'fingerprints']
    includes = ensure_list(request.args.getlist('include'))
    includes = [f for f in includes if f not in excludes]
    if collection_id is not None:
        get_db_collection(id, request.authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=id)
    entities = iter_entities(authz=request.authz,
                             collection_id=collection_id,
                             schemata=schemata,
                             excludes=excludes,
                             includes=includes)
    return stream_ijson(entities)
コード例 #5
0
ファイル: collections.py プロジェクト: nt0z/aleph
def generate_sitemap(collection_id):
    """Generate entries for a collection-based sitemap.xml file."""
    # cf. https://www.sitemaps.org/protocol.html
    entities = iter_entities(authz=Authz.from_role(None),
                             collection_id=collection_id,
                             schemata=[Entity.THING],
                             includes=['schemata', 'updated_at'])
    # strictly, the limit for sitemap.xml is 50,000
    for entity in islice(entities, 49500):
        updated_at = entity.get('updated_at', '').split('T', 1)[0]
        if Document.SCHEMA in entity.get('schemata', []):
            url = document_url(entity.get('id'))
        else:
            url = entity_url(entity.get('id'))
        yield (url, updated_at)
コード例 #6
0
ファイル: triples.py プロジェクト: arezola/aleph
def export_collection(collection):
    uri = URIRef(ui_url('collections', collection.id))
    g = Graph()
    g.add((uri, RDF.type, DCMI.Collection))
    g.add((uri, RDFS.label, Literal(collection.label)))
    g.add((uri, DCMI.identifier, Literal(collection.foreign_id)))
    g.add((uri, ALEPH.category, ALEPH[collection.category]))

    for line in itergraph(g):
        yield line

    entities = iter_entities(collection_id=collection.id, excludes=['text'])
    for entity in entities:
        g = export_entity(entity, uri)
        for line in itergraph(g):
            yield line
コード例 #7
0
ファイル: stream_api.py プロジェクト: pudo/aleph
def entities(collection_id=None):
    require(request.authz.can_stream())
    log.debug("Stream entities [%r] begins... (coll: %s)",
              request.authz, collection_id)
    schemata = ensure_list(request.args.getlist('schema'))
    excludes = ['text', 'roles', 'fingerprints']
    includes = ensure_list(request.args.getlist('include'))
    includes = [f for f in includes if f not in excludes]
    if collection_id is not None:
        get_db_collection(collection_id, request.authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)
    entities = iter_entities(authz=request.authz,
                             collection_id=collection_id,
                             schemata=schemata,
                             excludes=excludes,
                             includes=includes)
    return stream_ijson(entities)
コード例 #8
0
def entities(collection_id=None):
    """
    ---
    get:
      summary: Stream collection entities.
      description: >
        Stream a JSON form of each entity in the given collection, or
        throughout the entire database.
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      responses:
        '200':
          description: OK
          content:
            application/x-ndjson:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    log.debug("Stream entities [%r] begins... (coll: %s)", request.authz,
              collection_id)
    schemata = ensure_list(request.args.getlist("schema"))
    includes = ensure_list(request.args.getlist("include"))
    includes = includes or PROXY_INCLUDES
    if collection_id is not None:
        get_db_collection(collection_id, request.authz.WRITE)
    else:
        require(request.authz.is_admin)
    entities = iter_entities(
        authz=request.authz,
        collection_id=collection_id,
        schemata=schemata,
        includes=includes,
    )
    return stream_ijson(entities)
コード例 #9
0
def entities(collection_id=None):
    """
    ---
    get:
      summary: Stream collection entities.
      description: >
        Stream a JSON form of each entity in the given collection, or
        throughout the entire database.
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      responses:
        '200':
          description: OK
          content:
            application/x-ndjson:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    require(request.authz.can_stream())
    log.debug("Stream entities [%r] begins... (coll: %s)", request.authz,
              collection_id)
    schemata = ensure_list(request.args.getlist('schema'))
    excludes = ['text', 'fingerprints']
    includes = ensure_list(request.args.getlist('include'))
    includes = [f for f in includes if f not in excludes]
    if collection_id is not None:
        get_db_collection(collection_id, request.authz.READ)
    entities = iter_entities(authz=request.authz,
                             collection_id=collection_id,
                             schemata=schemata,
                             excludes=excludes,
                             includes=includes)
    return stream_ijson(entities)
コード例 #10
0
ファイル: collections.py プロジェクト: jbaehne/aleph
def generate_sitemap(collection_id):
    """Generate entries for a collection-based sitemap.xml file."""
    # cf. https://www.sitemaps.org/protocol.html
    document = model.get(Document.SCHEMA)
    entities = iter_entities(authz=Authz.from_role(None),
                             collection_id=collection_id,
                             schemata=[Entity.THING],
                             includes=['schema', 'updated_at'])
    # strictly, the limit for sitemap.xml is 50,000
    for entity in islice(entities, 49500):
        updated_at = entity.get('updated_at', '').split('T', 1)[0]
        updated_at = max(settings.SITEMAP_FLOOR, updated_at)
        schema = model.get(entity.get('schema'))
        if schema is None:
            continue
        if schema.is_a(document):
            url = document_url(entity.get('id'))
        else:
            url = entity_url(entity.get('id'))
        yield (url, updated_at)