Example #1
0
def generate_sitemap(collection_id):
    """Yield ``(url, updated_date)`` pairs for a collection's sitemap.xml.

    Scans the entities index for hits that belong to ``collection_id``,
    carry the ``Entity.THING`` schema and pass the authorization filter
    built from ``Authz.from_role(None)``. Only the ``schemata`` and
    ``updated_at`` source fields are requested. Hits whose schemata
    include ``Document.SCHEMA`` get a document URL, all others an entity
    URL. The date is whatever precedes the first ``'T'`` in
    ``updated_at`` (empty string when the field is missing).
    """
    # cf. https://www.sitemaps.org/protocol.html
    filters = [
        {'term': {'collection_id': collection_id}},
        {'term': {'schemata': Entity.THING}},
        authz_query(Authz.from_role(None)),
    ]
    body = {
        'query': {'bool': {'filter': filters}},
        '_source': {'includes': ['schemata', 'updated_at']},
    }
    hits = scan(es, index=entities_index(), query=body)
    # The protocol strictly allows 50,000 URLs per sitemap.xml; stop a
    # little short of that.
    for hit in islice(hits, 49500):
        fields = hit.get('_source', {})
        day, _, _ = fields.get('updated_at', '').partition('T')
        is_document = Document.SCHEMA in fields.get('schemata', [])
        make_url = document_url if is_document else entity_url
        yield (make_url(hit.get('_id')), day)
Example #2
0
def _resp_canonical(resp, document_id):
    """Attach a ``rel="canonical"`` Link header to ``resp`` and return it.

    The header target is ``document_url(document_id)``.
    """
    # EXPERIMENTAL HACK
    # Search engines are meant to skip indexing the source document and
    # pick up the UI version of the site instead.
    canonical = document_url(document_id)
    resp.headers['Link'] = '<%s>; rel="canonical"' % canonical
    return resp
Example #3
0
def generate_sitemap(collection_id):
    """Yield ``(url, updated_date)`` pairs for a collection's sitemap.xml.

    Entities come from ``iter_entities`` restricted to ``collection_id``,
    the ``Entity.THING`` schema and the authorization object built from
    ``Authz.from_role(None)``. Entities whose ``schemata`` include
    ``Document.SCHEMA`` get a document URL, all others an entity URL.
    The date is whatever precedes the first ``'T'`` in ``updated_at``
    (empty string when the field is missing).
    """
    # cf. https://www.sitemaps.org/protocol.html
    found = iter_entities(
        authz=Authz.from_role(None),
        collection_id=collection_id,
        schemata=[Entity.THING],
        includes=['schemata', 'updated_at'],
    )
    # The protocol strictly allows 50,000 URLs per sitemap.xml; stop a
    # little short of that.
    for item in islice(found, 49500):
        day, _, _ = item.get('updated_at', '').partition('T')
        is_document = Document.SCHEMA in item.get('schemata', [])
        make_url = document_url if is_document else entity_url
        yield (make_url(item.get('id')), day)
Example #4
0
 def document_links(self, data, pk, schemata):
     """Build the mapping of link names to URLs for the document ``pk``.

     ``self``, ``tags`` and ``ui`` links are always present. A ``file``
     link is added when ``data`` has a truthy ``content_hash``; ``pdf``,
     ``records`` and ``children`` links are added depending on which
     document schemata appear in ``schemata`` (anything offering
     ``intersection``, presumably a set -- confirm against callers).
     """
     def has_any(*names):
         # True when at least one of the given schema names is present.
         return bool(schemata.intersection(names))

     links = {}
     links['self'] = url_for('documents_api.view', document_id=pk)
     links['tags'] = url_for('entities_api.tags', id=pk)
     links['ui'] = document_url(pk)

     if data.get('content_hash'):
         links['file'] = url_for(
             'documents_api.file', document_id=pk, _authorize=True)

     if has_any(Document.SCHEMA_PDF):
         links['pdf'] = url_for(
             'documents_api.pdf', document_id=pk, _authorize=True)

     if has_any(Document.SCHEMA_PDF, Document.SCHEMA_TABLE):
         links['records'] = url_for('documents_api.records', document_id=pk)

     if has_any(Document.SCHEMA_FOLDER):
         # Folders link to an index listing filtered by parent id.
         parent_filter = (('filter:parent.id', pk),)
         links['children'] = url_for(
             'documents_api.index', _query=parent_filter)

     return links
Example #5
0
def generate_sitemap(collection_id):
    """Yield ``(url, updated_date)`` pairs for a collection's sitemap.xml.

    Entities come from ``iter_entities`` restricted to ``collection_id``,
    the ``Entity.THING`` schema and the authorization object built from
    ``Authz.from_role(None)``. Each entity's ``schema`` is resolved
    through ``model.get``; entities whose schema cannot be resolved are
    skipped. Schemata that are a ``Document.SCHEMA`` get a document URL,
    all others an entity URL. The date is the part of ``updated_at``
    before the first ``'T'``, raised to ``settings.SITEMAP_FLOOR`` when
    it compares lower.
    """
    # cf. https://www.sitemaps.org/protocol.html
    document_schema = model.get(Document.SCHEMA)
    found = iter_entities(
        authz=Authz.from_role(None),
        collection_id=collection_id,
        schemata=[Entity.THING],
        includes=['schema', 'updated_at'],
    )
    # The protocol strictly allows 50,000 URLs per sitemap.xml; stop a
    # little short of that.
    for item in islice(found, 49500):
        day, _, _ = item.get('updated_at', '').partition('T')
        day = max(settings.SITEMAP_FLOOR, day)
        item_schema = model.get(item.get('schema'))
        if item_schema is not None:
            if item_schema.is_a(document_schema):
                make_url = document_url
            else:
                make_url = entity_url
            yield (make_url(item.get('id')), day)
Example #6
0
def resolve_id(object_id, clazz):
    """From an object ID and class type, generate a human-readable
    label and a link that can be rendered into the notification.

    Returns a ``(label, link)`` tuple. Roles and alerts have no link,
    so their second element is ``None``. When the object cannot be
    found, or ``clazz`` is not one of the handled types, the result is
    ``(None, None)``.
    """
    if clazz == Role:
        role = Role.by_id(object_id)
        # A role that no longer exists must not crash the rendering;
        # fall through to the (None, None) result like collections do.
        if role is not None:
            return role.name, None
    elif clazz == Alert:
        alert = Alert.by_id(object_id)
        # Same guard as for roles: the alert may no longer exist.
        if alert is not None:
            return alert.query, None
    elif clazz == Collection:
        collection = Collection.by_id(object_id)
        if collection is not None:
            return collection.label, collection_url(object_id)
    elif clazz in (Document, Entity):
        entity = get_entity(object_id)
        if entity is not None:
            # Tolerate a missing or empty 'schemata' field rather than
            # raising on a membership test against None.
            if Document.SCHEMA in (entity.get('schemata') or ()):
                title = entity.get('title', entity.get('file_name'))
                return title, document_url(object_id)
            return entity.get('name'), entity_url(object_id)
    return None, None
Example #7
0
def sitemap(id):
    """Render the sitemap.xml document for the collection ``id``.

    The collection is fetched with ``get_db_collection`` using
    ``request.authz.READ``. Each entity from ``get_sitemap_entities``
    contributes one ``(url, date)`` entry; entities whose ``schema``
    cannot be resolved through ``model.get`` are left out. Dates are
    the part of ``updated_at`` before the first ``'T'``, raised to
    ``settings.SITEMAP_FLOOR`` when they compare lower.
    """
    # cf. https://www.sitemaps.org/protocol.html
    collection = get_db_collection(id, request.authz.READ)
    document_schema = model.get(Document.SCHEMA)

    entries = []
    for item in get_sitemap_entities(id):
        day, _, _ = item.get('updated_at', '').partition('T')
        day = max(settings.SITEMAP_FLOOR, day)
        item_schema = model.get(item.get('schema'))
        if item_schema is not None:
            if item_schema.is_a(document_schema):
                make_url = document_url
            else:
                make_url = entity_url
            entries.append((make_url(item.get('id')), day))

    # The collection itself is passed to the template separately from
    # the per-entity entries.
    collection_link = collection_url(collection_id=collection.id)
    collection_day = collection.updated_at.date().isoformat()
    return render_xml(
        'sitemap.xml',
        url=collection_link,
        updated_at=collection_day,
        entries=entries,
    )