def statistics():
    """Summarise the collections the current user is allowed to read.

    Returns a JSON response holding the number of readable collections
    together with per-schema, per-country and per-category tallies.
    """
    enable_cache()
    readable = request.authz.collections(request.authz.READ)
    for coll_id in readable:
        resolver.queue(request, Collection, coll_id)
    resolver.resolve(request)

    # Display-only figures: the collection total covers everything that is
    # readable, while the breakdowns below leave out casefiles and any
    # collection the resolver could not load.
    by_schema = defaultdict(int)
    by_country = defaultdict(int)
    by_category = defaultdict(int)
    for coll_id in readable:
        info = resolver.get(request, Collection, coll_id)
        if info is not None and not info.get('casefile'):
            by_category[info.get('category')] += 1
            for name, num in info.get('schemata', {}).items():
                by_schema[name] += num
            for code in info.get('countries', []):
                by_country[code] += 1

    payload = {
        'collections': len(readable),
        'schemata': dict(by_schema),
        'countries': dict(by_country),
        'categories': dict(by_category),
        'things': sum(by_schema.values()),
    }
    return jsonify(payload)
def tags(entity_id):
    """Return the tags computed for a single entity.
    ---
    get:
      summary: Get entity tags
      description: >-
        Get tags for the entity with id `entity_id`.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/EntityTag'
      tags:
      - Entity
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get("collection_id"))
    proxy = model.get_proxy(doc)
    found = entity_tags(proxy, request.authz)
    body = {"status": "ok", "total": len(found), "results": found}
    return jsonify(body)
def sitemap():
    """Render the XML sitemap of collections visible without a role.
    ---
    get:
      summary: Get a sitemap
      description: >-
        Returns a site map for search engine robots. This lists each
        published collection on the current instance.
      responses:
        '200':
          description: OK
          content:
            text/xml:
              schema:
                type: object
      tags:
      - System
    """
    enable_cache(vary_user=False)
    request.rate_limit = None
    anonymous = Authz.from_role(None)
    entries = []
    for coll in Collection.all_authz(anonymous):
        # ISO date strings compare chronologically, so max() clamps the
        # reported date to be no earlier than the configured floor.
        stamp = max(settings.SITEMAP_FLOOR,
                    coll.updated_at.date().isoformat())
        entries.append({
            'url': collection_url(coll.id),
            'updated_at': stamp,
        })
    return render_xml('sitemap.xml', collections=entries)
def view(entity_id):
    """Return the serialized entity identified by `entity_id`.
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    owning_collection = doc.get('collection_id')
    tag_request(collection_id=owning_collection)
    return EntitySerializer.jsonify(doc)
def view(entity_id):
    """Return the serialized entity, including a sanitized HTML body.
    ---
    get:
      summary: Get an entity
      description: Return the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Entity'
      tags:
      - Entity
    """
    enable_cache()
    doc = get_index_entity(
        entity_id,
        request.authz.READ,
        excludes=["text", "numeric.*"],
    )
    tag_request(collection_id=doc.get("collection_id"))
    proxy = model.get_proxy(doc)
    # Read the three properties in the same order as they are consumed.
    raw_html, base_url, charset = (
        proxy.first(name, quiet=True)
        for name in ("bodyHtml", "sourceUrl", "encoding")
    )
    doc["safeHtml"] = sanitize_html(raw_html, base_url, encoding=charset)
    doc["shallow"] = False
    return EntitySerializer.jsonify(doc)
def similar(entity_id):
    """Run a match query for the given entity and return the results."""
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get('collection_id'))
    proxy = model.get_proxy(doc)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
def similar(entity_id):
    """Run a match query for the given entity and return the results.

    An audit record for the entity access is written before querying.
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get('collection_id'))
    proxy = model.get_proxy(doc)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
def metadata():
    """Get operational metadata for the frontend.

    The assembled payload is cached per process and locale for 120
    (presumably seconds) and served from the cache when present.
    ---
    get:
      summary: Retrieve system metadata from the application.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
      tags:
      - System
    """
    locale = get_locale()
    enable_cache(vary_user=False, vary=str(locale))
    key = cache.key('metadata', settings.PROCESS_ID, locale)
    cached = cache.get_complex(key)
    if cached is not None:
        return jsonify(cached)

    # Only advertise the login mechanisms that are switched on.
    auth = {}
    if settings.PASSWORD_LOGIN:
        auth['password_login_uri'] = url_for('sessions_api.password_login')
        auth['registration_uri'] = url_for('roles_api.create_code')
    if settings.OAUTH:
        auth['oauth_uri'] = url_for('sessions_api.oauth_init')

    # Map each UI language code to its own name written in that language.
    language_names = {
        code: Locale(code).get_language_name(code)
        for code in settings.UI_LANGUAGES
    }
    app = {
        'title': settings.APP_TITLE,
        'description': settings.APP_DESCRIPTION,
        'version': __version__,
        'banner': settings.APP_BANNER,
        'ui_uri': settings.APP_UI_URL,
        'samples': settings.SAMPLE_SEARCHES,
        'logo': settings.APP_LOGO,
        'favicon': settings.APP_FAVICON,
        'locale': str(locale),
        'locales': language_names,
    }
    payload = {
        'status': 'ok',
        'maintenance': request.authz.in_maintenance,
        'app': app,
        'categories': Collection.CATEGORIES,
        'countries': registry.country.names,
        'languages': registry.language.names,
        'model': model,
        'auth': auth,
    }
    cache.set_complex(key, payload, expires=120)
    return jsonify(payload)
def sitemap():
    """Render the XML sitemap of collections visible without a role."""
    enable_cache(vary_user=False)
    anonymous = Authz.from_role(None)
    entries = []
    for coll in Collection.all_authz(anonymous):
        # ISO date strings compare chronologically, so max() clamps the
        # reported date to be no earlier than the configured floor.
        stamp = max(settings.SITEMAP_FLOOR,
                    coll.updated_at.date().isoformat())
        entries.append({
            'url': collection_url(coll.id),
            'updated_at': stamp,
        })
    return render_xml('sitemap.xml', collections=entries)
def similar(profile_id):
    """Return entities similar to a profile, each with a comparison score.
    ---
    get:
      summary: Get similar entities
      description: >
        Get a list of similar entities to the profile with id `profile_id`
      parameters:
      - in: path
        name: profile_id
        required: true
        schema:
          type: string
      - in: query
        name: 'filter:schema'
        schema:
          items:
            type: string
          type: array
      - in: query
        name: 'filter:schemata'
        schema:
          items:
            type: string
          type: array
      responses:
        '200':
          description: Returns a list of entities
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      tags:
      - Profile
    """
    enable_cache()
    profile = obj_or_404(get_profile(profile_id, authz=request.authz))
    coll_id = profile.get("collection_id")
    require(request.authz.can(coll_id, request.authz.READ))
    tag_request(collection_id=coll_id)

    # Entities already in the profile are excluded from the match query.
    already_in_profile = [member["entity_id"] for member in profile["items"]]
    merged = profile["merged"]
    result = MatchQuery.handle(
        request,
        entity=merged,
        exclude=already_in_profile,
    )
    # Wrap every hit with its score; no judgement is attached here.
    result.results = [
        {
            "score": compare(model, merged, hit),
            "judgement": Judgement.NO_JUDGEMENT,
            "entity": hit,
        }
        for hit in list(result.results)
    ]
    return SimilarSerializer.jsonify_result(result)
def references(entity_id):
    """List the properties referencing an entity, with a count for each."""
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get('collection_id'))
    rows = [
        {
            'count': total,
            'property': prop,
            'schema': prop.schema.name,
        }
        for prop, total in entity_references(doc, request.authz)
    ]
    body = {'status': 'ok', 'total': len(rows), 'results': rows}
    return jsonify(body)
def similar(entity_id):
    """Return entities similar to an entity, scored and with judgements.
    ---
    get:
      summary: Get similar entities
      description: >
        Get a list of similar entities to the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      - in: query
        name: 'filter:schema'
        schema:
          items:
            type: string
          type: array
      - in: query
        name: 'filter:schemata'
        schema:
          items:
            type: string
          type: array
      responses:
        '200':
          description: Returns a list of scored and judged entities
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SimilarResponse'
      tags:
      - Entity
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    coll_id = doc.get("collection_id")
    tag_request(collection_id=coll_id)
    proxy = model.get_proxy(doc)
    result = MatchQuery.handle(request, entity=proxy)

    hits = list(result.results)
    # Fetch the judgements for every (entity, hit) pair in one call.
    pairs = [(entity_id, hit.get("id")) for hit in hits]
    verdicts = pairwise_judgements(pairs, coll_id)
    result.results = [
        {
            "score": compare(model, proxy, hit),
            "judgement": verdicts.get((entity_id, hit.get("id"))),
            "entity": hit,
        }
        for hit in hits
    ]
    return SimilarSerializer.jsonify_result(result)
def content(entity_id):
    """Return the headers, text and sanitized HTML of an entity.
    ---
    get:
      summary: Get the content of an entity
      description: >
        Return the text and/or html content of the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          content:
            application/json:
              schema:
                properties:
                  headers:
                    type: object
                  html:
                    type: string
                  text:
                    type: string
                type: object
          description: OK
        '404':
          description: Not Found
      tags:
      - Entity
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get('collection_id'))
    documents = entities_by_ids(
        [entity_id],
        schemata=entity.get('schema'),
        excludes=['text'],
    )
    # Only the first document is used; the loop returns on its first pass.
    for document in documents:
        proxy = model.get_proxy(document)
        raw_html = proxy.first('bodyHtml', quiet=True)
        base_url = proxy.first('sourceUrl', quiet=True)
        charset = proxy.first('encoding', quiet=True)
        safe_html = sanitize_html(raw_html, base_url, encoding=charset)
        packed_headers = proxy.first('headers', quiet=True)
        headers = registry.json.unpack(packed_headers)
        body_text = proxy.first('bodyText', quiet=True)
        return jsonify({
            'headers': headers,
            'text': body_text,
            'html': safe_html,
        })
    # Nothing came back for this id: answer with an empty 404.
    return ('', 404)
def tags(entity_id):
    """Return an entity's tags, most frequent first, with search filters.
    ---
    get:
      summary: Get entity tags
      description: >-
        Get tags for the entity with id `entity_id`. Tags include the query
        string to make a search by that particular tag.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/EntityTag'
      tags:
      - Entity
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get('collection_id'))
    rows = []
    for field, value, total in entity_tags(doc, request.authz):
        # The tag id is the query string that filters a search by this tag.
        encoded = quote(value.encode('utf-8'))
        search_filter = ('filter:%s' % field, encoded)
        rows.append({
            'id': query_string([search_filter]),
            'value': value,
            'field': field,
            'count': total,
        })
    rows = sorted(rows, key=lambda row: row['count'], reverse=True)
    return jsonify({
        'status': 'ok',
        'total': len(rows),
        'results': rows,
    })
def openapi():
    """Build the OpenAPI spec from the app's view functions, as JSON.

    A fixed set of endpoints is left out of the spec; every other view is
    logged and registered as a path.
    """
    enable_cache(vary_user=False)
    spec = get_openapi_spec(current_app)
    skipped = (
        "static",
        "base_api.openapi",
        "base_api.api_v1_message",
        "sessions_api.oauth_callback",
    )
    for endpoint, handler in current_app.view_functions.items():
        if endpoint not in skipped:
            log.info("%s - %s", endpoint, handler.__qualname__)
            spec.path(view=handler)
    return jsonify(spec.to_dict())
def statistics():
    """Get a summary of the data accessible to the current user.
    ---
    get:
      summary: System-wide user statistics.
      description: >
        Get a summary of the data acessible to the current user.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
      tags:
      - System
    """
    enable_cache()
    readable = request.authz.collections(request.authz.READ)
    for coll_id in readable:
        resolver.queue(request, Collection, coll_id)
    resolver.resolve(request)

    # Display-only figures: the collection total covers everything that is
    # readable, while the breakdowns below leave out casefiles and any
    # collection the resolver could not load.
    by_schema = defaultdict(int)
    by_country = defaultdict(int)
    by_category = defaultdict(int)
    for coll_id in readable:
        info = resolver.get(request, Collection, coll_id)
        if info is not None and not info.get('casefile'):
            by_category[info.get('category')] += 1
            per_schema = get_collection_things(coll_id)
            for name, num in per_schema.items():
                by_schema[name] += num
            for code in info.get('countries', []):
                by_country[code] += 1

    payload = {
        'collections': len(readable),
        'schemata': dict(by_schema),
        'countries': dict(by_country),
        'categories': dict(by_category),
        'things': sum(by_schema.values()),
    }
    return jsonify(payload)
def tags(entity_id):
    """Return an entity's tags, most frequent first, with search filters.
    ---
    get:
      summary: Get entity tags
      description: >-
        Get tags for the entity with id `entity_id`. Tags include the query
        string to make a search by that particular tag.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/EntityTag'
      tags:
      - Entity
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get("collection_id"))
    rows = []
    for field, value, total in entity_tags(doc, request.authz):
        # The tag id is the query string that filters a search by this tag.
        encoded = quote(value.encode("utf-8"))
        search_filter = ("filter:%s" % field, encoded)
        tag_id = query_string([search_filter])
        rows.append({
            "id": tag_id,
            "value": value,
            "field": field,
            "count": total,
        })
    rows = sorted(rows, key=lambda row: row["count"], reverse=True)
    body = {"status": "ok", "total": len(rows), "results": rows}
    return jsonify(body)
def references(entity_id):
    """List the properties referencing an entity, with a count for each.

    An audit record for the entity access is written before aggregating.
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get('collection_id'))
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    rows = [
        {
            'count': total,
            'property': prop,
            'schema': prop.schema.name,
        }
        for prop, total in entity_references(doc, request.authz)
    ]
    body = {'status': 'ok', 'total': len(rows), 'results': rows}
    return jsonify(body)
def tags(entity_id):
    """Return an entity's tags, most frequent first, with search filters."""
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get('collection_id'))
    rows = []
    for field, value, total in entity_tags(doc, request.authz):
        # The tag id is the query string that filters a search by this tag.
        encoded = quote(value.encode('utf-8'))
        search_filter = ('filter:%s' % field, encoded)
        rows.append({
            'id': query_string([search_filter]),
            'value': value,
            'field': field,
            'count': total,
        })
    rows = sorted(rows, key=lambda row: row['count'], reverse=True)
    body = {'status': 'ok', 'total': len(rows), 'results': rows}
    return jsonify(body)
def references(entity_id):
    """List the properties referencing an entity, with a count for each.
    ---
    get:
      summary: Get entity references
      description: >-
        Get the schema-wise aggregation of references to the entity with id
        `entity_id`. This can be used to find and display adjacent entities.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/EntityReference'
      tags:
      - Entity
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get('collection_id'))
    rows = [
        {
            'count': total,
            'property': prop,
            'schema': prop.schema.name,
        }
        for prop, total in entity_references(doc, request.authz)
    ]
    body = {'status': 'ok', 'total': len(rows), 'results': rows}
    return jsonify(body)
def content(entity_id):
    """Return the headers, text and sanitized HTML of an entity.

    Responds with an empty 404 when the id lookup yields no document.
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get('collection_id'))
    documents = entities_by_ids(
        [entity_id],
        schemata=entity.get('schema'),
        excludes=['text'],
    )
    # Only the first document is used; the loop returns on its first pass.
    for document in documents:
        proxy = model.get_proxy(document)
        raw_html = proxy.first('bodyHtml', quiet=True)
        base_url = proxy.first('sourceUrl', quiet=True)
        safe_html = sanitize_html(raw_html, base_url)
        packed_headers = proxy.first('headers', quiet=True)
        headers = registry.json.unpack(packed_headers)
        body_text = proxy.first('bodyText', quiet=True)
        return jsonify({
            'headers': headers,
            'text': body_text,
            'html': safe_html,
        })
    return ('', 404)
def content(entity_id):
    """Return the headers, text and sanitized HTML of an entity.

    An audit record is written once a document has been found. Responds
    with an empty 404 when the id lookup yields no document.
    """
    enable_cache()
    entity = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=entity.get('collection_id'))
    documents = entities_by_ids(
        [entity_id],
        schemata=entity.get('schema'),
        excludes=['text'],
    )
    # Only the first document is used; the loop returns on its first pass.
    for document in documents:
        proxy = model.get_proxy(document)
        record_audit(Audit.ACT_ENTITY, id=entity_id)
        raw_html = proxy.first('bodyHtml', quiet=True)
        base_url = proxy.first('sourceUrl', quiet=True)
        safe_html = sanitize_html(raw_html, base_url)
        packed_headers = proxy.first('headers', quiet=True)
        headers = registry.json.unpack(packed_headers)
        body_text = proxy.first('bodyText', quiet=True)
        return jsonify({
            'headers': headers,
            'text': body_text,
            'html': safe_html,
        })
    return ('', 404)
def statistics():
    """Summarise the collections and groups of the current user.

    Returns a JSON response holding the number of readable collections,
    per-schema, per-country and per-category tallies, and the serialized
    group roles held by the user.
    """
    enable_cache()
    readable = request.authz.collections(request.authz.READ)
    for coll_id in readable:
        resolver.queue(request, Collection, coll_id)
    for role_id in request.authz.roles:
        resolver.queue(request, Role, role_id)
    resolver.resolve(request)

    # Display-only figures: the collection total covers everything that is
    # readable, while the breakdowns below leave out casefiles and any
    # collection the resolver could not load.
    by_schema = defaultdict(int)
    by_country = defaultdict(int)
    by_category = defaultdict(int)
    for coll_id in readable:
        info = resolver.get(request, Collection, coll_id)
        if info is not None and not info.get('casefile'):
            by_category[info.get('category')] += 1
            for name, num in info.get('schemata', {}).items():
                by_schema[name] += num
            for code in info.get('countries', []):
                by_country[code] += 1

    # Collect the user's group roles for display on the home page.
    groups = []
    for role_id in request.authz.roles:
        role = resolver.get(request, Role, role_id)
        if role is not None and role.get('type') == Role.GROUP:
            groups.append(RoleSerializer().serialize(role))

    payload = {
        'collections': len(readable),
        'schemata': dict(by_schema),
        'countries': dict(by_country),
        'categories': dict(by_category),
        'groups': groups,
        'things': sum(by_schema.values()),
    }
    return jsonify(payload)
def similar(entity_id):
    """Run a match query for the given entity and return the results.
    ---
    get:
      summary: Get similar entities
      description: >
        Get a list of similar entities to the entity with id `entity_id`
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      - in: query
        name: 'filter:schema'
        schema:
          items:
            type: string
          type: array
      - in: query
        name: 'filter:schemata'
        schema:
          items:
            type: string
          type: array
      responses:
        '200':
          description: Returns a list of entities
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitiesResponse'
      tags:
      - Entity
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get('collection_id'))
    proxy = model.get_proxy(doc)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
def tags(entity_id):
    """Return an entity's tags, most frequent first, with search filters.

    An audit record for the entity access is written before the tags are
    collected.
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=doc.get('collection_id'))
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    rows = []
    for field, value, total in entity_tags(doc, request.authz):
        # The tag id is the query string that filters a search by this tag.
        encoded = quote(value.encode('utf-8'))
        search_filter = ('filter:%s' % field, encoded)
        rows.append({
            'id': query_string([search_filter]),
            'value': value,
            'field': field,
            'count': total,
        })
    rows = sorted(rows, key=lambda row: row['count'], reverse=True)
    body = {'status': 'ok', 'total': len(rows), 'results': rows}
    return jsonify(body)
def metadata():
    """Return application metadata for the frontend as JSON.

    The assembled payload is cached per locale for 120 (presumably
    seconds) and served from the cache when present.
    """
    locale = get_locale()
    enable_cache(vary_user=False, vary=str(locale))
    key = cache.key('metadata', locale)
    cached = cache.get_complex(key)
    if cached is not None:
        return jsonify(cached)

    # Only advertise the login mechanisms that are switched on.
    auth = {}
    if settings.PASSWORD_LOGIN:
        auth['password_login_uri'] = url_for('sessions_api.password_login')
        auth['registration_uri'] = url_for('roles_api.create_code')
    if settings.OAUTH:
        auth['oauth_uri'] = url_for('sessions_api.oauth_init')

    app = {
        'title': settings.APP_TITLE,
        'description': settings.APP_DESCRIPTION,
        'version': __version__,
        'banner': settings.APP_BANNER,
        'ui_uri': settings.APP_UI_URL,
        'samples': settings.SAMPLE_SEARCHES,
        'logo': settings.APP_LOGO,
        'favicon': settings.APP_FAVICON,
        'locale': str(locale),
        'locales': settings.UI_LANGUAGES,
    }
    payload = {
        'status': 'ok',
        'maintenance': request.authz.in_maintenance,
        'app': app,
        'categories': Collection.CATEGORIES,
        'countries': registry.country.names,
        'languages': registry.language.names,
        'model': model,
        'auth': auth,
    }
    cache.set_complex(key, payload, expire=120)
    return jsonify(payload)
def statistics():
    """Get a summary of the data accessible to an anonymous user.

    Changed [3.9]: Previously, this would return user-specific stats.
    ---
    get:
      summary: System-wide user statistics.
      description: >
        Get a summary of the data acessible to an anonymous user.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
      tags:
      - System
    """
    enable_cache(vary_user=False)
    summary = cache.get_complex(cache.key(cache.STATISTICS))
    if not summary:
        # Nothing usable in the cache: answer with an empty skeleton.
        summary = {"countries": [], "schemata": [], "categories": []}
    return jsonify(summary)
def expand(entity_id):
    """Return the entities adjacent to an entity, grouped by property.
    ---
    get:
      summary: Expand an entity to get its adjacent entities
      description: >-
        Get the property-wise list of entities adjacent to the entity
        with id `entity_id`.
      parameters:
      - in: path
        name: entity_id
        required: true
        schema:
          type: string
      - in: query
        name: edge_types
        description: types of edges to expand. Must is a matchable FtM type
        required: true
        schema:
          type: string
      - description: properties to filter on
        in: query
        name: 'filter:property'
        schema:
          type: string
      - in: query
        description: number of entities to return per property
        name: limit
        schema:
          type: number
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/EntityExpand'
      tags:
      - Entity
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    edge_types = request.args.getlist('edge_types')
    coll_id = doc.get('collection_id')
    tag_request(collection_id=coll_id)
    parser = QueryParser(
        request.args,
        request.authz,
        max_limit=MAX_EXPAND_ENTITIES,
    )
    wanted_properties = parser.filters.get('property')
    # The expansion is restricted to the entity's own collection.
    expansion = entity_expand(
        doc,
        collection_ids=[coll_id],
        edge_types=edge_types,
        properties=wanted_properties,
        authz=request.authz,
        limit=parser.limit,
    )
    rows = [
        {
            'count': total,
            'property': prop.name,
            'entities': [neighbour.to_dict() for neighbour in neighbours],
        }
        for prop, total, neighbours in expansion
    ]
    # The total adds up the per-property counts, not the rows returned.
    grand_total = sum(row['count'] for row in rows)
    return jsonify({
        'status': 'ok',
        'total': grand_total,
        'results': rows,
    })
def view(entity_id):
    """Return the serialized entity identified by `entity_id`.

    An audit record for the entity access is written after the lookup.
    """
    enable_cache()
    doc = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    owning_collection = doc.get('collection_id')
    tag_request(collection_id=owning_collection)
    return EntitySerializer.jsonify(doc)