def _metadata_locale(locale):
    """Build the frontend metadata payload for one UI locale.

    Args:
        locale: Locale identifier; echoed under ``app.locale`` and passed to
            ``load_pages``.

    Returns:
        dict: Status, maintenance flag, app branding, collection categories
        and frequencies, pages, the model as a dict, a ``None`` token and
        the URLs of the enabled authentication endpoints.
    """
    # This is cached in part because latency on this endpoint is
    # particularly relevant to the first render being shown to a user.
    # NOTE(review): the caching itself is not visible in this function.
    auth_links = {}
    if settings.PASSWORD_LOGIN:
        auth_links["password_login_uri"] = url_for("sessions_api.password_login")
        auth_links["registration_uri"] = url_for("roles_api.create_code")
    if settings.OAUTH:
        auth_links["oauth_uri"] = url_for("sessions_api.oauth_init")

    # Map every configured UI language code to its name in that language.
    language_names = {}
    for code in settings.UI_LANGUAGES:
        language_names[code] = Locale(code).get_language_name(code)

    return {
        "status": "ok",
        "maintenance": request.authz.in_maintenance,
        "app": {
            "title": settings.APP_TITLE,
            "version": __version__,
            "banner": settings.APP_BANNER,
            "ui_uri": settings.APP_UI_URL,
            "logo": settings.APP_LOGO,
            "favicon": settings.APP_FAVICON,
            "locale": locale,
            "locales": language_names,
        },
        "categories": Collection.CATEGORIES,
        "frequencies": Collection.FREQUENCIES,
        "pages": load_pages(locale),
        "model": model.to_dict(),
        "token": None,
        "auth": auth_links,
    }
def callback():
    """Finish an OAuth login and store the resulting roles in the session.

    On a missing or failed provider response the user is redirected to the
    UI without being logged in. Otherwise a user role (and, for the
    dashboard provider, one role per group) is loaded or created, the
    session is populated and the database session is committed.

    Raises:
        RuntimeError: If the provider's base URL matches no known service.
    """
    response = oauth_provider.authorized_response()
    if response is None or isinstance(response, OAuthException):
        log.warning("Failed OAuth: %r", response)
        # FIXME: notify the user, somehow.
        return redirect(url_for('base_api.ui'))

    session['oauth'] = response
    session['roles'] = [Role.system(Role.SYSTEM_USER)]

    base_url = oauth_provider.base_url
    if 'googleapis.com' in base_url:
        profile = oauth_provider.get('userinfo').data
        foreign_id = 'google:%s' % profile.get('id')
        role = Role.load_or_create(foreign_id, Role.USER,
                                   profile.get('name'),
                                   email=profile.get('email'))
    elif ('occrp.org' in base_url
          or 'investigativedashboard.org' in base_url):
        profile = oauth_provider.get('api/2/accounts/profile/').data
        foreign_id = 'idashboard:user:%s' % profile.get('id')
        role = Role.load_or_create(foreign_id, Role.USER,
                                   profile.get('display_name'),
                                   email=profile.get('email'),
                                   is_admin=profile.get('is_admin'))
        # Each dashboard group becomes a group role attached to the session.
        for group in profile.get('groups', []):
            group_foreign_id = 'idashboard:%s' % group.get('id')
            group_role = Role.load_or_create(group_foreign_id, Role.GROUP,
                                             group.get('name'))
            session['roles'].append(group_role.id)
    else:
        raise RuntimeError("Unknown OAuth URL: %r" % base_url)

    session['roles'].append(role.id)
    session['user'] = role.id
    db.session.commit()
    log.info("Logged in: %r", role)
    return redirect(url_for('base_api.ui'))
def entity_links(self, data, pk, schemata):
    """Return the hypermedia links for the entity identified by ``pk``.

    ``data`` and ``schemata`` are accepted but not used here.
    """
    api_endpoints = (
        ('self', 'entities_api.view'),
        ('references', 'entities_api.references'),
        ('tags', 'entities_api.tags'),
    )
    links = {}
    for rel, endpoint in api_endpoints:
        links[rel] = url_for(endpoint, id=pk)
    links['ui'] = entity_url(pk)
    return links
def metadata():
    """Return application metadata for the frontend as a JSON response.

    The response is marked cacheable without varying by user.
    """
    enable_cache(vary_user=False)

    auth_links = {}
    if settings.PASSWORD_LOGIN:
        auth_links['password_login_uri'] = url_for('sessions_api.password_login')
        auth_links['registration_uri'] = url_for('roles_api.create_code')
    if settings.OAUTH:
        auth_links['oauth_uri'] = url_for('sessions_api.oauth_init')

    payload = {
        'status': 'ok',
        'maintenance': request.authz.in_maintenance,
        'app': {
            'title': settings.APP_TITLE,
            'version': __version__,
            'ui_uri': six.text_type(app_ui_url),
            'samples': settings.SAMPLE_SEARCHES,
            'logo': settings.APP_LOGO,
            'favicon': settings.APP_FAVICON,
        },
        'categories': Collection.CATEGORIES,
        'countries': countries.names,
        'languages': languages.names,
        'schemata': model,
        'auth': auth_links,
    }
    return jsonify(payload)
def status():
    """Return the session status of the current request as JSON.

    The payload lists the login state, API key, role information, readable
    and writeable collections and the available login providers (OAuth apps
    sorted by label, plus password login when it is configured).
    """
    authz = request.authz
    enable_cache(vary_user=True)

    login_options = []
    for remote in sorted(oauth.remote_apps.values(), key=lambda p: p.label):
        login_options.append({
            'name': remote.name,
            'label': remote.label,
            'login': url_for('.login', provider=remote.name),
        })

    if get_config('PASSWORD_LOGIN'):
        login_options.append({
            'name': 'password',
            'label': 'Email',
            'registration': get_config('PASSWORD_REGISTRATION'),
            'login': url_for('.password_login'),
            'register': url_for('roles_api.invite_email'),
        })

    payload = {
        'logged_in': authz.logged_in,
        'api_key': authz.role.api_key if authz.logged_in else None,
        'role': authz.role,
        'roles': authz.roles,
        'public_roles': get_public_roles(),
        'permissions': {
            'read': authz.collections[authz.READ],
            'write': authz.collections[authz.WRITE],
        },
        'logout': url_for('.logout'),
        'providers': login_options,
    }
    return jsonify(payload)
def _serialize(self, obj):
    """Decorate a collection dict with links, permissions and resolved roles.

    Adds ``links``, ``writeable``, ``creator`` and ``team``, drops the
    ``creator_id`` and ``_index`` keys and returns the cleaned response.
    """
    collection_id = obj.get('id')
    secret = obj.get('secret')
    obj['links'] = {
        'self': url_for('collections_api.view', id=collection_id),
        'xref': url_for('xref_api.index', id=collection_id),
        'xref_csv': url_for('xref_api.csv_export',
                            id=collection_id,
                            _authorize=secret),
        'reconcile': url_for('reconcile_api.reconcile',
                             collection_id=collection_id,
                             _authorize=secret),
        'ui': collection_url(collection_id),
    }
    obj['writeable'] = request.authz.can(collection_id, request.authz.WRITE)

    creator_id = obj.pop('creator_id', None)
    obj['creator'] = self.resolve(Role, creator_id, RoleSerializer)

    # Only team members that could actually be resolved are listed.
    team = []
    obj['team'] = team
    for member_id in ensure_list(obj.get('team_id')):
        member = self.resolve(Role, member_id, RoleSerializer)
        if member is None:
            continue
        team.append(member)

    obj.pop('_index', None)
    return self._clean_response(obj)
def metadata():
    """Return application metadata for the frontend as a JSON response.

    Includes one login entry per configured OAuth app and, when password
    login is enabled, the password login and registration endpoints.
    """
    enable_cache(vary_user=False)

    oauth_logins = [
        {
            'name': remote.name,
            'label': remote.label,
            'login': url_for('sessions_api.oauth_init', provider=remote.name),
        }
        for remote in oauth.remote_apps.values()
    ]

    password_login = get_config('PASSWORD_LOGIN')
    auth = {'password_login': password_login, 'oauth': oauth_logins}
    if password_login:
        auth['registration'] = get_config('PASSWORD_REGISTRATION')
        auth['password_login_uri'] = url_for('sessions_api.password_login')
        auth['registration_uri'] = url_for('roles_api.create_code')

    payload = {
        'status': 'ok',
        'maintenance': request.authz.in_maintenance,
        'app': {
            'title': six.text_type(app_title),
            'ui_uri': six.text_type(app_ui_url),
            'samples': get_config('SAMPLE_SEARCHES'),
        },
        'categories': get_config('COLLECTION_CATEGORIES', {}),
        'countries': countries.names,
        'languages': languages.names,
        'schemata': model,
        'auth': auth,
    }
    return jsonify(payload)
def _serialize(self, obj):
    """Decorate a collection dict with links, permissions and resolved roles.

    Adds ``links``, a default ``shallow`` flag, ``writeable``, ``creator``
    and ``team``; removes ``creator_id`` and ``team_id``. Returns ``obj``.
    """
    collection_id = obj.get("id")
    # The authz object is only passed to the link builder when the
    # collection carries a secret.
    link_authz = request.authz if obj.get("secret") else None
    obj["links"] = {
        "self": url_for("collections_api.view", collection_id=collection_id),
        "xref": url_for("xref_api.index", collection_id=collection_id),
        "xref_export": url_for(
            "xref_api.export", collection_id=collection_id, _authz=link_authz
        ),
        "reconcile": url_for(
            "reconcile_api.reconcile",
            collection_id=collection_id,
            _authz=link_authz,
        ),
        "ui": collection_url(collection_id),
    }
    obj["shallow"] = obj.get("shallow", True)
    obj["writeable"] = request.authz.can(collection_id, request.authz.WRITE)

    creator_id = obj.pop("creator_id", None)
    obj["creator"] = self.resolve(Role, creator_id, RoleSerializer)

    # Team members the requester may not read are left out.
    team = []
    obj["team"] = team
    for member_id in ensure_list(obj.pop("team_id", [])):
        if not request.authz.can_read_role(member_id):
            continue
        team.append(self.resolve(Role, member_id, RoleSerializer))
    return obj
def _serialize(self, obj):
    """Decorate an entity dict with its collection, nested entities and links.

    Entity-typed property values are replaced by their resolved entity (the
    raw value is kept when resolution yields nothing). Documents get archive
    links for the file, PDF and CSV blobs whose hashes are present.
    """
    entity_id = obj.get("id")
    collection_id = obj.pop("collection_id", None)
    obj["collection"] = self.resolve(
        Collection, collection_id, CollectionSerializer
    )

    proxy = model.get_proxy(obj)
    properties = obj.get("properties", {})
    for prop in proxy.iterprops():
        if prop.type != registry.entity:
            continue
        raw_values = ensure_list(properties.get(prop.name))
        resolved = []
        properties[prop.name] = resolved
        for raw in raw_values:
            nested = self.resolve(Entity, raw, EntitySerializer)
            resolved.append(nested or raw)

    links = {
        "self": url_for("entities_api.view", entity_id=entity_id),
        "references": url_for("entities_api.references", entity_id=entity_id),
        "tags": url_for("entities_api.tags", entity_id=entity_id),
        "ui": entity_url(entity_id),
    }
    if proxy.schema.is_a(Document.SCHEMA):
        content_hash = first(properties.get("contentHash"))
        if content_hash:
            file_name = entity_filename(proxy)
            mime_type = first(properties.get("mimeType"))
            links["file"] = archive_url(
                content_hash,
                file_name=file_name,
                mime_type=mime_type,
                expire=request.authz.expire,
            )

        pdf_hash = first(properties.get("pdfHash"))
        if pdf_hash:
            pdf_name = entity_filename(proxy, extension="pdf")
            links["pdf"] = archive_url(
                pdf_hash,
                file_name=pdf_name,
                mime_type=PDF,
                expire=request.authz.expire,
            )

        csv_hash = first(properties.get("csvHash"))
        if csv_hash:
            csv_name = entity_filename(proxy, extension="csv")
            links["csv"] = archive_url(
                csv_hash,
                file_name=csv_name,
                mime_type=CSV,
                expire=request.authz.expire,
            )

    obj["links"] = links
    obj["latinized"] = transliterate_values(proxy)
    obj["writeable"] = check_write_entity(obj, request.authz)
    obj["shallow"] = obj.get("shallow", True)
    return obj
def add_urls(doc):
    """Attach ``archive_url`` and ``manifest_url`` to a package dict.

    Both URLs are built from the dict's ``collection`` and ``id`` values.
    The dict is modified in place and returned.
    """
    route_args = {
        'collection': doc.get('collection'),
        'package_id': doc.get('id'),
    }
    doc['archive_url'] = url_for('data.package', **route_args)
    doc['manifest_url'] = url_for('data.manifest', **route_args)
    return doc
def _serialize(self, obj):
    """Decorate an entity dict with its collection, nested entities and links.

    Returns ``None`` when the entity's schema is not known to the model;
    otherwise returns the cleaned response for the decorated dict.
    """
    pk = obj.get('id')
    obj['id'] = str(pk)
    authz = request.authz

    collection_id = obj.pop('collection_id', None)
    obj['collection'] = self.resolve(Collection, collection_id,
                                     CollectionSerializer)

    schema = model.get(obj.get('schema'))
    if schema is None:
        return None
    obj['schemata'] = schema.names

    properties = obj.get('properties', {})
    for prop in schema.properties.values():
        if prop.type != registry.entity:
            continue
        raw_values = ensure_list(properties.get(prop.name))
        resolved = []
        properties[prop.name] = resolved
        for raw in raw_values:
            resolved.append(self.resolve(Entity, raw, EntitySerializer))

    links = {
        'self': url_for('entities_api.view', entity_id=pk),
        'references': url_for('entities_api.references', entity_id=pk),
        'tags': url_for('entities_api.tags', entity_id=pk),
        'ui': entity_url(pk),
    }
    if schema.is_a(Document.SCHEMA):
        links['content'] = url_for('entities_api.content', entity_id=pk)
        file_name = first(properties.get('fileName'))

        content_hash = first(properties.get('contentHash'))
        if content_hash:
            mime_type = first(properties.get('mimeType'))
            safe_name = safe_filename(file_name, default=pk)
            links['file'] = archive_url(request.authz.id, content_hash,
                                        file_name=safe_name,
                                        mime_type=mime_type)

        pdf_hash = first(properties.get('pdfHash'))
        if pdf_hash:
            pdf_name = safe_filename(file_name, default=pk, extension='.pdf')
            links['pdf'] = archive_url(request.authz.id, pdf_hash,
                                       file_name=pdf_name, mime_type=PDF)

        csv_hash = first(properties.get('csvHash'))
        if csv_hash:
            csv_name = safe_filename(file_name, default=pk, extension='.csv')
            links['csv'] = archive_url(request.authz.id, csv_hash,
                                       file_name=csv_name, mime_type=CSV)

    obj['links'] = links
    obj['writeable'] = authz.can(collection_id, authz.WRITE)
    obj.pop('_index', None)
    return self._clean_response(obj)
def execute_documents_query(args, q):
    """Run a document search and return a paginated result dict.

    Args:
        args: Request arguments; forwarded to the aggregation converter, the
            per-document record queries and the ``next`` page link.
        q: Search body; its ``from`` and ``size`` entries are reported as
            ``offset`` and ``limit``.

    Returns:
        dict: Status, results (each with matching ``records``), paging
        information, and the ``facets``/``watchlists`` containers.
    """
    result = es.search(index=es_index, doc_type=TYPE_DOCUMENT, body=q)
    hits = result.get('hits', {})
    output = {
        'status': 'ok',
        'results': [],
        'offset': q['from'],
        'limit': q['size'],
        'total': hits.get('total'),
        'next': None,
        'facets': {},
        'watchlists': {}
    }
    convert_aggregations(result, output, args)

    # Link to the next page, carrying over every argument except the offset.
    next_offset = output['offset'] + output['limit']
    if output['total'] > next_offset:
        params = {'offset': next_offset}
        for key, values in args.iterlists():
            if key != 'offset':
                params[key] = values
        output['next'] = url_for('search.query', **params)

    # One msearch header/body pair per document that has a record query.
    sub_queries = []
    for hit in hits.get('hits', []):
        hit_id = hit.get('_id')
        document = hit.get('_source')
        document['id'] = int(hit_id)
        document['score'] = hit.get('_score')
        document['records'] = {'results': [], 'total': 0}

        record_query = records_query(document['id'], args)
        if record_query is not None:
            sub_queries.extend([json.dumps({}), json.dumps(record_query)])

        document['api_url'] = url_for('document.view', document_id=hit_id)
        document['data_url'] = url_for('document.file', document_id=hit_id)
        output['results'].append(document)

    if sub_queries:
        res = es.msearch(index=es_index, doc_type=TYPE_RECORD,
                         body='\n'.join(sub_queries))
        for document in output['results']:
            for response in res.get('responses', []):
                response_hits = response.get('hits', {})
                for record_hit in response_hits.get('hits', {}):
                    record = record_hit.get('_source')
                    # Records are matched back to their document by id.
                    if document['id'] != record.get('document_id'):
                        continue
                    record['score'] = record_hit.get('_score')
                    record['text'] = record_hit.get('highlight', {}).get('text')
                    document['records']['results'].append(record)
                    document['records']['total'] = response_hits.get('total', 0)
    return output
def entity_links(self, data, pk, schemata):
    """Return the hypermedia links for the entity identified by ``pk``.

    ``data`` and ``schemata`` are accepted but not used here.
    """
    links = {}
    links['self'] = url_for('entities_api.view', id=pk)
    # 'similar': url_for('entities_api.similar', id=pk),
    # 'documents': url_for('entities_api.documents', id=pk),
    links['references'] = url_for('entities_api.references', id=pk)
    links['pivot'] = url_for('entities_api.pivot', id=pk)
    links['ui'] = entity_url(pk)
    return links
def login(provider):
    """Start the authorization flow for the named provider.

    Already authenticated users are redirected to the UI. Otherwise the
    session is cleared, the ``next_url`` request argument (defaulting to the
    UI) is remembered and the provider's authorize response is returned.

    Raises:
        BadRequest: If ``provider`` is not a known provider name.
    """
    if provider not in PROVIDERS:
        raise BadRequest('Unknown provider: %s' % provider)
    if current_user.is_authenticated():
        return redirect(url_for('ui'))

    session.clear()
    callback_url = url_for('.%s_authorized' % provider)
    default_next = url_for('ui')
    session['next_url'] = request.args.get('next_url', default_next)
    remote = PROVIDERS[provider]
    return remote.authorize(callback=callback_url)
def hypermedia(self, data):
    """Add ``links`` and a ``writeable`` flag to a collection dict.

    The dict is modified in place and returned.
    """
    collection_id = str(data.get('id'))
    links = {}
    links['self'] = url_for('collections_api.view', id=collection_id)
    links['xref'] = url_for('xref_api.index', id=collection_id)
    links['xref_csv'] = url_for('xref_api.csv_export',
                                id=collection_id,
                                _authorize=True)
    links['ui'] = collection_url(collection_id)
    data['links'] = links
    data['writeable'] = request.authz.can(collection_id, request.authz.WRITE)
    return data
def metadata():
    """Get operational metadata for the frontend.
    ---
    get:
      summary: Retrieve system metadata from the application.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
      tags:
      - System
    """
    locale = get_locale()
    enable_cache(vary_user=False, vary=str(locale))

    # Serve the per-process, per-locale cached payload when there is one.
    cache_key = cache.key('metadata', settings.PROCESS_ID, locale)
    cached = cache.get_complex(cache_key)
    if cached is not None:
        return jsonify(cached)

    auth_links = {}
    if settings.PASSWORD_LOGIN:
        auth_links['password_login_uri'] = url_for('sessions_api.password_login')
        auth_links['registration_uri'] = url_for('roles_api.create_code')
    if settings.OAUTH:
        auth_links['oauth_uri'] = url_for('sessions_api.oauth_init')

    # Map every configured UI language code to its name in that language.
    language_names = {}
    for code in settings.UI_LANGUAGES:
        language_names[code] = Locale(code).get_language_name(code)

    payload = {
        'status': 'ok',
        'maintenance': request.authz.in_maintenance,
        'app': {
            'title': settings.APP_TITLE,
            'description': settings.APP_DESCRIPTION,
            'version': __version__,
            'banner': settings.APP_BANNER,
            'ui_uri': settings.APP_UI_URL,
            'samples': settings.SAMPLE_SEARCHES,
            'logo': settings.APP_LOGO,
            'favicon': settings.APP_FAVICON,
            'locale': str(locale),
            'locales': language_names,
        },
        'categories': Collection.CATEGORIES,
        'countries': registry.country.names,
        'languages': registry.language.names,
        'model': model,
        'auth': auth_links,
    }
    cache.set_complex(cache_key, payload, expires=120)
    return jsonify(payload)
def to_dict(self):
    """Return this list as a plain dict, including its API URLs."""
    data = {'id': self.id}
    data['api_url'] = url_for('lists.view', id=self.id)
    data['entities_api_url'] = url_for('entities.index', list=self.id)
    data['label'] = self.label
    data['public'] = self.public
    data['creator_id'] = self.creator_id
    data['created_at'] = self.created_at
    data['updated_at'] = self.updated_at
    return data
def to_dict(self):
    """Return this watchlist as a plain dict, including its API URLs."""
    data = {"id": self.id}
    data["api_url"] = url_for("watchlists.view", id=self.id)
    data["entities_api_url"] = url_for("entities.index", list=self.id)
    data["label"] = self.label
    data["foreign_id"] = self.foreign_id
    data["creator_id"] = self.creator_id
    data["created_at"] = self.created_at
    data["updated_at"] = self.updated_at
    return data
def _serialize(self, obj):
    """Decorate an entity dict with its collection, nested entities and links.

    Entity-typed property values are replaced by their resolved entity (the
    raw value is kept when resolution yields nothing). Documents get a
    content link plus archive links for the blobs whose hashes are present.
    """
    entity_id = obj.get('id')
    collection_id = obj.pop('collection_id', None)
    obj['collection'] = self.resolve(Collection, collection_id,
                                     CollectionSerializer)

    proxy = model.get_proxy(obj)
    obj['schemata'] = proxy.schema.names

    properties = obj.get('properties', {})
    for prop in proxy.iterprops():
        if prop.type != registry.entity:
            continue
        raw_values = ensure_list(properties.get(prop.name))
        resolved = []
        properties[prop.name] = resolved
        for raw in raw_values:
            nested = self.resolve(Entity, raw, EntitySerializer)
            resolved.append(nested or raw)

    links = {
        'self': url_for('entities_api.view', entity_id=entity_id),
        'references': url_for('entities_api.references', entity_id=entity_id),
        'tags': url_for('entities_api.tags', entity_id=entity_id),
        'ui': entity_url(entity_id),
    }
    if proxy.schema.is_a(Document.SCHEMA):
        links['content'] = url_for('entities_api.content', entity_id=entity_id)

        content_hash = first(properties.get('contentHash'))
        if content_hash:
            file_name = entity_filename(proxy)
            mime_type = first(properties.get('mimeType'))
            links['file'] = archive_url(request.authz.id, content_hash,
                                        file_name=file_name,
                                        mime_type=mime_type)

        pdf_hash = first(properties.get('pdfHash'))
        if pdf_hash:
            pdf_name = entity_filename(proxy, extension='pdf')
            links['pdf'] = archive_url(request.authz.id, pdf_hash,
                                       file_name=pdf_name, mime_type=PDF)

        csv_hash = first(properties.get('csvHash'))
        if csv_hash:
            csv_name = entity_filename(proxy, extension='csv')
            links['csv'] = archive_url(request.authz.id, csv_hash,
                                       file_name=csv_name, mime_type=CSV)

    obj['links'] = links
    obj['writeable'] = request.authz.can(collection_id, request.authz.WRITE)
    return obj
def reconcile_index(collection=None):
    """Return the reconciliation service description as a JSON response.

    Args:
        collection: Optional collection dict. When given, the service label
            includes the collection label, entity suggestions are filtered
            to the collection, and the default types are limited to the
            schemata reported by ``get_collection_things``.

    Returns:
        The JSON response describing view/preview URLs, the suggest
        services and the default (matchable) types.
    """
    domain = settings.APP_UI_URL.strip("/")
    label = settings.APP_TITLE
    suggest_query = []
    schemata = list(model)
    if collection is not None:
        collection_id = collection.get("id")
        label = "%s (%s)" % (collection.get("label"), label)
        suggest_query.append(("filter:collection_id", collection_id))
        things = get_collection_things(collection_id)
        schemata = [model.get(name) for name in things.keys()]

    # A schema name that the model cannot look up must not break the whole
    # service description, so missing (None) schemata are skipped.
    default_types = [
        get_freebase_type(schema)
        for schema in schemata
        if schema is not None and schema.matchable
    ]
    return jsonify({
        "name": label,
        "identifierSpace": "http://rdf.freebase.com/ns/type.object.id",
        "schemaSpace": "http://rdf.freebase.com/ns/type.object.id",
        "view": {"url": entity_url("{{id}}")},
        "preview": {
            "url": entity_url("{{id}}"),
            "width": 800,
            "height": 400,
        },
        "suggest": {
            "entity": {
                "service_url": domain,
                "service_path": url_for(
                    "reconcile_api.suggest_entity",
                    _query=suggest_query,
                    _authz=request.authz,
                    _relative=True,
                ),
            },
            "type": {
                "service_url": domain,
                "service_path": url_for(
                    "reconcile_api.suggest_type", _relative=True
                ),
            },
            "property": {
                "service_url": domain,
                "service_path": url_for(
                    "reconcile_api.suggest_property", _relative=True
                ),
            },
        },
        "defaultTypes": default_types,
    })
def metadata():
    """Get operational metadata for the frontend.
    ---
    get:
      summary: Retrieve system metadata from the application.
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
      tags:
      - System
    """
    locale = get_locale()

    auth_links = {}
    if settings.PASSWORD_LOGIN:
        auth_links['password_login_uri'] = url_for('sessions_api.password_login')
        auth_links['registration_uri'] = url_for('roles_api.create_code')
    if settings.OAUTH:
        auth_links['oauth_uri'] = url_for('sessions_api.oauth_init')

    # Map every configured UI language code to its name in that language.
    language_names = {}
    for code in settings.UI_LANGUAGES:
        language_names[code] = Locale(code).get_language_name(code)

    payload = {
        'status': 'ok',
        'maintenance': request.authz.in_maintenance,
        'app': {
            'title': settings.APP_TITLE,
            'description': settings.APP_DESCRIPTION,
            'version': __version__,
            'banner': settings.APP_BANNER,
            'ui_uri': settings.APP_UI_URL,
            'samples': settings.SAMPLE_SEARCHES,
            'logo': settings.APP_LOGO,
            'favicon': settings.APP_FAVICON,
            'locale': str(locale),
            'locales': language_names,
        },
        'categories': Collection.CATEGORIES,
        'model': model,
        'token': None,
        'auth': auth_links,
    }

    # In single-user mode the response carries a token for the CLI user.
    if settings.SINGLE_USER:
        cli_role = Role.load_cli_user()
        cli_authz = Authz.from_role(cli_role)
        payload['token'] = cli_authz.to_token(role=cli_role)
    return jsonify(payload)
def reconcile_index(collection=None):
    """Return the reconciliation service description as a JSON response.

    Args:
        collection: Optional collection dict. When given, the service label
            includes the collection label, entity suggestions are filtered
            to the collection, and the default types are limited to the
            schemata reported by ``get_collection_things``.

    Returns:
        The JSON response describing view/preview URLs, the suggest
        services and the default (matchable) types.
    """
    domain = settings.APP_UI_URL.strip('/')
    label = settings.APP_TITLE
    suggest_query = []
    schemata = list(model)
    if collection is not None:
        collection_id = collection.get('id')
        label = '%s (%s)' % (collection.get('label'), label)
        suggest_query.append(('filter:collection_id', collection_id))
        things = get_collection_things(collection_id)
        schemata = [model.get(name) for name in things.keys()]

    # A schema name that the model cannot look up must not break the whole
    # service description, so missing (None) schemata are skipped.
    default_types = [
        get_freebase_type(schema)
        for schema in schemata
        if schema is not None and schema.matchable
    ]
    return jsonify({
        'name': label,
        'identifierSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'schemaSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'view': {'url': entity_url('{{id}}')},
        'preview': {
            'url': entity_url('{{id}}'),
            'width': 800,
            'height': 400
        },
        'suggest': {
            'entity': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_entity',
                                        _query=suggest_query,
                                        _authorize=True,
                                        _relative=True)
            },
            'type': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_type',
                                        _relative=True)
            },
            'property': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_property',
                                        _relative=True)
            }
        },
        'defaultTypes': default_types
    })
def status():
    """Return the login state and the available login URLs as JSON.

    Providers that are ``Stub`` instances are left out of ``logins``.
    """
    login_urls = {
        name: url_for('.login', provider=name)
        for name, provider in PROVIDERS.items()
        if not isinstance(provider, Stub)
    }
    payload = {
        'logged_in': authz.logged_in(),
        'api_key': current_user.api_key if authz.logged_in() else None,
        'user': current_user if authz.logged_in() else None,
        'permissions': {},
        'logins': login_urls,
        'logout': url_for('.logout'),
    }
    return jsonify(payload)
def view(document_id):
    """Return one document as JSON, with its parent and download URLs.

    ``data_url`` comes from the archive when it can generate one and falls
    back to the API file endpoint. ``pdf_url`` is only set when the document
    has a PDF version.
    """
    doc = get_document(document_id)
    enable_cache()
    data = doc.to_dict()

    parent = doc.parent
    if parent is not None:
        data['parent'] = parent.to_dict()
    log_event(request, document_id=doc.id)

    data['data_url'] = archive.generate_url(doc.content_hash)
    if data['data_url'] is None:
        data['data_url'] = url_for('documents_api.file',
                                   document_id=document_id)
    if doc.pdf_version:
        data['pdf_url'] = url_for('documents_api.pdf',
                                  document_id=document_id)
    return jsonify(data)
def status():
    """Return the login state, admin flag and available login URLs as JSON.

    Providers that are ``Stub`` instances are left out of ``logins``.
    """
    login_urls = {
        name: url_for('.login', provider=name)
        for name, provider in PROVIDERS.items()
        if not isinstance(provider, Stub)
    }
    payload = {
        'logged_in': authz.logged_in(),
        'is_admin': authz.is_admin(),
        'api_key': current_user.api_key if authz.logged_in() else None,
        'user': current_user if authz.logged_in() else None,
        'permissions': {},
        'logins': login_urls,
        'logout': url_for('.logout'),
    }
    return jsonify(payload)
def execute_entities_query(args, query, doc_counts=False):
    """Run an entity search and return the result dict.

    Args:
        args: Request arguments, forwarded to the aggregation converter.
        query: The search body handed to ``execute_basic``.
        doc_counts: When true, a ``doc_count`` is added to every result from
            one multi-search over the documents index.
    """
    result, hits, output = execute_basic(TYPE_ENTITY, query)
    convert_entity_aggregations(result, output, args)

    sub_queries = []
    for hit in hits.get('hits', []):
        entity_id = hit.get('_id')
        entity = hit.get('_source')
        entity['id'] = entity_id
        entity['score'] = hit.get('_score')
        entity['api_url'] = url_for('entities_api.view', id=entity_id)
        output['results'].append(entity)

        # Zero-size count query for documents that mention this entity.
        count_filter = authz_sources_filter(
            {'term': {'entities.uuid': entity['id']}}
        )
        count_query = {'size': 0, 'query': count_filter}
        sub_queries.append(json.dumps({}))
        sub_queries.append(json.dumps(count_query))

    if doc_counts and len(sub_queries):
        counts = get_es().msearch(index=get_es_index(),
                                  doc_type=TYPE_DOCUMENT,
                                  body='\n'.join(sub_queries))
        # Responses are paired with the results positionally.
        for entity, response in zip(output['results'], counts.get('responses')):
            entity['doc_count'] = response.get('hits', {}).get('total')
    return output
def to_dict(self, counts=False):
    """Return this collection as a dict, extending the parent's fields.

    Args:
        counts: When true, also include the document count and the counts
            of active and pending entities.
    """
    data = super(Collection, self).to_dict()
    data['api_url'] = url_for('collections_api.view', id=self.id)
    data['foreign_id'] = self.foreign_id
    data['creator_id'] = self.creator_id
    data['label'] = self.label
    data['summary'] = self.summary
    data['category'] = self.category
    data['countries'] = self.countries
    data['managed'] = self.managed
    data['private'] = self.private
    data['public'] = self.is_public
    if not counts:
        return data

    # Query how many entities and documents are in this collection.
    from aleph.model.entity import Entity
    data['doc_count'] = self.get_document_count()
    data['entity_count'] = self.get_entity_count(Entity.STATE_ACTIVE)
    data['pending_count'] = self.get_entity_count(Entity.STATE_PENDING)
    return data
def package(collection, package_id):
    """Redirect to the resource URL of a package's source file.

    Raises:
        NotFound: If the package has no source.
    """
    pkg = get_package(collection, package_id)
    source = pkg.source
    if source is None:
        raise NotFound()
    target = url_for('data.resource',
                     collection=collection,
                     package_id=package_id,
                     path=pkg.source.path)
    return redirect(target)
def _serialize(self, obj):
    """Add a self link and a ``writeable`` flag to an alert dict.

    ``role_id`` is removed from the dict; the alert is writeable when it
    equals the stringified id of the requesting authz.
    """
    alert_id = obj.get('id')
    obj['links'] = {'self': url_for('alerts_api.view', id=alert_id)}
    owner_id = obj.pop('role_id', None)
    requester_id = stringify(request.authz.id)
    obj['writeable'] = owner_id == requester_id
    # obj['role'] = self.resolve(Role, role_id, RoleSerializer)
    return obj
def execute_tabular_query(document_id, table_id, args, query):
    """Run a record search for one table and return a paginated result dict.

    Args:
        document_id: Document id, used for the ``next`` page link.
        table_id: Table id, used for the ``next`` page link.
        args: Request arguments carried over into the ``next`` page link.
        query: Search body; ``from`` and ``size`` are reported as ``offset``
            and ``limit``.
    """
    result = es.search(index=es_index, doc_type=TYPE_RECORD, body=query)
    hits = result.get('hits', {})
    output = {
        'status': 'ok',
        'results': [],
        'offset': query['from'],
        'limit': query['size'],
        'total': hits.get('total'),
        'next': None
    }

    # Link to the next page, carrying over every argument except the offset.
    next_offset = output['offset'] + output['limit']
    if output['total'] > next_offset:
        params = {'offset': next_offset}
        for key, values in args.iterlists():
            if key != 'offset':
                params[key] = values
        output['next'] = url_for('table.rows', document_id=document_id,
                                 table_id=table_id, **params)

    for hit in hits.get('hits', []):
        row = hit.get('_source').get('raw')
        row['_id'] = hit.get('_source', {}).get('row_id')
        output['results'].append(row)
    return output
def to_dict(self):
    """Return this user as a plain dict, including its API URL."""
    data = {'id': self.id}
    data['api_url'] = url_for('users.view', id=self.id)
    data['email'] = self.email
    data['display_name'] = self.display_name
    return data
def oauth_init():
    """Start the OAuth authorization flow.

    Aborts with 404 when OAuth is not enabled. The best next URL (from the
    ``next`` argument or the referrer) is passed along as the ``state``.
    """
    if not settings.OAUTH:
        abort(404)
    callback_url = url_for('.oauth_callback')
    next_url = get_best_next_url(request.args.get('next'), request.referrer)
    return oauth.provider.authorize(callback=callback_url, state=next_url)
def get_results(query, limit):
    """Yield one flat dict per scanned document, up to ``limit`` rows.

    Each dict carries a ``file_url`` plus the source fields listed in
    ``FIELDS``. The ``collection_id`` field is exported as ``collections``:
    a sorted, comma-separated string of collection labels. Sequence values
    are joined with ``', '``.

    Args:
        query: Query passed to ``scan_iter``.
        limit: Maximum number of rows to yield.
    """
    # Cache of collection id -> label so each collection is loaded once.
    collections = {}
    for i, row in enumerate(scan_iter(query)):
        if i >= limit:
            return
        data = {
            'file_url': url_for('documents_api.file',
                                document_id=row.get('_id'))
        }
        for name, value in row.get('_source').items():
            if name == 'collection_id':
                colls = []
                for coll in value:
                    if coll not in collections:
                        source = Collection.by_id(coll)
                        if source is None:
                            # Name the one missing collection. Formatting
                            # the whole id list here gave a wrong label and
                            # raised TypeError for tuple values.
                            collections[coll] = \
                                '[Deleted collection %s]' % coll
                        else:
                            collections[coll] = source.label
                    colls.append(collections[coll])
                value = ', '.join(sorted(colls))
                name = 'collections'
            if name not in FIELDS:
                continue
            if isinstance(value, (list, tuple, set)):
                value = ', '.join(value)
            data[name] = value
        yield data
def view(entityset_id):
    """Return the entityset with id `entityset_id`.
    ---
    get:
      summary: Fetch an entityset
      parameters:
      - description: The entityset id.
        in: path
        name: entityset_id
        required: true
        schema:
          type: string
        example: 3a0d91ece2dce88ad3259594c7b642485235a048
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitySet'
          description: OK
      tags:
      - EntitySet
    """
    entityset = get_entityset(entityset_id, request.authz.READ)
    # Profiles are served by their own endpoint.
    is_profile = entityset.type == EntitySet.PROFILE
    if is_profile:
        target = url_for("profile_api.view", profile_id=entityset_id)
        return redirect(target)
    payload = entityset.to_dict()
    payload["shallow"] = False
    return EntitySetSerializer.jsonify(payload)
def _serialize(self, obj):
    """Add a self link and a ``writeable`` flag to an alert dict.

    ``role_id`` is removed from the dict; the flag reflects whether the
    requester may write to that role.
    """
    alert_id = obj.get('id')
    self_link = url_for('alerts_api.view', alert_id=alert_id)
    obj['links'] = {'self': self_link}
    owner_id = obj.pop('role_id', None)
    obj['writeable'] = request.authz.can_write_role(owner_id)
    return obj
def password_login():
    """Authenticate a user by email and password.

    Aborts with 404 when either credential is missing. A role with a stored
    password digest is checked against the password; when no such role
    exists, LDAP authentication is attempted instead. On success the user id
    and next URL are stored in the session and the logout URL, API key and
    role are returned as JSON.
    """
    data = request_data()
    email = data.get('email')
    password = data.get('password')
    if not (email and password):
        abort(404)

    log_event(request)

    candidates = Role.by_email(email)
    # SQLAlchemy column comparison, hence "!= None" rather than "is not".
    candidates = candidates.filter(Role.password_digest != None)  # noqa
    role = candidates.first()

    # Try a password authentication and an LDAP authentication if it is
    # enabled.
    if not role:
        role = Role.authenticate_using_ldap(email, password)
        if not role:
            return Unauthorized("Authentication has failed.")
    elif role.check_password(password) is False:
        return Unauthorized("Authentication has failed.")

    session['user'] = role.id
    session['next_url'] = extract_next_url(request)

    payload = {
        'logout': url_for('.logout'),
        'api_key': role.api_key,
        'role': role,
    }
    return jsonify(payload)
def get_results(query, limit):
    """Yield one flat dict per scanned document, up to ``limit`` rows.

    Each dict carries a ``file_url`` plus the source fields listed in
    ``FIELDS``. The ``source_id`` field is exported as ``source`` holding
    the source label. Sequence values are joined with ``', '``.
    """
    # Cache of source id -> label so each source is loaded once.
    labels = {}
    for index, row in enumerate(scan_iter(query)):
        if index >= limit:
            return
        record = {
            'file_url': url_for('documents_api.file',
                                document_id=row.get('_id'))
        }
        for field, value in row.get('_source').items():
            if field == 'source_id':
                if value not in labels:
                    source = Source.by_id(value)
                    if source is not None:
                        labels[value] = source.label
                    else:
                        labels[value] = '[Deleted source %s]' % value
                value = labels[value]
                field = 'source'
            if field not in FIELDS:
                continue
            if isinstance(value, (list, tuple, set)):
                value = ', '.join(value)
            record[field] = value
        yield record
def _serialize(self, obj):
    """Add a self link and a ``writeable`` flag to an alert dict.

    ``role_id`` is removed from the dict; the flag reflects whether the
    requester may write to that role.
    """
    alert_id = obj.get("id")
    self_link = url_for("alerts_api.view", alert_id=alert_id)
    obj["links"] = {"self": self_link}
    owner_id = obj.pop("role_id", None)
    obj["writeable"] = request.authz.can_write_role(owner_id)
    return obj
def transient(self, data):
    """Add the ``$uri``, ``$ui`` and ``$writeable`` fields to a document dict.

    The dict is modified in place and returned.
    """
    document_id = data.get('id')
    data['$uri'] = url_for('documents_api.view', document_id=document_id)
    data['$ui'] = document_url(data.get('id'))
    data['$writeable'] = request.authz.can_write(data.get('collection_id'))
    return data
def view(document_id):
    """Return one document as JSON, with its data and PDF URLs and source.

    Archive-generated URLs are preferred; the API file and PDF endpoints are
    the fallbacks. A document that is itself a PDF reuses its data URL.
    """
    doc = get_document(document_id)
    enable_cache()
    data = doc.to_dict()

    data['data_url'] = get_archive().generate_url(doc.meta)
    if data['data_url'] is None:
        data['data_url'] = url_for('documents_api.file',
                                   document_id=document_id)

    if not doc.meta.is_pdf:
        data['pdf_url'] = get_archive().generate_url(doc.meta.pdf)
        if data['pdf_url'] is None:
            data['pdf_url'] = url_for('documents_api.pdf',
                                      document_id=document_id)
    else:
        data['pdf_url'] = data['data_url']

    data['source'] = doc.source
    return jsonify(data)
def to_dict(self):
    """Return this role as a dict, extending the parent's fields."""
    data = super(Role, self).to_dict()
    extra_fields = (
        ('api_url', url_for('roles_api.view', id=self.id)),
        ('foreign_id', self.foreign_id),
        ('is_admin', self.is_admin),
        ('email', self.email),
        ('type', self.type),
    )
    for key, value in extra_fields:
        data[key] = value
    return data
def archive_url(role_id, content_hash, file_name=None, mime_type=None):
    """Build an authorized retrieval URL for an archive blob.

    The role id, content hash, file name and MIME type are packed into a
    JWT claim signed with the application's secret key.

    Returns:
        The URL, or ``None`` when ``content_hash`` is ``None``.
    """
    if content_hash is None:
        return None
    payload = {
        'r': role_id,
        'h': content_hash,
        'f': file_name,
        't': mime_type,
    }
    token = jwt.encode(payload, settings.SECRET_KEY)
    claim = token.decode('utf-8')
    query = [('claim', claim)]
    return url_for('archive_api.retrieve', _authorize=True, _query=query)
def to_dict(self):
    """Return this source as a plain dict, including its API URL."""
    data = {'api_url': url_for('sources.view', id=self.id)}
    data['id'] = self.id
    data['foreign_id'] = self.foreign_id
    data['label'] = self.label
    data['created_at'] = self.created_at
    data['updated_at'] = self.updated_at
    return data
def to_dict(self):
    """Return this collection as a plain dict, including its API URL."""
    data = {'id': self.id}
    data['api_url'] = url_for('collections_api.view', id=self.id)
    data['label'] = self.label
    data['foreign_id'] = self.foreign_id
    data['creator_id'] = self.creator_id
    data['created_at'] = self.created_at
    data['updated_at'] = self.updated_at
    return data
def to_dict(self):
    """Return this collection as a dict, extending the parent's fields.

    Adds ``api_url``, ``foreign_id`` and ``creator_id``. ``public`` is added
    on a best-effort basis: when the visibility check cannot be imported or
    fails, the key is left out instead of failing the serialization.
    """
    data = super(Collection, self).to_dict()
    try:
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import; confirm.
        from aleph.authz import collection_public
        data['public'] = collection_public(self)
    except Exception:
        # Deliberately best effort, but narrowed from a bare ``except`` so
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        pass
    data['api_url'] = url_for('collections_api.view', id=self.id)
    data['foreign_id'] = self.foreign_id
    data['creator_id'] = self.creator_id
    return data
def query():
    """Run a document search from the request arguments and return JSON.

    The cache varies by user and by the readable collections. A ``next``
    link is added when ``next_params`` reports a further page.
    """
    enable_cache(vary_user=True, vary=authz.collections(authz.READ))
    search = documents_query(request.args)
    search['size'] = get_limit(default=100)
    search['from'] = get_offset()
    result = execute_documents_query(request.args, search)
    next_page = next_params(request.args, result)
    if next_page is not None:
        result['next'] = url_for('search_api.query', **next_page)
    return jsonify(result)
def to_dict(self):
    """Return this source as a plain dict, including its API URL."""
    data = {'api_url': url_for('sources.view', slug=self.slug)}
    data['slug'] = self.slug
    data['label'] = self.label
    data['public'] = self.public
    data['crawler'] = self.crawler
    # 'config': self.config,
    data['created_at'] = self.created_at
    data['updated_at'] = self.updated_at
    return data
def view(document_id):
    """Return one document as JSON, with its data and PDF URLs and source.

    Archive-generated URLs are preferred; the API file and PDF endpoints are
    the fallbacks. A failure while generating the archive PDF URL is logged
    and treated like a missing URL.
    """
    doc = get_document(document_id)
    enable_cache()
    data = doc.to_dict()

    data['data_url'] = get_archive().generate_url(doc.meta)
    if data['data_url'] is None:
        data['data_url'] = url_for('documents_api.file',
                                   document_id=document_id)

    if not doc.meta.is_pdf:
        try:
            data['pdf_url'] = get_archive().generate_url(doc.meta.pdf)
        except Exception as ex:
            log.info('Could not generate PDF url: %r', ex)
    else:
        data['pdf_url'] = data['data_url']

    # Applies to both branches; for a PDF document the URL is already set.
    if data.get('pdf_url') is None:
        data['pdf_url'] = url_for('documents_api.pdf',
                                  document_id=document_id)

    data['source'] = doc.source
    return jsonify(data)
def to_dict(self):
    """Return this entity as a plain dict, including its selector texts."""
    data = {'id': self.id}
    data['api_url'] = url_for('entities.view', id=self.id)
    data['label'] = self.label
    data['category'] = self.category
    data['creator_id'] = self.creator_id
    selector_texts = []
    for selector in self.selectors:
        selector_texts.append(selector.text)
    data['selectors'] = selector_texts
    data['list'] = self.list_id
    data['created_at'] = self.created_at
    data['updated_at'] = self.updated_at
    return data
def rows(document_id, table_id):
    """Return one page of rows from a document table as JSON.

    A ``next`` link is added when ``next_params`` reports a further page.
    """
    document, tabular = get_tabular(document_id, table_id)
    search = tabular_query(document_id, table_id, request.args)
    search['size'] = get_limit(default=100)
    search['from'] = get_offset()
    result = execute_tabular_query(search)
    next_page = next_params(request.args, result)
    if next_page is not None:
        result['next'] = url_for('documents_api.rows',
                                 document_id=document_id,
                                 table_id=table_id,
                                 **next_page)
    return jsonify(result)
def reconcile_index(collection=None):
    """Return the reconciliation service description as a JSON response.

    Args:
        collection: Optional collection dict. When given, the service label
            includes the collection label, entity suggestions are filtered
            to the collection, and the default types are limited to the
            schemata named in the collection's ``schemata`` mapping.

    Returns:
        The JSON response describing view/preview URLs, the suggest
        services and the default (matchable) types.
    """
    domain = settings.APP_UI_URL.strip('/')
    label = settings.APP_TITLE
    suggest_query = []
    schemata = list(model)
    if collection is not None:
        label = '%s (%s)' % (collection.get('label'), label)
        suggest_query.append(('filter:collection_id', collection.get('id')))
        # A collection without a 'schemata' entry yields no default types
        # instead of raising AttributeError on None.
        schema_names = (collection.get('schemata') or {}).keys()
        schemata = [model.get(name) for name in schema_names]

    # A schema name that the model cannot look up must not break the whole
    # service description, so missing (None) schemata are skipped.
    default_types = [
        get_freebase_type(schema)
        for schema in schemata
        if schema is not None and schema.matchable
    ]
    return jsonify({
        'name': label,
        'identifierSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'schemaSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'view': {'url': entity_url('{{id}}')},
        'preview': {
            'url': entity_url('{{id}}'),
            'width': 800,
            'height': 400
        },
        'suggest': {
            'entity': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_entity',
                                        _query=suggest_query,
                                        _authorize=True,
                                        _relative=True)
            },
            'type': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_type',
                                        _relative=True)
            },
            'property': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_property',
                                        _relative=True)
            }
        },
        'defaultTypes': default_types
    })
def similar_entities(entity, args, collections):
    """Search for entities similar to ``entity`` (merge suggestions API).

    Every term of the entity is fuzzy-matched in its original and latinized
    form. The entity itself is excluded and matches are restricted to the
    given collections. ``args`` is accepted but not used here.

    Returns:
        dict: ``{'results': [...]}`` with up to ten matches, each carrying
        ``id``, ``score`` and ``api_url``.
    """
    shoulds = []
    for term in entity.terms:
        shoulds.append({
            'multi_match': {
                "fields": ["name^50", "terms^25", "summary^5"],
                "query": term,
                "fuzziness": 2
            }
        })
        shoulds.append({
            'multi_match': {
                "fields": ["name_latin^10", "terms_latin^5", "summary_latin"],
                "query": latinize_text(term),
                "fuzziness": 2
            }
        })

    bool_query = {
        "bool": {
            "should": shoulds,
            "must_not": {"ids": {"values": [entity.id]}},
            "must": {"terms": {"collection_id": collections}},
            "minimum_should_match": 1
        }
    }
    body = {
        'size': 10,
        'query': authz_filter(bool_query),
        '_source': DEFAULT_FIELDS
    }

    result = get_es().search(index=get_es_index(),
                             doc_type=TYPE_ENTITY,
                             body=body)
    options = []
    for hit in result.get('hits', {}).get('hits', []):
        match = hit.get('_source')
        match['id'] = hit.get('_id')
        match['score'] = hit.get('_score')
        match['api_url'] = url_for('entities_api.view', id=hit.get('_id'))
        options.append(match)
    return {'results': options}
def records(document_id):
    """Return one page of matching records of a document as JSON.

    When the request arguments yield no record query, a short "no query"
    status is returned instead.
    """
    document = get_document(document_id)
    enable_cache(vary_user=True)
    search = records_query(document.id, request.args)
    if search is None:
        return jsonify({"status": "ok", "message": "no query"})

    search["size"] = get_limit(default=30)
    search["from"] = get_offset()
    result = execute_records_query(search)
    next_page = next_params(request.args, result)
    if next_page is not None:
        result["next"] = url_for("search_api.record",
                                 document_id=document_id,
                                 **next_page)
    return jsonify(result)
def index():
    """Run an entity search and return the serialized result.

    Logged-in users get an ``export`` link as long as the result total does
    not exceed ``EXPORT_MAX``.
    """
    # enable_cache(vary_user=True)
    parser = SearchQueryParser(request.args, request.authz)
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)

    links = {}
    if request.authz.logged_in and result.total <= EXPORT_MAX:
        export_query = list(request.args.items(multi=True))
        links['export'] = url_for('entities_api.export',
                                  format='excel',
                                  _authorize=True,
                                  _query=export_query)
    extra = {'links': links}
    return EntitySerializer.jsonify_result(result, extra=extra)
def status():
    """Return the session status of the current request as JSON.

    The payload lists the login state, API key, role information, the
    readable and writeable collections and the logout URL.
    """
    enable_cache(vary_user=True)
    payload = {
        'logged_in': authz.logged_in(),
        'api_key': request.auth_role.api_key if authz.logged_in() else None,
        'role': request.auth_role,
        'roles': list(request.auth_roles),
        'permissions': {
            'read': authz.collections(authz.READ),
            'write': authz.collections(authz.WRITE),
        },
        'logout': url_for('.logout'),
    }
    return jsonify(payload)