def index(id):
    """Return the permission entries of a collection as JSON.

    The collection is loaded with ``request.authz.WRITE`` and the access is
    audited. Stored permissions whose role is visible to the requester are
    listed first; every remaining visible group is appended as a placeholder
    entry with ``read`` and ``write`` set to ``False``.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    record_audit(Audit.ACT_COLLECTION, id=id)
    authz = request.authz

    # Visible groups that have no stored permission on this collection yet.
    unassigned = [
        group for group in Role.all_groups() if check_visible(group, authz)
    ]

    query = Permission.all().filter(
        Permission.collection_id == collection.id
    )
    entries = []
    for stored in query.all():
        if check_visible(stored.role, authz):
            entries.append(stored)
            if stored.role in unassigned:
                unassigned.remove(stored.role)

    # Pad the listing so that the UI can offer all visible groups for
    # selection, even those not currently associated with the collection.
    for group in unassigned:
        if not (collection.casefile and group.is_public):
            entries.append({
                'collection_id': collection.id,
                'write': False,
                'read': False,
                'role_id': str(group.id)
            })

    serialized = PermissionSerializer().serialize_many(entries)
    return jsonify({
        'total': len(serialized),
        'results': serialized
    })
def index(collection_id):
    """Return the matches of a collection grouped by collection.

    Loads the collection, audits the access, and serializes the database
    query built by ``Match.group_by_collection`` for the current authz.
    """
    coll = get_db_collection(collection_id)
    record_audit(Audit.ACT_COLLECTION, id=coll.id)
    query_parser = QueryParser(request.args, request.authz)
    grouped = Match.group_by_collection(coll.id, authz=request.authz)
    return MatchCollectionsSerializer.jsonify_result(
        DatabaseQueryResult(request, grouped, parser=query_parser)
    )
def similar(entity_id):
    """Run a ``MatchQuery`` for the given entity and return the results.

    The entity is fetched from the index with ``request.authz.READ``,
    converted to a proxy, and the access is audited before querying.
    """
    enable_cache()
    proxy = model.get_proxy(
        get_index_entity(entity_id, request.authz.READ)
    )
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    matches = MatchQuery.handle(request, entity=proxy)
    return EntitySerializer.jsonify_result(matches)
def similar(id):
    """Run a ``MatchQuery`` for the given entity and return it as JSON.

    The entity is fetched from the index with ``request.authz.READ``,
    converted to a proxy, and the access is audited before querying with
    ``CombinedSchema``.
    """
    enable_cache()
    proxy = model.get_proxy(get_index_entity(id, request.authz.READ))
    record_audit(Audit.ACT_ENTITY, id=id)
    return jsonify(
        MatchQuery.handle(request, entity=proxy, schema=CombinedSchema)
    )
def file(document_id):
    """Serve a document from the archive.

    Passes the document's content hash, safe file name and MIME type to
    ``_serve_archive`` and post-processes the response with
    ``_resp_canonical``. The access is audited.
    """
    doc = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    archive_resp = _serve_archive(
        doc.content_hash,
        doc.safe_file_name,
        doc.mime_type,
    )
    return _resp_canonical(archive_resp, document_id)
def triples(collection_id):
    """Return the output of ``export_collection`` as a plain-text response.

    Requires ``request.authz.can_stream()`` and loads the collection with
    ``request.authz.READ``; the access is audited.
    """
    require(request.authz.can_stream())
    log.debug("Stream triples [%r] begins... (coll: %s)",
              request.authz, collection_id)
    coll = get_db_collection(collection_id, request.authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=collection_id)
    exported = export_collection(coll)
    return Response(exported, mimetype='text/plain')
def triples(collection_id):
    """Return the output of ``export_collection`` as a plain-text response.

    Requires ``request.authz.can_stream()`` and loads the collection with
    ``request.authz.READ``; the access is audited.
    """
    require(request.authz.can_stream())
    log.debug("Stream triples [%r] begins... (coll: %s)",
              request.authz, collection_id)
    target = get_db_collection(collection_id, request.authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=collection_id)
    return Response(
        export_collection(target),
        mimetype='text/plain',
    )
def oauth_callback():
    """Complete an OAuth login and redirect with a token in the fragment.

    Aborts with 404 when ``settings.OAUTH`` is falsy. Returns an
    ``Unauthorized`` object when the provider response is missing or is an
    ``OAuthException``, or when no signal handler yields a role. Otherwise
    the first non-``None`` role is logged in, the login is audited, and the
    client is redirected to the best next URL with ``#token=...`` appended.
    """
    if not settings.OAUTH:
        abort(404)

    oauth_resp = oauth.provider.authorized_response()
    if oauth_resp is None or isinstance(oauth_resp, OAuthException):
        log.warning("Failed OAuth: %r", oauth_resp)
        return Unauthorized("Authentication has failed.")

    handler_results = signals.handle_oauth_session.send(
        provider=oauth.provider,
        oauth=oauth_resp,
    )
    # Only the first handler result that carries a role is used.
    role = next(
        (found for _, found in handler_results if found is not None),
        None,
    )
    if role is None:
        log.error("No OAuth handler for %r was installed.",
                  oauth.provider.name)
        return Unauthorized("Authentication has failed.")

    db.session.commit()
    update_role(role)
    log.info("Logged in: %r", role)
    request.authz = Authz.from_role(role)
    record_audit(Audit.ACT_LOGIN)
    token = request.authz.to_token(role=role).decode('utf-8')

    target = get_best_next_url(request.args.get('state'), request.referrer)
    # Drop any existing fragment before attaching the token as the fragment.
    target, _ = urldefrag(target)
    return redirect('%s#token=%s' % (target, token))
def index(id):
    """Return the permission entries of a collection as JSON.

    The collection is loaded with ``request.authz.WRITE`` and the access is
    audited. Stored permissions whose role is visible to the requester come
    first, followed by placeholder entries (``read``/``write`` both
    ``False``) for each remaining visible group.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    record_audit(Audit.ACT_COLLECTION, id=id)
    authz = request.authz

    # Start from all visible groups and strike out those already assigned.
    remaining = [
        group for group in Role.all_groups() if check_visible(group, authz)
    ]

    stored_query = Permission.all().filter(
        Permission.collection_id == collection.id
    )
    listing = []
    for stored in stored_query.all():
        if check_visible(stored.role, authz):
            listing.append(stored)
            if stored.role in remaining:
                remaining.remove(stored.role)

    # Make sure every visible group can be selected in the UI, even when it
    # is not currently associated with the collection.
    for group in remaining:
        if not (collection.casefile and group.is_public):
            listing.append({
                'collection_id': collection.id,
                'write': False,
                'read': False,
                'role_id': str(group.id)
            })

    payload = PermissionSerializer().serialize_many(listing)
    return jsonify({'total': len(payload), 'results': payload})
def index(collection_id):
    """Return the matches of a collection grouped by collection.

    Loads the collection, audits the access, and serializes the database
    query built by ``Match.group_by_collection`` for the current authz.
    """
    source = get_db_collection(collection_id)
    record_audit(Audit.ACT_COLLECTION, id=source.id)
    args_parser = QueryParser(request.args, request.authz)
    match_query = Match.group_by_collection(source.id, authz=request.authz)
    page = DatabaseQueryResult(request, match_query, parser=args_parser)
    return MatchCollectionsSerializer.jsonify_result(page)
def documents(id):
    """Return the ``EntityDocumentsQuery`` result for an entity as JSON.

    The entity is fetched from the index with ``request.authz.READ`` and the
    access is audited.
    """
    enable_cache()
    found = get_index_entity(id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=id)
    return jsonify(
        EntityDocumentsQuery.handle(
            request,
            entity=found,
            schema=CombinedSchema,
        )
    )
def handle(cls, request, parser=None, **kwargs):
    """Execute a search for ``request`` and wrap it in ``cls.RESULT_CLASS``.

    When no ``parser`` is given, a ``SearchQueryParser`` is built from the
    request arguments and authz. The search is audited before it runs;
    ``kwargs`` are forwarded to the query constructor.
    """
    if parser is None:
        parser = SearchQueryParser(request.args, request.authz)

    # Audit the search using the parser's own description of the query.
    record_audit(
        Audit.ACT_SEARCH,
        keys=['prefix', 'text', 'filters'],
        **parser.to_dict()
    )
    query = cls(parser, **kwargs)
    return cls.RESULT_CLASS(request, parser, query.search())
def similar(entity_id):
    """Run a ``MatchQuery`` for the given entity and return the results.

    The entity is fetched from the index with ``request.authz.READ``, the
    request is tagged with its ``collection_id``, and the access is audited
    before querying.
    """
    enable_cache()
    raw = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=raw.get('collection_id'))
    proxy = model.get_proxy(raw)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    return EntitySerializer.jsonify_result(
        MatchQuery.handle(request, entity=proxy)
    )
def handle(cls, request, parser=None, **kwargs):
    """Execute a search for ``request`` and wrap it in ``cls.RESULT_CLASS``.

    A ``SearchQueryParser`` is created from the request when ``parser`` is
    omitted. The search is audited first; ``kwargs`` go to the query
    constructor.
    """
    if parser is None:
        parser = SearchQueryParser(request.args, request.authz)

    # Record the search in the audit log before executing it.
    audited_keys = ['prefix', 'text', 'filters']
    parser_state = parser.to_dict()
    record_audit(Audit.ACT_SEARCH, keys=audited_keys, **parser_state)

    hits = cls(parser, **kwargs).search()
    return cls.RESULT_CLASS(request, parser, hits)
def match():
    """Match an entity supplied in the request body against the index.

    The payload is parsed with ``EntityUpdateSchema`` and audited, then
    handed to ``MatchQuery`` together with any ``collection_ids`` query
    arguments.
    """
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=request.args.getlist('collection_ids'),
    )
    return EntitySerializer.jsonify_result(matches)
def matches(collection_id, other_id):
    """Return the matches between two collections.

    Both collections are loaded and audited in turn, then the query from
    ``Match.find_by_collection`` is serialized.
    """
    left = get_db_collection(collection_id)
    record_audit(Audit.ACT_COLLECTION, id=left.id)
    right = get_db_collection(other_id)
    record_audit(Audit.ACT_COLLECTION, id=right.id)

    args_parser = QueryParser(request.args, request.authz)
    pair_query = Match.find_by_collection(left.id, right.id)
    return MatchSerializer.jsonify_result(
        DatabaseQueryResult(request, pair_query, parser=args_parser)
    )
def match():
    """Match an entity supplied in the request body against the index.

    The payload is parsed with ``EntityUpdateSchema`` and audited; the
    request is tagged with the proxy's schema name and caption before
    ``MatchQuery`` runs with any ``collection_ids`` query arguments.
    """
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    tag_request(schema=proxy.schema.name, caption=proxy.caption)
    matches = MatchQuery.handle(
        request,
        entity=proxy,
        collection_ids=request.args.getlist('collection_ids'),
    )
    return EntitySerializer.jsonify_result(matches)
def matches(id, other_id):
    """Return the matches between two collections as JSON.

    Both collections are loaded and audited in turn, then the query from
    ``Match.find_by_collection`` is wrapped with ``MatchSchema``.
    """
    left = get_db_collection(id)
    record_audit(Audit.ACT_COLLECTION, id=left.id)
    right = get_db_collection(other_id)
    record_audit(Audit.ACT_COLLECTION, id=right.id)

    args_parser = QueryParser(request.args, request.authz)
    pair_query = Match.find_by_collection(left.id, right.id)
    page = DatabaseQueryResult(
        request,
        pair_query,
        parser=args_parser,
        schema=MatchSchema,
    )
    return jsonify(page)
def pdf(document_id):
    """Serve the PDF version of a document from the archive.

    Raises ``BadRequest`` when the document does not support pages. When the
    PDF version differs from the document's content hash, ``.pdf`` is
    appended to the served file name.
    """
    doc = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    if not doc.supports_pages:
        raise BadRequest("PDF is only available for text documents")

    base_name = doc.safe_file_name
    has_separate_pdf = doc.pdf_version != doc.content_hash
    served_name = '%s.pdf' % base_name if has_separate_pdf else base_name
    return _resp_canonical(
        _serve_archive(doc.pdf_version, served_name, PDF),
        document_id,
    )
def records(document_id):
    """Return the ``RecordsQuery`` result for a document as JSON.

    Raises ``BadRequest`` when the document does not support records. The
    access is audited.
    """
    enable_cache()
    doc = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    if not doc.supports_records:
        raise BadRequest("This document does not have records.")
    return jsonify(
        RecordsQuery.handle(request, document=doc, schema=RecordSchema)
    )
def record(document_id, index):
    """Return a single record of a document, looked up by its index.

    Raises ``BadRequest`` when the document does not support records and
    ``NotFound`` when no record exists at ``index``. The access is audited.
    """
    enable_cache()
    doc = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    if not doc.supports_records:
        raise BadRequest("This document does not have records.")

    row = DocumentRecord.by_index(doc.id, index)
    if row is not None:
        return serialize_data(row, RecordSchema)
    raise NotFound("No such record: %s" % index)
def match():
    """Match an entity supplied in the request body and return JSON.

    The payload is parsed with ``EntityUpdateSchema`` and audited, then
    ``MatchQuery`` runs with any ``collection_ids`` query arguments and
    ``CombinedSchema``.
    """
    enable_cache()
    payload = parse_request(EntityUpdateSchema)
    record_audit(Audit.ACT_MATCH, entity=payload)
    proxy = model.get_proxy(payload)
    return jsonify(
        MatchQuery.handle(
            request,
            entity=proxy,
            collection_ids=request.args.getlist('collection_ids'),
            schema=CombinedSchema,
        )
    )
def content(document_id):
    """Return a document's headers, body text and sanitized HTML as JSON.

    The HTML is produced by ``sanitize_html`` from the document's raw body
    and source URL. The access is audited.
    """
    enable_cache()
    doc = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)

    headers = doc.headers
    text = doc.body_text
    html = sanitize_html(doc.body_raw, doc.source_url)
    return jsonify({
        'headers': headers,
        'text': text,
        'html': html
    })
def references(entity_id):
    """Return one entry per item yielded by ``entity_references`` as JSON.

    Each entry carries the count, the property, and the name of the
    property's schema. The entity is fetched with ``request.authz.READ``
    and the access is audited.
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    summary = [
        {
            'count': count,
            'property': prop,
            'schema': prop.schema.name,
        }
        for prop, count in entity_references(found, request.authz)
    ]
    return jsonify({
        'status': 'ok',
        'total': len(summary),
        'results': summary
    })
def records(document_id=None, collection_id=None):
    """Stream records for a collection, a document, or everything.

    Requires ``request.authz.can_export()``. A given ``collection_id`` takes
    precedence over ``document_id``; the selected object is access-checked
    and audited. With neither given, ``request.authz.is_admin`` is required.
    """
    require(request.authz.can_export())

    if collection_id is not None:
        get_db_collection(collection_id, request.authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)
    elif document_id is not None:
        get_db_document(document_id)
        record_audit(Audit.ACT_ENTITY, id=document_id)
    else:
        # Nothing narrows the export here, so this is a *full* export and
        # is restricted to admins.
        require(request.authz.is_admin)

    return stream_ijson(
        iter_records(document_id=document_id, collection_id=collection_id)
    )
def references(id):
    """Return one entry per item yielded by ``entity_references`` as JSON.

    Each entry carries the count, the property, the property's schema name,
    and a link to ``entities_api.index`` filtered on that property and this
    entity's id. The access is audited.
    """
    enable_cache()
    found = get_index_entity(id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=id)

    summary = []
    for prop, count in entity_references(found, request.authz):
        filter_arg = ('filter:properties.%s' % prop.name, id)
        summary.append({
            'count': count,
            'property': prop,
            'schema': prop.schema.name,
            'results': url_for('entities_api.index', _query=(filter_arg, ))
        })
    return jsonify({
        'status': 'ok',
        'total': len(summary),
        'results': summary
    })
def entities(collection_id=None):
    """Stream entities as JSON, optionally restricted to one collection.

    Requires ``request.authz.can_export()``. The ``schema`` and ``include``
    query arguments are forwarded to ``iter_entities``; fields listed in
    ``excludes`` are always excluded and are dropped from ``includes``.

    When ``collection_id`` is given, the collection is access-checked with
    ``request.authz.READ`` and the access is audited.
    """
    require(request.authz.can_export())
    schemata = ensure_list(request.args.getlist('schema'))
    excludes = ['text', 'roles', 'fingerprints']
    includes = ensure_list(request.args.getlist('include'))
    includes = [f for f in includes if f not in excludes]
    if collection_id is not None:
        # Use the route argument here: the bare name ``id`` would refer to
        # the Python builtin function, not to the requested collection.
        get_db_collection(collection_id, request.authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)
    entities = iter_entities(authz=request.authz,
                             collection_id=collection_id,
                             schemata=schemata,
                             excludes=excludes,
                             includes=includes)
    return stream_ijson(entities)
def view(document_id):
    """Return the indexed document enriched with database-backed fields.

    Headers are always added. Sanitized HTML is added when the document's
    model names include ``Document.SCHEMA_HTML``; body text is added when
    they include ``Document.SCHEMA_TEXT`` or ``Document.SCHEMA_IMAGE``.
    The access is audited.
    """
    enable_cache()
    data = get_index_document(document_id)
    doc = get_db_document(document_id)
    data['headers'] = doc.headers

    # TODO: should this be its own API? Probably so, but then it is unclear
    # whether to JSON-wrap the body or serve it plain with the correct MIME
    # type.
    schema_names = doc.model.names
    if Document.SCHEMA_HTML in schema_names:
        data['html'] = sanitize_html(doc.body_raw, doc.source_url)
    if (Document.SCHEMA_TEXT in schema_names
            or Document.SCHEMA_IMAGE in schema_names):
        data['text'] = doc.body_text

    record_audit(Audit.ACT_ENTITY, id=document_id)
    return serialize_data(data, CombinedSchema)
def tags(entity_id):
    """Return the entries from ``entity_tags``, highest count first.

    Each entry carries the field, the value, the count, and an ``id`` that
    is a query string filtering on that field and URL-quoted value. The
    access is audited.
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)

    unsorted = [
        {
            'id': query_string(
                [('filter:%s' % field, quote(value.encode('utf-8')))]
            ),
            'value': value,
            'field': field,
            'count': count,
        }
        for (field, value, count) in entity_tags(found, request.authz)
    ]
    ranked = sorted(unsorted, key=lambda tag: tag['count'], reverse=True)
    return jsonify({
        'status': 'ok',
        'total': len(ranked),
        'results': ranked
    })
def entities(collection_id=None):
    """Stream entities as JSON, optionally restricted to one collection.

    Requires ``request.authz.can_stream()``. The ``schema`` and ``include``
    query arguments are forwarded to ``iter_entities``; a fixed set of
    fields is always excluded and is dropped from the includes. A given
    collection is access-checked with ``request.authz.READ`` and audited.
    """
    require(request.authz.can_stream())
    log.debug("Stream entities [%r] begins... (coll: %s)",
              request.authz, collection_id)

    wanted_schemata = ensure_list(request.args.getlist('schema'))
    blocked = ['text', 'roles', 'fingerprints']
    requested = ensure_list(request.args.getlist('include'))
    allowed = [field for field in requested if field not in blocked]

    if collection_id is not None:
        get_db_collection(collection_id, request.authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)

    return stream_ijson(
        iter_entities(
            authz=request.authz,
            collection_id=collection_id,
            schemata=wanted_schemata,
            excludes=blocked,
            includes=allowed,
        )
    )
def references(entity_id):
    """Return one entry per item yielded by ``entity_references`` as JSON.

    Each entry carries the count, the property, and the name of the
    property's schema. The request is tagged with the entity's
    ``collection_id`` and the access is audited.
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=found.get('collection_id'))
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    summary = [
        {
            'count': count,
            'property': prop,
            'schema': prop.schema.name,
        }
        for prop, count in entity_references(found, request.authz)
    ]
    return jsonify({
        'status': 'ok',
        'total': len(summary),
        'results': summary
    })
def content(entity_id):
    """Return an entity's headers, body text and sanitized HTML as JSON.

    The entity is first fetched with ``request.authz.READ``, then reloaded
    through ``entities_by_ids`` with its schema and ``text`` excluded. The
    first reloaded entity is audited and returned; if none is yielded, an
    empty 404 response is returned.
    """
    enable_cache()
    indexed = get_index_entity(entity_id, request.authz.READ)
    reloaded = entities_by_ids(
        [entity_id],
        schemata=indexed.get('schema'),
        excludes=['text'],
    )
    for full_entity in reloaded:
        proxy = model.get_proxy(full_entity)
        record_audit(Audit.ACT_ENTITY, id=entity_id)
        body_html = proxy.first('bodyHtml', quiet=True)
        source_url = proxy.first('sourceUrl', quiet=True)
        html = sanitize_html(body_html, source_url)
        headers = registry.json.unpack(proxy.first('headers', quiet=True))
        return jsonify({
            'headers': headers,
            'text': proxy.first('bodyText', quiet=True),
            'html': html
        })
    return ('', 404)
def password_login():
    """Authenticate a role by email and password and issue a token.

    Returns an ``Unauthorized`` object when no role matches the email, the
    role has no password, or the password check fails. On success the login
    is audited and a JSON body with ``status`` and ``token`` is returned.
    """
    credentials = parse_request(LoginSchema)
    role = Role.by_email(credentials.get('email'))

    rejected = (
        role is None
        or not role.has_password
        or not role.check_password(credentials.get('password'))
    )
    if rejected:
        return Unauthorized("Authentication has failed.")

    db.session.commit()
    update_role(role)
    session_authz = Authz.from_role(role)
    request.authz = session_authz
    record_audit(Audit.ACT_LOGIN)
    return jsonify({
        'status': 'ok',
        'token': session_authz.to_token(role=role)
    })
def content(entity_id):
    """Return an entity's headers, body text and sanitized HTML as JSON.

    The entity is first fetched with ``request.authz.READ`` and the request
    is tagged with its ``collection_id``. It is then reloaded through
    ``entities_by_ids`` with its schema and ``text`` excluded; the first
    reloaded entity is audited and returned. If none is yielded, an empty
    404 response is returned.
    """
    enable_cache()
    indexed = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=indexed.get('collection_id'))
    reloaded = entities_by_ids(
        [entity_id],
        schemata=indexed.get('schema'),
        excludes=['text'],
    )
    for full_entity in reloaded:
        proxy = model.get_proxy(full_entity)
        record_audit(Audit.ACT_ENTITY, id=entity_id)
        body_html = proxy.first('bodyHtml', quiet=True)
        source_url = proxy.first('sourceUrl', quiet=True)
        html = sanitize_html(body_html, source_url)
        headers = registry.json.unpack(proxy.first('headers', quiet=True))
        return jsonify({
            'headers': headers,
            'text': proxy.first('bodyText', quiet=True),
            'html': html
        })
    return ('', 404)
def tags(entity_id):
    """Return the entries from ``entity_tags``, highest count first.

    Each entry carries the field, the value, the count, and an ``id`` that
    is a query string filtering on that field and URL-quoted value. The
    request is tagged with the entity's ``collection_id`` and the access is
    audited.
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    tag_request(collection_id=found.get('collection_id'))
    record_audit(Audit.ACT_ENTITY, id=entity_id)

    unsorted = [
        {
            'id': query_string(
                [('filter:%s' % field, quote(value.encode('utf-8')))]
            ),
            'value': value,
            'field': field,
            'count': count,
        }
        for (field, value, count) in entity_tags(found, request.authz)
    ]
    ranked = sorted(unsorted, key=lambda tag: tag['count'], reverse=True)
    return jsonify({
        'status': 'ok',
        'total': len(ranked),
        'results': ranked
    })
def retrieve():
    """Serve an archived file identified by the ``claim`` query argument.

    The claim is decoded by ``archive_claim`` and must belong to the
    requesting role. If the archive can generate a URL, the client is
    redirected to it. Otherwise the file is loaded and sent as an
    attachment, or a 404 is returned when it cannot be loaded.
    ``archive.cleanup_file`` runs in either of the latter cases.
    """
    role_id, content_hash, file_name, mime_type = archive_claim(
        request.args.get('claim')
    )
    require(request.authz.id == role_id)
    record_audit(Audit.ACT_ARCHIVE, content_hash=content_hash)
    tag_request(content_hash=content_hash, file_name=file_name)

    direct_url = archive.generate_url(
        content_hash,
        file_name=file_name,
        mime_type=mime_type,
    )
    if direct_url is not None:
        return redirect(direct_url)

    try:
        path = archive.load_file(content_hash)
        if path is not None:
            return send_file(
                path,
                as_attachment=True,
                conditional=True,
                attachment_filename=file_name,
                mimetype=mime_type,
            )
        return Response(status=404)
    finally:
        archive.cleanup_file(content_hash)
def view(entity_id):
    """Return a single entity from the index.

    The entity is fetched with ``request.authz.READ``; the access is audited
    and the request is tagged with the entity's ``collection_id``.
    """
    enable_cache()
    found = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    owning_collection = found.get('collection_id')
    tag_request(collection_id=owning_collection)
    return EntitySerializer.jsonify(found)
def view(id):
    """Return a collection from the index via ``CollectionSchema``.

    The access is audited.
    """
    indexed = get_index_collection(id)
    record_audit(Audit.ACT_COLLECTION, id=id)
    response = serialize_data(indexed, CollectionSchema)
    return response
def view(entity_id):
    """Return a single entity from the index.

    The entity is fetched with ``request.authz.READ``; the access is audited
    and the request is tagged with the entity's ``collection_id``.
    """
    enable_cache()
    indexed = get_index_entity(entity_id, request.authz.READ)
    record_audit(Audit.ACT_ENTITY, id=entity_id)
    tag_request(
        collection_id=indexed.get('collection_id'),
    )
    response = EntitySerializer.jsonify(indexed)
    return response
def csv_export(collection_id):
    """Stream the output of ``export_matches_csv`` for a collection as CSV.

    The collection is loaded with ``request.authz.READ`` and the access is
    audited.
    """
    coll = get_db_collection(collection_id, request.authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=coll.id)
    rows = export_matches_csv(coll.id, request.authz)
    # Keep the request context alive while the rows are being streamed.
    streamed = stream_with_context(rows)
    return stream_csv(streamed)
def csv_export(id):
    """Stream the output of ``export_matches_csv`` for a collection as CSV.

    The collection is loaded with ``request.authz.READ`` and the access is
    audited.
    """
    coll = get_db_collection(id, request.authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=id)
    rows = export_matches_csv(coll.id, request.authz)
    # Keep the request context alive while the rows are being streamed.
    return stream_csv(
        stream_with_context(rows)
    )
def view(id):
    """Return a collection from the index via ``CollectionSerializer``.

    The access is audited.
    """
    indexed = get_index_collection(id)
    record_audit(Audit.ACT_COLLECTION, id=id)
    response = CollectionSerializer.jsonify(indexed)
    return response
def rdf(id):
    """Return the output of ``export_collection`` as a plain-text response.

    The collection is loaded with ``request.authz.READ`` and the access is
    audited.
    """
    coll = get_db_collection(id, request.authz.READ)
    record_audit(Audit.ACT_COLLECTION, id=id)
    exported = export_collection(coll)
    return Response(exported, mimetype='text/plain')
def view(collection_id):
    """Return a collection from the index via ``CollectionSerializer``.

    The access is audited.
    """
    indexed = get_index_collection(collection_id)
    record_audit(Audit.ACT_COLLECTION, id=collection_id)
    response = CollectionSerializer.jsonify(indexed)
    return response