def view(document_id):
    """Return the index entry of a document as JSON.

    When the entry's ``type`` is ``Document.TYPE_SCROLL`` the document's
    ``body_text`` is attached under ``text``; when it is
    ``Document.TYPE_HTML`` the sanitized ``body_raw`` is attached under
    ``html``. The result is serialized with ``DocumentSchema``.
    """
    enable_cache()
    entry = get_index_document(document_id)
    # TODO: should this be its own API? Probably so, but for that it would
    # be unclear if we should JSON wrap it, or serve plain with the correct
    # MIME type?
    doc_type = entry.get('type')
    if doc_type == Document.TYPE_SCROLL:
        entry['text'] = get_document(document_id).body_text
    elif doc_type == Document.TYPE_HTML:
        entry['html'] = sanitize_html(get_document(document_id).body_raw)
    return jsonify(entry, schema=DocumentSchema)
def records(document_id):
    """Run the request's records query for one document, return JSON.

    The query is delegated to ``RecordsQuery.handle_request`` with the
    loaded document and ``RecordSchema``.
    """
    enable_cache()
    doc = get_document(document_id)
    outcome = RecordsQuery.handle_request(
        request,
        document=doc,
        schema=RecordSchema,
    )
    return jsonify(outcome)
def table(document_id, table_id):
    """Return one entry of ``document.meta.tables`` as JSON.

    Raises ``NotFound`` when the lookup (or serialization) raises
    ``IndexError``.
    """
    doc = get_document(document_id)
    enable_cache(vary_user=True)
    try:
        tables = doc.meta.tables
        return jsonify(tables[table_id])
    except IndexError:
        raise NotFound("No such table: %s" % table_id)
def record(document_id, index):
    """Return the record at ``index`` of a document as JSON.

    Raises ``NotFound`` when ``DocumentRecord.by_index`` yields ``None``.
    """
    enable_cache()
    doc = get_document(document_id)
    found = DocumentRecord.by_index(doc.id, index)
    if found is not None:
        return jsonify(found, schema=RecordSchema)
    raise NotFound("No such record: %s" % index)
def update(document_id):
    """Apply the parsed request body to a document and return its view.

    The document is loaded with ``request.authz.WRITE``; the change is
    committed, ``update_document`` is called, and the response is
    whatever ``view`` produces for the same id.
    """
    doc = get_document(document_id, request.authz.WRITE)
    changes = parse_request(schema=DocumentSchema)
    doc.update(changes)
    db.session.commit()
    update_document(doc)
    return view(document_id)
def update(document_id):
    """Apply the request data to a document and return its view.

    The document is loaded with ``action=request.authz.WRITE``. After
    the commit the event is logged, ``update_document`` is called, and
    the response is whatever ``view`` produces for the same id.
    """
    doc = get_document(document_id, action=request.authz.WRITE)
    changes = request_data()
    doc.update(changes)
    db.session.commit()
    log_event(request, document_id=doc.id)
    update_document(doc)
    return view(document_id)
def record(document_id, index):
    """Return the record at ``index`` of a document as JSON.

    Raises ``BadRequest`` when ``document.has_records()`` is false and
    ``NotFound`` when ``DocumentRecord.by_index`` yields ``None``.
    """
    enable_cache()
    doc = get_document(document_id)
    if not doc.has_records():
        raise BadRequest("This document does not have records.")
    found = DocumentRecord.by_index(doc.id, index)
    if found is not None:
        return jsonify(found, schema=RecordSchema)
    raise NotFound("No such record: %s" % index)
def records(document_id):
    """Run the request's records query for one document, return JSON.

    Raises ``BadRequest`` when ``document.has_records()`` is false.
    """
    enable_cache()
    doc = get_document(document_id)
    if not doc.has_records():
        raise BadRequest("This document does not have records.")
    outcome = RecordsQuery.handle_request(
        request,
        document=doc,
        schema=RecordSchema,
    )
    return jsonify(outcome)
def record(document_id, index):
    """Return the ``DocumentRecord`` at ``index`` of a document as JSON.

    Raises ``NotFound`` when no row matches the document id and index.
    """
    doc = get_document(document_id)
    found = (
        db.session.query(DocumentRecord)
        .filter(DocumentRecord.document_id == doc.id)
        .filter(DocumentRecord.index == index)
        .first()
    )
    if found is None:
        raise NotFound("No such page: %s" % index)
    # Caching is only enabled once the record is known to exist.
    enable_cache(server_side=True)
    return jsonify(found)
def references(document_id):
    """Return a pager over a document's regex-origin references.

    Only references whose joined entity is in ``Entity.STATE_ACTIVE``
    and whose collection is in ``request.authz.collections_read`` are
    included, ordered by descending weight.
    """
    doc = get_document(document_id)
    ranked = (
        db.session.query(Reference)
        .filter(Reference.document_id == doc.id)
        .filter(Reference.origin == 'regex')
        .join(Entity)
        .filter(Entity.state == Entity.STATE_ACTIVE)
        .filter(Entity.collection_id.in_(request.authz.collections_read))
        .order_by(Reference.weight.desc())
    )
    return jsonify(Pager(ranked, document_id=document_id))
def references(document_id):
    """Return all references of a document under a ``results`` key.

    Only references whose joined entity is in ``Entity.STATE_ACTIVE``
    and has at least one collection among
    ``authz.collections(authz.READ)`` are included, ordered by
    descending weight.
    """
    doc = get_document(document_id)
    active = (
        db.session.query(Reference)
        .filter(Reference.document_id == doc.id)
        .join(Entity)
        .filter(Entity.state == Entity.STATE_ACTIVE)
    )
    readable = Collection.id.in_(authz.collections(authz.READ))
    ranked = active.filter(Entity.collections.any(readable))
    ranked = ranked.order_by(Reference.weight.desc())
    return jsonify({'results': ranked.all()})
def update(document_id):
    """Apply the request data to a document and return its view.

    Write access to the document's source collection is required
    before any change is applied. The update itself is limited to the
    collections returned by ``authz.collections(authz.WRITE)``.
    """
    doc = get_document(document_id)
    # Documents carry this extra check so that an edit cannot be used
    # to escalate privileges:
    authz.require(authz.collection_write(doc.source_collection_id))
    changes = request_data()
    writeable = authz.collections(authz.WRITE)
    doc.update(changes, writeable=writeable)
    db.session.commit()
    log_event(request, document_id=doc.id)
    update_document(doc)
    return view(document_id)
def update_collections(document_id):
    """Replace a document's collections from a JSON list of integers.

    Raises ``BadRequest`` unless the request data is a list whose
    items are all ``int`` instances. The response is whatever
    ``view_collections`` produces for the same id.
    """
    doc = get_document(document_id)
    payload = request_data()
    is_int_list = isinstance(payload, list) and all(
        isinstance(item, int) for item in payload
    )
    if not is_int_list:
        raise BadRequest()
    writeable = authz.collections(authz.WRITE)
    doc.update_collections(payload, writeable=writeable)
    db.session.commit()
    log_event(request, document_id=doc.id)
    update_document(doc)
    return view_collections(document_id)
def file(document_id):
    """Serve a document's source file.

    Redirects to the archive URL when ``generate_url`` returns one;
    otherwise loads the file through the archive and sends it as an
    attachment with the file name and MIME type from the document
    metadata.
    """
    doc = get_document(document_id)
    enable_cache(server_side=True)
    remote_url = get_archive().generate_url(doc.meta)
    if remote_url is None:
        path = get_archive().load_file(doc.meta)
        handle = open(path, 'rb')
        return send_file(
            handle,
            as_attachment=True,
            attachment_filename=doc.meta.file_name,
            mimetype=doc.meta.mime_type,
        )
    return redirect(remote_url)
def records(document_id):
    """Query the records of one document and return the result as JSON.

    When ``next_params`` yields parameters, a ``next`` URL pointing at
    ``documents_api.records`` is added to the result.
    """
    doc = get_document(document_id)
    enable_cache(vary_user=True)
    state = QueryState(request.args, request.authz)
    es_query = records_query(doc.id, state)
    result = execute_records_query(doc.id, state, es_query)
    next_page = next_params(request.args, result)
    if next_page is not None:
        result['next'] = url_for(
            'documents_api.records',
            document_id=document_id,
            **next_page
        )
    return jsonify(result)
def pdf(document_id):
    """Serve the PDF rendition recorded in a document's metadata.

    Raises ``BadRequest`` unless the document type is
    ``Document.TYPE_TEXT``. Redirects to the archive URL when one is
    available, otherwise sends the locally loaded file.
    """
    doc = get_document(document_id)
    enable_cache(server_side=True)
    if doc.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")
    pdf_meta = doc.meta.pdf
    remote_url = get_archive().generate_url(pdf_meta)
    if remote_url is None:
        path = get_archive().load_file(pdf_meta)
        return send_file(open(path, 'rb'), mimetype=pdf_meta.mime_type)
    return redirect(remote_url)
def records(document_id):
    """Query the records of one document and return the result as JSON.

    When ``records_query`` yields ``None`` a fixed "no query" payload
    is returned instead. Otherwise the query is paged with
    ``get_limit(default=30)`` / ``get_offset()`` and, when
    ``next_params`` yields parameters, a ``next`` URL is added.
    """
    doc = get_document(document_id)
    enable_cache(vary_user=True)
    query = records_query(doc.id, request.args)
    if query is not None:
        query["size"] = get_limit(default=30)
        query["from"] = get_offset()
        result = execute_records_query(query)
        next_page = next_params(request.args, result)
        if next_page is not None:
            # NOTE(review): the link targets the "search_api.record"
            # endpoint, not a "records" one -- confirm this is intended.
            result["next"] = url_for(
                "search_api.record",
                document_id=document_id,
                **next_page
            )
        return jsonify(result)
    return jsonify({"status": "ok", "message": "no query"})
def view(document_id):
    """Return a document's dict form, enriched with URLs, as JSON.

    Adds the parent's dict under ``parent`` when there is one, a
    ``data_url`` (archive URL, falling back to the
    ``documents_api.file`` endpoint) and, when the document has a
    truthy ``pdf_version``, a ``pdf_url`` for ``documents_api.pdf``.
    """
    doc = get_document(document_id)
    enable_cache()
    payload = doc.to_dict()
    parent = doc.parent
    if parent is not None:
        payload['parent'] = parent.to_dict()
    log_event(request, document_id=doc.id)
    data_url = archive.generate_url(doc.content_hash)
    if data_url is None:
        data_url = url_for('documents_api.file', document_id=document_id)
    payload['data_url'] = data_url
    if doc.pdf_version:
        payload['pdf_url'] = url_for(
            'documents_api.pdf',
            document_id=document_id,
        )
    return jsonify(payload)
def records(document_id):
    """Query the records of one document and return the result as JSON.

    When ``records_query`` yields ``None`` a fixed "no query" payload
    is returned instead. Otherwise the query is paged with
    ``get_limit(default=30)`` / ``get_offset()`` and, when
    ``next_params`` yields parameters, a ``next`` URL is added.
    """
    doc = get_document(document_id)
    enable_cache(vary_user=True)
    query = records_query(doc.id, request.args)
    if query is None:
        empty = {'status': 'ok', 'message': 'no query'}
        return jsonify(empty)
    query['size'] = get_limit(default=30)
    query['from'] = get_offset()
    result = execute_records_query(query)
    next_page = next_params(request.args, result)
    if next_page is None:
        return jsonify(result)
    # NOTE(review): the link targets the 'search_api.record' endpoint,
    # not a 'records' one -- confirm this is intended.
    result['next'] = url_for(
        'search_api.record',
        document_id=document_id,
        **next_page
    )
    return jsonify(result)
def pdf(document_id):
    """Serve the PDF version of a document.

    Raises ``BadRequest`` unless the document type is
    ``Document.TYPE_TEXT``, and ``NotFound`` when the archive yields no
    local path. Redirects to the archive URL when one is available.
    """
    doc = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=doc.id)
    if doc.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")
    remote_url = archive.generate_url(doc.pdf_version, mime_type=PDF_MIME)
    if remote_url is None:
        local_path = archive.load_file(
            doc.pdf_version,
            file_name=doc.file_name,
        )
        if local_path is None:
            raise NotFound("Missing PDF file.")
        return send_file(open(local_path, 'rb'), mimetype=PDF_MIME)
    return redirect(remote_url)
def file(document_id):
    """Serve a document's source file.

    Redirects to the archive URL when ``generate_url`` returns one;
    otherwise loads the file through the archive and sends it as an
    attachment with the file name and MIME type from the document
    metadata.

    Raises ``NotFound`` when the archive yields no path or the path is
    not an existing file.
    """
    document = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=document.id)
    url = archive.generate_url(document.meta)
    if url is not None:
        return redirect(url)
    local_path = archive.load_file(document.meta)
    # os.path.isfile() raises TypeError for None instead of returning
    # False, so a missing path has to be caught explicitly to produce a
    # NotFound rather than a server error.
    if local_path is None or not os.path.isfile(local_path):
        raise NotFound("File does not exist.")
    fh = open(local_path, 'rb')
    return send_file(fh, as_attachment=True,
                     attachment_filename=document.meta.file_name,
                     mimetype=document.meta.mime_type)
def view(document_id):
    """Return a document's dict form, enriched with URLs, as JSON.

    ``data_url`` is the archive URL, falling back to the
    ``documents_api.file`` endpoint. ``pdf_url`` equals ``data_url``
    when the metadata says the file is a PDF; otherwise it is the
    archive URL of the PDF rendition, falling back to the
    ``documents_api.pdf`` endpoint. ``source`` is set from the document.
    """
    doc = get_document(document_id)
    enable_cache()
    payload = doc.to_dict()
    data_url = get_archive().generate_url(doc.meta)
    if data_url is None:
        data_url = url_for('documents_api.file', document_id=document_id)
    payload['data_url'] = data_url
    if doc.meta.is_pdf:
        pdf_url = data_url
    else:
        pdf_url = get_archive().generate_url(doc.meta.pdf)
        if pdf_url is None:
            pdf_url = url_for('documents_api.pdf', document_id=document_id)
    payload['pdf_url'] = pdf_url
    payload['source'] = doc.source
    return jsonify(payload)
def pdf(document_id):
    """Serve the PDF rendition recorded in a document's metadata.

    Raises ``BadRequest`` unless the document type is
    ``Document.TYPE_TEXT``. Redirects to the archive URL when one is
    available. Any exception while loading or opening the local file
    is reported as ``NotFound``.
    """
    doc = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=doc.id)
    if doc.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")
    pdf_meta = doc.meta.pdf
    remote_url = get_archive().generate_url(pdf_meta)
    if remote_url is not None:
        return redirect(remote_url)
    try:
        handle = open(get_archive().load_file(pdf_meta), 'rb')
    except Exception as ex:
        raise NotFound("Missing PDF file: %r" % ex)
    return send_file(handle, mimetype=pdf_meta.mime_type)
def pdf(document_id):
    """Serve the PDF rendition recorded in a document's metadata.

    Raises ``BadRequest`` unless the document type is
    ``Document.TYPE_TEXT``. Redirects to the archive URL when one is
    available. Any exception while loading or opening the local file
    is reported as ``NotFound``.
    """
    doc = get_document(document_id)
    enable_cache(server_side=True)
    log_event(request, document_id=doc.id)
    if doc.type != Document.TYPE_TEXT:
        raise BadRequest("PDF is only available for text documents")
    pdf_meta = doc.meta.pdf
    remote_url = archive.generate_url(pdf_meta)
    if remote_url is not None:
        return redirect(remote_url)
    try:
        handle = open(archive.load_file(pdf_meta), 'rb')
    except Exception as ex:
        raise NotFound("Missing PDF file: %r" % ex)
    return send_file(handle, mimetype=pdf_meta.mime_type)
def records(document_id):
    """Query the records of one document and return the result as JSON.

    When ``records_query`` yields ``None`` a fixed "no query" payload
    is returned instead. Otherwise the query is paged with
    ``get_limit(default=30)`` / ``get_offset()`` and, when
    ``next_params`` yields parameters, a ``next`` URL is added.
    """
    doc = get_document(document_id)
    enable_cache(vary_user=True)
    query = records_query(doc.id, request.args)
    if query is not None:
        query['size'] = get_limit(default=30)
        query['from'] = get_offset()
        result = execute_records_query(query)
        next_page = next_params(request.args, result)
        if next_page is not None:
            # NOTE(review): the link targets the 'search_api.record'
            # endpoint, not a 'records' one -- confirm this is intended.
            result['next'] = url_for(
                'search_api.record',
                document_id=document_id,
                **next_page
            )
        return jsonify(result)
    return jsonify({'status': 'ok', 'message': 'no query'})
def view(document_id):
    """Return a document's dict form, enriched with URLs, as JSON.

    ``data_url`` is the archive URL, falling back to the
    ``documents_api.file`` endpoint. ``pdf_url`` equals ``data_url``
    when the metadata says the file is a PDF; otherwise the archive is
    asked for the PDF rendition's URL (failures are only logged) and
    the ``documents_api.pdf`` endpoint is used when no URL results.
    """
    doc = get_document(document_id)
    enable_cache()
    payload = doc.to_dict()
    log_event(request, document_id=doc.id)
    data_url = archive.generate_url(doc.meta)
    if data_url is None:
        data_url = url_for('documents_api.file', document_id=document_id)
    payload['data_url'] = data_url
    if doc.meta.is_pdf:
        payload['pdf_url'] = data_url
    else:
        try:
            payload['pdf_url'] = archive.generate_url(doc.meta.pdf)
        except Exception as ex:
            log.info('Could not generate PDF url: %r', ex)
        # Read back through the dict: after a failure the key may be
        # absent or hold whatever to_dict() put there.
        if payload.get('pdf_url') is None:
            payload['pdf_url'] = url_for(
                'documents_api.pdf',
                document_id=document_id,
            )
    return jsonify(payload)
def view(document_id):
    """Return a document's dict form, enriched with URLs, as JSON.

    ``data_url`` is the archive URL, falling back to the
    ``documents_api.file`` endpoint. ``pdf_url`` equals ``data_url``
    when the metadata says the file is a PDF; otherwise the archive is
    asked for the PDF rendition's URL (failures are only logged) and
    the ``documents_api.pdf`` endpoint is used when no URL results.
    ``source`` is set from the document.
    """
    doc = get_document(document_id)
    enable_cache()
    payload = doc.to_dict()
    data_url = get_archive().generate_url(doc.meta)
    if data_url is None:
        data_url = url_for('documents_api.file', document_id=document_id)
    payload['data_url'] = data_url
    if doc.meta.is_pdf:
        payload['pdf_url'] = data_url
    else:
        try:
            payload['pdf_url'] = get_archive().generate_url(doc.meta.pdf)
        except Exception as ex:
            log.info('Could not generate PDF url: %r', ex)
        # Read back through the dict: after a failure the key may be
        # absent or hold whatever to_dict() put there.
        if payload.get('pdf_url') is None:
            payload['pdf_url'] = url_for(
                'documents_api.pdf',
                document_id=document_id,
            )
    payload['source'] = doc.source
    return jsonify(payload)
def view(document_id):
    """Return a document's dict form, enriched with URLs, as JSON.

    ``data_url`` is the archive URL, falling back to the
    ``documents_api.file`` endpoint. ``pdf_url`` equals ``data_url``
    when the metadata says the file is a PDF; otherwise the archive is
    asked for the PDF rendition's URL (failures are only logged) and
    the ``documents_api.pdf`` endpoint is used when no URL results.
    ``source`` is set from the document.
    """
    doc = get_document(document_id)
    enable_cache()
    payload = doc.to_dict()
    data_url = get_archive().generate_url(doc.meta)
    if data_url is None:
        data_url = url_for('documents_api.file', document_id=document_id)
    payload['data_url'] = data_url
    if doc.meta.is_pdf:
        payload['pdf_url'] = data_url
    else:
        try:
            payload['pdf_url'] = get_archive().generate_url(doc.meta.pdf)
        except Exception as ex:
            log.info('Could not generate PDF url: %r', ex)
        # Read back through the dict: after a failure the key may be
        # absent or hold whatever to_dict() put there.
        if payload.get('pdf_url') is None:
            payload['pdf_url'] = url_for(
                'documents_api.pdf',
                document_id=document_id,
            )
    payload['source'] = doc.source
    # NOTE(review): a filter restricting the 'metadata' entry to the keys
    # in ALLOWED_METADATA was sketched here but is disabled -- confirm
    # whether it should be enabled or dropped.
    return jsonify(payload)
def view_collections(document_id):
    """Return the document's ``collection_ids`` as JSON."""
    collection_ids = get_document(document_id).collection_ids
    return jsonify(collection_ids)
def delete(document_id):
    """Delete a document and refresh its collection.

    The document is loaded with ``request.authz.WRITE``. Responds with
    an ``ok`` status payload.
    """
    doc = get_document(document_id, request.authz.WRITE)
    delete_document(doc)
    update_collection(doc.collection)
    # NOTE(review): a successful delete answers with status 410 --
    # confirm that clients expect this code.
    body = {'status': 'ok'}
    return jsonify(body, status=410)
def file(document_id):
    """Serve a document's source file through ``_serve_archive``.

    The document's content hash, file name and MIME type are passed on.
    """
    doc = get_document(document_id)
    archive_args = (doc.content_hash, doc.file_name, doc.mime_type)
    return _serve_archive(*archive_args)
def pdf(document_id):
    """Serve a document's PDF version through ``_serve_archive``.

    Raises ``BadRequest`` unless the document type is
    ``Document.TYPE_PDF``.
    """
    doc = get_document(document_id)
    # NOTE(review): the check is against TYPE_PDF while the message
    # speaks of text documents -- confirm which one is intended.
    if doc.type == Document.TYPE_PDF:
        return _serve_archive(doc.pdf_version, doc.file_name, PDF_MIME)
    raise BadRequest("PDF is only available for text documents")
def view(document_id):
    """Return a document's dict form plus its ``source`` as JSON."""
    doc = get_document(document_id)
    enable_cache()
    payload = doc.to_dict()
    payload['source'] = doc.source
    return jsonify(payload)