def records(document_id=None, collection_id=None):
    """Stream records for a collection, a single document, or everything.

    ``request.authz.can_export()`` must hold in every case. A given
    ``collection_id`` takes precedence over ``document_id``: the collection
    is fetched via ``get_db_collection`` with ``request.authz.READ`` and an
    ``Audit.ACT_COLLECTION`` entry is recorded. Otherwise a given
    ``document_id`` is fetched via ``get_db_document`` and an
    ``Audit.ACT_ENTITY`` entry is recorded. With neither argument the
    request must additionally satisfy ``request.authz.is_admin``.

    Returns whatever ``stream_ijson`` produces for the record iterator.
    """
    require(request.authz.can_export())
    if collection_id is None and document_id is None:
        # Nothing narrows the record set, so this is a *full* export.
        require(request.authz.is_admin)
    elif collection_id is not None:
        get_db_collection(collection_id, request.authz.READ)
        record_audit(Audit.ACT_COLLECTION, id=collection_id)
    else:
        get_db_document(document_id)
        record_audit(Audit.ACT_ENTITY, id=document_id)
    return stream_ijson(iter_records(document_id=document_id,
                                     collection_id=collection_id))
Example #2
0
def file(document_id):
    """Serve the archived file belonging to a document.

    The document is fetched with ``get_db_document`` and an
    ``Audit.ACT_ENTITY`` entry is recorded for it. The archive response is
    built from the document's content hash, safe file name and MIME type,
    then passed through ``_resp_canonical`` along with the document id.
    """
    document = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    archive_args = (document.content_hash,
                    document.safe_file_name,
                    document.mime_type)
    return _resp_canonical(_serve_archive(*archive_args), document_id)
Example #3
0
def update(document_id):
    """Apply a ``DocumentUpdateSchema`` payload to a document.

    The document is fetched with ``request.authz.WRITE``, updated from the
    parsed request data and committed. ``update_document`` is then called
    with ``shallow=True`` and the ``sync`` flag (which defaults to ``True``
    here). The response is the one produced by ``view`` for the same id.
    """
    document = get_db_document(document_id, request.authz.WRITE)
    document.update(parse_request(DocumentUpdateSchema))
    db.session.commit()
    sync = get_flag('sync', True)
    update_document(document, shallow=True, sync=sync)
    return view(document_id)
Example #4
0
def pdf(document_id):
    """Serve the PDF version of a document from the archive.

    Raises ``BadRequest`` when the document does not support pages. When
    the PDF version hash differs from the content hash, ``.pdf`` is
    appended to the served file name.
    """
    document = get_db_document(document_id)
    if not document.supports_pages:
        raise BadRequest("PDF is only available for text documents")
    base_name = document.safe_file_name
    is_separate_pdf = document.pdf_version != document.content_hash
    file_name = '%s.pdf' % base_name if is_separate_pdf else base_name
    return _serve_archive(document.pdf_version, file_name, PDF_MIME)
Example #5
0
def update(document_id):
    """Apply a ``DocumentUpdateSchema`` payload to a document.

    The document is fetched with ``request.authz.WRITE``, updated from the
    parsed request data and committed. Afterwards ``update_document`` is
    called for the document and ``update_collection`` for its collection.
    The response is the one produced by ``view`` for the same id.
    """
    document = get_db_document(document_id, request.authz.WRITE)
    document.update(parse_request(DocumentUpdateSchema))
    db.session.commit()
    update_document(document)
    collection = document.collection
    update_collection(collection)
    return view(document_id)
Example #6
0
def record(document_id, index):
    """Return one record of a document, looked up by its index.

    Raises ``BadRequest`` when the document does not support records and
    ``NotFound`` when ``DocumentRecord.by_index`` yields ``None``. The
    record is returned through ``jsonify`` with ``RecordSchema``.
    """
    enable_cache()
    document = get_db_document(document_id)
    if not document.supports_records:
        raise BadRequest("This document does not have records.")
    found = DocumentRecord.by_index(document.id, index)
    if found is not None:
        return jsonify(found, schema=RecordSchema)
    raise NotFound("No such record: %s" % index)
Example #7
0
def records(document_id):
    """Query the records of a document.

    Raises ``BadRequest`` when the document does not support records.
    Otherwise the current request is handed to ``RecordsQuery.handle``
    together with the document and ``RecordSchema``, and the result is
    returned through ``jsonify``.
    """
    enable_cache()
    document = get_db_document(document_id)
    if document.supports_records:
        return jsonify(RecordsQuery.handle(request,
                                           document=document,
                                           schema=RecordSchema))
    raise BadRequest("This document does not have records.")
Example #8
0
def record(document_id, index):
    """Return one record of a document, looked up by its index.

    An ``Audit.ACT_ENTITY`` entry is recorded as soon as the document has
    been fetched, before any validation. Raises ``BadRequest`` when the
    document does not support records and ``NotFound`` when
    ``DocumentRecord.by_index`` yields ``None``. The record is serialized
    with ``RecordSchema`` via ``serialize_data``.
    """
    enable_cache()
    document = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    if not document.supports_records:
        raise BadRequest("This document does not have records.")
    found = DocumentRecord.by_index(document.id, index)
    if found is not None:
        return serialize_data(found, RecordSchema)
    raise NotFound("No such record: %s" % index)
Example #9
0
def pdf(document_id):
    """Serve the PDF version of a document from the archive.

    An ``Audit.ACT_ENTITY`` entry is recorded once the document has been
    fetched. Raises ``BadRequest`` when the document does not support
    pages. When the PDF version hash differs from the content hash,
    ``.pdf`` is appended to the served file name. The archive response is
    passed through ``_resp_canonical`` along with the document id.
    """
    document = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    if not document.supports_pages:
        raise BadRequest("PDF is only available for text documents")
    base_name = document.safe_file_name
    is_separate_pdf = document.pdf_version != document.content_hash
    file_name = '%s.pdf' % base_name if is_separate_pdf else base_name
    archive_resp = _serve_archive(document.pdf_version, file_name, PDF)
    return _resp_canonical(archive_resp, document_id)
Example #10
0
def content(document_id):
    """Return the headers, text body and sanitized HTML of a document.

    An ``Audit.ACT_ENTITY`` entry is recorded once the document has been
    fetched. The HTML is produced by ``sanitize_html`` from the raw body
    and the document's source URL.
    """
    enable_cache()
    document = get_db_document(document_id)
    record_audit(Audit.ACT_ENTITY, id=document_id)
    headers = document.headers
    text = document.body_text
    html = sanitize_html(document.body_raw, document.source_url)
    return jsonify({'headers': headers, 'text': text, 'html': html})
Example #11
0
def view(document_id):
    """Return the indexed document data enriched with database content.

    The data from ``get_index_document`` always gains a ``headers`` entry.
    It gains ``html`` (sanitized) and/or ``text`` entries when the
    corresponding schema names appear in ``document.model.names``. The
    result is returned through ``jsonify`` with ``CombinedSchema``.
    """
    enable_cache()
    data = get_index_document(document_id)
    document = get_db_document(document_id)
    data['headers'] = document.headers
    # TODO: should the body content be its own API? Probably so, but then it
    # is unclear whether to wrap it in JSON or serve it plain with the
    # correct MIME type.
    if Document.SCHEMA_HTML in document.model.names:
        raw_body, source_url = document.body_raw, document.source_url
        data['html'] = sanitize_html(raw_body, source_url)
    if Document.SCHEMA_TEXT in document.model.names:
        body_text = document.body_text
        data['text'] = body_text
    return jsonify(data, schema=CombinedSchema)
Example #12
0
def delete(document_id):
    """Delete a document and respond with an empty body and status 204.

    The document is fetched with ``request.authz.WRITE`` and handed to
    ``delete_document`` with ``sync=True``.
    """
    target = get_db_document(document_id, request.authz.WRITE)
    delete_document(target, sync=True)
    body, status = '', 204
    return (body, status)
Example #13
0
def delete(document_id):
    """Delete a document and respond with an empty body and status 204.

    The document is fetched with ``request.authz.WRITE`` and handed to
    ``delete_document`` with ``sync=True``.
    """
    delete_document(get_db_document(document_id, request.authz.WRITE),
                    sync=True)
    return '', 204
Example #14
0
def file(document_id):
    """Serve the archived file belonging to a document.

    The archive response is built from the document's content hash, safe
    file name and MIME type and returned as-is.
    """
    document = get_db_document(document_id)
    content_hash = document.content_hash
    file_name = document.safe_file_name
    mime_type = document.mime_type
    return _serve_archive(content_hash, file_name, mime_type)
Example #15
0
def delete(document_id):
    """Delete a document and refresh its collection.

    The document is fetched with ``request.authz.WRITE`` and deleted, then
    ``update_collection`` is called for its collection. The response is a
    JSON ``{'status': 'ok'}`` payload sent with status code 410.
    """
    target = get_db_document(document_id, request.authz.WRITE)
    delete_document(target)
    collection = target.collection
    update_collection(collection)
    payload = {'status': 'ok'}
    return jsonify(payload, status=410)
Example #16
0
def delete(document_id):
    """Delete a document and respond with an empty body and status 204.

    The document is fetched with ``request.authz.WRITE`` and deleted, then
    ``update_collection`` is called for its collection and
    ``refresh_index`` for the entities index.
    """
    target = get_db_document(document_id, request.authz.WRITE)
    delete_document(target)
    collection = target.collection
    update_collection(collection)
    index = entities_index()
    refresh_index(index)
    return '', 204
Example #17
0
def file(document_id):
    """Serve the archived file belonging to a document.

    The archive response is built from the document's content hash, safe
    file name and MIME type, then passed through ``_resp_canonical`` along
    with the document id.
    """
    document = get_db_document(document_id)
    archive_args = (document.content_hash,
                    document.safe_file_name,
                    document.mime_type)
    archive_resp = _serve_archive(*archive_args)
    return _resp_canonical(archive_resp, document_id)