Beispiel #1
0
def add_document_to_bookshelf(
    document_or_uri: t.Union[BaseDocument, DocumentUri],
    category_name: str,
    tags_names: list[str],
    should_add_to_fts: bool,
    database_file: t.PathLike,
):
    """Add the given document to the bookshelf database.

    ``document_or_uri`` may be an already opened document or a
    ``DocumentUri``; a URI is opened with ``create_document`` first.

    When a ``Document`` record with the same URI already exists and
    ``should_add_to_fts`` is true, the number of ``DocumentFTSIndex`` rows
    stored for that record is compared with ``len(document)``. If they match
    the function returns immediately; otherwise ``delete_instance()`` is
    called on the existing record.

    NOTE(review): ``category_name``, ``tags_names`` and ``database_file`` are
    not used in the part of the function visible here -- the body appears to
    be truncated after the index check; confirm against the full source.
    """
    if isinstance(document_or_uri, DocumentUri):
        document = create_document(document_or_uri)
    else:
        document = document_or_uri

    existing_doc = Document.get_or_none(uri=document.uri)
    if existing_doc is not None:
        log.debug("Document already in the database...")
        if should_add_to_fts:
            log.debug("Checking index...")
            indexed_rows = DocumentFTSIndex.select().where(
                DocumentFTSIndex.document_id == existing_doc.get_id()
            )
            if indexed_rows.count() != len(document):
                # Row count disagrees with the document length: drop the
                # stale record instead of trusting a partial index.
                log.debug("Document index is not well formed. Rebuilding index...")
                existing_doc.delete_instance()
            else:
                log.debug("Document index is OK")
                return
Beispiel #2
0
def test_epub_document_section_at_text_position(asset):
    """Check that known text positions in the EPUB 3 spec map to the expected section titles."""
    document = create_document(DocumentUri.from_filename(asset("epub30-spec.epub")))
    # (text position, title of the section expected at that position)
    expected_titles = (
        (247743, "1.1. Purpose and Scope"),
        (370161, "3.1.1. HTML5"),
        (127838, "4.3.2. Metadata "),
        (242323, "B.4.1.2. Description"),
        (17556, "Terminology"),
        (34564, "2.6. Rendering and CSS"),
        (349355, "Acknowledgements and Contributors"),
        (363566, "EPUB 3 Changes from EPUB 2.0.1"),
        (371108, "3.1.5. Content Switching"),
        (135534, "4.3.2. Metadata "),
        (130440, "4.3.2. Metadata "),
        (60425, "2.2. Reading System Conformance"),
        (49786, "4.6. Scripting"),
        (278229, "3.5.2. Media Overlays Metadata Vocabulary"),
        (63656, "3.4.1. The "),
        (380720, "4.1.4. Filesystem Container"),
        (173840, "2.1.3.1.3. Vocabulary Association"),
        (25363, "1.2. Roadmap"),
        (114545, "4.2.2. Default Vocabulary"),
        (9227, "EPUB 3 Specifications - Table of Contents"),
    )
    for position, expected_title in expected_titles:
        found = document.get_section_at_position(position)
        assert found.title == expected_title
Beispiel #3
0
def test_serde_toc_tree(asset):
    """Round-trip the EPUB's TOC tree through dump/load and compare it with the original."""
    spec_path = asset("epub30-spec.epub")
    document = create_document(DocumentUri.from_filename(spec_path))

    dumped = dump_toc_tree(document.toc_tree)
    rebuilt = load_toc_tree(dumped)
    assert len(document.toc_tree) == len(rebuilt)

    # Walk both trees in lockstep; every rebuilt node must keep its title.
    node_pairs = zip(rebuilt.iter_children(), document.toc_tree.iter_children())
    for rebuilt_node, source_node in node_pairs:
        assert rebuilt_node.title == source_node.title
Beispiel #4
0
def test_wire_serde(asset, library):
    """Check the dumped TOC tree survives ``library.dumps``/``library.loads`` and still loads."""
    spec_path = asset("epub30-spec.epub")
    document = create_document(DocumentUri.from_filename(spec_path))

    wire_payload = library.dumps(dump_toc_tree(document.toc_tree))
    rebuilt = load_toc_tree(library.loads(wire_payload))

    # Walk both trees in lockstep; every rebuilt node must keep its title.
    node_pairs = zip(rebuilt.iter_children(), document.toc_tree.iter_children())
    for rebuilt_node, source_node in node_pairs:
        assert rebuilt_node.title == source_node.title
Beispiel #5
0
def _import_document(category_name, should_add_to_fts, filename):
    """Import one file into the default bookshelf database, best-effort.

    The file is opened with ``create_document`` and handed to
    ``add_document_to_bookshelf`` with no tags and
    ``DEFAULT_BOOKSHELF_DATABASE_FILE`` as the target database. The document
    is closed when the import finishes or fails.

    Args:
        category_name: Category passed through to ``add_document_to_bookshelf``.
        should_add_to_fts: Passed through to ``add_document_to_bookshelf``.
        filename: Path of the file to import.

    Returns:
        None. A failing import is logged and otherwise ignored, so one bad
        file does not abort the caller.
    """
    # Local import keeps this block self-contained; it only needs a logger
    # for the failure path.
    import logging

    try:
        uri = DocumentUri.from_filename(filename)
        with contextlib.closing(create_document(uri)) as document:
            add_document_to_bookshelf(
                document,
                category_name,
                tags_names=(),
                should_add_to_fts=should_add_to_fts,
                database_file=DEFAULT_BOOKSHELF_DATABASE_FILE,
            )
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate; the failure is recorded instead of
        # vanishing silently.
        logging.getLogger(__name__).exception(
            "Failed to import document: %s", filename
        )
        return
Beispiel #6
0
def add_to_bookshelf_view():
    """Handle a request to add a document to the bookshelf.

    Reads the JSON request body, which must contain the keys
    ``document_uri``, ``category``, ``tags``, ``should_add_to_fts`` and
    ``database_file``. The document is opened first; if that fails, or the
    document is flagged ``__internal__``, the request is aborted with
    status 400. Otherwise ``add_document_to_bookshelf`` is submitted to
    ``local_bookshelf_process_executor`` and the view returns without
    waiting for it.

    Returns:
        A dict ``{"status": "OK", "document_uri": <uri>}`` once the job has
        been submitted.
    """
    data = request.json
    doc_uri = data["document_uri"]
    try:
        document = create_document(DocumentUri.from_uri_string(doc_uri))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
        # SystemExit are not turned into a 400 response.
        log.exception(f"Failed to open document: {doc_uri}", exc_info=True)
        abort(400, f"Failed to open document: {doc_uri}")
    else:
        if document.__internal__:
            abort(400, f"Document is an internal document: {doc_uri}")
        else:
            # NOTE(review): ``database_file`` comes straight from the request
            # body -- confirm this endpoint is only reachable by trusted
            # clients.
            local_bookshelf_process_executor.submit(
                add_document_to_bookshelf,
                document,
                data["category"],
                data["tags"],
                data["should_add_to_fts"],
                data["database_file"],
            )
            return {"status": "OK", "document_uri": doc_uri}
Beispiel #7
0
def test_epub_metadata(asset):
    """Check the title and author read from the sample EPUB's metadata."""
    book_path = asset("The Diary of a Nobody.epub")
    document = create_document(DocumentUri.from_filename(book_path))
    expected_fields = (
        ("title", "The Diary of a Nobody"),
        ("author", "George Grossmith"),
    )
    for field_name, expected_value in expected_fields:
        assert getattr(document.metadata, field_name) == expected_value