Beispiel #1
0
def test_reader_by_id(all_zims, filename):
    """Check that every entry of the archive can be fetched by numeric index.

    Opens the archive at ``all_zims / filename`` and, for each index in
    ``range(zim.entry_count)``, asserts that the entry returned by
    ``_get_entry_by_id`` reports that same ``_index`` and that the item
    obtained through ``get_item()`` has a non-negative ``_index``.
    """
    zim = Archive(all_zims / filename)

    # range() already excludes its stop value, so stopping at
    # ``entry_count - 1`` would leave the last entry unchecked.
    for index in range(zim.entry_count):
        # Look the entry up once and reuse it for both assertions.
        entry = zim._get_entry_by_id(index)
        assert entry._index == index
        assert entry.get_item()._index >= 0
Beispiel #2
0
def test_content_ref_keep(all_zims):
    """Keep a memoryview on an item's content after dropping the entry and item.

    Many other items are then loaded so that a dangling pointer behind the
    memoryview would be likely to show up as corrupted content.
    """
    archive = Archive(all_zims / "zimfile.zim")

    def fetch_content():
        # The entry and item are locals of this helper, so the references to
        # them go away when it returns; only the memoryview is handed back.
        source_entry = archive.get_entry_by_path("A/That_Lucky_Old_Sun")
        source_item = source_entry.get_item()
        assert isinstance(source_item.content, memoryview)
        return source_item.content

    # From here on we hold the content but no reference to the entry/item.
    content = fetch_content()
    gc.collect()

    # Touch the content of every second entry, skipping redirects.
    for position in range(0, archive.entry_count, 2):
        other = archive._get_entry_by_id(position)
        if other.is_redirect:
            continue
        _ = other.get_item().content

    # The memoryview obtained earlier must still expose the original bytes.
    expected_start = (
        b'<!DOCTYPE html>\n<html class="client-js"><head>\n  '
        b'<meta charset="UTF-8">\n  <title>That Lucky Old Sun<'  # noqa
    )
    assert len(content) == 3559
    assert bytes(content[:100]) == expected_start
    # Metadata pairs to register on ``zc``: each name is passed title-cased
    # and each value is passed as UTF-8 encoded bytes.
    metadata = (
        ("creator", "python-libzim"),
        ("description", "Created in python"),
        ("name", "Hola"),
        ("publisher", "Monadical"),
        ("title", "Test Zim"),
    )
    for key, text in metadata:
        zc.add_metadata(key.title(), text.encode("UTF-8"))

import sys

import chardet

# Print the interpreter's default string encoding before decoding anything.
print(sys.getdefaultencoding())

print("Testing reader")
zim = Archive(zim_file_path)
entry = zim.get_entry_by_path("Monadical")
print(f"Main entry is at {zim.main_entry.get_item().path}")
print(f"Entry {entry.title} at {entry.path} is {entry.get_item().size}b:")
print("iterator!!!")
# For every non-redirect entry, print chardet's detection result and then the
# content decoded as text.
for i in range(0, zim.entry_count):
    entry = zim._get_entry_by_id(i)
    if not entry.is_redirect:
        # Copy the content out once and reuse the same bytes for both the
        # decode and the encoding detection instead of fetching it twice.
        raw = bytes(entry.get_item().content)
        # NOTE(review): strict UTF-8 decoding raises UnicodeDecodeError on any
        # item whose content is not valid UTF-8 — confirm all items are text.
        strI = raw.decode('utf-8')
        charI = chardet.detect(raw)
        print(charI)
        print(strI)