def test_creator_compression(fpath, lipsum_item):
    """Create ZIM files with every available compression algorithm.

    For each member name of ``libzim.writer.Compression`` a ZIM is written
    twice: once configured with the member's name (a string) and once with
    the attribute of that name fetched from the class. Both files must have
    the same size, and the sizes recorded for the different algorithms must
    all differ from one another. A last ZIM written without any compression
    configuration must have the same size as the ``zstd`` one.
    """

    def write_compressed(target, compression):
        # one ZIM holding only ``lipsum_item``, using the given compression
        with Creator(target).config_compression(compression) as creator:
            creator.add_item(lipsum_item)

    sizes_by_algo = {}
    for algo in libzim.writer.Compression.__members__:
        name_path = fpath.with_name(f"{fpath.name}_{algo}_str.zim")
        write_compressed(name_path, algo)

        value_path = fpath.with_name(f"{fpath.name}_{algo}_val.zim")
        write_compressed(value_path, getattr(libzim.writer.Compression, algo))

        by_name = Archive(name_path)
        assert by_name.checksum
        assert by_name.filesize == Archive(value_path).filesize
        sizes_by_algo[algo] = by_name.filesize

    # every algorithm must have produced its own, distinct file size
    assert len(set(sizes_by_algo.values())) == len(sizes_by_algo)

    # no explicit configuration: the size is expected to match zstd's
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)

    assert Archive(fpath).filesize == sizes_by_algo["zstd"]
Example #2
0
def test_open_badfile(tmpdir):
    """Opening a missing path or a plain-text file must raise RuntimeError."""
    # a path that was never created
    missing = tmpdir / "not-exist.zim"
    with pytest.raises(RuntimeError):
        Archive(missing)

    # a file that exists but only holds text
    bogus = tmpdir / "not-zim.zim"
    with open(bogus, "w") as handle:
        handle.write("text file")
    with pytest.raises(RuntimeError):
        Archive(bogus)
def test_reader_has_index(
    all_zims, filename, has_fulltext_index, has_title_index
):
    """Check the index flags reported by the archive at ``all_zims / filename``.

    ``has_fulltext_index`` and ``has_title_index`` carry the expected values;
    each is compared by identity with the matching archive attribute.
    """
    archive = Archive(all_zims / filename)

    fulltext_flag = archive.has_fulltext_index
    assert fulltext_flag is has_fulltext_index
    title_flag = archive.has_title_index
    assert title_flag is has_title_index
Example #4
0
def test_reader_by_id(all_zims, filename):
    """Fetch entries by numeric id and check that the id round-trips.

    For every id in ``range(zim.entry_count)`` the entry returned by
    ``_get_entry_by_id`` must report that same ``_index``, and the item it
    resolves to must report a non-negative ``_index``.
    """
    zim = Archive(all_zims / filename)

    # ids start at 0, so ``range(entry_count)`` is what reaches the last one;
    # stopping at ``entry_count - 1`` would leave that entry unchecked
    for index in range(zim.entry_count):
        entry = zim._get_entry_by_id(index)
        assert entry._index == index
        assert entry.get_item()._index >= 0
def test_creator_indexing(fpath, lipsum_item, indexing, language, expected):
    """Create a ZIM with a given indexing configuration and read the flag back.

    ``indexing`` and ``language`` are forwarded to
    ``Creator.config_indexing``; the resulting archive must report a
    ``has_fulltext_index`` equal to ``indexing``. ``expected`` is part of the
    signature but is not referenced by the body.
    """
    # Write to the ``fpath`` argument, as the other creator tests do. Replacing
    # it with a hard-coded relative name would drop the ZIM into the current
    # working directory and leave it behind after the run.
    with Creator(fpath).config_indexing(indexing, language) as c:
        c.add_item(lipsum_item)

    zim = Archive(fpath)
    assert zim.has_fulltext_index == indexing
Example #6
0
def test_content_ref_keep(all_zims):
    """Keep a content memoryview after dropping its entry and item.

    The view is obtained inside a helper so that neither the entry nor the
    item stays referenced. Many other items are then loaded before checking
    that the view still exposes the expected bytes, which would reveal a
    dangling pointer.
    """
    archive = Archive(all_zims / "zimfile.zim")

    def grab_content():
        item = archive.get_entry_by_path("A/That_Lucky_Old_Sun").get_item()
        assert isinstance(item.content, memoryview)
        return item.content

    # only the memoryview leaves the helper; entry and item go out of scope
    content = grab_content()
    gc.collect()

    # load the content of every other entry, skipping redirects
    for entry_id in range(0, archive.entry_count, 2):
        candidate = archive._get_entry_by_id(entry_id)
        if candidate.is_redirect:
            continue
        _ = candidate.get_item().content

    # the view obtained at the start must still be intact
    expected_head = (
        b'<!DOCTYPE html>\n<html class="client-js"><head>\n  '
        b'<meta charset="UTF-8">\n  <title>That Lucky Old Sun<'  # noqa
    )
    assert len(content) == 3559
    assert bytes(content[:100]) == expected_head
def test_fileprovider(fpath, lipsum):
    """Add a file-backed ``StaticItem`` to a ZIM and read it back.

    ``lipsum`` is written ten times to ``lipsum.html`` next to ``fpath``.
    The item built from that file must mention its path and title in
    ``str(item)``, the content stored in the ZIM must equal the file's bytes,
    and every non-empty chunk fed by the item's content provider must be a
    ``Blob``.
    """
    source_path = fpath.with_name("lipsum.html")
    with open(source_path, "w") as out:
        out.write(lipsum * 10)

    item = StaticItem(
        mimetype="text/html", filepath=source_path, path=HOME_PATH
    )
    assert HOME_PATH in str(item)
    assert item.get_title() in str(item)

    with Creator(fpath) as creator:
        creator.add_item(item)

    archive = Archive(fpath)
    with open(source_path, "rb") as src:
        stored = bytes(archive.get_entry_by_path(HOME_PATH).get_item().content)
        assert stored == src.read()

    # drain the provider: stop at the first chunk whose size is 0
    provider = item.get_contentprovider()
    while True:
        blob = provider.feed()
        if not blob.size():
            break
        assert isinstance(blob, Blob)
def test_creator_metadata_nooverwrite(fpath, lipsum_item):
    """Adding the same metadata key twice must keep the first value."""
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        # "second" targets a key that is already set; the archive is expected
        # to still hold "first" afterwards
        for attempt in ("first", "second"):
            creator.add_metadata("Key", attempt)
    archive = Archive(fpath)
    stored = archive.get_metadata("Key")
    assert stored.decode("UTF-8") == "first"
def test_creator_metadata(fpath, lipsum_item):
    """Write a set of metadata entries and read every value back.

    Adding metadata on a creator that was not entered as a context manager
    must raise ``RuntimeError`` matching "not started". ``Date`` is supplied
    as a ``datetime.date`` while the other values are supplied as strings;
    all of them are compared, once decoded as UTF-8, to the strings defined
    here.
    """
    metadata = {
        # kiwix-mandatory
        "Name": "wikipedia_fr_football",
        "Title": "English Wikipedia",
        "Creator": "English speaking Wikipedia contributors",
        "Publisher": "Wikipedia user Foobar",
        "Date": "2009-11-21",
        "Description": "All articles (without images) from the english Wikipedia",
        "Language": "eng",
        # optional
        "Longdescription": (
            "This ZIM file contains all articles (without images) "
            "from the english Wikipedia by 2009-11-10."
            " The topics are ..."
        ),
        "Licence": "CC-BY",
        "Tags": (
            "wikipedia;_category:wikipedia;_pictures:no;"
            "_videos:no;_details:yes;_ftindex:yes"
        ),
        "Flavour": "nopic",
        "Source": "https://en.wikipedia.org/",
        "Counter": "image/jpeg=5;image/gif=3;image/png=2",
        "Scraper": "sotoki 1.2.3",
    }

    # a creator that has not been started must refuse metadata
    idle_creator = Creator(fpath)
    first_key, first_value = next(iter(metadata.items()))
    with pytest.raises(RuntimeError, match="not started"):
        idle_creator.add_metadata(first_key, first_value)
    del idle_creator

    year, month, day = (int(part) for part in metadata["Date"].split("-"))
    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        for name, value in metadata.items():
            if name != "Date":
                creator.add_metadata(name, value)

        # the date goes in last, as a date object rather than a string
        creator.add_metadata("Date", datetime.date(year, month, day))

    archive = Archive(fpath)
    for name, value in metadata.items():
        assert archive.get_metadata(name).decode("UTF-8") == value
Example #10
0
def test_reader_checksum(all_zims, filename, has_checksum, is_valid):
    """Check the checksum-related attributes of an archive.

    ``has_checksum`` is the expected value of ``Archive.has_checksum`` and
    ``is_valid`` the expected result of ``Archive.check()``. The checksum
    must be a ``str`` of 32 characters when a checksum is expected and of
    0 characters otherwise, and it must differ from the archive's uuid.
    """
    zim = Archive(all_zims / filename)

    # verify checksum
    assert zim.has_checksum is has_checksum
    checksum = zim.checksum
    assert isinstance(checksum, str)
    # The parentheses matter: ``assert len(x) == 32 if cond else 0`` parses as
    # ``assert (len(x) == 32) if cond else 0``, i.e. ``assert 0`` (always
    # failing) whenever ``cond`` is false.
    assert len(checksum) == (32 if has_checksum else 0)
    assert checksum != zim.uuid
    assert zim.check() is is_valid
def test_creator_mainpath(fpath, lipsum_item):
    """A main entry is present only when the creator was given a main path.

    With ``set_mainpath(HOME_PATH)`` the archive must expose a main entry at
    path "mainPage" whose item has path ``HOME_PATH``. After the file is
    removed and rebuilt without a main path, ``has_main_entry`` must be False
    and reading ``main_entry`` must raise ``RuntimeError``.
    """

    def write_zim(creator):
        # one ZIM holding only ``lipsum_item``
        with creator as started:
            started.add_item(lipsum_item)

    write_zim(Creator(fpath).set_mainpath(HOME_PATH))
    archive = Archive(fpath)
    assert archive.has_main_entry is True
    assert archive.main_entry.path == "mainPage"
    assert archive.main_entry.get_item().path == HOME_PATH

    fpath.unlink()

    write_zim(Creator(fpath))
    archive = Archive(fpath)
    assert archive.has_main_entry is False
    with pytest.raises(RuntimeError):
        assert archive.main_entry
Example #12
0
def test_reader_metadata(
    all_zims, filename, metadata_keys, test_metadata, test_metadata_value
):
    """Compare an archive's metadata keys, and optionally one value.

    ``metadata_keys`` must equal ``Archive.metadata_keys``. When
    ``test_metadata`` is truthy, the metadata stored under that key, decoded
    as UTF-8, must equal ``test_metadata_value``.
    """
    archive = Archive(all_zims / filename)

    assert archive.metadata_keys == metadata_keys
    if not test_metadata:
        return
    raw_value = archive.get_metadata(test_metadata)
    assert raw_value.decode("UTF-8") == test_metadata_value
def test_creator_faviconpath(fpath, favicon_data):
    """A favicon entry is present only when the creator was given its path.

    With ``set_faviconpath(HOME_PATH)`` the archive must expose a favicon
    entry at path "favicon" whose item has path ``HOME_PATH``. After the file
    is removed and rebuilt without a favicon path, ``has_favicon_entry`` must
    be False and reading ``favicon_entry`` must raise ``RuntimeError``.
    """
    favicon_item = StaticItem(
        path=HOME_PATH, content=favicon_data, mimetype="image/png"
    )

    def write_zim(creator):
        # one ZIM holding only the favicon item
        with creator as started:
            started.add_item(favicon_item)

    write_zim(Creator(fpath).set_faviconpath(HOME_PATH))
    archive = Archive(fpath)
    assert archive.has_favicon_entry is True
    assert archive.favicon_entry.path == "favicon"
    assert archive.favicon_entry.get_item().path == HOME_PATH

    fpath.unlink()

    write_zim(Creator(fpath))
    archive = Archive(fpath)
    assert archive.has_favicon_entry is False
    with pytest.raises(RuntimeError):
        assert archive.favicon_entry
Example #14
0
def test_archive_equality(all_zims):
    """Compare an ``Archive`` with other archives, subclasses and look-alikes.

    An archive must equal another archive, or a plain subclass instance,
    opened on the same file. It must differ from an archive opened on another
    file, from an unrelated object that merely has a ``filename`` attribute,
    and from a subclass instance whose ``filename`` property returns 1.
    """

    class Different:
        # not an Archive at all; only stores the name it is given
        def __init__(self, filename):
            self.filename = filename

    class Sub(Archive):
        pass

    class Sub2(Archive):
        # same file on disk, but ``filename`` is overridden to report 1
        filename = property(lambda self: 1)

    same_path = all_zims / "zimfile.zim"
    other_path = all_zims / "example.zim"
    reference = Archive(same_path)

    assert reference != Archive(other_path)
    assert reference == Archive(same_path)
    assert reference != Different(same_path)
    assert reference == Sub(same_path)
    assert reference != Sub2(same_path)
Example #15
0
def test_reader_redirect(all_zims, filename, test_redirect, test_redirect_to):
    """Check a redirect entry and, optionally, where it leads.

    Nothing is checked when ``test_redirect`` is falsy. Otherwise the entry
    at that path must be a redirect. When ``test_redirect_to`` is also set,
    both the redirect entry's target and the item it resolves to must have
    that path, the target must not itself be a redirect, and asking the
    target for a further redirect must raise ``RuntimeError``.
    """
    archive = Archive(all_zims / filename)

    if not test_redirect:
        return
    assert archive.get_entry_by_path(test_redirect).is_redirect

    if not test_redirect_to:
        return
    redirecting = archive.get_entry_by_path(test_redirect)
    assert redirecting.get_redirect_entry().path == test_redirect_to
    # get_item must follow the redirect too
    assert redirecting.get_item().path == test_redirect_to
    # the target is the end of the chain
    assert redirecting.get_redirect_entry().is_redirect is False
    with pytest.raises(RuntimeError):
        redirecting.get_redirect_entry().get_redirect_entry()
def test_stringprovider(fpath, lipsum):
    """Add a string-backed ``StaticItem`` to a ZIM and read it back.

    The item must mention its path and title in ``str(item)``, the content
    stored in the ZIM must equal ``lipsum`` encoded as UTF-8, and every
    non-empty chunk fed by the item's content provider must be a ``Blob``.
    """
    item = StaticItem(mimetype="text/html", content=lipsum, path=HOME_PATH)
    assert HOME_PATH in str(item)
    assert item.get_title() in str(item)

    with Creator(fpath) as creator:
        creator.add_item(item)

    archive = Archive(fpath)
    stored = bytes(archive.get_entry_by_path(HOME_PATH).get_item().content)
    assert stored == lipsum.encode("UTF-8")

    # drain the provider: stop at the first chunk whose size is 0
    provider = item.get_contentprovider()
    while True:
        blob = provider.feed()
        if not blob.size():
            break
        assert isinstance(blob, Blob)
Example #17
0
def test_reader_archive(all_zims, filename, filesize, new_ns, mutlipart, zim_uuid):
    """Check the file-level attributes of the archive at ``all_zims / filename``.

    The file name and size are compared with the path and with
    ``os.path.getsize``; ``filesize`` and ``zim_uuid`` are only compared when
    provided. ``new_ns`` and ``mutlipart`` are the expected values of
    ``has_new_namespace_scheme`` and ``is_multipart``.
    """
    zim_path = all_zims / filename
    archive = Archive(zim_path)

    # values that can be cross-checked from outside the archive
    assert archive.filename == zim_path
    assert archive.filesize == os.path.getsize(zim_path)
    if filesize is not None:
        assert archive.filesize == filesize
    assert archive.has_new_namespace_scheme is new_ns
    assert archive.is_multipart is mutlipart
    assert str(zim_path) in str(archive)

    # the uuid must be a real UUID object with a 32-character hex form
    archive_uuid = archive.uuid
    assert isinstance(archive_uuid, uuid.UUID)
    assert len(archive_uuid.hex) == 32
    if zim_uuid:
        assert archive_uuid.hex == zim_uuid
def test_reader_suggest_search(
    all_zims,
    filename,
    entry_count,
    suggestion_string,
    suggestion_count,
    suggestion_result,
    search_string,
    search_count,
    search_result,
):
    """Check entry count, suggestions and search results of an archive.

    For both ``suggestion_string`` and ``search_string`` the estimated result
    count and the listed results are compared with the expected values.
    """
    archive = Archive(all_zims / filename)
    assert archive.entry_count == entry_count

    # suggestions
    estimated_suggestions = archive.get_estimated_suggestions_results_count(
        suggestion_string
    )
    assert estimated_suggestions == suggestion_count
    assert list(archive.suggest(suggestion_string)) == suggestion_result

    # search
    estimated_matches = archive.get_estimated_search_results_count(search_string)
    assert estimated_matches == search_count
    assert list(archive.search(search_string)) == search_result
Example #19
0
def test_reader_get_entries(
    all_zims,
    filename,
    test_path,
    test_title,
    test_mimetype,
    test_size,
    test_content_includes,
):
    """Look entries up by path and by title and check what they expose.

    Unknown paths and titles must raise ``KeyError``. When ``test_path`` is
    set, the entry found there and its item are checked against
    ``test_title``, ``test_mimetype``, ``test_size`` and
    ``test_content_includes``. When ``test_title`` is set, the lookup by
    title must succeed and, if a lookup by path was also done, lead to the
    same path.
    """
    zim = Archive(all_zims / filename)

    # entries
    with pytest.raises(KeyError):
        zim.get_entry_by_path("___missing")

    # Stays None when no path is supplied, so the title check further down
    # never reads an unbound name.
    entry = None
    if test_path:
        assert zim.has_entry_by_path(test_path)
        entry = zim.get_entry_by_path(test_path)
        assert entry.title == test_title
        assert entry.path == test_path
        assert test_path in str(entry)
        assert test_title in str(entry)

        item = entry.get_item()
        assert item.title == test_title
        assert item.path == test_path
        assert test_path in str(item)
        assert test_title in str(item)
        assert item.mimetype == test_mimetype
        assert item.size == test_size
        assert isinstance(item.content, memoryview)
        assert test_content_includes in bytes(item.content).decode("UTF-8")

    with pytest.raises(KeyError):
        zim.get_entry_by_title("___missing")

    if test_title:
        assert zim.has_entry_by_title(test_title)
        entry_by_title = zim.get_entry_by_title(test_title)
        # the path can only be cross-checked when a path lookup took place
        if entry is not None:
            assert entry_by_title.path == entry.path
def test_creator_redirection(fpath, lipsum_item):
    """Add two redirections to ``HOME_PATH`` and check how they read back.

    Adding a redirection on a creator that was not entered as a context
    manager must raise ``RuntimeError`` matching "not started". The finished
    archive must hold three entries, expose both redirect paths, resolve them
    to ``HOME_PATH`` and list them among the suggestions for "hello" and
    "bonjour" respectively.
    """
    # a creator that has not been started must refuse redirections
    idle_creator = Creator(fpath)
    with pytest.raises(RuntimeError, match="not started"):
        idle_creator.add_redirection("home", "hello", HOME_PATH)
    del idle_creator

    with Creator(fpath) as creator:
        creator.add_item(lipsum_item)
        for redirect_path, label in (("home", "hello"), ("accueil", "bonjour")):
            creator.add_redirection(redirect_path, label, HOME_PATH)

    archive = Archive(fpath)
    assert archive.entry_count == 3
    assert archive.has_entry_by_path("home") is True
    assert archive.has_entry_by_path("accueil") is True

    home_entry = archive.get_entry_by_path("home")
    assert home_entry.is_redirect
    destination_path = archive.get_entry_by_path(HOME_PATH).path
    assert home_entry.get_redirect_entry().path == destination_path
    assert archive.get_entry_by_path("accueil").get_item().path == HOME_PATH

    assert "home" in list(archive.suggest("hello"))
    assert "accueil" in list(archive.suggest("bonjour"))
def test_reader_main_favicon_entries(
    all_zims, filename, new_ns, has_main_entry, has_favicon_entry
):
    """Check presence and, when ``new_ns`` is set, path of main/favicon entries.

    ``has_main_entry`` and ``has_favicon_entry`` are the expected flags. When
    a flag is False, reading the matching entry must raise ``RuntimeError``;
    otherwise the entry must be truthy and, if ``new_ns`` is truthy, sit at
    the path asserted below.

    NOTE(review): test_creator_mainpath and test_creator_faviconpath in this
    file expect "mainPage" and "favicon" for archives they build themselves;
    confirm the "mainPath" and "-/favicon" literals used here match the
    fixture archives.
    """
    archive = Archive(all_zims / filename)

    # main entry
    assert archive.has_main_entry is has_main_entry
    if has_main_entry is not False:
        assert archive.main_entry
        if new_ns:
            assert archive.main_entry.path == "mainPath"
    else:
        with pytest.raises(RuntimeError):
            assert archive.main_entry

    # favicon entry
    assert archive.has_favicon_entry is has_favicon_entry
    if has_favicon_entry is not False:
        assert archive.favicon_entry
        if new_ns:
            assert archive.favicon_entry.path == "-/favicon"
    else:
        with pytest.raises(RuntimeError):
            assert archive.favicon_entry
def test_creator_filename(fpath):
    """The creator and the archive it produces both report ``fpath``."""
    with Creator(fpath) as creator:
        reported = creator.filename
        assert reported == fpath
    archive = Archive(fpath)
    assert archive.filename == fpath
Example #23
0
# Two items to store; ``TestItem``, ``content`` and ``content2`` are not
# defined in this part of the file.
item = TestItem("Monadical_SAS", "Monadical", content)
item2 = TestItem("Monadical_2", "Monadical 2", content2)

# a fresh UUID goes into the output file name
zim_file_path = f"kiwix-test-{uuid.uuid1()}.zim"

print(f"Testing writer for {zim_file_path}")
# configure indexing and the minimum cluster size before entering the creator
configured = Creator(zim_file_path).config_indexing(True, "eng")
configured = configured.config_minclustersize(512)
with configured as zc:
    zc.set_mainpath("Monadical")
    zc.add_item(item)
    zc.add_item(item2)

    metadata = {
        "creator": "python-libzim",
        "description": "Created in python",
        "name": "Hola",
        "publisher": "Monadical",
        "title": "Test Zim",
    }
    for name, value in metadata.items():
        # keys are title-cased ("creator" -> "Creator") and values are passed
        # as UTF-8 encoded bytes
        zc.add_metadata(name.title(), value.encode("UTF-8"))


print("Testing reader")
# read the file back and dump one entry
zim = Archive(zim_file_path)
entry = zim.get_entry_by_path("Monadical")
print(f"Main entry is at {zim.main_entry.get_item().path}")
print(f"Entry {entry.title} at {entry.path} is {entry.get_item().size}b:")
print(bytes(entry.get_item().content).decode("UTF-8"))
Example #24
0
# -*- coding:utf-8 -*-

from libzim.reader import Archive

# Open a ZIM through a hard-coded absolute path (presumably a Chinese
# Wikipedia dump, judging by the file name - adjust for your machine).
zim = Archive("/home/xiaohe/wikiZIM/wikipedia_zh_all.zim")
# Show the path of the item the archive's main entry resolves to.
print(f"Main entry is at {zim.main_entry.get_item().path}")
# Look one entry up by its path, then print its title, path and item size
# followed by the item content decoded as UTF-8.
entry = zim.get_entry_by_path("性交")
print(f"Entry {entry.title} at {entry.path} is {entry.get_item().size}b:")
print(bytes(entry.get_item().content).decode("UTF-8"))