Ejemplo n.º 1
0
def test_read_with_mode(
    mode: str,
    from_file: bool,
    tmp_path: Path,
    data_pattern_locate: Callable[[bytes], Tuple[int, int]],
) -> None:
    """Open FILE_BYTES with ``mode`` and check what reading gives back.

    Depending on ``from_file`` the archive is provided either as a file
    created under ``tmp_path`` or as an in-memory ``BytesIO``.
    """
    source: Union[Path, BytesIO]
    if from_file:
        source = tmp_path / "archive.xz"
        source.write_bytes(FILE_BYTES)
    else:
        source = BytesIO(FILE_BYTES)

    # The path was written just above, so exclusive creation of that same
    # path is expected to be refused.
    if from_file and "x" in mode:
        with pytest.raises(FileExistsError):
            XZFile(source, mode=mode)
        return

    with XZFile(source, mode=mode) as xzfile:
        if "r" in mode:
            # existing content is visible and readable from the start
            assert len(xzfile) == 400
            assert data_pattern_locate(xzfile.read(20)) == (0, 20)
        elif "w" in mode or "x" in mode:
            # the archive is expected to start out empty in these modes
            assert len(xzfile) == 0
        else:
            with pytest.raises(UnsupportedOperation):
                xzfile.read(20)
Ejemplo n.º 2
0
 def __init__(
     self,
     filename: _LZMAFilenameType,
     mode: str,
     *,
     check: int = -1,
     preset: _LZMAPresetType = None,
     filters: _LZMAFiltersType = None,
     block_read_strategy: Optional[_BlockReadStrategyType] = None,
     encoding: Optional[str] = None,
     errors: Optional[str] = None,
     newline: Optional[str] = None,
 ) -> None:
     """Create the binary XZFile and initialise the base class on top of it.

     ``check``, ``preset``, ``filters`` and ``block_read_strategy`` are
     handed to XZFile unchanged; ``encoding``, ``errors`` and ``newline``
     are handed to the base class initialiser.
     """
     # XZFile receives the mode with every "t" removed.
     binary_mode = mode.replace("t", "")

     # The binary file is stored on the instance before the base class is
     # initialised with it.
     self.xz_file = XZFile(
         filename,
         binary_mode,
         check=check,
         preset=preset,
         filters=filters,
         block_read_strategy=block_read_strategy,
     )

     binary_stream = cast(BinaryIO, self.xz_file)
     super().__init__(binary_stream, encoding, errors, newline)
Ejemplo n.º 3
0
class _XZFileText(TextIOWrapper):
    """Text-mode wrapper around an XZFile.

    The binary XZFile is created in ``__init__``, stored as ``xz_file`` and
    used as the buffer of the ``TextIOWrapper`` base class.
    """

    def __init__(
        self,
        filename: _LZMAFilenameType,
        mode: str,
        *,
        check: int = -1,
        preset: _LZMAPresetType = None,
        filters: _LZMAFiltersType = None,
        block_read_strategy: Optional[_BlockReadStrategyType] = None,
        encoding: Optional[str] = None,
        errors: Optional[str] = None,
        newline: Optional[str] = None,
    ) -> None:
        """Open the binary XZFile and wrap it for text access.

        ``check``, ``preset``, ``filters`` and ``block_read_strategy`` are
        handed to XZFile unchanged; ``encoding``, ``errors`` and ``newline``
        are handed to ``TextIOWrapper``.
        """
        # XZFile receives the mode with every "t" removed.
        binary_mode = mode.replace("t", "")
        self.xz_file = XZFile(
            filename,
            binary_mode,
            check=check,
            preset=preset,
            filters=filters,
            block_read_strategy=block_read_strategy,
        )

        binary_stream = cast(BinaryIO, self.xz_file)
        super().__init__(
            binary_stream,
            encoding=encoding,
            errors=errors,
            newline=newline,
        )

    # NOTE(review): proxy_property is defined elsewhere; presumably each
    # entry forwards to the same-named attribute of ``self.xz_file`` --
    # confirm against its definition.
    check: int = proxy_property("check", "xz_file")
    preset: _LZMAPresetType = proxy_property("preset", "xz_file")
    filters: _LZMAFiltersType = proxy_property("filters", "xz_file")
    stream_boundaries: List[int] = proxy_property(
        "stream_boundaries",
        "xz_file",
    )
    block_boundaries: List[int] = proxy_property(
        "block_boundaries",
        "xz_file",
    )
    block_read_strategy: _BlockReadStrategyType = proxy_property(
        "block_read_strategy",
        "xz_file",
    )

    @property
    def mode(self) -> str:
        """Mode of the wrapped binary file followed by a ``t``."""
        binary_mode = self.xz_file.mode
        return f"{binary_mode}t"

    @wraps(XZFile.change_stream)
    def change_stream(self) -> None:
        # Flush the text layer first, then delegate to the binary file.
        self.flush()
        self.xz_file.change_stream()

    @wraps(XZFile.change_block)
    def change_block(self) -> None:
        # Flush the text layer first, then delegate to the binary file.
        self.flush()
        self.xz_file.change_block()
Ejemplo n.º 4
0
def test_required_abilities(mode: str, ability: str,
                            init_has_ability: bool) -> None:
    """Check which abilities of the wrapped file object ``mode`` requires.

    ``ability`` names a boolean method of the file object (the code
    handles "seekable" and "readable" explicitly); the mock reports
    ``init_has_ability`` for it.
    """
    wrapped = Mock(wraps=BytesIO(FILE_BYTES))
    getattr(wrapped, ability).return_value = init_has_ability

    if ability == "seekable" or "+" in mode:
        # seeking is always expected; "+" modes expect every ability
        expected_ability = True
    else:
        # "readable" is expected exactly for "r" modes, any other ability
        # exactly for the remaining modes
        expected_ability = (ability == "readable") == ("r" in mode)

    if expected_ability and not init_has_ability:
        # a needed ability is missing: construction must be refused
        with pytest.raises(ValueError):
            XZFile(wrapped, mode=mode)
        return

    with XZFile(wrapped, mode=mode) as xzfile:
        assert getattr(xzfile, ability)() == expected_ability
        assert xzfile.mode == mode.replace("b", "")
Ejemplo n.º 5
0
def test_read_strategy_calls() -> None:
    """Check which block read strategy hooks are invoked while reading."""
    buffer = BytesIO(FILE_BYTES_MANY_SMALL_BLOCKS)
    strategy = Mock()

    with XZFile(buffer, block_read_strategy=strategy) as xz_file:
        # Collect the block objects of every stream, in order.
        blocks = []
        # pylint: disable=protected-access
        for stream in xz_file._fileobjs.values():
            for block in stream._fileobjs.values():
                blocks.append(block)

        # Reading one byte of a block is expected to report its creation
        # followed by the read itself.
        for index in range(10):
            block = blocks[index]
            xz_file.seek(index * 10 + 2)
            assert xz_file.read(1) == b"2"
            assert strategy.method_calls == [
                call.on_create(block),
                call.on_read(block),
            ]
            strategy.method_calls.clear()

        # Reading everything from the start is expected to report, block
        # after block, a read followed by a deletion.
        xz_file.seek(0)
        xz_file.read()
        expected_calls = []
        for index in range(10):
            expected_calls.append(call.on_read(blocks[index]))
            expected_calls.append(call.on_delete(blocks[index]))
        assert strategy.method_calls == expected_calls
Ejemplo n.º 6
0
def test_change_check() -> None:
    """Change ``check`` while writing four streams and compare the bytes.

    The check is reassigned while the second stream is being written; per
    the expected bytes it shows up from the third stream onwards.
    """
    buffer = BytesIO()

    with XZFile(buffer, "w", check=1) as xzfile:
        # stream 1
        xzfile.write(b"aa")
        xzfile.change_stream()

        # stream 2, with the check reassigned before its data is written
        xzfile.check = 4
        xzfile.write(b"bb")
        xzfile.change_stream()

        # stream 3
        xzfile.write(b"cc")
        xzfile.change_stream()

        # stream 4
        xzfile.write(b"dd")

    expected = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"
        "00011602d06110d2"
        "9042990d010000000001595a"
        # stream 2
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016262000000ae1baeb5"
        "00011602d06110d2"
        "9042990d010000000001595a"
        # stream 3 (changed check)
        "fd377a585a000004e6d6b446"
        "0200210116000000742fe5a30100016363000000330d82b4bacc99a6"
        "00011a02dc2ea57e"
        "1fb6f37d010000000004595a"
        # stream 4 (changed check)
        "fd377a585a000004e6d6b446"
        "0200210116000000742fe5a301000164640000009265d6d903b6a5a6"
        "00011a02dc2ea57e"
        "1fb6f37d010000000004595a")
    assert buffer.getvalue() == expected
Ejemplo n.º 7
0
def test_change_check_on_existing() -> None:
    """Append to an existing archive opened with a different ``check``.

    Per the expected bytes, the block appended to the existing stream keeps
    that stream's check and only the newly started stream uses the check
    passed to the constructor.
    """
    existing = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"
        "00011602d06110d2"
        "9042990d010000000001595a")
    buffer = BytesIO(existing)

    with XZFile(buffer, "r+", check=4) as xzfile:
        # append to the existing stream, then start a new one
        xzfile.seek(0, SEEK_END)
        xzfile.write(b"bb")
        xzfile.change_stream()
        xzfile.write(b"cc")

    expected = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"  # existing
        "0200210116000000742fe5a30100016262000000ae1baeb5"  # same check
        "00021602160200008ba0042b"
        "3e300d8b020000000001595a"
        # stream 2 (changed check)
        "fd377a585a000004e6d6b446"
        "0200210116000000742fe5a30100016363000000330d82b4bacc99a6"
        "00011a02dc2ea57e"
        "1fb6f37d010000000004595a")
    assert buffer.getvalue() == expected
Ejemplo n.º 8
0
def test_change_filters_on_existing() -> None:
    """Append two blocks to an existing archive opened with new filters.

    Per the expected bytes, the existing block is left as it was and both
    appended blocks use the filters passed to the constructor.
    """
    existing = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"
        "00011602d06110d2"
        "9042990d010000000001595a")
    buffer = BytesIO(existing)

    new_filters = [
        {"id": 3, "dist": 1},
        {"id": 33},
    ]

    with XZFile(buffer, "r+", filters=new_filters) as xzfile:
        # append one block, then a second one after a block change
        xzfile.seek(0, SEEK_END)
        xzfile.write(b"bb")
        xzfile.change_block()
        xzfile.write(b"cc")

    expected = bytes.fromhex(
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"  # existing
        "02010301002101167920c4ee0100016200000000ae1baeb5"  # new filters
        "02010301002101167920c4ee0100016300000000791ab2db"  # new filters
        "0003160216021602c47fe57f"
        "3e300d8b020000000001595a")
    assert buffer.getvalue() == expected
Ejemplo n.º 9
0
def test_fileno(tmp_path: Path) -> None:
    """XZFile.fileno() must match the descriptor of the wrapped file."""
    archive = tmp_path / "file.xz"
    archive.write_bytes(FILE_BYTES)

    with archive.open("rb") as fin, XZFile(fin) as xzfile:
        assert xzfile.fileno() == fin.fileno()
Ejemplo n.º 10
0
def xz_open(
    filename: _LZMAFilenameType,
    mode: str = "rb",
    *,
    # XZFile kwargs
    check: int = -1,
    preset: _LZMAPresetType = None,
    filters: _LZMAFiltersType = None,
    block_read_strategy: Optional[_BlockReadStrategyType] = None,
    # text-mode kwargs
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
    newline: Optional[str] = None,
) -> Union[XZFile, _XZFileText]:
    """Open an XZ file in binary or text mode.

    filename is either the name of the file to open (a str, bytes or
    PathLike object) or an existing file object to read from or write
    to.

    Without "t" in mode the call is equivalent to
    XZFile(filename, mode, ...); passing encoding, errors or newline
    then raises ValueError.

    With "t" in mode an XZFile object is created and wrapped in an
    io.TextIOWrapper with the given encoding, error handling behavior
    and line ending(s). A mode containing both "t" and "b" raises
    ValueError.
    """
    if "t" not in mode:
        # Binary mode: the text-only arguments must be left unset. They
        # are checked in declaration order, so the first offending one is
        # the one reported.
        text_only_arguments = (
            ("encoding", encoding),
            ("errors", errors),
            ("newline", newline),
        )
        for name, value in text_only_arguments:
            if value is not None:
                raise ValueError(
                    f"Argument '{name}' not supported in binary mode")

        return XZFile(
            filename,
            mode,
            check=check,
            preset=preset,
            filters=filters,
            block_read_strategy=block_read_strategy,
        )

    # Text mode cannot be combined with binary mode.
    if "b" in mode:
        raise ValueError(f"Invalid mode: {mode}")

    return _XZFileText(
        filename,
        mode,
        check=check,
        preset=preset,
        filters=filters,
        block_read_strategy=block_read_strategy,
        encoding=encoding,
        errors=errors,
        newline=newline,
    )
Ejemplo n.º 11
0
def test_change_preset() -> None:
    """Change ``preset`` while writing several blocks and streams.

    The preset is reassigned while the second block is being written; per
    the expected bytes it shows up from the third block onwards, including
    in the following streams.
    """
    buffer = BytesIO()

    with XZFile(buffer, "w", check=1) as xzfile:
        # stream 1: four blocks, preset reassigned before the second write
        xzfile.write(b"aa")
        xzfile.change_block()
        xzfile.preset = 9
        xzfile.write(b"bb")
        xzfile.change_block()
        xzfile.write(b"cc")
        xzfile.change_block()
        xzfile.write(b"dd")
        xzfile.change_stream()

        # stream 2: two blocks
        xzfile.write(b"ee")
        xzfile.change_block()
        xzfile.write(b"ff")
        xzfile.change_stream()

        # stream 3: two blocks
        xzfile.write(b"gg")
        xzfile.change_block()
        xzfile.write(b"hh")

    expected = bytes.fromhex(
        ## stream 1
        # header
        "fd377a585a0000016922de36"
        # block 1
        "0200210116000000742fe5a30100016161000000d7198a07"
        # block 2
        "0200210116000000742fe5a30100016262000000ae1baeb5"
        # block 3 (changed preset)
        "020021011c00000010cf58cc0100016363000000791ab2db"
        # block 4 (changed preset)
        "020021011c00000010cf58cc01000164640000001d19970a"
        # index
        "0004160216021602160200008a2bb83b"
        # footer
        "9be35140030000000001595a"
        ## stream 2
        # header
        "fd377a585a0000016922de36"
        # block 1 (changed preset)
        "020021011c00000010cf58cc0100016565000000ca188b64"
        # block 2 (changed preset)
        "020021011c00000010cf58cc0100016666000000b31aafd6"
        # index
        "00021602160200008ba0042b"
        # footer
        "3e300d8b020000000001595a"
        ## stream 3
        # header
        "fd377a585a0000016922de36"
        # block 1 (changed preset)
        "020021011c00000010cf58cc0100016767000000641bb3b8"
        # block 2 (changed preset)
        "020021011c00000010cf58cc01000168680000003a1a94af"
        # index
        "00021602160200008ba0042b"
        # footer
        "3e300d8b020000000001595a")
    assert buffer.getvalue() == expected
Ejemplo n.º 12
0
def test_write_empty(mode: str, start_empty: bool) -> None:
    """Open with ``mode`` and close without writing anything.

    A RuntimeWarning is expected, and the underlying buffer must be empty
    afterwards whether or not it held an archive to begin with.
    """
    if start_empty:
        initial = b""
    else:
        initial = FILE_BYTES
    buffer = BytesIO(initial)

    with pytest.warns(RuntimeWarning), XZFile(buffer, mode=mode):
        pass

    assert buffer.getvalue() == b""
Ejemplo n.º 13
0
def test_fileno_error(tmp_path: Path) -> None:
    """fileno() must raise UnsupportedOperation if the wrapped one fails."""
    archive = tmp_path / "file.xz"
    archive.write_bytes(FILE_BYTES)

    with archive.open("rb") as fin:
        # Same file underneath, but asking for its descriptor fails.
        broken = Mock(wraps=fin)
        broken.fileno.side_effect = AttributeError()

        with XZFile(broken) as xzfile, pytest.raises(UnsupportedOperation):
            xzfile.fileno()
Ejemplo n.º 14
0
def test_read_default_strategy(max_block_read_nb: Optional[int]) -> None:
    """Check when reads reach the underlying file object.

    With ``max_block_read_nb`` set, a RollingBlockReadStrategy built from
    it is used; with ``None`` no strategy is passed and the test expects
    the same outcome as for a value of 8.
    """
    fileobj = Mock(wraps=BytesIO(FILE_BYTES_MANY_SMALL_BLOCKS))

    use_default = max_block_read_nb is None
    kept_blocks = 8 if use_default else max_block_read_nb
    strategy = (None if use_default else
                RollingBlockReadStrategy(max_block_read_nb))

    with XZFile(fileobj, block_read_strategy=strategy) as xz_file:
        # ignore whatever was read while opening the file
        fileobj.method_calls.clear()

        # read one byte of each block
        for i in range(10):
            xz_file.seek(i * 10 + 2)
            assert xz_file.read(1) == b"2"
            # the whole block is read at once
            expected_calls = [
                call.seek(12 + 36 * i, SEEK_SET),
                call.read(36),
            ]
            assert fileobj.method_calls == expected_calls, i
            fileobj.method_calls.clear()

        # read next byte of each block in reverse order
        for i in reversed(range(10)):
            xz_file.seek(i * 10 + 3)
            assert xz_file.read(1) == b"3"

            if i >= 10 - kept_blocks:
                # data cached in decompressor, no need to read
                assert not fileobj.method_calls
                continue

            # decompressor has been cleared: the whole block is read again
            expected_calls = [
                call.seek(12 + 36 * i, SEEK_SET),
                call.read(36),
            ]
            assert fileobj.method_calls == expected_calls, i
            fileobj.method_calls.clear()

        if kept_blocks > 1:
            # test that alternating between two blocks is fast
            for i in (0, 9):
                xz_file.seek(i * 10 + 4)
                assert xz_file.read(1) == b"4"
            fileobj.method_calls.clear()

            for i in (0, 9):
                xz_file.seek(i * 10 + 5)
                assert xz_file.read(1) == b"5"
                # data cached in decompressor, no need to read
                assert not fileobj.method_calls
Ejemplo n.º 15
0
def test_change_preset_on_existing() -> None:
    """Append two blocks to an existing archive opened with a new preset.

    Per the expected bytes, the existing block is left as it was and both
    appended blocks use the preset passed to the constructor.
    """
    existing = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"
        "00011602d06110d2"
        "9042990d010000000001595a")
    buffer = BytesIO(existing)

    with XZFile(buffer, "r+", preset=9) as xzfile:
        # append one block, then a second one after a block change
        xzfile.seek(0, SEEK_END)
        xzfile.write(b"bb")
        xzfile.change_block()
        xzfile.write(b"cc")

    expected = bytes.fromhex(
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"  # existing
        "020021011c00000010cf58cc0100016262000000ae1baeb5"  # new preset
        "020021011c00000010cf58cc0100016363000000791ab2db"  # new preset
        "0003160216021602c47fe57f"
        "3e300d8b020000000001595a")
    assert buffer.getvalue() == expected
Ejemplo n.º 16
0
def test_read(
    filetype: str,
    tmp_path: Path,
    data_pattern_locate: Callable[[bytes], Tuple[int, int]],
) -> None:
    """Read FILE_BYTES at various offsets through an XZFile.

    ``filetype`` selects how the archive is handed over: "fileobj" for an
    in-memory ``BytesIO``, "filename" for a path converted with
    ``os.fspath``, and anything else for the ``Path`` object itself.
    """
    source: Union[Path, BytesIO, str]
    if filetype == "fileobj":
        source = BytesIO(FILE_BYTES)
    else:
        archive = tmp_path / "archive.xz"
        archive.write_bytes(FILE_BYTES)
        source = os.fspath(archive) if filetype == "filename" else archive

    with XZFile(source) as xzfile:
        assert len(xzfile) == 400
        assert xzfile.stream_boundaries == [0, 190]
        assert xzfile.block_boundaries == [0, 100, 190, 250, 310, 370]

        # read from start
        assert data_pattern_locate(xzfile.read(20)) == (0, 20)

        # 20-byte reads going forward, in this order:
        #    40: middle of a block
        #    90: across two blocks
        #   160: middle of another block
        #   180: across two streams
        #   320: middle of another block
        #   360: across two blocks
        for offset in (40, 90, 160, 180, 320, 360):
            xzfile.seek(offset)
            assert data_pattern_locate(xzfile.read(20)) == (offset, 20)

        # read until the end
        assert data_pattern_locate(xzfile.read()) == (380, 20)

        # 20-byte reads going backward, in this order:
        #   210: earlier position in the stream just read
        #    60: previous stream
        for offset in (210, 60):
            xzfile.seek(offset)
            assert data_pattern_locate(xzfile.read(20)) == (offset, 20)

        # read all
        xzfile.seek(0)
        assert data_pattern_locate(xzfile.read()) == (0, 400)

        # read from past the end
        assert xzfile.seek(500) == 500
        assert xzfile.read() == b""
Ejemplo n.º 17
0
def test_invalid_mode(mode: str) -> None:
    """Constructing an XZFile with ``mode`` must raise ValueError."""
    buffer = BytesIO(FILE_BYTES)
    expected_message = f"invalid mode: {mode}"

    with pytest.raises(ValueError) as caught:
        XZFile(buffer, mode)

    assert str(caught.value) == expected_message
Ejemplo n.º 18
0
def test_read_invalid_stream_padding() -> None:
    """FILE_BYTES followed by three NUL bytes must be rejected."""
    padded = BytesIO(FILE_BYTES + b"\x00" * 3)
    expected_message = "file: invalid size"

    with pytest.raises(XZError) as caught:
        XZFile(padded)

    assert str(caught.value) == expected_message
Ejemplo n.º 19
0
def test_read_invalid_filename_type() -> None:
    """Passing an int as filename must raise TypeError."""
    expected_message = (
        "filename must be a str, bytes, file or PathLike object")

    with pytest.raises(TypeError) as caught:
        XZFile(42)  # type: ignore[arg-type]

    assert str(caught.value) == expected_message
Ejemplo n.º 20
0
def test_read_no_stream(data: bytes) -> None:
    """Opening ``data`` must fail with the "no streams" error."""
    buffer = BytesIO(data)
    expected_message = "file: no streams"

    with pytest.raises(XZError) as caught:
        XZFile(buffer)

    assert str(caught.value) == expected_message
Ejemplo n.º 21
0
def test_write_with_mode(mode: str, from_file: bool, file_exists: bool,
                         tmp_path: Path) -> None:
    """Write b"abc" through an XZFile opened with ``mode``.

    The target is a path under ``tmp_path`` or an in-memory ``BytesIO``
    (``from_file``), holding a one-block archive beforehand or not
    (``file_exists``).
    """
    initial_data = bytes.fromhex(
        "fd377a585a000004e6d6b446"  # header
        "0200210116000000742fe5a301000278797a0000f5e0ef978aa11258"  # block
        "00011b030b2fb910"  # index
        "1fb6f37d010000000004595a"  # footer
    )
    reading = "r" in mode

    target: Union[Path, BytesIO]
    if from_file:
        target = tmp_path / "archive.xz"
        if file_exists:
            target.write_bytes(initial_data)
    elif file_exists:
        target = BytesIO(initial_data)
    else:
        target = BytesIO()

    # Read modes need something to read.
    if reading and not file_exists:
        if from_file:
            with pytest.raises(FileNotFoundError):
                XZFile(target, mode=mode)
        else:
            with pytest.raises(XZError) as exc_info:
                XZFile(target, mode=mode)
            assert str(exc_info.value) == "file: no streams"
        return

    # Exclusive creation must refuse a path that already exists.
    if from_file and file_exists and "x" in mode:
        with pytest.raises(FileExistsError):
            XZFile(target, mode=mode)
        return

    # Writing is expected to work unless the mode is read-only.
    expected_success = not reading or "+" in mode

    with XZFile(target, mode=mode) as xzfile:
        assert xzfile.tell() == 0
        if reading:
            # append after the existing data
            xzfile.seek(0, SEEK_END)

        if expected_success:
            xzfile.write(b"abc")
        else:
            with pytest.raises(UnsupportedOperation):
                xzfile.write(b"abc")

    if not expected_success:
        return

    if from_file:
        value = cast(Path, target).read_bytes()
    else:
        value = cast(BytesIO, target).getvalue()

    if reading:
        # the new block is added next to the old one
        expected_value = bytes.fromhex(
            "fd377a585a000004e6d6b446"  # header
            "0200210116000000742fe5a301000278797a0000f5e0ef978aa11258"  # old block
            "0200210116000000742fe5a301000261626300002776271a4a09d82c"  # new block
            "00021b031b0300000f285259"  # index
            "b1c467fb020000000004595a"  # footer
        )
    else:
        # only the new block is present
        expected_value = bytes.fromhex(
            "fd377a585a000004e6d6b446"  # header
            "0200210116000000742fe5a301000261626300002776271a4a09d82c"  # new block
            "00011b030b2fb910"  # index
            "1fb6f37d010000000004595a"  # footer
        )
    assert value == expected_value
Ejemplo n.º 22
0
def test_write() -> None:
    """Write across block and stream changes, tracking the boundaries.

    After every step the length, stream boundaries and block boundaries of
    the file are checked; the bytes produced are compared at the end.
    """
    buffer = BytesIO()

    with XZFile(buffer, "w") as xzfile:

        def expect_layout(length: int, streams: object,
                          blocks: object) -> None:
            # Compare the current length and boundaries with expectations.
            assert len(xzfile) == length
            assert xzfile.stream_boundaries == streams
            assert xzfile.block_boundaries == blocks

        expect_layout(0, [], [])

        xzfile.change_stream()  # no initial stream change
        expect_layout(0, [], [])

        xzfile.change_block()  # no initial block change
        expect_layout(0, [], [])

        xzfile.write(b"abc")
        expect_layout(3, [0], [0])

        # writing after seeking past the end: the length becomes 10
        xzfile.seek(7)
        xzfile.write(b"def")
        expect_layout(10, [0], [0])

        xzfile.change_block()
        expect_layout(10, [0], [0, 10])

        xzfile.change_block()  # no double block change
        expect_layout(10, [0], [0, 10])

        xzfile.write(b"ghi")
        expect_layout(13, [0], [0, 10])

        xzfile.change_stream()
        expect_layout(13, [0, 13], [0, 10])

        xzfile.change_stream()  # no double stream change
        expect_layout(13, [0, 13], [0, 10])

        xzfile.write(b"jkl")
        expect_layout(16, [0, 13], [0, 10, 13])

    expected = bytes.fromhex(
        # stream 1
        "fd377a585a000004e6d6b4460200210116000000742fe5a30100096162630000"
        "0000646566000000b8179b68f9f2cff30200210116000000742fe5a301000267"
        "686900005d4f3084613135140002220a1b0300001b1c3777b1c467fb02000000"
        "0004595a"
        # stream 2
        "fd377a585a000004e6d6b4460200210116000000742fe5a30100026a6b6c0000"
        "2cf7f76df2f5538800011b030b2fb9101fb6f37d010000000004595a")
    assert buffer.getvalue() == expected