def test_read_with_mode(
    mode: str,
    from_file: bool,
    tmp_path: Path,
    data_pattern_locate: Callable[[bytes], Tuple[int, int]],
) -> None:
    """Open an existing archive with ``mode`` and check what reading yields.

    The archive is provided either as a path on disk (``from_file``) or as an
    in-memory ``BytesIO`` holding ``FILE_BYTES``.
    """
    filename: Union[Path, BytesIO]
    if not from_file:
        filename = BytesIO(FILE_BYTES)
    else:
        filename = tmp_path / "archive.xz"
        filename.write_bytes(FILE_BYTES)

    # the path was just written, so an "x" mode is expected to be refused
    if from_file and "x" in mode:
        with pytest.raises(FileExistsError):
            XZFile(filename, mode=mode)
        return

    with XZFile(filename, mode=mode) as xzfile:
        if "r" in mode:
            # existing content is kept and readable
            assert len(xzfile) == 400
            assert data_pattern_locate(xzfile.read(20)) == (0, 20)
        elif "w" in mode or "x" in mode:
            # the file object starts out empty
            assert len(xzfile) == 0
        else:
            with pytest.raises(UnsupportedOperation):
                xzfile.read(20)
def __init__(
    self,
    filename: _LZMAFilenameType,
    mode: str,
    *,
    check: int = -1,
    preset: _LZMAPresetType = None,
    filters: _LZMAFiltersType = None,
    block_read_strategy: Optional[_BlockReadStrategyType] = None,
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
    newline: Optional[str] = None,
) -> None:
    """Create the binary ``XZFile`` and wrap it as a text stream.

    ``mode`` is passed to ``XZFile`` with every "t" removed; ``check``,
    ``preset``, ``filters`` and ``block_read_strategy`` are forwarded to
    ``XZFile`` unchanged, while ``encoding``, ``errors`` and ``newline`` go to
    the parent initializer.
    """
    # XZFile is given the mode without the text flag
    binary_mode = mode.replace("t", "")
    self.xz_file = XZFile(
        filename,
        binary_mode,
        check=check,
        preset=preset,
        filters=filters,
        block_read_strategy=block_read_strategy,
    )

    # the cast only informs the type checker; the object is passed as is
    binary_stream = cast(BinaryIO, self.xz_file)
    super().__init__(binary_stream, encoding, errors, newline)
class _XZFileText(TextIOWrapper):
    """Text-mode wrapper around an ``XZFile``, built on ``TextIOWrapper``.

    The wrapped binary file object is kept in ``self.xz_file``.
    """

    def __init__(
        self,
        filename: _LZMAFilenameType,
        mode: str,
        *,
        check: int = -1,
        preset: _LZMAPresetType = None,
        filters: _LZMAFiltersType = None,
        block_read_strategy: Optional[_BlockReadStrategyType] = None,
        encoding: Optional[str] = None,
        errors: Optional[str] = None,
        newline: Optional[str] = None,
    ) -> None:
        """Create the binary ``XZFile`` and wrap it as a text stream.

        ``mode`` is passed to ``XZFile`` with every "t" removed; ``encoding``,
        ``errors`` and ``newline`` go to ``TextIOWrapper``.
        """
        binary_mode = mode.replace("t", "")
        self.xz_file = XZFile(
            filename,
            binary_mode,
            check=check,
            preset=preset,
            filters=filters,
            block_read_strategy=block_read_strategy,
        )

        # the cast only informs the type checker; the object is passed as is
        binary_stream = cast(BinaryIO, self.xz_file)
        super().__init__(binary_stream, encoding, errors, newline)

    # NOTE(review): proxy_property is defined elsewhere; presumably each
    # attribute below forwards to the same-named attribute of
    # ``self.xz_file`` -- confirm against its definition.
    check: int = proxy_property("check", "xz_file")
    preset: _LZMAPresetType = proxy_property("preset", "xz_file")
    filters: _LZMAFiltersType = proxy_property("filters", "xz_file")
    stream_boundaries: List[int] = proxy_property("stream_boundaries", "xz_file")
    block_boundaries: List[int] = proxy_property("block_boundaries", "xz_file")
    block_read_strategy: _BlockReadStrategyType = proxy_property(
        "block_read_strategy", "xz_file"
    )

    @property
    def mode(self) -> str:
        """Mode of the wrapped ``XZFile`` followed by "t"."""
        binary_mode = self.xz_file.mode
        return f"{binary_mode}t"

    @wraps(XZFile.change_stream)
    def change_stream(self) -> None:
        # flush this wrapper before delegating to the binary file
        self.flush()
        self.xz_file.change_stream()

    @wraps(XZFile.change_block)
    def change_block(self) -> None:
        # flush this wrapper before delegating to the binary file
        self.flush()
        self.xz_file.change_block()
def test_required_abilities(mode: str, ability: str, init_has_ability: bool) -> None:
    """Check how ``XZFile`` reacts to a file object with or without ``ability``.

    ``ability`` names a method of the wrapped file object (compared here
    against "seekable" and "readable") whose return value is forced to
    ``init_has_ability``.
    """
    fileobj = Mock(wraps=BytesIO(FILE_BYTES))
    getattr(fileobj, ability).return_value = init_has_ability

    # seekable is always expected, "+" modes expect everything, otherwise
    # readable is expected exactly when the mode contains "r"
    if ability == "seekable" or "+" in mode:
        expected_ability = True
    else:
        expected_ability = (ability == "readable") == ("r" in mode)

    if expected_ability and not init_has_ability:
        with pytest.raises(ValueError):
            XZFile(fileobj, mode=mode)
        return

    with XZFile(fileobj, mode=mode) as xzfile:
        reported = getattr(xzfile, ability)()
        assert reported == expected_ability
        assert xzfile.mode == mode.replace("b", "")
def test_read_strategy_calls() -> None:
    """Check which strategy hooks are recorded while reading blocks."""
    fileobj = BytesIO(FILE_BYTES_MANY_SMALL_BLOCKS)
    strategy = Mock()

    with XZFile(fileobj, block_read_strategy=strategy) as xz_file:
        # flatten the blocks of every stream, in order
        blocks = []
        # pylint: disable=protected-access
        for stream in xz_file._fileobjs.values():
            for block in stream._fileobjs.values():
                blocks.append(block)

        # read one byte of each block
        for i in range(10):
            xz_file.seek(i * 10 + 2)
            assert xz_file.read(1) == b"2"
            current = blocks[i]
            assert strategy.method_calls == [
                call.on_create(current),
                call.on_read(current),
            ]
            strategy.method_calls.clear()

        # read all
        xz_file.seek(0)
        xz_file.read()

        expected_calls = []
        for i in range(10):
            expected_calls.append(call.on_read(blocks[i]))
            expected_calls.append(call.on_delete(blocks[i]))
        assert strategy.method_calls == expected_calls
def test_change_check() -> None:
    """Write four streams, setting ``check`` to 4 while the second is open."""
    expected = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"
        "00011602d06110d2"
        "9042990d010000000001595a"
        # stream 2
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016262000000ae1baeb5"
        "00011602d06110d2"
        "9042990d010000000001595a"
        # stream 3 (changed check)
        "fd377a585a000004e6d6b446"
        "0200210116000000742fe5a30100016363000000330d82b4bacc99a6"
        "00011a02dc2ea57e"
        "1fb6f37d010000000004595a"
        # stream 4 (changed check)
        "fd377a585a000004e6d6b446"
        "0200210116000000742fe5a301000164640000009265d6d903b6a5a6"
        "00011a02dc2ea57e"
        "1fb6f37d010000000004595a"
    )

    fileobj = BytesIO()
    with XZFile(fileobj, "w", check=1) as xzfile:
        xzfile.write(b"aa")
        xzfile.change_stream()

        # assigned before the second stream gets its data; the expected
        # bytes show it applied from the third stream on
        xzfile.check = 4
        xzfile.write(b"bb")
        xzfile.change_stream()

        xzfile.write(b"cc")
        xzfile.change_stream()

        xzfile.write(b"dd")

    assert fileobj.getvalue() == expected
def test_change_check_on_existing() -> None:
    """Append to an existing stream after opening with a different ``check``."""
    existing = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"
        "00011602d06110d2"
        "9042990d010000000001595a"
    )
    expected = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"  # existing
        "0200210116000000742fe5a30100016262000000ae1baeb5"  # same check
        "00021602160200008ba0042b"
        "3e300d8b020000000001595a"
        # stream 2 (changed check)
        "fd377a585a000004e6d6b446"
        "0200210116000000742fe5a30100016363000000330d82b4bacc99a6"
        "00011a02dc2ea57e"
        "1fb6f37d010000000004595a"
    )

    fileobj = BytesIO(existing)
    with XZFile(fileobj, "r+", check=4) as xzfile:
        # move to the end so that the writes append
        xzfile.seek(0, SEEK_END)
        xzfile.write(b"bb")
        xzfile.change_stream()
        xzfile.write(b"cc")

    assert fileobj.getvalue() == expected
def test_change_filters_on_existing() -> None:
    """Append blocks to an existing stream after opening with new filters."""
    existing = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"
        "00011602d06110d2"
        "9042990d010000000001595a"
    )
    expected = bytes.fromhex(
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"  # existing
        "02010301002101167920c4ee0100016200000000ae1baeb5"  # new filters
        "02010301002101167920c4ee0100016300000000791ab2db"  # new filters
        "0003160216021602c47fe57f"
        "3e300d8b020000000001595a"
    )
    new_filters = [{"id": 3, "dist": 1}, {"id": 33}]

    fileobj = BytesIO(existing)
    with XZFile(fileobj, "r+", filters=new_filters) as xzfile:
        # move to the end so that the writes append
        xzfile.seek(0, SEEK_END)
        xzfile.write(b"bb")
        xzfile.change_block()
        xzfile.write(b"cc")

    assert fileobj.getvalue() == expected
def test_fileno(tmp_path: Path) -> None:
    """``XZFile.fileno()`` matches the descriptor of the wrapped file."""
    archive = tmp_path / "file.xz"
    archive.write_bytes(FILE_BYTES)

    with archive.open("rb") as fin, XZFile(fin) as xzfile:
        assert xzfile.fileno() == fin.fileno()
def xz_open(
    filename: _LZMAFilenameType,
    mode: str = "rb",
    *,
    # XZFile kwargs
    check: int = -1,
    preset: _LZMAPresetType = None,
    filters: _LZMAFiltersType = None,
    block_read_strategy: Optional[_BlockReadStrategyType] = None,
    # text-mode kwargs
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
    newline: Optional[str] = None,
) -> Union[XZFile, _XZFileText]:
    """Open an XZ file in binary or text mode.

    ``filename`` is handed over to ``XZFile`` as is: either a file name
    (str, bytes or PathLike object) naming the file to open, or an existing
    file object to read from or write to.

    Without "t" in ``mode`` (binary mode), the result is
    ``XZFile(filename, mode, ...)``; ``encoding``, ``errors`` and ``newline``
    must then be left unset, otherwise ``ValueError`` is raised.

    With "t" in ``mode`` (text mode), an ``XZFile`` is created and wrapped in
    an ``io.TextIOWrapper`` using the given encoding, error handling
    behavior, and line ending(s). Combining "t" with "b" raises
    ``ValueError``.
    """
    text_mode = "t" in mode

    if text_mode and "b" in mode:
        raise ValueError(f"Invalid mode: {mode}")

    if not text_mode:
        # text-only arguments are rejected in binary mode, checked in order
        text_only_arguments = (
            (encoding, "Argument 'encoding' not supported in binary mode"),
            (errors, "Argument 'errors' not supported in binary mode"),
            (newline, "Argument 'newline' not supported in binary mode"),
        )
        for value, message in text_only_arguments:
            if value is not None:
                raise ValueError(message)

        return XZFile(
            filename,
            mode,
            check=check,
            preset=preset,
            filters=filters,
            block_read_strategy=block_read_strategy,
        )

    return _XZFileText(
        filename,
        mode,
        check=check,
        preset=preset,
        filters=filters,
        block_read_strategy=block_read_strategy,
        encoding=encoding,
        errors=errors,
        newline=newline,
    )
def test_change_preset() -> None:
    """Write three streams, setting ``preset`` to 9 after the first block."""
    expected = bytes.fromhex(
        ## stream 1
        # header
        "fd377a585a0000016922de36"
        # block 1
        "0200210116000000742fe5a30100016161000000d7198a07"
        # block 2
        "0200210116000000742fe5a30100016262000000ae1baeb5"
        # block 3 (changed preset)
        "020021011c00000010cf58cc0100016363000000791ab2db"
        # block 4 (changed preset)
        "020021011c00000010cf58cc01000164640000001d19970a"
        # index
        "0004160216021602160200008a2bb83b"
        # footer
        "9be35140030000000001595a"
        ## stream 2
        # header
        "fd377a585a0000016922de36"
        # block 1 (changed preset)
        "020021011c00000010cf58cc0100016565000000ca188b64"
        # block 2 (changed preset)
        "020021011c00000010cf58cc0100016666000000b31aafd6"
        # index
        "00021602160200008ba0042b"
        # footer
        "3e300d8b020000000001595a"
        ## stream 3
        # header
        "fd377a585a0000016922de36"
        # block 1 (changed preset)
        "020021011c00000010cf58cc0100016767000000641bb3b8"
        # block 2 (changed preset)
        "020021011c00000010cf58cc01000168680000003a1a94af"
        # index
        "00021602160200008ba0042b"
        # footer
        "3e300d8b020000000001595a"
    )

    fileobj = BytesIO()
    with XZFile(fileobj, "w", check=1) as xzfile:
        # stream 1
        xzfile.write(b"aa")
        xzfile.change_block()

        # assigned before the second block gets its data; the expected
        # bytes show it applied from the third block on
        xzfile.preset = 9
        xzfile.write(b"bb")
        xzfile.change_block()

        xzfile.write(b"cc")
        xzfile.change_block()

        xzfile.write(b"dd")
        xzfile.change_stream()

        # stream 2
        xzfile.write(b"ee")
        xzfile.change_block()

        xzfile.write(b"ff")
        xzfile.change_stream()

        # stream 3
        xzfile.write(b"gg")
        xzfile.change_block()

        xzfile.write(b"hh")

    assert fileobj.getvalue() == expected
def test_write_empty(mode: str, start_empty: bool) -> None:
    """Opening and closing without writing warns and leaves no bytes."""
    if start_empty:
        filename = BytesIO(b"")
    else:
        filename = BytesIO(FILE_BYTES)

    with pytest.warns(RuntimeWarning), XZFile(filename, mode=mode):
        pass

    assert filename.getvalue() == b""
def test_fileno_error(tmp_path: Path) -> None:
    """``fileno()`` raises ``UnsupportedOperation`` if the wrapped one fails."""
    archive = tmp_path / "file.xz"
    archive.write_bytes(FILE_BYTES)

    with archive.open("rb") as fin:
        # same file, except that fileno() raises AttributeError
        mock = Mock(wraps=fin)
        mock.fileno.side_effect = AttributeError()

        with XZFile(mock) as xzfile, pytest.raises(UnsupportedOperation):
            xzfile.fileno()
def test_read_default_strategy(max_block_read_nb: Optional[int]) -> None:
    """Check which reads hit the underlying file for a rolling strategy.

    With ``max_block_read_nb`` set to ``None``, no strategy is passed to
    ``XZFile`` and the expectations are computed for a value of 8.
    """
    fileobj = Mock(wraps=BytesIO(FILE_BYTES_MANY_SMALL_BLOCKS))

    if max_block_read_nb is None:
        strategy = None
        effective_nb = 8
    else:
        strategy = RollingBlockReadStrategy(max_block_read_nb)
        effective_nb = max_block_read_nb

    def whole_block_read(index: int) -> list:
        # read whole block at once
        return [call.seek(12 + 36 * index, SEEK_SET), call.read(36)]

    with XZFile(fileobj, block_read_strategy=strategy) as xz_file:
        fileobj.method_calls.clear()

        # read one byte of each block
        for i in range(10):
            xz_file.seek(i * 10 + 2)
            assert xz_file.read(1) == b"2"
            assert fileobj.method_calls == whole_block_read(i), i
            fileobj.method_calls.clear()

        # read next byte of each block in reverse order
        for i in reversed(range(10)):
            xz_file.seek(i * 10 + 3)
            assert xz_file.read(1) == b"3"
            if i < 10 - effective_nb:
                # decompressor has been cleared, read again
                assert fileobj.method_calls == whole_block_read(i), i
                fileobj.method_calls.clear()
            else:
                # data cached in decompressor, no need to read
                assert not fileobj.method_calls

        if effective_nb > 1:
            # test that alternating between two blocks is fast
            for i in (0, 9):
                xz_file.seek(i * 10 + 4)
                assert xz_file.read(1) == b"4"
            fileobj.method_calls.clear()
            for i in (0, 9):
                xz_file.seek(i * 10 + 5)
                assert xz_file.read(1) == b"5"
                # data cached in decompressor, no need to read
                assert not fileobj.method_calls
def test_change_preset_on_existing() -> None:
    """Append blocks to an existing stream after opening with ``preset=9``."""
    existing = bytes.fromhex(
        # stream 1
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"
        "00011602d06110d2"
        "9042990d010000000001595a"
    )
    expected = bytes.fromhex(
        "fd377a585a0000016922de36"
        "0200210116000000742fe5a30100016161000000d7198a07"  # existing
        "020021011c00000010cf58cc0100016262000000ae1baeb5"  # new preset
        "020021011c00000010cf58cc0100016363000000791ab2db"  # new preset
        "0003160216021602c47fe57f"
        "3e300d8b020000000001595a"
    )

    fileobj = BytesIO(existing)
    with XZFile(fileobj, "r+", preset=9) as xzfile:
        # move to the end so that the writes append
        xzfile.seek(0, SEEK_END)
        xzfile.write(b"bb")
        xzfile.change_block()
        xzfile.write(b"cc")

    assert fileobj.getvalue() == expected
def test_read(
    filetype: str,
    tmp_path: Path,
    data_pattern_locate: Callable[[bytes], Tuple[int, int]],
) -> None:
    """Read ``FILE_BYTES`` at various offsets through ``XZFile``.

    ``filetype`` selects how the archive is handed over: "fileobj" for an
    in-memory ``BytesIO``, "filename" for a ``str`` path, anything else for a
    ``Path`` object.
    """
    filename: Union[Path, BytesIO, str]
    if filetype == "fileobj":
        filename = BytesIO(FILE_BYTES)
    else:
        archive_path = tmp_path / "archive.xz"
        archive_path.write_bytes(FILE_BYTES)
        if filetype == "filename":
            filename = os.fspath(archive_path)
        else:
            filename = archive_path

    with XZFile(filename) as xzfile:
        assert len(xzfile) == 400
        assert xzfile.stream_boundaries == [0, 190]
        assert xzfile.block_boundaries == [0, 100, 190, 250, 310, 370]

        # read from start
        assert data_pattern_locate(xzfile.read(20)) == (0, 20)

        forward_offsets = (
            40,  # middle of a block
            90,  # across two blocks
            160,  # middle of another block
            180,  # across two streams
            320,  # middle of another block
            360,  # across two blocks
        )
        for offset in forward_offsets:
            xzfile.seek(offset)
            assert data_pattern_locate(xzfile.read(20)) == (offset, 20)

        # read until the end
        assert data_pattern_locate(xzfile.read()) == (380, 20)

        backward_offsets = (
            210,  # go backward and read
            60,  # previous stream, before the last read made in that stream
        )
        for offset in backward_offsets:
            xzfile.seek(offset)
            assert data_pattern_locate(xzfile.read(20)) == (offset, 20)

        # read all
        xzfile.seek(0)
        assert data_pattern_locate(xzfile.read()) == (0, 400)

        # read from past the end
        assert xzfile.seek(500) == 500
        assert xzfile.read() == b""
def test_invalid_mode(mode: str) -> None:
    """An unsupported ``mode`` is rejected with a ``ValueError``."""
    source = BytesIO(FILE_BYTES)

    with pytest.raises(ValueError) as exc_info:
        XZFile(source, mode)

    message = str(exc_info.value)
    assert message == f"invalid mode: {mode}"
def test_read_invalid_stream_padding() -> None:
    """Three trailing null bytes after the archive are rejected."""
    padded = FILE_BYTES + b"\x00" * 3
    source = BytesIO(padded)

    with pytest.raises(XZError) as exc_info:
        XZFile(source)

    message = str(exc_info.value)
    assert message == "file: invalid size"
def test_read_invalid_filename_type() -> None:
    """Passing an ``int`` as ``filename`` raises a ``TypeError``."""
    with pytest.raises(TypeError) as exc_info:
        XZFile(42)  # type: ignore[arg-type]

    message = str(exc_info.value)
    assert message == "filename must be a str, bytes, file or PathLike object"
def test_read_no_stream(data: bytes) -> None:
    """``data`` is expected to be rejected as containing no streams."""
    source = BytesIO(data)

    with pytest.raises(XZError) as exc_info:
        XZFile(source)

    message = str(exc_info.value)
    assert message == "file: no streams"
def test_write_with_mode(
    mode: str, from_file: bool, file_exists: bool, tmp_path: Path
) -> None:
    """Write ``b"abc"`` through ``XZFile`` for each mode and kind of target.

    The target is a path (``from_file``) or a ``BytesIO``, and it either
    already holds a one-block archive (``file_exists``) or nothing.
    """
    initial_data = bytes.fromhex(
        "fd377a585a000004e6d6b446"  # header
        "0200210116000000742fe5a301000278797a0000f5e0ef978aa11258"  # block
        "00011b030b2fb910"  # index
        "1fb6f37d010000000004595a"  # footer
    )

    filename: Union[Path, BytesIO]
    if from_file:
        filename = tmp_path / "archive.xz"
        if file_exists:
            filename.write_bytes(initial_data)
    elif file_exists:
        filename = BytesIO(initial_data)
    else:
        filename = BytesIO()

    reading = "r" in mode

    # nothing to read from
    if reading and not file_exists:
        if from_file:
            with pytest.raises(FileNotFoundError):
                XZFile(filename, mode=mode)
        else:
            with pytest.raises(XZError) as exc_info:
                XZFile(filename, mode=mode)
            assert str(exc_info.value) == "file: no streams"
        return

    # "x" modes are expected to refuse an existing path
    if from_file and file_exists and "x" in mode:
        with pytest.raises(FileExistsError):
            XZFile(filename, mode=mode)
        return

    expected_success = not reading or "+" in mode

    with XZFile(filename, mode=mode) as xzfile:
        assert xzfile.tell() == 0
        if reading:
            xzfile.seek(0, SEEK_END)
        if expected_success:
            xzfile.write(b"abc")
        else:
            with pytest.raises(UnsupportedOperation):
                xzfile.write(b"abc")

    if not expected_success:
        return

    if from_file:
        value = cast(Path, filename).read_bytes()
    else:
        value = cast(BytesIO, filename).getvalue()

    if reading:
        expected_value = bytes.fromhex(
            "fd377a585a000004e6d6b446"  # header
            "0200210116000000742fe5a301000278797a0000f5e0ef978aa11258"  # old block
            "0200210116000000742fe5a301000261626300002776271a4a09d82c"  # new block
            "00021b031b0300000f285259"  # index
            "b1c467fb020000000004595a"  # footer
        )
    else:
        expected_value = bytes.fromhex(
            "fd377a585a000004e6d6b446"  # header
            "0200210116000000742fe5a301000261626300002776271a4a09d82c"  # new block
            "00011b030b2fb910"  # index
            "1fb6f37d010000000004595a"  # footer
        )

    assert value == expected_value
def test_write() -> None:
    """Track length and boundaries while writing two streams, then the bytes."""
    expected = bytes.fromhex(
        # stream 1
        "fd377a585a000004e6d6b4460200210116000000742fe5a30100096162630000"
        "0000646566000000b8179b68f9f2cff30200210116000000742fe5a301000267"
        "686900005d4f3084613135140002220a1b0300001b1c3777b1c467fb02000000"
        "0004595a"
        # stream 2
        "fd377a585a000004e6d6b4460200210116000000742fe5a30100026a6b6c0000"
        "2cf7f76df2f5538800011b030b2fb9101fb6f37d010000000004595a"
    )

    filename = BytesIO()
    with XZFile(filename, "w") as xzfile:

        def assert_layout(size: int, streams: list, blocks: list) -> None:
            # length, stream boundaries and block boundaries, in that order
            assert len(xzfile) == size
            assert xzfile.stream_boundaries == streams
            assert xzfile.block_boundaries == blocks

        assert_layout(0, [], [])

        xzfile.change_stream()  # no initial stream change
        assert_layout(0, [], [])

        xzfile.change_block()  # no initial block change
        assert_layout(0, [], [])

        xzfile.write(b"abc")
        assert_layout(3, [0], [0])

        # writing past the current end: the length jumps from 3 to 10
        xzfile.seek(7)
        xzfile.write(b"def")
        assert_layout(10, [0], [0])

        xzfile.change_block()
        assert_layout(10, [0], [0, 10])

        xzfile.change_block()  # no double block change
        assert_layout(10, [0], [0, 10])

        xzfile.write(b"ghi")
        assert_layout(13, [0], [0, 10])

        xzfile.change_stream()
        assert_layout(13, [0, 13], [0, 10])

        xzfile.change_stream()  # no double stream change
        assert_layout(13, [0, 13], [0, 10])

        xzfile.write(b"jkl")
        assert_layout(16, [0, 13], [0, 10, 13])

    assert filename.getvalue() == expected