def calccrc(self, data: bytes):
    """Record size/CRC of the next-header *data*, then refresh the start-header CRC."""
    self.nextheadersize = len(data)
    self.nextheadercrc = calculate_crc32(data)
    assert self.nextheaderofs is not None
    scratch = io.BytesIO()
    write_real_uint64(scratch, self.nextheaderofs)
    write_real_uint64(scratch, self.nextheadersize)
    write_uint32(scratch, self.nextheadercrc)
    self.startheadercrc = calculate_crc32(scratch.getvalue())
def _read(self, file: BinaryIO) -> None:
    """Parse the 7z signature header and verify it against its start-header CRC."""
    file.seek(len(MAGIC_7Z), 0)
    self.version = read_bytes(file, 2)
    self.startheadercrc, _ = read_uint32(file)
    self.nextheaderofs, raw = read_real_uint64(file)
    running = calculate_crc32(raw)
    self.nextheadersize, raw = read_real_uint64(file)
    running = calculate_crc32(raw, running)
    self.nextheadercrc, raw = read_uint32(file)
    running = calculate_crc32(raw, running)
    if running != self.startheadercrc:
        raise Bad7zFile('invalid header data')
def calccrc(self, header: Header):
    """Serialize *header*, record its size/CRC, then recompute the start-header CRC."""
    serialized = io.BytesIO()
    header.write(serialized)
    payload = serialized.getvalue()
    self.nextheadersize = len(payload)
    self.nextheadercrc = calculate_crc32(payload)
    assert self.nextheaderofs is not None
    scratch = io.BytesIO()
    write_uint64(scratch, self.nextheaderofs)
    write_uint64(scratch, self.nextheadersize)
    write_uint32(scratch, self.nextheadercrc)
    self.startheadercrc = calculate_crc32(scratch.getvalue())
def compress(self, fp: BinaryIO, folder, f: Handler):
    """Feed *f* through the folder's compressor into *fp*; return bytes written."""
    compressor = folder.get_compressor()
    written = 0
    for chunk in f.read(Configuration.get('read_blocksize')):
        packed = compressor.compress(chunk)
        folder.crc = calculate_crc32(packed, folder.crc)
        written += len(packed)
        fp.write(packed)
    tail = compressor.flush()
    folder.crc = calculate_crc32(tail, folder.crc)
    written += len(tail)
    fp.write(tail)
    return written
def compress(self, fp: BinaryIO, folder, f: Handler):
    """Compress a file-ish object *f* into *fp* where the folder data lives.

    The folder's running CRC is updated with each compressed chunk; the
    total number of compressed bytes written is returned.
    """
    compressor = folder.get_compressor()
    total = 0
    for block in f.read(Configuration.get('read_blocksize')):
        out = compressor.compress(block)
        folder.crc = calculate_crc32(out, folder.crc)
        total += len(out)
        fp.write(out)
    out = compressor.flush()
    folder.crc = calculate_crc32(out, folder.crc)
    total += len(out)
    fp.write(out)
    return total
def _read(self, file: BinaryIO) -> None:
    """Parse the signature header fields and check them against the start-header CRC."""
    file.seek(len(MAGIC_7Z), 0)
    # Read the two version bytes in order; tuple elements evaluate left-to-right.
    self.version = (file.read(1), file.read(1))
    self.startheadercrc, _ = read_uint32(file)
    self.nextheaderofs, raw = read_real_uint64(file)
    digest = calculate_crc32(raw)
    self.nextheadersize, raw = read_real_uint64(file)
    digest = calculate_crc32(raw, digest)
    self.nextheadercrc, raw = read_uint32(file)
    digest = calculate_crc32(raw, digest)
    if digest != self.startheadercrc:
        raise Bad7zFile("invalid header data")
def compress(self, fp: BinaryIO, folder, fq: IO[Any]):
    """Compress specified file-ish *fq* into folder where *fp* is placed.

    Returns the number of compressed bytes written to *fp*.
    """
    compressor = folder.get_compressor()
    written = 0
    for chunk in fq.read(READ_BLOCKSIZE):
        packed = compressor.compress(chunk)
        folder.crc = calculate_crc32(packed, folder.crc)
        written += len(packed)
        fp.write(packed)
    packed = compressor.flush()
    folder.crc = calculate_crc32(packed, folder.crc)
    written += len(packed)
    fp.write(packed)
    return written
def decompress(self, fp, max_length: int = -1) -> bytes:
    # Return up to max_length decompressed bytes (everything when negative),
    # reading compressed input via self._read_data(fp) and folding the output
    # into the running CRC digest.
    if max_length < 0:
        # Unbounded read: emit all buffered bytes plus everything decompressed
        # from leftover input and freshly read data; reset all buffers.
        data = self._read_data(fp)
        res = self._buf[self._pos:] + self._decompress(
            self._unused + data, max_length)
        self._buf = bytearray()
        self._unused = bytearray()
        self._pos = 0
    else:
        current_buf_len = len(self._buf) - self._pos
        if current_buf_len >= max_length:
            # we already have enough data
            res = self._buf[self._pos:self._pos + max_length]
            self._pos += max_length
        else:
            data = self._read_data(fp)
            if len(self._unused) > 0:
                # Prepend compressed input left over from a previous call.
                tmp = self._decompress(self._unused + data, max_length)
                self._unused = bytearray()
            else:
                tmp = self._decompress(data, max_length)
            if current_buf_len + len(tmp) <= max_length:
                # Everything fits within the requested length; drain the buffer.
                res = self._buf[self._pos:] + tmp
                self._buf = bytearray()
                self._pos = 0
            else:
                # Keep the surplus beyond max_length buffered for the next call.
                res = self._buf[self._pos:] + tmp[:max_length - current_buf_len]
                self._buf = bytearray(tmp[max_length - current_buf_len:])
                self._pos = 0
    self.digest = calculate_crc32(res, self.digest)
    return res
def compress(self, fd, fp, crc=0):
    """Run *fd*'s content through the compressor chain, writing to *fp*.

    Returns a tuple ``(insize, foutsize, crc)``: bytes consumed, compressed
    bytes written, and the CRC32 of the uncompressed input.
    """
    chunk = fd.read(READ_BLOCKSIZE)
    insize = len(chunk)
    foutsize = 0
    while chunk:
        crc = calculate_crc32(chunk, crc)
        # Each stage records how many plain bytes it saw before transforming.
        for i, compressor in enumerate(self.chain):
            self._unpacksizes[i] += len(chunk)
            chunk = compressor.compress(chunk)
        self.packsize += len(chunk)
        self.digest = calculate_crc32(chunk, self.digest)
        foutsize += len(chunk)
        fp.write(chunk)
        chunk = fd.read(READ_BLOCKSIZE)
        insize += len(chunk)
    return insize, foutsize, crc
def _get_headerdata_from_streams(self, fp: BinaryIO, streams: StreamsInfo) -> BytesIO:
    """Get header data from given streams.unpackinfo and packinfo.

    Folder data are stored as raw (compressed) bytes positioned after the
    signature header; each folder is decompressed, CRC-checked when a digest
    is defined, and the plain bytes are concatenated into a rewound buffer.

    :raises UnsupportedCompressionMethodError: when a folder is encrypted.
    :raises Bad7zFile: when a folder's CRC check fails.
    """
    buffer = io.BytesIO()
    src_start = self._start_pos
    for folder in streams.unpackinfo.folders:
        if folder.is_encrypted():
            raise UnsupportedCompressionMethodError()
        uncompressed = folder.unpacksizes
        if not isinstance(uncompressed, (list, tuple)):
            uncompressed = [uncompressed] * len(folder.coders)
        compressed_size = streams.packinfo.packsizes[0]
        uncompressed_size = uncompressed[-1]
        src_start += streams.packinfo.packpos
        fp.seek(src_start, 0)
        decompressor = folder.get_decompressor(compressed_size)
        folder_data = decompressor.decompress(
            fp.read(compressed_size))[:uncompressed_size]
        # BUGFIX: advance by the *compressed* size actually consumed from fp
        # (the original advanced by uncompressed_size, desynchronizing the
        # file offset whenever sizes differ and more than one folder exists).
        src_start += compressed_size
        if folder.digestdefined:
            if folder.crc != calculate_crc32(folder_data):
                raise Bad7zFile('invalid block data')
        buffer.write(folder_data)
    buffer.seek(0, 0)
    return buffer
def decompress(self, fp, max_length: int = -1) -> bytes:
    # Decompress up to max_length bytes (all remaining when negative), reading
    # at most READ_BLOCKSIZE compressed bytes from fp per call and updating
    # the running CRC digest with the returned plain bytes.
    # read data from disk
    rest_size = self.input_size - self.consumed
    read_size = min(rest_size, READ_BLOCKSIZE)
    data = fp.read(read_size)
    self.consumed += len(data)
    #
    if max_length < 0:
        # Unbounded: emit everything buffered plus all newly decompressed bytes.
        res = self._buf + self._decompress(self._unused + data, max_length)
        self._buf = bytearray()
        self._unused = bytearray()
    else:
        current_buf_len = len(self._buf)
        if current_buf_len >= max_length:
            # Enough already buffered; stash the fresh input for a later call.
            self._unused.extend(data)
            res = self._buf[:max_length]
            self._buf = self._buf[max_length:]
        else:
            if len(self._unused) > 0:
                # Prepend compressed input left over from a previous call.
                tmp = self._decompress(self._unused + data, max_length)
                self._unused = bytearray()
            else:
                tmp = self._decompress(data, max_length)
            if current_buf_len + len(tmp) <= max_length:
                res = self._buf + tmp
                self._buf = bytearray()
            else:
                # Keep any surplus beyond max_length buffered for the next call.
                res = self._buf + tmp[:max_length - current_buf_len]
                self._buf = bytearray(tmp[max_length - current_buf_len:])
    self.digest = calculate_crc32(res, self.digest)
    return res
def _test_digest_raw(self, pos: int, size: int, crc: int) -> bool:
    """Return True when the CRC32 of *size* bytes at file offset *pos* equals *crc*."""
    self.fp.seek(pos)
    digest = None
    remaining = size
    while remaining > 0:
        step = min(READ_BLOCKSIZE, remaining)
        digest = calculate_crc32(self.fp.read(step), digest)
        remaining -= step
    return digest == crc
def _test_digest_raw(self, pos, size, crc):
    """Check that *size* bytes starting at *pos* hash (CRC32) to *crc*."""
    self.fp.seek(pos)
    digest = None
    todo = size
    while todo > 0:
        step = min(Configuration.read_blocksize, todo)
        digest = calculate_crc32(self.fp.read(step), digest)
        todo -= step
    return digest == crc
def calccrc(self, length: int, header_crc: int):
    """Store the next-header size/CRC and derive the start-header CRC from them."""
    self.nextheadersize = length
    self.nextheadercrc = header_crc
    scratch = io.BytesIO()
    write_real_uint64(scratch, self.nextheaderofs)
    write_real_uint64(scratch, self.nextheadersize)
    write_uint32(scratch, self.nextheadercrc)
    self.startheadercrc = calculate_crc32(scratch.getvalue())
def decompress(self, data: bytes, max_length: Optional[int] = None) -> bytes:
    """Decompress *data*, tracking consumed input and the uncompressed CRC."""
    self.consumed += len(data)
    if max_length is None:
        folder_data = self.decompressor.decompress(data)
    else:
        folder_data = self.decompressor.decompress(data, max_length=max_length)
    # calculate CRC with uncompressed data
    if self.crc is not None:
        self.digest = calculate_crc32(folder_data, self.digest)
    return folder_data
def flush(self, fp):
    """Flush every compressor in the chain, writing the tail bytes to *fp*.

    Returns the number of bytes written.
    """
    pending = None
    for i, compressor in enumerate(self.chain):
        if pending:
            # Later stages must first compress what earlier stages flushed.
            self._unpacksizes[i] += len(pending)
            pending = compressor.compress(pending) + compressor.flush()
        else:
            pending = compressor.flush()
    self.packsize += len(pending)
    self.digest = calculate_crc32(pending, self.digest)
    fp.write(pending)
    return len(pending)
def _read(self, fp: BinaryIO, buffer: BytesIO, start_pos: int, password) -> None:
    """
    Decode header data or encoded header data from buffer.
    When buffer consist of encoded buffer, it get stream data
    from it and call itself recursively
    """
    self._start_pos = start_pos
    fp.seek(self._start_pos)
    pid = buffer.read(1)
    if not pid:
        # empty archive
        return
    if pid == PROPERTY.HEADER:
        self._extract_header_info(buffer)
        return
    if pid != PROPERTY.ENCODED_HEADER:
        # BUGFIX: interpolate the offending property byte `pid`; the original
        # used the builtin `id`, which printed the function's repr instead.
        raise TypeError("Unknown field: %r" % pid)  # pragma: no-cover
    # get from encoded header
    streams = HeaderStreamsInfo.retrieve(buffer)
    buffer2 = io.BytesIO()
    src_start = self._start_pos
    for folder in streams.unpackinfo.folders:
        uncompressed = folder.unpacksizes
        if not isinstance(uncompressed, (list, tuple)):
            uncompressed = [uncompressed] * len(folder.coders)
        compressed_size = streams.packinfo.packsizes[0]
        uncompressed_size = uncompressed[-1]
        folder.password = password
        src_start += streams.packinfo.packpos
        fp.seek(src_start, 0)
        decompressor = folder.get_decompressor(compressed_size)
        # Pull decompressed bytes until the folder's plain size is reached.
        remaining = uncompressed_size
        folder_data = bytearray()
        while remaining > 0:
            folder_data += decompressor.decompress(fp, max_length=remaining)
            remaining = uncompressed_size - len(folder_data)
        self.size += compressed_size
        src_start += compressed_size
        if folder.digestdefined:
            streams.packinfo.enable_digests = True
            if folder.crc != calculate_crc32(folder_data):
                raise Bad7zFile("invalid block data")
        buffer2.write(folder_data)
    buffer2.seek(0, 0)
    pid = buffer2.read(1)
    if pid != PROPERTY.HEADER:
        raise TypeError("Unknown field: %r" % pid)  # pragma: no-cover
    self._extract_header_info(buffer2)
def test_simple_compress_and_decompress():
    """Round-trip data through SevenZipCompressor, then both decompressor paths."""
    sevenzip_compressor = py7zr.compression.SevenZipCompressor()
    lzc = sevenzip_compressor.compressor
    pieces = [
        lzc.compress(b"Some data\n"),
        lzc.compress(b"Another piece of data\n"),
        lzc.compress(b"Even more data\n"),
        lzc.flush(),
    ]
    result = b"".join(pieces)
    size = len(result)
    #
    filters = sevenzip_compressor.filters
    decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=filters)
    out5 = decompressor.decompress(result)
    assert out5 == b'Some data\nAnother piece of data\nEven more data\n'
    #
    coders = sevenzip_compressor.coders
    crc = calculate_crc32(result)
    decompressor = py7zr.compression.SevenZipDecompressor(coders, size, crc)
    out6 = decompressor.decompress(result)
    assert out6 == b'Some data\nAnother piece of data\nEven more data\n'
def write(self, file: BinaryIO, afterheader: int, encoded: bool = True):
    # Write the archive header to `file`, either encoded (compressed header
    # stream) or as plain property records, then compute the CRC32 of the
    # bytes just written.  Returns (start position, header length, crc).
    startpos = file.tell()
    if encoded:
        # _encode_header writes the encoded form and reports where it starts.
        startpos = self._encode_header(file, afterheader)
    else:
        write_byte(file, Property.HEADER)
        # Archive properties
        if self.main_streams is not None:
            self.main_streams.write(file)
        # Files Info
        if self.files_info is not None:
            self.files_info.write(file)
        if self.properties is not None:
            self.properties.write(file)
        # AdditionalStreams
        if self.additional_streams is not None:
            self.additional_streams.write(file)
        write_byte(file, Property.END)
    endpos = file.tell()
    header_len = endpos - startpos
    # Re-read what was written to compute its CRC, then restore the position.
    file.seek(startpos, io.SEEK_SET)
    crc = calculate_crc32(file.read(header_len))
    file.seek(endpos, io.SEEK_SET)
    return startpos, header_len, crc
def _read_header_data(self) -> BytesIO:
    """Read the next-header bytes into a buffer, validating their CRC32."""
    self.fp.seek(self.sig_header.nextheaderofs, os.SEEK_CUR)
    raw = self.fp.read(self.sig_header.nextheadersize)
    if self.sig_header.nextheadercrc != calculate_crc32(raw):
        raise Bad7zFile('invalid header data')
    return io.BytesIO(raw)
def write(self, data):
    """Fold *data* into the running CRC digest, then write it through."""
    self.digest = calculate_crc32(data, self.digest)
    written = self._fp.write(data)
    return written
def _write_archive(self):
    # Compress every queued file into a single solid folder, fill in the
    # per-file digests/sizes in the header, then write the (possibly encoded)
    # header and the signature header.
    compressor = self.folder.get_compressor()
    # TODO: support multiple compresssion folder; current single solid folder
    outsize = 0
    self.header.main_streams.packinfo.numstreams = 1
    num_unpack_streams = 0
    self.header.main_streams.substreamsinfo.digests = []
    self.header.main_streams.substreamsinfo.digestsdefined = []
    last_file_index = 0
    foutsize = 0
    for i, f in enumerate(self.files):
        file_info = f.file_properties()
        self.header.files_info.files.append(file_info)
        self.header.files_info.emptyfiles.append(f.emptystream)
        foutsize = 0
        if f.is_symlink:
            # Symlinks are stored as their (relative) target path bytes.
            last_file_index = i
            num_unpack_streams += 1
            link_target = pathlib.Path(os.readlink(f.origin))
            if str(link_target).startswith('\\\\?\\'):
                # Windows extended-length path prefix: keep the raw target.
                tgt = os.readlink(f.origin).encode('utf-8')
            else:
                link_parent = pathlib.Path(
                    os.path.abspath(os.path.dirname(f.origin)))
                tgt = str(
                    link_target.relative_to(link_parent)).encode('utf-8')
            insize = len(tgt)
            crc = calculate_crc32(tgt, 0)
            out = compressor.compress(tgt)
            outsize += len(out)
            foutsize += len(out)
            self.fp.write(out)
            self.header.main_streams.substreamsinfo.digests.append(crc)
            self.header.main_streams.substreamsinfo.digestsdefined.append(
                True)
            self.header.main_streams.substreamsinfo.unpacksizes.append(
                insize)
            self.header.files_info.files[i]['maxsize'] = foutsize
        elif not f.emptystream:
            # Regular file with content: stream it through the compressor.
            last_file_index = i
            num_unpack_streams += 1
            insize = 0
            with pathlib.Path(f.origin).open(mode='rb') as fd:
                data = fd.read(Configuration.read_blocksize)
                insize += len(data)
                crc = 0  # type: int
                while data:
                    crc = calculate_crc32(data, crc)
                    out = compressor.compress(data)
                    outsize += len(out)
                    foutsize += len(out)
                    self.fp.write(out)
                    data = fd.read(Configuration.read_blocksize)
                    insize += len(data)
            self.header.main_streams.substreamsinfo.digests.append(crc)
            self.header.main_streams.substreamsinfo.digestsdefined.append(
                True)
            self.header.files_info.files[i]['maxsize'] = foutsize
            self.header.main_streams.substreamsinfo.unpacksizes.append(
                insize)
    else:
        # for-else: runs once after the loop completes — flush the solid
        # folder's compressor tail.
        out = compressor.flush()
        outsize += len(out)
        foutsize += len(out)
        self.fp.write(out)
    if len(self.files) > 0:
        self.header.files_info.files[last_file_index][
            'maxsize'] = foutsize
    # Update size data in header
    self.header.main_streams.packinfo.packsizes = [outsize]
    self.folder.unpacksizes = [
        sum(self.header.main_streams.substreamsinfo.unpacksizes)
    ]
    self.header.main_streams.substreamsinfo.num_unpackstreams_folders = [
        num_unpack_streams
    ]
    # Write header
    (header_pos, header_len, header_crc) = self.header.write(
        self.fp, self.afterheader, encoded=self.encoded_header_mode)
    self.sig_header.nextheaderofs = header_pos - self.afterheader
    self.sig_header.calccrc(header_len, header_crc)
    self.sig_header.write(self.fp)
    return
def archive(self, fp: BinaryIO, folder):
    """Run archive task for specified 7zip folder."""
    # Compress every queued file into the given solid folder and record the
    # per-file digests and sizes into the header structures.
    compressor = folder.get_compressor()
    outsize = 0
    self.header.main_streams.packinfo.numstreams = 1
    num_unpack_streams = 0
    self.header.main_streams.substreamsinfo.digests = []
    self.header.main_streams.substreamsinfo.digestsdefined = []
    last_file_index = 0
    foutsize = 0
    for i, f in enumerate(self.files):
        file_info = f.file_properties()
        self.header.files_info.files.append(file_info)
        self.header.files_info.emptyfiles.append(f.emptystream)
        foutsize = 0
        if f.is_symlink:
            # Symlinks are stored as their target path bytes.
            last_file_index = i
            num_unpack_streams += 1
            dirname = os.path.dirname(f.origin)
            basename = os.path.basename(f.origin)
            link_target = readlink(str(pathlib.Path(dirname) / basename))  # type: str
            tgt = link_target.encode('utf-8')  # type: bytes
            insize = len(tgt)
            crc = calculate_crc32(tgt, 0)  # type: int
            out = compressor.compress(tgt)
            outsize += len(out)
            foutsize += len(out)
            fp.write(out)
            self.header.main_streams.substreamsinfo.digests.append(crc)
            self.header.main_streams.substreamsinfo.digestsdefined.append(
                True)
            self.header.main_streams.substreamsinfo.unpacksizes.append(
                insize)
            self.header.files_info.files[i]['maxsize'] = foutsize
        elif not f.emptystream:
            # Regular file with content: stream it through the compressor.
            last_file_index = i
            num_unpack_streams += 1
            insize = 0
            with pathlib.Path(f.origin).open(mode='rb') as fd:
                data = fd.read(READ_BLOCKSIZE)
                insize += len(data)
                crc = 0
                while data:
                    crc = calculate_crc32(data, crc)
                    out = compressor.compress(data)
                    outsize += len(out)
                    foutsize += len(out)
                    fp.write(out)
                    data = fd.read(READ_BLOCKSIZE)
                    insize += len(data)
            self.header.main_streams.substreamsinfo.digests.append(crc)
            self.header.main_streams.substreamsinfo.digestsdefined.append(
                True)
            self.header.files_info.files[i]['maxsize'] = foutsize
            self.header.main_streams.substreamsinfo.unpacksizes.append(
                insize)
    else:
        # for-else: runs once after the loop completes — flush the solid
        # folder's compressor tail.
        out = compressor.flush()
        outsize += len(out)
        foutsize += len(out)
        fp.write(out)
    if len(self.files) > 0:
        self.header.files_info.files[last_file_index][
            'maxsize'] = foutsize
    # Update size data in header
    self.header.main_streams.packinfo.packsizes = [outsize]
    folder.unpacksizes = [
        sum(self.header.main_streams.substreamsinfo.unpacksizes)
    ]
    self.header.main_streams.substreamsinfo.num_unpackstreams_folders = [
        num_unpack_streams
    ]