Ejemplo n.º 1
0
 def calccrc(self, data: bytes):
     """Record size and CRC32 of the next header, then refresh the start-header CRC.

     :param data: raw bytes of the next header.
     """
     self.nextheadersize = len(data)
     self.nextheadercrc = calculate_crc32(data)
     assert self.nextheaderofs is not None
     stream = io.BytesIO()
     # Serialize offset, size and CRC in the fixed start-header layout.
     for writer, value in ((write_real_uint64, self.nextheaderofs),
                           (write_real_uint64, self.nextheadersize),
                           (write_uint32, self.nextheadercrc)):
         writer(stream, value)
     self.startheadercrc = calculate_crc32(stream.getvalue())
Ejemplo n.º 2
0
 def _read(self, file: BinaryIO) -> None:
     """Parse the 7z signature header from *file* and verify its CRC32."""
     # Skip the magic bytes at the start of the archive.
     file.seek(len(MAGIC_7Z), 0)
     self.version = read_bytes(file, 2)
     self.startheadercrc, _ = read_uint32(file)
     # Accumulate a running CRC over the three start-header fields as read.
     self.nextheaderofs, raw = read_real_uint64(file)
     running = calculate_crc32(raw)
     self.nextheadersize, raw = read_real_uint64(file)
     running = calculate_crc32(raw, running)
     self.nextheadercrc, raw = read_uint32(file)
     running = calculate_crc32(raw, running)
     if running != self.startheadercrc:
         raise Bad7zFile('invalid header data')
Ejemplo n.º 3
0
 def calccrc(self, header: Header):
     """Serialize *header*, record its size/CRC, and update the start-header CRC."""
     serialized = io.BytesIO()
     header.write(serialized)
     payload = serialized.getvalue()
     self.nextheadersize = len(payload)
     self.nextheadercrc = calculate_crc32(payload)
     assert self.nextheaderofs is not None
     start = io.BytesIO()
     write_uint64(start, self.nextheaderofs)
     write_uint64(start, self.nextheadersize)
     write_uint32(start, self.nextheadercrc)
     self.startheadercrc = calculate_crc32(start.getvalue())
Ejemplo n.º 4
0
 def compress(self, fp: BinaryIO, folder, f: Handler):
     """Compress data yielded by handler *f* into *fp*, updating folder.crc.

     :return: number of compressed bytes written.
     """
     compressor = folder.get_compressor()
     total = 0

     def _emit(chunk):
         # Account for and write one piece of compressed output.
         nonlocal total
         folder.crc = calculate_crc32(chunk, folder.crc)
         total += len(chunk)
         fp.write(chunk)

     for indata in f.read(Configuration.get('read_blocksize')):
         _emit(compressor.compress(indata))
     _emit(compressor.flush())
     return total
Ejemplo n.º 5
0
 def compress(self, fp: BinaryIO, folder, f: Handler):
     """Compress specified file-ish into folder where fp placed."""
     compressor = folder.get_compressor()
     written = 0
     for block in f.read(Configuration.get('read_blocksize')):
         packed = compressor.compress(block)
         folder.crc = calculate_crc32(packed, folder.crc)
         written += len(packed)
         fp.write(packed)
     # Flush whatever the compressor still buffers internally.
     tail = compressor.flush()
     folder.crc = calculate_crc32(tail, folder.crc)
     written += len(tail)
     fp.write(tail)
     return written
Ejemplo n.º 6
0
 def _read(self, file: BinaryIO) -> None:
     """Parse the signature header from *file* and validate its CRC32."""
     file.seek(len(MAGIC_7Z), 0)
     # Version is stored as two single bytes: major then minor.
     self.version = (file.read(1), file.read(1))
     self.startheadercrc, _ = read_uint32(file)
     # CRC the three start-header fields while they are being read.
     self.nextheaderofs, chunk = read_real_uint64(file)
     digest = calculate_crc32(chunk)
     self.nextheadersize, chunk = read_real_uint64(file)
     digest = calculate_crc32(chunk, digest)
     self.nextheadercrc, chunk = read_uint32(file)
     digest = calculate_crc32(chunk, digest)
     if digest != self.startheadercrc:
         raise Bad7zFile("invalid header data")
Ejemplo n.º 7
0
 def compress(self, fp: BinaryIO, folder, fq: IO[Any]):
     """Compress specified file-ish into folder where fp placed."""
     compressor = folder.get_compressor()
     nbytes = 0
     for chunk in fq.read(READ_BLOCKSIZE):
         out = compressor.compress(chunk)
         folder.crc = calculate_crc32(out, folder.crc)
         nbytes += len(out)
         fp.write(out)
     # Drain the compressor's internal buffer.
     out = compressor.flush()
     folder.crc = calculate_crc32(out, folder.crc)
     nbytes += len(out)
     fp.write(out)
     return nbytes
Ejemplo n.º 8
0
 def decompress(self, fp, max_length: int = -1) -> bytes:
     """Decompress data read from *fp* and return up to *max_length* bytes.

     A negative *max_length* drains everything available; otherwise at most
     *max_length* bytes are returned and surplus decompressed output stays
     in ``self._buf`` (consumed from ``self._pos``) for the next call.
     ``self.digest`` accumulates a CRC32 over the returned bytes.
     """
     if max_length < 0:
         # Unbounded read: emit buffered bytes plus everything decodable,
         # including input carried over in self._unused.
         data = self._read_data(fp)
         res = self._buf[self._pos:] + self._decompress(
             self._unused + data, max_length)
         self._buf = bytearray()
         self._unused = bytearray()
         self._pos = 0
     else:
         current_buf_len = len(self._buf) - self._pos
         if current_buf_len >= max_length:  # we already have enough data
             res = self._buf[self._pos:self._pos + max_length]
             self._pos += max_length
         else:
             data = self._read_data(fp)
             # Feed any input left over from the previous call first.
             if len(self._unused) > 0:
                 tmp = self._decompress(self._unused + data, max_length)
                 self._unused = bytearray()
             else:
                 tmp = self._decompress(data, max_length)
             if current_buf_len + len(tmp) <= max_length:
                 # Everything fits in this response; buffers are now empty.
                 res = self._buf[self._pos:] + tmp
                 self._buf = bytearray()
                 self._pos = 0
             else:
                 # Return exactly max_length bytes; keep the remainder buffered.
                 res = self._buf[self._pos:] + tmp[:max_length -
                                                   current_buf_len]
                 self._buf = bytearray(tmp[max_length - current_buf_len:])
                 self._pos = 0
     self.digest = calculate_crc32(res, self.digest)
     return res
Ejemplo n.º 9
0
 def compress(self, fd, fp, crc=0):
     """Read from *fd*, run through the compressor chain, and write to *fp*.

     :return: tuple of (input size, output size, input CRC32).
     """
     insize = 0
     foutsize = 0
     while True:
         data = fd.read(READ_BLOCKSIZE)
         insize += len(data)
         if not data:
             break
         crc = calculate_crc32(data, crc)
         # Each coder in the chain consumes the previous coder's output.
         for i, compressor in enumerate(self.chain):
             self._unpacksizes[i] += len(data)
             data = compressor.compress(data)
         self.packsize += len(data)
         self.digest = calculate_crc32(data, self.digest)
         foutsize += len(data)
         fp.write(data)
     return insize, foutsize, crc
Ejemplo n.º 10
0
    def _get_headerdata_from_streams(self, fp: BinaryIO,
                                     streams: StreamsInfo) -> BytesIO:
        """Get header data from given streams.unpackinfo and packinfo.

        Folder data are stored as raw packed data positioned after the
        signature header; each folder is decompressed, optionally CRC-checked,
        and concatenated into the returned buffer (rewound to position 0).

        :raises UnsupportedCompressionMethodError: when a folder is encrypted.
        :raises Bad7zFile: when a folder's CRC check fails.
        """
        buffer = io.BytesIO()
        src_start = self._start_pos
        for folder in streams.unpackinfo.folders:
            if folder.is_encrypted():
                raise UnsupportedCompressionMethodError()

            uncompressed = folder.unpacksizes
            if not isinstance(uncompressed, (list, tuple)):
                uncompressed = [uncompressed] * len(folder.coders)
            compressed_size = streams.packinfo.packsizes[0]
            uncompressed_size = uncompressed[-1]

            src_start += streams.packinfo.packpos
            fp.seek(src_start, 0)
            decompressor = folder.get_decompressor(compressed_size)
            folder_data = decompressor.decompress(
                fp.read(compressed_size))[:uncompressed_size]
            # BUGFIX: advance by the number of packed bytes actually read from
            # fp, not the uncompressed size, so the next folder's stream is
            # located correctly (matches the encoded-header reader variant).
            src_start += compressed_size
            if folder.digestdefined:
                if folder.crc != calculate_crc32(folder_data):
                    raise Bad7zFile('invalid block data')
            buffer.write(folder_data)
        buffer.seek(0, 0)
        return buffer
Ejemplo n.º 11
0
 def decompress(self, fp, max_length: int = -1) -> bytes:
     """Read compressed input from *fp* and return up to *max_length* decompressed bytes.

     Surplus decompressed output is kept in ``self._buf`` and unconsumed
     compressed input in ``self._unused`` for later calls; ``self.digest``
     accumulates a CRC32 over the returned bytes.
     """
     # read data from disk
     rest_size = self.input_size - self.consumed
     read_size = min(rest_size, READ_BLOCKSIZE)
     data = fp.read(read_size)
     self.consumed += len(data)
     #
     if max_length < 0:
         # Unbounded: return buffered output plus everything decodable.
         res = self._buf + self._decompress(self._unused + data, max_length)
         self._buf = bytearray()
         self._unused = bytearray()
     else:
         current_buf_len = len(self._buf)
         if current_buf_len >= max_length:
             # Enough buffered output already; stash the unread input for later.
             self._unused.extend(data)
             res = self._buf[:max_length]
             self._buf = self._buf[max_length:]
         else:
             # Feed input carried over from a previous call first.
             if len(self._unused) > 0:
                 tmp = self._decompress(self._unused + data, max_length)
                 self._unused = bytearray()
             else:
                 tmp = self._decompress(data, max_length)
             if current_buf_len + len(tmp) <= max_length:
                 res = self._buf + tmp
                 self._buf = bytearray()
             else:
                 # Return exactly max_length bytes and buffer the surplus.
                 res = self._buf + tmp[:max_length - current_buf_len]
                 self._buf = bytearray(tmp[max_length - current_buf_len:])
     self.digest = calculate_crc32(res, self.digest)
     return res
Ejemplo n.º 12
0
 def _test_digest_raw(self, pos: int, size: int, crc: int) -> bool:
     """CRC-check *size* raw bytes at file offset *pos* against *crc*."""
     self.fp.seek(pos)
     digest = None
     todo = size
     # Stream in bounded blocks so large members do not load at once.
     while todo > 0:
         chunk = min(READ_BLOCKSIZE, todo)
         digest = calculate_crc32(self.fp.read(chunk), digest)
         todo -= chunk
     return digest == crc
Ejemplo n.º 13
0
 def _test_digest_raw(self, pos, size, crc):
     """Check CRC32 of *size* bytes at file offset *pos* against *crc*."""
     self.fp.seek(pos)
     digest = None
     left = size
     # Read block-wise to keep memory bounded for big members.
     while left > 0:
         n = min(Configuration.read_blocksize, left)
         digest = calculate_crc32(self.fp.read(n), digest)
         left -= n
     return digest == crc
Ejemplo n.º 14
0
 def calccrc(self, length: int, header_crc: int):
     """Store next-header size/CRC and recompute the start-header CRC32.

     :param length: byte length of the next header.
     :param header_crc: CRC32 of the next header.
     """
     self.nextheadersize = length
     self.nextheadercrc = header_crc
     stream = io.BytesIO()
     write_real_uint64(stream, self.nextheaderofs)
     write_real_uint64(stream, self.nextheadersize)
     write_uint32(stream, self.nextheadercrc)
     self.startheadercrc = calculate_crc32(stream.getvalue())
Ejemplo n.º 15
0
 def decompress(self, data: bytes, max_length: Optional[int] = None) -> bytes:
     """Decompress *data*, tracking consumed input and an output CRC digest."""
     self.consumed += len(data)
     if max_length is None:
         folder_data = self.decompressor.decompress(data)
     else:
         folder_data = self.decompressor.decompress(data, max_length=max_length)
     # calculate CRC with uncompressed data
     if self.crc is not None:
         self.digest = calculate_crc32(folder_data, self.digest)
     return folder_data
Ejemplo n.º 16
0
 def flush(self, fp):
     """Flush the compressor chain, write the tail data to *fp*, return its length."""
     data = None
     for i, compressor in enumerate(self.chain):
         if not data:
             # First coder (or nothing produced yet): only flush.
             data = compressor.flush()
         else:
             # Later coders also consume the previous coder's tail output.
             self._unpacksizes[i] += len(data)
             data = compressor.compress(data) + compressor.flush()
     self.packsize += len(data)
     self.digest = calculate_crc32(data, self.digest)
     fp.write(data)
     return len(data)
Ejemplo n.º 17
0
 def _read(self, fp: BinaryIO, buffer: BytesIO, start_pos: int,
           password) -> None:
     """
     Decode header data or encoded header data from buffer.
     When buffer consists of an encoded header, decompress the stream
     data it describes and parse the decoded header from that.
     """
     self._start_pos = start_pos
     fp.seek(self._start_pos)
     pid = buffer.read(1)
     if not pid:
         # empty archive
         return
     if pid == PROPERTY.HEADER:
         self._extract_header_info(buffer)
         return
     if pid != PROPERTY.ENCODED_HEADER:
         # BUGFIX: interpolate the property byte that was read (pid);
         # the previous code mistakenly formatted the builtin `id` function.
         raise TypeError("Unknown field: %r" % pid)  # pragma: no-cover
     # get from encoded header
     streams = HeaderStreamsInfo.retrieve(buffer)
     buffer2 = io.BytesIO()
     src_start = self._start_pos
     for folder in streams.unpackinfo.folders:
         uncompressed = folder.unpacksizes
         if not isinstance(uncompressed, (list, tuple)):
             uncompressed = [uncompressed] * len(folder.coders)
         compressed_size = streams.packinfo.packsizes[0]
         uncompressed_size = uncompressed[-1]
         folder.password = password
         src_start += streams.packinfo.packpos
         fp.seek(src_start, 0)
         decompressor = folder.get_decompressor(compressed_size)
         remaining = uncompressed_size
         folder_data = bytearray()
         # Pull decompressed bytes until the folder is fully expanded.
         while remaining > 0:
             folder_data += decompressor.decompress(fp,
                                                    max_length=remaining)
             remaining = uncompressed_size - len(folder_data)
         self.size += compressed_size
         src_start += compressed_size
         if folder.digestdefined:
             streams.packinfo.enable_digests = True
             if folder.crc != calculate_crc32(folder_data):
                 raise Bad7zFile("invalid block data")
         buffer2.write(folder_data)
     buffer2.seek(0, 0)
     pid = buffer2.read(1)
     if pid != PROPERTY.HEADER:
         raise TypeError("Unknown field: %r" % pid)  # pragma: no-cover
     self._extract_header_info(buffer2)
Ejemplo n.º 18
0
def test_simple_compress_and_decompress():
    """Round-trip small payloads through SevenZipCompressor/SevenZipDecompressor."""
    sevenzip_compressor = py7zr.compression.SevenZipCompressor()
    lzc = sevenzip_compressor.compressor
    pieces = [lzc.compress(b"Some data\n"),
              lzc.compress(b"Another piece of data\n"),
              lzc.compress(b"Even more data\n"),
              lzc.flush()]
    result = b"".join(pieces)
    size = len(result)
    #
    raw = lzma.LZMADecompressor(format=lzma.FORMAT_RAW,
                                filters=sevenzip_compressor.filters)
    assert raw.decompress(result) == b'Some data\nAnother piece of data\nEven more data\n'
    #
    crc = calculate_crc32(result)
    decompressor = py7zr.compression.SevenZipDecompressor(
        sevenzip_compressor.coders, size, crc)
    out6 = decompressor.decompress(result)
    assert out6 == b'Some data\nAnother piece of data\nEven more data\n'
Ejemplo n.º 19
0
 def write(self, file: BinaryIO, afterheader: int, encoded: bool = True):
     """Write the header to *file* and return (start position, length, CRC32)."""
     startpos = file.tell()
     if encoded:
         startpos = self._encode_header(file, afterheader)
     else:
         write_byte(file, Property.HEADER)
         # Emit each optional header section that is present, in order:
         # archive properties (main streams), files info, properties,
         # additional streams.
         for section in (self.main_streams, self.files_info,
                         self.properties, self.additional_streams):
             if section is not None:
                 section.write(file)
         write_byte(file, Property.END)
     endpos = file.tell()
     header_len = endpos - startpos
     # Re-read what was just written to compute its CRC, then restore position.
     file.seek(startpos, io.SEEK_SET)
     crc = calculate_crc32(file.read(header_len))
     file.seek(endpos, io.SEEK_SET)
     return startpos, header_len, crc
Ejemplo n.º 20
0
 def _read_header_data(self) -> BytesIO:
     """Read the next-header bytes and verify them against the recorded CRC."""
     self.fp.seek(self.sig_header.nextheaderofs, os.SEEK_CUR)
     header_bytes = self.fp.read(self.sig_header.nextheadersize)
     if calculate_crc32(header_bytes) != self.sig_header.nextheadercrc:
         raise Bad7zFile('invalid header data')
     return io.BytesIO(header_bytes)
Ejemplo n.º 21
0
 def write(self, data):
     """Write *data* to the wrapped file while updating the running CRC digest."""
     self.digest = calculate_crc32(data, self.digest)
     written = self._fp.write(data)
     return written
Ejemplo n.º 22
0
    def _write_archive(self):
        """Compress all queued files into the archive and finalize headers.

        For each entry in ``self.files``: a symlink's target path is
        compressed as the stream content; a regular non-empty file is read
        and compressed block by block.  Per-stream CRC digests and unpacked
        sizes are recorded in the header's substreams info; afterwards the
        header is written and the signature header CRC is updated.
        """
        compressor = self.folder.get_compressor()
        # TODO: support multiple compresssion folder; current single solid folder
        outsize = 0
        self.header.main_streams.packinfo.numstreams = 1
        num_unpack_streams = 0
        self.header.main_streams.substreamsinfo.digests = []
        self.header.main_streams.substreamsinfo.digestsdefined = []
        last_file_index = 0
        foutsize = 0
        for i, f in enumerate(self.files):
            file_info = f.file_properties()
            self.header.files_info.files.append(file_info)
            self.header.files_info.emptyfiles.append(f.emptystream)
            foutsize = 0
            if f.is_symlink:
                last_file_index = i
                num_unpack_streams += 1
                link_target = pathlib.Path(os.readlink(f.origin))
                # Windows extended-length targets (\\?\ prefix) are stored
                # verbatim; otherwise the target is stored relative to the
                # link's own directory.
                if str(link_target).startswith('\\\\?\\'):
                    tgt = os.readlink(f.origin).encode('utf-8')
                else:
                    link_parent = pathlib.Path(
                        os.path.abspath(os.path.dirname(f.origin)))
                    tgt = str(
                        link_target.relative_to(link_parent)).encode('utf-8')
                insize = len(tgt)
                crc = calculate_crc32(tgt, 0)
                out = compressor.compress(tgt)
                outsize += len(out)
                foutsize += len(out)
                self.fp.write(out)
                self.header.main_streams.substreamsinfo.digests.append(crc)
                self.header.main_streams.substreamsinfo.digestsdefined.append(
                    True)
                self.header.main_streams.substreamsinfo.unpacksizes.append(
                    insize)
                self.header.files_info.files[i]['maxsize'] = foutsize
            elif not f.emptystream:
                last_file_index = i
                num_unpack_streams += 1
                insize = 0
                with pathlib.Path(f.origin).open(mode='rb') as fd:
                    data = fd.read(Configuration.read_blocksize)
                    insize += len(data)
                    crc = 0  # type: int
                    # CRC the input and compress block by block.
                    while data:
                        crc = calculate_crc32(data, crc)
                        out = compressor.compress(data)
                        outsize += len(out)
                        foutsize += len(out)
                        self.fp.write(out)
                        data = fd.read(Configuration.read_blocksize)
                        insize += len(data)
                    self.header.main_streams.substreamsinfo.digests.append(crc)
                    self.header.main_streams.substreamsinfo.digestsdefined.append(
                        True)
                    self.header.files_info.files[i]['maxsize'] = foutsize
                self.header.main_streams.substreamsinfo.unpacksizes.append(
                    insize)

        else:
            # NOTE: this `else` belongs to the `for` above; since the loop
            # never breaks it always runs, flushing the compressor tail and
            # attributing the tail bytes to the last non-empty entry.
            out = compressor.flush()
            outsize += len(out)
            foutsize += len(out)
            self.fp.write(out)
            if len(self.files) > 0:
                self.header.files_info.files[last_file_index][
                    'maxsize'] = foutsize
        # Update size data in header
        self.header.main_streams.packinfo.packsizes = [outsize]
        self.folder.unpacksizes = [
            sum(self.header.main_streams.substreamsinfo.unpacksizes)
        ]
        self.header.main_streams.substreamsinfo.num_unpackstreams_folders = [
            num_unpack_streams
        ]
        # Write header
        (header_pos, header_len,
         header_crc) = self.header.write(self.fp,
                                         self.afterheader,
                                         encoded=self.encoded_header_mode)
        self.sig_header.nextheaderofs = header_pos - self.afterheader
        self.sig_header.calccrc(header_len, header_crc)
        self.sig_header.write(self.fp)
        return
Ejemplo n.º 23
0
 def archive(self, fp: BinaryIO, folder):
     """Run archive task for specified 7zip folder.

     Compresses every queued file into *fp* with the folder's compressor:
     symlinks store their target path as stream content, regular non-empty
     files are read and compressed block by block.  Per-stream CRC digests
     and unpacked sizes are appended to the header's substreams info, and
     pack/unpack size totals are written back into the header afterwards.
     """
     compressor = folder.get_compressor()
     outsize = 0
     self.header.main_streams.packinfo.numstreams = 1
     num_unpack_streams = 0
     self.header.main_streams.substreamsinfo.digests = []
     self.header.main_streams.substreamsinfo.digestsdefined = []
     last_file_index = 0
     foutsize = 0
     for i, f in enumerate(self.files):
         file_info = f.file_properties()
         self.header.files_info.files.append(file_info)
         self.header.files_info.emptyfiles.append(f.emptystream)
         foutsize = 0
         if f.is_symlink:
             last_file_index = i
             num_unpack_streams += 1
             dirname = os.path.dirname(f.origin)
             basename = os.path.basename(f.origin)
             # The link target string itself is what gets compressed.
             link_target = readlink(str(pathlib.Path(dirname) /
                                        basename))  # type: str
             tgt = link_target.encode('utf-8')  # type: bytes
             insize = len(tgt)
             crc = calculate_crc32(tgt, 0)  # type: int
             out = compressor.compress(tgt)
             outsize += len(out)
             foutsize += len(out)
             fp.write(out)
             self.header.main_streams.substreamsinfo.digests.append(crc)
             self.header.main_streams.substreamsinfo.digestsdefined.append(
                 True)
             self.header.main_streams.substreamsinfo.unpacksizes.append(
                 insize)
             self.header.files_info.files[i]['maxsize'] = foutsize
         elif not f.emptystream:
             last_file_index = i
             num_unpack_streams += 1
             insize = 0
             with pathlib.Path(f.origin).open(mode='rb') as fd:
                 data = fd.read(READ_BLOCKSIZE)
                 insize += len(data)
                 crc = 0
                 # CRC the raw input and compress block by block.
                 while data:
                     crc = calculate_crc32(data, crc)
                     out = compressor.compress(data)
                     outsize += len(out)
                     foutsize += len(out)
                     fp.write(out)
                     data = fd.read(READ_BLOCKSIZE)
                     insize += len(data)
                 self.header.main_streams.substreamsinfo.digests.append(crc)
                 self.header.main_streams.substreamsinfo.digestsdefined.append(
                     True)
                 self.header.files_info.files[i]['maxsize'] = foutsize
             self.header.main_streams.substreamsinfo.unpacksizes.append(
                 insize)
     else:
         # NOTE: this `else` belongs to the `for` above; the loop never
         # breaks, so it always runs, flushing the compressor tail and
         # attributing the tail bytes to the last non-empty entry.
         out = compressor.flush()
         outsize += len(out)
         foutsize += len(out)
         fp.write(out)
         if len(self.files) > 0:
             self.header.files_info.files[last_file_index][
                 'maxsize'] = foutsize
     # Update size data in header
     self.header.main_streams.packinfo.packsizes = [outsize]
     folder.unpacksizes = [
         sum(self.header.main_streams.substreamsinfo.unpacksizes)
     ]
     self.header.main_streams.substreamsinfo.num_unpackstreams_folders = [
         num_unpack_streams
     ]