Exemple #1
0
 def _read(self, fp: BinaryIO):
     """Parse the files-info section of an archive header from ``fp``.

     Reads the number of files, then consumes ``(type, size, payload)``
     properties one at a time until ``Property.END`` is seen.  Decoded
     values are stored on ``self`` (``numfiles``, ``files``, ``emptyfiles``,
     ``antifiles``, ``dataindex``) or handed to the ``_read_name``,
     ``_readTimes`` and ``_read_attributes`` helpers.

     :param fp: binary stream positioned at the file count.
     :raises Bad7zFile: if a property type is above 255 or is not one of
         the properties handled here.
     """
     self.numfiles = read_uint64(fp)
     self.files = [{'emptystream': False} for _ in range(self.numfiles)]
     numemptystreams = 0
     while True:
         typ = read_uint64(fp)
         if typ > 255:
             raise Bad7zFile('invalid type, must be below 256, is %d' % typ)
         # Property constants are compared as single bytes.
         prop = struct.pack(b'B', typ)
         if prop == Property.END:
             break
         size = read_uint64(fp)
         if prop == Property.DUMMY:
             # Added by newer versions of 7z to adjust padding.
             fp.seek(size, os.SEEK_CUR)
             continue
         # Isolate the payload so a handler cannot read past its property.
         buffer = io.BytesIO(fp.read(size))
         if prop == Property.EMPTY_STREAM:
             isempty = read_boolean(buffer, self.numfiles)
             for entry, flag in zip(self.files, isempty):
                 entry['emptystream'] = flag
             numemptystreams += sum(1 for flag in isempty if flag)
             # Defaults, overwritten if EMPTY_FILE / ANTI properties follow.
             self.emptyfiles = [False] * numemptystreams
             self.antifiles = [False] * numemptystreams
         elif prop == Property.EMPTY_FILE:
             self.emptyfiles = read_boolean(buffer, numemptystreams)
         elif prop == Property.ANTI:
             self.antifiles = read_boolean(buffer, numemptystreams)
         elif prop == Property.NAME:
             external = buffer.read(1)
             if external == b'\x00':
                 self._read_name(buffer)
             else:
                 self.dataindex = read_uint64(buffer)
                 # The names live outside this payload: jump to the recorded
                 # offset, parse from the stream itself (not from ``buffer``,
                 # which only holds the index), then restore the position.
                 current_pos = fp.tell()
                 fp.seek(self.dataindex, 0)
                 self._read_name(fp)
                 fp.seek(current_pos, 0)
         elif prop == Property.CREATION_TIME:
             self._readTimes(buffer, self.files, 'creationtime')
         elif prop == Property.LAST_ACCESS_TIME:
             self._readTimes(buffer, self.files, 'lastaccesstime')
         elif prop == Property.LAST_WRITE_TIME:
             self._readTimes(buffer, self.files, 'lastwritetime')
         elif prop == Property.ATTRIBUTES:
             defined = read_boolean(buffer, self.numfiles, checkall=1)
             external = buffer.read(1)
             if external == b'\x00':
                 self._read_attributes(buffer, defined)
             else:
                 self.dataindex = read_uint64(buffer)
                 # Same external-data dance as for names above.
                 current_pos = fp.tell()
                 fp.seek(self.dataindex, 0)
                 self._read_attributes(fp, defined)
                 fp.seek(current_pos, 0)
         else:
             raise Bad7zFile('invalid type %r' % (prop))
Exemple #2
0
 def _read(self, fp: BinaryIO):
     """Parse the files-info section from ``fp`` into ``self.files``.

     After the file count, properties are consumed as one id byte, a
     size, and a payload of that size, until ``Property.END``.  Each known
     property is decoded from its payload; an unknown id is an error.

     :param fp: binary stream positioned at the file count.
     :raises Bad7zFile: when an unhandled property id is encountered.
     """
     def read_inline_or_external(payload, reader):
         # A leading zero byte means the data follows inline in the payload;
         # otherwise the payload holds an offset into ``fp`` to read from,
         # after which the stream position is restored.
         if payload.read(1) == b'\x00':
             reader(payload)
             return
         offset = read_uint64(payload)
         resume_at = fp.tell()
         fp.seek(offset, 0)
         reader(fp)
         fp.seek(resume_at, 0)

     file_count = read_uint64(fp)
     self.files = [{'emptystream': False} for _ in range(file_count)]
     empty_stream_count = 0
     while True:
         prop = fp.read(1)
         if prop == Property.END:
             break
         size = read_uint64(fp)
         if prop == Property.DUMMY:
             # Padding inserted by newer 7z versions: skip it.
             fp.seek(size, os.SEEK_CUR)
             continue
         payload = io.BytesIO(fp.read(size))
         if prop == Property.EMPTY_STREAM:
             flags = read_boolean(payload, file_count, checkall=False)
             for entry, flag in zip(self.files, flags):
                 entry.update({'emptystream': flag})
             empty_stream_count += flags.count(True)
         elif prop == Property.EMPTY_FILE:
             self.emptyfiles = read_boolean(
                 payload, empty_stream_count, checkall=False)
         elif prop == Property.ANTI:
             self.antifiles = read_boolean(
                 payload, empty_stream_count, checkall=False)
         elif prop == Property.NAME:
             read_inline_or_external(payload, self._read_name)
         elif prop == Property.CREATION_TIME:
             self._read_times(payload, 'creationtime')
         elif prop == Property.LAST_ACCESS_TIME:
             self._read_times(payload, 'lastaccesstime')
         elif prop == Property.LAST_WRITE_TIME:
             self._read_times(payload, 'lastwritetime')
         elif prop == Property.ATTRIBUTES:
             defined = read_boolean(payload, file_count, checkall=True)
             read_inline_or_external(
                 payload, lambda src: self._read_attributes(src, defined))
         elif prop == Property.START_POS:
             self._read_start_pos(payload)
         else:
             raise Bad7zFile('invalid type %r' % prop)
Exemple #3
0
    def _get_headerdata_from_streams(self, fp: BinaryIO,
                                     streams: StreamsInfo) -> BytesIO:
        """Decompress the header data described by ``streams``.

        For every folder in ``streams.unpackinfo.folders`` the packed bytes
        are read from ``fp`` (starting from ``self._start_pos`` plus the
        pack position), decompressed, optionally CRC-checked, and appended
        to an in-memory buffer.

        :param fp: archive stream to read the packed header bytes from.
        :param streams: stream description providing ``packinfo`` and
            ``unpackinfo``.
        :returns: a ``BytesIO`` rewound to offset 0 holding the decoded data.
        :raises UnsupportedCompressionMethodError: if a folder is encrypted.
        :raises Bad7zFile: if a folder's CRC is defined and does not match.
        """
        buffer = io.BytesIO()
        src_start = self._start_pos
        for folder in streams.unpackinfo.folders:
            if folder.is_encrypted():
                raise UnsupportedCompressionMethodError()

            uncompressed = folder.unpacksizes
            if not isinstance(uncompressed, (list, tuple)):
                # A scalar size is applied to every coder of the folder.
                uncompressed = [uncompressed] * len(folder.coders)
            compressed_size = streams.packinfo.packsizes[0]
            # The last entry is taken as the size of the folder's output.
            uncompressed_size = uncompressed[-1]

            src_start += streams.packinfo.packpos
            fp.seek(src_start, 0)
            decompressor = folder.get_decompressor(compressed_size)
            folder_data = decompressor.decompress(
                fp.read(compressed_size))[:uncompressed_size]
            # Advance by the bytes actually consumed from the file, i.e. the
            # packed size, not the size of the decompressed output.
            src_start += compressed_size
            if folder.digestdefined:
                if folder.crc != calculate_crc32(folder_data):
                    raise Bad7zFile('invalid block data')
            buffer.write(folder_data)
        buffer.seek(0, 0)
        return buffer
Exemple #4
0
 def _retrieve_coders_info(self, file: BinaryIO):
     """Read the unpack sizes and optional CRCs for ``self.folders``.

     Expects a ``CODERS_UNPACK_SIZE`` id, then ``folder.totalout`` sizes
     per folder, an optional ``CRC`` section, and a closing ``END`` id.

     :param file: binary stream positioned at the coders-unpack-size id.
     :raises Bad7zFile: if either expected id is missing.
     """
     marker = file.read(1)
     if marker != Property.CODERS_UNPACK_SIZE:
         raise Bad7zFile('coders unpack size id expected but %s found' % repr(marker))
     for folder in self.folders:
         sizes = []
         for _ in range(folder.totalout):
             sizes.append(read_uint64(file))
         folder.unpacksizes = sizes
     marker = file.read(1)
     if marker == Property.CRC:
         defined = read_boolean(file, self.numfolders, checkall=True)
         crcs = read_crcs(file, self.numfolders)
         for position, folder in enumerate(self.folders):
             folder.digestdefined = defined[position]
             folder.crc = crcs[position]
         marker = file.read(1)
     if marker == Property.END:
         return
     raise Bad7zFile('end id expected but %s found' % repr(marker))
Exemple #5
0
 def read(self, file: BinaryIO) -> None:
     """Read the optional pack, unpack and substreams sections in order.

     Each section is introduced by its own id byte; the sequence must be
     closed by ``PROPERTY.END``.

     :param file: binary stream positioned at the first section id.
     :raises Bad7zFile: if substreams info appears while ``self.unpackinfo``
         is ``None``, or if the closing end id is missing.
     """
     tag = file.read(1)
     if tag == PROPERTY.PACK_INFO:
         self.packinfo = PackInfo.retrieve(file)
         tag = file.read(1)
     if tag == PROPERTY.UNPACK_INFO:
         self.unpackinfo = UnpackInfo.retrieve(file)
         tag = file.read(1)
     if tag == PROPERTY.SUBSTREAMS_INFO:
         unpack = self.unpackinfo
         if unpack is None:
             raise Bad7zFile("Header is broken")
         self.substreamsinfo = SubstreamsInfo.retrieve(
             file, unpack.numfolders, unpack.folders)
         tag = file.read(1)
     if tag == PROPERTY.END:
         return
     raise Bad7zFile("end id expected but %s found" %
                     repr(tag))  # pragma: no-cover
Exemple #6
0
 def _retrieve_coders_info(self, file: BinaryIO):
     """Read the unpack sizes and optional CRCs for ``self.folders``.

     Expects a ``CODERS_UNPACK_SIZE`` id, then one size per output stream
     of every coder of every folder (appended to ``folder.unpacksizes``),
     an optional ``CRC`` section, and a closing ``END`` id.

     :param file: binary stream positioned at the coders-unpack-size id.
     :raises Bad7zFile: if either expected id is missing, including when
         the data ends before the closing id.
     """
     pid = file.read(1)
     if pid != Property.CODERS_UNPACK_SIZE:
         raise Bad7zFile('coders unpack size id expected but %s found' % repr(pid))  # pragma: no-cover
     for folder in self.folders:
         for c in folder.coders:
             for _ in range(c['numoutstreams']):
                 folder.unpacksizes.append(read_uint64(file))
     pid = file.read(1)
     if pid == Property.CRC:
         defined = read_boolean(file, self.numfolders, checkall=True)
         crcs = read_crcs(file, self.numfolders)
         for idx, folder in enumerate(self.folders):
             folder.digestdefined = defined[idx]
             folder.crc = crcs[idx]
         pid = file.read(1)
     if pid != Property.END:
         if not pid:
             # read(1) returned b'' (data ended): ord() would raise a
             # TypeError here and hide the real problem from callers.
             raise Bad7zFile('end id expected but end of data found at 0x{:08x}'.format(file.tell()))  # pragma: no-cover  # noqa
         raise Bad7zFile('end id expected but 0x{:02x} found at 0x{:08x}'.format(ord(pid), file.tell()))  # pragma: no-cover  # noqa
Exemple #7
0
 def _extract_header_info(self, fp: BinaryIO) -> None:
     """Read the optional main-streams and files-info sections from ``fp``.

     :param fp: binary stream positioned at the first section id.
     :raises Bad7zFile: if the sections are not followed by the end id.
     """
     marker = fp.read(1)
     if marker == Property.MAIN_STREAMS_INFO:
         self.main_streams = StreamsInfo.retrieve(fp)
         marker = fp.read(1)
     if marker == Property.FILES_INFO:
         self.files_info = FilesInfo.retrieve(fp)
         marker = fp.read(1)
     if marker == Property.END:
         return
     raise Bad7zFile('end id expected but %s found' % (repr(marker)))  # pragma: no-cover
Exemple #8
0
 def _read(self, file: BinaryIO) -> None:
     """Read the fixed fields that follow the magic bytes and verify them.

     Stores ``version``, ``startheadercrc``, ``nextheaderofs``,
     ``nextheadersize`` and ``nextheadercrc`` on ``self``.  The CRC is
     computed over the raw bytes of the last three fields and compared
     with ``startheadercrc``.

     :param file: seekable binary stream of the archive.
     :raises Bad7zFile: if the computed CRC does not match.
     """
     file.seek(len(MAGIC_7Z), 0)
     self.version = read_bytes(file, 2)
     self.startheadercrc, _unused = read_uint32(file)
     self.nextheaderofs, ofs_raw = read_real_uint64(file)
     self.nextheadersize, size_raw = read_real_uint64(file)
     self.nextheadercrc, crc_raw = read_uint32(file)
     # Chain the checksum across the three raw fields in file order.
     checksum = calculate_crc32(ofs_raw)
     for raw in (size_raw, crc_raw):
         checksum = calculate_crc32(raw, checksum)
     if checksum != self.startheadercrc:
         raise Bad7zFile('invalid header data')
Exemple #9
0
 def read(self, file: BinaryIO) -> None:
     """Read the optional pack, unpack and substreams sections in order.

     Each section is introduced by its own id byte; the sequence must be
     closed by ``Property.END``.

     :param file: binary stream positioned at the first section id.
     :raises Bad7zFile: if substreams info appears without preceding
         unpack info, or if the closing end id is missing.
     """
     pid = file.read(1)
     if pid == Property.PACK_INFO:
         self.packinfo = PackInfo.retrieve(file)
         pid = file.read(1)
     if pid == Property.UNPACK_INFO:
         self.unpackinfo = UnpackInfo.retrieve(file)
         pid = file.read(1)
     if pid == Property.SUBSTREAMS_INFO:
         # Substreams are described relative to the folders of the unpack
         # info; without it the header cannot be interpreted.  Report that
         # as a broken archive rather than failing with AttributeError.
         unpackinfo = getattr(self, 'unpackinfo', None)
         if unpackinfo is None:
             raise Bad7zFile('Header is broken')
         self.substreamsinfo = SubstreamsInfo.retrieve(file, unpackinfo.numfolders, unpackinfo.folders)
         pid = file.read(1)
     if pid != Property.END:
         raise Bad7zFile('end id expected but %s found' % repr(pid))
Exemple #10
0
 def _read(self, file: BinaryIO):
     """Read pack position, stream count, pack sizes and optional CRCs.

     Stores ``packpos``, ``numstreams``, ``packsizes``, ``crcs`` and the
     derived ``packpositions`` (sum of the sizes preceding each stream).

     The CRC section is read as a defined-flags vector followed by one
     32-bit CRC for every stream whose flag is set, the same way the other
     digest sections in this file are read (``read_boolean`` followed by
     ``read_uint32``).  ``self.crcs`` keeps one entry per stream; streams
     without a defined digest get ``None``.

     :param file: binary stream positioned at the pack position.
     :returns: ``self``.
     :raises Bad7zFile: if the section is not closed by the end id.
     """
     self.packpos = read_uint64(file)
     self.numstreams = read_uint64(file)
     pid = file.read(1)
     if pid == Property.SIZE:
         self.packsizes = [read_uint64(file) for _ in range(self.numstreams)]
         pid = file.read(1)
         if pid == Property.CRC:
             defined = read_boolean(file, self.numstreams, checkall=True)
             # read_uint32 returns (value, raw bytes); only the value is kept.
             self.crcs = [read_uint32(file)[0] if is_defined else None
                          for is_defined in defined]
             pid = file.read(1)
     if pid != Property.END:
         raise Bad7zFile('end id expected but %s found' % repr(pid))
     self.packpositions = [sum(self.packsizes[:i]) for i in range(self.numstreams)]  # type: List[int]
     return self
Exemple #11
0
 def _real_get_contents(self, fp: BinaryIO) -> None:
     """Load the signature header, main header and file list of the archive.

     ``fp`` is only used for the format check; all subsequent reads go
     through ``self.fp``.  When ``Header.retrieve`` yields ``None`` the
     method stops without touching ``self.header`` or ``self.files``.

     :param fp: binary stream handed to ``self._check_7zfile``.
     :raises Bad7zFile: if ``self._check_7zfile(fp)`` is falsy.
     """
     looks_like_7z = self._check_7zfile(fp)
     if not looks_like_7z:
         raise Bad7zFile('not a 7z file')
     self.sig_header = SignatureHeader.retrieve(self.fp)
     self.afterheader = self.fp.tell()
     header_bytes = self._read_header_data()
     parsed = Header.retrieve(self.fp, header_bytes, self.afterheader)
     if parsed is not None:
         self.header = parsed
         header_bytes.close()
         self.files = ArchiveFileList()
         files_info = getattr(self.header, 'files_info', None)
         if files_info is not None:
             self._filelist_retrieve()
Exemple #12
0
 def _read(self, file: BinaryIO) -> None:
     """Read the fixed fields that follow the magic bytes and verify them.

     Stores ``version`` (a tuple of two single-byte reads),
     ``startheadercrc``, ``nextheaderofs``, ``nextheadersize`` and
     ``nextheadercrc`` on ``self``.  The CRC is computed over the raw bytes
     of the last three fields and compared with ``startheadercrc``.

     :param file: seekable binary stream of the archive.
     :raises Bad7zFile: if the computed CRC does not match.
     """
     file.seek(len(MAGIC_7Z), 0)
     # Tuple elements are evaluated left to right: major byte, then minor.
     self.version = (file.read(1), file.read(1))
     self.startheadercrc, _unused = read_uint32(file)
     self.nextheaderofs, ofs_raw = read_real_uint64(file)
     self.nextheadersize, size_raw = read_real_uint64(file)
     self.nextheadercrc, crc_raw = read_uint32(file)
     checksum = calculate_crc32(ofs_raw)
     for raw in (size_raw, crc_raw):
         checksum = calculate_crc32(raw, checksum)
     if checksum != self.startheadercrc:
         raise Bad7zFile("invalid header data")
Exemple #13
0
 def _read(self, fp: BinaryIO, buffer: BytesIO, start_pos: int,
           password) -> None:
     """
     Decode header data or encoded header data from buffer.

     A plain header is parsed directly from ``buffer``.  For an encoded
     header, the stream description is read from ``buffer``, the packed
     folders are read from ``fp`` and decompressed, and the decoded bytes
     are then parsed as a plain header.  An empty ``buffer`` is treated as
     an empty archive and nothing is parsed.

     :param fp: archive stream; positioned at ``start_pos`` on entry.
     :param buffer: raw header bytes.
     :param start_pos: offset that pack positions are added to.
     :param password: stored on each folder before its decompressor is
         requested.
     :raises TypeError: if the first id is neither a header nor an encoded
         header, or the decoded data does not start with a header id.
     :raises Bad7zFile: if a folder's CRC is defined and does not match.
     """
     self._start_pos = start_pos
     fp.seek(self._start_pos)
     pid = buffer.read(1)
     if not pid:
         # empty archive
         return
     if pid == PROPERTY.HEADER:
         self._extract_header_info(buffer)
         return
     if pid != PROPERTY.ENCODED_HEADER:
         # Report the offending id byte (not the ``id`` builtin).
         raise TypeError("Unknown field: %r" % pid)  # pragma: no-cover
     # get from encoded header
     streams = HeaderStreamsInfo.retrieve(buffer)
     buffer2 = io.BytesIO()
     src_start = self._start_pos
     for folder in streams.unpackinfo.folders:
         uncompressed = folder.unpacksizes
         if not isinstance(uncompressed, (list, tuple)):
             # A scalar size is applied to every coder of the folder.
             uncompressed = [uncompressed] * len(folder.coders)
         compressed_size = streams.packinfo.packsizes[0]
         uncompressed_size = uncompressed[-1]
         folder.password = password
         src_start += streams.packinfo.packpos
         fp.seek(src_start, 0)
         decompressor = folder.get_decompressor(compressed_size)
         remaining = uncompressed_size
         folder_data = bytearray()
         # Keep pulling from the decompressor until the expected number of
         # decoded bytes has been collected.
         while remaining > 0:
             folder_data += decompressor.decompress(fp,
                                                    max_length=remaining)
             remaining = uncompressed_size - len(folder_data)
         self.size += compressed_size
         src_start += compressed_size
         if folder.digestdefined:
             streams.packinfo.enable_digests = True
             if folder.crc != calculate_crc32(folder_data):
                 raise Bad7zFile("invalid block data")
         buffer2.write(folder_data)
     buffer2.seek(0, 0)
     pid = buffer2.read(1)
     if pid != PROPERTY.HEADER:
         raise TypeError("Unknown field: %r" % pid)  # pragma: no-cover
     self._extract_header_info(buffer2)
Exemple #14
0
 def _read(self, file: BinaryIO, numfolders: int, folders: List[Folder]):
     """Read per-folder substream counts, unpack sizes and digests.

     Without a ``NUM_UNPACK_STREAM`` section every folder is assumed to
     hold one substream.  In the ``SIZE`` section all but the last size of
     a folder are stored; the last one is the folder's unpack size minus
     the stored ones.  Digests already carried by a single-stream folder
     are reused instead of being read from the ``CRC`` section.

     :param file: binary stream positioned at the first section id.
     :param numfolders: number of folders described.
     :param folders: folder objects, indexed in step with the counts.
     :raises Bad7zFile: if the sections are not followed by the end id.
     """
     marker = file.read(1)
     if marker != PROPERTY.NUM_UNPACK_STREAM:
         self.num_unpackstreams_folders = [1] * numfolders
     else:
         self.num_unpackstreams_folders = [
             read_uint64(file) for _ in range(numfolders)
         ]
         marker = file.read(1)
     if marker == PROPERTY.SIZE:
         self.unpacksizes = []
         for index, stream_count in enumerate(self.num_unpackstreams_folders):
             explicit_total = 0  # type: int
             # Only the first ``stream_count - 1`` sizes are stored.
             for _ in range(stream_count - 1):
                 size = read_uint64(file)
                 self.unpacksizes.append(size)
                 explicit_total += size
             self.unpacksizes.append(folders[index].get_unpack_size() -
                                     explicit_total)
         marker = file.read(1)
     num_digests = 0
     num_digests_total = 0
     for index, stream_count in enumerate(self.num_unpackstreams_folders):
         covered_by_folder = stream_count == 1 and folders[index].digestdefined
         if not covered_by_folder:
             num_digests += stream_count
         num_digests_total += stream_count
     if marker == PROPERTY.CRC:
         defined = read_boolean(file, num_digests, checkall=True)
         crcs = read_crcs(file, num_digests)
         cursor = 0
         for index, stream_count in enumerate(self.num_unpackstreams_folders):
             folder = folders[index]
             if stream_count == 1 and folder.digestdefined and folder.crc is not None:
                 self.digestsdefined.append(True)
                 self.digests.append(folder.crc)
                 continue
             for _ in range(stream_count):
                 self.digestsdefined.append(defined[cursor])
                 self.digests.append(crcs[cursor])
                 cursor += 1
         marker = file.read(1)
     if marker != PROPERTY.END:
         raise Bad7zFile("end id expected but %r found" %
                         marker)  # pragma: no-cover
     if not self.digestsdefined:
         # Nothing recorded: fill in "undefined" placeholders per substream.
         self.digestsdefined = [False] * num_digests_total
         self.digests = [0] * num_digests_total
Exemple #15
0
 def _extract_header_info(self, fp: BinaryIO) -> None:
     """Read the optional header sections from ``fp`` in their fixed order.

     Sections, each introduced by its own id byte: archive properties,
     additional streams info, main streams info, files info.  The sequence
     must be closed by the end id.

     :param fp: binary stream positioned at the first section id.
     :raises Bad7zFile: if the sections are not followed by the end id.
     """
     sections = (
         (Property.ARCHIVE_PROPERTIES, 'properties', ArchiveProperties),
         (Property.ADDITIONAL_STREAMS_INFO, 'additional_streams', StreamsInfo),
         (Property.MAIN_STREAMS_INFO, 'main_streams', StreamsInfo),
         (Property.FILES_INFO, 'files_info', FilesInfo),
     )
     marker = fp.read(1)
     for section_id, attribute, parser in sections:
         if marker == section_id:
             setattr(self, attribute, parser.retrieve(fp))
             marker = fp.read(1)
     if marker == Property.END:
         return
     raise Bad7zFile('end id expected but %s found' % (repr(marker)))
Exemple #16
0
 def _read(self, file: BinaryIO):
     """Read the folder list and then the coders info from ``file``.

     After the folder id and folder count, one byte selects where the
     folder records are: zero means they follow inline, anything else
     means an offset follows and the records are read from there before
     the stream position is restored.

     :param file: binary stream positioned at the folder id.
     :raises Bad7zFile: if the folder id is missing.
     """
     marker = file.read(1)
     if marker != Property.FOLDER:
         raise Bad7zFile('folder id expected but %s found' % repr(marker))
     self.numfolders = read_uint64(file)
     self.folders = []
     inline = read_byte(file) == 0x00
     if not inline:
         offset = read_uint64(file)
         resume_at = file.tell()
         file.seek(offset, 0)
     loaded = []
     for _ in range(self.numfolders):
         loaded.append(Folder.retrieve(file))
     self.folders = loaded
     if not inline:
         file.seek(resume_at, 0)
     self._retrieve_coders_info(file)
Exemple #17
0
 def _read(self, file: BinaryIO):
     """Read pack position, stream count, pack sizes and optional CRCs.

     Stores ``packpos``, ``numstreams``, ``packsizes``, ``digestdefined``
     and the derived ``packpositions``; CRC values of streams whose digest
     is defined are appended to ``self.crcs``.  ``packpositions`` has
     ``numstreams + 1`` entries, each the sum of the sizes preceding it.

     :param file: binary stream positioned at the pack position.
     :returns: ``self``.
     :raises Bad7zFile: if the section is not closed by the end id.
     """
     self.packpos = read_uint64(file)
     self.numstreams = read_uint64(file)
     marker = file.read(1)
     if marker == Property.SIZE:
         sizes = []
         for _ in range(self.numstreams):
             sizes.append(read_uint64(file))
         self.packsizes = sizes
         marker = file.read(1)
         if marker == Property.CRC:
             self.enable_digests = True
             self.digestdefined = read_boolean(file, self.numstreams, True)
             # read_uint32 returns a pair; only its first item is stored.
             self.crcs.extend(
                 read_uint32(file)[0] for present in self.digestdefined if present)
             marker = file.read(1)
     if marker != Property.END:
         raise Bad7zFile('end id expected but %s found' % repr(marker))  # pragma: no-cover  # noqa
     positions = []  # type: List[int]
     for boundary in range(self.numstreams + 1):
         positions.append(sum(self.packsizes[:boundary]))
     self.packpositions = positions
     return self
Exemple #18
0
 def _read(self, file: BinaryIO):
     """Read the folder list and then the coders info from ``file``.

     After the folder id and folder count, one byte selects where the
     folder records are: zero means they follow inline, anything else
     means an offset follows and the records are read from there before
     the stream position is restored.

     :param file: binary stream positioned at the folder id.
     :raises Bad7zFile: if the folder id is missing.
     """
     def load_folders():
         # One Folder record per announced folder, read at the current position.
         return [Folder.retrieve(file) for _ in range(self.numfolders)]

     marker = file.read(1)
     if marker != PROPERTY.FOLDER:
         raise Bad7zFile("folder id expected but %s found" %
                         repr(marker))  # pragma: no-cover
     self.numfolders = read_uint64(file)
     self.folders = []
     stored_externally = read_byte(file) != 0x00
     if stored_externally:  # pragma: no-cover  # there is no live example
         offset = read_uint64(file)
         resume_at = file.tell()
         file.seek(offset, 0)
         self.folders = load_folders()
         file.seek(resume_at, 0)
     else:
         self.folders = load_folders()
     self._retrieve_coders_info(file)
Exemple #19
0
 def _read_header_data(self) -> BytesIO:
     """Read the next-header bytes from ``self.fp`` and verify their CRC.

     Seeks forward from the current position by the offset recorded in the
     signature header, reads the recorded number of bytes and compares
     their CRC with the recorded one.

     :returns: a ``BytesIO`` holding the bytes that were read.
     :raises Bad7zFile: if the CRC does not match.
     """
     signature = self.sig_header
     stream = self.fp
     stream.seek(signature.nextheaderofs, os.SEEK_CUR)
     raw = stream.read(signature.nextheadersize)
     buffer = io.BytesIO(raw)
     actual_crc = calculate_crc32(buffer.getvalue())
     if signature.nextheadercrc != actual_crc:
         raise Bad7zFile('invalid header data')
     return buffer