def _parse_cffile(self, offset):
    """ Parse a CFFILE entry

    Reads the fixed-size CFFILE header found at ``offset`` in
    ``self._buf_file`` and the NUL-terminated filename that follows it,
    builds a ``CabFile`` whose contents are sliced out of the folder data
    in ``self._folder_data`` and appends it to ``self.files``.

    Returns the number of bytes used by this entry (header, filename and
    terminator) so the caller can step to the next entry.

    Raises CorruptionError if the header or the filename runs past the end
    of the buffer, if the entry refers to a folder that was not loaded, or
    if the folder holds less data than the size recorded in the header.
    """
    fmt = '<I'      # uncompressed size
    fmt += 'I'      # uncompressed offset of this file in the folder
    fmt += 'H'      # index into the CFFOLDER area
    fmt += 'H'      # date
    fmt += 'H'      # time
    fmt += 'H'      # attribs
    try:
        vals = struct.unpack_from(fmt, self._buf_file, offset)
    except struct.error as e:
        raise CorruptionError(str(e))
    size, data_offset, folder_idx, date_raw, time_raw, attribs = vals

    # debugging
    if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
        print("CFFILE %s" % (vals,))

    # parse filename: it is NUL-terminated and at most 255 bytes are examined
    header_size = struct.calcsize(fmt)
    name_start = offset + header_size
    name_end = self._buf_file.find(b'\0', name_start, name_start + 255)
    if name_end == -1:
        # no terminator in the window; only acceptable when the window
        # itself was complete, otherwise the data has been truncated
        if len(self._buf_file) < name_start + 255:
            raise CorruptionError("Cab file corrupt, CFFILE filename is "
                                  "not terminated")
        name_end = name_start + 255
    filename = self._buf_file[name_start:name_end]

    # add file
    f = CabFile(filename)
    f._date_decode(date_raw)
    f._time_decode(time_raw)
    f._attr_decode(attribs)
    try:
        folder_data = self._folder_data[folder_idx]
    except (IndexError, KeyError):
        raise CorruptionError("Cab file corrupt, CFFILE refers to unknown "
                              "folder %i" % folder_idx)
    f.contents = folder_data[data_offset:data_offset + size]
    # slicing never fails, so a short result is how truncation shows up
    if len(f.contents) != size:
        raise CorruptionError("Corruption inside archive, %s is size %i but "
                              "expected size %i" %
                              (filename, len(f.contents), size))
    self.files.append(f)

    # return offset to next entry
    return header_size + len(filename) + 1
def parse(self, buf):
    """ Parse .cab data

    ``buf`` holds the complete contents of a cabinet archive. When
    ``self._decompressor`` is set the archive is handed to that external
    binary and the extracted files are added with ``self.add_file``.
    Otherwise the CFHEADER, CFFOLDER and CFFILE structures are decoded
    directly from ``buf``.

    Raises NotSupportedError if the data is not a cabinet file, has an
    unsupported version or belongs to a chained set, and CorruptionError
    if the header is truncated or inconsistent, the archive is empty, the
    header flags are set, or the external decompressor fails.
    """

    # we've got an external binary to help us
    if self._decompressor:
        self._parse_with_decompressor(buf)
        return

    # slurp the whole buffer at once
    self._buf_file = buf

    # read the file header
    fmt = '<4s'     # signature
    fmt += 'xxxx'   # reserved1
    fmt += 'I'      # size
    fmt += 'xxxx'   # reserved2
    fmt += 'I'      # offset to CFFILE
    fmt += 'xxxx'   # reserved3
    fmt += 'BB'     # version minor, major
    fmt += 'H'      # no of CFFOLDERs
    fmt += 'H'      # no of CFFILEs
    fmt += 'H'      # flags
    fmt += 'H'      # setID
    fmt += 'H'      # number of this cabinet within its set
    # The optional reserved-size fields that may follow (reserved cab size,
    # reserved folder size, reserved block size, per-cabinet reserved area)
    # are not parsed; the header flags are required to be zero below.
    try:
        vals = struct.unpack_from(fmt, self._buf_file, 0)
    except struct.error as e:
        raise CorruptionError(str(e))
    (signature, cab_size, off_cffile, ver_minor, ver_major,
     nr_folders, nr_files, flags, set_id, cab_index) = vals

    # debugging
    if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
        print("CFHEADER %s" % (vals,))

    # check magic bytes
    if signature != b'MSCF':
        raise NotSupportedError('Data is not application/vnd.ms-cab-compressed')

    # check size matches
    if cab_size != len(self._buf_file):
        raise CorruptionError('Cab file internal size does not match data')

    # check version
    if ver_major != 1 or ver_minor != 3:
        raise NotSupportedError('Version %i.%i not supported' %
                                (ver_major, ver_minor))

    # chained cabs not supported
    if cab_index != 0:
        raise NotSupportedError('Chained cab file not supported')

    # verify we actually have data
    if nr_files == 0:
        raise CorruptionError('The cab file is empty')

    # verify we got complete data
    if off_cffile > len(self._buf_file):
        raise CorruptionError('Cab file corrupt')

    # no header flags are supported
    if flags != 0:
        raise CorruptionError('Expected header flags to be cleared')

    # read this so we can do round-trip
    self.set_id = set_id

    # we don't support compressed folders in multi-folder archives
    if nr_folders > 1:
        self._is_multi_folder = True

    # parse CFFOLDER entries, which start straight after the header
    offset = struct.calcsize(fmt)
    folder_size = struct.calcsize(FMT_CFFOLDER)
    for i in range(nr_folders):
        self._parse_cffolder(i, offset)
        offset += folder_size

    # parse CFFILEs; each call returns the length of the entry it consumed
    for _ in range(nr_files):
        off_cffile += self._parse_cffile(off_cffile)

def _parse_with_decompressor(self, buf):
    """ Extract .cab data with the external decompressor binary

    Writes ``buf`` to a temporary file, runs ``self._decompressor`` on it
    into a temporary directory and adds every extracted file with
    ``self.add_file``. The temporary file and directory are removed even
    when extraction fails.

    Raises CorruptionError if the decompressor exits with a non-zero status.
    """
    import shutil
    import subprocess
    import tempfile

    # write to temp file; delete=True removes it when it is closed
    src = tempfile.NamedTemporaryFile(mode='wb',
                                      prefix='cabarchive_',
                                      suffix=".cab",
                                      dir=self._tmpdir,
                                      delete=True)
    try:
        src.write(buf)
        src.flush()

        # decompress to a temp directory
        dest_fn = tempfile.mkdtemp(prefix='cabarchive_', dir=self._tmpdir)
        try:
            argv = [self._decompressor, '--quiet', '--directory', dest_fn, src.name]
            ps = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # communicate() drains both pipes, so the child cannot stall
            # on a full pipe buffer while we wait for it to exit
            _, stderr_data = ps.communicate()
            if ps.returncode != 0:
                raise CorruptionError("Failed to extract: %s" % stderr_data)

            # add all the fake CFFILE objects
            for fn in _listdir_recurse(dest_fn):
                cff = CabFile(os.path.basename(fn))
                with open(fn, 'rb') as extracted:
                    cff.contents = extracted.read()
                self.add_file(cff)
        finally:
            shutil.rmtree(dest_fn)
    finally:
        src.close()