def _get_file(url, zinfo, targetpath=None, strip=False):
    """Fetch one member of a remote zip archive via an HTTP Range request
    and write it to disk.

    url        -- URL of the remote zip archive.
    zinfo      -- ZipInfo record (from the central directory) for the member.
    targetpath -- destination directory; defaults to the current directory.
    strip      -- if true, drop directory components and keep only the
                  member's basename.

    Returns the filesystem path the member was written to.
    """
    # Offset of the member's compressed data: local file header, then the
    # file name and extra field.
    # NOTE(review): assumes the local header's name/extra lengths equal the
    # central directory's (usually true, but not guaranteed by the format).
    z_start = (zinfo.header_offset + zipfile.sizeFileHeader +
               len(zinfo.filename) + len(zinfo.extra))
    # HTTP byte ranges are inclusive, so the last byte we need is
    # z_start + compress_size - 1 (the original requested one extra byte).
    z_end = z_start + zinfo.compress_size - 1
    req = urllib2.Request(url)
    req.add_header("Range", "bytes=%s-%s" % (z_start, z_end))
    f = urllib2.urlopen(req)
    try:
        data = f.read()
    finally:
        f.close()  # bug fix: the HTTP response object was never closed
    tmp = cStringIO.StringIO(data)
    z = zipfile.ZipExtFile(fileobj=tmp, zipinfo=zinfo)
    # Resolve the output path, mirroring zipfile's extraction logic.
    if targetpath is None:
        targetpath = os.getcwd()
    # Strip a trailing path separator (but keep a bare drive root).
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]
    if strip:
        # Zip member names always use '/' separators.
        targetpath = os.path.join(targetpath, zinfo.filename.split('/')[-1])
    else:
        if zinfo.filename[0] == '/':
            targetpath = os.path.join(targetpath, zinfo.filename[1:])
        else:
            targetpath = os.path.join(targetpath, zinfo.filename)
    targetpath = os.path.normpath(targetpath)
    # Create parent directories as needed.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)
    # A trailing '/' marks a directory entry: create it and we are done.
    if zinfo.filename[-1] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath
    # bug fix: use open() instead of the deprecated file() builtin, and
    # guarantee both handles are closed even if the copy fails part-way.
    target = open(targetpath, "wb")
    try:
        shutil.copyfileobj(z, target)
    finally:
        z.close()
        target.close()
    return targetpath
def main(filename): print 'Reading %s' % sys.argv[1] f = open(filename, 'rb') while True: # Read and parse a file header fheader = f.read(zipfile.sizeFileHeader) if len(fheader) < zipfile.sizeFileHeader: print 'Found end of file. Some entries missed.' break fheader = struct.unpack(zipfile.structFileHeader, fheader) if fheader[zipfile._FH_SIGNATURE] == 'PK\x01\x02': print 'Found start of central directory. All entries processed.' break fname = f.read(fheader[zipfile._FH_FILENAME_LENGTH]) if fheader[zipfile._FH_EXTRA_FIELD_LENGTH]: f.read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH]) print 'Found %s' % fname # Fake a zipinfo record zi = zipfile.ZipInfo() zi.compress_size = fheader[zipfile._FH_COMPRESSED_SIZE] zi.compress_type = fheader[zipfile._FH_COMPRESSION_METHOD] zi.flag_bits = fheader[zipfile._FH_GENERAL_PURPOSE_FLAG_BITS] zi.file_size = fheader[zipfile._FH_UNCOMPRESSED_SIZE] # Read the file contents zef = zipfile.ZipExtFile(f, 'rb', zi) data = zef.read() # Sanity checks if len(data) != fheader[zipfile._FH_UNCOMPRESSED_SIZE]: raise Exception( "Unzipped data doesn't match expected size! %d != %d, in %s" % (len(data), fheader[zipfile._FH_UNCOMPRESSED_SIZE], fname)) calc_crc = zipfile.crc32(data) & 0xffffffff if calc_crc != fheader[zipfile._FH_CRC]: raise Exception('CRC mismatch! %d != %d, in %s' % (calc_crc, fheader[zipfile._FH_CRC], fname)) # Write the file write_data(fname, data) f.close()
def install(self):
    """Download the plugin archive and unpack it into the plugin directory.

    Uses self.download() to obtain the archive path and extracts every
    member into self.plugin_dest.
    """
    path = self.download()
    # bug fix: ZipExtFile is zipfile's internal per-member stream and cannot
    # open an archive; ZipFile is the archive reader that supports the
    # context-manager protocol and extractall().
    with zipfile.ZipFile(path, 'r') as z:
        z.extractall(self.plugin_dest)
def main(filename):
    """List a zip archive via its central directory, then re-parse every
    local file header manually and verify each member's size and CRC.

    filename -- path to the zip file.

    Handles entries that use a data descriptor (flag bit 3) by reading the
    real sizes/CRC via fdescriptor_reader() (defined elsewhere).  Recovered
    members are passed to write_data().  Raises Exception on a malformed
    header, a size mismatch, or a CRC mismatch.
    """
    print('Reading %s Central Directory' % filename)
    # Get info from ZIP Central Directory.
    with zipfile.ZipFile(filename, 'r') as myzip:
        files = myzip.namelist()
    print('Found %d file(s) from Central Directory:' % (len(files)))
    print('- ' + '\n- '.join(files))
    # bug fix: report the actual argument, not sys.argv[1].
    print('Reading %s ZIP entry manually' % filename)
    f = open(filename, 'rb')
    try:
        while True:
            # Read and parse a local file header.
            fheader = f.read(zipfile.sizeFileHeader)
            if len(fheader) < zipfile.sizeFileHeader:
                print('Found end of file. Some entries missed.')
                break
            fheader = struct.unpack(zipfile.structFileHeader, fheader)
            if fheader[zipfile._FH_SIGNATURE] == zipfile.stringCentralDir:
                print('Found start of central directory. All entries processed.')
                break
            if fheader[zipfile._FH_SIGNATURE] != zipfile.stringFileHeader:
                raise Exception('Size mismatch! File Header expected, got "%s"' %
                                (fheader[zipfile._FH_SIGNATURE]))
            # bug fix: data_descriptor must be reset for every entry.  It was
            # only ever assigned when flag bit 3 was set, so the first plain
            # entry raised NameError and later entries reused a stale value.
            data_descriptor = False
            if fheader[zipfile._FH_GENERAL_PURPOSE_FLAG_BITS] & 0x8:
                data_descriptor = True
            fname = f.read(fheader[zipfile._FH_FILENAME_LENGTH])
            if fheader[zipfile._FH_EXTRA_FIELD_LENGTH]:
                f.read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH])
            print('Found %s' % fname.decode())
            # Fake a zipinfo record so ZipExtFile can decompress the entry.
            zi = zipfile.ZipInfo()
            zi.filename = fname
            zi.compress_size = fheader[zipfile._FH_COMPRESSED_SIZE]
            zi.compress_type = fheader[zipfile._FH_COMPRESSION_METHOD]
            zi.flag_bits = fheader[zipfile._FH_GENERAL_PURPOSE_FLAG_BITS]
            zi.file_size = fheader[zipfile._FH_UNCOMPRESSED_SIZE]
            zi.CRC = fheader[zipfile._FH_CRC]
            if data_descriptor:
                # Compressed size is zero in the local header; get the real
                # sizes and CRC from the trailing data descriptor.
                ddescriptor = fdescriptor_reader(f, f.tell())
                if ddescriptor is None:
                    break
                zi.compress_size = ddescriptor[_DD_COMPRESSED_SIZE]
                zi.file_size = ddescriptor[_DD_UNCOMPRESSED_SIZE]
                zi.CRC = ddescriptor[_DD_CRC]
            # Read the file contents.
            zef = zipfile.ZipExtFile(f, 'rb', zi)
            data = zef.read()
            # Sanity checks.
            if len(data) != zi.file_size:
                raise Exception(
                    "Unzipped data doesn't match expected size! %d != %d, in %s" %
                    (len(data), zi.file_size, fname))
            calc_crc = zipfile.crc32(data) & 0xffffffff
            if calc_crc != zi.CRC:
                raise Exception('CRC mismatch! %d != %d, in %s' %
                                (calc_crc, zi.CRC, fname))
            # Write the file.
            write_data(fname, data)
            if data_descriptor:
                # Skip the data descriptor before reading the next header.
                f.seek(f.tell() + sizeDataDescriptor)
    finally:
        # bug fix: close the archive even when parsing raises.
        f.close()
def open(self, name, mode="r", pwd=None):
    """Return a file-like object for reading member 'name'.

    name -- member name or ZipInfo instance.
    mode -- "r", "U", or "rU" (the "U" variants are deprecated).
    pwd  -- optional password (bytes) for encrypted members.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if 'U' in mode:
        import warnings
        warnings.warn("'U' mode is deprecated", DeprecationWarning, 2)
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Accept either a ready-made info object or a member name.
    zinfo = name if isinstance(name, zipfile.ZipInfo) else self.getinfo(name)

    # Share the archive's underlying file object; the refcount keeps it
    # alive until every ZipExtFile handed out has been closed.
    self._fileRefCnt += 1
    # NOTE(review): _SharedFile is a private zipfile API; this 4-argument
    # call matches Python 3.5 -- later versions added a 'writing' parameter.
    shared = zipfile._SharedFile(self.fp, zinfo.header_offset,
                                 self._fpclose, self._lock)
    try:
        # Validate and skip past the local file header.
        raw = shared.read(zipfile.sizeFileHeader)
        if len(raw) != zipfile.sizeFileHeader:
            raise zipfile.BadZipFile("Truncated file header")
        header = struct.unpack(zipfile.structFileHeader, raw)
        if header[zipfile._FH_SIGNATURE] != zipfile.stringFileHeader:
            raise zipfile.BadZipFile("Bad magic number for file header")

        stored_name = shared.read(header[zipfile._FH_FILENAME_LENGTH])
        if header[zipfile._FH_EXTRA_FIELD_LENGTH]:
            shared.read(header[zipfile._FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data.
            raise NotImplementedError("compressed patched data (flag bit 5)")
        if zinfo.flag_bits & 0x40:
            # Strong encryption.
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 marks a UTF-8 name; otherwise cp437 per the zip spec.
        codec = "utf-8" if zinfo.flag_bits & 0x800 else "cp437"
        # Normalize any backslash separators before comparing against the
        # central-directory name.
        header_name = stored_name.decode(codec).replace('\\', '/')
        if header_name != zinfo.orig_filename:
            raise zipfile.BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, header_name))

        # Handle legacy (PKWARE) encryption.
        decrypter = None
        if zinfo.flag_bits & 0x1:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)
            decrypter = zipfile._ZipDecrypter(pwd)
            # The first 12 bytes of the cipher stream are a random
            # encryption header; its final byte echoes the MSB of the CRC
            # (or of the file time, when a data descriptor is present) and
            # serves as a cheap password check.
            prefix = [decrypter(b) for b in shared.read(12)]
            if zinfo.flag_bits & 0x8:
                # Compare against the file time from extended local headers.
                expected = (zinfo._raw_time >> 8) & 0xff
            else:
                # Compare against the CRC otherwise.
                expected = (zinfo.CRC >> 24) & 0xff
            if prefix[11] != expected:
                raise RuntimeError("Bad password for file", name)
        return zipfile.ZipExtFile(shared, mode, zinfo, decrypter, True)
    except:
        # Any failure must release the shared handle before propagating.
        shared.close()
        raise
def getlist(self, recursive=None, directory=None, flatten=None):
    """Reads the Zip File to construct a list of ZipFile objects.

    recursive -- defaults to self.recursive when None.
    directory -- an open zf.ZipFile, a directory path (falls back to the
                 DataFolder implementation), or a path to a zip file.
    flatten   -- defaults to self.flat when None.

    Returns self.  Raises IOError when 'directory' cannot be interpreted
    as a zip archive.
    """
    if recursive is None:
        recursive = self.recursive
    self.files = []
    self.groups = {}
    if flatten is None:
        flatten = self.flat
    # Work out where the archive comes from and whether we own (and must
    # close) the handle.
    if self.File is None and directory is None:
        # bug fix: ZipExtFile is zipfile's internal per-member stream and
        # cannot open an archive; ZipFile is the archive reader.
        self.File = zf.ZipFile(self._dialog(), "r")
        close_me = True
    elif isinstance(directory, zf.ZipFile):
        if directory.fp:
            self.File = directory
            close_me = False
        else:
            # The ZipFile we were handed is closed; reopen it.
            self.File = zf.ZipFile(directory, "r")
            close_me = True
    elif isinstance(directory, string_types) and path.isdir(directory):
        # Fall back to DataFolder
        return super(ZipFolderMixin, self).getlist(recursive, directory,
                                                   flatten)
    elif isinstance(directory, string_types) and zf.is_zipfile(directory):
        self.File = zf.ZipFile(directory, "r")
        close_me = True
    elif isinstance(self.File, zf.ZipFile):
        if self.File.fp:
            close_me = False
        else:
            self.File = zf.ZipFile(self.File.filename, "r")
            close_me = True
    else:
        raise IOError(
            "{} does not appear to be zip file!".format(directory))
    # At this point self.File is an open zf.ZipFile (comment fixed: the
    # original mentioned h5py.File, a leftover from another mixin).
    self.path = self.File.filename
    files = [x.filename for x in self.File.filelist]
    for p in self.exclude:  # Remove excluded files
        if isinstance(p, string_types):
            for f in list(fnmatch.filter(files, p)):
                del files[files.index(f)]
        if isinstance(p, _pattern_type):
            matched = []
            # For reg expts we iterate over all files, but we can't delete matched
            # files as we go as we're iterating over them - so we store the
            # indices and delete them later.
            for f in files:
                if p.search(f):
                    matched.append(files.index(f))
            matched.sort(reverse=True)
            for i in matched:  # reverse sort the matching indices to safely delete
                del files[i]
    for p in self.pattern:  # pattern is a list of strings and regeps
        if isinstance(p, string_types):
            for f in fnmatch.filter(files, p):
                del files[files.index(f)]
                # bug fix: str.replace returns a new string; the result was
                # previously discarded, so OS-specific separators leaked
                # through into the appended names.
                f = f.replace(path.sep, "/")
                self.append(f)
        elif isinstance(p, _pattern_type):
            matched = []
            # For reg expts we iterate over all files, but we can't delete matched
            # files as we go as we're iterating over them - so we store the
            # indices and delete them later.
            for ix, f in enumerate(files):
                if p.search(f):
                    # bug fix: assign the replaced name (see above).
                    f = f.replace(path.sep, "/")
                    self.append(f)
                else:
                    matched.append(ix)
            for i in reversed(matched):  # reverse sort the matching indices to safely delete
                del files[i]
    self._zip_contents = files
    if flatten is None or not flatten:
        self.unflatten()
    if close_me:
        self.File.close()
    return self