def get_uncompressed_size(file):
    """Return the uncompressed size stored in the trailer of a gzip file.

    A gzip member ends with two little-endian unsigned 32-bit fields: the
    CRC32 of the payload followed by ISIZE, the payload length modulo
    2**32.  The value is therefore only exact for payloads smaller than
    4 GiB.

    :param file: path of the gzip file to inspect
    :return: the ISIZE field as a non-negative integer
    :raises IOError/OSError: if the file cannot be opened or is shorter
        than the 8-byte trailer
    """
    # Local import: the trailer is decoded directly instead of relying on
    # the undocumented gzip.read32 helper, which newer Pythons do not ship.
    import struct

    # Binary mode is required: the trailer is raw bytes, and text-mode
    # files reject end-relative seeks on Python 3.
    with open(file, 'rb') as fileobj:
        fileobj.seek(-8, 2)  # 2 == os.SEEK_END
        _crc32, isize = struct.unpack('<II', fileobj.read(8))
    return isize
def getContentSize(path):
    '''Return the content size of *path* (expanded size for a compressed one).

    For a path that ``is_gzipped`` reports as gzip, the size is taken from
    the ISIZE field in the last four bytes of the file (uncompressed length
    modulo 2**32).  Otherwise it is the offset of the end of the file.

    NOTE(review): ``_open`` and ``is_gzipped`` are defined outside this
    block; this assumes ``_open(path, 'rb')`` yields a seekable object over
    the raw, still-compressed bytes -- confirm against its definition.

    :param path: path of the file to measure
    :return: the size in bytes, or -1 if anything goes wrong
    '''
    # Local import: decode the trailer directly rather than through the
    # undocumented gzip.read32 helper, which newer Pythons do not ship.
    import struct

    try:
        f_in = _open(path, 'rb')
        try:
            if is_gzipped(path):
                # Trailer layout: CRC32 then ISIZE, both little-endian
                # unsigned 32-bit integers.
                f_in.seek(-8, 2)
                _crc32, isize = struct.unpack('<II', f_in.read(8))
                return isize
            f_in.seek(0, 2)
            return f_in.tell()
        finally:
            # Close on every path, including failures, so the handle
            # cannot leak when seek/read raises.
            f_in.close()
    except Exception:
        # Best-effort contract kept from the original: callers get -1.
        return -1
def _get_uncompressed_filesize(filename):
    """Return the uncompressed size recorded at the end of a gzip file.

    The approach follows gzip.py and the gzip trailer layout described at
    http://www.gzip.org/zlib/rfc-gzip.html#header-trailer
    (see also http://stackoverflow.com/a/1704576/161718 and
    https://gist.github.com/ozanturksever/4968827).

    The last four bytes of the file hold ISIZE, the uncompressed length
    modulo 2**32 as a little-endian unsigned integer, so the result is
    only exact for payloads smaller than 4 GiB.

    :param filename: the name of the gzipped file
    :return: the size of the uncompressed file
    """
    # Local import: gzip.read32 is an undocumented helper that newer
    # Pythons do not ship, so the field is unpacked directly.
    import struct

    with open(filename, 'rb') as gzfile:
        gzfile.seek(-4, 2)  # 2 == os.SEEK_END
        return struct.unpack('<I', gzfile.read(4))[0]
def get_fd_and_file_size(file_path):
    """Return an open file object and the file size for the given path.

    The file object is opened with the default mode of ``gzip.open`` (for
    paths ending in '.gz') or of ``open`` (for everything else) and is
    positioned at the beginning.  The caller is responsible for closing it.

    The file size returned is that of the uncompressed data when
    file_path points to a gzipped file.  It is read from the last 4 bytes
    of the file, which hold the uncompressed size modulo 2**32, so it is
    only exact for payloads smaller than 4 GiB.
    """
    # Local import: the size field is unpacked directly instead of going
    # through the undocumented gzip.read32 helper.
    import struct

    if file_path.endswith('.gz'):
        # Read the trailer through a separate short-lived handle so the
        # returned gzip object is never repositioned behind its back and
        # nothing stays open if the read fails.
        with open(file_path, 'rb') as raw:
            raw.seek(-4, os.SEEK_END)
            # '<I' is unsigned, so no 0xffffffff mask is needed.
            file_size = struct.unpack('<I', raw.read(4))[0]
        fd = gzip.open(file_path)
    else:
        fd = open(file_path)
        file_size = os.path.getsize(file_path)
    return fd, file_size
def _getUnzippedFileSize(self, gzippedFile):
    """Return the uncompressed size recorded at the end of a gzip file.

    The last four bytes of a gzip file hold the uncompressed length modulo
    2**32 as a little-endian unsigned integer, so the result is only exact
    for payloads smaller than 4 GiB.

    :param gzippedFile: path of the gzip file to inspect
    :return: the stored size as a non-negative integer
    """
    # Local import: decode the field directly rather than through the
    # undocumented gzip.read32 helper, which newer Pythons do not ship.
    import struct

    # Binary mode plus a context manager: the trailer is raw bytes and
    # the handle must be closed even if the seek or read fails.
    with open(gzippedFile, 'rb') as trailer_source:
        trailer_source.seek(-4, os.SEEK_END)
        return struct.unpack('<I', trailer_source.read(4))[0]