def _open_rarfile(self):
    """Open the 'test_corrupted.rar' fixture and set its default password.

    :return: the opened ``RarFile`` with password 'testing' already applied
    """
    archive = RarFile(os.path.join(TESTS_DIR, 'test_corrupted.rar'))
    archive.setpassword('testing')
    return archive
class PackFile:
    """Uniform wrapper around a zip or rar archive.

    Every method delegates to the underlying archive object stored in
    ``self.proc`` (a ``zipfile.ZipFile`` or an ``unrar.rarfile.RarFile``).
    Used as a context manager, the archive is extracted into a temporary
    folder whose path is yielded, and that folder is cleared on exit.
    """

    def __init__(self, file, mode=None):
        """
        :param file: the archive to process
        :param mode: archive format, 'zip' or 'rar'; when omitted it is
            inferred from the file extension:
            '.rar' -> 'rar'
            '.zip' / '.docx' -> 'zip' (docx files are zip containers)
        :raises ValueError: the format cannot be inferred from the extension,
            or an unsupported mode was given
        :raises ModuleNotFoundError: rar mode was requested but the unrar
            module is not installed
        """
        # 1. Determine the archive format. The extension is only inspected
        #    when the caller did not name the format explicitly.
        if not mode:
            ext = os.path.splitext(file)[1].lower()
            if ext in ('.docx', '.zip'):
                mode = 'zip'
            elif ext == '.rar':
                mode = 'rar'
            else:
                raise ValueError(
                    f'cannot infer the archive format from extension {ext!r}')
        self.mode = mode

        # 2. Pick the extraction "engine".
        if mode == 'zip':
            self.proc = zipfile.ZipFile(file)
        elif mode == 'rar':
            try:
                from unrar.rarfile import RarFile
            except ModuleNotFoundError as err:
                raise ModuleNotFoundError(
                    'the unrar module is missing, installation guide: '
                    'https://blog.csdn.net/code4101/article/details/79328636'
                ) from err
            self.proc = RarFile(file)
        else:
            # Fail here instead of leaving self.proc undefined, which would
            # only surface later as an unrelated AttributeError.
            raise ValueError(f'unsupported archive mode {mode!r}')

        # 3. Temporary extraction folder; None means "not extracted yet".
        self.tempfolder = None

    def open(self, member, pwd=None):
        """Return file-like object for 'member'.

        'member' may be a filename or an info object of the archive.
        """
        # pwd must be passed by keyword: the second positional parameter of
        # ZipFile.open() is the open mode, not the password.
        return self.proc.open(member, pwd=pwd)

    def read(self, member, pwd=None):
        """Return file bytes (as a string) for name."""
        return self.proc.read(member, pwd)

    def namelist(self):
        """>> self.namelist()  # list of the member names
        1   [Content_Types].xml
        2   _rels/.rels
        ......
        20  word/fontTable.xml
        21  docProps/app.xml
        """
        return self.proc.namelist()

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        return self.proc.setpassword(pwd)

    def getinfo(self, name):
        """
        >> self.getinfo('word/document.xml')  # info about a single member
        <ZipInfo filename='word/document.xml' compress_type=deflate file_size=140518 compress_size=10004>
        """
        return self.proc.getinfo(name)

    def infolist(self, prefix=None, zipinfo=True):
        """>> self.infolist()  # multi-member version of getinfo
        1   <ZipInfo filename='[Content_Types].xml' compress_type=deflate file_size=1495 compress_size=383>
        2   <ZipInfo filename='_rels/.rels' compress_type=deflate file_size=590 compress_size=243>
        ......
        20  <ZipInfo filename='word/fontTable.xml' compress_type=deflate file_size=1590 compress_size=521>
        21  <ZipInfo filename='docProps/app.xml' compress_type=deflate file_size=720 compress_size=384>

        :param prefix: keep only members whose filename starts with this
            prefix, e.g. 'word/' selects everything under the word directory
        :param zipinfo: when true each element of the returned list is an
            info object; otherwise only the filenames are returned
        """
        infos = self.proc.infolist()
        if prefix:
            infos = [info for info in infos if info.filename.startswith(prefix)]
        if not zipinfo:
            infos = [info.filename for info in infos]
        return infos

    def printdir(self):
        """Print a table of contents for the archive."""
        return self.proc.printdir()

    def testrar(self):
        """Read all the files and check the CRC.

        In zip mode this delegates to ``ZipFile.testzip()``, because
        ``ZipFile`` has no ``testrar`` method.
        """
        if self.mode == 'zip':
            return self.proc.testzip()
        return self.proc.testrar()

    def extract(self, member, path=None, pwd=None):
        """Extract a single member.

        Note: extract('word/document.xml', 'a') writes the file to
        'a/word/document.xml', i.e. the member's own directories are kept.
        """
        return self.proc.extract(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
        directory. `path' specifies a different directory to extract
        to. `members' is optional and must be a subset of the list
        returned by namelist().
        """
        return self.proc.extractall(path, members, pwd)

    def extractall2tempfolder(self):
        """Extract the archive into a temporary folder and return its path.

        The extraction happens once; later calls return the same folder
        until clear_tempfolder() is called.
        """
        if not self.tempfolder:
            self.tempfolder = tempfile.mkdtemp()
            self.proc.extractall(path=self.tempfolder)
        return self.tempfolder

    def clear_tempfolder(self):
        """Delete what was extracted into the temporary folder.

        Does nothing when no temporary folder exists. Afterwards the folder
        is forgotten, so the next extractall2tempfolder() extracts again
        instead of returning a stale path.
        """
        if not self.tempfolder:
            return
        filesdel(self.tempfolder)
        self.tempfolder = None

    def __enter__(self):
        """`with ... as ...` creates the extraction folder and removes it on exit.

        Note: the value bound by `as` is the path of the extracted folder,
        not the PackFile object.
        """
        return self.extractall2tempfolder()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear_tempfolder()