Ejemplo n.º 1
0
class PackFile:
    """Uniform wrapper around a zip or rar archive.

    Most methods delegate to the backend object stored in ``self.proc``
    (``zipfile.ZipFile`` for zip mode, ``unrar.rarfile.RarFile`` for rar
    mode). The class can also be used as a context manager that extracts the
    archive into a temporary folder on entry and cleans it up on exit.
    """

    def __init__(self, file, mode=None):
        """Open *file* with the backend matching its archive format.

        :param file: path of the archive to process
        :param mode: archive format; when omitted it is inferred from the
            file extension
            'rar': use the unrar backend
            'zip': use zipfile (a ``.docx`` file is treated as zip by default)
        :raises ValueError: if the format cannot be inferred from the
            extension, or *mode* is neither 'zip' nor 'rar'
        :raises ModuleNotFoundError: if rar mode is requested but the
            ``unrar`` package is not installed
        """
        # 1. Determine the archive format.
        ext = os.path.splitext(file)[1].lower()
        if not mode:
            if ext in ('.docx', '.zip'):
                mode = 'zip'
            elif ext == '.rar':
                mode = 'rar'
            else:
                dprint(ext)  # the extension does not reveal the archive format
                raise ValueError(
                    f"cannot infer archive format from extension {ext!r}; "
                    "pass mode='zip' or mode='rar'")
        if mode not in ('zip', 'rar'):
            # Fail here instead of leaving self.proc undefined and crashing
            # later with an unrelated AttributeError.
            raise ValueError(
                f"unsupported mode {mode!r}; expected 'zip' or 'rar'")
        self.mode = mode

        # 2. Extraction folder; None means "not extracted yet". Set before the
        #    backend is opened so the attribute always exists.
        self.tempfolder = None

        # 3. Pick the extraction "engine".
        if mode == 'zip':
            self.proc = zipfile.ZipFile(file)
        else:
            try:
                from unrar.rarfile import RarFile
            except ModuleNotFoundError as err:
                dprint()
                raise ModuleNotFoundError(
                    "the 'unrar' module is required for rar archives; see "
                    "https://blog.csdn.net/code4101/article/details/79328636"
                ) from err
            self.proc = RarFile(file)

    def open(self, member, pwd=None):
        """Return a file-like object for 'member'.

        'member' may be a filename or an info object of the backend.
        """
        # pwd must be passed by keyword: the second positional parameter of
        # zipfile.ZipFile.open is ``mode``, not ``pwd``.
        return self.proc.open(member, pwd=pwd)

    def read(self, member, pwd=None):
        """Return the bytes of the archive member 'member'."""
        return self.proc.read(member, pwd)

    def namelist(self):
        """Return the list of member names.

        >> self.namelist()
             1           [Content_Types].xml
             2                   _rels/.rels
            ......
            20            word/fontTable.xml
            21              docProps/app.xml
        """
        return self.proc.namelist()

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        return self.proc.setpassword(pwd)

    def getinfo(self, name):
        """Return the backend's info object for one member.

        >> self.getinfo('word/document.xml')
        <ZipInfo filename='word/document.xml' compress_type=deflate file_size=140518 compress_size=10004>
        """
        return self.proc.getinfo(name)

    def infolist(self, prefix=None, zipinfo=True):
        """Multi-member version of getinfo.

        >> self.infolist()
             1           <ZipInfo filename='[Content_Types].xml' compress_type=deflate file_size=1495 compress_size=383>
             2                    <ZipInfo filename='_rels/.rels' compress_type=deflate file_size=590 compress_size=243>
            ......
            20            <ZipInfo filename='word/fontTable.xml' compress_type=deflate file_size=1590 compress_size=521>
            21               <ZipInfo filename='docProps/app.xml' compress_type=deflate file_size=720 compress_size=384>

        :param prefix:
            keep only members whose filename starts with this prefix, e.g.
            "word/" selects everything under the word directory
        :param zipinfo:
            when true, each element of the returned list is an info object;
            when false, each element is the member's filename
        """
        entries = self.proc.infolist()
        if prefix:
            entries = [e for e in entries if e.filename.startswith(prefix)]
        if not zipinfo:
            entries = [e.filename for e in entries]
        return entries

    def printdir(self):
        """Print a table of contents for the archive."""
        return self.proc.printdir()

    def testrar(self):
        """Read all the files and check their CRC.

        The name is kept for backward compatibility; in zip mode the call is
        routed to ``ZipFile.testzip`` because ``ZipFile`` has no ``testrar``.
        Returns whatever the backend's check returns.
        """
        if self.mode == 'zip':
            return self.proc.testzip()
        return self.proc.testrar()

    def extract(self, member, path=None, pwd=None):
        """Extract one member.

        Note that the member's internal path is preserved:
        extract('word/document.xml', 'a') writes 'a/word/document.xml'.
        """
        return self.proc.extract(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        return self.proc.extractall(path, members, pwd)

    def extractall2tempfolder(self):
        """Extract the archive into a temporary folder and return its path.

        The extraction happens only once; later calls return the same folder
        until clear_tempfolder() is called.
        """
        if not self.tempfolder:
            self.tempfolder = tempfile.mkdtemp()
            self.proc.extractall(path=self.tempfolder)
        return self.tempfolder

    def clear_tempfolder(self):
        """Delete the temporary extraction folder created by this object.

        Does nothing when no temporary folder has been created.
        """
        if self.tempfolder:
            filesdel(self.tempfolder)
            # Forget the path so that a later extractall2tempfolder() extracts
            # again instead of returning a folder that was just cleaned up.
            self.tempfolder = None

    def close(self):
        """Release the underlying archive handle if the backend has one."""
        # NOTE(review): ZipFile provides close(); the unrar backend may not,
        # hence the attribute check.
        closer = getattr(self.proc, 'close', None)
        if callable(closer):
            closer()

    def __enter__(self):
        """Extract into a temporary folder for use with ``with ... as ...``.

        Note: the value bound by ``as`` is NOT the PackFile object but the
        path of the folder the archive was extracted to.
        """
        return self.extractall2tempfolder()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Remove the temporary folder; exceptions are not suppressed."""
        self.clear_tempfolder()