Ejemplo n.º 1
0
def extract_rar(rar: rarfile.RarFile,
                rar_entry: rarfile.RarInfo,
                tmp_dir_name,
                passwd=None):
    """
    Extract a single entry from a RAR archive into ``tmp_dir_name``.

    The entry is first extracted with ``passwd`` (which may be ``None``).
    If that fails with an error whose message starts with
    ``'Bad header data'`` or ``'File is encrypted'``, every password in
    ``cfg.DEFAULT_ARC_PASSWORDS`` is tried in order until one succeeds.

    :param rar: archive object providing ``extract(name, path, pwd=...)``
    :param rar_entry: archive entry; only its ``filename`` attribute is used
    :param tmp_dir_name: path to the destination directory (string)
    :param passwd: password string, or None to try without a password
    :return: Nothing
    :raises rarfile.BadRarFile, RuntimeError: the error of the first attempt,
        when it is not an encryption-related error or when none of the
        predefined passwords works
    """
    # Label used in log messages so they reflect whether the caller
    # actually supplied a password for the first attempt.
    pwd_note = "without pwd" if passwd is None else "with pwd"
    try:
        rar.extract(rar_entry.filename, tmp_dir_name, pwd=passwd)
    except (rarfile.BadRarFile, RuntimeError) as err:
        reason = str(err)
        log.debug(
            f"===>Can't extract:{rar_entry.filename} to {tmp_dir_name} {pwd_note}: {reason}"
        )
        # Only these two messages hint at an encrypted entry.  Anything else
        # (corrupt data, wrong explicit password, ...) is a genuine failure
        # and must reach the caller instead of being reported as success.
        if not reason.startswith(('Bad header data', 'File is encrypted')):
            raise
        # TODO - store passwords encrypted in db
        for candidate in cfg.DEFAULT_ARC_PASSWORDS:
            try:
                rar.extract(rar_entry.filename, tmp_dir_name, pwd=candidate)
            except (rarfile.BadRarFile, RuntimeError) as e:  # Bad password
                log.debug(
                    f"===>Can't extract:{rar_entry.filename} to {tmp_dir_name} with pwd: {str(e)}"
                )
                continue
            log.debug(
                f"===>Extracted:{rar_entry.filename} to {tmp_dir_name} with pwd"
            )
            return
        # No predefined password worked: re-raise the original error.
        raise
    log.debug(
        f"===>Extracted:{rar_entry.filename} to {tmp_dir_name} {pwd_note}"
    )
Ejemplo n.º 2
0
class PackFile:
    """Uniform wrapper around a zip or rar archive.

    The underlying archive object is stored in ``self.proc``
    (``zipfile.ZipFile`` for ``'zip'``, ``unrar.rarfile.RarFile`` for
    ``'rar'``) and most methods simply delegate to it.

    Used as a context manager, the archive is extracted to a temporary
    folder whose path is returned by ``with ... as path`` and which is
    cleaned up on exit.
    """

    def __init__(self, file, mode=None):
        """
        :param file: path of the archive to process
        :param mode: archive format; when omitted it is inferred from the
            file extension
            'rar':
            'zip': files with a .docx extension are unpacked as zip
        :raises ValueError: the format cannot be inferred from the extension,
            or ``mode`` is neither 'zip' nor 'rar'
        :raises ModuleNotFoundError: 'rar' mode is requested but the
            ``unrar`` module is not installed
        """
        # 1. Determine the archive format
        name, ext = os.path.splitext(file)
        ext = ext.lower()
        if not mode:
            if ext in ('.docx', '.zip'):
                mode = 'zip'
            elif ext == '.rar':
                mode = 'rar'
            else:
                dprint(ext)  # the format cannot be told from the extension
                raise ValueError(
                    f'cannot infer archive format from extension {ext!r}')
        elif mode not in ('zip', 'rar'):
            # Fail early: otherwise self.proc would never be set and every
            # later call would die with an unrelated AttributeError.
            raise ValueError(f'unsupported archive mode {mode!r}')
        self.mode = mode

        # 2. Choose the extraction "engine"
        if mode == 'zip':
            self.proc = zipfile.ZipFile(file)
        else:
            try:
                from unrar.rarfile import RarFile
            except ModuleNotFoundError as err:
                dprint(
                )  # unrar module is missing, see the install notes below
                raise ModuleNotFoundError(
                    'unrar module is required for rar archives, see '
                    'https://blog.csdn.net/code4101/article/details/79328636'
                ) from err
            self.proc = RarFile(file)
        # 3. Temporary extraction folder; None means "not extracted yet"
        self.tempfolder = None

    def open(self, member, pwd=None):
        """Return file-like object for 'member'.

           'member' may be a filename or an info object of the archive.
        """
        # pwd must go by keyword: the second positional parameter of
        # ZipFile.open is `mode`, not `pwd`.
        return self.proc.open(member, pwd=pwd)

    def read(self, member, pwd=None):
        """Return file bytes (as a string) for name."""
        return self.proc.read(member, pwd)

    def namelist(self):
        """>> self.namelist()  # get the list of member names
             1           [Content_Types].xml
             2                   _rels/.rels
            ......
            20            word/fontTable.xml
            21              docProps/app.xml
        """
        return self.proc.namelist()

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        return self.proc.setpassword(pwd)

    def getinfo(self, name):
        """
        >> self.getinfo('word/document.xml')  # get the info of one member
        <ZipInfo filename='word/document.xml' compress_type=deflate file_size=140518 compress_size=10004>
        """
        return self.proc.getinfo(name)

    def infolist(self, prefix=None, zipinfo=True):
        """>> self.infolist()  # multi-member version of getinfo
             1           <ZipInfo filename='[Content_Types].xml' compress_type=deflate file_size=1495 compress_size=383>
             2                    <ZipInfo filename='_rels/.rels' compress_type=deflate file_size=590 compress_size=243>
            ......
            20            <ZipInfo filename='word/fontTable.xml' compress_type=deflate file_size=1590 compress_size=521>
            21               <ZipInfo filename='docProps/app.xml' compress_type=deflate file_size=720 compress_size=384>

            :param prefix:
                keep only members whose filename starts with this prefix,
                e.g. "word/" selects the members under the word directory
            :param zipinfo:
                when true, each element of the returned list is an info
                object; otherwise only the filenames are returned
        """
        ls = self.proc.infolist()
        if prefix:
            ls = [info for info in ls if info.filename.startswith(prefix)]
        if not zipinfo:
            ls = [info.filename for info in ls]
        return ls

    def printdir(self):
        """Print a table of contents for the archive."""
        return self.proc.printdir()

    def testrar(self):
        """Read all the files and check the CRC."""
        if self.mode == 'zip':
            # ZipFile has no testrar(); testzip() is its counterpart.
            return self.proc.testzip()
        return self.proc.testrar()

    def extract(self, member, path=None, pwd=None):
        """Extract one member.

        Note: extract('word/document.xml', 'a') writes the file to
        'a/word/document.xml'.
        """
        return self.proc.extract(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        return self.proc.extractall(path, members, pwd)

    def extractall2tempfolder(self):
        """Extract the archive into a temporary folder and return its path.

        The extraction happens only once; later calls return the same path
        until clear_tempfolder() is called.
        """
        if not self.tempfolder:
            self.tempfolder = tempfile.mkdtemp()
            self.proc.extractall(path=self.tempfolder)
        return self.tempfolder

    def clear_tempfolder(self):
        """Delete the temporary extraction folder created earlier, if any."""
        if self.tempfolder:
            filesdel(self.tempfolder)
            # Forget the path so a later extractall2tempfolder() extracts
            # again instead of returning the cleared folder.
            self.tempfolder = None

    def __enter__(self):
        """Support ``with ... as ...``: extract on entry, clean up on exit.

        Note: the value bound by ``as`` is the path of the extraction
        folder, not the PackFile object.
        """
        path = self.extractall2tempfolder()
        return path

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear_tempfolder()