Esempio n. 1
0
    def base_test_failure_bad_zip(self):
        """Check that loading fails cleanly when the bundle is a bad zip.

        The bundle resolver is stubbed to return the bundle under test and
        the bundle opener is stubbed to raise ``BadZipFile``. The load must
        return a falsy result and report the problem on the error output.
        """
        err_stream = MagicMock()
        resolver = MagicMock(
            return_value=(self.bundle_file_name, self.bundle_file))
        opener = MagicMock(side_effect=BadZipFile('test bad zip error'))

        resolve_patch = patch('conductr_cli.resolver.resolve_bundle', resolver)
        open_patch = patch('conductr_cli.conduct_load.open_bundle', opener)
        with resolve_patch, open_patch:
            logging_setup.configure_logging(MagicMock(**self.default_args),
                                            err_output=err_stream)
            outcome = conduct_load.load(MagicMock(**self.default_args))
            self.assertFalse(outcome)

        # Both collaborators must have been invoked with the expected inputs.
        resolver.assert_called_with(self.custom_settings,
                                    self.bundle_resolve_cache_dir,
                                    self.bundle_file,
                                    self.offline_mode)
        opener.assert_called_with(self.bundle_file_name,
                                  self.bundle_file,
                                  bundle_utils.conf(self.bundle_file))

        expected_output = as_error(
            strip_margin(
                """|Error: Problem with the bundle: test bad zip error
                                     |"""))
        self.assertEqual(expected_output, self.output(err_stream))
Esempio n. 2
0
    def test_parse_when_zipfile_fails(self, mock_file_parser, mock_zipfile):
        # The file is reported as a zip and the generic file parser succeeds,
        # but opening the archive itself raises BadZipFile.
        mock_file_parser.return_value.parse.return_value = any_file()
        mock_file_parser.is_zip_file.return_value = True
        mock_zipfile.side_effect = BadZipFile()

        # The error is expected to propagate out of parse() unchanged.
        self.assertRaises(
            BadZipFile,
            lambda: ApkParser().parse("any-file-path",
                                      extended_processing=False))
Esempio n. 3
0
def __download_climate_observations_data(remote_file: str) -> bytes:
    """Download a station archive and return its product file as bytes.

    The archive is fetched with ``download_file_from_dwd`` and the first
    member whose name starts with ``PRODUCT_FILE_IDENTIFIER`` is returned.

    Args:
        remote_file: location of the zip archive to download.

    Returns:
        The raw content of the product file inside the archive.

    Raises:
        InvalidURL: if the remote file cannot be reached.
        FailedDownload: if the download fails for any other reason.
        BadZipFile: if the downloaded archive cannot be read.
        ProductFileNotFound: if the archive holds no product file.
    """
    try:
        zip_file = download_file_from_dwd(remote_file)
    except InvalidURL as e:
        raise InvalidURL(
            f"Error: the station data {remote_file} could not be reached."
        ) from e
    except Exception as e:
        # Keep the original error as the cause so the reason is not lost.
        raise FailedDownload(f"Download failed for {remote_file}") from e

    try:
        # Context managers close the archive and the member on every path,
        # including the "no product file" and error paths.
        with ZipFile(zip_file) as archive:
            for member in archive.namelist():
                if member.startswith(PRODUCT_FILE_IDENTIFIER):
                    with archive.open(member) as product:
                        return product.read()
    except BadZipFile as e:
        raise BadZipFile(
            f"The archive of {remote_file} seems to be corrupted.") from e

    # No member matched the product file identifier.
    raise ProductFileNotFound(
        f"The archive of {remote_file} does not hold a 'produkt' file.")
Esempio n. 4
0
    def extract_zip(self):
        """Extract every member of the archive into ``self.ROM_PATH``.

        The archive at ``self.archive_path`` must contain exactly
        ``self.FILE_COUNT`` members. Each member is written under the name
        returned by ``self.file_rename`` and then passed to
        ``self.post_processing``.

        Raises:
            RuntimeError: if the number of archive members is unexpected.
            BadZipFile: if the archive cannot be read; the original error is
                attached as the cause.
        """
        assert self.FILE_COUNT>0
        try:
            # Named ``archive`` to avoid shadowing the ``zip`` builtin.
            with zipfile.ZipFile(self.archive_path, "r") as archive:
                namelist = archive.namelist()
                print("namelist():", namelist)
                if len(namelist) != self.FILE_COUNT:
                    msg = (
                        "Wrong archive content?!?"
                        " There exists %i files, but it should exist %i."
                        " Existing names are: %r"
                    ) % (len(namelist), self.FILE_COUNT, namelist)
                    log.error(msg)
                    raise RuntimeError(msg)

                for filename in namelist:
                    content = archive.read(filename)
                    dst = self.file_rename(filename)

                    out_filename=os.path.join(self.ROM_PATH, dst)
                    with open(out_filename, "wb") as f:
                        f.write(content)

                    if dst == filename:
                        print("%r extracted" % out_filename)
                    else:
                        print("%r extracted to %r" % (filename, out_filename))

                    self.post_processing(out_filename)

        except BadZipFile as err:
            msg = "Error extracting archive %r: %s" % (self.archive_path, err)
            log.error(msg)
            raise BadZipFile(msg) from err
Esempio n. 5
0
    def open(self, zinfo):
        """Return a file-like object for reading the member described by zinfo.

        The local file header at ``zinfo.header_offset`` is read from
        ``self.fp`` and validated before the member reader is created.

        Raises:
            RuntimeError: if ``self.fp`` is not set (archive already closed).
            BadZipFile: if the local header is truncated, has a wrong
                signature, or names a different file than ``zinfo``.
            NotImplementedError: if flag bit 5 (compressed patched data) or
                flag bit 6 (strong encryption) is set.
        """
        zef_file = self.fp

        if not zef_file:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Position the underlying file at the member's local header.
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is read only to advance the file position; its
            # content is discarded.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & _UTF8_EXTENSION_FLAG:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        # The name stored in the local header must match the one recorded on
        # the ZipInfo object. The message reports the raw header bytes.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.' %
                (zinfo.orig_filename, fname))

        # The reader class and its constructor arguments depend on the
        # running Python version and platform.
        if sys.version_info[:2] < (2, 7):
            return _ZipExtFile(zef_file, zinfo)
        elif sys.version_info[:2] < (3, 4) and sys.platform == 'win32':
            return ZipExtFile(zef_file, 'r', zinfo)
        else:
            # close_fileobj=False: the reader is told not to close self.fp.
            return ZipExtFile(zef_file, 'r', zinfo, None, close_fileobj=False)
Esempio n. 6
0
    def load(self, filepath: PathLike, pwd: OptBytes = None) -> None:
        """Load a workspace from a file into an in-memory copy.

        Args:
            filepath: String or :term:`path-like object` pointing to a
                workspace file. Every member of that file is copied into a
                buffered ZipFile held by the instance, and the workspace
                configuration is then read from that copy.
            pwd: Bytes representing the password of the workspace file. It is
                only used for reading the members.

        Raises:
            FileNotFoundError: If the given file does not exist.
            BadZipFile: If the given file is not a valid ZIP file.
            BadWsFile: If the workspace configuration file could not be
                loaded from the archive.

        """
        # Reset the instance state and start with an empty buffered ZipFile
        self._changed = False
        self._path = env.expand(filepath)
        self._pwd = pwd
        self._buffer = BytesIO()
        self._file = ZipFile(self._buffer, mode='w')

        # Copy every member of the file on disk into the buffered ZipFile,
        # with UserWarnings silenced while doing so
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            try:
                with ZipFile(self.path, mode='r') as archive:
                    for member in archive.infolist():
                        content = archive.read(member, pwd=pwd)
                        # TODO: The zipfile standard module currently does
                        # not support encryption in write mode of new
                        # ZipFiles, so members are stored without a password.
                        # See https://docs.python.org/3/library/zipfile.html
                        # When support is provided, write with:
                        # self._file.writestr(member, content, pwd=pwd)
                        self._file.writestr(member, content)
            except FileNotFoundError as exc:
                raise FileNotFoundError(
                    f"file '{self.path}' does not exist") from exc
            except BadZipFile as exc:
                raise BadZipFile(
                    f"file '{self.path}' is not a valid ZIP file") from exc

        # Read the workspace configuration from the buffered copy
        attr_types = {
            'dc': self._get_attr_types(group='dc'),
            'hooks': self._get_attr_types(category='hooks')}
        try:
            with self.open(self._config_file) as config:
                cfg = inifile.load(config, structure=attr_types)
        except KeyError as exc:
            raise BadWsFile(
                f"workspace '{self.path}' is not valid: "
                f"file '{self._config_file}' could not be loaded") from exc

        # Link configuration
        self._set_attr_values(cfg.get('dc', {}), group='dc') # type: ignore
Esempio n. 7
0
    def perform_download():
        """Stream the task artifact to ``artifact_path`` and validate it.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            BadZipFile: if the artifact path ends in ``.zip`` but the
                downloaded file is not a zip archive.
        """
        r = requests.get(queue_base + 'task/%s/artifacts/%s' % (task_id, artifact_name), stream=True)
        # Fail on HTTP errors instead of saving the error response body to
        # disk as if it were the artifact.
        r.raise_for_status()

        with open(artifact_path, 'wb') as f:
            # Let urllib3 undo any transfer content-encoding while copying.
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)

        if artifact_path.endswith('.zip') and not is_zipfile(artifact_path):
            raise BadZipFile('File is not a zip file')
Esempio n. 8
0
    def unzip(self, overwrite: bool = False):
        """Extract the miz file into the temp dir and verify its content.

        Args:
            overwrite: allow extracting again when ``self.zip_content`` is
                already populated.

        Raises:
            FileExistsError: if content was already extracted and
                ``overwrite`` is false.
            BadZipFile: if the miz file is not a readable zip archive.
            FileNotFoundError: if a required miz item, or any member listed
                in the archive, is missing after extraction.
        """

        if self.zip_content and not overwrite:
            raise FileExistsError(self.tmpdir.abspath())

        logger.debug('unzipping miz to temp dir')

        try:

            with ZipFile(self.miz_path.abspath()) as zip_file:

                logger.debug('reading infolist')

                self.zip_content = [f.filename for f in zip_file.infolist()]

                for item in zip_file.infolist():  # not using ZipFile.extractall() for security reasons
                    assert isinstance(item, ZipInfo)

                    logger.debug('unzipping item: {}'.format(item))

                    try:
                        zip_file.extract(item, self.tmpdir.abspath())
                    except:  # noqa: E722 - logged and re-raised unchanged
                        logger.error('failed to extract archive member: {}'.format(item))
                        raise

        except BadZipFile as exc:
            # Keep the original error as the cause for diagnostics.
            raise BadZipFile(self.miz_path.abspath()) from exc

        except:  # noqa: E722 - logged and re-raised unchanged
            logger.exception('error while unzipping miz file: {}'.format(self.miz_path.abspath()))
            raise

        logger.debug('checking miz content')

        # Check EVERY required item. Pairing the names with a one-element
        # list through map() would stop after the first name.
        for required_name in (
                'mission',
                'options',
                'warehouses',
                'l10n/DEFAULT/dictionary',
                'l10n/DEFAULT/mapResource'):
            miz_item = join(self.tmpdir.abspath(), required_name)

            if not exists(miz_item):
                logger.error('missing file in miz: {}'.format(miz_item))
                raise FileNotFoundError(miz_item)

        for filename in self.zip_content:
            p = self.tmpdir.joinpath(filename)
            if not p.exists():
                raise FileNotFoundError(p.abspath())

        logger.debug('all files have been found, miz successfully unzipped')
Esempio n. 9
0
    def perform_download() -> None:
        """Stream ``url`` to ``path`` and check zip downloads for validity.

        Raises an HTTP error for error statuses, and ``BadZipFile`` when
        ``path`` ends in ``.zip`` but the saved file is not a zip archive.
        """
        response = requests.get(url, stream=True)
        response.raise_for_status()

        # Write the body to disk in 1 MiB chunks.
        with open(path, "wb") as target:
            target.writelines(response.iter_content(chunk_size=1048576))

        # Only files named like zip archives are validated.
        if not path.endswith(".zip"):
            return
        if not is_zipfile(path):
            raise BadZipFile("File is not a zip file")
Esempio n. 10
0
 def _open_zip(self, filepath: str) -> ZipFile:
     """Open the zip archive at ``filepath`` for reading.

     Args:
         filepath: path of the archive to open.

     Returns:
         The opened ``ZipFile``; the caller is responsible for closing it.

     Raises:
         FileNotFoundError: if ``filepath`` does not exist.
         BadZipFile: if the file is not a zip archive.
         LargeZipFile: if the archive is too large to be opened.

     Each error is re-raised with a message naming the file, and keeps the
     original exception as its cause.
     """
     try:
         return ZipFile(filepath, mode='r')
     except FileNotFoundError as err:
         raise FileNotFoundError(
             f'Zip archive {filepath} was not found') from err
     except BadZipFile as err:
         raise BadZipFile(f'File: {filepath} is not ZIP archive') from err
     except LargeZipFile as err:
         raise LargeZipFile(
             f'ZIP file {filepath} Too large for open, MAX size 4GB') from err
Esempio n. 11
0
    def perform_download():
        """Download ``url`` to ``artifact_path`` and validate zip artifacts.

        Raises an HTTP error for error statuses, and ``BadZipFile`` when the
        artifact path ends in ``.zip`` but the saved file is not a zip.
        """
        response = requests.get(url, stream=True)
        response.raise_for_status()

        with open(artifact_path, "wb") as destination:
            # Have the raw stream decode any content-encoding while copying.
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, destination)

        expects_zip = artifact_path.endswith(".zip")
        if expects_zip and not is_zipfile(artifact_path):
            raise BadZipFile("File is not a zip file")
Esempio n. 12
0
    def perform_download():
        """Fetch the task artifact into ``artifact_path`` and validate it.

        Raises an HTTP error for error statuses, and ``BadZipFile`` when the
        artifact path ends in ``.zip`` but the saved file is not a zip.
        """
        artifact_url = queue_base + "task/{}/artifacts/{}".format(
            task_id, artifact_name)
        response = requests.get(artifact_url, stream=True)

        response.raise_for_status()

        with open(artifact_path, "wb") as out_file:
            # Have the raw stream decode any content-encoding while copying.
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, out_file)

        if not artifact_path.endswith(".zip"):
            return
        if not is_zipfile(artifact_path):
            raise BadZipFile("File is not a zip file")
Esempio n. 13
0
 def _extract_zip(self) -> None:
     """Extract this document's zip file into a folder named after its guid.

     The target folder is ``self.documents_dir / self.guid`` and is stored
     on the instance as ``self.note_extract_dir``. Nothing is extracted when
     that folder already exists.

     Raises:
         BadZipFile: if ``self.note_file`` cannot be read as a zip archive;
             the original error is attached as the cause.
     """
     self.note_extract_dir = self.documents_dir.joinpath(self.guid)
     # If the target folder already exists, skip the extraction.
     if self.note_extract_dir.exists():
         return
     try:
         # The context manager closes the archive even if extraction fails.
         with ZipFile(self.note_file) as zip_file:
             zip_file.extractall(self.note_extract_dir)
     except BadZipFile as e:
         msg = f'ZIP 文件错误,可能是需要密码。 {self.note_file!s} |{self.title}|'
         raise BadZipFile(msg) from e
Esempio n. 14
0
def _download_climate_observations_data_parallel(
        remote_file: Union[str, Path]) -> BytesIO:
    """
    Download the archive for one station and return its product file.

    The archive is fetched with ``download_file_from_dwd`` and the first
    member whose name starts with ``PRODUCT_FILE_IDENTIFIER`` is read into
    memory.

    Args:
        remote_file: path to the remote archive that should be downloaded

    Returns:
        the content of the product file wrapped in a BytesIO

    Raises:
        InvalidURL: if the remote file cannot be reached
        FailedDownload: if the download fails for any other reason
        BadZipFile: if the downloaded archive cannot be read
        ProductFileNotFound: if the archive holds no product file

    """
    try:
        zip_file = download_file_from_dwd(remote_file,
                                          DWDCDCBase.CLIMATE_OBSERVATIONS)
    except InvalidURL as e:
        raise InvalidURL(
            f"Error: the station data {remote_file} couldn't be reached."
        ) from e
    except Exception as e:
        # Keep the original error as the cause so the reason is not lost.
        raise FailedDownload(f"Download failed for {remote_file}") from e

    try:
        # Context managers close the archive and the member on every path,
        # including the "no product file" and error paths.
        with ZipFile(zip_file) as archive:
            for member in archive.namelist():
                if member.startswith(PRODUCT_FILE_IDENTIFIER):
                    with archive.open(member) as product:
                        return BytesIO(product.read())
    except BadZipFile as e:
        raise BadZipFile(
            f"The archive of {remote_file} seems to be corrupted.") from e

    # No member matched the product file identifier.
    raise ProductFileNotFound(
        f"The archive of {remote_file} does not hold a 'produkt' file.")
Esempio n. 15
0
    def extract_zip(self):
        """Extract ``EXT_BASIC_NO_USING.hex`` from the archive into ROM_PATH.

        The member ``ROMS/6809/EXT_BASIC_NO_USING.hex`` is read from
        ``self.archive_path``, written to ``self.ROM_PATH`` and passed to
        ``self.post_processing``.

        Raises:
            BadZipFile: if the archive cannot be read; the original error is
                attached as the cause.
        """
        assert self.FILE_COUNT>0
        try:
            # Named ``archive`` to avoid shadowing the ``zip`` builtin.
            with zipfile.ZipFile(self.archive_path, "r") as archive:
                content = archive.read("ROMS/6809/EXT_BASIC_NO_USING.hex")
                out_filename=os.path.join(self.ROM_PATH, "EXT_BASIC_NO_USING.hex")
                with open(out_filename, "wb") as f:
                    f.write(content)

                print("%r extracted" % out_filename)
                self.post_processing(out_filename)

        except BadZipFile as err:
            msg = "Error extracting archive %r: %s" % (self.archive_path, err)
            log.error(msg)
            raise BadZipFile(msg) from err
Esempio n. 16
0
 def convert_to_cbz(self) -> None:
     """
     Takes all of the previously downloaded pages and compresses them in
     a .cbz file, erasing them afterwards.

     The pages are read from the folder ``self.name`` and added to
     ``<self.name>.cbz``. The pages and their folder are only removed once
     the archive has passed ``testzip()`` and has been closed, so a failed
     archive never costs the downloaded pages.

     Raises:
         BadZipFile: if the archive test reports a damaged member.
     """
     archived_pages = []
     with ZipFile("{}.cbz".format(self.name), mode="a") as cbz_file:
         for image in os.listdir(self.name):
             image_location = "{}/{}".format(self.name, image)
             cbz_file.write(image_location, image)
             archived_pages.append(image_location)
         if cbz_file.testzip() is not None:
             raise BadZipFile(
                 "Error while testing the archive; it might be corrupted."
             )

     # The archive is verified and written to disk: the sources can go.
     for image_location in archived_pages:
         os.remove(image_location)
     os.rmdir(self.name)
Esempio n. 17
0
 def _add(self, zipinfo):
     """Insert ``zipinfo`` into the tree, creating missing directory nodes.

     A name ending in ``/`` is treated as a directory: one node per path
     component is looked up or created. Any other name is treated as a file
     and stored in the ``files`` mapping of its parent directory node.

     Raises:
         BadZipFile: if a file with the same name is already registered in
             the same directory.
     """
     node = self
     name = zipinfo.filename

     if name.endswith('/'):  # directory
         path_so_far = ''
         for component in name.rstrip('/').split('/'):
             path_so_far = path_so_far + component + '/'
             node = node.directories.setdefault(
                 component, ZipTree(path_so_far))
         return

     # file: walk (or create) the parent directories, then register it
     *parents, basename = name.split('/')
     for component in parents:
         node = node.directories.setdefault(component, ZipTree())
     if basename in node.files:
         raise BadZipFile('duplicate file entry in zipfile')
     node.files[basename] = zipinfo
Esempio n. 18
0
 def clean_file(self):
     """Validate that the uploaded file is a zip holding one shapefile set.

     The archive must contain exactly one ``.shp``, one ``.shx`` and one
     ``.dbf`` member (extensions compared case-insensitively). Returns the
     uploaded file unchanged, or raises ``forms.ValidationError``.
     """
     shapefile_suffixes = (".shp", ".shx", ".dbf")
     data = self.cleaned_data["file"]
     try:
         archive = ZipFile(data)
         members = {
             name
             for name in archive.namelist()
             if name.lower().endswith(shapefile_suffixes)
         }
         found = sorted(name.lower()[-4:] for name in members)
         # Wrong content is reported through the same path as a bad zip.
         if found != [".dbf", ".shp", ".shx"]:
             raise BadZipFile()
     except BadZipFile:
         raise forms.ValidationError(
             "This is not a zip file, or it doesn't contain exactly one .shp, .shx "
             "and .dbf file.")
     return data
Esempio n. 19
0
    def unzip(self, overwrite: bool = False):
        """
        Flattens a MIZ file into the temp dir

        Args:
            overwrite: allow overwriting existing files

        Raises:
            FileExistsError: if content was already extracted and
                ``overwrite`` is false
            BadZipFile: if the MIZ file is not a readable zip archive; the
                original error is attached as the cause
            FileNotFoundError: if a required MIZ item is missing after
                extraction

        """

        if self.zip_content and not overwrite:
            raise FileExistsError(str(self.temp_dir))

        LOGGER.debug('unzipping miz to temp dir')

        try:

            with ZipFile(str(self.miz_path)) as zip_file:

                LOGGER.debug('reading infolist')

                self.zip_content = [f.filename for f in zip_file.infolist()]

                self._extract_files_from_zip(zip_file)

        except BadZipFile as exc:
            # Keep the original error as the cause for diagnostics.
            raise BadZipFile(str(self.miz_path)) from exc

        except:  # noqa: E722
            LOGGER.exception('error while unzipping miz file: %s',
                             self.miz_path)
            raise

        LOGGER.debug('checking miz content')

        # Every one of these items must exist in the temp dir.
        # noinspection PyTypeChecker
        for miz_item in [
                'mission', 'options', 'warehouses', 'l10n/DEFAULT/dictionary',
                'l10n/DEFAULT/mapResource'
        ]:
            if not Path(self.temp_dir.joinpath(miz_item)).exists():
                LOGGER.error('missing file in miz: %s', miz_item)
                raise FileNotFoundError(miz_item)

        self._check_extracted_content()

        LOGGER.debug('all files have been found, miz successfully unzipped')
Esempio n. 20
0
def most_recent_tfidf():
    """Load the most recently modified tf-idf matrix from the models folder.

    The newest ``tfidf_matrix_*.npz`` file under ``res_path`` is loaded with
    ``scipy.sparse.load_npz``. Loading is attempted up to six times, waiting
    ten seconds between attempts.

    Returns:
        The sparse matrix stored in the newest file.

    Raises:
        ValueError: if no matching file exists (raised by ``max``).
        BadZipFile: if every attempt fails; the last error is attached as
            the cause.
    """
    max_attempts = 6
    list_of_files = glob.glob(
        res_path + '/models/tfidf/tfidf_matrix_*.npz'
    )  # * means all if need specific format then *.csv
    latest_file = max(list_of_files, key=os.path.getmtime)
    for attempt in range(1, max_attempts + 1):
        try:
            return scipy.sparse.load_npz(latest_file)
        except (AttributeError, BadZipFile) as e:
            # NOTE(review): presumably these errors show up while the file
            # is still being written - confirm.
            if attempt == max_attempts:
                # Give up right away: sleeping again would only delay the
                # error.
                raise BadZipFile(
                    'Issue when loading tfidf matrix, over limit retry counter'
                ) from e
            print('Issue when loading tfidf matrix, retrying in 10 sec', e)
            sleep(10)
Esempio n. 21
0
    def load(self, filepath: PathLike, pwd: OptBytes = None) -> None:
        """Load Workspace from file.

        Args:
            filepath: String or :term:`path-like object`, that points to a valid
                ZipFile file. If the filepath points to a valid ZipFile, then
                the class instance is initialized with a memory copy of the
                file.
            pwd: Bytes representing password of ZipFile. It is only used for
                reading the members.

        Raises:
            ValueError: If the expanded path is empty.
            FileNotFoundError: If the given file does not exist.
            BadZipFile: If the given file is not a valid ZipFile.

        """
        # Initialize instance Variables, Buffer and buffered ZipFile
        self._changed = False
        self._path = env.expand(filepath)
        self._pwd = pwd
        self._buffer = io.BytesIO()
        self._file = ZipFile(self._buffer, mode='w')

        if not self._path:
            # Name the offending argument instead of raising an empty error.
            raise ValueError(
                f"filepath {filepath!r} does not denote a valid path")

        # Copy contents from ZipFile to buffered ZipFile
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            try:
                with ZipFile(self._path, mode='r') as fh:
                    for zinfo in fh.infolist():
                        data = fh.read(zinfo, pwd=pwd)
                        # TODO: The zipfile standard module currently does
                        # not support encryption in write mode of new
                        # ZipFiles, so members are stored without a password.
                        # See https://docs.python.org/3/library/zipfile.html
                        # When support is provided, the below line for writing
                        # files shall be replaced by:
                        # self._file.writestr(zinfo, data, pwd=pwd)
                        self._file.writestr(zinfo, data)
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    f"file '{self.path}' does not exist") from err
            except BadZipFile as err:
                raise BadZipFile(
                    f"file '{self.path}' is not a valid ZIP file") from err
Esempio n. 22
0
def __download_climate_observations_data(remote_file: str) -> bytes:
    """Download a station archive and return its product file as bytes.

    The archive is fetched with ``download_file`` (cached for five minutes)
    and opened as a ``ZipFileSystem``. Exactly one member matching
    ``produkt*`` is expected.

    Args:
        remote_file: location of the zip archive to download.

    Returns:
        The raw content of the product file inside the archive.

    Raises:
        InvalidURL: if the remote file cannot be reached.
        FailedDownload: if the download fails for any other reason.
        BadZipFile: if the downloaded archive cannot be read.
        ProductFileNotFound: if the archive does not hold exactly one
            product file.
    """

    try:
        file = download_file(remote_file, ttl=CacheExpiry.FIVE_MINUTES)
    except InvalidURL as e:
        raise InvalidURL(f"Error: the station data {remote_file} could not be reached.") from e
    except Exception as e:
        # Keep the original error as the cause so the reason is not lost.
        raise FailedDownload(f"Download failed for {remote_file}") from e

    try:
        zfs = ZipFileSystem(file)
    except BadZipFile as e:
        raise BadZipFile(f"The archive of {remote_file} seems to be corrupted.") from e

    product_file = zfs.glob("produkt*")

    if len(product_file) != 1:
        raise ProductFileNotFound(f"The archive of {remote_file} does not hold a 'produkt' file.")

    # Close the member handle once its content has been read.
    with zfs.open(product_file[0]) as product:
        return product.read()
Esempio n. 23
0
    def extract_zip(self):
        """Extract the archive into ROM_PATH and convert the ROM hex file.

        The member list of ``self.archive_path`` must equal
        ``self.ARCHIVE_NAMES``. After extraction, ``ExBasROM.hex`` is
        converted to ``self.rom_path`` with ``hex2bin``.

        Raises:
            RuntimeError: if the archive content differs from
                ``self.ARCHIVE_NAMES``.
            BadZipFile: if the archive cannot be read; the original error is
                attached as the cause.
        """
        assert self.FILE_COUNT > 0
        try:
            # Named ``archive`` to avoid shadowing the ``zip`` builtin.
            with zipfile.ZipFile(self.archive_path, "r") as archive:
                namelist = archive.namelist()
                print("namelist():", namelist)
                if namelist != self.ARCHIVE_NAMES:
                    msg = ("Wrong archive content?!?"
                           " namelist should be: %r") % self.ARCHIVE_NAMES
                    log.error(msg)
                    raise RuntimeError(msg)

                archive.extractall(path=self.ROM_PATH)

        except BadZipFile as err:
            msg = "Error extracting archive %r: %s" % (self.archive_path, err)
            log.error(msg)
            raise BadZipFile(msg) from err

        hex2bin(src=os.path.join(self.ROM_PATH, "ExBasROM.hex"),
                dst=self.rom_path,
                verbose=False)
Esempio n. 24
0
    def get_zip_infos(self, *filenames):
        """Read in the table of contents for the ZIP file.

        Generator that walks the central directory of ``self.fp`` and yields
        a ``ZipInfo`` for each entry whose name is in ``filenames``. It stops
        as soon as every requested name has been found, and yields nothing
        when no names are given. Because this is a generator, nothing is
        read or checked until iteration starts.

        Raises:
            RuntimeError: if ``self.fp`` is not set (archive already closed).
            BadZipFile: if the end-of-central-directory record cannot be
                found, or a central directory entry is truncated or has a
                wrong signature.
            NotImplementedError: if an entry needs a newer extract version
                than ``MAX_EXTRACT_VERSION``.
            TooManyFiles: if more than ``self.max_file_count`` entries are
                read (only checked when that limit is not None).
        """
        fp = self.fp
        max_file_count = self.max_file_count

        if not fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Names still to be found; entries are discarded as they are yielded.
        filenames = set(filenames)
        if len(filenames) == 0:
            return

        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")

        size_cd = endrec[_ECD_SIZE]  # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]  # offset of central directory

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        # start_dir:  Position of start of central directory
        start_dir = offset_cd + concat
        fp.seek(start_dir, 0)
        # Read the whole central directory at once and parse it from memory.
        data = fp.read(size_cd)
        fp = BytesIO(data)
        total = 0
        file_count = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Index 5 is the same field that is unpacked into x.flag_bits
            # further down.
            flags = centdir[5]
            if flags & _UTF8_EXTENSION_FLAG:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            # t and d are the raw time and date codes, decoded below.
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d, x.CRC, x.compress_size,
             x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F, t >> 11,
                           (t >> 5) & 0x3F, (t & 0x1F) * 2)

            x._decodeExtra()
            # Shift the local header offset by the size of any data that
            # precedes the archive.
            x.header_offset = x.header_offset + concat

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] +
                     centdir[_CD_EXTRA_FIELD_LENGTH] +
                     centdir[_CD_COMMENT_LENGTH])

            # The limit counts every entry read, not only requested ones.
            file_count += 1
            if max_file_count is not None and file_count > max_file_count:
                raise TooManyFiles('Too many files in egg')

            if x.filename in filenames:
                filenames.discard(x.filename)
                yield x

            # Stop early once every requested name has been yielded.
            if len(filenames) == 0:
                return
Esempio n. 25
0
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """
        Returns file-like object for 'name'.

        @param      name    is a string for the file name within the ZIP file, or a ZipInfo
                            object.
        @param      mode    should be 'r' to read a file already in the ZIP file, or 'w' to
                            write to a file newly added to the archive.
        @param      pwd     is the password to decrypt files (only used for reading).
        @param      force_zip64     is forwarded to the writer when mode is 'w'.
        @return             a writing handle for mode 'w', otherwise a ZipExtFile
                            reading the member.

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.

        Raises ValueError for an invalid mode, for a password given in write
        mode, for a closed archive, or for reading while a writing handle is
        open; TypeError if pwd is not bytes; BadZipFile if the local file
        header is truncated, has a wrong signature or names another file;
        NotImplementedError for flag bits 5 and 6; RuntimeError if an
        encrypted member has no password or the password check fails.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if hasattr(self, "_writing") and self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                             "is an open writing handle on it. "
                             "Close the writing handle before trying to read.")

        # Open for reading:
        self._fileRefCnt += 1
        # Exactly one shared-file wrapper must be created. Without the
        # ``else`` the five-argument call would also run on Python <= 3.5
        # and replace the wrapper that was just built.
        if sys.version_info[:2] <= (3, 5):
            zef_file = _SharedFile(  # pylint: disable=E1120
                self.fp, zinfo.header_offset, self._fpclose, self._lock)
        else:
            zef_file = _SharedFile(
                self.fp, zinfo.header_offset, self._fpclose, self._lock,
                lambda: hasattr(self, "_writing") and self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                # Read only to advance past the extra field.
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError(
                    "compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The header name must match the directory name; on Windows the
            # comparison ignores the difference between "\\" and "/".
            if sys.platform.startswith("win"):
                if fname_str.replace("\\", "/") != zinfo.orig_filename.replace(
                        "\\", "/"):
                    raise BadZipFile(
                        'File name in directory %r and header %r differ.' %
                        (zinfo.orig_filename, fname))
            else:
                if fname_str != zinfo.orig_filename:
                    raise BadZipFile(
                        'File name in directory %r and header %r differ.' %
                        (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)

                # NOTE(review): the lines below use `zd` as a per-byte
                # callable and pass it on to ZipExtFile - confirm this
                # matches the zipfile internals of every supported Python.
                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = list(map(zd, header[0:12]))
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if h[11] != check_byte:
                    raise RuntimeError("Bad password for file %r" % name)

            return ZipExtFile(zef_file, mode, zinfo, zd, True)
        except Exception:
            # Release the shared file handle before propagating the error.
            zef_file.close()
            raise
    def decompress(self, chunk):
        """Feed one chunk of raw zip bytes into the zip-to-tar conversion.

        The chunk may start or end anywhere inside the zip stream; parsing
        state is kept on ``self`` between calls. For every member the method
        goes through three phases:

        1. Collect the fixed-size local file header (``sizeFileHeader`` bytes).
        2. Collect the variable-size remainder of the header (file name and
           extra field), then emit the matching tar header to ``self.output``.
        3. Forward the member body: the raw bytes are appended to ``self.buf``,
           decoded through a ``ZipExtFile`` and written to
           ``self.output.fileobj``.

        Processing stops for good (``self.finished``) once the central
        directory signature is seen.

        Args:
            chunk: The next bytes of the zip stream.

        Returns:
            The result of ``read()`` on the local ``output`` buffer.
            NOTE(review): nothing in this method writes to that local buffer;
            the converted tar data goes to ``self.output.fileobj``. Confirm
            that callers collect the data from there.

        Raises:
            BadZipFile: If a header starts with neither the local file header
                signature nor the central directory signature.
        """
        chunkbuffer = BytesBuffer()
        chunkbuffer.write(chunk)
        output = BytesBuffer()
        while not self.finished and len(chunkbuffer) > 0:
            if not self.current_fheader:
                # Read the first part of the header (constant size of sizeFileHeader).
                if len(self.header_buf) <= sizeFileHeader:
                    bytes_remaining = sizeFileHeader - len(self.header_buf)
                    self.header_buf.write(chunkbuffer.read(bytes_remaining))

                if len(self.header_buf) >= sizeFileHeader:
                    fheader = self.header_buf.read(sizeFileHeader)
                    self.current_fheader = struct.unpack(
                        structFileHeader, fheader)
                    if self.current_fheader[_FH_SIGNATURE] == stringCentralDir:
                        # We've reached the central directory. This means that we've finished iterating through
                        # all entries in the zip file. We can do this check because the file header signature
                        # and central directory signature are stored in the same spot (index 0) and with the same format.
                        self.finished = True
                        break
                    if self.current_fheader[_FH_SIGNATURE] != stringFileHeader:
                        raise BadZipFile("Bad magic number for file header")
                    self.current_file_compressed_size = self.current_fheader[
                        _FH_COMPRESSED_SIZE]
                    self.current_file_uncompressed_size = self.current_fheader[
                        _FH_UNCOMPRESSED_SIZE]
                    # Finished reading the first part of the header. The raw
                    # bytes are replayed into self.buf so that
                    # StreamingZipFile can parse them below.
                    self.buf.write(fheader)

            if self.current_fheader and not self.current_zipinfo:
                # Read the second part of the header (variable size sizeFileHeaderExtra obtained by reading self.current_fheader).
                sizeFileHeader2 = (
                    self.current_fheader[_FH_FILENAME_LENGTH] +
                    self.current_fheader[_FH_EXTRA_FIELD_LENGTH])
                if len(self.header_buf_2) <= sizeFileHeader2:
                    bytes_remaining = sizeFileHeader2 - len(self.header_buf_2)
                    self.header_buf_2.write(chunkbuffer.read(bytes_remaining))

                if len(self.header_buf_2) >= sizeFileHeader2:
                    fheader_2 = self.header_buf_2.read(sizeFileHeader2)
                    # Finished reading the entire header.
                    self.buf.write(fheader_2)

                    with StreamingZipFile(self.buf) as zf:
                        # Header finished; write the tarfile header now.
                        zinfo = zf.next()
                        self.current_zipinfo = zinfo
                        tarinfo = tarfile.TarInfo(name=zinfo.filename)
                        tarinfo.size = zinfo.file_size
                        tarinfo.mode = 0o755  # ZipFile doesn't store permissions, so we just set it to a sensible default.
                        tarinfo.type = tarfile.DIRTYPE if zinfo.is_dir(
                        ) else tarfile.REGTYPE
                        tarinfo.mtime = time.mktime(
                            datetime.datetime(*zinfo.date_time).timetuple()
                        )  # From https://fossies.org/linux/littleutils/scripts/zip2tarcat.in
                        # No fileobj is passed, so only the tar header is
                        # written here; the payload is streamed below.
                        self.output.addfile(tarinfo)

            if self.current_zipinfo:
                # Header finished; write (up to) the entire body of the current member.
                bytes_remaining = (self.current_file_compressed_size -
                                   self.current_file_compressed_bytes_obtained)
                remaining = chunkbuffer.read(bytes_remaining)
                self.current_file_compressed_bytes_obtained += len(remaining)
                self.buf.write(remaining)

                # Extract what's remaining from the zipfile and write it to the tarfile.
                if not self.current_zef:
                    with StreamingZipFile(self.buf) as zf:
                        self.current_zef = zf.open(self.current_zipinfo)

                # ZipExtFile._read1(n) reads up to n compressed bytes. We use this rather than ZipExtFile.read(n), which takes in uncompressed
                # bytes, because we only know how much compressed bytes we have added so far.
                # NOTE(review): for compressed members _read1 may hold back
                # part of the decoded output until a later call; confirm the
                # whole payload has been emitted before the padding below.
                uncompressed = self.current_zef._read1(len(remaining))
                self.output.fileobj.write(uncompressed)
                self.output.offset += len(uncompressed)

                if self.current_file_compressed_bytes_obtained == self.current_file_compressed_size:
                    # We've obtained the entire file.
                    # Pad the payload with NULs up to the next tar block
                    # boundary, mirroring
                    # https://github.com/python/cpython/blob/9d2c2a8e3b8fe18ee1568bfa4a419847b3e78575/Lib/tarfile.py#L2008-L2012.
                    # The tar header above declared the *uncompressed* size
                    # (tarinfo.size = zinfo.file_size), so the padding must be
                    # derived from that size and not from the compressed one;
                    # otherwise every following tar header is misaligned.
                    remainder = (self.current_zipinfo.file_size %
                                 tarfile.BLOCKSIZE)
                    if remainder > 0:
                        assert self.output.fileobj is not None
                        padding = tarfile.BLOCKSIZE - remainder
                        self.output.fileobj.write(tarfile.NUL * padding)
                        # The payload bytes were already added to the offset
                        # as they were written, so only the padding is added
                        # here.
                        self.output.offset += padding  # type: ignore
                    # Reset the current information.
                    self.reset_info()

        return output.read()
Esempio n. 27
0
    def next(self):
        """Return the next member of the archive as a ZipInfo object.

        Returns None if there is no more available, i.e. once the central
        directory is reached. This method is analogous to TarFile.next().

        We construct a ZipInfo object using the information stored in the next
        *local* file header. The logic here is based on the implementation of
        ZipFile._RealGetContents(), which constructs a ZipInfo object from
        information in a central directory file header, but modified to work
        with the file-header-specific struct (for the implementation of
        ZipFile._RealGetContents(), see
        https://github.com/python/cpython/blob/048f54dc75d51e8a1c5822ab7b2828295192aaa5/Lib/zipfile.py#L1316).

        Side effects: the new ZipInfo is appended to ``self.filelist`` and
        registered in ``self.NameToInfo``; ``self._next_header_pos`` is moved
        to the start of the following header; ``self._loaded`` is set to True
        when the central directory is reached.

        Raises:
            BadZipFile: If the header (fixed part, file name or extra field)
                is truncated, or does not start with a known signature.
            NotImplementedError: If the member needs a ZIP version newer than
                MAX_EXTRACT_VERSION.
        """
        # Function-scope imports keep this method self-contained.
        import binascii
        import inspect

        fp = self.fp

        # First, advance to the next header, if needed. The gap is skipped by
        # reading rather than seeking (presumably because the underlying
        # stream may not be seekable).
        fp.read(self._next_header_pos - fp.tell())

        # Read the next header.
        fheader = fp.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] == stringCentralDir:
            # We've reached the central directory. This means that we've finished iterating through
            # all entries in the zip file. We can do this check because the file header signature
            # and central directory signature are stored in the same spot (index 0) and with the same format.
            self._loaded = True
            return None
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        raw_filename = fp.read(fheader[_FH_FILENAME_LENGTH])
        if len(raw_filename) != fheader[_FH_FILENAME_LENGTH]:
            # Same policy as for the fixed-size part: never build a ZipInfo
            # from a partially read header.
            raise BadZipFile("Truncated file header")
        flags = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = raw_filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = raw_filename.decode('cp437')

        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
        if len(x.extra) != fheader[_FH_EXTRA_FIELD_LENGTH]:
            raise BadZipFile("Truncated file header")
        x.header_offset = self._next_header_pos

        # The file header stores nearly all the same information needed for ZipInfo as what the
        # central directory file header stores, except for a couple of missing fields.
        # The numeric ones are set to 0; the comment is documented as a bytes
        # object, so it gets the empty bytes value instead of an int.
        x.comment = b""
        x.create_version, x.create_system = 0, 0
        x.volume, x.internal_attr, x.external_attr = 0, 0, 0

        (x.extract_version, x.reserved, x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = fheader[1:10]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))

        # Convert date/time code to (year, month, day, hour, min, sec)
        # This comes from the original cpython code.
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F, t >> 11,
                       (t >> 5) & 0x3F, (t & 0x1F) * 2)

        # ZipInfo._decodeExtra is private and its signature differs between
        # CPython versions: newer ones expect the CRC-32 of the raw file name
        # (used for the Unicode Path extra field), older ones take no
        # argument. Inspect the bound method so that both keep working.
        if inspect.signature(x._decodeExtra).parameters:
            x._decodeExtra(binascii.crc32(raw_filename))
        else:
            x._decodeExtra()

        self.filelist.append(x)
        self.NameToInfo[x.filename] = x
        # Beginning of the next file's header: right after this member's
        # (compressed) payload.
        self._next_header_pos = fp.tell() + x.compress_size
        return x