Exemple #1
0
def apk2dex(src, dst, verbose=True):
    """
    Extract the classes.dex out of the apk and rename it to the apk filename.

    The extracted file ends up as ``<dst>/<apk name without extension>.dex``.

    :param str src: source apk file
    :param str dst: destination folder to unzip (created when missing)
    :param bool verbose: Whether to print message.
    :return: None
    :raises ValueError: if the destination folder cannot be created.
    :raises zipfile.BadZipFile: if the apk cannot be read or extracted, or
        if it contains no ``classes.dex`` entry.
    """
    target = "classes.dex"
    try:
        os.makedirs(dst, exist_ok=True)
    except (OSError, TypeError, ValueError) as err:
        raise ValueError("Incorrect output folder specified.") from err

    try:
        # The context manager closes the archive even when extraction fails.
        with zipfile.ZipFile(src) as archive:
            found = target in archive.namelist()
            if found:
                archive.extract(target, dst)
        if found:
            if verbose:
                print("Extracted file {}".format(src))
            # splitext/basename cope with any extension length and with the
            # platform's path separator.
            stem = os.path.splitext(os.path.basename(src))[0]
            os.rename(os.path.join(dst, target),
                      os.path.join(dst, stem + ".dex"))
    except Exception as err:
        raise zipfile.BadZipFile(
            "Invalid or corrupted file {}".format(src)) from err

    # Raised outside the try block so this specific message is not replaced
    # by the generic "Invalid or corrupted file" one.
    if not found:
        raise zipfile.BadZipFile(
            "No classes.dex found in file {}".format(src))
Exemple #2
0
    def to_internal_value(self, data):
        """
        Validate an uploaded archive and return the parent-converted value.

        The value produced by the parent ``to_internal_value`` must have a
        name ending in one of ``VALID_SOURCE_EXTENSIONS``. Zip uploads are
        checked with ``testzip()``; everything else is opened as a bz2 or
        gzip tarball and each member is passed to
        ``archive_member_validator``.

        Raises ``exceptions.ValidationError`` for an unsupported extension
        or an unreadable archive.
        """
        data = super().to_internal_value(data)

        # Reject anything whose extension is not in the supported set.
        if not data.name.endswith(VALID_SOURCE_EXTENSIONS):
            template = (
                'Unsupported file type, please upload an archive file ({extensions}).'
            )
            supported = ', '.join(VALID_SOURCE_EXTENSIONS)
            raise exceptions.ValidationError(
                template.format(extensions=supported))

        # Make sure the content really is what the extension claims.
        try:
            extension = os.path.splitext(data.name)[1]
            if extension != '.zip':
                # Tarballs: walk every member through the validator.
                tar_mode = 'r:gz' if extension != '.bz2' else 'r:bz2'
                with tarfile.open(mode=tar_mode, fileobj=data) as tarball:
                    for entry in tarball.getmembers():
                        archive_member_validator(tarball, entry)
            elif SafeZip(data).zip_file.testzip() is not None:
                # testzip() names the first member with a broken CRC.
                raise zipfile.BadZipFile()
        except (zipfile.BadZipFile, tarfile.ReadError, OSError, EOFError):
            raise exceptions.ValidationError('Invalid or broken archive.')

        return data
    def extract3DAsset(self, zippath: str, dstdir: str, classes: list,
                       name: str):
        """
        Extract textures, preview from Quixel 3d asset.
        Rename all textures by type. Rename preview to "preview".
        Extract some information from json info file in archive.

        The files are written to ``<dstdir>/<info["id"]>``, where ``info`` is
        whatever ``getInfoFromZip`` returns; it is also saved there as
        "info.json".

        :param zippath: path to Quixel 3d zip archive
        :param dstdir: destination folder to extract
        :param classes: list of classes to which the asset belongs
        :param name: name of asset (it will be placed in the "info.json" file)
        :raises zipfile.BadZipFile: if ``zippath`` is not a zip file
        """

        if not ZF.is_zipfile(zippath):
            raise ZF.BadZipFile(zippath + " is not a zip file")

        # The context manager releases the archive handle even when a helper
        # or a rename fails half-way through.
        with ZF.ZipFile(zippath, "r") as zobj:
            info = self.getInfoFromZip(zobj, classes, name)
            textureMap = self.textureMapFromZip(zobj)
            folder = os.path.join(dstdir, str(info["id"]))
            os.makedirs(folder, exist_ok=True)

            # textureMap is iterated as (member name, texture type) pairs.
            for zname, textype in textureMap:
                extension = os.path.splitext(zname)[-1]
                # extract() returns the path it actually wrote, which can
                # differ from a plain join when the member name is sanitized.
                extracted = zobj.extract(zname, folder)
                os.rename(src=extracted,
                          dst=os.path.join(folder, textype + extension))

            self.extractPreviewFromZip(zobj, folder, "preview")

        self.saveToJSON(obj=info, path=os.path.join(folder, "info.json"))
Exemple #4
0
    def extract(self, path: pathlib.Path, data_dir: pathlib.Path, file_list_file: pathlib.Path) -> None:
        """Unpack a zip archive and record what it contains.

        A JSON summary (archive type plus, per member, whether it is a
        directory and the size of regular files) is written to
        ``file_list_file`` before the members are extracted to ``data_dir``.

        :param path: Path to the zip archive.
        :param data_dir: Path to the data dir to extract data files to.
        :param file_list_file: Path to the file that stores the list of files in the downloaded dataset.
        :raises zipfile.BadZipFile: The zip archive was unable to be read.
        """
        try:
            archive = zipfile.ZipFile(path)
        except zipfile.BadZipFile as e:
            raise zipfile.BadZipFile(f'Failed to unarchive zip file "{path}"\ncaused by:\n{e}')

        with archive:
            contents: Dict[str, Dict[str, Union[bool, int]]] = {}
            for info in archive.infolist():
                entry: Dict[str, Union[bool, int]] = {'isdir': info.is_dir()}
                if not entry['isdir']:
                    # Sizes are only recorded for regular files.
                    entry['size'] = info.file_size
                contents[info.filename] = entry

            metadata = {'type': 'application/zip', 'contents': contents}
            with open(file_list_file, mode='w') as listing:
                json.dump(metadata, listing, indent=2)

            archive.extractall(path=data_dir)
Exemple #5
0
def unpack_worksheet(archive, filename):
    """
    Copy one archive member, decompressed, into a temporary file.

    The member's raw bytes are read straight from ``archive.fp`` in chunks
    and inflated on the fly when the member is deflated.

    :param archive: an open ``zipfile.ZipFile``
    :param filename: name of the member to unpack
    :return: a binary temporary file holding the decompressed data; the file
        position is left at the end of the written data
    :raises zipfile.BadZipFile: if the member uses a compression method other
        than stored or deflated
    """
    zinfo = archive.getinfo(filename)

    # Validate the compression method before creating the temporary file so
    # nothing is leaked when the member cannot be handled.
    if zinfo.compress_type == zipfile.ZIP_STORED:
        decoder = None
    elif zinfo.compress_type == zipfile.ZIP_DEFLATED:
        # Negative wbits selects a raw deflate stream (no zlib header).
        decoder = zlib.decompressobj(-zlib.MAX_WBITS)
    else:
        raise zipfile.BadZipFile("Unrecognized compression method")

    # Binary mode: the archive yields bytes, which a text-mode file rejects.
    temp_file = tempfile.TemporaryFile(mode='w+b',
                                       prefix='openpyxl.',
                                       suffix='.unpack.temp')

    # NOTE(review): _get_file_offset presumably returns the offset of the
    # member's compressed data inside the archive - confirm at its definition.
    archive.fp.seek(_get_file_offset(archive, zinfo))
    bytes_to_read = zinfo.compress_size

    while True:
        buff = archive.fp.read(min(bytes_to_read, 102400))
        if not buff:
            break
        bytes_to_read -= len(buff)
        if decoder:
            buff = decoder.decompress(buff)
        temp_file.write(buff)

    if decoder:
        # flush() returns whatever the decompressor still buffers; this
        # replaces the old str pad byte ('Z'), which Python 3's zlib rejects.
        temp_file.write(decoder.flush())

    return temp_file
Exemple #6
0
def extract_zip(infile):
    """
    Extract the required parts of the QuinCe export ZIP
    """

    manifest = None
    content = None

    if not zipfile.is_zipfile(infile):
        raise zipfile.BadZipFile('Supplied file is not a ZIP file')

    basename = os.path.splitext(os.path.basename(infile))[0]

    with zipfile.ZipFile(infile) as in_zip:
        manifest_path = zipfile.Path(in_zip, f'{basename}/manifest.json')
        if not manifest_path.exists():
            raise KeyError('ZIP file is missing manifest.json')

        manifest = json.loads(manifest_path.read_text())
        dataset_name = manifest['manifest']['metadata']['name']

        dataset_filename = f'{basename}/dataset/SOCAT/{dataset_name}.tsv'
        dataset_path = zipfile.Path(in_zip, dataset_filename)
        if not dataset_path.exists():
            raise KeyError('ZIP file does not contain SOCAT export')
        content = in_zip.read(dataset_filename)

    return manifest, content
Exemple #7
0
    def init_zip(self, file: str, **kwargs):
        """
        Initialize a version with a zip file

        Every member of the archive is extracted below
        ``<UPLOAD_FOLDER>/temp_<id>/`` and loaded as a ``DataTable`` through
        ``init_csv``.

        :param file: File path to the zip file
        :param kwargs: Arguments to pass to pandas.read_csv
        :return:
        :raises zipfile.BadZipFile: if ``file`` is not a zip file
        """
        from .data_table import DataTable

        if self._has_data():
            self._import_error()

        if not zipfile.is_zipfile(file):
            # "%s", not "%f": the path is a string, and "%f" would raise a
            # TypeError that hides the real problem.
            raise zipfile.BadZipFile("%s is not a zip file" % file)

        # Close the archive once every member has been extracted and loaded.
        with zipfile.ZipFile(file) as zip_file:
            for f in zip_file.infolist():
                path = zip_file.extract(
                    f, flask.current_app.config["UPLOAD_FOLDER"].rstrip("/") +
                    "/temp_%s/" % self.id)

                new_table = DataTable(self, os.path.basename(path))
                self.tables.append(new_table)
                new_table.init_csv(path, **kwargs)

        self.loaded = True
        self.description = "INIT FROM ZIP %s" % os.path.basename(file)
        self._update_db()
Exemple #8
0
 def _upload(self, request):
     """
     Store every uploaded file below the requested path.

     Files named ``*.zip`` with content type ``application/zip`` are checked
     with ``testzip()`` and unpacked into the target directory; any other
     file is written there under its base name.

     Returns ``{'success': True}`` or ``{'success': False, 'errors': ...}``
     where ``errors`` maps the form field name to a message. A zip whose
     ``testzip()`` reports a bad member raises ``zipfile.BadZipFile``.
     """
     path = self.expand_path(request.POST['path'])
     errors = {}
     for name, f in request.FILES.items():
         ext = os.path.splitext(f.name)[1]
         if ext == '.zip' and f.content_type == 'application/zip':
             with zipfile.ZipFile(f) as z:
                 if z.testzip() is not None:
                     raise zipfile.BadZipFile()

                 # TODO: Need more validation
                 # See: http://docs.python.org/library/zipfile.html#zipfile.ZipFile.extractall
                 # NOTE(review): the archive comes from the client; member
                 # names and total size are not checked before extraction.
                 z.extractall(path)
         else:
             try:
                 target = os.path.join(path, os.path.basename(f.name))
                 # The file is closed once, after ALL chunks are written;
                 # closing inside the loop broke every multi-chunk upload.
                 with open(target, 'wb+') as dest:
                     for chunk in f.chunks():
                         dest.write(chunk)
             except Exception:
                 errors[name] = "Could not upload %s" % os.path.basename(f.name)
     if errors:
         return {'success': False, 'errors': errors}
     return {'success': True}
Exemple #9
0
 def open(self):
     """
     Start writing the archive into a ``<filename>.tmp`` file.

     Returns ``self``. Raises ``zipfile.BadZipFile`` when a temporary file
     name has already been set, i.e. the object was opened before.
     """
     if self._tmp_filename:
         raise zipfile.BadZipFile("ZipStream object can't be reused")

     self._ensure_base_path(self.filename)
     self._tmp_filename = f'{self.filename}.tmp'
     self._log.info("creating '%s' archive", self.filename)
     self._zip = zipfile.ZipFile(
         self._tmp_filename, "w", self._compress_type)
     return self
Exemple #10
0
    def __get_sublime_pkg_contents(self, pkg_filename):
        """
        Return a ``PackageFileSet`` of the member names in a sublime-package.

        Raises ``zipfile.BadZipFile`` if ``pkg_filename`` is not a zip file.
        """
        if not zipfile.is_zipfile(pkg_filename):
            # Report the offending file; the previous message formatted an
            # undefined name and died with a NameError instead.
            raise zipfile.BadZipFile(
                "Invalid sublime-package file '{}'".format(pkg_filename))

        with zipfile.ZipFile(pkg_filename) as zFile:
            return PackageFileSet(zFile.namelist())
Exemple #11
0
    def clone(self,
              file,
              filenames_or_infolist=None,
              ignore_hidden_files=False):
        """ Write a copy of this zip file to ``file`` and reopen the copy.

        Args:
          file (File, str): file-like object or filename of file to write the
            new zip file to.
          filenames_or_infolist (list(str), list(ZipInfo), optional): list of
            members from this zip file to include in the new zip file.
          ignore_hidden_files (boolean): flag to indicate whether hidden files
            (data in between managed members of the archive) should be
            included.

        Returns:
            A new ZipFile object of the cloned zipfile open in append mode.

            If copying hidden files then clone will attempt to maintain the
            relative order between the files and members in the archive

        Raises:
            BadZipFile exception.
        """
        if not (filenames_or_infolist or self.requires_commit
                or ignore_hidden_files):
            # Nothing to filter and nothing pending: copy the bytes as-is.
            self._quick_clone(file)
        else:
            # Filtering or committing changes: rebuild entry by entry.
            entries = self._gather_and_filter_files(
                filenames_or_infolist=filenames_or_infolist,
                ignore_hidden_files=ignore_hidden_files,
                sort=True)

            with ZipFileExtended(file, mode="w") as writer:
                for entry in entries:
                    if isinstance(entry, zipfile.ZipInfo):
                        payload = self.read_compressed(entry.filename)
                        writer.write_compressed(entry, payload)
                    else:
                        payload = entry.read(entry.length)
                        writer._write_hidden(payload)

        reopened = ZipFileExtended(file,
                                   mode="a",
                                   compression=self.compression,
                                   allowZip64=self._allowZip64)
        # testzip() names the first corrupt member, or returns None.
        corrupt = reopened.testzip()
        if corrupt:
            raise zipfile.BadZipFile(
                "Error when cloning zipfile, failed zipfile check: {} file is corrupt"
                .format(corrupt))
        return reopened
Exemple #12
0
    def test_close_on_exception(self):
        """Check that the zipfile is closed if an exception is raised in the
        'with' block."""
        with zipfile.ZipFile(TESTFN2, "w") as writer:
            for member_name, member_data in SMALL_TEST_DATA:
                writer.writestr(member_name, member_data)

        # Leaving the inner 'with' through an exception must still close it.
        with self.assertRaises(zipfile.BadZipFile):
            with zipfile.ZipFile(TESTFN2, "r") as reader:
                raise zipfile.BadZipFile()
        self.assertIsNone(reader.fp, 'zipfp is not closed')
Exemple #13
0
    def __get_sublime_pkg_zip_list(self, pkg_filename):
        """
        Return the ``ZipInfo`` list of a sublime-package, caching the result.

        Results are stored in ``self.zip_list`` keyed by file name, so each
        package is opened at most once. Raises ``zipfile.BadZipFile`` if an
        uncached ``pkg_filename`` is not a zip file.
        """
        cache = self.zip_list
        if pkg_filename not in cache:
            if not zipfile.is_zipfile(pkg_filename):
                raise zipfile.BadZipFile(
                    "Invalid sublime-package file '%s'" % pkg_filename)

            with zipfile.ZipFile(pkg_filename) as package:
                cache[pkg_filename] = package.infolist()

        return cache[pkg_filename]
Exemple #14
0
 def extract(self, path=None):
     """
     Unpack the student's submission archive.

     The archive is ``Student.STUDENTS_PATH/<file_name>``. On success
     ``extracted_path`` holds the directory used and ``extracted`` is True.

     :param path: directory to extract into (str or path-like). When
         omitted, a temporary directory is created and kept on the instance
         so it stays on disk while this object is alive. For backward
         compatibility any other object is still entered as a context
         manager that yields the directory.
     :raises zipfile.BadZipFile: if the submission is not a valid zip file
     """
     TestContext.log('Extracting ' + self.file_name + (' (' + self.full_name + ')' if self.full_name else '') + '...')
     try:
         with zipfile.ZipFile(os.path.join(Student.STUDENTS_PATH, self.file_name)) as submission_file:
             if not path:
                 # Keep a reference: the directory would be removed as soon
                 # as the TemporaryDirectory object is finalized or exited.
                 self._temp_dir = tempfile.TemporaryDirectory()
                 extract_path = self._temp_dir.name
                 submission_file.extractall(extract_path)
             elif isinstance(path, (str, os.PathLike)):
                 # A plain path is not a context manager; use it directly.
                 extract_path = path
                 submission_file.extractall(extract_path)
             else:
                 with path as extract_path:
                     submission_file.extractall(extract_path)
             self.extracted_path = extract_path
             self.extracted = True

     except zipfile.BadZipFile as e:
         TestContext.log('Failed to extract ' + self.file_name, 1)
         TestContext.log(e, 1)
         raise zipfile.BadZipFile("File " + self.file_name + " is not a zip file.") from e
Exemple #15
0
    def __init__(self,
                 directory: Union[str, os.PathLike],
                 completed_only=False):
        """
        Aggregate the search outputs found anywhere below a directory.

        Every ``.zip`` file in the tree is first unpacked next to itself (into
        a folder named after the archive); the tree is then walked again and a
        SearchOutput is created for each directory containing a ``metadata``
        file.

        Parameters
        ----------
        directory
            A directory in which the outputs of search_outputs are kept. This is searched recursively.
        completed_only
            If `True` only search_outputs with a .completed file (indicating the phase was completed)
            are included in the aggregator.

        Raises
        ------
        zipfile.BadZipFile
            If a ``.zip`` file in the tree cannot be read as a zip archive.
        """

        # TODO : Progress bar here

        print("Aggregator loading search_outputs... could take some time.")

        self._directory = directory

        # Pass 1: unpack every archive so its contents are seen by pass 2.
        for root, _, filenames in os.walk(directory):
            for archive_name in filenames:
                if not archive_name.endswith(".zip"):
                    continue
                archive_path = path.join(root, archive_name)
                try:
                    with zipfile.ZipFile(archive_path, "r") as archive:
                        archive.extractall(path.join(root, archive_name[:-4]))
                except zipfile.BadZipFile:
                    raise zipfile.BadZipFile(f"File is not a zip file: \n "
                                             f"{root} \n"
                                             f"{archive_name}")

        # Pass 2: collect each directory that carries a metadata file.
        search_outputs = [
            SearchOutput(root)
            for root, _, filenames in os.walk(directory)
            if "metadata" in filenames
            and (not completed_only or ".completed" in filenames)
        ]

        if not search_outputs:
            print(f"\nNo search_outputs found in {directory}\n")
        else:
            print(
                f"\n A total of {len(search_outputs)} search_outputs and results were found."
            )
        super().__init__(search_outputs)
Exemple #16
0
def extract_csv_from_zip(filename):
    """Open the single file stored inside a zip archive.

    The archive must hold exactly one member whose base name (without
    extension) equals that of the archive. Returns a readable binary file
    object for that member.

    Raises ``zipfile.BadZipFile`` when the archive is unreadable or breaks
    either rule.
    """
    try:
        with zipfile.ZipFile(filename, 'r') as archive:
            members = archive.namelist()
            if len(members) != 1:
                raise zipfile.BadZipFile(
                    'There is more than 1 file in the .zip archive.')
            member = members[0]

            archive_stem = splitext(basename(filename))[0]
            member_stem = splitext(basename(member))[0]
            if archive_stem != member_stem:
                raise zipfile.BadZipFile(
                    'Wrong file name in archive {0} - the filename should match the .zip '
                    'filename with a different file extension.'.format(
                        filename))

            return archive.open(member, mode='r')

    except zipfile.BadZipFile as e:
        # Every failure is reported under one common prefix.
        raise zipfile.BadZipFile(
            'Input file is not a valid .zip file: {0}'.format(str(e)))
Exemple #17
0
 def unzip_archive(self):
     """
     Unpack the tweet archive at ``archive_dir`` into ``extract_dir``.

     Problems are reported through ``log_err`` instead of being raised: a
     missing file, or a file that is not a zip archive.
     """
     source = self.archive_dir
     self.log_notify('fetching tweet archive from "{}"..'.format(source))

     not_found = "cannot locate twitter archive."
     not_a_zip = '"{}" is not a zip file!'

     if not os.path.isfile(source):
         self.log_err(not_found)
         return
     if not zipfile.is_zipfile(source):
         self.log_err(not_a_zip.format(source))
         return

     try:
         with zipfile.ZipFile(source) as zf:
             zf.extractall(self.extract_dir)
     except zipfile.BadZipFile:
         self.log_err(not_a_zip.format(self.archive_dir))
     except FileNotFoundError:
         self.log_err(not_found)
Exemple #18
0
def _download_and_extract(url, extract_path=None):
    """
    Download and unzip datasets (helper function).

    This code was modified from
    https://github.com/tslearn-team/tslearn/blob
    /775daddb476b4ab02268a6751da417b8f0711140/tslearn/datasets.py#L28

    Parameters
    ----------
    url : string
        Url pointing to file to download
    extract_path : string, optional (default: None)
        path to extract downloaded zip to, None defaults
        to sktime/datasets/data

    Returns
    -------
    extract_path : string
        path of the folder the archive was extracted to; it is a sub-folder
        named after the downloaded file (text before its first ".")

    Raises
    ------
    zipfile.BadZipFile
        if the downloaded file is not a valid zip archive; the target folder
        is removed in that case

    """
    file_name = os.path.basename(url)
    dl_dir = tempfile.mkdtemp()
    try:
        zip_file_name = os.path.join(dl_dir, file_name)
        urlretrieve(url, zip_file_name)

        if extract_path is None:
            extract_path = os.path.join(MODULE,
                                        "data/%s/" % file_name.split(".")[0])
        else:
            extract_path = os.path.join(extract_path,
                                        "%s/" % file_name.split(".")[0])

        os.makedirs(extract_path, exist_ok=True)
        try:
            # Close the archive before the download directory is deleted.
            with zipfile.ZipFile(zip_file_name, "r") as archive:
                archive.extractall(extract_path)
        except zipfile.BadZipFile as err:
            if os.path.exists(extract_path):
                shutil.rmtree(extract_path)
            raise zipfile.BadZipFile(
                "Could not unzip dataset. Please make sure the URL is valid."
            ) from err
        return extract_path
    finally:
        # Always drop the download, whatever went wrong above.
        shutil.rmtree(dl_dir, ignore_errors=True)
def parse_input(input_string: str) -> zipfile.ZipFile:
    """Parse the request input (a Base64 encoded zip file string) into a zip file.

    Returns:
        The opened in-memory zip file.

    Raises:
        zipfile.BadZipFile: When the input string is not a zip file or one of
            its members is corrupt.
        FileNotFoundError: When the input archive does not have a main.tex file.
    """
    try:
        input_zipfile = zipfile.ZipFile(
            io.BytesIO(base64.b64decode(input_string)))
        # testzip() does not raise on a damaged member: it returns the name
        # of the first bad file, or None when every member checks out.
        corrupt_member = input_zipfile.testzip()
    except zipfile.BadZipFile as err:
        raise zipfile.BadZipFile("input zipfile: invalid") from err
    if corrupt_member is not None:
        input_zipfile.close()
        raise zipfile.BadZipFile("input zipfile: invalid")
    if "main.tex" not in input_zipfile.namelist():
        input_zipfile.close()
        raise FileNotFoundError(
            "The input zip file does not contain a main.tex file.")
    logger.debug("input zipfile: valid")
    return input_zipfile
Exemple #20
0
def open_bytes_book_in_zip(path):
    """
    Read a book in fb2 format stored inside a zip archive.

    The members are scanned in archive order and the first one whose
    content starts with ``<?xml`` is returned.

    :param path: path of archive
    :return: raw bytes of the book's XML
    :raises zipfile.BadZipFile: if ``path`` is not a zip archive
    :raises FileNotFoundError: if no member of the archive looks like XML
    """
    if not zipfile.is_zipfile(path):
        raise zipfile.BadZipFile("File has different compression format")

    # The context manager closes the archive on every exit path.
    with zipfile.ZipFile(path) as zf:
        for zip_filename in zf.namelist():
            data = zf.read(zip_filename)
            if data.startswith(b'<?xml'):
                return data

    raise FileNotFoundError("Внутри архива нет xml")
Exemple #21
0
 def get_archive_dir(self):
     """
     Prompt until the user enters the path of an existing zip file.

     Every rejected answer is reported through ``log_warn`` and the prompt
     is shown again. Returns the accepted path.
     """
     self.log_notify("Please enter the path to your twitter .zip archive")
     while True:
         try:
             response = input("> ")
             if not os.path.isfile(response):
                 raise FileNotFoundError()
             if not zipfile.is_zipfile(response):
                 raise zipfile.BadZipFile()
         except FileNotFoundError:
             self.log_warn('cannot find file "{}"'.format(response))
         except zipfile.BadZipFile:
             self.log_warn('"{}" is not a zip file!'.format(response))
         except PermissionError:
             self.log_warn(
                 'cannot open "{}" (invalid permsisions).'.format(response))
         else:
             return response
Exemple #22
0
    def load_file(self):
        """
        Open the EPUB at ``_file_name`` and wrap it in an ``EPUBBook``.

        The opened archive is kept in ``_zip_file_handler``. Returns the
        book, or ``None`` after printing an error when the file is not a
        valid zip or cannot be opened.
        """
        try:
            if not zipfile.is_zipfile(self._file_name):
                raise zipfile.BadZipFile()

            handler = zipfile.ZipFile(self._file_name, mode='r')
            self._zip_file_handler = handler
            book = EPUBBook(handler)

            # Both results are currently ignored; the calls are kept because
            # they may have side effects or raise.
            self._check_mimetype()
            self._get_root_file_path()

            return book

        except OSError:
            print('[Error] Cannot open', self._file_name)
        except zipfile.BadZipFile:
            print('[Error]', self._file_name, 'is not compressed properly')
Exemple #23
0
    def restore(self):
        """
        Replace the output folder with the contents of the ``.zip`` file.

        Does nothing when the zip file does not exist. Otherwise the output
        folder is wiped, the archive is extracted into it and the archive is
        deleted.

        Raises ``zipfile.BadZipFile`` if the archive cannot be read.
        """
        archive_path = self._zip_path
        if not path.exists(archive_path):
            return

        # Start from a clean folder; a missing folder is not an error.
        shutil.rmtree(self.output_path, ignore_errors=True)

        try:
            with zipfile.ZipFile(archive_path, "r") as archive:
                archive.extractall(self.output_path)
        except zipfile.BadZipFile as e:
            raise zipfile.BadZipFile(
                f"Unable to restore the zip file at the path {self._zip_path}"
            ) from e

        os.remove(archive_path)
Exemple #24
0
def explode(out, zip, name):
    """
    Copy the decompressed data of archive member 'name' to the 'out' stream.

    The member is read directly from ``zip.fp``: its local file header is
    located, checked and skipped, then the compressed bytes are streamed in
    8 KiB chunks.

    :param out: binary stream with a ``write`` method
    :param zip: an open ``zipfile.ZipFile``
    :param name: name of the member to copy
    :raises zipfile.BadZipFile: on an unsupported compression method, a
        malformed local header, or a header name that differs from the
        directory entry
    """
    zinfo = zip.getinfo(name)

    if zinfo.compress_type == zipfile.ZIP_STORED:
        decoder = None
    elif zinfo.compress_type == zipfile.ZIP_DEFLATED:
        # Negative wbits selects a raw deflate stream (no zlib header).
        decoder = zlib.decompressobj(-zlib.MAX_WBITS)
    else:
        raise zipfile.BadZipFile("unsupported compression method")

    # Navigate to the file header and skip over it
    zip.fp.seek(zinfo.header_offset)
    fheader = zip.fp.read(30)
    if len(fheader) != 30:
        raise zipfile.BadZipFile("Truncated file header")
    if fheader[0:4] != zipfile.stringFileHeader:
        raise zipfile.BadZipFile("Bad magic number for file header")

    fheader = struct.unpack(zipfile.structFileHeader, fheader)
    fname = zip.fp.read(fheader[zipfile._FH_FILENAME_LENGTH])
    if fheader[zipfile._FH_EXTRA_FIELD_LENGTH]:
        zip.fp.read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH])

    # The header stores the name as bytes; decode it the way zipfile does
    # (general-purpose flag bit 11 marks UTF-8, otherwise cp437) before
    # comparing it with the directory entry.
    encoding = "utf-8" if zinfo.flag_bits & 0x800 else "cp437"
    fname_str = fname.decode(encoding)
    if fname_str != zinfo.orig_filename:
        raise zipfile.BadZipFile(
            'File name in directory "%s" and header "%s" differ.' % (
                zinfo.orig_filename, fname_str))

    size = zinfo.compress_size

    while True:
        data = zip.fp.read(min(size, 8192))
        if not data:
            break
        size -= len(data)
        if decoder:
            data = decoder.decompress(data)
        out.write(data)

    if decoder:
        # flush() hands back anything still buffered; the str pad byte fed
        # here before is rejected by Python 3's zlib.
        out.write(decoder.flush())
Exemple #25
0
    def __init__(self, tdm_path, tdx_path="", encoding="utf-8"):
        """
        Parse a TDM (XML) file and locate the binary file it refers to.

        ``tdm_path`` may be the XML file itself or a zip archive, in which
        case the first member of the archive is parsed.

        :param tdm_path: path to the TDM file or to a zip containing it
        :param tdx_path: path to the binary file; when empty it is taken from
            the ``url`` attribute of the ``file`` element, relative to the
            folder of ``tdm_path``
        :param encoding: text encoding used to read an uncompressed TDM file
        :raises zipfile.BadZipFile: if the zip archive has no members
        :raises TypeError: if ``byteOrder`` is neither "littleEndian" nor
            "bigEndian"
        """
        self._folder, self._tdm_filename = os.path.split(tdm_path)

        if not zipfile.is_zipfile(tdm_path):
            with open(tdm_path, "r", encoding=encoding) as file:
                self._root = ElementTree.parse(file).getroot()
        else:
            with zipfile.ZipFile(tdm_path) as zipf:
                members = zipf.namelist()
                if not members:
                    raise zipfile.BadZipFile(
                        f"The compressed file {tdm_path} does not contain any readable files."
                    )
                with zipf.open(members[0]) as file:
                    self._root = ElementTree.parse(file).getroot()

        # The root tag looks like "{namespace}name"; keep the namespace part.
        self._namespace = {"usi": self._root.tag.split("}")[0].strip("{")}

        self._xml_tdm_root = self._root.find(".//tdm_root")
        # The channelgroups text lists ids as id("..."); resolve each one to
        # its tdm_channelgroup element.
        group_ids = re.findall(r'id\("(.+?)"\)',
                               self._xml_tdm_root.findtext("channelgroups"))
        self._xml_chgs = [
            self._root.find(f".//tdm_channelgroup[@id='{usi}']")
            for usi in group_ids
        ]

        file_element = self._root.find(".//file")
        endian_by_order = {"littleEndian": "<", "bigEndian": ">"}
        byte_order = file_element.get("byteOrder")
        if byte_order not in endian_by_order:
            raise TypeError("Unknown endian format in TDM file")
        self._endian = endian_by_order[byte_order]

        # NOTE(review): the original comment called "C" column-major; in
        # NumPy "C" denotes row-major order - confirm where this is used.
        self._tdx_order = "C"
        if tdx_path == "":
            self._tdx_path = os.path.join(self._folder,
                                          file_element.get("url"))
        else:
            self._tdx_path = tdx_path
Exemple #26
0
    def _decodeExtra(self):
        """
        Decode the extra field: set ``is_encrypted`` and apply ZIP64 sizes.

        ``is_encrypted`` becomes True when the extra data is long enough to
        have a byte at index 168 and that byte is non-zero. For a ZIP64
        record (type 0x0001) the 64-bit values replace ``file_size``,
        ``compress_size`` and ``header_offset`` wherever those hold their
        0xFFFFFFFF placeholders.

        Raises ``zipfile.BadZipFile`` for a ZIP64 record of unexpected size.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack

        # Index 168 only exists when there are at least 169 bytes; the
        # previous ">= 168" test raised IndexError for exactly 168 bytes.
        self.is_encrypted = len(extra) > 168 and extra[168] > 0x00

        # The following is the default ZipInfo decode, minus a few steps that would mark an encrypted it as invalid
        # TODO: only do this if self.is_encrypted, otherwise call super?

        while len(extra) >= 4:
            tp, ln = unpack("<HH", extra[:4])
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack("<QQQ", extra[4:28])
                elif ln == 16:
                    counts = unpack("<QQ", extra[4:20])
                elif ln == 8:
                    counts = unpack("<Q", extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise zipfile.BadZipFile(
                        "Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xFFFFFFFF:
                    self.header_offset = counts[idx]
                    idx += 1
            # Move on to the next (type, length, payload) record.
            extra = extra[ln + 4:]
Exemple #27
0
def zipinfo_fixup_filename(inf: zipfile.ZipInfo):
    """
    Replace ``inf.filename`` with the name from its Unicode Path extra field.

    The extra data is walked record by record; a record of type 0x7075 with
    version 1 carries a UTF-8 file name that replaces ``inf.filename`` when
    it is non-empty. The stored name CRC is not verified.

    :param inf: the entry to update in place
    :raises zipfile.BadZipFile: if a record is longer than the remaining
        extra data, or the 0x7075 record is truncated or not valid UTF-8
    """
    # Support UTF-8 filenames using extra fields
    # Code from https://github.com/python/cpython/pull/23736
    extra = inf.extra
    unpack = struct.unpack

    while len(extra) >= 4:
        type_, length = unpack("<HH", extra[:4])
        if length + 4 > len(extra):
            raise zipfile.BadZipFile(
                f"Corrupt extra field {type_:04x} (size={length})")

        if type_ == 0x7075:
            data = extra[4:length + 4]
            # Unicode Path Extra Field
            try:
                up_version, _up_name_crc = unpack("<BL", data[:5])
                # Only version 1 is understood; other payloads are ignored
                # rather than decoded.
                if up_version == 1:
                    up_unicode_name = data[5:].decode("utf-8")
                    if up_unicode_name:
                        inf.filename = up_unicode_name
            except struct.error as err:
                raise zipfile.BadZipFile(
                    "Corrupt unicode path extra field (0x7075)") from err
            except UnicodeDecodeError as err:
                raise zipfile.BadZipFile(
                    "Corrupt unicode path extra field (0x7075): "
                    "invalid utf-8 bytes") from err

        extra = extra[length + 4:]
Exemple #28
0
def _download_dwd_data(remote_file: Union[str, Path]) -> BytesIO:
    """
    Download a station data archive and return its product file in memory.

    The remote address is built with ``create_remote_file_name``. The
    downloaded zip is searched for the first member that satisfies
    ``find_all_matchstrings_in_string`` with ``STATIONDATA_MATCHSTRINGS``.

    Args:
        remote_file: contains path to file that should be downloaded

    Returns:
        the content of the matching archive member as a BytesIO

    Raises:
        urllib.error.URLError: if the server could not be reached
        FailedDownload: if the download failed for any other reason
        zipfile.BadZipFile: if the downloaded data is not a valid zip file
        IndexError: if no archive member matches

    """
    file_server = create_remote_file_name(remote_file)

    try:
        with urllib.request.urlopen(file_server) as url_request:
            zip_file = BytesIO(url_request.read())
    except urllib.error.URLError as e:
        # Raise a new URLError; calling the caught instance ("e(...)") is a
        # TypeError because exception objects are not callable.
        raise urllib.error.URLError(
            f"Error: the stationdata {file_server} couldn't be reached."
        ) from e
    except Exception as e:
        raise FailedDownload(f"Download failed for {file_server}") from e

    try:
        with zipfile.ZipFile(zip_file) as zip_file_opened:
            produkt_file = [
                file_in_zip for file_in_zip in zip_file_opened.namelist()
                if find_all_matchstrings_in_string(file_in_zip,
                                                   STATIONDATA_MATCHSTRINGS)
            ][0]
            # read() opens and closes the member itself.
            file = BytesIO(zip_file_opened.read(produkt_file))
    except zipfile.BadZipFile as e:
        raise zipfile.BadZipFile(
            f"The zipfile seems to be corrupted.\n {str(e)}") from e

    return file
Exemple #29
0
    def peek(cls, filepath):
        """
        Read an archive's metadata without extracting it.

        Parameters
        ----------
        filepath
            Path of the ZIP archive to inspect.

        Returns
        -------
        tuple
            Tuple of UUID, type, and provenance.

        Raises
        ------
        zipfile.BadZipFile
            If `filepath` is not a readable ZIP file or does not exist.
        ValueError
            If the archive's format version differs from ``cls._VERSION``.

        """
        if not zipfile.is_zipfile(filepath):
            message = ("%r is not a readable ZIP file, or the file does not exist" %
                       filepath)
            raise zipfile.BadZipFile(message)

        root_dir = cls._get_root_dir(filepath)
        with zipfile.ZipFile(filepath, mode='r') as archive:
            found_version = cls._load_version(archive, root_dir)
            if found_version != cls._VERSION:
                raise ValueError(
                    "Unsupported archive format version %r. "
                    "Supported version(s): %r" % (found_version, cls._VERSION))

            metadata = cls._load_metadata(archive, root_dir)
        return metadata
Exemple #30
0
    def _get(self):
        """
        Retrieve and extract the dependency.

        For an archive source the file at ``source_url`` is downloaded to a
        temporary file, checked to be a zip archive and extracted into
        ``destination_path``. The optional ``archive_extract_items`` argument
        (an object with ``dirs`` and/or ``files`` lists) limits what is
        extracted; when it is missing, invalid or empty, every member is
        extracted. The temporary file is always removed.

        :return: ``False`` if the downloaded file is not a valid zip archive,
            otherwise ``True``
        :raises UnsupportedSourceTypeError: for git sources
        """

        if self.source_url_type == DependencySourceType.Git:
            # TODO: Implement git dependencies.
            raise UnsupportedSourceTypeError(DependencySourceType.Git)
        elif self.source_url_type == DependencySourceType.Archive:
            # Extract and build filesystem
            tmp_file_handle = tempfile.NamedTemporaryFile(delete=False)
            tmp_file_path = Path(tmp_file_handle.name)
            try:
                with tmp_file_handle:
                    logger.info(
                        f'{self.colourized_name} - Downloading archive ({self.source_url})'
                    )

                    response = requests.get(self.source_url, stream=True)
                    total_length = response.headers.get('content-length')

                    # no content length header
                    if total_length is None:
                        tmp_file_handle.write(response.content)
                    else:
                        total_length = int(total_length)
                        with click.progressbar(length=total_length,
                                               label='Downloading...') as bar:
                            for chunk in response.iter_content(chunk_size=4096):
                                tmp_file_handle.write(chunk)
                                bar.update(len(chunk))

                try:
                    if not zipfile.is_zipfile(tmp_file_handle.name):
                        raise zipfile.BadZipFile()
                except Exception:
                    logger.exception(
                        f'Invalid archive file provided for \'{self.name}\' dependency.'
                    )
                    return False

                logger.info(f'{self.colourized_name} - Extracting archive')
                with zipfile.ZipFile(tmp_file_handle.name) as zip_file:
                    archive_extract_items = self.args.get(
                        'archive_extract_items', None)
                    members = zip_file.namelist()

                    ARCHIVE_EXTRACT_ITEMS_SCHEMA = {
                        'type': 'object',
                        'properties': {
                            'dirs': {
                                'type': 'array',
                                'items': {
                                    'type': 'string'
                                }
                            },
                            'files': {
                                'type': 'array',
                                'items': {
                                    'type': 'string'
                                }
                            }
                        }
                    }

                    try:
                        validate_json(instance=archive_extract_items,
                                      schema=ARCHIVE_EXTRACT_ITEMS_SCHEMA)
                        dirs = archive_extract_items.get('dirs', list())
                        files = archive_extract_items.get('files', list())

                        if dirs or files:
                            # Everything below a requested directory, in
                            # request order, followed by the named files.
                            file_list = [
                                member for target_dir in dirs
                                for member in members
                                if member.startswith(target_dir)
                            ]
                            file_list += files
                        else:
                            # An empty selection means "extract everything".
                            file_list = members
                    except Exception:
                        # Missing or invalid selection: extract everything.
                        file_list = members

                    with click.progressbar(file_list,
                                           label='Extracting...') as bar:
                        for name in bar:
                            zip_file.extract(name, self.destination_path)
            finally:
                # Delete temporary file, also when the download, validation
                # or extraction failed.
                if tmp_file_path.is_file():
                    tmp_file_path.unlink()

        return True