Beispiel #1
0
def _generate_compressed_files(file_path, delete=True):
    """
    This is a generator which yields compressed versions of a file
    'file_path'.

    The 'delete' argument specifies whether the compressed files that this
    generator yields have to be automatically deleted.
    """

    # Make sure the temporary files start with the same name as 'file_obj' in
    # order to simplify debugging.
    prefix = os.path.splitext(os.path.basename(file_path))[0] + '.'
    # Put the temporary files in the directory with 'file_obj'
    directory = os.path.dirname(file_path)

    compressors = [
        ("bzip2", None, ".bz2", "-c -k"),
        ("pbzip2", None, ".p.bz2", "-c -k"),
        ("gzip", None, ".gz", "-c"),
        ("pigz", None, ".p.gz", "-c -k"),
        ("xz", None, ".xz", "-c -k"),
        ("lzop", None, ".lzo", "-c -k"),
        ("lz4", None, ".lz4", "-c -k"),
        ("zstd", None, ".zst", ""),
        # The "-P -C /" trick is used to avoid silly warnings:
        # "tar: Removing leading `/' from member names"
        ("bzip2", "tar", ".tar.bz2", "-c -j -O -P -C /"),
        ("gzip", "tar", ".tar.gz", "-c -z -O -P -C /"),
        ("xz", "tar", ".tar.xz", "-c -J -O -P -C /"),
        ("lzop", "tar", ".tar.lzo", "-c --lzo -O -P -C /"),
        ("lz4", "tar", ".tar.lz4", "-c -Ilz4 -O -P -C /"),
        ("zstd", "tar", ".tar.zst", "-c -Izstd -O -P -C /"),
        ("zip", None, ".zip", "-q -j -")
    ]

    for decompressor, archiver, suffix, options in compressors:
        if not BmapHelpers.program_is_available(decompressor):
            continue
        if archiver and not BmapHelpers.program_is_available(archiver):
            continue

        tmp_file_obj = tempfile.NamedTemporaryFile('wb+',
                                                   prefix=prefix,
                                                   delete=delete,
                                                   dir=directory,
                                                   suffix=suffix)

        if archiver:
            args = archiver + " " + options + " " + file_path
        else:
            args = decompressor + " " + options + " " + file_path
        child_process = subprocess.Popen(args,
                                         shell=True,
                                         stderr=subprocess.PIPE,
                                         stdout=tmp_file_obj)
        child_process.wait()
        tmp_file_obj.flush()
        yield tmp_file_obj.name
        tmp_file_obj.close()
Beispiel #2
0
def _generate_compressed_files(file_path, delete=True):
    """
    This is a generator which yields compressed versions of a file
    'file_path'.

    The 'delete' argument specifies whether the compressed files that this
    generator yields have to be automatically deleted.
    """

    # Make sure the temporary files start with the same name as 'file_obj' in
    # order to simplify debugging.
    prefix = os.path.splitext(os.path.basename(file_path))[0] + '.'
    # Put the temporary files in the directory with 'file_obj'
    directory = os.path.dirname(file_path)

    compressors = [("bzip2",  None, ".bz2",   "-c -k"),
                   ("pbzip2", None, ".p.bz2", "-c -k"),
                   ("gzip",   None, ".gz",    "-c"),
                   ("pigz",   None, ".p.gz",  "-c -k"),
                   ("xz",     None, ".xz",    "-c -k"),
                   ("lzop",   None, ".lzo",   "-c -k"),
                   ("lz4",    None, ".lz4",   "-c -k"),
                   # The "-P -C /" trick is used to avoid silly warnings:
                   # "tar: Removing leading `/' from member names"
                   ("bzip2", "tar", ".tar.bz2", "-c -j -O -P -C /"),
                   ("gzip",  "tar", ".tar.gz",  "-c -z -O -P -C /"),
                   ("xz",    "tar", ".tar.xz",  "-c -J -O -P -C /"),
                   ("lzop",  "tar", ".tar.lzo", "-c --lzo -O -P -C /"),
                   ("lz4",   "tar", ".tar.lz4", "-c -Ilz4 -O -P -C /"),
                   ("zip",   None,  ".zip",     "-q -j -")]

    for decompressor, archiver, suffix, options in compressors:
        if not BmapHelpers.program_is_available(decompressor):
            continue
        if archiver and not BmapHelpers.program_is_available(archiver):
            continue

        tmp_file_obj = tempfile.NamedTemporaryFile('wb+', prefix=prefix,
                                                   delete=delete, dir=directory,
                                                   suffix=suffix)

        if archiver:
            args = archiver + " " + options + " " + file_path
        else:
            args = decompressor + " " + options + " " + file_path
        child_process = subprocess.Popen(args, shell=True,
                                         stderr=subprocess.PIPE,
                                         stdout=tmp_file_obj)
        child_process.wait()
        tmp_file_obj.flush()
        yield tmp_file_obj.name
        tmp_file_obj.close()
    def _open_url_ssh(self, parsed_url):
        """
        This function opens a file on a remote host using SSH. The URL has to
        have this format: "ssh://username@hostname:path". Currently we only
        support password-based authentication.
        """

        username = parsed_url.username
        password = parsed_url.password
        path = parsed_url.path
        hostname = parsed_url.hostname
        if username:
            hostname = username + "@" + hostname

        # Make sure the ssh client program is installed
        if not BmapHelpers.program_is_available("ssh"):
            raise Error("the \"ssh\" program is not available but it is "
                        "required for downloading over the ssh protocol")

        # Prepare the commands that we are going to run
        if password:
            # In case of password we have to use the sshpass tool to pass the
            # password to the ssh client utility
            popen_args = ["sshpass",
                          "-p" + password,
                          "ssh",
                          "-o StrictHostKeyChecking=no",
                          "-o PubkeyAuthentication=no",
                          "-o PasswordAuthentication=yes",
                          hostname]

            # Make sure the sshpass program is installed
            if not BmapHelpers.program_is_available("ssh"):
                raise Error("the \"sshpass\" program is not available but it "
                            "is required for password-based SSH authentication")
        else:
            popen_args = ["ssh",
                          "-o StrictHostKeyChecking=no",
                          "-o PubkeyAuthentication=yes",
                          "-o PasswordAuthentication=no",
                          "-o BatchMode=yes",
                          hostname]

        # Test if we can successfully connect
        child_process = subprocess.Popen(popen_args + ["true"])
        child_process.wait()
        retcode = child_process.returncode
        if retcode != 0:
            decoded = _decode_sshpass_exit_code(retcode)
            raise Error("cannot connect to \"%s\": %s (error code %d)"
                        % (hostname, decoded, retcode))

        # Test if file exists by running "test -f path && test -r path" on the
        # host
        command = "test -f " + path + " && test -r " + path
        child_process = subprocess.Popen(popen_args + [command],
                                         bufsize=1024*1024,
                                         stdout=subprocess.PIPE)
        child_process.wait()
        if child_process.returncode != 0:
            raise Error("\"%s\" on \"%s\" cannot be read: make sure it "
                        "exists, is a regular file, and you have read "
                        "permissions" % (path, hostname))

        # Read the entire file using 'cat'
        child_process = subprocess.Popen(popen_args + ["cat " + path],
                                         stdout=subprocess.PIPE)

        # Now the contents of the file should be available from sub-processes
        # stdout
        self._f_objs.append(child_process.stdout)

        self._child_processes.append(child_process)
        self.is_url = True
        self._force_fake_seek = True
    def _open_compressed_file(self):
        """
        Detect file compression type and open it with the corresponding
        compression module, or just plain 'open() if the file is not
        compressed.
        """

        def is_gzip(name):
            """Returns 'True' if file 'name' is compressed with 'gzip'."""
            if name.endswith('.gzip') or \
               (name.endswith('.gz') and not name.endswith('.tar.gz')):
                return True
            return False

        def is_bzip2(name):
            """Returns 'True' if file 'name' is compressed with 'bzip2'."""
            if name.endswith('.bz2') and not name.endswith('.tar.bz2'):
                return True
            return False

        def is_xz(name):
            """Returns 'True' if file 'name' is compressed with 'xz'."""
            if name.endswith('.xz') and not name.endswith('.tar.xz'):
                return True
            return False

        def is_lzop(name):
            """Returns 'True' if file 'name' is compressed with 'lzop'."""
            if name.endswith('.lzo') and not name.endswith('.tar.lzo'):
                return True
            return False

        def is_tar_gz(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with
            'gzip'.
            """

            if name.endswith('.tar.gz') or name.endswith('.tgz'):
                return True
            return False

        def is_tar_bz2(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with
            'bzip2'.
            """

            if name.endswith('.tar.bz2') or name.endswith('.tbz') or \
               name.endswith('.tbz2') or name.endswith('.tb2'):
                return True
            return False

        def is_tar_xz(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with 'xz'.
            """

            if name.endswith('.tar.xz') or name.endswith('.txz'):
                return True
            return False

        def is_tar_lzo(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with
            'lzop'.
            """

            if name.endswith('.tar.lzo') or name.endswith('.tzo'):
                return True
            return False

        archiver = None
        if is_tar_gz(self.name) or is_gzip(self.name):
            self.compression_type = 'gzip'
            if BmapHelpers.program_is_available("pigz"):
                decompressor = "pigz"
            else:
                decompressor = "gzip"

            if is_gzip(self.name):
                args = "-d -c"
            else:
                archiver = "tar"
                args = "-x -z -O"
        elif is_tar_bz2(self.name) or is_bzip2(self.name):
            self.compression_type = 'bzip2'
            if BmapHelpers.program_is_available("pbzip2"):
                decompressor = "pbzip2"
            else:
                decompressor = "bzip2"

            if is_bzip2(self.name):
                args = "-d -c"
            else:
                archiver = "tar"
                args = "-x -j -O"
        elif is_tar_xz(self.name) or is_xz(self.name):
            self.compression_type = 'xz'
            decompressor = "xz"
            if is_xz(self.name):
                args = "-d -c"
            else:
                archiver = "tar"
                args = "-x -J -O"
        elif is_tar_lzo(self.name) or is_lzop(self.name):
            self.compression_type = 'lzo'
            decompressor = "lzop"
            if is_lzop(self.name):
                args = "-d -c"
            else:
                archiver = "tar"
                args = "-x --lzo -O"
        else:
            if not self.is_url:
                self.size = os.fstat(self._f_objs[-1].fileno()).st_size
            return

        # Make sure decompressor and the archiver programs are available
        if not BmapHelpers.program_is_available(decompressor):
            raise Error("the \"%s\" program is not available but it is "
                        "required decompressing \"%s\""
                        % (decompressor, self.name))
        if archiver and not BmapHelpers.program_is_available(archiver):
            raise Error("the \"%s\" program is not available but it is "
                        "required reading \"%s\"" % (archiver, self.name))

        # Start the decompressor process. We'll send the data to its stdin and
        # read the decompressed data from its stdout.
        if archiver:
            args = archiver + " " + args
        else:
            args = decompressor + " " + args
        child_process = subprocess.Popen(args, shell=True,
                                         bufsize=1024*1024,
                                         stdin=subprocess.PIPE,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)

        args = (self._f_objs[-1], child_process.stdin, )
        self._rthread = threading.Thread(target=self._read_thread, args=args)
        self._rthread.daemon = True
        self._rthread.start()

        self._force_fake_seek = True
        self._f_objs.append(child_process.stdout)
        self._child_processes.append(child_process)
Beispiel #5
0
    def _open_compressed_file(self):
        """
        Detect file compression type and open it with the corresponding
        compression module, or just plain 'open() if the file is not
        compressed.
        """
        def is_gzip(name):
            """Returns 'True' if file 'name' is compressed with 'gzip'."""
            if name.endswith('.gzip') or \
               (name.endswith('.gz') and not name.endswith('.tar.gz')):
                return True
            return False

        def is_bzip2(name):
            """Returns 'True' if file 'name' is compressed with 'bzip2'."""
            if name.endswith('.bz2') and not name.endswith('.tar.bz2'):
                return True
            return False

        def is_xz(name):
            """Returns 'True' if file 'name' is compressed with 'xz'."""
            if name.endswith('.xz') and not name.endswith('.tar.xz'):
                return True
            return False

        def is_lzop(name):
            """Returns 'True' if file 'name' is compressed with 'lzop'."""
            if name.endswith('.lzo') and not name.endswith('.tar.lzo'):
                return True
            return False

        def is_lz4(name):
            """Returns 'True' if file 'name' is compressed with 'lz4'."""
            if name.endswith('.lz4') and not name.endswith('.tar.lz4'):
                return True
            return False

        def is_zst(name):
            """Returns 'True' if file 'name' is compressed with 'zstd'."""
            if name.endswith('.zst') and not name.endswith('.tar.zst'):
                return True
            return False

        def is_tar_gz(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with
            'gzip'.
            """

            if name.endswith('.tar.gz') or name.endswith('.tgz'):
                return True
            return False

        def is_tar_bz2(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with
            'bzip2'.
            """

            if name.endswith('.tar.bz2') or name.endswith('.tbz') or \
               name.endswith('.tbz2') or name.endswith('.tb2'):
                return True
            return False

        def is_tar_xz(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with 'xz'.
            """

            if name.endswith('.tar.xz') or name.endswith('.txz'):
                return True
            return False

        def is_tar_lzo(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with
            'lzop'.
            """

            if name.endswith('.tar.lzo') or name.endswith('.tzo'):
                return True
            return False

        def is_tar_lz4(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with
            'lz4'.
            """

            if name.endswith('.tar.lz4') or name.endswith('.tlz4'):
                return True
            return False

        def is_tar_zst(name):
            """
            Returns 'True' if file 'name' is a tar archive compressed with
            'zstd'.
            """

            if name.endswith('.tar.zst') or name.endswith('.tzst'):
                return True
            return False

        archiver = None
        if is_tar_gz(self.name) or is_gzip(self.name):
            self.compression_type = 'gzip'
            if BmapHelpers.program_is_available("pigz"):
                decompressor = "pigz"
            else:
                decompressor = "gzip"

            if is_gzip(self.name):
                args = "-d -c"
            else:
                archiver = "tar"
                args = "-x -z -O"
        elif is_tar_bz2(self.name) or is_bzip2(self.name):
            self.compression_type = 'bzip2'
            if BmapHelpers.program_is_available("pbzip2"):
                decompressor = "pbzip2"
            else:
                decompressor = "bzip2"

            if is_bzip2(self.name):
                args = "-d -c"
            else:
                archiver = "tar"
                args = "-x -j -O"
        elif is_tar_xz(self.name) or is_xz(self.name):
            self.compression_type = 'xz'
            decompressor = "xz"
            if is_xz(self.name):
                args = "-d -c"
            else:
                archiver = "tar"
                args = "-x -J -O"
        elif is_tar_lzo(self.name) or is_lzop(self.name):
            self.compression_type = 'lzo'
            decompressor = "lzop"
            if is_lzop(self.name):
                args = "-d -c"
            else:
                archiver = "tar"
                args = "-x --lzo -O"
        elif self.name.endswith(".zip"):
            self.compression_type = 'zip'
            decompressor = "funzip"
            args = ""
        elif is_tar_lz4(self.name) or is_lz4(self.name):
            self.compression_type = 'lz4'
            decompressor = "lz4"
            if is_lz4(self.name):
                args = "-d -c"
            else:
                archiver = "tar"
                args = "-x -Ilz4 -O"
        elif is_tar_zst(self.name) or is_zst(self.name):
            self.compression_type = 'zst'
            decompressor = "zstd"
            if is_zst(self.name):
                args = "-d"
            else:
                archiver = "tar"
                args = "-x -Izstd -O"
        else:
            if not self.is_url:
                self.size = os.fstat(self._f_objs[-1].fileno()).st_size
            return

        if archiver == "tar":
            # This will get rid of messages like:
            #     tar: Removing leading `/' from member names'.
            args += " -P -C /"

        # Make sure decompressor and the archiver programs are available
        if not BmapHelpers.program_is_available(decompressor):
            raise Error("the \"%s\" program is not available but it is "
                        "required decompressing \"%s\"" %
                        (decompressor, self.name))
        if archiver and not BmapHelpers.program_is_available(archiver):
            raise Error("the \"%s\" program is not available but it is "
                        "required reading \"%s\"" % (archiver, self.name))

        # Start the decompressor process. We'll send the data to its stdin and
        # read the decompressed data from its stdout.
        if archiver:
            args = archiver + " " + args
        else:
            args = decompressor + " " + args

        if self.is_url:
            child_stdin = subprocess.PIPE
        else:
            child_stdin = self._f_objs[-1].fileno()

        child_process = subprocess.Popen(args,
                                         shell=True,
                                         bufsize=1024 * 1024,
                                         stdin=child_stdin,
                                         stdout=subprocess.PIPE)

        if child_stdin == subprocess.PIPE:
            # A separate reader thread is created only when we are reading via
            # urllib2.
            args = (
                self._f_objs[-1],
                child_process.stdin,
            )
            self._rthread = threading.Thread(target=self._read_thread,
                                             args=args)
            self._rthread.daemon = True
            self._rthread.start()

        self._fake_seek = True
        self._f_objs.append(child_process.stdout)
        self._child_processes.append(child_process)