def _generate_compressed_files(file_path, delete=True):
    """
    This is a generator which yields compressed versions of a file
    'file_path'. One temporary file is created for every entry of the
    'compressors' table below whose programs are installed; entries whose
    compressor (or archiver) is not available are skipped.

    The 'delete' argument specifies whether the compressed files that this
    generator yields have to be automatically deleted.

    Every yielded value is the path of a temporary file created in the same
    directory as 'file_path'. The temporary file is closed when the
    generator is resumed or closed, so with 'delete=True' the path is only
    valid until then.
    """

    # Imported locally because this is the only place which needs it.
    import shlex

    # Make sure the temporary files start with the same name as 'file_path'
    # in order to simplify debugging.
    prefix = os.path.splitext(os.path.basename(file_path))[0] + '.'
    # Put the temporary files in the directory with 'file_path'
    directory = os.path.dirname(file_path)

    # Each entry is: (compressor, archiver or None, suffix, options)
    compressors = [
        ("bzip2", None, ".bz2", "-c -k"),
        ("pbzip2", None, ".p.bz2", "-c -k"),
        ("gzip", None, ".gz", "-c"),
        ("pigz", None, ".p.gz", "-c -k"),
        ("xz", None, ".xz", "-c -k"),
        ("lzop", None, ".lzo", "-c -k"),
        ("lz4", None, ".lz4", "-c -k"),
        # Without "-c" zstd writes "<file_path>.zst" next to the input
        # instead of writing to stdout, which would leave the temporary
        # file empty.
        ("zstd", None, ".zst", "-c -k"),
        # The "-P -C /" trick is used to avoid silly warnings:
        # "tar: Removing leading `/' from member names"
        ("bzip2", "tar", ".tar.bz2", "-c -j -O -P -C /"),
        ("gzip", "tar", ".tar.gz", "-c -z -O -P -C /"),
        ("xz", "tar", ".tar.xz", "-c -J -O -P -C /"),
        ("lzop", "tar", ".tar.lzo", "-c --lzo -O -P -C /"),
        ("lz4", "tar", ".tar.lz4", "-c -Ilz4 -O -P -C /"),
        ("zstd", "tar", ".tar.zst", "-c -Izstd -O -P -C /"),
        ("zip", None, ".zip", "-q -j -"),
    ]

    for decompressor, archiver, suffix, options in compressors:
        if not BmapHelpers.program_is_available(decompressor):
            continue
        if archiver and not BmapHelpers.program_is_available(archiver):
            continue

        tmp_file_obj = tempfile.NamedTemporaryFile('wb+', prefix=prefix,
                                                   delete=delete,
                                                   dir=directory,
                                                   suffix=suffix)
        try:
            # Pass an argument list instead of a shell string, so that
            # paths with spaces or shell meta-characters work.
            program = archiver if archiver else decompressor
            args = [program] + shlex.split(options) + [file_path]

            child_process = subprocess.Popen(args, stderr=subprocess.PIPE,
                                             stdout=tmp_file_obj)
            # Unlike 'wait()', 'communicate()' drains the stderr pipe, so
            # the child cannot block on a full pipe.
            child_process.communicate()
            tmp_file_obj.flush()

            yield tmp_file_obj.name
        finally:
            # Close the file even if the consumer stops iterating early or
            # something above raises.
            tmp_file_obj.close()
def _generate_compressed_files(file_path, delete=True):
    """
    This is a generator which yields compressed versions of a file
    'file_path'. One temporary file is created for every entry of the
    'compressors' table below whose programs are installed; entries whose
    compressor (or archiver) is not available are skipped.

    The 'delete' argument specifies whether the compressed files that this
    generator yields have to be automatically deleted.

    Every yielded value is the path of a temporary file created in the same
    directory as 'file_path'. The temporary file is closed when the
    generator is resumed or closed, so with 'delete=True' the path is only
    valid until then.
    """

    # Imported locally because this is the only place which needs it.
    import shlex

    # Make sure the temporary files start with the same name as 'file_path'
    # in order to simplify debugging.
    prefix = os.path.splitext(os.path.basename(file_path))[0] + '.'
    # Put the temporary files in the directory with 'file_path'
    directory = os.path.dirname(file_path)

    # Each entry is: (compressor, archiver or None, suffix, options)
    compressors = [
        ("bzip2", None, ".bz2", "-c -k"),
        ("pbzip2", None, ".p.bz2", "-c -k"),
        ("gzip", None, ".gz", "-c"),
        ("pigz", None, ".p.gz", "-c -k"),
        ("xz", None, ".xz", "-c -k"),
        ("lzop", None, ".lzo", "-c -k"),
        ("lz4", None, ".lz4", "-c -k"),
        # The "-P -C /" trick is used to avoid silly warnings:
        # "tar: Removing leading `/' from member names"
        ("bzip2", "tar", ".tar.bz2", "-c -j -O -P -C /"),
        ("gzip", "tar", ".tar.gz", "-c -z -O -P -C /"),
        ("xz", "tar", ".tar.xz", "-c -J -O -P -C /"),
        ("lzop", "tar", ".tar.lzo", "-c --lzo -O -P -C /"),
        ("lz4", "tar", ".tar.lz4", "-c -Ilz4 -O -P -C /"),
        ("zip", None, ".zip", "-q -j -"),
    ]

    for decompressor, archiver, suffix, options in compressors:
        if not BmapHelpers.program_is_available(decompressor):
            continue
        if archiver and not BmapHelpers.program_is_available(archiver):
            continue

        tmp_file_obj = tempfile.NamedTemporaryFile('wb+', prefix=prefix,
                                                   delete=delete,
                                                   dir=directory,
                                                   suffix=suffix)
        try:
            # Pass an argument list instead of a shell string, so that
            # paths with spaces or shell meta-characters work.
            program = archiver if archiver else decompressor
            args = [program] + shlex.split(options) + [file_path]

            child_process = subprocess.Popen(args, stderr=subprocess.PIPE,
                                             stdout=tmp_file_obj)
            # Unlike 'wait()', 'communicate()' drains the stderr pipe, so
            # the child cannot block on a full pipe.
            child_process.communicate()
            tmp_file_obj.flush()

            yield tmp_file_obj.name
        finally:
            # Close the file even if the consumer stops iterating early or
            # something above raises.
            tmp_file_obj.close()
def _open_url_ssh(self, parsed_url):
    """
    This function opens a file on a remote host using SSH. The URL has to
    have this format: "ssh://username@hostname:path".

    The 'parsed_url' argument has to provide the 'username', 'password',
    'path' and 'hostname' attributes. If a password is present, the
    "sshpass" tool is used for password-based authentication; otherwise
    public key authentication in batch mode is used.

    The function first checks that it can connect to the host and that the
    remote file is a readable regular file, and then starts a remote "cat"
    whose stdout is appended to 'self._f_objs'. The "cat" process is
    appended to 'self._child_processes'.

    Raises 'Error' if a required program is not installed, if connecting to
    the host fails, or if the remote file cannot be read.
    """

    username = parsed_url.username
    password = parsed_url.password
    path = parsed_url.path
    hostname = parsed_url.hostname
    if username:
        hostname = username + "@" + hostname

    # Make sure the ssh client program is installed
    if not BmapHelpers.program_is_available("ssh"):
        raise Error("the \"ssh\" program is not available but it is "
                    "required for downloading over the ssh protocol")

    # Prepare the commands that we are going to run
    if password:
        # In case of password we have to use the sshpass tool to pass the
        # password to the ssh client utility
        popen_args = ["sshpass",
                      "-p" + password,
                      "ssh",
                      "-o StrictHostKeyChecking=no",
                      "-o PubkeyAuthentication=no",
                      "-o PasswordAuthentication=yes",
                      hostname]

        # Make sure the sshpass program is installed
        if not BmapHelpers.program_is_available("sshpass"):
            raise Error("the \"sshpass\" program is not available but it "
                        "is required for password-based SSH authentication")
    else:
        popen_args = ["ssh",
                      "-o StrictHostKeyChecking=no",
                      "-o PubkeyAuthentication=yes",
                      "-o PasswordAuthentication=no",
                      "-o BatchMode=yes",
                      hostname]

    # Test if we can successfully connect
    child_process = subprocess.Popen(popen_args + ["true"])
    child_process.wait()
    retcode = child_process.returncode
    if retcode != 0:
        decoded = _decode_sshpass_exit_code(retcode)
        raise Error("cannot connect to \"%s\": %s (error code %d)"
                    % (hostname, decoded, retcode))

    # Test if file exists by running "test -f path && test -r path" on the
    # host
    # NOTE(review): 'path' is passed to the remote shell unquoted, so paths
    # containing spaces or shell meta-characters will not work as expected.
    command = "test -f " + path + " && test -r " + path
    child_process = subprocess.Popen(popen_args + [command],
                                     bufsize=1024*1024,
                                     stdout=subprocess.PIPE)
    child_process.wait()
    if child_process.returncode != 0:
        raise Error("\"%s\" on \"%s\" cannot be read: make sure it "
                    "exists, is a regular file, and you have read "
                    "permissions" % (path, hostname))

    # Read the entire file using 'cat'
    child_process = subprocess.Popen(popen_args + ["cat " + path],
                                     stdout=subprocess.PIPE)

    # Now the contents of the file should be available from sub-processes
    # stdout
    self._f_objs.append(child_process.stdout)

    self._child_processes.append(child_process)
    self.is_url = True
    self._force_fake_seek = True
def _open_compressed_file(self):
    """
    Detect the compression type of the file from the suffix of 'self.name'
    and, if it is compressed, start an external decompressor (or "tar" for
    compressed tar archives) which decompresses it on-the-fly.

    If the suffix is not recognized, the file is treated as uncompressed:
    'self.size' is set to the size of the last file object in
    'self._f_objs' (unless 'self.is_url' is set) and nothing else is done.

    Raises 'Error' if the required decompressor or archiver program is not
    available.
    """

    name = self.name

    def plain(suffix):
        """
        Returns 'True' if 'name' ends with 'suffix' but is not a tar
        archive, i.e., does not end with '.tar' + 'suffix'.
        """
        return name.endswith(suffix) and not name.endswith('.tar' + suffix)

    # One row per supported compression type, probed in this order:
    #   (compression type, 'True' if this is a plain compressed file,
    #    suffixes of compressed tar archives, preferred decompressor or
    #    'None', default decompressor, tar options)
    formats = (
        ('gzip', name.endswith('.gzip') or plain('.gz'),
         ('.tar.gz', '.tgz'), "pigz", "gzip", "-x -z -O"),
        ('bzip2', plain('.bz2'),
         ('.tar.bz2', '.tbz', '.tbz2', '.tb2'), "pbzip2", "bzip2",
         "-x -j -O"),
        ('xz', plain('.xz'),
         ('.tar.xz', '.txz'), None, "xz", "-x -J -O"),
        ('lzo', plain('.lzo'),
         ('.tar.lzo', '.tzo'), None, "lzop", "-x --lzo -O"),
    )

    for ctype, is_plain, tar_suffixes, preferred, default, tar_opts in formats:
        if is_plain or name.endswith(tar_suffixes):
            break
    else:
        # Not a compressed file, only the size has to be found out
        if not self.is_url:
            self.size = os.fstat(self._f_objs[-1].fileno()).st_size
        return

    self.compression_type = ctype

    # Use the preferred decompressor if there is one and it is installed
    if preferred and BmapHelpers.program_is_available(preferred):
        decompressor = preferred
    else:
        decompressor = default

    archiver = None if is_plain else "tar"

    # Make sure decompressor and the archiver programs are available
    if not BmapHelpers.program_is_available(decompressor):
        raise Error("the \"%s\" program is not available but it is "
                    "required decompressing \"%s\""
                    % (decompressor, self.name))
    if archiver and not BmapHelpers.program_is_available(archiver):
        raise Error("the \"%s\" program is not available but it is "
                    "required reading \"%s\""
                    % (archiver, self.name))

    if archiver:
        command = archiver + " " + tar_opts
    else:
        command = decompressor + " " + "-d -c"

    # Start the decompressor process. We'll send the data to its stdin and
    # read the decompressed data from its stdout.
    # NOTE(review): stderr is a pipe which is not read in this function -
    # confirm that it is drained elsewhere.
    child = subprocess.Popen(command, shell=True, bufsize=1024*1024,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

    # A separate thread runs 'self._read_thread' with the compressed file
    # object and the stdin of the child process
    self._rthread = threading.Thread(target=self._read_thread,
                                     args=(self._f_objs[-1], child.stdin))
    self._rthread.daemon = True
    self._rthread.start()

    self._force_fake_seek = True
    self._f_objs.append(child.stdout)
    self._child_processes.append(child)
def _open_compressed_file(self):
    """
    Detect the compression type of the file from the suffix of 'self.name'
    and, if it is compressed, start an external decompressor (or "tar" for
    compressed tar archives) which decompresses it on-the-fly.

    If the suffix is not recognized, the file is treated as uncompressed:
    'self.size' is set to the size of the last file object in
    'self._f_objs' (unless 'self.is_url' is set) and nothing else is done.

    Raises 'Error' if the required decompressor or archiver program is not
    available.
    """

    name = self.name

    def plain(suffix):
        """
        Returns 'True' if 'name' ends with 'suffix' but is not a tar
        archive, i.e., does not end with '.tar' + 'suffix'.
        """
        return name.endswith(suffix) and not name.endswith('.tar' + suffix)

    # One row per supported compression type, probed in this order:
    #   (compression type, 'True' if this is a plain compressed file,
    #    suffixes of compressed tar archives, preferred decompressor or
    #    'None', default decompressor, decompressor options, tar options)
    formats = (
        ('gzip', name.endswith('.gzip') or plain('.gz'),
         ('.tar.gz', '.tgz'), "pigz", "gzip", "-d -c", "-x -z -O"),
        ('bzip2', plain('.bz2'),
         ('.tar.bz2', '.tbz', '.tbz2', '.tb2'), "pbzip2", "bzip2",
         "-d -c", "-x -j -O"),
        ('xz', plain('.xz'),
         ('.tar.xz', '.txz'), None, "xz", "-d -c", "-x -J -O"),
        ('lzo', plain('.lzo'),
         ('.tar.lzo', '.tzo'), None, "lzop", "-d -c", "-x --lzo -O"),
        # There is no tar variant for zip, hence no tar suffixes
        ('zip', name.endswith(".zip"),
         (), None, "funzip", "", None),
        ('lz4', plain('.lz4'),
         ('.tar.lz4', '.tlz4'), None, "lz4", "-d -c", "-x -Ilz4 -O"),
        ('zst', plain('.zst'),
         ('.tar.zst', '.tzst'), None, "zstd", "-d", "-x -Izstd -O"),
    )

    for (ctype, is_plain, tar_suffixes, preferred, default,
         plain_opts, tar_opts) in formats:
        if is_plain or name.endswith(tar_suffixes):
            break
    else:
        # Not a compressed file, only the size has to be found out
        if not self.is_url:
            self.size = os.fstat(self._f_objs[-1].fileno()).st_size
        return

    self.compression_type = ctype

    # Use the preferred decompressor if there is one and it is installed
    if preferred and BmapHelpers.program_is_available(preferred):
        decompressor = preferred
    else:
        decompressor = default

    archiver = None if is_plain else "tar"

    # Make sure decompressor and the archiver programs are available
    if not BmapHelpers.program_is_available(decompressor):
        raise Error("the \"%s\" program is not available but it is "
                    "required decompressing \"%s\""
                    % (decompressor, self.name))
    if archiver and not BmapHelpers.program_is_available(archiver):
        raise Error("the \"%s\" program is not available but it is "
                    "required reading \"%s\""
                    % (archiver, self.name))

    if archiver:
        # The " -P -C /" part will get rid of messages like:
        # tar: Removing leading `/' from member names'.
        command = archiver + " " + tar_opts + " -P -C /"
    else:
        command = decompressor + " " + plain_opts

    # Start the decompressor process. Its stdin is either a pipe, which a
    # separate thread feeds, or the file descriptor of the compressed file.
    # The decompressed data is read from its stdout.
    use_reader_thread = bool(self.is_url)
    if use_reader_thread:
        child_stdin = subprocess.PIPE
    else:
        child_stdin = self._f_objs[-1].fileno()

    child = subprocess.Popen(command, shell=True, bufsize=1024 * 1024,
                             stdin=child_stdin, stdout=subprocess.PIPE)

    if use_reader_thread:
        # A separate reader thread is created only when we are reading via
        # urllib2.
        self._rthread = threading.Thread(target=self._read_thread,
                                         args=(self._f_objs[-1],
                                               child.stdin))
        self._rthread.daemon = True
        self._rthread.start()

    self._fake_seek = True
    self._f_objs.append(child.stdout)
    self._child_processes.append(child)