def decompress_file(archive, dir_):
    """Decompress `archive` into a directory `dir_`

    Extraction is delegated to patool.  Whatever `swallow_outputs` collected
    as stdout/stderr during the call is relayed to the debug log.  On
    non-Windows systems `dir_` and every real (non-symlink) directory below
    it afterwards get the owner-execute bit added.

    Parameters
    ----------
    archive: str
      Path to the archive; validated with patool's
      `check_existing_filename`.
    dir_: str
      Directory to extract into; validated with patool's
      `check_existing_filename(..., onlyfiles=False)`.

    Raises
    ------
    MissingExternalDependency
      If patool reports the format as plain gzip and the extraction program
      it selects for it is not `7z`.
    """
    # `S_IEXEC` is documented in the `stat` module; `os.path.stat` only works
    # because posixpath/ntpath happen to import it.
    import stat

    with swallow_outputs() as cmo:
        archive = ensure_bytes(archive)
        dir_ = ensure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        # should be supplied in PY3 to avoid b''
        outdir = ensure_unicode(outdir)
        archive = ensure_unicode(archive)

        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz .xz etc "stream archivers" formats in handling
            # of archives. ATM our support for .gz relies on behavior of 7z
            # while extracting them and respecting possibly present .gz
            # filename header field.
            # See more https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename been stored in the header
            program = patoolib.find_archive_program(
                format_compression[0], 'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
                    "cmd:7z",
                    msg="(Not) Funny enough but ATM we need p7zip installation "
                        "to handle .gz files extraction 'correctly'")

        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)

    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.
    if not on_windows:
        os.chmod(dir_, os.stat(dir_).st_mode | stat.S_IEXEC)
        for root, dirs, files in os.walk(dir_, followlinks=False):
            for d in dirs:
                subdir = opj(root, d)
                if os.path.islink(subdir):
                    # os.walk still lists symlinked directories even though
                    # it does not descend into them; os.stat/os.chmod would
                    # follow the link and alter its target, which may live
                    # outside of `dir_`.  Removing a link never needs that.
                    continue
                os.chmod(subdir, os.stat(subdir).st_mode | stat.S_IEXEC)
def check_7z():
    """Return installation advice for 7zip, or '' if an extractor is found.

    Asks patool for a program able to extract the ``7z`` format.  When patool
    raises ``PatoolError`` an HTML-formatted warning is assembled whose last
    part depends on ``sys.platform``.

    Returns
    -------
    str
      Empty string when patool found a program, warning markup otherwise.
    """
    try:
        patoolib.find_archive_program('7z', 'extract')
    except patoolib.util.PatoolError:
        # fall through to composing the warning below
        pass
    else:
        return ''

    warning = ('This step requires a working installation of the 7zip program. <br>'
               'Please install 7zip on your system before continuing.<br>')
    if sys.platform == 'win32':
        warning += 'You can download it from <a href="http://www.7zip.org">www.7zip.org</a>.'
    elif sys.platform == 'darwin':
        warning += ('You can install 7zip with <a href="https://brew.sh">homebrew</a>.<br>'
                    'brew install p7zip')
    elif sys.platform.startswith('linux'):
        warning += ("Use your linux distribution's package manager for the installation.<br>"
                    'eg: sudo apt install p7zip-full')
    else:
        warning += 'More infos here: <a href="http://www.7zip.org">www.7zip.org</a>'
    return warning
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    `dir_` is created if it does not exist.  Extraction is delegated to
    patool; whatever `swallow_outputs` collected as stdout/stderr is relayed
    to the debug log.  On non-Windows systems `dir_` and every real
    (non-symlink) directory below it get the owner-execute bit added.

    Parameters
    ----------
    archive: str
    dir_: str
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under which
      all content is stored, all the content will be moved one directory up
      and that leading directory will be removed.

    Raises
    ------
    MissingExternalDependency
      If patool reports the format as plain gzip and the extraction program
      it selects for it is not `7z`.
    NotImplementedError
      For any other value of `leading_directories`; raised only after the
      archive has been extracted.
    """
    # `S_IEXEC` is documented in the `stat` module; `os.path.stat` only works
    # because posixpath/ntpath happen to import it.
    import stat

    if not exists(dir_):
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)

    with swallow_outputs() as cmo:
        archive = assure_bytes(archive)
        dir_ = assure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        if not PY2:
            # should be supplied in PY3 to avoid b''
            outdir = assure_unicode(outdir)
            archive = assure_unicode(archive)

        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz .xz etc "stream archivers" formats in handling
            # of archives. ATM our support for .gz relies on behavior of 7z
            # while extracting them and respecting possibly present .gz
            # filename header field.
            # See more https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename been stored in the header
            program = patoolib.find_archive_program(
                format_compression[0], 'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
                    "cmd:7z",
                    msg="(Not) Funny enough but ATM we need p7zip installation "
                        "to handle .gz files extraction 'correctly'")

        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)

    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.
    if not on_windows:
        os.chmod(dir_, os.stat(dir_).st_mode | stat.S_IEXEC)
        for root, dirs, files in os.walk(dir_, followlinks=False):
            for d in dirs:
                subdir = opj(root, d)
                if os.path.islink(subdir):
                    # os.walk still lists symlinked directories even though
                    # it does not descend into them; os.stat/os.chmod would
                    # follow the link and alter its target, which may live
                    # outside of `dir_`.
                    continue
                os.chmod(subdir, os.stat(subdir).st_mode | stat.S_IEXEC)

    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not files and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            leading = dirs[0]
            widow_dir = opj(dir_, leading)
            lgr.debug("Moving content within %s upstairs", widow_dir)
            subdir, subdirs_, files_ = next(os.walk(widow_dir))
            children = subdirs_ + files_
            if leading in children:
                # A child carries the very name of the leading directory
                # (e.g. data/data/...).  Renaming it one level up would
                # target its own, non-empty parent and fail, so first park
                # the leading directory under a name no child uses.
                # `dir_` was converted with assure_bytes, so names may be
                # bytes; keep the filler of the matching type.
                filler = b'_' if isinstance(leading, bytes) else '_'
                parked = leading + filler
                while parked in children:
                    parked += filler
                subdir = opj(dir_, parked)
                os.rename(widow_dir, subdir)
                widow_dir = subdir
            for f in children:
                os.rename(opj(subdir, f), opj(dir_, f))
            rmdir(widow_dir)
    elif leading_directories is None:
        pass  # really do nothing
    else:
        raise NotImplementedError("Not supported %s" % leading_directories)
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    `dir_` is created if it does not exist.  Extraction is delegated to
    patool; whatever `swallow_outputs` collected as stdout/stderr is relayed
    to the debug log.  On non-Windows systems the extracted directories are
    then made traversable for their owner.

    Parameters
    ----------
    archive: str
    dir_: str
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under which
      all content is stored, all the content will be moved one directory up
      and that leading directory will be removed.

    Raises
    ------
    MissingExternalDependency
      If patool reports the format as plain gzip and the extraction program
      it selects for it is not `7z`.
    NotImplementedError
      For any other value of `leading_directories`; raised only after the
      archive has been extracted.
    """
    if not exists(dir_):
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)

    with swallow_outputs() as cmo:
        archive = assure_bytes(archive)
        dir_ = assure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        if not PY2:
            # should be supplied in PY3 to avoid b''
            outdir = assure_unicode(outdir)
            archive = assure_unicode(archive)

        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz .xz etc "stream archivers" formats in handling
            # of archives. ATM our support for .gz relies on behavior of 7z
            # while extracting them and respecting possibly present .gz
            # filename header field.
            # See more https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename been stored in the header
            program = patoolib.find_archive_program(
                format_compression[0], 'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
                    "cmd:7z",
                    msg="(Not) Funny enough but ATM we need p7zip installation "
                        "to handle .gz files extraction 'correctly'"
                )

        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)

    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.
    if not on_windows:
        _make_dirs_traversable(dir_)

    if leading_directories == 'strip':
        _strip_single_leading_dir(dir_)
    elif leading_directories is not None:
        raise NotImplementedError("Not supported %s" % leading_directories)


def _make_dirs_traversable(top):
    """Add the owner-execute bit to `top` and each real directory below it."""
    # `S_IEXEC` is documented in the `stat` module; `os.path.stat` only works
    # because posixpath/ntpath happen to import it.
    import stat

    os.chmod(top, os.stat(top).st_mode | stat.S_IEXEC)
    # Top-down walk: a directory is fixed before os.walk tries to enter it.
    for root, dirs, _files in os.walk(top, followlinks=False):
        for name in dirs:
            path = opj(root, name)
            if os.path.islink(path):
                # os.walk still lists symlinked directories even though it
                # does not descend into them; os.stat/os.chmod would follow
                # the link and alter its target, possibly outside of `top`.
                continue
            os.chmod(path, os.stat(path).st_mode | stat.S_IEXEC)


def _strip_single_leading_dir(top):
    """Hoist the content of the sole top-level directory of `top`, if any.

    Does nothing unless `top` holds exactly one directory and no files.
    """
    _, dirs, files = next(os.walk(top))
    if files or len(dirs) != 1:
        return

    leading = dirs[0]
    widow_dir = opj(top, leading)
    lgr.debug("Moving content within %s upstairs", widow_dir)
    _, subdirs, subfiles = next(os.walk(widow_dir))
    children = subdirs + subfiles

    if leading in children:
        # A child carries the very name of the leading directory (e.g.
        # data/data/...).  Renaming it one level up would target its own,
        # non-empty parent and fail, so park the leading directory under a
        # name no child uses first.  Names are bytes when `top` is bytes;
        # keep the filler of the matching type.
        filler = b'_' if isinstance(leading, bytes) else '_'
        parked = leading + filler
        while parked in children:
            parked += filler
        parked_dir = opj(top, parked)
        os.rename(widow_dir, parked_dir)
        widow_dir = parked_dir

    for child in children:
        os.rename(opj(widow_dir, child), opj(top, child))
    rmdir(widow_dir)