def decompress_file(archive, dir_):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
    dir_: str
    """
    with swallow_outputs() as cmo:
        archive = ensure_bytes(archive)
        dir_ = ensure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        # should be supplied in PY3 to avoid b''
        outdir = ensure_unicode(outdir)
        archive = ensure_unicode(archive)

        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz .xz etc "stream archivers" formats in handling
            # of archives. ATM out support for .gz relies on behavior of 7z while
            # extracting them and respecting possibly present .gz filename
            # header field.
            # See more https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename been stored in the header
            program = patoolib.find_archive_program(format_compression[0],
                                                    'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
                    "cmd:7z",
                    msg="(Not) Funny enough but ATM we need p7zip installation "
                    "to handle .gz files extraction 'correctly'")

        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)

    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.

    if not on_windows:
        os.chmod(dir_, os.stat(dir_).st_mode | os.path.stat.S_IEXEC)
        for root, dirs, files in os.walk(dir_, followlinks=False):
            for d in dirs:
                subdir = opj(root, d)
                os.chmod(subdir,
                         os.stat(subdir).st_mode | os.path.stat.S_IEXEC)
def check_7z():
    try:
        patoolib.find_archive_program('7z', 'extract')
    except patoolib.util.PatoolError as e:
        warning = ('This step requires a working installation of the 7zip program. <br>' +
                   'Please install 7zip on your system before continuing.<br>')
        if sys.platform == 'win32':
            warning += 'You can download it from <a href="http://www.7zip.org">www.7zip.org</a>.'
        elif sys.platform == 'darwin':
            warning += ('You can install 7zip with <a href="https://brew.sh">homebrew</a>.<br>' +
                        'brew install p7zip')
        elif sys.platform.startswith('linux'):
            warning += ("Use your linux distribtion's package manager for the installation.<br>" +
                        'eg: sudo apt install p7zip-full')
        else:
            warning += 'More infos here: <a href="http://www.7zip.org">www.7zip.org</a>'

        return warning
    return ''
Exemple #3
0
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
    dir_: str
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under which
      all content is stored, all the content will be moved one directory up
      and that leading directory will be removed.
    """
    if not exists(dir_):
        lgr.debug("Creating directory %s to extract archive into" % dir_)
        os.makedirs(dir_)

    with swallow_outputs() as cmo:
        archive = assure_bytes(archive)
        dir_ = assure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        if not PY2:
            # should be supplied in PY3 to avoid b''
            outdir = assure_unicode(outdir)
            archive = assure_unicode(archive)

        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz .xz etc "stream archivers" formats in handling
            # of archives. ATM out support for .gz relies on behavior of 7z while
            # extracting them and respecting possibly present .gz filename
            # header field.
            # See more https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename been stored in the header
            program = patoolib.find_archive_program(format_compression[0],
                                                    'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
                    "cmd:7z",
                    msg="(Not) Funny enough but ATM we need p7zip installation "
                    "to handle .gz files extraction 'correctly'")

        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s" % cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s" % cmo.err)

    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.

    if not on_windows:
        os.chmod(dir_, os.stat(dir_).st_mode | os.path.stat.S_IEXEC)
        for root, dirs, files in os.walk(dir_, followlinks=False):
            for d in dirs:
                subdir = opj(root, d)
                os.chmod(subdir,
                         os.stat(subdir).st_mode | os.path.stat.S_IEXEC)

    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not len(files) and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            widow_dir = opj(dir_, dirs[0])
            lgr.debug("Moving content within %s upstairs" % widow_dir)
            subdir, subdirs_, files_ = next(os.walk(opj(dir_, dirs[0])))
            for f in subdirs_ + files_:
                os.rename(opj(subdir, f), opj(dir_, f))
            rmdir(widow_dir)
    elif leading_directories is None:
        pass  # really do nothing
    else:
        raise NotImplementedError("Not supported %s" % leading_directories)
Exemple #4
0
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
    dir_: str
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under which
      all content is stored, all the content will be moved one directory up
      and that leading directory will be removed.
    """
    if not exists(dir_):
        lgr.debug("Creating directory %s to extract archive into" % dir_)
        os.makedirs(dir_)

    with swallow_outputs() as cmo:
        archive = assure_bytes(archive)
        dir_ = assure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        if not PY2:
            # should be supplied in PY3 to avoid b''
            outdir = assure_unicode(outdir)
            archive = assure_unicode(archive)

        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz .xz etc "stream archivers" formats in handling
            # of archives. ATM out support for .gz relies on behavior of 7z while
            # extracting them and respecting possibly present .gz filename
            # header field.
            # See more https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename been stored in the header
            program = patoolib.find_archive_program(
                format_compression[0], 'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
                    "cmd:7z",
                    msg="(Not) Funny enough but ATM we need p7zip installation "
                        "to handle .gz files extraction 'correctly'"
                )

        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s" % cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s" % cmo.err)

    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.

    if not on_windows:
        os.chmod(dir_,
                 os.stat(dir_).st_mode |
                 os.path.stat.S_IEXEC)
        for root, dirs, files in os.walk(dir_, followlinks=False):
            for d in dirs:
                subdir = opj(root, d)
                os.chmod(subdir,
                         os.stat(subdir).st_mode |
                         os.path.stat.S_IEXEC)

    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not len(files) and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            widow_dir = opj(dir_, dirs[0])
            lgr.debug("Moving content within %s upstairs" % widow_dir)
            subdir, subdirs_, files_ = next(os.walk(opj(dir_, dirs[0])))
            for f in subdirs_ + files_:
                os.rename(opj(subdir, f), opj(dir_, f))
            rmdir(widow_dir)
    elif leading_directories is None:
        pass   # really do nothing
    else:
        raise NotImplementedError("Not supported %s" % leading_directories)