Ejemplo n.º 1
0
def extract(location, target_dir, arch_type='*'):
    """
    Extract all files from the 7zip-supported archive file at `location` in the
    `target_dir` directory. Return the warnings computed by get_7z_warnings()
    from the 7z command output.
    Raise an ExtractErrorFailedToExtract exception if `location` is a RAR
    archive or if the 7z command exits with a non-zero return code.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option. Can be
    None or empty: an empty argument is then passed in place of a -t option.
    """
    assert location
    assert target_dir
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    if is_rar(location):
        raise ExtractErrorFailedToExtract('RAR extraction disactivated')

    # note: there are some issues with the extraction of debian .deb ar files
    # see sevenzip bug http://sourceforge.net/p/sevenzip/bugs/1472/

    # 7z command and switches.
    # NB: the local name is not "extract" to avoid shadowing this function.
    extract_command = 'x'
    yes_to_all = '-y'

    # NB: the default type is "*" (e.g. -t*) to ensure that all archive types
    # are honored
    type_switch = ('-t' + arch_type) if arch_type else ''

    # Always pass a dummy password with the -p switch so that 7z never prompts
    # for a password and the extraction of password-protected archives WILL
    # fail. This must be a "-p" switch: a bare string would be handled by 7z
    # as a file name and not as a password.
    password = '-pempty'

    # renaming may not behave the same way on all OSes in particular Mac and Windows
    auto_rename_dupe_names = '-aou'

    # These things do not work well with p7zip for now:
    # - ensure that we treat the FS as case insensitive even if it is
    #   this ensure we have consistent names across OSes
    #   case_insensitive = '-ssc-'
    # - force any console output to be UTF-8 encoded
    #   TODO: add this may be for a UTF output on Windows only
    #   output_as_utf = '-sccUTF-8'
    #   working_tmp_dir = '-w<path>'

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env.update({u'TZ': u'GMT'})
    env = command.get_env(env)

    args = [
        extract_command,
        yes_to_all,
        auto_rename_dupe_names,
        type_switch,
        abs_location,
        password,
    ]

    lib_dir = get_location(EXTRACTCODE_7ZIP_LIBDIR)
    cmd_loc = get_location(EXTRACTCODE_7ZIP_EXE)

    ex_args = dict(
        cmd_loc=cmd_loc,
        args=args,
        lib_dir=lib_dir,
        # Note: 7z does extract in the current directory so we cwd to the
        # target dir first
        cwd=abs_target_dir,
        env=env,
    )

    if TRACE:
        logger.debug('extract: args:')
        pprint(ex_args)

    rc, stdout, stderr = command.execute2(**ex_args)

    if rc != 0:
        if TRACE:
            logger.debug(
                'extract: failure: {rc}\nstderr: {stderr}\nstdout: {stdout}\n'
                .format(rc=rc, stderr=stderr, stdout=stdout))
        # report the errors found in the 7z output or a generic error message
        error = get_7z_errors(stdout) or UNKNOWN_ERROR
        raise ExtractErrorFailedToExtract(error)

    # post-process the extracted paths in the target directory
    extractcode.remove_backslashes_and_dotdots(abs_target_dir)
    return get_7z_warnings(stdout)
Ejemplo n.º 2
0
def list_entries(location, arch_type='*'):
    """
    Return the entries parsed by parse_7z_listing() from the 7z listing of the
    7zip-supported archive file at `location`. Return an empty list if
    `location` is a RAR archive.

    Use the provided 7zip `arch_type` CLI archive type code (e.g. with the
    "-t* 7z" cli type option). It can be None or empty: an empty argument is
    then passed in place of a -t option.
    """
    assert location
    abs_location = os.path.abspath(os.path.expanduser(location))

    if is_rar(location):
        return []

    # 7z command and switches
    listing = 'l'
    # -slt: show technical information for each entry
    tech_info = '-slt'

    # NB: the default type is "*" (e.g. -t*) to ensure that all archive types
    # are honored
    type_switch = ('-t' + arch_type) if arch_type else ''

    # Always pass a dummy password with the -p switch so that 7z never prompts
    # for a password and the listing of password-protected archives WILL
    # fail. This must be a "-p" switch: a bare string would be handled by 7z
    # as a file name and not as a password.
    password = '-pempty'

    # force the console output to be UTF-8 encoded on Windows only
    output_as_utf = '-sccUTF-8' if on_windows else ''

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env.update({u'TZ': u'GMT'})
    env = command.get_env(env)

    args = [
        listing,
        tech_info,
        type_switch,
        output_as_utf,
        abs_location,
        password,
    ]

    lib_dir = get_location(EXTRACTCODE_7ZIP_LIBDIR)
    cmd_loc = get_location(EXTRACTCODE_7ZIP_EXE)

    # NOTE: with to_files=True the returned stdout and stderr are logged below
    # as file:// locations and not as text content.
    rc, stdout, stderr = command.execute2(
        cmd_loc=cmd_loc,
        args=args,
        lib_dir=lib_dir,
        env=env,
        to_files=True,
    )

    if TRACE:
        logger.debug(
            'list_entries: rc: {rc}\nstderr: file://{stderr}\nstdout: file://{stdout}\n'
            .format(rc=rc, stderr=stderr, stdout=stdout))

    if rc != 0:
        # FIXME: this test is useless: the error is computed and then
        # discarded. A failed listing is NOT reported to the caller and the
        # output is parsed regardless of the return code.
        _error = get_7z_errors(stdout) or UNKNOWN_ERROR

    # the listing was produced as UTF on windows to avoid damaging binary
    # paths in console outputs
    utf = bool(output_as_utf)

    return parse_7z_listing(stdout, utf)
Ejemplo n.º 3
0
def build_7z_extract_command(location,
                             target_dir,
                             single_entry=None,
                             arch_type='*'):
    """
    Return a mapping of 7z command line arguments to extract the archive at
    `location` to `target_dir`. The mapping has these keys: `cmd_loc`, `args`,
    `lib_dir`, `cwd` and `env`.

    If `single_entry` contains an Entry, provide the command to extract only
    that single entry "path" in the current directory without any leading path.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option. It
    can be None or empty: an empty argument is then passed in place of a -t
    option.
    """

    # 7z command: "e" extracts without the full paths, "x" with the full paths
    extract_command = 'e' if single_entry else 'x'

    yes_to_all = '-y'

    # NB: the default type is "*" (e.g. -t*) to ensure that all archive types
    # are honored
    type_switch = ('-t' + arch_type) if arch_type else ''

    # Always pass a dummy password with the -p switch so that 7z never prompts
    # for a password and the extraction of password-protected archives WILL
    # fail. This must be a "-p" switch: a bare string placed before the
    # archive location would be handled by 7z as a file or archive name and
    # not as a password.
    password = '-pempty'

    # renaming may not behave the same way on all OSes in particular Mac and Windows
    auto_rename_dupe_names = '-aou'

    # Ensure that we treat the FS as case insensitive if that's what it is
    # -ssc    Set case-sensitive mode. It's default for Posix/Linux systems.
    # -ssc-    Set case-insensitive mode. It's default for Windows systems.
    # historically, this was not needed on macOS, but now APFS is case
    # insensitive as a default
    if on_windows or on_macos_14_or_higher or not is_case_sensitive_fs:
        case_sensitive = '-ssc-'
    else:
        case_sensitive = '-ssc'

    # These do not work well with p7zip for now:
    # - force any console output to be UTF-8 encoded
    #   TODO: add this may be for a UTF output on Windows only
    #   output_as_utf = '-sccUTF-8'
    #   working_tmp_dir = '-w<path>'

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env.update({u'TZ': u'GMT'})
    env = command.get_env(env)

    args = [
        extract_command,
        yes_to_all,
        case_sensitive,
        auto_rename_dupe_names,
        type_switch,
        password,
        # "--" stops the switches parsing: everything after is a file name
        '--',
        location,
    ]

    if single_entry:
        args += [shlex_quote(single_entry.path)]

    lib_dir, cmd_loc = get_bin_locations()

    ex_args = dict(
        cmd_loc=cmd_loc,
        args=args,
        lib_dir=lib_dir,
        # Note: 7z does extract in the current directory so the command is
        # meant to run with the target dir as its working directory
        cwd=target_dir,
        env=env,
    )

    if TRACE:
        logger.debug('extract: args:')
        pprint.pprint(ex_args)

    return ex_args