def extract(location, target_dir, arch_type='*'):
    """
    Extract all files from a 7zip-supported archive file at `location` in the
    `target_dir` directory. Return the warning messages collected from the
    7zip output by get_7z_warnings().

    `arch_type` is the type of 7zip archive passed to the -t 7zip option. It
    can be None or empty, in which case an empty string is passed in place of
    the -t option.

    Raise an ExtractErrorFailedToExtract if `location` is a RAR archive or if
    7zip exits with a non-zero return code.
    """
    assert location
    assert target_dir
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    if is_rar(location):
        raise ExtractErrorFailedToExtract('RAR extraction disactivated')

    # note: there are some issues with the extraction of debian .deb ar files
    # see sevenzip bug http://sourceforge.net/p/sevenzip/bugs/1472/

    # 7z arguments
    # (named extract_cmd so that it does not shadow this function's own name)
    extract_cmd = 'x'
    yes_to_all = '-y'

    # NB: we use t* to ensure that all archive types are honored
    type_option = '-t' + arch_type if arch_type else ''

    # pass an empty password so that extraction with passwords WILL fail:
    # "-p" with nothing after it is the 7z password switch with an empty value
    password = '-p'

    # renaming may not behave the same way on all OSes in particular Mac and Windows
    auto_rename_dupe_names = '-aou'

    # These things do not work well with p7zip for now:
    # - ensure that we treat the FS as case insensitive even if it is not:
    #   this ensures we have consistent names across OSes
    #   case_insensitive = '-ssc-'
    # - force any console output to be UTF-8 encoded
    #   TODO: add this may be for a UTF output on Windows only
    #   output_as_utf = '-sccUTF-8'
    #   working_tmp_dir = '-w<path>'

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env[u'TZ'] = u'GMT'
    env = command.get_env(env)

    # Note: 7z does extract in the current directory so we cwd to the target dir first
    args = [
        extract_cmd,
        yes_to_all,
        auto_rename_dupe_names,
        type_option,
        abs_location,
        password,
    ]

    ex_args = dict(
        cmd_loc=get_location(EXTRACTCODE_7ZIP_EXE),
        args=args,
        lib_dir=get_location(EXTRACTCODE_7ZIP_LIBDIR),
        cwd=abs_target_dir,
        env=env,
    )

    if TRACE:
        # Imported locally so that tracing works whether the module-level
        # `pprint` name is bound to the module or to the function.
        from pprint import pprint as _pprint
        logger.debug('extract: args:')
        _pprint(ex_args)

    rc, stdout, stderr = command.execute2(**ex_args)

    if rc != 0:
        if TRACE:
            logger.debug(
                'extract: failure: {rc}\nstderr: {stderr}\nstdout: {stdout}\n'
                .format(rc=rc, stderr=stderr, stdout=stdout))
        error = get_7z_errors(stdout) or UNKNOWN_ERROR
        raise ExtractErrorFailedToExtract(error)

    # post-process the extracted tree in place before reporting warnings
    extractcode.remove_backslashes_and_dotdots(abs_target_dir)
    return get_7z_warnings(stdout)
def list_entries(location, arch_type='*'):
    """
    Return the Entry tuples parsed by parse_7z_listing() from the 7zip
    technical listing of the 7zip-supported archive file at `location`.
    Return an empty list if `location` is a RAR archive.

    Use the provided 7zip `arch_type` CLI archive type code (e.g. with the
    "-t* 7z" cli type option) (can be None).
    """
    assert location
    abs_location = os.path.abspath(os.path.expanduser(location))

    if is_rar(location):
        return []

    # 7z arguments
    listing = 'l'

    # NB: we use t* to ensure that all archive types are honored
    type_option = '-t' + arch_type if arch_type else ''

    # pass an empty password so that extraction with passwords WILL fail:
    # "-p" with nothing after it is the 7z password switch with an empty value
    password = '-p'
    tech_info = '-slt'

    # the UTF-8 console output switch is only requested on Windows
    output_as_utf = '-sccUTF-8' if on_windows else ''

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env[u'TZ'] = u'GMT'
    env = command.get_env(env)

    args = [
        listing,
        tech_info,
        type_option,
        output_as_utf,
        abs_location,
        password,
    ]

    # NOTE(review): with to_files=True, stdout and stderr are presumably paths
    # to files holding the outputs (the trace below formats them as file://
    # URLs) -- confirm against command.execute2.
    rc, stdout, stderr = command.execute2(
        cmd_loc=get_location(EXTRACTCODE_7ZIP_EXE),
        args=args,
        lib_dir=get_location(EXTRACTCODE_7ZIP_LIBDIR),
        env=env,
        to_files=True,
    )

    if TRACE:
        logger.debug(
            'list_entries: rc: {rc}\nstderr: file://{stderr}\nstdout: file://{stdout}\n'
            .format(rc=rc, stderr=stderr, stdout=stdout))

    if rc != 0:
        # FIXME: this test is useless: the error is computed but never
        # reported or raised, and the listing is parsed regardless
        _error = get_7z_errors(stdout) or UNKNOWN_ERROR

    # the listing was produced as UTF on windows to avoid damaging binary
    # paths in console outputs
    utf = bool(output_as_utf)

    return parse_7z_listing(stdout, utf)
def build_7z_extract_command(location, target_dir, single_entry=None, arch_type='*'):
    """
    Return a mapping of 7z command line arguments to extract the archive at
    `location` to `target_dir`. The mapping has the keys `cmd_loc`, `args`,
    `lib_dir`, `cwd` and `env`.

    If `single_entry` contains an Entry, provide the command to extract only
    that single entry "path" in the current directory without any leading path.

    `arch_type` is the 7zip archive type code used to build the -t option. It
    can be None or empty, in which case an empty string is passed in place of
    the -t option.
    """
    # 7z arguments
    # (named command_code so that it does not shadow the module-level extract())
    if single_entry:
        # do not use full path
        command_code = 'e'
    else:
        command_code = 'x'

    yes_to_all = '-y'

    # NB: we use t* to ensure that all archive types are honored
    type_option = '-t' + arch_type if arch_type else ''

    # pass an empty password so that extraction with passwords WILL fail:
    # "-p" with nothing after it is the 7z password switch with an empty value.
    # This must be a switch as it is placed before the "--" end-of-switches
    # marker: a non-switch value there would be taken as the archive name.
    password = '-p'

    # renaming may not behave the same way on all OSes in particular Mac and Windows
    auto_rename_dupe_names = '-aou'

    # Ensure that we treat the FS as case insensitive if that's what it is
    # -ssc    Set case-sensitive mode. It's default for Posix/Linux systems.
    # -ssc-   Set case-insensitive mode. It's default for Windows systems.
    # historically, this was not needed on macOS, but now APFS is case
    # insensitive as a default
    if on_windows or on_macos_14_or_higher or not is_case_sensitive_fs:
        case_sensitive = '-ssc-'
    else:
        case_sensitive = '-ssc'

    # These do not work well with p7zip for now:
    # - force any console output to be UTF-8 encoded
    #   TODO: add this may be for a UTF output on Windows only
    #   output_as_utf = '-sccUTF-8'
    #   working_tmp_dir = '-w<path>'

    # NB: we force running in the GMT timezone, because 7z is unable to set
    # the TZ correctly when the archive does not contain TZ info. This does
    # not work on Windows, because 7z is not using the TZ env var there.
    env = dict(os.environ)
    env[u'TZ'] = u'GMT'
    env = command.get_env(env)

    # Note: 7z does extract in the current directory so we cwd to the target dir first
    # NOTE(review): `location` is passed through as given while the command is
    # set to run with cwd=target_dir, so a relative `location` would presumably
    # be resolved against `target_dir` -- callers should pass an absolute path.
    args = [
        command_code,
        yes_to_all,
        case_sensitive,
        auto_rename_dupe_names,
        type_option,
        password,
        '--',
        location,
    ]

    if single_entry:
        args.append(shlex_quote(single_entry.path))

    lib_dir, cmd_loc = get_bin_locations()

    ex_args = dict(
        cmd_loc=cmd_loc,
        args=args,
        lib_dir=lib_dir,
        cwd=target_dir,
        env=env,
    )

    if TRACE:
        # Imported locally so that tracing works whether the module-level
        # `pprint` name is bound to the module or to the function.
        from pprint import pprint as _pprint
        logger.debug('extract: args:')
        _pprint(ex_args)

    return ex_args