def extract_tar_file(archive_path, output_dir, relative_to, extractors=None):
    """
    Unpack a plain or compressed tar archive into output_dir.

    A native extractor is preferred when one can be located for the running
    platform; otherwise the Python-based extractor is used.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.
    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    extractors is a dictionary, looked up by ExtractorEnum members, of commands or paths
        to extractor binaries. When None, DEFAULT_EXTRACTORS is used ('tar' for tar,
        and '_use_registry' for 7-Zip and WinRAR).

    Raises ExtractionError if unexpected issues arise during unpacking.
    Raises NotImplementedError if the running platform is neither Windows nor UNIX.
    """
    tools = DEFAULT_EXTRACTORS if extractors is None else extractors
    platform = get_running_platform()

    if platform == PlatformEnum.WINDOWS:
        # Candidates in order of preference: 7-zip first, WinRAR if 7-zip is not found.
        # Each entry is (lookup key, registry locator, tar unpacking routine).
        candidates = (
            (ExtractorEnum.SEVENZIP, _find_7z_by_registry, _extract_tar_with_7z),
            (ExtractorEnum.WINRAR, _find_winrar_by_registry, _extract_tar_with_winrar),
        )
        for extractor_key, locate_in_registry, unpack in candidates:
            command = tools.get(extractor_key)
            if command == USE_REGISTRY:
                command = str(locate_in_registry())
            binary = _find_extractor_by_cmd(command)
            if binary is not None:
                unpack(binary, archive_path, output_dir, relative_to)
                return
        get_logger().warning(
            'Neither 7-zip nor WinRAR were found. Falling back to Python extractor...')
    elif platform == PlatformEnum.UNIX:
        # NOTE: 7-zip isn't an option because it doesn't preserve file permissions
        binary = _find_extractor_by_cmd(tools.get(ExtractorEnum.TAR))
        if binary is not None:
            _extract_tar_with_tar(binary, archive_path, output_dir, relative_to)
            return
    else:
        # This is not a normal code path, so make it clear.
        raise NotImplementedError(platform)

    # Fallback to Python-based extractor on all platforms
    _extract_tar_with_python(archive_path, output_dir, relative_to)
def extract_with_7z(
        archive_path,
        output_dir,
        relative_to, #pylint: disable=too-many-arguments
        extractors=None):
    """
    Extract archives with 7-zip into the output directory.
    Only supports archives with one layer of unpacking, so compressed tar archives don't work.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.
    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    extractors is a dictionary, looked up by ExtractorEnum members, of commands or paths
        to extractor binaries. When None, DEFAULT_EXTRACTORS is used.

    Raises ExtractionError if unexpected issues arise during unpacking, including when
    the 7-zip binary cannot be located or 7-zip exits with a non-zero status.
    """
    # TODO: It would be nice to extend this to support arbitrary standard IO chaining of 7z
    # instances, so _extract_tar_with_7z and other future formats could use this.
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS
    sevenzip_cmd = extractors.get(ExtractorEnum.SEVENZIP)
    if sevenzip_cmd == USE_REGISTRY:
        if get_running_platform() != PlatformEnum.WINDOWS:
            get_logger().error('"%s" for 7-zip is only available on Windows', sevenzip_cmd)
            raise ExtractionError()
        sevenzip_cmd = str(_find_7z_by_registry())
    sevenzip_bin = _find_extractor_by_cmd(sevenzip_cmd)
    if sevenzip_bin is None:
        # Without this check a missing binary would surface as a TypeError while
        # building the debug command line below instead of an ExtractionError.
        get_logger().error('Unable to locate the 7-zip binary for command: %s', sevenzip_cmd)
        raise ExtractionError()

    # Refuse to unpack over an existing relative_to directory; the error message
    # treats it as a temporary unpacking directory handed to _process_relative_to.
    if relative_to is not None and (output_dir / relative_to).exists():
        get_logger().error('Temporary unpacking directory already exists: %s',
                           output_dir / relative_to)
        raise ExtractionError()

    # 'x' extracts with full paths, '-aoa' overwrites existing files without prompting,
    # and '-o' sets the output directory.
    cmd = (sevenzip_bin, 'x', str(archive_path), '-aoa', '-o{}'.format(str(output_dir)))
    get_logger().debug('7z command line: %s', ' '.join(cmd))

    # The return code is inspected manually so the failure can be logged first.
    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        get_logger().error('7z command returned %s', result.returncode)
        raise ExtractionError()

    _process_relative_to(output_dir, relative_to)
def extract_with_7z(
        archive_path,
        output_dir,
        relative_to, #pylint: disable=too-many-arguments
        extractors=None):
    """
    Extract archives with 7-zip into the output directory.
    Only supports archives with one layer of unpacking, so compressed tar archives don't work.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.
    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    extractors is a dictionary, looked up by ExtractorEnum members, of commands or paths
        to extractor binaries. When None, DEFAULT_EXTRACTORS is used.

    Raises ExtractionError if unexpected issues arise during unpacking, including when
    the 7-zip binary cannot be located or 7-zip exits with a non-zero status.
    """
    # TODO: It would be nice to extend this to support arbitrary standard IO chaining of 7z
    # instances, so _extract_tar_with_7z and other future formats could use this.
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS
    sevenzip_cmd = extractors.get(ExtractorEnum.SEVENZIP)
    if sevenzip_cmd == SEVENZIP_USE_REGISTRY:
        # The registry lookup is only meaningful on Windows.
        if get_running_platform() != PlatformEnum.WINDOWS:
            get_logger().error('"%s" for 7-zip is only available on Windows', sevenzip_cmd)
            raise ExtractionError()
        sevenzip_cmd = str(_find_7z_by_registry())
    sevenzip_bin = _find_extractor_by_cmd(sevenzip_cmd)
    if sevenzip_bin is None:
        # Fail with the documented exception type rather than letting a None
        # entry in the command tuple raise TypeError further down.
        get_logger().error('Unable to locate the 7-zip binary for command: %s', sevenzip_cmd)
        raise ExtractionError()

    if relative_to is not None:
        unpack_root = output_dir / relative_to
        # An existing directory at this path is rejected before any extraction happens.
        if unpack_root.exists():
            get_logger().error('Temporary unpacking directory already exists: %s', unpack_root)
            raise ExtractionError()

    # 'x' extracts with full paths, '-aoa' overwrites existing files without prompting,
    # and '-o' sets the output directory.
    cmd = (sevenzip_bin, 'x', str(archive_path), '-aoa', '-o{}'.format(str(output_dir)))
    get_logger().debug('7z command line: %s', ' '.join(cmd))

    # check=False: the exit status is handled explicitly below.
    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        get_logger().error('7z command returned %s', result.returncode)
        raise ExtractionError()

    _process_relative_to(output_dir, relative_to)
def extract_with_winrar(
        archive_path,
        output_dir,
        relative_to, #pylint: disable=too-many-arguments
        extractors=None):
    """
    Extract archives with WinRAR into the output directory.
    Only supports archives with one layer of unpacking, so compressed tar archives don't work.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.
    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    extractors is a dictionary, looked up by ExtractorEnum members, of commands or paths
        to extractor binaries. When None, DEFAULT_EXTRACTORS is used.

    Raises ExtractionError if unexpected issues arise during unpacking, including when
    the WinRAR binary cannot be located or WinRAR exits with a non-zero status.
    """
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS
    winrar_cmd = extractors.get(ExtractorEnum.WINRAR)
    if winrar_cmd == USE_REGISTRY:
        if get_running_platform() != PlatformEnum.WINDOWS:
            get_logger().error('"%s" for WinRAR is only available on Windows', winrar_cmd)
            raise ExtractionError()
        winrar_cmd = str(_find_winrar_by_registry())
    winrar_bin = _find_extractor_by_cmd(winrar_cmd)
    if winrar_bin is None:
        # Without this check a missing binary would surface as a TypeError while
        # building the debug command line below instead of an ExtractionError.
        get_logger().error('Unable to locate the WinRAR binary for command: %s', winrar_cmd)
        raise ExtractionError()

    # Refuse to unpack over an existing relative_to directory; the error message
    # treats it as a temporary unpacking directory handed to _process_relative_to.
    if relative_to is not None and (output_dir / relative_to).exists():
        get_logger().error('Temporary unpacking directory already exists: %s',
                           output_dir / relative_to)
        raise ExtractionError()

    # 'x' extracts with full paths and '-o+' overwrites existing files.
    cmd = (winrar_bin, 'x', '-o+', str(archive_path), str(output_dir))
    get_logger().debug('WinRAR command line: %s', ' '.join(cmd))

    # The return code is inspected manually so the failure can be logged first.
    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        get_logger().error('WinRAR command returned %s', result.returncode)
        raise ExtractionError()

    _process_relative_to(output_dir, relative_to)
def extract_tar_file(archive_path, output_dir, relative_to, extractors=None):
    """
    Unpack a plain or compressed tar archive into output_dir.

    A native extractor (7-zip on Windows, tar on UNIX) is used when its binary can be
    located; otherwise the Python-based extractor is used.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.
    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    extractors is a dictionary, looked up by ExtractorEnum members, of commands or paths
        to extractor binaries. When None, DEFAULT_EXTRACTORS is used ('tar' for tar,
        and '_use_registry' for 7-Zip).

    Raises ExtractionError if unexpected issues arise during unpacking.
    Raises NotImplementedError if the running platform is neither Windows nor UNIX.
    """
    tools = DEFAULT_EXTRACTORS if extractors is None else extractors
    platform = get_running_platform()

    # Select the native binary and its matching unpacking routine for this platform.
    if platform == PlatformEnum.WINDOWS:
        command = tools.get(ExtractorEnum.SEVENZIP)
        if command == SEVENZIP_USE_REGISTRY:
            command = str(_find_7z_by_registry())
        native_bin = _find_extractor_by_cmd(command)
        native_unpack = _extract_tar_with_7z
    elif platform == PlatformEnum.UNIX:
        # NOTE: 7-zip isn't an option because it doesn't preserve file permissions
        native_bin = _find_extractor_by_cmd(tools.get(ExtractorEnum.TAR))
        native_unpack = _extract_tar_with_tar
    else:
        # This is not a normal code path, so make it clear.
        raise NotImplementedError(platform)

    if native_bin is None:
        # Fallback to Python-based extractor on all platforms
        _extract_tar_with_python(archive_path, output_dir, relative_to)
    else:
        native_unpack(native_bin, archive_path, output_dir, relative_to)