def extract_file(location, target, kinds=extractcode.default_kinds):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.
    """
    warnings = []
    errors = []
    extractor = archive.get_extractor(location, kinds)
    if TRACE:
        logger.debug('extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
                     + getattr(extractor, '__module__', '')
                     + '.' + getattr(extractor, '__name__', ''))
    if extractor:
        yield ExtractEvent(location, target, done=False, warnings=[], errors=[])
        try:
            # extract first to a temp directory.
            # if there is an error, the extracted files will not be moved
            # to target
            tmp_tgt = fileutils.get_temp_dir('extract')
            abs_location = abspath(expanduser(location))
            warnings.extend(extractor(abs_location, tmp_tgt))
            fileutils.copytree(tmp_tgt, target)
            fileutils.delete(tmp_tgt)
        except Exception as e:
            if TRACE:
                logger.debug('extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' % locals())
            errors = [str(e).strip(' \'"')]
        finally:
            yield ExtractEvent(location, target, done=True, warnings=warnings, errors=errors)
def create_html_app_assets(results, output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if
    it exists. Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()
        assets_dir = join(get_template_dir('html-app'), 'assets')

        # delete old assets
        tgt_dirs = get_html_app_files_dirs(output_file)
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)

        # copy assets
        fileutils.copytree(assets_dir, target_dir)

        # write json data
        import json
        root_path, assets_dir = get_html_app_files_dirs(output_file)
        with open(join(root_path, assets_dir, 'data.json'), 'w') as f:
            f.write('data=' + json.dumps(results))

        # create help file
        with open(join(root_path, assets_dir, 'help.html'), 'w') as f:
            f.write(get_html_app_help(basename(output_file.name)))
    except HtmlAppAssetCopyWarning as w:
        raise w
def test_copytree_copies_unreadable_files(self):
    src = self.get_test_loc('fileutils/exec', copy=True)
    dst = self.get_temp_dir()
    src_file1 = join(src, 'a.bat')
    src_file2 = join(src, 'subtxt', 'a.txt')
    try:
        # make some unreadable source files
        make_non_readable(src_file1)
        if on_posix:
            assert not filetype.is_readable(src_file1)
        make_non_readable(src_file2)
        if on_posix:
            assert not filetype.is_readable(src_file2)

        # copy proper
        dest_dir = join(dst, 'dest')
        fileutils.copytree(src, dest_dir)

        dest_file1 = join(dest_dir, 'a.bat')
        assert os.path.exists(dest_file1)
        assert filetype.is_readable(dest_file1)
        dest_file2 = join(dest_dir, 'subtxt', 'a.txt')
        assert os.path.exists(dest_file2)
        assert filetype.is_readable(dest_file2)
    finally:
        fileutils.chmod(src, fileutils.RW, recurse=True)
        fileutils.chmod(dst, fileutils.RW, recurse=True)
def test_copytree_does_not_keep_non_writable_permissions(self):
    src = self.get_test_loc('fileutils/exec', copy=True)
    dst = self.get_temp_dir()
    try:
        src_file = join(src, 'subtxt/a.txt')
        make_non_writable(src_file)
        assert not filetype.is_writable(src_file)

        src_dir = join(src, 'subtxt')
        make_non_writable(src_dir)
        if on_posix:
            assert not filetype.is_writable(src_dir)

        # copy proper
        dest_dir = join(dst, 'dest')
        fileutils.copytree(src, dest_dir)

        dst_file = join(dest_dir, 'subtxt/a.txt')
        assert os.path.exists(dst_file)
        assert filetype.is_writable(dst_file)

        dest_dir2 = join(dest_dir, 'subtxt')
        assert os.path.exists(dest_dir2)
        assert filetype.is_writable(dest_dir2)
    finally:
        fileutils.chmod(src, fileutils.RW, recurse=True)
        fileutils.chmod(dst, fileutils.RW, recurse=True)
def create_html_app(output_file, results, version, scanned_path):  # NOQA
    """
    Given an html-app output_file, generate that file, create the data.js data
    file from the results and create the corresponding `_files` directory and
    copy the data and assets to this directory. The target directory is
    deleted if it exists. Raise HtmlAppAssetCopyWarning if the output_file is
    <stdout> or HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()

        source_assets_dir = join(TEMPLATES_DIR, 'html-app', 'assets')

        # the target assets directory is named after the output file base name
        # (stripped from its extension) with a `_files` suffix
        output_location = output_file.name
        tgt_root_path = dirname(output_location)
        tgt_assets_dir = file_base_name(output_location) + '_files'

        # delete old assets
        target_assets_dir = join(tgt_root_path, tgt_assets_dir)
        if exists(target_assets_dir):
            delete(target_assets_dir)

        # copy assets
        copytree(source_assets_dir, target_assets_dir)

        template = get_template(join(TEMPLATES_DIR, 'html-app', 'template.html'))
        rendered_html = template.render(
            assets_dir=target_assets_dir,
            scanned_path=scanned_path,
            version=version,
        )
        output_file.write(rendered_html)

        # create help file
        help_template = get_template(join(TEMPLATES_DIR, 'html-app', 'help_template.html'))
        rendered_help = help_template.render(main_app=output_location)
        with io.open(join(target_assets_dir, 'help.html'), 'w', encoding='utf-8') as f:
            f.write(rendered_help)

        # FIXME: this should be a regular JSON scan format
        with io.open(join(target_assets_dir, 'data.js'), 'w') as f:
            f.write('data=')
            json.dump(list(results), f)

    except HtmlAppAssetCopyWarning as w:
        raise w

    except Exception as e:  # NOQA
        import traceback
        msg = 'ERROR: cannot create HTML application.\n' + traceback.format_exc()
        raise HtmlAppAssetCopyError(msg)
def try_to_extract(location, target_dir, extractor):
    """
    Extract archive at `location` to `target_dir` trying the `extractor`
    function. If extract fails, just return without returning warnings nor
    raising exceptions.

    Note: there are a few cases where we want to attempt extracting something
    but do not care if this fails.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(os.path.abspath(os.path.expanduser(target_dir)))
    temp_target = compat.unicode(fileutils.get_temp_dir(prefix='extractcode-extract1-'))
    warnings = []
    try:
        warnings = extractor(abs_location, temp_target)
        if TRACE:
            logger.debug('try_to_extract: temp_target: %(temp_target)r' % locals())
        fileutils.copytree(temp_target, abs_target_dir)
    except:
        return warnings
    finally:
        fileutils.delete(temp_target)
    return warnings
def extract_file(location, target, kinds=extractcode.default_kinds, verbose=False):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.
    """
    warnings = []
    errors = []
    extractor = archive.get_extractor(location, kinds)
    if TRACE:
        logger.debug('extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
                     + getattr(extractor, '__module__', '')
                     + '.' + getattr(extractor, '__name__', ''))

    if extractor:
        yield ExtractEvent(location, target, done=False, warnings=[], errors=[])
        try:
            # extract first to a temp directory: if there is an error, the
            # extracted files will not be moved to target
            tmp_tgt = fileutils.get_temp_dir(prefix='scancode-extract-')
            abs_location = abspath(expanduser(location))
            warns = extractor(abs_location, tmp_tgt) or []
            warnings.extend(warns)
            fileutils.copytree(tmp_tgt, target)
            fileutils.delete(tmp_tgt)
        except Exception as e:
            errors = [str(e).strip(' \'"')]
            if verbose:
                errors.append(traceback.format_exc())
            if TRACE:
                tb = traceback.format_exc()
                logger.debug('extract_file: ERROR: %(location)r: %(errors)r\n%(e)r\n%(tb)s' % locals())
        finally:
            yield ExtractEvent(location, target, done=True, warnings=warnings, errors=errors)
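# Usage sketch (illustrative, not from the source): extract_file() is a
# generator that yields one ExtractEvent just before extraction (done=False)
# and one after (done=True) carrying any warnings and errors. The archive
# path below is a made-up example; the `source` field name follows the
# xevent.source usage seen in the extract() walker elsewhere in this corpus.
for xevent in extract_file('samples/arch.tar.gz', 'samples/arch.tar.gz-extract'):
    if not xevent.done:
        print('Extracting: %r...' % (xevent.source,))
    elif xevent.errors:
        print('Extraction failed: %r' % (xevent.errors,))
    elif xevent.warnings:
        print('Extracted with warnings: %r' % (xevent.warnings,))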
def get_licenses(self, scancode_licenses):
    """
    Return a mapping of key -> ScanCode License objects either fetched
    externally or loaded from the existing `self.original_dir`
    """
    print('Fetching and storing external licenses in:', self.original_dir)

    licenses = []
    for lic, text in self.fetch_licenses(scancode_licenses):
        try:
            with io.open(lic.text_file, 'w', encoding='utf-8') as tf:
                tf.write(text)
            lic.dump()
            licenses.append(lic)
        except:
            if TRACE:
                print()
                print(repr(lic))
            raise

    print('Stored %d external licenses in: %r.' % (len(licenses), self.original_dir,))
    print('Modified (or not modified) external licenses will be in: %r.' % (self.update_dir,))
    fileutils.copytree(self.original_dir, self.update_dir)
    print('New external licenses will be in: %r.' % (self.new_dir,))
    return load_licenses(self.update_dir, with_deprecated=True)
def extract_file(location, target, kinds=extractcode.default_kinds):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.
    """
    warnings = []
    errors = []
    extractor = archive.get_extractor(location, kinds)
    if DEBUG:
        logger.debug(
            'extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
            + getattr(extractor, '__module__', '')
            + '.' + getattr(extractor, '__name__', ''))
    if extractor:
        yield ExtractEvent(location, target, done=False, warnings=[], errors=[])
        try:
            # extract first to a temp directory.
            # if there is an error, the extracted files will not be moved
            # to target
            tmp_tgt = fileutils.get_temp_dir('extract')
            abs_location = abspath(expanduser(location))
            warnings.extend(extractor(abs_location, tmp_tgt))
            fileutils.copytree(tmp_tgt, target)
            fileutils.delete(tmp_tgt)
        except Exception as e:
            if DEBUG:
                logger.debug(
                    'extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' % locals())
            errors = [str(e).strip(' \'"')]
        finally:
            yield ExtractEvent(location, target, done=True, warnings=warnings, errors=errors)
def create_html_app_assets(results, output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if
    it exists. Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()
        assets_dir = join(get_template_dir('html-app'), 'assets')
        tgt_dirs = get_html_app_files_dirs(output_file)
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)
        fileutils.copytree(assets_dir, target_dir)

        # write json data
        import json
        root_path, assets_dir = get_html_app_files_dirs(output_file)
        with open(join(root_path, assets_dir, 'data.json'), 'w') as f:
            f.write('data=' + json.dumps(results))
    except HtmlAppAssetCopyWarning as w:
        raise w
def get_test_loc(self, test_path, copy=False, debug=False):
    """
    Given a `test_path` relative to the self.test_data_dir directory, return
    the location to a test file or directory for this path. Copy to a temp
    test location if `copy` is True.
    """
    if debug:
        import inspect
        caller = inspect.stack()[1][3]
        print('\nself.get_test_loc,%(caller)s,"%(test_path)s"' % locals())

    test_loc = get_test_loc(test_path, self.test_data_dir, debug=debug)
    if copy:
        base_name = os.path.basename(test_loc)
        if filetype.is_file(test_loc):
            # target must be an existing dir
            target_dir = self.get_temp_dir()
            fileutils.copyfile(test_loc, target_dir)
            test_loc = os.path.join(target_dir, base_name)
        else:
            # target must be a NON existing dir
            target_dir = os.path.join(self.get_temp_dir(), base_name)
            fileutils.copytree(test_loc, target_dir)
            # cleanup of VCS that could be left over from checkouts
            self.remove_vcs(target_dir)
            test_loc = target_dir
    return test_loc
def create_html_app_assets(results, output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if
    it exists. Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()
        assets_dir = join(get_template_dir('html-app'), 'assets')

        # delete old assets
        tgt_dirs = get_html_app_files_dirs(output_file)
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)

        # copy assets
        fileutils.copytree(assets_dir, target_dir)

        # write json data
        root_path, assets_dir = get_html_app_files_dirs(output_file)
        with codecs.open(join(root_path, assets_dir, 'data.json'), 'wb', encoding='utf-8') as f:
            f.write('data=')
            json.dump(results, f, iterable_as_array=True)

        # create help file
        with codecs.open(join(root_path, assets_dir, 'help.html'), 'wb', encoding='utf-8') as f:
            f.write(get_html_app_help(basename(output_file.name)))
    except HtmlAppAssetCopyWarning as w:
        raise w
def extract_with_fallback(location, target_dir, extractor1, extractor2):
    """
    Extract archive at `location` to `target_dir` trying first the
    `extractor1` function. If extract fails, attempt extraction again with the
    `extractor2` function. Return a list of warning messages. Raise exceptions
    on errors.

    Note: there are a few cases where the primary extractor for a type may
    fail and a secondary extractor will succeed.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    # attempt extract first to a temp dir
    temp_target1 = unicode(fileutils.get_temp_dir('extract1'))
    try:
        warnings = extractor1(abs_location, temp_target1)
        if TRACE:
            logger.debug('extract_with_fallback: temp_target1: %(temp_target1)r' % locals())
        fileutils.copytree(temp_target1, abs_target_dir)
    except:
        try:
            temp_target2 = unicode(fileutils.get_temp_dir('extract2'))
            warnings = extractor2(abs_location, temp_target2)
            if TRACE:
                logger.debug('extract_with_fallback: temp_target2: %(temp_target2)r' % locals())
            fileutils.copytree(temp_target2, abs_target_dir)
        finally:
            fileutils.delete(temp_target2)
    finally:
        fileutils.delete(temp_target1)
    return warnings
def test_copytree_does_not_copy_fifo(self):
    # Windows does not support pipes
    src = self.get_test_loc('fileutils/filetype', copy=True)
    dest = self.get_temp_dir()
    src_file = join(src, 'myfifo')
    os.mkfifo(src_file)  # @UndefinedVariable
    dest_dir = join(dest, 'dest')
    fileutils.copytree(src, dest_dir)
    assert not os.path.exists(join(dest_dir, 'myfifo'))
def extract(
    location,
    kinds=extractcode.default_kinds,
    recurse=False,
    replace_originals=False,
    ignore_pattern=(),
):
    """
    Walk and extract any archives found at `location` (either a file or
    directory). Extract only archives of a kind listed in the `kinds` kind
    tuple.

    Return an iterable of ExtractEvent tuples for each extracted archive. This
    can be used to track extraction progress:

     - one event is emitted just before extracting an archive. The
       ExtractEvent warnings and errors are empty. The `done` flag is False.

     - one event is emitted right after extracting an archive. The
       ExtractEvent warnings and errors contain warnings and errors if any.
       The `done` flag is True.

    If `recurse` is True, extract recursively archives nested inside other
    archives. If `recurse` is False, do not further extract an already
    extracted archive identified by the corresponding extract suffix location.

    If `replace_originals` is True, the extracted archives are replaced by the
    extracted content.

    Note that while the original file system is walked top-down,
    breadth-first, if recurse and a nested archive is found, it is extracted
    to full depth first before resuming the file system walk.
    """
    processed_events = []
    processed_events_append = processed_events.append
    for event in extract_files(location, kinds, recurse, ignore_pattern):
        yield event
        if replace_originals:
            processed_events_append(event)

    # move files around
    if replace_originals:
        for xevent in reversed(processed_events):
            if xevent.done:
                source = xevent.source
                target = xevent.target
                if TRACE:
                    logger.debug(
                        'extract:replace_originals: replace %(source)r by %(target)r' % locals())
                fileutils.delete(source)
                fileutils.copytree(target, source)
                fileutils.delete(target)
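# Usage sketch (illustrative): drive a simple progress report from the events
# yielded by the extract() walker above. The path is made up; the `source`,
# `done`, `warnings` and `errors` fields are those used in the function body.
for xevent in extract('samples/', recurse=True):
    if xevent.done and (xevent.warnings or xevent.errors):
        print('Issues extracting %r: warnings=%r errors=%r'
              % (xevent.source, xevent.warnings, xevent.errors))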
def get_licenses(self):
    """
    Return a mapping of key -> ScanCode License objects either fetched
    externally or loaded from the existing `self.src_dir`
    """
    if self.fetched:
        print('Reusing (possibly modified) external licenses stored in:', self.update_dir)
        return load_licenses(self.update_dir, with_deprecated=True)
    else:
        print('Fetching and storing external licenses in:', self.src_dir)
        licenses = {l.key: l for l in self.fetch_licenses()}
        print('Stored %d external licenses in: %r.' % (len(licenses), self.src_dir,))
        fileutils.copytree(self.src_dir, self.update_dir)
        print('Modified external licenses will be in: %r.' % (self.update_dir,))
        print('New external licenses will be in: %r.' % (self.new_dir,))
        print('Deleted external licenses will be in: %r.' % (self.del_dir,))
        return load_licenses(self.update_dir, with_deprecated=True)
def create_html_app_assets(output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if
    it exists. Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        assets_dir = join(get_template_dir("html-app"), "assets")
        tgt_dirs = get_html_app_files_dirs(output_file)
        if not tgt_dirs:
            raise HtmlAppAssetCopyWarning()
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)
        fileutils.copytree(assets_dir, target_dir)
    except HtmlAppAssetCopyWarning as w:
        raise w
def get_test_loc(self, test_path, copy=False):
    """
    Given a `test_path` relative to the self.test_data_dir directory, return
    the location to a test file or directory for this path. Copy to a temp
    test location if `copy` is True.
    """
    test_loc = get_test_loc(test_path, self.test_data_dir)
    if copy:
        base_name = os.path.basename(test_loc)
        if filetype.is_file(test_loc):
            # target must be an existing dir
            target_dir = self.get_temp_dir()
            fileutils.copyfile(test_loc, target_dir)
            test_loc = os.path.join(target_dir, base_name)
        else:
            # target must be a NON existing dir
            target_dir = os.path.join(self.get_temp_dir(), base_name)
            fileutils.copytree(test_loc, target_dir)
            # cleanup of VCS that could be left over from checkouts
            self.remove_vcs(target_dir)
            test_loc = target_dir
    return test_loc
def try_to_extract(location, target_dir, extractor):
    """
    Extract archive at `location` to `target_dir` trying the `extractor`
    function. If extract fails, just return without returning warnings nor
    raising exceptions.

    Note: there are a few cases where we want to attempt extracting something
    but do not care if this fails.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    temp_target = unicode(fileutils.get_temp_dir('extract1'))
    warnings = []
    try:
        warnings = extractor(abs_location, temp_target)
        if TRACE:
            logger.debug('try_to_extract: temp_target: %(temp_target)r' % locals())
        fileutils.copytree(temp_target, abs_target_dir)
    except:
        return warnings
    finally:
        fileutils.delete(temp_target)
    return warnings
def rebuild_rootfs(img, target_dir):
    """
    Extract and merge or "squash" all layers of the `image` Image in a single
    rootfs in `target_dir`. Extraction is done in sequence from the bottom
    (root or initial) layer to the top (or latest) layer and the "whiteouts"
    unionfs/overlayfs procedure is applied at each step as per the OCI spec:
    https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts

    Return a list of deleted "whiteout" files.
    Raise an Exception on errors.

    The extraction process consists of these steps:
    - extract the layer in a temp directory
    - find whiteouts in that layer temp dir
    - remove files/directories corresponding to these whiteouts in the target directory
    - remove whiteouts special marker files or dirs in the temp directory
    - move layer to the target directory, overwriting existing files

    See also some related implementations and links:
    https://github.com/moby/moby/blob/d1f470946/pkg/archive/whiteouts.go
    https://github.com/virt-manager/virt-bootstrap/blob/8a7e752d/src/virtBootstrap/whiteout.py
    https://github.com/goldmann/docker-squash
    https://github.com/moby/moby/blob/master/image/spec/v1.md
    https://github.com/moby/moby/blob/master/image/spec/v1.1.md
    https://github.com/moby/moby/blob/master/image/spec/v1.2.md
    """
    assert os.path.isdir(target_dir)

    # log deletions
    deletions = []
    for layer_num, layer in enumerate(img.layers):
        if TRACE:
            logger.debug(f'Extracting layer {layer_num} - {layer.layer_id} '
                         f'tarball: {layer.archive_location}')

        # 1. extract a layer to temp.
        # Note that we are not preserving any special file and any file permission
        extracted_loc = tempfile.mkdtemp('container_inspector-docker')
        layer.extract(extracted_location=extracted_loc)
        if TRACE:
            logger.debug(f'  Extracted layer to: {extracted_loc}')

        # 2. find whiteouts in that layer.
        whiteouts = list(find_whiteouts(extracted_loc))
        if TRACE:
            logger.debug('  Merging extracted layers and applying unionfs whiteouts')
        if TRACE:
            logger.debug('  Whiteouts:\n' + '  \n'.join(map(repr, whiteouts)))

        # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs)
        for whiteout_marker_loc, whiteable_path in whiteouts:
            if TRACE:
                logger.debug(f'    Deleting dir or file with whiteout marker: {whiteout_marker_loc}')
            whiteable_loc = os.path.join(target_dir, whiteable_path)
            delete(whiteable_loc)
            # also delete the whiteout marker file
            delete(whiteout_marker_loc)
            deletions.append(whiteable_loc)

        # 4. finally copy/overwrite the extracted layer over the WIP rootfs
        if TRACE:
            logger.debug(f'    Moving extracted layer from: {extracted_loc} to: {target_dir}')
        copytree(extracted_loc, target_dir)
        if TRACE:
            logger.debug(f'    Moved layer to: {target_dir}')
        delete(extracted_loc)

    return deletions
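# Minimal sketch of the whiteout convention referenced in the docstring above
# (assumed per the OCI layer spec; this helper is illustrative and not part of
# container_inspector): a marker file named `.wh.<name>` in a layer means the
# sibling path `<name>` must be deleted from the rootfs built so far.
import os

WH_PREFIX = '.wh.'

def to_whiteable_path(whiteout_marker_path):
    # 'a/b/.wh.foo' whites out 'a/b/foo'
    parent, marker = os.path.split(whiteout_marker_path)
    assert marker.startswith(WH_PREFIX)
    return os.path.join(parent, marker[len(WH_PREFIX):])

assert to_whiteable_path('a/b/.wh.foo') == os.path.join('a/b', 'foo')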
def extract_file_by_file(location, target_dir, arch_type='*', skip_symlinks=True):
    """
    Extract all files using a one-by-one process from a 7zip-supported archive
    file at `location` in the `target_dir` directory.
    Return a list of warning messages if any or an empty list.
    Raise exception on errors.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option. Can
    be None.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    entries, errors_msgs = list_entries(location, arch_type)
    entries = list(entries)

    # Determine if we need a one-by-one approach: technically the approach is
    # to check if we have files that are in the same dir and have the same
    # name when the case is ignored. We take a simpler approach: we check if
    # all paths are unique when we ignore the case: for that we only check
    # that the length of two paths sets are the same: one set as-is and the
    # other lowercased. See the sketch after this function.
    paths_as_is = set(e.path for e in entries)
    paths_no_case = set(p.lower() for p in paths_as_is)
    need_by_file = len(paths_as_is) != len(paths_no_case)

    if not need_by_file:
        # use regular extract
        return extract_all_files_at_once(
            location=location, target_dir=target_dir, arch_type=arch_type)

    # now we are extracting one file at a time: this is a tad painful because
    # we are dealing with a full command execution each time.
    errors = {}
    warnings = {}
    tmp_dir = fileutils.get_temp_dir(prefix='extractcode-extract-')
    for i, entry in enumerate(entries):
        if not entry.is_file:
            continue

        tmp_extract_dir = os.path.join(tmp_dir, str(i))
        fileutils.create_dir(tmp_extract_dir)

        ex_args = build_7z_extract_command(
            location=location,
            target_dir=tmp_extract_dir,
            single_entry=entry,
            arch_type=arch_type,
        )
        rc, stdout, stderr = command.execute2(**ex_args)

        error = get_7z_errors(stdout, stderr)
        if error or rc != 0:
            error = error or UNKNOWN_ERROR
            if TRACE:
                logger.debug(
                    'extract: failure: {rc}\n'
                    'stderr: {stderr}\nstdout: {stdout}'.format(**locals()))
            errors[entry.path] = error
            continue

        # these are all for a single file path
        warns = get_7z_warnings(stdout) or {}
        wmsg = '\n'.join(warns.values())
        if wmsg:
            if entry.path in warnings:
                warnings[entry.path] += '\n' + wmsg
            else:
                warnings[entry.path] = wmsg

        # finally move that extracted file to its target location, possibly renamed
        source_file_name = fileutils.file_name(entry.path)
        source_file_loc = os.path.join(tmp_extract_dir, source_file_name)
        if not os.path.exists(source_file_loc):
            if entry.path in errors:
                errors[entry.path] += '\nNo file name extracted.'
            else:
                errors[entry.path] = 'No file name extracted.'
            continue

        safe_path = paths.safe_path(entry.path, posix=True)
        target_file_loc = os.path.join(target_dir, safe_path)
        target_file_dir = os.path.dirname(target_file_loc)
        fileutils.create_dir(target_file_dir)

        unique_target_file_loc = extractcode.new_name(target_file_loc, is_dir=False)
        if TRACE:
            logger.debug('extract: unique_target_file_loc: from {} to {}'.format(
                target_file_loc, unique_target_file_loc))

        if os.path.isfile(source_file_loc):
            fileutils.copyfile(source_file_loc, unique_target_file_loc)
        else:
            fileutils.copytree(source_file_loc, unique_target_file_loc)

    extractcode.remove_backslashes_and_dotdots(abs_target_dir)
    if errors:
        raise ExtractErrorFailedToExtract(errors)

    return convert_warnings_to_list(warnings)
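# Minimal sketch of the case-collision check used above (function name is
# illustrative): one-by-one extraction is needed only when lowercasing merges
# two distinct archive paths, since on a case-insensitive filesystem one such
# file would overwrite the other during a bulk extract.
def needs_file_by_file(paths):
    paths = set(paths)
    return len(paths) != len(set(p.lower() for p in paths))

assert needs_file_by_file(['README', 'ReadMe'])
assert not needs_file_by_file(['README', 'NOTICE'])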
def rebuild_rootfs(image, target_dir, layerid_len=DEFAULT_ID_LEN):
    """
    Extract and merge all layers to target_dir. Extraction is done in sequence
    from bottom (root) to top (latest layer).

    Return a mapping of errors and a list of whiteouts/deleted files.

    The extraction process consists of these steps:
    - extract the layer in a temp directory
    - move layer to the target directory, overwriting existing files
    - if any, remove AUFS special files/dirs in the target directory
    - if any, remove whiteouts file/directory pairs in the target directory
    """
    from extractcode.extract import extract_file

    assert filetype.is_dir(target_dir)
    assert os.path.exists(target_dir)
    extract_errors = []
    # log whiteouts deletions
    whiteouts = []

    for layer_id, layer in image.layers.items():
        layer_tarball = join(image.repo_dir, layer_id[:layerid_len], LAYER_TAR_FILE)
        logger.debug('Extracting layer tarball: %(layer_tarball)r' % locals())
        temp_target = fileutils.get_temp_dir('conan-docker')
        xevents = list(extract_file(layer_tarball, temp_target))
        for x in xevents:
            if x.warnings or x.errors:
                extract_errors.extend(xevents)

        # FIXME: the order of ops is WRONG: we are getting whiteouts incorrectly
        # it should be:
        # 1. extract a layer to temp.
        # 2. find whiteouts in that layer.
        # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs)
        # 4. finally copy the extracted layer over the WIP rootfs

        # move extracted layer to target_dir
        logger.debug(
            'Moving extracted layer from: %(temp_target)r to: %(target_dir)r' % locals())
        fileutils.copytree(temp_target, target_dir)
        fileutils.delete(temp_target)

        logger.debug('Merging extracted layers and applying AUFS whiteouts/deletes')
        for top, dirs, files in fileutils.walk(target_dir):
            # delete AUFS dirs and apply whiteout deletions
            for dr in dirs[:]:
                whiteable_dir = join(top, dr)
                if dr.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. dir...
                    dirs.remove(dr)
                    logger.debug('Deleting whiteout dir: %(whiteable_dir)r' % locals())
                    fileutils.delete(whiteable_dir)

                    # ... and delete the corresponding dir it does "whiteout"
                    base_dir = dr[len(WHITEOUT_PREFIX):]
                    try:
                        dirs.remove(base_dir)
                    except ValueError:
                        # FIXME: should we really raise an exception here?
                        msg = ('Inconsistent layers: '
                               'missing directory to whiteout: %(base_dir)r' % locals())
                        raise InconsistentLayersError(msg)
                    wdo = join(top, base_dir)
                    logger.debug('Deleting real dir: %(wdo)r' % locals())
                    fileutils.delete(wdo)
                    whiteouts.append(wdo)

                # delete AUFS special dirs
                elif dr.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    dirs.remove(dr)
                    logger.debug('Deleting AUFS special dir: %(whiteable_dir)r' % locals())
                    fileutils.delete(whiteable_dir)

            # delete AUFS files and apply whiteout deletions
            all_files = set(files)
            for fl in all_files:
                whiteable_file = join(top, fl)
                if fl.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. marker file...
                    logger.debug('Deleting whiteout file: %(whiteable_file)r' % locals())
                    fileutils.delete(whiteable_file)
                    # ... and delete the corresponding file it does "whiteout"
                    # e.g. logically delete
                    base_file = fl[len(WHITEOUT_PREFIX):]
                    wfo = join(top, base_file)
                    whiteouts.append(wfo)
                    if base_file in all_files:
                        logger.debug('Deleting real file: %(wfo)r' % locals())
                        fileutils.delete(wfo)

                # delete AUFS special files
                elif fl.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    logger.debug('Deleting AUFS special file: %(whiteable_file)r' % locals())
                    fileutils.delete(whiteable_file)
                    whiteouts.append(whiteable_file)

    return extract_errors, whiteouts