def test_fileutils_walk_can_walk_a_single_file(self):
    """Walking a single file yields one entry: its parent dir with that file."""
    test_file = self.get_test_loc('fileutils/walk/f')
    walked = list(fileutils.walk(test_file))
    parent = fileutils.parent_directory(test_file)
    assert walked == [(parent, [], ['f'])]
def test_fileutils_walk_can_walk_an_empty_dir(self):
    """An empty directory walks to a single entry with no subdirs or files."""
    empty_dir = self.get_temp_dir()
    walked = list(fileutils.walk(empty_dir))
    assert walked == [(empty_dir, [], [])]
def test_fileutils_walk_with_unicode_path(self):
    """walk() handles a unicode base path and non-ASCII file names."""
    extracted = self.extract_test_zip("fileutils/walk/unicode.zip")
    test_dir = unicode(join(extracted, "unicode"))
    expected = [
        (test_dir, ["a"], [u"2.csv"]),
        (test_dir + sep + "a", [], [u"gru\u0308n.png"]),
    ]
    walked = list(fileutils.walk(test_dir))
    assert expected == walked
def test_walk_can_walk_non_utf8_path_from_unicode_path(self):
    """walk() can list files with non-UTF8 names from a unicode base path."""
    extracted = self.extract_test_tar_raw('fileutils/walk_non_utf8/non_unicode.tgz')
    test_dir = join(extracted, 'non_unicode')
    if not on_linux:
        test_dir = unicode(test_dir)
    # Only the top-level entry matters here; check the file count.
    top_entry = list(fileutils.walk(test_dir))[0]
    filenames = top_entry[-1]
    assert len(filenames) == 18
def test_fileutils_walk_with_unicode_path(self):
    """Verify walking a unicode path yields unicode dir and file names."""
    base = self.extract_test_zip('fileutils/walk/unicode.zip')
    test_dir = unicode(join(base, 'unicode'))
    walked = list(fileutils.walk(test_dir))
    assert walked == [
        (test_dir, ['a'], [u'2.csv']),
        (test_dir + sep + 'a', [], [u'gru\u0308n.png']),
    ]
def test_fileutils_walk(self):
    """walk() traverses top-down yielding (top, dirs, files) triples."""
    test_dir = self.get_test_loc('fileutils/walk')
    base = self.get_test_loc('fileutils')
    # Normalize: strip the base prefix, use POSIX separators, sort file names.
    result = [
        (as_posixpath(top.replace(base, '')), dirs, sorted(files),)
        for top, dirs, files in fileutils.walk(test_dir)
    ]
    assert result == [
        ('/walk', ['d1'], ['f', 'unicode.zip']),
        ('/walk/d1', ['d2'], ['f1']),
        ('/walk/d1/d2', ['d3'], ['f2']),
        ('/walk/d1/d2/d3', [], ['f3']),
    ]
def collect_dockerfiles(location, echo=print):
    """
    Collect all Dockerfiles in a directory tree. Return a map of
    location -> Dockerfile data.
    """
    # NOTE: the debug message below looks up `dfiles` via locals(), so the
    # accumulator must keep that exact name.
    dfiles = {}
    for top, _dirs, files in fileutils.walk(location):
        for file_name in files:
            dfiles.update(get_dockerfile(join(top, file_name), echo))
    logger.debug('collect_dockerfiles: %(dfiles)r' % locals())
    return dfiles
def test_fileutils_walk(self):
    """walk() yields (top, dirs, files) triples top-down from the root."""
    test_dir = self.get_test_loc("fileutils/walk")
    base = self.get_test_loc("fileutils")
    # BUG FIX: sort the file names. Directory listing order is OS- and
    # filesystem-dependent, so comparing unsorted lists makes this test flaky.
    # This also matches the sibling test variant that sorts.
    result = [(as_posixpath(t.replace(base, "")), d, sorted(f)) for t, d, f in fileutils.walk(test_dir)]
    expected = [
        ("/walk", ["d1"], ["f", "unicode.zip"]),
        ("/walk/d1", ["d2"], ["f1"]),
        ("/walk/d1/d2", ["d3"], ["f2"]),
        ("/walk/d1/d2/d3", [], ["f3"]),
    ]
    assert expected == result
def test_fileutils_walk_with_unicode_path(self):
    """Non-ASCII file names are returned with per-platform encoding."""
    base = self.extract_test_zip('fileutils/walk/unicode.zip')
    test_dir = join(base, 'unicode')
    if on_linux:
        test_dir = unicode(test_dir)
    # Keep only the file-name lists from each walk entry.
    filenames = [entry[-1] for entry in fileutils.walk(test_dir)]
    if on_linux:
        # Linux returns raw bytes (UTF-8 encoded combining diaeresis).
        assert filenames == [['2.csv'], ['gru\xcc\x88n.png']]
    else:
        assert filenames == [[u'2.csv'], [u'gru\u0308n.png']]
def collect_images_v10(location, echo=print, layerid_len=DEFAULT_ID_LEN):
    """
    Collect all images in a directory tree. Return a map of location -> Image.
    """
    images = {}
    for top, dirs, _files in fileutils.walk(location):
        # Probe the walked dir itself first, then each of its subdirs,
        # logging and merging each probe result in turn.
        candidates = [top] + [join(top, d) for d in dirs]
        for candidate in candidates:
            image = get_image_v10(candidate, echo, layerid_len=layerid_len)
            logger.debug('collect_images_v10: image: %(image)r' % locals())
            images.update(image)
    logger.debug('collect_images_v10: images: %(images)r' % locals())
    return images
def extract(location, kinds=extractcode.default_kinds, recurse=False):
    """
    Walk and extract any archives found at `location` (either a file or
    directory). Extract only archives of a kind listed in the `kinds` kind
    tuple.

    Return an iterable of ExtractEvent tuples for each extracted archive. This
    can be used to track extraction progress:

     - one event is emitted just before extracting an archive. The
       ExtractEvent warnings and errors are empty. The `done` flag is False.

     - one event is emitted right after extracting an archive. The
       ExtractEvent warnings and errors contains warnings and errors if any.
       The `done` flag is True.

    If `recurse` is True, extract recursively archives nested inside other
    archives. If `recurse` is false, then do not extract further an already
    extracted archive identified by the corresponding extract suffix location.

    Note that while the original file system is walked top-down,
    breadth-first, if recurse and a nested archive is found, it is extracted
    to full depth first before resuming the file system walk.
    """
    ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
    if TRACE:
        logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())

    abs_location = abspath(expanduser(location))
    for top, dirs, files in fileutils.walk(abs_location, ignored):
        if TRACE:
            # BUG FIX: the format string read `files: r(files)r` — a typo for
            # the `%(files)r` placeholder — so files were never interpolated.
            logger.debug(
                'extract:walk: top: %(top)r dirs: %(dirs)r files: %(files)r' % locals())

        if not recurse:
            if TRACE:
                drs = set(dirs)
            # Prune already-extracted dirs in place so walk() skips them.
            for d in dirs[:]:
                if extractcode.is_extraction_path(d):
                    dirs.remove(d)
            if TRACE:
                logger.debug('extract:walk: not recurse: removed dirs:' +
                             repr(drs.symmetric_difference(set(dirs))))

        for f in files:
            loc = join(top, f)
            if not recurse and extractcode.is_extraction_path(loc):
                if TRACE:
                    logger.debug(
                        'extract:walk not recurse: skipped file: %(loc)r' % locals())
                continue

            if not archive.should_extract(loc, kinds):
                if TRACE:
                    logger.debug(
                        'extract:walk: skipped file: not should_extract: %(loc)r' % locals())
                continue

            target = join(abspath(top), extractcode.get_extraction_path(loc))
            if TRACE:
                logger.debug('extract:target: %(target)r' % locals())
            for xevent in extract_file(loc, target, kinds):
                if TRACE:
                    logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
                yield xevent

            if recurse:
                if TRACE:
                    logger.debug(
                        'extract:walk: recursing on target: %(target)r' % locals())
                # Depth-first recursion into the freshly extracted tree.
                for xevent in extract(target, kinds, recurse):
                    if TRACE:
                        logger.debug(
                            'extract:walk:recurse:extraction event: %(xevent)r' % locals())
                    yield xevent
def extract_files(
    location,
    kinds=extractcode.default_kinds,
    recurse=False,
    ignore_pattern=(),
):
    """
    Extract the files found at `location`, yielding ExtractEvent tuples to
    track progress.

    Extract only archives of a kind listed in the `kinds` kind tuple.
    If `recurse` is True, extract recursively archives nested inside other
    archives. If `recurse` is false, then do not extract further an already
    extracted archive identified by the corresponding extract suffix location.
    Files matching `ignore_pattern` are not extracted.
    """
    ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
    if TRACE:
        logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())

    abs_location = abspath(expanduser(location))
    for top, dirs, files in fileutils.walk(abs_location, ignored):
        if TRACE:
            # BUG FIX: `r(files)r` was a typo for the `%(files)r` placeholder.
            logger.debug(
                'extract:walk: top: %(top)r dirs: %(dirs)r files: %(files)r' % locals())

        if not recurse:
            if TRACE:
                drs = set(dirs)
            # Prune already-extracted dirs in place so walk() skips them.
            for d in dirs[:]:
                if extractcode.is_extraction_path(d):
                    dirs.remove(d)
            if TRACE:
                logger.debug('extract:walk: not recurse: removed dirs:' +
                             repr(drs.symmetric_difference(set(dirs))))

        for f in files:
            loc = join(top, f)
            if not recurse and extractcode.is_extraction_path(loc):
                if TRACE:
                    logger.debug(
                        'extract:walk not recurse: skipped file: %(loc)r' % locals())
                continue

            if not archive.should_extract(loc, kinds, ignore_pattern):
                if TRACE:
                    logger.debug(
                        'extract:walk: skipped file: not should_extract: %(loc)r' % locals())
                continue

            target = join(abspath(top), extractcode.get_extraction_path(loc))
            if TRACE:
                logger.debug('extract:target: %(target)r' % locals())

            # extract proper
            for xevent in extract_file(loc, target, kinds):
                if TRACE:
                    logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
                yield xevent

            if recurse:
                if TRACE:
                    logger.debug(
                        'extract:walk: recursing on target: %(target)r' % locals())
                # BUG FIX: recurse through extract_files itself. The previous
                # code called extract(..., ignore_pattern=...) but extract()
                # takes only (location, kinds, recurse), so recursion would
                # raise a TypeError and the ignore pattern would be lost.
                for xevent in extract_files(
                    location=target,
                    kinds=kinds,
                    recurse=recurse,
                    ignore_pattern=ignore_pattern,
                ):
                    if TRACE:
                        logger.debug(
                            'extract:walk:recurse:extraction event: %(xevent)r' % locals())
                    yield xevent
def extract(location, kinds=extractcode.default_kinds, recurse=False):
    """
    Walk and extract any archives found at `location` (either a file or
    directory). Extract only archives of a kind listed in the `kinds` kind
    tuple.

    Return an iterable of ExtractEvent tuples for each extracted archive. This
    can be used to track extraction progress:

     - one event is emitted just before extracting an archive. The
       ExtractEvent warnings and errors are empty. The `done` flag is False.

     - one event is emitted right after extracting an archive. The
       ExtractEvent warnings and errors contains warnings and errors if any.
       The `done` flag is True.

    If `recurse` is True, extract recursively archives nested inside other
    archives. If `recurse` is false, then do not extract further an already
    extracted archive identified by the corresponding extract suffix location.

    Note that while the original file system is walked top-down,
    breadth-first, if recurse and a nested archive is found, it is extracted
    to full depth first before resuming the file system walk.
    """
    ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
    if TRACE:
        logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())

    abs_location = abspath(expanduser(location))
    for top, dirs, files in fileutils.walk(abs_location, ignored):
        if TRACE:
            # BUG FIX: the format string read `files: r(files)r` — a typo for
            # the `%(files)r` placeholder — so files were never interpolated.
            logger.debug('extract:walk: top: %(top)r dirs: %(dirs)r files: %(files)r' % locals())

        if not recurse:
            if TRACE:
                drs = set(dirs)
            # Prune already-extracted dirs in place so walk() skips them.
            for d in dirs[:]:
                if extractcode.is_extraction_path(d):
                    dirs.remove(d)
            if TRACE:
                logger.debug('extract:walk: not recurse: removed dirs:' +
                             repr(drs.symmetric_difference(set(dirs))))

        for f in files:
            loc = join(top, f)
            if not recurse and extractcode.is_extraction_path(loc):
                if TRACE:
                    logger.debug('extract:walk not recurse: skipped file: %(loc)r' % locals())
                continue

            if not archive.should_extract(loc, kinds):
                if TRACE:
                    logger.debug('extract:walk: skipped file: not should_extract: %(loc)r' % locals())
                continue

            target = join(abspath(top), extractcode.get_extraction_path(loc))
            if TRACE:
                logger.debug('extract:target: %(target)r' % locals())
            for xevent in extract_file(loc, target, kinds):
                if TRACE:
                    logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
                yield xevent

            if recurse:
                if TRACE:
                    logger.debug('extract:walk: recursing on target: %(target)r' % locals())
                # Depth-first recursion into the freshly extracted tree.
                for xevent in extract(target, kinds, recurse):
                    if TRACE:
                        logger.debug('extract:walk:recurse:extraction event: %(xevent)r' % locals())
                    yield xevent
def rebuild_rootfs(image, target_dir, layerid_len=DEFAULT_ID_LEN):
    """
    Extract and merge all layers of `image` into the existing `target_dir`
    directory. Extraction is done in sequence from bottom (root) to top
    (latest layer).

    Return a two-tuple of (extract_errors, whiteouts) where extract_errors is
    a list of extraction events carrying warnings/errors and whiteouts is a
    list of paths deleted because of AUFS whiteout markers.

    The extraction process consists of these steps:
    - extract the layer in a temp directory
    - move layer to the target directory, overwriting existing files
    - if any, remove AUFS special files/dirs in the target directory
    - if any, remove whiteouts file/directory pairs in the target directory
    """
    from extractcode.extract import extract_file

    assert filetype.is_dir(target_dir)
    assert os.path.exists(target_dir)
    extract_errors = []
    # paths logically deleted by whiteout markers, collected for reporting
    whiteouts = []

    for layer_id, layer in image.layers.items():
        layer_tarball = join(image.repo_dir, layer_id[:layerid_len], LAYER_TAR_FILE)
        logger.debug('Extracting layer tarball: %(layer_tarball)r' % locals())
        temp_target = fileutils.get_temp_dir('conan-docker')
        xevents = list(extract_file(layer_tarball, temp_target))
        for x in xevents:
            if x.warnings or x.errors:
                # NOTE(review): this extends with ALL events every time one
                # event has issues, so events can be duplicated in
                # extract_errors — confirm whether append(x) was intended.
                extract_errors.extend(xevents)

        # FIXME: the order of ops is WRONG: we are getting whiteouts incorrectly
        # it should be:
        # 1. extract a layer to temp.
        # 2. find whiteouts in that layer.
        # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs)
        # 4. finally copy the extracted layer over the WIP rootfs

        # move extracted layer to target_dir
        # BUG FIX: this message was missing the `% locals()` interpolation and
        # logged the raw template instead of the actual paths.
        logger.debug(
            'Moving extracted layer from: %(temp_target)r to: %(target_dir)r' % locals())
        fileutils.copytree(temp_target, target_dir)
        fileutils.delete(temp_target)

        logger.debug('Merging extracted layers and applying AUFS whiteouts/deletes')
        for top, dirs, files in fileutils.walk(target_dir):
            # delete AUFS dirs and apply whiteout deletions
            for dr in dirs[:]:
                whiteable_dir = join(top, dr)
                if dr.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. dir...
                    dirs.remove(dr)
                    logger.debug('Deleting whiteout dir: %(whiteable_dir)r' % locals())
                    fileutils.delete(whiteable_dir)

                    # ... and delete the corresponding dir it does "whiteout"
                    base_dir = dr[len(WHITEOUT_PREFIX):]
                    try:
                        dirs.remove(base_dir)
                    except ValueError:
                        # FIXME: should we really raise an exception here?
                        msg = ('Inconsistent layers: '
                               'missing directory to whiteout: %(base_dir)r' % locals())
                        raise InconsistentLayersError(msg)
                    wdo = join(top, base_dir)
                    logger.debug('Deleting real dir: %(wdo)r' % locals())
                    fileutils.delete(wdo)
                    whiteouts.append(wdo)

                # delete AUFS special dirs
                elif dr.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    dirs.remove(dr)
                    logger.debug(
                        'Deleting AUFS special dir: %(whiteable_dir)r' % locals())
                    fileutils.delete(whiteable_dir)

            # delete AUFS files and apply whiteout deletions
            all_files = set(files)
            for fl in all_files:
                whiteable_file = join(top, fl)
                if fl.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. marker file...
                    logger.debug('Deleting whiteout file: %(whiteable_file)r' % locals())
                    fileutils.delete(whiteable_file)
                    # ... and delete the corresponding file it does "whiteout"
                    # e.g. logically delete
                    base_file = fl[len(WHITEOUT_PREFIX):]
                    wfo = join(top, base_file)
                    whiteouts.append(wfo)
                    if base_file in all_files:
                        logger.debug('Deleting real file: %(wfo)r' % locals())
                        fileutils.delete(wfo)

                # delete AUFS special files
                elif fl.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    logger.debug(
                        'Deleting AUFS special file: %(whiteable_file)r' % locals())
                    fileutils.delete(whiteable_file)
                    whiteouts.append(whiteable_file)

    return extract_errors, whiteouts