def test_extract_file_function(self):
    """Check that extract_file emits a start event (done=False) and a
    completion event (done=True) and extracts the expected files."""
    test_file = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
    base = fileutils.parent_directory(test_file)
    expected = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']
    # Strip the machine-specific base directory so events compare stably.
    cleaned_test_file = test_file.replace(base, '')
    cleaned_target = extractcode.get_extraction_path(cleaned_test_file)
    expected_event = [
        extract.ExtractEvent(
            source=cleaned_test_file,
            target=cleaned_target,
            done=done,
            warnings=[],
            errors=[],
        )
        for done in (False, True)
    ]
    target = extractcode.get_extraction_path(test_file)
    # Rebase source/target on each event; done/warnings/errors are kept
    # as emitted so real warnings or errors still fail the comparison.
    result = [
        ev._replace(source=cleaned_test_file, target=cleaned_target)
        for ev in extract.extract_file(test_file, target)
    ]
    assert expected_event == result
    check_files(target, expected)
def test_extract_file_function(self):
    """Verify the ExtractEvent sequence produced by extract_file on a
    simple non-nested tarball."""
    # NOTE(review): this is a duplicate of an identically named test in
    # this class; Python keeps only the last definition, so one of the
    # two never runs — consider removing one.
    test_file = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
    base = fileutils.parent_directory(test_file)
    expected = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']
    cleaned_test_file = test_file.replace(base, '')

    def expected_event_for(done):
        # Expected event with paths made base-directory-independent.
        return extract.ExtractEvent(
            source=cleaned_test_file,
            target=extractcode.get_extraction_path(cleaned_test_file),
            done=done,
            warnings=[],
            errors=[],
        )

    expected_event = [expected_event_for(False), expected_event_for(True)]

    target = extractcode.get_extraction_path(test_file)
    result = [
        r._replace(
            source=cleaned_test_file,
            target=extractcode.get_extraction_path(cleaned_test_file),
        )
        for r in extract.extract_file(test_file, target)
    ]
    assert expected_event == result
    check_files(target, expected)
def test_extract_archive_non_nested(self):
    """A flat (non-nested) archive must extract the same file set whether
    or not recursion is requested."""
    test_dir = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
    expected = (
        'a/b/a.txt',
        'a/b/b.txt',
        'a/c/c.txt',
    )
    extraction_dir = extractcode.get_extraction_path(test_dir)
    # Run once shallow, once recursive: results must be identical.
    for recurse in (False, True):
        result = extract.extract(test_dir, recurse=recurse)
        check_no_error(result)
        check_files(extraction_dir, expected)
def test_extract_archive_shallow_with_readonly_inside(self):
    """Shallow extraction must succeed on archive members whose mode is 0
    (read-only), producing readable extracted files."""
    test_file = self.get_test_loc('extract/readonly/read_only.tar.gz', copy=True)
    # The test archive was created with (Python 2):
    #   import tarfile, time, datetime, StringIO, os
    #   TEXT = 'something\n'
    #   tar = tarfile.open('read_only.tar.gz', 'w:gz')
    #   for i in range(0, 2):
    #       tarinfo = tarfile.TarInfo()
    #       tarinfo.name = 'somefilename-%i.txt' % i
    #       tarinfo.uid = 123
    #       tarinfo.gid = 456
    #       tarinfo.uname = 'johndoe'
    #       tarinfo.gname = 'fake'
    #       tarinfo.type = tarfile.REGTYPE
    #       tarinfo.mode = 0  # this is the readonly part
    #       tarinfo.mtime = time.mktime(datetime.datetime.now().timetuple())
    #       file = StringIO.StringIO()
    #       file.write(TEXT)
    #       file.seek(0)
    #       tarinfo.size = len(TEXT)
    #       tar.addfile(tarinfo, file)
    #   tar.close()
    result = list(extract.extract(test_file, recurse=False))
    check_no_error(result)
    expected = (
        'somefilename-0.txt',
        'somefilename-1.txt',
    )
    test_dir = extractcode.get_extraction_path(test_file)
    check_files(test_dir, expected)
def extract(location, kinds=extractcode.default_kinds, recurse=False):
    """
    Walk and extract any archives found at `location` (either a file or
    directory). Extract only archives of a kind listed in the `kinds` kind
    tuple.

    Return an iterable of ExtractEvent tuples for each extracted archive. This
    can be used to track extraction progress:

    - one event is emitted just before extracting an archive. The ExtractEvent
      warnings and errors are empty. The `done` flag is False.

    - one event is emitted right after extracting an archive. The ExtractEvent
      warnings and errors contains warnings and errors if any. The `done` flag
      is True.

    If `recurse` is True, extract recursively archives nested inside other
    archives. If `recurse` is False, then do not extract further an already
    extracted archive identified by the corresponding extract suffix location.

    Note that while the original file system is walked top-down, breadth-first,
    if recurse and a nested archive is found, it is extracted to full depth
    first before resuming the file system walk.
    """
    ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
    if TRACE:
        logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())

    abs_location = abspath(expanduser(location))
    for top, dirs, files in fileutils.walk(abs_location, ignored):
        if TRACE:
            # Fixed format string: was 'files: r(files)r' which printed the
            # placeholder literally instead of interpolating `files`.
            logger.debug(
                'extract:walk: top: %(top)r dirs: %(dirs)r files: %(files)r' % locals())

        if not recurse:
            # Prune previously-created extraction directories from the walk
            # so shallow mode does not descend into prior extraction output.
            if TRACE:
                drs = set(dirs)
            for d in dirs[:]:
                if extractcode.is_extraction_path(d):
                    dirs.remove(d)
            if TRACE:
                logger.debug('extract:walk: not recurse: removed dirs:' +
                             repr(drs.symmetric_difference(set(dirs))))

        for f in files:
            loc = join(top, f)
            if not recurse and extractcode.is_extraction_path(loc):
                if TRACE:
                    logger.debug(
                        'extract:walk not recurse: skipped file: %(loc)r' % locals())
                continue

            if not archive.should_extract(loc, kinds):
                if TRACE:
                    logger.debug(
                        'extract:walk: skipped file: not should_extract: %(loc)r' % locals())
                continue

            target = join(abspath(top), extractcode.get_extraction_path(loc))
            if TRACE:
                logger.debug('extract:target: %(target)r' % locals())
            for xevent in extract_file(loc, target, kinds):
                if TRACE:
                    logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
                yield xevent

            if recurse:
                # Depth-first: fully extract the nested archive before
                # resuming the breadth-first walk of the original tree.
                if TRACE:
                    logger.debug(
                        'extract:walk: recursing on target: %(target)r' % locals())
                for xevent in extract(target, kinds, recurse):
                    if TRACE:
                        logger.debug(
                            'extract:walk:recurse:extraction event: %(xevent)r' % locals())
                    yield xevent
def extract_files(
    location,
    kinds=extractcode.default_kinds,
    recurse=False,
    ignore_pattern=(),
):
    """
    Extract the files found at `location`. Extract only archives of a kind
    listed in the `kinds` kind tuple, skipping any path matching
    `ignore_pattern`.

    Yield an ExtractEvent just before and just after each extracted archive.

    If `recurse` is True, extract recursively archives nested inside other
    archives. If `recurse` is False, then do not extract further an already
    extracted archive identified by the corresponding extract suffix location.
    """
    ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
    if TRACE:
        logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())

    abs_location = abspath(expanduser(location))
    for top, dirs, files in fileutils.walk(abs_location, ignored):
        if TRACE:
            # Fixed format string: was 'files: r(files)r' which printed the
            # placeholder literally instead of interpolating `files`.
            logger.debug(
                'extract:walk: top: %(top)r dirs: %(dirs)r files: %(files)r' % locals())

        if not recurse:
            # Prune previously-created extraction directories from the walk
            # so shallow mode does not descend into prior extraction output.
            if TRACE:
                drs = set(dirs)
            for d in dirs[:]:
                if extractcode.is_extraction_path(d):
                    dirs.remove(d)
            if TRACE:
                logger.debug('extract:walk: not recurse: removed dirs:' +
                             repr(drs.symmetric_difference(set(dirs))))

        for f in files:
            loc = join(top, f)
            if not recurse and extractcode.is_extraction_path(loc):
                if TRACE:
                    logger.debug(
                        'extract:walk not recurse: skipped file: %(loc)r' % locals())
                continue

            if not archive.should_extract(loc, kinds, ignore_pattern):
                if TRACE:
                    logger.debug(
                        'extract:walk: skipped file: not should_extract: %(loc)r' % locals())
                continue

            target = join(abspath(top), extractcode.get_extraction_path(loc))
            if TRACE:
                logger.debug('extract:target: %(target)r' % locals())

            # extract proper
            for xevent in extract_file(loc, target, kinds):
                if TRACE:
                    logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
                yield xevent

            if recurse:
                if TRACE:
                    logger.debug(
                        'extract:walk: recursing on target: %(target)r' % locals())
                # Bug fix: recurse into extract_files (which accepts
                # ignore_pattern), not extract: calling
                # extract(..., ignore_pattern=...) raised a TypeError since
                # extract takes no such parameter, and it also dropped the
                # ignore_pattern filtering on nested archives.
                for xevent in extract_files(
                    location=target,
                    kinds=kinds,
                    recurse=recurse,
                    ignore_pattern=ignore_pattern,
                ):
                    if TRACE:
                        logger.debug(
                            'extract:walk:recurse:extraction event: %(xevent)r' % locals())
                    yield xevent
def extract(location, kinds=extractcode.default_kinds, recurse=False):
    """
    Walk and extract any archives found at `location` (either a file or
    directory). Extract only archives of a kind listed in the `kinds` kind
    tuple.

    Return an iterable of ExtractEvent tuples for each extracted archive. This
    can be used to track extraction progress:

    - one event is emitted just before extracting an archive. The ExtractEvent
      warnings and errors are empty. The `done` flag is False.

    - one event is emitted right after extracting an archive. The ExtractEvent
      warnings and errors contains warnings and errors if any. The `done` flag
      is True.

    If `recurse` is True, extract recursively archives nested inside other
    archives. If `recurse` is False, then do not extract further an already
    extracted archive identified by the corresponding extract suffix location.

    Note that while the original file system is walked top-down, breadth-first,
    if recurse and a nested archive is found, it is extracted to full depth
    first before resuming the file system walk.
    """
    # NOTE(review): this redefines the extract() that appears earlier in this
    # file with an identical body; only this later definition survives at
    # import time — consider removing the duplicate.
    ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
    if TRACE:
        logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())

    abs_location = abspath(expanduser(location))
    for top, dirs, files in fileutils.walk(abs_location, ignored):
        if TRACE:
            # Fixed format string: was 'files: r(files)r' which printed the
            # placeholder literally instead of interpolating `files`.
            logger.debug(
                'extract:walk: top: %(top)r dirs: %(dirs)r files: %(files)r' % locals())

        if not recurse:
            # Prune previously-created extraction directories from the walk
            # so shallow mode does not descend into prior extraction output.
            if TRACE:
                drs = set(dirs)
            for d in dirs[:]:
                if extractcode.is_extraction_path(d):
                    dirs.remove(d)
            if TRACE:
                logger.debug('extract:walk: not recurse: removed dirs:' +
                             repr(drs.symmetric_difference(set(dirs))))

        for f in files:
            loc = join(top, f)
            if not recurse and extractcode.is_extraction_path(loc):
                if TRACE:
                    logger.debug(
                        'extract:walk not recurse: skipped file: %(loc)r' % locals())
                continue

            if not archive.should_extract(loc, kinds):
                if TRACE:
                    logger.debug(
                        'extract:walk: skipped file: not should_extract: %(loc)r' % locals())
                continue

            target = join(abspath(top), extractcode.get_extraction_path(loc))
            if TRACE:
                logger.debug('extract:target: %(target)r' % locals())
            for xevent in extract_file(loc, target, kinds):
                if TRACE:
                    logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
                yield xevent

            if recurse:
                # Depth-first: fully extract the nested archive before
                # resuming the breadth-first walk of the original tree.
                if TRACE:
                    logger.debug(
                        'extract:walk: recursing on target: %(target)r' % locals())
                for xevent in extract(target, kinds, recurse):
                    if TRACE:
                        logger.debug(
                            'extract:walk:recurse:extraction event: %(xevent)r' % locals())
                    yield xevent