def test_extract_file_function(self):
        test_file = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
        base = fileutils.parent_directory(test_file)
        expected = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']
        cleaned_test_file = test_file.replace(base, '')
        expected_event = [
            extract.ExtractEvent(
                source=cleaned_test_file,
                target=extractcode.get_extraction_path(cleaned_test_file),
                done=False, warnings=[], errors=[]
            ),
            extract.ExtractEvent(
                source=cleaned_test_file,
                target=extractcode.get_extraction_path(cleaned_test_file),
                done=True, warnings=[], errors=[]
            )
        ]

        target = extractcode.get_extraction_path(test_file)
        result = list(extract.extract_file(test_file, target))
        result = [r._replace(
                    source=cleaned_test_file,
                    target=extractcode.get_extraction_path(cleaned_test_file))
                  for r in result]
        assert expected_event == result
        check_files(target, expected)
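
The assertions above compare ExtractEvent tuples after rewriting their machine-specific paths with _replace. A minimal standalone sketch of what this relies on, assuming ExtractEvent is a plain namedtuple with exactly these five fields (inferred from the attributes used in the test); the paths and the '-extract' suffix are illustrative:

from collections import namedtuple

# Assumed shape of extract.ExtractEvent, inferred from the test above.
ExtractEvent = namedtuple('ExtractEvent', 'source target done warnings errors')

# _replace returns a copy with the given fields swapped; the test uses it to
# strip the machine-specific temp directory before comparing events.
event = ExtractEvent(
    source='/tmp/xyz/basic_non_nested.tar.gz',          # illustrative path
    target='/tmp/xyz/basic_non_nested.tar.gz-extract',  # illustrative path
    done=True, warnings=[], errors=[],
)
normalized = event._replace(
    source='/basic_non_nested.tar.gz',
    target='/basic_non_nested.tar.gz-extract',
)
assert normalized.done and not normalized.errors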
Example #3
    def test_extract_archive_non_nested(self):
        test_dir = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
        expected = (
            'a/b/a.txt',
            'a/b/b.txt',
            'a/c/c.txt',
        )
        result = extract.extract(test_dir, recurse=False)
        check_no_error(result)
        check_files(extractcode.get_extraction_path(test_dir), expected)

        result = extract.extract(test_dir, recurse=True)
        check_no_error(result)
        check_files(extractcode.get_extraction_path(test_dir), expected)
    def test_extract_archive_shallow_with_readonly_inside(self):
        test_file = self.get_test_loc('extract/readonly/read_only.tar.gz',
                                      copy=True)
        """
        This test file was created with:
            import tarfile, time, datetime, StringIO, os
            TEXT = 'something\n'
            tar = tarfile.open('read_only.tar.gz', 'w:gz')
            for i in range(0, 2):
                tarinfo = tarfile.TarInfo()
                tarinfo.name = 'somefilename-%i.txt' % i
                tarinfo.uid = 123
                tarinfo.gid = 456
                tarinfo.uname = 'johndoe'
                tarinfo.gname = 'fake'
                tarinfo.type = tarfile.REGTYPE
                tarinfo.mode = 0 # this is the readonly part
                tarinfo.mtime = time.mktime(datetime.datetime.now().timetuple())
                file = StringIO.StringIO()
                file.write(TEXT)
                file.seek(0)
                tarinfo.size = len(TEXT)
                tar.addfile(tarinfo, file)
            tar.close()
        """
        result = list(extract.extract(test_file, recurse=False))
        check_no_error(result)

        expected = (
            'somefilename-0.txt',
            'somefilename-1.txt',
        )
        test_dir = extractcode.get_extraction_path(test_file)
        check_files(test_dir, expected)
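
The docstring above preserves the Python 2 snippet (StringIO, integer mode) originally used to build read_only.tar.gz. For reference only, a rough Python 3 equivalent that produces a tarball whose members have mode 0 (no permission bits); the output file name is illustrative and the shipped test file remains the one actually exercised:

import io
import tarfile
import time

TEXT = b'something\n'
# Write two regular members with mode 0, mirroring the snippet in the
# docstring above.
with tarfile.open('read_only.tar.gz', 'w:gz') as tar:
    for i in range(2):
        info = tarfile.TarInfo(name='somefilename-%i.txt' % i)
        info.uid, info.gid = 123, 456
        info.uname, info.gname = 'johndoe', 'fake'
        info.type = tarfile.REGTYPE
        info.mode = 0  # the read-only part: no permission bits at all
        info.mtime = time.time()
        info.size = len(TEXT)
        tar.addfile(info, io.BytesIO(TEXT))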
Example #6
def extract(location, kinds=extractcode.default_kinds, recurse=False):
    """
    Walk and extract any archives found at `location` (either a file or
    directory). Extract only archives of a kind listed in the `kinds` tuple.

    Return an iterable of ExtractEvent tuples for each extracted archive. This
    can be used to track extraction progress:

     - one event is emitted just before extracting an archive. The ExtractEvent
       warnings and errors are empty. The `done` flag is False.

     - one event is emitted right after extracting an archive. The ExtractEvent
       warnings and errors contain any warnings and errors encountered. The
       `done` flag is True.

    If `recurse` is True, recursively extract archives nested inside other
    archives. If `recurse` is False, do not further extract an already
    extracted archive identified by the corresponding extract suffix location.

    Note that while the original file system is walked top-down, breadth-first,
    if `recurse` is True and a nested archive is found, it is extracted to full
    depth first before resuming the file system walk.
    """
    ignored = partial(ignore.is_ignored,
                      ignores=ignore.default_ignores,
                      unignores={})
    if TRACE:
        logger.debug('extract:start: %(location)r  recurse: %(recurse)r\n' %
                     locals())
    abs_location = abspath(expanduser(location))
    for top, dirs, files in fileutils.walk(abs_location, ignored):
        if TRACE:
            logger.debug(
                'extract:walk: top: %(top)r dirs: %(dirs)r files: %(files)r' %
                locals())

        if not recurse:
            if TRACE:
                drs = set(dirs)
            for d in dirs[:]:
                if extractcode.is_extraction_path(d):
                    dirs.remove(d)
            if TRACE:
                logger.debug('extract:walk: not recurse: removed dirs:' +
                             repr(drs.symmetric_difference(set(dirs))))
        for f in files:
            loc = join(top, f)
            if not recurse and extractcode.is_extraction_path(loc):
                if TRACE:
                    logger.debug(
                        'extract:walk: not recurse: skipped file: %(loc)r' %
                        locals())
                continue

            if not archive.should_extract(loc, kinds):
                if TRACE:
                    logger.debug(
                        'extract:walk: skipped file: not should_extract: %(loc)r'
                        % locals())
                continue

            target = join(abspath(top), extractcode.get_extraction_path(loc))
            if TRACE:
                logger.debug('extract:target: %(target)r' % locals())
            for xevent in extract_file(loc, target, kinds):
                if TRACE:
                    logger.debug('extract:walk:extraction event: %(xevent)r' %
                                 locals())
                yield xevent

            if recurse:
                if TRACE:
                    logger.debug(
                        'extract:walk: recursing on target: %(target)r' %
                        locals())
                for xevent in extract(target, kinds, recurse):
                    if TRACE:
                        logger.debug(
                            'extract:walk:recurse:extraction event: %(xevent)r'
                            % locals())
                    yield xevent
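
The docstring above describes a two-events-per-archive protocol: one event with done=False just before extraction, one with done=True (carrying any warnings and errors) just after. A minimal sketch of how a caller might consume that stream for progress reporting, assuming the extract() generator above is in scope; the 'samples/' path is purely illustrative:

for xevent in extract('samples/', recurse=True):
    if not xevent.done:
        print('extracting %(source)s -> %(target)s' % xevent._asdict())
    else:
        status = 'FAILED' if xevent.errors else 'ok'
        print('done (%s): %s' % (status, xevent.source))
        for warning in xevent.warnings:
            print('  warning: %s' % warning)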
Example #7
def extract_files(
        location,
        kinds=extractcode.default_kinds,
        recurse=False,
        ignore_pattern=(),
):
    """
    Extract the files found at `location`.

    Extract only archives of a kind listed in the `kinds` tuple.

    If `recurse` is True, recursively extract archives nested inside other
    archives. If `recurse` is False, do not further extract an already
    extracted archive identified by the corresponding extract suffix location.
    """
    ignored = partial(ignore.is_ignored,
                      ignores=ignore.default_ignores,
                      unignores={})
    if TRACE:
        logger.debug('extract:start: %(location)r  recurse: %(recurse)r\n' %
                     locals())

    abs_location = abspath(expanduser(location))
    for top, dirs, files in fileutils.walk(abs_location, ignored):
        if TRACE:
            logger.debug(
                'extract:walk: top: %(top)r dirs: %(dirs)r files: %(files)r' %
                locals())

        if not recurse:
            if TRACE:
                drs = set(dirs)
            for d in dirs[:]:
                if extractcode.is_extraction_path(d):
                    dirs.remove(d)
            if TRACE:
                logger.debug('extract:walk: not recurse: removed dirs:' +
                             repr(drs.symmetric_difference(set(dirs))))

        for f in files:
            loc = join(top, f)
            if not recurse and extractcode.is_extraction_path(loc):
                if TRACE:
                    logger.debug(
                        'extract:walk: not recurse: skipped file: %(loc)r' %
                        locals())
                continue

            if not archive.should_extract(loc, kinds, ignore_pattern):
                if TRACE:
                    logger.debug(
                        'extract:walk: skipped file: not should_extract: %(loc)r'
                        % locals())
                continue

            target = join(abspath(top), extractcode.get_extraction_path(loc))
            if TRACE:
                logger.debug('extract:target: %(target)r' % locals())

            # extract proper
            for xevent in extract_file(loc, target, kinds):
                if TRACE:
                    logger.debug('extract:walk:extraction event: %(xevent)r' %
                                 locals())
                yield xevent

            if recurse:
                if TRACE:
                    logger.debug(
                        'extract:walk: recursing on target: %(target)r' %
                        locals())
                for xevent in extract(
                        location=target,
                        kinds=kinds,
                        recurse=recurse,
                        ignore_pattern=ignore_pattern,
                ):
                    if TRACE:
                        logger.debug(
                            'extract:walk:recurse:extraction event: %(xevent)r'
                            % locals())
                    yield xevent
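
Compared to extract() above, extract_files() threads an extra ignore_pattern argument into archive.should_extract() so that matching paths are skipped entirely. A hedged call sketch, assuming glob-style patterns (the actual matching semantics live in should_extract, which is not shown here); the location and patterns are illustrative:

# Walk 'downloads/' shallowly, skipping wheels and jars from extraction.
events = extract_files(
    location='downloads/',
    kinds=extractcode.default_kinds,
    recurse=False,
    ignore_pattern=('*.whl', '*.jar'),
)
failures = [e for e in events if e.done and e.errors]
if failures:
    print('%d archive(s) failed to extract' % len(failures))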