Ejemplo n.º 1
0
def extract_file(location, target, kinds=extractcode.default_kinds):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.

    This is a generator. When an extractor is found for `location`, a first
    ExtractEvent is yielded with done=False and empty warnings and errors
    before the extraction is attempted. Nothing is extracted when no extractor
    is found.
    """
    warnings = []
    errors = []
    # get_extractor may return a falsy value: extraction is skipped in this case
    extractor = archive.get_extractor(location, kinds)
    if TRACE:
        # the getattr defaults keep this trace working when the extractor has
        # no __module__ or __name__ attribute
        logger.debug('extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
                     + getattr(extractor, '__module__', '')
                     + '.' + getattr(extractor, '__name__', ''))
    if extractor:
        # initial event emitted before the extraction starts
        yield ExtractEvent(location, target, done=False, warnings=[], errors=[])
        try:
            # extract first to a temp directory.
            # if there is an error,  the extracted files will not be moved
            # to target
            tmp_tgt = fileutils.get_temp_dir('extract')
            abs_location = abspath(expanduser(location))
            warnings.extend(extractor(abs_location, tmp_tgt))
            fileutils.copytree(tmp_tgt, target)
            # NOTE(review): in the visible code the temp directory is only
            # deleted on this success path
            fileutils.delete(tmp_tgt)
        except Exception, e:
            if TRACE:
                # NOTE(review): `errors` is still the initial empty list here
                # as it is only reassigned after this trace
                logger.debug('extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' % locals())
            # keep only the exception message, stripped from surrounding
            # spaces and quotes
            errors = [str(e).strip(' \'"')]
        finally:
Ejemplo n.º 2
0
def create_html_app_assets(results, output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if it
    exists. The `results` are then dumped as JSON in a `data.json` file and a
    `help.html` file is written in this same directory.

    Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    # NOTE(review): no handler raising HtmlAppAssetCopyError is visible in this
    # block: other exceptions propagate unchanged from here.
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()
        source_assets_dir = join(get_template_dir('html-app'), 'assets')

        # delete old assets
        tgt_dirs = get_html_app_files_dirs(output_file)
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)

        # copy assets
        fileutils.copytree(source_assets_dir, target_dir)

        # write json data, prefixed with `data=`
        import json
        with open(join(target_dir, 'data.json'), 'w') as f:
            f.write('data=' + json.dumps(results))

        # create help file
        with open(join(target_dir, 'help.html'), 'w') as f:
            f.write(get_html_app_help(basename(output_file.name)))
    except HtmlAppAssetCopyWarning:
        # A bare raise re-raises the warning with its original traceback and is
        # valid syntax on both Python 2 and Python 3.
        raise
Ejemplo n.º 3
0
    def test_copytree_copies_unreadable_files(self):
        """
        Check that copytree copies source files that are not readable and that
        the copied files are readable.
        """
        src = self.get_test_loc('fileutils/exec', copy=True)
        dst = self.get_temp_dir()
        # path segments, relative to the tree root, of the files made
        # unreadable for this test
        unreadable_segments = [('a.bat',), ('subtxt', 'a.txt')]

        try:
            # make some unreadable source files
            for segments in unreadable_segments:
                src_file = join(src, *segments)
                make_non_readable(src_file)
                if on_posix:
                    assert not filetype.is_readable(src_file)

            # copy proper
            dest_dir = join(dst, 'dest')
            fileutils.copytree(src, dest_dir)

            # each copied file must exist and be readable
            for segments in unreadable_segments:
                dest_file = join(dest_dir, *segments)
                assert os.path.exists(dest_file)
                assert filetype.is_readable(dest_file)

        finally:
            # restore read/write permissions on both trees
            for tree in (src, dst):
                fileutils.chmod(tree, fileutils.RW, recurse=True)
Ejemplo n.º 4
0
    def test_copytree_does_not_keep_non_writable_permissions(self):
        """
        Check that copytree does not carry over non-writable permissions: a
        file and its parent directory made non-writable in the source tree
        must both be writable in the copy.
        """
        src = self.get_test_loc('fileutils/exec', copy=True)
        dst = self.get_temp_dir()

        try:
            src_file = join(src, 'subtxt/a.txt')
            make_non_writable(src_file)
            assert not filetype.is_writable(src_file)

            src_dir = join(src, 'subtxt')
            make_non_writable(src_dir)
            if on_posix:
                assert not filetype.is_writable(src_dir)

            # copy proper
            dest_dir = join(dst, 'dest')
            fileutils.copytree(src, dest_dir)

            dst_file = join(dest_dir, 'subtxt/a.txt')
            assert os.path.exists(dst_file)
            assert filetype.is_writable(dst_file)

            assert filetype.is_writable(dest_dir)

            # check the copy of the directory that was made non-writable above
            # and not only the top-level destination directory
            dest_dir2 = join(dest_dir, 'subtxt')
            assert os.path.exists(dest_dir2)
            assert filetype.is_writable(dest_dir2)
        finally:
            # restore read/write permissions on both trees
            fileutils.chmod(src, fileutils.RW, recurse=True)
            fileutils.chmod(dst, fileutils.RW, recurse=True)
Ejemplo n.º 5
0
def create_html_app(output_file, results, version, scanned_path):  # NOQA
    """
    Write an html-app to the `output_file` file object.

    The rendered main HTML page is written to `output_file`. A sibling
    directory named after the output file base name with a `_files` suffix
    receives a copy of the html-app assets, a rendered `help.html` and a
    `data.js` file built from `results`. This directory is deleted first if it
    exists.

    Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()

        html_app_dir = join(TEMPLATES_DIR, 'html-app')
        source_assets_dir = join(html_app_dir, 'assets')

        # The assets directory sits next to the output file: it is named after
        # the output file base name (stripped from its extension) with a
        # `_files` suffix.
        output_location = output_file.name
        assets_dir_name = file_base_name(output_location) + '_files'
        target_assets_dir = join(dirname(output_location), assets_dir_name)

        # start from a clean assets directory
        if exists(target_assets_dir):
            delete(target_assets_dir)
        copytree(source_assets_dir, target_assets_dir)

        # main application page
        main_template = get_template(join(html_app_dir, 'template.html'))
        output_file.write(
            main_template.render(
                assets_dir=target_assets_dir,
                scanned_path=scanned_path,
                version=version,
            )
        )

        # help page
        help_template = get_template(join(html_app_dir, 'help_template.html'))
        help_html = help_template.render(main_app=output_location)
        help_location = join(target_assets_dir, 'help.html')
        with io.open(help_location, 'w', encoding='utf-8') as help_file:
            help_file.write(help_html)

        # FIXME: this should a regular JSON scan format
        data_location = join(target_assets_dir, 'data.js')
        with io.open(data_location, 'w') as data_file:
            data_file.write('data=')
            json.dump(list(results), data_file)

    except HtmlAppAssetCopyWarning as warning:
        raise warning

    except Exception:  # NOQA
        # any other failure is reported as a copy error carrying the traceback
        import traceback
        trace = traceback.format_exc()
        raise HtmlAppAssetCopyError(
            'ERROR: cannot create HTML application.\n' + trace)
Ejemplo n.º 6
0
def try_to_extract(location, target_dir, extractor):
    """
    Extract archive at `location` to `target_dir` trying the `extractor` function.
    Return a list of warning messages. If the extraction fails, do not raise an
    exception: return instead the warnings collected before the failure, which
    may be an empty list.

    Note: there are a few cases where we want to attempt extracting something
    but do not care if this fails.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))
    # extract first to a temp directory that is always deleted on exit
    temp_target = compat.unicode(
        fileutils.get_temp_dir(prefix='extractcode-extract1-'))
    warnings = []
    try:
        # an extractor may return None rather than a list
        warnings = extractor(abs_location, temp_target) or []
        if TRACE:
            logger.debug('try_to_extract: temp_target: %(temp_target)r' %
                         locals())
        fileutils.copytree(temp_target, abs_target_dir)
    except Exception:
        # Best-effort extraction: failures are deliberately ignored. Only
        # Exception is caught such that KeyboardInterrupt and SystemExit are
        # not swallowed.
        pass
    finally:
        fileutils.delete(temp_target)
    return warnings
Ejemplo n.º 7
0
def extract_file(location, target, kinds=extractcode.default_kinds, verbose=False):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.

    This is a generator. When an extractor is found for `location`, yield two
    ExtractEvent: a first one before the extraction with done=False and empty
    warnings and errors, and a last one after the extraction with done=True
    and the collected warnings and errors. Yield nothing otherwise.

    Extraction failures are not raised: they are reported in the errors of the
    last event. If `verbose` is True, the errors also contain the traceback.
    """
    warnings = []
    errors = []
    extractor = archive.get_extractor(location, kinds)
    if TRACE:
        logger.debug('extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
                     + getattr(extractor, '__module__', '')
                     + '.' + getattr(extractor, '__name__', ''))
    if extractor:
        yield ExtractEvent(location, target, done=False, warnings=[], errors=[])
        # bound before the try such that the cleanup below is safe even when
        # the temp directory creation itself fails
        tmp_tgt = None
        try:
            # extract first to a temp directory: if there is an error,  the
            # extracted files will not be moved to target
            tmp_tgt = fileutils.get_temp_dir(prefix='scancode-extract-')
            abs_location = abspath(expanduser(location))
            warns = extractor(abs_location, tmp_tgt) or []
            warnings.extend(warns)
            fileutils.copytree(tmp_tgt, target)
        except Exception as e:
            errors = [str(e).strip(' \'"')]
            if verbose:
                errors.append(traceback.format_exc())
            if TRACE:
                tb = traceback.format_exc()
                logger.debug('extract_file: ERROR: %(location)r: %(errors)r\n%(e)r\n%(tb)s' % locals())

        finally:
            # Always remove the temp directory, including when the extraction
            # failed. A cleanup failure is reported as an error of the last
            # event rather than preventing this event from being emitted.
            if tmp_tgt:
                try:
                    fileutils.delete(tmp_tgt)
                except Exception as e:
                    errors.append(str(e).strip(' \'"'))
            yield ExtractEvent(location, target, done=True, warnings=warnings, errors=errors)
Ejemplo n.º 8
0
    def get_licenses(self, scancode_licenses):
        """
        Fetch external licenses for `scancode_licenses`, store each of them,
        copy `self.original_dir` to `self.update_dir` and return the licenses
        loaded from `self.update_dir`, deprecated licenses included.
        """
        print('Fetching and storing external licenses in:', self.original_dir)

        stored = []
        for license_obj, license_text in self.fetch_licenses(scancode_licenses):
            try:
                # save the fetched text then dump the license itself
                with io.open(license_obj.text_file, 'w', encoding='utf-8') as text_out:
                    text_out.write(license_text)
                license_obj.dump()
                stored.append(license_obj)
            except BaseException:
                # show which license failed before re-raising
                if TRACE:
                    print()
                    print(repr(license_obj))
                raise

        stored_count = len(stored)
        print('Stored %d external licenses in: %r.' % (stored_count, self.original_dir))

        print('Modified (or not modified) external licenses will be in: %r.' % (self.update_dir,))
        fileutils.copytree(self.original_dir, self.update_dir)

        print('New external licenses will be in: %r.' % (self.new_dir,))

        return load_licenses(self.update_dir, with_deprecated=True)
Ejemplo n.º 9
0
def extract_file(location, target, kinds=extractcode.default_kinds):
    """
    Extract a single archive at `location` in the `target` directory if it is
    of a kind supported in the `kinds` kind tuple.

    This is a generator. When an extractor is found for `location`, a first
    ExtractEvent is yielded with done=False and empty warnings and errors
    before the extraction is attempted. Nothing is extracted when no extractor
    is found.
    """
    warnings = []
    errors = []
    # get_extractor may return a falsy value: extraction is skipped in this case
    extractor = archive.get_extractor(location, kinds)
    if DEBUG:
        # NOTE(review): `r(kinds)r` below looks like a typo for `%(kinds)r`: as
        # written the kinds are not interpolated in the trace message
        logger.debug(
            'extract_file: extractor: for: %(location)r with kinds: r(kinds)r : '
            % locals() + getattr(extractor, '__module__', '') + '.' +
            getattr(extractor, '__name__', ''))
    if extractor:
        # initial event emitted before the extraction starts
        yield ExtractEvent(location,
                           target,
                           done=False,
                           warnings=[],
                           errors=[])
        try:
            # extract first to a temp directory.
            # if there is an error,  the extracted files will not be moved
            # to target
            tmp_tgt = fileutils.get_temp_dir('extract')
            abs_location = abspath(expanduser(location))
            warnings.extend(extractor(abs_location, tmp_tgt))
            fileutils.copytree(tmp_tgt, target)
            # NOTE(review): in the visible code the temp directory is only
            # deleted on this success path
            fileutils.delete(tmp_tgt)
        except Exception, e:
            if DEBUG:
                # NOTE(review): `errors` is still the initial empty list here
                # as it is only reassigned after this trace
                logger.debug(
                    'extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' %
                    locals())
            # keep only the exception message, stripped from surrounding
            # spaces and quotes
            errors = [str(e).strip(' \'"')]
        finally:
Ejemplo n.º 10
0
def create_html_app_assets(results, output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if it
    exists. The `results` are then dumped as JSON in a `data.json` file in this
    same directory.

    Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    # NOTE(review): no handler raising HtmlAppAssetCopyError is visible in this
    # block: other exceptions propagate unchanged from here.
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()
        source_assets_dir = join(get_template_dir('html-app'), 'assets')

        # replace any existing target directory by a fresh copy of the assets
        tgt_dirs = get_html_app_files_dirs(output_file)
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)
        fileutils.copytree(source_assets_dir, target_dir)

        # write json data, prefixed with `data=`
        import json
        with open(join(target_dir, 'data.json'), 'w') as f:
            f.write('data=' + json.dumps(results))
    except HtmlAppAssetCopyWarning:
        # A bare raise re-raises the warning with its original traceback and is
        # valid syntax on both Python 2 and Python 3.
        raise
Ejemplo n.º 11
0
    def get_test_loc(self, test_path, copy=False, debug=False):
        """
        Return the location of the test file or directory found at `test_path`,
        a path relative to the self.test_data_dir directory. If `copy` is True,
        return instead the location of a copy made in a new temp directory.
        If `debug` is True, print the name of the caller and the `test_path`.
        """
        if debug:
            import inspect
            caller = inspect.stack()[1][3]
            trace_args = {'caller': caller, 'test_path': test_path}
            print('\nself.get_test_loc,%(caller)s,"%(test_path)s"' % trace_args)

        test_loc = get_test_loc(test_path, self.test_data_dir, debug=debug)
        if not copy:
            return test_loc

        base_name = os.path.basename(test_loc)

        if filetype.is_file(test_loc):
            # a file is copied inside an existing temp directory
            target_dir = self.get_temp_dir()
            fileutils.copyfile(test_loc, target_dir)
            return os.path.join(target_dir, base_name)

        # a directory is copied to a target directory that must NOT exist yet
        target_dir = os.path.join(self.get_temp_dir(), base_name)
        fileutils.copytree(test_loc, target_dir)
        # cleanup of VCS that could be left over from checkouts
        self.remove_vcs(target_dir)
        return target_dir
Ejemplo n.º 12
0
def create_html_app_assets(results, output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if it
    exists. The `results` are then dumped as JSON in a `data.json` file and a
    `help.html` file is written in this same directory, both encoded as UTF-8.

    Raise HtmlAppAssetCopyWarning if the output_file is <stdout> or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    # NOTE(review): no handler raising HtmlAppAssetCopyError is visible in this
    # block: other exceptions propagate unchanged from here.
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()
        source_assets_dir = join(get_template_dir('html-app'), 'assets')

        # delete old assets
        tgt_dirs = get_html_app_files_dirs(output_file)
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)

        # copy assets
        fileutils.copytree(source_assets_dir, target_dir)

        # write json data, prefixed with `data=`
        with codecs.open(join(target_dir, 'data.json'), 'wb', encoding='utf-8') as f:
            f.write('data=')
            json.dump(results, f, iterable_as_array=True)

        # create help file
        with codecs.open(join(target_dir, 'help.html'), 'wb', encoding='utf-8') as f:
            f.write(get_html_app_help(basename(output_file.name)))
    except HtmlAppAssetCopyWarning:
        # A bare raise re-raises the warning with its original traceback and is
        # valid syntax on both Python 2 and Python 3.
        raise
Ejemplo n.º 13
0
    def get_test_loc(self, test_path, copy=False, debug=False):
        """
        Resolve `test_path`, a path relative to the self.test_data_dir
        directory, to the location of a test file or directory and return this
        location. If `copy` is True, copy the file or directory to a new temp
        directory and return the location of the copy instead. If `debug` is
        True, print the name of the caller and the `test_path`.
        """
        if debug:
            import inspect
            caller = inspect.stack()[1][3]
            print('\nself.get_test_loc,%(caller)s,"%(test_path)s"'
                  % dict(caller=caller, test_path=test_path))

        original_loc = get_test_loc(test_path, self.test_data_dir, debug=debug)
        if not copy:
            return original_loc

        name = os.path.basename(original_loc)
        if filetype.is_file(original_loc):
            # target must be an existing dir
            temp_dir = self.get_temp_dir()
            fileutils.copyfile(original_loc, temp_dir)
            copied_loc = os.path.join(temp_dir, name)
        else:
            # target must be a NON existing dir
            copied_loc = os.path.join(self.get_temp_dir(), name)
            fileutils.copytree(original_loc, copied_loc)
            # cleanup of VCS that could be left over from checkouts
            self.remove_vcs(copied_loc)
        return copied_loc
Ejemplo n.º 14
0
def extract_with_fallback(location, target_dir, extractor1, extractor2):
    """
    Extract archive at `location` to `target_dir` trying first `extractor1` function.
    If extract fails, attempt extraction again with the `extractor2` function.
    Return a list of warning messages. Raise exceptions on errors.

    Each extractor works in its own temp directory that is copied to
    `target_dir` on success. The temp directories are always deleted.

    Note: there are a few cases where the primary extractor for a type may fail and
    a secondary extractor will succeed.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    # attempt extract first to a temp dir
    temp_target1 = unicode(fileutils.get_temp_dir('extract1'))
    try:
        warnings = extractor1(abs_location, temp_target1)
        if TRACE:
            logger.debug('extract_with_fallback: temp_target1: %(temp_target1)r' % locals())
        fileutils.copytree(temp_target1, abs_target_dir)
    except Exception:
        # Only Exception triggers the fallback: KeyboardInterrupt and
        # SystemExit must not start a second extraction.
        # The second temp dir is created before entering the inner try such
        # that its cleanup never refers to an unbound name.
        temp_target2 = unicode(fileutils.get_temp_dir('extract2'))
        try:
            warnings = extractor2(abs_location, temp_target2)
            if TRACE:
                logger.debug('extract_with_fallback: temp_target2: %(temp_target2)r' % locals())
            fileutils.copytree(temp_target2, abs_target_dir)
        finally:
            fileutils.delete(temp_target2)
    finally:
        fileutils.delete(temp_target1)
    return warnings
Ejemplo n.º 15
0
 def test_copytree_does_not_copy_fifo(self):
     """
     Check that a FIFO created in the source tree is not present in the
     destination tree after a copytree.
     """
     # Windows does not support pipes
     source_tree = self.get_test_loc('fileutils/filetype', copy=True)
     temp_dir = self.get_temp_dir()

     # add a named pipe to the source tree
     os.mkfifo(join(source_tree, 'myfifo'))  # @UndefinedVariable

     copied_tree = join(temp_dir, 'dest')
     fileutils.copytree(source_tree, copied_tree)

     copied_fifo = join(copied_tree, 'myfifo')
     assert not os.path.exists(copied_fifo)
Ejemplo n.º 16
0
def extract(
        location,
        kinds=extractcode.default_kinds,
        recurse=False,
        replace_originals=False,
        ignore_pattern=(),
):
    """
    Walk `location` (either a file or directory) and extract any archive found
    there that is of a kind listed in the `kinds` kind tuple.

    Return an iterable of ExtractEvent tuples for each extracted archive. This
    can be used to track extraction progress:

     - one event is emitted just before extracting an archive. The ExtractEvent
       warnings and errors are empty. The `done` flag is False.

     - one event is emitted right after extracting an archive. The ExtractEvent
       warnings and errors contains warnings and errors if any. The `done` flag
       is True.

    If `recurse` is True, extract recursively archives nested inside other
    archives. If `recurse` is false, then do not extract further an already
    extracted archive identified by the corresponding extract suffix location.

    If `replace_originals` is True, the extracted archives are replaced by the
    extracted content once all the events have been emitted.

    Note that while the original file system is walked top-down, breadth-first,
    if recurse and a nested archive is found, it is extracted to full depth
    first before resuming the file system walk.
    """
    collected_events = []
    for event in extract_files(location, kinds, recurse, ignore_pattern):
        yield event
        if replace_originals:
            collected_events.append(event)

    if not replace_originals:
        return

    # move files around: process the events from the last one to the first one
    for xevent in reversed(collected_events):
        if not xevent.done:
            continue

        source, target = xevent.source, xevent.target
        if TRACE:
            logger.debug(
                'extract:replace_originals: replace %(source)r by %(target)r'
                % {'source': source, 'target': target})

        # the archive is deleted, the extracted content is copied in its place
        # and the extraction target is then deleted
        fileutils.delete(source)
        fileutils.copytree(target, source)
        fileutils.delete(target)
Ejemplo n.º 17
0
 def get_licenses(self):
     """
     Return the licenses loaded from `self.update_dir`, deprecated licenses
     included. Unless `self.fetched` is true, the external licenses are
     fetched first and `self.src_dir` is copied to `self.update_dir`.
     """
     if not self.fetched:
         print('Fetching and storing external licenses in:', self.src_dir)
         fetched_by_key = dict((lic.key, lic) for lic in self.fetch_licenses())
         fetched_count = len(fetched_by_key)
         print('Stored %d external licenses in: %r.' % (fetched_count, self.src_dir))
         fileutils.copytree(self.src_dir, self.update_dir)
         for message, directory in (
             ('Modified external licenses will be in: %r.', self.update_dir),
             ('New external licenses will be in: %r.', self.new_dir),
             ('Deleted external licenses will be in: %r.', self.del_dir),
         ):
             print(message % (directory,))
     else:
         print('Reusing (possibly modified) external licenses stored in:', self.update_dir)

     return load_licenses(self.update_dir, with_deprecated=True)
Ejemplo n.º 18
0
def create_html_app_assets(output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if it
    exists.

    Raise HtmlAppAssetCopyWarning if no target directory can be determined for
    the output_file (such as when the output_file is <stdout>) or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    # NOTE(review): no handler raising HtmlAppAssetCopyError is visible in this
    # block: other exceptions propagate unchanged from here.
    try:
        assets_dir = join(get_template_dir("html-app"), "assets")
        tgt_dirs = get_html_app_files_dirs(output_file)
        if not tgt_dirs:
            raise HtmlAppAssetCopyWarning()
        # replace any existing target directory by a fresh copy of the assets
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)
        fileutils.copytree(assets_dir, target_dir)
    except HtmlAppAssetCopyWarning:
        # A bare raise re-raises the warning with its original traceback and is
        # valid syntax on both Python 2 and Python 3.
        raise
Ejemplo n.º 19
0
def create_html_app_assets(output_file):
    """
    Given an html-app output_file, create the corresponding `_files` directory
    and copy the assets to this directory. The target directory is deleted if it
    exists.

    Raise HtmlAppAssetCopyWarning if no target directory can be determined for
    the output_file (such as when the output_file is <stdout>) or
    HtmlAppAssetCopyError if the copy was not possible.
    """
    # NOTE(review): no handler raising HtmlAppAssetCopyError is visible in this
    # block: other exceptions propagate unchanged from here.
    try:
        assets_dir = join(get_template_dir('html-app'), 'assets')
        tgt_dirs = get_html_app_files_dirs(output_file)
        if not tgt_dirs:
            raise HtmlAppAssetCopyWarning()
        # replace any existing target directory by a fresh copy of the assets
        target_dir = join(*tgt_dirs)
        if exists(target_dir):
            fileutils.delete(target_dir)
        fileutils.copytree(assets_dir, target_dir)
    except HtmlAppAssetCopyWarning:
        # A bare raise re-raises the warning with its original traceback and is
        # valid syntax on both Python 2 and Python 3.
        raise
Ejemplo n.º 20
0
 def get_test_loc(self, test_path, copy=False):
     """
     Return the location of the test file or directory found at `test_path`,
     a path relative to the self.test_data_dir directory. If `copy` is True,
     return instead the location of a copy made in a new temp directory.
     """
     test_loc = get_test_loc(test_path, self.test_data_dir)
     if not copy:
         return test_loc

     base_name = os.path.basename(test_loc)

     if filetype.is_file(test_loc):
         # a file is copied inside an existing temp directory
         target_dir = self.get_temp_dir()
         fileutils.copyfile(test_loc, target_dir)
         return os.path.join(target_dir, base_name)

     # a directory is copied to a target directory that must NOT exist yet
     target_dir = os.path.join(self.get_temp_dir(), base_name)
     fileutils.copytree(test_loc, target_dir)
     # cleanup of VCS that could be left over from checkouts
     self.remove_vcs(target_dir)
     return target_dir
Ejemplo n.º 21
0
def try_to_extract(location, target_dir, extractor):
    """
    Extract archive at `location` to `target_dir` trying the `extractor` function.
    Return a list of warning messages. If the extraction fails, do not raise an
    exception: return instead the warnings collected before the failure, which
    may be an empty list.

    Note: there are a few cases where we want to attempt extracting something
    but do not care if this fails.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    # extract first to a temp directory that is always deleted on exit
    temp_target = unicode(fileutils.get_temp_dir('extract1'))
    warnings = []
    try:
        # an extractor may return None rather than a list
        warnings = extractor(abs_location, temp_target) or []
        if TRACE:
            logger.debug('try_to_extract: temp_target: %(temp_target)r' % locals())
        fileutils.copytree(temp_target, abs_target_dir)
    except Exception:
        # Best-effort extraction: failures are deliberately ignored. Only
        # Exception is caught such that KeyboardInterrupt and SystemExit are
        # not swallowed.
        pass
    finally:
        fileutils.delete(temp_target)
    return warnings
Ejemplo n.º 22
0
 def get_test_loc(self, test_path, copy=False):
     """
     Resolve `test_path`, a path relative to the self.test_data_dir directory,
     to the location of a test file or directory and return this location.
     If `copy` is True, copy the file or directory to a new temp directory
     and return the location of the copy instead.
     """
     original_loc = get_test_loc(test_path, self.test_data_dir)
     if not copy:
         return original_loc

     name = os.path.basename(original_loc)
     if filetype.is_file(original_loc):
         # target must be an existing dir
         temp_dir = self.get_temp_dir()
         fileutils.copyfile(original_loc, temp_dir)
         copied_loc = os.path.join(temp_dir, name)
     else:
         # target must be a NON existing dir
         copied_loc = os.path.join(self.get_temp_dir(), name)
         fileutils.copytree(original_loc, copied_loc)
         # cleanup of VCS that could be left over from checkouts
         self.remove_vcs(copied_loc)
     return copied_loc
Ejemplo n.º 23
0
 def get_licenses(self):
     """
     Return the licenses loaded from `self.update_dir`, deprecated licenses
     included. When `self.fetched` is not true, fetch the external licenses
     and copy `self.src_dir` to `self.update_dir` beforehand.
     """
     if self.fetched:
         print('Reusing (possibly modified) external licenses stored in:',
               self.update_dir)
     else:
         print('Fetching and storing external licenses in:', self.src_dir)

         fetched_by_key = {}
         for fetched_license in self.fetch_licenses():
             fetched_by_key[fetched_license.key] = fetched_license
         stored_summary = (len(fetched_by_key), self.src_dir)
         print('Stored %d external licenses in: %r.' % stored_summary)

         fileutils.copytree(self.src_dir, self.update_dir)

         print('Modified external licenses will be in: %r.' % (self.update_dir,))
         print('New external licenses will be in: %r.' % (self.new_dir,))
         print('Deleted external licenses will be in: %r.' % (self.del_dir,))

     return load_licenses(self.update_dir, with_deprecated=True)
Ejemplo n.º 24
0
def rebuild_rootfs(img, target_dir):
    """
    Extract and merge or "squash" all layers of the `img` Image in a single
    rootfs in `target_dir`. Extraction is done in sequence from the bottom (root
    or initial) layer to the top (or latest) layer and the "whiteouts"
    unionfs/overlayfs procedure is applied at each step as per the OCI spec:
    https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts

    `target_dir` must be an existing directory.

    Return a list of deleted "whiteout" files.
    Raise an Exception on errors.

    The extraction process consists of these steps:
     - extract the layer in a temp directory
     - find whiteouts in that layer temp dir
     - remove files/directories corresponding to these whiteouts in the target directory
     - remove whiteouts special marker files or dirs in the temp directory
     - move layer to the target directory, overwriting existing files
     - delete the layer temp directory, including when a previous step failed

    See also some related implementations and links:
    https://github.com/moby/moby/blob/d1f470946/pkg/archive/whiteouts.go
    https://github.com/virt-manager/virt-bootstrap/blob/8a7e752d/src/virtBootstrap/whiteout.py
    https://github.com/goldmann/docker-squash

    https://github.com/moby/moby/blob/master/image/spec/v1.md
    https://github.com/moby/moby/blob/master/image/spec/v1.1.md
    https://github.com/moby/moby/blob/master/image/spec/v1.2.md
    """

    assert os.path.isdir(target_dir)

    # log  deletions
    deletions = []

    for layer_num, layer in enumerate(img.layers):
        if TRACE:
            logger.debug(f'Extracting layer {layer_num} - {layer.layer_id} '
                         f'tarball: {layer.archive_location}')

        # 1. extract a layer to temp.
        # Note that we are not preserving any special file and any file permission
        extracted_loc = tempfile.mkdtemp('container_inspector-docker')
        try:
            layer.extract(extracted_location=extracted_loc)
            if TRACE:
                logger.debug(f'  Extracted layer to: {extracted_loc}')

            # 2. find whiteouts in that layer.
            whiteouts = list(find_whiteouts(extracted_loc))
            if TRACE:
                logger.debug(
                    '  Merging extracted layers and applying unionfs whiteouts')
                logger.debug('  Whiteouts:\n' +
                             '     \n'.join(map(repr, whiteouts)))

            # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs)
            for whiteout_marker_loc, whiteable_path in whiteouts:
                if TRACE:
                    logger.debug(
                        f'    Deleting dir or file with whiteout marker: {whiteout_marker_loc}'
                    )
                whiteable_loc = os.path.join(target_dir, whiteable_path)
                delete(whiteable_loc)
                # also delete the whiteout marker file
                delete(whiteout_marker_loc)
                deletions.append(whiteable_loc)

            # 4. finally copy/overwrite the extracted layer over the WIP rootfs
            if TRACE:
                logger.debug(
                    f'  Moving extracted layer from: {extracted_loc} to: {target_dir}'
                )
            copytree(extracted_loc, target_dir)
            if TRACE:
                logger.debug(f'  Moved layer to: {target_dir}')
        finally:
            # 5. always remove the layer temp directory, such that a failure in
            # any step above does not leave an extracted layer behind
            delete(extracted_loc)

    return deletions
Ejemplo n.º 25
0
def extract_file_by_file(location,
                         target_dir,
                         arch_type='*',
                         skip_symlinks=True):
    """
    Extract all files using a one-by-one process from a 7zip-supported archive
    file at `location` in the `target_dir` directory.

    Return a list of warning messages if any or an empty list.
    Raise an ExtractErrorFailedToExtract built from a mapping of
    {entry path: error message} if any entry failed to extract.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option.
    Can be None.

    `skip_symlinks` is accepted for interface compatibility but is not used
    by this function.
    """
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    # NOTE: the error messages returned by the listing are not reported here.
    entries, _listing_errors = list_entries(location, arch_type)
    entries = list(entries)

    # Determine if we need a one-by-one approach: technically the approach is
    # to check if we have files that are in the same dir and have the same name
    # when the case is ignored. We take a simpler approach: we check if all
    # paths are unique when we ignore the case: for that we only check that the
    # length of two paths sets are the same: one set as-is and the other
    # lowercased.
    paths_as_is = {e.path for e in entries}
    paths_no_case = {p.lower() for p in paths_as_is}
    need_by_file = len(paths_as_is) != len(paths_no_case)

    if not need_by_file:
        # no case-insensitive path collision: use regular extract
        return extract_all_files_at_once(location=location,
                                         target_dir=target_dir,
                                         arch_type=arch_type)

    # now we are extracting one file at a time. this is a tad painful because we
    # are dealing with a full command execution at each time.

    # both map an entry path to a message string
    errors = {}
    warnings = {}
    tmp_dir = fileutils.get_temp_dir(prefix='extractcode-extract-')
    try:
        for i, entry in enumerate(entries):

            if not entry.is_file:
                continue

            # each entry is extracted in its own numbered sub-directory so
            # that entries whose names differ only by case cannot collide
            tmp_extract_dir = os.path.join(tmp_dir, str(i))
            fileutils.create_dir(tmp_extract_dir)

            ex_args = build_7z_extract_command(
                location=location,
                target_dir=tmp_extract_dir,
                single_entry=entry,
                arch_type=arch_type,
            )
            rc, stdout, stderr = command.execute2(**ex_args)

            error = get_7z_errors(stdout, stderr)
            if error or rc != 0:
                error = error or UNKNOWN_ERROR
                if TRACE:
                    logger.debug(
                        'extract: failure: {rc}\n'
                        'stderr: {stderr}\nstdout: {stdout}'.format(
                            rc=rc, stderr=stderr, stdout=stdout))
                errors[entry.path] = error
                continue

            # these are all for a single file path
            warns = get_7z_warnings(stdout) or {}
            wmsg = '\n'.join(warns.values())
            if wmsg:
                if entry.path in warnings:
                    warnings[entry.path] += '\n' + wmsg
                else:
                    warnings[entry.path] = wmsg

            # finally move that extracted file to its target location,
            # possibly renamed
            source_file_name = fileutils.file_name(entry.path)
            source_file_loc = os.path.join(tmp_extract_dir, source_file_name)
            if not os.path.exists(source_file_loc):
                if entry.path in errors:
                    errors[entry.path] += '\nNo file name extracted.'
                else:
                    errors[entry.path] = 'No file name extracted.'
                continue

            safe_path = paths.safe_path(entry.path, posix=True)
            # write under the same expanded, absolute directory that is
            # post-processed below so both steps see the same tree
            target_file_loc = os.path.join(abs_target_dir, safe_path)
            target_file_dir = os.path.dirname(target_file_loc)
            fileutils.create_dir(target_file_dir)

            unique_target_file_loc = extractcode.new_name(target_file_loc,
                                                          is_dir=False)

            if TRACE:
                logger.debug(
                    'extract: unique_target_file_loc: from {} to {}'.format(
                        target_file_loc, unique_target_file_loc))

            if os.path.isfile(source_file_loc):
                fileutils.copyfile(source_file_loc, unique_target_file_loc)
            else:
                fileutils.copytree(source_file_loc, unique_target_file_loc)
    finally:
        # extracted files have been copied to the target: always discard the
        # temporary per-entry extraction directories, even on failure
        fileutils.delete(tmp_dir)

    extractcode.remove_backslashes_and_dotdots(abs_target_dir)
    if errors:
        raise ExtractErrorFailedToExtract(errors)

    return convert_warnings_to_list(warnings)
Ejemplo n.º 26
0
def rebuild_rootfs(image, target_dir, layerid_len=DEFAULT_ID_LEN):
    """
    Extract and merge all layers to target_dir. Extraction is done in
    sequence from bottom (root) to top (latest layer).

    Return a tuple of (extract_errors, whiteouts) where `extract_errors` is a
    list of the extraction events of every layer that reported warnings or
    errors, and `whiteouts` is a list of whiteouts/deleted file and directory
    paths.

    Raise an InconsistentLayersError if a whiteout directory marker has no
    corresponding directory to delete.

    `layerid_len` is the number of leading characters of a layer id used as
    the name of the layer directory under `image.repo_dir`.

    The extraction process consists of these steps:
     - extract the layer in a temp directory
     - move layer to the target directory, overwriting existing files
     - if any, remove AUFS special files/dirs in the target directory
     - if any, remove whiteouts file/directory pairs in the target directory
    """

    from extractcode.extract import extract_file

    assert filetype.is_dir(target_dir)
    assert os.path.exists(target_dir)
    extract_errors = []
    # log whiteouts deletions
    whiteouts = []

    for layer_id, layer in image.layers.items():
        layer_tarball = join(image.repo_dir, layer_id[:layerid_len],
                             LAYER_TAR_FILE)
        logger.debug('Extracting layer tarball: %(layer_tarball)r' % locals())
        temp_target = fileutils.get_temp_dir('conan-docker')
        xevents = list(extract_file(layer_tarball, temp_target))
        # keep all the events of a problematic layer, but only once per layer
        if any(x.warnings or x.errors for x in xevents):
            extract_errors.extend(xevents)

        # FIXME: the order of ops is WRONG: we are getting whiteouts incorrectly
        # it should be:
        # 1. extract a layer to temp.
        # 2. find whiteouts in that layer.
        # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs)
        # 4. finally copy the extracted layer over the WIP rootfs

        # move extracted layer to target_dir
        logger.debug(
            'Moving extracted layer from: %(temp_target)r to: %(target_dir)r'
            % locals())
        fileutils.copytree(temp_target, target_dir)
        fileutils.delete(temp_target)

        logger.debug(
            'Merging extracted layers and applying AUFS whiteouts/deletes')
        for top, dirs, files in fileutils.walk(target_dir):
            # delete AUFS dirs and apply whiteout deletions.
            # Iterate on a copy since `dirs` is modified in the loop.
            for dr in dirs[:]:
                whiteable_dir = join(top, dr)
                if dr.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. dir...
                    dirs.remove(dr)
                    logger.debug('Deleting whiteout dir: %(whiteable_dir)r' %
                                 locals())
                    fileutils.delete(whiteable_dir)

                    # ... and delete the corresponding dir it does "whiteout"
                    base_dir = dr[len(WHITEOUT_PREFIX):]
                    try:
                        dirs.remove(base_dir)
                    except ValueError:
                        # FIXME: should we really raise an exception here?
                        msg = ('Inconsistent layers: '
                               'missing directory to whiteout: %(base_dir)r' %
                               locals())
                        raise InconsistentLayersError(msg)
                    wdo = join(top, base_dir)
                    logger.debug('Deleting real dir:  %(wdo)r' % locals())
                    fileutils.delete(wdo)
                    whiteouts.append(wdo)

                # delete AUFS special dirs
                # NOTE(review): if WHITEOUT_SPECIAL_DIR_PREFIX itself starts
                # with WHITEOUT_PREFIX, this branch is never reached because
                # the check above matches first -- confirm against the
                # definitions of these constants.
                elif dr.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    dirs.remove(dr)
                    logger.debug(
                        'Deleting AUFS special dir:  %(whiteable_dir)r' %
                        locals())
                    fileutils.delete(whiteable_dir)

            # delete AUFS files and apply whiteout deletions
            all_files = set(files)
            for fl in all_files:
                whiteable_file = join(top, fl)
                if fl.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. marker file...
                    logger.debug('Deleting whiteout file: %(whiteable_file)r' %
                                 locals())
                    fileutils.delete(whiteable_file)
                    # ... and delete the corresponding file it does "whiteout"
                    # e.g. logically delete
                    base_file = fl[len(WHITEOUT_PREFIX):]

                    # the whited-out path is recorded even when the real file
                    # is not present in this directory
                    wfo = join(top, base_file)
                    whiteouts.append(wfo)
                    if base_file in all_files:
                        logger.debug('Deleting real file:  %(wfo)r' % locals())
                        fileutils.delete(wfo)

                # delete AUFS special files
                # NOTE(review): same possible prefix overlap as for the
                # directories above -- confirm.
                elif fl.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    logger.debug(
                        'Deleting AUFS special file:  %(whiteable_file)r' %
                        locals())
                    fileutils.delete(whiteable_file)
                    whiteouts.append(whiteable_file)

    return extract_errors, whiteouts