Esempio n. 1
0
    def initialize(self, dep11_dir):
        '''
        Read the generator configuration from dep11_dir, create the working
        directories and open the metadata cache.

        Returns False when no configuration could be loaded, otherwise the
        result of opening the cache. After the cache has been opened the
        working directory of the process is changed to dep11_dir.
        '''
        dep11_dir = os.path.abspath(dep11_dir)

        conf = load_generator_config(dep11_dir)
        if not conf:
            return False

        # three nested defaultdicts, the innermost values are lists
        self._all_pkgs = defaultdict(partial(defaultdict, partial(defaultdict, list)))

        self._dep11_url = conf.get("MediaBaseUrl")
        self._icon_sizes = conf.get("IconSizes") or ["128x128", "64x64"]
        self._archive_root = conf.get("ArchiveRoot")

        # configured locations take precedence over the defaults below dep11_dir
        cache_dir = conf.get("CacheDir") or os.path.join(dep11_dir, "cache")
        self._export_dir = conf.get("ExportDir") or os.path.join(dep11_dir, "export")
        self._langpack_dir = os.path.join(dep11_dir, "langpacks")

        for required_dir in (cache_dir, self._export_dir, self._langpack_dir):
            if not os.path.exists(required_dir):
                os.makedirs(required_dir)

        self._suites_data = conf['Suites']

        self._distro_name = conf.get("DistroName") or "Debian"

        # the RepositoryName property is only interesting for
        # 3rd-party repositories using this generator, which don't want
        # to conflict with the main distro repository data.
        self._repo_name = conf.get("RepositoryName") or self._distro_name

        # initialize our on-disk metadata pool
        self._cache = DataCache(self._get_media_dir())
        cache_opened = self._cache.open(cache_dir)

        os.chdir(dep11_dir)
        return cache_opened
Esempio n. 2
0
    def initialize(self, dep11_dir):
        '''
        Read the generator configuration from dep11_dir, create the working
        directories and open the metadata cache.

        Returns False when no configuration could be loaded, otherwise the
        result of opening the cache. After the cache has been opened the
        working directory of the process is changed to dep11_dir.
        '''
        dep11_dir = os.path.abspath(dep11_dir)

        conf = load_generator_config(dep11_dir)
        if not conf:
            return False

        self._dep11_url = conf.get("MediaBaseUrl")
        self._icon_sizes = conf.get("IconSizes") or ["128x128", "64x64"]
        self._archive_root = conf.get("ArchiveRoot")

        # configured locations take precedence over the defaults below dep11_dir
        cache_dir = conf.get("CacheDir") or os.path.join(dep11_dir, "cache")
        self._export_dir = conf.get("ExportDir") or os.path.join(dep11_dir, "export")

        for required_dir in (cache_dir, self._export_dir):
            if not os.path.exists(required_dir):
                os.makedirs(required_dir)

        self._suites_data = conf['Suites']

        self._distro_name = conf.get("DistroName") or "Debian"

        # initialize our on-disk metadata pool
        self._cache = DataCache(self._get_media_dir())
        cache_opened = self._cache.open(cache_dir)

        os.chdir(dep11_dir)
        return cache_opened
    def initialize(self, dep11_dir):
        '''
        Read the generator configuration from dep11_dir and prepare the
        export directory and the metadata cache.

        Returns False when no configuration could be loaded and True
        otherwise; the result of opening the cache is not checked. On
        success the working directory of the process is changed to
        dep11_dir.
        '''
        dep11_dir = os.path.abspath(dep11_dir)

        conf = load_generator_config(dep11_dir)
        if not conf:
            return False

        self._archive_root = conf.get("ArchiveRoot")
        self._html_url = conf.get("HtmlBaseUrl") or "."
        self._template_dir = os.path.join(get_data_dir(), "templates", "default")
        self._distro_name = conf.get("DistroName", "Debian")

        # a configured export location takes precedence over the default
        self._export_dir = conf.get("ExportDir") or os.path.join(dep11_dir, "export")
        if not os.path.exists(self._export_dir):
            os.makedirs(self._export_dir)

        self._suites_data = conf['Suites']
        self._html_export_dir = os.path.join(self._export_dir, "html")
        self._dep11_url = conf.get("MediaBaseUrl")

        # load metadata cache
        cache_dir = conf.get("CacheDir") or os.path.join(dep11_dir, "cache")
        media_dir = os.path.join(self._export_dir, "media")
        self._cache = DataCache(media_dir)
        self._cache.open(cache_dir)

        os.chdir(dep11_dir)
        return True
Esempio n. 4
0
class DEP11Generator:
    def __init__(self):
        pass


    def initialize(self, dep11_dir):
        '''
        Read the generator configuration from dep11_dir, create the working
        directories and open the metadata cache.

        Returns False when no configuration could be loaded, otherwise the
        result of opening the cache. After the cache has been opened the
        working directory of the process is changed to dep11_dir.
        '''
        dep11_dir = os.path.abspath(dep11_dir)

        conf = load_generator_config(dep11_dir)
        if not conf:
            return False

        self._dep11_url = conf.get("MediaBaseUrl")
        self._icon_sizes = conf.get("IconSizes") or ["128x128", "64x64"]
        self._archive_root = conf.get("ArchiveRoot")

        # configured locations take precedence over the defaults below dep11_dir
        cache_dir = conf.get("CacheDir") or os.path.join(dep11_dir, "cache")
        self._export_dir = conf.get("ExportDir") or os.path.join(dep11_dir, "export")

        for required_dir in (cache_dir, self._export_dir):
            if not os.path.exists(required_dir):
                os.makedirs(required_dir)

        self._suites_data = conf['Suites']

        self._distro_name = conf.get("DistroName") or "Debian"

        # initialize our on-disk metadata pool
        self._cache = DataCache(self._get_media_dir())
        cache_opened = self._cache.open(cache_dir)

        os.chdir(dep11_dir)
        return cache_opened


    def _get_media_dir(self):
        mdir = os.path.join(self._export_dir, "media")
        if not os.path.exists(mdir):
            os.makedirs(mdir)
        return mdir


    def _get_packages_for(self, suite, component, arch):
        '''
        Return the values of the dictionary that
        read_packages_dict_from_file() yields for the given
        suite/component/architecture below the archive root.
        '''
        pkgs_by_key = read_packages_dict_from_file(self._archive_root, suite, component, arch)
        return pkgs_by_key.values()


    def make_icon_tar(self, suitename, component, pkglist):
        '''
         Generate icons-%(size).tar.gz
        '''
        dep11_mediadir = self._get_media_dir()
        names_seen = set()
        tar_location = os.path.join(self._export_dir, "data", suitename, component)

        size_tars = dict()

        for pkg in pkglist:
            pkid = get_pkg_id(pkg['name'], pkg['version'], pkg['arch'])

            gids = self._cache.get_cpt_gids_for_pkg(pkid)
            if not gids:
                # no component global-ids == no icons to add to the tarball
                continue

            for gid in gids:
                for size in self._icon_sizes:
                    icon_location_glob = os.path.join (dep11_mediadir, component, gid, "icons", size, "*.png")

                    tar = None
                    if size not in size_tars:
                        icon_tar_fname = os.path.join(tar_location, "icons-%s.tar.gz" % (size))
                        size_tars[size] = tarfile.open(icon_tar_fname+".new", "w:gz")
                    tar = size_tars[size]

                    for filename in glob.glob(icon_location_glob):
                        icon_name = os.path.basename(filename)
                        if size+"/"+icon_name in names_seen:
                            continue
                        tar.add(filename, arcname=icon_name)
                        names_seen.add(size+"/"+icon_name)

        for tar in size_tars.values():
            tar.close()
            # FIXME Ugly....
            safe_move_file(tar.name, tar.name.replace(".new", ""))


    def process_suite(self, suite_name):
        '''
        Extract new metadata for a given suite.

        For each component and architecture of the suite, packages which the
        cache does not know yet are passed to extract_metadata() in a worker
        pool. Afterwards the Components-<arch>.yml.gz and
        DEP11Hints_<arch>.yml.gz files are rewritten from the cache, and an
        icon tarball is created for each component.

        Returns False if the suite is not configured, None otherwise.
        '''

        suite = self._suites_data.get(suite_name)
        if not suite:
            log.error("Suite '%s' not found!" % (suite_name))
            return False

        # called for its side effect only: creates the media directory if missing
        self._get_media_dir()

        # We need 'forkserver' as startup method to prevent deadlocks on join()
        # Something in the extractor is doing weird things, makes joining impossible
        # when using simple fork as startup method.
        # set_start_method() raises RuntimeError when it is called a second
        # time, so only select the method while it is not active yet; this
        # keeps process_suite() callable for several suites in one process.
        if mp.get_start_method(allow_none=True) != 'forkserver':
            mp.set_start_method('forkserver', force=True)

        for component in suite['components']:
            all_cpt_pkgs = list()
            for arch in suite['architectures']:
                pkglist = self._get_packages_for(suite_name, component, arch)

                # compile a list of packages that we need to look into
                pkgs_todo = dict()
                for pkg in pkglist:
                    pkid = get_pkg_id(pkg['name'], pkg['version'], pkg['arch'])

                    # check if we scanned the package already
                    if self._cache.package_exists(pkid):
                        continue
                    pkgs_todo[pkid] = pkg

                # set up metadata extractor
                iconf = ContentsListIconFinder(suite_name, component, arch, self._archive_root)
                mde = MetadataExtractor(suite_name,
                                component,
                                self._icon_sizes,
                                self._cache,
                                iconf)

                # Multiprocessing can't cope with LMDB open in the cache,
                # but instead of throwing an error or doing something else
                # that makes debugging easier, it just silently skips each
                # multprocessing task. Stupid thing.
                # (remember to re-open the cache later)
                self._cache.close()

                # set up multiprocessing
                with mp.Pool(maxtasksperchild=16) as pool:
                    def handle_results(message):
                        log.info(message)

                    def handle_error(e):
                        traceback.print_exception(type(e), e, e.__traceback__)
                        log.error(str(e))
                        pool.terminate()
                        sys.exit(5)

                    log.info("Processing %i packages in %s/%s/%s" % (len(pkgs_todo), suite_name, component, arch))
                    for pkid, pkg in pkgs_todo.items():
                        package_fname = os.path.join(self._archive_root, pkg['filename'])
                        if not os.path.exists(package_fname):
                            log.warning('Package not found: %s' % (package_fname))
                            continue
                        pool.apply_async(extract_metadata,
                                    (mde, suite_name, pkg['name'], package_fname, pkg['version'], pkg['arch'], pkid),
                                    callback=handle_results, error_callback=handle_error)
                    pool.close()
                    pool.join()

                # reopen the cache, we need it
                self._cache.reopen()

                hints_dir = os.path.join(self._export_dir, "hints", suite_name, component)
                if not os.path.exists(hints_dir):
                    os.makedirs(hints_dir)
                dep11_dir = os.path.join(self._export_dir, "data", suite_name, component)
                if not os.path.exists(dep11_dir):
                    os.makedirs(dep11_dir)

                # now write data to disk
                hints_fname = os.path.join(hints_dir, "DEP11Hints_%s.yml.gz" % (arch))
                data_fname = os.path.join(dep11_dir, "Components-%s.yml.gz" % (arch))

                dep11_header = get_dep11_header(suite_name, component, os.path.join(self._dep11_url, component))

                # everything is written to ".new" files which are moved into
                # place once complete; the with-statement closes both files
                # even when reading from the cache fails half-way
                with gzip.open(hints_fname+".new", 'wb') as hints_f, \
                        gzip.open(data_fname+".new", 'wb') as data_f:
                    data_f.write(bytes(dep11_header, 'utf-8'))

                    for pkg in pkglist:
                        pkid = get_pkg_id(pkg['name'], pkg['version'], pkg['arch'])
                        data = self._cache.get_metadata_for_pkg(pkid)
                        if data:
                            data_f.write(bytes(data, 'utf-8'))
                        hint = self._cache.get_hints(pkid)
                        if hint:
                            hints_f.write(bytes(hint, 'utf-8'))

                safe_move_file(data_fname+".new", data_fname)
                safe_move_file(hints_fname+".new", hints_fname)

                all_cpt_pkgs.extend(pkglist)

            # create icon tarball
            self.make_icon_tar(suite_name, component, all_cpt_pkgs)

            log.info("Completed metadata extraction for suite %s/%s" % (suite_name, component))


    def expire_cache(self):
        pkgids = set()
        for suite_name in self._suites_data:
            suite = self._suites_data[suite_name]
            for component in suite['components']:
                for arch in suite['architectures']:
                    pkglist = self._get_packages_for(suite_name, component, arch)
                    for pkg in pkglist:
                        pkid = get_pkg_id(pkg['name'], pkg['version'], pkg['arch'])
                        pkgids.add(pkid)

        # clean cache
        oldpkgs = self._cache.get_packages_not_in_set(pkgids)
        for pkid in oldpkgs:
            pkid = str(pkid, 'utf-8')
            self._cache.remove_package(pkid)
        # ensure we don't leave cruft
        self._cache.remove_orphaned_components()


    def remove_processed(self, suite_name):
        '''
        Delete information about processed packages, to reprocess them later.
        '''

        suite = self._suites_data.get(suite_name)
        if not suite:
            log.error("Suite '%s' not found!" % (suite_name))
            return False

        for component in suite['components']:
            all_cpt_pkgs = list()
            for arch in suite['architectures']:
                pkglist = self._get_packages_for(suite_name, component, arch)

                for pkg in pkglist:
                    package_fname = os.path.join (self._archive_root, pkg['filename'])
                    pkid = get_pkg_id(pkg['name'], pkg['version'], pkg['arch'])

                    # we ignore packages without any interesting metadata here
                    if self._cache.is_ignored(pkid):
                        continue

                    self._cache.remove_package(pkid)

        # drop all components which don't have packages
        self._cache.remove_orphaned_components()
Esempio n. 5
0
class DEP11Generator:
    def __init__(self):
        pass


    def initialize(self, dep11_dir):
        '''
        Read the generator configuration from dep11_dir, create the working
        directories and open the metadata cache.

        Returns False when no configuration could be loaded, otherwise the
        result of opening the cache. After the cache has been opened the
        working directory of the process is changed to dep11_dir.
        '''
        dep11_dir = os.path.abspath(dep11_dir)

        conf = load_generator_config(dep11_dir)
        if not conf:
            return False

        # indexed as [suite][component][arch], filled by process_suite()
        self._all_pkgs = defaultdict(partial(defaultdict, partial(defaultdict, list)))

        self._dep11_url = conf.get("MediaBaseUrl")
        self._icon_sizes = conf.get("IconSizes") or ["128x128", "64x64"]
        self._archive_root = conf.get("ArchiveRoot")

        # configured locations take precedence over the defaults below dep11_dir
        cache_dir = conf.get("CacheDir") or os.path.join(dep11_dir, "cache")
        self._export_dir = conf.get("ExportDir") or os.path.join(dep11_dir, "export")
        self._langpack_dir = os.path.join(dep11_dir, "langpacks")

        for required_dir in (cache_dir, self._export_dir, self._langpack_dir):
            if not os.path.exists(required_dir):
                os.makedirs(required_dir)

        self._suites_data = conf['Suites']

        self._distro_name = conf.get("DistroName") or "Debian"

        # the RepositoryName property is only interesting for
        # 3rd-party repositories using this generator, which don't want
        # to conflict with the main distro repository data.
        self._repo_name = conf.get("RepositoryName") or self._distro_name

        # initialize our on-disk metadata pool
        self._cache = DataCache(self._get_media_dir())
        cache_opened = self._cache.open(cache_dir)

        os.chdir(dep11_dir)
        return cache_opened


    def _get_media_dir(self):
        mdir = os.path.join(self._export_dir, "media")
        if not os.path.exists(mdir):
            os.makedirs(mdir)
        return mdir


    def _get_packages_for(self, suite, component, arch, with_desc=True):
        '''
        Return the values of the dictionary that
        read_packages_dict_from_file() yields for the given
        suite/component/architecture below the archive root.

        with_desc is forwarded as the with_description argument.
        '''
        pkgs_by_key = read_packages_dict_from_file(
            self._archive_root, suite, component, arch,
            with_description=with_desc)
        return pkgs_by_key.values()


    def make_icon_tar(self, suitename, component, pkglist):
        '''
         Generate icons-%(size).tar.gz
        '''
        dep11_mediadir = self._get_media_dir()
        names_seen = set()
        tar_location = os.path.join(self._export_dir, "data", suitename, component)

        size_tars = dict()

        for pkg in pkglist:
            pkid = pkg.pkid

            gids = self._cache.get_cpt_gids_for_pkg(pkid)
            if not gids:
                # no component global-ids == no icons to add to the tarball
                continue

            for gid in gids:
                for size in self._icon_sizes:
                    icon_location_glob = os.path.join (dep11_mediadir, "*", gid, "icons", size, "*.png")

                    tar = None
                    if size not in size_tars:
                        icon_tar_fname = os.path.join(tar_location, "icons-%s.tar.gz" % (size))
                        size_tars[size] = tarfile.open(icon_tar_fname+".new", "w:gz")
                    tar = size_tars[size]

                    for filename in glob.glob(icon_location_glob):
                        icon_name = os.path.basename(filename)
                        if size+"/"+icon_name in names_seen:
                            continue
                        tar.add(filename, arcname=icon_name)
                        names_seen.add(size+"/"+icon_name)

        for tar in size_tars.values():
            tar.close()
            # FIXME Ugly....
            safe_move_file(tar.name, tar.name.replace(".new", ""))


    def process_suite(self, suite_name):
        '''
        Extract new metadata for a given suite.

        The package lists of the suite (and of its 'baseSuite', if one is
        configured) are loaded first. Then, per component and architecture:
        packages unknown to the cache are passed to extract_metadata() in a
        worker pool, the hints file is rewritten, and the
        Components-<arch>.yml.gz file is rewritten when new components were
        found or the file does not exist yet. Finally an icon tarball is
        created for each component.

        Returns False if the suite is not configured, None otherwise.
        '''

        suite = self._suites_data.get(suite_name)
        if not suite:
            log.error("Suite '%s' not found!" % (suite_name))
            return False
        base_suite_name = suite.get('baseSuite')

        base_suite = self._suites_data.get(base_suite_name) if base_suite_name else None

        # We need 'forkserver' as startup method to prevent deadlocks on join()
        # Something in the extractor is doing weird things, makes joining impossible
        # when using simple fork as startup method.
        # set_start_method() raises RuntimeError when it is called a second
        # time, so only select the method while it is not active yet; this
        # keeps process_suite() callable for several suites in one process.
        if mp.get_start_method(allow_none=True) != 'forkserver':
            mp.set_start_method('forkserver', force=True)

        for component in suite['components']:
            for arch in suite['architectures']:
                self._all_pkgs[suite_name][component][arch] = \
                    self._get_packages_for(suite_name, component, arch)

        if base_suite:
            for component in base_suite['components']:
                for arch in base_suite['architectures']:
                    self._all_pkgs[base_suite_name][component][arch] = \
                        self._get_packages_for(base_suite_name, component, arch)

        # created lazily, once the first extractor is needed
        langpacks = None

        for component in suite['components']:
            all_cpt_pkgs = list()
            # NOTE: not reset per architecture, so it stays set for all
            # remaining architectures of this component once it became True
            new_components = False
            for arch in suite['architectures']:
                pkglist = self._all_pkgs[suite_name][component][arch]
                suite_component_arch = "%s/%s/%s" % (suite_name, component, arch)

                dep11_dir = os.path.join(self._export_dir, "data", suite_name, component)
                data_fname = os.path.join(dep11_dir, "Components-%s.yml.gz" % (arch))

                # names of the packages listed in the previously written data file
                last_seen_pkgs = set()
                try:
                    # NOTE(review): yaml.load_all() without an explicit Loader
                    # is unsafe on untrusted input and rejected by recent
                    # PyYAML releases - consider yaml.safe_load_all().
                    with gzip.open(data_fname, 'r') as old_data_f:
                        for y in yaml.load_all(old_data_f):
                            if 'Package' in y:
                                last_seen_pkgs.add(y['Package'])
                except FileNotFoundError:
                    pass

                # compile a list of packages that we need to look into
                pkgs_todo = dict()
                for pkg in pkglist:
                    pkid = pkg.pkid

                    last_seen_pkgs.discard(pkg.name)

                    # check if we scanned the package already
                    if self._cache.package_exists(pkid):
                        if not self._cache.package_in_suite(pkid, suite_component_arch) and not self._cache.is_ignored(pkid):
                            log.info("Seen %s before, but not in %s" % (pkid, suite_component_arch))
                            self._cache.add_package_to_suite(pkid, suite_component_arch)
                            new_components = True
                        continue
                    pkgs_todo[pkid] = pkg

                # some packages have been removed
                if last_seen_pkgs:
                    # FIXME(review): `pkid` is whatever the loop above left
                    # behind (or unbound if no package was iterated yet), not
                    # the id of the removed package; last_seen_pkgs only holds
                    # package names. Needs a fix against the DataCache API.
                    for pkg in last_seen_pkgs:
                        self._cache.remove_package_from_suite(pkid, suite_component_arch)
                    new_components = True

                dep11_header = get_dep11_header(self._repo_name, suite_name, component, os.path.join(self._dep11_url, component), suite.get('dataPriority', 0))

                if not os.path.exists(dep11_dir):
                    os.makedirs(dep11_dir)

                if not pkgs_todo and not new_components:
                    if not os.path.exists(data_fname):
                        log.info ("No packages to process for %s, but %s doesn't exist, so writing with header only." % (suite_component_arch, data_fname))
                        with gzip.open(data_fname, 'wb') as data_f:
                            data_f.write(bytes(dep11_header, 'utf-8'))
                    else:
                        log.info("Skipped %s, no new packages to process." % suite_component_arch)
                    continue

                if pkgs_todo:
                    # set up metadata extractor
                    icon_theme = suite.get('useIconTheme')
                    iconh = IconHandler(suite_name, component, arch, self._archive_root,
                                                   icon_theme, base_suite_name=suite.get('baseSuite'))
                    iconh.set_wanted_icon_sizes(self._icon_sizes)
                    if not langpacks:
                        langpacks = UbuntuLangpackHandler(suite, suite_name, self._all_pkgs, self._langpack_dir, self._cache)
                    mde = MetadataExtractor(suite_name,
                                    component,
                                    arch,
                                    self._cache,
                                    iconh,
                                    langpacks)

                    # Multiprocessing can't cope with LMDB open in the cache,
                    # but instead of throwing an error or doing something else
                    # that makes debugging easier, it just silently skips each
                    # multprocessing task. Stupid thing.
                    # (remember to re-open the cache later)
                    self._cache.close()

                    # set up multiprocessing
                    with mp.Pool(maxtasksperchild=24) as pool:
                        count = 1
                        def handle_results(result):
                            nonlocal count
                            nonlocal new_components
                            (message, any_components) = result
                            new_components = new_components or any_components
                            log.info(message.format(count, len(pkgs_todo)))
                            count += 1

                        def handle_error(e):
                            traceback.print_exception(type(e), e, e.__traceback__)
                            log.error(str(e))
                            pool.terminate()
                            sys.exit(5)

                        log.info("Processing %i packages in %s" % (len(pkgs_todo), suite_component_arch))
                        for pkid, pkg in pkgs_todo.items():
                            package_fname = os.path.join(self._archive_root, pkg.filename)
                            if not os.path.exists(package_fname):
                                log.warning('Package not found: %s' % (package_fname))
                                continue
                            # the extractor gets the full path of the package file
                            pkg.filename = package_fname
                            pool.apply_async(extract_metadata,
                                        (mde, suite_name, pkg),
                                        callback=handle_results, error_callback=handle_error)
                        pool.close()
                        pool.join()

                    # reopen the cache, we need it
                    self._cache.reopen()

                hints_dir = os.path.join(self._export_dir, "hints", suite_name, component)
                if not os.path.exists(hints_dir):
                    os.makedirs(hints_dir)
                hints_fname = os.path.join(hints_dir, "DEP11Hints_%s.yml.gz" % (arch))

                # data_f stays None when the existing data file is kept
                data_f = None
                with gzip.open(hints_fname+".new", 'wb') as hints_f:
                    if not new_components and os.path.exists(data_fname):
                        log.info("Skipping %s, no components in any of the new packages.", suite_component_arch)
                    else:
                        # now write data to disk
                        data_f = gzip.open(data_fname+".new", 'wb')

                    try:
                        if data_f is not None:
                            data_f.write(bytes(dep11_header, 'utf-8'))

                        for pkg in pkglist:
                            pkid = pkg.pkid
                            if new_components:
                                data = self._cache.get_metadata_for_pkg(pkid)
                                if data:
                                    data_f.write(bytes(data, 'utf-8'))
                            hint = self._cache.get_hints(pkid)
                            if hint:
                                hints_f.write(bytes(hint, 'utf-8'))
                    finally:
                        if data_f is not None:
                            data_f.close()

                # Finalize the data file whenever it was opened. This also
                # covers the case of a missing data file without new
                # components, which then gets a header-only file instead of
                # a dangling, never closed ".new" file.
                if data_f is not None:
                    safe_move_file(data_fname+".new", data_fname)

                safe_move_file(hints_fname+".new", hints_fname)

                all_cpt_pkgs.extend(pkglist)

            # create icon tarball
            self.make_icon_tar(suite_name, component, all_cpt_pkgs)

            log.info("Completed metadata extraction for suite %s/%s" % (suite_name, component))


    def expire_cache(self):
        pkgids = set()
        for suite_name in self._suites_data:
            suite = self._suites_data[suite_name]
            for component in suite['components']:
                for arch in suite['architectures']:
                    pkglist = self._get_packages_for(suite_name, component, arch, with_desc=False)
                    for pkg in pkglist:
                        pkgids.add(pkg.pkid)

        # clean cache
        oldpkgs = self._cache.get_packages_not_in_set(pkgids)
        for pkid in oldpkgs:
            pkid = str(pkid, 'utf-8')
            self._cache.remove_package(pkid)

        # ensure we don't leave cruft, drop orphaned components (cpts w/o pkg)
        self._cache.remove_orphaned_components()
        # drop orphaned media (media w/o registered cpt)
        self._cache.remove_orphaned_media()


    def remove_processed(self, suite_name):
        '''
        Delete information about processed packages, to reprocess them later.
        '''

        suite = self._suites_data.get(suite_name)
        if not suite:
            log.error("Suite '%s' not found!" % (suite_name))
            return False

        for component in suite['components']:
            for arch in suite['architectures']:
                pkglist = self._get_packages_for(suite_name, component, arch, with_desc=False)

                for pkg in pkglist:
                    pkid = pkg.pkid

                    # we ignore packages without any interesting metadata here
                    if self._cache.is_ignored(pkid):
                        continue
                    if not self._cache.package_exists(pkid):
                        continue

                    self._cache.remove_package(pkid)

        # drop all components which don't have packages
        self._cache.remove_orphaned_components()
        self._cache.remove_orphaned_media()


    def forget_package(self, pkid):
        '''
        Delete all information about a package in the cache.
        '''

        if '/' in pkid:
            if not self._cache.package_exists(pkid):
                print("Package with ID '%s' does not exist." % (pkid))
                return
            self._cache.remove_package(pkid)
        else:
            log.info("Removing all packages with name {}".format(pkid))
            ret = self._cache.delete_package_by_name(pkid)
            if not ret:
                print("Unable to remove packages matching name '%s'." % (pkid))
                return

        # drop all components which don't have packages
        self._cache.remove_orphaned_components()


    def show_info(self, pkgname):
        '''
        Show some details we know about a package.
        '''

        print("{}:".format(pkgname))
        for pkva, info in self._cache.get_info(pkgname):
            print(" {}".format(pkva))
            for e in info:
                print("  | -> {}".format(str(e)))


    def prepopulate_cache(self, suite_name):
        '''
        Check which packages we can definitely ignore based on their contents in the Contents.gz file.
        This is useful when e.g. bootstrapping new suites / architectures.
        '''

        suite = self._suites_data.get(suite_name)
        if not suite:
            log.error("Suite '%s' not found!" % (suite_name))
            return False

        for component in suite['components']:
            for arch in suite['architectures']:
                pkid_filelist = dict()
                for fname, pkg in parse_contents_file(self._archive_root, suite_name, component, arch):
                    if not pkid_filelist.get(pkg.pkid):
                        pkid_filelist[pkg.pkid] = list()
                    pkid_filelist[pkg.pkid].append(fname)

                for pkid, filelist in pkid_filelist.items():
                    ignore = True
                    for f in filelist:
                        if 'usr/share/applications/' in f:
                            ignore = False
                            break
                        if 'usr/share/metainfo/' in f:
                            ignore = False
                            break
                        if 'usr/share/appdata/' in f:
                            ignore = False
                            break

                    if not ignore:
                        continue

                    if self._cache.is_ignored(pkid):
                        log.info("Package is already ignored: {}".format(pkid))
                    elif self._cache.package_exists(pkid):
                        log.warning("Tried to ignore package which actually exists and has data: {}".format(pkid))
                    else:
                        log.info("Ignoring package: {}".format(pkid))
                        self._cache.set_package_ignore(pkid)
class ReportGenerator:
    def __init__(self):
        pass


    def initialize(self, dep11_dir):
        '''
        Load the generator configuration found in dep11_dir and prepare
        the paths, URLs and the metadata cache needed to render reports.

        The export directory is created if it is missing, and the working
        directory of the process is changed to dep11_dir.

        Returns False if no configuration could be loaded, otherwise the
        result of opening the metadata cache.
        '''
        dep11_dir = os.path.abspath(dep11_dir)

        conf = load_generator_config(dep11_dir)
        if not conf:
            return False

        self._archive_root = conf.get("ArchiveRoot")

        # an unset or empty value falls back to the default in all cases below
        self._html_url = conf.get("HtmlBaseUrl") or "."

        self._template_dir = os.path.join(get_data_dir(), "templates", "default")

        self._distro_name = conf.get("DistroName") or "Debian"

        self._export_dir = conf.get("ExportDir") or os.path.join(dep11_dir, "export")
        os.makedirs(self._export_dir, exist_ok=True)

        self._suites_data = conf['Suites']

        self._html_export_dir = os.path.join(self._export_dir, "html")

        self._dep11_url = conf.get("MediaBaseUrl")

        # load metadata cache
        cache_dir = conf.get("CacheDir") or os.path.join(dep11_dir, "cache")
        self._cache = DataCache(os.path.join(self._export_dir, "media"))
        cache_opened = self._cache.open(cache_dir)

        os.chdir(dep11_dir)

        # hand the result of opening the cache to the caller instead of
        # claiming success unconditionally
        return cache_opened


    def _get_packages_for(self, suite, component, arch):
        '''
        Read the packages of the given suite/component/architecture from
        the archive and return them without their dictionary keys.
        '''
        packages = read_packages_dict_from_file(self._archive_root, suite, component, arch)
        return packages.values()


    def render_template(self, name, out_dir, out_name = None, *args, **kwargs):
        '''
        Render the template "name" and write the result, encoded as UTF-8,
        to a file below out_dir.

        The file is called out_name if that is given, and is named like
        the template otherwise. Missing parent directories are created.
        Every template gets the variables root_url, distro, time and
        generator_version; additional positional and keyword arguments
        are handed to the template as well.
        '''
        target_name = out_name if out_name else name
        out_path = os.path.join(out_dir, target_name)

        # the target name may contain subdirectories, create them if necessary
        parent_dir = os.path.dirname(os.path.realpath(out_path))
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)

        loader = FileSystemLoader(self._template_dir)
        template = Environment(loader=loader).get_template(name)

        content = template.render(*args,
                                  root_url=self._html_url,
                                  distro=self._distro_name,
                                  time=time.strftime("%Y-%m-%d %H:%M:%S %Z"),
                                  generator_version=__version__,
                                  **kwargs)

        # log the path relative to the HTML export directory
        log.debug("Render: %s", out_path.replace(self._html_export_dir, ""))
        with open(out_path, 'wb') as out_file:
            out_file.write(content.encode('utf-8'))


    def _highlight_yaml(self, yml_data):
        if not yml_data:
            return ""
        if not pygments:
            return yml_data.replace("\n", "<br/>\n")
        return pygments.highlight(yml_data, YamlLexer(), HtmlFormatter())


    def _expand_hint(self, hint_data):
        '''
        Expand a raw hint into the data shown on the issue pages.

        hint_data needs a 'tag' entry; its 'params' entry is used to fill
        the description text of the tag.

        Returns a dict with the keys 'tag_name', 'description' and
        'severity'. Problems while expanding the hint do not raise, they
        are reported through the description and the log.
        '''
        tag_name = hint_data['tag']
        tag = get_hint_tag_info(tag_name)

        # NOTE(review): get_hint_tag_info() is defined elsewhere; this
        # assumes it yields None for a tag it does not know - confirm.
        # Without this guard the severity lookup below would raise
        # AttributeError, although hint expansion is meant to be best-effort.
        if tag is None:
            log.error("Tag %s is unknown!", tag_name)
            desc = "Error while expanding hint description: unknown tag '%s'" % (tag_name)
            return {'tag_name': tag_name, 'description': desc, 'severity': "info"}

        try:
            desc = tag['text'] % hint_data['params']
        except Exception as e:
            # deliberately broad: a broken hint must not abort the report
            desc = "Error while expanding hint description: %s" % (str(e))

        severity = tag.get('severity')
        if not severity:
            log.error("Tag %s has no severity!", tag_name)
            severity = "info"

        return {'tag_name': tag_name, 'description': desc, 'severity': severity}


    def update_reports(self, suite_name):
        dep11_hintsdir = os.path.join(self._export_dir, "hints")
        if not os.path.exists(dep11_hintsdir):
            return
        dep11_minfodir = os.path.join(self._export_dir, "data")
        if not os.path.exists(dep11_minfodir):
            return

        suite = self._suites_data.get(suite_name)
        if not suite:
            log.error("Suite '%s' not found!" % (suite_name))
            return False

        export_dir_root = self._html_export_dir
        media_dir = os.path.join(self._export_dir, "media")
        noimage_url = os.path.join(self._html_url, "static", "img", "no-image.png")

        # Render archive suites index page
        self.render_template("suites_index.html", export_dir_root, "index.html", suites=self._suites_data.keys())
        export_dir = os.path.join(export_dir_root, suite_name)

        log.info("Collecting metadata and issue information for suite '%s'" % (suite_name))

        stats = StatsGenerator(self._cache)
        suite_error_count = 0
        suite_warning_count = 0
        suite_info_count = 0
        suite_metainfo_count = 0

        for component in suite['components']:
            issue_summaries = dict()
            mdata_summaries = dict()
            export_dir_section = os.path.join(self._export_dir, "html", suite_name, component)
            export_dir_issues = os.path.join(export_dir_section, "issues")
            export_dir_metainfo = os.path.join(export_dir_section, "metainfo")

            error_count = 0
            warning_count = 0
            info_count = 0
            metainfo_count = 0

            hint_pages = dict()
            cpt_pages = dict()

            for arch in suite['architectures']:
                pkglist = self._get_packages_for(suite_name, component, arch)

                for pkg in pkglist:
                    pkid = pkg.pkid

                    maintainer = None
                    if pkg:
                        maintainer = pkg.maintainer
                    if not maintainer:
                        maintainer = "Unknown"

                    #
                    # Data processing hints
                    #
                    hints_list = self._cache.get_hints(pkid)
                    if hints_list:
                        hints_list = yaml.safe_load_all(hints_list)
                        for hdata in hints_list:
                            pkg_name = hdata['Package']
                            pkg_id = hdata.get('PackageID')
                            if not pkg_id:
                                pkg_id = pkg_name
                            if not issue_summaries.get(maintainer):
                                issue_summaries[maintainer] = dict()

                            hints_raw = hdata.get('Hints', list())

                            # expand all hints to show long descriptions
                            errors = list()
                            warnings = list()
                            infos = list()

                            for hint in hints_raw:
                                ehint = self._expand_hint(hint)
                                severity = ehint['severity']
                                if severity == "info":
                                    infos.append(ehint)
                                elif severity == "warning":
                                    warnings.append(ehint)
                                else:
                                    errors.append(ehint)

                            if not hint_pages.get(pkg_name):
                                hint_pages[pkg_name] = list()

                            # we fold multiple architectures with the same issues into one view
                            pkid_noarch = pkg_id
                            if "/" in pkg_id:
                                pkid_noarch = pkg_id[:pkg_id.rfind("/")]

                            pcid = ""
                            if hdata.get('ID'):
                                pcid = "%s: %s" % (pkid_noarch, hdata.get('ID'))
                            else:
                                pcid = pkid_noarch

                            page_data = {'identifier': pcid, 'errors': errors, 'warnings': warnings, 'infos': infos, 'archs': [arch]}
                            try:
                                l = hint_pages[pkg_name]
                                index = next(i for i, v in enumerate(l) if equal_dicts(v, page_data, ['archs']))
                                hint_pages[pkg_name][index]['archs'].append(arch)
                            except StopIteration:
                                hint_pages[pkg_name].append(page_data)

                                # add info to global issue count
                                error_count += len(errors)
                                warning_count += len(warnings)
                                info_count += len(infos)

                                # add info for global index
                                if not issue_summaries[maintainer].get(pkg_name):
                                    issue_summaries[maintainer][pkg_name] = {'error_count': len(errors), 'warning_count': len(warnings), 'info_count': len(infos)}


                    #
                    # Component metadata
                    #
                    cptgids = self._cache.get_cpt_gids_for_pkg(pkid)
                    if cptgids:
                        for cptgid in cptgids:
                            mdata = self._cache.get_metadata(cptgid)
                            if not mdata:
                                log.error("Package '%s' refers to missing component with gid '%s'" % (pkid, cptgid))
                                continue
                            mdata = yaml.safe_load(mdata)

                            pkg_name = mdata.get('Package')
                            if not pkg_name:
                                # we probably hit the header
                                continue
                            if not mdata_summaries.get(maintainer):
                                mdata_summaries[maintainer] = dict()


                            # ugly hack to have the screenshot entries linked
                            #if mdata.get('Screenshots'):
                            #    sshot_baseurl = os.path.join(self._dep11_url, component)
                            #    for i in range(len(mdata['Screenshots'])):
                            #        url = mdata['Screenshots'][i]['source-image']['url']
                            #        url = "<a href=\"%s\">%s</a>" % (os.path.join(sshot_baseurl, url), url)
                            #        mdata['Screenshots'][i]['source-image']['url'] = Markup(url)
                            #        thumbnails = mdata['Screenshots'][i]['thumbnails']
                            #        for j in range(len(thumbnails)):
                            #            url = thumbnails[j]['url']
                            #            url = "<a href=\"%s\">%s</a>" % (os.path.join(sshot_baseurl, url), url)
                            #            thumbnails[j]['url'] = Markup(url)
                            #        mdata['Screenshots'][i]['thumbnails'] = thumbnails


                            mdata_yml = dict_to_dep11_yaml(mdata)
                            mdata_yml = self._highlight_yaml(mdata_yml)
                            cid = mdata.get('ID')

                            # try to find an icon for this component (if it's a GUI app)
                            icon_url = None
                            if mdata['Type'] == 'desktop-app' or mdata['Type'] == "web-app":
                                icon_name = mdata['Icon'].get("cached")
                                if icon_name:
                                    icon_fname = os.path.join(component, cptgid, "icons", "64x64", icon_name)
                                    if os.path.isfile(os.path.join(media_dir, icon_fname)):
                                        icon_url = os.path.join(self._dep11_url, icon_fname)
                                    else:
                                        icon_url = noimage_url
                                else:
                                    icon_url = noimage_url
                            else:
                                icon_url = os.path.join(self._html_url, "static", "img", "cpt-nogui.png")

                            if not cpt_pages.get(pkg_name):
                                cpt_pages[pkg_name] = list()

                            page_data = {'cid': cid, 'mdata': mdata_yml, 'icon_url': icon_url, 'archs': [arch]}
                            try:
                                l = cpt_pages[pkg_name]
                                index = next(i for i, v in enumerate(l) if equal_dicts(v, page_data, ['archs']))
                                cpt_pages[pkg_name][index]['archs'].append(arch)
                            except StopIteration:
                                cpt_pages[pkg_name].append(page_data)

                                # increase valid metainfo count
                                metainfo_count += 1

                            # check if we had this package, and add to summary
                            pksum = mdata_summaries[maintainer].get(pkg_name)
                            if not pksum:
                                pksum = dict()

                            if pksum.get('cids'):
                                if not cid in pksum['cids']:
                                    pksum['cids'].append(cid)
                            else:
                                pksum['cids'] = [cid]

                            mdata_summaries[maintainer][pkg_name] = pksum


            #
            # Summary and HTML writing
            #

            log.info("Rendering HTML pages for suite '%s/%s'" % (suite_name, component))

            # remove old HTML pages
            shutil.rmtree(export_dir_section, ignore_errors=True)

            # now write the HTML pages with the previously collected & transformed issue data
            for pkg_name, entry_list in hint_pages.items():
                # render issues page
                self.render_template("issues_page.html", export_dir_issues, "%s.html" % (pkg_name),
                        package_name=pkg_name, entries=entry_list, suite=suite_name, section=component)

            # render page with all components found in a package
            for pkg_name, cptlist in cpt_pages.items():
                # render metainfo page
                self.render_template("metainfo_page.html", export_dir_metainfo, "%s.html" % (pkg_name),
                        package_name=pkg_name, cpts=cptlist, suite=suite_name, section=component)

            # Now render our issue index page
            self.render_template("issues_index.html", export_dir_issues, "index.html",
                        package_summaries=issue_summaries, suite=suite_name, section=component)

            # ... and the metainfo index page
            self.render_template("metainfo_index.html", export_dir_metainfo, "index.html",
                        package_summaries=mdata_summaries, suite=suite_name, section=component)


            validate_result = "Validation was not performed."
            d_fname = os.path.join(dep11_minfodir, suite_name, component, "Components-%s.yml.gz" % (arch))
            if os.path.isfile(d_fname):
                # do format validation
                validator = DEP11Validator()
                ret = validator.validate_file(d_fname)
                if ret:
                    validate_result = "No errors found."
                else:
                    validate_result = ""
                    for issue in validator.issue_list:
                        validate_result += issue.replace("FATAL", "<strong>FATAL</strong>")+"<br/>\n"

            # sum up counts for suite statistics
            suite_metainfo_count += metainfo_count
            suite_error_count += error_count
            suite_warning_count += warning_count
            suite_info_count += info_count

            # add current statistics to the statistics database
            stats.add_data(suite_name, component, metainfo_count, error_count, warning_count, info_count)

            # calculate statistics for this component
            count = metainfo_count + error_count + warning_count + info_count
            valid_perc = 100/count*metainfo_count if count > 0 else 0
            error_perc = 100/count*error_count if count > 0 else 0
            warning_perc = 100/count*warning_count if count > 0 else 0
            info_perc = 100/count*info_count if count > 0 else 0

            # Render our overview page
            self.render_template("section_overview.html", export_dir_section, "index.html",
                        suite=suite_name, section=component, valid_percentage=valid_perc,
                        error_percentage=error_perc, warning_percentage=warning_perc, info_percentage=info_perc,
                        metainfo_count=metainfo_count, error_count=error_count, warning_count=warning_count,
                        info_count=info_count, validate_result=validate_result)


        # calculate statistics for this suite
        count = suite_metainfo_count + suite_error_count + suite_warning_count + suite_info_count
        valid_perc = 100/count*suite_metainfo_count if count > 0 else 0
        error_perc = 100/count*suite_error_count if count > 0 else 0
        warning_perc = 100/count*suite_warning_count if count > 0 else 0
        info_perc = 100/count*suite_info_count if count > 0 else 0

        # Render archive components index/overview page
        self.render_template("sections_index.html", export_dir, "index.html",
                        sections=suite['components'], suite=suite_name, valid_percentage=valid_perc,
                        error_percentage=error_perc, warning_percentage=warning_perc, info_percentage=info_perc,
                        metainfo_count=suite_metainfo_count, error_count=suite_error_count, warning_count=suite_warning_count,
                        info_count=suite_info_count)

        # plot graphs
        stats.plot_graphs(os.path.join(export_dir, "stats"))

        # Copy the static files
        target_static_dir = os.path.join(self._export_dir, "html", "static")
        shutil.rmtree(target_static_dir, ignore_errors=True)
        shutil.copytree(os.path.join(self._template_dir, "static"), target_static_dir)