Ejemplo n.º 1
0
    def __init__(self, suite_name, component, icon_sizes, dcache, icon_finder=None):
        '''
        Set up the extractor for one suite / archive-component combination.

        suite_name and component are stored and handed to the fallback icon
        finder. icon_sizes is an iterable of values accepted by IconSize()
        and describes the icon sizes we want to generate. dcache is the data
        cache; its media_dir attribute becomes the export directory.
        icon_finder is optional: if one is given, it is told which icon
        extensions are allowed, otherwise an AbstractIconFinder is created.
        '''
        self._suite_name = suite_name
        self._archive_component = component
        self._export_dir = dcache.media_dir
        self._dcache = dcache
        self.write_to_cache = True

        self._icon_ext_allowed = ('.png', '.svg', '.xcf', '.gif', '.svgz', '.jpg')

        if not icon_finder:
            self._icon_finder = AbstractIconFinder(self._suite_name, self._archive_component)
        else:
            self._icon_finder = icon_finder
            self._icon_finder.set_allowed_icon_extensions(self._icon_ext_allowed)

        # candidates of larger icon sizes which can be scaled down later
        self._large_icon_sizes = xdg_icon_sizes[:]
        # the icon sizes we actually want to have
        self._icon_sizes = [IconSize(strsize) for strsize in icon_sizes]

        # Drop the leading "large" sizes which are not bigger than the first
        # wanted size - we never want to scale icons up.
        # (presumably xdg_icon_sizes is sorted ascending - TODO confirm)
        while self._large_icon_sizes and int(self._icon_sizes[0]) >= int(self._large_icon_sizes[0]):
            del self._large_icon_sizes[0]
Ejemplo n.º 2
0
class MetadataExtractor:
    '''
    Takes a deb file and extracts component metadata from it.
    '''

    def __init__(self, suite_name, component, icon_sizes, dcache, icon_finder=None):
        '''
        Set up the extractor for one suite / archive-component combination.

        suite_name and component are stored and handed to the fallback icon
        finder. icon_sizes is an iterable of values accepted by IconSize()
        and describes the icon sizes we want to generate. dcache is the data
        cache; its media_dir attribute becomes the export directory.
        icon_finder is optional: if one is given, it is told which icon
        extensions are allowed, otherwise an AbstractIconFinder is created.
        '''
        self._suite_name = suite_name
        self._archive_component = component
        self._export_dir = dcache.media_dir
        self._dcache = dcache
        self.write_to_cache = True

        self._icon_ext_allowed = ('.png', '.svg', '.xcf', '.gif', '.svgz', '.jpg')

        if not icon_finder:
            self._icon_finder = AbstractIconFinder(self._suite_name, self._archive_component)
        else:
            self._icon_finder = icon_finder
            self._icon_finder.set_allowed_icon_extensions(self._icon_ext_allowed)

        # candidates of larger icon sizes which can be scaled down later
        self._large_icon_sizes = xdg_icon_sizes[:]
        # the icon sizes we actually want to have
        self._icon_sizes = [IconSize(strsize) for strsize in icon_sizes]

        # Drop the leading "large" sizes which are not bigger than the first
        # wanted size - we never want to scale icons up.
        # (presumably xdg_icon_sizes is sorted ascending - TODO confirm)
        while self._large_icon_sizes and int(self._icon_sizes[0]) >= int(self._large_icon_sizes[0]):
            del self._large_icon_sizes[0]

    @property
    def icon_finder(self):
        '''
        The icon finder which _fetch_icon() queries via find_icons() when an
        icon could not be found in the file list of the processed package.
        '''
        return self._icon_finder

    @icon_finder.setter
    def icon_finder(self, val):
        '''
        Replace the icon finder. Unlike __init__, this does not pass the list
        of allowed icon extensions on to the new finder.
        '''
        self._icon_finder = val

    def reopen_cache(self):
        self._dcache.reopen()

    def get_path_for_cpt(self, cpt, basepath, subdir):
        gid = cpt.global_id
        if not gid:
            return None
        if len(cpt.cid) < 1:
            return None
        path = os.path.join(basepath, gid, subdir)
        return path

    def _get_deb_filelist(self, deb):
        '''
        Returns a list of all files in a deb package
        '''
        files = list()
        if not deb:
            return files
        try:
            deb.data.go(lambda item, data: files.append(item.name))
        except SystemError as e:
            raise e

        return files

    def _scale_screenshot(self, imgsrc, cpt_export_path, cpt_scr_url):
        '''
        Create scaled copies of a screenshot in four fixed sizes
        (1248x702, 752x423, 624x351 and 112x63).

        Every copy is saved as "<cpt_export_path>/<size>/<basename of imgsrc>",
        the size directory is created if it does not exist yet. The image is
        resized to exactly the target dimensions; the aspect ratio of the
        source image is not taken into account.

        Returns a list with one dict per size, holding the keys 'url'
        ("<cpt_scr_url>/<size>/<basename>"), 'width' and 'height' (integers).
        '''
        thumbnails = list()
        name = os.path.basename(imgsrc)
        sizes = ['1248x702', '752x423', '624x351', '112x63']

        # Open the source image only once and make sure its file handle is
        # released again, even if scaling or saving one of the copies fails.
        img = Image.open(imgsrc)
        try:
            for size in sizes:
                wd, ht = (int(value) for value in size.split('x'))
                newimg = img.resize((wd, ht), Image.ANTIALIAS)
                newpath = os.path.join(cpt_export_path, size)
                os.makedirs(newpath, exist_ok=True)
                newimg.save(os.path.join(newpath, name))
                url = "%s/%s/%s" % (cpt_scr_url, size, name)
                thumbnails.append({'url': url, 'height': ht, 'width': wd})
        finally:
            img.close()

        return thumbnails

    def _fetch_screenshots(self, cpt, cpt_export_path, cpt_public_url=""):
        '''
        Fetches screenshots from the given url and
        stores it in png format.
        '''

        if not cpt.screenshots:
            # don't ignore metadata if no screenshots are present
            return True

        success = True
        shots = list()
        cnt = 1
        for shot in cpt.screenshots:
            # cache some locations which we need later
            origin_url = shot['source-image']['url']
            if not origin_url:
                # url empty? skip this screenshot
                continue
            path     = self.get_path_for_cpt(cpt, cpt_export_path, "screenshots")
            base_url = self.get_path_for_cpt(cpt, cpt_public_url,  "screenshots")
            imgsrc   = os.path.join(path, "source", "scr-%s.png" % (str(cnt)))

            # The Debian services use a custom setup for SSL verification, not trusting global CAs and
            # only Debian itself. If we are running on such a setup, ensure we load the global CA certs
            # in order to establish HTTPS connections to foreign services.
            # For more information, see https://wiki.debian.org/ServicesSSL
            context = None
            ca_path = '/etc/ssl/ca-global'
            if os.path.isdir(ca_path):
                ssl_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH, capath=ca_path)
            else:
                ssl_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)

            try:
                # FIXME: The context parameter is only supported since Python 3.4.3, which is not
                # yet widely available, so we can't use it here...
                #! image = urllib.request.urlopen(origin_url, context=ssl_context).read()
                image_req = urllib.request.urlopen(origin_url, timeout=30)
                if image_req.getcode() != 200:
                    msg = "HTTP status code was %i." % (image_req.getcode())
                    cpt.add_hint("screenshot-download-error", {'url': origin_url, 'cpt_id': cpt.cid, 'error': msg})
                    success = False
                    continue

                if not os.path.exists(os.path.dirname(imgsrc)):
                    os.makedirs(os.path.dirname(imgsrc))
                f = open(imgsrc, 'wb')
                f.write(image_req.read())
                f.close()
            except Exception as e:
                cpt.add_hint("screenshot-download-error", {'url': origin_url, 'cpt_id': cpt.cid, 'error': str(e)})
                success = False
                continue

            try:
                img = Image.open(imgsrc)
                wd, ht = img.size
                shot['source-image']['width'] = wd
                shot['source-image']['height'] = ht
                shot['source-image']['url'] = os.path.join(base_url, "source", "scr-%s.png" % (str(cnt)))
                img.close()
            except Exception as e:
                error_msg = str(e)
                # filter out the absolute path: we shouldn't add it
                if error_msg:
                    error_msg = error_msg.replace(os.path.dirname(imgsrc), "")
                cpt.add_hint("screenshot-read-error", {'url': origin_url, 'cpt_id': cpt.cid, 'error': error_msg})
                success = False
                continue

            # scale_screenshots will return a list of
            # dicts with {height,width,url}
            shot['thumbnails'] = self._scale_screenshot(imgsrc, path, base_url)
            shots.append(shot)
            cnt = cnt + 1

        cpt.screenshots = shots
        return success

    def _icon_allowed(self, icon):
        if icon.endswith(self._icon_ext_allowed):
            return True
        return False

    def _render_svg_to_png(self, data, store_path, width, height):
        '''
        Render SVG data to a PNG file of the given pixel size, using cairo
        and Rsvg.

        The drawing is scaled independently in x and y direction, so it fills
        the whole width x height surface, and is then written to store_path.
        '''
        surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)
        context = cairo.Context(surface)

        svg = Rsvg.Handle().new_from_data(data)

        # stretch the drawing from the size reported by the SVG to the requested size
        context.scale(float(width) / float(svg.props.width),
                      float(height) / float(svg.props.height))
        svg.render_cairo(context)

        surface.write_to_png(store_path)

    def _store_icon(self, deb_fname, cpt, cpt_export_path, icon_path, size):
        '''
        Extracts the icon from the deb package and stores it in the cache.
        Ensures the stored icon always has the size given in "size", and renders
        vectorgraphics if necessary.
        '''
        svgicon = False
        if not self._icon_allowed(icon_path):
            cpt.add_hint("icon-format-unsupported", {'icon_fname': os.path.basename(icon_path)})
            return False

        if not os.path.exists(deb_fname):
            return False

        path = self.get_path_for_cpt(cpt, cpt_export_path, "icons/%s" % (str(size)))
        icon_name = "%s_%s" % (cpt.pkgname, os.path.basename(icon_path))
        icon_name_orig = icon_name

        icon_name = icon_name.replace(".svgz", ".png")
        icon_name = icon_name.replace(".svg", ".png")
        icon_store_location = "{0}/{1}".format(path, icon_name)

        if os.path.exists(icon_store_location):
            # we already extracted that icon, skip the extraction step
            # change scalable vector graphics to their .png extension
            cpt.icon = icon_name
            return True

        # filepath is checked because icon can reside in another binary
        # eg amarok's icon is in amarok-data
        icon_data = None
        try:
            icon_data = DebFile(deb_fname).data.extractdata(icon_path)
        except Exception as e:
            cpt.add_hint("deb-extract-error", {'fname': icon_name, 'pkg_fname': deb_fname, 'error': str(e)})
            return False

        if not icon_data:
            cpt.add_hint("deb-extract-error", {'fname': icon_name, 'pkg_fname': deb_fname,
                                               'error': "Icon data was empty. The icon might be a symbolic link, please do not symlink icons "
                                                         "(instead place the icons in their appropriate directories in <code>/usr/share/icons/hicolor/</code>)."})
            return False
        cpt.icon = icon_name

        if icon_name_orig.endswith(".svg"):
            svgicon = True
        elif icon_name_orig.endswith(".svgz"):
            svgicon = True
            try:
                icon_data = zlib.decompress(bytes(icon_data), 15+32)
            except Exception as e:
                cpt.add_hint("svgz-decompress-error", {'icon_fname': icon_name, 'error': str(e)})
                return False

        if not os.path.exists(path):
            os.makedirs(path)

        if svgicon:
            # render the SVG to a bitmap
            self._render_svg_to_png(icon_data, icon_store_location, int(size), int(size))
            return True
        else:
            # we don't trust upstream to have the right icon size present, and therefore
            # always adjust the icon to the right size
            stream = BytesIO(icon_data)
            stream.seek(0)
            img = None
            try:
                img = Image.open(stream)
            except Exception as e:
                cpt.add_hint("icon-open-failed", {'icon_fname': icon_name, 'error': str(e)})
                return False
            newimg = img.resize((int(size), int(size)), Image.ANTIALIAS)
            newimg.save(icon_store_location)
            return True

        return False


    def _match_icon_on_filelist(self, cpt, filelist, icon_name, size):
        if size == "scalable":
            size_str = "scalable"
        else:
            size_str = str(size)
        icon_path = "usr/share/icons/hicolor/%s/apps/%s" % (size_str, icon_name)
        filtered = fnmatch.filter(filelist, icon_path)
        if not filtered:
            return None

        return filtered[0]


    def _match_and_store_icon(self, pkg_fname, cpt, cpt_export_path, filelist, icon_name, size):
        success = False
        matched_icon = self._match_icon_on_filelist(cpt, filelist, icon_name, size)
        if not matched_icon:
            return False

        if not size in self._icon_sizes:
            # scale icons to allowed sizes
            for asize in self._icon_sizes:
                success = self._store_icon(pkg_fname, cpt, cpt_export_path, matched_icon, asize) or success
        else:
            success = self._store_icon(pkg_fname, cpt, cpt_export_path, matched_icon, size)
        return success


    def _fetch_icon(self, cpt, cpt_export_path, pkg_fname, filelist):
        '''
        Find the icon referenced by a component and store it in the wanted sizes.

        cpt.icon is read as icon reference (absolute path or icon name) and is
        then reset to None; it is only set again by _store_icon() once an icon
        has been found.

        The search proceeds in this order:
          1. absolute path: looked up in the file list of the package and
             stored with IconSize(64)
          2. icon name: the hicolor theme directories of the package, first in
             the wanted sizes, then larger sizes / scalable graphics which are
             scaled down
          3. files in usr/share/pixmaps of the package
          4. the icon finder, which may point to icons in other packages

        Returns True if the component references no icon at all or an icon
        could be stored, False otherwise. For icons in a format which is not
        allowed, or icons which cannot be found, a hint is added to the
        component.
        '''
        if not cpt.icon:
            # if we don't know an icon-name or path, just return without error
            return True

        icon_str = cpt.icon
        # cleared here, _store_icon() assigns the name of the stored icon again
        cpt.icon = None

        # wanted sizes first, followed by the larger sizes which may be scaled down
        all_icon_sizes = self._icon_sizes[:]
        all_icon_sizes.extend(self._large_icon_sizes)

        success = False
        if icon_str.startswith("/"):
            # absolute path: the lookup in the file list is done without the leading slash
            # NOTE(review): if the path is not part of the file list, we fall through to the
            # pixmaps / icon-finder search below with icon_str still being the full path
            if icon_str[1:] in filelist:
                return self._store_icon(pkg_fname, cpt, cpt_export_path, icon_str[1:], IconSize(64))
        else:
            ret = False
            icon_str = os.path.basename (icon_str)
            # check if there is some kind of file-extension.
            # if there is none, the referenced icon is likely a stock icon, and we assume .png
            # NOTE(review): any dot counts as extension here, so icon names containing a dot
            # for other reasons do not get the .png suffix
            if "." in icon_str:
                icon_name_ext = icon_str
            else:
                icon_name_ext = icon_str + ".png"

            # first pass: look for the icon in exactly the sizes we want
            found_sizes = list()
            for size in self._icon_sizes:
                ret = self._match_and_store_icon(pkg_fname, cpt, cpt_export_path, filelist, icon_name_ext, size)
                if ret:
                    found_sizes.append(size)
                success = ret or success

            # try if we can add missing icon sizes by scaling down things
            # this also ensures that we also have an 64x64 sized icon
            if set(found_sizes) != set(self._icon_sizes):
                for size in self._icon_sizes:
                    if size in found_sizes:
                        continue
                    for asize in all_icon_sizes:
                        # never scale up: skip source sizes smaller than the wanted one
                        if asize < size:
                            continue
                        icon_fname = self._match_icon_on_filelist(cpt, filelist, icon_name_ext, asize)
                        if not icon_fname:
                            continue
                        ret = self._store_icon(pkg_fname, cpt, cpt_export_path, icon_fname, size)
                        if ret:
                            found_sizes.append(size)
                        success = ret or success
                        # only the first matching source size is tried for each wanted size
                        break

            # a 64x64 icon is required, so double-check if we have one
            if success and not IconSize(64) in found_sizes:
                success = False

            if not success:
                # we cheat and test for larger icons as well, which can be scaled down
                # first check for a scalable graphic
                # NOTE(review): the suffix is appended to icon_str as it is, also if
                # icon_str already carries a file extension
                success = self._match_and_store_icon(pkg_fname, cpt, cpt_export_path, filelist, icon_str + ".svg", "scalable")
                if not success:
                    success = self._match_and_store_icon(pkg_fname, cpt, cpt_export_path, filelist, icon_str + ".svgz", "scalable")
                # then try to scale down larger graphics
                if not success:
                    for size in self._large_icon_sizes:
                        success = self._match_and_store_icon(pkg_fname, cpt, cpt_export_path, filelist, icon_name_ext, size) or success

        if not success:
            last_pixmap = None
            # handle stuff in the pixmaps directory
            for path in filelist:
                if path.startswith("usr/share/pixmaps"):
                    file_basename = os.path.basename(path)
                    # match the icon name with or without its file extension
                    if ((file_basename == icon_str) or (os.path.splitext(file_basename)[0] == icon_str)):
                        # the pixmap dir can contain icons in multiple formats, and store_icon() fails in case
                        # the icon format is not allowed. We therefore only exit here, if the icon has a valid format
                        if self._icon_allowed(path):
                            return self._store_icon(pkg_fname, cpt, cpt_export_path, path, IconSize(64))
                        last_pixmap = path
            if last_pixmap:
                # we don't do a global icon search anymore, since we've found an (unsuitable) icon
                # already
                cpt.add_hint("icon-format-unsupported", {'icon_fname': os.path.basename(last_pixmap)})
                return False

            # last resort: ask the icon finder, the result is indexed by icon size and
            # names the package file ('deb_fname') and the icon in it ('icon_fname')
            icon_dict = self._icon_finder.find_icons(cpt.pkgname, icon_str, all_icon_sizes)
            success = False
            if icon_dict:
                for size in self._icon_sizes:
                    if not size in icon_dict:
                        continue

                    success = self._store_icon(icon_dict[size]['deb_fname'],
                                        cpt,
                                        cpt_export_path,
                                        icon_dict[size]['icon_fname'],
                                        size) or success
                if not success:
                    # nothing in the wanted sizes: store every larger icon found in all wanted sizes
                    for size in self._large_icon_sizes:
                        if not size in icon_dict:
                            continue
                        for asize in self._icon_sizes:
                            success = self._store_icon(icon_dict[size]['deb_fname'],
                                        cpt,
                                        cpt_export_path,
                                        icon_dict[size]['icon_fname'],
                                        asize) or success
                return success

            if ("." in icon_str) and (not self._icon_allowed(icon_str)):
                cpt.add_hint("icon-format-unsupported", {'icon_fname': icon_str})
            else:
                cpt.add_hint("icon-not-found", {'icon_fname': icon_str})
            return False

        return success


    def process(self, pkgname, pkg_fname, pkgid=None, metainfo_files=None):
        '''
        Extract the component metadata of one deb package.

        Reads the AppStream XML files (*.xml below usr/share/appdata) and the
        .desktop files (below usr/share/applications) of the package, merges
        the .desktop data into "desktop-app" components with a matching ID, and
        fetches icons and screenshots for all components which are not ignored
        and not in the cache already.

        pkgname is the name of the package, pkg_fname the path of the .deb file.
        pkgid identifies the package in the cache; if it is not given, the
        basename of pkg_fname without its extension is used. metainfo_files
        optionally limits the files which are looked at, by default all files
        of the package are scanned.

        Returns a list of DEP11Component objects, which is empty if the deb
        file could not be opened. Problems are recorded as hints on the
        components. If write_to_cache is set, the components are also written
        to the cache.
        '''
        deb = None
        try:
            deb = DebFile(pkg_fname)
        except Exception as e:
            log.error("Error reading deb file '%s': %s" % (pkg_fname, e))
        if not deb:
            return list()

        try:
            filelist = self._get_deb_filelist(deb)
        except Exception:
            # do not use a bare except here: KeyboardInterrupt / SystemExit must get through
            log.error("List of files for '%s' could not be read" % (pkg_fname))
            filelist = None

        if not filelist:
            cpt = DEP11Component(self._suite_name, self._archive_component, pkgname, pkgid)
            cpt.add_hint("deb-filelist-error", {'pkg_fname': os.path.basename(pkg_fname)})
            return [cpt]

        if not pkgid:
            # we didn't get an identifier, so start guessing one.
            idname, _ext = os.path.splitext(os.path.basename(pkg_fname))
            if not idname:
                idname = os.path.basename(pkg_fname)
            pkgid = idname

        export_path = "%s/%s" % (self._export_dir, self._archive_component)
        component_dict = dict()

        # if we don't have an explicit list of interesting files, we simply scan all
        if not metainfo_files:
            metainfo_files = filelist

        # first cache all additional metadata (.desktop/.pc/etc.) files
        mdata_raw = dict()
        for meta_file in metainfo_files:
            if meta_file.endswith(".desktop") and meta_file.startswith("usr/share/applications"):
                # We have a .desktop file
                dcontent = None
                cpt_id = os.path.basename(meta_file)

                error = None
                try:
                    dcontent = str(deb.data.extractdata(meta_file), 'utf-8')
                except Exception as e:
                    error = {'tag': "deb-extract-error",
                             'params': {'fname': cpt_id, 'pkg_fname': os.path.basename(pkg_fname), 'error': str(e)}}
                if not dcontent and not error:
                    error = {'tag': "deb-empty-file",
                             'params': {'fname': cpt_id, 'pkg_fname': os.path.basename(pkg_fname)}}
                mdata_raw[cpt_id] = {'error': error, 'data': dcontent}

        # process all AppStream XML files
        for meta_file in metainfo_files:
            if meta_file.endswith(".xml") and meta_file.startswith("usr/share/appdata"):
                xml_content = None
                cpt = DEP11Component(self._suite_name, self._archive_component, pkgname, pkgid)

                try:
                    xml_content = str(deb.data.extractdata(meta_file), 'utf-8')
                except Exception as e:
                    # inability to read an AppStream XML file is a valid reason to skip the whole package
                    cpt.add_hint("deb-extract-error", {'fname': meta_file, 'pkg_fname': os.path.basename(pkg_fname), 'error': str(e)})
                    return [cpt]
                if not xml_content:
                    continue

                read_appstream_upstream_xml(cpt, xml_content)
                # the component is registered before the ID check below, so a
                # component without ID is still returned together with its hint
                component_dict[cpt.cid] = cpt

                # Reads the desktop files associated with the xml file
                if not cpt.cid:
                    # if there is no ID at all, we dump this component, since we cannot do anything with it at all
                    cpt.add_hint("metainfo-no-id")
                    continue

                cpt.set_srcdata_checksum_from_data(xml_content)
                if cpt.kind == "desktop-app":
                    data = mdata_raw.get(cpt.cid)
                    if not data:
                        cpt.add_hint("missing-desktop-file")
                        continue
                    if data['error']:
                        # add a non-fatal hint that we couldn't process the .desktop file
                        cpt.add_hint(data['error']['tag'], data['error']['params'])
                    else:
                        # we have a .desktop component, extend it with the associated .desktop data
                        read_desktop_data(cpt, data['data'])
                        cpt.set_srcdata_checksum_from_data(xml_content+data['data'])
                    # the .desktop file is handled now, don't create a second component for it below
                    del mdata_raw[cpt.cid]

        # now process the remaining metadata files, which have not been processed together with the XML
        for mid, mdata in mdata_raw.items():
            if mid.endswith(".desktop"):
                # We have a .desktop file
                cpt = DEP11Component(self._suite_name, self._archive_component, pkgname, pkgid)
                cpt.cid = mid

                if mdata['error']:
                    # add a fatal hint that we couldn't process this file
                    cpt.add_hint(mdata['error']['tag'], mdata['error']['params'])
                    component_dict[cpt.cid] = cpt
                else:
                    ret = read_desktop_data(cpt, mdata['data'])
                    if ret or not cpt.has_ignore_reason():
                        component_dict[cpt.cid] = cpt
                        cpt.set_srcdata_checksum_from_data(mdata['data'])
                    else:
                        # this means that reading the .desktop file failed and we should
                        # silently ignore this issue (since the file was marked to be invisible on purpose)
                        pass

        # fetch media (icons/screenshots), if we don't ignore the component already
        # (a real list is used, to match the documented return type and the early returns above)
        cpts = list(component_dict.values())
        for cpt in cpts:
            if cpt.has_ignore_reason():
                continue
            if not cpt.global_id:
                log.error("Component '%s' from package '%s' has no source-data checksum / global-id." % (cpt.cid, pkg_fname))
                continue

            # check if we have a component generated from
            # this source data in the cache already.
            # To account for packages which change their package name, we
            # also need to check if the package this component is associated
            # with matches ours.
            existing_mdata = self._dcache.get_metadata(cpt.global_id)
            if existing_mdata:
                s = "Package: %s\n" % (pkgname)
                if s in existing_mdata:
                    continue
                else:
                    # the exact same metadata exists in a different package already, raise an error.
                    # ATTENTION: This does not cover the case where *different* metadata (as in, different summary etc.)
                    # but with the *same ID* exists. This kind of issue can only be caught when listing all IDs per
                    # suite/acomponent combination and checking for dupes (we do that in the DEP-11 validator and display
                    # the result prominently on the HTML pages)
                    ecpt = yaml.safe_load(existing_mdata)
                    cpt.add_hint("metainfo-duplicate-id", {'cid': cpt.cid, 'pkgname': ecpt.get('Package', '')})
                    continue

            self._fetch_icon(cpt, export_path, pkg_fname, filelist)
            if cpt.kind == 'desktop-app' and not cpt.icon:
                cpt.add_hint("gui-app-without-icon", {'cid': cpt.cid})
            else:
                self._fetch_screenshots(cpt, export_path)

        # write data to cache
        if self.write_to_cache:
            # write the components we found to the cache
            self._dcache.set_components(pkgid, cpts)

        return cpts