def run(self):
    """Merge extra and generated AppStream XML into one <applications> tree.

    Two sets of files are read, relative to the current working directory:

    * ``../appstream-extra/*.xml`` -- every child element of each
      application is copied as-is, and any ``*.png`` found under
      ``../screenshots-extra/<id>/`` is added as a screenshot.
    * ``./appstream/*.xml`` (processed in sorted order) -- applications
      with a missing id, an unrecognised id type, or an id that was
      already accepted are skipped with a warning; child elements whose
      tag starts with ``X-`` are treated as private and left out.

    Ids accepted from the generated files are recorded in
    ``self.application_ids`` as ``id text -> filename``.  Files that fail
    to parse in the generated set are logged and skipped.
    """

    # setup the output XML
    master_root = ET.Element("applications")
    master_root.set("version", "0.1")
    # NOTE(review): the tree object is not written out anywhere in this
    # block -- confirm whether the caller / a later step is expected to.
    master_tree = ET.ElementTree(master_root)

    # find any extra appstream files
    for extra_fn in glob.glob("../appstream-extra/*.xml"):
        root = ET.parse(extra_fn).getroot()
        self.log.update_key(os.path.basename(extra_fn))
        for app in root:
            app_id = app.find('id')
            if app_id is None:
                self.log.write(LoggerItem.WARNING, "appstream id not found")
                continue

            # add everything
            new = ET.SubElement(master_root, 'application')
            for elem in app:
                new.append(elem)

            # check for screenshots in ../screenshots-extra/${id}/*
            tmp = Application(None, self.cfg)
            tmp.set_id(app_id.text)
            self.log.write(LoggerItem.INFO, "adding %s" % tmp.app_id_full)
            overrides = glob.glob("../screenshots-extra/%s/*.png" % tmp.app_id)
            if overrides:
                self.log.write(LoggerItem.INFO,
                               "adding %i screenshot overrides" % len(overrides))
                for ss_fn in overrides:
                    tmp.add_screenshot_filename(ss_fn)
                tmp.build_xml_screenshots(new)

    # add the generated appstream files
    recognised_types = ('desktop', 'codec', 'font', 'inputmethod')
    for filename in sorted(glob.glob("./appstream/*.xml")):
        self.log.update_key(filename)
        try:
            tree = ET.parse(filename)
        except ET.ParseError as e:
            self.log.write(LoggerItem.WARNING,
                           "XML could not be parsed: %s" % str(e))
            continue
        for app in tree.getroot():
            # an <application> without an <id> cannot be typed or
            # de-duplicated, so skip it instead of crashing on None
            app_id = app.find('id')
            if app_id is None:
                self.log.write(LoggerItem.WARNING, "appstream id not found")
                continue

            # check type is known
            app_id_type = app_id.get('type')
            if app_id_type not in recognised_types:
                self.log.write(LoggerItem.WARNING,
                               "appstream id type %s not recognised" % app_id_type)
                continue

            # detect duplicate IDs in the data; the dictionary is keyed by
            # the id *text*, so the lookup has to use the text as well
            if app_id.text in self.application_ids:
                found = self.application_ids[app_id.text]
                self.log.write(LoggerItem.WARNING,
                               "duplicate ID found in %s and %s" % (filename, found))
                continue

            # add everything that isn't private
            new = ET.SubElement(master_root, 'application')
            for elem in app:
                if elem.tag.startswith("X-"):
                    continue
                new.append(elem)

            # success
            self.application_ids[app_id.text] = filename
            self.log.write(LoggerItem.INFO, "adding %s" % app_id.text)
def _percentage(count, total):
    """Return count as a whole-number percentage of total, or 0 if total is 0."""
    if total <= 0:
        return 0
    return 100 * count // total


def _description_text(elem):
    """Return the text of a <description> element.

    When the element has children, the text of each child is joined with
    a trailing space after every piece; children without text are skipped.
    Otherwise the element's own text is returned unchanged.
    """
    # len() is the public way to count children; the private ``_children``
    # attribute does not exist on every ElementTree implementation
    if len(elem):
        return u''.join(child.text + u' '
                        for child in elem if child.text is not None)
    return elem.text


def main():
    """Build ./screenshots/status.html from a gzipped AppStream XML file.

    The path of the compressed XML file is taken from ``sys.argv[1]``.
    Each child of the XML root is read into an ``Application`` object; the
    status page then gets an "Executive summary" list with counts and
    percentages, followed by the rendered HTML of every application whose
    type is not font, inputmethod or codec.
    """
    log = LoggerItem()
    cfg = Config()

    # read in AppStream file into several Application objects
    f = gzip.open(sys.argv[1], 'rb')
    try:
        tree = ET.parse(f)
    finally:
        # the whole document is in memory once parsed
        f.close()

    apps = []
    for app in tree.getroot():
        a = Application(None, cfg)
        for elem in app:
            tag = elem.tag
            if tag == 'id':
                a.set_id(elem.text)
                a.type_id = elem.get('type')
                log.update_key(a.app_id_full)
                log.write(LoggerItem.INFO, "parsing")
            elif tag == 'name':
                # only the untranslated value is kept
                if elem.get(XML_LANG):
                    continue
                a.names['C'] = ensure_unicode(elem.text)
            elif tag == 'summary':
                if elem.get(XML_LANG):
                    continue
                a.comments['C'] = ensure_unicode(elem.text)
            elif tag == 'pkgname':
                a.pkgnames.append(ensure_unicode(elem.text))
            elif tag == 'appcategories':
                for elem2 in elem:
                    a.categories.append(ensure_unicode(elem2.text))
            elif tag == 'keywords':
                for elem2 in elem:
                    a.keywords.append(ensure_unicode(elem2.text))
            elif tag == 'url':
                a.urls[elem.get('type')] = ensure_unicode(elem.text)
            elif tag == 'compulsory_for_desktop':
                a.compulsory_for_desktop.append(ensure_unicode(elem.text))
            elif tag == 'project_group':
                a.project_group = ensure_unicode(elem.text)
            elif tag == 'description':
                a.descriptions['C'] = ensure_unicode(_description_text(elem))
            elif tag == 'screenshots':
                if a.type_id == 'font':
                    continue
                for elem2 in elem:
                    if elem2.tag != 'screenshot':
                        continue
                    caption = None
                    for elem3 in elem2:
                        if elem3.tag == 'caption':
                            caption = elem3.text
                        elif elem3.tag == 'image':
                            if elem3.get('type') != 'source':
                                continue
                            s = Screenshot(a.app_id, None, caption)
                            s.basename = os.path.basename(elem3.text)
                            a.screenshots.append(s)
        apps.append(a)

    # build status page; the context manager closes the file even if
    # rendering one of the applications raises
    with open('./screenshots/status.html', 'w') as status:
        status.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 ' +
                     'Transitional//EN" ' +
                     '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n')
        status.write('<html xmlns="http://www.w3.org/1999/xhtml">\n')
        status.write('<head>\n')
        status.write('<meta http-equiv="Content-Type" content="text/html; ' +
                     'charset=UTF-8" />\n')
        status.write('<title>Application Data Review</title>\n')
        status.write('</head>\n')
        status.write('<body>\n')

        status.write('<h1>Executive summary</h1>\n')
        status.write('<ul>\n')

        # long descriptions, keywords, categories, screenshots: count the
        # applications for which the named collection is non-empty
        total = len(apps)
        summary_rows = (
            ('long descriptions', 'descriptions'),
            ('keywords', 'keywords'),
            ('categories', 'categories'),
            ('screenshots', 'screenshots'),
        )
        for label, attr in summary_rows:
            cnt = sum(1 for app in apps if len(getattr(app, attr)) > 0)
            status.write("<li>Applications in Fedora with %s: %i (%i%%)</li>" %
                         (label, cnt, _percentage(cnt, total)))

        # project apps with appdata
        for project_group in ['GNOME', 'KDE', 'XFCE']:
            members = [app for app in apps
                       if app.project_group == project_group]
            cnt = sum(1 for app in members
                      if len(app.screenshots) > 0 or len(app.descriptions) > 0)
            status.write("<li>Applications in %s with AppData: %i (%i%%)</li>" %
                         (project_group, cnt, _percentage(cnt, len(members))))
        status.write('</ul>\n')

        # write applications
        status.write('<h1>Applications</h1>\n')
        for app in apps:
            if app.type_id in ('font', 'inputmethod', 'codec'):
                continue
            log.update_key(app.app_id_full)
            log.write(LoggerItem.INFO, "writing")
            try:
                status.write(_to_utf8(_to_html(app)))
            except AttributeError as e:
                log.write(LoggerItem.WARNING,
                          "failed to write %s: %s" % (app, str(e)))

        # NOTE(review): no closing </body> / </html> is written by the
        # visible code -- confirm whether that is intentional.