def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
    """Yield one package per ``<version>`` element of an Npackd XML file.

    ``<license>`` and ``<package>`` elements are collected into lookup
    tables as they are encountered; each ``<version>`` element is then
    combined with the data previously stored for the package it refers to.

    Args:
        path: location of the XML file to read.
        factory: factory used to start a package maker for each version.

    Yields:
        A package maker for every ``<version>`` element.

    Raises:
        KeyError: if a version refers to a package, or a package refers to
            a license, that has not been seen earlier in the file.
    """
    license_titles: dict[str, str] = {}
    known_packages: dict[str, PackageData] = {}

    for element in iter_xml_elements_at_level(path, 1, ['license', 'package', 'version']):
        tag = element.tag

        if tag == 'license':
            license_titles[safe_getattr(element, 'name')] = safe_findtext(element, 'title')
            continue

        if tag == 'package':
            known_packages[safe_getattr(element, 'name')] = PackageData(
                safe_findtext(element, 'title'),
                safe_findalltexts(element, 'license'),
                safe_findalltexts(element, 'category'),
                safe_findalltexts(element, 'url'),
            )
            continue

        if tag != 'version':
            continue

        pkgname = safe_getattr(element, 'package')
        version = safe_getattr(element, 'name')

        with factory.begin(' '.join((pkgname, version))) as pkg:
            # data gathered from the previously parsed <package> entry
            data = known_packages[pkgname]

            # XXX: package naming is inconsistent (either plain name like
            # kdenlive or domain prefixed like com.abisource.abiword), but
            # it's assumed that everything up to the last dot may be
            # stripped (#863)
            pkg.add_name(data.title, NameType.NPACKD_TITLE)
            pkg.add_name(pkgname, NameType.NPACKD_FULLNAME)
            pkg.add_name(pkgname.rpartition('.')[2], NameType.NPACKD_LASTNAME)

            pkg.set_version(version)
            pkg.add_downloads(url_elt.text for url_elt in element.findall('url'))

            # license names are resolved to titles via the <license> entries
            pkg.add_licenses(license_titles[license_name] for license_name in data.licenses)
            pkg.add_categories(_filter_categories(data.categories))
            pkg.add_homepages(data.urls)

            yield pkg
def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
    """Yield packages parsed from RPM repository metadata XML.

    Every ``<package>`` element in the
    ``http://linux.duke.edu/metadata/common`` namespace is converted into
    a package, unless its architecture is not in ``self.allowed_archs``
    (when that is non-empty) or its name contains an unexpanded ``%{``
    substitution.

    Args:
        path: location of the XML file to read.
        factory: factory used to start package makers and to log the
            per-architecture skip summary.
        transformer: not used by this parser.

    Yields:
        A package maker for every accepted ``<package>`` element.

    Raises:
        RuntimeError: if a package has no ``<version>`` element.
        KeyError: if ``<version>`` lacks an ``epoch``, ``ver`` or ``rel``
            attribute.
    """
    normalize_version = VersionStripper().strip_right_greedy('+')

    # architecture -> number of packages skipped because of it
    skipped_archs: Dict[str, int] = {}

    for entry in iter_xml_elements_at_level(path, 1, ['{http://linux.duke.edu/metadata/common}package']):
        with factory.begin() as pkg:
            arch = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}arch')

            if self.allowed_archs and arch not in self.allowed_archs:
                skipped_archs[arch] = skipped_archs.get(arch, 0) + 1
                continue

            name = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}name')
            if '%{' in name:
                pkg.log('incorrect package name (unexpanded substitution)', severity=Logger.ERROR)
                continue

            pkg.add_name(name, NameType.GENERIC_PKGNAME)

            version_elt = entry.find('{http://linux.duke.edu/metadata/common}version')
            if version_elt is None:
                raise RuntimeError('Cannot find <version> element')

            epoch = version_elt.attrib['epoch']
            version = version_elt.attrib['ver']
            release = version_elt.attrib['rel']

            # Keep the upstream version string untouched for the raw
            # version; only the normalized version gets the pre-release
            # suffix appended.
            fixed_version = version

            match = re.match('0\\.[0-9]+\\.((?:alpha|beta|rc)[0-9]+)\\.', release)
            if match:
                # known pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Prerelease_versions
                fixed_version += '-' + match.group(1)
            elif release < '1':
                # unknown pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Some_definitions
                # most likely a snapshot
                # (lexicographic string comparison: true for releases
                # starting with '0')
                pkg.set_flags(PackageFlags.IGNORE)

            pkg.set_version(fixed_version, normalize_version)
            pkg.set_rawversion(nevra_construct(None, epoch, version, release))

            pkg.set_summary(entry.findtext('{http://linux.duke.edu/metadata/common}summary'))
            pkg.add_homepages(entry.findtext('{http://linux.duke.edu/metadata/common}url'))
            pkg.add_categories(entry.findtext('{http://linux.duke.edu/metadata/common}format/'
                                              '{http://linux.duke.edu/metadata/rpm}group'))
            pkg.add_licenses(entry.findtext('{http://linux.duke.edu/metadata/common}format/'
                                            '{http://linux.duke.edu/metadata/rpm}license'))
            pkg.set_arch(entry.findtext('{http://linux.duke.edu/metadata/common}arch'))

            packager = entry.findtext('{http://linux.duke.edu/metadata/common}packager')
            if packager:
                pkg.add_maintainers(extract_maintainers(packager))

            yield pkg

    for arch, numpackages in sorted(skipped_archs.items()):
        factory.log('skipped {} packages(s) with disallowed architecture {}'.format(numpackages, arch))
def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
    """Yield one package per ``<Package>`` element of the index XML.

    Args:
        path: location of the XML file to read.
        factory: factory used to start a package maker for each element.
        transformer: not used by this parser.

    Yields:
        A package maker filled with name, summary, licenses, categories,
        version, source (base) name and packager e-mail.
    """
    for package_elt in iter_xml_elements_at_level(path, 1, ['Package']):
        with factory.begin() as pkg:
            pkg.set_name(package_elt.findtext('Name'))
            pkg.set_summary(package_elt.findtext('Summary'))

            # <License> elements without text are ignored
            license_texts = (
                _expand_multiline_licenses(license_elt.text)
                for license_elt in package_elt.findall('License')
                if license_elt.text
            )
            pkg.add_licenses(license_texts)

            pkg.add_categories(part_elt.text for part_elt in package_elt.findall('PartOf'))

            # only the first <Update> in document order supplies the version
            first_update = package_elt.find('./History/Update')
            if first_update is not None:
                pkg.set_version(first_update.findtext('Version'))

            pkg.set_basename(package_elt.findtext('./Source/Name'))
            pkg.add_maintainers(package_elt.findtext('./Source/Packager/Email'))

            yield pkg
def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
    """Yield one package per ``<version>`` element of an Npackd XML file.

    ``<license>`` and ``<package>`` elements are collected into lookup
    tables as they are encountered; each ``<version>`` element is then
    combined with the data previously stored for the package it refers to.

    Args:
        path: location of the XML file to read.
        factory: factory used to start a package maker for each version.
        transformer: not used by this parser.

    Yields:
        A package maker for every ``<version>`` element.

    Raises:
        KeyError: if a version refers to a package, or a package refers to
            a license, that has not been seen earlier in the file.
    """
    # license name -> license title
    licenses: Dict[str, str] = {}
    # package name -> (title, license names, categories, urls)
    packages: Dict[str, Tuple[str, List[str], List[str], List[str]]] = {}

    for entry in iter_xml_elements_at_level(path, 1, ['license', 'package', 'version']):
        if entry.tag == 'license':
            licenses[safe_getattr(entry, 'name')] = safe_findtext(entry, 'title')
        elif entry.tag == 'package':
            packages[safe_getattr(entry, 'name')] = (
                safe_findtext(entry, 'title'),
                safe_findalltexts(entry, 'license'),
                safe_findalltexts(entry, 'category'),
                safe_findalltexts(entry, 'url'),
            )
        elif entry.tag == 'version':
            pkgname = safe_getattr(entry, 'package')
            version = safe_getattr(entry, 'name')

            with factory.begin(pkgname + ' ' + version) as pkg:
                # XXX: package naming is inconsistent (either plain name like kdenlive or
                # domain prefixed like com.abisource.abiword), but it's assumed that
                # everything up to the last dot may be stripped (#863)
                pkg.set_name(pkgname)
                pkg.set_basename(pkgname.split('.')[-1])
                pkg.set_version(version)
                pkg.add_downloads(e.text for e in entry.findall('url'))

                # from previously parsed <license> and <package> entries
                title, license_names, categories, urls = packages[pkgname]

                pkg.set_summary(title)
                pkg.add_licenses(licenses[license_name] for license_name in license_names)
                pkg.add_categories(_filter_categories(categories))
                # homepage URLs are passed as is: the category filter only
                # makes sense for categories
                pkg.add_homepages(urls)

                yield pkg
def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
    """Yield one package per ``<Package>`` element of the index XML.

    Args:
        path: location of the XML file to read.
        factory: factory used to start a package maker for each element.

    Yields:
        A package maker filled with binary and source names, summary,
        licenses, categories, version, packager e-mail, homepage and
        architecture.
    """
    for package_elt in iter_xml_elements_at_level(path, 1, ['Package']):
        with factory.begin() as pkg:
            pkg.add_name(package_elt.findtext('Name'), NameType.SOLUS_NAME)
            pkg.set_summary(package_elt.findtext('Summary'))

            # <License> elements without text are ignored
            license_texts = (
                _expand_multiline_licenses(license_elt.text)
                for license_elt in package_elt.findall('License')
                if license_elt.text
            )
            pkg.add_licenses(license_texts)

            pkg.add_categories(part_elt.text for part_elt in package_elt.findall('PartOf'))

            # only the first <Update> in document order supplies the version
            first_update = package_elt.find('./History/Update')
            if first_update is not None:
                pkg.set_version(first_update.findtext('Version'))

            pkg.add_name(package_elt.findtext('./Source/Name'), NameType.SOLUS_SOURCE_NAME)
            pkg.add_maintainers(package_elt.findtext('./Source/Packager/Email'))
            pkg.add_homepages(package_elt.findtext('./Source/Homepage'))
            pkg.set_arch(package_elt.findtext('./Architecture'))

            yield pkg
def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
    """Yield packages parsed from RPM repository metadata XML.

    Source packages (architecture ``src``) are processed only when
    ``self._src`` is set, all other packages only when ``self._binary``
    is set. Packages whose name contains an unexpanded ``%{``
    substitution are logged and dropped.

    Args:
        path: location of the XML file to read.
        factory: factory used to start package makers and for logging.
        transformer: not used by this parser.

    Yields:
        A package maker for every accepted ``<package>`` element.

    Raises:
        RuntimeError: if a required ``<location>`` or ``<version>``
            element is missing.
        KeyError: if ``<version>`` lacks an ``epoch``, ``ver`` or ``rel``
            attribute.
    """
    normalize_version = VersionStripper().strip_right_greedy('+')

    # architecture -> number of packages skipped because of it
    skipped_archs: Dict[str, int] = Counter()

    if self._arch_from_filename:
        factory.log('mitigation for incorrect <arch></arch> enabled', severity=Logger.WARNING)

    for package_elt in iter_xml_elements_at_level(path, 1, ['{http://linux.duke.edu/metadata/common}package']):
        if not self._arch_from_filename:
            arch = safe_findtext(package_elt, '{http://linux.duke.edu/metadata/common}arch')
        else:
            # XXX: openmandriva 3 hack, to be removed when it EoLs
            location_elt = package_elt.find('{http://linux.duke.edu/metadata/common}location')
            if location_elt is None:
                raise RuntimeError('Cannot find <location> element')
            arch = nevra_parse(safe_getattr(location_elt, 'href'))[4]

        is_src = arch == 'src'

        # each kind of package is only handled when its switch is enabled
        enabled = self._src if is_src else self._binary
        if not enabled:
            skipped_archs[arch] += 1
            continue

        with factory.begin() as pkg:
            name = safe_findtext(package_elt, '{http://linux.duke.edu/metadata/common}name')
            if '%{' in name:
                pkg.log('incorrect package name (unexpanded substitution)', severity=Logger.ERROR)
                continue

            if is_src:
                pkg.add_name(name, NameType.SRCRPM_NAME)
            else:
                pkg.add_name(name, NameType.BINRPM_NAME)
                # binary packages also carry the name of their source rpm
                sourcerpm = safe_findtext(
                    package_elt,
                    '{http://linux.duke.edu/metadata/common}format/'
                    '{http://linux.duke.edu/metadata/rpm}sourcerpm',
                )
                pkg.add_name(nevra_parse(sourcerpm)[0], NameType.BINRPM_SRCNAME)

            version_elt = package_elt.find('{http://linux.duke.edu/metadata/common}version')
            if version_elt is None:
                raise RuntimeError('Cannot find <version> element')

            version_attrs = version_elt.attrib
            epoch = version_attrs['epoch']
            version = version_attrs['ver']
            release = version_attrs['rel']

            # the raw version keeps `version` as is; only the normalized
            # version may get a pre-release suffix
            fixed_version = version

            prerelease = re.match('0\\.[0-9]+\\.((?:alpha|beta|rc)[0-9]+)\\.', release)
            if prerelease:
                # known pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Prerelease_versions
                fixed_version += '-' + prerelease.group(1)
            elif release < '1':
                # unknown pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Some_definitions
                # most likely a snapshot
                pkg.set_flags(PackageFlags.IGNORE)

            pkg.set_version(fixed_version, normalize_version)
            pkg.set_rawversion(nevra_construct(None, epoch, version, release))

            pkg.set_summary(package_elt.findtext('{http://linux.duke.edu/metadata/common}summary'))
            pkg.add_homepages(package_elt.findtext('{http://linux.duke.edu/metadata/common}url'))
            pkg.add_categories(package_elt.findtext(
                '{http://linux.duke.edu/metadata/common}format/'
                '{http://linux.duke.edu/metadata/rpm}group'
            ))
            pkg.add_licenses(package_elt.findtext(
                '{http://linux.duke.edu/metadata/common}format/'
                '{http://linux.duke.edu/metadata/rpm}license'
            ))
            pkg.set_arch(package_elt.findtext('{http://linux.duke.edu/metadata/common}arch'))

            packager = package_elt.findtext('{http://linux.duke.edu/metadata/common}packager')
            if packager:
                pkg.add_maintainers(extract_maintainers(packager))

            yield pkg

    for skipped_arch, count in sorted(skipped_archs.items()):
        factory.log('skipped {} packages(s) with disallowed architecture {}'.format(count, skipped_arch))
def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
    """Yield packages parsed from a Semantic MediaWiki RDF export.

    Every ``Subject`` element that has a page resource and a ``Name``
    property is considered. Entries are dropped when they have no
    version, are not in English, are imported from Debian, or are marked
    decommissioned/obsolete. When ``self._high_priority`` is set, entries
    not marked as high priority projects are dropped as well. Statistics
    on dropped and accepted entries are logged through ``factory`` once
    all elements have been processed.

    Args:
        path: location of the RDF/XML file to read.
        factory: factory used to start package makers and for logging.

    Yields:
        A package maker for every accepted entry.
    """
    # All encountered version_status values:
    # alpha, beta, developmental, historical, mature, planning, rolling, stable, testing, unknown, unstable
    unstable_statuses = {'alpha', 'beta', 'developmental', 'planning', 'testing', 'unstable'}

    total = 0
    without_version = 0
    non_english = 0
    from_debian = 0
    obsolete = 0
    accepted = 0
    unstable = 0

    for subject in iter_xml_elements_at_level(path, 1, ['{http://semantic-mediawiki.org/swivt/1.0#}Subject']):
        page_urls = _get_attrs(
            subject,
            '{http://semantic-mediawiki.org/swivt/1.0#}page',
            '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource',
        )
        if not page_urls:
            continue

        # the page name is the last path component of the first page URL
        page = _unescape(page_urls[0].split('/')[-1])

        with factory.begin(page) as pkg:
            label = safe_findtext(subject, '{http://www.w3.org/2000/01/rdf-schema#}label')
            name = subject.findtext('{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Name')
            version = subject.findtext(
                '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Version_identifier'
            )

            # entries without a name are not counted at all
            if name is None:
                continue

            total += 1

            if version is None:
                without_version += 1
                continue

            language = subject.findtext('{http://semantic-mediawiki.org/swivt/1.0#}wikiPageContentLanguage')
            if language != 'en':
                non_english += 1
                continue

            import_source = subject.findtext(
                '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Import_source'
            )
            if import_source == 'Debian':
                # 'Debian import' seems OK though
                from_debian += 1
                continue

            decommissioned = subject.findtext(
                '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Decommissioned_or_Obsolete'
            )
            if decommissioned == 'Yes':
                obsolete += 1
                continue

            if self._high_priority:
                high_priority = subject.findtext(
                    '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Is_High_Priority_Project'
                )
                if high_priority != 'true':
                    continue

            version_status = subject.findtext(
                '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Version_status'
            )
            if version_status in unstable_statuses:
                unstable += 1
                pkg.set_flags(PackageFlags.DEVEL)
            elif version_status == 'rolling':
                pkg.set_flags(PackageFlags.ROLLING)

            accepted += 1

            pkg.add_name(page, NameType.GENERIC_GEN_NAME)
            pkg.set_version(version)
            pkg.set_summary(subject.findtext(
                '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Short_description'
            ))

            pkg.add_homepages(_get_attrs(
                subject,
                '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Homepage_URL',
                '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource',
            ))
            pkg.add_downloads(_get_attrs(
                subject,
                '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Version_download',
                '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource',
            ))

            pkg.set_extra_field('page', page)
            pkg.set_extra_field('name', name)
            pkg.set_extra_field('label', label)

            yield pkg

    factory.log('Total software entries (with Name and Version): {}'.format(total))
    factory.log('Dropped entries with no version defined: {}'.format(without_version))
    factory.log('Dropped non-english pages: {}'.format(non_english))
    factory.log('Dropped entries marked as Import_source=Debian: {}'.format(from_debian))
    factory.log('Dropped entries marked as Decommissioned_or_Obsolete: {}'.format(obsolete))
    factory.log('Accepted entries: {} ({} unstable)'.format(accepted, unstable))