def Parse(self, path):
    """Walk *path* recursively and build a Package for every .json file found."""
    packages = []

    for dirpath, _dirnames, filenames in os.walk(path):
        for entry in filenames:
            if not entry.endswith('.json'):
                continue

            fullpath = os.path.join(dirpath, entry)
            with open(fullpath, 'r', encoding='utf-8') as fd:
                metadata = json.load(fd, strict=False)

            pkg = Package()

            pkg.name = entry[:-5]  # strip the '.json' suffix
            pkg.version = metadata['version']

            if 'url' in metadata:
                url = metadata['url']
                pkg.downloads = url if isinstance(url, list) else [url]

            if 'homepage' in metadata:
                pkg.homepage = metadata['homepage']

            if 'license' in metadata:
                pkg.licenses = [metadata['license']]

            pkg.extrafields = {'path': os.path.relpath(fullpath, path)}

            packages.append(pkg)

    return packages
def Parse(self, path):
    """Parse a JSON API reply of the shape {'success': ..., 'data': {'apps': [...]}}.

    Raises RuntimeError when the reply does not indicate success.
    """
    with open(path, 'r', encoding='utf-8') as fd:
        reply = json.load(fd)

    if not reply['success']:
        raise RuntimeError('non-success json reply, cannot parse')

    packages = []

    for app in reply['data']['apps']:
        pkg = Package()

        pkg.name = app['title']
        pkg.version = app['version']
        pkg.licenses = [app['license']]

        if 'tagline' in app:
            pkg.comment = app['tagline']

        if 'support' in app:
            pkg.homepage = app['support']

        packages.append(pkg)

    return packages
def Parse(self, path):
    """Parse an APKINDEX file into a list of Packages.

    APKINDEX is a sequence of records separated by blank lines; each record
    line has the form 'X:value' with a single-letter key. Subpackages
    (whose name 'P' differs from their origin 'o') are skipped.
    """
    packages = []

    def process(state):
        # Convert one complete record into a Package, if it qualifies.
        if not state:
            return
        if state['P'] != state['o']:
            # subpackage; only origin packages are reported
            return
        pkg = Package()
        pkg.name = state['P']
        pkg.version, pkg.origversion = SanitizeVersion(state['V'])
        pkg.comment = state['T']
        pkg.homepage = state['U']  # XXX: switch to homepages, split
        pkg.licenses = [state['L']]
        if 'm' in state:
            pkg.maintainers = GetMaintainers(state['m'])
        packages.append(pkg)

    with open(os.path.join(path, 'APKINDEX'), 'r', encoding='utf-8') as apkindex:
        state = {}
        for line in apkindex:
            line = line.strip()
            if line:
                state[line[0]] = line[2:].strip()
                continue
            process(state)
            # Fix: previously the state was kept when a subpackage record was
            # skipped, leaking its optional fields (e.g. 'm') into the next
            # record; now it is always reset at each record boundary.
            state = {}
        # Fix: flush the final record even when the file does not end with a
        # blank line (the original dropped it silently).
        process(state)

    return packages
def Parse(self, path):
    """Parse an XML index whose <info> elements carry fn/url/license attributes.

    The 'fn' attribute looks like NAME-VERSION-RELEASE; NAME may itself
    contain dashes, so the last two dashes delimit version and release.
    """
    packages = []

    for info in xml.etree.ElementTree.parse(path).findall('./info'):
        fn = info.attrib['fn']
        url = info.attrib['url']
        license_ = info.attrib['license']

        # locate the last two dashes in fn
        verdash = fn.rfind('-')
        namedash = fn.rfind('-', 0, verdash) if verdash != -1 else -1
        if namedash == -1:
            print('WARNING: unable to parse fn: {}'.format(fn), file=sys.stderr)
            continue

        pkg = Package()

        pkg.name = fn[:namedash]
        pkg.origversion = fn[namedash + 1:]
        pkg.version = fn[namedash + 1:verdash]
        pkg.homepage = url
        pkg.licenses = [license_]

        packages.append(pkg)

    return packages
def Parse(self, path):
    """Scrape package-preview entries from the .html files found in *path*."""
    packages = []

    for entry in os.listdir(path):
        if not entry.endswith('.html'):
            continue

        with open(os.path.join(path, entry), encoding='utf-8') as htmlfile:
            document = lxml.html.document_fromstring(htmlfile.read())

        for preview in document.xpath('.//div[@class="package-preview"]'):
            pkg = Package()

            # header: "<name> <version>" plus a synopsis span
            header = preview.xpath('./h3[@class="package-name"]')[0]
            pkg.name, rawversion = header.text.split(' ', 1)
            pkg.version, pkg.origversion = SanitizeVersion(rawversion.strip())

            synopsis = header.xpath('./span[@class="package-synopsis"]')[0].text
            pkg.comment = synopsis.strip().strip('—').strip() or None

            # details list: one <li> per labelled field
            for item in preview.xpath('./ul[@class="package-info"]/li'):
                label = item.xpath('./b')[0].text
                if label == 'License:':
                    pkg.licenses = [a.text for a in item.xpath('./a')]
                elif label == 'Website:':
                    pkg.homepage = item.xpath('./a')[0].attrib['href']
                elif label == 'Package source:':
                    pkg.extrafields['source'] = item.xpath('./a')[0].text

            packages.append(pkg)

    return packages
def Parse(self, path):
    """Parse recipe .json files found under *path* into Packages, one per version."""
    result = []

    for filename in walk_tree(path, suffix='.json'):
        # Fix: open via a context manager so the file handle is closed
        # deterministically (the original json.load(open(...)) leaked it).
        with open(filename, encoding='utf-8', errors='ignore') as jsonfile:
            data = json.load(jsonfile)

        if 'versions' not in data:
            continue

        for version, versiondata in data['versions'].items():
            pkg = Package()

            pkg.name = data['name']
            if data['license']:
                pkg.licenses = [data['license']]
            pkg.homepage = data['url']
            pkg.version = version
            pkg.extrafields['recipe'] = os.path.relpath(filename, path)

            # downloads deliberately not filled from versiondata['source']:
            # those entries are garbage (git:// links or specific commits)

            result.append(pkg)

    return result
def Parse(self, path):
    """Parse a Wikidata SPARQL reply (simplified by SimplifyResult) into Packages.

    Each result row may carry several package names (taken from Arch/AUR)
    and several versions; a Package is produced for every (name, version)
    pair that is not a foreign-OS-only release.
    """
    result = []
    jsondata = None
    with open(path, 'r', encoding='utf-8') as jsonfile:
        jsondata = json.load(jsonfile)

    for packagedata in SimplifyResult(jsondata):
        entity = packagedata['project'].rsplit('/', 1)[-1]  # this is URL, take only the ID from it

        # use Arch and AUR package names as a name, as they are most non-ambigous
        names = []
        for field in ['arch_packages', 'aur_packages']:
            if packagedata[field]:
                names = packagedata[field].split(', ')
                break

        # generate a package for each package name; these will be merged anyway
        for name in set(names):
            # generate a package for each version
            for version in packagedata['versions'].split(', '):
                # each version comes as 'version|flag|flag...'; 'U' marks a
                # devel/unstable version, 'O' without 'L' marks a release
                # for a non-linux OS only
                version, *flags = version.split('|')
                is_devel = 'U' in flags
                is_foreign_os_release = 'O' in flags and 'L' not in flags

                if is_foreign_os_release:
                    print('WARNING: {} ({}) version {} skipped as non-linux release'.format(packagedata['projectLabel'], entity, version), file=sys.stderr)
                    continue

                pkg = Package()

                pkg.SetFlag(PackageFlags.devel, is_devel)

                pkg.name = entity
                pkg.effname = name
                pkg.version = version

                # prefer the human-readable description, fall back to the label
                if 'projectDescription' in packagedata:
                    pkg.comment = packagedata['projectDescription']
                else:
                    pkg.comment = packagedata['projectLabel']

                if packagedata['licenses']:
                    pkg.licenses = packagedata['licenses'].split(', ')

                if packagedata['websites']:
                    pkg.homepage = packagedata['websites'].split(', ')[0]  # XXX: use all websites when supported

                result.append(pkg)

    return result
def Parse(self, path):
    """Run the tclsh helper over a MacPorts tree and parse its JSON output.

    The helper (self.helperpath) dumps port metadata as JSON to stdout,
    which is consumed directly from the pipe.
    """
    result = []
    with subprocess.Popen([repology.config.TCLSH, self.helperpath, path], errors='ignore', stdout=subprocess.PIPE, universal_newlines=True) as macportsjson:
        for pkgdata in json.load(macportsjson.stdout):
            pkg = Package()

            pkg.name = pkgdata['name']
            pkg.version = pkgdata['version']

            # drop obsolete ports (see #235)
            if 'replaced_by' in pkgdata:
                continue

            if 'description' in pkgdata:
                pkg.comment = pkgdata['description']

            if 'homepage' in pkgdata:
                pkg.homepage = pkgdata['homepage']

            if 'categories' in pkgdata:
                pkg.category = pkgdata['categories'].split()[0]

            if 'license' in pkgdata:
                pkg.licenses = [pkgdata['license']]  # XXX: properly handle braces

            if 'maintainers' in pkgdata:
                # maintainers come as a tcl-style whitespace-separated list;
                # braces are stripped and each entry decoded into an email
                for maintainer in pkgdata['maintainers'].replace('{', '').replace('}', '').lower().split():
                    if maintainer.startswith('@'):
                        # @foo means github user foo
                        pkg.maintainers.append(maintainer[1:] + '@github')
                    elif '@' in maintainer:
                        # plain email
                        pkg.maintainers.append(maintainer)
                    elif ':' in maintainer:
                        # foo.com:bar means [email protected]
                        host, user = maintainer.split(':', 1)
                        pkg.maintainers.append(user + '@' + host)
                    elif maintainer == 'openmaintainer':
                        # ignore, this is a flag that minor changes to a port
                        # are allowed without involving the maintainer
                        pass
                    else:
                        # otherwise it's [email protected]
                        pkg.maintainers.append(maintainer + '@macports.org')

            pkg.extrafields['portdir'] = pkgdata['portdir']
            pkg.extrafields['portname'] = pkgdata['portdir'].split('/')[1]

            result.append(pkg)

    return result
def Parse(self, path):
    """Parse a Hackage index tree laid out as path/<module>/<version>/<module>.cabal.

    For each module the highest available version is selected; its .cabal
    file supplies the details after a name/version sanity check.
    """
    packages = []

    for moduledir in os.listdir(path):
        modulepath = os.path.join(path, moduledir)

        # pick the highest version subdirectory
        maxversion = None
        for versiondir in os.listdir(modulepath):
            if versiondir == 'preferred-versions':
                continue  # metadata file, not a version directory
            if maxversion is None or version_compare(versiondir, maxversion) > 0:
                maxversion = versiondir

        if maxversion is None:
            print('WARNING: cannot determine max version for {}'.format(moduledir), file=sys.stderr)
            continue

        cabalpath = os.path.join(path, moduledir, maxversion, moduledir + '.cabal')

        pkg = Package()

        pkg.name = moduledir
        pkg.version = maxversion
        pkg.homepage = 'http://hackage.haskell.org/package/' + moduledir

        cabaldata = self.ParseCabal(cabalpath)
        if cabaldata['name'] == pkg.name and version_compare(cabaldata['version'], pkg.version) == 0:
            if 'synopsis' in cabaldata and cabaldata['synopsis']:
                pkg.comment = cabaldata['synopsis'].strip()
            if 'maintainer' in cabaldata:
                pkg.maintainers = extract_maintainers(cabaldata['maintainer'])
            if 'license' in cabaldata:
                pkg.licenses = [cabaldata['license']]
            if 'homepage' in cabaldata and cabaldata['homepage'].startswith(('http://', 'https://')):
                pkg.homepage = cabaldata['homepage']
            if 'category' in cabaldata:
                pkg.category = cabaldata['category']
        else:
            print('WARNING: cabal data sanity check failed for {}, ignoring cabal data'.format(cabalpath), file=sys.stderr)

        packages.append(pkg)

    return packages
def parse_package(fields):
    """Build a Package from one CPAN metadata record (*fields* is a dict)."""
    pkg = Package()

    pkg.name = ensure_str(fields['distribution'])
    pkg.version = ensure_str(fields['version'])
    # CPAN authors are identified by lowercased PAUSE id
    pkg.maintainers = [ensure_str(fields['author']).lower() + '@cpan']
    pkg.licenses = ensure_list(fields['license'])
    # optional fields
    pkg.comment = ensure_str(fields.get('abstract'))
    pkg.homepage = ensure_str(fields.get('resources.homepage'))
    pkg.downloads = ensure_list(fields.get('download_url'))

    return pkg
def Parse(self, path):
    """Parse pacman-style 'desc' files, one per package directory under *path*.

    A desc file consists of %KEY% headers each followed by value lines,
    with sections separated by blank lines. Subpackages (NAME != BASE)
    are skipped.
    """
    packages = []

    for packagedir in os.listdir(path):
        with open(os.path.join(path, packagedir, 'desc'), 'r', encoding='utf-8') as descfile:
            key = None
            value = []
            data = {}
            for line in descfile:
                line = line.strip()
                if line.startswith('%') and line.endswith('%'):
                    key = line[1:-1]
                    value = []
                elif line == '':
                    # Fix: guard against a blank line appearing before any
                    # %KEY% header (previously stored under a None key)
                    if key is not None:
                        data[key] = value
                        key = None
                        value = []
                else:
                    value.append(line)
            # Fix: flush the last section when the file does not end with a
            # blank line (previously it was silently dropped)
            if key is not None:
                data[key] = value

        if 'BASE' in data and data['NAME'][0] != data['BASE'][0]:
            print('{} skipped, subpackage'.format(data['NAME'][0]), file=sys.stderr)
            continue

        pkg = Package()

        pkg.name = data['NAME'][0]
        pkg.version, pkg.origversion = SanitizeVersion(data['VERSION'][0])

        if 'DESC' in data:
            pkg.comment = data['DESC'][0]

        if 'URL' in data:
            pkg.homepage = data['URL'][0]

        if 'LICENSE' in data:
            pkg.licenses = data['LICENSE']

        pkg.maintainers = sum(map(extract_maintainers, data['PACKAGER']), [])

        if 'GROUPS' in data:
            pkg.category = data['GROUPS'][0]

        packages.append(pkg)

    return packages
def Parse(self, path):
    """Parse an HTML page containing a single table of name/version/comment/license rows."""
    document = lxml.html.parse(path).getroot()
    table = document.xpath('.//table')[0]

    packages = []

    for row in table.xpath('./tbody/tr'):
        pkg = Package()

        pkg.name = row.xpath('./td[1]/a')[0].text
        pkg.version = row.xpath('./td[2]')[0].text
        pkg.comment = row.xpath('./td[3]')[0].text
        pkg.licenses = [row.xpath('./td[4]')[0].text]

        packages.append(pkg)

    return packages
def Parse(self, path):
    """Parse the freshcode releases database, keeping only the newest
    release seen for each package name.

    Note: this parses the database prepared by the fetcher, not the raw
    downloaded file.
    """
    latest = {}

    with open(path, 'r', encoding='utf-8') as jsonfile:
        for entry in json.load(jsonfile)['releases']:
            pkg = Package()

            pkg.name = entry['name']
            pkg.version = entry['version']

            if not pkg.name or not pkg.version:
                continue

            if entry.get('homepage'):
                pkg.homepage = entry['homepage']

            if entry.get('summary'):
                pkg.comment = entry['summary']
            elif entry.get('description'):
                pkg.comment = entry['description']  # multiline

            if entry.get('license'):
                pkg.licenses = [entry['license']]

            # 'submitter' is unfiltered garbage and 'download' may point to
            # a download page instead of a file, so both are ignored for now

            previous = latest.get(pkg.name)
            if previous is None or version_compare(pkg.version, previous.version) > 0:
                latest[pkg.name] = pkg

    return latest.values()
def Parse(self, path):
    """Parse a Rosa XML index; each <info> element carries fn/url/license attributes."""
    packages = []

    for info in xml.etree.ElementTree.parse(path).findall('./info'):
        # Rosa packages are named PKGNAME-PKGVER-ROSAREV; the name may
        # itself contain dashes, so split off only the last two components
        fnparts = info.attrib['fn'].rsplit('-', 2)
        if len(fnparts) < 3:
            print('WARNING: unable to parse fn: {}'.format(fnparts), file=sys.stderr)
            continue

        pkg = Package()

        pkg.name = fnparts[0]
        pkg.origversion = '-'.join(fnparts[1:])
        pkg.version = fnparts[1]

        # ROSAREV is most commonly N.src; anything else may contain
        # prerelease components (alpha/beta/rc/pre/...) or snapshot
        # revisions/dates, so the version is marked ignored (likely a
        # snapshot and thus cannot be trusted) and any prerelease part is
        # pulled into the version
        if not re.fullmatch(r'[0-9]+\.src', fnparts[2]):
            pkg.SetFlag(PackageFlags.ignore)
            prerelease = re.search(r'\b(a|alpha|b|beta|pre|rc)[0-9]+', fnparts[2].lower())
            if prerelease:
                pkg.version += prerelease.group(0)

        # process url and license
        url = info.attrib['url']
        if url:
            pkg.homepage = url

        pkg.licenses = [info.attrib['license']]

        packages.append(pkg)

    return packages
def Parse(self, path):
    """Parse an OpenPKG RDF index file into a list of Packages."""
    # namespace prefixes used throughout the document
    OPENPKG = '{http://www.openpkg.org/xml-rdf-index/0.9}'
    RDF = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}'

    packages = []

    root = xml.etree.ElementTree.parse(path)
    repository = root.find(OPENPKG + 'Repository')

    for item in repository.findall(RDF + 'Description'):
        pkg = Package()

        pkg.name = item.find(OPENPKG + 'Name').text
        pkg.version = item.find(OPENPKG + 'Version').text
        pkg.licenses = [item.find(OPENPKG + 'License').text]
        pkg.comment = item.find(OPENPKG + 'Summary').text
        pkg.category = item.find(OPENPKG + 'Group').text
        pkg.homepage = item.find(OPENPKG + 'URL').text

        # collect upstream source urls, skipping openpkg's own mirrors
        for source in item.findall('./' + OPENPKG + 'Source/' + RDF + 'bag/' + RDF + 'li'):
            text = source.text
            if (text.startswith('https://') or text.startswith('http://') or text.startswith('ftp://')) and 'openpkg.org' not in text:
                pkg.downloads.append(text)

        # a version which embeds the release suffix cannot be trusted
        release = item.find(OPENPKG + 'Release').text
        if pkg.version.endswith(release):
            pkg.SetFlag(PackageFlags.untrusted)

        packages.append(pkg)

    return packages
def iter_parse(self, path):
    """Yield a Package for every entry of a nixpkgs JSON dump.

    Derivation names are split into (name, version) the same way Nix does,
    with a few hardcoded exceptions; versions which look like git
    snapshots, dates or commit hashes are flagged as ignored.
    """
    with open(path, 'r', encoding='utf-8') as jsonfile:
        for key, packagedata in json.load(jsonfile)['packages'].items():
            # see how Nix parses 'derivative' names in
            # https://github.com/NixOS src/libexpr/names.cc, DrvName::DrvName
            # it just splits on dash followed by non-letter
            #
            # this doesn't work well on 100% cases, it's an upstream problem
            match = re.match('(.+?)-([^a-zA-Z].*)$', packagedata['name'])
            if not match:
                print('cannot extract version: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                continue

            pkg = Package()
            pkg.name = match.group(1)
            pkg.version = match.group(2)

            # some exceptions
            # font packages whose name legitimately ends in -75dpi/-100dpi
            for prefix in ('75dpi', '100dpi'):
                if pkg.version.startswith(prefix):
                    pkg.name += '-' + prefix
                    pkg.version = pkg.version[len(prefix) + 1:]

            # packages whose name itself contains a dash-digit part
            merged = pkg.name + '-' + pkg.version
            for pkgname in ['liblqr-1', 'python2.7-3to2', 'python3.6-3to2']:
                if merged.startswith(pkgname):
                    pkg.name = pkgname
                    pkg.version = merged[len(pkgname) + 1:]

            # first component of the attribute path is used as category
            keyparts = key.split('.')
            if len(keyparts) > 1:
                pkg.category = keyparts[0]

            # -git suffix marks a snapshot build: strip it and ignore the version
            if pkg.name.endswith('-git'):
                pkg.name = pkg.name[:-4]
                pkg.SetFlag(PackageFlags.ignore)

            # versions containing a date are snapshots as well
            if re.match('.*20[0-9]{2}-[0-9]{2}-[0-9]{2}', pkg.version):
                pkg.SetFlag(PackageFlags.ignore)

            if re.match('[0-9a-f]*[a-f][0-9a-f]*$', pkg.version) and len(pkg.version) >= 7:
                print('ignoring version which looks like commit hash: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                pkg.SetFlag(PackageFlags.ignore)

            meta = packagedata['meta']

            if 'homepage' in meta:
                pkg.homepage = meta['homepage']
                if isinstance(pkg.homepage, list):  # XXX: remove after adding support for homepages array
                    pkg.homepage = pkg.homepage[0]

            if 'description' in meta and meta['description']:
                pkg.comment = meta['description'].replace('\n', ' ').strip()

            if 'maintainers' in meta:
                if not isinstance(meta['maintainers'], list):
                    print('maintainers is not a list: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                else:
                    pkg.maintainers += list(extract_nix_maintainers(meta['maintainers']))

            if 'license' in meta:
                pkg.licenses = extract_nix_licenses(meta['license'])

            if 'position' in meta:
                # 'position' is 'path/to/file.nix:lineno'
                posfile, posline = meta['position'].rsplit(':', 1)
                pkg.extrafields['posfile'] = posfile
                pkg.extrafields['posline'] = posline

            yield pkg
def Parse(self, path):
    """Parse a nixpkgs JSON dump into a list of Packages (older variant).

    Derivation names are split into (name, version) the same way Nix does,
    with a few hardcoded exceptions; versions which look like git
    snapshots, dates or commit hashes are marked with ignoreversion.
    """
    result = []
    with open(path, 'r', encoding='utf-8') as jsonfile:
        for key, packagedata in sorted(json.load(jsonfile)['packages'].items()):
            # see how Nix parses 'derivative' names in
            # https://github.com/NixOS src/libexpr/names.cc, DrvName::DrvName
            # it just splits on dash followed by non-letter
            #
            # this doesn't work well on 100% cases, it's an upstream problem
            match = re.match('(.+?)-([^a-zA-Z].*)$', packagedata['name'])
            if not match:
                print('cannot extract version: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                continue

            pkg = Package()
            pkg.name = match.group(1)
            pkg.version = match.group(2)

            # some exceptions
            # font packages whose name legitimately ends in -75dpi/-100dpi
            for prefix in ('75dpi', '100dpi'):
                if pkg.version.startswith(prefix):
                    pkg.name += '-' + prefix
                    pkg.version = pkg.version[len(prefix) + 1:]

            # packages whose name itself contains a dash-digit part
            for pkgname in ('liblqr', ):
                if pkg.name == pkgname:
                    dashpos = pkg.version.find('-')
                    pkg.name = pkg.name + '-' + pkg.version[0:dashpos]
                    pkg.version = pkg.version[dashpos + 1:]

            # -git suffix marks a snapshot build: strip it and ignore the version
            if pkg.name.endswith('-git'):
                pkg.name = pkg.name[:-4]
                print('ignoring version for git snapshot: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                pkg.ignoreversion = True

            # versions containing a date are snapshots as well
            if re.match('.*20[0-9]{2}-[0-9]{2}-[0-9]{2}', pkg.version):
                print('ignoring version which is a date: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                pkg.ignoreversion = True

            if re.match('[0-9a-f]*[a-f][0-9a-f]*$', pkg.version) and len(pkg.version) >= 7:
                print('ignoring version which looks like commit hash: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                pkg.ignoreversion = True

            meta = packagedata['meta']

            if 'homepage' in meta:
                pkg.homepage = meta['homepage']
                if isinstance(pkg.homepage, list):  # XXX: remove after adding support for homepages array
                    pkg.homepage = pkg.homepage[0]

            if 'description' in meta:
                pkg.comment = meta['description']

            if 'maintainers' in meta:
                maintainers = meta['maintainers']
                if not isinstance(meta['maintainers'], list):
                    print('maintainers is not a list: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                else:
                    # NOTE(review): reconstructed nesting — maintainers are
                    # only assigned when the field is a proper list
                    maintainers = ', '.join(maintainers)
                    pkg.maintainers = GetMaintainers(maintainers)

            if 'license' in meta:
                pkg.licenses = ExtractLicenses(meta['license'])

            result.append(pkg)

    return result
def Parse(self, path):
    """Parse a GoboLinux recipes tree laid out as path/trunk/<package>/<version>/.

    For each package the highest version is selected; its Recipe supplies
    download urls and its Resources/Description supplies summary, license
    and homepage.
    """
    result = []

    trunk_path = os.path.join(path, 'trunk')

    for package_name in os.listdir(trunk_path):
        package_path = os.path.join(trunk_path, package_name)

        # pick the highest version subdirectory
        maxversion = None
        for version_name in os.listdir(package_path):
            if maxversion is None or VersionCompare(version_name, maxversion) > 0:
                maxversion = version_name

        if maxversion is None:
            print('WARNING: no usable versions for package {}'.format(package_name), file=sys.stderr)
            continue

        recipe_path = os.path.join(package_path, maxversion, 'Recipe')
        description_path = os.path.join(package_path, maxversion, 'Resources', 'Description')

        pkg = Package()
        pkg.name = package_name
        pkg.version = maxversion

        # Recipe is a shell-like file; only url= lines are extracted, and
        # only when all template variables could be expanded
        if os.path.isfile(recipe_path):
            with open(recipe_path, 'r', encoding='utf-8', errors='ignore') as recipe:
                for line in recipe:
                    line = line.strip()
                    if line.startswith('url='):
                        download = ExpandDownloadUrlTemplates(line[4:])
                        if download.find('$') == -1:
                            pkg.downloads.append(download.strip('"'))
                        else:
                            print('WARNING: Recipe for {}/{} skipped, unhandled URL substitude found'.format(package_name, maxversion), file=sys.stderr)

        # Description consists of [Tag] headers followed by continuation
        # lines, which are joined with spaces
        if os.path.isfile(description_path):
            with open(description_path, 'r', encoding='utf-8', errors='ignore') as description:
                data = {}
                current_tag = None
                for line in description:
                    line = line.strip()
                    match = re.match('^\[([A-Z][a-z]+)\] *(.*?)$', line)
                    if match:
                        current_tag = match.group(1)
                        data[current_tag] = match.group(2)
                    elif current_tag is None:
                        # content before the first [Tag] header: give up on this file
                        print('WARNING: Description for {}/{} skipped, dumb format'.format(package_name, maxversion), file=sys.stderr)
                        break
                    elif line:
                        if data[current_tag]:
                            data[current_tag] += ' '
                        data[current_tag] += line

                if 'Summary' in data:
                    pkg.comment = data['Summary']
                if 'License' in data:
                    pkg.licenses = [data['License']]
                if 'Homepage' in data:
                    pkg.homepage = data['Homepage'].strip('"')

        result.append(pkg)

    return result