Example #1
0
    def Parse(self, path):
        """Build one Package per ``*.json`` manifest found under ``path``.

        The directory tree is walked recursively. The package name is the
        manifest file name without its ``.json`` suffix, and the version is
        read from the manifest's ``version`` key (a missing key raises
        ``KeyError``). The manifest location relative to ``path`` is stored
        in ``extrafields['path']``.
        """
        packages = []

        for dirpath, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                manifest_path = os.path.join(dirpath, filename)
                if not manifest_path.endswith('.json'):
                    continue

                with open(manifest_path, 'r', encoding='utf-8') as manifest_file:
                    manifest = json.load(manifest_file, strict=False)

                pkg = Package()

                pkg.name = filename[:-5]  # drop the '.json' suffix
                pkg.version = manifest['version']

                if 'url' in manifest:
                    url = manifest['url']
                    # a lone url may be given as a bare value instead of a list
                    pkg.downloads = url if isinstance(url, list) else [url]

                if 'homepage' in manifest:
                    pkg.homepage = manifest['homepage']

                if 'license' in manifest:
                    pkg.licenses = [manifest['license']]

                pkg.extrafields = {'path': os.path.relpath(manifest_path, path)}

                packages.append(pkg)

        return packages
Example #2
0
    def Parse(self, path):
        """Read the JSON reply stored at ``path`` and return its apps as Packages.

        Each entry of ``data.apps`` must carry ``title``, ``version`` and
        ``license``; ``tagline`` and ``support`` are optional.

        Raises:
            RuntimeError: if the reply's ``success`` field is falsy.
        """
        with open(path, 'r', encoding='utf-8') as jsonfile:
            reply = json.load(jsonfile)

        if not reply['success']:
            raise RuntimeError('non-success json reply, cannot parse')

        packages = []

        for app in reply['data']['apps']:
            pkg = Package()

            pkg.name = app['title']
            pkg.version = app['version']

            pkg.licenses = [app['license']]

            # optional fields: copied only when the entry provides them
            for key, attribute in (('tagline', 'comment'), ('support', 'homepage')):
                if key in app:
                    setattr(pkg, attribute, app[key])

            packages.append(pkg)

        return packages
Example #3
0
    def Parse(self, path):
        """Parse the ``APKINDEX`` file inside ``path`` into a list of Packages.

        The index is read as a sequence of records terminated by blank
        lines. In each record line the first character is the field tag and
        the value starts at the third character. Records whose ``P`` and
        ``o`` fields differ are skipped.

        Raises:
            KeyError: if a record lacks a field that is read unconditionally
                (``P`` and ``o``; for kept records also ``V``, ``T``, ``U``
                and ``L``).
        """
        packages = []

        with open(os.path.join(path, 'APKINDEX'), 'r', encoding='utf-8') as apkindex:
            state = {}
            for line in apkindex:
                line = line.strip()
                if line:
                    state[line[0]] = line[2:].strip()
                    continue

                # A blank line terminates the current record. Always start
                # the next record from a clean slate, even when this one is
                # skipped below; otherwise fields of a skipped record (e.g.
                # its maintainer) would leak into the following package.
                record, state = state, {}

                if not record:
                    continue

                if record['P'] != record['o']:
                    continue

                pkg = Package()

                pkg.name = record['P']
                pkg.version, pkg.origversion = SanitizeVersion(record['V'])

                pkg.comment = record['T']
                pkg.homepage = record['U']  # XXX: switch to homepages, split
                pkg.licenses = [record['L']]

                if 'm' in record:
                    pkg.maintainers = GetMaintainers(record['m'])

                packages.append(pkg)

        return packages
Example #4
0
    def Parse(self, path):
        """Parse ``./info`` elements of the XML file at ``path`` into Packages.

        The ``fn`` attribute is split on its last two dashes: everything
        before them is the name, the middle part is the version and the two
        trailing parts together form the original version. Entries whose
        ``fn`` has fewer than two dashes are reported on stderr and skipped.
        """
        packages = []

        tree = xml.etree.ElementTree.parse(path)

        for info in tree.findall('./info'):
            pkg = Package()

            attributes = info.attrib
            fn, url, license_ = attributes['fn'], attributes['url'], attributes['license']

            parts = fn.rsplit('-', 2)
            if len(parts) < 3:
                print('WARNING: unable to parse fn: {}'.format(fn),
                      file=sys.stderr)
                continue

            name, version, release = parts

            pkg.name = name
            pkg.origversion = version + '-' + release
            pkg.version = version
            pkg.homepage = url
            pkg.licenses = [license_]

            packages.append(pkg)

        return packages
Example #5
0
    def Parse(self, path):
        """Parse every ``*.html`` file directly inside ``path`` into Packages.

        One Package is produced per ``div.package-preview`` element. The
        header supplies name, version and synopsis; the ``package-info``
        list supplies licenses, homepage and the package source reference.
        """
        packages = []

        html_names = (name for name in os.listdir(path) if name.endswith('.html'))

        for filename in html_names:
            with open(os.path.join(path, filename), encoding='utf-8') as htmlfile:
                document = lxml.html.document_fromstring(htmlfile.read())

            for preview in document.xpath('.//div[@class="package-preview"]'):
                pkg = Package()

                # header: "<name> <version>" text followed by a synopsis span
                header = preview.xpath('./h3[@class="package-name"]')[0]
                pkg.name, rawversion = header.text.split(' ', 1)
                pkg.version, pkg.origversion = SanitizeVersion(rawversion.strip())

                synopsis = header.xpath('./span[@class="package-synopsis"]')[0].text
                # the synopsis is wrapped in whitespace and em dashes; empty means no comment
                pkg.comment = synopsis.strip().strip('—').strip() or None

                # details: each list item is labelled by its <b> child
                for item in preview.xpath('./ul[@class="package-info"]/li'):
                    label = item.xpath('./b')[0].text

                    if label == 'License:':
                        pkg.licenses = [link.text for link in item.xpath('./a')]
                    elif label == 'Website:':
                        pkg.homepage = item.xpath('./a')[0].attrib['href']
                    elif label == 'Package source:':
                        pkg.extrafields['source'] = item.xpath('./a')[0].text

                packages.append(pkg)

        return packages
Example #6
0
    def Parse(self, path):
        """Parse the ``*.json`` recipes found under ``path`` into Packages.

        One Package is produced for every key of a recipe's ``versions``
        mapping; recipes without a ``versions`` key are skipped. The recipe
        location relative to ``path`` is stored in ``extrafields['recipe']``.

        Raises:
            KeyError: if a recipe with at least one version lacks ``name``,
                ``license`` or ``url``.
        """
        result = []

        for filename in walk_tree(path, suffix='.json'):
            # close the file deterministically instead of leaking the handle
            with open(filename, encoding='utf-8', errors='ignore') as jsonfile:
                data = json.load(jsonfile)

            if 'versions' not in data:
                continue

            recipe = os.path.relpath(filename, path)

            # Per-version data is not used: its 'source' entries link to
            # git:// urls or specific commits, which are useless as downloads.
            for version in data['versions']:
                pkg = Package()

                pkg.name = data['name']

                if data['license']:
                    pkg.licenses = [data['license']]

                pkg.homepage = data['url']

                pkg.version = version

                pkg.extrafields['recipe'] = recipe

                # append inside the loop so that every version is emitted,
                # and nothing is emitted for an empty versions mapping
                result.append(pkg)

        return result
Example #7
0
    def Parse(self, path):
        """Turn the JSON query result stored at ``path`` into a list of Packages.

        Every row is expanded into one Package per (package name, version)
        pair. Versions carrying the ``O`` flag without the ``L`` flag are
        reported on stderr as non-linux releases and skipped; the ``U`` flag
        marks a version as devel.
        """
        with open(path, 'r', encoding='utf-8') as jsonfile:
            jsondata = json.load(jsonfile)

        packages = []

        for row in SimplifyResult(jsondata):
            # 'project' is a URL; only its last component (the ID) is kept
            entity = row['project'].rsplit('/', 1)[-1]

            # use Arch and AUR package names as a name, as they are the
            # least ambiguous; the first non-empty field wins
            names = next(
                (row[field].split(', ')
                 for field in ('arch_packages', 'aur_packages')
                 if row[field]),
                [],
            )

            # one package per name and per version; these will be merged anyway
            for name in set(names):
                for versionspec in row['versions'].split(', '):
                    # a version may be followed by '|'-separated flags
                    version, *flags = versionspec.split('|')

                    if 'O' in flags and 'L' not in flags:
                        print(
                            'WARNING: {} ({}) version {} skipped as non-linux release'
                            .format(row['projectLabel'], entity, version),
                            file=sys.stderr)
                        continue

                    pkg = Package()

                    pkg.SetFlag(PackageFlags.devel, 'U' in flags)

                    pkg.name = entity
                    pkg.effname = name
                    pkg.version = version

                    # fall back to the label when there is no description
                    pkg.comment = (row['projectDescription']
                                   if 'projectDescription' in row
                                   else row['projectLabel'])

                    if row['licenses']:
                        pkg.licenses = row['licenses'].split(', ')

                    if row['websites']:
                        # XXX: use all websites when supported
                        pkg.homepage = row['websites'].split(', ')[0]

                    packages.append(pkg)

        return packages
Example #8
0
    def Parse(self, path):
        """Run the tclsh helper on ``path`` and convert its JSON output to Packages.

        The helper script (``self.helperpath``) is executed with the
        configured ``TCLSH`` interpreter and its stdout is parsed as a JSON
        list of port descriptions. Ports that have a ``replaced_by`` field
        are dropped.
        """
        packages = []

        command = [repology.config.TCLSH, self.helperpath, path]

        with subprocess.Popen(command,
                              errors='ignore',
                              stdout=subprocess.PIPE,
                              universal_newlines=True) as helper:
            for port in json.load(helper.stdout):
                pkg = Package()

                pkg.name = port['name']
                pkg.version = port['version']

                # drop obsolete ports (see #235)
                if 'replaced_by' in port:
                    continue

                for key, attribute in (('description', 'comment'), ('homepage', 'homepage')):
                    if key in port:
                        setattr(pkg, attribute, port[key])

                if 'categories' in port:
                    # only the first whitespace-separated category is kept
                    pkg.category = port['categories'].split()[0]

                if 'license' in port:
                    pkg.licenses = [port['license']]  # XXX: properly handle braces

                if 'maintainers' in port:
                    handles = port['maintainers'].replace('{', '').replace('}', '').lower().split()

                    for handle in handles:
                        if handle.startswith('@'):
                            # @foo means github user foo
                            address = handle[1:] + '@github'
                        elif '@' in handle:
                            # plain email
                            address = handle
                        elif ':' in handle:
                            # host:user form is turned into user@host
                            host, user = handle.split(':', 1)
                            address = user + '@' + host
                        elif handle == 'openmaintainer':
                            # ignore, this is a flag that minor changes to a port
                            # are allowed without involving the maintainer
                            continue
                        else:
                            # otherwise it's a bare handle at macports.org
                            address = handle + '@macports.org'

                        pkg.maintainers.append(address)

                portdir = port['portdir']
                pkg.extrafields['portdir'] = portdir
                pkg.extrafields['portname'] = portdir.split('/')[1]

                packages.append(pkg)

        return packages
Example #9
0
    def Parse(self, path):
        """Build one Package per module directory found in ``path``.

        Each module directory holds one subdirectory per version (plus an
        optional ``preferred-versions`` entry, which is ignored). The
        greatest version according to ``version_compare`` is chosen and its
        ``<module>.cabal`` file is parsed for extra metadata. That metadata
        is used only when its name and version agree with the directory
        layout; otherwise a warning is printed to stderr.
        """
        packages = []

        for module in os.listdir(path):
            latest = None

            for candidate in os.listdir(os.path.join(path, module)):
                if candidate == 'preferred-versions':
                    continue

                if latest is None or version_compare(candidate, latest) > 0:
                    latest = candidate

            if latest is None:
                print('WARNING: cannot determine max version for {}'.format(
                    module),
                      file=sys.stderr)
                continue

            cabalpath = os.path.join(path, module, latest, module + '.cabal')

            pkg = Package()

            pkg.name = module
            pkg.version = latest
            # default homepage; may be replaced by the one from the cabal file
            pkg.homepage = 'http://hackage.haskell.org/package/' + module

            cabal = self.ParseCabal(cabalpath)

            if cabal['name'] != pkg.name or version_compare(
                    cabal['version'], pkg.version) != 0:
                print(
                    'WARNING: cabal data sanity check failed for {}, ignoring cabal data'
                    .format(cabalpath),
                    file=sys.stderr)
            else:
                if 'synopsis' in cabal and cabal['synopsis']:
                    pkg.comment = cabal['synopsis'].strip()
                if 'maintainer' in cabal:
                    pkg.maintainers = extract_maintainers(cabal['maintainer'])
                if 'license' in cabal:
                    pkg.licenses = [cabal['license']]
                # only http(s) homepages are accepted
                if 'homepage' in cabal and cabal['homepage'].startswith(
                        ('http://', 'https://')):
                    pkg.homepage = cabal['homepage']
                if 'category' in cabal:
                    pkg.category = cabal['category']

            packages.append(pkg)

        return packages
Example #10
0
    def parse_package(fields):
        """Create a Package from a mapping of release metadata ``fields``.

        ``distribution``, ``version``, ``author`` and ``license`` are
        required keys; ``abstract``, ``resources.homepage`` and
        ``download_url`` are looked up with ``get`` and may be absent.
        """
        package = Package()

        # mandatory fields
        package.name = ensure_str(fields['distribution'])
        package.version = ensure_str(fields['version'])

        # the lowercased author is turned into a pseudo-address
        author = ensure_str(fields['author'])
        package.maintainers = [author.lower() + '@cpan']

        package.licenses = ensure_list(fields['license'])

        # optional fields
        abstract = fields.get('abstract')
        package.comment = ensure_str(abstract)

        homepage = fields.get('resources.homepage')
        package.homepage = ensure_str(homepage)

        download_url = fields.get('download_url')
        package.downloads = ensure_list(download_url)

        return package
Example #11
0
    def Parse(self, path):
        """Parse the ``desc`` file of every package directory inside ``path``.

        A ``desc`` file consists of ``%KEY%`` header lines, each followed by
        value lines; a blank line stores the values collected so far under
        the current key. Packages whose ``BASE`` differs from their ``NAME``
        are reported on stderr and skipped.
        """
        packages = []

        for packagedir in os.listdir(path):
            descpath = os.path.join(path, packagedir, 'desc')

            fields = {}
            section = None
            values = []

            with open(descpath, 'r', encoding='utf-8') as descfile:
                for rawline in descfile:
                    line = rawline.strip()
                    if line == '':
                        # blank line: commit what was gathered for this section
                        fields[section] = values
                    elif line.startswith('%') and line.endswith('%'):
                        section = line[1:-1]
                        values = []
                    else:
                        values.append(line)

            if 'BASE' in fields and fields['NAME'][0] != fields['BASE'][0]:
                print('{} skipped, subpackage'.format(fields['NAME'][0]),
                      file=sys.stderr)
                continue

            pkg = Package()

            pkg.name = fields['NAME'][0]
            pkg.version, pkg.origversion = SanitizeVersion(fields['VERSION'][0])

            if 'DESC' in fields:
                pkg.comment = fields['DESC'][0]

            if 'URL' in fields:
                pkg.homepage = fields['URL'][0]

            if 'LICENSE' in fields:
                pkg.licenses = fields['LICENSE']

            # concatenate the maintainer lists extracted from every packager line
            pkg.maintainers = sum(
                (extract_maintainers(packager) for packager in fields['PACKAGER']),
                [])

            if 'GROUPS' in fields:
                pkg.category = fields['GROUPS'][0]

            packages.append(pkg)

        return packages
Example #12
0
    def Parse(self, path):
        """Parse the first table of the HTML document at ``path`` into Packages.

        Each body row yields one Package: column 1 (link text) is the name,
        column 2 the version, column 3 the comment and column 4 the license.
        """
        document = lxml.html.parse(path).getroot()
        table = document.xpath('.//table')[0]

        packages = []

        for row in table.xpath('./tbody/tr'):
            def first_text(expression):
                """Return the text of the first node matching ``expression`` in this row."""
                return row.xpath(expression)[0].text

            pkg = Package()

            pkg.name = first_text('./td[1]/a')
            pkg.version = first_text('./td[2]')
            pkg.comment = first_text('./td[3]')
            pkg.licenses = [first_text('./td[4]')]

            packages.append(pkg)

        return packages
Example #13
0
    def Parse(self, path):
        """Parse the ``releases`` list of the JSON file at ``path``.

        Only one Package is kept per name: the one with the greatest version
        according to ``version_compare``. Entries with an empty name or
        version are skipped.

        Returns:
            A dict values view over the selected Packages.
        """
        latest = {}

        # note that we actually parse database prepared by
        # fetcher, not the file we've downloaded
        with open(path, 'r', encoding='utf-8') as jsonfile:
            for entry in json.load(jsonfile)['releases']:
                pkg = Package()

                pkg.name = entry['name']
                pkg.version = entry['version']

                if not (pkg.name and pkg.version):
                    continue

                homepage = entry.get('homepage')
                if homepage:
                    pkg.homepage = homepage

                # prefer the summary; the description (multiline) is a fallback
                comment = entry.get('summary') or entry.get('description')
                if comment:
                    pkg.comment = comment

                license_ = entry.get('license')
                if license_:
                    pkg.licenses = [license_]

                # 'submitter' is not used as a maintainer: unfiltered garbage.
                # 'download' is ignored for now: it may contain download page
                # urls instead of file urls.

                known = latest.get(pkg.name)
                if known is None and pkg.name not in latest:
                    latest[pkg.name] = pkg
                elif version_compare(pkg.version, latest[pkg.name].version) > 0:
                    latest[pkg.name] = pkg

        return latest.values()
Example #14
0
    def Parse(self, path):
        """Parse ``./info`` elements of the XML file at ``path`` into Packages.

        Name and versions are derived from the ``fn`` attribute, which is
        split on its last two dashes. Entries whose ``fn`` has fewer than
        two dashes are reported on stderr and skipped.

        Raises:
            KeyError: if an ``info`` element lacks the ``fn``, ``url`` or
                ``license`` attribute.
        """
        result = []

        root = xml.etree.ElementTree.parse(path)

        for info in root.findall('./info'):
            pkg = Package()

            # derive names and versions from fn field
            fn = info.attrib['fn']
            parts = fn.rsplit('-', 2)
            if len(parts) < 3:
                # report the raw attribute value, not the split list
                print('WARNING: unable to parse fn: {}'.format(fn),
                      file=sys.stderr)
                continue

            name, version, revision = parts

            pkg.name = name
            pkg.origversion = version + '-' + revision
            pkg.version = version

            # Rosa packages are named like PKGNAME-PKGVER-ROSAREV
            # where ROSAREV is most commonly in the form of N.src, but
            # may contain other components, such as prerelease stuff
            # like alpha/beta/rc/pre/... and snapshot revisions/dates
            #
            # What we do here is we try to extract the prerelease part
            # and mark the version as ignored for a non-trivial ROSAREV,
            # as it is likely a snapshot and thus cannot be trusted
            if not re.fullmatch(r'[0-9]+\.src', revision):
                pkg.SetFlag(PackageFlags.ignore)
                match = re.search(r'\b(a|alpha|b|beta|pre|rc)[0-9]+',
                                  revision.lower())
                if match:
                    pkg.version += match.group(0)

            # process url and license
            url = info.attrib['url']
            if url:
                pkg.homepage = url

            license_ = info.attrib['license']
            pkg.licenses = [license_]

            result.append(pkg)

        return result
Example #15
0
    def Parse(self, path):
        """Parse the XML/RDF index at ``path`` into a list of Packages.

        Every ``Description`` element of the ``Repository`` yields one
        Package. Source urls are kept as downloads only when they use
        http(s) or ftp and do not mention ``openpkg.org``. A version that
        ends with the package's release string is flagged as untrusted.
        """
        packages = []

        tree = xml.etree.ElementTree.parse(path)

        repository = tree.find(
            '{http://www.openpkg.org/xml-rdf-index/0.9}Repository')

        descriptions = repository.findall(
            '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description')

        for item in descriptions:
            def text_of(tag):
                """Return the text of the first child of ``item`` named ``tag``."""
                return item.find(tag).text

            pkg = Package()

            pkg.name = text_of('{http://www.openpkg.org/xml-rdf-index/0.9}Name')
            pkg.version = text_of('{http://www.openpkg.org/xml-rdf-index/0.9}Version')
            pkg.licenses = [text_of('{http://www.openpkg.org/xml-rdf-index/0.9}License')]
            pkg.comment = text_of('{http://www.openpkg.org/xml-rdf-index/0.9}Summary')
            pkg.category = text_of('{http://www.openpkg.org/xml-rdf-index/0.9}Group')
            pkg.homepage = text_of('{http://www.openpkg.org/xml-rdf-index/0.9}URL')

            sources = item.findall(
                './{http://www.openpkg.org/xml-rdf-index/0.9}Source/{http://www.w3.org/1999/02/22-rdf-syntax-ns#}bag/{http://www.w3.org/1999/02/22-rdf-syntax-ns#}li'
            )
            for source in sources:
                location = source.text
                if not location.startswith(('https://', 'http://', 'ftp://')):
                    continue
                if 'openpkg.org' in location:
                    continue
                pkg.downloads.append(location)

            release = text_of('{http://www.openpkg.org/xml-rdf-index/0.9}Release')
            if pkg.version.endswith(release):
                pkg.SetFlag(PackageFlags.untrusted)

            packages.append(pkg)

        return packages
Example #16
0
    def iter_parse(self, path):
        """Yield a Package for every entry of ``packages`` in the JSON file at ``path``.

        The entry's ``name`` is split into package name and version; entries
        for which this fails are reported on stderr and skipped. Versions
        that look like snapshots (``-git`` names, embedded dates, commit
        hashes) get the ``ignore`` flag. Homepage, description, maintainers,
        license and position are taken from the entry's ``meta`` mapping
        when present.
        """
        with open(path, 'r', encoding='utf-8') as jsonfile:
            for key, packagedata in json.load(jsonfile)['packages'].items():
                # see how Nix parses 'derivative' names in
                # https://github.com/NixOS src/libexpr/names.cc, DrvName::DrvName
                # it just splits on dash followed by non-letter
                #
                # this doesn't work well on 100% cases, it's an upstream problem
                match = re.match('(.+?)-([^a-zA-Z].*)$', packagedata['name'])
                if not match:
                    print('cannot extract version: {}/{}'.format(
                        key, packagedata['name']),
                          file=sys.stderr)
                    continue

                pkg = Package()
                pkg.name = match.group(1)
                pkg.version = match.group(2)

                # some exceptions
                # a leading NNdpi component of the version belongs to the name
                for prefix in ('75dpi', '100dpi'):
                    if pkg.version.startswith(prefix):
                        pkg.name += '-' + prefix
                        pkg.version = pkg.version[len(prefix) + 1:]

                # names that themselves contain a dash followed by a digit:
                # re-split the rejoined string right after the known name
                merged = pkg.name + '-' + pkg.version
                for pkgname in [
                        'liblqr-1', 'python2.7-3to2', 'python3.6-3to2'
                ]:
                    if merged.startswith(pkgname):
                        pkg.name = pkgname
                        pkg.version = merged[len(pkgname) + 1:]

                # the part of the key before the first dot (if any) is the category
                keyparts = key.split('.')
                if len(keyparts) > 1:
                    pkg.category = keyparts[0]

                # '-git' suffix is dropped from the name and the version is ignored
                if pkg.name.endswith('-git'):
                    pkg.name = pkg.name[:-4]
                    pkg.SetFlag(PackageFlags.ignore)

                # version containing a 20YY-MM-DD date
                if re.match('.*20[0-9]{2}-[0-9]{2}-[0-9]{2}', pkg.version):
                    pkg.SetFlag(PackageFlags.ignore)

                # version made only of hex digits, with at least one a-f
                # letter and at least 7 characters long
                if re.match('[0-9a-f]*[a-f][0-9a-f]*$',
                            pkg.version) and len(pkg.version) >= 7:
                    print(
                        'ignoring version which looks like commit hash: {}/{}'.
                        format(key, packagedata['name']),
                        file=sys.stderr)
                    pkg.SetFlag(PackageFlags.ignore)

                meta = packagedata['meta']

                if 'homepage' in meta:
                    pkg.homepage = meta['homepage']
                    if isinstance(
                            pkg.homepage, list
                    ):  # XXX: remove after adding support for homepages array
                        pkg.homepage = pkg.homepage[0]

                # newlines in the description are replaced with spaces
                if 'description' in meta and meta['description']:
                    pkg.comment = meta['description'].replace('\n',
                                                              ' ').strip()

                if 'maintainers' in meta:
                    if not isinstance(meta['maintainers'], list):
                        print('maintainers is not a list: {}/{}'.format(
                            key, packagedata['name']),
                              file=sys.stderr)
                    else:
                        pkg.maintainers += list(
                            extract_nix_maintainers(meta['maintainers']))

                if 'license' in meta:
                    pkg.licenses = extract_nix_licenses(meta['license'])

                # position is split on its last colon into file and line parts
                if 'position' in meta:
                    posfile, posline = meta['position'].rsplit(':', 1)
                    pkg.extrafields['posfile'] = posfile
                    pkg.extrafields['posline'] = posline

                yield pkg
Example #17
0
    def Parse(self, path):
        """Parse the ``packages`` mapping of the JSON file at ``path``.

        Entries are processed in sorted key order. The entry's ``name`` is
        split into package name and version; entries for which this fails
        are reported on stderr and skipped. Versions that look like
        snapshots (``-git`` names, embedded dates, commit hashes) get
        ``ignoreversion`` set.

        Returns:
            A list of Packages.
        """
        result = []

        with open(path, 'r', encoding='utf-8') as jsonfile:
            for key, packagedata in sorted(json.load(jsonfile)['packages'].items()):
                # see how Nix parses 'derivative' names in
                # https://github.com/NixOS src/libexpr/names.cc, DrvName::DrvName
                # it just splits on dash followed by non-letter
                #
                # this doesn't work well on 100% cases, it's an upstream problem
                match = re.match('(.+?)-([^a-zA-Z].*)$', packagedata['name'])
                if not match:
                    print('cannot extract version: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                    continue

                pkg = Package()
                pkg.name = match.group(1)
                pkg.version = match.group(2)

                # some exceptions
                # a leading NNdpi component of the version belongs to the name
                for prefix in ('75dpi', '100dpi'):
                    if pkg.version.startswith(prefix):
                        pkg.name += '-' + prefix
                        pkg.version = pkg.version[len(prefix) + 1:]

                # for these packages the first dash-separated component of
                # the version belongs to the name
                for pkgname in ('liblqr', ):
                    if pkg.name == pkgname:
                        head, dash, tail = pkg.version.partition('-')
                        # only re-split when there really is a dash; without
                        # it the version would be mangled into the name
                        if dash:
                            pkg.name = pkg.name + '-' + head
                            pkg.version = tail

                if pkg.name.endswith('-git'):
                    pkg.name = pkg.name[:-4]
                    print('ignoring version for git snapshot: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                    pkg.ignoreversion = True

                if re.match('.*20[0-9]{2}-[0-9]{2}-[0-9]{2}', pkg.version):
                    print('ignoring version which is a date: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                    pkg.ignoreversion = True

                if re.match('[0-9a-f]*[a-f][0-9a-f]*$', pkg.version) and len(pkg.version) >= 7:
                    print('ignoring version which looks like commit hash: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                    pkg.ignoreversion = True

                meta = packagedata['meta']

                if 'homepage' in meta:
                    pkg.homepage = meta['homepage']
                    if isinstance(pkg.homepage, list):  # XXX: remove after adding support for homepages array
                        pkg.homepage = pkg.homepage[0]

                if 'description' in meta:
                    pkg.comment = meta['description']

                if 'maintainers' in meta:
                    maintainers = meta['maintainers']
                    if not isinstance(meta['maintainers'], list):
                        # reported, but the value is still passed on as is
                        print('maintainers is not a list: {}/{}'.format(key, packagedata['name']), file=sys.stderr)
                    else:
                        maintainers = ', '.join(maintainers)
                    pkg.maintainers = GetMaintainers(maintainers)

                if 'license' in meta:
                    pkg.licenses = ExtractLicenses(meta['license'])

                result.append(pkg)

        return result
Example #18
0
    def Parse(self, path):
        """Parse the ``trunk`` directory inside ``path`` into a list of Packages.

        ``trunk`` holds one directory per package, each with one
        subdirectory per version. For every package the greatest version
        according to ``VersionCompare`` is chosen. Download urls are read
        from its ``Recipe`` file and summary, license and homepage from its
        ``Resources/Description`` file, when those files exist.
        """
        result = []

        # "[Tag] value" header line of a Description file. A raw string is
        # used so that the backslashes reach the regex engine without
        # relying on invalid string escape sequences.
        tag_re = re.compile(r'^\[([A-Z][a-z]+)\] *(.*?)$')

        trunk_path = os.path.join(path, 'trunk')
        for package_name in os.listdir(trunk_path):
            package_path = os.path.join(trunk_path, package_name)

            maxversion = None
            for version_name in os.listdir(package_path):
                if maxversion is None or VersionCompare(
                        version_name, maxversion) > 0:
                    maxversion = version_name

            if maxversion is None:
                print('WARNING: no usable versions for package {}'.format(
                    package_name),
                      file=sys.stderr)
                continue

            recipe_path = os.path.join(package_path, maxversion, 'Recipe')
            description_path = os.path.join(package_path, maxversion,
                                            'Resources', 'Description')

            pkg = Package()

            pkg.name = package_name
            pkg.version = maxversion

            if os.path.isfile(recipe_path):
                with open(recipe_path, 'r', encoding='utf-8',
                          errors='ignore') as recipe:
                    for line in recipe:
                        line = line.strip()
                        if line.startswith('url='):
                            download = ExpandDownloadUrlTemplates(line[4:])
                            # a remaining '$' means an unexpanded substitution
                            if download.find('$') == -1:
                                pkg.downloads.append(download.strip('"'))
                            else:
                                print(
                                    'WARNING: Recipe for {}/{} skipped, unhandled URL substitude found'
                                    .format(package_name, maxversion),
                                    file=sys.stderr)

            if os.path.isfile(description_path):
                with open(description_path,
                          'r',
                          encoding='utf-8',
                          errors='ignore') as description:
                    data = {}
                    current_tag = None
                    for line in description:
                        line = line.strip()
                        match = tag_re.match(line)
                        if match:
                            current_tag = match.group(1)
                            data[current_tag] = match.group(2)
                        elif current_tag is None:
                            # content before the first tag: give up on this file
                            print(
                                'WARNING: Description for {}/{} skipped, dumb format'
                                .format(package_name, maxversion),
                                file=sys.stderr)
                            break
                        elif line:
                            # continuation line: append to the current tag,
                            # separated by a single space
                            if data[current_tag]:
                                data[current_tag] += ' '
                            data[current_tag] += line

                    if 'Summary' in data:
                        pkg.comment = data['Summary']
                    if 'License' in data:
                        pkg.licenses = [data['License']]
                    if 'Homepage' in data:
                        pkg.homepage = data['Homepage'].strip('"')

            result.append(pkg)

        return result