def parse(location):
    """
    Build and return a NugetPackage from the nuspec XML file at `location`.
    Return None when nothing could be parsed or when the parsed document has
    no package metadata section.
    """
    document = _parse_nuspec(location)
    if TRACE:
        logger_debug('parsed:', document)
    if not document:
        return

    metadata = (document.get('package', {}) or {}).get('metadata')
    if not metadata:
        return

    package_id = metadata.get('id')

    # Summary: A short description of the package for UI display. If omitted,
    # a truncated version of description is used.
    description = build_description(
        metadata.get('summary'),
        metadata.get('description'),
    )

    # title: A human-friendly title of the package, typically used in UI
    # displays as on nuget.org and the Package Manager in Visual Studio. If
    # not specified, the package ID is used.
    title = metadata.get('title')
    if title and title != package_id:
        # a title that merely repeats the id adds nothing to the description
        description = build_description(title, description)

    # authors and owners are each recorded as a single party
    parties = []
    for key, role in (('authors', 'author'), ('owners', 'owner')):
        who = metadata.get(key)
        if who:
            parties.append(models.Party(name=who, role=role))

    # the VCS URL is "<type>+<url>" when the repository type is known,
    # otherwise the bare repository URL
    repository = metadata.get('repository') or {}
    repo_kind = repository.get('@type') or ''
    repo_url = repository.get('@url') or ''
    if not repo_url:
        vcs_url = None
    elif repo_kind:
        vcs_url = '{}+{}'.format(repo_kind, repo_url)
    else:
        vcs_url = repo_url

    return NugetPackage(
        name=package_id,
        version=metadata.get('version'),
        description=description or None,
        homepage_url=metadata.get('projectUrl') or None,
        parties=parties,
        declared_license=metadata.get('licenseUrl') or None,
        copyright=metadata.get('copyright') or None,
        vcs_url=vcs_url,
    )
def parse_metadata(location):
    """
    Return a Package object from the Python wheel 'metadata.json' file at
    'location' or None.

    Check if the parent directory of 'location' contains both a 'METADATA'
    and a 'DESCRIPTION.rst' file to ensure this is a proper metadata.json
    file. Return None when `location` is not a 'metadata.json' path or when
    either of these two sibling files is missing.
    """
    if not location or not location.endswith('metadata.json'):
        if TRACE:
            logger_debug('parse_metadata: not metadata.json:', location)
        return

    parent_dir = fileutils.parent_directory(location)
    # FIXME: is the absence of these two files a show stopper?
    paths = [
        os.path.join(parent_dir, n)
        for n in ('METADATA', 'DESCRIPTION.rst')
    ]
    if not all(os.path.exists(p) for p in paths):
        if TRACE:
            logger_debug('parse_metadata: not extra paths', paths)
        return

    with open(location, 'rb') as infs:
        infos = json.load(infs)

    extensions = infos.get('extensions')
    if TRACE:
        logger_debug('parse_metadata: extensions:', extensions)

    # every level of the "extensions" tree is optional
    details = extensions and extensions.get('python.details')
    urls = details and details.get('project_urls')
    homepage_url = urls and urls.get('Home')

    # Look the contacts up behind the same guard as the other details:
    # tracing must not fail when there is no 'python.details' section.
    raw_contacts = details.get('contacts') if details else None
    if TRACE:
        logger_debug('parse_metadata: contacts:', raw_contacts)
    contacts = raw_contacts or []

    parties = []
    for contact in contacts:
        if TRACE:
            logger_debug('parse_metadata: contact:', contact)
        name = contact and contact.get('name')
        if not name:
            # a contact without a name cannot be reported as a party
            if TRACE:
                logger_debug('parse_metadata: no name:', contact)
            continue
        parties.append(
            models.Party(type=models.party_person, name=name, role='contact'))

    description = build_description(
        infos.get('summary'),
        infos.get('description'),
    )

    package = PythonPackage(
        name=infos.get('name'),
        version=infos.get('version'),
        description=description or None,
        declared_license=infos.get('license') or None,
        homepage_url=homepage_url or None,
        parties=parties,
    )
    return package
def parse_setup_py(location):
    """
    Return a PythonPackage built from the text of the setup.py file at
    `location`, or None if `location` is empty or is not a setup.py path.
    """
    if not (location and location.endswith('setup.py')):
        return

    # FIXME: what if this is unicode text?
    mode = 'rb' if py2 else 'r'
    with open(location, mode) as inp:
        setup_text = inp.read()

    def attribute(attribute_name):
        # shorthand for a lookup in the text of this setup.py
        return get_setup_attribute(setup_text, attribute_name)

    summary = attribute('summary')
    long_description = attribute('description')
    description = build_description(summary, long_description)

    parties = []
    author = attribute('author')
    if author:
        parties.append(
            models.Party(type=models.party_person, name=author, role='author'))

    license_text = attribute('license')

    # classifiers starting with "License" are reported as part of the
    # declared license; all the others are reported as keywords
    license_classifiers = []
    other_classifiers = []
    for classifier in get_classifiers(setup_text):
        if classifier.startswith('License'):
            license_classifiers.append(classifier)
        else:
            other_classifiers.append(classifier)

    declared_license = OrderedDict([
        ('license', license_text),
        ('classifiers', license_classifiers),
    ])

    return PythonPackage(
        name=attribute('name'),
        version=attribute('version'),
        description=description or None,
        homepage_url=attribute('url') or None,
        parties=parties,
        declared_license=declared_license,
        keywords=other_classifiers,
    )
def get_description(metainfo, location=None):
    """
    Return the result of build_description() for the "Summary" and the long
    description found in a ``metainfo`` object or mapping.

    The long description is taken from the first non-empty of: the
    ``metainfo`` payload, its "Description" attribute, or, when ``location``
    is provided, the legacy description looked up from the parent directory
    of ``location``.
    """
    # newer metadata versions use the payload for the description
    if hasattr(metainfo, 'get_payload'):
        long_description = metainfo.get_payload()
    else:
        long_description = None

    # legacy metadata versions use the Description for the description
    long_description = long_description or get_attribute(metainfo, 'Description')

    # older metadata versions can use a DESCRIPTION.rst file
    if location and not long_description:
        parent_dir = fileutils.parent_directory(location)
        long_description = get_legacy_description(parent_dir)

    return build_description(
        get_attribute(metainfo, 'Summary'),
        long_description,
    )
def parse(cls, location):
    """
    Yield one PackageData built from the spec data parsed from the file at
    ``location``.
    """
    gemspec = spec.parse_spec(
        location=location,
        package_type=cls.default_package_type,
    )

    gem_name = gemspec.get('name')
    gem_version = gemspec.get('version')

    description = build_description(
        summary=gemspec.get('summary'),
        description=gemspec.get('description'),
    )

    # NOTE(review): the source is looked up but never passed on to the
    # package data below -- confirm whether this is intended.
    vcs_url = gemspec.get('source')

    # FIXME: why splitting here? this is a job for the license detection
    raw_license = gemspec.get('license')
    declared_license = raw_license.split(',') if raw_license else raw_license

    parties = get_parties(gemspec)
    urls = get_urls(name=gem_name, version=gem_version)

    package_data = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=gem_name,
        version=gem_version,
        parties=parties,
        homepage_url=gemspec.get('homepage'),
        description=description,
        declared_license=declared_license,
        primary_language=cls.default_primary_language,
        dependencies=gemspec.get('dependencies') or [],
        **urls
    )

    # only compute a license expression when none was set and there is a
    # declared license to compute it from
    if not package_data.license_expression and package_data.declared_license:
        normalized = models.compute_normalized_license(
            package_data.declared_license)
        package_data.license_expression = normalized

    yield package_data
def parse_pkg_info(location):
    """
    Return a PythonPackage built from the 'PKG-INFO' file at 'location' or
    None if 'location' is empty or is not a 'PKG-INFO' path.

    Only the first line of each header listed in PKG_INFO_ATTRIBUTES is
    collected. A header that is absent from the file or whose value is
    'UNKNOWN' is reported as None.
    """
    if not location or not location.endswith('PKG-INFO'):
        return

    with open(location, 'rb') as inp:
        # Decode explicitly: matching a text pattern against raw bytes raises
        # a TypeError on Python 3. Undecodable bytes are replaced rather than
        # aborting the whole parse.
        pkg_info = inp.read().decode('utf-8', 'replace')

    infos = {}
    for attribute in PKG_INFO_ATTRIBUTES:
        # Headers are "Name: value" lines. Requiring the colon right after
        # the name keeps "Author" from matching an "Author-email" line, and
        # using [ \t] instead of \s keeps an empty value from swallowing the
        # following header line.
        header = re.search(
            r'^' + re.escape(attribute) + r'[ \t]*:[ \t]*(.*)$',
            pkg_info,
            flags=re.MULTILINE,
        )
        # a missing header is not an error: report it as None
        value = header.group(1).strip() if header else None
        if value == 'UNKNOWN':
            value = None
        infos[attribute] = value

    description = build_description(
        infos.get('Summary'),
        infos.get('Description'),
    )

    parties = []
    author = infos.get('Author')
    if author:
        parties.append(
            models.Party(type=models.party_person, name=author, role=''))

    package = PythonPackage(
        name=infos.get('Name'),
        version=infos.get('Version'),
        description=description or None,
        homepage_url=infos.get('Home-page') or None,
        # FIXME: this is NOT correct as classifiers can be used for this too
        declared_license=infos.get('License') or None,
        # FIXME: what about email?
        # FIXME: what about maintainers?
        parties=parties,
    )
    return package
def parse_setup_py(location):
    """
    Return a PythonPackage built from the content of the setup.py file at
    `location`, or None if `location` is empty or is not a setup.py path.
    """
    if not (location and location.endswith('setup.py')):
        return

    # FIXME: what if this is unicode text?
    with open(location, 'rb') as inp:
        setup_text = inp.read()

    def attribute(attribute_name):
        # shorthand for a lookup in the content of this setup.py
        return get_setup_attribute(setup_text, attribute_name)

    summary = attribute('summary')
    long_description = attribute('description')
    description = build_description(summary, long_description)

    parties = []
    author = attribute('author')
    if author:
        parties.append(
            models.Party(type=models.party_person, name=author, role='author'))

    # classifiers starting with "License" feed the declared license; all the
    # others are reported as keywords
    license_classifiers = []
    other_classifiers = []
    for classifier in get_classifiers(setup_text):
        if classifier.startswith('License'):
            license_classifiers.append(classifier)
        else:
            other_classifiers.append(classifier)

    # the declared license is the license attribute followed by the license
    # classifiers, one per line, skipping empty or blank entries
    license_lines = [attribute('license')]
    license_lines.extend(license_classifiers)
    declared_license = '\n'.join(
        line for line in license_lines if line and line.strip())

    return PythonPackage(
        name=attribute('name'),
        version=attribute('version'),
        description=description or None,
        homepage_url=attribute('url') or None,
        parties=parties,
        declared_license=declared_license,
        keywords=other_classifiers,
    )
def parse(cls, location):
    """
    Yield one PackageData built from the RPM tags obtained for the file at
    ``location``. Yield nothing when no tags are returned.
    """
    rpm_tags = get_rpm_tags(location, include_desc=True)

    if TRACE:
        logger_debug('recognize: rpm_tags', rpm_tags)
    if not rpm_tags:
        return

    # an epoch that is empty, zero or not an integer is treated as no epoch
    raw_epoch = rpm_tags.epoch
    try:
        epoch = (int(raw_epoch) or None) if raw_epoch else None
    except ValueError:
        epoch = None

    evr = EVR(
        version=rpm_tags.version or None,
        release=rpm_tags.release or None,
        epoch=epoch,
    ).to_string()

    # NOTE: these qualifiers are computed but not passed to the package data
    qualifiers = {}
    os_name = rpm_tags.os
    if os_name and os_name.lower() != 'linux':
        qualifiers['os'] = os_name
    if rpm_tags.arch:
        qualifiers['arch'] = rpm_tags.arch

    source_packages = []
    if rpm_tags.source_rpm:
        sepoch, sname, sversion, srel, sarch = nevra.from_name(
            rpm_tags.source_rpm)
        src_evr = EVR(sversion, srel, sepoch).to_string()
        src_qualifiers = {'arch': sarch} if sarch else {}
        src_purl = models.PackageURL(
            type=cls.default_package_type,
            # TODO: namespace=cls.default_package_namespace,
            name=sname,
            version=src_evr,
            qualifiers=src_qualifiers,
        ).to_string()

        if TRACE:
            logger_debug('recognize: source_rpm', src_purl)
        source_packages = [src_purl]

    # TODO: also use me to craft a namespace!!!
    # TODO: assign a namespace to Package URL based on distro names.
    # CentOS
    # Fedora Project
    # OpenMandriva Lx
    # openSUSE Tumbleweed
    # Red Hat
    parties = []
    for party_name, role in (
        (rpm_tags.distribution, 'distributor'),
        (rpm_tags.vendor, 'vendor'),
    ):
        if party_name:
            parties.append(models.Party(name=party_name, role=role))

    description = build_description(
        summary=rpm_tags.summary,
        description=rpm_tags.description,
    )

    # these fields are both traced and used to create the package
    package_fields = dict(
        name=rpm_tags.name,
        version=evr,
        description=description or None,
        homepage_url=rpm_tags.url or None,
        parties=parties,
        declared_license=rpm_tags.license or None,
        source_packages=source_packages,
    )
    if TRACE:
        logger_debug('recognize: data to create a package:\n', package_fields)

    package = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        # TODO: namespace=cls.default_package_namespace,
        **package_fields
    )

    if TRACE:
        logger_debug('recognize: created package:\n', package)

    yield package
def parse_setup_py(location):
    """
    Return a PythonPackage built from setup.py data, or None if `location`
    is empty, is not a setup.py path, or if no package name is found.

    The file is parsed, not executed. Only keyword arguments of a top-level
    ``setup(...)`` or ``main(...)`` call whose value is a string literal, or
    a list, tuple or set literal, are collected. In such a collection, only
    the string literal elements are kept.
    """
    if not location or not location.endswith('setup.py'):
        return

    with open(location) as inp:
        setup_text = inp.read()

    def string_literal(node):
        """
        Return the text of a string literal AST `node`, or None for any
        other node.
        """
        # ast.Str is deprecated since Python 3.8 and removed in 3.14: string
        # literals are ast.Constant nodes holding a str value.
        if isinstance(node, ast.Constant) and isinstance(node.value, str):
            return node.value
        return None

    setup_args = {}

    # Parse setup.py file and traverse the AST
    tree = ast.parse(setup_text)
    for statement in tree.body:
        # We only care about function calls or assignments to functions named
        # `setup` or `main`
        if not isinstance(statement, (ast.Expr, ast.Assign)):
            continue
        call = statement.value
        if not (
            isinstance(call, ast.Call)
            and isinstance(call.func, ast.Name)
            # we also look for main as sometimes this is used instead of
            # setup()
            and call.func.id in ('setup', 'main')
        ):
            continue

        # Process the arguments to the setup function
        for kw in call.keywords:
            arg_name = kw.arg
            if arg_name is None:
                # this is a **kwargs expansion: there is no name to record
                continue

            text = string_literal(kw.value)
            if text is not None:
                setup_args[arg_name] = text

            elif isinstance(kw.value, (ast.List, ast.Tuple, ast.Set)):
                # We collect only the string literal elements: names, calls
                # and other expressions have no literal text and used to
                # raise an AttributeError here.
                literals = (string_literal(elt) for elt in kw.value.elts)
                setup_args[arg_name] = [
                    elt for elt in literals if elt is not None
                ]

            # TODO: what if isinstance(kw.value, ast.Dict)
            # or an expression like a call to version=get_version or
            # version__version__

    package_name = setup_args.get('name')
    if not package_name:
        return

    description = build_description(
        setup_args.get('summary', ''),
        setup_args.get('description', ''),
    )

    parties = []
    author = setup_args.get('author')
    author_email = setup_args.get('author_email')
    homepage_url = setup_args.get('url')
    if author:
        parties.append(
            models.Party(
                type=models.party_person,
                name=author,
                email=author_email,
                role='author',
                url=homepage_url,
            )
        )
    elif author_email:
        # an author can be known by email only
        parties.append(
            models.Party(
                type=models.party_person,
                email=author_email,
                role='author',
                url=homepage_url,
            )
        )

    declared_license = {}
    license_setuptext = setup_args.get('license')
    declared_license['license'] = license_setuptext

    # classifiers starting with "License" are part of the declared license;
    # all the others are reported as keywords
    classifiers = setup_args.get('classifiers', [])
    license_classifiers = [c for c in classifiers if c.startswith('License')]
    declared_license['classifiers'] = license_classifiers

    other_classifiers = [c for c in classifiers if not c.startswith('License')]

    detected_version = setup_args.get('version')
    if not detected_version:
        # search for possible dunder versions here and elsewhere
        detected_version = detect_version_attribute(location)

    return PythonPackage(
        name=package_name,
        version=detected_version,
        description=description or None,
        homepage_url=setup_args.get('url') or None,
        parties=parties,
        declared_license=declared_license,
        keywords=other_classifiers,
    )
def build_rubygem_package_data(gem_data, datasource_id):
    """
    Return a PackageData for ``datasource_id`` built from a Gem ``gem_data``
    mapping or None if ``gem_data`` is empty.

    The ``gem_data`` can come from a .gemspec or .gem/metadata. It is only
    read and never modified.
    """
    if not gem_data:
        return

    metadata = gem_data.get('metadata') or {}

    name = gem_data.get('name')
    # there are two levels of nesting for version:
    version1 = gem_data.get('version') or {}
    version = version1.get('version') or None

    platform = gem_data.get('platform')
    # "ruby" is not reported as a qualifier, and a missing platform must not
    # produce a platform=None qualifier either
    if platform and platform != 'ruby':
        qualifiers = dict(platform=platform)
    else:
        qualifiers = {}

    description = build_description(
        summary=gem_data.get('summary'),
        description=gem_data.get('description'),
    )

    # Since the gem spec doc is not clear wrt. to the default being OR or AND
    # we will treat a list of licenses as a conjunction for now (e.g. AND)
    # See https://guides.rubygems.org/specification-reference/#licenseo
    lic = gem_data.get('license')
    licenses = gem_data.get('licenses')
    declared_license = licenses_mapper(lic, licenses)

    # we may have two homepages and one may be wrong.
    # we prefer the one from the metadata
    homepage_url = metadata.get('homepage_uri')
    if not homepage_url:
        homepage_url = gem_data.get('homepage')

    urls = get_urls(name, version, platform)
    dependencies = get_dependencies(gem_data.get('dependencies'))
    file_references = get_file_references(metadata.get('files'))

    package_data = models.PackageData(
        datasource_id=datasource_id,
        type=GemArchiveHandler.default_package_type,
        primary_language=GemArchiveHandler.default_primary_language,
        name=name,
        version=version,
        qualifiers=qualifiers,
        description=description,
        homepage_url=homepage_url,
        declared_license=declared_license,
        bug_tracking_url=metadata.get('bug_tracking_uri'),
        code_view_url=metadata.get('source_code_uri'),
        file_references=file_references,
        dependencies=dependencies,
        **urls,
    )

    # we can have one singular or a plural list of authors
    authors = gem_data.get('authors') or []
    if isinstance(authors, str):
        # or a string of comma-separated authors (in the Rubygems API)
        authors = [a.strip() for a in authors.split(',') if a.strip()]
    else:
        # work on a copy: appending the singular author below must not
        # modify the list owned by the caller's gem_data
        authors = list(authors)
    authors.append(gem_data.get('author') or '')

    for author in authors:
        if author and author.strip():
            party = models.Party(name=author, role='author')
            package_data.parties.append(party)

    # TODO: we have an email that is either a string or a list of string

    # date: 2019-01-09 00:00:00.000000000 Z
    date = gem_data.get('date')
    if date and len(date) >= 10:
        # keep only the leading YYYY-MM-DD part
        package_data.release_date = date[:10]

    # TODO: infer source purl and add purl to package_data.source_packages

    # not used for now
    #   "changelog_uri" => "https://example.com/user/bestgemever/CHANGELOG.md",
    #   "wiki_uri" => "https://example.com/user/bestgemever/wiki"
    #   "mailing_list_uri" => "https://groups.example.com/bestgemever",
    #   "documentation_uri" => "https://www.example.info/gems/bestgemever/0.0.1",

    if not package_data.homepage_url:
        package_data.homepage_url = rubygems_homepage_url(name, version)

    if not package_data.license_expression and package_data.declared_license:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    return package_data
def parse_setup_py(location):
    """
    Return a PythonPackage built from setup.py data, or None if `location`
    is empty, is not a setup.py path, or if no package name is found.

    The file is parsed, not executed. Only keyword arguments of a top-level
    ``setup(...)`` call whose value is a string literal or a list literal are
    collected. In a list, only the string literal elements are kept.
    """
    if not location or not location.endswith('setup.py'):
        return

    # FIXME: what if this is unicode text?
    if py2:
        mode = 'rb'
    else:
        mode = 'r'
    with open(location, mode) as inp:
        setup_text = inp.read()

    def string_literal(node):
        """
        Return the text of a string literal AST `node`, or None for any
        other node.
        """
        # Python 3.8 and later: string literals are ast.Constant nodes.
        constant_type = getattr(ast, 'Constant', None)
        if constant_type is not None and isinstance(node, constant_type):
            return node.value if isinstance(node.value, str) else None
        # Python 2 and Python before 3.8 use ast.Str nodes. The class is
        # matched by name because ast.Str itself is deprecated since 3.8 and
        # removed in 3.14.
        if type(node).__name__ == 'Str':
            return node.s
        return None

    setup_args = OrderedDict()

    # Parse setup.py file and traverse the AST
    tree = ast.parse(setup_text)
    for statement in tree.body:
        # We only care about function calls or assignments to functions named
        # `setup`. All of these checks must hold together: the former
        # "a or b or c and d ..." chain let any expression statement through,
        # such as a module docstring, and then failed on its missing
        # keywords.
        if not isinstance(statement, (ast.Expr, ast.Assign)):
            continue
        call = statement.value
        if not (
            isinstance(call, ast.Call)
            and isinstance(call.func, ast.Name)
            and call.func.id == 'setup'
        ):
            continue

        # Process the arguments to the setup function
        for kw in call.keywords:
            arg_name = kw.arg
            if arg_name is None:
                # this is a **kwargs expansion: there is no name to record
                continue

            text = string_literal(kw.value)
            if text is not None:
                setup_args[arg_name] = text

            if isinstance(kw.value, ast.List):
                # We collect only the string literal elements: names, calls
                # and other expressions have no literal text and used to
                # raise an AttributeError here.
                literals = (string_literal(elt) for elt in kw.value.elts)
                setup_args[arg_name] = [
                    elt for elt in literals if elt is not None
                ]

    package_name = setup_args.get('name')
    if not package_name:
        return

    description = build_description(
        setup_args.get('summary', ''),
        setup_args.get('description', ''),
    )

    parties = []
    author = setup_args.get('author')
    author_email = setup_args.get('author_email')
    homepage_url = setup_args.get('url')
    if author:
        parties.append(
            models.Party(
                type=models.party_person,
                name=author,
                email=author_email,
                role='author',
                url=homepage_url,
            )
        )

    declared_license = OrderedDict()
    license_setuptext = setup_args.get('license')
    declared_license['license'] = license_setuptext

    # classifiers starting with "License" are part of the declared license;
    # all the others are reported as keywords
    classifiers = setup_args.get('classifiers', [])
    license_classifiers = [c for c in classifiers if c.startswith('License')]
    declared_license['classifiers'] = license_classifiers

    other_classifiers = [c for c in classifiers if not c.startswith('License')]

    return PythonPackage(
        name=package_name,
        version=setup_args.get('version'),
        description=description or None,
        homepage_url=setup_args.get('url') or None,
        parties=parties,
        declared_license=declared_license,
        keywords=other_classifiers,
    )
def parse(cls, location):
    """
    Yield one PackageData built from the spec data parsed from the file at
    ``location``.
    """
    podspec = spec.parse_spec(
        location=location,
        package_type=cls.default_package_type,
    )

    pod_name = podspec.get('name')
    pod_version = podspec.get('version')
    homepage_url = podspec.get('homepage')

    # the license expression is only computed when a license is declared
    declared_license = podspec.get('license')
    if declared_license:
        license_expression = models.compute_normalized_license(
            declared_license)
    else:
        license_expression = None

    description = utils.build_description(
        summary=podspec.get('summary'),
        description=podspec.get('description'),
    )

    vcs_url = podspec.get('source') or ''

    # FIXME: we are doing nothing with the email list
    parties = []
    for author in podspec.get('author') or []:
        person_name, person_email = parse_person(author)
        parties.append(
            models.Party(
                type=models.party_person,
                name=person_name,
                email=person_email,
                role='author',
            )
        )

    urls = get_urls(
        name=pod_name,
        version=pod_version,
        homepage_url=homepage_url,
        vcs_url=vcs_url,
    )

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=pod_name,
        version=pod_version,
        primary_language=cls.default_primary_language,
        vcs_url=vcs_url,
        # FIXME: a source should be a PURL, not a list of URLs
        # source_packages=vcs_url.split('\n'),
        description=description,
        declared_license=declared_license,
        license_expression=license_expression,
        homepage_url=homepage_url,
        parties=parties,
        **urls,
    )
def parse(location):
    """
    Return an RpmPackage object built from the RPM tags obtained for the
    file at location, or None when no tags are returned.
    """
    tags = get_rpm_tags(location, include_desc=True)
    if TRACE:
        logger_debug('parse: tags', tags)
    if not tags:
        return

    # an epoch that is empty, zero or not an integer is treated as no epoch
    raw_epoch = tags.epoch
    try:
        epoch = (int(raw_epoch) or None) if raw_epoch else None
    except ValueError:
        epoch = None

    evr = EVR(
        version=tags.version or None,
        release=tags.release or None,
        epoch=epoch,
    ).to_string()

    # NOTE: these qualifiers are computed but not passed to the package
    qualifiers = {}
    os_name = tags.os
    if os_name and os_name.lower() != 'linux':
        qualifiers['os'] = os_name
    if tags.arch:
        qualifiers['arch'] = tags.arch

    source_packages = []
    if tags.source_rpm:
        src_epoch, src_name, src_version, src_release, src_arch = (
            nevra.from_name(tags.source_rpm))
        src_evr = EVR(src_version, src_release, src_epoch).to_string()
        src_qualifiers = {'arch': src_arch} if src_arch else {}
        src_purl = models.PackageURL(
            type=RpmPackage.default_type,
            name=src_name,
            version=src_evr,
            qualifiers=src_qualifiers,
        ).to_string()

        if TRACE:
            logger_debug('parse: source_rpm', src_purl)
        source_packages = [src_purl]

    parties = []
    for party_name, role in (
        (tags.distribution, 'distributor'),
        (tags.vendor, 'vendor'),
    ):
        if party_name:
            parties.append(models.Party(name=party_name, role=role))

    description = build_description(tags.summary, tags.description)

    # these fields are both traced and used to create the package
    package_fields = dict(
        name=tags.name,
        version=evr,
        description=description or None,
        homepage_url=tags.url or None,
        parties=parties,
        declared_license=tags.license or None,
        source_packages=source_packages,
    )
    if TRACE:
        logger_debug('parse: data to create a package:\n', package_fields)

    package = RpmPackage(**package_fields)

    if TRACE:
        logger_debug('parse: created package:\n', package)
    return package
def parse(cls, location):
    """
    Yield one PackageData built from the XML file at ``location``. Yield
    nothing when the parsed document is empty or has no package metadata
    section.
    """
    with open(location, 'rb') as loc:
        document = xmltodict.parse(loc)

    if not document:
        return

    metadata = (document.get('package') or {}).get('metadata')
    if not metadata:
        return

    name = metadata.get('id')
    version = metadata.get('version')

    # Summary: A short description of the package for UI display. If omitted,
    # a truncated version of description is used.
    description = build_description(
        metadata.get('summary'),
        metadata.get('description'),
    )

    # title: A human-friendly title of the package, typically used in UI
    # displays as on nuget.org and the Package Manager in Visual Studio. If
    # not specified, the package ID is used.
    title = metadata.get('title')
    if title and title != name:
        # a title that merely repeats the id adds nothing to the description
        description = build_description(title, description)

    # authors and owners are each recorded as a single party
    parties = []
    for key, role in (('authors', 'author'), ('owners', 'owner')):
        who = metadata.get(key)
        if who:
            parties.append(models.Party(name=who, role=role))

    # the VCS URL is "<type>+<url>" when the repository type is known,
    # otherwise the bare repository URL
    repo = metadata.get('repository') or {}
    vcs_repository = repo.get('@url') or ''
    vcs_tool = (repo.get('@type') or '') if vcs_repository else ''
    if not vcs_repository:
        vcs_url = None
    elif vcs_tool:
        vcs_url = f'{vcs_tool}+{vcs_repository}'
    else:
        vcs_url = vcs_repository

    urls = get_urls(name, version)

    package_data = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=version,
        description=description or None,
        homepage_url=metadata.get('projectUrl') or None,
        parties=parties,
        # FIXME: license has evolved and is now SPDX...
        declared_license=metadata.get('licenseUrl') or None,
        copyright=metadata.get('copyright') or None,
        vcs_url=vcs_url,
        **urls,
    )

    # only compute a license expression when none was set and there is a
    # declared license to compute it from
    if not package_data.license_expression and package_data.declared_license:
        normalized = cls.compute_normalized_license(package_data)
        package_data.license_expression = normalized

    yield package_data
def recognize(cls, location):
    """
    Yield one ``cls`` package object built from the XML file at
    ``location``. Yield nothing when the parsed document is empty or has no
    package metadata section.
    """
    with open(location, 'rb') as loc:
        document = xmltodict.parse(loc)

    if TRACE:
        logger_debug('parsed:', document)
    if not document:
        return

    metadata = (document.get('package', {}) or {}).get('metadata')
    if not metadata:
        return

    package_id = metadata.get('id')

    # Summary: A short description of the package for UI display. If omitted,
    # a truncated version of description is used.
    description = build_description(
        metadata.get('summary'),
        metadata.get('description'),
    )

    # title: A human-friendly title of the package, typically used in UI
    # displays as on nuget.org and the Package Manager in Visual Studio. If
    # not specified, the package ID is used.
    title = metadata.get('title')
    if title and title != package_id:
        # a title that merely repeats the id adds nothing to the description
        description = build_description(title, description)

    # authors and owners are each recorded as a single party
    parties = []
    for key, role in (('authors', 'author'), ('owners', 'owner')):
        who = metadata.get(key)
        if who:
            parties.append(models.Party(name=who, role=role))

    # the VCS URL is "<type>+<url>" when the repository type is known,
    # otherwise the bare repository URL
    repository = metadata.get('repository') or {}
    repo_kind = repository.get('@type') or ''
    repo_url = repository.get('@url') or ''
    if not repo_url:
        vcs_url = None
    elif repo_kind:
        vcs_url = '{}+{}'.format(repo_kind, repo_url)
    else:
        vcs_url = repo_url

    yield cls(
        name=package_id,
        version=metadata.get('version'),
        description=description or None,
        homepage_url=metadata.get('projectUrl') or None,
        parties=parties,
        declared_license=metadata.get('licenseUrl') or None,
        copyright=metadata.get('copyright') or None,
        vcs_url=vcs_url,
    )