def author_mapper(authors_content, package):
    """
    Append one person Party to ``package.parties`` for each author found in
    ``authors_content`` and return ``package``.

    Each entry produced by ``parse_person`` is unpacked as a
    (name, role, email, url) tuple; an empty role defaults to 'author'.
    https://getcomposer.org/doc/04-schema.md#authors
    """
    for person_name, person_role, person_email, person_url in parse_person(authors_content):
        author_party = models.Party(
            type=models.party_person,
            name=person_name,
            role=person_role or 'author',
            email=person_email,
            url=person_url,
        )
        package.parties.append(author_party)
    return package
def m_maintainer_handler(value, **kwargs):
    """
    Return a Package data mapping with a 'parties' list holding one
    maintainer Party for each (name, email) pair that ``get_maintainers``
    extracts from ``value``.

    A maintainer value may be one or more ``name <email>`` parts,
    space-separated. Extra keyword arguments are accepted and ignored.
    """
    maintainer_parties = [
        models.Party(
            type='person',
            role='maintainer',
            name=person_name,
            email=person_email,
        )
        for person_name, person_email in get_maintainers(value)
    ]
    return {'parties': maintainer_parties}
def build_package(package_data):
    """
    Return a HaxePackage built from a ``package_data`` mapping (loaded from a
    haxelib.json or similar), for instance:

        {
            "name": "haxelib",
            "url" : "https://lib.haxe.org/documentation/",
            "license": "GPL",
            "tags": ["haxelib", "core"],
            "description": "The haxelib client",
            "classPath": "src",
            "version": "3.4.0",
            "releasenote": "* Fix password input issue in Windows (#421).",
            "contributors": ["back2dos", "ncannasse", "jason"]
        }

    Each contributor becomes a 'contributor' person Party and each entry of
    the "dependencies" mapping becomes a DependentPackage that is resolved
    only when it carries a non-empty version.
    """
    package = HaxePackage(
        name=package_data.get('name'),
        version=package_data.get('version'),
        homepage_url=package_data.get('url'),
        declared_license=package_data.get('license'),
        keywords=package_data.get('tags'),
        description=package_data.get('description'),
    )
    package.download_url = package.repository_download_url()

    # A key may be present with a null value in the JSON: treat this the same
    # way as a missing key rather than failing on iteration.
    for contrib in package_data.get('contributors') or []:
        party = models.Party(
            type=models.party_person,
            name=contrib,
            role='contributor',
            url='https://lib.haxe.org/u/{}'.format(contrib),
        )
        package.parties.append(party)

    dependencies = package_data.get('dependencies') or {}
    for dep_name, dep_version in dependencies.items():
        # an empty or blank version means the dependency is not pinned
        dep_version = dep_version and dep_version.strip()
        dep_purl = PackageURL(
            type='haxe',
            name=dep_name,
            version=dep_version,
        ).to_string()
        dep = models.DependentPackage(
            purl=dep_purl,
            is_resolved=bool(dep_version),
        )
        package.dependencies.append(dep)

    return package
def parse_setup_py(location):
    """
    Return a PythonPackage built from the setup.py file at ``location``.
    Return None when ``location`` is empty or does not end with 'setup.py'.
    """
    if not (location and location.endswith('setup.py')):
        return

    # FIXME: what if this is unicode text?
    read_mode = 'rb' if py2 else 'r'
    with open(location, read_mode) as setup_file:
        setup_text = setup_file.read()

    summary = get_setup_attribute(setup_text, 'summary')
    long_description = get_setup_attribute(setup_text, 'description')
    description = build_description(summary, long_description)

    author = get_setup_attribute(setup_text, 'author')
    parties = []
    if author:
        author_party = models.Party(
            type=models.party_person, name=author, role='author')
        parties.append(author_party)

    def is_license_classifier(classifier):
        return classifier.startswith('License')

    # The declared license keeps the raw license attribute and the license
    # classifiers side by side, in this order.
    declared_license = OrderedDict()
    declared_license['license'] = get_setup_attribute(setup_text, 'license')

    classifiers = get_classifiers(setup_text)
    declared_license['classifiers'] = [
        c for c in classifiers if is_license_classifier(c)]
    # all the other classifiers are used as keywords
    keywords = [c for c in classifiers if not is_license_classifier(c)]

    return PythonPackage(
        name=get_setup_attribute(setup_text, 'name'),
        version=get_setup_attribute(setup_text, 'version'),
        description=description or None,
        homepage_url=get_setup_attribute(setup_text, 'url') or None,
        parties=parties,
        declared_license=declared_license,
        keywords=keywords,
    )
def parse_pkg_info(location):
    """
    Return a PythonPackage built from the 'PKG-INFO' file at ``location``.
    Return None when ``location`` is empty or does not end with 'PKG-INFO'.

    Only the attributes listed in PKG_INFO_ATTRIBUTES are collected. An
    attribute that is absent from the file, or whose value is the literal
    'UNKNOWN', is recorded as None.
    """
    if not location or not location.endswith('PKG-INFO'):
        return

    with open(location, 'rb') as inp:
        pkg_info = inp.read()
    # The patterns used below are text patterns: decode the raw bytes once so
    # that the matching also works on Python 3.
    if isinstance(pkg_info, bytes):
        pkg_info = pkg_info.decode('utf-8', 'replace')

    infos = {}
    for attribute in PKG_INFO_ATTRIBUTES:
        # Find the first line starting with the attribute name and capture
        # what follows the run of separators (colons and whitespace).
        found = re.search(
            r'^' + re.escape(attribute) + r'[\s:]*(.*)',
            pkg_info,
            flags=re.MULTILINE,
        )
        # a missing attribute is not an error: it is recorded as None
        value = found.group(1) if found else None
        if value == 'UNKNOWN':
            value = None
        infos[attribute] = value

    description = build_description(
        infos.get('Summary'), infos.get('Description'))

    parties = []
    author = infos.get('Author')
    if author:
        # NOTE(review): the role is left empty here while the setup.py
        # parsers use role='author' -- confirm whether this is intended.
        parties.append(
            models.Party(type=models.party_person, name=author, role=''))

    package = PythonPackage(
        name=infos.get('Name'),
        version=infos.get('Version'),
        description=description or None,
        homepage_url=infos.get('Home-page') or None,
        # FIXME: this is NOT correct as classifiers can be used for this too
        declared_license=infos.get('License') or None,
        # FIXME: what about email?
        # FIXME: what about maintainers?
        parties=parties,
    )
    return package
def parse(location):
    """
    Return a Package built from parsing the file at ``location``, or None
    when the file name is not one of the supported names.

    The file name can be either a 'setup.py', 'metadata.json' or 'PKG-INFO'
    file. The last two are delegated to ``parse_metadata`` and
    ``parse_pkg_info`` and their result is returned as-is.
    """
    file_name = fileutils.file_name(location)

    if file_name == 'setup.py':
        return PythonPackage(
            name=get_attribute(location, 'name'),
            homepage_url=get_attribute(location, 'url'),
            description=get_attribute(location, 'description'),
            version=get_attribute(location, 'version'),
            authors=[
                models.Party(
                    type=models.party_person,
                    name=get_attribute(location, 'author'),
                )
            ],
            asserted_licenses=[
                AssertedLicense(license=get_attribute(location, 'license'))
            ],
        )

    # The parsed package must be returned to the caller: calling the parser
    # without returning its result would always yield None.
    if file_name == 'metadata.json':
        return parse_metadata(location)

    if file_name == 'PKG-INFO':
        return parse_pkg_info(location)
def recognize(cls, location):
    """
    Yield a single package object built from the YAML file at ``location``.
    Yield nothing when the loaded data has no name.
    """
    with io.open(location, encoding='utf-8') as about_file:
        about_text = about_file.read()
    about_data = saneyaml.load(about_text)

    name = about_data.get('name')
    # FIXME: having no name may not be a problem See #1514
    if not name:
        return

    # NOTE: any owner that is not a string, including a missing owner, is
    # stored as its repr().
    owner = about_data.get('owner')
    owner_name = owner if isinstance(owner, str) else repr(owner)
    owner_party = models.Party(
        type=models.party_person, name=owner_name, role='owner')

    about_package = cls(
        type='about',
        name=name,
        version=about_data.get('version'),
        declared_license=about_data.get('license_expression'),
        copyright=about_data.get('copyright'),
        parties=[owner_party],
        # 'home_url' takes precedence over 'homepage_url'
        homepage_url=(
            about_data.get('home_url') or about_data.get('homepage_url')
        ),
        download_url=about_data.get('download_url'),
    )
    about_package.extra_data['about_resource'] = about_data.get(
        'about_resource')
    yield about_package
def test_MetadataBzl_parse(self):
    # Parse the METADATA.bzl fixture and compare the result with the single
    # expected package.
    handler = build.BuckMetadataBzlHandler
    test_file = self.get_test_loc('metadatabzl/METADATA.bzl')
    result_packages = handler.parse(test_file)

    maintainer = models.Party(
        type=models.party_org,
        name='oss_foundation',
        role='maintainer',
    )
    expected_package = models.PackageData(
        datasource_id=handler.datasource_id,
        type='github',
        name='example',
        version='0.0.1',
        declared_license=['BSD-3-Clause'],
        parties=[maintainer],
        homepage_url='https://github.com/example/example',
    )
    compare_package_results([expected_package], result_packages)
def create_package_data_from_msiinfo_results(
    msiinfo_results,
    datasource_id='msi_installer',
    package_type='msi',
):
    """
    Return PackageData built from a mapping of `msiinfo_results`.

    The 'Author', 'Subject', 'Comments' and 'Keywords' entries are popped
    from `msiinfo_results`, which is therefore modified in place. Whatever is
    left in the mapping is stored as the package extra_data.
    """
    author_name = msiinfo_results.pop('Author', '')
    if author_name:
        parties = [
            models.Party(type=None, role='author', name=author_name)
        ]
    else:
        parties = []

    # Currently, we use the contents of the `Subject` field from the msiinfo
    # suminfo results as the package name because it contains the package
    # name most of the time. Getting the version out of the `Subject` string
    # is not straightforward because the format of the string is usually
    # different between different MSIs.
    subject = msiinfo_results.pop('Subject', '')
    version = get_version_from_subject_line(subject)

    description = msiinfo_results.pop('Comments', '')
    keywords = msiinfo_results.pop('Keywords', [])

    return models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        name=subject,
        version=version,
        description=description,
        parties=parties,
        keywords=keywords,
        extra_data=msiinfo_results,
    )
def recognize(cls, location):
    """
    Yield a single package object built from the XML file at ``location``,
    using the attributes of its top-level 'assembly' element and of the
    nested 'assemblyIdentity' element.
    Yield nothing when the parsed document is empty.
    """
    with open(location, 'rb') as manifest_file:
        parsed = xmltodict.parse(manifest_file)

    if TRACE:
        logger_debug('parsed:', parsed)

    if not parsed:
        return

    assembly = parsed.get('assembly', {})
    identity = assembly.get('assemblyIdentity', {})

    # the company, when present, is reported as the owning organization
    company = assembly.get('@company', '')
    owners = []
    if company:
        owners.append(
            models.Party(name=company, type=models.party_org, role='owner'))

    yield cls(
        name=identity.get('@name', ''),
        version=identity.get('@version', ''),
        description=assembly.get('@description', ''),
        homepage_url=assembly.get('@supportInformation', ''),
        parties=owners,
        copyright=assembly.get('@copyright', ''),
    )
def _as_mapping(value):
    """Return ``value`` when it is a dict and an empty dict otherwise."""
    return value if isinstance(value, dict) else {}


def parse_metadata(location):
    """
    Return a PythonPackage built from the Python wheel 'metadata.json' file at
    ``location`` or None.

    None is returned when ``location`` is empty, does not end with
    'metadata.json', or when the parent directory of ``location`` does not
    contain both a 'METADATA' and a 'DESCRIPTION.rst' file.

    The homepage URL and the contacts are collected on a best-effort basis
    from the 'python.details' extension: missing or unexpectedly shaped data
    is ignored rather than raising.
    """
    if not location or not location.endswith('metadata.json'):
        return

    parent_dir = fileutils.parent_directory(location)
    # FIXME: is the absence of these two files a show stopper?
    required_paths = [
        os.path.join(parent_dir, fname)
        for fname in ('METADATA', 'DESCRIPTION.rst')
    ]
    if not all(os.path.exists(path) for path in required_paths):
        return

    with open(location, 'rb') as inp:
        infos = json.loads(inp.read())

    # Walk down the nested mappings, treating any missing or non-mapping
    # level as empty.
    extensions = _as_mapping(infos.get('extensions'))
    details = _as_mapping(extensions.get('python.details'))
    homepage_url = _as_mapping(details.get('project_urls')).get('Home')

    contacts = details.get('contacts')
    if not isinstance(contacts, (list, tuple)):
        contacts = []

    authors = []
    for contact in contacts:
        contact_name = _as_mapping(contact).get('name')
        # contacts without a name are skipped
        if contact_name:
            authors.append(
                models.Party(type=models.party_person, name=contact_name))

    package = PythonPackage(
        name=infos.get('name'),
        version=infos.get('version'),
        summary=infos.get('summary'),
        asserted_licenses=[AssertedLicense(license=infos.get('license'))],
        homepage_url=homepage_url,
        authors=authors,
    )
    return package
def parse_setup_py(location):
    """
    Return a PythonPackage built from the setup.py file at ``location``.
    Return None when ``location`` is empty or does not end with 'setup.py'.

    The declared license is the 'license' attribute followed by the license
    classifiers, one per line, with blank entries left out.
    """
    if not (location and location.endswith('setup.py')):
        return

    # FIXME: what if this is unicode text?
    with open(location, 'rb') as setup_file:
        setup_text = setup_file.read()

    summary = get_setup_attribute(setup_text, 'summary')
    long_description = get_setup_attribute(setup_text, 'description')
    description = build_description(summary, long_description)

    author = get_setup_attribute(setup_text, 'author')
    parties = []
    if author:
        author_party = models.Party(
            type=models.party_person, name=author, role='author')
        parties.append(author_party)

    def is_license_classifier(classifier):
        return classifier.startswith('License')

    classifiers = get_classifiers(setup_text)
    license_classifiers = [c for c in classifiers if is_license_classifier(c)]
    # all the other classifiers are used as keywords
    keywords = [c for c in classifiers if not is_license_classifier(c)]

    license_lines = [get_setup_attribute(setup_text, 'license')]
    license_lines = license_lines + license_classifiers
    declared_license = '\n'.join(
        line for line in license_lines if line and line.strip())

    return PythonPackage(
        name=get_setup_attribute(setup_text, 'name'),
        version=get_setup_attribute(setup_text, 'version'),
        description=description or None,
        homepage_url=get_setup_attribute(setup_text, 'url') or None,
        parties=parties,
        declared_license=declared_license,
        keywords=keywords,
    )
def parse(location):
    """
    Return a MicrosoftUpdateManifestPackage from a .mum XML file at
    `location`. Return None if ``parse_mum`` returns nothing for this file.
    """
    parsed = parse_mum(location)

    if TRACE:
        logger_debug('parsed:', parsed)

    if not parsed:
        return

    assembly = parsed.get('assembly', {})
    identity = assembly.get('assemblyIdentity', {})

    # the company, when present, is reported as the owning organization
    company = assembly.get('@company', '')
    if company:
        parties = [
            models.Party(name=company, type=models.party_org, role='owner')
        ]
    else:
        parties = []

    return MicrosoftUpdateManifestPackage(
        name=identity.get('@name', ''),
        version=identity.get('@version', ''),
        description=assembly.get('@description', ''),
        homepage_url=assembly.get('@supportInformation', ''),
        parties=parties,
        copyright=assembly.get('@copyright', ''),
    )
def parse_with_pkginfo(pkginfo):
    """
    Return a PythonPackage built from a ``pkginfo`` object, or None when
    ``pkginfo`` is empty or has no name.

    The license, when present, is stored in a mapping under the 'license'
    key. The maintainer, when present, is attached to the package as a
    person Party with its email.
    """
    if not (pkginfo and pkginfo.name):
        return

    package = PythonPackage(
        name=pkginfo.name,
        version=pkginfo.version,
        description=pkginfo.description,
        download_url=pkginfo.download_url,
        homepage_url=pkginfo.home_page,
    )

    if pkginfo.license:
        # TODO: We should make the declared license as it is, this should be
        # updated in scancode to parse a pure string
        package.declared_license = {'license': pkginfo.license}

    if pkginfo.maintainer:
        # The party must be set on the package itself: adding it to the
        # constructor arguments after the package is built has no effect.
        package.parties = [
            models.Party(
                type=models.party_person,
                name=pkginfo.maintainer,
                role='maintainer',
                email=pkginfo.maintainer_email,
            )
        ]

    return package
def parse_with_pkginfo(pkginfo):
    """
    Return a PythonPackage built from a ``pkginfo`` object, or None when
    ``pkginfo`` is empty or has no name.

    The description falls back to the summary. Classifiers starting with
    'License' go to the declared license mapping and the other classifiers
    become keywords. An author party is created when either an author name
    or an author email is available.
    """
    if not (pkginfo and pkginfo.name):
        return

    package = PythonPackage(
        name=pkginfo.name,
        version=pkginfo.version,
        description=pkginfo.description or pkginfo.summary,
        download_url=pkginfo.download_url,
        homepage_url=pkginfo.home_page,
    )

    declared_license = {}
    if pkginfo.license:
        # TODO: We should make the declared license as it is, this should be
        # updated in scancode to parse a pure string
        declared_license['license'] = pkginfo.license

    if pkginfo.classifiers:
        license_classifiers = []
        other_classifiers = []
        for classifier in pkginfo.classifiers:
            if classifier.startswith('License'):
                license_classifiers.append(classifier)
            else:
                other_classifiers.append(classifier)
        declared_license['classifiers'] = license_classifiers
        package.keywords = other_classifiers

    if declared_license:
        package.declared_license = declared_license

    # An author with a name but no email is still an author: do not require
    # the email to report the party.
    if pkginfo.author or pkginfo.author_email:
        package.parties = [
            models.Party(
                type=models.party_person,
                name=pkginfo.author,
                role='author',
                email=pkginfo.author_email,
            )
        ]

    return package
def parse(cls, location):
    """
    Yield a single PackageData built from the XML file at ``location``,
    using the attributes of its top-level 'assembly' element and of the
    nested 'assemblyIdentity' element.
    Yield nothing when the parsed document is empty.
    """
    with open(location, 'rb') as manifest_file:
        parsed = xmltodict.parse(manifest_file)

    if not parsed:
        return

    assembly = parsed.get('assembly', {})
    identity = assembly.get('assemblyIdentity', {})

    # the company, when present, is reported as the owning organization
    company = assembly.get('@company', '')
    owners = [
        models.Party(name=company, type=models.party_org, role='owner')
    ] if company else []

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=identity.get('@name', ''),
        version=identity.get('@version', ''),
        description=assembly.get('@description', ''),
        homepage_url=assembly.get('@supportInformation', ''),
        parties=owners,
        copyright=assembly.get('@copyright', ''),
    )
def build_package_data(debian_data, datasource_id, package_type='deb', distro=None):
    """
    Return a PackageData object built from a ``debian_data`` mapping (from a
    dpkg status or similar file).

    ``distro`` is used as the namespace of the package and of its source
    package purl. The architecture is stored as a qualifier, the multi-arch
    value as extra data and the section as the only keyword.
    """
    qualifiers = {}
    architecture = debian_data.get('architecture')
    if architecture:
        qualifiers['architecture'] = architecture

    # Multi-Arch can be: "foreign", "same", "allowed", "all", "optional" or
    # empty/non-present. See https://wiki.debian.org/Multiarch/HOWTO
    extra_data = {}
    multi_arch = debian_data.get('multi-arch')
    if multi_arch:
        extra_data['multi_arch'] = multi_arch

    # NOTE(review): the 'installed' and 'original_maintainer' keys do not
    # follow the dash-separated style of 'multi-arch' -- confirm the key
    # names against what the caller provides.
    parties = []
    for field_name in ('maintainer', 'original_maintainer'):
        party_name = debian_data.get(field_name)
        if party_name:
            # the role is the same as the field name
            parties.append(models.Party(role=field_name, name=party_name))

    section = debian_data.get('section')
    keywords = [section] if section else []

    source_packages = []
    source = debian_data.get('source')
    if source:
        source_purl = PackageURL(
            type=package_type,
            name=source,
            namespace=distro,
        )
        source_packages.append(source_purl.to_string())

    return models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        namespace=distro,
        name=debian_data.get('package'),
        version=debian_data.get('version'),
        qualifiers=qualifiers,
        description=debian_data.get('description'),
        homepage_url=debian_data.get('homepage'),
        size=debian_data.get('installed'),
        source_packages=source_packages,
        keywords=keywords,
        parties=parties,
        extra_data=extra_data,
    )
def parse(cls, location):
    """
    Yield a single PackageData built from the tags that ``get_rpm_tags``
    collects for the RPM at ``location``.
    Yield nothing when no tags are returned.

    The version is the EVR string built from the epoch, version and release
    tags. The source RPM, when present, is reported as a source package purl.
    The distribution and vendor are reported as parties.
    """
    rpm_tags = get_rpm_tags(location, include_desc=True)

    if TRACE:
        logger_debug('recognize: rpm_tags', rpm_tags)

    if not rpm_tags:
        return

    # A missing, zero or non-numeric epoch is treated as no epoch at all.
    try:
        epoch = (int(rpm_tags.epoch) or None) if rpm_tags.epoch else None
    except ValueError:
        epoch = None

    evr = EVR(
        version=rpm_tags.version or None,
        release=rpm_tags.release or None,
        epoch=epoch,
    ).to_string()

    # NOTE(review): these qualifiers are collected but are not passed to the
    # PackageData built below -- confirm whether this is intended.
    qualifiers = {}
    os = rpm_tags.os
    if os and os.lower() != 'linux':
        qualifiers['os'] = os
    arch = rpm_tags.arch
    if arch:
        qualifiers['arch'] = arch

    source_packages = []
    if rpm_tags.source_rpm:
        sepoch, sname, sversion, srel, sarch = nevra.from_name(rpm_tags.source_rpm)
        src_evr = EVR(sversion, srel, sepoch).to_string()
        src_qualifiers = {'arch': sarch} if sarch else {}
        src_purl = models.PackageURL(
            type=cls.default_package_type,
            # TODO: namespace=cls.default_package_namespace,
            name=sname,
            version=src_evr,
            qualifiers=src_qualifiers,
        ).to_string()

        if TRACE:
            logger_debug('recognize: source_rpm', src_purl)

        source_packages = [src_purl]

    # TODO: also use me to craft a namespace!!!
    # TODO: assign a namespace to Package URL based on distro names.
    # CentOS
    # Fedora Project
    # OpenMandriva Lx
    # openSUSE Tumbleweed
    # Red Hat
    candidate_parties = (
        (rpm_tags.distribution, 'distributor'),
        (rpm_tags.vendor, 'vendor'),
    )
    parties = [
        models.Party(name=party_name, role=party_role)
        for party_name, party_role in candidate_parties
        if party_name
    ]

    description = build_description(
        summary=rpm_tags.summary,
        description=rpm_tags.description,
    )

    # the same fields are used for the trace output and for the package
    package_fields = dict(
        name=rpm_tags.name,
        version=evr,
        description=description or None,
        homepage_url=rpm_tags.url or None,
        parties=parties,
        declared_license=rpm_tags.license or None,
        source_packages=source_packages,
    )

    if TRACE:
        logger_debug('recognize: data to create a package:\n', package_fields)

    package = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        # TODO: namespace=cls.default_package_namespace,
        **package_fields
    )

    if TRACE:
        logger_debug('recognize: created package:\n', package)

    yield package
def parse(cls, location):
    """
    Yield a single PackageData built from the YAML file at ``location``.

    The package type is the 'type' entry, else the type of the 'purl' entry,
    else ``cls.default_package_type``. The namespace is the 'namespace'
    entry, else the namespace of the 'purl' entry.
    The 'about_resource' entry, when present, is reported as a file
    reference.
    """
    with io.open(location, encoding='utf-8') as loc:
        package_data = saneyaml.load(loc.read())

    # About files can contain any purl and also have a namespace
    about_type = package_data.get('type')
    about_ns = package_data.get('namespace')
    purl_type = None
    purl_ns = None
    purl = package_data.get('purl')
    if purl:
        purl = PackageURL.from_string(purl)
        if purl:
            purl_type = purl.type
            # the namespace of the purl is the fallback namespace: without
            # this, the fallback below could never apply
            purl_ns = purl.namespace

    package_type = about_type or purl_type or cls.default_package_type
    package_ns = about_ns or purl_ns

    name = package_data.get('name')
    version = package_data.get('version')

    # 'home_url' takes precedence over 'homepage_url'
    homepage_url = (
        package_data.get('home_url') or package_data.get('homepage_url')
    )
    download_url = package_data.get('download_url')
    copyright_statement = package_data.get('copyright')

    license_expression = package_data.get('license_expression')
    declared_license = license_expression

    # NOTE: any owner that is not a string, including a missing owner, is
    # stored as its repr().
    owner = package_data.get('owner')
    if not isinstance(owner, str):
        owner = repr(owner)
    parties = [
        models.Party(type=models.party_person, name=owner, role='owner')
    ]

    # FIXME: also include notice_file and license_file(s) as file_references
    file_references = []
    about_resource = package_data.get('about_resource')
    if about_resource:
        file_references.append(models.FileReference(path=about_resource))

    # FIXME: we should put the unprocessed attributes in extra data
    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=package_type,
        namespace=package_ns,
        name=name,
        version=version,
        declared_license=declared_license,
        license_expression=license_expression,
        copyright=copyright_statement,
        parties=parties,
        homepage_url=homepage_url,
        download_url=download_url,
        file_references=file_references,
    )
def parse_metadata(location):
    """
    Return a PythonPackage built from the Python wheel 'metadata.json' file at
    ``location`` or None.

    None is returned when ``location`` is empty, does not end with
    'metadata.json', or when the parent directory of ``location`` does not
    contain both a 'METADATA' and a 'DESCRIPTION.rst' file: these are checked
    to ensure this is a proper metadata.json file.

    Named contacts of the 'python.details' extension become 'contact'
    parties. Classifiers starting with 'License' go to the declared license
    mapping and the other classifiers become keywords.
    """
    if not location or not location.endswith('metadata.json'):
        if TRACE:
            logger_debug('parse_metadata: not metadata.json:', location)
        return

    parent_dir = fileutils.parent_directory(location)
    # FIXME: is the absence of these two files a show stopper?
    paths = [
        os.path.join(parent_dir, n) for n in ('METADATA', 'DESCRIPTION.rst')
    ]
    if not all(os.path.exists(p) for p in paths):
        if TRACE:
            logger_debug('parse_metadata: not extra paths', paths)
        return

    with open(location, 'rb') as infs:
        infos = json.load(infs)

    extensions = infos.get('extensions')
    if TRACE:
        logger_debug('parse_metadata: extensions:', extensions)

    # each level may be missing: the chain stops at the first empty value
    details = extensions and extensions.get('python.details')
    urls = details and details.get('project_urls')
    homepage_url = urls and urls.get('Home')

    # Compute the contacts before tracing them: ``details`` may be empty and
    # must not be dereferenced directly.
    contacts = details and details.get('contacts') or []
    if TRACE:
        logger_debug('parse_metadata: contacts:', contacts)

    parties = []
    for contact in contacts:
        if TRACE:
            logger_debug('parse_metadata: contact:', contact)
        name = contact and contact.get('name')
        if not name:
            if TRACE:
                logger_debug('parse_metadata: no name:', contact)
            continue
        parties.append(
            models.Party(type=models.party_person, name=name, role='contact'))

    description = build_description(
        infos.get('summary'), infos.get('description'))

    license_classifiers = []
    other_classifiers = []
    for classifier in infos.get('classifiers') or []:
        if classifier.startswith('License'):
            license_classifiers.append(classifier)
        else:
            other_classifiers.append(classifier)

    declared_license = {}
    lic = infos.get('license')
    if lic:
        declared_license['license'] = lic
    if license_classifiers:
        declared_license['classifiers'] = license_classifiers

    package = PythonPackage(
        name=infos.get('name'),
        version=infos.get('version'),
        description=description or None,
        declared_license=declared_license or None,
        homepage_url=homepage_url or None,
        parties=parties,
        keywords=other_classifiers,
    )
    return package
def _setup_string(node):
    """
    Return the text of the string literal AST ``node`` or None when ``node``
    is anything else (a name, a call, a number, bytes, etc.).
    """
    if isinstance(node, ast.Constant):
        value = node.value
    else:
        # legacy ``ast.Str`` nodes keep their text in the ``s`` attribute
        value = getattr(node, 's', None)
    return value if isinstance(value, str) else None


def parse_setup_py(location):
    """
    Return a PythonPackage built from the setup.py file at ``location``.
    Return None when ``location`` is empty, does not end with 'setup.py' or
    when no literal package name is found.

    The file is parsed, not executed: only the keyword arguments of
    top-level calls to ``setup()`` or ``main()`` are collected, and only
    when their value is a string literal or a list, tuple or set of string
    literals. Any non-literal element of such a collection is skipped.
    """
    if not location or not location.endswith('setup.py'):
        return

    with open(location) as inp:
        setup_text = inp.read()

    setup_args = {}

    # Parse setup.py file and traverse the AST
    tree = ast.parse(setup_text)
    for statement in tree.body:
        # We only care about function calls or assignments to functions named
        # `setup` or `main`
        if not isinstance(statement, (ast.Expr, ast.Assign)):
            continue
        call = statement.value
        if not (
            isinstance(call, ast.Call)
            and isinstance(call.func, ast.Name)
            # we also look for main as sometimes this is used instead of
            # setup()
            and call.func.id in ('setup', 'main')
        ):
            continue

        # Process the arguments to the setup function
        for kw in call.keywords:
            arg_name = kw.arg
            if arg_name is None:
                # a `**mapping` expansion carries no literal value to collect
                continue

            text = _setup_string(kw.value)
            if text is not None:
                setup_args[arg_name] = text
            elif isinstance(kw.value, (ast.List, ast.Tuple, ast.Set)):
                # We collect the string elements of a collection and skip
                # any other element such as a name or a function call
                elements = (_setup_string(elt) for elt in kw.value.elts)
                setup_args[arg_name] = [
                    elt for elt in elements if elt is not None]

            # TODO: what if isinstance(kw.value, ast.Dict)
            # or an expression like a call to version=get_version or
            # version__version__

    package_name = setup_args.get('name')
    if not package_name:
        return

    description = build_description(
        setup_args.get('summary', ''),
        setup_args.get('description', ''),
    )

    parties = []
    author = setup_args.get('author')
    author_email = setup_args.get('author_email')
    homepage_url = setup_args.get('url')
    if author:
        parties.append(
            models.Party(
                type=models.party_person,
                name=author,
                email=author_email,
                role='author',
                url=homepage_url,
            )
        )
    elif author_email:
        # an author known only by its email is still reported
        parties.append(
            models.Party(
                type=models.party_person,
                email=author_email,
                role='author',
                url=homepage_url,
            )
        )

    classifiers = setup_args.get('classifiers', [])
    license_classifiers = [c for c in classifiers if c.startswith('License')]
    other_classifiers = [c for c in classifiers if not c.startswith('License')]

    declared_license = {
        'license': setup_args.get('license'),
        'classifiers': license_classifiers,
    }

    detected_version = setup_args.get('version')
    if not detected_version:
        # search for possible dunder versions here and elsewhere
        detected_version = detect_version_attribute(location)

    return PythonPackage(
        name=package_name,
        version=detected_version,
        description=description or None,
        homepage_url=setup_args.get('url') or None,
        parties=parties,
        declared_license=declared_license,
        keywords=other_classifiers,
    )
def _cran_parties(raw_parties, role):
    """
    Return a list of Party with ``role`` from a ``raw_parties`` string where
    entries are separated by a comma followed by a newline. Entries without a
    name and without an email are skipped.
    """
    parties = []
    if not raw_parties:
        return parties
    for raw_party in raw_parties.split(',\n'):
        party_name, party_email = get_party_info(raw_party)
        if party_name or party_email:
            parties.append(
                models.Party(name=party_name, role=role, email=party_email))
    return parties


def build_package(package_data):
    """
    Return a CranPackage object built from a ``package_data`` mapping of
    yaml data, or None when the mapping has no 'Package' name.

    Maintainers are reported before authors. Each entry of 'Depends' becomes
    a runtime DependentPackage whose requirement is the relationship and
    version found next to the dependency name, if any.
    """
    name = package_data.get('Package')
    if not name:
        return

    # The party names are kept in their own variables so that they never
    # overwrite the package name.
    parties = _cran_parties(package_data.get('Maintainer'), role='maintainer')
    parties.extend(_cran_parties(package_data.get('Author'), role='author'))

    package_dependencies = []
    dependencies = package_data.get('Depends')
    if dependencies:
        for dependency in dependencies.split(',\n'):
            requirement = None
            for splitter in ('==', '>=', '<=', '>', '<'):
                if splitter in dependency:
                    splits = dependency.split(splitter)
                    # Replace the package name and keep the relationship and
                    # version. For example: R (>= 2.1)
                    requirement = dependency.replace(
                        splits[0], '').strip().strip(')').strip()
                    dependency = splits[0].strip().strip('(').strip()
                    break
            package_dependencies.append(
                models.DependentPackage(
                    purl=PackageURL(type='cran', name=dependency).to_string(),
                    requirement=requirement,
                    scope='dependencies',
                    is_runtime=True,
                    is_optional=False,
                )
            )

    package = CranPackage(
        name=name,
        version=package_data.get('Version'),
        description=(
            package_data.get('Description', '')
            or package_data.get('Title', '')
        ),
        declared_license=package_data.get('License'),
        parties=parties,
        dependencies=package_dependencies,
        # TODO: Let's handle the release date as a Date type
        # release_date = package_data.get('Date/Publication'),
    )
    return package
def build_opam_package(opams):
    """
    Return an OpamPackage built from an ``opams`` mapping of opam file data.

    The description is the synopsis followed by the description, one per
    line, leaving out blank values and a description that merely repeats the
    synopsis. Authors are reported by name and maintainers by email.
    """
    package_dependencies = [
        models.DependentPackage(
            purl=dep.purl,
            requirement=dep.version,
            scope='dependency',
            is_runtime=True,
            is_optional=False,
            is_resolved=False,
        )
        for dep in opams.get('depends') or []
    ]

    synopsis = opams.get('synopsis') or ''
    details = opams.get('description') or ''
    if details == synopsis:
        details = None
    description = '\n'.join(
        text for text in (synopsis, details) if text and text.strip())

    parties = []
    for author in opams.get('authors') or []:
        parties.append(
            models.Party(
                type=models.party_person,
                name=author,
                role='author',
            )
        )
    for maintainer in opams.get('maintainer') or []:
        parties.append(
            models.Party(
                type=models.party_person,
                email=maintainer,
                role='maintainer',
            )
        )

    return OpamPackage(
        name=opams.get('name'),
        version=opams.get('version'),
        vcs_url=opams.get('dev-repo'),
        homepage_url=opams.get('homepage'),
        download_url=opams.get('src'),
        sha1=opams.get('sha1'),
        md5=opams.get('md5'),
        sha256=opams.get('sha256'),
        sha512=opams.get('sha512'),
        bug_tracking_url=opams.get('bug-reports'),
        declared_license=opams.get('license'),
        description=description,
        parties=parties,
        dependencies=package_dependencies,
    )
def build_package(cls, pubspec_data):
    """
    Return a package object built from a ``pubspec_data`` mapping.

    Dependencies are collected from the 'dependencies', 'dev_dependencies'
    and 'environment' entries. A few extra pubspec entries are kept as extra
    data when present. The download URL is the 'archive_url' entry when
    present and is otherwise computed from the package itself.
    """
    name = pubspec_data.get('name')
    version = pubspec_data.get('version')
    description = pubspec_data.get('description')
    homepage_url = pubspec_data.get('homepage')
    declared_license = pubspec_data.get('license')
    vcs_url = pubspec_data.get('repository')
    download_url = pubspec_data.get('archive_url')

    # Author and authors are deprecated
    authors = []
    author = pubspec_data.get('author')
    if author:
        authors.append(author)
    authors.extend(pubspec_data.get('authors') or [])

    parties = [
        models.Party(type=models.party_person, role='author', name=auth)
        for auth in authors
    ]

    # (pubspec key, is_runtime, is_optional) for each dependency section
    dependency_sections = (
        ('dependencies', True, False),
        ('dev_dependencies', False, True),
        ('environment', True, False),
    )
    package_dependencies = []
    for section, is_runtime, is_optional in dependency_sections:
        package_dependencies.extend(
            collect_deps(
                pubspec_data,
                section,
                is_runtime=is_runtime,
                is_optional=is_optional,
            )
        )

    extra_keys = (
        'issue_tracker',
        'documentation',
        # historical key, kept for backward compatibility
        'dependencies_overrides',
        # the field name documented for pubspec files
        'dependency_overrides',
        'executables',
        'publish_to',
    )
    extra_data = {}
    for extra_key in extra_keys:
        extra_value = pubspec_data.get(extra_key)
        if extra_value:
            extra_data[extra_key] = extra_value

    package = cls(
        name=name,
        version=version,
        vcs_url=vcs_url,
        description=description,
        declared_license=declared_license,
        parties=parties,
        homepage_url=homepage_url,
        dependencies=package_dependencies,
        extra_data=extra_data,
    )

    # A declared archive URL must not be dropped: only compute a download URL
    # when none is declared.
    if download_url:
        package.download_url = download_url
    else:
        package.download_url = package.repository_download_url()

    return package
def parse(location):
    """
    Return a NugetPackage from a nuspec XML file at `location`.
    Return None if nothing can be parsed or if there is no package metadata.
    """
    parsed = _parse_nuspec(location)

    if TRACE:
        logger_debug('parsed:', parsed)

    if not parsed:
        return

    nuspec = (parsed.get('package', {}) or {}).get('metadata')
    if not nuspec:
        return

    name = nuspec.get('id')

    # Summary: A short description of the package for UI display. If omitted,
    # a truncated version of description is used.
    description = build_description(
        nuspec.get('summary'), nuspec.get('description'))

    # title: A human-friendly title of the package, typically used in UI
    # displays as on nuget.org and the Package Manager in Visual Studio. If
    # not specified, the package ID is used.
    title = nuspec.get('title')
    if title and title != name:
        description = build_description(title, description)

    # authors and owners are each reported as a single party, in this order
    parties = []
    for nuspec_key, party_role in (('authors', 'author'), ('owners', 'owner')):
        party_name = nuspec.get(nuspec_key)
        if party_name:
            parties.append(models.Party(name=party_name, role=party_role))

    # The VCS URL is "<type>+<url>" when the repository type is known and the
    # plain repository URL otherwise.
    repo = nuspec.get('repository') or {}
    vcs_tool = repo.get('@type') or ''
    vcs_repository = repo.get('@url') or ''
    if vcs_repository and vcs_tool:
        vcs_url = '{}+{}'.format(vcs_tool, vcs_repository)
    elif vcs_repository:
        vcs_url = vcs_repository
    else:
        vcs_url = None

    return NugetPackage(
        name=name,
        version=nuspec.get('version'),
        description=description or None,
        homepage_url=nuspec.get('projectUrl') or None,
        parties=parties,
        declared_license=nuspec.get('licenseUrl') or None,
        copyright=nuspec.get('copyright') or None,
        vcs_url=vcs_url,
    )
def build_package(package_data, datasource_id):
    """
    Yield a single PackageData object built from a ``package_data`` mapping
    from a metadata.json or similar.

    Text values are stripped, and a value that is missing, null or blank is
    reported as None. Dependencies are the merge of the 'dependencies' and
    'depends' mappings, with 'depends' taking precedence.
    """
    name = package_data.get('name')
    version = package_data.get('version')

    def stripped(key):
        # a key may be present with a null value: treat it as empty text
        return (package_data.get(key) or '').strip()

    maintainer_name = stripped('maintainer')
    maintainer_email = stripped('maintainer_email')
    parties = []
    if maintainer_name or maintainer_email:
        parties.append(
            models.Party(
                name=maintainer_name or None,
                role='maintainer',
                email=maintainer_email or None,
            )
        )

    # TODO: combine descriptions as done elsewhere
    description = (
        package_data.get('description')
        or package_data.get('long_description')
        or ''
    )

    declared_license = None
    license_expression = None
    lic = package_data.get('license', '')
    if lic:
        declared_license = lic.strip()
        if declared_license:
            license_expression = models.compute_normalized_license(
                declared_license)

    deps = dict(package_data.get('dependencies', {}) or {})
    deps.update(package_data.get('depends', {}) or {})

    dependencies = [
        models.DependentPackage(
            purl=PackageURL(type='chef', name=dependency_name).to_string(),
            scope='dependencies',
            extracted_requirement=requirement,
            is_runtime=True,
            is_optional=False,
        )
        for dependency_name, requirement in deps.items()
    ]

    yield models.PackageData(
        datasource_id=datasource_id,
        type=ChefMetadataJsonHandler.default_package_type,
        name=name,
        version=version,
        parties=parties,
        description=description.strip() or None,
        declared_license=declared_license,
        license_expression=license_expression,
        code_view_url=stripped('source_url') or None,
        bug_tracking_url=stripped('issues_url') or None,
        dependencies=dependencies,
        primary_language='Ruby',
        **get_urls(name, version)
    )
def build_rubygem_package(cls, gem_data, download_url=None, package_url=None):
    """
    Return a package object created by calling ``cls`` and populated from a
    Gem ``gem_data`` mapping, or None if ``gem_data`` is empty.

    The ``gem_data`` can come from a .gemspec or .gem/gem_data.
    Optionally use the provided ``download_url`` and ``package_url`` strings.
    ``gem_data`` itself is not modified.
    """
    if not gem_data:
        return

    name = gem_data.get('name')

    short_desc = gem_data.get('summary') or ''
    long_desc = gem_data.get('description') or ''
    if long_desc == short_desc:
        # do not repeat the same text twice
        long_desc = None
    descriptions = [d for d in (short_desc, long_desc) if d and d.strip()]
    description = '\n'.join(descriptions)

    # Since the gem spec doc is not clear
    # https://guides.rubygems.org/specification-reference/#licenseo
    # we treat a list of licenses as a conjunction for now (e.g. AND)
    lic = gem_data.get('license')
    licenses = gem_data.get('licenses')
    declared_license = licenses_mapper(lic, licenses)

    package_manifest = cls(
        name=name,
        description=description,
        homepage_url=gem_data.get('homepage'),
        download_url=download_url,
        declared_license=declared_license,
    )

    # we can have one singular author or a plural list of authors
    authors = gem_data.get('authors') or []
    if isinstance(authors, str):
        # or a string of comma-separated authors (in the Rubygems API)
        authors = [a.strip() for a in authors.split(',') if a.strip()]
    else:
        # work on a copy: appending to the original list would modify the
        # caller's gem_data in place
        authors = list(authors)
    authors.append(gem_data.get('author') or '')

    for author in authors:
        if author and author.strip():
            party = models.Party(name=author, role='author')
            package_manifest.parties.append(party)

    # TODO: we have an email that is either a string or a list of strings

    # date: 2019-01-09 00:00:00.000000000 Z
    # keep only the leading YYYY-MM-DD part
    date = gem_data.get('date')
    if date and len(date) >= 10:
        package_manifest.release_date = date[:10]

    # there are two levels of nesting
    version1 = gem_data.get('version') or {}
    version = version1.get('version') or None
    package_manifest.version = version
    package_manifest.set_purl(package_url)

    metadata = gem_data.get('metadata') or {}
    if metadata:
        # The homepage from the metadata is only a fallback: when a homepage
        # is already set, it is kept even if the two values differ.
        homepage_url = metadata.get('homepage_uri')
        if homepage_url and not package_manifest.homepage_url:
            package_manifest.homepage_url = homepage_url

        package_manifest.bug_tracking_url = metadata.get('bug_tracking_uri')

        source_code_url = metadata.get('source_code_uri')
        if source_code_url:
            package_manifest.code_view_url = source_code_url
            # TODO: infer purl and add purl to package_manifest.source_packages

        # not used for now
        #   "changelog_uri"     => "https://example.com/user/bestgemever/CHANGELOG.md",
        #   "wiki_uri"          => "https://example.com/user/bestgemever/wiki"
        #   "mailing_list_uri"  => "https://groups.example.com/bestgemever",
        #   "documentation_uri" => "https://www.example.info/gems/bestgemever/0.0.1",

    # NOTE(review): a missing platform (None) is also recorded as a
    # qualifier here; confirm that this is intended.
    platform = gem_data.get('platform')
    if platform != 'ruby':
        qualifiers = dict(platform=platform)
        if not package_manifest.qualifiers:
            package_manifest.qualifiers = {}
        package_manifest.qualifiers.update(qualifiers)

    package_manifest.dependencies = get_dependencies(
        gem_data.get('dependencies'))

    # fall back to the repository URLs when nothing better is known
    if not package_manifest.download_url:
        package_manifest.download_url = (
            package_manifest.repository_download_url())

    if not package_manifest.homepage_url:
        package_manifest.homepage_url = (
            package_manifest.repository_homepage_url())

    return package_manifest
def build_rubygem_package_data(gem_data, datasource_id):
    """
    Return a PackageData for ``datasource_id`` built from a Gem ``gem_data``
    mapping, or None if ``gem_data`` is empty.

    The ``gem_data`` can come from a .gemspec or .gem/metadata and is not
    modified.
    """
    if not gem_data:
        return

    metadata = gem_data.get('metadata') or {}

    name = gem_data.get('name')

    # there are two levels of nesting for version:
    version1 = gem_data.get('version') or {}
    version = version1.get('version') or None

    # NOTE(review): a missing platform (None) is also recorded as a
    # qualifier here; confirm that this is intended.
    platform = gem_data.get('platform')
    if platform != 'ruby':
        qualifiers = dict(platform=platform)
    else:
        qualifiers = {}

    description = build_description(
        summary=gem_data.get('summary'),
        description=gem_data.get('description'),
    )

    # Since the gem spec doc is not clear wrt. the default being OR or AND,
    # we treat a list of licenses as a conjunction for now (e.g. AND)
    # See https://guides.rubygems.org/specification-reference/#licenseo
    lic = gem_data.get('license')
    licenses = gem_data.get('licenses')
    declared_license = licenses_mapper(lic, licenses)

    # we may have two homepages and one may be wrong.
    # we prefer the one from the metadata
    homepage_url = metadata.get('homepage_uri')
    if not homepage_url:
        homepage_url = gem_data.get('homepage')

    urls = get_urls(name, version, platform)
    dependencies = get_dependencies(gem_data.get('dependencies'))
    file_references = get_file_references(metadata.get('files'))

    package_data = models.PackageData(
        datasource_id=datasource_id,
        type=GemArchiveHandler.default_package_type,
        primary_language=GemArchiveHandler.default_primary_language,
        name=name,
        version=version,
        qualifiers=qualifiers,
        description=description,
        homepage_url=homepage_url,
        declared_license=declared_license,
        bug_tracking_url=metadata.get('bug_tracking_uri'),
        code_view_url=metadata.get('source_code_uri'),
        file_references=file_references,
        dependencies=dependencies,
        **urls,
    )

    # we can have one singular author or a plural list of authors
    authors = gem_data.get('authors') or []
    if isinstance(authors, str):
        # or a string of comma-separated authors (in the Rubygems API)
        authors = [a.strip() for a in authors.split(',') if a.strip()]
    else:
        # work on a copy: appending to the original list would modify the
        # caller's gem_data in place
        authors = list(authors)
    authors.append(gem_data.get('author') or '')

    for author in authors:
        if author and author.strip():
            party = models.Party(name=author, role='author')
            package_data.parties.append(party)

    # TODO: we have an email that is either a string or a list of strings

    # date: 2019-01-09 00:00:00.000000000 Z
    # keep only the leading YYYY-MM-DD part
    date = gem_data.get('date')
    if date and len(date) >= 10:
        package_data.release_date = date[:10]

    # TODO: infer source purl and add purl to package_data.source_packages

    # not used for now
    #   "changelog_uri"     => "https://example.com/user/bestgemever/CHANGELOG.md",
    #   "wiki_uri"          => "https://example.com/user/bestgemever/wiki"
    #   "mailing_list_uri"  => "https://groups.example.com/bestgemever",
    #   "documentation_uri" => "https://www.example.info/gems/bestgemever/0.0.1",

    if not package_data.homepage_url:
        package_data.homepage_url = rubygems_homepage_url(name, version)

    if not package_data.license_expression and package_data.declared_license:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    return package_data
def build_xcode_package(podspec_json_data):
    """
    Return a CocoapodsPackage object built from a ``podspec_json_data``
    mapping loaded from a podspec.json file.

    Missing or null "summary", "description" and "source" entries are
    tolerated.
    """
    name = podspec_json_data.get('name')
    version = podspec_json_data.get('version')
    # ``or ''`` also covers keys that are present with a null value
    summary = podspec_json_data.get('summary') or ''
    description = podspec_json_data.get('description') or ''
    homepage_url = podspec_json_data.get('homepage')

    # The license is either a plain string or a mapping; the values of a
    # mapping are joined in a single string.
    license_data = podspec_json_data.get('license')
    if isinstance(license_data, dict):
        declared_license = ' '.join(license_data.values())
    else:
        declared_license = license_data

    # The source is either a mapping or a plain string. Only a string is ever
    # used as a VCS URL: a mapping without a "git" entry is not a URL and
    # must not be assigned whole to vcs_url.
    source = podspec_json_data.get('source')
    vcs_url = None
    download_url = None
    if isinstance(source, dict):
        git_url = source.get('git')
        http_url = source.get('http')
        if git_url:
            vcs_url = git_url
        elif http_url:
            download_url = http_url
    elif isinstance(source, str):
        vcs_url = source or None

    authors = podspec_json_data.get('authors') or {}

    license_matches = get_license_matches(query_string=declared_license)
    if not license_matches:
        license_expression = 'unknown'
    else:
        license_expression = get_license_expression_from_matches(
            license_matches)

    # Prefix the description with the summary unless it already starts
    # with it.
    if summary and not description.startswith(summary):
        desc = [summary]
        if description:
            desc.append(description)
        description = '. '.join(desc)

    parties = []
    if authors:
        if isinstance(authors, dict):
            for key, value in authors.items():
                # NOTE(review): the URL is the mapping value with a '.com'
                # suffix, kept as-is from the original code; confirm this is
                # the intended URL. Empty values yield no URL.
                party = models.Party(
                    type=models.party_org,
                    name=key,
                    url=value + '.com' if value else None,
                    role='owner',
                )
                parties.append(party)
        else:
            party = models.Party(
                type=models.party_org,
                name=authors,
                role='owner',
            )
            parties.append(party)

    package = CocoapodsPackage(
        name=name,
        version=version,
        vcs_url=vcs_url,
        description=description,
        declared_license=declared_license,
        license_expression=license_expression,
        homepage_url=homepage_url,
        download_url=download_url,
        parties=parties,
    )
    package.api_data_url = package.get_api_data_url()
    return package
def parse(cls, location):
    """
    Yield a PackageData object built from the JSON manifest file at
    ``location``.

    Example of the expected JSON content:
    {
        "name": "haxelib",
        "url" : "https://lib.haxe.org/documentation/",
        "license": "GPL",
        "tags": ["haxelib", "core"],
        "description": "The haxelib client",
        "classPath": "src",
        "version": "3.4.0",
        "contributors": ["back2dos", "ncannasse", "jason", "Simn", "nadako", "andyli"]
    }
    """
    with io.open(location, encoding='utf-8') as manifest:
        manifest_data = json.load(manifest)

    name = manifest_data.get('name')
    version = manifest_data.get('version')

    package_data = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=version,
        homepage_url=manifest_data.get('url'),
        declared_license=manifest_data.get('license'),
        keywords=manifest_data.get('tags'),
        description=manifest_data.get('description'),
        primary_language=cls.default_primary_language,
    )

    if package_data.declared_license and not package_data.license_expression:
        package_data.license_expression = cls.compute_normalized_license(package_data)

    if name:
        # a download URL needs both a name and a version
        if version:
            archive_url = f'https://lib.haxe.org/p/{name}/{version}/download/'
            package_data.repository_download_url = archive_url
            package_data.download_url = archive_url
        package_data.repository_homepage_url = f'https://lib.haxe.org/p/{name}'

    # each contributor is a user name that has a profile page
    for contributor in manifest_data.get('contributors', []):
        package_data.parties.append(
            models.Party(
                type=models.party_person,
                name=contributor,
                role='contributor',
                url=f'https://lib.haxe.org/u/{contributor}',
            )
        )

    for dep_name, raw_version in manifest_data.get('dependencies', {}).items():
        # an empty version means that the dependency is not pinned
        dep_version = raw_version.strip() if raw_version else raw_version
        purl = PackageURL(
            type=cls.default_package_type,
            name=dep_name,
            version=dep_version,
        ).to_string()
        package_data.dependencies.append(
            models.DependentPackage(purl=purl, is_resolved=bool(dep_version))
        )

    yield package_data