def compute_normalized_license(declared_license):
    """
    Return a license expression detected from a ``declared_license`` iterable
    of license mappings, or None when nothing is detected.

    Each mapping is probed through its 'name', 'url' and 'comments' values.
    A detection made on the name takes precedence: when there is one, any
    'unknown' detection made on the url or comments is discarded.
    """
    if not declared_license:
        return

    expressions = []
    for declaration in declared_license:
        # Detection always runs on the name, then the url, then the comments.
        from_name = models.compute_normalized_license(declaration.get('name'))
        from_url = models.compute_normalized_license(declaration.get('url'))
        from_comments = models.compute_normalized_license(
            declaration.get('comments'))

        if not from_name:
            # Nothing from the name: use the url, or else the comments.
            fallback = from_url or from_comments
            if fallback:
                expressions.append(fallback)
            continue

        # The name wins over unknowns found in the url or the comments.
        if from_url == 'unknown':
            from_url = None
        if from_comments == 'unknown':
            from_comments = None

        url_agrees = from_name == from_url
        comments_agree = from_name == from_comments
        name_is_enough = (
            (url_agrees and comments_agree)
            or (url_agrees and not from_comments)
            or (comments_agree and not from_url)
        )
        if name_is_enough:
            expressions.append(from_name)
            continue

        # A different, non-unknown license was detected in the url or the
        # comments: keep everything that was detected.
        candidates = [
            found for found in (from_name, from_url, from_comments) if found
        ]
        if candidates:
            combined = combine_expressions(candidates)
            if combined:
                expressions.append(combined)

    if expressions:
        return combine_expressions(expressions)
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a
    ``declared_license`` that is either a string, a list of declared license
    items or any other object. Return None when nothing is detected.

    A 'proprietary' string is returned as-is. A string that contains
    parentheses and ' or ' is split on ' or ' and each part is detected
    separately. Any other string is detected as a whole. An object that is
    neither a string nor a list is detected through its ``repr()``.
    Multiple detections are combined with OR.
    """
    if not declared_license:
        return

    if isinstance(declared_license, str):
        if declared_license == 'proprietary':
            return declared_license

        is_or_list = (
            '(' in declared_license
            and ')' in declared_license
            and ' or ' in declared_license
        )
        if not is_or_list:
            return models.compute_normalized_license(declared_license)

        # "(MIT or Apache-2.0)" style: drop the outer parentheses and
        # process each alternative on its own.
        declared_license = declared_license.strip().rstrip(')').lstrip('(')
        declared_license = declared_license.split(' or ')

    if isinstance(declared_license, list):
        declarations = declared_license
    else:
        # Previously the detection made on the repr() was computed and then
        # dropped, so this branch always returned None.
        declarations = [repr(declared_license)]

    detected_licenses = []
    for declared in declarations:
        detected_license = models.compute_normalized_license(declared)
        # Skip empty detections so they never reach combine_expressions.
        if detected_license:
            detected_licenses.append(detected_license)

    if detected_licenses:
        # build a proper license expression: the default for composer is OR
        return combine_expressions(detected_licenses, 'OR')
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from the values of
    a ``declared_license`` mapping, or None when nothing is detected.

    Each value is either a string or an iterable of declared license items.
    """
    if not declared_license:
        return

    found = []
    for entry in declared_license.values():
        if not entry:
            continue

        # Handle a lone string like a one-item sequence.
        candidates = [entry] if isinstance(entry, string_types) else entry
        for candidate in candidates:
            expression = models.compute_normalized_license(candidate)
            if expression:
                found.append(expression)

    if found:
        return combine_expressions(found)
def compute_normalized_license(cls, package):
    """
    Return a normalized license expression string detected from the declared
    license of a ``package``, or None.

    When the declared license is not a dict it is detected as a whole.
    Otherwise each item of its 'licenses' entry is detected and the results
    are combined using the relation given by its 'licenselogic' entry.
    """
    declared_license = package.declared_license
    if not declared_license:
        return

    if not isinstance(declared_license, dict):
        return models.compute_normalized_license(
            declared_license=declared_license)

    licenses = declared_license.get('licenses')
    if not licenses:
        return

    # the default in FreeBSD expressions is AND; only an explicit 'or' or
    # 'dual' logic switches to OR
    license_logic = declared_license.get('licenselogic')
    relation = 'OR' if license_logic in ('or', 'dual') else 'AND'

    detections = (
        models.compute_normalized_license(declared_license=lic)
        for lic in licenses
    )
    detected_licenses = [detected for detected in detections if detected]

    if detected_licenses:
        return combine_expressions(
            expressions=detected_licenses, relation=relation)
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a
    ``declared_license`` list of declared license items, or None when nothing
    is detected.

    Each item is either a string, detected as-is, or a mapping probed through
    its 'type' and 'url' values. Items of any other kind are ignored.
    """
    if not declared_license:
        return

    # TODO: find a better way to detect unknown licenses
    # The second key was previously misspelled 'unknwon-license-reference',
    # so an unknown-reference detection on the url was never discarded.
    unknown_keys = ('unknown', 'unknown-license-reference')

    detected_licenses = []
    for declared in declared_license:
        if isinstance(declared, str):
            detected_license = models.compute_normalized_license(declared)
            if detected_license:
                detected_licenses.append(detected_license)

        elif isinstance(declared, dict):
            # 1. try detection on the value of type
            via_type = models.compute_normalized_license(declared.get('type'))
            # 2. try detection on the value of url
            via_url = models.compute_normalized_license(declared.get('url'))

            if via_type:
                # The type has precedence and any unknown detected in the
                # url is ignored.
                if via_url in unknown_keys:
                    via_url = None

                if not via_url or via_type == via_url:
                    detected_licenses.append(via_type)
                else:
                    detected_licenses.append(
                        combine_expressions([via_type, via_url]))

            elif via_url:
                detected_licenses.append(via_url)

    if detected_licenses:
        return combine_expressions(detected_licenses)
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a
    ``declared_license`` mapping, or None when nothing is detected.

    The items of the 'licenses' entry are detected one by one and combined
    with OR when the 'licenselogic' entry is 'or' or 'dual', and with AND
    otherwise.
    """
    if not declared_license:
        return

    licenses = declared_license.get('licenses')
    if not licenses:
        return

    license_logic = declared_license.get('licenselogic')
    relation = 'OR' if license_logic in ('or', 'dual') else 'AND'

    detections = (
        models.compute_normalized_license(declared) for declared in licenses
    )
    detected_licenses = [detected for detected in detections if detected]

    if detected_licenses:
        return combine_expressions(detected_licenses, relation)
def build_package(rpm_tags, datasource_id, package_type, package_namespace=None):
    """
    Return a PackageData object built from an ``rpm_tags`` iterable of
    (name, value_type, value) tuples.

    Tags without an entry in RPM_TAG_HANDLER_BY_NAME are skipped. A failing
    handler raises an Exception carrying the tag value and the data converted
    so far, chained to the original error.
    """
    # mapping of real Package field name -> value converted to expected format
    converted = dict(
        datasource_id=datasource_id,
        type=package_type,
        namespace=package_namespace,
    )

    for name, _value_type, value in rpm_tags:
        handler = RPM_TAG_HANDLER_BY_NAME.get(name)
        # FIXME: we need to handle EVRA correctly
        # TODO: add more fields
        # TODO: merge with tag handling in rpm.py
        if not handler:
            continue

        try:
            handled = handler(value, **converted)
        except Exception as e:
            raise Exception(value, converted) from e
        converted.update(handled)

    package_data = models.PackageData.from_dict(converted)

    needs_detection = (
        not package_data.license_expression and package_data.declared_license
    )
    if needs_detection:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    return package_data
def compute_normalized_license(cls, package):
    """
    Return a normalized license expression string detected from the declared
    license of a ``package``, or None.

    The declared license is either a list or tuple of declared license items,
    each detected separately and then combined, or a single string. Any other
    type yields None.
    """
    declared_license = package.declared_license
    if not declared_license:
        return

    if isinstance(declared_license, (list, tuple)):
        detections = (
            models.compute_normalized_license(declared)
            for declared in declared_license
        )
        # Keep only actual detections so that empty results are never handed
        # to combine_expressions.
        detected_licenses = [detected for detected in detections if detected]
        if detected_licenses:
            return combine_expressions(detected_licenses)

    if isinstance(declared_license, str):
        return models.compute_normalized_license(declared_license)
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a
    ``declared_license`` mapping, list or string of declared license items.
    Return None when nothing is detected or for any other input type.

    For a mapping, only its values are used. Each value or list item is
    itself either a string or an iterable of declared license items.
    """
    if not declared_license:
        return

    if isinstance(declared_license, dict):
        values = list(declared_license.values())
    elif isinstance(declared_license, list):
        values = list(declared_license)
    elif isinstance(declared_license, str):
        # The original check also referenced the Python 2 ``unicode`` name,
        # which raises a NameError on Python 3.
        values = [declared_license]
    else:
        return

    detected_licenses = []
    for value in values:
        if not value:
            continue

        # The value could be a string or a list
        declarations = [value] if isinstance(value, str) else value
        for declared in declarations:
            detected_license = models.compute_normalized_license(declared)
            if detected_license:
                detected_licenses.append(detected_license)

    if detected_licenses:
        return combine_expressions(detected_licenses)
def compute_normalized_license(self):
    """
    Return a normalized license expression string detected from the list of
    declared license strings in ``self.declared_license``, or None when
    nothing is detected.
    """
    if not self.declared_license:
        return

    detections = (
        models.compute_normalized_license(declared)
        for declared in self.declared_license
    )
    # Keep only actual detections so that empty results are never handed to
    # combine_expressions.
    detected_licenses = [detected for detected in detections if detected]

    if detected_licenses:
        return combine_expressions(detected_licenses)
def detect_declared_license(declared):
    """
    Return a (declared license, detected license expression) tuple computed
    from a ``declared`` license. Either item can be None.

    The declared license is cleaned up first. Detection then tries the name
    mapping and only falls back to the generic license detection when the
    mapping finds nothing.
    """
    declared = normalize_and_cleanup_declared_license(declared)
    if not declared:
        return None, None

    detected = detect_using_name_mapping(declared)
    if not detected:
        detected = models.compute_normalized_license(declared)

    return declared, detected
def parse(cls, location):
    """
    Yield one PackageData built from the spec data parsed from the file at
    ``location``.
    """
    gemspec = spec.parse_spec(
        location=location,
        package_type=cls.default_package_type,
    )

    name = gemspec.get('name')
    version = gemspec.get('version')
    homepage_url = gemspec.get('homepage')

    description = build_description(
        summary=gemspec.get('summary'),
        description=gemspec.get('description'),
    )

    # NOTE: collected but not passed to the PackageData built below
    vcs_url = gemspec.get('source')

    # FIXME: why splitting here? this is a job for the license detection
    declared_license = gemspec.get('license')
    if declared_license:
        declared_license = declared_license.split(',')

    parties = get_parties(gemspec)
    dependencies = gemspec.get('dependencies') or []
    urls = get_urls(name=name, version=version)

    package_data = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=version,
        parties=parties,
        homepage_url=homepage_url,
        description=description,
        declared_license=declared_license,
        primary_language=cls.default_primary_language,
        dependencies=dependencies,
        **urls,
    )

    needs_detection = (
        not package_data.license_expression and package_data.declared_license
    )
    if needs_detection:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    yield package_data
def compute_bower_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a
    ``declared_license`` list of declared license strings, or None when the
    list is empty.

    An item for which nothing is detected contributes 'unknown'.
    """
    if not declared_license:
        return

    detected_licenses = [
        models.compute_normalized_license(declared) or 'unknown'
        for declared in declared_license
    ]

    if detected_licenses:
        return combine_expressions(detected_licenses)
def build_package(readme_manifest):
    """
    Return a PackageData object built from the ``readme_manifest`` text of a
    README.chromium file or similar.

    Each line is read as a "key: value" or "key = value" pair. Only keys
    found in PACKAGE_FIELD_BY_README_FIELD are set on the package.
    """
    package = models.PackageData(
        datasource_id=ReadmeHandler.datasource_id,
        type=ReadmeHandler.default_package_type,
    )

    for raw_line in readme_manifest.splitlines():
        stripped = raw_line.strip()

        # A colon takes precedence over an equal sign as the separator.
        for separator in (':', '='):
            if separator in stripped:
                key, _sep, value = stripped.partition(separator)
                break
        else:
            # not a key/value line
            continue

        key = key.lower().strip()
        value = value.strip()
        if not key or not value:
            continue

        package_key = PACKAGE_FIELD_BY_README_FIELD.get(key)
        if package_key:
            setattr(package, package_key, value)

    if not package.license_expression and package.declared_license:
        package.license_expression = models.compute_normalized_license(
            package.declared_license)

    return package
def compute_normalized_license(self):
    """
    Return a normalized license expression string detected from
    ``self.declared_license``, a string holding one license per line, or
    None when there is no usable declared license.
    """
    # TODO: there is a mapping of well known licenses to reuse too
    declared = self.declared_license
    if not declared or not declared.strip():
        return

    # scancode convention is to put one license per line when there are
    # multiple licenses as a list in the manifest.
    entries = [
        entry for entry in declared.splitlines(False)
        if entry and entry.strip()
    ]
    if not entries:
        return

    # We default to AND as the Gem spec is rather vague on what a list of
    # licenses means. The joining is done at the string level rather than
    # with the license_expression library because each license may not be
    # parsable at all.
    if len(entries) > 1:
        entries = ['({})'.format(entry) for entry in entries]

    return models.compute_normalized_license(' AND '.join(entries))
def compute_normalized_license(self):
    """
    Return the result of the generic license detection applied to
    ``self.declared_license``.
    """
    declared = self.declared_license
    return models.compute_normalized_license(declared)
def compute_normalized_license(self):
    """
    Return the result of the generic license detection applied to
    ``self.declared_license``.
    """
    # TODO: there is a mapping of well known licenses to reuse too
    declared = self.declared_license
    return models.compute_normalized_license(declared)
def parse(cls, location):
    """
    Yield one PackageData built from the PE info collected from the file at
    ``location``.

    The declared license is a mapping of the LegalCopyright, LegalTrademarks
    and License values, built only when at least one of them is present.
    """
    infos = pe_info(location)

    version = get_first(
        infos,
        'Full Version',
        'ProductVersion',
        'FileVersion',
        'Assembly Version',
    )

    release_date = get_first(infos, 'BuildDate')
    if release_date:
        # keep only the leading date part and use dashes as separators
        if len(release_date) >= 10:
            release_date = release_date[:10]
        release_date = release_date.replace('/', '-')

    name = get_first(
        infos,
        'ProductName',
        'OriginalFilename',
        'InternalName',
    )

    copyr = get_first(infos, 'LegalCopyright')
    legal_trademarks = concat(
        infos,
        'LegalTrademarks',
        'LegalTrademarks1',
        'LegalTrademarks2',
        'LegalTrademarks3',
    )
    license_info = get_first(infos, 'License')

    # The copyright used to be wrapped in a one-item tuple by a stray
    # trailing comma. A tuple is always truthy, so the guard below never
    # failed and a mapping of empty values was always used as declared
    # license.
    declared_license = {}
    if copyr or legal_trademarks or license_info:
        declared_license = dict(
            LegalCopyright=copyr,
            LegalTrademarks=legal_trademarks,
            License=license_info,
        )

    description = concat(infos, 'FileDescription', 'Comments')

    parties = []
    cname = get_first(infos, 'CompanyName', 'Company')
    if cname:
        parties = [Party(type=party_org, role='author', name=cname)]

    homepage_url = get_first(infos, 'URL', 'WWW')

    package_data = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=version,
        release_date=release_date,
        copyright=copyr,
        declared_license=declared_license,
        description=description,
        parties=parties,
        homepage_url=homepage_url,
    )

    if not package_data.license_expression and package_data.declared_license:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    yield package_data
def parse(cls, location):
    """
    Yield one PackageData built from the CRAN description data collected from
    the file at ``location``. Yield nothing when there is no 'Package' name.

    The 'Maintainer', 'Author' and 'Depends' values are lists whose items are
    separated by a comma followed by a newline.
    """
    cran_desc = get_cran_description(location)

    name = cran_desc.get('Package')
    if not name:
        return

    parties = []
    maintainers = cran_desc.get('Maintainer') or ''
    for maintainer in maintainers.split(',\n'):
        maintainer_name, maintainer_email = get_party_info(maintainer)
        if maintainer_name or maintainer_email:
            parties.append(
                models.Party(
                    name=maintainer_name,
                    role='maintainer',
                    email=maintainer_email,
                ))

    authors = cran_desc.get('Author') or ''
    for author in authors.split(',\n'):
        author_name, author_email = get_party_info(author)
        if author_name or author_email:
            parties.append(
                models.Party(
                    name=author_name,
                    role='author',
                    email=author_email,
                ))

    package_dependencies = []
    dependencies = cran_desc.get('Depends') or ''
    for dependency in dependencies.split(',\n'):
        # Splitting an empty 'Depends' value yields one empty item: skip
        # blank items rather than building a package URL with no name.
        if not dependency.strip():
            continue

        requirement = None
        # TODO: IMHO we could do simpler and better here
        for splitter in ('==', '>=', '<=', '>', '<'):
            if splitter in dependency:
                splits = dependency.split(splitter)
                # Replace the package name and keep the relationship and
                # version. For example: R (>= 2.1)
                requirement = dependency.replace(
                    splits[0], '').strip().strip(')').strip()
                dependency = splits[0].strip().strip('(').strip()
                break

        package_dependencies.append(
            models.DependentPackage(
                purl=PackageURL(type='cran', name=dependency).to_string(),
                extracted_requirement=requirement,
                scope='dependencies',
                is_runtime=True,
                is_optional=False,
            ))

    declared_license = cran_desc.get('License')
    license_expression = None
    if declared_license:
        license_expression = models.compute_normalized_license(
            declared_license)

    # TODO: Let's handle the release date as a Date type
    # release_date = cran_desc.get('Date/Publication'),

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=cran_desc.get('Version'),
        # TODO: combine both together
        description=cran_desc.get('Description', '')
        or cran_desc.get('Title', ''),
        declared_license=declared_license,
        license_expression=license_expression,
        parties=parties,
        dependencies=package_dependencies,
        repository_homepage_url=f'https://cran.r-project.org/package={name}',
    )
def build_rubygem_package_data(gem_data, datasource_id):
    """
    Return a PackageData for ``datasource_id`` built from a Gem ``gem_data``
    mapping, or None when ``gem_data`` is empty.

    The ``gem_data`` mapping is left unmodified.
    """
    if not gem_data:
        return

    metadata = gem_data.get('metadata') or {}

    name = gem_data.get('name')
    # there are two levels of nesting for version:
    version1 = gem_data.get('version') or {}
    version = version1.get('version') or None

    platform = gem_data.get('platform')
    if platform != 'ruby':
        qualifiers = dict(platform=platform)
    else:
        qualifiers = {}

    description = build_description(
        summary=gem_data.get('summary'),
        description=gem_data.get('description'),
    )

    # Since the gem spec doc is not clear wrt. to the default being OR or AND
    # we will treat a list of licenses as a conjunction for now (e.g. AND)
    # See https://guides.rubygems.org/specification-reference/#licenseo
    lic = gem_data.get('license')
    licenses = gem_data.get('licenses')
    declared_license = licenses_mapper(lic, licenses)

    # we may have two homepages and one may be wrong.
    # we prefer the one from the metadata
    homepage_url = metadata.get('homepage_uri')
    if not homepage_url:
        homepage_url = gem_data.get('homepage')

    urls = get_urls(name, version, platform)
    dependencies = get_dependencies(gem_data.get('dependencies'))
    file_references = get_file_references(metadata.get('files'))

    package_data = models.PackageData(
        datasource_id=datasource_id,
        type=GemArchiveHandler.default_package_type,
        primary_language=GemArchiveHandler.default_primary_language,
        name=name,
        version=version,
        qualifiers=qualifiers,
        description=description,
        homepage_url=homepage_url,
        declared_license=declared_license,
        bug_tracking_url=metadata.get('bug_tracking_uri'),
        code_view_url=metadata.get('source_code_uri'),
        file_references=file_references,
        dependencies=dependencies,
        **urls,
    )

    # we can have one singular or a plural list of authors
    authors = gem_data.get('authors') or []
    if isinstance(authors, str):
        # or a string of comma-separated authors (in the Rubygems API)
        authors = [a.strip() for a in authors.split(',') if a.strip()]
    else:
        # Work on a copy: appending to the list held by gem_data would add
        # the singular author to the caller's data.
        authors = list(authors)
    authors.append(gem_data.get('author') or '')

    for author in authors:
        if author and author.strip():
            party = models.Party(name=author, role='author')
            package_data.parties.append(party)

    # TODO: we have an email that is either a string or a list of string

    # date: 2019-01-09 00:00:00.000000000 Z
    date = gem_data.get('date')
    if date and len(date) >= 10:
        package_data.release_date = date[:10]

    # TODO: infer source purl and add purl to package_data.source_packages

    # not used for now
    #   "changelog_uri"     => "https://example.com/user/bestgemever/CHANGELOG.md",
    #   "wiki_uri"          => "https://example.com/user/bestgemever/wiki"
    #   "mailing_list_uri"  => "https://groups.example.com/bestgemever",
    #   "documentation_uri" => "https://www.example.info/gems/bestgemever/0.0.1",

    if not package_data.homepage_url:
        package_data.homepage_url = rubygems_homepage_url(name, version)

    if not package_data.license_expression and package_data.declared_license:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    return package_data
def parse(
    location,
    datasource_id,
    package_type,
    primary_language,
    base_url='https://repo1.maven.org/maven2',
):
    """
    Yield one PackageData built from the Maven POM obtained from the file at
    ``location``. Yield nothing when no POM is obtained.

    ``datasource_id``, ``package_type`` and ``primary_language`` are stored
    as-is on the PackageData. ``base_url`` is passed on to get_urls().
    """
    pom = get_maven_pom(location=location)
    if not pom:
        return

    if TRACE:
        ptd = pformat(pom.to_dict())
        logger.debug(f'PomXmlHandler.parse: pom:.to_dict()\n{ptd}')

    version = pom.version
    # pymaven quirk: 'latest.release' is treated as no version
    if version == 'latest.release':
        version = None

    qualifiers = {}
    classifier = pom.classifier
    if classifier:
        qualifiers['classifier'] = classifier

    packaging = pom.packaging
    if packaging:
        extension = get_extension(packaging)
        # we use type as in the PURL spec: this is a problematic field with
        # a complex definition in Maven
        if extension and extension not in ('jar', 'pom'):
            qualifiers['type'] = extension

    declared_license = pom.licenses

    group_id = pom.group_id
    artifact_id = pom.artifact_id

    # craft a source package purl for the main binary
    source_packages = []
    is_main_binary_jar = not classifier and bool(
        group_id and artifact_id and version)
    if is_main_binary_jar:
        source_purl = PackageURL(
            type=package_type,
            namespace=group_id,
            name=artifact_id,
            version=version,
            # we hardcode the source qualifier for now...
            qualifiers=dict(classifier='sources'),
        )
        source_packages = [source_purl.to_string()]

    # A name identical to the description is kept only once.
    pname = pom.name or ''
    pdesc = pom.description or ''
    if pname == pdesc:
        description = pname
    else:
        description = '\n'.join(part for part in (pname, pdesc) if part)

    issue_mngt = pom.issue_management or {}
    bug_tracking_url = issue_mngt.get('url')

    scm = pom.scm or {}
    urls = build_vcs_and_code_view_urls(scm)
    urls.update(
        get_urls(
            namespace=group_id,
            name=artifact_id,
            version=version,
            qualifiers=qualifiers,
            base_url=base_url,
        ))

    # FIXME: there are still other data to map in a PackageData
    package_data = models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        primary_language=primary_language,
        namespace=group_id,
        name=artifact_id,
        version=version,
        qualifiers=qualifiers or None,
        description=description or None,
        homepage_url=pom.url or None,
        declared_license=declared_license or None,
        parties=get_parties(pom),
        dependencies=get_dependencies(pom),
        source_packages=source_packages,
        bug_tracking_url=bug_tracking_url,
        **urls,
    )

    needs_detection = (
        not package_data.license_expression and package_data.declared_license
    )
    if needs_detection:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    yield package_data
def parse(cls, location):
    """
    Yield one PackageData built from the opam data parsed from the file at
    ``location``.
    """
    opams = parse_opam(location)

    package_dependencies = [
        models.DependentPackage(
            purl=dep["purl"],
            extracted_requirement=dep["version"],
            scope='dependency',
            is_runtime=True,
            is_optional=False,
            is_resolved=False,
        )
        for dep in opams.get('depends') or []
    ]

    name = opams.get('name')
    version = opams.get('version')

    repository_homepage_url = get_repository_homepage_url(name)
    api_data_url = get_api_data_url(name, version)

    # The description is the synopsis followed by the long description,
    # unless both are the same text.
    short_desc = opams.get('synopsis') or ''
    long_desc = opams.get('description') or ''
    if long_desc == short_desc:
        long_desc = None
    description = '\n'.join(
        text for text in (short_desc, long_desc) if text and text.strip())

    # Authors are recorded by name and maintainers by email.
    parties = [
        models.Party(type=models.party_person, name=author, role='author')
        for author in opams.get('authors') or []
    ]
    parties.extend(
        models.Party(
            type=models.party_person, email=maintainer, role='maintainer')
        for maintainer in opams.get('maintainer') or []
    )

    package_data = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=version,
        vcs_url=opams.get('dev-repo'),
        homepage_url=opams.get('homepage'),
        download_url=opams.get('src'),
        sha1=opams.get('sha1'),
        md5=opams.get('md5'),
        sha256=opams.get('sha256'),
        sha512=opams.get('sha512'),
        bug_tracking_url=opams.get('bug-reports'),
        declared_license=opams.get('license'),
        description=description,
        parties=parties,
        dependencies=package_dependencies,
        api_data_url=api_data_url,
        repository_homepage_url=repository_homepage_url,
        primary_language=cls.default_primary_language,
    )

    needs_detection = (
        not package_data.license_expression and package_data.declared_license
    )
    if needs_detection:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    yield package_data
def parse(cls, location):
    """
    Yield one PackageData built from the spec data parsed from the file at
    ``location``.
    """
    podspec = spec.parse_spec(
        location=location,
        package_type=cls.default_package_type,
    )

    name = podspec.get('name')
    version = podspec.get('version')
    homepage_url = podspec.get('homepage')

    declared_license = podspec.get('license')
    if declared_license:
        license_expression = models.compute_normalized_license(
            declared_license)
    else:
        license_expression = None

    description = utils.build_description(
        summary=podspec.get('summary'),
        description=podspec.get('description'),
    )

    vcs_url = podspec.get('source') or ''

    # FIXME: we are doing nothing with the email list
    parties = []
    for author in podspec.get('author') or []:
        auth, email = parse_person(author)
        parties.append(
            models.Party(
                type=models.party_person,
                name=auth,
                email=email,
                role='author',
            ))

    urls = get_urls(
        name=name,
        version=version,
        homepage_url=homepage_url,
        vcs_url=vcs_url,
    )

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=version,
        primary_language=cls.default_primary_language,
        vcs_url=vcs_url,
        # FIXME: a source should be a PURL, not a list of URLs
        # source_packages=vcs_url.split('\n'),
        description=description,
        declared_license=declared_license,
        license_expression=license_expression,
        homepage_url=homepage_url,
        parties=parties,
        **urls,
    )
def build_package(pubspec_data):
    """
    Return a PackageData object built from a ``pubspec_data`` mapping.

    The API, repository homepage and repository download URLs are only
    computed when both a name and a version are present. The 'archive_url'
    value, when present, takes precedence as download URL.
    """
    name = pubspec_data.get('name')
    version = pubspec_data.get('version')
    description = pubspec_data.get('description')
    homepage_url = pubspec_data.get('homepage')
    declared_license = pubspec_data.get('license')
    vcs_url = pubspec_data.get('repository')
    download_url = pubspec_data.get('archive_url')

    api_data_url = name and version and f'https://pub.dev/api/packages/{name}/versions/{version}'
    repository_homepage_url = name and version and f'https://pub.dev/packages/{name}/versions/{version}'

    # A URL should be in the form of:
    # https://pub.dartlang.org/packages/url_launcher/versions/6.0.9.tar.gz
    # And it may resolve to:
    # https://storage.googleapis.com/pub-packages/packages/http-0.13.2.tar.gz
    # as seen in the pub.dev web pages
    repository_download_url = name and version and f'https://pub.dartlang.org/packages/{name}/versions/{version}.tar.gz'

    download_url = download_url or repository_download_url

    # Author and authors are deprecated
    authors = []
    author = pubspec_data.get('author')
    if author:
        authors.append(author)
    authors.extend(pubspec_data.get('authors') or [])

    parties = [
        models.Party(type=models.party_person, role='author', name=auth)
        for auth in authors
    ]

    package_dependencies = []
    dependencies = collect_deps(
        pubspec_data,
        'dependencies',
        is_runtime=True,
        is_optional=False,
    )
    package_dependencies.extend(dependencies)

    dev_dependencies = collect_deps(
        pubspec_data,
        'dev_dependencies',
        is_runtime=False,
        is_optional=True,
    )
    package_dependencies.extend(dev_dependencies)

    env_dependencies = collect_deps(
        pubspec_data,
        'environment',
        is_runtime=True,
        is_optional=False,
    )
    package_dependencies.extend(env_dependencies)

    # Selected extra fields are carried over verbatim when present.
    # NOTE(review): the pubspec documentation names the overrides field
    # 'dependency_overrides'; confirm whether 'dependencies_overrides' is
    # the intended key.
    extra_data = {}
    extra_keys = (
        'issue_tracker',
        'documentation',
        'dependencies_overrides',
        'executables',
        'publish_to',
    )
    for extra_key in extra_keys:
        extra_value = pubspec_data.get(extra_key)
        if extra_value:
            extra_data[extra_key] = extra_value

    package = models.PackageData(
        datasource_id=DartPubspecYamlHandler.datasource_id,
        # The package type was previously taken from
        # default_primary_language, unlike every other handler.
        type=DartPubspecYamlHandler.default_package_type,
        primary_language=DartPubspecYamlHandler.default_primary_language,
        name=name,
        version=version,
        download_url=download_url,
        vcs_url=vcs_url,
        description=description,
        declared_license=declared_license,
        parties=parties,
        homepage_url=homepage_url,
        dependencies=package_dependencies,
        extra_data=extra_data,
        repository_homepage_url=repository_homepage_url,
        api_data_url=api_data_url,
        repository_download_url=repository_download_url,
    )

    if not package.license_expression and package.declared_license:
        package.license_expression = models.compute_normalized_license(
            package.declared_license)

    return package
def build_package_data(package_data):
    """
    Return a PackageData object built from a ``package_data`` mapping of
    composer.json data.

    A missing 'name' yields a package with no namespace and no name, and the
    license mapper is then called with ``is_private=True``.
    """
    # Note: A composer.json without name and description is not a usable PHP
    # composer package. Name and description fields are required but only for
    # published packages: https://getcomposer.org/doc/04-schema.md#name We want
    # to catch both published and non-published packages here. Therefore, we use
    # None as a package name if there is no name.

    ns_name = package_data.get('name')
    is_private = False
    if not ns_name:
        ns = None
        name = None
        is_private = True
    else:
        # "vendor/project": everything before the last slash is the namespace
        ns, _, name = ns_name.rpartition('/')

    package = models.PackageData(
        datasource_id=PhpComposerJsonHandler.datasource_id,
        type=PhpComposerJsonHandler.default_package_type,
        namespace=ns,
        name=name,
        repository_homepage_url=get_repository_homepage_url(ns, name),
        api_data_url=get_api_data_url(ns, name),
        primary_language=PhpComposerJsonHandler.default_primary_language,
    )

    # mapping of top level composer.json items to the Package object field name
    plain_fields = [
        ('version', 'version'),
        ('description', 'summary'),
        ('keywords', 'keywords'),
        ('homepage', 'homepage_url'),
    ]

    # NOTE(review): the collapsed source does not show whether the setattr
    # is nested under the string check; confirm against the original file.
    for source, target in plain_fields:
        value = package_data.get(source)
        if isinstance(value, str):
            value = value.strip()
            if value:
                setattr(package, target, value)

    # mapping of top level composer.json items to a function accepting as
    # arguments the composer.json element value and returning an iterable of
    # key, values Package Object to update
    field_mappers = [
        ('authors', author_mapper),
        ('license', partial(licensing_mapper, is_private=is_private)),
        ('support', support_mapper),
        ('require', partial(_deps_mapper, scope='require', is_runtime=True)),
        ('require-dev', partial(_deps_mapper, scope='require-dev', is_optional=True)),
        ('provide', partial(_deps_mapper, scope='provide', is_runtime=True)),
        ('conflict', partial(_deps_mapper, scope='conflict', is_runtime=True, is_optional=True)),
        ('replace', partial(_deps_mapper, scope='replace', is_runtime=True, is_optional=True)),
        ('suggest', partial(_deps_mapper, scope='suggest', is_runtime=True, is_optional=True)),
        ('source', source_mapper),
        ('dist', dist_mapper)
    ]

    # Each mapper is called with the non-empty value and the package; its
    # return value is not used here.
    for source, func in field_mappers:
        value = package_data.get(source)
        if value:
            if isinstance(value, str):
                value = value.strip()
            if value:
                func(value, package)

    # Parse vendor from name value
    vendor_mapper(package)

    # Only run detection when no license expression is set yet.
    if not package.license_expression and package.declared_license:
        package.license_expression = models.compute_normalized_license(package.declared_license)

    return package
def build_package(package_data, datasource_id):
    """
    Yield one PackageData object built from a ``package_data`` mapping from a
    metadata.json or similar, for the ``datasource_id`` datasource.
    """
    name = package_data.get('name')
    version = package_data.get('version')

    maintainer_name = package_data.get('maintainer', '')
    maintainer_email = package_data.get('maintainer_email', '')
    if maintainer_name or maintainer_email:
        parties = [
            models.Party(
                name=maintainer_name.strip() or None,
                role='maintainer',
                email=maintainer_email.strip() or None,
            )
        ]
    else:
        parties = []

    # TODO: combine descriptions as done elsewhere
    description = (
        package_data.get('description', '')
        or package_data.get('long_description', '')
    )

    lic = package_data.get('license', '')
    declared_license = lic.strip() if lic else None
    if declared_license:
        license_expression = models.compute_normalized_license(
            declared_license)
    else:
        license_expression = None

    code_view_url = package_data.get('source_url', '')
    bug_tracking_url = package_data.get('issues_url', '')

    # 'depends' entries override same-named 'dependencies' entries.
    deps = dict(package_data.get('dependencies', {}) or {})
    deps.update(package_data.get('depends', {}) or {})

    dependencies = [
        models.DependentPackage(
            purl=PackageURL(type='chef', name=dependency_name).to_string(),
            scope='dependencies',
            extracted_requirement=requirement,
            is_runtime=True,
            is_optional=False,
        )
        for dependency_name, requirement in deps.items()
    ]

    yield models.PackageData(
        datasource_id=datasource_id,
        type=ChefMetadataJsonHandler.default_package_type,
        name=name,
        version=version,
        parties=parties,
        description=description.strip() or None,
        declared_license=declared_license,
        license_expression=license_expression,
        code_view_url=code_view_url.strip() or None,
        bug_tracking_url=bug_tracking_url.strip() or None,
        dependencies=dependencies,
        primary_language='Ruby',
        **get_urls(name, version),
    )