def parse(cls, location):
    """
    Yield PackageData from a Gemfile.lock file at ``location``.

    The first yielded PackageData represents the lockfile itself and carries
    every locked gem as a resolved dependency. Then one additional PackageData
    is yielded per locked gem, each carrying that gem's own dependencies.
    """
    gemfile_lock = GemfileLockParser(location)

    # first pass: every locked gem becomes a resolved dependency of the
    # top-level, lockfile-wide package
    dependencies = []
    for _, gem in gemfile_lock.all_gems.items():
        dependencies.append(
            models.DependentPackage(
                purl=PackageURL(
                    type='gem',
                    name=gem.name,
                    version=gem.version,
                ).to_string(),
                extracted_requirement=', '.join(gem.requirements),
                # FIXME: get proper scope... This does not seem right
                scope='dependencies',
                is_runtime=True,
                is_optional=False,
                is_resolved=True,
            ))

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        dependencies=dependencies,
        primary_language=cls.default_primary_language,
    )

    # second pass: one PackageData per locked gem, with that gem's own
    # (also resolved) dependencies
    for _, gem in gemfile_lock.all_gems.items():
        deps = []
        for _dep_name, dep in gem.dependencies.items():
            deps.append(
                models.DependentPackage(
                    purl=PackageURL(
                        type='gem',
                        name=dep.name,
                        version=dep.version,
                    ).to_string(),
                    extracted_requirement=', '.join(dep.requirements),
                    scope='dependencies',
                    is_runtime=True,
                    is_optional=False,
                    is_resolved=True,
                ))

        urls = get_urls(gem.name, gem.version)

        yield models.PackageData(
            datasource_id=cls.datasource_id,
            primary_language=cls.default_primary_language,
            type=cls.default_package_type,
            name=gem.name,
            version=gem.version,
            dependencies=deps,
            **urls)
def _parse(location):
    """
    Yield parsed PackageData objects from ``location``.
    Raises Exceptions on errors.
    """
    for handler in PACKAGE_DATAFILE_HANDLERS:
        if not handler.is_datafile(location):
            continue

        if TRACE:
            logger_debug(f'_parse:.is_datafile: {location}')

        try:
            for parsed in handler.parse(location):
                if TRACE:
                    logger_debug(f' _parse: parsed: {parsed!r}')
                yield parsed

        except NotImplementedError:
            # build a plain package if parse is not yet implemented
            pkg = models.PackageData(
                datasource_id=handler.datasource_id,
                type=handler.default_package_type,
                primary_language=handler.default_primary_language,
            )
            if TRACE:
                # fixed: do not log `parsed` here -- it is unbound (NameError)
                # or stale when parse() raised before yielding anything
                logger_debug(f'_parse: NotImplementedError for: {location}')
            yield pkg

            if SCANCODE_DEBUG_PACKAGE_API:
                raise
def test_MetadataBzl_recognize_new_format(self):
    """Check parsing of a new-format METADATA.bzl file."""
    test_file = self.get_test_loc('metadatabzl/new-format/METADATA.bzl')
    results = build.BuckMetadataBzlHandler.parse(test_file)
    maintainer = models.Party(
        type=models.party_org,
        name='example_org',
        role='maintainer',
    )
    expected = [
        models.PackageData(
            datasource_id=build.BuckMetadataBzlHandler.datasource_id,
            type='github',
            name='example/example',
            version='0.0.1',
            declared_license='BSD-3-Clause',
            parties=[maintainer],
            download_url='',
            sha1='',
            homepage_url='https://github.com/example/example',
            vcs_url='https://github.com/example/example.git',
            extra_data={'vcs_commit_hash': 'deadbeef'},
        ),
    ]
    compare_package_results(expected, results)
def parse(cls, location):
    """
    Yield one PackageData for the lock file itself (carrying all locked
    packages as dependencies), then one PackageData per locked package.
    """
    with io.open(location, encoding='utf-8') as loc:
        lock_data = json.load(loc)

    prod_packages = [
        build_package_data(entry)
        for entry in lock_data.get('packages', [])
    ]
    dev_packages = [
        build_package_data(entry)
        for entry in lock_data.get('packages-dev', [])
    ]

    # production then dev dependencies, with matching scopes/flags
    dependencies = [
        build_dep_package(pkg, scope='require', is_runtime=True, is_optional=False)
        for pkg in prod_packages
    ]
    dependencies += [
        build_dep_package(pkg, scope='require-dev', is_runtime=False, is_optional=True)
        for pkg in dev_packages
    ]

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        primary_language=cls.default_primary_language,
        dependencies=dependencies,
    )

    yield from prod_packages
    yield from dev_packages
def test_BazelPackage_parse(self):
    """Check that a BUILD file yields one package per Bazel target."""
    test_file = self.get_test_loc('bazel/parse/BUILD')
    results = build.BazelBuildHandler.parse(test_file)
    handler = build.BazelBuildHandler
    expected = [
        models.PackageData(
            name=target_name,
            type=handler.default_package_type,
            datasource_id=handler.datasource_id,
        )
        for target_name in ('hello-greet', 'hello-world')
    ]
    compare_package_results(expected, results)
def build_package(cls, dependencies):
    """
    Yield a single PackageData built from a ``dependencies`` list of mappings,
    each with optional "name", "namespace", "version" and "scope" keys.

    Entries without a name are skipped.
    """
    package_dependencies = []
    for dependency in dependencies:
        # Ignore collected dependencies that do not have a name
        name = dependency.get('name', '')
        if not name:
            continue
        namespace = dependency.get('namespace', '')
        version = dependency.get('version', '')
        scope = dependency.get('scope', '')
        # test-like scopes are treated as optional, non-runtime dependencies
        is_runtime = True
        is_optional = False
        if 'test' in scope.lower():
            is_runtime = False
            is_optional = True
        package_dependencies.append(
            models.DependentPackage(
                purl=PackageURL(
                    type=cls.default_package_type,
                    namespace=namespace,
                    name=name,
                    version=version,
                ).to_string(),
                scope=scope,
                extracted_requirement=version,
                is_runtime=is_runtime,
                is_optional=is_optional,
            ))

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        # NOTE(review): uses BuildGradleHandler explicitly rather than cls;
        # presumably intentional for subclasses sharing Gradle's language --
        # confirm
        primary_language=BuildGradleHandler.default_primary_language,
        dependencies=package_dependencies,
    )
def parse(cls, location):
    """
    Yield one PackageData for the operating system described by an os-release
    file at ``location``.
    """
    distro = Distro.from_os_release_file(location)
    distro_identifier = distro.identifier
    pretty_name = distro.pretty_name and distro.pretty_name.lower() or ''

    if distro_identifier == 'debian':
        namespace = 'debian'
        if 'distroless' in pretty_name:
            name = 'distroless'
        elif pretty_name.startswith('debian'):
            # fixed: this branch previously set 'distroless' as well,
            # duplicating the branch above
            name = 'debian'
        else:
            # fixed: previously `name` was left unbound (NameError) when the
            # pretty name matched neither branch
            name = 'debian'

    elif distro_identifier == 'ubuntu' and distro.id_like == 'debian':
        namespace = 'debian'
        name = 'ubuntu'

    else:
        # NOTE(review): falling back to the 'ubuntu' name for any other
        # distro looks suspicious but is kept as-is -- confirm intent
        namespace = distro_identifier
        name = 'ubuntu'

    version = distro.version_id

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        namespace=namespace,
        name=name,
        version=version,
    )
def test_BuckPackage_parse(self):
    """Check that a BUCK file yields one package per Buck rule."""
    test_file = self.get_test_loc('buck/parse/BUCK')
    results = build.BuckPackageHandler.parse(test_file)
    handler = build.BuckPackageHandler
    expected = [
        models.PackageData(
            name=rule_name,
            type=handler.default_package_type,
            datasource_id=handler.datasource_id,
        )
        for rule_name in ('app', 'app2')
    ]
    compare_package_results(expected, results)
def parse(cls, location):
    """
    Yield one PackageData with its dependencies from a Godeps file at
    ``location``.
    """
    godeps = Godep(location)

    namespace = name = None
    if godeps.import_path:
        # we create a purl from the import path to parse ns/name nicely
        import_purl = PackageURL.from_string(f'pkg:golang/{godeps.import_path}')
        namespace = import_purl.namespace
        name = import_purl.name

    dependencies = [
        models.DependentPackage(
            purl=str(
                PackageURL.from_string(
                    f'pkg:golang/{dep.import_path}')),
            extracted_requirement=dep.revision,
            scope='Deps',
            is_runtime=True,
            is_optional=False,
            is_resolved=False,
        )
        for dep in (godeps.dependencies or [])
    ]

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        namespace=namespace,
        name=name,
        primary_language=cls.default_primary_language,
        dependencies=dependencies,
    )
def parse(cls, location):
    """
    Yield one or more Package manifest objects given a file ``location``
    pointing to a package archive, manifest or similar.
    """
    with io.open(location, encoding='utf-8') as loc:
        freebsd_manifest = saneyaml.load(loc)

    # arch and origin are kept as purl qualifiers
    package_data = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        qualifiers=dict(
            arch=freebsd_manifest.get('arch'),
            origin=freebsd_manifest.get('origin'),
        ))

    # mapping of top level manifest items to the PackageData object field name
    plain_fields = [
        ('name', 'name'),
        ('version', 'version'),
        ('www', 'homepage_url'),
        ('desc', 'description'),
        ('categories', 'keywords'),
    ]

    # copy simple values across, stripping strings and skipping empties
    for source, target in plain_fields:
        value = freebsd_manifest.get(source)
        if value:
            if isinstance(value, str):
                value = value.strip()
            if value:
                setattr(package_data, target, value)

    # mapping of top level +COMPACT_MANIFEST items to a function accepting as
    # arguments the package.json element value and returning an iterable of key,
    # values Package Object to update
    field_mappers = [
        ('maintainer', maintainer_mapper),
        ('origin', origin_mapper),
        ('arch', arch_mapper),
    ]

    for source, func in field_mappers:
        logger.debug('parse: %(source)r, %(func)r' % locals())
        value = freebsd_manifest.get(source) or None
        if value:
            # mappers mutate package_data in place
            func(value, package_data)

    # license_mapper needs multiple fields
    license_mapper(freebsd_manifest, package_data)

    if package_data.declared_license:
        package_data.license_expression = cls.compute_normalized_license(
            package_data)

    yield package_data
def parse(cls, location):
    """
    Yield a single PackageData with the dependencies collected from the YAML
    locks file at ``location``.
    """
    with open(location) as lockfile:
        locks_data = saneyaml.load(lockfile.read())

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        primary_language=cls.default_primary_language,
        dependencies=list(collect_locks(locks_data)),
    )
def parse(cls, location):
    """
    Yield one PackageData from a Cargo.toml manifest at ``location``.
    """
    manifest = toml.load(location, _dict=dict)
    package_section = manifest.get('package', {})

    name = package_section.get('name')
    version = package_section.get('version')

    description = (package_section.get('description') or '').strip()

    author_names = package_section.get('authors') or []
    parties = list(get_parties(person_names=author_names, party_role='author'))

    declared_license = package_section.get('license')
    # TODO: load as a notice_text
    license_file = package_section.get('license-file')

    # categories are folded into keywords
    keywords = package_section.get('keywords') or []
    keywords.extend(package_section.get('categories') or [])

    # cargo dependencies are complex and can be overriden at multiple levels
    dependencies = []
    for section_name, section_value in package_section.items():
        if section_name.endswith('dependencies'):
            dependencies.extend(
                dependency_mapper(dependencies=section_value, scope=section_name))

    # TODO: add file refs: readme, include and exclude
    # TODO: other URLs: documentation
    vcs_url = package_section.get('repository')
    homepage_url = package_section.get('homepage')
    repository_homepage_url = name and f'https://crates.io/crates/{name}'
    repository_download_url = name and version and f'https://crates.io/api/v1/crates/{name}/{version}/download'
    api_data_url = name and f'https://crates.io/api/v1/crates/{name}'

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=version,
        primary_language=cls.default_primary_language,
        description=description,
        parties=parties,
        declared_license=declared_license,
        vcs_url=vcs_url,
        homepage_url=homepage_url,
        repository_homepage_url=repository_homepage_url,
        repository_download_url=repository_download_url,
        api_data_url=api_data_url,
        dependencies=dependencies,
    )
def parse(cls, location):
    """
    Yield one PackageData from a Java MANIFEST.MF at ``location``, if its
    main section yields normalized data.
    """
    sections = parse_manifest(location)
    if not sections:
        return

    # only the main (first) section carries package-level metadata
    manifest = get_normalized_java_manifest_data(sections[0])
    if not manifest:
        return

    package_data = models.PackageData(**manifest)
    if package_data.declared_license and not package_data.license_expression:
        package_data.license_expression = cls.compute_normalized_license(
            package_data)
    yield package_data
def parse(cls, location):
    """
    Yield one PackageData with its requirements from a go.mod file at
    ``location``.
    """
    gomods = go_mod.parse_gomod(location)

    dependencies = []
    # both "require" and "exclude" statements become dependencies, tagged by
    # scope
    for scope_name, entries in (
        ('require', gomods.require or []),
        ('exclude', gomods.exclude or []),
    ):
        for gomod in entries:
            dependencies.append(
                models.DependentPackage(
                    purl=gomod.purl(include_version=True),
                    extracted_requirement=gomod.version,
                    scope=scope_name,
                    is_runtime=True,
                    is_optional=False,
                    is_resolved=False,
                ))

    name = gomods.name
    namespace = gomods.namespace

    # fixed: only build URLs when both parts are present, instead of
    # producing 'https://pkg.go.dev/None/None'-style URLs (the repository
    # homepage URL was already guarded this way)
    homepage_url = None
    vcs_url = None
    repository_homepage_url = None
    if namespace and name:
        homepage_url = f'https://pkg.go.dev/{namespace}/{name}'
        vcs_url = f'https://{namespace}/{name}.git'
        repository_homepage_url = f'https://pkg.go.dev/{namespace}/{name}'

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        namespace=namespace,
        vcs_url=vcs_url,
        homepage_url=homepage_url,
        repository_homepage_url=repository_homepage_url,
        dependencies=dependencies,
        primary_language=cls.default_primary_language,
    )
def parse(cls, location):
    """
    Yield PackageData from a YAML Podfile.lock.
    """
    def build_dependency(pod_spec):
        # one DependentPackage from a "name (version)" pod spec string
        purl, xreq = parse_dep_requirements(pod_spec)
        return models.DependentPackage(
            purl=str(purl),
            # FIXME: why dev?
            scope='requires',
            extracted_requirement=xreq,
            is_runtime=False,
            is_optional=True,
            is_resolved=True,
        )

    with open(location) as pfl:
        data = saneyaml.load(pfl)

    # fixed: a lockfile without a PODS section yields no dependencies instead
    # of raising KeyError
    pods = data.get('PODS') or []

    dependencies = []
    for pod in pods:
        if isinstance(pod, dict):
            # mapping of a main pod to its own dependent pods
            for main_pod, _dep_pods in pod.items():
                dependencies.append(build_dependency(main_pod))
        elif isinstance(pod, str):
            dependencies.append(build_dependency(pod))

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        primary_language=cls.default_primary_language,
        dependencies=dependencies,
    )
def parse(cls, location):
    """
    Yield one PackageData from a conda meta.yaml file at ``location``.
    Yields nothing when the package has no name.
    """
    metayaml = get_meta_yaml_data(location)
    package_element = metayaml.get('package') or {}
    name = package_element.get('name')
    if not name:
        return
    version = package_element.get('version')

    package = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=version,
    )

    # FIXME: source is source, not download
    source = metayaml.get('source') or {}
    package.download_url = source.get('url')
    package.sha256 = source.get('sha256')

    about = metayaml.get('about') or {}
    package.homepage_url = about.get('home')
    package.declared_license = about.get('license')
    if package.declared_license:
        package.license_expression = cls.compute_normalized_license(
            package)
    package.description = about.get('summary')
    package.vcs_url = about.get('dev_url')

    requirements = metayaml.get('requirements') or {}
    for scope, reqs in requirements.items():
        # requirements format is like:
        # (u'run', [u'mccortex ==1.0', u'nextflow ==19.01.0', u'cortexpy
        # ==0.45.7', u'kallisto ==0.44.0', u'bwa', u'pandas',
        # u'progressbar2', u'python >=3.6'])])
        for req in reqs:
            # fixed: use a distinct variable so the package `name` above is
            # not clobbered by each dependency name
            dep_name, _, requirement = req.partition(" ")
            purl = PackageURL(type=cls.default_package_type, name=dep_name)
            package.dependencies.append(
                models.DependentPackage(
                    purl=purl.to_string(),
                    extracted_requirement=requirement,
                    scope=scope,
                    is_runtime=True,
                    is_optional=False,
                ))
    yield package
def parse(cls, location):
    """
    Yield PackageData from a pom.properties file (which is typically side-
    by-side with its pom file.)
    """
    with open(location) as props:
        properties = javaproperties.load(props) or {}
        if TRACE:
            logger.debug(
                f'MavenPomPropertiesHandler.parse: properties: {properties!r}'
            )
        if properties:
            yield models.PackageData(
                datasource_id=cls.datasource_id,
                # fixed: handlers define `default_package_type` and
                # `default_primary_language` (as used everywhere else); the
                # previous `cls.package_type`/`cls.primary_language`
                # attributes do not exist
                type=cls.default_package_type,
                primary_language=cls.default_primary_language,
                extra_data=dict(pom_properties=properties),
            )
def parse(cls, location):
    """
    Yield a single bare PackageData named after the parent directory of
    ``location``.
    """
    # the enclosing directory name is the best available package name
    parent_dir = fileutils.parent_directory(location)
    name = fileutils.file_name(parent_dir)

    # we could use checksums as version in the future; there is also an
    # optional array of license file names and dependencies in targets that
    # we could use
    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=None,
    )
def parse(cls, location):
    """
    Yield one PackageData built from a Rubygems gemspec file at ``location``.
    """
    gemspec = spec.parse_spec(
        location=location,
        package_type=cls.default_package_type,
    )

    name = gemspec.get('name')
    version = gemspec.get('version')
    homepage_url = gemspec.get('homepage')
    # combine summary and description into a single description text
    description = build_description(
        summary=gemspec.get('summary'),
        description=gemspec.get('description'),
    )

    vcs_url = gemspec.get('source')
    declared_license = gemspec.get('license')
    if declared_license:
        # FIXME: why splitting here? this is a job for the license detection
        declared_license = declared_license.split(',')

    parties = get_parties(gemspec)
    dependencies = gemspec.get('dependencies') or []
    urls = get_urls(name=name, version=version)

    package_data = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=name,
        version=version,
        parties=parties,
        homepage_url=homepage_url,
        description=description,
        declared_license=declared_license,
        primary_language=cls.default_primary_language,
        dependencies=dependencies,
        **urls)

    # compute a license expression only when one is not already set
    if not package_data.license_expression and package_data.declared_license:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    yield package_data
def test_msi_create_package_data_from_msiinfo_results(self):
    """Check PackageData built from canned msiinfo results."""
    msiinfo_results = self.python_3_9_5_add_to_path_results.copy()
    result = create_package_data_from_msiinfo_results(msiinfo_results).to_dict()
    author = Party(type=None, role='author', name='Python Software Foundation')
    expected = models.PackageData(
        type=MsiInstallerHandler.default_package_type,
        datasource_id=MsiInstallerHandler.datasource_id,
        name='Python 3.9.5 Add to Path (64-bit)',
        version='v 3.9.5',
        description=(
            'This installer database contains the logic and data required '
            'to install Python 3.9.5 Add to Path (64-bit).'
        ),
        parties=[author],
        keywords='Installer',
    ).to_dict()
    # extra_data is not compared here
    result['extra_data'] = {}
    assert result == expected
def msi_parse(
    location,
    datasource_id='msi_installer',
    package_type='msi',
):
    """
    Return PackageData from ``location``.

    The msiinfo tool is only available on Linux; elsewhere a bare PackageData
    is returned.
    """
    if not on_linux:
        return models.PackageData(
            datasource_id=datasource_id,
            type=package_type,
        )

    msiinfo_results = get_msi_info(location)
    return create_package_data_from_msiinfo_results(
        msiinfo_results=msiinfo_results,
        datasource_id=datasource_id,
        package_type=package_type,
    )
def parse_debian_files_list(location, datasource_id, package_type):
    """
    Yield PackageData from a list of file paths at locations such as an from a
    Debian installed .list or .md5sums file.
    """
    qualifiers = {}
    filename = fileutils.file_base_name(location)
    if ':' in filename:
        # a "name:arch" filename carries the architecture as a qualifier
        name, _, arch = filename.partition(':')
        qualifiers['arch'] = arch
    else:
        name = filename

    file_references = []
    with open(location) as info_file:
        for line in info_file:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            # lines are either "<md5sum> <path>" (.md5sums) or a bare path
            # (.list)
            md5sum, _, path = line.partition(' ')
            path = path.strip()
            md5sum = md5sum and md5sum.strip() or None
            if not path:
                # fixed: for a plain file list the whole line is the path and
                # there is no checksum; previously the path ended up in
                # md5sum and the reference path was empty
                path, md5sum = md5sum, None
            # we ignore dirs in general, and we ignore these that would
            # be created a plain dir when we can
            if path in ignored_root_dirs:
                continue
            ref = models.FileReference(path=path, md5=md5sum)
            file_references.append(ref)

    if not file_references:
        return

    yield models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        name=name,
        qualifiers=qualifiers,
        file_references=file_references,
    )
def parse(cls, location):
    """
    Yield one PackageData whose dependencies are the resolved entries of a
    go.sum file at ``location``.
    """
    package_dependencies = [
        models.DependentPackage(
            purl=entry.purl(),
            extracted_requirement=entry.version,
            scope='dependency',
            is_runtime=True,
            is_optional=False,
            is_resolved=True,
        )
        for entry in go_mod.parse_gosum(location)
    ]

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        dependencies=package_dependencies,
        primary_language=cls.default_primary_language,
    )
def test_MetadataBzl_parse(self):
    """Check parsing of a classic METADATA.bzl file."""
    test_file = self.get_test_loc('metadatabzl/METADATA.bzl')
    results = build.BuckMetadataBzlHandler.parse(test_file)
    maintainer = models.Party(
        type=models.party_org,
        name='oss_foundation',
        role='maintainer',
    )
    expected = [
        models.PackageData(
            datasource_id=build.BuckMetadataBzlHandler.datasource_id,
            type='github',
            name='example',
            version='0.0.1',
            declared_license=['BSD-3-Clause'],
            parties=[maintainer],
            homepage_url='https://github.com/example/example',
        ),
    ]
    compare_package_results(expected, results)
def create_package_data_from_msiinfo_results(
    msiinfo_results,
    datasource_id='msi_installer',
    package_type='msi',
):
    """
    Return PackageData from a mapping of `msiinfo_results`.

    Consumed keys are popped from ``msiinfo_results``; whatever remains is
    kept verbatim as extra_data.
    """
    parties = []
    author_name = msiinfo_results.pop('Author', '')
    if author_name:
        author = models.Party(
            type=None,
            role='author',
            name=author_name,
        )
        parties.append(author)

    # Currently, we use the contents `Subject` field from the msiinfo suminfo
    # results as the package name because it contains the package name most of
    # the time. Getting the version out of the `Subject` string is not
    # straightforward because the format of the string is usually different
    # between different MSIs
    subject = msiinfo_results.pop('Subject', '')

    return models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        name=subject,
        version=get_version_from_subject_line(subject),
        description=msiinfo_results.pop('Comments', ''),
        parties=parties,
        keywords=msiinfo_results.pop('Keywords', []),
        extra_data=msiinfo_results,
    )
def build_package(readme_manifest):
    """
    Return a Package object from a readme_manifest mapping (from a
    README.chromium file or similar) or None.
    """
    package = models.PackageData(
        datasource_id=ReadmeHandler.datasource_id,
        type=ReadmeHandler.default_package_type,
    )

    for raw_line in readme_manifest.splitlines():
        stripped = raw_line.strip()

        # lines look like "key: value" or "key=value"; anything else is
        # skipped
        if ':' in stripped:
            separator = ':'
        elif '=' in stripped:
            separator = '='
        else:
            continue

        key, _sep, value = stripped.partition(separator)
        key = key.lower().strip()
        value = value.strip()
        if not key or not value:
            continue

        package_field = PACKAGE_FIELD_BY_README_FIELD.get(key)
        if package_field:
            setattr(package, package_field, value)

    if not package.license_expression and package.declared_license:
        package.license_expression = models.compute_normalized_license(
            package.declared_license)
    return package
def get_installed_dotnet_versions_from_regtree(
    registry_tree,
    datasource_id,
    package_type,
):
    """
    Yield PackageData for the installed versions of .NET framework from a
    Windows ``registry_tree``.
    """
    if not registry_tree:
        return

    for entry in registry_tree:
        # The .NET version can be found in the path whose last segment ends
        # with `Full`
        if not entry.get('path', '').endswith('\\Full'):
            continue

        version = None
        file_references = []
        for value_entry in entry.get('values', []):
            value_name = value_entry.get('name')
            value_data = value_entry.get('value')
            if value_name == 'Version':
                version = value_data
            elif value_name == 'InstallPath':
                file_references.append(models.FileReference(path=value_data))

        yield models.PackageData(
            datasource_id=datasource_id,
            type=package_type,
            name='microsoft-dot-net-framework',
            version=version,
            file_references=file_references,
        )
def parse(cls, location):
    """
    Yield one PackageData from a Windows side-by-side assembly manifest XML
    file at ``location``, if parseable.
    """
    with open(location, 'rb') as manifest_file:
        parsed_xml = xmltodict.parse(manifest_file)
    if not parsed_xml:
        return

    assembly = parsed_xml.get('assembly', {})
    identity = assembly.get('assemblyIdentity', {})

    # the company, when present, is recorded as the owning organization
    company = assembly.get('@company', '')
    parties = []
    if company:
        parties.append(
            models.Party(
                name=company,
                type=models.party_org,
                role='owner',
            ))

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=identity.get('@name', ''),
        version=identity.get('@version', ''),
        description=assembly.get('@description', ''),
        homepage_url=assembly.get('@supportInformation', ''),
        parties=parties,
        copyright=assembly.get('@copyright', ''),
    )
def parse(cls, location):
    """
    Yield one PackageData whose dependencies are the pinned packages of a
    Cargo.lock file at ``location``.
    """
    lock_data = toml.load(location, _dict=dict)

    dependencies = []
    # TODO: add missing "source" vs. "dependencies" and checksum
    for entry in lock_data.get('package', []):
        purl = PackageURL(
            type='cargo',
            name=entry.get('name'),
            version=entry.get('version'),
        )
        dependencies.append(
            models.DependentPackage(
                purl=purl.to_string(),
                extracted_requirement=entry.get('version'),
                scope='dependencies',
                is_runtime=True,
                is_optional=False,
                is_resolved=True,
            ))

    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        primary_language=cls.default_primary_language,
        dependencies=dependencies,
    )
def build_package_data_from_package_filename(
    filename,
    datasource_id,
    package_type,
):
    """
    Return a PackageData built from the filename of a Debian package archive.
    """
    # TODO: we cannot know the distro from the name only
    deb = DebArchive.from_filename(filename=filename)
    qualifiers = dict(architecture=deb.architecture) if deb.architecture else {}
    return models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        name=deb.name,
        version=deb.version,
        qualifiers=qualifiers,
    )