def dirname_handler(value, **kwargs):
    """
    Return a mapping of {'file_references': <list of FileReference dicts>}.

    Update the ``current_filerefs`` found in ``kwargs`` by adding the correct
    dir, basename and checksum value. ``value`` is a sequence of directory
    names indexed by the ``dirindexes`` of each file reference.
    """
    file_references = []
    current_filerefs = kwargs.get('current_filerefs') or []
    for dirindexes, checksum, basename in current_filerefs:
        dirname = value[int(dirindexes)]
        # TODO: review this. Empty filename does not make sense, unless these
        # are directories that we might ignore OK.
        # There is a case where the entry of basename is "</string>" which
        # will cause an error as a None type cannot be used for join.
        # Therefore, we need to convert the None type to an empty string
        # in order to make the join work.
        # Use an identity check: `is None`, not `== None`.
        if basename is None:
            basename = ''
        file_reference = models.FileReference(
            path=posixpath.join(dirname, basename),
            # TODO: add size and fileclass as extra data
        )
        # TODO: we could/should use instead the filedigestalgo RPM tag
        algo = infer_digest_algo(checksum)
        if algo:
            setattr(file_reference, algo, checksum)
        file_references.append(file_reference)
    return {'file_references': [fr.to_dict() for fr in file_references]}
def parse_debian_files_list(location, datasource_id, package_type):
    """
    Yield PackageData from a list of file paths at ``location`` such as a
    Debian installed .list or .md5sums file.
    """
    qualifiers = {}
    filename = fileutils.file_base_name(location)
    if ':' in filename:
        name, _, arch = filename.partition(':')
        qualifiers['arch'] = arch
    else:
        name = filename

    file_references = []
    with open(location) as info_file:
        for line in info_file:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            # for a plain file list, the md5sum will be empty
            md5sum, _, path = line.partition(' ')
            path = path.strip()
            # partition always yields a str: normalize empty/blank to None
            md5sum = md5sum.strip() or None
            # we ignore dirs in general, and we ignore these that would
            # be created a plain dir when we can
            if path in ignored_root_dirs:
                continue
            ref = models.FileReference(path=path, md5=md5sum)
            file_references.append(ref)

    if not file_references:
        return

    yield models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        name=name,
        qualifiers=qualifiers,
        file_references=file_references,
    )
def get_installed_dotnet_versions_from_regtree(
    registry_tree,
    datasource_id,
    package_type,
):
    """
    Yield PackageData for the installed versions of the .NET framework found
    in a Windows ``registry_tree``.
    """
    if not registry_tree:
        return

    for entry in registry_tree:
        # The .NET version can be found in the path whose last segment ends
        # with `Full`
        if not entry.get('path', '').endswith('\\Full'):
            continue

        version = None
        file_references = []
        for value_entry in entry.get('values', []):
            regkey = value_entry.get('name')
            regvalue = value_entry.get('value')
            if regkey == 'Version':
                version = regvalue
            elif regkey == 'InstallPath':
                file_references.append(models.FileReference(path=regvalue))

        yield models.PackageData(
            datasource_id=datasource_id,
            type=package_type,
            name='microsoft-dot-net-framework',
            version=version,
            file_references=file_references,
        )
def parse(cls, location):
    """
    Yield one or more Package manifest objects given a file ``location``
    pointing to a package archive, manifest or similar.
    """
    with io.open(location, encoding='utf-8') as loc:
        package_data = saneyaml.load(loc.read())

    # About files can contain any purl and also have a namespace
    about_type = package_data.get('type')
    about_ns = package_data.get('namespace')
    purl_type = None
    purl_ns = None
    purl = package_data.get('purl')
    if purl:
        purl = PackageURL.from_string(purl)
        purl_type = purl.type
        # bug fix: the purl namespace was parsed but never used before,
        # silently dropping it from the computed package namespace
        purl_ns = purl.namespace

    package_type = about_type or purl_type or cls.default_package_type
    package_ns = about_ns or purl_ns

    name = package_data.get('name')
    version = package_data.get('version')

    homepage_url = package_data.get('home_url') or package_data.get(
        'homepage_url')
    download_url = package_data.get('download_url')
    copyright_statement = package_data.get('copyright')

    license_expression = package_data.get('license_expression')
    declared_license = license_expression

    parties = []
    owner = package_data.get('owner')
    # bug fix: a missing owner used to create a Party named 'None' through
    # repr(None); only build a party when an owner is actually present
    if owner is not None:
        if not isinstance(owner, str):
            owner = repr(owner)
        parties.append(
            models.Party(type=models.party_person, name=owner, role='owner')
        )

    # FIXME: also include notice_file and license_file(s) as file_references
    file_references = []
    about_resource = package_data.get('about_resource')
    if about_resource:
        file_references.append(models.FileReference(path=about_resource))

    # FIXME: we should put the unprocessed attributes in extra data
    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=package_type,
        namespace=package_ns,
        name=name,
        version=version,
        declared_license=declared_license,
        license_expression=license_expression,
        copyright=copyright_statement,
        parties=parties,
        homepage_url=homepage_url,
        download_url=download_url,
        file_references=file_references,
    )
def parse(cls, location):
    """
    Yield PackageData from an npm package-lock.json file at ``location``:
    the root package is yielded with its resolved dependencies attached.
    """
    with io.open(location, encoding='utf-8') as loc:
        package_data = json.load(loc)

    # we have two formats: v1 and v2
    lockfile_version = package_data.get('lockfileVersion', 1)
    root_name = package_data.get('name')
    root_version = package_data.get('version')
    root_ns, _, root_name = root_name.rpartition('/')

    extra_data = dict(lockfile_version=lockfile_version)
    # this is the top level element that we return
    root_package_data = models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        primary_language=cls.default_primary_language,
        namespace=root_ns,
        name=root_name,
        version=root_version,
        extra_data=extra_data,
        **get_urls(root_ns, root_name, root_version)
    )

    # https://docs.npmjs.com/cli/v8/configuring-npm/package-lock-json#lockfileversion
    if lockfile_version == 1:
        deps_key = 'dependencies'
    else:
        # v2 and may be v3???
        deps_key = 'packages'

    deps_mapping = package_data.get(deps_key) or {}

    dependencies = []
    for dep, dep_data in deps_mapping.items():
        is_dev = dep_data.get('dev', False)
        is_optional = dep_data.get('optional', False)
        is_devoptional = dep_data.get('devOptional', False)
        if is_dev or is_devoptional:
            is_runtime = False
            is_optional = True
            scope = 'devDependencies'
        else:
            is_runtime = True
            scope = 'dependencies'

        # in the v2 format the first dep key is empty: it is the same as the
        # top level package and has no name.
        # "name" is only present for the first top level entry; otherwise get
        # the name from the dep key
        name = dep_data.get('name')
        if not name:
            if 'node_modules/' in dep:
                # the name is the last segment as the dep can be:
                # "node_modules/ansi-align/node_modules/ansi-regex"
                _, _, name = dep.rpartition('node_modules/')
            else:
                name = dep
        ns, _, name = name.rpartition('/')
        version = dep_data.get('version')

        dep_purl = PackageURL(
            type=cls.default_package_type,
            namespace=ns,
            name=name,
            version=version,
        ).to_string()

        dependency = models.DependentPackage(
            purl=dep_purl,
            extracted_requirement=version,
            scope=scope,
            is_runtime=is_runtime,
            is_optional=is_optional,
            is_resolved=True,
        )

        # only seen in v2 for the top level package... but good to keep
        declared_license = dep_data.get('license')

        # URLs and checksums
        misc = get_urls(ns, name, version)
        resolved = dep_data.get('resolved')
        misc.update(get_checksum_and_url(resolved).items())
        integrity = dep_data.get('integrity')
        misc.update(get_algo_hexsum(integrity).items())

        resolved_package = models.PackageData(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            primary_language=cls.default_primary_language,
            namespace=ns,
            name=name,
            version=version,
            declared_license=declared_license,
            **misc,
        )

        # these are paths to the root of the installed package in v2
        if dep:
            # bug fix: a stray trailing comma previously turned this into a
            # 1-tuple containing the list instead of a plain list
            resolved_package.file_references = [models.FileReference(path=dep)]

        # v1 has name/constraint pairs under "requires"; in v2 these pairs
        # live under "dependencies" (which in v1 holds further nested deps)
        if lockfile_version == 1:
            subdeps_data = dep_data.get('requires')
        else:
            subdeps_data = dep_data.get('dependencies')
        subdeps_data = subdeps_data or {}

        # NOTE: v2-only keys are ignored for now: devDependencies,
        # optionalDependencies, engines, funding

        sub_deps = []
        for subdep, subdep_req in subdeps_data.items():
            sdns, _, sdname = subdep.rpartition('/')
            sdpurl = PackageURL(
                type=cls.default_package_type,
                namespace=sdns,
                name=sdname,
            ).to_string()
            sub_deps.append(
                models.DependentPackage(
                    purl=sdpurl,
                    scope=scope,
                    extracted_requirement=subdep_req,
                    is_runtime=is_runtime,
                    is_optional=is_optional,
                    is_resolved=False,
                )
            )
        resolved_package.dependencies = sub_deps
        dependency.resolved_package = resolved_package.to_dict()
        dependencies.append(dependency)

    root_package_data.dependencies = dependencies
    yield root_package_data
def get_file_references(files):
    """
    Return a list of FileReference from a ``files`` list of gem file paths.
    Return an empty list when ``files`` is None or empty.
    """
    files = files or []
    # pass path as a keyword argument for consistency with every other
    # FileReference construction in this file
    return [models.FileReference(path=file_path) for file_path in files]
def get_installed_windows_programs_from_regtree(
    registry_tree,
    datasource_id,
    package_type,
):
    """
    Yield installed Windows PackageData from a Windows ``registry_tree``.
    """
    if not registry_tree:
        return

    field_by_regkey = {
        'DisplayName': 'name',
        'DisplayVersion': 'version',
        'URLInfoAbout': 'homepage_url',
        'Publisher': 'publisher',
        'DisplayIcon': 'display_icon',
        'UninstallString': 'uninstall_string',
        'InstallLocation': 'install_location',
    }

    for entry in registry_tree:
        # map the known registry value names to package fields
        collected = {}
        for entry_value in entry.get('values', []):
            pkg_field = field_by_regkey.get(entry_value.get('name'))
            if pkg_field:
                collected[pkg_field] = entry_value.get('value')

        parties = []
        publisher = collected.get('publisher')
        if publisher:
            parties = [
                models.Party(
                    type=models.party_org,
                    role='publisher',
                    name=publisher,
                )
            ]

        # each of these fields, when present, contributes a file reference,
        # in this order
        file_references = [
            models.FileReference(path=collected[field])
            for field in ('install_location', 'display_icon', 'uninstall_string')
            if collected.get(field)
        ]

        yield models.PackageData(
            datasource_id=datasource_id,
            type=package_type,
            name=collected.get('name'),
            version=collected.get('version'),
            parties=parties,
            homepage_url=collected.get('homepage_url'),
            file_references=file_references,
        )