def create_discovered_packages(project, scanned_codebase):
    """
    Save the packages of a ScanCode `scanned_codebase` scancode.resource.Codebase
    object to the DB as DiscoveredPackage of `project`.
    Relate package resources to CodebaseResource.

    For every non-root resource that carries package data, each package is
    created or updated, then related to the resource it was detected in and to
    all the resources returned by the package `get_package_resources()`.

    Each dependency of a package that has a `purl` is also saved as its own
    DiscoveredPackage and related to the resource the package was detected in.
    Dependencies without a `purl` are skipped.

    The CodebaseResource of every scanned path is fetched with `objects.get()`
    and is therefore expected to already exist in `project`.
    """
    for scanned_resource in scanned_codebase.walk(skip_root=True):
        scanned_packages = getattr(scanned_resource, "packages", [])
        if not scanned_packages:
            continue

        scanned_resource_path = scanned_resource.get_path(strip_root=True)
        cbr = CodebaseResource.objects.get(
            project=project, path=scanned_resource_path
        )

        for scan_data in scanned_packages:
            discovered_package = pipes.update_or_create_package(project, scan_data)
            set_codebase_resource_for_package(
                codebase_resource=cbr, discovered_package=discovered_package
            )

            scanned_package = packagedcode.get_package_instance(scan_data)

            # Set all the resources attached to that package
            scanned_package_resources = scanned_package.get_package_resources(
                scanned_resource, scanned_codebase
            )
            for scanned_package_res in scanned_package_resources:
                package_cbr = CodebaseResource.objects.get(
                    project=project,
                    path=scanned_package_res.get_path(strip_root=True),
                )
                set_codebase_resource_for_package(
                    codebase_resource=package_cbr,
                    discovered_package=discovered_package,
                )

            # also set dependencies as their own packages
            # TODO: we should instead relate these to the package
            # TODO: we likely need a status for DiscoveredPackage
            for dependency in scanned_package.dependencies or []:
                # FIXME: we should get DependentPackage instances and not a mapping
                # Until then a dependency may be an object or a plain mapping.
                # An attribute lookup never finds the "purl" key of a mapping
                # and would silently drop the dependency, so fall back to a
                # key lookup in that case.
                purl = getattr(dependency, "purl", None)
                if not purl and isinstance(dependency, dict):
                    purl = dependency.get("purl")
                if not purl:
                    # TODO: we should log that
                    continue

                dep = PackageURL.from_string(purl).to_dict()
                dependent_package = pipes.update_or_create_package(project, dep)

                # attached to the current resource (typically a manifest?)
                set_codebase_resource_for_package(
                    codebase_resource=cbr, discovered_package=dependent_package
                )
def scan_rootfs_for_system_packages(project, rootfs, detect_licenses=True):
    """
    Scan the `rootfs` RootFs of the `project` Project for installed system
    packages and create (or update) a DiscoveredPackage for each of them.

    Each file installed by a package is looked up by its normalized rootfs
    path among the CodebaseResource of the project:

    - when no CodebaseResource exists, the path is recorded in the package
      `missing_resources`;
    - otherwise the CodebaseResource is related to the package and flagged
      with the "system-package" status if it was not related yet, and the
      installed file path is recorded in the package `modified_resources`
      when one of its checksums differs from the CodebaseResource one.

    Packages reporting no installed files are created but not processed
    further.

    Raise DistroNotFound when `rootfs` has no distro and DistroNotSupported
    when no package getter is registered for the distro identifier.
    """
    if not rootfs.distro:
        raise DistroNotFound(f"Distro not found.")

    distro_id = rootfs.distro.identifier
    if distro_id not in PACKAGE_GETTER_BY_DISTRO:
        raise DistroNotSupported(f'Distro "{distro_id}" is not supported.')

    package_getter = partial(
        PACKAGE_GETTER_BY_DISTRO[distro_id],
        distro=distro_id,
        detect_licenses=detect_licenses,
    )

    # Checksum attributes shared by installed files and codebase resources,
    # listed from the strongest to the weakest algorithm.
    hash_fields = ("sha512", "sha256", "sha1", "md5")

    for i, (purl, package) in enumerate(rootfs.get_installed_packages(package_getter)):
        logger.info(f"Creating package #{i}: {purl}")
        system_package = pipes.update_or_create_package(project, package.to_dict())

        # Without installed files there is nothing to relate to this package.
        installed_files = package.installed_files
        if not installed_files:
            logger.info(f" No installed_files for: {purl}")
            continue

        # Work on copies; the package is updated once all files are processed.
        missing = system_package.missing_resources[:]
        modified = system_package.modified_resources[:]

        project_resources = project.codebaseresources.all()

        for installed_file in installed_files:
            rootfs_path = pipes.normalize_path(installed_file.path)
            logger.info(f" installed file rootfs_path: {rootfs_path}")

            try:
                resource = project_resources.get(rootfs_path=rootfs_path)
            except ObjectDoesNotExist:
                if rootfs_path not in missing:
                    missing.append(rootfs_path)
                logger.info(f" installed file is missing: {rootfs_path}")
                continue

            # id list?
            if system_package not in resource.discovered_packages.all():
                resource.discovered_packages.add(system_package)
                resource.status = "system-package"
                logger.info(f" added as system-package to: {purl}")
                resource.save()

            # Distros record different checksums (Alpine uses SHA1 while Debian
            # uses MD5): compare every algorithm for which both sides have a
            # value, strongest first, and stop at the first mismatch.
            checksum_differs = any(
                getattr(installed_file, name)
                and getattr(resource, name)
                and getattr(resource, name) != getattr(installed_file, name)
                for name in hash_fields
            )
            if checksum_differs and installed_file.path not in modified:
                modified.append(installed_file.path)

        system_package.missing_resources = missing
        system_package.modified_resources = modified
        system_package.save()
def scan_image_for_system_packages(project, image, detect_licenses=True):
    """
    Scan the `image` of the `project`, layer by layer, for installed system
    packages and create (or update) a DiscoveredPackage for each of them.

    Each file installed by a package is searched among the CodebaseResource
    of the project, matching both its normalized rootfs path and a path
    ending with "<layer id>/<installed file path>":

    - every matching CodebaseResource is related to the package and flagged
      with the "system-package" status if it was not related yet, and the
      installed file path is recorded in the package `modified_resources`
      when one of its checksums differs from the CodebaseResource one;
    - when nothing matches, the normalized path is recorded in the package
      `missing_resources`.

    Packages reporting no installed files are created but not processed
    further.

    Raise rootfs.DistroNotFound when `image` has no distro and
    rootfs.DistroNotSupported when no package getter is registered for the
    distro identifier.
    """
    if not image.distro:
        raise rootfs.DistroNotFound(f"Distro not found.")

    distro_id = image.distro.identifier
    if distro_id not in rootfs.PACKAGE_GETTER_BY_DISTRO:
        raise rootfs.DistroNotSupported(f'Distro "{distro_id}" is not supported.')

    package_getter = partial(
        rootfs.PACKAGE_GETTER_BY_DISTRO[distro_id],
        distro=distro_id,
        detect_licenses=detect_licenses,
    )

    # Checksum attributes shared by installed files and codebase resources,
    # listed from the strongest to the weakest algorithm.
    hash_fields = ("sha512", "sha256", "sha1", "md5")

    packages_with_layer = image.get_installed_packages(package_getter)
    for i, (purl, package, layer) in enumerate(packages_with_layer):
        logger.info(f"Creating package #{i}: {purl}")
        system_package = pipes.update_or_create_package(project, package.to_dict())

        # Without installed files there is nothing to relate to this package.
        installed_files = package.installed_files
        if not installed_files:
            logger.info(f" No installed_files for: {purl}")
            continue

        # Work on copies; the package is updated once all files are processed.
        missing = system_package.missing_resources[:]
        modified = system_package.modified_resources[:]

        project_resources = CodebaseResource.objects.project(project)

        for installed_file in installed_files:
            install_file_path = pipes.normalize_path(installed_file.path)
            layer_rootfs_path = posixpath.join(
                layer.layer_id, install_file_path.strip("/")
            )
            logger.info(f" installed file rootfs_path: {install_file_path}")
            logger.info(f" layer rootfs_path: {layer_rootfs_path}")

            matching_resources = project_resources.filter(
                path__endswith=layer_rootfs_path,
                rootfs_path=install_file_path,
            )

            matched = False
            for resource in matching_resources:
                matched = True

                if system_package not in resource.discovered_packages.all():
                    resource.discovered_packages.add(system_package)
                    resource.status = "system-package"
                    logger.info(f" added as system-package to: {purl}")
                    resource.save()

                # Distros record different checksums (Alpine uses SHA1 while
                # Debian uses MD5): compare every algorithm for which both
                # sides have a value, strongest first, and stop at the first
                # mismatch.
                checksum_differs = any(
                    getattr(installed_file, name)
                    and getattr(resource, name)
                    and getattr(resource, name) != getattr(installed_file, name)
                    for name in hash_fields
                )
                if checksum_differs and installed_file.path not in modified:
                    modified.append(installed_file.path)

            # Only a file with no matching resource, and not recorded as
            # missing yet, remains to be handled.
            if matched or install_file_path in missing:
                continue
            missing.append(install_file_path)
            logger.info(f" installed file is missing: {install_file_path}")

        system_package.missing_resources = missing
        system_package.modified_resources = modified
        system_package.save()