Example #1
0
def create_discovered_packages(project, scanned_codebase):
    """
    Save the packages of a ScanCode `scanned_codebase` scancode.resource.Codebase
    object to the DB as DiscoveredPackage of `project`.
    Relate package resources to CodebaseResource.

    For each resource of the codebase (the root is skipped) that has package
    data:

    - each package mapping is saved with `pipes.update_or_create_package` and
      related to the CodebaseResource of the resource where it was found;
    - each resource returned by the package `get_package_resources()` is
      related to that same DiscoveredPackage;
    - each dependency that has a purl is saved as its own DiscoveredPackage
      and related to the CodebaseResource where the package was found.

    Resources without package data and dependencies without a purl are
    skipped. Errors raised by the `CodebaseResource.objects.get` lookups are
    not handled here and propagate to the caller.
    """
    for scanned_resource in scanned_codebase.walk(skip_root=True):
        scanned_packages = getattr(scanned_resource, "packages", [])
        if not scanned_packages:
            continue

        scanned_resource_path = scanned_resource.get_path(strip_root=True)
        cbr = CodebaseResource.objects.get(project=project, path=scanned_resource_path)

        for scan_data in scanned_packages:
            discovered_package = pipes.update_or_create_package(project, scan_data)
            set_codebase_resource_for_package(
                codebase_resource=cbr, discovered_package=discovered_package
            )

            scanned_package = packagedcode.get_package_instance(scan_data)
            # Set all the resource attached to that package
            scanned_package_resources = scanned_package.get_package_resources(
                scanned_resource, scanned_codebase
            )
            for scanned_package_res in scanned_package_resources:
                package_cbr = CodebaseResource.objects.get(
                    project=project, path=scanned_package_res.get_path(strip_root=True)
                )
                set_codebase_resource_for_package(
                    codebase_resource=package_cbr, discovered_package=discovered_package
                )

            # also set dependencies as their own packages
            # TODO: we should instead relate these to the package
            # TODO: we likely need a status for DiscoveredPackage
            dependencies = scanned_package.dependencies or []
            for dependency in dependencies:
                # FIXME: we should get DependentPackage instances and not a mapping
                # Until then, accept both shapes: `getattr` always returns None
                # for a mapping, which silently dropped every dependency that
                # was provided as a plain dict.
                if isinstance(dependency, dict):
                    purl = dependency.get("purl")
                else:
                    purl = getattr(dependency, "purl", None)
                if not purl:
                    # TODO: we should log that
                    continue
                purl = PackageURL.from_string(purl)
                dep = purl.to_dict()
                dependent_package = pipes.update_or_create_package(project, dep)

                # attached to the current resource (typically a manifest?)
                set_codebase_resource_for_package(
                    codebase_resource=cbr, discovered_package=dependent_package
                )
Example #2
0
def _has_hash_diff(install_file, codebase_resource):
    """
    Return True when `install_file` and `codebase_resource` disagree on a
    checksum that both of them provide.

    The sha512, sha256, sha1 and md5 attributes are compared, strongest first.
    A checksum that is empty on either side is skipped: Alpine uses SHA1 while
    Debian uses MD5, so both sides may not share every algorithm.
    """
    for hash_name in ("sha512", "sha256", "sha1", "md5"):
        install_file_hash = getattr(install_file, hash_name)
        if not install_file_hash:
            continue
        codebase_resource_hash = getattr(codebase_resource, hash_name)
        if codebase_resource_hash and codebase_resource_hash != install_file_hash:
            return True
    return False


def scan_rootfs_for_system_packages(project, rootfs, detect_licenses=True):
    """
    Given a `project` Project and an `rootfs` RootFs, scan the `rootfs` for
    installed system packages. Create a DiscoveredPackage for each.

    Then for each installed DiscoveredPackage installed file, check if it exists
    as a CodebaseResource and relate that CodebaseResource to its
    DiscoveredPackage or keep that as a missing file.

    A related CodebaseResource gets the "system-package" status. An installed
    file whose checksum differs from its CodebaseResource is recorded in the
    package `modified_resources` using the path reported by the package, while
    `missing_resources` holds normalized rootfs paths.

    Raise a DistroNotFound if the `rootfs` has no distro and a
    DistroNotSupported if there is no package getter for that distro.
    """
    if not rootfs.distro:
        raise DistroNotFound("Distro not found.")

    distro_id = rootfs.distro.identifier
    if distro_id not in PACKAGE_GETTER_BY_DISTRO:
        raise DistroNotSupported(f'Distro "{distro_id}" is not supported.')

    package_getter = partial(
        PACKAGE_GETTER_BY_DISTRO[distro_id],
        distro=distro_id,
        detect_licenses=detect_licenses,
    )

    installed_packages = rootfs.get_installed_packages(package_getter)

    # This only depends on the project: build it once rather than once per
    # installed package.
    codebase_resources = project.codebaseresources.all()

    for i, (purl, package) in enumerate(installed_packages):
        logger.info(f"Creating package #{i}: {purl}")
        created_package = pipes.update_or_create_package(project, package.to_dict())

        # We have no files for this installed package, we cannot go further.
        if not package.installed_files:
            logger.info(f"  No installed_files for: {purl}")
            continue

        # Work on copies and assign them back once all files are processed.
        missing_resources = created_package.missing_resources[:]
        modified_resources = created_package.modified_resources[:]

        for install_file in package.installed_files:
            rootfs_path = pipes.normalize_path(install_file.path)
            logger.info(f"   installed file rootfs_path: {rootfs_path}")

            try:
                codebase_resource = codebase_resources.get(
                    rootfs_path=rootfs_path,
                )
            except ObjectDoesNotExist:
                if rootfs_path not in missing_resources:
                    missing_resources.append(rootfs_path)
                logger.info(f"      installed file is missing: {rootfs_path}")
                continue

            # id list?
            if created_package not in codebase_resource.discovered_packages.all():
                codebase_resource.discovered_packages.add(created_package)
                codebase_resource.status = "system-package"
                logger.info(f"      added as system-package to: {purl}")
                codebase_resource.save()

            # Any mismatch on a checksum present on both sides flags the file
            # as modified.
            if _has_hash_diff(install_file, codebase_resource):
                if install_file.path not in modified_resources:
                    modified_resources.append(install_file.path)

        created_package.missing_resources = missing_resources
        created_package.modified_resources = modified_resources
        created_package.save()
Example #3
0
def scan_image_for_system_packages(project, image, detect_licenses=True):
    """
    Scan the `image` layer by layer for installed system packages and save
    each of them as a DiscoveredPackage of `project`.

    Every installed file of a package is looked up among the project
    CodebaseResource using its layer path and its rootfs path. Each matching
    resource is related to the DiscoveredPackage and gets the "system-package"
    status. A file without any match is added to the package
    `missing_resources`; a file whose checksum differs from a matching
    resource is added to its `modified_resources`.

    Raise a rootfs.DistroNotFound if the `image` has no distro and a
    rootfs.DistroNotSupported if there is no package getter for that distro.
    """
    if not image.distro:
        raise rootfs.DistroNotFound("Distro not found.")

    distro_id = image.distro.identifier
    if distro_id not in rootfs.PACKAGE_GETTER_BY_DISTRO:
        message = f'Distro "{distro_id}" is not supported.'
        raise rootfs.DistroNotSupported(message)

    getter_for_distro = rootfs.PACKAGE_GETTER_BY_DISTRO[distro_id]
    package_getter = partial(
        getter_for_distro, distro=distro_id, detect_licenses=detect_licenses
    )

    # Checked strongest first; Alpine uses SHA1 while Debian uses MD5.
    hash_names = ("sha512", "sha256", "sha1", "md5")

    packages_with_layer = image.get_installed_packages(package_getter)
    for index, (purl, package, layer) in enumerate(packages_with_layer):
        logger.info(f"Creating package #{index}: {purl}")
        package_data = package.to_dict()
        created_package = pipes.update_or_create_package(project, package_data)

        if not package.installed_files:
            # Nothing to relate without installed files.
            logger.info(f"  No installed_files for: {purl}")
            continue

        # Work on copies and assign them back once all files are processed.
        missing_paths = created_package.missing_resources[:]
        modified_paths = created_package.modified_resources[:]

        project_resources = CodebaseResource.objects.project(project)

        for install_file in package.installed_files:
            install_file_path = pipes.normalize_path(install_file.path)
            layer_rootfs_path = posixpath.join(
                layer.layer_id, install_file_path.strip("/")
            )
            logger.info(f"   installed file rootfs_path: {install_file_path}")
            logger.info(f"   layer rootfs_path: {layer_rootfs_path}")

            matching_resources = project_resources.filter(
                path__endswith=layer_rootfs_path,
                rootfs_path=install_file_path,
            )

            match_count = 0
            for codebase_resource in matching_resources:
                match_count += 1

                related_packages = codebase_resource.discovered_packages.all()
                if created_package not in related_packages:
                    codebase_resource.discovered_packages.add(created_package)
                    codebase_resource.status = "system-package"
                    logger.info(f"      added as system-package to: {purl}")
                    codebase_resource.save()

                # A checksum counts only when both sides provide it; any
                # mismatch flags the file as modified.
                checksum_differs = any(
                    getattr(install_file, name)
                    and getattr(codebase_resource, name)
                    and getattr(codebase_resource, name) != getattr(install_file, name)
                    for name in hash_names
                )
                # The path is recorded as reported by the package, not normalized.
                if checksum_differs and install_file.path not in modified_paths:
                    modified_paths.append(install_file.path)

            if match_count:
                continue

            if install_file_path not in missing_paths:
                missing_paths.append(install_file_path)
                logger.info(f"      installed file is missing: {install_file_path}")

        created_package.missing_resources = missing_paths
        created_package.modified_resources = modified_paths
        created_package.save()