def set_packages_root(resource, codebase):
    """
    Record, on each package manifest detected in the file `resource`, the path
    of the Resource that is the root of that package or build script.

    Return None when `resource` is not a file or carries no package manifests.
    Otherwise return `resource`, after saving it in `codebase` if at least one
    `root_path` was set.
    """
    # Only files can have a package.
    manifests = resource.package_manifests if resource.is_file else None
    if not manifests:
        return None

    # NOTE: a single file should normally yield a single package. Some package
    # manifests can however document several packages at once, such as
    # multiple arches/platforms for a gemspec or multiple sub packages (with
    # "%package") in an RPM .spec file.
    roots_assigned = 0
    for manifest in manifests:
        root = get_package_instance(manifest).get_package_root(resource, codebase)
        # No root: this can happen if we scan a single resource that is itself
        # a package. A root that is the codebase root is also skipped when the
        # codebase is in stripped root mode.
        if not root or (root.is_root and codebase.strip_root):
            continue
        manifest['root_path'] = root.path
        roots_assigned += 1

    if roots_assigned:
        # At least one root_path was set: persist the updated resource.
        codebase.save_resource(resource)
    return resource
Exemple #2
0
 def test_get_package_resources_on_nested_packages_should_include_manifest(self):
     """
     Check that, for every package found in the nested npm packages scan, the
     resources returned for that package include a 'package.json' manifest.
     """
     from packagedcode import get_package_instance
     from scancode.resource import VirtualCodebase

     codebase = VirtualCodebase(
         self.get_test_loc('plugin_consolidate/nested-npm-packages.json'))
     for resource in codebase.walk():
         for package_data in resource.packages:
             package = get_package_instance(package_data)
             resource_names = [
                 r.name
                 for r in package.get_package_resources(resource, codebase)
             ]
             # The resource path is reported on failure to locate the culprit.
             assert 'package.json' in resource_names, resource.path
Exemple #3
0
def create_discovered_packages(project, scanned_codebase):
    """
    Store the packages of a ScanCode `scanned_codebase`
    scancode.resource.Codebase object in the DB as DiscoveredPackage of
    `project`, and relate each package to its CodebaseResource objects.

    The root of the codebase is skipped. Each dependency that has a purl is
    also stored as its own package, attached to the resource where the
    depending package was found.
    """
    for scanned_resource in scanned_codebase.walk(skip_root=True):
        package_mappings = getattr(scanned_resource, "packages", [])
        if not package_mappings:
            continue

        # The CodebaseResource matching the resource that holds the packages.
        manifest_cbr = CodebaseResource.objects.get(
            project=project,
            path=scanned_resource.get_path(strip_root=True),
        )

        for package_mapping in package_mappings:
            discovered_package = pipes.update_or_create_package(
                project, package_mapping
            )
            set_codebase_resource_for_package(
                codebase_resource=manifest_cbr,
                discovered_package=discovered_package,
            )

            package_instance = packagedcode.get_package_instance(package_mapping)
            # Relate every resource attached to that package.
            member_resources = package_instance.get_package_resources(
                scanned_resource, scanned_codebase
            )
            for member_resource in member_resources:
                member_path = member_resource.get_path(strip_root=True)
                member_cbr = CodebaseResource.objects.get(
                    project=project, path=member_path
                )
                set_codebase_resource_for_package(
                    codebase_resource=member_cbr,
                    discovered_package=discovered_package,
                )

            # Dependencies are also stored as their own packages.
            # TODO: we should instead relate these to the package
            # TODO: we likely need a status for DiscoveredPackage
            for dependency in package_instance.dependencies or []:
                # FIXME: we should get DependentPackage instances and not a mapping
                purl_string = getattr(dependency, "purl", None)
                if purl_string:
                    dependency_data = PackageURL.from_string(purl_string).to_dict()
                    dependent_package = pipes.update_or_create_package(
                        project, dependency_data
                    )
                    # Attached to the current resource (typically a manifest?)
                    set_codebase_resource_for_package(
                        codebase_resource=manifest_cbr,
                        discovered_package=dependent_package,
                    )
                # TODO: we should log dependencies skipped for lack of a purl
Exemple #4
0
 def test_get_package_resources_on_nested_packages_should_include_manifest(self):
     """
     Scan the nested packages test data with the '-p' option and check that
     the resources of every detected package manifest include a
     'package.json'.
     """
     from packagedcode import get_package_instance
     from commoncode.resource import VirtualCodebase

     scan_file = self.get_scan(
         'plugin_consolidate/nested-packages', cli_options='-p')
     codebase = VirtualCodebase(scan_file)
     for resource in codebase.walk():
         for manifest_data in resource.package_manifests:
             package = get_package_instance(manifest_data)
             found = list(package.get_package_resources(resource, codebase))
             has_manifest = any(r.name == 'package.json' for r in found)
             # The resource path is reported on failure to locate the culprit.
             assert has_manifest, resource.path
Exemple #5
0
 def test_Package_get_package_resource_works_with_nested_packages_and_ignores(self):
     """
     Check that every package in the nested npm packages scan is an
     NpmPackage whose resources include a 'package.json' manifest.
     """
     from packagedcode import get_package_instance
     from packagedcode import npm
     from commoncode.resource import VirtualCodebase

     codebase = VirtualCodebase(
         self.get_test_loc('models/nested-npm-packages.json'))
     for resource in codebase.walk():
         for package_data in resource.packages:
             package = get_package_instance(package_data)
             assert isinstance(package, npm.NpmPackage)
             resource_names = [
                 r.name
                 for r in package.get_package_resources(resource, codebase)
             ]
             # The resource path is reported on failure to locate the culprit.
             assert 'package.json' in resource_names, resource.path
Exemple #6
0
def get_consolidated_packages(codebase):
    """
    Yield a ConsolidatedPackage for each detected package in the codebase, or
    a ConsolidatedComponent of type 'build' when the detected package is a
    BaseBuildManifestPackage.

    The codebase is walked bottom-up. For each package, its root Resource is
    flagged with `extra_data['package_root']` and, unless the package is a
    build manifest, each of its resources is flagged with
    `extra_data['in_package_component']`. Flagged resources are saved in
    `codebase`.
    """

    def holders_sorted_by_key(holders):
        # Return the clustered holders ordered by holder key.
        clusters = copyright_summary.cluster(holders)
        return [h for h, _ in sorted(clusters, key=lambda t: t[0].key)]

    for resource in codebase.walk(topdown=False):
        for package_data in resource.packages:
            package = get_package_instance(package_data)
            # get_package_root() may return nothing (presumably when a lone
            # manifest is scanned and has no parent Resource). Use the
            # manifest Resource itself as the root in that case rather than
            # failing with an AttributeError on a None root.
            package_root = (
                package.get_package_root(resource, codebase) or resource
            )
            package_root.extra_data['package_root'] = True
            package_root.save(codebase)
            is_build_file = isinstance(package, BaseBuildManifestPackage)
            package_resources = list(
                package.get_package_resources(package_root, codebase))
            package_license_expression = package.license_expression
            package_copyright = package.copyright

            # Holders declared by the package itself, extracted from its
            # copyright statement.
            package_holders = []
            if package_copyright:
                numbered_lines = [(0, package_copyright)]
                package_holders = [
                    holder
                    for _, holder, _, _ in CopyrightDetector().detect(
                        numbered_lines,
                        copyrights=False,
                        holders=True,
                        authors=False,
                        include_years=False)
                ]
            package_holders = process_holders(package_holders)

            # Licenses and holders discovered in the package resources.
            discovered_license_expressions = []
            discovered_holders = []
            for package_resource in package_resources:
                if not is_build_file:
                    # If a resource is part of a package Component, then it
                    # cannot be part of any other type of Component
                    package_resource.extra_data['in_package_component'] = True
                    package_resource.save(codebase)
                if package_resource.license_expressions:
                    package_resource_license_expression = combine_expressions(
                        package_resource.license_expressions)
                    if package_resource_license_expression:
                        discovered_license_expressions.append(
                            package_resource_license_expression)
                if package_resource.holders:
                    discovered_holders.extend(
                        h.get('value') for h in package_resource.holders)
            discovered_holders = process_holders(discovered_holders)

            combined_discovered_license_expression = combine_expressions(
                discovered_license_expressions)
            if combined_discovered_license_expression:
                simplified_discovered_license_expression = str(
                    Licensing().parse(
                        combined_discovered_license_expression).simplify())
            else:
                simplified_discovered_license_expression = None

            c = Consolidation(
                core_license_expression=package_license_expression,
                core_holders=holders_sorted_by_key(package_holders),
                other_license_expression=(
                    simplified_discovered_license_expression),
                other_holders=holders_sorted_by_key(discovered_holders),
                files_count=sum(
                    1 for package_resource in package_resources
                    if package_resource.is_file
                ),
                resources=package_resources,
            )
            if is_build_file:
                # Build components are identified by the package name.
                c.identifier = package.name
                yield ConsolidatedComponent(type='build', consolidation=c)
            else:
                yield ConsolidatedPackage(package=package, consolidation=c)
Exemple #7
0
def get_consolidated_packages(codebase):
    """
    Yield a ConsolidatedPackage for each detected package in the codebase, or
    a ConsolidatedComponent of type 'build' when the detected package is a
    BaseBuildManifestPackage.

    The codebase is walked bottom-up. Unless the package is a build manifest,
    each of its resources is flagged with `extra_data['in_package_component']`
    and saved in `codebase`.
    """
    for resource in codebase.walk(topdown=False):
        for package_data in resource.packages:
            package = get_package_instance(package_data)
            is_build_file = isinstance(package, BaseBuildManifestPackage)
            members = list(package.get_package_resources(resource, codebase))
            core_license_expression = package.license_expression
            declared_copyright = package.copyright

            # Holders declared by the package itself, extracted from its
            # copyright statement.
            core_holders = []
            if declared_copyright:
                detections = CopyrightDetector().detect(
                    [(0, declared_copyright)],
                    copyrights=False,
                    holders=True,
                    authors=False,
                    include_years=False,
                )
                core_holders = [holder for _, holder, _, _ in detections]

            # Licenses and holders discovered in the package resources.
            member_expressions = []
            member_holders = []
            for member in members:
                if not is_build_file:
                    # If a resource is part of a package Component, then it
                    # cannot be part of any other type of Component
                    member.extra_data['in_package_component'] = True
                    member.save(codebase)

                expression = combine_expressions(member.license_expressions)
                holders = member.holders
                if expression or holders:
                    member_expressions.append(expression)
                    member_holders.extend(h.get('value') for h in holders)

            # Drop empty values such as None from what was collected.
            member_expressions = list(filter(None, member_expressions))
            member_holders = list(filter(None, member_holders))

            combined_expression = combine_expressions(member_expressions)
            other_license_expression = (
                str(Licensing().parse(combined_expression).simplify())
                if combined_expression
                else None
            )

            c = Consolidation(
                core_license_expression=core_license_expression,
                core_holders=sorted(set(core_holders)),
                other_license_expression=other_license_expression,
                other_holders=sorted(set(member_holders)),
                files_count=len([m for m in members if m.is_file]),
                resources=members,
            )
            if not is_build_file:
                yield ConsolidatedPackage(package=package, consolidation=c)
            else:
                # Build components are identified by the package name.
                c.identifier = package.name
                yield ConsolidatedComponent(type='build', consolidation=c)