Beispiel #1
0
def compute_normalized_license(declared_license):
    """
    Return a license expression detected from ``declared_license``, an
    iterable of mappings that may each carry 'name', 'url' and 'comments'
    entries. Return None when nothing is detected.

    Detection runs on each of the three values. A detection on the name takes
    precedence: an 'unknown' detected in the url or comments is then dropped.
    """
    if not declared_license:
        return

    expressions = []

    for entry in declared_license:
        # Run the three detections up front, in name, url, comments order.
        from_name = models.compute_normalized_license(entry.get('name'))
        from_url = models.compute_normalized_license(entry.get('url'))
        from_comments = models.compute_normalized_license(entry.get('comments'))

        if not from_name:
            # Without a name detection, fall back on the url then the comments.
            fallback = from_url or from_comments
            if fallback:
                expressions.append(fallback)
            continue

        # The name has precedence: unknowns in url and comments are ignored.
        if from_url == 'unknown':
            from_url = None
        if from_comments == 'unknown':
            from_comments = None

        url_agrees = from_name == from_url
        comments_agree = from_name == from_comments
        name_is_enough = (
            (url_agrees and comments_agree)
            or (url_agrees and not from_comments)
            or (comments_agree and not from_url)
        )
        if name_is_enough:
            expressions.append(from_name)
            continue

        # Some other license was detected in the url or comments: combine all.
        candidates = [
            found for found in (from_name, from_url, from_comments) if found
        ]
        if candidates:
            combined = combine_expressions(candidates)
            if combined:
                expressions.append(combined)

    if expressions:
        return combine_expressions(expressions)
Beispiel #2
0
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a list of
    declared license items, or None when nothing is detected.

    Each item is either a string or a mapping with optional 'type' and 'url'
    entries. Items of any other type are ignored.
    """
    if not declared_license:
        return

    # Detections on a url that carry no license information of their own.
    # TODO: find a better way to detect unknown licenses
    unknown_keys = ('unknown', 'unknown-license-reference')

    detected_licenses = []

    for declared in declared_license:
        if isinstance(declared, str):
            detected_license = models.compute_normalized_license(declared)
            if detected_license:
                detected_licenses.append(detected_license)

        elif isinstance(declared, dict):
            # 1. try detection on the value of type
            via_type = models.compute_normalized_license(declared.get('type'))
            # 2. try detection on the value of url
            via_url = models.compute_normalized_license(declared.get('url'))

            if not via_type:
                if via_url:
                    detected_licenses.append(via_url)
                continue

            # The type has precedence: an unknown detected in the url is
            # ignored.
            if via_url in unknown_keys:
                via_url = None

            if not via_url or via_url == via_type:
                detected_licenses.append(via_type)
            else:
                detected_licenses.append(
                    combine_expressions([via_type, via_url]))

    if detected_licenses:
        return combine_expressions(detected_licenses)
Beispiel #3
0
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from the
    'licenses' entry of the ``declared_license`` mapping, or None.

    The detected expressions are combined with OR when the 'licenselogic'
    entry is 'or' or 'dual', and with AND otherwise.
    """
    if not declared_license:
        return

    declared_items = declared_license.get('licenses')
    if not declared_items:
        return

    logic = declared_license.get('licenselogic')
    relation = 'OR' if logic in ('or', 'dual') else 'AND'

    detections = (
        models.compute_normalized_license(item) for item in declared_items
    )
    expressions = [detection for detection in detections if detection]
    if expressions:
        return combine_expressions(expressions, relation)
Beispiel #4
0
    def compute_normalized_license(cls, package):
        """
        Return a normalized license expression string detected from the
        declared license of the ``package`` Package, or None.

        A declared license that is not a dict is detected as a whole. A dict
        is expected to hold a 'licenses' list and an optional 'licenselogic'.
        """
        declared = package.declared_license
        if not declared:
            return

        if not isinstance(declared, dict):
            return models.compute_normalized_license(declared_license=declared)

        items = declared.get('licenses')
        if not items:
            return

        # the default in FreeBSD expressions is AND
        logic = declared.get('licenselogic')
        relation = 'OR' if logic in ('or', 'dual') else 'AND'

        expressions = []
        for item in items:
            expression = models.compute_normalized_license(declared_license=item)
            if expression:
                expressions.append(expression)

        if expressions:
            return combine_expressions(
                expressions=expressions, relation=relation)
Beispiel #5
0
def compute_normalized_license(package, resource, codebase):
    """
    Return the combined license expression detected in the files that sit
    next to ``resource`` and whose name is listed in the ``package`` declared
    license. Return None when the package declares no license.

    A listed file for which ``get_licenses`` returns nothing contributes the
    'unknown' expression.
    """
    declared = package.declared_license
    if not declared:
        return

    wanted_names = set(declared)
    expressions = []

    # FIXME: we should be able to get the path relatively to the ABOUT file resource
    for sibling in resource.parent(codebase).children(codebase):
        if sibling.name not in wanted_names:
            continue
        detected = get_licenses(sibling.location)
        if detected:
            expressions.extend(detected.get('license_expressions', []))
        else:
            expressions.append('unknown')

    return combine_expressions(expressions)
Beispiel #6
0
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from the values of
    the ``declared_license`` mapping, or None when nothing is detected.

    Each value is either a single string or an iterable of declarations.
    Empty values are skipped.
    """
    if not declared_license:
        return

    expressions = []

    for value in declared_license.values():
        if not value:
            continue
        # Treat a lone string as a one-item collection of declarations.
        declarations = [value] if isinstance(value, string_types) else value
        for declaration in declarations:
            expression = models.compute_normalized_license(declaration)
            if expression:
                expressions.append(expression)

    if expressions:
        return combine_expressions(expressions)
Beispiel #7
0
def _queryset_to_xlsx_worksheet(queryset, workbook, exclude_fields=None):
    """
    Add a worksheet named after the ``queryset`` model to ``workbook`` and
    write a header row followed by one row per record.

    The columns are the serializer fields of the model minus
    ``exclude_fields``. Empty values are not written. List values are joined
    with newlines and dict values are dumped as JSON.
    """
    separator = "\n"

    model = queryset.model
    sheet_name = model._meta.model_name

    excluded = exclude_fields or []
    columns = [
        name for name in get_serializer_fields(model) if name not in excluded
    ]

    sheet = workbook.add_worksheet(sheet_name)
    sheet.write_row(row=0, col=0, data=columns)

    for row, record in enumerate(queryset.iterator(), start=1):
        for col, name in enumerate(columns):
            cell = getattr(record, name)
            if not cell:
                continue

            if name == "license_expressions":
                cell = combine_expressions(cell)
            elif isinstance(cell, list):
                # Dict entries contribute their first value only.
                flattened = [
                    list(item.values())[0] if isinstance(item, dict) else str(item)
                    for item in cell
                ]
                cell = separator.join(ordered_unique(flattened))
            elif isinstance(cell, dict):
                # The cell is known to be non-empty at this point.
                cell = json.dumps(cell)

            sheet.write_string(row, col, str(cell))
Beispiel #8
0
def create_consolidated_components(resource, codebase, holder_key):
    """
    Yield one ConsolidatedComponent of type 'holders' built from the
    resources under ``resource`` that have a normalized holder whose key is
    ``holder_key``.

    As a side effect, ``resource`` is flagged as 'majority' in its extra data
    and saved.
    """
    expressions = []
    core_holder = None
    members = []
    for descendant in resource.walk(codebase):
        for candidate in descendant.extra_data.get('normalized_holders', []):
            if candidate.key == holder_key:
                expression = descendant.extra_data.get(
                    'normalized_license_expression')
                if expression:
                    expressions.append(expression)
                # The first matching holder becomes the core holder.
                core_holder = core_holder or candidate
                members.append(descendant)

    # The directory Resource we are at is also part of the set of resources
    # that have this particular key
    members.append(resource)
    resource.extra_data['majority'] = True
    resource.save(codebase)

    core_expression = combine_expressions(expressions)
    if core_expression is not None:
        core_expression = str(core_expression)

    consolidation = Consolidation(
        core_license_expression=core_expression,
        core_holders=[core_holder],
        files_count=sum(1 for member in members if member.is_file),
        resources=members,
    )
    yield ConsolidatedComponent(type='holders', consolidation=consolidation)
Beispiel #9
0
    def to_dict(self, **kwargs):
        """
        Return a dict of this object's fields, leaving out 'resources'.

        Before serializing, the consolidated license expression, holders and
        copyright are computed and stored on ``self``. The core and other
        holders are replaced in place by their ``original`` values, and
        calling this method again is safe. ``kwargs`` is accepted and ignored.
        """
        def dict_fields(attr, value):
            # Serialize every field except the resources list.
            return attr.name not in ('resources',)

        def holder_value(holder):
            # After a previous to_dict() call the holders are already plain
            # values without an ``original`` attribute: keep them as they are.
            return getattr(holder, 'original', holder)

        license_expressions_to_combine = [
            expression
            for expression in (
                self.core_license_expression,
                self.other_license_expression,
            )
            if expression
        ]
        if license_expressions_to_combine:
            combined_license_expression = combine_expressions(
                license_expressions_to_combine)
            if combined_license_expression:
                self.consolidated_license_expression = str(
                    Licensing().parse(combined_license_expression).simplify())

        self.core_holders = [holder_value(h) for h in self.core_holders]
        self.other_holders = [holder_value(h) for h in self.other_holders]
        self.consolidated_holders = sorted(
            set(self.core_holders + self.other_holders))
        # TODO: Verify and test that we are generating detectable copyrights
        self.consolidated_copyright = 'Copyright (c) {}'.format(', '.join(
            self.consolidated_holders))
        return attr.asdict(self, filter=dict_fields, dict_factory=dict)
Beispiel #10
0
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a list of
    declared license items or a string, or None when nothing is detected.

    - 'proprietary' is returned as-is.
    - A string containing parentheses and ' or ' is split on ' or ' and
      treated as a list of alternatives.
    - Any other string is detected as a whole.
    - A list is detected item by item and the results are combined with OR.
    - Any other type is detected on its ``repr()``.
    """
    if not declared_license:
        return

    if isinstance(declared_license, str):
        if declared_license == 'proprietary':
            return declared_license
        if '(' in declared_license and ')' in declared_license and ' or ' in declared_license:
            declared_license = declared_license.strip().rstrip(')').lstrip('(')
            declared_license = declared_license.split(' or ')
        else:
            return models.compute_normalized_license(declared_license)

    if not isinstance(declared_license, list):
        # Return the detection made on the repr instead of discarding it.
        return models.compute_normalized_license(repr(declared_license))

    detected_licenses = []
    for declared in declared_license:
        detected_license = models.compute_normalized_license(declared)
        # Only keep actual detections so that no empty value gets combined.
        if detected_license:
            detected_licenses.append(detected_license)

    if detected_licenses:
        # build a proper license expression: the default for composer is OR
        return combine_expressions(detected_licenses, 'OR')
def get_license_expression_from_matches(license_matches):
    """
    Return a license expression string crafted from the rule license
    expression of each LicenseMatch object in ``license_matches``.

    The expressions are combined with ``unique=False`` and the result is
    always converted with ``str()``.
    """
    from packagedcode.utils import combine_expressions

    expressions = []
    for license_match in license_matches:
        expressions.append(license_match.rule.license_expression)

    combined = combine_expressions(expressions, unique=False)
    return str(combined)
    def compute_normalized_license(self):
        """
        Return a normalized license expression string detected from the list
        of declared license strings of this object, or None when there is no
        declared license or nothing is detected.
        """
        if not self.declared_license:
            return

        detected_licenses = []
        for declared in self.declared_license:
            detected_license = models.compute_normalized_license(declared)
            # Skip declarations where nothing was detected so that only
            # actual expressions get combined.
            if detected_license:
                detected_licenses.append(detected_license)

        if detected_licenses:
            return combine_expressions(detected_licenses)
Beispiel #13
0
def detect_license_in_unstructured_text(location):
    """
    Return a license expression string detected in the file at `location`.
    Return the 'unknown' key when ``get_licenses`` returns nothing.
    """
    from scancode.api import get_licenses

    detection = get_licenses(location)
    if detection:
        expressions = detection['license_expressions']

        if TRACE:
            logger_debug(
                'detect_license_in_unstructured_text: detected_expressions:',
                expressions)

        return combine_expressions(expressions)

    # we have no match: return an unknown key
    return 'unknown'
Beispiel #14
0
def compute_bower_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a list of
    declared license strings, or None when there is no declared license.

    A declaration where nothing is detected contributes 'unknown'.
    """
    if not declared_license:
        return

    expressions = [
        models.compute_normalized_license(declaration) or 'unknown'
        for declaration in declared_license
    ]

    if expressions:
        return combine_expressions(expressions)
Beispiel #15
0
    def compute_normalized_license(cls, package):
        """
        Return a normalized license expression string detected from the
        declared license of ``package``, or None.

        The declared license is either a single string, or a list or tuple of
        declarations that are detected one by one and combined. Any other
        type yields None.
        """
        declared_license = package.declared_license
        if not declared_license:
            return

        if isinstance(declared_license, str):
            return models.compute_normalized_license(declared_license)

        if isinstance(declared_license, (list, tuple)):
            detected_licenses = []
            for declared in declared_license:
                detected_license = models.compute_normalized_license(declared)
                # Skip declarations where nothing was detected so that only
                # actual expressions get combined.
                if detected_license:
                    detected_licenses.append(detected_license)

            if detected_licenses:
                return combine_expressions(detected_licenses)
Beispiel #16
0
    def compute_normalized_license(self):
        """
        Return the combined license expression detected in the declared
        license files of this object, looked up under its ``root_path``.
        Return None when there is no declared license or no root path.

        Declared names that are not an existing file are skipped.
        """
        declared = self.declared_license
        root = self.root_path

        if not (declared and root):
            return

        expressions = []
        candidate_paths = (os.path.join(root, name) for name in declared)
        for path in candidate_paths:
            # isfile() is also False for a path that does not exist.
            if not os.path.isfile(path):
                continue
            detected = get_licenses(path)
            expressions.extend(detected.get('license_expressions', []))

        return combine_expressions(expressions)
Beispiel #17
0
def get_consolidated_component_resources(resource, codebase):
    """
    Return a list of resources to be used to create a ConsolidatedComponent
    from `resource`, or None when `resource` has no complete origin summary.

    The origin summary is the 'origin_summary_license_expression' and
    'origin_summary_holders' pair found in the `resource` extra data. The
    list holds `resource` itself and the resources under it that have the
    same origin; resources flagged 'in_package_component' are left out.
    """
    license_expression = resource.extra_data.get('origin_summary_license_expression')
    holders = resource.extra_data.get('origin_summary_holders')
    # Both parts of the origin summary are required. The parentheses matter:
    # `not a and b` reads as `(not a) and b` and would let a resource with
    # no summary at all through.
    if not (license_expression and holders):
        return

    resources = [] if resource.extra_data.get('in_package_component') else [resource]
    for r in resource.walk(codebase, topdown=False):
        if r.extra_data.get('in_package_component'):
            continue
        resource_holders = tuple(h.get('value') for h in r.holders)
        # Files are compared on their own detections, directories on the
        # origin summary saved in their extra data.
        if ((r.is_file
                and combine_expressions(r.license_expressions) == license_expression
                and resource_holders == holders)
                or (r.is_dir
                and r.extra_data.get('origin_summary_license_expression', '') == license_expression
                and r.extra_data.get('origin_summary_holders', tuple()) == holders)):
            resources.append(r)
    return resources
Beispiel #18
0
def compute_normalized_license(declared_license, location=None):
    """
    Return a normalized license expression string detected from a list of
    declared license items. No detection is implemented yet, so this
    currently returns None for every input.

    The specification for pub demands to have a LICENSE file side-by-side and
    nothing else. See https://dart.dev/tools/pub/publishing#preparing-to-publish
    """
    # FIXME: we need a location to find the LICENSE file
    # Approach:
    # Find the LICENSE file
    # detect on the text
    # combine all expressions

    if declared_license:
        expressions = []
        if expressions:
            return combine_expressions(expressions)

    return None
Beispiel #19
0
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a mapping,
    a list or a single string of declared license items.

    Return None for an empty value, for any other type, or when nothing is
    detected. For a mapping, only the values are used. Each value is either
    a string or an iterable of declarations; empty values are skipped.
    """
    if not declared_license:
        return

    if isinstance(declared_license, dict):
        values = list(declared_license.values())
    elif isinstance(declared_license, list):
        values = list(declared_license)
    # Use the same string check as for the values below rather than the
    # Python 2-only ``unicode`` name.
    elif isinstance(declared_license, string_types):
        values = [declared_license]
    else:
        return

    detected_licenses = []

    for value in values:
        if not value:
            continue
        # The value could be a string or a list
        if isinstance(value, string_types):
            detected_license = models.compute_normalized_license(value)
            if detected_license:
                detected_licenses.append(detected_license)
        else:
            # this is a list
            for declared in value:
                detected_license = models.compute_normalized_license(declared)
                if detected_license:
                    detected_licenses.append(detected_license)

    if detected_licenses:
        return combine_expressions(detected_licenses)
Beispiel #20
0
def get_license_holders_consolidated_components(codebase):
    """
    Yield a ConsolidatedComponent for each directory where 75% or more of the files have the
    same license expression and copyright holders

    The majority test itself is delegated to ``is_majority``, so the 75% figure
    is the documented intent rather than something enforced in this function.

    The codebase is walked bottom-up. For each directory, an "origin" (a tuple
    of holders and a license expression) is counted for each of its files and
    the counts saved on its subdirectories are merged in. The counts and, when
    there is a majority, the majority origin are saved in the directory extra
    data. Nothing is yielded when the root is flagged 'in_package_component'.
    """
    # TODO: Create Consolidated Components for the 25% or less of files that
    # aren't part of the majority
    # TODO: Take license score into account
    root = codebase.root
    if root.extra_data.get('in_package_component'):
        return

    # Maps an origin key string back to its (holders, license expression)
    # tuple. This is shared across all directories of the walk.
    origin_translation_table = {}
    for resource in codebase.walk(topdown=False):
        # TODO: Consider facets for later

        # Only directories that are not part of a package component are
        # summarized.
        if resource.is_file or resource.extra_data.get('in_package_component'):
            continue

        children = resource.children(codebase)
        if not children:
            continue

        # Collect license expression and holders count for stat-based summarization
        origin_count = Counter()
        # TODO: Consider license match coverage and license score when consolidating things on licenses
        # We may consolidate things by a weakly-matched license
        for child in children:
            if child.extra_data.get('in_package_component'):
                continue
            if child.is_file:
                license_expression = combine_expressions(child.license_expressions)
                holders = tuple(h['value'] for h in child.holders)
                # Files missing either a license expression or holders are
                # not counted.
                if not license_expression or not holders:
                    continue
                origin = holders, license_expression
                # NOTE(review): the key is a plain concatenation, so distinct
                # origins could in principle produce the same key -- confirm
                # this is acceptable.
                origin_key = ''.join(holders) + license_expression
                origin_translation_table[origin_key] = origin
                origin_count[origin_key] += 1
            else:
                # We are in a subdirectory
                # Thanks to the bottom-up walk, its counts were saved earlier.
                child_origin_count = child.extra_data.get('origin_count', {})
                if not child_origin_count:
                    continue
                origin_count.update(child_origin_count)

        if origin_count:
            resource.extra_data['origin_count'] = origin_count
            resource.save(codebase)

            # TODO: When there is a tie, we need to be explicit and consistent about the tiebreaker
            # TODO: Consider creating two components instead of tiebreaking
            origin_key, top_count = origin_count.most_common(1)[0]
            if is_majority(top_count, resource.files_count):
                majority_holders, majority_license_expression = origin_translation_table[origin_key]
                resource.extra_data['origin_summary_license_expression'] = majority_license_expression
                resource.extra_data['origin_summary_holders'] = majority_holders
                resource.extra_data['origin_summary_count'] = top_count
                resource.save(codebase)

                # Create consolidated components for a child that has a majority
                # that is different than the one we have now
                # NOTE(review): as written, both the license expression and
                # the holders must differ for a child to get its own
                # component -- confirm that `and` rather than `or` is intended.
                for child in children:
                    origin_summary_license_expression = child.extra_data.get('origin_summary_license_expression')
                    origin_summary_holders = child.extra_data.get('origin_summary_holders')
                    if (origin_summary_license_expression and origin_summary_holders
                            and origin_summary_license_expression != majority_license_expression
                            and origin_summary_holders != majority_holders):
                        c = create_license_holders_consolidated_component(child, codebase)
                        if c:
                            yield c
            else:
                # If there is no majority, we see if any of our child directories had majorities
                for child in children:
                    c = create_license_holders_consolidated_component(child, codebase)
                    if c:
                        yield c

    # Yield a Component for root if there is a majority
    c = create_license_holders_consolidated_component(root, codebase)
    if c:
        yield c
Beispiel #21
0
def compute_license_score(codebase):
    """
    Return a two-tuple of a ScoringElements object and a declared license
    expression string (empty when there is none), both computed at the
    codebase level.

    The license clarity score starts from the score of a new ScoringElements
    and is adjusted by the weight of each of these elements:

    Declared license (+40):
      Licenses were found in key files. This indicates that the software
      package licensing is documented at top-level or well-known locations
      in the software project, typically in a package manifest, NOTICE,
      LICENSE, COPYING or README file.

    Identification precision (+40):
      How well the license statement(s) of the software identify known
      licenses that can be designated by precise keys (identifiers) as
      provided in a publicly available license list, such as the ScanCode
      LicenseDB, the SPDX license list, the OSI license list, or a URL
      pointing to a specific license text in a project or organization
      website.

    License texts (+10):
      License texts are provided to support the declared license expression
      in files such as a package manifest, NOTICE, LICENSE, COPYING or
      README.

    Declared copyright (+10):
      Copyrights were found in key files. This indicates that the software
      package copyright is documented at top-level or well-known locations
      in the software project.

    Conflicting license categories (-20):
      The declared license expression of the software is in the permissive
      category, but other potentially conflicting categories, such as
      copyleft and proprietary, have been detected in lower level code.

    Ambiguous compound licensing (-10):
      The software has a license declaration that makes it difficult to
      construct a reliable license expression, such as in the case of
      multiple licenses where the conjunctive versus disjunctive
      relationship is not well defined.

    The two negative weights are only applied while the score is above zero.
    """
    elements = ScoringElements()

    key_file_licenses = get_field_values_from_codebase_resources(
        codebase=codebase, field_name='licenses', key_files_only=True)
    key_file_expressions = get_field_values_from_codebase_resources(
        codebase=codebase,
        field_name='license_expressions',
        key_files_only=True,
    )

    unique_expressions = unique(key_file_expressions)
    key_file_categories = get_license_categories(key_file_licenses)

    key_file_copyrights = get_field_values_from_codebase_resources(
        codebase=codebase, field_name='copyrights', key_files_only=True)
    all_licenses = get_field_values_from_codebase_resources(
        codebase=codebase, field_name='licenses', key_files_only=False)

    # Each positive element is evaluated, stored on the scoring elements and
    # then credited with its weight when true, in this order.
    positive_elements = (
        ('declared_license',
         lambda: bool(key_file_licenses), 40),
        ('precise_license_detection',
         lambda: check_declared_licenses(key_file_licenses), 40),
        ('has_license_text',
         lambda: check_for_license_texts(key_file_licenses), 10),
        ('declared_copyrights',
         lambda: bool(key_file_copyrights), 10),
    )
    for attribute, evaluate, weight in positive_elements:
        setattr(elements, attribute, evaluate())
        if getattr(elements, attribute):
            elements.score += weight

    # Conflicts are only looked for when the declared licensing is permissive.
    if check_declared_license_categories(key_file_categories):
        elements.conflicting_license_categories = check_for_conflicting_licenses(
            all_licenses)
        if elements.conflicting_license_categories and elements.score > 0:
            elements.score -= 20

    primary_expression = get_primary_license(unique_expressions)

    if not primary_expression:
        # If we cannot get a single primary license, then we combine and
        # simplify the license expressions from key files
        combined = combine_expressions(unique_expressions)
        if combined:
            primary_expression = str(Licensing().parse(combined).simplify())
        elements.ambiguous_compound_licensing = True
        if elements.score > 0:
            elements.score -= 10

    return elements, primary_expression or ''
Beispiel #22
0
def get_holders_consolidated_components(codebase):
    """
    Yield ConsolidatedComponents built for each holder detected in the files
    of the ``codebase`` directories.

    Nothing is yielded when the codebase root is flagged
    'in_package_component'. As a side effect, normalized license expressions
    and holders are saved in the extra data of file Resources, and the holder
    keys found in a directory are saved in the extra data of that directory.
    """
    if codebase.root.extra_data.get('in_package_component'):
        return

    # Step 1: Normalize license expressions and holders on file Resources and
    # save the holder keys detected in the immediate directory on directory
    # Resources.
    for directory in codebase.walk(topdown=False):
        if directory.is_file or directory.extra_data.get('in_package_component'):
            continue

        holder_keys_here = set()
        for child in directory.children(codebase):
            # Only files outside of package components are processed.
            if child.is_dir or child.extra_data.get('in_package_component'):
                continue
            if not (child.license_expressions or child.holders):
                continue

            if child.license_expressions:
                expression = combine_expressions(child.license_expressions)
                if expression:
                    child.extra_data['normalized_license_expression'] = expression
                    child.save(codebase)

            if child.holders:
                processed = process_holders(h['value'] for h in child.holders)
                if processed:
                    # Dedupe holders, keeping the first one seen for each key
                    by_key = {}
                    for holder in processed:
                        by_key.setdefault(holder.key, holder)
                    unique_holders = list(by_key.values())

                    # Keep track of holders found in this immediate directory
                    holder_keys_here.update(by_key)

                    child.extra_data['normalized_holders'] = unique_holders
                    child.save(codebase)

        if holder_keys_here:
            directory.extra_data['current_holders'] = holder_keys_here
            directory.save(codebase)

    # Step 2: Walk the codebase top-down and create consolidated components
    # along the way. Going top-down ensures that the highest-most Resource is
    # used as the common ancestor for a given holder. The set below records
    # the holder keys that already have a consolidation.
    consolidated_keys = set()
    for resource in codebase.walk(topdown=True):
        for holder_key in resource.extra_data.get('current_holders', set()):
            if holder_key not in consolidated_keys:
                consolidated_keys.add(holder_key)
                for component in create_consolidated_components(
                        resource, codebase, holder_key):
                    yield component
Beispiel #23
0
def get_consolidated_packages(codebase):
    """
    Yield one consolidation result for every package attached to a Resource
    of ``codebase``, walking the codebase bottom-up.

    A ConsolidatedComponent of type 'build' is yielded when the package is a
    BaseBuildManifestPackage instance; a ConsolidatedPackage is yielded
    otherwise.

    Side effects: the package root Resource gets
    ``extra_data['package_root']`` set to True and, for non-build packages,
    every package Resource gets ``extra_data['in_package_component']`` set to
    True. Each modified Resource is saved back to ``codebase``.
    """

    def holders_ordered_by_key(holders):
        # Cluster the holders, order the resulting pairs by the ``key`` of
        # their first item and keep only that first item.
        clusters = sorted(
            copyright_summary.cluster(holders),
            key=lambda pair: pair[0].key,
        )
        return [holder for holder, _ in clusters]

    for resource in codebase.walk(topdown=False):
        for package_data in resource.packages:
            package = get_package_instance(package_data)

            # Flag the root Resource of this package and persist the flag.
            root = package.get_package_root(resource, codebase)
            root.extra_data['package_root'] = True
            root.save(codebase)

            is_build_file = isinstance(package, BaseBuildManifestPackage)
            members = list(package.get_package_resources(root, codebase))
            declared_license = package.license_expression
            declared_copyright = package.copyright

            # Holders declared by the package itself, detected from its
            # copyright statement when there is one.
            if declared_copyright:
                detections = CopyrightDetector().detect(
                    [(0, declared_copyright)],
                    copyrights=False,
                    holders=True,
                    authors=False,
                    include_years=False,
                )
                raw_holders = [holder for _, holder, _, _ in detections]
            else:
                raw_holders = []
            declared_holders = process_holders(raw_holders)

            # Licenses and holders discovered in the package Resources.
            found_licenses = []
            found_holder_values = []
            for member in members:
                if not is_build_file:
                    # A Resource that belongs to a package Component cannot
                    # be part of any other type of Component.
                    member.extra_data['in_package_component'] = True
                    member.save(codebase)

                expressions = member.license_expressions
                if expressions:
                    member_license = combine_expressions(expressions)
                    if member_license:
                        found_licenses.append(member_license)

                member_holders = member.holders
                if member_holders:
                    found_holder_values += [
                        h.get('value') for h in member_holders
                    ]
            found_holders = process_holders(found_holder_values)

            combined = combine_expressions(found_licenses)
            simplified = None
            if combined:
                simplified = str(Licensing().parse(combined).simplify())

            consolidation = Consolidation(
                core_license_expression=declared_license,
                core_holders=holders_ordered_by_key(declared_holders),
                other_license_expression=simplified,
                other_holders=holders_ordered_by_key(found_holders),
                files_count=sum(1 for member in members if member.is_file),
                resources=members,
            )

            if not is_build_file:
                yield ConsolidatedPackage(
                    package=package, consolidation=consolidation)
                continue

            # Build manifests are reported as 'build' components that are
            # identified by the package name.
            consolidation.identifier = package.name
            yield ConsolidatedComponent(
                type='build', consolidation=consolidation)
Beispiel #24
0
 def test_combine_expressions_with_or_relationship(self):
     """Expressions combined with the 'OR' relation are joined by OR."""
     combined = combine_expressions(['mit', 'apache-2.0'], 'OR')
     assert combined == 'mit OR apache-2.0'
Beispiel #25
0
 def test_combine_expressions_with_regular(self):
     """Without an explicit relation, expressions are joined by AND."""
     combined = combine_expressions(['mit', 'apache-2.0'])
     assert combined == 'mit AND apache-2.0'
Beispiel #26
0
 def test_combine_expressions_with_empty_input(self):
     """combine_expressions returns None for both None and an empty list."""
     # None is a singleton: check identity rather than equality so that an
     # object with a permissive ``__eq__`` cannot satisfy the assertion.
     assert combine_expressions(None) is None
     assert combine_expressions([]) is None
Beispiel #27
0
def get_consolidated_packages(codebase):
    """
    Yield one consolidation result for every package attached to a Resource
    of ``codebase``, walking the codebase bottom-up.

    A ConsolidatedComponent of type 'build' is yielded when the package is a
    BaseBuildManifestPackage instance; a ConsolidatedPackage is yielded
    otherwise.

    Side effect: for non-build packages, every package Resource gets
    ``extra_data['in_package_component']`` set to True and is saved back to
    ``codebase``.
    """
    for resource in codebase.walk(topdown=False):
        for package_data in resource.packages:
            package = get_package_instance(package_data)
            is_build_file = isinstance(package, BaseBuildManifestPackage)
            members = list(package.get_package_resources(resource, codebase))
            declared_license = package.license_expression
            declared_copyright = package.copyright

            # Holders declared by the package itself, detected from its
            # copyright statement when there is one.
            declared_holders = []
            if declared_copyright:
                detections = CopyrightDetector().detect(
                    [(0, declared_copyright)],
                    copyrights=False,
                    holders=True,
                    authors=False,
                    include_years=False,
                )
                declared_holders = [holder for _, holder, _, _ in detections]

            # Licenses and holders discovered in the package Resources.
            found_licenses = []
            found_holders = []
            for member in members:
                if not is_build_file:
                    # A Resource that belongs to a package Component cannot
                    # be part of any other type of Component.
                    member.extra_data['in_package_component'] = True
                    member.save(codebase)

                member_license = combine_expressions(member.license_expressions)
                member_holders = member.holders
                if member_license or member_holders:
                    found_licenses.append(member_license)
                    found_holders.extend(
                        h.get('value') for h in member_holders)

            # Drop empty entries (such as None) from both collections.
            found_licenses = list(filter(None, found_licenses))
            found_holders = list(filter(None, found_holders))

            combined = combine_expressions(found_licenses)
            simplified = None
            if combined:
                simplified = str(Licensing().parse(combined).simplify())

            consolidation = Consolidation(
                core_license_expression=declared_license,
                core_holders=sorted(set(declared_holders)),
                other_license_expression=simplified,
                other_holders=sorted(set(found_holders)),
                files_count=len(
                    [member for member in members if member.is_file]),
                resources=members,
            )

            if not is_build_file:
                yield ConsolidatedPackage(
                    package=package,
                    consolidation=consolidation
                )
                continue

            # Build manifests are reported as 'build' components that are
            # identified by the package name.
            consolidation.identifier = package.name
            yield ConsolidatedComponent(
                type='build',
                consolidation=consolidation
            )
 def test_combine_expressions_with_empty_input(self):
     """combine_expressions returns None for both None and an empty list."""
     # None is a singleton: check identity rather than equality so that an
     # object with a permissive ``__eq__`` cannot satisfy the assertion.
     assert combine_expressions(None) is None
     assert combine_expressions([]) is None
Beispiel #29
0
 def test_combine_expressions_with_duplicated_elements(self):
     """A repeated expression shows up only once in the combined result."""
     combined = combine_expressions(['mit', 'apache-2.0', 'mit'])
     assert combined == 'mit AND apache-2.0'
Beispiel #30
0
def get_origin_info_from_top_level_packages(top_level_packages, codebase):
    """
    Return a 3-tuple of (declared license expression, declared holders,
    primary programming language) computed from a ``top_level_packages``
    list of detected top-level package mappings and a ``codebase``.

    Only packages accepted by ``is_key_package`` contribute to the result.

    - The license expression is the simplified AND-combination of the unique
      package license expressions, or an empty string when there is none.
    - The holders are detected from the package copyright statements; when
      none is found, the package party names are used instead.
    - The primary language is set only when the packages agree on exactly
      one language; it is an empty string otherwise.

    When ``top_level_packages`` is empty, all three items are empty strings;
    otherwise the holders item is the value returned by ``unique``.
    """
    if not top_level_packages:
        return '', '', ''

    found_licenses = []
    found_languages = []
    found_copyrights = []
    found_parties = []

    for package_mapping in top_level_packages:
        package = models.Package.from_dict(package_mapping)
        # Only key packages are of interest here.
        if is_key_package(package, codebase):
            package_license = package.license_expression
            if package_license:
                found_licenses.append(package_license)

            package_language = package.primary_language
            if package_language:
                found_languages.append(package_language)

            package_copyright = package.copyright
            if package_copyright:
                found_copyrights.append(package_copyright)

            found_parties.extend(package.parties or [])

    # Declared license: combine the unique expressions, then simplify.
    combined = combine_expressions(
        expressions=unique(found_licenses),
        relation='AND',
    )
    if combined:
        declared_license_expression = str(
            Licensing().parse(combined).simplify())
    else:
        declared_license_expression = ''

    # Declared holders: copyright holders first, party names as a fallback.
    declared_holders = list(get_holders_from_copyright(found_copyrights))
    if not declared_holders:
        declared_holders = [party.name for party in found_parties]
    declared_holders = unique(declared_holders)

    # Primary language: reported only when it is unambiguous.
    languages = unique(found_languages)
    primary_language = languages[0] if len(languages) == 1 else ''

    return declared_license_expression, declared_holders, primary_language