def compute_normalized_license(declared_license):
    """
    Return a detected license expression from a declared license mapping.

    ``declared_license`` is iterated as a sequence of mappings. License
    detection runs on the ``name``, ``url`` and ``comments`` values of each
    mapping and the per-mapping results are combined in one expression.
    Return None when nothing is declared or nothing is detected.
    """
    if not declared_license:
        return

    expressions = []
    for declaration in declared_license:
        # Detection always runs on the three fields, in this order.
        via_name = models.compute_normalized_license(declaration.get('name'))
        via_url = models.compute_normalized_license(declaration.get('url'))
        via_comments = models.compute_normalized_license(
            declaration.get('comments'))

        if not via_name:
            # Without a name detection, use the url then the comments.
            fallback = via_url or via_comments
            if fallback:
                expressions.append(fallback)
            continue

        # The name has precedence: an "unknown" detected in the url or the
        # comments is ignored.
        if via_url == 'unknown':
            via_url = None
        if via_comments == 'unknown':
            via_comments = None

        same_as_url = via_name == via_url
        same_as_comments = via_name == via_comments

        # The name alone is kept when the other fields either agree with it
        # or carry no detection.
        name_is_enough = (
            (same_as_url and same_as_comments)
            or (same_as_url and not via_comments)
            or (same_as_comments and not via_url)
        )
        if name_is_enough:
            expressions.append(via_name)
            continue

        # Some other, non-unknown license was detected in the url or comments.
        candidates = [
            detected
            for detected in (via_name, via_url, via_comments)
            if detected
        ]
        combined = combine_expressions(candidates)
        if combined:
            expressions.append(combined)

    if expressions:
        return combine_expressions(expressions)
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a list of
    declared license items.

    Each item is either a string, detected as-is, or a mapping whose ``type``
    and ``url`` values are both detected. Items of any other type are
    ignored. Return None when nothing is declared or nothing is detected.
    """
    if not declared_license:
        return

    # Detections that carry no actual license information.
    # TODO: find a better way to detect unknown licenses
    unknown_keys = ('unknown', 'unknown-license-reference')

    detected_licenses = []
    for declared in declared_license:
        if isinstance(declared, str):
            detected_license = models.compute_normalized_license(declared)
            if detected_license:
                detected_licenses.append(detected_license)

        elif isinstance(declared, dict):
            # Detection runs on the type first, then on the url.
            via_type = models.compute_normalized_license(declared.get('type'))
            via_url = models.compute_normalized_license(declared.get('url'))

            if not via_type:
                if via_url:
                    detected_licenses.append(via_url)
                continue

            # The type has precedence: an unknown detected in the url is
            # ignored.
            if via_url in unknown_keys:
                via_url = None

            if not via_url or via_type == via_url:
                detected_licenses.append(via_type)
            else:
                detected_licenses.append(
                    combine_expressions([via_type, via_url]))

    if detected_licenses:
        return combine_expressions(detected_licenses)
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a declared
    license mapping.

    The mapping's ``licenses`` items are detected one by one and joined with
    "OR" when its ``licenselogic`` value is "or" or "dual", and with "AND"
    otherwise. Return None when nothing is declared or nothing is detected.

    NOTE(review): the code calls ``declared_license.get()``, so only
    mapping-like values are handled here.
    """
    if not declared_license:
        return

    declared_items = declared_license.get('licenses')
    if not declared_items:
        return

    logic = declared_license.get('licenselogic')
    relation = 'OR' if logic in ('or', 'dual') else 'AND'

    detections = (
        models.compute_normalized_license(item) for item in declared_items
    )
    detected_licenses = [detected for detected in detections if detected]

    if detected_licenses:
        return combine_expressions(detected_licenses, relation)
def compute_normalized_license(cls, package):
    """
    Return a normalized license expression string or None detected from the
    declared license of a ``package`` Package.

    A declared license that is not a dict is detected as a whole. For a dict,
    each of its ``licenses`` items is detected and the results are joined
    using the relation selected by its ``licenselogic`` value.
    """
    declared_license = package.declared_license
    if not declared_license:
        return

    if not isinstance(declared_license, dict):
        return models.compute_normalized_license(
            declared_license=declared_license)

    declared_items = declared_license.get('licenses')
    if not declared_items:
        return

    # AND is the default relation; only "or" and "dual" switch to OR.
    logic = declared_license.get('licenselogic')
    relation = 'OR' if logic in ('or', 'dual') else 'AND'

    detections = (
        models.compute_normalized_license(declared_license=item)
        for item in declared_items
    )
    detected_licenses = [detected for detected in detections if detected]

    if detected_licenses:
        return combine_expressions(
            expressions=detected_licenses, relation=relation)
def compute_normalized_license(package, resource, codebase):
    """
    Return a normalized license expression string detected from the files
    named in the declared license of ``package``.

    The children of the parent of ``resource`` are scanned when their name
    is one of the declared license items. A scanned file with no license
    result contributes "unknown".
    """
    declared = package.declared_license
    if not declared:
        return

    license_file_names = set(declared)
    expressions = []

    # FIXME: we should be able to get the path relatively to the ABOUT file
    # resource
    siblings = resource.parent(codebase).children(codebase)
    for sibling in siblings:
        if sibling.name not in license_file_names:
            continue

        detected = get_licenses(sibling.location)
        if detected:
            expressions.extend(detected.get('license_expressions', []))
        else:
            expressions.append('unknown')

    return combine_expressions(expressions)
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from the values
    of a declared license mapping.

    Each non-empty value is either a single string or an iterable of declared
    items. Return None when nothing is declared or nothing is detected.
    """
    if not declared_license:
        return

    detected_licenses = []
    for value in declared_license.values():
        if not value:
            continue

        # A lone string is processed as a one-item list.
        declared_items = [value] if isinstance(value, string_types) else value
        for declared in declared_items:
            detected = models.compute_normalized_license(declared)
            if detected:
                detected_licenses.append(detected)

    if detected_licenses:
        return combine_expressions(detected_licenses)
def _queryset_to_xlsx_worksheet(queryset, workbook, exclude_fields=None):
    """
    Add to ``workbook`` a worksheet named after the model of ``queryset``,
    with one header row of field names and one row per record.

    Field names come from ``get_serializer_fields`` minus ``exclude_fields``.
    Empty values are not written. License expressions are combined, lists are
    joined with newlines after de-duplication and dicts are dumped as JSON.
    """
    separator = "\n"

    model_class = queryset.model
    model_name = model_class._meta.model_name

    skipped = exclude_fields or []
    columns = [
        name
        for name in get_serializer_fields(model_class)
        if name not in skipped
    ]

    worksheet = workbook.add_worksheet(model_name)
    worksheet.write_row(row=0, col=0, data=columns)

    # Row 0 holds the header, so records start at row 1.
    for row_index, record in enumerate(queryset.iterator(), start=1):
        for col_index, column in enumerate(columns):
            cell = getattr(record, column)
            if not cell:
                continue

            if column == "license_expressions":
                cell = combine_expressions(cell)
            elif isinstance(cell, list):
                # A dict entry is represented by its first value.
                entries = [
                    list(entry.values())[0]
                    if isinstance(entry, dict) else str(entry)
                    for entry in cell
                ]
                cell = separator.join(ordered_unique(entries))
            elif isinstance(cell, dict):
                cell = json.dumps(cell)

            worksheet.write_string(row_index, col_index, str(cell))
def create_consolidated_components(resource, codebase, holder_key):
    """
    Yield a ConsolidatedComponent of type "holders" built from the resources
    found under ``resource`` that have a normalized holder whose key is
    ``holder_key``.

    ``resource`` itself is always part of the component and is flagged and
    saved with ``extra_data['majority'] = True``.
    """
    collected_expressions = []
    first_holder = None
    matched_resources = []

    for walked in resource.walk(codebase):
        for candidate in walked.extra_data.get('normalized_holders', []):
            if candidate.key == holder_key:
                expression = walked.extra_data.get(
                    'normalized_license_expression')
                if expression:
                    collected_expressions.append(expression)
                if not first_holder:
                    first_holder = candidate
                matched_resources.append(walked)

    # The directory Resource we are at is also added to the set of resources
    # that have this particular key.
    matched_resources.append(resource)
    resource.extra_data['majority'] = True
    resource.save(codebase)

    core_license_expression = combine_expressions(collected_expressions)
    if core_license_expression is not None:
        core_license_expression = str(core_license_expression)

    consolidation = Consolidation(
        core_license_expression=core_license_expression,
        core_holders=[first_holder],
        files_count=sum(1 for r in matched_resources if r.is_file),
        resources=matched_resources,
    )
    yield ConsolidatedComponent(type='holders', consolidation=consolidation)
def to_dict(self, **kwargs):
    """
    Return a dict of this object's attributes, without ``resources``.

    Before serializing, the consolidated license expression, holders and
    copyright are computed and stored on this object. The core and other
    holders are replaced in place by their ``original`` values.

    NOTE(review): since the holders are replaced by their ``original``
    values, a second call would look up ``.original`` on those values --
    confirm that callers serialize an instance only once.
    """
    def dict_fields(attr, value):
        # Everything but the "resources" attribute is serialized.
        return attr.name not in ('resources', )

    to_combine = [
        expression
        for expression in (
            self.core_license_expression,
            self.other_license_expression,
        )
        if expression
    ]
    if to_combine:
        combined = combine_expressions(to_combine)
        if combined:
            simplified = Licensing().parse(combined).simplify()
            self.consolidated_license_expression = str(simplified)

    self.core_holders = [h.original for h in self.core_holders]
    self.other_holders = [h.original for h in self.other_holders]
    all_holders = set(self.core_holders + self.other_holders)
    self.consolidated_holders = sorted(all_holders)

    # TODO: Verify and test that we are generating detectable copyrights
    joined_holders = ', '.join(self.consolidated_holders)
    self.consolidated_copyright = 'Copyright (c) {}'.format(joined_holders)

    return attr.asdict(self, filter=dict_fields, dict_factory=dict)
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a list of
    declared license items or string type.

    - The string "proprietary" is returned as-is.
    - A string holding parentheses and " or " is split on " or " and each
      part is detected separately.
    - Any other string is detected as a whole.
    - A list is detected item by item.
    - Any other value is detected on its ``repr()``.

    Multiple detections are combined with "OR". Items with no detection are
    skipped. Return None when nothing is declared or nothing is detected.
    """
    if not declared_license:
        return

    if isinstance(declared_license, str):
        if declared_license == 'proprietary':
            return declared_license

        is_parenthesized_choice = (
            '(' in declared_license
            and ')' in declared_license
            and ' or ' in declared_license
        )
        if not is_parenthesized_choice:
            return models.compute_normalized_license(declared_license)

        declared_license = declared_license.strip().rstrip(')').lstrip('(')
        declared_license = declared_license.split(' or ')

    if isinstance(declared_license, list):
        candidates = declared_license
    else:
        # Neither a string nor a list: detect on the repr of the value. This
        # detection used to be computed and then dropped.
        candidates = [repr(declared_license)]

    detected_licenses = []
    for declared in candidates:
        detected_license = models.compute_normalized_license(declared)
        # Skip empty detections so that they are never combined.
        if detected_license:
            detected_licenses.append(detected_license)

    if detected_licenses:
        # build a proper license expression: the default for composer is OR
        return combine_expressions(detected_licenses, 'OR')
def get_license_expression_from_matches(license_matches):
    """
    Craft a license expression string from a list of LicenseMatch objects.

    The license expression of the rule of each match is combined without
    removing duplicates, and the result is converted with ``str()``.

    NOTE(review): the result of ``combine_expressions`` is always passed to
    ``str()``; if it is None for an empty list, this returns the string
    "None" -- confirm that callers never pass an empty list.
    """
    from packagedcode.utils import combine_expressions

    expressions = []
    for license_match in license_matches:
        expressions.append(license_match.rule.license_expression)

    combined = combine_expressions(expressions, unique=False)
    return str(combined)
def compute_normalized_license(self):
    """
    Return a normalized license expression string detected from a list of
    declared license strings.

    Each item of ``self.declared_license`` is detected on its own. Items with
    no detection are skipped so that empty values are never combined.
    Return None when nothing is declared or nothing is detected.
    """
    if not self.declared_license:
        return

    detections = (
        models.compute_normalized_license(declared)
        for declared in self.declared_license
    )
    detected_licenses = [detected for detected in detections if detected]

    if detected_licenses:
        return combine_expressions(detected_licenses)
def detect_license_in_unstructured_text(location):
    """
    Return a detected license expression string from a file at `location`.

    Return "unknown" when the scan of the file returns nothing.
    """
    from scancode.api import get_licenses

    scan_results = get_licenses(location)
    if not scan_results:
        # No match at all: report an unknown key.
        return 'unknown'

    expressions = scan_results['license_expressions']
    if TRACE:
        logger_debug(
            'detect_license_in_unstructured_text: detected_expressions:',
            expressions)

    return combine_expressions(expressions)
def compute_bower_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a list of
    declared license strings.

    A declared item with no detection contributes "unknown" to the combined
    expression. Return None when nothing is declared.
    """
    if not declared_license:
        return

    detected_licenses = [
        models.compute_normalized_license(declared) or 'unknown'
        for declared in declared_license
    ]

    if detected_licenses:
        return combine_expressions(detected_licenses)
def compute_normalized_license(cls, package):
    """
    Return a normalized license expression string detected from the declared
    license of ``package``, or None.

    A list or tuple is detected item by item and the detections are combined.
    Items with no detection are skipped so that empty values are never
    combined. A string is detected as a whole. Any other type yields None.
    """
    declared_license = package.declared_license
    if not declared_license:
        return

    if isinstance(declared_license, (list, tuple)):
        detections = (
            models.compute_normalized_license(declared)
            for declared in declared_license
        )
        detected_licenses = [detected for detected in detections if detected]
        if detected_licenses:
            return combine_expressions(detected_licenses)
        return

    if isinstance(declared_license, str):
        return models.compute_normalized_license(declared_license)
def compute_normalized_license(self):
    """
    Return a normalized license expression string detected from the license
    files listed in ``self.declared_license``.

    Each declared item is joined to ``self.root_path``; only paths that are
    existing files are scanned. Return None when either the declared license
    or the root path is empty.
    """
    license_file_names = self.declared_license
    base_dir = self.root_path
    if not (license_file_names and base_dir):
        return

    expressions = []
    for file_name in license_file_names:
        candidate = os.path.join(base_dir, file_name)
        # isfile() is False for a missing path, so it covers the exists check.
        if not os.path.isfile(candidate):
            continue
        detected = get_licenses(candidate)
        expressions.extend(detected.get('license_expressions', []))

    return combine_expressions(expressions)
def get_consolidated_component_resources(resource, codebase):
    """
    Return a list of resources to be used to create a ConsolidatedComponent
    from `resource`, or None when `resource` has no origin summary.

    The origin summary is the ``origin_summary_license_expression`` and
    ``origin_summary_holders`` values of ``resource.extra_data``. Both are
    needed, since every resource is matched on both below.

    A file is selected when its combined license expression and its holders
    equal the origin summary. A directory is selected when its own origin
    summary equals it. Resources flagged ``in_package_component`` are skipped.
    """
    license_expression = resource.extra_data.get(
        'origin_summary_license_expression')
    holders = resource.extra_data.get('origin_summary_holders')
    # This guard used to be "not license_expression and holders", which only
    # returned when holders were set without a license expression.
    if not license_expression or not holders:
        return

    if resource.extra_data.get('in_package_component'):
        resources = []
    else:
        resources = [resource]

    for r in resource.walk(codebase, topdown=False):
        if r.extra_data.get('in_package_component'):
            continue

        resource_holders = tuple(h.get('value') for h in r.holders)
        is_matching_file = (
            r.is_file
            and combine_expressions(r.license_expressions) == license_expression
            and resource_holders == holders
        )
        is_matching_dir = (
            r.is_dir
            and r.extra_data.get(
                'origin_summary_license_expression', '') == license_expression
            and r.extra_data.get('origin_summary_holders', tuple()) == holders
        )
        if is_matching_file or is_matching_dir:
            resources.append(r)

    return resources
def compute_normalized_license(declared_license, location=None):
    """
    Return a normalized license expression string detected from a list of
    declared license items.

    The specification for pub demands to have a LICENSE file side-by-side
    and nothing else.
    See https://dart.dev/tools/pub/publishing#preparing-to-publish

    NOTE: detection is not implemented yet: nothing is ever collected, so
    this always returns None and ``location`` is unused.
    """
    # FIXME: we need a location to find the LICENSE file
    # Approach:
    # - find the LICENSE file
    # - detect on the text
    # - combine all expressions
    if not declared_license:
        return None

    detected_licenses = []
    if not detected_licenses:
        return None

    return combine_expressions(detected_licenses)
def compute_normalized_license(declared_license):
    """
    Return a normalized license expression string detected from a mapping or
    list of declared license items.

    A mapping contributes its values, a list its items and a string itself.
    Any other type yields None. Each non-empty value is either a string or an
    iterable of declared items. Return None when nothing is detected.
    """
    if not declared_license:
        return

    if isinstance(declared_license, dict):
        values = list(declared_license.values())
    elif isinstance(declared_license, list):
        values = list(declared_license)
    elif isinstance(declared_license, string_types):
        # string_types is used here as for the values below, rather than the
        # Python 2-only ``unicode`` name.
        values = [declared_license]
    else:
        return

    detected_licenses = []
    for value in values:
        if not value:
            continue

        # A lone string is processed as a one-item list.
        declared_items = [value] if isinstance(value, string_types) else value
        for declared in declared_items:
            detected_license = models.compute_normalized_license(declared)
            if detected_license:
                detected_licenses.append(detected_license)

    if detected_licenses:
        return combine_expressions(detected_licenses)
def get_license_holders_consolidated_components(codebase):
    """
    Yield a ConsolidatedComponent for each directory where 75% or more of the
    files have the same license expression and copyright holders.

    The codebase is walked bottom-up. For each directory, the (holders,
    license expression) origins of its files are counted together with the
    counts already saved on its subdirectories. The counts and the majority
    origin, if any, are saved in the directory ``extra_data``.
    """
    # TODO: Create Consolidated Components for the 25% or less of files that
    # aren't part of the majority
    # TODO: Take license score into account
    root = codebase.root
    if root.extra_data.get('in_package_component'):
        return

    # Map of origin key -> (holders, license expression)
    origins_by_key = {}

    for directory in codebase.walk(topdown=False):
        # TODO: Consider facets for later
        if directory.is_file or directory.extra_data.get('in_package_component'):
            continue

        children = directory.children(codebase)
        if not children:
            continue

        # Collect license expression and holders count for stat-based
        # summarization.
        # TODO: Consider license match coverage and license score when
        # consolidating things on licenses. We may consolidate things by a
        # weakly-matched license.
        origin_count = Counter()
        for child in children:
            if child.extra_data.get('in_package_component'):
                continue

            if not child.is_file:
                # Subdirectory: reuse the counts saved when it was walked.
                child_origin_count = child.extra_data.get('origin_count', {})
                if child_origin_count:
                    origin_count.update(child_origin_count)
                continue

            license_expression = combine_expressions(child.license_expressions)
            holders = tuple(h['value'] for h in child.holders)
            if not license_expression or not holders:
                continue

            origin_key = ''.join(holders) + license_expression
            origins_by_key[origin_key] = (holders, license_expression)
            origin_count[origin_key] += 1

        if not origin_count:
            continue

        directory.extra_data['origin_count'] = origin_count
        directory.save(codebase)

        # TODO: When there is a tie, we need to be explicit and consistent
        # about the tiebreaker
        # TODO: Consider creating two components instead of tiebreaking
        origin_key, top_count = origin_count.most_common(1)[0]

        if not is_majority(top_count, directory.files_count):
            # No majority here: see if any child directory had one.
            for child in children:
                component = create_license_holders_consolidated_component(
                    child, codebase)
                if component:
                    yield component
            continue

        majority_holders, majority_license_expression = origins_by_key[origin_key]
        directory.extra_data['origin_summary_license_expression'] = (
            majority_license_expression)
        directory.extra_data['origin_summary_holders'] = majority_holders
        directory.extra_data['origin_summary_count'] = top_count
        directory.save(codebase)

        # Create consolidated components for a child that has a majority that
        # is different than the one we have now.
        # NOTE(review): a child is selected only when BOTH its license
        # expression and its holders differ from the majority -- confirm
        # that "and" rather than "or" is intended.
        for child in children:
            child_license_expression = child.extra_data.get(
                'origin_summary_license_expression')
            child_holders = child.extra_data.get('origin_summary_holders')
            has_other_majority = (
                child_license_expression
                and child_holders
                and child_license_expression != majority_license_expression
                and child_holders != majority_holders
            )
            if has_other_majority:
                component = create_license_holders_consolidated_component(
                    child, codebase)
                if component:
                    yield component

    # Yield a Component for root if there is a majority
    component = create_license_holders_consolidated_component(root, codebase)
    if component:
        yield component
def compute_license_score(codebase):
    """
    Return a 2-tuple of the scoring elements and the declared license
    expression string (or an empty string) computed at the codebase level.

    The license clarity score is a value from 0-100 calculated by combining
    the weighted values determined for each of the scoring elements:

    Declared license:
    - When true, indicates that the software package licensing is documented
      at top-level or well-known locations in the software project, typically
      in a package manifest, NOTICE, LICENSE, COPYING or README file.
    - Scoring Weight = 40

    Identification precision:
    - Indicates how well the license statement(s) of the software identify
      known licenses that can be designated by precise keys (identifiers) as
      provided in a publicly available license list, such as the ScanCode
      LicenseDB, the SPDX license list, the OSI license list, or a URL
      pointing to a specific license text in a project or organization
      website.
    - Scoring Weight = 40

    License texts:
    - License texts are provided to support the declared license expression
      in files such as a package manifest, NOTICE, LICENSE, COPYING or README.
    - Scoring Weight = 10

    Declared copyright:
    - When true, indicates that the software package copyright is documented
      at top-level or well-known locations in the software project, typically
      in a package manifest, NOTICE, LICENSE, COPYING or README file.
    - Scoring Weight = 10

    Ambiguous compound licensing:
    - When true, indicates that the software has a license declaration that
      makes it difficult to construct a reliable license expression, such as
      in the case of multiple licenses where the conjunctive versus
      disjunctive relationship is not well defined.
    - Scoring Weight = -10

    Conflicting license categories:
    - When true, indicates the declared license expression of the software is
      in the permissive category, but that other potentially conflicting
      categories, such as copyleft and proprietary, have been detected in
      lower level code.
    - Scoring Weight = -20

    The negative weights are only applied while the score is above zero.
    """
    def field_values(field_name, key_files_only):
        # Collect the values of one field from the codebase resources.
        return get_field_values_from_codebase_resources(
            codebase=codebase,
            field_name=field_name,
            key_files_only=key_files_only,
        )

    scoring_elements = ScoringElements()

    key_file_licenses = field_values('licenses', True)
    key_file_expressions = field_values('license_expressions', True)
    unique_key_file_expressions = unique(key_file_expressions)
    key_file_categories = get_license_categories(key_file_licenses)
    key_file_copyrights = field_values('copyrights', True)
    all_licenses = field_values('licenses', False)

    scoring_elements.declared_license = bool(key_file_licenses)
    if scoring_elements.declared_license:
        scoring_elements.score += 40

    scoring_elements.precise_license_detection = check_declared_licenses(
        key_file_licenses)
    if scoring_elements.precise_license_detection:
        scoring_elements.score += 40

    scoring_elements.has_license_text = check_for_license_texts(
        key_file_licenses)
    if scoring_elements.has_license_text:
        scoring_elements.score += 10

    scoring_elements.declared_copyrights = bool(key_file_copyrights)
    if scoring_elements.declared_copyrights:
        scoring_elements.score += 10

    # Conflicts are only checked for a permissively licensed codebase.
    if check_declared_license_categories(key_file_categories):
        scoring_elements.conflicting_license_categories = (
            check_for_conflicting_licenses(all_licenses))
        if (scoring_elements.conflicting_license_categories
                and scoring_elements.score > 0):
            scoring_elements.score -= 20

    declared_license_expression = get_primary_license(
        unique_key_file_expressions)

    if not declared_license_expression:
        # If we cannot get a single primary license, then we combine and
        # simplify the license expressions from key files.
        combined = combine_expressions(unique_key_file_expressions)
        if combined:
            declared_license_expression = str(
                Licensing().parse(combined).simplify())
        scoring_elements.ambiguous_compound_licensing = True
        if scoring_elements.score > 0:
            scoring_elements.score -= 10

    return scoring_elements, declared_license_expression or ''
def get_holders_consolidated_components(codebase):
    """
    Yield a ConsolidatedComponent for every directory if there are files with
    both license and copyright detected in them.

    Nothing is yielded when the codebase root is flagged
    ``in_package_component``.
    """
    if codebase.root.extra_data.get('in_package_component'):
        return

    # Step 1: Normalize license expressions and holders on file Resources and
    # save on each directory Resource the set of holder keys that were
    # detected in its immediate files.
    for directory in codebase.walk(topdown=False):
        # Only directories are processed here.
        if directory.is_file or directory.extra_data.get('in_package_component'):
            continue

        holder_keys = set()
        for child in directory.children(codebase):
            # Only files with some license or holder are processed here.
            has_no_detection = (
                not child.license_expressions and not child.holders)
            if (child.is_dir
                    or child.extra_data.get('in_package_component')
                    or has_no_detection):
                continue

            if child.license_expressions:
                license_expression = combine_expressions(
                    child.license_expressions)
                if license_expression:
                    child.extra_data['normalized_license_expression'] = (
                        license_expression)
                    child.save(codebase)

            if not child.holders:
                continue

            holders = process_holders(h['value'] for h in child.holders)
            if not holders:
                continue

            # Dedupe holders, keeping the first holder seen for each key.
            holders_by_key = {}
            for holder in holders:
                holders_by_key.setdefault(holder.key, holder)
            holders = list(holders_by_key.values())

            # Keep track of holders found in this immediate directory.
            holder_keys.update(holder.key for holder in holders)

            child.extra_data['normalized_holders'] = holders
            child.save(codebase)

        if holder_keys:
            # Save the holder keys found in the immediate directory.
            directory.extra_data['current_holders'] = holder_keys
            directory.save(codebase)

    # Step 2: Walk the codebase top-down and create consolidated components
    # along the way. Going top-down ensures that the highest-most Resource is
    # used as the common ancestor for a given holder.
    # `already_consolidated` holds the holder keys for which a consolidation
    # has already been created.
    already_consolidated = set()
    for resource in codebase.walk(topdown=True):
        for holder_key in resource.extra_data.get('current_holders', set()):
            if holder_key in already_consolidated:
                continue
            already_consolidated.add(holder_key)
            for component in create_consolidated_components(
                    resource, codebase, holder_key):
                yield component
def get_consolidated_packages(codebase):
    """
    Yield a ConsolidatedPackage for each detected package in the codebase.

    A package that is a BaseBuildManifestPackage yields a "build"
    ConsolidatedComponent instead, and its resources are not flagged as
    ``in_package_component``.
    """
    def holders_sorted_by_key(holders):
        # Cluster the holders and return them sorted by holder key.
        clusters = copyright_summary.cluster(holders)
        return [h for h, _ in sorted(clusters, key=lambda t: t[0].key)]

    for resource in codebase.walk(topdown=False):
        for package_data in resource.packages:
            package = get_package_instance(package_data)

            package_root = package.get_package_root(resource, codebase)
            package_root.extra_data['package_root'] = True
            package_root.save(codebase)

            is_build_file = isinstance(package, BaseBuildManifestPackage)
            package_resources = list(
                package.get_package_resources(package_root, codebase))
            package_license_expression = package.license_expression
            package_copyright = package.copyright

            # Holders are detected from the package copyright statement.
            package_holders = []
            if package_copyright:
                numbered_lines = [(0, package_copyright)]
                detections = CopyrightDetector().detect(
                    numbered_lines,
                    copyrights=False,
                    holders=True,
                    authors=False,
                    include_years=False)
                for _, holder, _, _ in detections:
                    package_holders.append(holder)
            package_holders = process_holders(package_holders)

            discovered_license_expressions = []
            discovered_holders = []
            for package_resource in package_resources:
                if not is_build_file:
                    # If a resource is part of a package Component, then it
                    # cannot be part of any other type of Component.
                    package_resource.extra_data['in_package_component'] = True
                    package_resource.save(codebase)

                if package_resource.license_expressions:
                    resource_expression = combine_expressions(
                        package_resource.license_expressions)
                    if resource_expression:
                        discovered_license_expressions.append(
                            resource_expression)

                if package_resource.holders:
                    discovered_holders.extend(
                        h.get('value') for h in package_resource.holders)
            discovered_holders = process_holders(discovered_holders)

            combined_discovered = combine_expressions(
                discovered_license_expressions)
            simplified_discovered = None
            if combined_discovered:
                simplified_discovered = str(
                    Licensing().parse(combined_discovered).simplify())

            core_holders = holders_sorted_by_key(package_holders)
            other_holders = holders_sorted_by_key(discovered_holders)
            consolidation = Consolidation(
                core_license_expression=package_license_expression,
                core_holders=core_holders,
                other_license_expression=simplified_discovered,
                other_holders=other_holders,
                files_count=sum(1 for r in package_resources if r.is_file),
                resources=package_resources,
            )

            if is_build_file:
                consolidation.identifier = package.name
                yield ConsolidatedComponent(
                    type='build', consolidation=consolidation)
            else:
                yield ConsolidatedPackage(
                    package=package, consolidation=consolidation)
def test_combine_expressions_with_or_relationship(self):
    # Two expressions combined with an explicit "OR" relation.
    expected = 'mit OR apache-2.0'
    result = combine_expressions(['mit', 'apache-2.0'], 'OR')
    assert expected == result
def test_combine_expressions_with_regular(self):
    # Without an explicit relation, the expressions are joined with "AND".
    expected = 'mit AND apache-2.0'
    result = combine_expressions(['mit', 'apache-2.0'])
    assert expected == result
def test_combine_expressions_with_empty_input(self):
    # Empty inputs yield None. Identity is checked rather than equality so
    # that a value merely comparing equal to None does not pass.
    assert combine_expressions(None) is None
    assert combine_expressions([]) is None
def get_consolidated_packages(codebase):
    """
    Yield a ConsolidatedPackage for each detected package in the codebase.

    A package that is a BaseBuildManifestPackage yields a "build"
    ConsolidatedComponent instead, and its resources are not flagged as
    ``in_package_component``.
    """
    for resource in codebase.walk(topdown=False):
        for package_data in resource.packages:
            package = get_package_instance(package_data)
            is_build_file = isinstance(package, BaseBuildManifestPackage)
            package_resources = list(
                package.get_package_resources(resource, codebase))
            package_license_expression = package.license_expression
            package_copyright = package.copyright

            # Holders are detected from the package copyright statement.
            package_holders = []
            if package_copyright:
                numbered_lines = [(0, package_copyright)]
                detections = CopyrightDetector().detect(
                    numbered_lines,
                    copyrights=False,
                    holders=True,
                    authors=False,
                    include_years=False)
                for _, holder, _, _ in detections:
                    package_holders.append(holder)

            collected_expressions = []
            collected_holders = []
            for package_resource in package_resources:
                if not is_build_file:
                    # If a resource is part of a package Component, then it
                    # cannot be part of any other type of Component.
                    package_resource.extra_data['in_package_component'] = True
                    package_resource.save(codebase)

                resource_expression = combine_expressions(
                    package_resource.license_expressions)
                resource_holders = package_resource.holders
                if not resource_expression and not resource_holders:
                    continue

                collected_expressions.append(resource_expression)
                collected_holders.extend(
                    h.get('value') for h in resource_holders)

            # Drop the empty values collected above.
            discovered_license_expressions = [
                expression for expression in collected_expressions if expression
            ]
            discovered_holders = [
                holder for holder in collected_holders if holder
            ]

            combined_discovered = combine_expressions(
                discovered_license_expressions)
            simplified_discovered = None
            if combined_discovered:
                simplified_discovered = str(
                    Licensing().parse(combined_discovered).simplify())

            consolidation = Consolidation(
                core_license_expression=package_license_expression,
                core_holders=sorted(set(package_holders)),
                other_license_expression=simplified_discovered,
                other_holders=sorted(set(discovered_holders)),
                files_count=len(
                    [r for r in package_resources if r.is_file]),
                resources=package_resources,
            )

            if is_build_file:
                consolidation.identifier = package.name
                yield ConsolidatedComponent(
                    type='build', consolidation=consolidation)
            else:
                yield ConsolidatedPackage(
                    package=package, consolidation=consolidation)
def test_combine_expressions_with_empty_input(self):
    # Empty inputs yield None. Identity is checked rather than equality so
    # that a value merely comparing equal to None does not pass.
    assert combine_expressions(None) is None
    assert combine_expressions([]) is None
def test_combine_expressions_with_duplicated_elements(self):
    # The repeated "mit" shows up only once in the combined expression.
    expected = 'mit AND apache-2.0'
    result = combine_expressions(['mit', 'apache-2.0', 'mit'])
    assert expected == result
def get_origin_info_from_top_level_packages(top_level_packages, codebase):
    """
    Return a 3-tuple of the declared license expression, the declared holders
    and the primary programming language collected from the key packages of a
    ``top_level_packages`` list of package mappings and a ``codebase``.

    The license expressions of the key packages are combined with "AND" and
    simplified. The holders come from the package copyrights, or else from
    the package party names. The primary language is set only when the key
    packages share a single language.

    Return a 3-tuple of empty strings when ``top_level_packages`` is empty.
    Otherwise the holders are returned as the result of ``unique()``.
    """
    if not top_level_packages:
        return '', '', ''

    expressions = []
    languages = []
    copyright_statements = []
    parties = []

    for package_mapping in top_level_packages:
        package = models.Package.from_dict(package_mapping)
        # Only key packages are of interest.
        if not is_key_package(package, codebase):
            continue

        if package.license_expression:
            expressions.append(package.license_expression)
        if package.primary_language:
            languages.append(package.primary_language)
        if package.copyright:
            copyright_statements.append(package.copyright)
        parties.extend(package.parties or [])

    # Combine license expressions
    combined = combine_expressions(
        expressions=unique(expressions),
        relation='AND',
    )
    declared_license_expression = ''
    if combined:
        declared_license_expression = str(
            Licensing().parse(combined).simplify())

    # Get holders: the copyright holders win over the party names.
    holders = list(get_holders_from_copyright(copyright_statements))
    if holders:
        declared_holders = holders
    elif parties:
        declared_holders = [party.name for party in parties]
    else:
        declared_holders = []
    declared_holders = unique(declared_holders)

    # Programming language: only set when there is exactly one.
    unique_languages = unique(languages)
    primary_language = (
        unique_languages[0] if len(unique_languages) == 1 else '')

    return declared_license_expression, declared_holders, primary_language