def compute_normalized_license(declared_license, expression_symbols=None):
    """
    Return a normalized license_expression string from the ``declared_license``.
    Return 'unknown' if there is a declared license but it cannot be detected
    (including on errors) and return None if there is no declared license.

    Use the ``expression_symbols`` mapping of {lowered key: LicenseSymbol}
    if provided. Otherwise use the standard SPDX license symbols.

    A non-empty ``declared_license`` that is not a string is converted with
    repr() before detection.
    """
    # Test for "no declared license" BEFORE stringifying: repr(None), repr([])
    # or repr({}) are non-empty strings and would otherwise be sent to
    # detection instead of returning None as documented.
    if not declared_license:
        return

    # Ensure declared license is always a string
    if not isinstance(declared_license, str):
        declared_license = repr(declared_license)

    from packagedcode import licensing
    try:
        return licensing.get_normalized_expression(
            query_string=declared_license,
            expression_symbols=expression_symbols,
        )
    except Exception as e:
        # we never fail just for this
        if TRACE:
            logger_debug(
                f'Failed to compute license for {declared_license!r}: {e!r}')
        # FIXME: add logging
        return 'unknown'
def parse_copyright_file(copyright_file, skip_debian_packaging=True, simplify_licenses=True):
    """
    Return a (declared license, detected license_expression, copyrights)
    tuple of strings computed from the file at the `copyright_file` location.
    Return a tuple of three None if `copyright_file` is empty.

    The file is first handled by parse_structured_copyright_file(). The whole
    file text is used as a fallback for license detection when that step
    yields no license or only 'unknown', and for copyright detection when it
    yields no copyrights.
    """
    if not copyright_file:
        return None, None, None

    structured_results = parse_structured_copyright_file(
        copyright_file=copyright_file,
        skip_debian_packaging=skip_debian_packaging,
        simplify_licenses=simplify_licenses,
    )
    declared_license, detected_license, copyrights = structured_results

    # fall back to detection on the whole text when nothing useful was found
    nothing_detected = not detected_license
    if nothing_detected or detected_license == 'unknown':
        whole_text = textcode.analysis.unicode_text(copyright_file)
        detected_license = get_normalized_expression(
            whole_text,
            try_as_expression=False,
        )

    # same fallback for copyrights: run the detector on the file itself
    if not copyrights:
        detected_copyrights = copyright_detector(copyright_file)
        copyrights = '\n'.join(detected_copyrights)

    return declared_license, detected_license, copyrights
def detect_declared_license(declared):
    """
    Return a tuple of (declared license, detected license expression) from a
    declared license. Both can be None.

    The declared license is first cleaned up with
    normalize_and_cleanup_declared_license(). Detection is then attempted with
    detect_using_name_mapping() and, if that finds nothing, with
    licensing.get_normalized_expression(). If that last step raises, the
    detected license expression is 'unknown'.
    """
    declared = normalize_and_cleanup_declared_license(declared)
    if TRACE:
        logger_debug(f'detect_declared_license: {declared}')

    if not declared:
        return None, None

    # apply multiple license detection in sequence
    detected = detect_using_name_mapping(declared)
    if detected:
        return declared, detected

    from packagedcode import licensing
    try:
        detected = licensing.get_normalized_expression(
            declared,
            try_as_expression=False,
            approximate=False,
        )
    except Exception:
        # FIXME: add logging
        # we never fail just for this. Always return a 2-tuple: callers
        # unpack the result as (declared, detected) and a bare string would
        # make that unpacking fail.
        return declared, 'unknown'

    return declared, detected
def parse_copyright_file(
    copyright_file,
    skip_debian_packaging=True,
    simplify_licenses=True,
    unique=True
):
    """
    Return a (declared license, detected license_expression, copyrights)
    tuple of strings computed from the file at the `copyright_file` location.
    Return a tuple of three None if `copyright_file` is empty.

    The file is first handled by parse_structured_copyright_file(), which
    receives the `skip_debian_packaging`, `simplify_licenses` and `unique`
    arguments unchanged. The whole file text is used as a fallback for license
    detection when that step yields no license or only 'unknown', and for
    copyright detection when it yields no copyrights.
    """
    if not copyright_file:
        return None, None, None

    # first parse as structured copyright file
    structured_results = parse_structured_copyright_file(
        copyright_file=copyright_file,
        skip_debian_packaging=skip_debian_packaging,
        simplify_licenses=simplify_licenses,
        unique=unique,
    )
    declared_license, detected_license, copyrights = structured_results

    if TRACE:
        logger_debug(
            f'parse_copyright_file: declared_license: {declared_license}\n'
            f'detected_license: {detected_license}\n'
            f'copyrights: {copyrights}'
        )

    # dive into whole text only if we detected everything as unknown.
    # TODO: this is not right.
    nothing_detected = not detected_license
    if nothing_detected or detected_license == 'unknown':
        whole_text = textcode.analysis.unicode_text(copyright_file)
        detected_license = get_normalized_expression(
            whole_text,
            try_as_expression=False,
        )
        if TRACE:
            logger_debug(
                f'parse_copyright_file: using whole text: '
                f'detected_license: {detected_license}'
            )

    # dive into copyright if we did not detect any.
    if not copyrights:
        detected_copyrights = copyright_detector(copyright_file)
        copyrights = '\n'.join(detected_copyrights)
        if TRACE:
            logger_debug(
                f'parse_copyright_file: using whole text: '
                f'copyrights: {copyrights}'
            )

    return declared_license, detected_license, copyrights
def test_get_normalized_expression(self):
    """
    Check get_normalized_expression() on license queries and on empty input.
    """
    queries_and_expected = [
        ('mit', 'mit'),
        ('mit or asasa or Apache-2.0', 'apache-2.0 AND unknown'),
        # NOTE(review): this query is checked twice in a row; confirm whether
        # the repeat is intentional (e.g. to exercise repeated calls) or a
        # copy-paste leftover.
        ('mit or asasa or Apache-2.0', 'apache-2.0 AND unknown'),
        ('mit asasa or Apache-2.0', 'mit OR apache-2.0'),
    ]
    for query, expected in queries_and_expected:
        assert get_normalized_expression(query) == expected

    # an empty or missing query yields no expression at all
    for empty_query in ('', None):
        assert get_normalized_expression(empty_query) is None
def compute_normalized_license(declared_license):
    """
    Return a normalized license_expression string computed from the
    ``declared_license``.

    Return None when ``declared_license`` is empty. Return 'unknown' when
    detection raises any exception.
    """
    if declared_license:
        from packagedcode import licensing
        try:
            normalized = licensing.get_normalized_expression(declared_license)
        except Exception:
            # FIXME: add logging
            # we never fail just for this
            normalized = 'unknown'
        return normalized

    # no declared license at all
    return None
def compute_normalized_license(declared_license, expression_symbols=None):
    """
    Return a normalized license_expression string computed from the
    ``declared_license``.

    Return None when ``declared_license`` is empty. Return 'unknown' when
    detection raises any exception.

    Use the ``expression_symbols`` mapping of {lowered key: LicenseSymbol}
    if provided. Otherwise use the standard SPDX license symbols.
    """
    if declared_license:
        from packagedcode import licensing
        detection_arguments = dict(
            query_string=declared_license,
            expression_symbols=expression_symbols,
        )
        try:
            normalized = licensing.get_normalized_expression(**detection_arguments)
        except Exception:
            # FIXME: add logging
            # we never fail just for this
            normalized = 'unknown'
        return normalized

    # no declared license at all
    return None
def parse_structured_copyright_file(
    copyright_file,
    skip_debian_packaging=True,
    simplify_licenses=True,
    unique=True,
):
    """
    Return a tuple of (declared license, detected license_expression,
    copyrights) strings computed from the `copyright_file` location. For each
    copyright file paragraph we treat the "name" as a license declaration. The
    text is used for detection and cross-reference with the declaration.
    Return a tuple of three None if `copyright_file` is empty.

    If `skip_debian_packaging` is True, the Debian packaging license --if
    detected-- is skipped.

    If `simplify_licenses` is True the license expressions are simplified.

    If `unique` is True, repeated copyrights, detected or declared licenses
    are ignored, and only unique detections are returned.

    The declared licenses and the copyrights are each returned as a single
    newline-joined string. The detected license is 'unknown' if nothing was
    detected.
    """
    if not copyright_file:
        return None, None, None

    deco = DebianCopyright.from_file(copyright_file)

    declared_licenses = []
    detected_licenses = []
    copyrights = []

    deco = fix_copyright(deco)

    licensing = Licensing()
    for paragraph in deco.paragraphs:

        if skip_debian_packaging and is_debian_packaging(paragraph):
            # Skipping packaging license and copyrights since they are not
            # relevant to the effective package license
            continue

        if isinstance(paragraph, (CopyrightHeaderParagraph, CopyrightFilesParagraph)):
            pcs = paragraph.copyright.statements or []
            for p in pcs:
                p = p.dumps()
                # avoid repeats
                if not unique or p not in copyrights:
                    copyrights.append(p)

        if isinstance(paragraph, CatchAllParagraph):
            text = paragraph.dumps()
            if text:
                detected = get_normalized_expression(text, try_as_expression=False)
                if not detected:
                    detected = 'unknown'
                # NOTE(review): catch-all detections are appended even when
                # `unique` is True, unlike the other detections below --
                # confirm whether this is intended.
                detected_licenses.append(detected)
        else:
            plicense = paragraph.license
            if not plicense:
                continue

            declared, detected = detect_declared_license(plicense.name)
            # Skip empty values whether or not `unique` is set: a None entry
            # would break the '\n'.join() and the expression parsing below.
            # With `unique`, also avoid repeats.
            if declared and not (unique and declared in declared_licenses):
                declared_licenses.append(declared)
            if detected and not (unique and detected in detected_licenses):
                detected_licenses.append(detected)

            # also detect in text
            text = paragraph.license.text
            if text:
                detected = get_normalized_expression(text, try_as_expression=False)
                if not detected:
                    detected = 'unknown'
                # avoid repeats
                if not unique or detected not in detected_licenses:
                    detected_licenses.append(detected)

    declared_license = '\n'.join(declared_licenses)

    if detected_licenses:
        detected_licenses = [licensing.parse(dl, simple=True) for dl in detected_licenses]

        # combine all the detections in a single AND expression
        if len(detected_licenses) > 1:
            detected_license = licensing.AND(*detected_licenses)
        else:
            detected_license = detected_licenses[0]

        if simplify_licenses:
            detected_license = detected_license.simplify()

        detected_license = str(detected_license)

    else:
        detected_license = 'unknown'

    copyrights = '\n'.join(copyrights)
    return declared_license, detected_license, copyrights