Example #1
0
def compute_normalized_license(declared_license, expression_symbols=None):
    """
    Return a normalized license_expression string from the ``declared_license``.
    Return 'unknown' if there is a declared license but it cannot be detected
    (including on errors) and return None if there is no declared license.

    Any empty ``declared_license`` (None, empty string, empty list or mapping,
    etc.) is treated as "no declared license". A non-empty value that is not a
    string is converted to its ``repr()`` before detection.

    Use the ``expression_symbols`` mapping of {lowered key: LicenseSymbol}
    if provided. Otherwise use the standard SPDX license symbols.
    """
    # Check for emptiness BEFORE coercing to a string: repr(None) is 'None' and
    # repr([]) is '[]', which are both truthy and would otherwise be submitted
    # to license detection as if they were a real declaration.
    if not declared_license:
        return

    # Ensure declared license is always a string
    if not isinstance(declared_license, str):
        declared_license = repr(declared_license)

    from packagedcode import licensing
    try:
        return licensing.get_normalized_expression(
            query_string=declared_license,
            expression_symbols=expression_symbols)
    except Exception as e:
        # we never fail just for this
        if TRACE:
            logger_debug(
                f'Failed to compute license for {declared_license!r}: {e!r}')
        # FIXME: add logging
        return 'unknown'
Example #2
0
def parse_copyright_file(copyright_file,
                         skip_debian_packaging=True,
                         simplify_licenses=True):
    """
    Return a (declared license, detected license_expression, copyrights) tuple
    of strings computed from the file at the `copyright_file` location, or a
    tuple of three None if no `copyright_file` is provided.

    The file is first parsed as a structured copyright file. When that yields
    no detected license (or only 'unknown'), license detection is run on the
    whole file text instead. When that yields no copyrights, copyright
    detection is run on the whole file instead.
    """
    if not copyright_file:
        return (None,) * 3

    structured = parse_structured_copyright_file(
        copyright_file=copyright_file,
        skip_debian_packaging=skip_debian_packaging,
        simplify_licenses=simplify_licenses,
    )
    declared, detected, statements = structured

    # fall back to detection on the whole text if nothing useful was detected
    lacks_detection = not detected or detected == 'unknown'
    if lacks_detection:
        whole_text = textcode.analysis.unicode_text(copyright_file)
        detected = get_normalized_expression(
            whole_text, try_as_expression=False)

    # fall back to copyright detection on the whole file if none was collected
    statements = statements or '\n'.join(copyright_detector(copyright_file))

    return declared, detected, statements
Example #3
0
def detect_declared_license(declared):
    """
    Return a tuple of (declared license, detected license expression) from a
    declared license. Both can be None.

    The ``declared`` license is first normalized and cleaned up. Detection is
    then attempted with a name mapping and, if that finds nothing, with
    regular license detection. If that detection raises an error, the detected
    license expression is 'unknown'.
    """
    declared = normalize_and_cleanup_declared_license(declared)

    if TRACE:
        logger_debug(f'detect_declared_license: {declared}')

    if not declared:
        return None, None

    # apply multiple license detection in sequence
    detected = detect_using_name_mapping(declared)
    if detected:
        return declared, detected

    from packagedcode import licensing
    try:
        detected = licensing.get_normalized_expression(
            declared,
            try_as_expression=False,
            approximate=False,
        )
    except Exception:
        # FIXME: add logging
        # we never fail just for this.
        # Always return a two-tuple as on every other path: callers unpack the
        # result as (declared, detected) and a bare string would break them.
        return declared, 'unknown'

    return declared, detected
Example #4
0
def parse_copyright_file(
    copyright_file,
    skip_debian_packaging=True,
    simplify_licenses=True,
    unique=True
):
    """
    Return a (declared license, detected license_expression, copyrights) tuple
    of strings computed from the file at the `copyright_file` location, or a
    tuple of three None if no `copyright_file` is provided.

    The file is first parsed as a structured copyright file, passing along the
    `skip_debian_packaging`, `simplify_licenses` and `unique` arguments. When
    that yields no detected license (or only 'unknown'), license detection is
    run on the whole file text instead. When that yields no copyrights,
    copyright detection is run on the whole file instead.
    """
    if not copyright_file:
        return (None,) * 3

    # structured parsing always comes first
    structured = parse_structured_copyright_file(
        copyright_file=copyright_file,
        skip_debian_packaging=skip_debian_packaging,
        simplify_licenses=simplify_licenses,
        unique=unique,
    )
    declared_license, detected_license, copyrights = structured

    if TRACE:
        message = (
            f'parse_copyright_file: declared_license: {declared_license}\n'
            f'detected_license: {detected_license}\n'
            f'copyrights: {copyrights}'
        )
        logger_debug(message)

    # dive into whole text only if we detected everything as unknown.
    # TODO: this is not right.
    has_detection = detected_license and detected_license != 'unknown'
    if not has_detection:
        whole_text = textcode.analysis.unicode_text(copyright_file)
        detected_license = get_normalized_expression(
            whole_text,
            try_as_expression=False,
        )
        if TRACE:
            message = (
                f'parse_copyright_file: using whole text: '
                f'detected_license: {detected_license}'
            )
            logger_debug(message)

    # dive into copyright if we did not detect any.
    if not copyrights:
        detections = copyright_detector(copyright_file)
        copyrights = '\n'.join(detections)
        if TRACE:
            message = (
                f'parse_copyright_file: using whole text: '
                f'copyrights: {copyrights}'
            )
            logger_debug(message)

    return declared_license, detected_license, copyrights
Example #5
0
 def test_get_normalized_expression(self):
     """
     Check get_normalized_expression() on a plain license key, on queries
     that contain an unrecognized word, and on empty or missing input.
     """
     # a plain license key is returned as-is
     assert get_normalized_expression('mit') == 'mit'

     # queries mixing known license names with the unrecognized word "asasa"
     assert get_normalized_expression(
         'mit or asasa or Apache-2.0') == 'apache-2.0 AND unknown'
     assert get_normalized_expression(
         'mit asasa or Apache-2.0') == 'mit OR apache-2.0'

     # empty or missing input yields no expression at all
     assert get_normalized_expression('') is None
     assert get_normalized_expression(None) is None
Example #6
0
def compute_normalized_license(declared_license):
    """
    Compute and return a normalized license_expression string for the
    ``declared_license``.

    Return None when ``declared_license`` is empty or missing. Return
    'unknown' when there is a declared license but detection raises an error.
    """
    if declared_license:
        from packagedcode import licensing
        try:
            normalized = licensing.get_normalized_expression(declared_license)
        except Exception:
            # FIXME: add logging
            # a detection failure must never propagate to the caller
            normalized = 'unknown'
        return normalized

    return None
def compute_normalized_license(declared_license, expression_symbols=None):
    """
    Compute and return a normalized license_expression string for the
    ``declared_license``.

    Return None when ``declared_license`` is empty or missing. Return
    'unknown' when there is a declared license but detection raises an error.

    The ``expression_symbols`` mapping of {lowered key: LicenseSymbol} is
    used if provided. Otherwise the standard SPDX license symbols are used.
    """
    if declared_license:
        from packagedcode import licensing
        detection_arguments = dict(
            query_string=declared_license,
            expression_symbols=expression_symbols,
        )
        try:
            normalized = licensing.get_normalized_expression(**detection_arguments)
        except Exception:
            # FIXME: add logging
            # a detection failure must never propagate to the caller
            normalized = 'unknown'
        return normalized

    return None
Example #8
0
def _append_item(items, item, unique):
    """
    Append ``item`` to the ``items`` list. If ``unique`` is True, skip an
    ``item`` that is already present in ``items``.
    """
    if unique and item in items:
        return
    items.append(item)


def parse_structured_copyright_file(
    copyright_file,
    skip_debian_packaging=True,
    simplify_licenses=True,
    unique=True,
):
    """
    Return a tuple of (declared license, detected license_expression,
    copyrights) strings computed from the `copyright_file` location. For each
    copyright file paragraph we treat the "name" as a license declaration. The
    text is used for detection and cross-reference with the declaration.

    Return a tuple of three None if there is no `copyright_file`. The detected
    license_expression is 'unknown' if no license is detected in any paragraph.

    If `skip_debian_packaging` is True, the Debian packaging license --if
    detected-- is skipped.

    If `simplify_licenses` is True the license expressions are simplified.

    If `unique` is True, repeated copyrights, detected or declared licenses are
    ignored, and only unique detections are returned.
    """
    if not copyright_file:
        return None, None, None

    deco = fix_copyright(DebianCopyright.from_file(copyright_file))

    declared_licenses = []
    detected_licenses = []
    copyrights = []

    licensing = Licensing()
    for paragraph in deco.paragraphs:

        if skip_debian_packaging and is_debian_packaging(paragraph):
            # Skipping packaging license and copyrights since they are not
            # relevant to the effective package license
            continue

        if isinstance(paragraph, (CopyrightHeaderParagraph, CopyrightFilesParagraph)):
            for statement in paragraph.copyright.statements or []:
                _append_item(copyrights, statement.dumps(), unique)

        if isinstance(paragraph, CatchAllParagraph):
            text = paragraph.dumps()
            if text:
                detected = get_normalized_expression(text, try_as_expression=False)
                # honor `unique` here too, as documented for all detections
                _append_item(detected_licenses, detected or 'unknown', unique)
            continue

        plicense = paragraph.license
        if not plicense:
            continue

        declared, detected = detect_declared_license(plicense.name)
        # Skip empty values whether `unique` or not: either can be None and a
        # None would break the string join and the expression parsing below.
        if declared:
            _append_item(declared_licenses, declared, unique)
        if detected:
            _append_item(detected_licenses, detected, unique)

        # also detect in text
        text = plicense.text
        if text:
            detected = get_normalized_expression(text, try_as_expression=False)
            _append_item(detected_licenses, detected or 'unknown', unique)

    declared_license = '\n'.join(declared_licenses)

    if detected_licenses:
        parsed = [licensing.parse(dl, simple=True) for dl in detected_licenses]

        # combine the detections of all paragraphs with AND
        if len(parsed) > 1:
            detected_license = licensing.AND(*parsed)
        else:
            detected_license = parsed[0]

        if simplify_licenses:
            detected_license = detected_license.simplify()

        detected_license = str(detected_license)

    else:
        detected_license = 'unknown'

    copyrights = '\n'.join(copyrights)
    return declared_license, detected_license, copyrights