Ejemplo n.º 1
0
 def test_get_expression_extra_parens_2(self):
     # An OR expression wrapped in parentheses on a "//" comment line is
     # expected to render without the enclosing parentheses.
     text = '// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     rendered = parsed.render()
     assert rendered == 'gpl-2.0 OR bsd-simplified'
Ejemplo n.º 2
0
 def test_get_expression_does_not_fail_on_empty(self):
     # An identifier tag that is followed by no expression text must yield
     # None rather than raise.
     licensing = Licensing()
     spdx_symbols = get_spdx_symbols()
     unknown_symbol = get_unknown_spdx_symbol()
     line_text = 'SPDX-License-Identifier: '
     expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol)
     # Use an identity check: "== None" could be satisfied by an object
     # with a permissive __eq__ and would hide a wrong return value.
     assert expression is None
Ejemplo n.º 3
0
 def test_get_expression_with_parens_and_with(self):
     # Nested parentheses combining a WITH exception and an AND are expected
     # to render as a flat expression using the mapped license keys.
     text = '/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) AND MIT) */'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     rendered = parsed.render()
     assert rendered == 'gpl-2.0 WITH linux-syscall-exception-gpl AND mit'
Ejemplo n.º 4
0
def get_spdx_expression(text, expression_symbols=None):
    """
    Match ``text`` as an SPDX license expression and return the rendered
    license expression string, or None when no expression is obtained.

    ``expression_symbols`` is an optional mapping of {lowered key:
    LicenseSymbol}. When it is missing or empty, the standard SPDX license
    symbols are used instead.

    A custom mapping is meant for cases mixing standard SPDX and non-standard
    SPDX-like symbols, as seen for instance in some package manifests.
    """
    from licensedcode.cache import get_spdx_symbols
    from licensedcode.cache import get_unknown_spdx_symbol

    licensing = Licensing()
    # fall back to the standard SPDX symbols for a None or empty mapping
    symbols = expression_symbols or get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()

    # only the expression part of the prepared text is parsed
    _, expression_text = prepare_text(text)

    parsed = get_expression(
        text=expression_text,
        licensing=licensing,
        expression_symbols=symbols,
        unknown_symbol=unknown,
    )
    return None if parsed is None else parsed.render()
 def test_get_expression_with_empty_expression_should_return_unknown(self):
     # NOTE: despite the test name, the value asserted here for a tag with
     # no expression is None, not the unknown symbol.
     licensing = Licensing()
     spdx_symbols = get_spdx_symbols()
     unknown_symbol = get_unknown_spdx_symbol()
     line_text = '* SPDX-License-Identifier:'
     expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol)
     # Use an identity check: an equality test against None could be
     # satisfied by an object with a permissive __eq__.
     assert expression is None
Ejemplo n.º 6
0
 def test_get_expression_license_ref(self):
     # A LicenseRef-* identifier is expected to render as the unknown SPDX
     # license key.
     text = '/* SPDX-License-Identifier: LicenseRef-ABC  */'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     rendered = parsed.render()
     assert rendered == 'unknown-spdx'
Ejemplo n.º 7
0
 def test_get_expression_simple(self):
     # A single SPDX identifier is expected to render as its mapped license
     # key.
     text = '*  SPDX-License-Identifier: BSD-3-Clause'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     rendered = parsed.render()
     assert rendered == 'bsd-new'
 def test_get_expression_with_plus(self):
     # An identifier with a trailing "+" is expected to render as the
     # corresponding "-plus" license key.
     text = '* SPDX-License-Identifier: GPL-2.0+'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     rendered = parsed.render()
     assert rendered == 'gpl-2.0-plus'
Ejemplo n.º 9
0
 def test_get_expression_with_extra_parens(self):
     # Parentheses enclosing the whole OR expression are expected to be
     # absent from the rendered expression.
     text = '* SPDX-License-Identifier: (GPL-2.0+ OR MIT)'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     rendered = parsed.render()
     assert rendered == 'gpl-2.0-plus OR mit'
Ejemplo n.º 10
0
 def test_get_expression_quoted(self):
     # An identifier line enclosed in double quotes after a LIST prefix is
     # expected to render as the plain license key.
     text = '''LIST "SPDX-License-Identifier: GPL-2.0"'''
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     rendered = parsed.render()
     assert rendered == 'gpl-2.0'
Ejemplo n.º 11
0
 def test_get_expression_multiple_or(self):
     # A parenthesized chain of several OR choices is expected to render as
     # a flat OR chain of the mapped license keys, in the same order.
     text = '* SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     rendered = parsed.render()
     assert rendered == 'bsd-new OR epl-1.0 OR apache-2.0 OR mit'
Ejemplo n.º 12
0
 def test_get_expression_with_empty_expression2_should_return_unknown(self):
     # NOTE: despite the test name, the value asserted here for an empty
     # text is None, not the unknown symbol.
     licensing = Licensing()
     spdx_symbols = get_spdx_symbols()
     unknown_symbol = get_unknown_spdx_symbol()
     line_text = ''
     expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol)
     # Use an identity check: "== None" could be satisfied by an object
     # with a permissive __eq__ and would hide a wrong return value.
     assert expression is None
Ejemplo n.º 13
0
 def test_get_expression_without_and_should_not_return_unknown(self):
     # Two identifiers that are not joined by any keyword must not end up
     # as the bare unknown symbol.
     text = '* SPDX-License-Identifier:     GPL-2.0+ BSD-2-Clause'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     assert parsed != unknown
Ejemplo n.º 14
0
 def test_get_expression_simple_with(self):
     # A license WITH exception pair is expected to render with both sides
     # mapped to their license keys.
     text = '/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     parsed = get_expression(text, licensing, symbols, unknown)
     rendered = parsed.render()
     assert rendered == 'lgpl-2.0-plus WITH linux-syscall-exception-gpl'
Ejemplo n.º 15
0
 def test__reparse_invalid_expression_with_non_balanced_parens_should_return_a_proper_expression(self):
     # Text with unbalanced parentheses is expected to be reparsed as the
     # known symbols joined with AND, followed by AND unknown-spdx.
     text = '(GPL-2.0+ and (BSD-2-Clause '
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     reparsed = _reparse_invalid_expression(text, licensing, symbols, unknown)
     rendered = reparsed.render()
     assert rendered == '(gpl-2.0-plus AND bsd-simplified) AND unknown-spdx'
Ejemplo n.º 16
0
def spdx_id_match(idx, query_run, text):
    """
    Match `text` as an SPDX license expression and return a single
    LicenseMatch built from the `query_run` positions, with the `idx` index
    used for support. Return None when no expression is obtained from `text`.
    """
    from licensedcode.cache import get_spdx_symbols
    from licensedcode.cache import get_unknown_spdx_symbol

    if TRACE:
        logger_debug(
            'spdx_id_match: start:', 'text:', text, 'query_run:', query_run)

    licensing = Licensing()
    spdx_symbols = get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()

    # only the expression part of the prepared text is parsed
    _, expression_text = prepare_text(text)
    parsed = get_expression(expression_text, licensing, spdx_symbols, unknown)
    if parsed is None:
        return None

    rendered = parsed.render()

    start = query_run.start
    length = len(query_run)
    tokens = query_run.tokens

    # Synthetic rule for this match.
    # TODO: ensure that all the SPDX license keys are known symbols
    # FIXME: the original query text is stored as the rule text for now.
    # This is likely incorrect when it comes to properly computing the known
    # and unknowns and high and lows for this rule. An alternative would be
    # the expression string padded with spdx-license-identifier, which may be
    # wrong too if the line was not originally padded with this tag.
    synthetic_rule = SpdxRule(
        license_expression=rendered,
        stored_text=text,
        length=length,
    )

    # The match covers the whole text on the query side...
    query_span = Span(range(start, query_run.end + 1))

    # ... and the index-side span is derived from the query side length.
    index_span = Span(range(0, length))

    # positions whose token id is below the index len_legalese
    legalese_limit = idx.len_legalese
    high_span = Span(
        pos for pos, token in enumerate(tokens) if token < legalese_limit)

    return LicenseMatch(
        rule=synthetic_rule,
        qspan=query_span,
        ispan=index_span,
        hispan=high_span,
        query_run_start=start,
        matcher=MATCH_SPDX_ID,
        query=query_run.query,
    )
Ejemplo n.º 17
0
 def test__reparse_invalid_expression_without_or_should_return_a_proper_expression(self):
     # Legacy uboot-style text: two identifiers listed side by side with no
     # OR keyword between them are expected to be reparsed as an OR.
     text = 'GPL-2.0+ BSD-2-Clause'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     reparsed = _reparse_invalid_expression(text, licensing, symbols, unknown)
     rendered = reparsed.render()
     assert rendered == 'gpl-2.0-plus OR bsd-simplified'
 def test__reparse_invalid_expression_with_improper_keyword_should_return_a_proper_expression(
         self):
     # Text with a misplaced leading "or" keyword is expected to be reparsed
     # as the known symbols joined with AND, followed by AND unknown-spdx.
     text = '* SPDX-License-Identifier:    or GPL-2.0+ BSD-2-Clause '
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     reparsed = _reparse_invalid_expression(text, licensing, symbols, unknown)
     rendered = reparsed.render()
     assert rendered == '(gpl-2.0-plus AND bsd-simplified) AND unknown-spdx'
Ejemplo n.º 19
0
 def test__parse_expression_with_empty_expression_should_raise_ExpressionError(self):
     # A tag with no expression text must make _parse_expression raise an
     # ExpressionError; a normal return is reported as a test failure.
     text = '* SPDX-License-Identifier:'
     licensing = Licensing()
     symbols = get_spdx_symbols()
     unknown = get_unknown_spdx_symbol()
     try:
         _parse_expression(text, licensing, symbols, unknown)
     except ExpressionError:
         pass
     else:
         self.fail('ExpressionError not raised')
Ejemplo n.º 20
0
 def test__parse_expression_without_and_raise_exception(self):
     # Two identifiers that are not joined by any keyword must make
     # _parse_expression raise.
     licensing = Licensing()
     spdx_symbols = get_spdx_symbols()
     unknown_symbol = get_unknown_spdx_symbol()
     line_text = '* SPDX-License-Identifier:     GPL-2.0+ BSD-2-Clause'
     try:
         _parse_expression(line_text, licensing, spdx_symbols, unknown_symbol)
     except Exception:
         pass
     else:
         # The failure is reported from the "else" clause on purpose: when
         # self.fail() was called inside the "try" body, its AssertionError
         # was swallowed by the bare "except:" and the test could never fail.
         self.fail('exception should be raised')
Ejemplo n.º 21
0
    def test_get_expression_without_lid(self):
        # An expression given without any SPDX-License-Identifier tag is
        # expected to parse to the same keys as a tagged one.
        text = (
            'EPL-2.0 OR Apache-2.0 OR '
            'GPL-2.0 WITH Classpath-exception-2.0 OR '
            'GPL-2.0'
        )
        licensing = Licensing()
        symbols = get_spdx_symbols()
        unknown = get_unknown_spdx_symbol()
        parsed = get_expression(text, licensing, symbols, unknown)

        expected_expression = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0'
        assert parsed.render() == expected_expression

        # with unique=False the repeated gpl-2.0 key is listed twice
        expected_keys = ['epl-2.0', u'apache-2.0', u'gpl-2.0', u'classpath-exception-2.0', u'gpl-2.0']
        keys = licensing.license_keys(parsed, unique=False)
        assert keys == expected_keys

        decomposed = licensing.license_symbols(parsed, decompose=True)
        assert all(symbol.wrapped for symbol in decomposed)
    def test_get_expression_complex(self):
        # Identifiers written with unusual letter case ("aPache-2.0") are
        # expected to map to the same license keys.
        text = (
            '* SPDX-License-Identifier: '
            'EPL-2.0 OR aPache-2.0 OR '
            'GPL-2.0 WITH classpath-exception-2.0 OR '
            'GPL-2.0'
        )
        licensing = Licensing()
        symbols = get_spdx_symbols()
        unknown = get_unknown_spdx_symbol()
        parsed = get_expression(text, licensing, symbols, unknown)

        expected_expression = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0'
        assert parsed.render() == expected_expression

        # with unique=True the repeated gpl-2.0 key is listed only once
        expected_keys = ['epl-2.0', u'apache-2.0', u'gpl-2.0', u'classpath-exception-2.0']
        keys = licensing.license_keys(parsed, unique=True)
        assert keys == expected_keys

        decomposed = licensing.license_symbols(parsed, decompose=True)
        assert all(symbol.wrapped for symbol in decomposed)
Ejemplo n.º 23
0
    def test_get_expression_complex_with_unknown_symbols_and_refs(self):
        # A LicenseRef-* license and an exception identifier that does not
        # resolve are both expected to render as the unknown SPDX key.
        text = (
            '* SPDX-License-Identifier: '
            'EPL-2.0 OR Apache-2.0 '
            'OR GPL-2.0  WITH Classpath-exception-2.0 '
            'OR LicenseRef-GPL-2.0 WITH Assembly-exception'
        )
        licensing = Licensing()
        symbols = get_spdx_symbols()
        unknown = get_unknown_spdx_symbol()

        parsed = get_expression(text, licensing, symbols, unknown)

        expected_expression = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR unknown-spdx WITH unknown-spdx'
        assert parsed.render() == expected_expression

        expected_keys = ['epl-2.0', 'apache-2.0', 'gpl-2.0', 'classpath-exception-2.0', 'unknown-spdx', 'unknown-spdx']
        keys = licensing.license_keys(parsed, unique=False)
        assert keys == expected_keys

        decomposed = licensing.license_symbols(parsed, decompose=True)
        assert all(symbol.wrapped for symbol in decomposed)
Ejemplo n.º 24
0
    def test_get_expression_works_for_legacy_deprecated_old_spdx_symbols(self):
        # Mapping of an old single SPDX identifier to the license expression
        # it is expected to render as.
        expected_by_old_id = {
            'eCos-2.0': 'gpl-2.0-plus WITH ecos-exception-2.0',
            'GPL-2.0-with-autoconf-exception': 'gpl-2.0 WITH autoconf-exception-2.0',
            'GPL-2.0-with-bison-exception': 'gpl-2.0 WITH bison-exception-2.2',
            'GPL-2.0-with-classpath-exception': 'gpl-2.0 WITH classpath-exception-2.0',
            'GPL-2.0-with-font-exception': 'gpl-2.0 WITH font-exception-gpl',
            'GPL-2.0-with-GCC-exception': 'gpl-2.0 WITH gcc-linking-exception-2.0',
            'GPL-3.0-with-autoconf-exception': 'gpl-3.0 WITH autoconf-exception-3.0',
            'GPL-3.0-with-GCC-exception': 'gpl-3.0 WITH gcc-exception-3.1',
            'wxWindows': 'lgpl-2.0-plus WITH wxwindows-exception-3.1',
        }
        licensing = Licensing()
        spdx_symbols = get_spdx_symbols()
        unknown = get_unknown_spdx_symbol()

        for old_id, expected_expression in expected_by_old_id.items():
            parsed = get_expression(old_id, licensing, spdx_symbols, unknown)
            rendered = parsed.render()
            assert rendered == expected_expression
Ejemplo n.º 25
0
 def test_get_unknown_spdx_symbol(self):
     # the unknown SPDX symbol returned by the cache has this license key
     unknown = cache.get_unknown_spdx_symbol()
     assert unknown.key == 'unknown-spdx'
Ejemplo n.º 26
0
 def test_get_unknown_spdx_symbol_from_defined_db(self):
     # same check as for the default licenses, but using licenses loaded
     # from a test directory
     from licensedcode.models import load_licenses
     licenses_dir = self.get_test_loc('spdx/db-unknown')
     licenses = load_licenses(licenses_dir)
     unknown = cache.get_unknown_spdx_symbol(_test_licenses=licenses)
     assert unknown.key == 'unknown-spdx'
Ejemplo n.º 27
0
def spdx_id_match(idx, query_run, text):
    """
    Return one LicenseMatch by matching the `text` as an SPDX license expression
    using the `query_run` positions and `idx` index for support.

    Return None when no license expression can be obtained from `text`.
    """
    from licensedcode.cache import get_spdx_symbols
    from licensedcode.cache import get_unknown_spdx_symbol

    if TRACE:
        logger_debug('spdx_id_match: start:', 'text:', text, 'query_run:',
                     query_run)

    licensing = Licensing()
    symbols_by_spdx = get_spdx_symbols()
    unknown_symbol = get_unknown_spdx_symbol()

    expression = get_expression(text, licensing, symbols_by_spdx,
                                unknown_symbol)
    if expression is None:
        # get_expression returns None for an empty expression: there is
        # nothing to match and calling render() on None would fail.
        return None
    expression_str = expression.render()

    if TRACE:
        logger_debug('spdx_id_match: expression:', repr(expression_str))

    match_len = len(query_run)
    match_start = query_run.start
    matched_tokens = query_run.tokens

    # Does the text start with one of these leading words rather than with
    # the identifier tag? If yes: skip this first token.
    cleaned = clean_text(text).lower()
    # FIXME: dnl and rem may not be known tokens hence the pos will be wrong
    if cleaned.startswith((
            'list',
            'dnl',
            'rem',
    )):
        match_start += 1
        match_len -= 1
        # Drop the skipped token too so that the hispan positions computed
        # below stay within the shortened ispan. The former bare
        # "matched_tokens[1:]" expression discarded its result.
        matched_tokens = matched_tokens[1:]

    # build synthetic rule
    # TODO: ensure that all the SPDX license keys are known symbols
    rule = SpdxRule(
        license_expression=expression_str,
        # FIXME: for now we are putting the original query text as a
        # rule text: this is likely incorrect when it comes to properly
        # computing the known and unknowns and high and lows for this rule.
        # alternatively we could use the expression string, padded with
        # spdx-license-identifier: this may be wrong too, if the line was
        # not padded originally with this tag
        stored_text=text,
        length=match_len)

    # build match from parsed expression
    # collect match start and end: e.g. the whole text
    qspan = Span(range(match_start, query_run.end + 1))

    # we use the query side to build the ispans
    ispan = Span(range(0, match_len))

    # positions of the tokens whose id is at or above the index len_junk
    len_junk = idx.len_junk
    hispan = Span(p for p, t in enumerate(matched_tokens) if t >= len_junk)

    match = LicenseMatch(rule=rule,
                         qspan=qspan,
                         ispan=ispan,
                         hispan=hispan,
                         query_run_start=match_start,
                         matcher=MATCH_SPDX_ID,
                         query=query_run.query)

    if TRACE:
        logger_debug('spdx_id_match: match found:', match)
    return match