def test_get_spdx_symbols_fails_on_duplicated_other_spdx_keys(self):
    """
    Check that building the SPDX symbols from the 'spdx/db-dupe-other' test
    licenses raises a ValueError mentioning a duplicated "other" SPDX key.
    """
    licenses_dir = self.get_test_loc('spdx/db-dupe-other')
    from licensedcode.models import load_licenses

    licenses = load_licenses(licenses_dir)
    try:
        cache.get_spdx_symbols(_test_licenses=licenses)
    except ValueError as error:
        assert 'Duplicated "other" SPDX license key' in str(error)
    else:
        self.fail('ValueError not raised!')
def test_get_expression_with_plus(self):
    """
    Check that 'GPL-2.0+' in an SPDX-License-Identifier line renders as
    'gpl-2.0-plus'.
    """
    parsed = get_expression(
        '* SPDX-License-Identifier: GPL-2.0+',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    assert parsed.render() == 'gpl-2.0-plus'
def test_get_expression_with_extra_parens(self):
    """
    Check that an expression wrapped in one pair of parentheses renders
    without them.
    """
    parsed = get_expression(
        '* SPDX-License-Identifier: (GPL-2.0+ OR MIT)',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    rendered = parsed.render()
    assert rendered == 'gpl-2.0-plus OR mit'
def test_get_expression_does_not_fail_on_empty(self):
    """
    Check that an SPDX-License-Identifier tag followed by nothing but a space
    yields None rather than raising.
    """
    licensing = Licensing()
    spdx_symbols = get_spdx_symbols()
    unknown_symbol = get_unknown_spdx_symbol()
    line_text = 'SPDX-License-Identifier: '
    expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol)
    # Identity check: an object with a permissive __eq__ must not pass here.
    assert expression is None
def test_get_expression_with_parens_and_with(self):
    """
    Check a nested, parenthesized expression combining WITH and AND inside a
    C-style comment.
    """
    parsed = get_expression(
        '/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) AND MIT) */',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    rendered = parsed.render()
    assert rendered == 'gpl-2.0 WITH linux-syscall-exception-gpl AND mit'
def get_spdx_expression(text, expression_symbols=None):
    """
    Match ``text`` as an SPDX license expression and return the rendered
    license expression string, or None when no expression is obtained.

    ``expression_symbols`` is an optional mapping of
    {lowered key: LicenseSymbol}. When it is missing or empty, the standard
    SPDX license symbols are used instead. A custom mapping is meant for mixed
    standard SPDX and non-standard SPDX-like symbols, as found for instance in
    some package manifests.
    """
    from licensedcode.cache import get_spdx_symbols
    from licensedcode.cache import get_unknown_spdx_symbol

    licensing = Licensing()
    symbols = expression_symbols or get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()

    # only the expression part of the prepared text is parsed
    _prefix, expression_text = prepare_text(text)

    parsed = get_expression(
        text=expression_text,
        licensing=licensing,
        expression_symbols=symbols,
        unknown_symbol=unknown,
    )
    return None if parsed is None else parsed.render()
def test_get_expression_simple(self):
    """
    Check that a single SPDX identifier renders as its license key.
    """
    parsed = get_expression(
        '* SPDX-License-Identifier: BSD-3-Clause',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    assert parsed.render() == 'bsd-new'
def test_get_expression_simple_with(self):
    """
    Check a "license WITH exception" expression inside a C-style comment.
    """
    parsed = get_expression(
        '/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    rendered = parsed.render()
    assert rendered == 'lgpl-2.0-plus WITH linux-syscall-exception-gpl'
def test_get_expression_license_ref(self):
    """
    Check that the 'LicenseRef-ABC' identifier renders as 'unknown-spdx'.
    """
    parsed = get_expression(
        '/* SPDX-License-Identifier: LicenseRef-ABC */',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    assert parsed.render() == 'unknown-spdx'
def test_get_expression_with_empty_expression_should_return_unknown(self):
    """
    Check that a bare SPDX-License-Identifier tag with no expression yields
    None.

    NOTE: despite the "should_return_unknown" name, the assertion checks for
    None and not for the unknown symbol.
    """
    licensing = Licensing()
    spdx_symbols = get_spdx_symbols()
    unknown_symbol = get_unknown_spdx_symbol()
    line_text = '* SPDX-License-Identifier:'
    expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol)
    # Identity check: an object with a permissive __eq__ must not pass here.
    assert expression is None
def test_get_expression_multiple_or(self):
    """
    Check a parenthesized chain of four identifiers joined with OR.
    """
    parsed = get_expression(
        '* SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    rendered = parsed.render()
    assert rendered == 'bsd-new OR epl-1.0 OR apache-2.0 OR mit'
def test_get_expression_extra_parens_2(self):
    """
    Check a parenthesized OR expression behind a '//' comment marker.
    """
    parsed = get_expression(
        '// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    rendered = parsed.render()
    assert rendered == 'gpl-2.0 OR bsd-simplified'
def test_get_expression_without_and_should_not_return_unknown(self):
    """
    Check that two identifiers listed without a joining keyword do not yield
    the unknown SPDX symbol.
    """
    licensing = Licensing()
    symbols = get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()
    parsed = get_expression(
        '* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause',
        licensing,
        symbols,
        unknown,
    )
    assert parsed != unknown
def test_get_expression_quoted(self):
    """
    Check an identifier line wrapped in double quotes and prefixed with LIST.
    """
    parsed = get_expression(
        'LIST "SPDX-License-Identifier: GPL-2.0"',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    assert parsed.render() == 'gpl-2.0'
def generate_license_tests(location):
    """
    Write a license test text file and a companion ``.yml`` data file for
    each test collected from ``location`` by ``collect_tests`` and
    ``collect_url_tests``.

    The expected license key of each test is lowercased and mapped through the
    SPDX symbols and ``extra_license_keys``. A key that is missing or has no
    mapping becomes 'unknown'.
    """
    # map their keys to ours
    license_mapping = {
        spdx_key: lic.key for spdx_key, lic in get_spdx_symbols().items()
    }
    license_mapping.update(extra_license_keys)

    # Collect every test before writing anything: the loop below writes at
    # each test location.
    tests = list(collect_tests(location)) + list(collect_url_tests(location))

    for test in tests:
        loc = test.location
        print(f'Processing: {loc}')

        # Explicit UTF-8 so the output does not depend on the platform locale.
        with open(loc, 'w', encoding='utf-8') as txt:
            txt.write(test.text)

        their_key = test.license_key
        lookup_key = their_key.lower() if their_key else None
        lickey = license_mapping.get(lookup_key) or 'unknown'

        url = f'https://raw.githubusercontent.com/google/licensecheck/v0.3.1/testdata/{test.filename}'
        notes = (
            'License test derived from a file of the BSD-licensed repository at:\n'
            f'{url}\n'
            f'originally expected to be detected as {test.license_key}\n'
            f'with coverage of {test.coverage}\n'
            + (test.notes or '')
        )
        data = dict(license_expressions=[lickey], notes=notes)

        with open(loc + '.yml', 'w', encoding='utf-8') as td:
            td.write(saneyaml.dump(data))
def test_get_expression_with_empty_expression2_should_return_unknown(self):
    """
    Check that an empty string yields None.

    NOTE: despite the "should_return_unknown" name, the assertion checks for
    None and not for the unknown symbol.
    """
    licensing = Licensing()
    spdx_symbols = get_spdx_symbols()
    unknown_symbol = get_unknown_spdx_symbol()
    line_text = ''
    expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol)
    # Identity check: an object with a permissive __eq__ must not pass here.
    assert expression is None
def test__reparse_invalid_expression_with_non_balanced_parens_should_return_a_proper_expression(self):
    """
    Check that reparsing text with unbalanced parentheses yields the known
    licenses ANDed together, then ANDed with the unknown SPDX symbol.
    """
    reparsed = _reparse_invalid_expression(
        '(GPL-2.0+ and (BSD-2-Clause ',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    rendered = reparsed.render()
    assert rendered == '(gpl-2.0-plus AND bsd-simplified) AND unknown-spdx'
def spdx_id_match(idx, query_run, text):
    """
    Match ``text`` as an SPDX license expression and return a single
    LicenseMatch, or None when no expression is obtained from the text.

    ``query_run`` provides the matched positions and tokens; ``idx`` is the
    index, used here for its ``len_legalese`` threshold.
    """
    from licensedcode.cache import get_spdx_symbols
    from licensedcode.cache import get_unknown_spdx_symbol

    if TRACE:
        logger_debug('spdx_id_match: start:', 'text:', text, 'query_run:', query_run)

    licensing = Licensing()
    spdx_symbols = get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()

    _prefix, expression_text = prepare_text(text)
    parsed = get_expression(expression_text, licensing, spdx_symbols, unknown)
    if parsed is None:
        return

    rendered = parsed.render()

    length = len(query_run)
    start = query_run.start
    tokens = query_run.tokens

    # build synthetic rule
    # TODO: ensure that all the SPDX license keys are known symbols
    # FIXME: for now we are putting the original query text as a rule text:
    # this is likely incorrect when it comes to properly computing the known
    # and unknowns and high and lows for this rule. Alternatively we could use
    # the expression string, padded with spdx-license-identifier: this may be
    # wrong too, if the line was not padded originally with this tag
    synthetic_rule = SpdxRule(
        license_expression=rendered,
        stored_text=text,
        length=length,
    )

    # build match from parsed expression
    # the query span covers the whole text, from match start to match end
    query_span = Span(range(start, query_run.end + 1))

    # the index-side span is built from the query side
    index_span = Span(range(length))

    legalese_limit = idx.len_legalese
    high_span = Span(
        pos for pos, token_id in enumerate(tokens)
        if token_id < legalese_limit
    )

    return LicenseMatch(
        rule=synthetic_rule,
        qspan=query_span,
        ispan=index_span,
        hispan=high_span,
        query_run_start=start,
        matcher=MATCH_SPDX_ID,
        query=query_run.query,
    )
def test__reparse_invalid_expression_without_or_should_return_a_proper_expression(self):
    """
    Check that reparsing two identifiers listed without a keyword joins them
    with OR.
    """
    # this is a uboot-style legacy expression without OR
    reparsed = _reparse_invalid_expression(
        'GPL-2.0+ BSD-2-Clause',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    rendered = reparsed.render()
    assert rendered == 'gpl-2.0-plus OR bsd-simplified'
def test__parse_expression_without_and_raise_exception(self):
    """
    Check that _parse_expression raises when two identifiers are listed
    without a joining keyword.
    """
    licensing = Licensing()
    spdx_symbols = get_spdx_symbols()
    unknown_symbol = get_unknown_spdx_symbol()
    line_text = '* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause'
    try:
        _parse_expression(line_text, licensing, spdx_symbols, unknown_symbol)
    except Exception:
        # Any parsing error is the expected outcome.
        pass
    else:
        # Fail outside of the try body: a bare "except:" around self.fail()
        # would swallow its AssertionError and the test could never fail.
        self.fail('exception should be raised')
def test__parse_expression_with_empty_expression_should_raise_ExpressionError(self):
    """
    Check that _parse_expression raises an ExpressionError for a bare
    SPDX-License-Identifier tag with no expression.
    """
    licensing = Licensing()
    symbols = get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()
    text = '* SPDX-License-Identifier:'
    try:
        _parse_expression(text, licensing, symbols, unknown)
    except ExpressionError:
        pass
    else:
        self.fail('ExpressionError not raised')
def test__reparse_invalid_expression_with_improper_keyword_should_return_a_proper_expression(self):
    """
    Check that reparsing text with a misplaced leading "or" keyword yields
    the known licenses ANDed together, then ANDed with the unknown SPDX
    symbol.
    """
    reparsed = _reparse_invalid_expression(
        '* SPDX-License-Identifier: or GPL-2.0+ BSD-2-Clause ',
        Licensing(),
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )
    rendered = reparsed.render()
    assert rendered == '(gpl-2.0-plus AND bsd-simplified) AND unknown-spdx'
def test_get_expression_without_lid(self):
    """
    Check an expression given without any SPDX-License-Identifier prefix: its
    rendering, its non-unique license keys and that all symbols are wrapped.
    """
    licensing = Licensing()
    symbols = get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()
    text = (
        'EPL-2.0 OR Apache-2.0 OR '
        'GPL-2.0 WITH Classpath-exception-2.0 OR '
        'GPL-2.0'
    )
    parsed = get_expression(text, licensing, symbols, unknown)

    expected_expression = (
        'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0'
    )
    assert parsed.render() == expected_expression

    expected_keys = [
        'epl-2.0',
        'apache-2.0',
        'gpl-2.0',
        'classpath-exception-2.0',
        'gpl-2.0',
    ]
    assert licensing.license_keys(parsed, unique=False) == expected_keys

    decomposed = licensing.license_symbols(parsed, decompose=True)
    assert all(symbol.wrapped for symbol in decomposed)
def test_get_spdx_symbols_from_dir(self):
    """
    Check the mapping of SPDX symbol to license key built from the 'spdx/db'
    test licenses directory.
    """
    licenses_dir = self.get_test_loc('spdx/db')
    from licensedcode.models import load_licenses

    licenses = load_licenses(licenses_dir)
    symbols = cache.get_spdx_symbols(_test_licenses=licenses)
    key_by_symbol = {
        symbol: lic.key for symbol, lic in symbols.items()
    }
    assert key_by_symbol == {
        'bar': 'xxd',
        'foo': 'xxd',
        'qt-lgpl-exception-1.1': 'qt-lgpl-exception-1.1',
        'xskat': 'xskat',
    }
def test_get_expression_complex(self):
    """
    Check a mixed-case OR/WITH expression: its rendering, its unique license
    keys and that all symbols are wrapped.
    """
    licensing = Licensing()
    symbols = get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()
    text = (
        '* SPDX-License-Identifier: '
        'EPL-2.0 OR aPache-2.0 OR '
        'GPL-2.0 WITH classpath-exception-2.0 OR '
        'GPL-2.0'
    )
    parsed = get_expression(text, licensing, symbols, unknown)

    expected_expression = (
        'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0'
    )
    assert parsed.render() == expected_expression

    expected_keys = [
        'epl-2.0',
        'apache-2.0',
        'gpl-2.0',
        'classpath-exception-2.0',
    ]
    assert licensing.license_keys(parsed, unique=True) == expected_keys

    decomposed = licensing.license_symbols(parsed, decompose=True)
    assert all(symbol.wrapped for symbol in decomposed)
def test_get_expression_complex_with_unknown_symbols_and_refs(self):
    """
    Check an expression that mixes known identifiers with a LicenseRef and an
    unknown exception: both of these render as 'unknown-spdx'.
    """
    licensing = Licensing()
    symbols = get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()
    text = (
        '* SPDX-License-Identifier: '
        'EPL-2.0 OR Apache-2.0 '
        'OR GPL-2.0 WITH Classpath-exception-2.0 '
        'OR LicenseRef-GPL-2.0 WITH Assembly-exception'
    )
    parsed = get_expression(text, licensing, symbols, unknown)

    expected_expression = (
        'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR unknown-spdx WITH unknown-spdx'
    )
    assert parsed.render() == expected_expression

    expected_keys = [
        'epl-2.0',
        'apache-2.0',
        'gpl-2.0',
        'classpath-exception-2.0',
        'unknown-spdx',
        'unknown-spdx',
    ]
    assert licensing.license_keys(parsed, unique=False) == expected_keys

    decomposed = licensing.license_symbols(parsed, decompose=True)
    assert all(symbol.wrapped for symbol in decomposed)
def test_get_expression_works_for_legacy_deprecated_old_spdx_symbols(self):
    """
    Check that each old SPDX identifier below renders as the expected
    "license WITH exception" expression.
    """
    expected_by_old_id = {
        'eCos-2.0': 'gpl-2.0-plus WITH ecos-exception-2.0',
        'GPL-2.0-with-autoconf-exception': 'gpl-2.0 WITH autoconf-exception-2.0',
        'GPL-2.0-with-bison-exception': 'gpl-2.0 WITH bison-exception-2.2',
        'GPL-2.0-with-classpath-exception': 'gpl-2.0 WITH classpath-exception-2.0',
        'GPL-2.0-with-font-exception': 'gpl-2.0 WITH font-exception-gpl',
        'GPL-2.0-with-GCC-exception': 'gpl-2.0 WITH gcc-linking-exception-2.0',
        'GPL-3.0-with-autoconf-exception': 'gpl-3.0 WITH autoconf-exception-3.0',
        'GPL-3.0-with-GCC-exception': 'gpl-3.0 WITH gcc-exception-3.1',
        'wxWindows': 'lgpl-2.0-plus WITH wxwindows-exception-3.1',
    }
    licensing = Licensing()
    symbols = get_spdx_symbols()
    unknown = get_unknown_spdx_symbol()

    for old_id, expected_expression in expected_by_old_id.items():
        parsed = get_expression(old_id, licensing, symbols, unknown)
        rendered = parsed.render()
        assert rendered == expected_expression
def test_get_spdx_symbols_checks_duplicates_with_deprecated_on_live_db(self):
    """
    Build the SPDX symbols from the licenses loaded with deprecated ones
    included. There is no assertion: the test passes when nothing is raised.
    """
    from licensedcode.models import load_licenses

    all_licenses = load_licenses(with_deprecated=True)
    cache.get_spdx_symbols(_test_licenses=all_licenses)
def spdx_id_match(idx, query_run, text):
    """
    Return one LicenseMatch by matching the `text` as an SPDX license
    expression using the `query_run` positions and `idx` index for support.
    Return None when no expression is obtained from the `text`.
    """
    from licensedcode.cache import get_spdx_symbols
    from licensedcode.cache import get_unknown_spdx_symbol

    if TRACE:
        logger_debug('spdx_id_match: start:', 'text:', text, 'query_run:', query_run)

    licensing = Licensing()
    symbols_by_spdx = get_spdx_symbols()
    unknown_symbol = get_unknown_spdx_symbol()

    expression = get_expression(text, licensing, symbols_by_spdx, unknown_symbol)
    if expression is None:
        # No expression to render: bail out rather than fail on None.render()
        return

    expression_str = expression.render()

    if TRACE:
        logger_debug('spdx_id_match: expression:', repr(expression_str))

    match_len = len(query_run)
    match_start = query_run.start
    matched_tokens = query_run.tokens

    # are we starting with SPDX-License-Identifier or not? if yes: fix start
    cleaned = clean_text(text).lower()
    # FIXME: dnl and rem may not be known tokens hence the pos will be wrong
    if cleaned.startswith(('list', 'dnl', 'rem',)):
        match_start += 1
        match_len -= 1
        # Drop the leading token so that the hispan positions computed below
        # stay within the shortened ispan. Rebinding to a slice leaves
        # query_run.tokens itself untouched.
        matched_tokens = matched_tokens[1:]

    # build synthetic rule
    # TODO: ensure that all the SPDX license keys are known symbols
    rule = SpdxRule(
        license_expression=expression_str,
        # FIXME: for now we are putting the original query text as a
        # rule text: this is likely incorrect when it comes to properly
        # computing the known and unknowns and high and lows for this rule.
        # alternatively we could use the expression string, padded with
        # spdx-license-identifier: this may be wrong too, if the line was
        # not padded originally with this tag
        stored_text=text,
        length=match_len,
    )

    # build match from parsed expression
    # collect match start and end: e.g. the whole text
    qspan = Span(range(match_start, query_run.end + 1))

    # we use the query side to build the ispans
    ispan = Span(range(0, match_len))

    len_junk = idx.len_junk
    hispan = Span(p for p, t in enumerate(matched_tokens) if t >= len_junk)

    match = LicenseMatch(
        rule=rule,
        qspan=qspan,
        ispan=ispan,
        hispan=hispan,
        query_run_start=match_start,
        matcher=MATCH_SPDX_ID,
        query=query_run.query,
    )

    if TRACE:
        logger_debug('spdx_id_match: match found:', match)

    return match
def test_get_spdx_symbols(self):
    """
    Check that the default SPDX symbols contain the 'mit' key.
    """
    symbols = cache.get_spdx_symbols()
    assert 'mit' in symbols