def test_get_expression_without_lid(self):
    """
    Check get_expression() on an expression line that carries no
    "SPDX-License-Identifier:" tag: the rendered expression and the
    non-unique list of license keys must match the expected lowercase keys.
    """
    spdx_line = (
        'EPL-2.0 OR Apache-2.0 OR '
        'GPL-2.0 WITH Classpath-exception-2.0 OR '
        'GPL-2.0'
    )

    licensing = Licensing()
    parsed = get_expression(
        spdx_line,
        licensing,
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )

    expected_rendered = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0'
    assert parsed.render() == expected_rendered

    # unique=False: the repeated gpl-2.0 key is expected twice
    expected_keys = ['epl-2.0', u'apache-2.0', u'gpl-2.0', u'classpath-exception-2.0', u'gpl-2.0']
    assert licensing.license_keys(parsed, unique=False) == expected_keys

    # every decomposed symbol must have a truthy `wrapped` attribute
    for symbol in licensing.license_symbols(parsed, decompose=True):
        assert symbol.wrapped
def test_get_expression_complex(self):
    """
    Check get_expression() on a line prefixed with "* SPDX-License-Identifier:"
    that uses mixed-case license identifiers: the rendered expression and the
    unique list of license keys must match the expected lowercase keys.
    """
    spdx_line = (
        '* SPDX-License-Identifier: '
        'EPL-2.0 OR aPache-2.0 OR '
        'GPL-2.0 WITH classpath-exception-2.0 OR '
        'GPL-2.0'
    )

    licensing = Licensing()
    parsed = get_expression(
        spdx_line,
        licensing,
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )

    expected_rendered = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0'
    assert expected_rendered == parsed.render()

    # unique=True: the repeated gpl-2.0 key is expected only once
    expected_keys = ['epl-2.0', u'apache-2.0', u'gpl-2.0', u'classpath-exception-2.0']
    assert expected_keys == licensing.license_keys(parsed, unique=True)

    # every decomposed symbol must have a truthy `wrapped` attribute
    for symbol in licensing.license_symbols(parsed, decompose=True):
        assert symbol.wrapped
def test_get_expression_complex_with_unknown_symbols_and_refs(self):
    """
    Check get_expression() on a tagged line that ends with
    "LicenseRef-GPL-2.0 WITH Assembly-exception": both of these identifiers
    are expected to be rendered and keyed as "unknown-spdx".
    """
    spdx_line = (
        '* SPDX-License-Identifier: '
        'EPL-2.0 OR Apache-2.0 '
        'OR GPL-2.0 WITH Classpath-exception-2.0 '
        'OR LicenseRef-GPL-2.0 WITH Assembly-exception'
    )

    licensing = Licensing()
    parsed = get_expression(
        spdx_line,
        licensing,
        get_spdx_symbols(),
        get_unknown_spdx_symbol(),
    )

    expected_rendered = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR unknown-spdx WITH unknown-spdx'
    assert parsed.render() == expected_rendered

    # unique=False: unknown-spdx is expected once per occurrence
    expected_keys = ['epl-2.0', 'apache-2.0', 'gpl-2.0', 'classpath-exception-2.0', 'unknown-spdx', 'unknown-spdx']
    assert licensing.license_keys(parsed, unique=False) == expected_keys

    # every decomposed symbol must have a truthy `wrapped` attribute
    for symbol in licensing.license_symbols(parsed, decompose=True):
        assert symbol.wrapped
def spdx_id_match(idx, query_run, text):
    """
    Return one LicenseMatch by matching the `text` as an SPDX license
    expression using the `query_run` positions and `idx` index for support.

    `text` is parsed with get_expression() and the rendered expression string
    becomes the license expression of a synthetic SpdxRule.
    `query_run` provides the `start`, `end`, `tokens` and `query` attributes
    and its len() used to build the match spans.
    `idx` provides `len_junk`: positions of tokens whose id is equal or
    greater than `len_junk` are collected in the match hispan.
    """
    from licensedcode.cache import get_spdx_symbols
    from licensedcode.cache import get_unknown_spdx_symbol

    if TRACE:
        logger_debug('spdx_id_match: start:', 'text:', text, 'query_run:', query_run)

    licensing = Licensing()
    symbols_by_spdx = get_spdx_symbols()
    unknown_symbol = get_unknown_spdx_symbol()

    expression = get_expression(text, licensing, symbols_by_spdx, unknown_symbol)
    expression_str = expression.render()

    if TRACE:
        logger_debug('spdx_id_match: expression:', repr(expression_str))

    # TODO: count the known and unknown-spdx symbol occurrences once the
    # synthetic rule makes use of them.

    match_len = len(query_run)
    match_start = query_run.start
    matched_tokens = query_run.tokens

    # does the cleaned text start with a leading "list", "dnl" or "rem" word
    # (presumably a comment marker left before the SPDX tag)? if yes: skip
    # this first token by fixing the start, the length and the tokens
    cleaned = clean_text(text).lower()
    # FIXME: dnl and rem may not be known tokens hence the pos will be wrong
    if cleaned.startswith(('list', 'dnl', 'rem',)):
        match_start += 1
        match_len -= 1
        # rebind to the sliced tokens: a bare `matched_tokens[1:]` expression
        # would discard the slice and leave the hispan positions below
        # shifted by one relative to the ispan
        matched_tokens = matched_tokens[1:]

    # build synthetic rule
    # TODO: ensure that all the SPDX license keys are known symbols
    rule = SpdxRule(
        license_expression=expression_str,
        # FIXME: for now we are putting the original query text as a
        # rule text: this is likely incorrect when it comes to properly
        # computing the known and unknowns and high and lows for this rule.
        # alternatively we could use the expression string, padded with
        # spdx-license-identifier: this may be wrong too, if the line was
        # not padded originally with this tag
        stored_text=text,
        length=match_len,
    )

    # build match from parsed expression
    # collect match start and end: e.g. the whole text
    qspan = Span(range(match_start, query_run.end + 1))

    # we use the query side to build the ispans
    ispan = Span(range(0, match_len))

    # high positions are relative to the (possibly shortened) matched tokens
    len_junk = idx.len_junk
    hispan = Span(p for p, t in enumerate(matched_tokens) if t >= len_junk)

    match = LicenseMatch(
        rule=rule,
        qspan=qspan,
        ispan=ispan,
        hispan=hispan,
        query_run_start=match_start,
        matcher=MATCH_SPDX_ID,
        query=query_run.query,
    )

    if TRACE:
        logger_debug('spdx_id_match: match found:', match)

    return match