def test_get_full_matched_text(self):
    # Match a templated rule against a query that carries extra words, then
    # check the matched text rendered three ways: with the default
    # highlighting, with a custom highlighting template and with whole lines.
    template = (
        u' Copyright {{some copyright}} THIS IS FROM {{THE CODEHAUS}} AND'
        u' CONTRIBUTORS IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS'
        u' CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE'
        u' {{POSSIBILITY OF SUCH}} DAMAGE '
    )
    idx = index.LicenseIndex([Rule(_text=template, licenses=['test'])])

    query = (
        u' foobar 45 Copyright 2003 (C) James. All Rights Reserved.'
        u' THIS IS FROM THE CODEHAUS AND CONTRIBUTORS IN NO EVENT SHALL'
        u' THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED'
        u' OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC '
    )
    matches = idx.match(query_string=query)
    assert len(matches) == 1
    match = matches[0]

    # default rendering: highlighted tokens are wrapped in brackets
    bracketed = (
        u'Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].'
        u' THIS IS FROM [THE] [CODEHAUS] AND CONTRIBUTORS IN NO EVENT SHALL'
        u' [THE] [best] [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE EVEN IF'
        u' ADVISED OF THE [POSSIBILITY] [OF] [SUCH] DAMAGE'
    )
    rendered = u''.join(
        get_full_matched_text(match, query_string=query, idx=idx))
    assert bracketed == rendered

    # test again using a template
    tagged = (
        u'Copyright <br>2003</br> (<br>C</br>) <br>James</br>.'
        u' <br>All</br> <br>Rights</br> <br>Reserved</br>.'
        u' THIS IS FROM <br>THE</br> <br>CODEHAUS</br> AND CONTRIBUTORS'
        u' IN NO EVENT SHALL <br>THE</br> <br>best</br> <br>CODEHAUS</br>'
        u' OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE'
        u' <br>POSSIBILITY</br> <br>OF</br> <br>SUCH</br> DAMAGE'
    )
    rendered = u''.join(
        get_full_matched_text(
            match,
            query_string=query,
            idx=idx,
            highlight_not_matched=u'<br>%s</br>',
        ))
    assert tagged == rendered

    # test again using whole_lines
    full_lines = (
        u' foobar 45 Copyright 2003 (C) James. All Rights Reserved.'
        u' THIS IS FROM THE CODEHAUS AND CONTRIBUTORS IN NO EVENT SHALL'
        u' THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED'
        u' OF THE POSSIBILITY OF SUCH DAMAGE. \n'
        u'chabada DAMAGE 12 ABC\n'
    )
    rendered = u''.join(
        get_full_matched_text(
            match,
            query_string=query,
            idx=idx,
            highlight_not_matched=u'%s',
            whole_lines=True,
        ))
    assert full_lines == rendered
def test_get_full_matched_text_base(self):
    # Match a templated rule against a query surrounded by unrelated words and
    # check the matched text rendered with the default highlighting.
    template = (
        u' Copyright {{some copyright}} THIS IS FROM {{THE CODEHAUS}} AND'
        u' CONTRIBUTORS IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS'
        u' CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE'
        u' {{POSSIBILITY OF SUCH}} DAMAGE '
    )
    test_rule = Rule(stored_text=template, license_expression='test')
    idx = index.LicenseIndex([test_rule])

    query = (
        u' foobar 45 . Copyright 2003 (C) James. All Rights Reserved.'
        u' THIS IS FROM THE CODEHAUS AND CONTRIBUTORS IN NO EVENT SHALL'
        u' THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED'
        u' OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC'
        u' dasdasda . '
    )
    matches = idx.match(query_string=query)
    assert len(matches) == 1
    match = matches[0]

    bracketed = (
        u'Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].'
        u' THIS IS FROM THE CODEHAUS AND CONTRIBUTORS IN NO EVENT SHALL'
        u' THE [best] CODEHAUS OR ITS CONTRIBUTORS BE LIABLE EVEN IF'
        u' ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.'
    )
    rendered = u''.join(
        get_full_matched_text(match, query_string=query, idx=idx))
    assert bracketed == rendered
def get_licenses(location, min_score=0, include_text=False, diag=False):
    """
    Yield an ordered mapping of license data for each license key of each
    license match detected in the file at `location`.

    `min_score` is a minimum score threshold from 0 to 100. The default of 0
    means that all license matches are returned. With any other value,
    matches that have a score below `min_score` are not returned.

    If `include_text` is True, the matched text is returned under the
    `matched_text` key of each mapping.

    If `diag` is True, additional match details are returned under the
    `matched_rule` key of each mapping.
    """
    from licensedcode.cache import get_index
    from licensedcode.cache import get_licenses_db
    from licensedcode.match import get_full_matched_text

    idx = get_index()
    licenses_by_key = get_licenses_db()

    for match in idx.match(location=location, min_score=min_score):
        if include_text:
            # The text is built once per match and shared by all the
            # mappings yielded for that match.
            text_parts = get_full_matched_text(
                match, location=location, idx=idx, whole_lines=False)
            matched_text = u''.join(text_parts)

        for license_key in match.rule.licenses:
            lic = licenses_by_key.get(license_key)

            result = OrderedDict([
                ('key', lic.key),
                ('score', match.score()),
                ('short_name', lic.short_name),
                ('category', lic.category),
                ('owner', lic.owner),
                ('homepage_url', lic.homepage_url),
                ('text_url', lic.text_urls[0] if lic.text_urls else ''),
                ('dejacode_url', DEJACODE_LICENSE_URL.format(lic.key)),
            ])

            spdx_key = lic.spdx_license_key
            result['spdx_license_key'] = spdx_key
            # A trailing "+" is stripped from the key used in the URL only;
            # the reported key keeps it.
            result['spdx_url'] = (
                SPDX_LICENSE_URL.format(spdx_key.rstrip('+'))
                if spdx_key else ''
            )

            result['start_line'] = match.start_line
            result['end_line'] = match.end_line

            rule_details = OrderedDict([
                ('identifier', match.rule.identifier),
                ('license_choice', match.rule.license_choice),
                ('licenses', match.rule.licenses),
            ])
            result['matched_rule'] = rule_details

            if diag:
                rule_details['matcher'] = match.matcher
                rule_details['rule_length'] = match.rule.length
                rule_details['matched_length'] = match.ilen()
                rule_details['match_coverage'] = match.coverage()
                rule_details['rule_relevance'] = match.rule.relevance

            if include_text:
                result['matched_text'] = matched_text

            yield result
def test_get_full_matched_text(self):
    # Match a templated rule against a query that carries extra words, then
    # check the matched text rendered three ways: with the default
    # highlighting, with a custom highlighting template and with whole lines.
    template = (
        u' Copyright {{some copyright}} THIS IS FROM {{THE CODEHAUS}} AND'
        u' CONTRIBUTORS IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS'
        u' CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE'
        u' {{POSSIBILITY OF SUCH}} DAMAGE '
    )
    idx = index.LicenseIndex([Rule(_text=template, licenses=['test'])])

    query = (
        u' foobar 45 Copyright 2003 (C) James. All Rights Reserved.'
        u' THIS IS FROM THE CODEHAUS AND CONTRIBUTORS IN NO EVENT SHALL'
        u' THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED'
        u' OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC '
    )
    matches = idx.match(query_string=query)
    assert len(matches) == 1
    match = matches[0]

    # default rendering: highlighted tokens are wrapped in brackets
    bracketed = (
        u'Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].'
        u' THIS IS FROM [THE] [CODEHAUS] AND CONTRIBUTORS IN NO EVENT SHALL'
        u' [THE] [best] [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE EVEN IF'
        u' ADVISED OF THE [POSSIBILITY] [OF] [SUCH] DAMAGE'
    )
    rendered = u''.join(
        get_full_matched_text(match, query_string=query, idx=idx))
    assert bracketed == rendered

    # test again using a template
    tagged = (
        u'Copyright <br>2003</br> (<br>C</br>) <br>James</br>.'
        u' <br>All</br> <br>Rights</br> <br>Reserved</br>.'
        u' THIS IS FROM <br>THE</br> <br>CODEHAUS</br> AND CONTRIBUTORS'
        u' IN NO EVENT SHALL <br>THE</br> <br>best</br> <br>CODEHAUS</br>'
        u' OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED OF THE'
        u' <br>POSSIBILITY</br> <br>OF</br> <br>SUCH</br> DAMAGE'
    )
    rendered = u''.join(
        get_full_matched_text(
            match,
            query_string=query,
            idx=idx,
            highlight_not_matched=u'<br>%s</br>',
        ))
    assert tagged == rendered

    # test again using whole_lines
    full_lines = (
        u' foobar 45 Copyright 2003 (C) James. All Rights Reserved.'
        u' THIS IS FROM THE CODEHAUS AND CONTRIBUTORS IN NO EVENT SHALL'
        u' THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE EVEN IF ADVISED'
        u' OF THE POSSIBILITY OF SUCH DAMAGE. \n'
        u'chabada DAMAGE 12 ABC\n'
    )
    rendered = u''.join(
        get_full_matched_text(
            match,
            query_string=query,
            idx=idx,
            highlight_not_matched=u'%s',
            whole_lines=True,
        ))
    assert full_lines == rendered
def test_get_full_matched_text_does_not_munge_plus(self):
    # The "+" characters of the query must come back unchanged in the
    # matched text when the rule and the query are the same string.
    text_with_plus = 'MODULE_LICENSE_GPL+ +'

    test_rule = Rule(stored_text=text_with_plus, license_expression='test')
    idx = index.LicenseIndex([test_rule])

    matches = idx.match(query_string=text_with_plus)
    assert len(matches) == 1
    match = matches[0]

    rendered = u''.join(
        get_full_matched_text(match, query_string=text_with_plus, idx=idx))
    assert 'MODULE_LICENSE_GPL+ +' == rendered