def test_get_full_matched_text(self):
        """
        Match a query against an index holding a single templated rule and
        check the text rebuilt by get_full_matched_text in three ways: with
        its default arguments, with a custom highlight_not_matched template
        and with whole_lines=True.
        """
        templated_rule_text = u'''
            Copyright {{some copyright}}
            THIS IS FROM {{THE CODEHAUS}} AND CONTRIBUTORS
            IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE {{POSSIBILITY OF SUCH}} DAMAGE
        '''

        license_index = index.LicenseIndex(
            [Rule(_text=templated_rule_text, licenses=['test'])])

        query_text = u'''
            foobar 45 Copyright 2003 (C) James. All Rights Reserved.
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC
        '''
        matches = license_index.match(query_string=query_text)
        # the query is expected to produce exactly one match
        assert len(matches) == 1
        only_match = matches[0]

        def render(**options):
            # Join the pieces returned by get_full_matched_text into one
            # string; `options` are forwarded as extra keyword arguments.
            return u''.join(get_full_matched_text(
                only_match,
                query_string=query_text,
                idx=license_index,
                **options))

        # default arguments: some tokens come back wrapped in square brackets
        expected_default = u"""Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].
            THIS IS FROM [THE] [CODEHAUS] AND CONTRIBUTORS
            IN NO EVENT SHALL [THE] [best] [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE [POSSIBILITY] [OF] [SUCH] DAMAGE"""
        assert render() == expected_default

        # same tokens, now wrapped using a custom highlight template
        expected_templated = u"""Copyright <br>2003</br> (<br>C</br>) <br>James</br>. <br>All</br> <br>Rights</br> <br>Reserved</br>.
            THIS IS FROM <br>THE</br> <br>CODEHAUS</br> AND CONTRIBUTORS
            IN NO EVENT SHALL <br>THE</br> <br>best</br> <br>CODEHAUS</br> OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE <br>POSSIBILITY</br> <br>OF</br> <br>SUCH</br> DAMAGE"""
        assert render(highlight_not_matched=u'<br>%s</br>') == expected_templated

        # whole_lines=True with a pass-through template: complete query lines
        # are expected, leading indentation and trailing newline included
        expected_whole_lines = u"""            foobar 45 Copyright 2003 (C) James. All Rights Reserved.
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC\n"""
        assert render(highlight_not_matched=u'%s', whole_lines=True) == expected_whole_lines
Example #2
0
    def test_get_full_matched_text_base(self):
        """
        Match a query against an index holding a single templated rule (built
        with stored_text and license_expression) and check the text rebuilt by
        get_full_matched_text with its default arguments.
        """
        templated_rule_text = u'''
            Copyright {{some copyright}}
            THIS IS FROM {{THE CODEHAUS}} AND CONTRIBUTORS
            IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE {{POSSIBILITY OF SUCH}} DAMAGE
        '''

        license_index = index.LicenseIndex([
            Rule(stored_text=templated_rule_text, license_expression='test'),
        ])

        query_text = u'''
            foobar 45 . Copyright 2003 (C) James. All Rights Reserved.
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC dasdasda .
        '''
        matches = license_index.match(query_string=query_text)
        # the query is expected to produce exactly one match
        assert len(matches) == 1
        only_match = matches[0]

        # only some tokens are expected to come back wrapped in square brackets
        expected = u"""Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE [best] CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."""
        pieces = get_full_matched_text(
            only_match, query_string=query_text, idx=license_index)
        assert u''.join(pieces) == expected
Example #3
0
def get_licenses(location, min_score=0, include_text=False, diag=False):
    """
    Yield one ordered mapping of license data for each license of each match
    detected in the file at `location`.

    `min_score` is a minimum score threshold from 0 to 100 that is passed to
    the index match call. The default of 0 means that all license matches are
    returned. With any other value, matches that have a score below
    `min_score` will not be returned.

    If `include_text` is True, the matched text is added under the
    `matched_text` key of each yielded mapping.

    If `diag` is True, additional match details are added to the nested
    `matched_rule` mapping of each yielded mapping.
    """
    from licensedcode.cache import get_index
    from licensedcode.cache import get_licenses_db
    from licensedcode.match import get_full_matched_text

    license_index = get_index()
    licenses_by_key = get_licenses_db()

    for license_match in license_index.match(location=location, min_score=min_score):
        # The matched text is built once per match and shared by every
        # mapping yielded for that match.
        text = None
        if include_text:
            pieces = get_full_matched_text(
                license_match,
                location=location,
                idx=license_index,
                whole_lines=False)
            text = u''.join(pieces)

        for license_key in license_match.rule.licenses:
            lic = licenses_by_key.get(license_key)

            # Key order matters: the mapping is ordered.
            entry = OrderedDict([
                ('key', lic.key),
                ('score', license_match.score()),
                ('short_name', lic.short_name),
                ('category', lic.category),
                ('owner', lic.owner),
                ('homepage_url', lic.homepage_url),
                # first text URL if any, otherwise an empty string
                ('text_url', lic.text_urls[0] if lic.text_urls else ''),
                ('dejacode_url', DEJACODE_LICENSE_URL.format(lic.key)),
            ])

            spdx_key = lic.spdx_license_key
            entry['spdx_license_key'] = spdx_key
            # The SPDX URL is built from the key without any trailing "+";
            # it is empty when the license has no SPDX key.
            entry['spdx_url'] = (
                SPDX_LICENSE_URL.format(spdx_key.rstrip('+')) if spdx_key else '')

            entry['start_line'] = license_match.start_line
            entry['end_line'] = license_match.end_line

            rule_details = OrderedDict()
            entry['matched_rule'] = rule_details
            rule_details['identifier'] = license_match.rule.identifier
            rule_details['license_choice'] = license_match.rule.license_choice
            rule_details['licenses'] = license_match.rule.licenses
            if diag:
                rule_details['matcher'] = license_match.matcher
                rule_details['rule_length'] = license_match.rule.length
                rule_details['matched_length'] = license_match.ilen()
                rule_details['match_coverage'] = license_match.coverage()
                rule_details['rule_relevance'] = license_match.rule.relevance

            if include_text:
                entry['matched_text'] = text
            yield entry
    def test_get_full_matched_text(self):
        """
        Match a query against an index holding a single templated rule and
        check, case by case, the text rebuilt by get_full_matched_text for
        several combinations of keyword arguments.
        """
        templated_rule_text = u'''
            Copyright {{some copyright}}
            THIS IS FROM {{THE CODEHAUS}} AND CONTRIBUTORS
            IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE {{POSSIBILITY OF SUCH}} DAMAGE
        '''

        only_rule = Rule(_text=templated_rule_text, licenses=['test'])
        license_index = index.LicenseIndex([only_rule])

        query_text = u'''
            foobar 45 Copyright 2003 (C) James. All Rights Reserved.
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC
        '''
        matches = license_index.match(query_string=query_text)
        # the query is expected to produce exactly one match
        assert len(matches) == 1
        only_match = matches[0]

        # Each case pairs the extra keyword arguments given to
        # get_full_matched_text with the text expected back.
        cases = [
            # default arguments: some tokens are wrapped in square brackets
            ({}, u"""Copyright [2003] ([C]) [James]. [All] [Rights] [Reserved].
            THIS IS FROM [THE] [CODEHAUS] AND CONTRIBUTORS
            IN NO EVENT SHALL [THE] [best] [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE [POSSIBILITY] [OF] [SUCH] DAMAGE"""),

            # test again using a template
            ({'highlight_not_matched': u'<br>%s</br>'}, u"""Copyright <br>2003</br> (<br>C</br>) <br>James</br>. <br>All</br> <br>Rights</br> <br>Reserved</br>.
            THIS IS FROM <br>THE</br> <br>CODEHAUS</br> AND CONTRIBUTORS
            IN NO EVENT SHALL <br>THE</br> <br>best</br> <br>CODEHAUS</br> OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE <br>POSSIBILITY</br> <br>OF</br> <br>SUCH</br> DAMAGE"""),

            # test again using whole_lines
            ({'highlight_not_matched': u'%s', 'whole_lines': True}, u"""            foobar 45 Copyright 2003 (C) James. All Rights Reserved.
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE best CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. chabada DAMAGE 12 ABC\n"""),
        ]

        for options, expected in cases:
            pieces = get_full_matched_text(
                only_match, query_string=query_text, idx=license_index, **options)
            assert u''.join(pieces) == expected
Example #5
0
    def test_get_full_matched_text_does_not_munge_plus(self):
        """
        Check that get_full_matched_text returns a text containing "+"
        characters unchanged when the query is identical to the rule text.
        """
        # the rule text, the query and the expected matched text are all
        # this same string
        plus_text = 'MODULE_LICENSE_GPL+ +'

        license_index = index.LicenseIndex([
            Rule(stored_text=plus_text, license_expression='test'),
        ])

        matches = license_index.match(query_string=plus_text)
        # the query is expected to produce exactly one match
        assert len(matches) == 1

        pieces = get_full_matched_text(
            matches[0], query_string=plus_text, idx=license_index)
        assert u''.join(pieces) == plus_text