コード例 #1
0
    def test_match_exact_from_string_twice_with_repeated_text(self):
        """
        Match a query against a single rule whose text repeats the same phrase
        twice and check that exactly one match is returned. The same query is
        then matched a second time to check that the spans and texts are the
        same on the second run.
        """
        rule_text = u'licensed under the GPL, licensed under the GPL'
        # the rule has eight tokens, at positions 0 through 7
        tst_rule = models.Rule(licenses=['tst'], _text=rule_text)
        idx = index.LicenseIndex([tst_rule])

        # the rule text wrapped in a leading "Hi" and a trailing "yes."
        querys = u'Hi licensed under the GPL, licensed under the GPL yes.'

        expected_text = u'licensed under the GPL licensed under the GPL'
        expected_span = Span(0, 7)

        first_run = idx.match(query_string=querys)
        assert 1 == len(first_run)
        first = first_run[0]
        first_qtext, first_itext = get_texts(first, query_string=querys, idx=idx)
        assert expected_text == first_qtext
        assert expected_text == first_itext

        assert expected_span == first.qspan
        assert expected_span == first.ispan

        # match again to ensure that there are no state side effects
        second_run = idx.match(query_string=querys)
        assert 1 == len(second_run)
        second = second_run[0]
        assert expected_span == second.qspan
        assert expected_span == second.ispan

        second_qtext, second_itext = get_texts(second, query_string=querys, idx=idx)
        assert expected_text == second_qtext
        assert expected_text == second_itext
コード例 #2
0
    def test_match_exact_from_string_twice_with_repeated_text(self):
        """
        Index one rule made of a phrase repeated twice, match a query that
        contains this text, and check the single match returned. Run the same
        match once more and check that its spans and texts are unchanged.
        """
        idx = index.LicenseIndex([
            models.Rule(
                licenses=['tst'],
                # eight tokens, at positions 0 through 7
                _text=u'licensed under the GPL, licensed under the GPL',
            ),
        ])
        querys = u'Hi licensed under the GPL, licensed under the GPL yes.'

        matched_text = u'licensed under the GPL licensed under the GPL'
        full_span = Span(0, 7)

        # first run: texts are checked before spans
        results = idx.match(query_string=querys)
        assert 1 == len(results)
        found = results[0]
        qtext, itext = get_texts(found, query_string=querys, idx=idx)
        assert matched_text == qtext
        assert matched_text == itext
        assert full_span == found.qspan
        assert full_span == found.ispan

        # match again to ensure that there are no state side effects
        results = idx.match(query_string=querys)
        assert 1 == len(results)
        found = results[0]
        assert full_span == found.qspan
        assert full_span == found.ispan
        qtext, itext = get_texts(found, query_string=querys, idx=idx)
        assert matched_text == qtext
        assert matched_text == itext
コード例 #3
0
    def test_match_can_match_with_simple_rule_template2(self):
        """
        Check that a rule containing a {{...}} template region matches a query
        where that region holds different words, and that the matched query
        text shows these words in square brackets while the matched rule text
        leaves them out.
        """
        template_text = u'''
        IN NO EVENT SHALL THE {{X CONSORTIUM}}
        BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
        CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
        SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
        '''
        # same text as the rule, with "Y CORP" in place of the template region
        query_string = u'''
        IN NO EVENT SHALL THE Y CORP
        BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
        CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
        SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
        '''
        expected_qtokens = u'''
        IN NO EVENT SHALL THE [Y] [CORP] BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER
        LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT
        OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
        SOFTWARE
        '''.split()
        expected_itokens = u'''
        IN NO EVENT SHALL THE BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER LIABILITY
        WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT OF OR IN
        CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
        '''.split()

        template_rule = Rule(_text=template_text, licenses=['x-consortium'])
        idx = index.LicenseIndex([template_rule])

        found = idx.match(query_string=query_string)
        assert 1 == len(found)
        qtext, itext = get_texts(found[0], query_string=query_string, idx=idx)

        # compare token by token so that whitespace differences do not matter
        assert expected_qtokens == qtext.split()
        assert expected_itokens == itext.split()
コード例 #4
0
    def test_match_can_match_with_simple_rule_template2(self):
        """
        Build an index from one rule that has a {{...}} template region and
        match a query that puts other words in that region. Exactly one match
        is expected; its query text marks the extra words with square brackets
        and its rule text does not contain them.
        """
        idx = index.LicenseIndex([
            Rule(
                _text=u'''
        IN NO EVENT SHALL THE {{X CONSORTIUM}}
        BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
        CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
        SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
        ''',
                licenses=['x-consortium'],
            ),
        ])

        query_string = u'''
        IN NO EVENT SHALL THE Y CORP
        BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
        CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
        SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
        '''

        results = idx.match(query_string=query_string)
        assert 1 == len(results)
        texts = get_texts(results[0], query_string=query_string, idx=idx)
        qtext, itext = texts

        # query side: the words filling the template region are bracketed
        assert u'''
        IN NO EVENT SHALL THE [Y] [CORP] BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER
        LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT
        OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
        SOFTWARE
        '''.split() == qtext.split()

        # rule side: nothing appears where the template region was
        assert u'''
        IN NO EVENT SHALL THE BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER LIABILITY
        WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM OUT OF OR IN
        CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
        '''.split() == itext.split()
コード例 #5
0
    def test_match_with_templates_with_redundant_tokens_yield_single_exact_match(self):
        """
        Index a rule made of the same phrase on both sides of an empty {{}}
        template, check the index contents and the query tokens, then check
        that matching yields a single match with 100 coverage.
        """
        rule_text = u'copyright reserved mit is license, {{}} copyright reserved mit is license'
        # ten rule tokens, positions 0 through 9, five on each side of the template
        tst_rule = models.Rule(licenses=['tst'], _text=rule_text)
        idx = index.LicenseIndex([tst_rule])

        expected_idx = {
            '_tst_73_0': {
                u'copyright': [0, 5],
                u'license': [4, 9],
                u'mit': [2, 7],
            },
        }
        assert expected_idx == idx.to_dict()

        querys = u'Hi my copyright reserved mit is license is the copyright reserved mit is license yes.'
        qry = Query(query_string=querys, idx=idx)

        # one entry per query token position (0 through 14); None entries are
        # mapped to None, every other token id is converted to its string
        expected_tokens = [
            None, None,
            u'copyright', u'reserved', u'mit', u'is', u'license',
            u'is', None,
            u'copyright', u'reserved', u'mit', u'is', u'license',
            None,
        ]
        query_tokens = [
            None if tid is None else idx.tokens_by_tid[tid]
            for tid in qry.tokens_with_unknowns()
        ]
        assert expected_tokens == query_tokens

        found = idx.match(query_string=querys)
        assert 1 == len(found)

        single = found[0]
        # the matched query positions are two runs: 0 to 4 and 6 to 10
        expected_qspan = Span(0, 4) | Span(6, 10)
        assert expected_qspan == single.qspan
        assert Span(0, 9) == single.ispan
        assert 100 == single.coverage()

        qtext, itext = get_texts(single, query_string=querys, idx=idx)
        assert 'copyright reserved mit is license <is> [the] copyright reserved mit is license' == qtext
        assert 'copyright reserved mit is license copyright reserved mit is license' == itext
コード例 #6
0
    def test_match_has_correct_line_positions_for_query_with_repeats(self):
        """
        Match a test file with the index returned by cache.get_index() and
        check, for each returned match in order, the rule licenses, the
        matched line range and the matched query text.
        """
        apache_qtext = u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'
        line_ranges = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]
        # one (licenses, match.lines(), qtext) tuple per expected match
        expected = [([u'apache-2.0'], lines, apache_qtext) for lines in line_ranges]

        test_location = self.get_test_loc('positions/license1.txt')
        idx = cache.get_index()
        matches = idx.match(test_location)

        # NOTE(review): only the matches actually returned are checked; nothing
        # here compares len(matches) with len(expected).
        for i, match in enumerate(matches):
            expected_match = expected[i]
            ex_lics, ex_lines, ex_qtext = expected_match
            qtext, _itext = get_texts(match, location=test_location, idx=idx)

            try:
                assert ex_lics == match.rule.licenses
                assert ex_lines == match.lines()
                assert ex_qtext == qtext
            except AssertionError:
                # on failure, compare the whole tuple at once so that the
                # failure shows all three values together
                actual_match = (match.rule.licenses, match.lines(), qtext)
                assert expected_match == actual_match
コード例 #7
0
    def test_match_matches_correctly_simple_exact_query_across_query_runs(self):
        """
        Build an index from the rule file detect/mit/mit.c, match the query
        file detect/mit/mit3.c against it and check that a single match is
        returned with the expected matched query and rule texts.
        """
        expected_qtext = u'''
            Permission is hereby granted free of charge to any person obtaining a
            copy of this software and associated documentation files the Software to
            deal in THE SOFTWARE WITHOUT RESTRICTION INCLUDING WITHOUT LIMITATION THE
            RIGHTS TO USE COPY MODIFY MERGE PUBLISH DISTRIBUTE SUBLICENSE AND OR SELL
            COPIES of the Software and to permit persons to whom the Software is
            furnished to do so subject to the following conditions The above
            copyright notice and this permission notice shall be included in all
            copies or substantial portions of the Software
        '''.split()
        expected_itext = u'''
            Permission is hereby granted free of charge to any person obtaining a
            copy of this software and associated documentation files the Software to
            deal in the Software without restriction including without limitation
            the rights to use copy modify merge publish distribute sublicense and or
            sell copies of the Software and to permit persons to whom the Software
            is furnished to do so subject to the following conditions The above
            copyright notice and this permission notice shall be included in all
            copies or substantial portions of the Software
        '''.split()

        rule_location = self.get_test_loc('detect/mit/mit.c')
        mit_rule = Rule(text_file=rule_location, licenses=['mit'])
        idx = index.LicenseIndex([mit_rule])

        query_doc = self.get_test_loc('detect/mit/mit3.c')
        found = idx.match(query_doc)
        assert 1 == len(found)

        qtext, itext = get_texts(found[0], location=query_doc, idx=idx)
        # texts are compared as token lists, ignoring line wrapping
        assert expected_qtext == qtext.split()
        assert expected_itext == itext.split()
コード例 #8
0
    def test_match_matches_correctly_simple_exact_query_across_query_runs(self):
        """
        Index a single 'mit' rule loaded from detect/mit/mit.c and match the
        query file detect/mit/mit3.c. One match is expected; its query-side
        and rule-side texts are each compared token by token.
        """
        idx = index.LicenseIndex([
            Rule(text_file=self.get_test_loc('detect/mit/mit.c'), licenses=['mit']),
        ])
        query_doc = self.get_test_loc('detect/mit/mit3.c')

        results = idx.match(query_doc)
        assert 1 == len(results)
        texts = get_texts(results[0], location=query_doc, idx=idx)
        qtext, itext = texts

        # query side: part of the matched text is upper case in the query
        assert u'''
            Permission is hereby granted free of charge to any person obtaining a
            copy of this software and associated documentation files the Software to
            deal in THE SOFTWARE WITHOUT RESTRICTION INCLUDING WITHOUT LIMITATION THE
            RIGHTS TO USE COPY MODIFY MERGE PUBLISH DISTRIBUTE SUBLICENSE AND OR SELL
            COPIES of the Software and to permit persons to whom the Software is
            furnished to do so subject to the following conditions The above
            copyright notice and this permission notice shall be included in all
            copies or substantial portions of the Software
        '''.split() == qtext.split()

        # rule side
        assert u'''
            Permission is hereby granted free of charge to any person obtaining a
            copy of this software and associated documentation files the Software to
            deal in the Software without restriction including without limitation
            the rights to use copy modify merge publish distribute sublicense and or
            sell copies of the Software and to permit persons to whom the Software
            is furnished to do so subject to the following conditions The above
            copyright notice and this permission notice shall be included in all
            copies or substantial portions of the Software
        '''.split() == itext.split()
コード例 #9
0
    def test_overlap_detection5(self):
        """
        Check which rule is returned when one indexed rule text is contained
        in another indexed rule text and the query only contains the smaller
        one.
        """
        # Containment relationship between the indexed licenses:
        #
        #   +-license 2 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+
        #
        # The query has the text of license 1 with unrelated text around it.

        # setup index: license2 is license1 with one extra sentence on each side
        license1 = '''Redistribution and use permitted for MIT license.'''

        license2 = '''Redistributions of source must retain copyright.
        Redistribution and use permitted for MIT license.
        Redistributions in binary form is permitted.'''

        small_rule = Rule(_text=license1, licenses=['overlap'])
        large_rule = Rule(_text=license2, licenses=['overlap'])
        idx = index.LicenseIndex([small_rule, large_rule])

        querys = '''My source.
        Redistribution and use permitted for MIT license.
        My code.'''

        # test : querys contains license1: return license1 as exact coverage
        found = idx.match(query_string=querys)
        assert 1 == len(found)

        only_match = found[0]
        assert small_rule == only_match.rule
        qtext, _itext = get_texts(only_match, query_string=querys, idx=idx)
        assert 'Redistribution and use permitted for MIT license' == qtext
コード例 #10
0
    def test_overlap_detection5(self):
        """
        Index two rules where the text of the first is contained in the text
        of the second, then match a query that only contains the first text
        and check that the first rule is the single match returned.
        """
        #   Index licenses:
        #   +-license 2 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+

        querys = '''My source.
        Redistribution and use permitted for MIT license.
        My code.'''

        # setup index
        rule1 = Rule(
            _text='''Redistribution and use permitted for MIT license.''',
            licenses=['overlap'],
        )
        rule2 = Rule(
            _text='''Redistributions of source must retain copyright.
        Redistribution and use permitted for MIT license.
        Redistributions in binary form is permitted.''',
            licenses=['overlap'],
        )
        idx = index.LicenseIndex([rule1, rule2])

        # test : querys contains license1: return license1 as exact coverage
        results = idx.match(query_string=querys)
        assert 1 == len(results)

        result = results[0]
        assert rule1 == result.rule
        texts = get_texts(result, query_string=querys, idx=idx)
        qtext, _itext = texts
        assert 'Redistribution and use permitted for MIT license' == qtext
コード例 #11
0
    def test_match_with_templates_with_redundant_tokens_yield_single_exact_match(self):
        """
        Check that a rule repeating the same phrase before and after an empty
        {{}} template produces a single match with 100 coverage, after first
        checking the index contents and the tokens computed for the query.
        """
        idx = index.LicenseIndex([
            models.Rule(
                licenses=['tst'],
                # ten tokens, positions 0 through 9
                _text=u'copyright reserved mit is license, {{}} copyright reserved mit is license',
            ),
        ])
        assert {'_tst_73_0': {u'copyright': [0, 5], u'license': [4, 9], u'mit': [2, 7]}} == idx.to_dict()

        querys = u'Hi my copyright reserved mit is license is the copyright reserved mit is license yes.'
        qry = Query(query_string=querys, idx=idx)

        def as_strings(token_ids):
            # convert tid to actual token strings, keeping None entries as None
            strings = []
            for tid in token_ids:
                if tid is None:
                    strings.append(None)
                else:
                    strings.append(idx.tokens_by_tid[tid])
            return strings

        # the phrase appears twice in the query; 15 positions in total
        phrase = [u'copyright', u'reserved', u'mit', u'is', u'license']
        expected = [None, None] + phrase + [u'is', None] + phrase + [None]
        assert expected == as_strings(qry.tokens_with_unknowns())

        results = idx.match(query_string=querys)
        assert 1 == len(results)

        result = results[0]
        # union of the two matched runs of query positions
        matched_positions = Span(0, 4) | Span(6, 10)
        assert matched_positions == result.qspan
        assert Span(0, 9) == result.ispan
        assert 100 == result.coverage()

        texts = get_texts(result, query_string=querys, idx=idx)
        qtext, itext = texts
        assert 'copyright reserved mit is license <is> [the] copyright reserved mit is license' == qtext
        assert 'copyright reserved mit is license copyright reserved mit is license' == itext
コード例 #12
0
    def closure_test_function(*args, **kwargs):
        """
        Run license detection on `test_file` and check the detected license
        keys against `expected_licenses`.

        All inputs come from the enclosing scope: `test_file`, `min_score`,
        `detect_negative`, `expected_licenses`, `trace_text`, `test_data_file`
        and `test_name`. The positional and keyword arguments are accepted and
        ignored.

        When the check fails, a second assertion is made against a list that
        also holds the test name, the test file location and, if `trace_text`
        is set, the matched query and rule texts of every match. That second
        assertion always fails and is there to provide failure details.
        """
        idx = cache.get_index()
        matches = idx.match(
            location=test_file,
            min_score=min_score,
            # if negative, do not detect negative rules when testing negative rules
            detect_negative=detect_negative)

        if not matches:
            matches = []

        # TODO: we should expect matches properly, not with a grab bag of flat license keys
        # flattened list of all detected license keys across all matches.
        detected_licenses = functional.flatten(
            map(unicode, match.rule.licenses) for match in matches)
        try:
            if not detect_negative:
                # we skipped negative detection for a negative rule
                # we just want to ensure that the rule was matched proper
                assert matches and not expected_licenses and not detected_licenses
            else:
                assert expected_licenses == detected_licenses
        except AssertionError:
            # Only a failed check is handled here: anything else, such as a
            # KeyboardInterrupt, propagates unchanged.
            # On failure, we compare against more result data to get additional
            # failure details, including the test_file and full match details
            match_failure_trace = []

            if trace_text:
                for match in matches:
                    qtext, itext = get_texts(match,
                                             location=test_file,
                                             idx=idx)
                    rule_text_file = match.rule.text_file
                    rule_data_file = match.rule.data_file
                    match_failure_trace.extend([
                        '', '', '======= MATCH ====', match,
                        '======= Matched Query Text for:',
                        'file://{}'.format(test_file),
                    ])
                    if test_data_file:
                        match_failure_trace.append(
                            'file://{}'.format(test_data_file))
                    match_failure_trace.append(qtext.splitlines())
                    match_failure_trace.extend([
                        '',
                        # the header is its own item, like the query text
                        # header above, and is not fused with the file URL
                        '======= Matched Rule Text for:',
                        'file://{}'.format(rule_text_file),
                        'file://{}'.format(rule_data_file),
                        itext.splitlines(),
                    ])
            # this assert will always fail and provide a detailed failure trace
            assert expected_licenses == detected_licenses + [
                test_name, 'test file: file://' + test_file
            ] + match_failure_trace
コード例 #13
0
def print_matched_texts(match, location=None, query_string=None, idx=None):
    """
    Print the matched query text and the matched rule text of `match`, each
    under its own heading. This is a convenience for tracing and debugging
    tests.

    `location`, `query_string` and `idx` are passed through to get_texts().
    """
    texts = get_texts(match, location=location, query_string=query_string, idx=idx)
    qtext, itext = texts
    sections = (
        ('Matched qtext:', qtext),
        ('Matched itext:', itext),
    )
    for heading, text in sections:
        print()
        print(heading)
        print(text)
コード例 #14
0
    def test_match_in_binary_lkms_1(self):
        """
        Match the test file positions/ath_pci.ko with the index returned by
        cache.get_index() and check the single match: its rule licenses and
        its matched query and rule texts.
        """
        qloc = self.get_test_loc('positions/ath_pci.ko')
        idx = cache.get_index()

        found = idx.match(location=qloc)
        assert 1 == len(found)
        only_match = found[0]
        assert ['bsd-new', 'gpl-2.0'] == only_match.rule.licenses

        expected_text = 'license Dual BSD GPL'
        qtext, itext = get_texts(only_match, location=qloc, idx=idx)
        assert expected_text == qtext
        assert expected_text == itext
コード例 #15
0
    def test_match_in_binary_lkms_1(self):
        """
        Check that matching positions/ath_pci.ko returns exactly one match,
        for a rule with the 'bsd-new' and 'gpl-2.0' licenses, and that the
        matched texts on the query and rule sides are the same.
        """
        idx = cache.get_index()
        qloc = self.get_test_loc('positions/ath_pci.ko')
        results = idx.match(location=qloc)
        assert 1 == len(results)

        result = results[0]
        assert ['bsd-new', 'gpl-2.0'] == result.rule.licenses

        texts = get_texts(result, location=qloc, idx=idx)
        qtext, itext = texts
        assert 'license Dual BSD GPL' == qtext
        assert 'license Dual BSD GPL' == itext
コード例 #16
0
    def test_match_in_binary_lkms_2(self):
        """
        Match the test file positions/eeepc_acpi.ko with the index returned by
        cache.get_index() and check the single match: its rule licenses, its
        rule-side span and its matched texts.
        """
        qloc = self.get_test_loc('positions/eeepc_acpi.ko')
        idx = cache.get_index()

        found = idx.match(location=qloc)
        assert 1 == len(found)
        only_match = found[0]
        assert ['gpl-1.0-plus'] == only_match.rule.licenses
        assert only_match.ispan == Span(0, 1)

        # the two sides differ only by the case of the first word
        qtext, itext = get_texts(only_match, location=qloc, idx=idx)
        assert 'license GPL' == qtext
        assert 'License GPL' == itext
コード例 #17
0
    def test_match_can_match_with_plain_rule_simple2(self):
        """
        Index one plain rule (no template region) holding an X11 license text,
        match a test file against it and check that the single match returned
        has the expected matched query text.
        """
        x11_text = u'''X11 License
        Copyright (C) 1996 X Consortium
        Permission is hereby granted, free of charge, to any person obtaining a copy
        of this software and associated documentation files (the "Software"), to deal
        in the Software without restriction, including without limitation the rights
        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
        copies of the Software, and to permit persons to whom the Software is
        furnished to do so, subject to the following conditions: The above copyright
        notice and this permission notice shall be included in all copies or
        substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS",
        WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
        TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
        NONINFRINGEMENT. IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR ANY CLAIM,
        DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
        OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
        OR OTHER DEALINGS IN THE SOFTWARE. Except as contained in this notice, the
        name of the X Consortium shall not be used in advertising or otherwise to
        promote the sale, use or other dealings in this Software without prior
        written authorization from the X Consortium. X Window System is a trademark
        of X Consortium, Inc.
        '''
        expected_qtext = u'''
        X11 License Copyright C 1996 X Consortium Permission is hereby granted free
        of charge to any person obtaining a copy of this software and associated
        documentation files the Software to deal in the Software without restriction
        including without limitation the rights to use copy modify merge publish
        distribute sublicense and or sell copies of the Software and to permit
        persons to whom the Software is furnished to do so subject to the following
        conditions The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software THE SOFTWARE
        IS PROVIDED AS IS WITHOUT WARRANTY OF ANY KIND EXPRESS OR IMPLIED INCLUDING
        BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY FITNESS FOR A PARTICULAR
        PURPOSE AND NONINFRINGEMENT IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR
        ANY CLAIM DAMAGES OR OTHER LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR
        OTHERWISE ARISING FROM OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
        OR OTHER DEALINGS IN THE SOFTWARE Except as contained in this notice the name
        of the X Consortium shall not be used in advertising or otherwise to promote
        the sale use or other dealings in this Software without prior written
        authorization from the X Consortium X Window System is a trademark of X
        Consortium Inc
        '''.split()

        x11_rule = Rule(_text=x11_text, licenses=['x-consortium'])
        idx = index.LicenseIndex([x11_rule])

        query_loc = self.get_test_loc('detect/simple_detection/x11-xconsortium_text.txt')
        found = idx.match(location=query_loc)
        assert 1 == len(found)

        # only the query-side text is checked, as a list of tokens
        qtext, _itext = get_texts(found[0], location=query_loc, idx=idx)
        assert expected_qtext == qtext.split()
コード例 #18
0
    def test_match_in_binary_lkms_2(self):
        """
        Check that matching positions/eeepc_acpi.ko returns exactly one match,
        for a rule with the 'gpl' license and a rule-side span of Span(0, 1),
        and check the matched texts on both sides.
        """
        idx = cache.get_index()
        qloc = self.get_test_loc('positions/eeepc_acpi.ko')
        results = idx.match(location=qloc)
        assert 1 == len(results)

        result = results[0]
        assert ['gpl'] == result.rule.licenses
        assert result.ispan == Span(0, 1)

        texts = get_texts(result, location=qloc, idx=idx)
        qtext, itext = texts
        # query side has a lower case "license", rule side has "License"
        assert 'license GPL' == qtext
        assert 'License GPL' == itext
コード例 #19
0
    def test_match_return_correct_positions_with_short_index_and_queries(self):
        """
        Index a single two-token rule and match several short queries against
        it, checking for every match the matched texts and the query-side and
        rule-side spans.
        """
        rule_text = 'MIT License'
        idx = index.LicenseIndex([Rule(_text=rule_text, licenses=['mit'])])
        rule_span = Span(0, 1)

        def check(match, query, expected_qtext, expected_qspan):
            # every match is against the same rule, so only the query side
            # varies: the rule-side text and span are always the same
            qtext, itext = get_texts(match, query_string=query, idx=idx)
            assert expected_qtext == qtext
            assert rule_text == itext
            assert expected_qspan == match.qspan
            assert rule_span == match.ispan

        # query identical to the rule text
        query = 'MIT License'
        matches = idx.match(query_string=query)
        assert 1 == len(matches)

        assert {'_tst_11_0': {'mit': [0]}} == idx.to_dict()

        check(matches[0], query, 'MIT License', Span(0, 1))

        # query with an extra leading token: the match starts one position later
        query = 'MIT MIT License'
        matches = idx.match(query_string=query)
        assert 1 == len(matches)
        check(matches[0], query, 'MIT License', Span(1, 2))

        # queries holding the rule text twice, on one line and across lines
        query_doc1 = 'do you think I am a mit license MIT License, yes, I think so'
        query_doc2 = '''do you think I am a mit license
                        MIT License
                        yes, I think so'''
        for query in (query_doc1, query_doc2):
            matches = idx.match(query_string=query)
            assert 2 == len(matches)
            check(matches[0], query, 'mit license', Span(0, 1))
            check(matches[1], query, 'MIT License', Span(2, 3))
コード例 #20
0
    def test_match_can_match_with_plain_rule_simple2(self):
        """
        Check that a plain X11 license rule built from an inline text matches
        the test file detect/simple_detection/x11-xconsortium_text.txt exactly
        once, and that the matched query text is the expected one.
        """
        idx = index.LicenseIndex([
            Rule(
                _text=u'''X11 License
        Copyright (C) 1996 X Consortium
        Permission is hereby granted, free of charge, to any person obtaining a copy
        of this software and associated documentation files (the "Software"), to deal
        in the Software without restriction, including without limitation the rights
        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
        copies of the Software, and to permit persons to whom the Software is
        furnished to do so, subject to the following conditions: The above copyright
        notice and this permission notice shall be included in all copies or
        substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS",
        WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
        TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
        NONINFRINGEMENT. IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR ANY CLAIM,
        DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
        OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
        OR OTHER DEALINGS IN THE SOFTWARE. Except as contained in this notice, the
        name of the X Consortium shall not be used in advertising or otherwise to
        promote the sale, use or other dealings in this Software without prior
        written authorization from the X Consortium. X Window System is a trademark
        of X Consortium, Inc.
        ''',
                licenses=['x-consortium'],
            ),
        ])

        query_loc = self.get_test_loc('detect/simple_detection/x11-xconsortium_text.txt')
        results = idx.match(location=query_loc)
        assert 1 == len(results)

        texts = get_texts(results[0], location=query_loc, idx=idx)
        qtext, _itext = texts

        # the matched query text is compared as a list of tokens
        assert u'''
        X11 License Copyright C 1996 X Consortium Permission is hereby granted free
        of charge to any person obtaining a copy of this software and associated
        documentation files the Software to deal in the Software without restriction
        including without limitation the rights to use copy modify merge publish
        distribute sublicense and or sell copies of the Software and to permit
        persons to whom the Software is furnished to do so subject to the following
        conditions The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software THE SOFTWARE
        IS PROVIDED AS IS WITHOUT WARRANTY OF ANY KIND EXPRESS OR IMPLIED INCLUDING
        BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY FITNESS FOR A PARTICULAR
        PURPOSE AND NONINFRINGEMENT IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR
        ANY CLAIM DAMAGES OR OTHER LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR
        OTHERWISE ARISING FROM OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
        OR OTHER DEALINGS IN THE SOFTWARE Except as contained in this notice the name
        of the X Consortium shall not be used in advertising or otherwise to promote
        the sale use or other dealings in this Software without prior written
        authorization from the X Consortium X Window System is a trademark of X
        Consortium Inc
        '''.split() == qtext.split()
コード例 #21
0
 def test_match_in_binary_lkms_3(self):
     # The wlan_xauth.ko test file must produce exactly one match against
     # the cached index, on a rule licensed as both bsd-new and gpl-2.0.
     license_index = cache.get_index()
     ko_location = self.get_test_loc('positions/wlan_xauth.ko')
     found = license_index.match(location=ko_location)
     assert len(found) == 1
     only_match = found[0]
     assert ['bsd-new', 'gpl-2.0'] == only_match.rule.licenses
     assert only_match.coverage() == 100
     assert only_match.score() == 20
     matched_query, matched_rule = get_texts(only_match, location=ko_location, idx=license_index)
     # the matched query text and the matched rule text are the same here
     expected_text = 'license Dual BSD GPL'
     assert matched_query == expected_text
     assert matched_rule == expected_text
     assert Span(0, 3) == only_match.ispan
コード例 #22
0
 def test_match_in_binary_lkms_3(self):
     # The wlan_xauth.ko test file must produce exactly one match against
     # the cached index, on a rule licensed as both bsd-new and gpl-2.0.
     license_index = cache.get_index()
     ko_location = self.get_test_loc('positions/wlan_xauth.ko')
     found = license_index.match(location=ko_location)
     assert len(found) == 1
     only_match = found[0]
     assert ['bsd-new', 'gpl-2.0'] == only_match.rule.licenses
     assert only_match.coverage() == 100
     assert only_match.score() == 20
     matched_query, matched_rule = get_texts(only_match, location=ko_location, idx=license_index)
     # the matched query text and the matched rule text are the same here
     expected_text = 'license Dual BSD GPL'
     assert matched_query == expected_text
     assert matched_rule == expected_text
     assert Span(0, 3) == only_match.ispan
コード例 #23
0
    def test_overlap_detection1(self):
        #  Exercise the containment relationships sketched below between the
        #  license texts that are indexed and the texts to detect:
        #   * Index licenses:
        #   +-license 2 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+
        #
        #   * License texts to detect:
        #   +- license 3 -----------+
        #   | +-license 2 --------+ |
        #   | |  +-license 1 --+  | |
        #   | +-------------------+ |
        #   +-----------------------+
        #
        #   +-license 4 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+

        # index setup: four rule texts, all using the same 'overlap' license key
        license1 = '''Redistribution and use permitted.'''

        license2 = '''Redistributions of source must retain copyright.
        Redistribution and use permitted.
        Redistributions in binary form is permitted.'''

        license3 = '''
        this license source
        Redistributions of source must retain copyright.
        Redistribution and use permitted.
        Redistributions in binary form is permitted.
        has a permitted license'''

        license4 = '''My Redistributions is permitted.
        Redistribution and use permitted.
        Use is permitted too.'''

        rules = [
            Rule(_text=text, licenses=['overlap'])
            for text in (license1, license2, license3, license4)
        ]
        idx = index.LicenseIndex(rules)

        # The query is license1 with one extra word ("bla") in the middle: a
        # single match on the first rule is expected, with the extra word shown
        # in square brackets in the matched query text.
        query_text = 'Redistribution and use bla permitted.'
        found = idx.match(query_string=query_text)
        assert len(found) == 1
        only_match = found[0]
        assert Span(0, 3) == only_match.qspan
        assert rules[0] == only_match.rule
        matched_query, _matched_rule = get_texts(only_match, query_string=query_text, idx=idx)
        assert matched_query == 'Redistribution and use [bla] permitted'
コード例 #24
0
    def test_overlap_detection1(self):
        #  Exercise the containment relationships sketched below between the
        #  license texts that are indexed and the texts to detect:
        #   * Index licenses:
        #   +-license 2 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+
        #
        #   * License texts to detect:
        #   +- license 3 -----------+
        #   | +-license 2 --------+ |
        #   | |  +-license 1 --+  | |
        #   | +-------------------+ |
        #   +-----------------------+
        #
        #   +-license 4 --------+
        #   |  +-license 1 --+  |
        #   +-------------------+

        # index setup: four rule texts, all using the same 'overlap' license key
        license1 = '''Redistribution and use permitted.'''

        license2 = '''Redistributions of source must retain copyright.
        Redistribution and use permitted.
        Redistributions in binary form is permitted.'''

        license3 = '''
        this license source
        Redistributions of source must retain copyright.
        Redistribution and use permitted.
        Redistributions in binary form is permitted.
        has a permitted license'''

        license4 = '''My Redistributions is permitted.
        Redistribution and use permitted.
        Use is permitted too.'''

        rules = [
            Rule(_text=text, licenses=['overlap'])
            for text in (license1, license2, license3, license4)
        ]
        idx = index.LicenseIndex(rules)

        # The query is license1 with one extra word ("bla") in the middle: a
        # single match on the first rule is expected, with the extra word shown
        # in square brackets in the matched query text.
        query_text = 'Redistribution and use bla permitted.'
        found = idx.match(query_string=query_text)
        assert len(found) == 1
        only_match = found[0]
        assert Span(0, 3) == only_match.qspan
        assert rules[0] == only_match.rule
        matched_query, _matched_rule = get_texts(only_match, query_string=query_text, idx=idx)
        assert matched_query == 'Redistribution and use [bla] permitted'
コード例 #25
0
    def test_match_exact_with_junk_in_between_good_tokens(self):
        # Extra words interleaved with the rule tokens in the query must still
        # give a single match; the extra words are shown in square brackets in
        # the matched query text.
        rule = models.Rule(
            licenses=['tst'],
            _text=u'licensed under the GPL, licensed under the GPL',
        )
        idx = index.LicenseIndex([rule])

        noisy_query = u'Hi licensed that under is the that GPL, licensed or under not the GPL by yes.'
        found = idx.match(query_string=noisy_query)
        assert len(found) == 1

        only_match = found[0]
        matched_query, matched_rule = get_texts(only_match, query_string=noisy_query, idx=idx)
        assert matched_query == u'licensed [that] under [is] the [that] GPL licensed [or] under [not] the GPL'
        assert matched_rule == u'licensed under the GPL licensed under the GPL'
コード例 #26
0
    def test_match_exact_from_file(self):
        # Match the 'index/queryperfect-mini' test file against an index built
        # from the 'index/mini' test rules: a single match is expected, with
        # identical query and rule texts and identical spans.
        mini_rules = self.get_test_rules('index/mini')
        idx = index.LicenseIndex(mini_rules)
        query_location = self.get_test_loc('index/queryperfect-mini')

        found = idx.match(location=query_location)
        assert len(found) == 1
        only_match = found[0]

        matched_query, matched_rule = get_texts(only_match, location=query_location, idx=idx)
        expected_text = 'Redistribution and use in source and binary forms with or without modification are permitted'
        assert matched_query == expected_text
        assert matched_rule == expected_text

        assert Span(0, 13) == only_match.qspan
        assert Span(0, 13) == only_match.ispan
コード例 #27
0
    def test_match_exact_from_file(self):
        # Match the 'index/queryperfect-mini' test file against an index built
        # from the 'index/mini' test rules: a single match is expected, with
        # identical query and rule texts and identical spans.
        mini_rules = self.get_test_rules('index/mini')
        idx = index.LicenseIndex(mini_rules)
        query_location = self.get_test_loc('index/queryperfect-mini')

        found = idx.match(location=query_location)
        assert len(found) == 1
        only_match = found[0]

        matched_query, matched_rule = get_texts(only_match, location=query_location, idx=idx)
        expected_text = 'Redistribution and use in source and binary forms with or without modification are permitted'
        assert matched_query == expected_text
        assert matched_rule == expected_text

        assert Span(0, 13) == only_match.qspan
        assert Span(0, 13) == only_match.ispan
コード例 #28
0
    def test_match_exact_with_junk_in_between_good_tokens(self):
        # Extra words interleaved with the rule tokens in the query must still
        # give a single match; the extra words are shown in square brackets in
        # the matched query text.
        rule = models.Rule(
            licenses=['tst'],
            _text=u'licensed under the GPL, licensed under the GPL',
        )
        idx = index.LicenseIndex([rule])

        noisy_query = u'Hi licensed that under is the that GPL, licensed or under not the GPL by yes.'
        found = idx.match(query_string=noisy_query)
        assert len(found) == 1

        only_match = found[0]
        matched_query, matched_rule = get_texts(only_match, query_string=noisy_query, idx=idx)
        assert matched_query == u'licensed [that] under [is] the [that] GPL licensed [or] under [not] the GPL'
        assert matched_rule == u'licensed under the GPL licensed under the GPL'
コード例 #29
0
def print_matched_texts(match, location=None, query_string=None, idx=None):
    """
    Print the two texts returned by get_texts() for `match`, each under its
    own heading. `location`, `query_string` and `idx` are passed through
    unchanged to get_texts().

    Intended as a convenience for tracing and debugging tests.
    """
    texts = get_texts(match, location=location, query_string=query_string, idx=idx)
    matched_query, matched_rule = texts
    print()
    print('Matched qtext:')
    print(matched_query)
    print()
    print('Matched itext:')
    print(matched_rule)
コード例 #30
0
    def test_match_template_with_few_tokens_around_gaps_is_wholly_seq_matched(
            self):
        # was failing when a gapped token (from a template) starts at a
        # beginning of an index doc. We may still skip that, but capture a large match anyway.

        # the {{...}} parts of the rule text are template gaps
        rule_text = u'''
            Copyright {{some copyright}}
            THIS IS FROM {{THE CODEHAUS}} AND CONTRIBUTORS
            IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE {{POSSIBILITY OF SUCH}} DAMAGE
        '''

        rule = Rule(
            _text=rule_text,
            licenses=['test'],
        )
        idx = index.LicenseIndex([rule])

        querys = u'''
            Copyright 2003 (C) James. All Rights Reserved.
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        '''
        result = idx.match(query_string=querys)
        assert 1 == len(result)
        match = result[0]
        # a single match is expected and it must come from the sequence matcher
        assert match_seq.MATCH_SEQ == match.matcher

        # texts are compared as lists of whitespace-separated tokens, so the
        # layout of the expected texts below is not significant
        exp_qtext = u"""
            Copyright [2003] [C] [James] [All] [Rights] [Reserved]
            THIS IS FROM <THE> [CODEHAUS]
            AND CONTRIBUTORS
            IN NO EVENT SHALL <THE> [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE [POSSIBILITY] <OF> [SUCH] DAMAGE
        """.split()

        exp_itext = u"""
            Copyright
            THIS IS FROM
            AND CONTRIBUTORS
            IN NO EVENT SHALL OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE DAMAGE
        """.split()
        qtext, itext = get_texts(match, query_string=querys, idx=idx)
        assert exp_qtext == qtext.split()
        assert exp_itext == itext.split()
        assert 99 <= match.coverage()
コード例 #31
0
    def test_match_return_correct_positions_with_short_index_and_queries(self):
        # A two-word rule is matched against several short queries; each match
        # must report the expected texts and the expected query and rule spans.
        idx = index.LicenseIndex([Rule(_text='MIT License', licenses=['mit'])])

        # query identical to the rule text
        exact_query = 'MIT License'
        found = idx.match(query_string=exact_query)
        assert len(found) == 1

        assert {'_tst_11_0': {'mit': [0]}} == idx.to_dict()

        matched_query, matched_rule = get_texts(found[0], query_string=exact_query, idx=idx)
        assert matched_query == 'MIT License'
        assert matched_rule == 'MIT License'
        assert Span(0, 1) == found[0].qspan
        assert Span(0, 1) == found[0].ispan

        # query with the first rule word repeated
        repeated_query = 'MIT MIT License'
        found = idx.match(query_string=repeated_query)
        assert len(found) == 1

        matched_query, matched_rule = get_texts(found[0], query_string=repeated_query, idx=idx)
        assert matched_query == 'MIT License'
        assert matched_rule == 'MIT License'
        assert Span(1, 2) == found[0].qspan
        assert Span(0, 1) == found[0].ispan

        # The same words on a single line and spread over three lines: both
        # queries are expected to give the same two matches.
        query_doc1 = 'do you think I am a mit license MIT License, yes, I think so'
        # #                                  0       1   2       3
        query_doc2 = '''do you think I am a mit license
                        MIT License
                        yes, I think so'''
        for query_doc in (query_doc1, query_doc2):
            found = idx.match(query_string=query_doc)
            assert len(found) == 2

            matched_query, matched_rule = get_texts(found[0], query_string=query_doc, idx=idx)
            assert matched_query == 'mit license'
            assert matched_rule == 'MIT License'
            assert Span(0, 1) == found[0].qspan
            assert Span(0, 1) == found[0].ispan

            matched_query, matched_rule = get_texts(found[1], query_string=query_doc, idx=idx)
            assert matched_query == 'MIT License'
            assert matched_rule == 'MIT License'
            assert Span(2, 3) == found[1].qspan
            assert Span(0, 1) == found[1].ispan
コード例 #32
0
    def test_match_return_one_match_with_correct_offsets(self):
        # A query made of two leading extra words followed by the rule words
        # must give one match whose query and rule spans are both 0 to 6.
        rule = Rule(_text='A one. a license two. A three.', licenses=['abc'])
        idx = index.LicenseIndex([rule])

        query_text = u'some junk. A one. A license two. A three.'
        #                0    1   2   3  4      5    6  7      8

        found = idx.match(query_string=query_text)
        assert len(found) == 1
        only_match = found[0]
        matched_query, matched_rule = get_texts(only_match, query_string=query_text, idx=idx)
        # each text keeps its own letter case ("A license" vs. "a license")
        assert matched_query == 'A one A license two A three'
        assert matched_rule == 'A one a license two A three'

        assert Span(0, 6) == only_match.qspan
        assert Span(0, 6) == only_match.ispan
コード例 #33
0
    def test_match_return_one_match_with_correct_offsets(self):
        # A query made of two leading extra words followed by the rule words
        # must give one match whose query and rule spans are both 0 to 6.
        rule = Rule(_text='A one. a license two. A three.', licenses=['abc'])
        idx = index.LicenseIndex([rule])

        query_text = u'some junk. A one. A license two. A three.'
        #                0    1   2   3  4      5    6  7      8

        found = idx.match(query_string=query_text)
        assert len(found) == 1
        only_match = found[0]
        matched_query, matched_rule = get_texts(only_match, query_string=query_text, idx=idx)
        # each text keeps its own letter case ("A license" vs. "a license")
        assert matched_query == 'A one A license two A three'
        assert matched_rule == 'A one a license two A three'

        assert Span(0, 6) == only_match.qspan
        assert Span(0, 6) == only_match.ispan
コード例 #34
0
    def test_match_works_for_apache_rule(self):
        # An Apache 2.0 name and URL notice preceded by an unrelated sentence
        # must give one match on the 'apache-2.0_8.RULE' rule, reported with
        # the MATCH_AHO_EXACT matcher on lines 3 to 4 of the query.
        license_index = cache.get_index()
        query_text = u'''I am not a license.

            The Apache Software License, Version 2.0
            http://www.apache.org/licenses/LICENSE-2.0.txt
            '''
        found = license_index.match(query_string=query_text)

        assert len(found) == 1
        only_match = found[0]
        assert only_match.rule.identifier == 'apache-2.0_8.RULE'
        assert match_aho.MATCH_AHO_EXACT == only_match.matcher

        matched_query, _matched_rule = get_texts(only_match, query_string=query_text, idx=license_index)
        assert matched_query == u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'
        assert only_match.lines() == (3, 4)
コード例 #35
0
    def test_match_works_for_apache_rule(self):
        # An Apache 2.0 name and URL notice preceded by an unrelated sentence
        # must give one match on the 'apache-2.0_8.RULE' rule, reported with
        # the MATCH_AHO_EXACT matcher on lines 3 to 4 of the query.
        license_index = cache.get_index()
        query_text = u'''I am not a license.

            The Apache Software License, Version 2.0
            http://www.apache.org/licenses/LICENSE-2.0.txt
            '''
        found = license_index.match(query_string=query_text)

        assert len(found) == 1
        only_match = found[0]
        assert only_match.rule.identifier == 'apache-2.0_8.RULE'
        assert match_aho.MATCH_AHO_EXACT == only_match.matcher

        matched_query, _matched_rule = get_texts(only_match, query_string=query_text, idx=license_index)
        assert matched_query == u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'
        assert only_match.lines() == (3, 4)
コード例 #36
0
    def closure_test_function(*args, **kwargs):
        # Run license detection on `test_file` with the cached index and compare
        # the detected license keys with `expected_licenses`.
        # `test_file`, `min_score`, `detect_negative`, `expected_licenses`,
        # `trace_text`, `test_data_file` and `test_name` are not defined here:
        # they are read from the enclosing scope. `args` and `kwargs` are
        # accepted but not used.
        idx = cache.get_index()
        matches = idx.match(location=test_file, min_score=min_score,
                            # if negative, do not detect negative rules when testing negative rules
                            detect_negative=detect_negative)

        if not matches:
            matches = []

        # TODO: we should expect matches properly, not with a grab bag of flat license keys
        # flattened list of all detected license keys across all matches.
        detected_licenses = functional.flatten(map(unicode, match.rule.licenses) for match in matches)
        try:
            if not detect_negative:
                # we skipped negative detection for a negative rule
                # we just want to ensure that the rule was matched proper
                assert matches and not expected_licenses and not detected_licenses
            else:
                assert expected_licenses == detected_licenses
        except AssertionError:
            # On failure, we compare against more result data to get additional
            # failure details, including the test_file and full match details
            match_failure_trace = []

            if trace_text:
                for match in matches:
                    qtext, itext = get_texts(match, location=test_file, idx=idx)
                    # these two names are read through locals() by the
                    # format() calls below
                    rule_text_file = match.rule.text_file
                    rule_data_file = match.rule.data_file
                    match_failure_trace.extend(['', '',
                        '======= MATCH ====', match,
                        '======= Matched Query Text for:',
                        'file://{test_file}'.format(**locals())
                    ])
                    if test_data_file:
                        match_failure_trace.append('file://{test_data_file}'.format(**locals()))
                    match_failure_trace.append(qtext.splitlines())
                    # the header and the rule text file are two separate
                    # entries, as for the query text above
                    match_failure_trace.extend(['',
                        '======= Matched Rule Text for:',
                        'file://{rule_text_file}'.format(**locals()),
                        'file://{rule_data_file}'.format(**locals()),
                        itext.splitlines(),
                    ])
            # this assert will always fail and provide a detailed failure trace
            assert expected_licenses == detected_licenses + [test_name, 'test file: file://' + test_file] + match_failure_trace
コード例 #37
0
    def test_match_template_with_few_tokens_around_gaps_is_wholly_seq_matched(self):
        # was failing when a gapped token (from a template) starts at a
        # beginning of an index doc. We may still skip that, but capture a large match anyway.

        # the {{...}} parts of the rule text are template gaps
        rule_text = u'''
            Copyright {{some copyright}}
            THIS IS FROM {{THE CODEHAUS}} AND CONTRIBUTORS
            IN NO EVENT SHALL {{THE CODEHAUS}} OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE {{POSSIBILITY OF SUCH}} DAMAGE
        '''

        rule = Rule(_text=rule_text, licenses=['test'],)
        idx = index.LicenseIndex([rule])

        querys = u'''
            Copyright 2003 (C) James. All Rights Reserved.
            THIS IS FROM THE CODEHAUS AND CONTRIBUTORS
            IN NO EVENT SHALL THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        '''
        result = idx.match(query_string=querys)
        assert 1 == len(result)
        match = result[0]
        # a single match is expected and it must come from the sequence matcher
        assert match_seq.MATCH_SEQ == match.matcher

        # texts are compared as lists of whitespace-separated tokens, so the
        # layout of the expected texts below is not significant
        exp_qtext = u"""
            Copyright [2003] [C] [James] [All] [Rights] [Reserved]
            THIS IS FROM <THE> [CODEHAUS]
            AND CONTRIBUTORS
            IN NO EVENT SHALL <THE> [CODEHAUS] OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE [POSSIBILITY] <OF> [SUCH] DAMAGE
        """.split()

        exp_itext = u"""
            Copyright
            THIS IS FROM
            AND CONTRIBUTORS
            IN NO EVENT SHALL OR ITS CONTRIBUTORS BE LIABLE
            EVEN IF ADVISED OF THE DAMAGE
        """.split()
        qtext, itext = get_texts(match, query_string=querys, idx=idx)
        assert exp_qtext == qtext.split()
        assert exp_itext == itext.split()
        assert 99 <= match.coverage()
コード例 #38
0
ファイル: index.py プロジェクト: yudhik11/scancode-toolkit
    def debug_matches(self, matches, message, location=None, query_string=None, with_text=False, query=None):
        """
        Log `matches` for tracing, using `message` as a heading.

        Nothing is done unless the TRACE or TRACE_NEGATIVE flag is set. When a
        `query` is provided, match.set_lines() is called first with
        `query.line_by_pos`. Each match is logged when TRACE_MATCHES or
        TRACE_NEGATIVE is set. When `with_text` is True and TRACE_MATCHES_TEXT
        or TRACE_NEGATIVE is set, the texts returned by match.get_texts() for
        `location`, `query_string` and this index are also printed for each
        match.
        """
        if TRACE or TRACE_NEGATIVE:
            logger_debug(message + ':', len(matches))
            if query:
                # set line early to ease debugging
                match.set_lines(matches, query.line_by_pos)

            if TRACE_MATCHES or TRACE_NEGATIVE:
                # explicit loop: a map() used only for its side effects logs
                # nothing where map() returns a lazy iterator
                for m in matches:
                    logger_debug(m)

            if (TRACE_MATCHES_TEXT  or TRACE_NEGATIVE) and with_text:
                logger_debug(message + ' MATCHED TEXTS')
                for m in matches:
                    logger_debug(m)
                    qt, it = match.get_texts(m, location, query_string, self)
                    print('  MATCHED QUERY TEXT:', qt)
                    print('  MATCHED RULE TEXT:', it)
                    print()
コード例 #39
0
    def test_match_exact_from_string_once(self):
        # The rule text embedded in a query between two extra words ("The" and
        # "Always") must give one match with identical query and rule texts.
        rule_text = 'Redistribution and use in source and binary forms, with or without modification, are permitted'
        bsd_rule = models.Rule(_text=rule_text, licenses=['bsd'])
        idx = index.LicenseIndex([bsd_rule])
        query_text = '''
            The
            Redistribution and use in source and binary forms, with or without modification, are permitted.

            Always'''

        found = idx.match(query_string=query_text)
        assert len(found) == 1
        only_match = found[0]
        matched_query, matched_rule = get_texts(only_match, query_string=query_text, idx=idx)
        expected_text = 'Redistribution and use in source and binary forms with or without modification are permitted'
        assert matched_query == expected_text
        assert matched_rule == expected_text

        assert Span(0, 13) == only_match.qspan
        assert Span(0, 13) == only_match.ispan
コード例 #40
0
    def test_match_exact_from_string_once(self):
        # The rule text embedded in a query between two extra words ("The" and
        # "Always") must give one match with identical query and rule texts.
        rule_text = 'Redistribution and use in source and binary forms, with or without modification, are permitted'
        bsd_rule = models.Rule(_text=rule_text, licenses=['bsd'])
        idx = index.LicenseIndex([bsd_rule])
        query_text = '''
            The
            Redistribution and use in source and binary forms, with or without modification, are permitted.

            Always'''

        found = idx.match(query_string=query_text)
        assert len(found) == 1
        only_match = found[0]
        matched_query, matched_rule = get_texts(only_match, query_string=query_text, idx=idx)
        expected_text = 'Redistribution and use in source and binary forms with or without modification are permitted'
        assert matched_query == expected_text
        assert matched_rule == expected_text

        assert Span(0, 13) == only_match.qspan
        assert Span(0, 13) == only_match.ispan
コード例 #41
0
    def test_match_return_correct_offsets(self):
        # A query made of two leading extra words followed by the rule words
        # must give one match whose query and rule spans are both 0 to 5.
        rule = models.Rule(
            licenses=['test'],
            _text=u'A GPL. A MIT. A LGPL.',
            #       0   1  2   3  4    5
        )
        idx = index.LicenseIndex([rule])
        query_text = u'some junk. A GPL. A MIT. A LGPL.'
        #                 0    1  2   3  4   5  6    7

        found = idx.match(query_string=query_text)
        assert len(found) == 1
        only_match = found[0]
        matched_query, matched_rule = get_texts(only_match, query_string=query_text, idx=idx)
        expected_text = 'A GPL A MIT A LGPL'
        assert matched_query == expected_text
        assert matched_rule == expected_text

        assert Span(0, 5) == only_match.qspan
        assert Span(0, 5) == only_match.ispan
コード例 #42
0
    def test_match_return_correct_offsets(self):
        # A query made of two leading extra words followed by the rule words
        # must give one match whose query and rule spans are both 0 to 5.
        rule = models.Rule(
            licenses=['test'],
            _text=u'A GPL. A MIT. A LGPL.',
            #       0   1  2   3  4    5
        )
        idx = index.LicenseIndex([rule])
        query_text = u'some junk. A GPL. A MIT. A LGPL.'
        #                 0    1  2   3  4   5  6    7

        found = idx.match(query_string=query_text)
        assert len(found) == 1
        only_match = found[0]
        matched_query, matched_rule = get_texts(only_match, query_string=query_text, idx=idx)
        expected_text = 'A GPL A MIT A LGPL'
        assert matched_query == expected_text
        assert matched_rule == expected_text

        assert Span(0, 5) == only_match.qspan
        assert Span(0, 5) == only_match.ispan
コード例 #43
0
    def test_match_can_match_with_rule_template_for_public_domain(self):
        # A public domain rule text with two {{...}} template parts is matched
        # against a query where these parts are filled in and that has extra
        # text before and after the notice.
        template_text = '''
        I hereby abandon any property rights to {{SAX 2.0 (the Simple API for
        XML)}}, and release all of {{the SAX 2.0 }} source code, compiled code,
        and documentation contained in this distribution into the Public Domain.
        '''
        idx = index.LicenseIndex([Rule(_text=template_text, licenses=['public-domain'])])

        query_text = '''
        SAX2 is Free!
        I hereby abandon any property rights to SAX 2.0 (the Simple API for
        XML), and release all of the SAX 2.0 source code, compiled code, and
        documentation contained in this distribution into the Public Domain. SAX
        comes with NO WARRANTY or guarantee of fitness for any purpose.
        SAX2 is Free!
        '''
        found = idx.match(query_string=query_text)

        assert len(found) == 1
        only_match = found[0]

        matched_query, matched_rule = get_texts(only_match, query_string=query_text, idx=idx)

        # texts are compared as lists of whitespace-separated tokens
        query_tokens = u'''
        I hereby abandon any property rights to [SAX] [2] [0] <the> [Simple] [API] [for] [XML]
        <and> <release> <all> <of> <the> [SAX] [2] [0]
        source code compiled code and documentation contained in this distribution
        into the Public Domain
        '''.split()
        assert matched_query.split() == query_tokens

        rule_tokens = u'''
        I hereby abandon any property rights to
        <and> <release> <all> <of>
        source code compiled code and documentation contained in this distribution
        into the Public Domain
        '''.split()
        assert matched_rule.split() == rule_tokens

        assert only_match.coverage() == 84
        assert only_match.score() == 70
        assert Span(0, 6) | Span(13, 26) == only_match.qspan
        assert Span(0, 6) | Span(11, 24) == only_match.ispan
コード例 #44
0
    def test_match_can_match_with_rule_template_for_public_domain(self):
        # A public domain rule text with two {{...}} template parts is matched
        # against a query where these parts are filled in and that has extra
        # text before and after the notice.
        template_text = '''
        I hereby abandon any property rights to {{SAX 2.0 (the Simple API for
        XML)}}, and release all of {{the SAX 2.0 }} source code, compiled code,
        and documentation contained in this distribution into the Public Domain.
        '''
        idx = index.LicenseIndex([Rule(_text=template_text, licenses=['public-domain'])])

        query_text = '''
        SAX2 is Free!
        I hereby abandon any property rights to SAX 2.0 (the Simple API for
        XML), and release all of the SAX 2.0 source code, compiled code, and
        documentation contained in this distribution into the Public Domain. SAX
        comes with NO WARRANTY or guarantee of fitness for any purpose.
        SAX2 is Free!
        '''
        found = idx.match(query_string=query_text)

        assert len(found) == 1
        only_match = found[0]

        matched_query, matched_rule = get_texts(only_match, query_string=query_text, idx=idx)

        # texts are compared as lists of whitespace-separated tokens
        query_tokens = u'''
        I hereby abandon any property rights to [SAX] [2] [0] <the> [Simple] [API] [for] [XML]
        <and> <release> <all> <of> <the> [SAX] [2] [0]
        source code compiled code and documentation contained in this distribution
        into the Public Domain
        '''.split()
        assert matched_query.split() == query_tokens

        rule_tokens = u'''
        I hereby abandon any property rights to
        <and> <release> <all> <of>
        source code compiled code and documentation contained in this distribution
        into the Public Domain
        '''.split()
        assert matched_rule.split() == rule_tokens

        assert only_match.coverage() == 84
        assert only_match.score() == 70
        assert Span(0, 6) | Span(13, 26) == only_match.qspan
        assert Span(0, 6) | Span(11, 24) == only_match.ispan
コード例 #45
0
    def test_fulltext_detection_works_with_partial_overlap_from_location(self):
        # A rule built from the license3.txt test file is matched against the
        # license4.txt test file: one match with 100 coverage and score is
        # expected, on spans 0 to 41 on both sides.
        rule_location = self.get_test_loc('detect/templates/license3.txt')
        rule = Rule(text_file=rule_location, licenses=['mylicense'])
        idx = index.LicenseIndex([rule])

        query_location = self.get_test_loc('detect/templates/license4.txt')
        found = idx.match(query_location)

        assert len(found) == 1
        only_match = found[0]
        assert Span(0, 41) == only_match.qspan
        assert Span(0, 41) == only_match.ispan
        assert only_match.coverage() == 100
        assert only_match.score() == 100
        matched_query, _matched_rule = get_texts(only_match, location=query_location, idx=idx)
        # compared as a list of whitespace-separated tokens
        expected_tokens = '''
            is free software you can redistribute it and or modify it under the terms
            of the GNU Lesser General Public License as published by the Free
            Software Foundation either version 2 1 of the License or at your option
            any later version
        '''.split()
        assert matched_query.split() == expected_tokens
コード例 #46
0
    def test_fulltext_detection_works_with_partial_overlap_from_location(self):
        # A rule built from the license3.txt test file is matched against the
        # license4.txt test file: one match with 100 coverage and score is
        # expected, on spans 0 to 41 on both sides.
        rule_location = self.get_test_loc('detect/templates/license3.txt')
        rule = Rule(text_file=rule_location, licenses=['mylicense'])
        idx = index.LicenseIndex([rule])

        query_location = self.get_test_loc('detect/templates/license4.txt')
        found = idx.match(query_location)

        assert len(found) == 1
        only_match = found[0]
        assert Span(0, 41) == only_match.qspan
        assert Span(0, 41) == only_match.ispan
        assert only_match.coverage() == 100
        assert only_match.score() == 100
        matched_query, _matched_rule = get_texts(only_match, location=query_location, idx=idx)
        # compared as a list of whitespace-separated tokens
        expected_tokens = '''
            is free software you can redistribute it and or modify it under the terms
            of the GNU Lesser General Public License as published by the Free
            Software Foundation either version 2 1 of the License or at your option
            any later version
        '''.split()
        assert matched_query.split() == expected_tokens
コード例 #47
0
    def debug_matches(self,
                      matches,
                      message,
                      location=None,
                      query_string=None,
                      with_text=False):
        """
        Log `matches` for tracing, using `message` as a heading.

        Nothing is done unless the TRACE or TRACE_NEGATIVE flag is set. Each
        match is logged when TRACE_MATCHES or TRACE_NEGATIVE is set. When
        `with_text` is True and TRACE_MATCHES_TEXT or TRACE_NEGATIVE is set,
        the texts returned by get_texts() for `location`, `query_string` and
        this index are also printed for each match.
        """
        if TRACE or TRACE_NEGATIVE:
            logger_debug(message + ':', len(matches))

            if TRACE_MATCHES or TRACE_NEGATIVE:
                # explicit loop: a map() used only for its side effects logs
                # nothing where map() returns a lazy iterator
                for m in matches:
                    logger_debug(m)

            if (TRACE_MATCHES_TEXT or TRACE_NEGATIVE) and with_text:
                logger_debug(message + ' MATCHED TEXTS')
                for m in matches:
                    logger_debug(m)
                    qt, it = get_texts(m, location, query_string, self)
                    print('  MATCHED QUERY TEXT')
                    print(qt)
                    print('  MATCHED RULE TEXT')
                    print(it)
                    print()
コード例 #48
0
    def test_match_with_surrounding_junk_should_return_an_exact_match(self):
        # A rule built from the mit.c test file is matched against the mit4.c
        # test file: one match is expected, and the matched query text shows
        # extra "add text" words in square brackets.
        rule_location = self.get_test_loc('detect/mit/mit.c')
        mit_rule = Rule(text_file=rule_location, licenses=['mit'])
        idx = index.LicenseIndex([mit_rule])

        query_location = self.get_test_loc('detect/mit/mit4.c')
        found = idx.match(query_location)
        assert len(found) == 1
        only_match = found[0]
        matched_query, matched_rule = get_texts(only_match, location=query_location, idx=idx)

        # texts are compared as lists of whitespace-separated tokens
        query_tokens = u'''
            Permission [add] [text] is hereby granted free of charge to any person
            obtaining a copy of this software and associated documentation files the
            Software to deal in the Software without restriction including without
            limitation the rights to use copy modify merge publish distribute
            sublicense and or sell copies of the Software and to permit persons to
            whom the Software is furnished to do so subject to the following
            conditions The above copyright [add] [text] notice and this permission
            notice shall be included in all copies or substantial portions of the
            Software
        '''.split()
        assert matched_query.split() == query_tokens

        rule_tokens = u'''
            Permission is hereby granted free of charge to any person obtaining a
            copy of this software and associated documentation files the Software to
            deal in the Software without restriction including without limitation the
            rights to use copy modify merge publish distribute sublicense and or sell
            copies of the Software and to permit persons to whom the Software is
            furnished to do so subject to the following conditions The above
            copyright notice and this permission notice shall be included in all
            copies or substantial portions of the Software
        '''.split()
        assert matched_rule.split() == rule_tokens

        assert Span(0, 86) == only_match.qspan
        assert Span(0, 86) == only_match.ispan
        assert only_match.score() == 95.6
コード例 #49
0
    def test_match_seq_are_correct_on_apache(self):
        # Build an index from the test rules directory and match the query file.
        rules_loc = self.get_test_loc('match_seq/rules')
        idx = index.LicenseIndex(load_rules(rules_loc))

        query_loc = self.get_test_loc('match_seq/query')
        results = idx.match(location=query_loc)
        assert len(results) == 1
        match = results[0]
        # The single match must be reported by the sequence matcher.
        assert match.matcher == match_seq.MATCH_SEQ

        matched_qtext, _matched_itext = get_texts(match, location=query_loc, idx=idx)
        expected_qtokens = u'''
        The OpenSymphony Group All rights reserved Redistribution and use in source and
        binary forms with or without modification are permitted provided that the following
        conditions are met 1 Redistributions of source code must retain the above copyright
        notice this list of conditions and the following disclaimer 2 Redistributions in
        binary form must reproduce the above copyright notice this list of conditions and the
        following disclaimer in the documentation and or other materials provided with the
        distribution 3 The end user documentation included with the redistribution if any
        must include the following acknowledgment <4> <This> <product> <includes> <software>
        <developed> <by> <the> <OpenSymphony> <Group> <http> <www> <opensymphony> <com> <5>
        Alternately this acknowledgment may appear in the software itself if and wherever
        such third party acknowledgments normally appear The names OpenSymphony and The
        OpenSymphony Group must not be used to endorse or promote products derived from this
        software without prior written permission For written permission please contact
        license opensymphony com Products derived from this software may not be called
        OpenSymphony or [OsCore] nor may OpenSymphony or [OsCore] appear in their name
        without prior written permission of the OpenSymphony Group THIS SOFTWARE IS PROVIDED
        AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO THE
        IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
        DISCLAIMED IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE
        LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
        INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE
        DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF
        LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING NEGLIGENCE OR
        OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE
        POSSIBILITY OF SUCH DAMAGE
        '''.split()
        # Compare token by token so that whitespace layout is irrelevant.
        assert matched_qtext.split() == expected_qtokens
コード例 #50
0
    def test_match_seq_are_correct_on_apache(self):
        # Build an index from the test rules directory and match the query file.
        rules_loc = self.get_test_loc('match_seq/rules')
        idx = index.LicenseIndex(load_rules(rules_loc))

        query_loc = self.get_test_loc('match_seq/query')
        results = idx.match(location=query_loc)
        assert len(results) == 1
        match = results[0]
        # The single match must be reported by the sequence matcher.
        assert match.matcher == match_seq.MATCH_SEQ

        matched_qtext, _matched_itext = get_texts(match, location=query_loc, idx=idx)
        expected_qtokens = u'''
        The OpenSymphony Group All rights reserved Redistribution and use in source and
        binary forms with or without modification are permitted provided that the following
        conditions are met 1 Redistributions of source code must retain the above copyright
        notice this list of conditions and the following disclaimer 2 Redistributions in
        binary form must reproduce the above copyright notice this list of conditions and the
        following disclaimer in the documentation and or other materials provided with the
        distribution 3 The end user documentation included with the redistribution if any
        must include the following acknowledgment <4> <This> <product> <includes> <software>
        <developed> <by> <the> <OpenSymphony> <Group> <http> <www> <opensymphony> <com> <5>
        Alternately this acknowledgment may appear in the software itself if and wherever
        such third party acknowledgments normally appear The names OpenSymphony and The
        OpenSymphony Group must not be used to endorse or promote products derived from this
        software without prior written permission For written permission please contact
        license opensymphony com Products derived from this software may not be called
        OpenSymphony or [OsCore] nor may OpenSymphony or [OsCore] appear in their name
        without prior written permission of the OpenSymphony Group THIS SOFTWARE IS PROVIDED
        AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO THE
        IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
        DISCLAIMED IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE
        LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
        INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE
        DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF
        LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING NEGLIGENCE OR
        OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE
        POSSIBILITY OF SUCH DAMAGE
        '''.split()
        # Compare token by token so that whitespace layout is irrelevant.
        assert matched_qtext.split() == expected_qtokens
コード例 #51
0
    def test_match_with_surrounding_junk_should_return_an_exact_match(self):
        # Index a single MIT rule and match it against a query file.
        rule_loc = self.get_test_loc('detect/mit/mit.c')
        mit_rule = Rule(text_file=rule_loc, licenses=['mit'])
        idx = index.LicenseIndex([mit_rule])

        query_loc = self.get_test_loc('detect/mit/mit4.c')
        results = idx.match(query_loc)
        assert 1 == len(results)
        match = results[0]
        matched_qtext, matched_itext = get_texts(match, location=query_loc, idx=idx)

        # Query side: the expected text carries a few extra bracketed words.
        expected_qtext = u'''
            Permission [add] [text] is hereby granted free of charge to any person
            obtaining a copy of this software and associated documentation files the
            Software to deal in the Software without restriction including without
            limitation the rights to use copy modify merge publish distribute
            sublicense and or sell copies of the Software and to permit persons to
            whom the Software is furnished to do so subject to the following
            conditions The above copyright [add] [text] notice and this permission
            notice shall be included in all copies or substantial portions of the
            Software
        '''.split()
        assert matched_qtext.split() == expected_qtext

        # Rule side: the expected text has no extra words.
        expected_itext = u'''
            Permission is hereby granted free of charge to any person obtaining a
            copy of this software and associated documentation files the Software to
            deal in the Software without restriction including without limitation the
            rights to use copy modify merge publish distribute sublicense and or sell
            copies of the Software and to permit persons to whom the Software is
            furnished to do so subject to the following conditions The above
            copyright notice and this permission notice shall be included in all
            copies or substantial portions of the Software
        '''.split()
        assert matched_itext.split() == expected_itext

        # Matched positions and score.
        assert Span(0, 86) == match.qspan
        assert Span(0, 86) == match.ispan
        assert match.score() == 95.6
コード例 #52
0
    def test_match_has_correct_line_positions_for_query_with_repeats(self):
        # The same license notice is expected five times in the query file,
        # each occurrence reported as its own match on its own pair of lines.
        expected = [
            # licenses, match.lines(), qtext,
            ([u'apache-2.0'], (1, 2), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
            ([u'apache-2.0'], (3, 4), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
            ([u'apache-2.0'], (5, 6), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
            ([u'apache-2.0'], (7, 8), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
            ([u'apache-2.0'], (9, 10), u'The Apache Software License Version 2 0 http www apache org licenses LICENSE 2 0 txt'),
        ]
        test_path = 'positions/license1.txt'

        test_location = self.get_test_loc(test_path)
        idx = cache.get_index()
        matches = idx.match(test_location)

        # The loop below only checks the matches that were returned: without
        # this length check, missing matches (or no match at all) would let
        # the test pass without verifying anything.
        assert len(expected) == len(matches)

        for match, expected_item in zip(matches, expected):
            ex_lics, ex_lines, ex_qtext = expected_item
            qtext, _itext = get_texts(match, location=test_location, idx=idx)

            try:
                assert ex_lics == match.rule.licenses
                assert ex_lines == match.lines()
                assert ex_qtext == qtext
            except AssertionError:
                # On failure, compare all three values at once so that the
                # assertion error reports them together.
                assert expected_item == (match.rule.licenses, match.lines(), qtext)
コード例 #53
0
def match_sequence(idx, candidate, query_run, start_offset=0):
    """
    Return a list of LicenseMatch by matching the `query_run` tokens sequence
    against the `idx` index for the `candidate` rule tuple (rid, rule,
    intersection).

    Matching starts at the `query_run` start position plus `start_offset` and
    is repeated from the end of the last matched block for as long as
    match_blocks() returns matching blocks, creating one LicenseMatch per
    block. Return an empty list if `candidate` is empty.
    """
    if not candidate:
        return []

    rid, rule, _intersection = candidate
    high_postings = idx.high_postings_by_rid[rid]
    itokens = idx.tids_by_rid[rid]

    len_junk = idx.len_junk

    qbegin = query_run.start + start_offset
    qfinish = query_run.end
    query = query_run.query
    qtokens = query.tokens

    matches = []
    qstart = qbegin
    qlen = len(query_run)

    # match as long as we find alignments and have high matchable tokens
    # this allows to find repeated instances of the same rule in the query run
    query_run_matchables = query_run.matchables

    while qstart <= qfinish:
        if not query_run_matchables:
            break
        block_matches = match_blocks(qtokens, itokens, qstart, qlen, high_postings, len_junk, query_run_matchables)
        if not block_matches:
            break
        if TRACE2:
            logger_debug('block_matches:')
            for m in block_matches:
                i, j, k = m
                print(m)
                print('qtokens:', ' '.join(idx.tokens_by_tid[t] for t in qtokens[i:i + k]))
                print('itokens:', ' '.join(idx.tokens_by_tid[t] for t in itokens[j:j + k]))

        # create one match for each matching block: this not entirely correct
        # but this will be sorted out at LicenseMatch merging and filtering time
        for qpos, ipos, mlen in block_matches:
            qspan = Span(range(qpos, qpos + mlen))
            iposses = range(ipos, ipos + mlen)
            # index positions whose token id is at or above len_junk
            hispan = Span(p for p in iposses if itokens[p] >= len_junk)
            ispan = Span(iposses)
            match = LicenseMatch(rule, qspan, ispan, hispan, qbegin, matcher=MATCH_SEQ, query=query)
            if TRACE2:
                qt, it = get_texts(
                    match, location=query.location, query_string=query.query_string, idx=idx)
                print('###########################')
                print(match)
                print('###########################')
                print(qt)
                print('###########################')
                print(it)
                print('###########################')

            matches.append(match)
            # resume the next round right after the furthest matched query position
            qstart = max(qstart, qspan.end + 1)

    if TRACE:
        # Use an explicit loop: map() returns a lazy iterator on Python 3 and
        # would never call logger_debug when its result is discarded.
        for match in matches:
            logger_debug(match)
    return matches
コード例 #54
0
    def test_match_with_template_and_multiple_rules(self):
        test_rules = self.get_test_rules('index/bsd_templates',)
        idx = index.LicenseIndex(test_rules)
        querys = u'''


Hello, what about this

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

    * Neither the name of nexB Inc. nor the names of its contributors may be
    used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


Goodbye
No part of match        '''
        result = idx.match(query_string=querys)

        assert 1 == len(result)
        match = result[0]
        assert match_seq.MATCH_SEQ == match.matcher

        exp_qtext = u"""
            Redistribution and use in source and binary forms with or without
            modification are permitted provided that the following conditions
            are met

            Redistributions of source code must retain the above copyright
            notice this list of conditions and the following disclaimer

            Redistributions in binary form must reproduce the above copyright
            notice this list of conditions and the following disclaimer in the
            documentation and or other materials provided with the distribution

            Neither the name of [nexB] <Inc> nor the names of its
            contributors may be used to endorse or promote products derived from
            this software without specific prior written permission

            THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
            AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES INCLUDING BUT NOT
            LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
            A PARTICULAR PURPOSE ARE DISCLAIMED IN NO EVENT SHALL THE COPYRIGHT
            OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL
            SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES INCLUDING BUT NOT LIMITED
            TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE DATA OR
            PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF
            LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING
            NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
            SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
        """.split()

        exp_itext = u"""
            Redistribution and use in source and binary forms with or without
            modification are permitted provided that the following conditions
            are met

            Redistributions of source code must retain the above copyright
            notice this list of conditions and the following disclaimer

            Redistributions in binary form must reproduce the above copyright
            notice this list of conditions and the following disclaimer in the
            documentation and or other materials provided with the distribution

            Neither the name of nor the names of its contributors may be
            used to endorse or promote products derived from this software
            without specific prior written permission

            THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
            AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES INCLUDING BUT NOT
            LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
            A PARTICULAR PURPOSE ARE DISCLAIMED IN NO EVENT SHALL THE COPYRIGHT
            OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL
            SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES INCLUDING BUT NOT LIMITED
            TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE DATA OR
            PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF
            LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING
            NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
            SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
        """.split()
#         q = Query(query_string=querys, idx=idx)

#         print('######################')
#         print('######################')
#         print('q=', querys.lower().replace('*', ' ').replace('/', ' '). split())
#         print('q2=', [None if t is None else idx.tokens_by_tid[t] for t in q.tokens_with_unknowns()])
#         print('######################')


        qtext, itext = get_texts(match, query_string=querys, idx=idx)
        assert exp_qtext == qtext.split()
        assert exp_itext == itext.split()

        assert Span(Span(1, 72) | Span(74, 212)) == match.qspan

        assert Span(0, 210) == match.ispan
        assert 100 == match.coverage()
コード例 #55
0
    def test_match_to_indexed_template_with_few_tokens_around_gaps(self):
        # Regression test: this used to fail when a template gap started very
        # close to the start of the rule token sequence. That part may still
        # be skipped, but a large match is captured anyway.

        rule_loc = self.get_test_loc('index/templates/idx.txt')
        template_rule = models.Rule(text_file=rule_loc, licenses=['test'])
        idx = index.LicenseIndex([template_rule])

        query_loc = self.get_test_loc('index/templates/query.txt')
        results = idx.match(location=query_loc)
        assert len(results) == 1
        match = results[0]

        # Expected matched query tokens.
        exp_qtext = u"""
            All Rights Reserved

            Redistribution and use of this software and associated documentation
            Software with or without modification are permitted provided that
            the following conditions are met

            1 Redistributions of source code must retain copyright statements
            and notices Redistributions must also contain a copy of this
            document

            2 Redistributions in binary form must reproduce the above copyright
            notice this list of conditions and the following disclaimer in the
            documentation and or other materials provided with the distribution

            3 The name [groovy] must not be used to endorse or promote
            products derived from this Software without prior written permission
            of <The> [Codehaus] For written permission please contact
            [info] [codehaus] [org]

            4 Products derived from this Software may not be called [groovy]
            nor may [groovy] appear in their names without prior written
            permission of <The> [Codehaus] [groovy] is a registered
            trademark of <The> [Codehaus]

            5 Due credit should be given to <The> [Codehaus]
            [http] [groovy] [codehaus] [org]


            <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> <THE> [CODEHAUS] AND CONTRIBUTORS
            AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT
            LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
            A PARTICULAR PURPOSE ARE DISCLAIMED IN NO EVENT SHALL <THE> [CODEHAUS]
            OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT INDIRECT
            INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES INCLUDING BUT
            NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF
            USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
            ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT
            INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE
            OF THIS SOFTWARE EVEN IF ADVISED OF THE [POSSIBILITY] <OF> [SUCH] DAMAGE
        """.split()

        # Expected matched rule tokens.
        exp_itext = u"""
            All Rights Reserved

            Redistribution and use of this software and associated documentation
            Software with or without modification are permitted provided that
            the following conditions are met

            1 Redistributions of source code must retain copyright statements
            and notices Redistributions must also contain a copy of this
            document

            2 Redistributions in binary form must reproduce the above copyright
            notice this list of conditions and the following disclaimer in the
            documentation and or other materials provided with the distribution

            3 The name must not be used to endorse or promote products
            derived from this Software without prior written permission of
            For written permission please contact

            4 Products derived from this Software may not be called nor
            may appear in their names without prior written permission of
            is a registered trademark of

            5 Due credit should be given to

            <THIS> <SOFTWARE> <IS> <PROVIDED> <BY>
            AND CONTRIBUTORS AS IS AND ANY
            EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO THE
            IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
            PURPOSE ARE DISCLAIMED IN NO EVENT SHALL OR ITS CONTRIBUTORS
            BE LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR
            CONSEQUENTIAL DAMAGES INCLUDING BUT NOT LIMITED TO PROCUREMENT OF
            SUBSTITUTE GOODS OR SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS
            INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY WHETHER
            IN CONTRACT STRICT LIABILITY OR TORT INCLUDING NEGLIGENCE OR
            OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF
            ADVISED OF THE DAMAGE
        """.split()
        matched_qtext, matched_itext = get_texts(match, location=query_loc, idx=idx)
        assert matched_qtext.split() == exp_qtext
        assert matched_itext.split() == exp_itext
        # Most of the rule must be covered, by the sequence matcher.
        assert match.coverage() > 97
        assert match.matcher == match_seq.MATCH_SEQ
コード例 #56
0
    def test_match_can_match_with_rule_template_with_gap_near_start_with_few_tokens_before(self):
        # Regression test: matching failed when a gapped token started near
        # the beginning of a rule with only a few tokens before it.
        rule_loc = self.get_test_loc('detect/templates/license7.txt')
        template_rule = Rule(text_file=rule_loc, licenses=['lic'])
        idx = index.LicenseIndex([template_rule])

        qloc = self.get_test_loc('detect/templates/license8.txt')
        results = idx.match(qloc)
        assert len(results) == 1

        match = results[0]

        # Expected matched query tokens.
        expected_qtokens = u"""
        All Rights Reserved Redistribution and use of this software and associated
        documentation Software with or without modification are permitted provided
        that the following conditions are met

        1 Redistributions of source code must retain copyright statements and notices
        Redistributions must also contain a copy of this document

        2 Redistributions in binary form must reproduce the above copyright notice
        this list of conditions and the following disclaimer in the documentation and
        or other materials provided with the distribution

        3 The name [groovy] must not be used to endorse or promote products derived
        from this Software without prior written permission of <The> [Codehaus] For
        written permission please contact [info] [codehaus] [org]

        4 Products derived from this Software may not be called [groovy] nor may
        [groovy] appear in their names without prior written permission of <The>
        [Codehaus]

        [groovy] is a registered trademark of <The> [Codehaus]

        5 Due credit should be given to <The> [Codehaus]
        [http] [groovy] [codehaus] [org]

        <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> <THE> [CODEHAUS] <AND> <CONTRIBUTORS>
        AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO
        THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
        PURPOSE ARE DISCLAIMED IN NO EVENT SHALL <THE> [CODEHAUS] OR ITS
        CONTRIBUTORS BE LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY
        OR CONSEQUENTIAL DAMAGES INCLUDING BUT NOT LIMITED TO PROCUREMENT OF
        SUBSTITUTE GOODS OR SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS
        INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN
        CONTRACT STRICT LIABILITY OR TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING
        IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY
        OF SUCH DAMAGE
        """.split()

        # Expected matched rule tokens.
        expected_itokens = u''' All Rights Reserved Redistribution and use of this
        software and associated documentation Software with or without modification
        are permitted provided that the following conditions are met

        1 Redistributions of source code must retain copyright statements and notices
        Redistributions must also contain a copy of this document

        2 Redistributions in binary form must reproduce the above copyright notice
        this list of conditions and the following disclaimer in the documentation and
        or other materials provided with the distribution

        3 The name must not be used to endorse or promote products derived from this
        Software without prior written permission of For written permission please
        contact

        4 Products derived from this Software may not be called nor may appear in
        their names without prior written permission of is a registered trademark of

        5 Due credit should be given to


        <THIS> <SOFTWARE> <IS> <PROVIDED> <BY>

        AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO
        THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
        PURPOSE ARE DISCLAIMED IN NO EVENT SHALL OR ITS CONTRIBUTORS BE LIABLE FOR
        ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
        INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS
        OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING
        NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE
        EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
        '''.split()

        matched_qtext, matched_itext = get_texts(match, location=qloc, idx=idx)
        assert matched_qtext.split() == expected_qtokens
        assert matched_itext.split() == expected_itokens

        # Coverage, score and matched positions.
        assert match.coverage() == 97.55
        assert match.score() == 92.64
        expected_qspan = (
            Span(2, 98) | Span(100, 125) | Span(127, 131)
            | Span(133, 139) | Span(149, 178) | Span(180, 253)
        )
        assert expected_qspan == match.qspan
        assert Span(1, 135) | Span(141, 244) == match.ispan
コード例 #57
0
    def test_match_to_indexed_template_with_few_tokens_around_gaps(self):
        # Regression test: this used to fail when a template gap started very
        # close to the start of the rule token sequence. That part may still
        # be skipped, but a large match is captured anyway.

        rule_loc = self.get_test_loc('index/templates/idx.txt')
        template_rule = models.Rule(text_file=rule_loc, licenses=['test'])
        idx = index.LicenseIndex([template_rule])

        query_loc = self.get_test_loc('index/templates/query.txt')
        results = idx.match(location=query_loc)
        assert len(results) == 1
        match = results[0]

        # Expected matched query tokens.
        exp_qtext = u"""
            All Rights Reserved

            Redistribution and use of this software and associated documentation
            Software with or without modification are permitted provided that
            the following conditions are met

            1 Redistributions of source code must retain copyright statements
            and notices Redistributions must also contain a copy of this
            document

            2 Redistributions in binary form must reproduce the above copyright
            notice this list of conditions and the following disclaimer in the
            documentation and or other materials provided with the distribution

            3 The name [groovy] must not be used to endorse or promote
            products derived from this Software without prior written permission
            of <The> [Codehaus] For written permission please contact
            [info] [codehaus] [org]

            4 Products derived from this Software may not be called [groovy]
            nor may [groovy] appear in their names without prior written
            permission of <The> [Codehaus] [groovy] is a registered
            trademark of <The> [Codehaus]

            5 Due credit should be given to <The> [Codehaus]
            [http] [groovy] [codehaus] [org]


            <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> <THE> [CODEHAUS] AND CONTRIBUTORS
            AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT
            LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
            A PARTICULAR PURPOSE ARE DISCLAIMED IN NO EVENT SHALL <THE> [CODEHAUS]
            OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT INDIRECT
            INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES INCLUDING BUT
            NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF
            USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
            ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT
            INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE
            OF THIS SOFTWARE EVEN IF ADVISED OF THE [POSSIBILITY] <OF> [SUCH] DAMAGE
        """.split()

        # Expected matched rule tokens.
        exp_itext = u"""
            All Rights Reserved

            Redistribution and use of this software and associated documentation
            Software with or without modification are permitted provided that
            the following conditions are met

            1 Redistributions of source code must retain copyright statements
            and notices Redistributions must also contain a copy of this
            document

            2 Redistributions in binary form must reproduce the above copyright
            notice this list of conditions and the following disclaimer in the
            documentation and or other materials provided with the distribution

            3 The name must not be used to endorse or promote products
            derived from this Software without prior written permission of
            For written permission please contact

            4 Products derived from this Software may not be called nor
            may appear in their names without prior written permission of
            is a registered trademark of

            5 Due credit should be given to

            <THIS> <SOFTWARE> <IS> <PROVIDED> <BY>
            AND CONTRIBUTORS AS IS AND ANY
            EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO THE
            IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
            PURPOSE ARE DISCLAIMED IN NO EVENT SHALL OR ITS CONTRIBUTORS
            BE LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR
            CONSEQUENTIAL DAMAGES INCLUDING BUT NOT LIMITED TO PROCUREMENT OF
            SUBSTITUTE GOODS OR SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS
            INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY WHETHER
            IN CONTRACT STRICT LIABILITY OR TORT INCLUDING NEGLIGENCE OR
            OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF
            ADVISED OF THE DAMAGE
        """.split()
        matched_qtext, matched_itext = get_texts(match, location=query_loc, idx=idx)
        assert matched_qtext.split() == exp_qtext
        assert matched_itext.split() == exp_itext
        # Most of the rule must be covered, by the sequence matcher.
        assert match.coverage() > 97
        assert match.matcher == match_seq.MATCH_SEQ
コード例 #58
0
    def test_match_can_match_with_rule_template_with_gap_near_start_with_few_tokens_before(self):
        # Regression check: matching used to fail when a gapped token sits at
        # the beginning of the rule with only a few tokens before it.

        # Matched query text expected back from get_texts(), compared token by
        # token so that line wrapping in the literal below is irrelevant.
        query_tokens_expected = u"""
        All Rights Reserved Redistribution and use of this software and associated
        documentation Software with or without modification are permitted provided
        that the following conditions are met

        1 Redistributions of source code must retain copyright statements and notices
        Redistributions must also contain a copy of this document

        2 Redistributions in binary form must reproduce the above copyright notice
        this list of conditions and the following disclaimer in the documentation and
        or other materials provided with the distribution

        3 The name [groovy] must not be used to endorse or promote products derived
        from this Software without prior written permission of <The> [Codehaus] For
        written permission please contact [info] [codehaus] [org]

        4 Products derived from this Software may not be called [groovy] nor may
        [groovy] appear in their names without prior written permission of <The>
        [Codehaus]

        [groovy] is a registered trademark of <The> [Codehaus]

        5 Due credit should be given to <The> [Codehaus]
        [http] [groovy] [codehaus] [org]

        <THIS> <SOFTWARE> <IS> <PROVIDED> <BY> <THE> [CODEHAUS] <AND> <CONTRIBUTORS>
        AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO
        THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
        PURPOSE ARE DISCLAIMED IN NO EVENT SHALL <THE> [CODEHAUS] OR ITS
        CONTRIBUTORS BE LIABLE FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY
        OR CONSEQUENTIAL DAMAGES INCLUDING BUT NOT LIMITED TO PROCUREMENT OF
        SUBSTITUTE GOODS OR SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS
        INTERRUPTION HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN
        CONTRACT STRICT LIABILITY OR TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING
        IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY
        OF SUCH DAMAGE
        """.split()

        # Matched rule (index side) text expected back from get_texts().
        rule_tokens_expected = u''' All Rights Reserved Redistribution and use of this
        software and associated documentation Software with or without modification
        are permitted provided that the following conditions are met

        1 Redistributions of source code must retain copyright statements and notices
        Redistributions must also contain a copy of this document

        2 Redistributions in binary form must reproduce the above copyright notice
        this list of conditions and the following disclaimer in the documentation and
        or other materials provided with the distribution

        3 The name must not be used to endorse or promote products derived from this
        Software without prior written permission of For written permission please
        contact

        4 Products derived from this Software may not be called nor may appear in
        their names without prior written permission of is a registered trademark of

        5 Due credit should be given to


        <THIS> <SOFTWARE> <IS> <PROVIDED> <BY>

        AS IS AND ANY EXPRESSED OR IMPLIED WARRANTIES INCLUDING BUT NOT LIMITED TO
        THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
        PURPOSE ARE DISCLAIMED IN NO EVENT SHALL OR ITS CONTRIBUTORS BE LIABLE FOR
        ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL DAMAGES
        INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS
        OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR TORT INCLUDING
        NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE
        EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
        '''.split()

        # Index a single rule built from the template file.
        rule_loc = self.get_test_loc('detect/templates/license7.txt')
        license_index = index.LicenseIndex([Rule(text_file=rule_loc, licenses=['lic'])])

        # Match the query file against that one-rule index: exactly one match
        # is expected.
        query_loc = self.get_test_loc('detect/templates/license8.txt')
        found = license_index.match(query_loc)
        assert 1 == len(found)
        the_match = found[0]

        matched_qtext, matched_itext = get_texts(the_match, location=query_loc, idx=license_index)
        assert query_tokens_expected == matched_qtext.split()
        assert rule_tokens_expected == matched_itext.split()

        assert 97.55 == the_match.coverage()
        assert 92.64 == the_match.score()

        # Expected matched positions on the query side and on the rule side.
        qspan_expected = (
            Span(2, 98)
            | Span(100, 125)
            | Span(127, 131)
            | Span(133, 139)
            | Span(149, 178)
            | Span(180, 253)
        )
        ispan_expected = Span(1, 135) | Span(141, 244)
        assert qspan_expected == the_match.qspan
        assert ispan_expected == the_match.ispan