def test_LicenseMatch_score_0(self):
    """A match with empty spans on a zero-relevance rule must score 0."""
    rule = Rule(text_file='r1', licenses=['apache-2.0'])
    rule.relevance = 0
    rule.length = 6

    # Two distinct empty spans: nothing is matched on either side.
    empty_qspan = Span()
    empty_ispan = Span()
    match = LicenseMatch(rule=rule, qspan=empty_qspan, ispan=empty_ispan)

    computed = match.score()
    assert computed == 0
Example #2
0
    def test_LicenseMatch_score_100_non_contiguous(self):
        """Check the score of a relevance-100 match whose query span has a gap."""
        rule = Rule(text_file='r1', licenses=['apache-2.0'])
        rule.relevance = 100
        rule.length = 42

        # The query side is made of two pieces; the rule side is one piece.
        query_positions = Span(0, 19) | Span(30, 51)
        rule_positions = Span(0, 41)
        match = LicenseMatch(
            rule=rule,
            qspan=query_positions,
            ispan=rule_positions,
        )

        computed = match.score()
        assert computed == 80.77
Example #3
0
    def test_merge_overlapping_matches(self):
        """Two overlapping matches of one rule merge into a single match."""
        rule = Rule(text_file='r1', licenses=['apache-2.0'])
        head = LicenseMatch(rule=rule, qspan=Span(0, 2), ispan=Span(0, 2))
        tail = LicenseMatch(rule=rule, qspan=Span(1, 6), ispan=Span(1, 6))

        merged = merge_matches([head, tail])

        # The merged match is expected to cover the union of both spans.
        expected = [
            LicenseMatch(rule=rule, qspan=Span(0, 6), ispan=Span(0, 6)),
        ]
        assert expected == merged
Example #4
0
    def test_LicenseMatch_equality_2(self):
        """Exercise LicenseMatch equality across rules with equivalent expressions.

        Matches with the same spans compare equal when their rules carry
        equal license expression objects, and unequal when the spans or the
        license expression differ.
        """
        rule_a = Rule(stored_text='r1', license_expression='apache-2.0 OR gpl')
        match_a = LicenseMatch(rule=rule_a, qspan=Span(0, 2), ispan=Span(0, 2))
        rule_b = Rule(stored_text='r2', license_expression='gpl OR apache-2.0')
        match_b = LicenseMatch(rule=rule_b, qspan=Span(0, 2), ispan=Span(0, 2))

        # The rules differ as objects and as expression strings, yet their
        # parsed expression objects compare equal.
        assert rule_a.licensing is rule_b.licensing
        assert rule_a != rule_b
        assert rule_a.license_expression != rule_b.license_expression
        assert rule_a.license_expression_object == rule_b.license_expression_object
        simplified_a = str(rule_a.license_expression_object.simplify())
        simplified_b = str(rule_b.license_expression_object.simplify())
        assert simplified_a == simplified_b

        # Both == and != must agree.
        assert match_a == match_b
        assert not (match_a != match_b)

        # NOTE(review): this compares the second rule with itself; confirm
        # whether a comparison against the first rule was intended.
        assert rule_b.same_licensing(rule_b)
        assert match_a.qspan == match_b.qspan
        assert match_a.ispan == match_b.ispan

        # Same expression as the second rule, but a different ispan.
        rule_c = Rule(stored_text='r3', license_expression='gpl OR apache-2.0')
        match_c = LicenseMatch(rule=rule_c, qspan=Span(0, 2), ispan=Span(0, 3))

        assert match_b != match_c

        # Same spans as the third match, but a different license expression.
        rule_d = Rule(stored_text='r3', license_expression='gpl1 OR apache-2.0')
        match_d = LicenseMatch(rule=rule_d, qspan=Span(0, 2), ispan=Span(0, 3))

        assert match_c != match_d
Example #5
0
    def test_merge_merges_duplicate_matches(self):
        """Two identical matches of one rule collapse to a single match."""
        rule = Rule(text_file='r1', license_expression='apache-2.0')
        original = LicenseMatch(rule=rule, qspan=Span(0, 8), ispan=Span(0, 8))
        duplicate = LicenseMatch(rule=rule, qspan=Span(0, 8), ispan=Span(0, 8))

        merged = merge_matches([original, duplicate])

        # Either of the two duplicates is an acceptable survivor.
        assert ([original] == merged) or ([duplicate] == merged)
Example #6
0
    def test_LicenseMatch_score_0_relevance(self):
        """A non-empty match on a zero-relevance rule must still score 0."""
        rule = Rule(text_file='r1', license_expression='apache-2.0')
        rule.relevance = 0
        rule.length = 6

        matched_query = Span(0, 2)
        matched_rule = Span(0, 2)
        match = LicenseMatch(rule=rule, qspan=matched_query, ispan=matched_rule)

        computed = match.score()
        assert computed == 0
Example #7
0
    def test_merge_does_not_merge_overlapping_matches_in_sequence_with_assymetric_overlap(
            self):
        """Two matches of one rule with an asymmetric overlap are kept separate.

        The spans below were captured from a merge_matches trace for
        'lgpl-2.0-plus_9.RULE' in which the two matches had been merged:
          current: '3-seq' match, lines=(9, 28), score=87.5
          next:    '2-aho' match, lines=(28, 44), score=100.0
        """
        rule = Rule(text_file='r1', license_expression=u'lgpl-2.0-plus')

        # ---> merge_matches: current: qlen=126, ilen=126, hilen=20, rlen=144, qreg=(50, 200), ireg=(5, 142)
        # ---> merge_matches: next:    qlen=144, ilen=144, hilen=21, rlen=144, qreg=(198, 341), ireg=(0, 143)

        # First ("current") match: fragmented on both the query and rule side.
        current_qspan = (
            Span(50, 90) | Span(92, 142) | Span(151, 182) | Span(199, 200)
        )
        current_ispan = (
            Span(5, 21) | Span(23, 46) | Span(48, 77) | Span(79, 93)
            | Span(95, 100) | Span(108, 128) | Span(130, 142)
        )
        current_hispan = (
            Span(10) | Span(14) | Span(18) | Span(24) | Span(27)
            | Span(52) | Span(57) | Span(61) | Span(65, 66) | Span(68)
            | Span(70) | Span(80) | Span(88) | Span(96) | Span(111)
            | Span(113) | Span(115) | Span(131) | Span(141)
        )
        current = LicenseMatch(
            rule=rule,
            qspan=current_qspan,
            ispan=current_ispan,
            hispan=current_hispan,
        )

        # Second ("next") match: one contiguous span on each side.
        following_qspan = Span(198, 341)
        following_ispan = Span(0, 143)
        following_hispan = (
            Span(1) | Span(10) | Span(14) | Span(18)
            | Span(24) | Span(27) | Span(52) | Span(57)
            | Span(61) | Span(65, 66) | Span(68) | Span(70)
            | Span(80) | Span(88) | Span(96) | Span(111)
            | Span(113) | Span(115) | Span(131) | Span(141)
        )
        following = LicenseMatch(
            rule=rule,
            qspan=following_qspan,
            ispan=following_ispan,
            hispan=following_hispan,
        )

        merged = merge_matches([current, following])
        assert [current, following] == merged
Example #8
0
    def test_LicenseMatch_score_100_contiguous(self):
        """A contiguous match spanning a 42-long, relevance-100 rule scores 100."""
        rule = Rule(text_file='r1', license_expression='apache-2.0')
        rule.relevance = 100
        rule.length = 42

        whole_query = Span(0, 41)
        whole_rule = Span(0, 41)
        match = LicenseMatch(rule=rule, qspan=whole_query, ispan=whole_rule)

        computed = match.score()
        assert computed == 100
Example #9
0
    def test_merge_does_merge_overlapping_matches_of_same_rules_if_in_sequence(self):
        """Overlapping, in-sequence matches of the same rule merge into one."""
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        left = LicenseMatch(rule=rule, qspan=Span(0, 5), ispan=Span(0, 5))
        right = LicenseMatch(rule=rule, qspan=Span(1, 6), ispan=Span(1, 6))

        expected = [
            LicenseMatch(rule=rule, qspan=Span(0, 6), ispan=Span(0, 6)),
        ]
        merged = merge_matches([left, right])
        assert expected == merged
    def test_match_has_correct_positions_basic(self):
        """Three repeated notices yield three matches, one per query line.

        Each expected match uses the 'gpl_69.RULE' rule, covers eight
        consecutive query positions and sits on its own line.
        """
        index = cache.get_index()
        query_text = u'''Licensed under the GNU General Public License (GPL).
                     Licensed under the GNU General Public License (GPL).
                     Licensed under the GNU General Public License (GPL).'''

        detected = index.match(query_string=query_text)

        candidates = [
            r for r in index.rules_by_rid if r.identifier == 'gpl_69.RULE'
        ]
        gpl_rule = candidates[0]

        # One expected match per line: query positions 0-7, 8-15 and 16-23,
        # each against rule positions 0-7.
        expected = [
            LicenseMatch(
                rule=gpl_rule,
                qspan=Span(qstart, qstart + 7),
                ispan=Span(0, 7),
                start_line=line,
                end_line=line,
            )
            for line, qstart in enumerate((0, 8, 16), 1)
        ]
        assert expected == detected
Example #11
0
 def test_merge_should_not_merge_repeated_matches_out_of_sequence(self):
     """Repeated matches of one rule at successive query positions stay separate."""
     gpl_rule = Rule(text_file='gpl-2.0_49.RULE', licenses=[u'gpl-2.0'])
     gpl_rule.rid = 2615

     # Each chunk matches the same rule positions (0-7) at a later place
     # in the query.
     chunks = [
         LicenseMatch(rule=gpl_rule, matcher=matcher, qspan=Span(qstart, qend), ispan=Span(0, 7))
         for matcher, qstart, qend in (
             ('chunk1', 0, 7),
             ('chunk2', 8, 15),
             ('chunk3', 16, 23),
         )
     ]
     first, second, third = chunks

     merged = merge_matches([first, second, third])
     assert [first, second, third] == merged
Example #12
0
    def test_merge_contiguous_contained_matches(self):
        """Three back-to-back matches of one rule merge into one spanning match."""
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        start = LicenseMatch(rule=rule, qspan=Span(0, 2), ispan=Span(0, 2))
        middle = LicenseMatch(rule=rule, qspan=Span(3, 6), ispan=Span(3, 6))
        end = LicenseMatch(rule=rule, qspan=Span(7, 8), ispan=Span(7, 8))

        merged = merge_matches([start, middle, end])

        expected = [
            LicenseMatch(rule=rule, qspan=Span(0, 8), ispan=Span(0, 8)),
        ]
        assert expected == merged
Example #13
0
    def test_merge_contiguous_touching_matches_in_sequence(self):
        """Two touching, in-sequence matches merge; the first result spans both."""
        rule = Rule(_text='r1', licenses=['apache-2.0', 'gpl'])
        leading = LicenseMatch(rule=rule, qspan=Span(0, 2), ispan=Span(0, 2))
        trailing = LicenseMatch(rule=rule, qspan=Span(3, 6), ispan=Span(3, 6))

        merged = merge_matches([leading, trailing])
        # Only the first returned match is checked.
        first_merged = merged[0]

        expected = LicenseMatch(rule=rule, qspan=Span(0, 6), ispan=Span(0, 6))
        assert expected == first_merged
Example #14
0
    def test_merge_does_not_merge_overlapping_matches_of_same_rules_if_in_not_sequence(self):
        """Matches of one rule sharing an ispan but far apart in the query stay separate."""
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        # Both matches cover rule positions 1-3 but at different query places.
        near = LicenseMatch(rule=rule, qspan=Span(1, 3), ispan=Span(1, 3))
        far = LicenseMatch(rule=rule, qspan=Span(14, 20), ispan=Span(1, 3))

        merged = merge_matches([near, far])

        # Order is not significant here, so both sides are sorted.
        assert sorted([near, far]) == sorted(merged)
Example #15
0
    def test_merge_does_not_merge_overlapping_matches_of_different_rules_with_different_licensing(self):
        """Overlapping matches of rules with different licenses are left as-is."""
        first_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        second_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl2'])

        first = LicenseMatch(rule=first_rule, qspan=Span(0, 5), ispan=Span(0, 5))
        second = LicenseMatch(rule=second_rule, qspan=Span(1, 6), ispan=Span(1, 6))

        expected = [first, second]
        merged = merge_matches([first, second])
        assert expected == merged
Example #16
0
    def test_merge_does_merge_non_contiguous_matches_in_sequence(self):
        """Two non-contiguous matches plus one bridging them merge into one."""
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        low = LicenseMatch(rule=rule, qspan=Span(0, 2), ispan=Span(0, 2))
        high = LicenseMatch(rule=rule, qspan=Span(4, 6), ispan=Span(4, 6))
        # This match overlaps both of the others.
        bridge = LicenseMatch(rule=rule, qspan=Span(1, 6), ispan=Span(1, 6))

        merged = merge_matches([low, high, bridge])

        expected = [
            LicenseMatch(rule=rule, qspan=Span(0, 6), ispan=Span(0, 6)),
        ]
        assert expected == merged
Example #17
0
    def test_LicenseMatch_equals(self):
        """Matches differing only by matcher are equal; differing qspans are not."""
        shared_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        base = LicenseMatch(
            rule=shared_rule, matcher='chunk1',
            qspan=Span(0, 7), ispan=Span(0, 7),
            start_line=1, end_line=1,
        )
        same_spans = LicenseMatch(
            rule=shared_rule, matcher='chunk2',
            qspan=Span(0, 7), ispan=Span(0, 7),
            start_line=1, end_line=1,
        )
        assert base == same_spans

        # Different query span and lines from the base match.
        elsewhere = LicenseMatch(
            rule=shared_rule, matcher='chunk3',
            qspan=Span(16, 23), ispan=Span(0, 7),
            start_line=3, end_line=3,
        )
        assert base != elsewhere
Example #18
0
    def test_LicenseMatch_score_25_with_stored_relevance(self):
        """A three-position match on a 6-long, relevance-50 rule scores 25."""
        rule = Rule(text_file='r1', license_expression='apache-2.0')
        rule.relevance = 50
        rule.length = 6

        matched_query = Span(0, 2)
        matched_rule = Span(0, 2)
        match = LicenseMatch(rule=rule, qspan=matched_query, ispan=matched_rule)

        # NB: no query object is attached to this match.
        computed = match.score()
        assert computed == 25
Example #19
0
    def test_merge_does_not_merge_overlapping_matches_of_same_rules_if_in_sequence_with_gaps_for_long_match(self):
        """In-sequence matches separated by a gap merge into one gapped match.

        NOTE(review): despite the "does_not_merge" in the name, the
        assertion expects a single merged match whose spans keep the gap.
        """
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        rule.length = 20
        before_gap = LicenseMatch(rule=rule, qspan=Span(1, 10), ispan=Span(1, 10))
        after_gap = LicenseMatch(rule=rule, qspan=Span(14, 20), ispan=Span(14, 20))

        gapped_qspan = Span(1, 10) | Span(14, 20)
        gapped_ispan = Span(1, 10) | Span(14, 20)
        expected = [
            LicenseMatch(rule=rule, qspan=gapped_qspan, ispan=gapped_ispan),
        ]

        merged = merge_matches([before_gap, after_gap])
        assert expected == merged
Example #20
0
    def test_merge_does_not_merge_contained_matches_of_different_rules_with_same_licensing(self):
        """Identical-span matches of two different rules are both kept by merging."""
        first_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        second_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])

        first = LicenseMatch(rule=first_rule, qspan=Span(1, 6), ispan=Span(1, 6))
        second = LicenseMatch(rule=second_rule, qspan=Span(1, 6), ispan=Span(1, 6))

        merged = merge_matches([first, second])

        # Order is not significant here, so both sides are sorted.
        assert sorted([first, second]) == sorted(merged)
Example #21
0
    def test_combine_matches_with_same_rules(self):
        """Combining two overlapping matches of one rule unions their spans."""
        rule = Rule(text_file='r1', license_expression='apache-2.0 OR gpl')

        left = LicenseMatch(rule=rule, qspan=Span(0, 5), ispan=Span(0, 5))
        right = LicenseMatch(rule=rule, qspan=Span(1, 6), ispan=Span(1, 6))

        combined = left.combine(right)

        # Both the query and rule spans should now run from 0 to 6.
        assert Span(0, 6) == combined.qspan
        assert Span(0, 6) == combined.ispan
Example #22
0
 def test_filter_matches_filters_multiple_nested_contained_matches_and_large_overlapping(self):
     """Nested contained matches and a largely overlapping match are filtered out."""
     rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
     outer = LicenseMatch(rule=rule, qspan=Span(0, 5), ispan=Span(0, 5))
     large_overlap = LicenseMatch(rule=rule, qspan=Span(1, 6), ispan=Span(1, 6))
     contained = LicenseMatch(rule=rule, qspan=Span(1, 4), ispan=Span(1, 4))
     in_contained = LicenseMatch(rule=rule, qspan=Span(2, 3), ispan=Span(2, 3))

     candidates = [outer, contained, in_contained, large_overlap]
     kept, discarded = filter_contained_matches(candidates)

     # Only the outer match survives; something must have been discarded.
     assert [outer] == kept
     assert discarded
Example #23
0
    def test_files_does_filter_contained_matches_of_different_rules_with_same_licensing(self):
        """Of two identical-span matches from different rules, only the second is kept."""
        first_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        second_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])

        first = LicenseMatch(rule=first_rule, qspan=Span(1, 6), ispan=Span(1, 6))
        second = LicenseMatch(rule=second_rule, qspan=Span(1, 6), ispan=Span(1, 6))

        kept, discarded = filter_contained_matches([first, second])

        assert [second] == kept
        assert [first] == discarded
Example #24
0
    def test_merge_does_not_merges_matches_with_same_spans_if_rules_are_different(self):
        """Merging joins matches of one rule but leaves another rule's match alone."""
        first_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        first_short = LicenseMatch(rule=first_rule, qspan=Span(0, 2), ispan=Span(0, 2))
        first_long = LicenseMatch(rule=first_rule, qspan=Span(1, 6), ispan=Span(1, 6))

        # Same spans as first_short, but against a different rule.
        second_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])
        second_short = LicenseMatch(rule=second_rule, qspan=Span(0, 2), ispan=Span(0, 2))

        merged = merge_matches([first_short, second_short, first_long])

        expected = sorted([
            LicenseMatch(rule=first_rule, qspan=Span(0, 6), ispan=Span(0, 6)),
            second_short,
        ])
        assert expected == sorted(merged)
Example #25
0
 def test_merge_merges_contained_and_overlapping_match(self):
     """A match, one it contains and one overlapping it merge into a single match."""
     rule = Rule(text_file='r1', license_expression='apache-2.0 OR gpl')
     outer = LicenseMatch(rule=rule, qspan=Span(0, 5), ispan=Span(0, 5))
     contained = LicenseMatch(rule=rule, qspan=Span(1, 4), ispan=Span(1, 4))
     overlapping = LicenseMatch(rule=rule, qspan=Span(1, 6), ispan=Span(1, 6))

     # Sanity-check the containment relations before merging.
     assert contained in overlapping
     assert contained in outer

     merged = merge_matches([outer, contained, overlapping])
     expected = [
         LicenseMatch(rule=rule, qspan=Span(0, 6), ispan=Span(0, 6)),
     ]
     assert expected == merged
Example #26
0
    def test_filter_matches_filters_matches_with_medium_overlap_only_if_license_are_the_same(self):
        """An overlapping match is dropped for the same rule but kept for another rule."""
        apache_rule = Rule(text_file='r1', licenses=['apache-1.1'])
        apache_main = LicenseMatch(rule=apache_rule, qspan=Span(0, 10), ispan=Span(0, 10))
        apache_overlap = LicenseMatch(rule=apache_rule, qspan=Span(3, 11), ispan=Span(3, 11))

        mixed_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
        mixed_overlap = LicenseMatch(rule=mixed_rule, qspan=Span(7, 15), ispan=Span(7, 15))

        candidates = [apache_main, apache_overlap, mixed_overlap]
        kept, discarded = filter_contained_matches(candidates)

        # Order is not significant here, so both sides are sorted.
        assert sorted([apache_main, mixed_overlap]) == sorted(kept)
        assert discarded
Example #27
0
    def test_filter_prefers_longer_overlaping_matches(self):
        """Among three overlapping matches, only the longest one is kept."""
        first_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        second_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])

        overlap = LicenseMatch(rule=first_rule, qspan=Span(0, 5), ispan=Span(0, 5))
        shorter = LicenseMatch(rule=first_rule, qspan=Span(1, 6), ispan=Span(1, 6))
        # The longest of the three, and from the other rule.
        longest = LicenseMatch(rule=second_rule, qspan=Span(1, 8), ispan=Span(1, 8))

        kept, discarded = filter_contained_matches([overlap, shorter, longest])

        assert [longest] == kept
        assert discarded
Example #28
0
    def test_combine_raise_TypeError_for_matches_of_different_rules(self):
        """Combining matches that belong to different rules must raise TypeError.

        The test fails explicitly when combine() returns without raising;
        a bare try/except would pass silently in that case.
        """
        r1 = Rule(text_file='r1', license_expression='apache-2.0 OR gpl')
        r2 = Rule(text_file='r2', license_expression='apache-2.0 OR gpl2')

        m1 = LicenseMatch(rule=r1, qspan=Span(0, 5), ispan=Span(0, 5))
        m2 = LicenseMatch(rule=r2, qspan=Span(1, 6), ispan=Span(1, 6))

        try:
            m1.combine(m2)
        except TypeError:
            # Expected: matches of different rules cannot be combined.
            pass
        else:
            raise AssertionError(
                'combine() did not raise TypeError for matches of different rules')
Example #29
0
    def test_filter_matches_filters_non_contiguous_or_overlapping__but_contained_matches(self):
        """Every match contained in the widest match is filtered out."""
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        small_low = LicenseMatch(rule=rule, qspan=Span(1, 2), ispan=Span(1, 2))
        small_high = LicenseMatch(rule=rule, qspan=Span(3, 6), ispan=Span(3, 6))
        inner = LicenseMatch(rule=rule, qspan=Span(1, 6), ispan=Span(1, 6))
        # The widest match: it covers all of the others.
        widest = LicenseMatch(rule=rule, qspan=Span(0, 7), ispan=Span(0, 7))
        inner_duplicate = LicenseMatch(rule=rule, qspan=Span(1, 6), ispan=Span(1, 6))

        candidates = [small_low, small_high, inner, widest, inner_duplicate]
        kept, discarded = filter_contained_matches(candidates)

        assert [widest] == kept
        assert discarded
Example #30
0
    def test_filter_does_filter_overlaping_matches_with_same_licensings(self):
        """Of three overlapping matches with one licensing, only the first is kept."""
        first_rule = Rule(text_file='r1', license_expression='apache-2.0 OR gpl')
        second_rule = Rule(text_file='r2', license_expression='apache-2.0 OR gpl')

        overlap = LicenseMatch(rule=first_rule, qspan=Span(0, 5), ispan=Span(0, 5))
        # Two matches with identical spans, one per rule.
        twin_first_rule = LicenseMatch(rule=first_rule, qspan=Span(1, 6), ispan=Span(1, 6))
        twin_second_rule = LicenseMatch(rule=second_rule, qspan=Span(1, 6), ispan=Span(1, 6))

        candidates = [overlap, twin_first_rule, twin_second_rule]
        kept, discarded = filter_contained_matches(candidates)

        assert [overlap] == kept
        assert discarded