Ejemplo n.º 1
0
 def test_filter_matches_filters_multiple_nested_contained_matches_and_large_overlapping(self):
     """
     Pass filter_contained_matches four matches of a single rule: a 0-5
     match, a 1-4 match, a 2-3 match and a 1-6 match. Only the 0-5 match
     must be returned and the discarded list must not be empty.
     """
     rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

     def build(start, end):
         # qspan and ispan each receive their own Span instance.
         return LicenseMatch(rule=rule, qspan=Span(start, end), ispan=Span(start, end))

     outer = build(0, 5)
     overlapping = build(1, 6)
     nested = build(1, 4)
     deeply_nested = build(2, 3)

     candidates = [outer, nested, deeply_nested, overlapping]
     kept, dropped = filter_contained_matches(candidates)

     assert [outer] == kept
     assert dropped
Ejemplo n.º 2
0
    def test_files_does_filter_contained_matches_of_different_rules_with_same_licensing(self):
        """
        Build two matches with identical 1-6 spans from two rules created
        with the same licenses list. The second match must be the only one
        returned and the first must be the only one discarded.
        """
        first_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        second_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])

        first = LicenseMatch(
            rule=first_rule,
            qspan=Span(1, 6),
            ispan=Span(1, 6),
        )
        second = LicenseMatch(
            rule=second_rule,
            qspan=Span(1, 6),
            ispan=Span(1, 6),
        )

        kept, dropped = filter_contained_matches([first, second])

        assert [second] == kept
        assert [first] == dropped
Ejemplo n.º 3
0
    def test_filter_matches_filters_matches_with_medium_overlap_only_if_license_are_the_same(self):
        """
        Use two matches of an 'apache-1.1' rule (0-10 and 3-11) plus one
        7-15 match of a rule with different licenses. The 0-10 and 7-15
        matches must be returned (compared after sorting) and the discarded
        list must not be empty.
        """
        apache_rule = Rule(text_file='r1', licenses=['apache-1.1'])
        leading = LicenseMatch(
            rule=apache_rule,
            qspan=Span(0, 10),
            ispan=Span(0, 10),
        )
        shifted = LicenseMatch(
            rule=apache_rule,
            qspan=Span(3, 11),
            ispan=Span(3, 11),
        )

        other_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
        trailing = LicenseMatch(
            rule=other_rule,
            qspan=Span(7, 15),
            ispan=Span(7, 15),
        )

        kept, dropped = filter_contained_matches([leading, shifted, trailing])

        expected = sorted([leading, trailing])
        assert expected == sorted(kept)
        assert dropped
Ejemplo n.º 4
0
    def test_filter_matches_filters_non_contiguous_or_overlapping__but_contained_matches(self):
        """
        Pass five matches of a single rule with spans 1-2, 3-6, 1-6, 0-7
        and 1-6 again. Only the 0-7 match must be returned and the
        discarded list must not be empty.
        """
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        def build(start, end):
            # qspan and ispan each receive their own Span instance.
            return LicenseMatch(rule=rule, qspan=Span(start, end), ispan=Span(start, end))

        head = build(1, 2)
        tail = build(3, 6)
        inner = build(1, 6)
        widest = build(0, 7)
        inner_again = build(1, 6)

        candidates = [head, tail, inner, widest, inner_again]
        kept, dropped = filter_contained_matches(candidates)

        assert [widest] == kept
        assert dropped
Ejemplo n.º 5
0
    def test_filter_prefers_longer_overlaping_matches(self):
        """
        Use a 0-5 and a 1-6 match from one rule plus a 1-8 match from a
        second rule built with the same licenses list. Only the 1-8 match
        must be returned and the discarded list must not be empty.
        """
        first_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        second_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])

        leading = LicenseMatch(
            rule=first_rule,
            qspan=Span(0, 5),
            ispan=Span(0, 5),
        )
        shorter = LicenseMatch(
            rule=first_rule,
            qspan=Span(1, 6),
            ispan=Span(1, 6),
        )
        longer = LicenseMatch(
            rule=second_rule,
            qspan=Span(1, 8),
            ispan=Span(1, 8),
        )

        kept, dropped = filter_contained_matches([leading, shorter, longer])

        assert [longer] == kept
        assert dropped
Ejemplo n.º 6
0
    def test_filter_does_filter_overlaping_matches_with_same_licensings(self):
        """
        Use a 0-5 and a 1-6 match from one rule plus a 1-6 match from a
        second rule built with the same license expression. Only the 0-5
        match must be returned and the discarded list must not be empty.
        """
        first_rule = Rule(text_file='r1', license_expression='apache-2.0 OR gpl')
        second_rule = Rule(text_file='r2', license_expression='apache-2.0 OR gpl')

        leading = LicenseMatch(
            rule=first_rule,
            qspan=Span(0, 5),
            ispan=Span(0, 5),
        )
        twin_a = LicenseMatch(
            rule=first_rule,
            qspan=Span(1, 6),
            ispan=Span(1, 6),
        )
        twin_b = LicenseMatch(
            rule=second_rule,
            qspan=Span(1, 6),
            ispan=Span(1, 6),
        )

        candidates = [leading, twin_a, twin_b]
        kept, dropped = filter_contained_matches(candidates)

        assert [leading] == kept
        assert dropped
Ejemplo n.º 7
0
    def test_filter_filters_matches_with_same_spans_if_licenses_are_identical_but_rule_differ(self):
        """
        Use a 0-2 and a 1-6 match from one 'apache-2.0' rule plus a 0-2
        match from a second 'apache-2.0' rule. Only the 1-6 match must be
        returned and the discarded list must not be empty.
        """
        first_rule = Rule(text_file='r1', licenses=['apache-2.0'])
        short_first = LicenseMatch(
            rule=first_rule,
            qspan=Span(0, 2),
            ispan=Span(0, 2),
        )
        long_first = LicenseMatch(
            rule=first_rule,
            qspan=Span(1, 6),
            ispan=Span(1, 6),
        )

        second_rule = Rule(text_file='r2', licenses=['apache-2.0'])
        short_second = LicenseMatch(
            rule=second_rule,
            qspan=Span(0, 2),
            ispan=Span(0, 2),
        )

        candidates = [short_first, short_second, long_first]
        kept, dropped = filter_contained_matches(candidates)

        assert [long_first] == kept
        assert dropped
Ejemplo n.º 8
0
    def test_filter_matches_does_filter_matches_with_contained_spans_if_licenses_are_different(self):
        """
        Use a 0-2 match and a 1-6 match from two distinct 'apache-2.0'
        rules plus a 0-2 match from an 'apache-1.1' rule. Only the 1-6
        match must be returned and the discarded list must not be empty.
        """
        rule_a = Rule(text_file='r1', licenses=['apache-2.0'])
        short_a = LicenseMatch(
            rule=rule_a,
            qspan=Span(0, 2),
            ispan=Span(0, 2),
        )

        rule_b = Rule(text_file='r2', licenses=['apache-2.0'])
        long_b = LicenseMatch(
            rule=rule_b,
            qspan=Span(1, 6),
            ispan=Span(1, 6),
        )

        rule_c = Rule(text_file='r3', licenses=['apache-1.1'])
        short_c = LicenseMatch(
            rule=rule_c,
            qspan=Span(0, 2),
            ispan=Span(0, 2),
        )

        kept, dropped = filter_contained_matches([short_a, long_b, short_c])

        assert [long_b] == kept
        assert dropped
Ejemplo n.º 9
0
    def test_filter_does_not_filter_multiple_contained_matches_across_rules(self):
        """
        Use four matches, each from its own rule built with the same
        licenses list, with spans 0-5, 1-2, 3-4 and 1-6. Only the 0-5 match
        must be returned; the discarded list is not checked here.
        """
        def build(text_file, start, end):
            # One fresh rule per match; qspan and ispan get separate Spans.
            rule = Rule(text_file=text_file, licenses=['apache-2.0', 'gpl'])
            return LicenseMatch(rule=rule, qspan=Span(start, end), ispan=Span(start, end))

        outer = build('r1', 0, 5)
        inner_low = build('r2', 1, 2)
        inner_high = build('r3', 3, 4)
        shifted = build('r5', 1, 6)

        candidates = [outer, inner_low, inner_high, shifted]
        kept, _unused = filter_contained_matches(candidates)

        assert [outer] == kept
Ejemplo n.º 10
0
    def test_filter_multiple_contained_matches(self):
        """
        Use four matches, each from its own rule built with the same
        licenses list, with spans 0-5, 1-2, 3-4 and 1-6. The 0-5 match must
        be the only one returned and the three others must make up the
        discarded list (compared after sorting).
        """
        def build(text_file, start, end):
            # One fresh rule per match; qspan and ispan get separate Spans.
            rule = Rule(text_file=text_file, licenses=['apache-2.0', 'gpl'])
            return LicenseMatch(rule=rule, qspan=Span(start, end), ispan=Span(start, end))

        outer = build('r1', 0, 5)
        inner_low = build('r2', 1, 2)
        inner_high = build('r3', 3, 4)
        shifted = build('r5', 1, 6)

        candidates = [outer, inner_low, inner_high, shifted]
        kept, dropped = filter_contained_matches(candidates)

        assert [outer] == kept
        expected_dropped = sorted([shifted, inner_low, inner_high])
        assert expected_dropped == sorted(dropped)
Ejemplo n.º 11
0
    def test_filter_matches_filters_non_contiguous_or_overlapping_contained_matches_with_touching_boundaries(self):
        """
        Use six matches, each from its own rule built with the same
        licenses list, with spans 0-2, 3-7, 0-6, 1-7, 1-6 and 0-7. Only the
        0-7 match must be returned and the discarded list must not be empty.
        """
        def build(text_file, start, end):
            # One fresh rule per match; qspan and ispan get separate Spans.
            rule = Rule(text_file=text_file, licenses=['apache-2.0', 'gpl'])
            return LicenseMatch(rule=rule, qspan=Span(start, end), ispan=Span(start, end))

        # Matches are created in the same order as the rules r1, r2, r3, r6, r5, r4.
        left = build('r1', 0, 2)
        right = build('r2', 3, 7)
        almost_full_left = build('r3', 0, 6)
        almost_full_right = build('r6', 1, 7)
        middle = build('r5', 1, 6)
        full = build('r4', 0, 7)

        candidates = [left, right, almost_full_left, full, middle, almost_full_right]
        kept, dropped = filter_contained_matches(candidates)

        assert [full] == kept
        assert dropped