Example #1
0
    def test_non_contiguous_or_overlapping_contained_matches_touching_boundaries_are_filtered(
            self):
        """
        Build six matches, each on its own rule, with query positions
        0-2, 3-7, 0-6, 0-7, 1-6 and 1-7, and expect
        filter_overlapping_matches to return only the 0-7 match.
        """
        def build_match(rule_name, start, end):
            # One dedicated rule per match; all rules list the same licenses.
            rule = Rule(text_file=rule_name, licenses=['apache-2.0', 'gpl'])
            position = analysis.Token(start=start, end=end)
            return LicenseMatch(rule=rule, query_position=position, score=100)

        # Objects are created in the order r1, r2, r3, r6, r5, r4.
        m1 = build_match('r1', 0, 2)
        m2 = build_match('r2', 3, 7)
        m3 = build_match('r3', 0, 6)
        m6 = build_match('r6', 1, 7)
        m5 = build_match('r5', 1, 6)
        m4 = build_match('r4', 0, 7)

        candidates = [m1, m2, m3, m4, m5, m6]
        result = detect.filter_overlapping_matches(candidates)
        assert [m4] == result
Example #2
0
    def test_multiple_contained_matches_are_filtered(self):
        """
        Pass four matches (positions 0-5, 1-2, 3-4 and 1-6, one rule each)
        to filter_overlapping_matches and expect only the 0-5 and 1-6
        matches back, in that order.
        """
        # (rule text_file, start, end) for each match, in creation order.
        specs = (
            ('r1', 0, 5),
            ('r2', 1, 2),
            ('r3', 3, 4),
            ('r5', 1, 6),
        )
        matches = [
            LicenseMatch(
                rule=Rule(text_file=name, licenses=['apache-2.0', 'gpl']),
                query_position=analysis.Token(start=start, end=end),
                score=100)
            for name, start, end in specs
        ]
        m1, contained1, contained2, m5 = matches

        result = detect.filter_overlapping_matches(
            [m1, contained1, contained2, m5])
        assert [m1, m5] == result
    def test_contiguous_non_overlapping_matches_are_not_filtered(self):
        """
        Three matches sharing one rule, at positions 0-2, 3-6 and 1-6:
        the expected output of filter_overlapping_matches is the 0-2 and
        1-6 matches, in that order.
        """
        shared_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        def match_at(start, end):
            # All matches here reuse the same rule and score.
            position = analysis.Token(start=start, end=end)
            return LicenseMatch(
                rule=shared_rule, query_position=position, score=100)

        m1 = match_at(0, 2)
        m2 = match_at(3, 6)
        m5 = match_at(1, 6)

        candidates = [m1, m2, m5]
        result = detect.filter_overlapping_matches(candidates)
        assert [m1, m5] == result
    def test_contained_matches_are_filtered(self):
        """
        One match at position 0-5 plus two matches at the identical position
        1-6, all on the same rule: the expected result is the 0-5 match
        followed by the second of the two 1-6 matches.
        """
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        new_match = lambda start, end: LicenseMatch(
            rule=rule,
            query_position=analysis.Token(start=start, end=end),
            score=100)

        contained1 = new_match(0, 5)
        same_span1 = new_match(1, 6)
        same_span2 = new_match(1, 6)

        candidates = [contained1, same_span1, same_span2]
        result = detect.filter_overlapping_matches(candidates)
        assert [contained1, same_span2] == result
    def test_single_contained_matche_is_filtered(self):
        """
        Matches at positions 0-5, 1-4 and 1-6 on a single rule: the 1-4
        match is expected to be absent from the filter_overlapping_matches
        result, leaving the 0-5 and 1-6 matches.
        """
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        def match_at(start, end):
            # Same rule and score for every match in this test.
            return LicenseMatch(
                rule=rule,
                query_position=analysis.Token(start=start, end=end),
                score=100,
            )

        m1 = match_at(0, 5)
        contained = match_at(1, 4)
        m5 = match_at(1, 6)

        result = detect.filter_overlapping_matches([m1, contained, m5])
        assert [m1, m5] == result
    def test_matches_with_partially_overlapping_spans_are_merged_if_license_are_the_same(self):
        """
        Two matches on an 'apache-1.1' rule (positions 0-10 and 1-6) and one
        match on a 'gpl'/'apache-2.0' rule (position 5-15): the expected
        result is the 0-10 match followed by the 5-15 match.
        """
        def match_for(rule, start, end):
            position = analysis.Token(start=start, end=end)
            return LicenseMatch(rule=rule, query_position=position, score=100)

        apache_rule = Rule(text_file='r1', licenses=['apache-1.1'])
        m1 = match_for(apache_rule, 0, 10)
        m2 = match_for(apache_rule, 1, 6)

        # The second rule is created only after the first two matches exist.
        mixed_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
        m3 = match_for(mixed_rule, 5, 15)

        candidates = [m1, m2, m3]
        result = detect.filter_overlapping_matches(candidates)
        assert [m1, m3] == result
    def test_matches_with_same_span_are_filtered_if_licenses_are_different(self):
        """
        Two matches at the same position 0-2 on rules with different
        licenses ('apache-2.0' and 'apache-1.1'), plus a 1-6 match on the
        first rule: the expected result is the 'apache-1.1' match followed
        by the 1-6 match.
        """
        def match_for(rule, start, end):
            position = analysis.Token(start=start, end=end)
            return LicenseMatch(rule=rule, query_position=position, score=100)

        apache_20_rule = Rule(text_file='r1', licenses=['apache-2.0'])
        m1 = match_for(apache_20_rule, 0, 2)

        apache_11_rule = Rule(text_file='r2', licenses=['apache-1.1'])
        m2 = match_for(apache_11_rule, 0, 2)

        # The last match goes back to the first rule.
        m5 = match_for(apache_20_rule, 1, 6)

        candidates = [m1, m2, m5]
        result = detect.filter_overlapping_matches(candidates)
        assert [m2, m5] == result
Example #8
0
    def test_single_contained_matche_is_filtered(self):
        """
        Run filter_overlapping_matches on three same-rule matches positioned
        at 0-5, 1-4 and 1-6, and expect the 0-5 and 1-6 matches to be
        returned without the 1-4 one.
        """
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        # Keyword arguments common to every match built below.
        common = dict(rule=rule, score=100)

        m1 = LicenseMatch(
            query_position=analysis.Token(start=0, end=5), **common)
        contained = LicenseMatch(
            query_position=analysis.Token(start=1, end=4), **common)
        m5 = LicenseMatch(
            query_position=analysis.Token(start=1, end=6), **common)

        candidates = [m1, contained, m5]
        result = detect.filter_overlapping_matches(candidates)
        assert [m1, m5] == result
Example #9
0
    def test_non_contiguous_matches_are_not_filtered(self):
        """
        Three matches on one rule at positions 0-2, 4-6 and 1-6: the
        expected result of filter_overlapping_matches is the 0-2 match
        followed by the 1-6 match.
        """
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        def match_at(start, end):
            # Every match shares the rule above and a score of 100.
            position = analysis.Token(start=start, end=end)
            return LicenseMatch(rule=rule, query_position=position, score=100)

        m1 = match_at(0, 2)
        m2 = match_at(4, 6)
        m5 = match_at(1, 6)

        candidates = [m1, m2, m5]
        result = detect.filter_overlapping_matches(candidates)
        assert [m1, m5] == result
Example #10
0
    def test_multiple_contained_matches_are_filtered(self):
        """
        Four matches, one rule each, at positions 0-5, 1-2, 3-4 and 1-6:
        the 1-2 and 3-4 matches are expected to be missing from the
        filter_overlapping_matches result.
        """
        def build_match(rule_name, start, end):
            # A fresh rule per match; licenses are identical across rules.
            rule = Rule(text_file=rule_name, licenses=['apache-2.0', 'gpl'])
            return LicenseMatch(
                rule=rule,
                query_position=analysis.Token(start=start, end=end),
                score=100,
            )

        m1 = build_match('r1', 0, 5)
        contained1 = build_match('r2', 1, 2)
        contained2 = build_match('r3', 3, 4)
        m5 = build_match('r5', 1, 6)

        candidates = [m1, contained1, contained2, m5]
        result = detect.filter_overlapping_matches(candidates)
        assert [m1, m5] == result
Example #11
0
    def test_matches_with_partially_overlapping_spans_are_merged_if_license_are_the_same(
            self):
        """
        Matches at 0-10 and 1-6 share an 'apache-1.1' rule; a third match at
        5-15 uses a 'gpl'/'apache-2.0' rule. The expected result keeps the
        0-10 and 5-15 matches, in that order.
        """
        first_rule = Rule(text_file='r1', licenses=['apache-1.1'])
        wide_position = analysis.Token(start=0, end=10)
        m1 = LicenseMatch(rule=first_rule, query_position=wide_position,
                          score=100)
        inner_position = analysis.Token(start=1, end=6)
        m2 = LicenseMatch(rule=first_rule, query_position=inner_position,
                          score=100)

        second_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
        trailing_position = analysis.Token(start=5, end=15)
        m3 = LicenseMatch(rule=second_rule, query_position=trailing_position,
                          score=100)

        candidates = [m1, m2, m3]
        result = detect.filter_overlapping_matches(candidates)
        assert [m1, m3] == result
Example #12
0
    def test_matches_with_same_span_are_filtered_if_licenses_are_different(
            self):
        """
        Rules 'r1' ('apache-2.0') and 'r2' ('apache-1.1') each get a match
        at position 0-2, and 'r1' gets another at 1-6. The expected result
        is the 'r2' match followed by the 1-6 match.
        """
        r1 = Rule(text_file='r1', licenses=['apache-2.0'])
        first_position = analysis.Token(start=0, end=2)
        m1 = LicenseMatch(rule=r1, query_position=first_position, score=100)

        r2 = Rule(text_file='r2', licenses=['apache-1.1'])
        # A separate Token with the same start/end as the one used for m1.
        second_position = analysis.Token(start=0, end=2)
        m2 = LicenseMatch(rule=r2, query_position=second_position, score=100)

        last_position = analysis.Token(start=1, end=6)
        m5 = LicenseMatch(rule=r1, query_position=last_position, score=100)

        candidates = [m1, m2, m5]
        result = detect.filter_overlapping_matches(candidates)
        assert [m2, m5] == result
Example #13
0
    def test_contained_matches_are_filtered(self):
        """
        Given a 0-5 match and two separate matches both at 1-6 (all on one
        rule), expect filter_overlapping_matches to return the 0-5 match
        and the later-created 1-6 match.
        """
        rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        # Keyword arguments shared by all three matches.
        common = dict(rule=rule, score=100)

        contained1 = LicenseMatch(
            query_position=analysis.Token(start=0, end=5), **common)
        same_span1 = LicenseMatch(
            query_position=analysis.Token(start=1, end=6), **common)
        same_span2 = LicenseMatch(
            query_position=analysis.Token(start=1, end=6), **common)

        candidates = [contained1, same_span1, same_span2]
        result = detect.filter_overlapping_matches(candidates)
        assert [contained1, same_span2] == result
Example #14
0
    def test_non_contiguous_or_overlapping_contained_matches_touching_boundaries_are_filtered(self):
        """
        Six matches with one rule each, positioned at 0-2, 3-7, 0-6, 0-7,
        1-6 and 1-7: only the 0-7 match is expected in the
        filter_overlapping_matches result.
        """
        # (rule text_file, start, end), listed in creation order.
        specs = (
            ('r1', 0, 2),
            ('r2', 3, 7),
            ('r3', 0, 6),
            ('r6', 1, 7),
            ('r5', 1, 6),
            ('r4', 0, 7),
        )
        by_rule = {}
        for rule_name, start, end in specs:
            rule = Rule(text_file=rule_name, licenses=['apache-2.0', 'gpl'])
            by_rule[rule_name] = LicenseMatch(
                rule=rule,
                query_position=analysis.Token(start=start, end=end),
                score=100)

        # The input order differs from the creation order: 1, 2, 3, 4, 5, 6.
        candidates = [by_rule[name] for name in ('r1', 'r2', 'r3', 'r4', 'r5', 'r6')]
        m4 = by_rule['r4']

        result = detect.filter_overlapping_matches(candidates)
        assert [m4] == result