Ejemplo n.º 1
0
    def test_multiple_contained_matches_are_filtered(self):
        """Check that several matches inside a larger one are discarded.

        The input holds a match at 0-5, two smaller matches at 1-2 and 3-4,
        and a match at 1-6. ``filter_overlapping_matches`` is expected to
        return only the 0-5 and 1-6 matches, in that order.
        """
        outer_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        outer_span = analysis.Token(start=0, end=5)
        outer = LicenseMatch(
            rule=outer_rule, query_position=outer_span, score=100)

        inner_rule_a = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])
        inner_span_a = analysis.Token(start=1, end=2)
        inner_a = LicenseMatch(
            rule=inner_rule_a, query_position=inner_span_a, score=100)

        inner_rule_b = Rule(text_file='r3', licenses=['apache-2.0', 'gpl'])
        inner_span_b = analysis.Token(start=3, end=4)
        inner_b = LicenseMatch(
            rule=inner_rule_b, query_position=inner_span_b, score=100)

        shifted_rule = Rule(text_file='r5', licenses=['apache-2.0', 'gpl'])
        shifted_span = analysis.Token(start=1, end=6)
        shifted = LicenseMatch(
            rule=shifted_rule, query_position=shifted_span, score=100)

        candidates = [outer, inner_a, inner_b, shifted]
        kept = detect.filter_overlapping_matches(candidates)
        expected = [outer, shifted]
        assert expected == kept
Ejemplo n.º 2
0
    def test_match_is_same(self):
        """Check ``is_same`` for two matches with identical spans.

        The two rules list the same licenses in a different order; the
        comparison is expected to hold in both directions.
        """
        first_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        first_span = analysis.Token(start=0, end=2)
        first = LicenseMatch(rule=first_rule, query_position=first_span)

        second_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
        second_span = analysis.Token(start=0, end=2)
        second = LicenseMatch(rule=second_rule, query_position=second_span)

        forward = first.is_same(second)
        assert forward
        backward = second.is_same(first)
        assert backward
Ejemplo n.º 3
0
    def test_non_contiguous_matches_are_not_filtered(self):
        """Run ``filter_matches`` on matches at 0-2, 4-6 and 1-6.

        All three matches share one rule; the expected output is the 0-2
        match followed by the 1-6 match.
        """
        shared_rule = Rule(licenses=['apache-2.0', 'gpl'])

        head_span = analysis.Token(start=0, end=2)
        head = LicenseMatch(rule=shared_rule, query_position=head_span)

        tail_span = analysis.Token(start=4, end=6)
        tail = LicenseMatch(rule=shared_rule, query_position=tail_span)

        wide_span = analysis.Token(start=1, end=6)
        wide = LicenseMatch(rule=shared_rule, query_position=wide_span)

        expected = [head, wide]
        self.assertEqual(expected, detect.filter_matches([head, tail, wide]))
Ejemplo n.º 4
0
    def test_matches_with_same_span_are_kept_if_licenses_are_different(self):
        """Two 0-2 matches from rules with different licenses both survive.

        A third match at 1-6 reuses the first rule; ``filter_matches`` is
        expected to return all three matches in input order.
        """
        apache2_rule = Rule(licenses=['apache-2.0'])
        apache2_span = analysis.Token(start=0, end=2)
        apache2 = LicenseMatch(rule=apache2_rule, query_position=apache2_span)

        apache11_rule = Rule(licenses=['apache-1.1'])
        apache11_span = analysis.Token(start=0, end=2)
        apache11 = LicenseMatch(
            rule=apache11_rule, query_position=apache11_span)

        wide_span = analysis.Token(start=1, end=6)
        wide = LicenseMatch(rule=apache2_rule, query_position=wide_span)

        expected = [apache2, apache11, wide]
        self.assertEqual(
            expected, detect.filter_matches([apache2, apache11, wide]))
Ejemplo n.º 5
0
    def test_single_contained_matche_is_filtered(self):
        """A 1-4 match sitting inside a 0-5 match is removed.

        The input also carries a 1-6 match; ``filter_matches`` is expected to
        return the 0-5 and 1-6 matches only.
        """
        shared_rule = Rule(licenses=['apache-2.0', 'gpl'])

        outer_span = analysis.Token(start=0, end=5)
        outer = LicenseMatch(rule=shared_rule, query_position=outer_span)

        inner_span = analysis.Token(start=1, end=4)
        inner = LicenseMatch(rule=shared_rule, query_position=inner_span)

        shifted_span = analysis.Token(start=1, end=6)
        shifted = LicenseMatch(rule=shared_rule, query_position=shifted_span)

        kept = detect.filter_matches([outer, inner, shifted])
        expected = [outer, shifted]
        self.assertEqual(expected, kept)
Ejemplo n.º 6
0
    def test_overlapping_matches_are_filtered(self):
        """Of two matches with the identical 1-6 span, only the first is kept.

        The input is a 0-5 match followed by the two 1-6 matches, all from
        one rule; ``filter_matches`` is expected to return the 0-5 match and
        the first 1-6 match.
        """
        shared_rule = Rule(licenses=['apache-2.0', 'gpl'])

        base_span = analysis.Token(start=0, end=5)
        base = LicenseMatch(rule=shared_rule, query_position=base_span)

        twin_span_a = analysis.Token(start=1, end=6)
        twin_a = LicenseMatch(rule=shared_rule, query_position=twin_span_a)

        twin_span_b = analysis.Token(start=1, end=6)
        twin_b = LicenseMatch(rule=shared_rule, query_position=twin_span_b)

        kept = detect.filter_matches([base, twin_a, twin_b])
        expected = [base, twin_a]
        self.assertEqual(expected, kept)
Ejemplo n.º 7
0
    def test_non_contiguous_matches_are_not_filtered(self):
        """Run ``filter_overlapping_matches`` on matches at 0-2, 4-6 and 1-6.

        All matches share one rule and a score of 100; the expected output
        is the 0-2 match followed by the 1-6 match.
        """
        shared_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        head_span = analysis.Token(start=0, end=2)
        head = LicenseMatch(
            rule=shared_rule, query_position=head_span, score=100)

        tail_span = analysis.Token(start=4, end=6)
        tail = LicenseMatch(
            rule=shared_rule, query_position=tail_span, score=100)

        wide_span = analysis.Token(start=1, end=6)
        wide = LicenseMatch(
            rule=shared_rule, query_position=wide_span, score=100)

        kept = detect.filter_overlapping_matches([head, tail, wide])
        expected = [head, wide]
        assert expected == kept
Ejemplo n.º 8
0
    def test_single_contained_matche_is_filtered(self):
        """A 1-4 match sitting inside a 0-5 match is removed.

        The input also carries a 1-6 match; every match uses the same rule
        and a score of 100. ``filter_overlapping_matches`` is expected to
        return the 0-5 and 1-6 matches only.
        """
        shared_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        outer_span = analysis.Token(start=0, end=5)
        outer = LicenseMatch(
            rule=shared_rule, query_position=outer_span, score=100)

        inner_span = analysis.Token(start=1, end=4)
        inner = LicenseMatch(
            rule=shared_rule, query_position=inner_span, score=100)

        shifted_span = analysis.Token(start=1, end=6)
        shifted = LicenseMatch(
            rule=shared_rule, query_position=shifted_span, score=100)

        kept = detect.filter_overlapping_matches([outer, inner, shifted])
        expected = [outer, shifted]
        assert expected == kept
Ejemplo n.º 9
0
    def test_matches_with_partially_overlapping_spans_are_merged_if_license_are_the_same(
            self):
        """Run ``filter_matches`` on two same-rule matches plus a third one.

        The 0-10 and 1-6 matches share the 'apache-1.1' rule; the 5-15 match
        comes from a rule with other licenses. The expected output is the
        0-10 match followed by the 5-15 match.
        """
        apache11_rule = Rule(licenses=['apache-1.1'])
        dual_rule = Rule(licenses=['gpl', 'apache-2.0'])

        long_span = analysis.Token(start=0, end=10)
        long_match = LicenseMatch(rule=apache11_rule, query_position=long_span)

        short_span = analysis.Token(start=1, end=6)
        short_match = LicenseMatch(
            rule=apache11_rule, query_position=short_span)

        trailing_span = analysis.Token(start=5, end=15)
        trailing = LicenseMatch(rule=dual_rule, query_position=trailing_span)

        expected = [long_match, trailing]
        self.assertEqual(
            expected,
            detect.filter_matches([long_match, short_match, trailing]))
Ejemplo n.º 10
0
    def test_non_contiguous_or_overlapping_contained_matches_are_filtered(
            self):
        """Only the widest match is expected back from ``filter_matches``.

        The input holds same-rule matches at 1-2, 3-6, 1-6, 0-7 and 1-6 (in
        that order); the expected output is the single 0-7 match.
        """
        shared_rule = Rule(licenses=['apache-2.0', 'gpl'])

        left_span = analysis.Token(start=1, end=2)
        left = LicenseMatch(rule=shared_rule, query_position=left_span)

        right_span = analysis.Token(start=3, end=6)
        right = LicenseMatch(rule=shared_rule, query_position=right_span)

        middle_span = analysis.Token(start=1, end=6)
        middle = LicenseMatch(rule=shared_rule, query_position=middle_span)

        widest_span = analysis.Token(start=0, end=7)
        widest = LicenseMatch(rule=shared_rule, query_position=widest_span)

        middle_again_span = analysis.Token(start=1, end=6)
        middle_again = LicenseMatch(
            rule=shared_rule, query_position=middle_again_span)

        expected = [widest]
        self.assertEqual(
            expected,
            detect.filter_matches([left, right, middle, widest, middle_again]))
Ejemplo n.º 11
0
    def test_matches_with_partially_overlapping_spans_are_merged_if_license_are_the_same(
            self):
        """Run ``filter_overlapping_matches`` on two same-rule matches plus
        a third one.

        The 0-10 and 1-6 matches share rule 'r1'; the 5-15 match comes from
        rule 'r2'. The expected output is the 0-10 match followed by the
        5-15 match.
        """
        apache11_rule = Rule(text_file='r1', licenses=['apache-1.1'])

        long_span = analysis.Token(start=0, end=10)
        long_match = LicenseMatch(
            rule=apache11_rule, query_position=long_span, score=100)

        short_span = analysis.Token(start=1, end=6)
        short_match = LicenseMatch(
            rule=apache11_rule, query_position=short_span, score=100)

        dual_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
        trailing_span = analysis.Token(start=5, end=15)
        trailing = LicenseMatch(
            rule=dual_rule, query_position=trailing_span, score=100)

        kept = detect.filter_overlapping_matches(
            [long_match, short_match, trailing])
        expected = [long_match, trailing]
        assert expected == kept
Ejemplo n.º 12
0
    def test_matches_with_same_span_are_filtered_if_licenses_are_different(
            self):
        """Of two 0-2 matches from different rules, only the second is kept.

        A third match at 1-6 reuses the first rule. The expected output of
        ``filter_overlapping_matches`` is the second 0-2 match followed by
        the 1-6 match.
        """
        apache2_rule = Rule(text_file='r1', licenses=['apache-2.0'])
        apache2_span = analysis.Token(start=0, end=2)
        apache2 = LicenseMatch(
            rule=apache2_rule, query_position=apache2_span, score=100)

        apache11_rule = Rule(text_file='r2', licenses=['apache-1.1'])
        apache11_span = analysis.Token(start=0, end=2)
        apache11 = LicenseMatch(
            rule=apache11_rule, query_position=apache11_span, score=100)

        wide_span = analysis.Token(start=1, end=6)
        wide = LicenseMatch(
            rule=apache2_rule, query_position=wide_span, score=100)

        kept = detect.filter_overlapping_matches([apache2, apache11, wide])
        expected = [apache11, wide]
        assert expected == kept
Ejemplo n.º 13
0
    def test_contained_matches_are_filtered(self):
        """Of two matches with the identical 1-6 span, only the last is kept.

        The input is a 0-5 match followed by the two 1-6 matches, all from
        one rule with a score of 100. ``filter_overlapping_matches`` is
        expected to return the 0-5 match and the second 1-6 match.
        """
        shared_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

        base_span = analysis.Token(start=0, end=5)
        base = LicenseMatch(
            rule=shared_rule, query_position=base_span, score=100)

        twin_span_a = analysis.Token(start=1, end=6)
        twin_a = LicenseMatch(
            rule=shared_rule, query_position=twin_span_a, score=100)

        twin_span_b = analysis.Token(start=1, end=6)
        twin_b = LicenseMatch(
            rule=shared_rule, query_position=twin_span_b, score=100)

        candidates = [base, twin_a, twin_b]
        kept = detect.filter_overlapping_matches(candidates)
        expected = [base, twin_b]
        assert expected == kept
Ejemplo n.º 14
0
    def test_match_is_not_same(self):
        """Exercise ``is_same`` against differing licenses and spans.

        Relative to a reference match at 0-2 with ['apache-1.0', 'gpl']:
        a match with other licenses is not the same, a match with the same
        licenses and span is the same, and a match with the same licenses
        at 1-2 is not the same. Each check is made in both directions.
        """
        reference_rule = Rule(text_file='r1', licenses=['apache-1.0', 'gpl'])
        reference_span = analysis.Token(start=0, end=2)
        reference = LicenseMatch(
            rule=reference_rule, query_position=reference_span)

        other_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
        other_span = analysis.Token(start=0, end=2)
        other_licenses = LicenseMatch(
            rule=other_rule, query_position=other_span)

        assert not reference.is_same(other_licenses)
        assert not other_licenses.is_same(reference)

        twin_rule = Rule(text_file='r3', licenses=['apache-1.0', 'gpl'])
        twin_span = analysis.Token(start=0, end=2)
        twin = LicenseMatch(rule=twin_rule, query_position=twin_span)

        assert reference.is_same(twin)
        assert twin.is_same(reference)

        moved_rule = Rule(text_file='r4', licenses=['apache-1.0', 'gpl'])
        moved_span = analysis.Token(start=1, end=2)
        moved = LicenseMatch(rule=moved_rule, query_position=moved_span)

        assert not reference.is_same(moved)
        assert not moved.is_same(reference)
Ejemplo n.º 15
0
    def test_match_is_not_same(self):
        """Exercise ``is_same`` against differing licenses and spans.

        Relative to a reference match at 0-2 with ['apache-1.0', 'gpl']:
        a match with other licenses is not the same, a match with the same
        licenses and span is the same, and a match with the same licenses
        at 1-2 is not the same. Each check is made in both directions.
        """
        reference_rule = Rule(licenses=['apache-1.0', 'gpl'])
        reference_span = analysis.Token(start=0, end=2)
        reference = LicenseMatch(
            rule=reference_rule, query_position=reference_span)

        other_rule = Rule(licenses=['gpl', 'apache-2.0'])
        other_span = analysis.Token(start=0, end=2)
        other_licenses = LicenseMatch(
            rule=other_rule, query_position=other_span)

        self.assertFalse(reference.is_same(other_licenses))
        self.assertFalse(other_licenses.is_same(reference))

        twin_rule = Rule(licenses=['apache-1.0', 'gpl'])
        twin_span = analysis.Token(start=0, end=2)
        twin = LicenseMatch(rule=twin_rule, query_position=twin_span)

        self.assertTrue(reference.is_same(twin))
        self.assertTrue(twin.is_same(reference))

        moved_rule = Rule(licenses=['apache-1.0', 'gpl'])
        moved_span = analysis.Token(start=1, end=2)
        moved = LicenseMatch(rule=moved_rule, query_position=moved_span)

        self.assertFalse(reference.is_same(moved))
        self.assertFalse(moved.is_same(reference))
Ejemplo n.º 16
0
    def test_non_contiguous_or_overlapping_contained_matches_touching_boundaries_are_filtered(
            self):
        """Only the widest match is expected back from the filter.

        Six matches, each from its own rule and scored 100, are passed to
        ``filter_overlapping_matches`` with spans 0-2, 3-7, 0-6, 0-7, 1-6
        and 1-7 (in that order). The expected output is the single 0-7
        match.
        """
        rule_0_2 = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
        span_0_2 = analysis.Token(start=0, end=2)
        match_0_2 = LicenseMatch(
            rule=rule_0_2, query_position=span_0_2, score=100)

        rule_3_7 = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])
        span_3_7 = analysis.Token(start=3, end=7)
        match_3_7 = LicenseMatch(
            rule=rule_3_7, query_position=span_3_7, score=100)

        rule_0_6 = Rule(text_file='r3', licenses=['apache-2.0', 'gpl'])
        span_0_6 = analysis.Token(start=0, end=6)
        match_0_6 = LicenseMatch(
            rule=rule_0_6, query_position=span_0_6, score=100)

        rule_1_7 = Rule(text_file='r6', licenses=['apache-2.0', 'gpl'])
        span_1_7 = analysis.Token(start=1, end=7)
        match_1_7 = LicenseMatch(
            rule=rule_1_7, query_position=span_1_7, score=100)

        rule_1_6 = Rule(text_file='r5', licenses=['apache-2.0', 'gpl'])
        span_1_6 = analysis.Token(start=1, end=6)
        match_1_6 = LicenseMatch(
            rule=rule_1_6, query_position=span_1_6, score=100)

        rule_0_7 = Rule(text_file='r4', licenses=['apache-2.0', 'gpl'])
        span_0_7 = analysis.Token(start=0, end=7)
        match_0_7 = LicenseMatch(
            rule=rule_0_7, query_position=span_0_7, score=100)

        candidates = [
            match_0_2, match_3_7, match_0_6, match_0_7, match_1_6, match_1_7,
        ]
        kept = detect.filter_overlapping_matches(candidates)
        expected = [match_0_7]
        assert expected == kept
Ejemplo n.º 17
0
    def test_match_is_same(self):
        """Check ``is_same`` for two matches with identical spans.

        The two rules list the same licenses in a different order; the
        comparison is expected to hold in both directions.
        """
        first_rule = Rule(licenses=['apache-2.0', 'gpl'])
        first_span = analysis.Token(start=0, end=2)
        first = LicenseMatch(rule=first_rule, query_position=first_span)

        second_rule = Rule(licenses=['gpl', 'apache-2.0'])
        second_span = analysis.Token(start=0, end=2)
        second = LicenseMatch(rule=second_rule, query_position=second_span)

        forward = first.is_same(second)
        self.assertTrue(forward)
        backward = second.is_same(first)
        self.assertTrue(backward)
Ejemplo n.º 18
0
    def test_match_is_not_same(self):
        """Exercise ``is_same`` against differing licenses and spans.

        Relative to a reference match at 0-2 with ['apache-1.0', 'gpl']:
        a match with other licenses is not the same, a match with the same
        licenses and span is the same, and a match with the same licenses
        at 1-2 is not the same. Each check is made in both directions.
        """
        reference_rule = Rule(text_file='r1', licenses=['apache-1.0', 'gpl'])
        reference_span = analysis.Token(start=0, end=2)
        reference = LicenseMatch(
            rule=reference_rule, query_position=reference_span)

        other_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
        other_span = analysis.Token(start=0, end=2)
        other_licenses = LicenseMatch(
            rule=other_rule, query_position=other_span)

        differs_forward = reference.is_same(other_licenses)
        assert not differs_forward
        differs_backward = other_licenses.is_same(reference)
        assert not differs_backward

        twin_rule = Rule(text_file='r3', licenses=['apache-1.0', 'gpl'])
        twin_span = analysis.Token(start=0, end=2)
        twin = LicenseMatch(rule=twin_rule, query_position=twin_span)

        same_forward = reference.is_same(twin)
        assert same_forward
        same_backward = twin.is_same(reference)
        assert same_backward

        moved_rule = Rule(text_file='r4', licenses=['apache-1.0', 'gpl'])
        moved_span = analysis.Token(start=1, end=2)
        moved = LicenseMatch(rule=moved_rule, query_position=moved_span)

        moved_forward = reference.is_same(moved)
        assert not moved_forward
        moved_backward = moved.is_same(reference)
        assert not moved_backward
Ejemplo n.º 19
0
    def test_match_is_not_same(self):
        """Exercise ``is_same`` against differing licenses and spans.

        Relative to a reference match at 0-2 with ['apache-1.0', 'gpl']:
        a match with other licenses is not the same, a match with the same
        licenses and span is the same, and a match with the same licenses
        at 1-2 is not the same. Each check is made in both directions.
        """
        reference_rule = Rule(licenses=['apache-1.0', 'gpl'])
        reference_span = analysis.Token(start=0, end=2)
        reference = LicenseMatch(
            rule=reference_rule, query_position=reference_span)

        other_rule = Rule(licenses=['gpl', 'apache-2.0'])
        other_span = analysis.Token(start=0, end=2)
        other_licenses = LicenseMatch(
            rule=other_rule, query_position=other_span)

        differs_forward = reference.is_same(other_licenses)
        self.assertFalse(differs_forward)
        differs_backward = other_licenses.is_same(reference)
        self.assertFalse(differs_backward)

        twin_rule = Rule(licenses=['apache-1.0', 'gpl'])
        twin_span = analysis.Token(start=0, end=2)
        twin = LicenseMatch(rule=twin_rule, query_position=twin_span)

        same_forward = reference.is_same(twin)
        self.assertTrue(same_forward)
        same_backward = twin.is_same(reference)
        self.assertTrue(same_backward)

        moved_rule = Rule(licenses=['apache-1.0', 'gpl'])
        moved_span = analysis.Token(start=1, end=2)
        moved = LicenseMatch(rule=moved_rule, query_position=moved_span)

        moved_forward = reference.is_same(moved)
        self.assertFalse(moved_forward)
        moved_backward = moved.is_same(reference)
        self.assertFalse(moved_backward)