def test_multiple_contained_matches_are_filtered(self):
    """Check that several matches nested in a bigger match are dropped.

    The 1-2 and 3-4 matches sit inside the 0-5 match; the expected
    result holds only the 0-5 match followed by the 1-6 match.
    """
    outer_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
    outer_span = analysis.Token(start=0, end=5)
    outer = LicenseMatch(
        rule=outer_rule, query_position=outer_span, score=100)

    first_inner_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])
    first_inner_span = analysis.Token(start=1, end=2)
    first_inner = LicenseMatch(
        rule=first_inner_rule, query_position=first_inner_span, score=100)

    second_inner_rule = Rule(text_file='r3', licenses=['apache-2.0', 'gpl'])
    second_inner_span = analysis.Token(start=3, end=4)
    second_inner = LicenseMatch(
        rule=second_inner_rule, query_position=second_inner_span, score=100)

    shifted_rule = Rule(text_file='r5', licenses=['apache-2.0', 'gpl'])
    shifted_span = analysis.Token(start=1, end=6)
    shifted = LicenseMatch(
        rule=shifted_rule, query_position=shifted_span, score=100)

    candidates = [outer, first_inner, second_inner, shifted]
    result = detect.filter_overlapping_matches(candidates)

    expected = [outer, shifted]
    assert expected == result
def test_match_is_same(self):
    """Check is_same() in both directions for two equivalent matches.

    Both matches use the 0-2 span; their rules list the same two
    licenses in a different order.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    left_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
    left_span = analysis.Token(start=0, end=2)
    left = LicenseMatch(rule=left_rule, query_position=left_span)

    right_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
    right_span = analysis.Token(start=0, end=2)
    right = LicenseMatch(rule=right_rule, query_position=right_span)

    assert left.is_same(right)
    assert right.is_same(left)
def test_non_contiguous_matches_are_not_filtered(self):
    """Run filter_matches on the 0-2, 4-6 and 1-6 matches of one rule.

    The expected result holds the 0-2 match followed by the 1-6 match.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    shared_rule = Rule(licenses=['apache-2.0', 'gpl'])

    head_span = analysis.Token(start=0, end=2)
    head = LicenseMatch(rule=shared_rule, query_position=head_span)

    tail_span = analysis.Token(start=4, end=6)
    tail = LicenseMatch(rule=shared_rule, query_position=tail_span)

    wide_span = analysis.Token(start=1, end=6)
    wide = LicenseMatch(rule=shared_rule, query_position=wide_span)

    expected = [head, wide]
    result = detect.filter_matches([head, tail, wide])
    self.assertEqual(expected, result)
def test_matches_with_same_span_are_kept_if_licenses_are_different(self):
    """Check that same-span matches with different licenses all survive.

    Two matches share the 0-2 span but come from rules with different
    licenses; filter_matches is expected to return all three inputs
    unchanged and in order.
    """
    apache2_rule = Rule(licenses=['apache-2.0'])
    apache2_span = analysis.Token(start=0, end=2)
    apache2_match = LicenseMatch(
        rule=apache2_rule, query_position=apache2_span)

    apache11_rule = Rule(licenses=['apache-1.1'])
    apache11_span = analysis.Token(start=0, end=2)
    apache11_match = LicenseMatch(
        rule=apache11_rule, query_position=apache11_span)

    wide_span = analysis.Token(start=1, end=6)
    wide_match = LicenseMatch(rule=apache2_rule, query_position=wide_span)

    expected = [apache2_match, apache11_match, wide_match]
    result = detect.filter_matches(
        [apache2_match, apache11_match, wide_match])
    self.assertEqual(expected, result)
def test_single_contained_matche_is_filtered(self):
    """Check that one match nested in a bigger match is dropped.

    The 1-4 match lies inside the 0-5 match; filter_matches is expected
    to return the 0-5 match followed by the 1-6 match.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    shared_rule = Rule(licenses=['apache-2.0', 'gpl'])

    outer_span = analysis.Token(start=0, end=5)
    outer = LicenseMatch(rule=shared_rule, query_position=outer_span)

    inner_span = analysis.Token(start=1, end=4)
    inner = LicenseMatch(rule=shared_rule, query_position=inner_span)

    shifted_span = analysis.Token(start=1, end=6)
    shifted = LicenseMatch(rule=shared_rule, query_position=shifted_span)

    filtered = detect.filter_matches([outer, inner, shifted])

    expected = [outer, shifted]
    self.assertEqual(expected, filtered)
def test_overlapping_matches_are_filtered(self):
    """Run filter_matches on a 0-5 match and two matches sharing span 1-6.

    The expected result holds the 0-5 match followed by the first of
    the two 1-6 matches.
    """
    shared_rule = Rule(licenses=['apache-2.0', 'gpl'])

    leading_span = analysis.Token(start=0, end=5)
    leading = LicenseMatch(rule=shared_rule, query_position=leading_span)

    twin_span = analysis.Token(start=1, end=6)
    twin = LicenseMatch(rule=shared_rule, query_position=twin_span)

    other_twin_span = analysis.Token(start=1, end=6)
    other_twin = LicenseMatch(
        rule=shared_rule, query_position=other_twin_span)

    filtered = detect.filter_matches([leading, twin, other_twin])

    expected = [leading, twin]
    self.assertEqual(expected, filtered)
def test_non_contiguous_matches_are_not_filtered(self):
    """Run filter_overlapping_matches on 0-2, 4-6 and 1-6 matches.

    All three matches use one rule and a score of 100; the expected
    result holds the 0-2 match followed by the 1-6 match.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    shared_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

    head_span = analysis.Token(start=0, end=2)
    head = LicenseMatch(
        rule=shared_rule, query_position=head_span, score=100)

    tail_span = analysis.Token(start=4, end=6)
    tail = LicenseMatch(
        rule=shared_rule, query_position=tail_span, score=100)

    wide_span = analysis.Token(start=1, end=6)
    wide = LicenseMatch(
        rule=shared_rule, query_position=wide_span, score=100)

    candidates = [head, tail, wide]
    result = detect.filter_overlapping_matches(candidates)

    expected = [head, wide]
    assert expected == result
def test_single_contained_matche_is_filtered(self):
    """Check that one match nested in a bigger match is dropped.

    The 1-4 match lies inside the 0-5 match; the expected result from
    filter_overlapping_matches is the 0-5 match followed by the 1-6 match.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    shared_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

    outer_span = analysis.Token(start=0, end=5)
    outer = LicenseMatch(
        rule=shared_rule, query_position=outer_span, score=100)

    inner_span = analysis.Token(start=1, end=4)
    inner = LicenseMatch(
        rule=shared_rule, query_position=inner_span, score=100)

    shifted_span = analysis.Token(start=1, end=6)
    shifted = LicenseMatch(
        rule=shared_rule, query_position=shifted_span, score=100)

    candidates = [outer, inner, shifted]
    result = detect.filter_overlapping_matches(candidates)

    expected = [outer, shifted]
    assert expected == result
def test_matches_with_partially_overlapping_spans_are_merged_if_license_are_the_same(
        self):
    """Run filter_matches on overlapping matches from two rules.

    The 0-10 and 1-6 matches share the apache-1.1 rule; the 5-15 match
    uses another rule. The expected result holds the 0-10 match followed
    by the 5-15 match.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    apache_rule = Rule(licenses=['apache-1.1'])
    dual_rule = Rule(licenses=['gpl', 'apache-2.0'])

    long_span = analysis.Token(start=0, end=10)
    long_match = LicenseMatch(rule=apache_rule, query_position=long_span)

    nested_span = analysis.Token(start=1, end=6)
    nested_match = LicenseMatch(rule=apache_rule, query_position=nested_span)

    trailing_span = analysis.Token(start=5, end=15)
    trailing_match = LicenseMatch(
        rule=dual_rule, query_position=trailing_span)

    expected = [long_match, trailing_match]
    result = detect.filter_matches(
        [long_match, nested_match, trailing_match])
    self.assertEqual(expected, result)
def test_non_contiguous_or_overlapping_contained_matches_are_filtered(
        self):
    """Check that only the widest match survives filter_matches.

    The 1-2, 3-6 and both 1-6 matches all fall inside the 0-7 match,
    which is the single match expected in the result.
    """
    shared_rule = Rule(licenses=['apache-2.0', 'gpl'])

    small_span = analysis.Token(start=1, end=2)
    small = LicenseMatch(rule=shared_rule, query_position=small_span)

    right_span = analysis.Token(start=3, end=6)
    right = LicenseMatch(rule=shared_rule, query_position=right_span)

    middle_span = analysis.Token(start=1, end=6)
    middle = LicenseMatch(rule=shared_rule, query_position=middle_span)

    widest_span = analysis.Token(start=0, end=7)
    widest = LicenseMatch(rule=shared_rule, query_position=widest_span)

    middle_again_span = analysis.Token(start=1, end=6)
    middle_again = LicenseMatch(
        rule=shared_rule, query_position=middle_again_span)

    expected = [widest]
    result = detect.filter_matches(
        [small, right, middle, widest, middle_again])
    self.assertEqual(expected, result)
def test_matches_with_partially_overlapping_spans_are_merged_if_license_are_the_same(
        self):
    """Run filter_overlapping_matches on overlapping matches of two rules.

    The 0-10 and 1-6 matches share rule r1; the 5-15 match uses rule r2.
    The expected result holds the 0-10 match followed by the 5-15 match.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    apache_rule = Rule(text_file='r1', licenses=['apache-1.1'])

    long_span = analysis.Token(start=0, end=10)
    long_match = LicenseMatch(
        rule=apache_rule, query_position=long_span, score=100)

    nested_span = analysis.Token(start=1, end=6)
    nested_match = LicenseMatch(
        rule=apache_rule, query_position=nested_span, score=100)

    dual_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
    trailing_span = analysis.Token(start=5, end=15)
    trailing_match = LicenseMatch(
        rule=dual_rule, query_position=trailing_span, score=100)

    candidates = [long_match, nested_match, trailing_match]
    result = detect.filter_overlapping_matches(candidates)

    expected = [long_match, trailing_match]
    assert expected == result
def test_matches_with_same_span_are_filtered_if_licenses_are_different(
        self):
    """Run filter_overlapping_matches on same-span matches of two rules.

    Two matches share the 0-2 span but use rules with different
    licenses; the expected result holds the second of them followed by
    the 1-6 match.
    """
    apache2_rule = Rule(text_file='r1', licenses=['apache-2.0'])
    apache2_span = analysis.Token(start=0, end=2)
    apache2_match = LicenseMatch(
        rule=apache2_rule, query_position=apache2_span, score=100)

    apache11_rule = Rule(text_file='r2', licenses=['apache-1.1'])
    apache11_span = analysis.Token(start=0, end=2)
    apache11_match = LicenseMatch(
        rule=apache11_rule, query_position=apache11_span, score=100)

    wide_span = analysis.Token(start=1, end=6)
    wide_match = LicenseMatch(
        rule=apache2_rule, query_position=wide_span, score=100)

    candidates = [apache2_match, apache11_match, wide_match]
    result = detect.filter_overlapping_matches(candidates)

    expected = [apache11_match, wide_match]
    assert expected == result
def test_contained_matches_are_filtered(self):
    """Run filter_overlapping_matches on a 0-5 match and two 1-6 matches.

    All matches use one rule and a score of 100; the expected result
    holds the 0-5 match followed by the second of the two 1-6 matches.
    """
    shared_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

    leading_span = analysis.Token(start=0, end=5)
    leading = LicenseMatch(
        rule=shared_rule, query_position=leading_span, score=100)

    twin_span = analysis.Token(start=1, end=6)
    twin = LicenseMatch(
        rule=shared_rule, query_position=twin_span, score=100)

    other_twin_span = analysis.Token(start=1, end=6)
    other_twin = LicenseMatch(
        rule=shared_rule, query_position=other_twin_span, score=100)

    candidates = [leading, twin, other_twin]
    result = detect.filter_overlapping_matches(candidates)

    expected = [leading, other_twin]
    assert expected == result
def test_match_is_not_same(self):
    """Check is_same() against differing licenses and differing spans.

    Relative to a reference match (apache-1.0 + gpl, span 0-2):
    - a match with different licenses on the same span is not the same;
    - a match with identical licenses and span is the same;
    - a match with identical licenses on span 1-2 is not the same.
    Every comparison is checked in both directions.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    reference_rule = Rule(text_file='r1', licenses=['apache-1.0', 'gpl'])
    reference_span = analysis.Token(start=0, end=2)
    reference = LicenseMatch(
        rule=reference_rule, query_position=reference_span)

    other_license_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
    other_license_span = analysis.Token(start=0, end=2)
    other_license = LicenseMatch(
        rule=other_license_rule, query_position=other_license_span)
    assert not reference.is_same(other_license)
    assert not other_license.is_same(reference)

    twin_rule = Rule(text_file='r3', licenses=['apache-1.0', 'gpl'])
    twin_span = analysis.Token(start=0, end=2)
    twin = LicenseMatch(rule=twin_rule, query_position=twin_span)
    assert reference.is_same(twin)
    assert twin.is_same(reference)

    other_span_rule = Rule(text_file='r4', licenses=['apache-1.0', 'gpl'])
    other_span_position = analysis.Token(start=1, end=2)
    other_span = LicenseMatch(
        rule=other_span_rule, query_position=other_span_position)
    assert not reference.is_same(other_span)
    assert not other_span.is_same(reference)
def test_match_is_not_same(self):
    """Check is_same() against differing licenses and differing spans.

    Relative to a reference match (apache-1.0 + gpl, span 0-2):
    - a match with different licenses on the same span is not the same;
    - a match with identical licenses and span is the same;
    - a match with identical licenses on span 1-2 is not the same.
    Every comparison is checked in both directions.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    reference_rule = Rule(licenses=['apache-1.0', 'gpl'])
    reference_span = analysis.Token(start=0, end=2)
    reference = LicenseMatch(
        rule=reference_rule, query_position=reference_span)

    other_license_rule = Rule(licenses=['gpl', 'apache-2.0'])
    other_license_span = analysis.Token(start=0, end=2)
    other_license = LicenseMatch(
        rule=other_license_rule, query_position=other_license_span)
    self.assertFalse(reference.is_same(other_license))
    self.assertFalse(other_license.is_same(reference))

    twin_rule = Rule(licenses=['apache-1.0', 'gpl'])
    twin_span = analysis.Token(start=0, end=2)
    twin = LicenseMatch(rule=twin_rule, query_position=twin_span)
    self.assertTrue(reference.is_same(twin))
    self.assertTrue(twin.is_same(reference))

    other_span_rule = Rule(licenses=['apache-1.0', 'gpl'])
    other_span_position = analysis.Token(start=1, end=2)
    other_span = LicenseMatch(
        rule=other_span_rule, query_position=other_span_position)
    self.assertFalse(reference.is_same(other_span))
    self.assertFalse(other_span.is_same(reference))
def test_non_contiguous_or_overlapping_contained_matches_touching_boundaries_are_filtered(
        self):
    """Check that only the 0-7 match survives filter_overlapping_matches.

    The other five matches (0-2, 3-7, 0-6, 1-7, 1-6) all fall within
    0-7, some sharing its start or end position. Each match has its own
    rule and a score of 100.
    """
    left_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
    left_span = analysis.Token(start=0, end=2)
    left = LicenseMatch(
        rule=left_rule, query_position=left_span, score=100)

    right_rule = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])
    right_span = analysis.Token(start=3, end=7)
    right = LicenseMatch(
        rule=right_rule, query_position=right_span, score=100)

    start_aligned_rule = Rule(text_file='r3', licenses=['apache-2.0', 'gpl'])
    start_aligned_span = analysis.Token(start=0, end=6)
    start_aligned = LicenseMatch(
        rule=start_aligned_rule, query_position=start_aligned_span,
        score=100)

    end_aligned_rule = Rule(text_file='r6', licenses=['apache-2.0', 'gpl'])
    end_aligned_span = analysis.Token(start=1, end=7)
    end_aligned = LicenseMatch(
        rule=end_aligned_rule, query_position=end_aligned_span, score=100)

    inner_rule = Rule(text_file='r5', licenses=['apache-2.0', 'gpl'])
    inner_span = analysis.Token(start=1, end=6)
    inner = LicenseMatch(
        rule=inner_rule, query_position=inner_span, score=100)

    widest_rule = Rule(text_file='r4', licenses=['apache-2.0', 'gpl'])
    widest_span = analysis.Token(start=0, end=7)
    widest = LicenseMatch(
        rule=widest_rule, query_position=widest_span, score=100)

    candidates = [left, right, start_aligned, widest, inner, end_aligned]
    result = detect.filter_overlapping_matches(candidates)

    expected = [widest]
    assert expected == result
def test_match_is_same(self):
    """Check is_same() in both directions for two equivalent matches.

    Both matches use the 0-2 span; their rules list the same two
    licenses in a different order.
    """
    # NOTE(review): another method with this exact name is visible in this
    # file; if both live in the same class the later one shadows the
    # earlier -- confirm which one is meant to run.
    left_rule = Rule(licenses=['apache-2.0', 'gpl'])
    left_span = analysis.Token(start=0, end=2)
    left = LicenseMatch(rule=left_rule, query_position=left_span)

    right_rule = Rule(licenses=['gpl', 'apache-2.0'])
    right_span = analysis.Token(start=0, end=2)
    right = LicenseMatch(rule=right_rule, query_position=right_span)

    self.assertTrue(left.is_same(right))
    self.assertTrue(right.is_same(left))