def test_non_contiguous_or_overlapping_contained_matches_touching_boundaries_are_filtered(self):
    """
    Feed six matches, each backed by its own rule, to
    filter_overlapping_matches and expect only the 0-7 match to be returned.
    """
    # (rule text_file, span start, span end), in fixture creation order.
    fixtures = [
        ('r1', 0, 2),
        ('r2', 3, 7),
        ('r3', 0, 6),
        ('r6', 1, 7),
        ('r5', 1, 6),
        ('r4', 0, 7),
    ]

    match_by_rule = {}
    for text_file, start, end in fixtures:
        rule = Rule(text_file=text_file, licenses=['apache-2.0', 'gpl'])
        match_by_rule[text_file] = LicenseMatch(
            rule=rule,
            query_position=analysis.Token(start=start, end=end),
            score=100,
        )

    # The matches are submitted in rule-number order, not creation order.
    submitted = [
        match_by_rule[key] for key in ('r1', 'r2', 'r3', 'r4', 'r5', 'r6')
    ]
    result = detect.filter_overlapping_matches(submitted)

    assert [match_by_rule['r4']] == result
def test_multiple_contained_matches_are_filtered(self):
    """
    Two short matches (1-2 and 3-4) that sit inside the 0-5 match are
    expected to be dropped, leaving the 0-5 and 1-6 matches.
    """
    common_licenses = ['apache-2.0', 'gpl']

    outer_rule = Rule(text_file='r1', licenses=list(common_licenses))
    outer = LicenseMatch(
        rule=outer_rule,
        query_position=analysis.Token(start=0, end=5),
        score=100,
    )

    first_inner_rule = Rule(text_file='r2', licenses=list(common_licenses))
    first_inner = LicenseMatch(
        rule=first_inner_rule,
        query_position=analysis.Token(start=1, end=2),
        score=100,
    )

    second_inner_rule = Rule(text_file='r3', licenses=list(common_licenses))
    second_inner = LicenseMatch(
        rule=second_inner_rule,
        query_position=analysis.Token(start=3, end=4),
        score=100,
    )

    shifted_rule = Rule(text_file='r5', licenses=list(common_licenses))
    shifted = LicenseMatch(
        rule=shifted_rule,
        query_position=analysis.Token(start=1, end=6),
        score=100,
    )

    candidates = [outer, first_inner, second_inner, shifted]
    result = detect.filter_overlapping_matches(candidates)

    assert [outer, shifted] == result
def test_contiguous_non_overlapping_matches_are_not_filtered(self):
    """
    Submit matches at 0-2, 3-6 and 1-6 for a single rule and expect the
    0-2 and 1-6 matches back.

    NOTE(review): despite the test name, the expected result omits the 3-6
    match.
    """
    rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

    head = LicenseMatch(
        rule=rule,
        query_position=analysis.Token(start=0, end=2),
        score=100,
    )
    tail = LicenseMatch(
        rule=rule,
        query_position=analysis.Token(start=3, end=6),
        score=100,
    )
    wide = LicenseMatch(
        rule=rule,
        query_position=analysis.Token(start=1, end=6),
        score=100,
    )

    result = detect.filter_overlapping_matches([head, tail, wide])

    assert [head, wide] == result
def test_contained_matches_are_filtered(self):
    """
    Submit a 0-5 match followed by two matches that share the 1-6 span and
    expect the 0-5 match plus the second of the two 1-6 matches.
    """
    rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

    leading = LicenseMatch(
        rule=rule,
        query_position=analysis.Token(start=0, end=5),
        score=100,
    )
    twin_first = LicenseMatch(
        rule=rule,
        query_position=analysis.Token(start=1, end=6),
        score=100,
    )
    twin_second = LicenseMatch(
        rule=rule,
        query_position=analysis.Token(start=1, end=6),
        score=100,
    )

    candidates = [leading, twin_first, twin_second]
    result = detect.filter_overlapping_matches(candidates)

    assert [leading, twin_second] == result
def test_single_contained_matche_is_filtered(self):
    """
    A 1-4 match lying inside a 0-5 match is expected to be dropped, while
    the 0-5 and 1-6 matches are returned.
    """
    rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

    enclosing = LicenseMatch(
        rule=rule,
        query_position=analysis.Token(start=0, end=5),
        score=100,
    )
    enclosed = LicenseMatch(
        rule=rule,
        query_position=analysis.Token(start=1, end=4),
        score=100,
    )
    shifted = LicenseMatch(
        rule=rule,
        query_position=analysis.Token(start=1, end=6),
        score=100,
    )

    result = detect.filter_overlapping_matches([enclosing, enclosed, shifted])

    assert [enclosing, shifted] == result
def test_matches_with_partially_overlapping_spans_are_merged_if_license_are_the_same(self):
    """
    Two matches of an 'apache-1.1' rule (0-10 and 1-6) plus a 5-15 match of
    a rule with different licenses: the result is expected to hold the 0-10
    and 5-15 matches, without the 1-6 one.
    """
    apache_rule = Rule(text_file='r1', licenses=['apache-1.1'])
    long_apache = LicenseMatch(
        rule=apache_rule,
        query_position=analysis.Token(start=0, end=10),
        score=100,
    )
    short_apache = LicenseMatch(
        rule=apache_rule,
        query_position=analysis.Token(start=1, end=6),
        score=100,
    )

    mixed_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])
    overlapping_mixed = LicenseMatch(
        rule=mixed_rule,
        query_position=analysis.Token(start=5, end=15),
        score=100,
    )

    candidates = [long_apache, short_apache, overlapping_mixed]
    result = detect.filter_overlapping_matches(candidates)

    assert [long_apache, overlapping_mixed] == result
def test_matches_with_same_span_are_filtered_if_licenses_are_different(self):
    """
    Two matches share the 0-2 span but come from rules with different
    licenses; a third match covers 1-6. The expected result is the second
    0-2 match followed by the 1-6 match.
    """
    apache_2_rule = Rule(text_file='r1', licenses=['apache-2.0'])
    apache_2_match = LicenseMatch(
        rule=apache_2_rule,
        query_position=analysis.Token(start=0, end=2),
        score=100,
    )

    apache_1_rule = Rule(text_file='r2', licenses=['apache-1.1'])
    apache_1_match = LicenseMatch(
        rule=apache_1_rule,
        query_position=analysis.Token(start=0, end=2),
        score=100,
    )

    # The wider match reuses the first rule.
    wide_match = LicenseMatch(
        rule=apache_2_rule,
        query_position=analysis.Token(start=1, end=6),
        score=100,
    )

    candidates = [apache_2_match, apache_1_match, wide_match]
    result = detect.filter_overlapping_matches(candidates)

    assert [apache_1_match, wide_match] == result
def test_non_contiguous_matches_are_not_filtered(self):
    """
    Submit matches at 0-2, 4-6 and 1-6 for a single rule and expect the
    0-2 and 1-6 matches back.

    NOTE(review): despite the test name, the expected result omits the 4-6
    match.
    """
    rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

    def match_at(start, end):
        # Every fixture shares the rule and score; only the span varies.
        return LicenseMatch(
            rule=rule,
            query_position=analysis.Token(start=start, end=end),
            score=100,
        )

    left = match_at(0, 2)
    right = match_at(4, 6)
    spanning = match_at(1, 6)

    result = detect.filter_overlapping_matches([left, right, spanning])

    assert [left, spanning] == result
def test_multiple_contained_matches_are_filtered(self):
    """
    Build four matches, each with its own rule, at 0-5, 1-2, 3-4 and 1-6,
    and expect only the 0-5 and 1-6 matches to be returned.
    """
    # (rule text_file, span start, span end), in creation and submission order.
    fixtures = (
        ('r1', 0, 5),
        ('r2', 1, 2),
        ('r3', 3, 4),
        ('r5', 1, 6),
    )

    candidates = []
    for text_file, start, end in fixtures:
        rule = Rule(text_file=text_file, licenses=['apache-2.0', 'gpl'])
        position = analysis.Token(start=start, end=end)
        candidates.append(
            LicenseMatch(rule=rule, query_position=position, score=100)
        )

    # Keep references to the expected survivors before filtering.
    outer, shifted = candidates[0], candidates[-1]

    result = detect.filter_overlapping_matches(candidates)

    assert [outer, shifted] == result
def test_matches_with_partially_overlapping_spans_are_merged_if_license_are_the_same(self):
    """
    Submit a 0-10 and a 1-6 match of the same rule, then a 5-15 match of a
    second rule, and expect the 0-10 and 5-15 matches in the result.
    """
    first_rule = Rule(text_file='r1', licenses=['apache-1.1'])

    span_0_10 = analysis.Token(start=0, end=10)
    match_0_10 = LicenseMatch(rule=first_rule, query_position=span_0_10, score=100)

    span_1_6 = analysis.Token(start=1, end=6)
    match_1_6 = LicenseMatch(rule=first_rule, query_position=span_1_6, score=100)

    second_rule = Rule(text_file='r2', licenses=['gpl', 'apache-2.0'])

    span_5_15 = analysis.Token(start=5, end=15)
    match_5_15 = LicenseMatch(rule=second_rule, query_position=span_5_15, score=100)

    result = detect.filter_overlapping_matches(
        [match_0_10, match_1_6, match_5_15]
    )

    assert [match_0_10, match_5_15] == result
def test_matches_with_same_span_are_filtered_if_licenses_are_different(self):
    """
    Submit two 0-2 matches from rules with different licenses, then a 1-6
    match of the first rule; the first 0-2 match is expected to be absent
    from the result.
    """
    rule_one = Rule(text_file='r1', licenses=['apache-2.0'])

    span_0_2_first = analysis.Token(start=0, end=2)
    first_same_span = LicenseMatch(
        rule=rule_one, query_position=span_0_2_first, score=100
    )

    rule_two = Rule(text_file='r2', licenses=['apache-1.1'])

    span_0_2_second = analysis.Token(start=0, end=2)
    second_same_span = LicenseMatch(
        rule=rule_two, query_position=span_0_2_second, score=100
    )

    span_1_6 = analysis.Token(start=1, end=6)
    overlapping = LicenseMatch(
        rule=rule_one, query_position=span_1_6, score=100
    )

    result = detect.filter_overlapping_matches(
        [first_same_span, second_same_span, overlapping]
    )

    assert [second_same_span, overlapping] == result
def test_contained_matches_are_filtered(self):
    """
    Of two matches that share the 1-6 span, only the later one is expected
    to be returned, together with the 0-5 match that precedes them.
    """
    shared_rule = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])

    def build(start, end):
        # Every fixture shares the rule and score; only the span varies.
        return LicenseMatch(
            rule=shared_rule,
            query_position=analysis.Token(start=start, end=end),
            score=100,
        )

    first = build(0, 5)
    duplicate_a = build(1, 6)
    duplicate_b = build(1, 6)

    result = detect.filter_overlapping_matches(
        [first, duplicate_a, duplicate_b]
    )

    assert [first, duplicate_b] == result
def test_non_contiguous_or_overlapping_contained_matches_touching_boundaries_are_filtered(self):
    """
    Six matches from six distinct rules are submitted; the 0-7 match is the
    only one expected in the result.
    """
    rule_1 = Rule(text_file='r1', licenses=['apache-2.0', 'gpl'])
    match_0_2 = LicenseMatch(
        rule=rule_1,
        query_position=analysis.Token(start=0, end=2),
        score=100,
    )

    rule_2 = Rule(text_file='r2', licenses=['apache-2.0', 'gpl'])
    match_3_7 = LicenseMatch(
        rule=rule_2,
        query_position=analysis.Token(start=3, end=7),
        score=100,
    )

    rule_3 = Rule(text_file='r3', licenses=['apache-2.0', 'gpl'])
    match_0_6 = LicenseMatch(
        rule=rule_3,
        query_position=analysis.Token(start=0, end=6),
        score=100,
    )

    rule_6 = Rule(text_file='r6', licenses=['apache-2.0', 'gpl'])
    match_1_7 = LicenseMatch(
        rule=rule_6,
        query_position=analysis.Token(start=1, end=7),
        score=100,
    )

    rule_5 = Rule(text_file='r5', licenses=['apache-2.0', 'gpl'])
    match_1_6 = LicenseMatch(
        rule=rule_5,
        query_position=analysis.Token(start=1, end=6),
        score=100,
    )

    rule_4 = Rule(text_file='r4', licenses=['apache-2.0', 'gpl'])
    match_0_7 = LicenseMatch(
        rule=rule_4,
        query_position=analysis.Token(start=0, end=7),
        score=100,
    )

    # Submission order follows rule numbering (r1..r6), not creation order.
    candidates = [
        match_0_2,
        match_3_7,
        match_0_6,
        match_0_7,
        match_1_6,
        match_1_7,
    ]
    result = detect.filter_overlapping_matches(candidates)

    assert [match_0_7] == result