def test_hsp_overlap_size(self):
    # Walks hsp_overlap_size() through disjoint, touching, partially
    # overlapping, enclosing and reversed-coordinate hit pairs.
    measure = hmm_detection.hsp_overlap_size
    moving = FakeHSPHit("A", "A", 50, 60, 0.0, None)
    fixed = FakeHSPHit("B", "B", 70, 100, 0.0, None)

    # the moving hit finishes well before the fixed hit begins
    assert measure(moving, fixed) == 0

    # the hits now touch at 70; the end coordinate is exclusive, so
    # nothing is shared yet
    moving.hit_end = 70
    assert measure(moving, fixed) == 0

    # slide the end of the moving hit into the fixed hit one position at
    # a time (ends 71..99); the overlap grows in lockstep
    for expected in range(1, 30):
        moving.hit_end = 70 + expected
        assert measure(moving, fixed) == expected

    # the fixed hit (70-100) is now entirely covered by the moving hit (50-110)
    moving.hit_end = 110
    assert measure(moving, fixed) == 30

    # the moving hit (75-110) starts inside the fixed hit and runs past its end
    moving.hit_start = 75
    assert measure(moving, fixed) == 25

    # an end that precedes the start is invalid and must trip an assertion
    moving.hit_end = 50
    self.assertRaises(AssertionError, measure, moving, fixed)
def test_filter(self):
    # Drives filter_results() with two fake hits, varying how far they
    # overlap, which query ids they carry and which CDS they sit on.
    amp_hit = FakeHSPHit("AMP-binding", "A", 50, 90, 0.1, None)
    aox_hit = FakeHSPHit("A-OX", "A", 70, 100, 0.5, None)

    def run_filter(results_by_id):
        # A fresh list of hits is built on every call so that one scenario
        # can never see a list modified by an earlier one.
        return hmm_detection.filter_results(
            [amp_hit, aox_hit],
            results_by_id,
            self.filter_file,
            self.signature_names,
        )

    # same CDS and same equivalence group, but the overlap does not
    # exceed 20: both hits are kept
    kept, kept_by_id = run_filter({"A": [amp_hit, aox_hit]})
    assert kept == [amp_hit, aox_hit]
    assert kept_by_id == {"A": [amp_hit, aox_hit]}

    # stretch the first hit so the overlap reaches 21: with both hits in
    # the same group, only the second one remains
    amp_hit.hit_end = 91
    assert hmm_detection.hsp_overlap_size(amp_hit, aox_hit) == 21
    kept, kept_by_id = run_filter({"A": [amp_hit, aox_hit]})
    assert kept == [aox_hit]
    assert kept_by_id == {"A": [aox_hit]}

    # same overlap, but the second hit no longer shares a group with the
    # first: nothing is dropped
    aox_hit.query_id = "none"
    kept, kept_by_id = run_filter({"A": [amp_hit, aox_hit]})
    assert kept == [amp_hit, aox_hit]
    assert kept_by_id == {"A": [amp_hit, aox_hit]}

    # back in the same group, but the hits now belong to different CDSs:
    # again nothing is dropped
    aox_hit.hit_id = "B"
    aox_hit.query_id = "A-OX"
    kept, kept_by_id = run_filter({"A": [amp_hit], "B": [aox_hit]})
    assert kept == [amp_hit, aox_hit]
    assert kept_by_id == {"A": [amp_hit], "B": [aox_hit]}