def test_filter(self):
    # Overlap-based culling is expected only when two hits sit in the same
    # CDS, overlap by more than 20, and have query_ids from the same
    # equivalence group (as defined by self.filter_file).
    front_hit = FakeHSPHit("AMP-binding", "A", 50, 90, 0.1, None)
    back_hit = FakeHSPHit("A-OX", "A", 70, 100, 0.5, None)

    def run_filter(hits_by_cds):
        # A fresh hit list is built for every call, so each scenario starts
        # from both hits regardless of what an earlier call did to its input.
        return hmm_detection.filter_results(
            [front_hit, back_hit],
            hits_by_cds,
            self.filter_file,
            self.signature_names,
        )

    # Scenario 1: same CDS, same group, overlap not above 20 -> keep both.
    kept, kept_by_cds = run_filter({"A": [front_hit, back_hit]})
    assert kept == [front_hit, back_hit]
    assert kept_by_cds == {"A": [front_hit, back_hit]}

    # Scenario 2: extend the front hit by one so the overlap becomes 21;
    # now only the back hit is expected to survive.
    front_hit.hit_end = 91
    assert hmm_detection.hsp_overlap_size(front_hit, back_hit) == 21
    kept, kept_by_cds = run_filter({"A": [front_hit, back_hit]})
    assert kept == [back_hit]
    assert kept_by_cds == {"A": [back_hit]}

    # Scenario 3: still overlapping, but the back hit's query_id no longer
    # belongs to the same group -> nothing is removed.
    back_hit.query_id = "none"
    kept, kept_by_cds = run_filter({"A": [front_hit, back_hit]})
    assert kept == [front_hit, back_hit]
    assert kept_by_cds == {"A": [front_hit, back_hit]}

    # Scenario 4: back in the same group, but the hits now belong to
    # different CDSs -> nothing is removed.
    back_hit.hit_id = "B"
    back_hit.query_id = "A-OX"
    kept, kept_by_cds = run_filter({"A": [front_hit], "B": [back_hit]})
    assert kept == [front_hit, back_hit]
    assert kept_by_cds == {"A": [front_hit], "B": [back_hit]}
def test_filter_multiple(self):
    # Within a single CDS, non-overlapping hits that share a query_id should
    # be culled down to the best-scoring one; hits with different query_ids
    # or in different CDSs are left alone.
    front_hit = FakeHSPHit("AMP-binding", "A", 50, 60, 0.1, None)
    back_hit = FakeHSPHit("A-OX", "A", 70, 100, 0.5, None)
    hit_pair = [front_hit, back_hit]

    def cull(hits_by_cds):
        # Shallow copies only: the outer containers handed over are new, but
        # the per-CDS lists inside the mapping are still shared with the caller.
        return hmm_detection.filter_result_multiple(
            list(hit_pair),
            dict(hits_by_cds),
        )

    # Scenario 1: same CDS, different query_ids -> both hits are kept.
    kept, kept_by_cds = cull({"A": [front_hit, back_hit]})
    assert kept == [front_hit, back_hit]
    assert kept_by_cds == {"A": [front_hit, back_hit]}

    # Scenario 2: same CDS, same query_id -> only back_hit is expected to
    # remain (presumably because 0.5 vs 0.1 is the score being compared).
    # The mapping returned by the previous call is fed back in.
    front_hit.query_id = "A-OX"
    kept, kept_by_cds = cull(kept_by_cds)
    assert kept == [back_hit]
    assert kept_by_cds == {"A": [back_hit]}

    # Scenario 3: same query_id but different CDSs -> both hits are kept.
    back_hit.hit_id = "B"
    kept, kept_by_cds = cull({"A": [front_hit], "B": [back_hit]})
    assert kept == [front_hit, back_hit]
    assert kept_by_cds == {"A": [front_hit], "B": [back_hit]}