def test_find_optional_simple(self):
    """Check the index `find` reports for patterns containing a `(0,1)` repeat."""
    target = 'MYKITTYYYY'
    # (pattern text, expected index of the first hit in `target`)
    expectations = (
        ('T-Y(0,1).', 4),
        ('K-Y(0,1)-I.', 2),
    )
    for spec, position in expectations:
        matcher = pattern_search.Pattern(spec)
        assert matcher.find(target) == position
def test_find_all_start(self):
    """Check how many hits `find_all` returns for patterns that begin with `<`."""
    target = 'TAAATAAAATA'

    single = pattern_search.Pattern('<T.')
    hits = single.find_all(target)
    assert len(hits) == 1

    with_repeat = pattern_search.Pattern('<T-A(0,3).')
    hits = with_repeat.find_all(target)
    assert len(hits) == 4
def test_find_all_end(self):
    """Check how many hits `find_all` returns for patterns that end with `>`."""
    target = 'TAAATAAAATA'

    single = pattern_search.Pattern('A>.')
    hits = single.find_all(target)
    assert len(hits) == 1

    with_repeat = pattern_search.Pattern('A(0,3)-T-A>.')
    hits = with_repeat.find_all(target)
    assert len(hits) == 4
def test_find_range_any(self):
    """Check the index `find` reports when `x` carries a `(min,max)` repeat."""
    target = 'MYKITTYYYY'
    # (pattern text, expected index of the first hit in `target`)
    expectations = (
        ('x(2,3).', 0),
        ('K-x(0,1)-I.', 2),
    )
    for spec, position in expectations:
        matcher = pattern_search.Pattern(spec)
        assert matcher.find(target) == position
def test_find_any_repeats(self):
    """Check that `M-x-x-x-C.` and `M-x(3)-C.` are both found at index 0."""
    target = 'MAGICHAT'
    # Spelled-out form first, then the counted form; both must hit at 0.
    for spec in ('M-x-x-x-C.', 'M-x(3)-C.'):
        matcher = pattern_search.Pattern(spec)
        assert matcher.find(target) == 0
def test_find_all_basic_types(self):
    """Check `find_all` hit counts for `x`, `[...]` and `{...}` elements."""
    target = 'MAGICHAT'
    # (pattern text, expected number of hits in `target`)
    expectations = (
        ('x(3).', 6),
        ('[MH]-[AGI](0,3).', 6),
        ('{AG}(1,3).', 8),
    )
    for spec, hit_count in expectations:
        found = pattern_search.Pattern(spec).find_all(target)
        assert len(found) == hit_count
def test_find_negated(self):
    """Check the index `find` reports for `{...}` patterns on several inputs."""
    # pattern text -> ordered (input text, expected index) checks
    scenarios = (
        ('{A}.', (
            ('MAGICHAT', 0),
            ('A', -1),
            ('', -1),
        )),
        ('{MAGIC}.', (
            ('MAGICHAT', 5),
            ('MAGIC', -1),
            ('GAMMA', -1),
        )),
    )
    for spec, checks in scenarios:
        matcher = pattern_search.Pattern(spec)
        for text, position in checks:
            assert matcher.find(text) == position
def test_find_range_simple(self):
    """Check the index `find` reports when a letter carries a `(min,max)` repeat."""
    target = 'MYKITTYYYY'
    # (pattern text, expected index of the first hit in `target`)
    expectations = (
        ('Y(1,2).', 1),
        ('Y(2,4).', 6),
        ('T(2,3)-Y.', 4),
    )
    for spec, position in expectations:
        matcher = pattern_search.Pattern(spec)
        assert matcher.find(target) == position
def test_optional_cterminus_repeat(self):
    """Check `find` and `find_all` for patterns using a repeated `[T>]` element."""
    target = 'MAGICHAT'

    matcher = pattern_search.Pattern('A-[T>](1,2).')
    assert matcher.find(target) == 6
    found = matcher.find_all(target)
    assert len(found) == 1
    only_hit = found[0]
    assert only_hit.start == 6
    assert only_hit.end == 8
    # The hit's bounds must slice out exactly the matched text.
    matched_text = target[only_hit.start:only_hit.end]
    assert matched_text == 'AT'

    matcher = pattern_search.Pattern('[T>](1,2).')
    assert matcher.find(target) == 7
    found = matcher.find_all(target)
    assert len(found) == 1
def test_find_any_combos(self):
    """Check the index `find` reports for `x` combined with a fixed `A`."""
    target = 'MAGICHAT'

    two_letter = pattern_search.Pattern('x-A.')
    for text, position in (('A', -1), ('', -1), ('AC', -1), (target, 0)):
        assert two_letter.find(text) == position

    # three-letter combinations:
    three_letter = pattern_search.Pattern('x-A-x.')
    for text, position in ((target, 0), ('A', -1)):
        assert three_letter.find(text) == position
def test_find_multiple(self):
    """Check the index `find` reports for `[...]` elements."""
    target = 'MAGICHAT'

    # simplest but inefficient: one acceptable amino acid
    single_choice = pattern_search.Pattern('[A].')
    for text, position in ((target, 1), ('M', -1), ('', -1)):
        assert single_choice.find(text) == position

    # actual use case: multiple acceptable amino acids
    two_choices = pattern_search.Pattern('[AC].')
    checks = (
        (target, 1),    # this finds an A...
        ('CAT', 0),     # ...or a C
        ('KITTY', -1),
    )
    for text, position in checks:
        assert two_choices.find(text) == position
def test_find_single_then_any(self):
    """Check the index `find` reports for `M-x.` on several inputs."""
    target = 'MAGICHAT'
    matcher = pattern_search.Pattern('M-x.')
    # (input text, expected index)
    checks = (
        ('M', -1),
        ('', -1),
        ('AM', -1),
        (target, 0),
    )
    for text, position in checks:
        assert matcher.find(text) == position
def test_find(self):
    """Check the index `find` reports for one- and two-letter patterns."""
    target = 'MAGICHAT'
    # (pattern text, expected index in `target`; -1 where no hit is expected)
    expectations = (
        ('Y.', -1),
        ('A.', 1),
        ('A-G.', 1),
        ('A-Y.', -1),
    )
    for spec, position in expectations:
        assert pattern_search.Pattern(spec).find(target) == position

    # The last pattern is also run against an empty string.
    last = pattern_search.Pattern('A-T.')
    assert last.find(target) == 6
    assert last.find('') == -1
def test_distance(self):
    """Check `.end` and `len()` of the first `find_all` hit for `M-A-G-I-C.`."""
    matcher = pattern_search.Pattern('M-A-G-I-C.')
    # (input text, expected `.end` of the first hit); the length is 5 in both.
    for text, expected_end in (('MAGICHAT', 5), ('AMAGICHAT', 6)):
        found = matcher.find_all(text)
        assert found[0].end == expected_end
        assert len(found[0]) == 5
def test_termini(self):
    """Check the index `find` reports for patterns using `<` and/or `>`."""
    # pattern text -> ordered (input text, expected index) checks
    scenarios = (
        ('<M.', (
            ('MAGICHAT', 0),
            ('AMAGICHAT', -1),
        )),
        ('T>.', (
            ('MAGICHAT', 7),
            ('MAGICHATS', -1),
        )),
        ('<M-x(6)-T>.', (
            ('MAGICHAT', 0),
            ('MAGICHATS', -1),
            ('AMAGICHAT', -1),
        )),
        ('<M(3).', (
            ('MMMCHEESE', 0),
        )),
        ('<M(3,4).', (
            ('MMMCHEESE', 0),
        )),
        ('Y(2,3)>.', (
            ('KITTYYY', 4),
        )),
    )
    for spec, checks in scenarios:
        matcher = pattern_search.Pattern(spec)
        for text, position in checks:
            assert matcher.find(text) == position
def test_invalid_pattern(self):
    """Check that building a Pattern from `AA.` or `*.` raises ValueError."""
    for bad_spec in ('AA.', '*.'):
        with self.assertRaises(ValueError):
            pattern_search.Pattern(bad_spec)
def test_multiple_start(self):
    """Check that `<[MA].` yields exactly one `find_all` hit in 'MAGICHAT'."""
    target = 'MAGICHAT'
    found = pattern_search.Pattern('<[MA].').find_all(target)
    assert len(found) == 1
def test_leading_multiple_repeats(self):
    """Check that the head's `match_all_possible` returns 3 results on 'MMAGIC'."""
    target = 'MMAGIC'
    matcher = pattern_search.Pattern('M(1,2)-A(0,1).')
    # Calls the pattern's `head` element directly instead of `find_all`.
    candidates = matcher.head.match_all_possible(target)
    assert len(candidates) == 3
def test_invalid_optional_or_cterm(self):
    """Check that `[T>]` placed after an `A>` element raises ValueError."""
    for bad_spec in ('A>-[T>].', 'A>-C-[T>].'):
        with self.assertRaises(ValueError):
            pattern_search.Pattern(bad_spec)
def test_find_all_range_not_start(self):
    """Check that `<{G}(1,3).` yields two `find_all` hits in 'MAGIC'."""
    matcher = pattern_search.Pattern('<{G}(1,3).')
    found = matcher.find_all('MAGIC')
    assert len(found) == 2
def test_find_all_range_not_end(self):
    """Check that `{H}(1,3)>.` yields two `find_all` hits in 'MAGICHAT'."""
    matcher = pattern_search.Pattern('{H}(1,3)>.')
    found = matcher.find_all('MAGICHAT')
    assert len(found) == 2
def test_negated_end(self):
    """Check that `{D}>.` yields exactly one `find_all` hit in 'MAGICHAT'."""
    target = 'MAGICHAT'
    found = pattern_search.Pattern('{D}>.').find_all(target)
    assert len(found) == 1
def test_negated_start(self):
    """Check that `<{N}.` yields exactly one `find_all` hit in 'MAGICHAT'."""
    target = 'MAGICHAT'
    found = pattern_search.Pattern('<{N}.').find_all(target)
    assert len(found) == 1
def test_find_all_range_multi_start(self):
    """Check that `<[MA](1,3).` yields two `find_all` hits in 'MAGIC'."""
    matcher = pattern_search.Pattern('<[MA](1,3).')
    found = matcher.find_all('MAGIC')
    assert len(found) == 2
def test_find_all_range_multi_end(self):
    """Check that `[AT](1,3)>.` yields two `find_all` hits in 'MAGICHAT'."""
    matcher = pattern_search.Pattern('[AT](1,3)>.')
    found = matcher.find_all('MAGICHAT')
    assert len(found) == 2
def test_find_all_leading_multiple_repeats(self):
    """Check that `M(1,2)-A(0,1).` yields five `find_all` hits in 'MMAGIC'."""
    target = 'MMAGIC'
    matcher = pattern_search.Pattern('M(1,2)-A(0,1).')
    found = matcher.find_all(target)
    assert len(found) == 5
def test_multiple_end(self):
    """Check that `[AT]>.` yields exactly one `find_all` hit in 'MAGICHAT'."""
    target = 'MAGICHAT'
    found = pattern_search.Pattern('[AT]>.').find_all(target)
    assert len(found) == 1
def test_find_all_range_start(self):
    """Check that `<M(1,3).` yields two `find_all` hits in 'MMAGIC'."""
    matcher = pattern_search.Pattern('<M(1,3).')
    found = matcher.find_all('MMAGIC')
    assert len(found) == 2
def test_find_all_range_end(self):
    """Check that `Y(2,5)>.` yields three `find_all` hits in 'KITTYYYY'."""
    target = 'KITTYYYY'
    matcher = pattern_search.Pattern('Y(2,5)>.')
    found = matcher.find_all(target)
    assert len(found) == 3
def test_find_all_multiple_repeats(self):
    """Check that `A(0,3)-T-A(0,3).` yields 16 `find_all` hits in 'AAATAAAA'."""
    target = 'AAATAAAA'
    matcher = pattern_search.Pattern('A(0,3)-T-A(0,3).')
    found = matcher.find_all(target)
    assert len(found) == 16