class MutationFinderTests(TestCase):
    """Tests of MutationFinder extraction, span tracking and its regexes.

    Every test uses the MutationFinder built in setUp from the module-level
    ``regular_expressions``.  Calling the finder on a text is expected to
    return a dict mapping PointMutation objects to lists of (start, end)
    spans locating each mention in that text.
    """

    # NOTE(review): neither list is used by the tests in this class, and the
    # layout of the map's entries is not visible here -- confirm that index 0
    # and index 1 really select the one- and three-letter codes.
    _single_letter_aa_codes = \
        [aa[0] for aa in amino_acid_three_to_one_letter_map]
    _triple_letter_aa_codes = \
        [aa[1] for aa in amino_acid_three_to_one_letter_map]

    def setUp(self):
        """Initialize the MutationFinder instance shared by the tests."""
        self.me = MutationFinder(regular_expressions=regular_expressions)

    def test_init(self):
        """MF: __init__ returns without error"""
        # Only successful construction matters, so the instances are not kept.
        MutationFinder(regular_expressions=[])
        MutationFinder(regular_expressions=regular_expressions)

    def test_call_no_mutations(self):
        """MF: extraction functions with no extraction-worthy data"""
        self.assertEqual(self.me(''), {})
        self.assertEqual(self.me('There is no mutation data here.'), {})
        self.assertEqual(self.me('T64 is almost a valid mutation.'), {})
        self.assertEqual(self.me('So is 42S.'), {})

    def test_call_single_mutation(self):
        """MF:extraction functions when one mutation is present"""
        expected = {PointMutation(42, 'S', 'T'): [(0, 4)]}
        self.assertEqual(self.me('S42T'), expected)

        expected = {PointMutation(42, 'S', 'T'): [(4, 8)]}
        self.assertEqual(self.me('The S42T mutation was made.'), expected)

    def test_call_multiple_mutations(self):
        """MF:extraction functions when more than one mutation is present"""
        expected = {PointMutation(42, 'S', 'T'): [(0, 4)],
                    PointMutation(36, 'W', 'Y'): [(9, 13)]}
        self.assertEqual(self.me('S42T and W36Y'), expected)

        expected = {PointMutation(42, 'S', 'T'): [(0, 8)],
                    PointMutation(36, 'W', 'Y'): [(13, 21)]}
        self.assertEqual(self.me('Ser42Thr and Trp36Tyr'), expected)

    def test_call_multiple_mutations_w_positive_lookahead(self):
        """MF:extraction functions when > 1 mutation and look-ahead is req'd"""
        # The two mentions are separated by a single space; presumably that
        # separator must not be consumed by the first match -- verify against
        # the regular expressions.
        expected = {PointMutation(42, 'S', 'T'): [(0, 4)],
                    PointMutation(36, 'W', 'Y'): [(5, 9)]}
        self.assertEqual(self.me('S42T W36Y'), expected)

        expected = {PointMutation(42, 'S', 'T'): [(0, 8)],
                    PointMutation(36, 'W', 'Y'): [(9, 17)]}
        self.assertEqual(self.me('Ser42Thr Trp36Tyr'), expected)

    def test_call_spans_tallied(self):
        """MF:spans are tallied in call"""
        expected = {PointMutation(42, 'S', 'T'): [(0, 4)],
                    PointMutation(36, 'W', 'Y'): [(9, 13)]}
        self.assertEqual(self.me('S42T and W36Y'), expected)

        # A repeated mention adds a second span under the same mutation.
        expected = {PointMutation(42, 'S', 'T'): [(0, 4)],
                    PointMutation(36, 'W', 'Y'): [(6, 10), (16, 20)]}
        self.assertEqual(self.me('S42T, W36Y, and W36Y'), expected)

        # The Trp36Tyr span (12, 20) is listed after both W36Y spans although
        # it sits between them in the text; presumably spans are collected one
        # regular expression at a time -- verify against MutationFinder.
        expected = {PointMutation(42, 'S', 'T'): [(0, 4)],
                    PointMutation(36, 'W', 'Y'): [(6, 10), (26, 30), (12, 20)]}
        self.assertEqual(self.me('S42T, W36Y, Trp36Tyr, and W36Y'), expected)

    def test_call_spans_calculated_correctly_for_different_matches(self):
        """MF:spans are correctly calculated for various mention formats"""
        expected = {PointMutation(42, 'A', 'G'): [(4, 8)]}
        self.assertEqual(self.me('The A42G mutation was made.'), expected)

        expected = {PointMutation(42, 'A', 'G'): [(4, 15)]}
        self.assertEqual(self.me('The Ala42-->Gly mutation was made.'),
                         expected)

        expected = {PointMutation(42, 'A', 'G'): [(4, 12)]}
        self.assertEqual(self.me('The Ala42Gly mutation was made.'), expected)

        expected = {PointMutation(42, 'A', 'G'): [(4, 20)]}
        self.assertEqual(self.me('The Ala42 to Glycine mutation.'), expected)

    def test_regex_case_insensitive_flag_one_letter(self):
        """MF:one-letter abbreviations case-sensitive"""
        one_letter = self.me._regular_expressions[0]
        self.assertIsNone(one_letter.match('a64t'))
        self.assertIsNone(one_letter.match('A64t'))
        self.assertIsNone(one_letter.match('a64T'))
        self.assertEqual(one_letter.match('A64T').group(), 'A64T')

    def test_regex_case_insensitive_flag_three_letter(self):
        """MF:toggle regex case insensitive functions for non-built-in regexs"""
        # IGNORECASE flag on: every casing must match in full.
        three_letter = self.me._regular_expressions[1]
        for text in ('ala64gly', 'Ala64Gly', 'aLa64gLy', 'ALA64GLY'):
            self.assertEqual(three_letter.match(text).group(), text)

    def test_one_letter_match(self):
        """MF:regex identifies one-letter codes"""
        one_letter = self.me._regular_expressions[0]
        self.assertEqual(one_letter.match('A64G').group(), 'A64G')

    def test_one_letter_match_loc_restriction(self):
        """MF:single-letter regex ignored positions < 10"""
        one_letter = self.me._regular_expressions[0]
        self.assertEqual(one_letter.match('A64G').group(), 'A64G')
        # Single-digit positions must not match the one-letter pattern.
        self.assertIsNone(one_letter.match('E2F'))
        self.assertIsNone(one_letter.match('H9A'))

    def test_three_letter_match(self):
        """MF:regex identifies three-letter codes"""
        three_letter = self.me._regular_expressions[1]
        self.assertEqual(three_letter.match('Ala6Gly').group(), 'Ala6Gly')
        self.assertEqual(three_letter.match('Ala64Gly').group(), 'Ala64Gly')

    def test_varied_digit_length(self):
        """MF:regex identifies mutations w/ different location lengths"""
        one_letter = self.me._regular_expressions[0]
        three_letter = self.me._regular_expressions[1]
        self.assertEqual(one_letter.match('A64G').group(), 'A64G')
        self.assertEqual(three_letter.match('Ala64Gly').group(), 'Ala64Gly')
        self.assertEqual(one_letter.match('A864G').group(), 'A864G')
        self.assertEqual(three_letter.match('Ala864Gly').group(), 'Ala864Gly')
        self.assertEqual(one_letter.match('A8864G').group(), 'A8864G')
        self.assertEqual(three_letter.match('Ala8864Gly').group(),
                         'Ala8864Gly')

    def test_post_process(self):
        """MF:post processing steps function as expected"""
        # _post_process is checked through its effect on the dict passed in:
        # the entry whose two residues are identical must be removed.
        mutations = {PointMutation(460, 'W', 'W'): [(0, 5)]}
        expected = {}
        self.me._post_process(mutations)
        self.assertEqual(mutations, expected)

        mutations = {PointMutation(460, 'W', 'W'): [(0, 5)],
                     PointMutation(460, 'W', 'G'): [(6, 11)]}
        expected = {PointMutation(460, 'W', 'G'): [(6, 11)]}
        self.me._post_process(mutations)
        self.assertEqual(mutations, expected)

    def test_unacceptable_general_word_boundaries(self):
        """MF:regexs disallow unacceptable word boundaries"""
        starts = list('abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=])')
        ends = list('abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=([')
        mutation_texts = ['A64G', 'Ala64Gly', 'Ala64-->Gly']

        # Every (start, mention, end) combination must yield no mutation.
        for mutation_text in mutation_texts:
            for start in starts:
                for end in ends:
                    text = start + mutation_text + end
                    self.assertEqual(self.me(text), {})

    def test_acceptable_general_word_boundaries(self):
        """MF:regexs allow acceptable word boundaries"""
        ends = ['.', ',', '', ' ', '\t', '\n', ')', ']', '"', "'", ':', ';',
                '?', '!', '/', '-']
        starts = [' ', '\t', '\n', '"', "'", '(', '[', '', '/', ',', '-']
        mutation_texts = ['A64G', 'Ala64Gly', 'Ala64-->Gly']

        for mutation_text in mutation_texts:
            for start in starts:
                for end in ends:
                    text = start + mutation_text + end
                    # No start string contains 'A', so the first 'A' marks
                    # where the mention begins.
                    begin = text.index('A')
                    expected = {PointMutation(64, 'A', 'G'):
                                [(begin, begin + len(mutation_text))]}
                    self.assertEqual(self.me(text), expected)

    def test_mix_one_three_letter_match(self):
        """MF:regex ignores one/three letter code mixes"""
        self.assertEqual(self.me('Ala64G'), {})
        self.assertEqual(self.me('A64Gly'), {})

    def test_full_name_matches(self):
        """MF:regex identifies full name mentions of amino acids"""
        expected = {PointMutation(64, 'A', 'G'): [(0, 15)]}
        self.assertEqual(self.me('alanine64-->Gly'), expected)

        expected = {PointMutation(64, 'A', 'G'): [(0, 15)]}
        self.assertEqual(self.me('Ala64-->glycine'), expected)

    def test_single_residue_fails_non_xNy(self):
        """MF:single residue matches fail in non-xNy format"""
        self.assertEqual(self.me('A64-->glycine'), {})
        self.assertEqual(self.me('Ala64-->G'), {})

    def test_text_based_matches_w_N_m(self):
        """MF:regex identifies wN m text descriptions"""
        # Bare mentions: the span covers the whole text.
        texts = ['Ala64 to Gly', 'Alanine64 to Glycine',
                 'Ala64 to glycine', 'alanine64 to Gly']
        for text in texts:
            self.assertEqual(self.me(text),
                             {PointMutation(64, 'A', 'G'): [(0, len(text))]})

        # Embedded mentions: the span starts after 'The ' (4 characters) and
        # stops before ' substitution' (13 characters).
        # NOTE(review): the first and third entries are identical; presumably
        # one of them was meant to be a different variant -- confirm.
        texts = ['The Ala64 to Gly substitution',
                 'The Ala64 to glycine substitution',
                 'The Ala64 to Gly substitution']
        for text in texts:
            self.assertEqual(
                self.me(text),
                {PointMutation(64, 'A', 'G'): [(4, len(text) - 13)]})

    def test_text_match_spacing(self):
        """MF:mis-spaced text matches fail"""
        self.assertEqual(self.me('TheAla40toGlymutation'), {})
        self.assertEqual(self.me('arg40tomet'), {})
        self.assertEqual(self.me('ala25tohis'), {})
def setUp(self):
    """Create the MutationFinder used by the tests and store it as self.me."""
    finder = MutationFinder(regular_expressions=regular_expressions)
    self.me = finder
class MutationFinderTests(TestCase):
    """Tests of MutationFinder extraction, span tracking and its regexes.

    Every test uses the MutationFinder built in setUp from the module-level
    ``regular_expressions``.  Calling the finder on a text is expected to
    return a dict mapping PointMutation objects to lists of (start, end)
    spans locating each mention in that text.
    """

    # NOTE(review): neither list is used by the tests in this class, and the
    # layout of the map's entries is not visible here -- confirm that index 0
    # and index 1 really select the one- and three-letter codes.
    _single_letter_aa_codes = [
        aa[0] for aa in amino_acid_three_to_one_letter_map
    ]
    _triple_letter_aa_codes = [
        aa[1] for aa in amino_acid_three_to_one_letter_map
    ]

    def setUp(self):
        """Initialize the MutationFinder instance shared by the tests."""
        self.me = MutationFinder(regular_expressions=regular_expressions)

    def test_init(self):
        """MF: __init__ returns without error"""
        # Only successful construction matters, so the instances are not kept.
        MutationFinder(regular_expressions=[])
        MutationFinder(regular_expressions=regular_expressions)

    def test_call_no_mutations(self):
        """MF: extraction functions with no extraction-worthy data"""
        self.assertEqual(self.me(""), {})
        self.assertEqual(self.me("There is no mutation data here."), {})
        self.assertEqual(self.me("T64 is almost a valid mutation."), {})
        self.assertEqual(self.me("So is 42S."), {})

    def test_call_single_mutation(self):
        """MF:extraction functions when one mutation is present"""
        expected = {PointMutation(42, "S", "T"): [(0, 4)]}
        self.assertEqual(self.me("S42T"), expected)

        expected = {PointMutation(42, "S", "T"): [(4, 8)]}
        self.assertEqual(self.me("The S42T mutation was made."), expected)

    def test_call_multiple_mutations(self):
        """MF:extraction functions when more than one mutation is present"""
        expected = {
            PointMutation(42, "S", "T"): [(0, 4)],
            PointMutation(36, "W", "Y"): [(9, 13)],
        }
        self.assertEqual(self.me("S42T and W36Y"), expected)

        expected = {
            PointMutation(42, "S", "T"): [(0, 8)],
            PointMutation(36, "W", "Y"): [(13, 21)],
        }
        self.assertEqual(self.me("Ser42Thr and Trp36Tyr"), expected)

    def test_call_multiple_mutations_w_positive_lookahead(self):
        """MF:extraction functions when > 1 mutation and look-ahead is req'd"""
        # The two mentions are separated by a single space; presumably that
        # separator must not be consumed by the first match -- verify against
        # the regular expressions.
        expected = {
            PointMutation(42, "S", "T"): [(0, 4)],
            PointMutation(36, "W", "Y"): [(5, 9)],
        }
        self.assertEqual(self.me("S42T W36Y"), expected)

        expected = {
            PointMutation(42, "S", "T"): [(0, 8)],
            PointMutation(36, "W", "Y"): [(9, 17)],
        }
        self.assertEqual(self.me("Ser42Thr Trp36Tyr"), expected)

    def test_call_spans_tallied(self):
        """MF:spans are tallied in call"""
        expected = {
            PointMutation(42, "S", "T"): [(0, 4)],
            PointMutation(36, "W", "Y"): [(9, 13)],
        }
        self.assertEqual(self.me("S42T and W36Y"), expected)

        # A repeated mention adds a second span under the same mutation.
        expected = {
            PointMutation(42, "S", "T"): [(0, 4)],
            PointMutation(36, "W", "Y"): [(6, 10), (16, 20)],
        }
        self.assertEqual(self.me("S42T, W36Y, and W36Y"), expected)

        # The Trp36Tyr span (12, 20) is listed after both W36Y spans although
        # it sits between them in the text; presumably spans are collected one
        # regular expression at a time -- verify against MutationFinder.
        expected = {
            PointMutation(42, "S", "T"): [(0, 4)],
            PointMutation(36, "W", "Y"): [(6, 10), (26, 30), (12, 20)],
        }
        self.assertEqual(self.me("S42T, W36Y, Trp36Tyr, and W36Y"), expected)

    def test_call_spans_calculated_correctly_for_different_matches(self):
        """MF:spans are correctly calculated for various mention formats"""
        expected = {PointMutation(42, "A", "G"): [(4, 8)]}
        self.assertEqual(self.me("The A42G mutation was made."), expected)

        expected = {PointMutation(42, "A", "G"): [(4, 15)]}
        self.assertEqual(
            self.me("The Ala42-->Gly mutation was made."), expected
        )

        expected = {PointMutation(42, "A", "G"): [(4, 12)]}
        self.assertEqual(self.me("The Ala42Gly mutation was made."), expected)

        expected = {PointMutation(42, "A", "G"): [(4, 20)]}
        self.assertEqual(self.me("The Ala42 to Glycine mutation."), expected)

    def test_regex_case_insensitive_flag_one_letter(self):
        """MF:one-letter abbreviations case-sensitive"""
        one_letter = self.me._regular_expressions[0]
        self.assertIsNone(one_letter.match("a64t"))
        self.assertIsNone(one_letter.match("A64t"))
        self.assertIsNone(one_letter.match("a64T"))
        self.assertEqual(one_letter.match("A64T").group(), "A64T")

    def test_regex_case_insensitive_flag_three_letter(self):
        """MF:toggle regex case insensitive functions for non-built-in regexs"""
        # IGNORECASE flag on: every casing must match in full.
        three_letter = self.me._regular_expressions[1]
        for text in ("ala64gly", "Ala64Gly", "aLa64gLy", "ALA64GLY"):
            self.assertEqual(three_letter.match(text).group(), text)

    def test_one_letter_match(self):
        """MF:regex identifies one-letter codes"""
        one_letter = self.me._regular_expressions[0]
        self.assertEqual(one_letter.match("A64G").group(), "A64G")

    def test_one_letter_match_loc_restriction(self):
        """MF:single-letter regex ignored positions < 10"""
        one_letter = self.me._regular_expressions[0]
        self.assertEqual(one_letter.match("A64G").group(), "A64G")
        # Single-digit positions must not match the one-letter pattern.
        self.assertIsNone(one_letter.match("E2F"))
        self.assertIsNone(one_letter.match("H9A"))

    def test_three_letter_match(self):
        """MF:regex identifies three-letter codes"""
        three_letter = self.me._regular_expressions[1]
        self.assertEqual(three_letter.match("Ala6Gly").group(), "Ala6Gly")
        self.assertEqual(three_letter.match("Ala64Gly").group(), "Ala64Gly")

    def test_varied_digit_length(self):
        """MF:regex identifies mutations w/ different location lengths"""
        one_letter = self.me._regular_expressions[0]
        three_letter = self.me._regular_expressions[1]
        self.assertEqual(one_letter.match("A64G").group(), "A64G")
        self.assertEqual(three_letter.match("Ala64Gly").group(), "Ala64Gly")
        self.assertEqual(one_letter.match("A864G").group(), "A864G")
        self.assertEqual(three_letter.match("Ala864Gly").group(), "Ala864Gly")
        self.assertEqual(one_letter.match("A8864G").group(), "A8864G")
        self.assertEqual(
            three_letter.match("Ala8864Gly").group(), "Ala8864Gly"
        )

    def test_post_process(self):
        """MF:post processing steps function as expected"""
        # _post_process is checked through its effect on the dict passed in:
        # the entry whose two residues are identical must be removed.
        mutations = {PointMutation(460, "W", "W"): [(0, 5)]}
        expected = {}
        self.me._post_process(mutations)
        self.assertEqual(mutations, expected)

        mutations = {
            PointMutation(460, "W", "W"): [(0, 5)],
            PointMutation(460, "W", "G"): [(6, 11)],
        }
        expected = {PointMutation(460, "W", "G"): [(6, 11)]}
        self.me._post_process(mutations)
        self.assertEqual(mutations, expected)

    def test_unacceptable_general_word_boundaries(self):
        """MF:regexs disallow unacceptable word boundaries"""
        starts = list("abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=])")
        ends = list("abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=([")
        mutation_texts = ["A64G", "Ala64Gly", "Ala64-->Gly"]

        # Every (start, mention, end) combination must yield no mutation.
        for mutation_text in mutation_texts:
            for start in starts:
                for end in ends:
                    text = start + mutation_text + end
                    self.assertEqual(self.me(text), {})

    def test_acceptable_general_word_boundaries(self):
        """MF:regexs allow acceptable word boundaries"""
        ends = [
            ".", ",", "", " ", "\t", "\n", ")", "]", '"', "'", ":", ";",
            "?", "!", "/", "-",
        ]
        starts = [" ", "\t", "\n", '"', "'", "(", "[", "", "/", ",", "-"]
        mutation_texts = ["A64G", "Ala64Gly", "Ala64-->Gly"]

        for mutation_text in mutation_texts:
            for start in starts:
                for end in ends:
                    text = start + mutation_text + end
                    # No start string contains "A", so the first "A" marks
                    # where the mention begins.
                    begin = text.index("A")
                    expected = {
                        PointMutation(64, "A", "G"): [
                            (begin, begin + len(mutation_text))
                        ]
                    }
                    self.assertEqual(self.me(text), expected)

    def test_mix_one_three_letter_match(self):
        """MF:regex ignores one/three letter code mixes"""
        self.assertEqual(self.me("Ala64G"), {})
        self.assertEqual(self.me("A64Gly"), {})

    def test_full_name_matches(self):
        """MF:regex identifies full name mentions of amino acids"""
        expected = {PointMutation(64, "A", "G"): [(0, 15)]}
        self.assertEqual(self.me("alanine64-->Gly"), expected)

        expected = {PointMutation(64, "A", "G"): [(0, 15)]}
        self.assertEqual(self.me("Ala64-->glycine"), expected)

    def test_single_residue_fails_non_xNy(self):
        """MF:single residue matches fail in non-xNy format"""
        self.assertEqual(self.me("A64-->glycine"), {})
        self.assertEqual(self.me("Ala64-->G"), {})

    def test_text_based_matches_w_N_m(self):
        """MF:regex identifies wN m text descriptions"""
        # Bare mentions: the span covers the whole text.
        texts = [
            "Ala64 to Gly",
            "Alanine64 to Glycine",
            "Ala64 to glycine",
            "alanine64 to Gly",
        ]
        for text in texts:
            self.assertEqual(
                self.me(text), {PointMutation(64, "A", "G"): [(0, len(text))]}
            )

        # Embedded mentions: the span starts after "The " (4 characters) and
        # stops before " substitution" (13 characters).
        # NOTE(review): the first and third entries are identical; presumably
        # one of them was meant to be a different variant -- confirm.
        texts = [
            "The Ala64 to Gly substitution",
            "The Ala64 to glycine substitution",
            "The Ala64 to Gly substitution",
        ]
        for text in texts:
            self.assertEqual(
                self.me(text),
                {PointMutation(64, "A", "G"): [(4, len(text) - 13)]},
            )

    def test_text_match_spacing(self):
        """MF:mis-spaced text matches fail"""
        self.assertEqual(self.me("TheAla40toGlymutation"), {})
        self.assertEqual(self.me("arg40tomet"), {})
        self.assertEqual(self.me("ala25tohis"), {})