Example #1
0
class MutationFinderTests(TestCase):
    """Tests for calling MutationFinder, its regular expressions and _post_process."""

    _single_letter_aa_codes = [aa[0] for aa in amino_acid_three_to_one_letter_map]
    _triple_letter_aa_codes = [aa[1] for aa in amino_acid_three_to_one_letter_map]

    def setUp(self):
        """Create the MutationFinder instance exercised by each test."""
        self.me = MutationFinder(regular_expressions=regular_expressions)

    def test_init(self):
        """MF: __init__ returns without error"""
        # Construction must succeed with an empty pattern list as well as
        # with the full `regular_expressions` collection.
        for patterns in ([], regular_expressions):
            MutationFinder(regular_expressions=patterns)

    def test_call_no_mutations(self):
        """MF: extraction returns an empty dict when no mutation is mentioned"""
        mutation_free_texts = (
            '',
            'There is no mutation data here.',
            'T64 is almost a valid mutation.',
            'So is 42S.',
        )
        for text in mutation_free_texts:
            self.assertEqual(self.me(text), {})

    def test_call_single_mutation(self):
        """MF: extraction functions when one mutation is present"""
        cases = (
            ('S42T', (0, 4)),
            ('The S42T mutation was made.', (4, 8)),
        )
        for text, span in cases:
            expected = {PointMutation(42, 'S', 'T'): [span]}
            self.assertEqual(self.me(text), expected)

    def test_call_multiple_mutations(self):
        """MF: extraction functions when more than one mutation is present"""
        # Each case: (input text, spans expected for S42T, spans for W36Y).
        cases = (
            ('S42T and W36Y', [(0, 4)], [(9, 13)]),
            ('Ser42Thr and Trp36Tyr', [(0, 8)], [(13, 21)]),
        )
        for text, s42t_spans, w36y_spans in cases:
            expected = {
                PointMutation(42, 'S', 'T'): s42t_spans,
                PointMutation(36, 'W', 'Y'): w36y_spans,
            }
            self.assertEqual(self.me(text), expected)

    def test_call_multiple_mutations_w_positive_lookahead(self):
        """MF: extraction functions when > 1 mutation and look-ahead is req'd"""
        # The two mentions are separated by a single space only.
        cases = (
            ('S42T W36Y', [(0, 4)], [(5, 9)]),
            ('Ser42Thr Trp36Tyr', [(0, 8)], [(9, 17)]),
        )
        for text, s42t_spans, w36y_spans in cases:
            expected = {
                PointMutation(42, 'S', 'T'): s42t_spans,
                PointMutation(36, 'W', 'Y'): w36y_spans,
            }
            self.assertEqual(self.me(text), expected)

    def test_call_spans_tallied(self):
        """MF: every span of a repeated mutation is collected by a call"""
        cases = (
            ('S42T and W36Y', [(0, 4)], [(9, 13)]),
            ('S42T, W36Y, and W36Y', [(0, 4)], [(6, 10), (16, 20)]),
            # The three-letter mention (12, 20) is expected after both
            # one-letter mentions, i.e. spans are not sorted by position.
            ('S42T, W36Y, Trp36Tyr, and W36Y',
             [(0, 4)], [(6, 10), (26, 30), (12, 20)]),
        )
        for text, s42t_spans, w36y_spans in cases:
            expected = {
                PointMutation(42, 'S', 'T'): s42t_spans,
                PointMutation(36, 'W', 'Y'): w36y_spans,
            }
            self.assertEqual(self.me(text), expected)

    def test_call_spans_calculated_correctly_for_different_matches(self):
        """MF: spans are correctly calculated for various mention formats"""
        cases = (
            ('The A42G mutation was made.', (4, 8)),
            ('The Ala42-->Gly mutation was made.', (4, 15)),
            ('The Ala42Gly mutation was made.', (4, 12)),
            ('The Ala42 to Glycine mutation.', (4, 20)),
        )
        for text, span in cases:
            expected = {PointMutation(42, 'A', 'G'): [span]}
            self.assertEqual(self.me(text), expected)

    def test_regex_case_insensitive_flag_one_letter(self):
        """MF: one-letter abbreviations are case-sensitive"""
        one_letter_regex = self.me._regular_expressions[0]
        for text in ('a64t', 'A64t', 'a64T'):
            self.assertEqual(one_letter_regex.match(text), None)
        self.assertEqual(one_letter_regex.match('A64T').group(), 'A64T')

    def test_regex_case_insensitive_flag_three_letter(self):
        """MF: three-letter abbreviations match in any letter case"""
        three_letter_regex = self.me._regular_expressions[1]
        for text in ('ala64gly', 'Ala64Gly', 'aLa64gLy', 'ALA64GLY'):
            # The whole input must be returned as the match.
            self.assertEqual(three_letter_regex.match(text).group(), text)

    def test_one_letter_match(self):
        """MF: regex identifies one-letter codes"""
        found = self.me._regular_expressions[0].match('A64G')
        self.assertEqual(found.group(), 'A64G')

    def test_one_letter_match_loc_restriction(self):
        """MF: single-letter regex ignores positions < 10"""
        one_letter_regex = self.me._regular_expressions[0]
        self.assertEqual(one_letter_regex.match('A64G').group(), 'A64G')
        for text in ('E2F', 'H9A'):
            self.assertEqual(one_letter_regex.match(text), None)

    def test_three_letter_match(self):
        """MF: regex identifies three-letter codes"""
        three_letter_regex = self.me._regular_expressions[1]
        for text in ('Ala6Gly', 'Ala64Gly'):
            self.assertEqual(three_letter_regex.match(text).group(), text)

    def test_varied_digit_length(self):
        """MF: regex identifies mutations w/ different location lengths"""
        # Each case: (index into _regular_expressions, text it must match).
        cases = (
            (0, 'A64G'),
            (1, 'Ala64Gly'),
            (0, 'A864G'),
            (1, 'Ala864Gly'),
            (0, 'A8864G'),
            (1, 'Ala8864Gly'),
        )
        for regex_index, text in cases:
            found = self.me._regular_expressions[regex_index].match(text)
            self.assertEqual(found.group(), text)

    def test_post_process(self):
        """MF: post processing drops mutations whose two residues are equal"""
        # _post_process is checked through its in-place effect on the dict.
        mutations = {PointMutation(460, 'W', 'W'): [(0, 5)]}
        expected = {}
        self.me._post_process(mutations)
        self.assertEqual(mutations, expected)

        mutations = {
            PointMutation(460, 'W', 'W'): [(0, 5)],
            PointMutation(460, 'W', 'G'): [(6, 11)],
        }
        expected = {PointMutation(460, 'W', 'G'): [(6, 11)]}
        self.me._post_process(mutations)
        self.assertEqual(mutations, expected)

    def test_unacceptable_general_word_boundaries(self):
        """MF: regexs disallow unacceptable word boundaries"""
        bad_prefixes = 'abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=])'
        bad_suffixes = 'abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=(['

        # Every mention is wrapped in one bad character on each side.
        for mention in ('A64G', 'Ala64Gly', 'Ala64-->Gly'):
            for prefix in bad_prefixes:
                for suffix in bad_suffixes:
                    self.assertEqual(self.me(prefix + mention + suffix), {})

    def test_acceptable_general_word_boundaries(self):
        """MF: regexs allow acceptable word boundaries"""
        good_suffixes = ['.', ',', '', ' ', '\t', '\n', ')', ']', '"', "'",
                         ':', ';', '?', '!', '/', '-']
        good_prefixes = [' ', '\t', '\n', '"', "'", '(', '[', '', '/', ',', '-']

        for mention in ('A64G', 'Ala64Gly', 'Ala64-->Gly'):
            for prefix in good_prefixes:
                for suffix in good_suffixes:
                    text = prefix + mention + suffix
                    # No prefix contains 'A', so the first 'A' is where the
                    # mention starts.
                    begin = text.index('A')
                    expected = {
                        PointMutation(64, 'A', 'G'):
                            [(begin, begin + len(mention))]
                    }
                    self.assertEqual(self.me(text), expected)

    def test_mix_one_three_letter_match(self):
        """MF: regex ignores one/three letter code mixes"""
        for text in ('Ala64G', 'A64Gly'):
            self.assertEqual(self.me(text), {})

    def test_full_name_matches(self):
        """MF: regex identifies full name mentions of amino acids"""
        for text in ('alanine64-->Gly', 'Ala64-->glycine'):
            expected = {PointMutation(64, 'A', 'G'): [(0, 15)]}
            self.assertEqual(self.me(text), expected)

    def test_single_residue_fails_non_xNy(self):
        """MF: single residue matches fail in non-xNy format"""
        for text in ('A64-->glycine', 'Ala64-->G'):
            self.assertEqual(self.me(text), {})

    def test_text_based_matches_w_N_m(self):
        """MF: regex identifies wN m text descriptions"""
        whole_text_mentions = [
            'Ala64 to Gly',
            'Alanine64 to Glycine',
            'Ala64 to glycine',
            'alanine64 to Gly',
        ]
        for text in whole_text_mentions:
            # The mention is the entire text.
            expected = {PointMutation(64, 'A', 'G'): [(0, len(text))]}
            self.assertEqual(self.me(text), expected)

        # NOTE(review): the first and third entries are identical; possibly a
        # different variant was intended -- confirm before changing.
        embedded_mentions = [
            'The Ala64 to Gly substitution',
            'The Ala64 to glycine substitution',
            'The Ala64 to Gly substitution',
        ]
        # The mention sits between the leading 'The ' (4 characters) and the
        # trailing ' substitution' (13 characters).
        trailing_length = len(' substitution')
        for text in embedded_mentions:
            expected = {
                PointMutation(64, 'A', 'G'): [(4, len(text) - trailing_length)]
            }
            self.assertEqual(self.me(text), expected)

    def test_text_match_spacing(self):
        """MF: mis-spaced text matches fail"""
        for text in ('TheAla40toGlymutation', 'arg40tomet', 'ala25tohis'):
            self.assertEqual(self.me(text), {})
Example #2
0
class MutationFinderTests(TestCase):
    """Tests for calling MutationFinder, its regular expressions and _post_process."""

    _single_letter_aa_codes = [aa[0] for aa in amino_acid_three_to_one_letter_map]
    _triple_letter_aa_codes = [aa[1] for aa in amino_acid_three_to_one_letter_map]

    def setUp(self):
        """Create the MutationFinder instance exercised by each test."""
        self.me = MutationFinder(regular_expressions=regular_expressions)

    def test_init(self):
        """MF: __init__ returns without error"""
        # Construction must succeed with an empty pattern list as well as
        # with the full `regular_expressions` collection.
        for pattern_set in ([], regular_expressions):
            MutationFinder(regular_expressions=pattern_set)

    def test_call_no_mutations(self):
        """MF: extraction returns an empty dict when no mutation is mentioned"""
        inputs = [
            "",
            "There is no mutation data here.",
            "T64 is almost a valid mutation.",
            "So is 42S.",
        ]
        for sentence in inputs:
            self.assertEqual(self.me(sentence), {})

    def test_call_single_mutation(self):
        """MF: extraction functions when one mutation is present"""
        span_by_sentence = [
            ("S42T", (0, 4)),
            ("The S42T mutation was made.", (4, 8)),
        ]
        for sentence, span in span_by_sentence:
            expected = {PointMutation(42, "S", "T"): [span]}
            self.assertEqual(self.me(sentence), expected)

    def test_call_multiple_mutations(self):
        """MF: extraction functions when more than one mutation is present"""
        # Each row: (input text, spans expected for S42T, spans for W36Y).
        rows = [
            ("S42T and W36Y", [(0, 4)], [(9, 13)]),
            ("Ser42Thr and Trp36Tyr", [(0, 8)], [(13, 21)]),
        ]
        for sentence, ser_to_thr, trp_to_tyr in rows:
            expected = {
                PointMutation(42, "S", "T"): ser_to_thr,
                PointMutation(36, "W", "Y"): trp_to_tyr,
            }
            self.assertEqual(self.me(sentence), expected)

    def test_call_multiple_mutations_w_positive_lookahead(self):
        """MF: extraction functions when > 1 mutation and look-ahead is req'd"""
        # The two mentions are separated by a single space only.
        rows = [
            ("S42T W36Y", [(0, 4)], [(5, 9)]),
            ("Ser42Thr Trp36Tyr", [(0, 8)], [(9, 17)]),
        ]
        for sentence, ser_to_thr, trp_to_tyr in rows:
            expected = {
                PointMutation(42, "S", "T"): ser_to_thr,
                PointMutation(36, "W", "Y"): trp_to_tyr,
            }
            self.assertEqual(self.me(sentence), expected)

    def test_call_spans_tallied(self):
        """MF: every span of a repeated mutation is collected by a call"""
        rows = [
            ("S42T and W36Y", [(0, 4)], [(9, 13)]),
            ("S42T, W36Y, and W36Y", [(0, 4)], [(6, 10), (16, 20)]),
            # The three-letter mention (12, 20) is expected after both
            # one-letter mentions, i.e. spans are not sorted by position.
            ("S42T, W36Y, Trp36Tyr, and W36Y", [(0, 4)], [(6, 10), (26, 30), (12, 20)]),
        ]
        for sentence, ser_to_thr, trp_to_tyr in rows:
            expected = {
                PointMutation(42, "S", "T"): ser_to_thr,
                PointMutation(36, "W", "Y"): trp_to_tyr,
            }
            self.assertEqual(self.me(sentence), expected)

    def test_call_spans_calculated_correctly_for_different_matches(self):
        """MF: spans are correctly calculated for various mention formats"""
        span_by_sentence = [
            ("The A42G mutation was made.", (4, 8)),
            ("The Ala42-->Gly mutation was made.", (4, 15)),
            ("The Ala42Gly mutation was made.", (4, 12)),
            ("The Ala42 to Glycine mutation.", (4, 20)),
        ]
        for sentence, span in span_by_sentence:
            expected = {PointMutation(42, "A", "G"): [span]}
            self.assertEqual(self.me(sentence), expected)

    def test_regex_case_insensitive_flag_one_letter(self):
        """MF: one-letter abbreviations are case-sensitive"""
        single_letter = self.me._regular_expressions[0]
        for candidate in ["a64t", "A64t", "a64T"]:
            self.assertEqual(single_letter.match(candidate), None)
        self.assertEqual(single_letter.match("A64T").group(), "A64T")

    def test_regex_case_insensitive_flag_three_letter(self):
        """MF: three-letter abbreviations match in any letter case"""
        triple_letter = self.me._regular_expressions[1]
        for candidate in ["ala64gly", "Ala64Gly", "aLa64gLy", "ALA64GLY"]:
            # The whole input must be returned as the match.
            self.assertEqual(triple_letter.match(candidate).group(), candidate)

    def test_one_letter_match(self):
        """MF: regex identifies one-letter codes"""
        hit = self.me._regular_expressions[0].match("A64G")
        self.assertEqual(hit.group(), "A64G")

    def test_one_letter_match_loc_restriction(self):
        """MF: single-letter regex ignores positions < 10"""
        single_letter = self.me._regular_expressions[0]
        self.assertEqual(single_letter.match("A64G").group(), "A64G")
        for candidate in ["E2F", "H9A"]:
            self.assertEqual(single_letter.match(candidate), None)

    def test_three_letter_match(self):
        """MF: regex identifies three-letter codes"""
        triple_letter = self.me._regular_expressions[1]
        for candidate in ["Ala6Gly", "Ala64Gly"]:
            self.assertEqual(triple_letter.match(candidate).group(), candidate)

    def test_varied_digit_length(self):
        """MF: regex identifies mutations w/ different location lengths"""
        # Each row: (index into _regular_expressions, text it must match).
        rows = [
            (0, "A64G"),
            (1, "Ala64Gly"),
            (0, "A864G"),
            (1, "Ala864Gly"),
            (0, "A8864G"),
            (1, "Ala8864Gly"),
        ]
        for position, candidate in rows:
            hit = self.me._regular_expressions[position].match(candidate)
            self.assertEqual(hit.group(), candidate)

    def test_post_process(self):
        """MF: post processing drops mutations whose two residues are equal"""
        # _post_process is checked through its in-place effect on the dict.
        mutations = {PointMutation(460, "W", "W"): [(0, 5)]}
        expected = {}
        self.me._post_process(mutations)
        self.assertEqual(mutations, expected)

        mutations = {
            PointMutation(460, "W", "W"): [(0, 5)],
            PointMutation(460, "W", "G"): [(6, 11)],
        }
        expected = {PointMutation(460, "W", "G"): [(6, 11)]}
        self.me._post_process(mutations)
        self.assertEqual(mutations, expected)

    def test_unacceptable_general_word_boundaries(self):
        """MF: regexs disallow unacceptable word boundaries"""
        leading_chars = "abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=])"
        trailing_chars = "abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=(["

        # Every mention is wrapped in one bad character on each side.
        for mention in ["A64G", "Ala64Gly", "Ala64-->Gly"]:
            for lead in leading_chars:
                for trail in trailing_chars:
                    self.assertEqual(self.me(lead + mention + trail), {})

    def test_acceptable_general_word_boundaries(self):
        """MF: regexs allow acceptable word boundaries"""
        trailing_options = [".", ",", "", " ", "\t", "\n", ")", "]", '"', "'", ":", ";", "?", "!", "/", "-"]
        leading_options = [" ", "\t", "\n", '"', "'", "(", "[", "", "/", ",", "-"]

        for mention in ["A64G", "Ala64Gly", "Ala64-->Gly"]:
            for lead in leading_options:
                for trail in trailing_options:
                    sentence = lead + mention + trail
                    # No leading option contains "A", so the first "A" is
                    # where the mention starts.
                    offset = sentence.index("A")
                    expected = {PointMutation(64, "A", "G"): [(offset, offset + len(mention))]}
                    self.assertEqual(self.me(sentence), expected)

    def test_mix_one_three_letter_match(self):
        """MF: regex ignores one/three letter code mixes"""
        for sentence in ["Ala64G", "A64Gly"]:
            self.assertEqual(self.me(sentence), {})

    def test_full_name_matches(self):
        """MF: regex identifies full name mentions of amino acids"""
        for sentence in ["alanine64-->Gly", "Ala64-->glycine"]:
            expected = {PointMutation(64, "A", "G"): [(0, 15)]}
            self.assertEqual(self.me(sentence), expected)

    def test_single_residue_fails_non_xNy(self):
        """MF: single residue matches fail in non-xNy format"""
        for sentence in ["A64-->glycine", "Ala64-->G"]:
            self.assertEqual(self.me(sentence), {})

    def test_text_based_matches_w_N_m(self):
        """MF: regex identifies wN m text descriptions"""
        standalone = [
            "Ala64 to Gly",
            "Alanine64 to Glycine",
            "Ala64 to glycine",
            "alanine64 to Gly",
        ]
        for sentence in standalone:
            # The mention is the entire text.
            expected = {PointMutation(64, "A", "G"): [(0, len(sentence))]}
            self.assertEqual(self.me(sentence), expected)

        # NOTE(review): the first and third entries are identical; possibly a
        # different variant was intended -- confirm before changing.
        in_context = [
            "The Ala64 to Gly substitution",
            "The Ala64 to glycine substitution",
            "The Ala64 to Gly substitution",
        ]
        # The mention sits between the leading "The " (4 characters) and the
        # trailing " substitution" (13 characters).
        tail_length = len(" substitution")
        for sentence in in_context:
            expected = {PointMutation(64, "A", "G"): [(4, len(sentence) - tail_length)]}
            self.assertEqual(self.me(sentence), expected)

    def test_text_match_spacing(self):
        """MF: mis-spaced text matches fail"""
        for sentence in ["TheAla40toGlymutation", "arg40tomet", "ala25tohis"]:
            self.assertEqual(self.me(sentence), {})