Beispiel #1
0
 def setUp(self):
     """Build the BaselineMutationExtractor instance used by the tests."""
     extractor = BaselineMutationExtractor()
     self.me = extractor
Beispiel #2
0
class BaselineMutationExtractorTests(TestCase):
    """Tests of the BaselineMutationExtractor class.

    Every test gets a fresh extractor from setUp and exercises it either
    through its call interface (text in, dict of PointMutation -> mention
    count out) or through its internal regexes and preprocessing helpers.
    """

    # NOTE(review): the layout of amino_acid_three_to_one_letter_map is not
    # visible here -- confirm that index 0 / index 1 of each item really are
    # the one-letter / three-letter codes these names promise.
    _single_letter_aa_codes = [aa[0] for aa in amino_acid_three_to_one_letter_map]
    _triple_letter_aa_codes = [aa[1] for aa in amino_acid_three_to_one_letter_map]

    def setUp(self):
        """Initialize some objects for use in the tests."""
        self.me = BaselineMutationExtractor()

    def test_init(self):
        """BME: __init__ returns without error."""
        # Construction itself is the check: any exception fails the test.
        BaselineMutationExtractor()

    def test_call_no_mutations(self):
        """BME: extraction functions with no extraction-worthy data."""
        self.assertEqual(self.me(''), {})
        self.assertEqual(self.me('There is no mutation data here.'), {})
        # Near misses: a residue is missing on one side of the position.
        self.assertEqual(self.me('T64 is almost a valid mutation.'), {})
        self.assertEqual(self.me('So is 42S.'), {})

    def test_call_single_mutation(self):
        """BME: extraction functions when one mutation is present."""
        expected = {PointMutation(42, 'S', 'T'): 1}
        self.assertEqual(self.me('S42T'), expected)
        self.assertEqual(self.me('The S42T mutation was made.'), expected)

    def test_call_boundaries_required(self):
        """BME: match boundaries are recognized."""
        expected = {PointMutation(42, 'S', 'T'): 1}
        self.assertEqual(self.me('S42T'), expected)
        # Extra letters glued to either end of the mention must block it.
        self.assertEqual(self.me('S42Test'), {})
        self.assertEqual(self.me('S42-Test mutation was made.'), {})
        self.assertEqual(self.me('gfS42T'), {})
        self.assertEqual(self.me('S42Thr'), {})

    def test_call_punc_ignored(self):
        """BME: punctuation ignored in mutation words."""
        expected = {PointMutation(42, 'S', 'T'): 1}
        # internal punctuation
        self.assertEqual(self.me('S42-T'), expected)
        # leading punctuation
        self.assertEqual(self.me('?S42T'), expected)
        # trailing punctuation
        self.assertEqual(self.me('S42T?'), expected)
        # all punctuation marks; the backslash is written as '\\' so the
        # literal keeps its value without relying on an invalid '\|' escape
        self.assertEqual(
            self.me('!@#$%^&*()~`"\';:.,><?/{}[]\\|+=-_S42T'), expected)

    def test_call_multiple_mutations(self):
        """BME: extraction functions when more than one mutation is present."""
        expected = {
            PointMutation(42, 'S', 'T'): 1,
            PointMutation(36, 'W', 'Y'): 1,
        }
        self.assertEqual(self.me('S42T and W36Y'), expected)
        self.assertEqual(self.me('S42T W36Y'), expected)

    def test_call_count(self):
        """BME: counting of mentions works."""
        expected = {
            PointMutation(42, 'S', 'T'): 1,
            PointMutation(36, 'W', 'Y'): 1,
        }
        self.assertEqual(self.me('S42T and W36Y'), expected)

        expected = {
            PointMutation(42, 'S', 'T'): 1,
            PointMutation(36, 'W', 'Y'): 2,
        }
        self.assertEqual(self.me('S42T, W36Y, and W36Y'), expected)

        # A three-letter mention counts toward the same mutation.
        expected = {
            PointMutation(42, 'S', 'T'): 1,
            PointMutation(36, 'W', 'Y'): 3,
        }
        self.assertEqual(
            self.me('S42T, W36Y, Trp36Tyr, and W36Y'), expected)

    def test_call_three_to_one_letter_map(self):
        """BME: identical Mutation objects created for varied matches."""
        expected = {PointMutation(42, 'A', 'G'): 1}
        self.assertEqual(self.me('The A42G mutation was made.'), expected)
        self.assertEqual(self.me('The Ala42Gly mutation was made.'), expected)
        self.assertEqual(
            self.me('The A42 to glycine mutation was made.'), expected)

    def test_regex_case_sensitive(self):
        """BME: regex case sensitive functions as expected."""
        # one-letter abbreviations must be uppercase
        one_letter = self.me._word_regexs[0]
        self.assertEqual(one_letter.match('a64t'), None)
        self.assertEqual(one_letter.match('A64t'), None)
        self.assertEqual(one_letter.match('a64T'), None)
        self.assertEqual(one_letter.match('A64T').group(), 'A64T')

        # three-letter abbreviations must be titlecase
        three_letter = self.me._word_regexs[1]
        self.assertEqual(three_letter.match('ala64gly'), None)
        self.assertEqual(three_letter.match('ALA64GLY'), None)
        self.assertEqual(three_letter.match('aLa64gLy'), None)
        self.assertEqual(three_letter.match('Ala64Gly').group(), 'Ala64Gly')

        # full names must be lowercase or titlecase
        full_name = self.me._string_regexs[3]
        self.assertEqual(
            full_name.match('Ala64 to glycine').group(), 'Ala64 to glycine')
        self.assertEqual(
            full_name.match('Ala64 to Glycine').group(), 'Ala64 to Glycine')
        self.assertEqual(full_name.match('Ala64 to GLYCINE'), None)
        self.assertEqual(full_name.match('Ala64 to glYcine'), None)

    def test_one_letter_match(self):
        """BME: regex identifies one-letter codes."""
        found = self.me._word_regexs[0].match('A64G')
        self.assertEqual(found.group(), 'A64G')

    def test_three_letter_match(self):
        """BME: regex identifies three-letter codes."""
        found = self.me._word_regexs[1].match('Ala64Gly')
        self.assertEqual(found.group(), 'Ala64Gly')

    def test_varied_digit_length(self):
        """BME: regex identifies mutations w/ different location lengths."""
        one_letter = self.me._word_regexs[0]
        self.assertEqual(one_letter.match('A4G').group(), 'A4G')
        self.assertEqual(one_letter.match('A64G').group(), 'A64G')
        self.assertEqual(one_letter.match('A864G').group(), 'A864G')
        self.assertEqual(one_letter.match('A8864G').group(), 'A8864G')

    def test_word_boundary_requirement(self):
        """BME: regex requires word boundaries surrounding mutation."""
        for regex in self.me._word_regexs:
            self.assertEqual(regex.match('TheAla64Glymut'), None)
            self.assertEqual(regex.match('Ala64Gly/p53634'), None)

    def test_mix_one_three_letter_match(self):
        """BME: regex ignores one/three letter code mixes."""
        for regex in self.me._word_regexs:
            self.assertEqual(regex.match('Ala64G'), None)
            self.assertEqual(regex.match('A64Gly'), None)

    def test_preprocess_words(self):
        """BME: word-level preprocessing functions as expected."""
        r = "this is a t64g mutation."
        expected = ['this', 'is', 'a', 't64g', 'mutation']
        self.assertEqual(self.me._preprocess_words(r), expected)

        # A punctuation-only token is kept as an empty string, not dropped.
        r = "this is ! t64g mutation."
        expected = ['this', 'is', '', 't64g', 'mutation']
        self.assertEqual(self.me._preprocess_words(r), expected)

        r = ""
        expected = []
        self.assertEqual(self.me._preprocess_words(r), expected)

    def test_preprocess_sentences(self):
        """BME: sentence-level preprocessing functions as expected."""
        r = "This is a test. The T65->Y mutation"
        expected = ['This is a test', 'The T65Y mutation']
        self.assertEqual(self.me._preprocess_sentences(r), expected)

    def test_replace_regex(self):
        """BME: replace regex functions as expected."""
        strip = self.me._replace_regex.sub
        self.assertEqual(strip('', ''), '')
        self.assertEqual(strip('', 'a46t'), 'a46t')
        self.assertEqual(strip('', 'a46->t'), 'a46t')
        self.assertEqual(strip('', 'A234-T'), 'A234T')
        self.assertEqual(strip('', 'A(234)T'), 'A234T')
        # The trailing period is removed as well; spaces are preserved.
        self.assertEqual(
            strip('', 'The Gly64->Thr mutation.'), 'The Gly64Thr mutation')

    def test_ten_word_match(self):
        """BME: ten-word pattern functions as expected."""
        expected = {PointMutation(42, 'S', 'A'): 1}
        self.assertEqual(self.me('Ser42 was mutated to Ala'), expected)
        self.assertEqual(self.me('S42 was mutated to Ala'), expected)
        self.assertEqual(self.me('Ser42 was mutated to alanine'), expected)
        self.assertEqual(self.me('the S42 was mutated to alanine'), expected)
        self.assertEqual(self.me('S42 was mutated to alanine'), expected)
        # Tenth word is alanine, so it's a match
        self.assertEqual(self.me('S42 a a a a a a a a a alanine'), expected)
        # Eleventh word is alanine, so no match
        self.assertEqual(self.me('S42 a a a a a a a a a a alanine'), {})