Esempio n. 1
0
 def test_normalized_mutations_blank_gs(self):
     """PC: Normalized Mutations functions with no gold standard mutations

     The fixtures swap roles: the blank data acts as the gold standard
     and the gold-standard fixture is scored as if it were extractor
     output. The expected tallies are six false positives and no true
     positives or false negatives.
     """
     # Build the calculator on the empty gold standard and score in one go
     result = PerformanceCalculator(
         self._blank_mutation_data
     ).calculate_normalized_mutations(self._gold_standard_data)
     self.assertEqual(result.TruePositive, 0)
     self.assertEqual(result.FalsePositive, 6)
     self.assertEqual(result.FalseNegative, 0)
     # This calculation is expected to report no true-negative tally
     self.assertEqual(result.TrueNegative, None)
Esempio n. 2
0
 def test_document_retrieval_blank_gs(self):
     """PC: Document Retrieval functions with no gold standard mutations

     The blank data plays the gold standard and the gold-standard
     fixture is scored as extractor output. The expected tallies are
     three false positives, one true negative and nothing else.
     """
     # Build the calculator on the empty gold standard and score in one go
     result = PerformanceCalculator(
         self._blank_mutation_data
     ).calculate_document_retrieval(self._gold_standard_data)
     self.assertEqual(result.TruePositive, 0)
     self.assertEqual(result.FalsePositive, 3)
     self.assertEqual(result.FalseNegative, 0)
     self.assertEqual(result.TrueNegative, 1)
Esempio n. 3
0
 def test_normalized_mutations_blank_gs(self):
     """PC: Normalized Mutations functions with no gold standard mutations

     A calculator built on the blank data scores the gold-standard
     fixture as extractor output; the expected result is six false
     positives and no true positives or false negatives.
     """
     blank_gs_calculator = PerformanceCalculator(self._blank_mutation_data)
     outcome = blank_gs_calculator.calculate_normalized_mutations(
         self._gold_standard_data)
     self.assertEqual(outcome.TruePositive, 0)
     self.assertEqual(outcome.FalsePositive, 6)
     self.assertEqual(outcome.FalseNegative, 0)
     # This calculation is expected to report no true-negative tally
     self.assertEqual(outcome.TrueNegative, None)
Esempio n. 4
0
 def test_document_retrieval_blank_gs(self):
     """PC: Document Retrieval functions with no gold standard mutations

     A calculator built on the blank data scores the gold-standard
     fixture as extractor output; the expected result is three false
     positives and one true negative.
     """
     blank_gs_calculator = PerformanceCalculator(self._blank_mutation_data)
     outcome = blank_gs_calculator.calculate_document_retrieval(
         self._gold_standard_data)
     self.assertEqual(outcome.TruePositive, 0)
     self.assertEqual(outcome.FalsePositive, 3)
     self.assertEqual(outcome.FalseNegative, 0)
     self.assertEqual(outcome.TrueNegative, 1)
Esempio n. 5
0
 def test_init(self):
     """PC: init functions as expected

     Builds a fresh calculator from the gold-standard fixture and spot
     checks a single count held in its _gold_standard attribute.
     """
     calculator = PerformanceCalculator(self._gold_standard_data)
     # Spot check: D95N under the '12206666' entry is stored with count 2
     stored_counts = calculator._gold_standard['12206666']
     self.assertEqual(stored_counts[PointMutation(95, 'D', 'N')], 2)
Esempio n. 6
0
 def test_normalized_mutations_invalid_input(self):
     """PC: Normalized Mutations handles invalid input correctly

     Gold standard and extractor output whose keys do not match exactly
     are explicitly not allowed, because it is not clear how a partial
     overlap should be scored. Each mismatch must raise
     PerformanceCalculatorError.
     """
     calculator = PerformanceCalculator({'1': {}, '2': {}})

     def assert_rejected(extractor_output):
         # Positional form of assertRaises: callable, then its argument
         self.assertRaises(PerformanceCalculatorError,
                           calculator.calculate_normalized_mutations,
                           extractor_output)

     # Same number of keys, but one of them differs
     assert_rejected({'1': {}, '3': {}})
     # One key missing
     assert_rejected({'1': {}})
     # One key too many
     assert_rejected({'1': {}, '2': {}, '3': {}})
Esempio n. 7
0
 def setUp(self):
     """ Create the fixtures shared by the tests """
     self._gold_standard_data = gold_standard_data
     # Four entries with no mutations recorded for any of them
     self._blank_mutation_data = {
         '3476160': {},
         '14500716': {},
         '12206666': {},
         '11327835': {},
     }
     # Default calculator, built on the gold-standard fixture
     self._pc = PerformanceCalculator(self._gold_standard_data)
Esempio n. 8
0
class PerformanceCalculatorTests(TestCase):
    """ Tests of the PerformanceCalculator class

        Three calculations are exercised (extracted mentions, document
        retrieval and normalized mutations), each with blank, perfect,
        invalid and varied extractor output. Every calculation returns an
        object whose TruePositive, FalsePositive, FalseNegative and
        TrueNegative attributes are checked.
    """

    def setUp(self):
        """ Create the fixtures shared by all tests """
        self._gold_standard_data = gold_standard_data
        # Four entries with no mutations recorded for any of them.
        # NOTE(review): presumably these are the keys gold_standard_data
        # uses, since mismatched keys are rejected (see *_invalid_input).
        self._blank_mutation_data = {
            '3476160': {},
            '14500716': {},
            '12206666': {},
            '11327835': {},
        }
        # Default calculator, built on the gold-standard fixture
        self._pc = PerformanceCalculator(self._gold_standard_data)

    # -- private helpers -------------------------------------------------

    def _assert_tallies(self, perf, tp, fp, fn, tn):
        """ Compare the four tallies on perf with the expected values

            The checks run in TP, FP, FN, TN order; the first mismatch
            fails the test.
        """
        self.assertEqual(perf.TruePositive, tp)
        self.assertEqual(perf.FalsePositive, fp)
        self.assertEqual(perf.FalseNegative, fn)
        self.assertEqual(perf.TrueNegative, tn)

    def _blank_except(self, filled):
        """ Return fresh blank data with the entries in filled replaced

            filled maps entry keys to mutation-count dicts; all other
            entries stay empty.
        """
        extracted = {
            '3476160': {},
            '14500716': {},
            '12206666': {},
            '11327835': {},
        }
        extracted.update(filled)
        return extracted

    def _near_perfect(self, h64a_count):
        """ Return extractor output that varies only in the H64A count

            Per the expected tallies below, a count of 3 leaves one
            gold-standard mention missing and a count of 2 leaves two.
        """
        return {
            '3476160': {PointMutation(87, 'S', 'C'): 1,
                        PointMutation(22, 'T', 'C'): 1},
            '14500716': {},
            '12206666': {PointMutation(95, 'D', 'A'): 4,
                         PointMutation(95, 'D', 'N'): 2,
                         PointMutation(95, 'D', 'E'): 2},
            '11327835': {PointMutation(64, 'H', 'A'): h64a_count},
        }

    def _assert_key_mismatch_rejected(self, calculate):
        """ Check that calculate raises on every kind of key mismatch

            calculate must be bound to a calculator whose gold standard
            has exactly the keys '1' and '2'.
        """
        # Same number of keys, but one of them differs
        self.assertRaises(PerformanceCalculatorError,
                          calculate, {'1': {}, '3': {}})
        # One key missing
        self.assertRaises(PerformanceCalculatorError,
                          calculate, {'1': {}})
        # One key too many
        self.assertRaises(PerformanceCalculatorError,
                          calculate, {'1': {}, '2': {}, '3': {}})

    # -- construction ----------------------------------------------------

    def test_init(self):
        """PC: init functions as expected

        Spot checks one count stored on a freshly built calculator.
        """
        calculator = PerformanceCalculator(self._gold_standard_data)
        # D95N under the '12206666' entry is stored with count 2
        stored_counts = calculator._gold_standard['12206666']
        self.assertEqual(stored_counts[PointMutation(95, 'D', 'N')], 2)

    # -- extracted mentions ----------------------------------------------

    def test_extracted_mentions_blank_input(self):
        """PC: Extracted Mentions functions with no extracted mutations

        With nothing extracted, all 14 mentions are expected to be
        tallied as false negatives.
        """
        scored = self._pc.calculate_extracted_mentions(
            self._blank_mutation_data)
        self._assert_tallies(scored, tp=0, fp=0, fn=14, tn=None)

    def test_extracted_mentions_blank_gs(self):
        """PC: Extracted Mentions functions with no gold standard mutations

        The blank data acts as the gold standard, so all 14 mentions in
        the gold-standard fixture are expected to be false positives.
        """
        blank_gs_calculator = PerformanceCalculator(
            self._blank_mutation_data)
        scored = blank_gs_calculator.calculate_extracted_mentions(
            self._gold_standard_data)
        self._assert_tallies(scored, tp=0, fp=14, fn=0, tn=None)

    def test_extracted_mentions_perfect_input(self):
        """PC: Extracted Mentions functions with perfect extracted mutations

        Extractor output equal to the gold standard: 14 true positives.
        """
        scored = self._pc.calculate_extracted_mentions(
            self._gold_standard_data)
        self._assert_tallies(scored, tp=14, fp=0, fn=0, tn=None)

    def test_extracted_mentions_invalid_input(self):
        """PC: Extracted Mentions handles invalid input correctly

        Gold standard and extractor output whose keys do not match
        exactly are explicitly not allowed, because it is not clear how
        a partial overlap should be scored.
        """
        calculator = PerformanceCalculator({'1': {}, '2': {}})
        self._assert_key_mismatch_rejected(
            calculator.calculate_extracted_mentions)

    def test_extracted_mentions_varied_input(self):
        """PC: Extracted Mentions functions with varied extracted mutations

        Mentions are tallied per count: surplus counts become false
        positives and missing counts become false negatives.
        """
        score = self._pc.calculate_extracted_mentions

        # A single correct mention: one TP, the other 13 are missed
        extracted = self._blank_except(
            {'3476160': {PointMutation(22, 'T', 'C'): 1}})
        self._assert_tallies(score(extracted), tp=1, fp=0, fn=13, tn=None)
        # An extra count results in a false positive
        extracted = self._blank_except(
            {'3476160': {PointMutation(22, 'T', 'C'): 2}})
        self._assert_tallies(score(extracted), tp=1, fp=1, fn=13, tn=None)
        # ...and two extra counts result in two false positives
        extracted = self._blank_except(
            {'3476160': {PointMutation(22, 'T', 'C'): 3}})
        self._assert_tallies(score(extracted), tp=1, fp=2, fn=13, tn=None)

        # One missing count results in one false negative (see 11327835)
        # and one less true positive
        self._assert_tallies(score(self._near_perfect(3)),
                             tp=13, fp=0, fn=1, tn=None)
        # Two missing counts result in two false negatives (see 11327835)
        # and two less true positives
        self._assert_tallies(score(self._near_perfect(2)),
                             tp=12, fp=0, fn=2, tn=None)

        # One mention of an extra mutation tallied as one FalsePositive
        extracted = self._blank_except(
            {'3476160': {PointMutation(42, 'L', 'Y'): 1}})
        self._assert_tallies(score(extracted), tp=0, fp=1, fn=14, tn=None)
        # Two mentions of that extra mutation tallied as two FalsePositives
        extracted = self._blank_except(
            {'3476160': {PointMutation(42, 'L', 'Y'): 2}})
        self._assert_tallies(score(extracted), tp=0, fp=2, fn=14, tn=None)
        # Two different extra mutations, one mention each, also tallied
        # as two FalsePositives
        extracted = self._blank_except(
            {'3476160': {PointMutation(42, 'L', 'Y'): 1},
             '14500716': {PointMutation(33, 'P', 'T'): 1}})
        self._assert_tallies(score(extracted), tp=0, fp=2, fn=14, tn=None)

    # -- document retrieval ----------------------------------------------

    def test_document_retrieval_blank_input(self):
        """PC: Document Retrieval functions with no extracted mutations

        With nothing extracted, the expected tallies are three false
        negatives and one true negative.
        """
        scored = self._pc.calculate_document_retrieval(
            self._blank_mutation_data)
        self._assert_tallies(scored, tp=0, fp=0, fn=3, tn=1)

    def test_document_retrieval_blank_gs(self):
        """PC: Document Retrieval functions with no gold standard mutations

        The blank data acts as the gold standard; the expected tallies
        are three false positives and one true negative.
        """
        blank_gs_calculator = PerformanceCalculator(
            self._blank_mutation_data)
        scored = blank_gs_calculator.calculate_document_retrieval(
            self._gold_standard_data)
        self._assert_tallies(scored, tp=0, fp=3, fn=0, tn=1)

    def test_document_retrieval_perfect_input(self):
        """PC: Document Retrieval functions with perfect extracted mutations

        Extractor output equal to the gold standard: three true
        positives and one true negative.
        """
        scored = self._pc.calculate_document_retrieval(
            self._gold_standard_data)
        self._assert_tallies(scored, tp=3, fp=0, fn=0, tn=1)

    def test_document_retrieval_invalid_input(self):
        """PC: Document Retrieval handles invalid input correctly

        Gold standard and extractor output whose keys do not match
        exactly are explicitly not allowed, because it is not clear how
        a partial overlap should be scored.
        """
        calculator = PerformanceCalculator({'1': {}, '2': {}})
        self._assert_key_mismatch_rejected(
            calculator.calculate_document_retrieval)

    def test_document_retrieval_varied_input(self):
        """PC: Document Retrieval functions with varied extracted mutations

        Per the expected values, only whether an entry has any extracted
        mutation matters here; which mutations and how many do not.
        """
        score = self._pc.calculate_document_retrieval

        # Mutation found in one entry only: one TP, two FNs, one TN
        extracted = self._blank_except(
            {'3476160': {PointMutation(22, 'T', 'C'): 1}})
        self._assert_tallies(score(extracted), tp=1, fp=0, fn=2, tn=1)
        # An extra count has no effect
        extracted = self._blank_except(
            {'3476160': {PointMutation(22, 'T', 'C'): 2}})
        self._assert_tallies(score(extracted), tp=1, fp=0, fn=2, tn=1)
        # ...and two extra counts have no effect
        extracted = self._blank_except(
            {'3476160': {PointMutation(22, 'T', 'C'): 3}})
        self._assert_tallies(score(extracted), tp=1, fp=0, fn=2, tn=1)

        # One missing count has no effect
        self._assert_tallies(score(self._near_perfect(3)),
                             tp=3, fp=0, fn=0, tn=1)
        # Two missing counts have no effect
        self._assert_tallies(score(self._near_perfect(2)),
                             tp=3, fp=0, fn=0, tn=1)

        # An incorrect mutation still counts as a TP document
        extracted = self._blank_except(
            {'3476160': {PointMutation(42, 'L', 'Y'): 1}})
        self._assert_tallies(score(extracted), tp=1, fp=0, fn=2, tn=1)
        # False Positive/TrueNegative tallied correctly: a mutation under
        # '14500716' turns the one true negative into a false positive
        extracted = self._blank_except(
            {'14500716': {PointMutation(42, 'L', 'Y'): 1}})
        self._assert_tallies(score(extracted), tp=0, fp=1, fn=3, tn=0)

    # -- normalized mutations --------------------------------------------

    def test_normalized_mutations_blank_input(self):
        """PC: Normalized Mutations functions with no extracted mutations

        With nothing extracted, all six mutations are expected to be
        tallied as false negatives.
        """
        scored = self._pc.calculate_normalized_mutations(
            self._blank_mutation_data)
        self._assert_tallies(scored, tp=0, fp=0, fn=6, tn=None)

    def test_normalized_mutations_blank_gs(self):
        """PC: Normalized Mutations functions with no gold standard mutations

        The blank data acts as the gold standard, so all six mutations
        in the gold-standard fixture are expected to be false positives.
        """
        blank_gs_calculator = PerformanceCalculator(
            self._blank_mutation_data)
        scored = blank_gs_calculator.calculate_normalized_mutations(
            self._gold_standard_data)
        self._assert_tallies(scored, tp=0, fp=6, fn=0, tn=None)

    def test_normalized_mutations_perfect_input(self):
        """PC: Normalized Mutations functions with perfect extracted mutations

        Extractor output equal to the gold standard: six true positives.
        """
        scored = self._pc.calculate_normalized_mutations(
            self._gold_standard_data)
        self._assert_tallies(scored, tp=6, fp=0, fn=0, tn=None)

    def test_normalized_mutations_invalid_input(self):
        """PC: Normalized Mutations handles invalid input correctly

        Gold standard and extractor output whose keys do not match
        exactly are explicitly not allowed, because it is not clear how
        a partial overlap should be scored.
        """
        calculator = PerformanceCalculator({'1': {}, '2': {}})
        self._assert_key_mismatch_rejected(
            calculator.calculate_normalized_mutations)

    def test_normalized_mutations_varied_input(self):
        """PC: Normalized Mutations functions with varied extracted mutations

        Per the expected values, each distinct mutation is tallied once
        however many times it is mentioned.
        """
        score = self._pc.calculate_normalized_mutations

        # One correct mutation: one TP, the other five are missed
        extracted = self._blank_except(
            {'3476160': {PointMutation(22, 'T', 'C'): 1}})
        self._assert_tallies(score(extracted), tp=1, fp=0, fn=5, tn=None)
        # An extra count has no effect
        extracted = self._blank_except(
            {'3476160': {PointMutation(22, 'T', 'C'): 2}})
        self._assert_tallies(score(extracted), tp=1, fp=0, fn=5, tn=None)
        # ...and two extra counts have no effect
        extracted = self._blank_except(
            {'3476160': {PointMutation(22, 'T', 'C'): 3}})
        self._assert_tallies(score(extracted), tp=1, fp=0, fn=5, tn=None)

        # One missing count has no effect
        self._assert_tallies(score(self._near_perfect(3)),
                             tp=6, fp=0, fn=0, tn=None)
        # Two missing counts have no effect
        self._assert_tallies(score(self._near_perfect(2)),
                             tp=6, fp=0, fn=0, tn=None)

        # One extra mutation tallied as one FalsePositive
        extracted = self._blank_except(
            {'3476160': {PointMutation(42, 'L', 'Y'): 1}})
        self._assert_tallies(score(extracted), tp=0, fp=1, fn=6, tn=None)
        # Two counts of one extra mutation tallied as one FalsePositive
        extracted = self._blank_except(
            {'3476160': {PointMutation(42, 'L', 'Y'): 2}})
        self._assert_tallies(score(extracted), tp=0, fp=1, fn=6, tn=None)
        # Two extra mutations tallied as two FalsePositives
        extracted = self._blank_except(
            {'3476160': {PointMutation(42, 'L', 'Y'): 1},
             '14500716': {PointMutation(33, 'P', 'T'): 1}})
        self._assert_tallies(score(extracted), tp=0, fp=2, fn=6, tn=None)
Esempio n. 9
0
 def setUp(self):
     """ Create the fixtures shared by the tests """
     self._gold_standard_data = gold_standard_data
     # One independent, empty mutation dict per entry key
     entry_keys = ("3476160", "14500716", "12206666", "11327835")
     self._blank_mutation_data = {key: {} for key in entry_keys}
     # Default calculator, built on the gold-standard fixture
     self._pc = PerformanceCalculator(self._gold_standard_data)
Esempio n. 10
0
class PerformanceCalculatorTests(TestCase):
    """ Tests of the PerformanceCalculator class """

    def setUp(self):
        """ Create the fixtures shared by all tests """
        self._gold_standard_data = gold_standard_data
        # One independent, empty mutation dict per entry key
        entry_keys = ("3476160", "14500716", "12206666", "11327835")
        self._blank_mutation_data = {key: {} for key in entry_keys}
        # Default calculator, built on the gold-standard fixture
        self._pc = PerformanceCalculator(self._gold_standard_data)

    def test_init(self):
        """PC: init functions as expected

        Builds a fresh calculator from the gold-standard fixture and spot
        checks a single count held in its _gold_standard attribute.
        """
        calculator = PerformanceCalculator(self._gold_standard_data)
        # D95N under the "12206666" entry is stored with count 2
        stored_counts = calculator._gold_standard["12206666"]
        self.assertEqual(stored_counts[PointMutation(95, "D", "N")], 2)

    def test_extracted_mentions_blank_input(self):
        """PC: Extracted Mentions functions with no extracted mutations

        Scores the blank data against the gold standard; the expected
        tallies are 14 false negatives and no positives of either kind.
        """
        nothing_extracted = self._blank_mutation_data
        outcome = self._pc.calculate_extracted_mentions(nothing_extracted)
        self.assertEqual(outcome.TruePositive, 0)
        self.assertEqual(outcome.FalsePositive, 0)
        self.assertEqual(outcome.FalseNegative, 14)
        # This calculation is expected to report no true-negative tally
        self.assertEqual(outcome.TrueNegative, None)

    def test_extracted_mentions_blank_gs(self):
        """PC: Extracted Mentions functions with no gold standard mutations

        The blank data acts as the gold standard and the gold-standard
        fixture is scored as extractor output; all 14 of its mentions are
        expected to be false positives.
        """
        # Build the calculator on the empty gold standard and score in one go
        outcome = PerformanceCalculator(
            self._blank_mutation_data
        ).calculate_extracted_mentions(self._gold_standard_data)
        self.assertEqual(outcome.TruePositive, 0)
        self.assertEqual(outcome.FalsePositive, 14)
        self.assertEqual(outcome.FalseNegative, 0)
        # This calculation is expected to report no true-negative tally
        self.assertEqual(outcome.TrueNegative, None)

    def test_extracted_mentions_perfect_input(self):
        """PC: Extracted Mentions functions with perfect extracted mutations

        Extractor output equal to the gold standard is expected to give
        14 true positives and no errors of either kind.
        """
        perfect_output = self._gold_standard_data
        outcome = self._pc.calculate_extracted_mentions(perfect_output)
        self.assertEqual(outcome.TruePositive, 14)
        self.assertEqual(outcome.FalsePositive, 0)
        self.assertEqual(outcome.FalseNegative, 0)
        # This calculation is expected to report no true-negative tally
        self.assertEqual(outcome.TrueNegative, None)

    def test_extracted_mentions_invalid_input(self):
        """PC: Extracted Mentions handles invalid input correctly

        Gold standard and extractor output whose keys do not match exactly
        are explicitly not allowed, because it is not clear how a partial
        overlap should be scored. Each mismatch must raise
        PerformanceCalculatorError.
        """
        calculator = PerformanceCalculator({"1": {}, "2": {}})

        def assert_rejected(extractor_output):
            # Positional form of assertRaises: callable, then its argument
            self.assertRaises(
                PerformanceCalculatorError,
                calculator.calculate_extracted_mentions,
                extractor_output,
            )

        # Same number of keys, but one of them differs
        assert_rejected({"1": {}, "3": {}})
        # One key missing
        assert_rejected({"1": {}})
        # One key too many
        assert_rejected({"1": {}, "2": {}, "3": {}})

    def test_extracted_mentions_varied_input(self):
        """PC: Extracted Mentions functions with varied extracted mutations

        Mentions are tallied per count: surplus counts become false
        positives and missing counts become false negatives.
        """

        def blank_except(filled):
            # Fresh blank data with the entries given in `filled` replaced
            extracted = {"3476160": {}, "14500716": {}, "12206666": {}, "11327835": {}}
            extracted.update(filled)
            return extracted

        def near_perfect(h64a_count):
            # Extractor output that varies only in the H64A count; per the
            # expected tallies, 3 leaves one mention missing and 2 leaves two
            return {
                "3476160": {PointMutation(87, "S", "C"): 1, PointMutation(22, "T", "C"): 1},
                "14500716": {},
                "12206666": {
                    PointMutation(95, "D", "A"): 4,
                    PointMutation(95, "D", "N"): 2,
                    PointMutation(95, "D", "E"): 2,
                },
                "11327835": {PointMutation(64, "H", "A"): h64a_count},
            }

        def check(extracted, tp, fp, fn, tn):
            # Score one extractor output and compare all four tallies
            outcome = self._pc.calculate_extracted_mentions(extracted)
            self.assertEqual(outcome.TruePositive, tp)
            self.assertEqual(outcome.FalsePositive, fp)
            self.assertEqual(outcome.FalseNegative, fn)
            self.assertEqual(outcome.TrueNegative, tn)

        # A single correct mention: one TP, the other 13 are missed
        check(blank_except({"3476160": {PointMutation(22, "T", "C"): 1}}), tp=1, fp=0, fn=13, tn=None)
        # An extra count results in a false positive
        check(blank_except({"3476160": {PointMutation(22, "T", "C"): 2}}), tp=1, fp=1, fn=13, tn=None)
        # ...and two extra counts result in two false positives
        check(blank_except({"3476160": {PointMutation(22, "T", "C"): 3}}), tp=1, fp=2, fn=13, tn=None)

        # One missing count results in one false negative (see 11327835)
        # and one less true positive
        check(near_perfect(3), tp=13, fp=0, fn=1, tn=None)
        # Two missing counts result in two false negatives (see 11327835)
        # and two less true positives
        check(near_perfect(2), tp=12, fp=0, fn=2, tn=None)

        # One mention of an extra mutation tallied as one FalsePositive
        check(blank_except({"3476160": {PointMutation(42, "L", "Y"): 1}}), tp=0, fp=1, fn=14, tn=None)
        # Two mentions of that extra mutation tallied as two FalsePositives
        check(blank_except({"3476160": {PointMutation(42, "L", "Y"): 2}}), tp=0, fp=2, fn=14, tn=None)
        # Two different extra mutations, one mention each, also tallied as
        # two FalsePositives
        check(
            blank_except(
                {
                    "3476160": {PointMutation(42, "L", "Y"): 1},
                    "14500716": {PointMutation(33, "P", "T"): 1},
                }
            ),
            tp=0,
            fp=2,
            fn=14,
            tn=None,
        )

    def test_document_retrieval_blank_input(self):
        """PC: Document Retrieval functions with no extracted mutations

        Scores the blank data against the gold standard; the expected
        tallies are three false negatives and one true negative.
        """
        nothing_extracted = self._blank_mutation_data
        outcome = self._pc.calculate_document_retrieval(nothing_extracted)
        self.assertEqual(outcome.TruePositive, 0)
        self.assertEqual(outcome.FalsePositive, 0)
        self.assertEqual(outcome.FalseNegative, 3)
        self.assertEqual(outcome.TrueNegative, 1)

    def test_document_retrieval_blank_gs(self):
        """PC: Document Retrieval functions with no gold standard mutations

        The blank data acts as the gold standard and the gold-standard
        fixture is scored as extractor output; the expected tallies are
        three false positives and one true negative.
        """
        # Build the calculator on the empty gold standard and score in one go
        outcome = PerformanceCalculator(
            self._blank_mutation_data
        ).calculate_document_retrieval(self._gold_standard_data)
        self.assertEqual(outcome.TruePositive, 0)
        self.assertEqual(outcome.FalsePositive, 3)
        self.assertEqual(outcome.FalseNegative, 0)
        self.assertEqual(outcome.TrueNegative, 1)

    def test_document_retrieval_perfect_input(self):
        """PC: Document Retrieval functions with perfect extracted mutations

        Extractor output equal to the gold standard is expected to give
        three true positives and one true negative.
        """
        perfect_output = self._gold_standard_data
        outcome = self._pc.calculate_document_retrieval(perfect_output)
        self.assertEqual(outcome.TruePositive, 3)
        self.assertEqual(outcome.FalsePositive, 0)
        self.assertEqual(outcome.FalseNegative, 0)
        self.assertEqual(outcome.TrueNegative, 1)

    def test_document_retrieval_invalid_input(self):
        """PC: Document Retrieval handles invalid input correctly

        Gold standard and extractor output whose keys do not match exactly
        are explicitly not allowed, because it is not clear how a partial
        overlap should be scored. Each mismatch must raise
        PerformanceCalculatorError.
        """
        calculator = PerformanceCalculator({"1": {}, "2": {}})

        def assert_rejected(extractor_output):
            # Positional form of assertRaises: callable, then its argument
            self.assertRaises(
                PerformanceCalculatorError,
                calculator.calculate_document_retrieval,
                extractor_output,
            )

        # Same number of keys, but one of them differs
        assert_rejected({"1": {}, "3": {}})
        # One key missing
        assert_rejected({"1": {}})
        # One key too many
        assert_rejected({"1": {}, "2": {}, "3": {}})

    def test_document_retrieval_varied_input(self):
        """PC: Document Retrieval functions with varied extracted mutations

        Each scenario pairs one extractor output with the expected
        (TruePositive, FalsePositive, FalseNegative, TrueNegative) tallies.
        Scenarios are run in order against the shared calculator.
        """
        tally_fields = ("TruePositive", "FalsePositive", "FalseNegative", "TrueNegative")

        def sparse_output(pmid, mutation, count):
            # All four documents are present; only `pmid` carries a mutation.
            extracted = {"3476160": {}, "14500716": {}, "12206666": {}, "11327835": {}}
            extracted[pmid][mutation] = count
            return extracted

        def dense_output(h64a_count):
            # Mutations in three of the four documents; only the count
            # recorded for H64A differs between scenarios.
            return {
                "3476160": {PointMutation(87, "S", "C"): 1, PointMutation(22, "T", "C"): 1},
                "14500716": {},
                "12206666": {
                    PointMutation(95, "D", "A"): 4,
                    PointMutation(95, "D", "N"): 2,
                    PointMutation(95, "D", "E"): 2,
                },
                "11327835": {PointMutation(64, "H", "A"): h64a_count},
            }

        scenarios = (
            # A single mutation in a single document
            (sparse_output("3476160", PointMutation(22, "T", "C"), 1), (1, 0, 2, 1)),
            # An extra count has no effect
            (sparse_output("3476160", PointMutation(22, "T", "C"), 2), (1, 0, 2, 1)),
            # ...and two extra counts have no effect
            (sparse_output("3476160", PointMutation(22, "T", "C"), 3), (1, 0, 2, 1)),
            # One missing count has no effect
            (dense_output(3), (3, 0, 0, 1)),
            # Two missing counts have no effect
            (dense_output(2), (3, 0, 0, 1)),
            # An incorrect mutation still counts as a TP document
            (sparse_output("3476160", PointMutation(42, "L", "Y"), 1), (1, 0, 2, 1)),
            # False Positive/TrueNegative tallied correctly
            (sparse_output("14500716", PointMutation(42, "L", "Y"), 1), (0, 1, 3, 0)),
        )
        for extracted, expected_tallies in scenarios:
            result = self._pc.calculate_document_retrieval(extracted)
            for field, expected in zip(tally_fields, expected_tallies):
                self.assertEqual(getattr(result, field), expected)

    def test_normalized_mutations_blank_input(self):
        """PC: Normalized Mutations functions with no extracted mutations

        The shared calculator is given extractor output in which every
        document maps to an empty mutation dict.
        """
        result = self._pc.calculate_normalized_mutations(self._blank_mutation_data)
        expected_tallies = (
            ("TruePositive", 0),
            ("FalsePositive", 0),
            ("FalseNegative", 6),
            ("TrueNegative", None),
        )
        for field, expected in expected_tallies:
            self.assertEqual(getattr(result, field), expected)

    def test_normalized_mutations_blank_gs(self):
        """PC: Normalized Mutations functions with no gold standard mutations

        The calculator is built around the blank fixture and is then handed
        the populated fixture as the extractor output.
        """
        calculator = PerformanceCalculator(self._blank_mutation_data)
        result = calculator.calculate_normalized_mutations(self._gold_standard_data)
        expected_tallies = (
            ("TruePositive", 0),
            ("FalsePositive", 6),
            ("FalseNegative", 0),
            ("TrueNegative", None),
        )
        for field, expected in expected_tallies:
            self.assertEqual(getattr(result, field), expected)

    def test_normalized_mutations_perfect_input(self):
        """PC: Normalized Mutations functions with perfect extracted mutations

        The shared calculator is fed the very same data it was built with,
        i.e. extractor output equal to the gold standard.
        """
        result = self._pc.calculate_normalized_mutations(self._gold_standard_data)
        expected_tallies = (
            ("TruePositive", 6),
            ("FalsePositive", 0),
            ("FalseNegative", 0),
            ("TrueNegative", None),
        )
        for field, expected in expected_tallies:
            self.assertEqual(getattr(result, field), expected)

    def test_normalized_mutations_invalid_input(self):
        """PC: Normalized Mutations handles invalid input correctly

        Gold-standard and extractor output whose document identifiers do
        not completely overlap are explicitly not allowed, because it is
        not clear how they should be handled; each such input must raise
        PerformanceCalculatorError.
        """
        calculator = PerformanceCalculator({"1": {}, "2": {}})
        mismatched_outputs = (
            {"1": {}, "3": {}},  # same size, one identifier differs
            {"1": {}},  # an identifier is missing
            {"1": {}, "2": {}, "3": {}},  # an extra identifier is present
        )
        for extracted in mismatched_outputs:
            self.assertRaises(
                PerformanceCalculatorError,
                calculator.calculate_normalized_mutations,
                extracted,
            )

    def test_normalized_mutations_varied_input(self):
        """PC: Normalized Mutations functions with varied extracted mutations

        Each scenario pairs one extractor output with the expected
        (TruePositive, FalsePositive, FalseNegative, TrueNegative) tallies.
        Scenarios are run in order against the shared calculator.
        """
        tally_fields = ("TruePositive", "FalsePositive", "FalseNegative", "TrueNegative")

        def sparse_output(pmid, mutation, count):
            # All four documents are present; only `pmid` carries a mutation.
            extracted = {"3476160": {}, "14500716": {}, "12206666": {}, "11327835": {}}
            extracted[pmid][mutation] = count
            return extracted

        def dense_output(h64a_count):
            # Six distinct mutations spread over three documents; only the
            # count recorded for H64A differs between scenarios.
            return {
                "3476160": {PointMutation(87, "S", "C"): 1, PointMutation(22, "T", "C"): 1},
                "14500716": {},
                "12206666": {
                    PointMutation(95, "D", "A"): 4,
                    PointMutation(95, "D", "N"): 2,
                    PointMutation(95, "D", "E"): 2,
                },
                "11327835": {PointMutation(64, "H", "A"): h64a_count},
            }

        scenarios = (
            # A single matching mutation
            (sparse_output("3476160", PointMutation(22, "T", "C"), 1), (1, 0, 5, None)),
            # An extra count has no effect
            (sparse_output("3476160", PointMutation(22, "T", "C"), 2), (1, 0, 5, None)),
            # ...and two extra counts have no effect
            (sparse_output("3476160", PointMutation(22, "T", "C"), 3), (1, 0, 5, None)),
            # One missing count has no effect
            (dense_output(3), (6, 0, 0, None)),
            # Two missing counts have no effect
            (dense_output(2), (6, 0, 0, None)),
            # One extra mutation tallied as one FalsePositive
            (sparse_output("3476160", PointMutation(42, "L", "Y"), 1), (0, 1, 6, None)),
            # Two counts of one extra mutation tallied as one FalsePositive
            (sparse_output("3476160", PointMutation(42, "L", "Y"), 2), (0, 1, 6, None)),
            # Two extra mutations tallied as two FalsePositives
            (
                {
                    "3476160": {PointMutation(42, "L", "Y"): 1},
                    "14500716": {PointMutation(33, "P", "T"): 1},
                    "12206666": {},
                    "11327835": {},
                },
                (0, 2, 6, None),
            ),
        )
        for extracted, expected_tallies in scenarios:
            result = self._pc.calculate_normalized_mutations(extracted)
            for field, expected in zip(tally_fields, expected_tallies):
                self.assertEqual(getattr(result, field), expected)
Esempio n. 11
0
 def setUp(self):
     """Prepare the fixtures shared by the tests."""
     # Populated fixture, taken from a name defined outside this method
     self._gold_standard_data = gold_standard_data
     # Four document identifiers, each mapped to its own empty mutation dict
     document_ids = ('3476160', '14500716', '12206666', '11327835')
     self._blank_mutation_data = {doc_id: {} for doc_id in document_ids}
     # Calculator built around the populated fixture
     self._pc = PerformanceCalculator(self._gold_standard_data)