Example #1
def reset(self):
    """Reset the internal state to reuse the evaluator."""
    self.primary_eval = ProteinEvaluation()
    self.secondary_eval = ProteinMacroEvaluation()
    self.results = None
    self.gold_standard = None
    self.logger = logging.getLogger("ProteinEvaluator")
    self._dois = None
Example #2
class ProteinEvaluator(AbstractEvaluator):
    """Implementation of the evaluation process for INT and IPT."""
    
    def reset(self):
        """Reset the internal state to reuse the evaluator."""
        self.primary_eval = ProteinEvaluation()
        self.secondary_eval = ProteinMacroEvaluation()
        self.results = None
        self.gold_standard = None
        self.logger = logging.getLogger("ProteinEvaluator")
        self._dois = None

    def _prepare(self):
        """Prepare the instance for the evaluation run."""
        assert len(self.results) == len(self.gold_standard), \
            "the entries in the evaluation result and the gold standard " \
            "do not match"
        
        self.primary_eval.set_fn(self.gold_standard.true_items())
        self.logger.debug(
            "INT/IPT evaluation: %i GS annotations" % 
            self.primary_eval.hits.fn
        )
    
    def _process(self):
        """Process the result set."""
        # Keep a sorted list (not a dict view) so DOIs can be removed below.
        self._dois = sorted(self.results.keys())
        result_sizes = [
            len(result_list) for result_list in self.results.values()
        ]
        max_rank_in_results = max(result_sizes) if len(result_sizes) else 0
        self.logger.info("longest result set has %i annotations",
                         max_rank_in_results)
        
        if self.cutoff and self.cutoff < max_rank_in_results:
            max_rank_in_results = self.cutoff
        
        for doi in list(self._dois):
            std_items = self.gold_standard[doi]
            result_doc = ProteinEvaluation(doi=doi, fn=len(std_items))
            self.secondary_eval[doi] = result_doc
            
        for rank in range(max_rank_in_results):
            for doi in list(self._dois):
                self._process_doi(doi, rank)
            
            # Calculate & store the average P/R pair
            # at this rank over all documents (macro-averaging)
            self.secondary_eval.store_p_at_current_r()
            # Calculate & store the current P/R value
            # at this rank over all documents (micro-averaging)
            self.primary_eval.store_p_at_current_r()
    
    def _process_doi(self, doi, rank):
        """Evaluate the result at a given rank for a document."""
        result_items = self.results[doi]
        std_items = self.gold_standard.get(doi) # special syntax for mocking
        
        try:
            item = result_items[rank]
        except IndexError:
            # no more results for this DOI
            self._dois.remove(doi)
        else:
            if item.confidence is not None and \
               item.confidence < self.min_conf:
                self._dois.remove(doi) # confidence-based cutoff
            else:
                # evaluate the result at the current rank
                self.primary_eval.evaluate_item(item, std_items)
                self.secondary_eval[doi].evaluate_item(item, std_items)
                self.secondary_eval[doi].store_p_at_current_r()
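
The comments in the ranking loop above distinguish two ways of averaging: primary_eval pools the hit counts of all documents before computing a single precision/recall value (micro-averaging), while secondary_eval keeps one ProteinEvaluation per DOI and averages the per-document values (macro-averaging). The standalone sketch below is not part of the library; it only illustrates that difference for precision, using made-up per-document counts.

# Micro- vs. macro-averaged precision, illustrated with hypothetical counts.

def micro_precision(per_doc_counts):
    """Pool tp/fp over all documents, then compute a single precision."""
    tp = sum(c["tp"] for c in per_doc_counts.values())
    fp = sum(c["fp"] for c in per_doc_counts.values())
    return tp / float(tp + fp) if tp + fp else 0.0

def macro_precision(per_doc_counts):
    """Compute precision per document, then average those values."""
    values = [
        c["tp"] / float(c["tp"] + c["fp"]) if c["tp"] + c["fp"] else 0.0
        for c in per_doc_counts.values()
    ]
    return sum(values) / len(values) if values else 0.0

counts = {
    "doi-1": {"tp": 9, "fp": 1},  # precision 0.90
    "doi-2": {"tp": 1, "fp": 3},  # precision 0.25
}
print(micro_precision(counts))  # 10/14, about 0.71: the larger document dominates
print(macro_precision(counts))  # (0.90 + 0.25) / 2 = 0.575: each document weighs equally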
Example #3
class ProteinEvaluationTest(CalculationAssertions):
    
    @patch('biocreative.evaluation.calculation.hits.Hits', spec=True)
    def setUp(self, unused):
        self.evaluator = ProteinEvaluation()
        
        for attr in C.HITS_ATTRIBUTES:
            setattr(self.evaluator.hits, attr, 2)
    
    def test_f_score(self):
        self.assert_property("f_score", 0.5)
    
    def test_evaluate_tp_item(self):
        self.evaluator.evaluate_item(1, [0,1,2])
        self.assert_hits(self.evaluator.hits, tp=3, fp=2, fn=1)
    
    def test_evaluate_fp_item(self):
        self.evaluator.evaluate_item(3, [0,1,2])
        self.assert_hits(self.evaluator.hits, tp=2, fp=3, fn=2)
    
    def test_evaluate_item_with_illegal_std_items(self):
        self.assertRaises(
            AssertionError, self.evaluator.evaluate_item, 1, (0,1,2)
        )
    
    def test_evaluate(self):
        self.evaluator.evaluate_item = Mock()
        self.evaluator.store_p_at_current_r = Mock()
        gs_set = [0,1,2]
        self.evaluator.evaluate([3,4,1,2], gs_set, 3)
        self.assertTrue(self.evaluator.store_p_at_current_r.called)
        self.assertEqual(self.evaluator.store_p_at_current_r.call_count, 3)
        self.assertTrue(self.evaluator.evaluate_item.called)
        self.assertEqual(self.evaluator.evaluate_item.call_count, 3)
        arg_list = self.evaluator.evaluate_item.call_args_list
        exp_list = (((3, gs_set), {}), ((4, gs_set), {}), ((1, gs_set), {}))
        
        for call, args in enumerate(arg_list):
            self.assert_values(
                "evaluate_item call %i" % (call + 1), exp_list[call], args
            )
    
    def test_evaluate_with_illegal_result_items(self):
        self.assertRaises(
            AssertionError, self.evaluator.evaluate, (1,2,3), set([0,1,2]), 2
        )
    
    def set_up_avrg_p_test(self):
        for hits in (
            {'tp': 1, 'fp': 0, 'fn': 2, 'tn': 0}, # p=1.0, r=0.33
            {'tp': 1, 'fp': 1, 'fn': 2, 'tn': 0}, # p=0.5, r=0.33
            {'tp': 2, 'fp': 1, 'fn': 1, 'tn': 0}, # p=0.66, r=0.66
            {'tp': 2, 'fp': 2, 'fn': 1, 'tn': 0}, # p=0.5, r=0.66
            {'tp': 2, 'fp': 3, 'fn': 1, 'tn': 0}, # p=0.4, r=0.66
            {'tp': 3, 'fp': 3, 'fn': 0, 'tn': 0}, # p=0.5, r=1.0
        ):
            for attr, value in hits.items():
                setattr(self.evaluator.hits, attr, value)
            
            self.evaluator.store_p_at_current_r()
        
        self.p_at_full_r = 0.5
        self.avrg_p_values = [(1/1.0, 1/3.0), (2/3.0, 2/3.0), (3/6.0, 3/3.0)]
        self.pr_values = (
            (1/1.0, 1/3.0), (1/2.0, 1/3.0),
            (2/3.0, 2/3.0), (2/5.0, 2/3.0),
            (3/6.0, 3/3.0),
        )
        self.avrg_p = 0.0
        last_r = 0.0
        
        for p, r in self.avrg_p_values:
            self.avrg_p += p * (r - last_r)
            last_r = r
    
    def test_avrg_p_properties(self):
        self.set_up_avrg_p_test()
        self.assert_property("p_at_full_r", self.p_at_full_r)
        self.assert_property("avrg_p", self.avrg_p)
    
    def test_pr_values(self):
        self.set_up_avrg_p_test()
        pr_values = tuple(self.evaluator.yield_precision_recall_pairs())
        self.assertEqual(pr_values, self.pr_values)
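
The loop at the end of set_up_avrg_p_test accumulates the expected average precision as precision weighted by the recall gained at each point where recall increases. Reproducing that arithmetic on the three (p, r) pairs in avrg_p_values, purely as a check:

# Same arithmetic as the tail of set_up_avrg_p_test, shown standalone.
avrg_p_values = [(1/1.0, 1/3.0), (2/3.0, 2/3.0), (3/6.0, 3/3.0)]

avrg_p, last_r = 0.0, 0.0
for p, r in avrg_p_values:
    avrg_p += p * (r - last_r)  # precision times the recall step it covers
    last_r = r

print(avrg_p)  # 1.0/3 + (2/3)/3 + 0.5/3, roughly 0.722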
Example #4
from random import randint, sample

def random_protein_result():
    results = list(set(randint(1, 100) for _ in range(100)))
    gold_standard = sample(range(1, 101), 10)
    evaluator = ProteinEvaluation(doi="test", fn=len(gold_standard))
    evaluator.evaluate(results, gold_standard, 0)
    return evaluator
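
A possible way to use this helper: the import path for ProteinEvaluation and for the helper itself is not shown in these examples, so the sketch below simply assumes both are in scope and relies on the accessors exercised in Example #3 (yield_precision_recall_pairs and avrg_p).

# Hypothetical usage of random_protein_result (names assumed to be in scope).
evaluator = random_protein_result()

# Walk the stored precision/recall pairs (see Example #3 for these accessors).
for precision, recall in evaluator.yield_precision_recall_pairs():
    print("p=%.2f  r=%.2f" % (precision, recall))

print("average precision: %.3f" % evaluator.avrg_p)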
Example #5
def setUp(self, unused):
    self.evaluator = ProteinEvaluation()

    for attr in C.HITS_ATTRIBUTES:
        setattr(self.evaluator.hits, attr, 2)
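
For reference, with every hit counter patched to 2 as in this setUp, the 0.5 expected by test_f_score in Example #3 follows from the standard precision/recall/F1 definitions (a quick check, independent of the library):

# Quick arithmetic check, assuming the standard F1 definition.
tp = fp = fn = 2                                  # every counter set to 2 in setUp
precision = tp / float(tp + fp)                   # 2/4 = 0.5
recall = tp / float(tp + fn)                      # 2/4 = 0.5
f_score = 2 * precision * recall / (precision + recall)
print(precision, recall, f_score)                 # 0.5 0.5 0.5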