Code example #1
File: protein.py Project: fnl/bceval
 def reset(self):
     """Reset the internal state to reuse the evaluator."""
     self.primary_eval = ProteinEvaluation()
     self.secondary_eval = ProteinMacroEvaluation()
     self.results = None
     self.gold_standard = None
     self.logger = logging.getLogger("ProteinEvaluator")
     self._dois = None
Code example #2
File: __init__.py Project: fnl/bceval
 def helper_protein(self, result_items, std_items):
     self.evaluator = ProteinEvaluation(doi="test", fn=3)
     self.evaluator.evaluate(result_items, std_items, 3)
     self.assert_hits(self.evaluator.hits, tp=2, fp=1, fn=1, tn=0)
     p = 2/3.0
     r = 2/3.0
     self.assert_property("precision", p)
     self.assert_property("recall", r)
     self.assert_property("f_score", 2.0 * p * r / (p + r))
     self.assert_property("p_at_full_r", None)
     self.assert_property("avrg_p", 1/1.0 * 1/3.0 + 2/3.0 * 1/3.0)
Code example #3
File: __init__.py Project: fnl/bceval
 def test_macro_evaluation(self):
     protein_results = [
         CalculationTests.random_protein_result() for i in range(50)
     ]
     self.evaluator = ProteinMacroEvaluation(
         ((i, r) for i, r in enumerate(protein_results))
     )
     N = len(protein_results)
     precision = sum(p.precision for p in protein_results) / N
     recall = sum(p.recall for p in protein_results) / N
     f_score = sum(p.f_score for p in protein_results) / N
     self.assert_property("precision", precision)
     self.assert_property("recall", recall)
     self.assert_property("f_score", f_score)
Code example #4
File: macro_evaluation.py Project: fnl/bceval
 def setUp(self):
     RandomProteinEvaluation = \
         ProteinMacroEvaluationTest.init_random_protein_evaluation_mock
     self.random_mocks = dict((i, RandomProteinEvaluation()) for i in range(10))
     self.evaluator = ProteinMacroEvaluation(self.random_mocks)
Code example #5
File: macro_evaluation.py Project: fnl/bceval
class ProteinMacroEvaluationTest(CalculationAssertions):
    
    def setUp(self):
        RandomProteinEvaluation = \
            ProteinMacroEvaluationTest.init_random_protein_evaluation_mock
        self.random_mocks = dict((i, RandomProteinEvaluation()) for i in range(10))
        self.evaluator = ProteinMacroEvaluation(self.random_mocks)
    
    @staticmethod
    @patch('biocreative.evaluation.calculation.hits.Hits', spec=True)
    def init_random_protein_evaluation_mock(unused):
        mock = Mock(wraps=ProteinEvaluation())
        
        for prop in C.PROTEIN_PROPERTIES:
            setattr(mock, prop, random())
        
        for attr in C.HITS_ATTRIBUTES:
            setattr(mock.hits, attr, randint(0, 1000))
        
        return mock
    
    def test_std_dev(self):
        for prop in C.PROTEIN_PROPERTIES:
            if prop != 'avrg_p':
                expected = ProteinMacroEvaluationTest.calculate_std_dev(
                    [getattr(m, prop) for m in self.random_mocks.values()]
                )
                received = self.evaluator.std_dev(prop)
                self.assert_values(prop, expected, received)
    
    def test_properties_except_hits(self):
        for prop in C.PROTEIN_PROPERTIES:
            if prop != 'avrg_p':
                expected = self.get_average_for(prop)
                self.assert_property(prop, expected)
    
    @patch('biocreative.evaluation.calculation.hits.Hits', spec=True)
    def test_hits(self, unused):
        expected = dict()
        
        for attr in C.HITS_ATTRIBUTES:
            expected[attr] = sum(
                getattr(mock.hits, attr) for mock in self.random_mocks.values()
            )
        
        received = self.evaluator.hits
        
        for attr in C.HITS_ATTRIBUTES:
            self.assertEqual(
                getattr(received, attr), expected[attr],
                "%s hits don't match (received: %i, expected: %i)" % (
                    attr, getattr(received, attr), expected[attr]
                )
            )
    
    def test_average_for(self):
        expected = self.get_average_for('precision')
        received = self.evaluator._average_for('precision')
        self.assert_values('average_for', expected, received)
    
    def test_static_calculations(self):
        for kind in ('variation', 'variance', 'std_dev'):
            self.run_static_calc_test_for(kind)
    
    def run_static_calc_test_for(self, name):
        rnd_floats = [random() for i in range(10)]
        expected_fun = eval('ProteinMacroEvaluationTest.calculate_%s' % name)
        test_fun = eval('ProteinMacroEvaluation._%s' % name)
        expected = expected_fun(rnd_floats)
        received = test_fun(rnd_floats)
        self.assert_values(name, expected, received)
    
    def get_average_for(self, prop):
        return ProteinMacroEvaluationTest.calculate_average(
            [getattr(m, prop) for m in self.random_mocks.values()]
        )
    
    @staticmethod
    def calculate_average(numbers):
        total = sum(numbers)
        return float(total) / len(numbers)
    
    @staticmethod
    def calculate_variation(numbers):
        average = ProteinMacroEvaluationTest.calculate_average(numbers)
        return sum((i - average)**2 for i in numbers)
    
    @staticmethod
    def calculate_variance(numbers):
        variation = ProteinMacroEvaluationTest.calculate_variation(numbers)
        return variation / float(len(numbers))
    
    @staticmethod
    def calculate_std_dev(numbers):
        variance = ProteinMacroEvaluationTest.calculate_variance(numbers)
        return sqrt(variance)
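
The static helpers at the bottom of this test class spell out the statistics being checked: calculate_variation is the sum of squared deviations from the mean, calculate_variance divides it by N (the population variance, not the N-1 sample variance), and calculate_std_dev is its square root. A minimal stand-alone check of that relationship against Python 3's statistics module (illustrative only, not project code):

    # Population variance / std dev as used by the helpers above:
    # dividing by N means the result matches statistics.pstdev, not stdev.
    from math import sqrt
    from statistics import pstdev

    numbers = [0.2, 0.4, 0.4, 0.8]
    average = sum(numbers) / len(numbers)
    variation = sum((x - average) ** 2 for x in numbers)  # sum of squared deviations
    variance = variation / len(numbers)                   # divide by N, not N - 1
    assert abs(sqrt(variance) - pstdev(numbers)) < 1e-12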
Code example #6
File: protein.py Project: fnl/bceval
class ProteinEvaluator(AbstractEvaluator):
    """Implementation of the evaluation process for INT and IPT."""
    
    def reset(self):
        """Reset the internal state to reuse the evaluator."""
        self.primary_eval = ProteinEvaluation()
        self.secondary_eval = ProteinMacroEvaluation()
        self.results = None
        self.gold_standard = None
        self.logger = logging.getLogger("ProteinEvaluator")
        self._dois = None

    def _prepare(self):
        """Prepare the instance for the evaluation run."""
        assert len(self.results) == len(self.gold_standard), \
            "the entries in the evaluation result and the gold standard " \
            "do not match"
        
        self.primary_eval.set_fn(self.gold_standard.true_items())
        self.logger.debug(
            "INT/IPT evaluation: %i GS annotations" % 
            self.primary_eval.hits.fn
        )
    
    def _process(self):
        """Process the result set."""
        self._dois = self.results.keys()
        self._dois.sort()
        result_sizes = [
            len(result_list) for result_list in self.results.values()
        ]
        max_rank_in_results = max(result_sizes) if len(result_sizes) else 0
        self.logger.info("longest result set has %i annotations",
                         max_rank_in_results)
        
        if self.cutoff and self.cutoff < max_rank_in_results:
            max_rank_in_results = self.cutoff
        
        for doi in list(self._dois):
            std_items = self.gold_standard[doi]
            result_doc = ProteinEvaluation(doi=doi, fn=len(std_items))
            self.secondary_eval[doi] = result_doc
            
        for rank in range(max_rank_in_results):
            for doi in list(self._dois):
                self._process_doi(doi, rank)
            
            # Calculate & store the average P/R pair
            # at this rank over all documents (macro-averaging)
            self.secondary_eval.store_p_at_current_r()
            # Calculate & store the current P/R value
            # at this rank over all documents (micro-averaging)
            self.primary_eval.store_p_at_current_r()
    
    def _process_doi(self, doi, rank):
        """Evaluate the result at a given rank for a document."""
        result_items = self.results[doi]
        std_items = self.gold_standard.get(doi) # special syntax for mocking
        
        try:
            item = result_items[rank]
        except IndexError:
            # no more results for this DOI
            self._dois.remove(doi)
        else:
            if item.confidence is not None and \
               item.confidence < self.min_conf:
                self._dois.remove(doi) # confidence-based cutoff
            else:
                # evaluate the result at the current rank
                self.primary_eval.evaluate_item(item, std_items)
                self.secondary_eval[doi].evaluate_item(item, std_items)
                self.secondary_eval[doi].store_p_at_current_r()
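
As the comments in _process note, the evaluator maintains two averages side by side: primary_eval pools the hits of every document into a single ProteinEvaluation (micro-averaging), while secondary_eval keeps one ProteinEvaluation per DOI and ProteinMacroEvaluation averages the per-document scores (macro-averaging). A made-up numeric example of why the two can disagree (illustrative arithmetic only, not bceval code):

    # Two documents with different result-list sizes: pooling the hits (micro)
    # weights the larger document more, while averaging per-document precision
    # (macro) weights both documents equally.
    doc_hits = [
        {"tp": 9, "fp": 1},  # document 1: precision 9/10 = 0.90
        {"tp": 1, "fp": 4},  # document 2: precision 1/5  = 0.20
    ]
    micro_p = sum(d["tp"] for d in doc_hits) / float(
        sum(d["tp"] + d["fp"] for d in doc_hits))          # 10/15, about 0.67
    macro_p = sum(d["tp"] / float(d["tp"] + d["fp"])
                  for d in doc_hits) / len(doc_hits)        # (0.90 + 0.20) / 2 = 0.55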
Code example #7
File: __init__.py Project: fnl/bceval
class CalculationTests(CalculationAssertions):
    
    def test_article_auc_pr(self):
        self.evaluator = CalculationTests.simulate_article_evaluator(
            ArticleAucPrEvaluation, 3
        )
        self.assert_hits(self.evaluator.hits, tp=3, fp=7, fn=0, tn=0)
        self.assert_property("p_at_full_r", 3/8.0)
        recall_span = 1/3.0
        self.assert_property(
            "auc_pr",
            (1/1.0 + 1/1.0)/2 * recall_span +
            (1/4.0 + 2/5.0)/2 * recall_span +
            (2/7.0 + 3/8.0)/2 * recall_span
        )
    
    def test_article_mcc(self):
        self.evaluator = CalculationTests.simulate_article_evaluator(
            ArticleMccEvaluation, 0
        )
        self.assert_hits(self.evaluator.hits, tp=2, fp=2, fn=1, tn=5)
        self.assert_property("sensitivity", 2/3.0) # tp / (tp + fn)
        self.assert_property("specificity", 5/7.0) # tn / (tn + fp)
        self.assert_property("accuracy", 7/10.0) # (tp + tn) / sum(hits)
        self.assert_property(
            "mcc_score", (2*5 - 2*1) / sqrt(4*3*7*6)
        ) # (tp*tn - fp*fn) / sqrt(tp+fp * tp+fn * tn+fp * tn+fn)
    
    @staticmethod
    def simulate_article_evaluator(EvaluatorClass, fn_count):
        evaluator = EvaluatorClass(fn=fn_count)
        t = True
        f = False
        
        for result_item, std_item in [
            (t, t), (f, f), (f, f), (t, f), (f, t),
            (f, f), (f, f), (t, t), (f, f), (t, f)
        ]:
            evaluator.evaluate(result_item, std_item, None)
        
        return evaluator
    
    def test_protein_for_normalizations(self):
        self.helper_protein(["A", "B", "C", "D"], ["A", "C", "D"])
    
    def test_protein_for_pairs(self):
        A, B, C, D = ("a", "x"), ("b", "y"), ("c", "z"), ("d", "w")
        self.helper_protein([A, B, C, D], [A, C, D])
    
    def helper_protein(self, result_items, std_items):
        self.evaluator = ProteinEvaluation(doi="test", fn=3)
        self.evaluator.evaluate(result_items, std_items, 3)
        self.assert_hits(self.evaluator.hits, tp=2, fp=1, fn=1, tn=0)
        p = 2/3.0
        r = 2/3.0
        self.assert_property("precision", p)
        self.assert_property("recall", r)
        self.assert_property("f_score", 2.0 * p * r / (p + r))
        self.assert_property("p_at_full_r", None)
        self.assert_property("avrg_p", 1/1.0 * 1/3.0 + 2/3.0 * 1/3.0)
    
    def test_macro_evaluation(self):
        protein_results = [
            CalculationTests.random_protein_result() for i in range(50)
        ]
        self.evaluator = ProteinMacroEvaluation(
            ((i, r) for i, r in enumerate(protein_results))
        )
        N = len(protein_results)
        precision = sum(p.precision for p in protein_results) / N
        recall = sum(p.recall for p in protein_results) / N
        f_score = sum(p.f_score for p in protein_results) / N
        self.assert_property("precision", precision)
        self.assert_property("recall", recall)
        self.assert_property("f_score", f_score)
    
    @staticmethod
    def random_protein_result():
        results = list(set([randint(1, 100) for i in range(100)]))
        gold_standard = sample(range(1, 101), 10)
        evaluator = ProteinEvaluation(doi="test", fn=len(gold_standard))
        evaluator.evaluate(results, gold_standard, 0)
        return evaluator
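
In test_article_mcc, the expected mcc_score is the Matthews correlation coefficient written out from the confusion counts asserted just above it. A stand-alone re-computation of that formula (illustrative, not bceval code):

    # MCC = (tp*tn - fp*fn) / sqrt((tp+fp) * (tp+fn) * (tn+fp) * (tn+fn))
    from math import sqrt

    tp, fp, fn, tn = 2, 2, 1, 5
    mcc = (tp * tn - fp * fn) / sqrt(
        (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    assert abs(mcc - (2 * 5 - 2 * 1) / sqrt(4 * 3 * 7 * 6)) < 1e-12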