Example #1
    def test_exception_on_strictness(self):
        evaluator = MentionLevelEvaluator()  # default evaluator (no subclass analysis)
        evaluation = evaluator.evaluate(self.dataset1)(
            MentionLevelEvaluator.TOTAL_LABEL)  # overall evaluation across all labels

        self.assertRaises(ValueError, evaluation.compute,
                          'strictness not valid')  # an unknown strictness value raises ValueError
Example #2
    def test_exact_strictness(self):
        evaluator = MentionLevelEvaluator()
        evaluation = evaluator.evaluate(self.dataset1)(MentionLevelEvaluator.TOTAL_LABEL)

        self.assertEqual(evaluation.tp, 3)  # the 3 exact matches
        self.assertEqual(evaluation.fp, 4)  # the 3 overlapping + 1 spurious
        self.assertEqual(evaluation.fn, 5)  # the 3 overlapping + 2 missing

        ret = evaluation.compute('exact')
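        # 'exact' strictness counts only exact matches as correct:
        # precision = tp / (tp + fp) = 3/7, recall = tp / (tp + fn) = 3/8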

        self.assertEqual(ret.precision, 3 / 7)
        self.assertEqual(ret.recall, 3 / 8)
        self.assertEqual(ret.f_measure, 2 * (3 / 7 * 3 / 8) / (3 / 7 + 3 / 8))
Example #3
    def test_half_overlapping_strictness(self):
        evaluator = MentionLevelEvaluator()
        evaluation = evaluator.evaluate(self.dataset1)(MentionLevelEvaluator.TOTAL_LABEL)

        self.assertEqual(evaluation.tp, 3)  # the 3 exact matches
        self.assertEqual(evaluation.fp - evaluation.fp_ov, 1)  # the 1 spurious
        self.assertEqual(evaluation.fn - evaluation.fn_ov, 2)  # the 2 missing
        self.assertEqual(evaluation.fp_ov, 3)  # the 3 overlapping
        self.assertEqual(evaluation.fn_ov, 3)  # the 3 overlapping

        ret = evaluation.compute('half_overlapping')
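        # 'half_overlapping' counts the 3 + 3 overlapping matches at half weight,
        # hence the (3 + 6/2) terms in the assertions below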

        self.assertEqual(ret.precision, (3 + 6 / 2) / 10)
        self.assertEqual(ret.recall, (3 + 6 / 2) / 11)
        self.assertEqual(ret.f_measure, 2 * ((3 + 6 / 2) / 10 * (3 + 6 / 2) / 11) / ((3 + 6 / 2) / 10 + (3 + 6 / 2) / 11))
Example #4
    def __is_acceptable(self, doc_id, doc, annotators):
        if len(annotators) == 1:
            return True

        from itertools import combinations
        from nalaf.structures.data import Dataset
        from nalaf.learning.evaluators import MentionLevelEvaluator
        import math

        agreement = []
        for first, second in combinations(annotators, 2):
            data = Dataset()
            data.documents[doc_id] = doc

            AnnJsonAnnotationReader(first).annotate(data)
            AnnJsonAnnotationReader(second, is_predicted=True).annotate(data)
            results = MentionLevelEvaluator().evaluate(data)
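            # the last element of the returned measures (the F-score) is used as the pairwise agreement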
            if not math.isnan(results[-1]):
                agreement.append(results[-1])

        # clean the doc from any annotations we added to calculate agreement
        for part in doc.parts.values():
            part.annotations = []
            part.predicted_annotations = []

        return agreement and sum(agreement)/len(agreement) >= self.iaa_threshold
Example #5
def benchmark_nala(member1, member2):
    itrs = []

    # Read the IAA iterations in blocks so that the plain documents are not deleted by the AnnJsonAnnotationReader passes
    for itr in IterationRound.all():
        if itr.is_IAA():
            dataset = itr.read(read_annotations=False)
            AnnJsonAnnotationReader(
                os.path.join(itr.path, "reviewed", member1),
                read_only_class_id=MUT_CLASS_ID,
                delete_incomplete_docs=False).annotate(dataset)
            AnnJsonAnnotationReader(os.path.join(itr.path, "reviewed",
                                                 member2),
                                    read_only_class_id=MUT_CLASS_ID,
                                    delete_incomplete_docs=False,
                                    is_predicted=True).annotate(dataset)
            itrs.append(dataset)
            dataset = None  # drop the local reference; the dataset is kept in itrs

    # Then merge the IAA iterations
    all_itrs_dataset = Dataset()
    for itr_dataset in itrs:
        all_itrs_dataset.extend_dataset(itr_dataset)

    ExclusiveNLDefiner().define(all_itrs_dataset)

    return (all_itrs_dataset, MentionLevelEvaluator(
        subclass_analysis=True).evaluate(all_itrs_dataset))
Example #6
    def test_subclass_analysis(self):
        evaluator = MentionLevelEvaluator(subclass_analysis=True)
        evaluations = evaluator.evaluate(self.dataset1)
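        # with subclass_analysis=True, the returned evaluations object is callable with a subclass label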

        self.assertEqual(evaluations(1).tp, 1)
        self.assertEqual(evaluations(2).tp, 2)

        self.assertEqual(evaluations(1).fp, 3)
        self.assertEqual(evaluations(2).fp, 1)

        self.assertEqual(evaluations(1).fn, 4)
        self.assertEqual(evaluations(2).fn, 1)

        self.assertEqual(evaluations(1).fp_ov, 2)
        self.assertEqual(evaluations(1).fn_ov, 2)
        self.assertEqual(evaluations(2).fp_ov, 1)
        self.assertEqual(evaluations(2).fn_ov, 1)
Example #7
def evaluate():
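    # `data` and `folder_name` are assumed to be defined in the enclosing scope of the script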
    from nalaf.utils.annotation_readers import AnnJsonAnnotationReader
    size_before = len(data)
    AnnJsonAnnotationReader(os.path.join(folder_name, "annjson"),
                            is_predicted=True,
                            delete_incomplete_docs=False).annotate(data)
    assert (size_before == len(data))

    ExclusiveNLDefiner().define(data)
    e = MentionLevelEvaluator(subclass_analysis=True).evaluate(data)
    print(e)
Example #8
    def test(tagger, test_set, print_eval=True, print_results=False):
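        # `definer`, `stats`, `print_run_args`, and the *_CLASS_ID constants are assumed to come from the enclosing scope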
        tagger.tag(test_set)
        definer.define(test_set)
        stats(test_set, "test")
        evaluation = MentionLevelEvaluator(
            subclass_analysis=True).evaluate(test_set)

        print_run_args()

        if print_eval:
            print(evaluation)
        if print_results:
            ConsoleWriter(ent1_class_id=PRO_CLASS_ID,
                          ent2_class_id=MUT_CLASS_ID,
                          color=True).write(test_set)
Example #9
def calculate_standard_error(data):
    evaluator = MentionLevelEvaluator('overlapping', subclass_analysis=True)
    keys = list(data.documents.keys())  # materialize as a list so random.sample can draw from it
    subclasses = set(ann.subclass for ann in chain(data.annotations(), data.predicted_annotations()))

    sample_precision = {subclass: [] for subclass in subclasses}
    sample_recall = {subclass: [] for subclass in subclasses}
    sample_f_score = {subclass: [] for subclass in subclasses}

    for i in range(1000):
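        # draw a random subsample of ~15% of the documents for this iteration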
        sample = Dataset()
        random_keys = random.sample(keys, round(len(keys) * 0.15))
        sample.documents = {key: data.documents[key] for key in random_keys}

        subclass_measures, measures = evaluator.evaluate(sample)

        for subclass in subclass_measures.keys():
            subclass_results = subclass_measures[subclass]
            sample_precision[subclass].append(subclass_results[-3])
            sample_recall[subclass].append(subclass_results[-2])
            sample_f_score[subclass].append(subclass_results[-1])

    subclass_measures, measures = evaluator.evaluate(data)
    for subclass in subclass_measures.keys():
        subclass_results = subclass_measures[subclass]
        mean_precision = subclass_results[-3]
        mean_recall = subclass_results[-2]
        mean_f_score = subclass_results[-1]

        p = calc_std(mean_precision, sample_precision[subclass])
        r = calc_std(mean_recall, sample_recall[subclass])
        f = calc_std(mean_f_score, sample_f_score[subclass])

        print('subclass:{} {:.4f}+-{:.4f} {:.4f}+-{:.4f} {:.4f}+-{:.4f}'.format(
            subclass, mean_precision, p, mean_recall, r, mean_f_score, f
        ))
Example #10
def benchmark_IDP4(member1, member2):
    itr = IterationRound(0)
    IDP4_corpus = itr.read(read_annotations=False)

    IAA_IDP4_corpus = Dataset()
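    # keep only the documents in the IAA subset (IDP4_IAA_docs is assumed to be defined in the enclosing scope)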
    for docid, document in IDP4_corpus.documents.items():
        if docid in IDP4_IAA_docs:
            IAA_IDP4_corpus.documents[docid] = document

    AnnJsonAnnotationReader(
        os.path.join(itr.path, "base", "annjson", "members", member1),
        read_only_class_id=MUT_CLASS_ID,
        delete_incomplete_docs=True).annotate(IAA_IDP4_corpus)
    AnnJsonAnnotationReader(os.path.join(itr.path, "base", "annjson",
                                         "members", member2),
                            read_only_class_id=MUT_CLASS_ID,
                            delete_incomplete_docs=True,
                            is_predicted=True).annotate(IAA_IDP4_corpus)

    ExclusiveNLDefiner().define(IAA_IDP4_corpus)

    return (IAA_IDP4_corpus, MentionLevelEvaluator(
        subclass_analysis=True).evaluate(IAA_IDP4_corpus))
Example #11
    def test_implements_evaluator_interface(self):
        self.assertIsInstance(MentionLevelEvaluator(), Evaluator)
Example #12
except:
    corpus = StringReader(corpusName).read()
    folderName = None  # just print out in standard output

# ------------------------------------------------------------------------------

# Example calls:
# python scripts/SETH.py SETH nala_test resources/predictions/  # predict
# python scripts/SETH.py check_performance nala_test resources/predictions/SETH/nala_test &> resources/predictions/SETH/nala_test/oresults.tsv  # evaluate

if (methodName == 'check_performance'):
    # folderName is assumed to be the final/leaf predictions folder, e.g., `resources/predictions/SETH/nala_test`
    BRATPartsAnnotationReader(folderName, is_predicted=True).annotate(corpus)
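    # ExclusiveNLDefiner assigns the mention subclasses used by the subclass analysis below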
    ExclusiveNLDefiner().define(corpus)
    evaluation = MentionLevelEvaluator(subclass_analysis=True).evaluate(corpus)
    print(evaluation)

else:
    if folderName:
        # folderName is assumed to be the root predictions folder, e.g., `resources/predictions/`
        folderName = os.path.join(folderName, methodName, corpusName)
        if not os.path.exists(folderName):
            os.makedirs(folderName)

    useMutationFinderOnly = "true" if methodName == "MFmodified" else "false"

    run_set_server(useMutationFinderOnly)

    run_seth_on_corpus(corpus, folderName, useMutationFinderOnly)
Example #13
def _get_entity_evaluator(evaluation_level):
    """
    Returns EntityEvaluator object based on specified evaluation_level
    """

    if evaluation_level == 1:
        ENTITY_MAP_FUN = Entity.__repr__
        ENTITY_ACCEPT_FUN = str.__eq__

    elif evaluation_level == 2:
        ENTITY_MAP_FUN = EntityEvaluator.COMMON_ENTITY_MAP_FUNS[
            'entity_normalized_fun'](
                {
                    PRO_ID: UNIPROT_NORM_ID,
                    LOC_ID: GO_NORM_ID,
                    ORG_ID: TAXONOMY_NORM_ID,
                },
                penalize_unknown_normalizations="agnostic",
                add_entity_text=True,
            )
        ENTITY_ACCEPT_FUN = EntityEvaluator.COMMON_ENTITY_ACCEPT_FUNS['exact']

    elif evaluation_level == 3:
        ENTITY_MAP_FUN = EntityEvaluator.COMMON_ENTITY_MAP_FUNS[
            'entity_normalized_fun'](
                {
                    PRO_ID: UNIPROT_NORM_ID,
                    LOC_ID: GO_NORM_ID,
                    ORG_ID: TAXONOMY_NORM_ID,
                },
                penalize_unknown_normalizations="agnostic",
                add_entity_text=True,
            )
        ENTITY_ACCEPT_FUN = EntityEvaluator.COMMON_ENTITY_ACCEPT_FUNS[
            'overlapping']

    elif evaluation_level == 4:
        ENTITY_MAP_FUN = EntityEvaluator.COMMON_ENTITY_MAP_FUNS[
            'entity_normalized_fun'](
                {
                    PRO_ID: UNIPROT_NORM_ID,
                    LOC_ID: GO_NORM_ID,
                    ORG_ID: TAXONOMY_NORM_ID,
                },
                penalize_unknown_normalizations="softest",
                add_entity_text=False,
            )
        ENTITY_ACCEPT_FUN = accept_entity_uniprot_go_taxonomy

    elif evaluation_level == 5:
        ENTITY_MAP_FUN = EntityEvaluator.COMMON_ENTITY_MAP_FUNS[
            'entity_normalized_fun'](
                {
                    PRO_ID: UNIPROT_NORM_ID,
                    LOC_ID: GO_NORM_ID,
                    ORG_ID: TAXONOMY_NORM_ID,
                },
                penalize_unknown_normalizations="softest",
                add_entity_text=False,
            )

        def accept_checking_sequence_identity(gold, pred):
            return accept_entity_uniprot_go_taxonomy(gold,
                                                     pred,
                                                     min_seq_identity=90)

        ENTITY_ACCEPT_FUN = accept_checking_sequence_identity

    else:
        raise AssertionError(evaluation_level)

    entity_evaluator = EntityEvaluator(entity_map_fun=ENTITY_MAP_FUN,
                                       entity_accept_fun=ENTITY_ACCEPT_FUN)

    mention_evaluator = MentionLevelEvaluator(subclass_analysis=True)

    return (mention_evaluator, entity_evaluator)