Python Annotations.get_entity_annotations 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: medacy.tools

클래스/타입: Annotations

메소드/함수: get_entity_annotations

hotexamples.com에서의 예제들: 13

Python Annotations.get_entity_annotations - 13개의 예제를 찾았습니다. 오픈소스 프로젝트에서 추출한 Python medacy.tools.Annotations.get_entity_annotations의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Annotations(30)

get_entity_annotations(13)

compare_by_index(4)

diff(4)

add_entity(3)

compute_counts(2)

difference(2)

compute_confusion_matrix(2)

compute_ambiguity(2)

compare_by_index_stats(2)

to_html(2)

compare_by_entity(1)

get_entity_count(1)

get_labels(1)

get_spacy_entities(1)

intersection(1)

stats(1)

to_ann(1)

예제 #1

파일 보기

 def test_ann_conversions(self):
     """Round-trips an Annotations object through an ANN file and compares the entity dictionaries."""
     original = Annotations(self.ann_file_path_one, annotation_type='ann')

     # Write the loaded annotations back out, then load that copy again.
     original.to_ann(write_location=join(self.test_dir, "intermediary.ann"))
     reloaded = Annotations(join(self.test_dir, "intermediary.ann"), annotation_type='ann')

     entities_before = original.get_entity_annotations(return_dictionary=True)
     entities_after = reloaded.get_entity_annotations(return_dictionary=True)
     self.assertEqual(entities_before, entities_after)

예제 #2

파일 보기

파일: test_annotation.py 프로젝트: yushu-liu/medaCy

 def test_intersection(self):
     """Adds the first two entities of one file to another and expects them as the intersection."""
     first_path = join(self.dataset.get_data_directory(), self.ann_files[0])
     annotations1 = Annotations(first_path, annotation_type='ann')
     second_path = join(self.dataset.get_data_directory(), self.ann_files[1])
     annotations2 = Annotations(second_path, annotation_type='ann')

     # Copy the first two entities of the second file into the first one.
     for index in (0, 1):
         annotations1.add_entity(*annotations2.get_entity_annotations()[index])

     shared = annotations1.intersection(annotations2)
     expected = {
         annotations2.get_entity_annotations()[0],
         annotations2.get_entity_annotations()[1],
     }
     self.assertEqual(shared, expected)

예제 #3

파일 보기

 def test_get_entity_annotations_dict(self):
     """
     Checks that entity annotations requested with return_dictionary=True come back as a dict
     :return:
     """
     ann_path = join(self.dataset.get_data_directory(), self.ann_files[0])
     loaded = Annotations(ann_path, annotation_type='ann')
     entity_dict = loaded.get_entity_annotations(return_dictionary=True)
     self.assertIsInstance(entity_dict, dict)

예제 #4

파일 보기

 def test_init_from_ann_file(self):
     """
     Checks that an Annotations object built from an ann file yields non-None entity annotations
     :return:
     """
     ann_path = join(self.dataset.get_data_directory(), self.ann_files[0])
     loaded = Annotations(ann_path, annotation_type='ann')
     entities = loaded.get_entity_annotations()
     self.assertIsNotNone(entities)

예제 #5

파일 보기

    def test_confusion_matrix(self):
        """Checks that the confusion matrix and its first element both have one slot per entity."""
        first_path = join(self.dataset.get_data_directory(), self.ann_files[0])
        first = Annotations(first_path, annotation_type='ann')
        second_path = join(self.dataset.get_data_directory(), self.ann_files[1])
        second = Annotations(second_path, annotation_type='ann')

        first.add_entity(*second.get_entity_annotations()[0])

        # The matrix is recomputed for each assertion.
        leading_row = first.compute_confusion_matrix(second, self.entities)[0]
        self.assertEqual(len(leading_row), len(self.entities))

        matrix = first.compute_confusion_matrix(second, self.entities)
        self.assertEqual(len(matrix), len(self.entities))

예제 #6

파일 보기

 def test_get_entity_annotations_list(self):
     """
     Checks that entity annotations come back as a list by default
     :return:
     """
     ann_path = join(self.dataset.get_data_directory(), self.ann_files[0])
     loaded = Annotations(ann_path, annotation_type='ann')
     entities = loaded.get_entity_annotations()
     self.assertIsInstance(entities, list)

예제 #7

파일 보기

    def test_good_con_data(self):
        """Tests to see if valid con data can be used to instantiate an Annotations object."""
        con_path = join(self.test_dir, "test_con.con")
        text_path = join(self.test_dir, "test_con_text.txt")

        # Write both fixtures and leave the ``with`` block first: closing the
        # handles flushes their buffers, so the data is on disk before the
        # files are re-opened by path below. Reading while the writers were
        # still open could see empty files and make this test pass vacuously.
        with open(con_path, 'w+') as c, open(text_path, 'w+') as t:
            c.write(con_text)
            t.write(con_source_text)

        annotations = Annotations(con_path, annotation_type='con', source_text_path=text_path)
        self.assertIsInstance(annotations.get_entity_annotations(), list)

예제 #8

파일 보기

파일: test_annotation.py 프로젝트: yushu-liu/medaCy

 def test_compute_ambiguity(self):
     """Adds a differently-labelled copy of one entity and expects exactly one ambiguity."""
     reference_path = join(self.dataset.get_data_directory(), self.ann_files[0])
     annotations1 = Annotations(reference_path, annotation_type='ann')
     # Second object is loaded from the same file as the first.
     altered_path = join(self.dataset.get_data_directory(), self.ann_files[0])
     annotations2 = Annotations(altered_path, annotation_type='ann')

     first_entity = annotations2.get_entity_annotations()[0]
     _label, start, end, text = first_entity
     # Same span and text as the first entity, but a different label.
     annotations2.add_entity('incorrect_label', start, end, text)

     ambiguities = annotations1.compute_ambiguity(annotations2)
     self.assertEqual(len(ambiguities), 1)

예제 #9

파일 보기

파일: dataset.py 프로젝트: yushu-liu/medaCy

    def get_training_data(self, data_format='spacy'):
        """
        Get training data in a specified format.

        :param data_format: The specified format as a string. Only 'spacy' is accepted.

        :return: A list with one entry per file in self.all_data_files; each entry is
            whatever Annotations.get_entity_annotations(format='spacy') returns for that file.

        :raises TypeError: If data_format is anything other than 'spacy'.
        """
        # Only spaCy format is currently supported.
        if data_format != 'spacy':
            # Interpolate the caller's argument (not the ``format`` builtin); the
            # one-element tuple keeps the message correct even for tuple arguments.
            raise TypeError("Format %s not supported" % (data_format,))

        training_data = []

        # Add each entry in dataset with annotation to train_data
        for data_file in self.all_data_files:
            txt_path = data_file.get_text_path()
            ann_path = data_file.get_annotation_path()
            annotations = Annotations(ann_path, source_text_path=txt_path)
            training_data.append(
                annotations.get_entity_annotations(format='spacy'))

        return training_data

예제 #10

파일 보기

파일: gold_annotator_component.py 프로젝트: yushu-liu/medaCy

    def __call__(self, doc):
        """
        Overlay gold annotations onto the tokens of ``doc``.

        Loads the ann file named by ``doc._.gold_annotation_file`` and, for each
        entity, looks up a span with ``doc.char_span`` and then ``self.find_span``.
        Every token of a resolved span gets its ``gold_label`` extension set to the
        entity label, provided the label is in ``self.labels`` or ``self.labels`` is
        empty. ``self.failed_overlay_count`` and ``self.failed_identifying_span_count``
        are updated along the way.

        :param doc: document exposing custom attributes under ``doc._``
            (presumably a spaCy Doc - TODO confirm).
        :return: the same ``doc`` object that was passed in.
        :raises ValueError: if ``doc._`` has no ``gold_annotation_file`` attribute.
        """
        # NOTE(review): ``nlp`` is not used anywhere else in this method.
        nlp = self.nlp

        if hasattr(doc._, 'file_name'):
            logging.debug("%s: Called GoldAnnotator Component",
                          doc._.file_name)

        if logging.getLogger().getEffectiveLevel(
        ) == logging.DEBUG:  #print document tokenization
            for token in doc:
                logging.debug(str(token))

        #check if gold annotation file path has been set.
        if not hasattr(doc._, 'gold_annotation_file'):
            raise ValueError(
                "No extension doc._.gold_annotation_file is present.")

        gold_annotations = Annotations(doc._.gold_annotation_file,
                                       annotation_type='ann')

        # for label in set([label for _,_,label in [gold['entities'][key] for key in gold['entities']]]):

        # for token in doc:
        #     print(token.text, token.idx)
        # NOTE(review): the log calls below read doc._.file_name without the
        # hasattr guard used at the top of the method - confirm it is always set.
        for e_label, e_start, e_end, _ in gold_annotations.get_entity_annotations(
        ):
            #print(e_label, e_start, e_end)
            # Skip entities whose start offset lies after their end offset.
            if e_start > e_end:
                logging.critical(
                    "%s: Broken annotation - start is greater than end: (%i,%i,%s)",
                    doc._.file_name, e_start, e_end, e_label)
                continue
            span = doc.char_span(e_start, e_end)

            # No span for these offsets: count the failure on both counters.
            if span is None:
                self.failed_overlay_count += 1
                self.failed_identifying_span_count += 1
                logging.warning(
                    "%s: Number of failed annotation overlays with current tokenizer: %i (%i,%i,%s)",
                    doc._.file_name, self.failed_overlay_count, e_start, e_end,
                    e_label)
            # find_span is called for every entity, including when span is None.
            fixed_span = self.find_span(e_start, e_end, e_label, span, doc)
            if fixed_span is not None:
                if span is None:
                    logging.warning("%s: Fixed span (%i,%i,%s) into: %s",
                                    doc._.file_name, e_start, e_end, e_label,
                                    fixed_span.text)
                    # The span was recovered, so undo the increment made above.
                    self.failed_identifying_span_count -= 1
                for token in fixed_span:
                    # An empty self.labels means every label is accepted.
                    if e_label in self.labels or not self.labels:
                        token._.set('gold_label', e_label)

            else:  #annotation was not able to be fixed, it will be ignored - this is bad in evaluation.
                logging.warning("%s: Could not fix annotation: (%i,%i,%s)",
                                doc._.file_name, e_start, e_end, e_label)
                logging.warning("%s: Total Failed Annotations: %i",
                                doc._.file_name,
                                self.failed_identifying_span_count)

        # Warn when failed overlays exceed 30% of this file's entity count.
        # NOTE(review): failed_overlay_count is never reset in this method, so it
        # appears to accumulate across calls - confirm that is intended.
        if self.failed_overlay_count > .3 * len(
                gold_annotations.get_entity_annotations()):
            logging.warning(
                "%s: Annotations may mis-aligned as more than 30 percent failed to overlay: %s",
                doc._.file_name, doc._.gold_annotation_file)

        return doc

예제 #11

파일 보기

 def test_init_from_ann_file(self):
     """Checks that an Annotations object built from an ann file yields non-None entity annotations."""
     loaded = Annotations(self.ann_file_path_one, annotation_type='ann')
     entities = loaded.get_entity_annotations()
     self.assertIsNotNone(entities)

예제 #12

파일 보기

 def test_get_entity_annotations_list(self):
     """Checks that entity annotations come back as a list by default."""
     loaded = Annotations(self.ann_file_path_one, annotation_type='ann')
     entities = loaded.get_entity_annotations()
     self.assertIsInstance(entities, list)

예제 #13

파일 보기

 def test_get_entity_annotations_dict(self):
     """Checks that entity annotations requested with return_dictionary=True come back as a dict."""
     loaded = Annotations(self.ann_file_path_one, annotation_type='ann')
     entity_dict = loaded.get_entity_annotations(return_dictionary=True)
     self.assertIsInstance(entity_dict, dict)