def evaluate_quotes(self, fandom_fname, fic_representation, save=True, exact_match=True):
    """ Evaluate quote extraction and attribution for a fic against gold spans.

    Args:
        fandom_fname: fandom filename identifying the fic
        fic_representation: object holding predicted quotes (populated via extract_quotes)
        save: save AnnotatedSpan quote objects in a pickled file in a tmp directory
        exact_match: require exact span matches when scoring

    Returns:
        (quote_scores, quote_groups) from scorer.quote_scores
    """
    # Load gold quote spans for this fic
    gold = Annotation(self.quote_settings.gold_dirpath, fandom_fname,
                      file_ext=self.quote_settings.gold_ext,
                      fic_csv_dirpath=self.fic_csv_dirpath)
    gold.extract_annotated_spans()

    # Extract predicted quote spans (also saves them out)
    fic_representation.extract_quotes(
        save_dirpath=self.quote_settings.preds_outpath,
        coref_from=self.coref_from)

    # Score predictions against gold
    quote_scores, quote_groups = scorer.quote_scores(
        fic_representation.quotes, gold.annotations, exact_match=exact_match)

    # Report both metric families with identical formatting
    report_sections = (
        ('\tQuote extraction results:', 'extraction'),
        ('\tQuote attribution results:', 'attribution'),
    )
    for header, prefix in report_sections:
        print(header)
        for metric in ('f1', 'precision', 'recall'):
            key = f'{prefix}_{metric}'
            print(f'\t\t{key}: {quote_scores[key]: .2%}')
    print()
    return quote_scores, quote_groups
def modify_coref_files(self, coref_annotations_dirpath, coref_annotations_ext, annotation_type='gold'):
    """ Replace coref tokens with externally annotated mentions.

    Loads annotations into self.character_mentions, rewrites the coref
    <tags> in the CSV, and saves the modified coref CSV and characters
    file to dirpaths suffixed with '_{annotation_type}_coref'.

    Args:
        coref_annotations_dirpath: dirpath containing the external annotations
        coref_annotations_ext: file extension of the annotation files
        annotation_type: label used in the output dirpath suffix (default 'gold')

    Returns:
        The suffix appended to the output dirpaths.
    """
    # Load the external mentions and stash them on the instance
    ann = Annotation(coref_annotations_dirpath, self.fandom_fname,
                     file_ext=coref_annotations_ext,
                     fic_csv_dirpath=self.fic_csv_dirpath,
                     annotation_type=annotation_type)
    ann.extract_annotated_spans()
    self.character_mentions = ann.annotations

    # Rewrite coref <tags> in the CSV (modifies self.coref_fic in place)
    self.modify_coref_tags(ann.annotations)

    # Save the rewritten coref CSV under the suffixed dirpath
    modify_text = f'_{annotation_type}_coref'
    self.coref_output_dirpath = self.coref_output_dirpath.rstrip('/') + modify_text
    self.save_coref_csv()

    # Save the characters file under a matching suffixed dirpath
    self.coref_chars_output_dirpath = self.coref_chars_output_dirpath.rstrip('/') + modify_text
    self.save_characters_file()

    return modify_text
def modify_quote_spans(self, quote_annotations_dirpath, quote_annotations_ext):
    """ Modify quote marks so the pipeline will recognize gold quotes as quote spans.

    Loads gold quote annotations, rewrites quote marks in the CSV's
    text_tokenized, and saves the coref CSV to a dirpath suffixed with
    '_gold_quotes'. The characters dirpath is re-pointed to match but
    is not saved here.

    Args:
        quote_annotations_dirpath: dirpath containing gold quote annotations
        quote_annotations_ext: file extension of the annotation files

    Returns:
        The suffix appended to the output dirpaths.
    """
    # Load the gold quote extractions
    gold_annotation = Annotation(quote_annotations_dirpath, self.fandom_fname,
                                 file_ext=quote_annotations_ext,
                                 fic_csv_dirpath=self.fic_csv_dirpath)
    gold_annotation.extract_annotated_spans()

    # Rewrite quote marks in text_tokenized (modifies self.coref_fic in place)
    self.modify_quote_marks(gold_annotation.annotations)

    # Save the coref CSV under the suffixed dirpath
    modify_text = '_gold_quotes'
    self.coref_output_dirpath = self.coref_output_dirpath.rstrip('/') + modify_text
    self.save_coref_csv()

    # Keep the characters dirpath consistent with the coref dirpath
    self.coref_chars_output_dirpath = self.coref_chars_output_dirpath.rstrip('/') + modify_text

    return modify_text
def modify_quote_tokens(self, original_tokenization_dirpath=None, quote_annotations_dirpath=None, quote_annotations_ext=None, change_to='gold'):
    """ Changes quote tokens so BookNLP will recognize them in certain ways.

    Args:
        original_tokenization_dirpath: dirpath of the tokens file produced
            without whitespace tokenization (used only when change_to='match');
            falls back to self.original_tokenization_dirpath when None
        quote_annotations_dirpath: dirpath of gold quote annotations
            (used only when change_to='gold')
        quote_annotations_ext: file extension of the gold annotation files
        change_to:
            'gold': Change to gold quote extractions
            'match': Replace quotes with smart quotes to match a tokens file done without whitespace tokenization
            'strict': Change existing BookNLP quotes using a dictionary. Single quotes to ` and ', double quotes to `` and ''
    """
    if change_to == 'gold':
        # Load gold quote extractions
        gold = Annotation(quote_annotations_dirpath, self.fandom_fname,
                          file_ext=quote_annotations_ext,
                          fic_csv_dirpath=self.fic_csv_dirpath)
        gold.extract_annotated_spans()

        # Clear existing quotes, since might have been modified after whitespace tokenization
        self.clear_quotes()

        # Add gold quote spans in
        for span in gold.annotations:
            self.add_quote_span(span)

        # Change output dirpath for later saving (after replace gold coref)
        self.modified_token_output_dirpath = self.modified_token_output_dirpath.rstrip('/') + '_gold_quotes'

    elif change_to == 'match':
        # Bug fix: the original_tokenization_dirpath argument was accepted but
        # silently ignored; honor it, falling back to the instance attribute.
        tokens_dirpath = (original_tokenization_dirpath
                          if original_tokenization_dirpath is not None
                          else self.original_tokenization_dirpath)
        original_tokens = load_tokens_file(
            os.path.join(tokens_dirpath, self.fandom_fname + self.token_file_ext))
        self.token_data = match_quotes(original_tokens, self.token_data)

        # Save out
        save_tokens_file(self.token_data, self.modified_token_fpath)

    elif change_to == 'strict':
        # NOTE(review): only smart double quotes are mapped here, though the
        # docstring also mentions single quotes — confirm intended scope.
        quote_changes = {
            "“": "``",
            "”": "''",
        }
        for col in ('normalizedWord', 'lemma'):
            self.token_data[col] = self.token_data[col].map(
                lambda x: quote_changes.get(x, x))

        # Save out (removed a leftover pdb.set_trace() debugger breakpoint
        # that would hang any non-interactive run)
        self.token_data.to_csv(self.modified_token_fpath,
                               sep='\t', quoting=csv.QUOTE_NONE, index=False)
def load_fic_spans(self, fandom_fname, gold_dirpath, baseline_dirpath, experimental_dirpath, gold_annotations_ext):
    """ Load quote or coref predictions and gold spans for a fic.

    Args:
        fandom_fname: fandom filename identifying the fic
        gold_dirpath: dirpath of gold annotations
        baseline_dirpath: dirpath of pickled baseline predictions
        experimental_dirpath: dirpath of pickled experimental predictions
        gold_annotations_ext: file extension of the gold annotation files

    Returns:
        (gold_spans, baseline_spans, experimental_spans)
    """
    # Gold spans come from annotation files
    annotation = Annotation(gold_dirpath, fandom_fname,
                            file_ext=gold_annotations_ext,
                            fic_csv_dirpath=self.fic_csv_dirpath)
    annotation.extract_annotated_spans()

    # Predictions come from pickled span files
    return (annotation.annotations,
            utils.load_pickle(baseline_dirpath, fandom_fname),
            utils.load_pickle(experimental_dirpath, fandom_fname))
def modify_coref_tokens(self, coref_annotations_dirpath, coref_annotations_ext):
    """ Changes coref tokens to gold annotations in self.token_data.
        Saves out to {token_output_dirpath}_gold_coref/token_fpath.

    Args:
        coref_annotations_dirpath: dirpath containing gold coref annotations
        coref_annotations_ext: file extension of the annotation files
    """
    # Load gold mentions
    gold = Annotation(coref_annotations_dirpath, self.fandom_fname,
                      file_ext=coref_annotations_ext,
                      fic_csv_dirpath=self.fic_csv_dirpath)
    gold.extract_annotated_spans()

    # Build character name to id dictionary for gold characters
    # (ids assigned arbitrarily, in first-seen order)
    self.char_name2id = defaultdict(lambda: len(self.char_name2id))

    # Clear existing character coref annotations
    self.token_data['characterId'] = -1

    # Modify original tokens file with the gold spans
    for span in gold.annotations:
        self.modify_coref_span(span)

    # Renumber BookNLP's own token IDs for re-running on modified output
    self.renumber_token_ids()

    # Save out. Use makedirs(exist_ok=True) instead of the racy
    # exists()+mkdir pair: no TOCTOU window, and missing parent
    # directories no longer raise FileNotFoundError.
    self.modified_token_output_dirpath = self.modified_token_output_dirpath.rstrip('/') + '_gold_coref'
    os.makedirs(self.modified_token_output_dirpath, exist_ok=True)
    self.modified_token_fpath = os.path.join(
        self.modified_token_output_dirpath,
        f'{self.fandom_fname}{self.token_file_ext}')
    self.token_data.to_csv(self.modified_token_fpath,
                           sep='\t', quoting=csv.QUOTE_NONE, index=False)
def evaluate_coref(self, fandom_fname, fic_representation, save=True):
    """ Evaluate character coreference for a fic against gold mentions.

    Args:
        fandom_fname: fandom filename identifying the fic
        fic_representation: object holding predicted mentions
            (populated via extract_character_mentions)
        save: save AnnotatedSpan objects in a pickled file in a tmp directory

    Returns:
        coref_scores dict from scorer.coref_scores
    """
    # Load gold mentions for this fic
    gold = Annotation(self.coref_settings.gold_dirpath, fandom_fname,
                      file_ext=self.coref_settings.gold_ext,
                      fic_csv_dirpath=self.fic_csv_dirpath)
    gold.extract_annotated_spans()

    # Extract predicted mentions (also saves them out)
    fic_representation.extract_character_mentions(
        save_dirpath=self.coref_settings.preds_outpath)

    # Score predictions against gold with exact span matching
    coref_scores = scorer.coref_scores(
        fic_representation.character_mentions, gold.annotations,
        exact_match=True)

    # Report LEA metrics
    print('\tCoref results:')
    for metric_key in ('lea_f1', 'lea_precision', 'lea_recall'):
        print(f'\t\t{metric_key}: {coref_scores[metric_key]: .2%}')
    print()

    return coref_scores