# Example #1
0
from flair.data import TaggedCorpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, MemoryEmbeddings, CharacterEmbeddings
from typing import List
import torch

# 1. Load the corpus.
# `columns` maps a column index of the data files to the name of the
# annotation stored in that column (index 0 -> 'text', index 1 -> 'ner').
columns = {0: "text", 1: "ner"}

# Fetch the train/test/dev files from the "data1" folder.
corpus: TaggedCorpus = NLPTaskDataFetcher.fetch_column_corpus(
    "data1",
    columns,
    train_file="train.txt",
    test_file="test.txt",
    dev_file="dev.txt",
)
print(corpus)

# 2. Name of the tag type to predict.
tag_type = "ner"

# 3. Build the tag dictionary for that tag type from the corpus, then print
#    its index-to-item table.
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. Set up the token embeddings, in this order:
#    - character embeddings using the dictionary "characters_merged.bin"
#    - word embeddings identified by "tmp/glove.1.8G.bin"
embedding_types: List[TokenEmbeddings] = [
    CharacterEmbeddings(path_to_char_dict="characters_merged.bin"),
    WordEmbeddings("tmp/glove.1.8G.bin"),
]

# Wrap the individual embeddings in a single StackedEmbeddings object.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# 5. initialize sequence tagger