"""Prepare the corpus, tag dictionary and embeddings for an NER sequence tagger."""
from typing import List

from flair.data import TaggedCorpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import (
    TokenEmbeddings,
    WordEmbeddings,
    StackedEmbeddings,
    MemoryEmbeddings,
    CharacterEmbeddings,
)
import torch

# 1. Load the column-formatted corpus from the "data1" folder.
#    `columns` maps a column index to the annotation it carries:
#    0 -> token text, 1 -> NER tag.
columns = {0: 'text', 1: 'ner'}
corpus: TaggedCorpus = NLPTaskDataFetcher.fetch_column_corpus(
    "data1",
    columns,
    train_file="train.txt",
    test_file="test.txt",
    dev_file="dev.txt",
)
print(corpus)

# 2. Name of the tag layer the model should predict.
tag_type = 'ner'

# 3. Build the tag dictionary from the tags seen in the corpus and show
#    its index -> item table.
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. Create the individual token embeddings (character-level first, then
#    the pre-trained word vectors) and combine them into a single stack.
character_embeddings = CharacterEmbeddings(path_to_char_dict="characters_merged.bin")
word_embeddings = WordEmbeddings("tmp/glove.1.8G.bin")
embedding_types: List[TokenEmbeddings] = [
    character_embeddings,
    word_embeddings,
]
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# 5. initialize sequence tagger