def build_and_train_conll03en_flair_sequence_tagger(corpus, tag_type, tag_dictionary):
    """
    do not change! same configuration as described in file:
    "flair/resources/docs/EXPERIMENTS.md",
    section: "CoNLL-03 Named Entity Recognition (English)"
    """
    embeddings: StackedEmbeddings = StackedEmbeddings(
        embeddings=[
            WordEmbeddings("glove"),
            PooledFlairEmbeddings("news-forward", pooling="min"),
            PooledFlairEmbeddings("news-backward", pooling="min"),
        ]
    )

    from flair.models import SequenceTagger

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
    )

    from flair.trainers import ModelTrainer

    corpus = Corpus(train=corpus.train, dev=corpus.dev, test=[])
    trainer: ModelTrainer = ModelTrainer(tagger, corpus)
    # trainer.train("resources/taggers/example-ner", train_with_dev=True, max_epochs=150)  # original
    trainer.train("flair_checkpoints", train_with_dev=False, max_epochs=40, save_final_model=False)
    return tagger
def main(base_path, output_dir, nb_epochs):
    # parser = argparse.ArgumentParser()
    # parser.add_argument("--data_dir", default='./', type=str, required=True,
    #                     help="The parent dir of the input data; must contain a folder named `conll_03`")
    # parser.add_argument("--output_dir", default=None, required=True,
    #                     help="The output directory where the trained model will be stored")
    # parser.add_argument("--train_epochs", default=3, type=int, required=True,
    #                     help="Number of epochs to train")
    # args = parser.parse_args()
    # base_path = args.data_dir

    corpus: Corpus = CONLL_03(base_path=base_path)
    tag_type = 'ner'
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    embedding_types: List[TokenEmbeddings] = [
        WordEmbeddings('glove'),
        PooledFlairEmbeddings('news-forward', pooling='min'),
        PooledFlairEmbeddings('news-backward', pooling='min'),
    ]
    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type=tag_type)

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    # output_dir = args.output_dir
    # nb_epochs = args.train_epochs
    # output_dir =
    # nb_epochs = 10
    trainer.train(output_dir, train_with_dev=False, max_epochs=nb_epochs)  # 150
def load_flair(mode='flair'):
    if mode == 'flair':
        stacked_embeddings = StackedEmbeddings([
            WordEmbeddings('glove'),
            PooledFlairEmbeddings('news-forward', pooling='min'),
            PooledFlairEmbeddings('news-backward', pooling='min')
        ])
    else:  # bert
        # concatenating the last 4 layers gives the best results
        stacked_embeddings = BertEmbeddings('bert-base-uncased')
    return stacked_embeddings
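# A minimal usage sketch (not part of the original snippet): embed one sentence with the
# stack returned by load_flair() and inspect the per-token vectors. Assumes the Flair
# imports used above plus flair.data.Sentence.
from flair.data import Sentence

stacked = load_flair(mode='flair')
sentence = Sentence('George Washington went to Washington.')
stacked.embed(sentence)
for token in sentence:
    print(token.text, token.embedding.shape)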
def __init__(self, hidden_dim: int, rnn_type: str, vocab_size: int, tagset_size: int, task_type: str):
    super(TaskLearner, self).__init__()
    self.task_type = task_type
    self.rnn_type = rnn_type
    self.bidirectional = True
    self.num_layers = 2
    self.num_directions = 2 if self.bidirectional else 1

    # Word Embeddings (TODO: Implement pre-trained word embeddings)
    # self.word_embeddings = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
    # TODO: Implement padding_idx=self.pad_idx
    embedding_types: List[TokenEmbeddings] = [
        WordEmbeddings('glove'),
        PooledFlairEmbeddings('news-forward', pooling='min'),
        PooledFlairEmbeddings('news-backward', pooling='min')
    ]
    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)
    self.embeddings = embeddings
    self.embedding_dim: int = self.embeddings.embedding_length

    if self.rnn_type == 'gru':
        rnn = nn.GRU
    elif self.rnn_type == 'lstm':
        rnn = nn.LSTM
    elif self.rnn_type == 'rnn':
        rnn = nn.RNN
    else:
        raise ValueError

    # Sequence tagger
    self.rnn = rnn(input_size=self.embedding_dim,
                   hidden_size=hidden_dim,
                   num_layers=self.num_layers,
                   dropout=0.0 if self.num_layers == 1 else 0.5,
                   bidirectional=self.bidirectional,
                   batch_first=True)

    if self.task_type == 'SEQ':
        # Linear layer that maps hidden state space from the RNN to tag space
        self.hidden2tag = nn.Linear(in_features=hidden_dim * self.num_directions,
                                    out_features=tagset_size)

    if self.task_type == 'CLF':
        # COME BACK LATER...
        self.drop = nn.Dropout(p=0.5)
        self.hidden2tag = nn.Linear(in_features=hidden_dim * self.num_directions,
                                    out_features=1)
def train_flair(self):
    # Flair model initialisation and training

    # 1. get the corpus
    # corpus: Corpus = ColumnCorpus(os.path.join(os.getcwd(), 'results', '10'),
    #                               {0: 'text', 1: 'ner'},
    #                               train_file='train.txt',
    #                               test_file='test.txt',
    #                               dev_file='valid.txt',
    #                               column_delimiter=' ')
    corpus: Corpus = CONLL_03(base_path=os.path.join(os.getcwd(), 'results', '10'))
    corpus.dev_file = 'valid.txt'  # rather than 'dev.txt'

    # 2. what tag do we want to predict?
    tag_type = 'ner'

    # 3. make the tag dictionary from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    # initialize embeddings
    embedding_types: List[TokenEmbeddings] = [
        # GloVe embeddings
        WordEmbeddings('glove'),
        # contextual string embeddings, forward
        PooledFlairEmbeddings('news-forward', pooling='min'),
        # contextual string embeddings, backward
        PooledFlairEmbeddings('news-backward', pooling='min'),
    ]
    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

    # initialize sequence tagger
    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type=tag_type)

    # initialize trainer
    trainer: ModelTrainer = ModelTrainer(tagger, corpus)
    results = trainer.train(os.path.join(os.getcwd(), 'results', '10', 'tagger'),
                            train_with_dev=False,
                            max_epochs=50)
    print(results)
def post_init(self):
    import flair
    flair.device = self.device
    from flair.embeddings import WordEmbeddings, FlairEmbeddings, BytePairEmbeddings, PooledFlairEmbeddings, \
        DocumentPoolEmbeddings

    embeddings_list = []
    for e in self.embeddings:
        model_name, model_id = e.split(':', maxsplit=1)
        emb = None
        try:
            if model_name == 'flair':
                emb = FlairEmbeddings(model_id)
            elif model_name == 'pooledflair':
                emb = PooledFlairEmbeddings(model_id)
            elif model_name == 'word':
                emb = WordEmbeddings(model_id)
            elif model_name == 'byte-pair':
                emb = BytePairEmbeddings(model_id)
        except ValueError:
            self.logger.error(f'embedding not found: {e}')
            continue
        if emb is not None:
            embeddings_list.append(emb)

    if embeddings_list:
        self.model = DocumentPoolEmbeddings(embeddings_list, pooling=self.pooling_strategy)
        self.logger.info(f'flair encoder initialized with embeddings: {self.embeddings}')
    else:
        self.logger.error('flair encoder initialization failed.')
def post_init(self):
    from flair.embeddings import WordEmbeddings, FlairEmbeddings, BytePairEmbeddings, PooledFlairEmbeddings, \
        DocumentPoolEmbeddings

    if self.model is not None:
        return

    embeddings_list = []
    for e in self.embeddings:
        model_name, model_id = e.split(':', maxsplit=1)
        emb = None
        try:
            if model_name == 'flair':
                emb = FlairEmbeddings(model_id)
            elif model_name == 'pooledflair':
                emb = PooledFlairEmbeddings(model_id)
            elif model_name == 'word':
                emb = WordEmbeddings(model_id)
            elif model_name == 'byte-pair':
                emb = BytePairEmbeddings(model_id)
        except ValueError:
            self.logger.error('embedding not found: {}'.format(e))
            continue
        if emb is not None:
            embeddings_list.append(emb)

    if embeddings_list:
        self.model = DocumentPoolEmbeddings(embeddings_list, pooling=self.pooling_strategy)
        self.logger.info('initialize flair encoder with embeddings: {}'.format(self.embeddings))
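# A hedged usage sketch (assumption, not part of either post_init() above): once the
# encoder's DocumentPoolEmbeddings model is built, embedding a sentence yields a single
# pooled document vector that can be read back with get_embedding().
from flair.data import Sentence
from flair.embeddings import WordEmbeddings, DocumentPoolEmbeddings

model = DocumentPoolEmbeddings([WordEmbeddings('glove')], pooling='mean')
sentence = Sentence('contextual string embeddings pool word-level vectors')
model.embed(sentence)
print(sentence.get_embedding().shape)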
def _train(
    self,
    output_dir: Union[str, Path],
    corpus: Optional[ColumnCorpus] = None,
    tagger: Optional[SequenceTagger] = None,
    hidden_size: int = 256,
    learning_rate: float = 0.1,
    mini_batch_size: int = 32,
    max_epochs: int = 100,
    use_crf: bool = True,
) -> SequenceTagger:
    tag_dictionary = corpus.make_tag_dictionary(tag_type="ner")

    if not tagger:
        tagger = SequenceTagger(
            hidden_size=hidden_size,
            embeddings=PooledFlairEmbeddings("news-forward"),
            tag_dictionary=tag_dictionary,
            tag_type="ner",
            use_crf=use_crf,
        )

    trainer = ModelTrainer(tagger, corpus)
    trainer.train(
        output_dir,
        learning_rate=learning_rate,
        mini_batch_size=mini_batch_size,
        max_epochs=max_epochs,
    )

    model_path = Path(output_dir, "best-model.pt")
    return SequenceTagger.load(model_path)
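# A hedged usage sketch (the checkpoint path is an assumption): load the best model
# written by _train() and tag a new sentence with it.
from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load('resources/taggers/example-ner/best-model.pt')
sentence = Sentence('Berlin is the capital of Germany.')
tagger.predict(sentence)
print(sentence.to_tagged_string())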
def use_flair_to_extract_context_embeddings(file, dest_folder, embedding_type, embedding_size, pretrained_model=None):
    if embedding_type.lower() == 'elmo':
        context_embedding = ELMoEmbeddings(model='pubmed')
    elif embedding_type.lower() == 'elmo_transformer':
        context_embedding = ELMoTransformerEmbeddings()
    elif embedding_type.lower() == 'flair':
        context_embedding = PooledFlairEmbeddings()
    elif embedding_type.lower() == 'bioflair':
        flair_1 = PooledFlairEmbeddings('pubmed-forward')
        flair_2 = PooledFlairEmbeddings('pubmed-backward')
        elmo = ELMoEmbeddings(model='pubmed')
        # bert = BertEmbeddings(bert_model_or_path='bert-base-multilingual-cased', layers='-1')
        context_embedding = StackedEmbeddings(embeddings=[flair_1, flair_2, elmo])
    elif embedding_type.lower() == 'biobert' or embedding_type.lower() == 'bert':
        context_embedding = BertEmbeddings(bert_model_or_path=pretrained_model, layers='-1')

    data = {}
    dest_name = os.path.basename(file).split('.')
    print(dest_folder)
    with open(file, 'r') as f, open('{}/{}.pickle'.format(dest_folder, dest_name[0]), 'wb') as d:
        sentence = ''
        instance = []
        j = 0
        for line in f.readlines():
            if line != '\n':
                line = line.split()
                sentence += ' ' + line[0]
            elif line == '\n':
                sent = Sentence(sentence.strip())
                context_embedding.embed(sent)
                for token in sent:
                    instance.append((token.text, token.embedding[:embedding_size]))
                sentence = ''
                if instance:
                    data[j] = list(zip(*instance.copy()))
                    j += 1
                    instance.clear()
        pickle.dump(data, d)
def get_embeddings(pooling_op='min'):
    return StackedEmbeddings(embeddings=[
        # pre-trained embeddings
        PooledFlairEmbeddings(
            'es-forward',
            pooling=pooling_op,
        ),
        PooledFlairEmbeddings(
            'es-backward',
            pooling=pooling_op,
        ),
        BytePairEmbeddings(
            language='es',
            dim=300,
        ),
        # self-trained embeddings
        SpanishHealthCorpusEmbeddings('wang2vec'),
        # SpanishHealthCorpusEmbeddings('fastText'),
    ])
if args.task == 'ner':
    flair_corpus = NLPTask.CONLL_03
    tag_type = 'ner'
    embedding_types = [WordEmbeddings('glove')]
else:
    flair_corpus = NLPTask.CONLL_2000
    tag_type = 'np'
    embedding_types = [WordEmbeddings('extvec')]

_base_path = 'resources/taggers/{}-{}'.format(args.task, args.model_path)

if args.use_flair_embeddings is True:
    embedding_types.extend([
        # contextual string embeddings, forward
        PooledFlairEmbeddings('news-forward'),
        # contextual string embeddings, backward
        PooledFlairEmbeddings('news-backward')
    ])

if args.model_path != 'NA':
    embedding_types.append(EyeTrackingFeatureEmbedding(args.model_path))

corpus = NLPTaskDataFetcher.load_corpus(flair_corpus, base_path=TASK_DATASET_DIR)
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
embeddings = StackedEmbeddings(embeddings=embedding_types)

tagger = SequenceTagger(hidden_size=256,
                        embeddings=embeddings,
                        tag_dictionary=tag_dictionary,
                        tag_type=tag_type)
search_space = SearchSpace()

# Create our embedding stacks.
# Flair recommends adding GloVe to their character-level embeddings.
flair_normal = StackedEmbeddings([
    WordEmbeddings('glove'),
    FlairEmbeddings('mix-forward'),
    FlairEmbeddings('mix-backward')
])
bert = BertEmbeddings()
elmo = ELMoEmbeddings('original')
flair_pooled = StackedEmbeddings([
    WordEmbeddings('glove'),
    PooledFlairEmbeddings('mix-forward'),
    PooledFlairEmbeddings('mix-backward')
])
search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[bert, elmo, flair_normal, flair_pooled])

# Other hyperparameters are kept fixed for this exercise.
# Add to the lists below to extend the grid.
# Unfortunately, for small grids Flair picks random search instead of a true grid search.
search_space.add(Parameter.HIDDEN_SIZE, hp.choice, options=[384])
search_space.add(Parameter.RNN_LAYERS, hp.choice, options=[1])
search_space.add(Parameter.DROPOUT, hp.choice, options=[0.0])
# this is the folder in which train, test and dev files reside
data_folder = 'data/ner/bc5dr'

# init a corpus using column format, the data folder and the names of the train, dev and test files
corpus: Corpus = ColumnCorpus(data_folder, columns,
                              train_file='train.txt',
                              test_file='test.txt',
                              dev_file='dev.txt')

tag_type = 'ner'
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, PooledFlairEmbeddings, ELMoEmbeddings
from typing import List

embedding_types: List[TokenEmbeddings] = [
    PooledFlairEmbeddings('pubmed-forward'),
    PooledFlairEmbeddings('pubmed-backward'),
    ELMoEmbeddings('pubmed'),
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=True)

# initialize trainer
from flair.trainers import ModelTrainer
def train(model, selected_embeddings):
    # 1. get the corpus
    if model == 'AMT':
        corpus = read_in_AMT()
    elif model == 'CADEC':
        corpus = read_in_CADEC()
    elif model == 'TwitterADR':
        corpus = read_in_TwitterADR()
    elif model == 'Micromed':
        corpus = read_in_Micromed()
    print(corpus)

    # 2. what tag do we want to predict?
    tag_type = 'ner'

    # 3. make the tag dictionary from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
    print(tag_dictionary.idx2item)

    embedding_types: List[TokenEmbeddings] = []

    if selected_embeddings['glove']:
        embedding_types.append(WordEmbeddings('glove'))

    if selected_embeddings['twitter']:
        embedding_types.append(WordEmbeddings('twitter'))

    if selected_embeddings['char']:
        embedding_types.append(CharacterEmbeddings())

    # FlairEmbeddings, forward and backward
    if selected_embeddings['flair']:
        embedding_types.append(FlairEmbeddings('news-forward'))
        embedding_types.append(FlairEmbeddings('news-backward'))

    # PooledFlairEmbeddings, forward and backward
    if selected_embeddings['pooled-flair']:
        embedding_types.append(PooledFlairEmbeddings('news-forward', pooling='mean'))
        embedding_types.append(PooledFlairEmbeddings('news-backward', pooling='mean'))

    # init BERT
    if selected_embeddings['bert']:
        embedding_types.append(BertEmbeddings())

    # init RoBERTa
    if selected_embeddings['roberta']:
        embedding_types.append(RoBERTaEmbeddings())

    # init BioBERT
    if selected_embeddings['biobert']:
        embedding_types.append(BertEmbeddings("data/embeddings/biobert-pubmed-pmc-cased"))

    # init clinical BioBERT
    if selected_embeddings['clinicalbiobert']:
        embedding_types.append(BertEmbeddings("data/embeddings/pretrained_bert_tf/biobert-base-clinical-cased"))

    # init multilingual ELMo
    if selected_embeddings['elmo']:
        embedding_types.append(ELMoEmbeddings())

    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type=tag_type,
                                            use_crf=True)

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    selected_embeddings_text = [key for key in selected_embeddings if selected_embeddings[key]]
    selected_embeddings_text = '_'.join(selected_embeddings_text)
    model_dir = 'resources/taggers/FA_' + model + selected_embeddings_text

    # 7. start training
    trainer.train(model_dir,
                  train_with_dev=True,
                  learning_rate=0.1,
                  mini_batch_size=4,
                  max_epochs=200,
                  checkpoint=True)

    # 8. plot training curves (optional)
    from flair.visual.training_curves import Plotter
    plotter = Plotter()
    plotter.plot_training_curves(model_dir + '/loss.tsv')
    plotter.plot_weights(model_dir + '/weights.txt')
def read_dataset(self, file_dict, dataset_name, *args, **kwargs):
    """
    :param file_dict: Will have just one key:value pair
        file_dict['base_path'] = <base_path>
        base_path will have the path to the directory with the following structure:
            conll_03 directory
                conll_03/eng.testa
                conll_03/eng.testb
                conll_03/eng.train
            onto-ner directory
                onto-ner/eng.testa
                onto-ner/eng.testb
                onto-ner/eng.train
    :param dataset_name: Could be one of the constants from the NLPTask class
        (only NLPTask.CONLL_03 and NLPTask.ONTONER are used)
    :param args:
    :param kwargs:
    :return:
    """
    base_path = file_dict['base_path']
    self.dataset = dataset_name

    # 1. get the corpus
    corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(dataset_name, base_path)

    # 2. what tag do we want to predict?
    tag_type = 'ner'

    # 3. make the tag dictionary from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    if dataset_name == NLPTask.CONLL_03:
        # initialize embeddings
        embedding_types: List[TokenEmbeddings] = [
            # GloVe embeddings
            WordEmbeddings('glove'),
            # contextual string embeddings, forward
            PooledFlairEmbeddings('news-forward', pooling='min'),
            # contextual string embeddings, backward
            PooledFlairEmbeddings('news-backward', pooling='min'),
        ]
    elif dataset_name == NLPTask.ONTONER:
        # initialize embeddings
        embedding_types: List[TokenEmbeddings] = [
            WordEmbeddings('crawl'),
            FlairEmbeddings('news-forward'),
            FlairEmbeddings('news-backward'),
        ]

    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

    # initialize sequence tagger
    from flair.models import SequenceTagger
    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type=tag_type)

    self.corpus = corpus
    self.embeddings = embeddings
    self.tag_dictionary = tag_dictionary
    self.embedding_types = embedding_types
    self.tagger = tagger
# -*- coding: utf-8 -*-
from flair.datasets import CONLL_03
from flair.embeddings import PooledFlairEmbeddings, StackedEmbeddings, WordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

corpus = CONLL_03(base_path="data/conll-2003")

embedding_types = [
    WordEmbeddings("glove"),
    PooledFlairEmbeddings("news-forward", pooling="min"),
    PooledFlairEmbeddings("news-backward", pooling="min"),
]
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

tagger: SequenceTagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=corpus.make_tag_dictionary(tag_type="ner"),
    tag_type="ner",
)

trainer: ModelTrainer = ModelTrainer(tagger, corpus)
trainer.train("models/checkpoints", train_with_dev=True, max_epochs=150)
def use_flair_to_extract_context_embeddings(files, file_name, dest_folder, layer, embedding_type, embedding_size, pretrained_model=None):
    if embedding_type.lower() == 'elmo':
        context_embedding = ELMoEmbeddings(model='pubmed')
    elif embedding_type.lower() == 'elmo_transformer':
        context_embedding = ELMoTransformerEmbeddings()
    elif embedding_type.lower() == 'flair':
        context_embedding = PooledFlairEmbeddings()
    elif embedding_type.lower() == 'bioflair':
        flair_1 = PooledFlairEmbeddings('pubmed-forward')
        flair_2 = PooledFlairEmbeddings('pubmed-backward')
        elmo = ELMoEmbeddings(model='pubmed')
        context_embedding = StackedEmbeddings(embeddings=[flair_1, flair_2, elmo])
    elif embedding_type.lower() == 'biobert' or embedding_type.lower() == 'bert':
        context_embedding = TransformerWordEmbeddings(pretrained_model, layers=layer)

    data = []
    for i in files:
        open_f = open(i, 'r')
        data += open_f.readlines()
        open_f.close()

    with open('{}/{}1.pickle'.format(dest_folder, file_name), 'wb') as store, \
            open('{}/ebm_comet_multilabels_p1.txt'.format(dest_folder), 'w') as file:
        label_representations = {}
        domain_label_count = None  # only set by the 'ebm-nlp' branch below

        # fetch outcome phrase vector representations grouped in their respective outcome domain labels
        if file_name.lower() == 'ebm-comet':
            label_representations = ebm_comet_preprocessing(data=data,
                                                            context_embedding=context_embedding,
                                                            sentence=[],
                                                            label_representations={},
                                                            file=file)
        elif file_name.lower() == 'ebm-nlp':
            label_representations, domain_label_count = ebm_nlp_processing(data=data,
                                                                           context_embedding=context_embedding,
                                                                           sentence=[],
                                                                           label_representations={})

        label_centroids = {}
        print(label_representations.keys())
        print([i.shape for i in list(label_representations.values())])
        print(domain_label_count)

        # find the centroid of each group of outcome phrase vectors to represent each label
        for lab in label_representations:
            label_centroids[lab] = torch.mean(label_representations[lab], 0)
        pickle.dump(label_centroids, store)
# corpus: Corpus = NLPTaskDataFetcher.load_corpus(NLPTask.CONLL_03, base_path='resources/tasks')

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

# initialize embeddings
embedding_types: List[TokenEmbeddings] = [
    # GloVe embeddings
    WordEmbeddings('glove'),
    # contextual string embeddings, forward
    PooledFlairEmbeddings('news-forward', pooling='min'),
    # contextual string embeddings, backward
    PooledFlairEmbeddings('news-backward', pooling='min'),
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type)

# initialize trainer
def resume(model1, selected_embeddings, model2):
    # 1. get the corpus
    if model2 == 'AMT':
        corpus = read_in_AMT()
    elif model2 == 'CADEC':
        corpus = read_in_CADEC()
    elif model2 == 'TwitterADR':
        corpus = read_in_TwitterADR()
    elif model2 == 'Micromed':
        corpus = read_in_Micromed()
    print(corpus)

    # 2. what tag do we want to predict?
    tag_type = 'ner'

    # 3. make the tag dictionary from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
    print(tag_dictionary.idx2item)

    embedding_types: List[TokenEmbeddings] = []

    if selected_embeddings['glove']:
        embedding_types.append(WordEmbeddings('glove'))

    if selected_embeddings['twitter']:
        embedding_types.append(WordEmbeddings('twitter'))

    if selected_embeddings['char']:
        embedding_types.append(CharacterEmbeddings())

    if selected_embeddings['flair']:
        embedding_types.append(FlairEmbeddings('news-forward'))
        embedding_types.append(FlairEmbeddings('news-backward'))

    if selected_embeddings['pooled-flair']:
        embedding_types.append(PooledFlairEmbeddings('news-forward', pooling='mean'))
        embedding_types.append(PooledFlairEmbeddings('news-backward', pooling='mean'))

    # init multilingual BERT
    if selected_embeddings['bert']:
        embedding_types.append(BertEmbeddings())

    # init multilingual ELMo
    if selected_embeddings['elmo']:
        embedding_types.append(ELMoEmbeddings())

    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

    # tagger: SequenceTagger = SequenceTagger(hidden_size=256,
    #                                         embeddings=embeddings,
    #                                         tag_dictionary=tag_dictionary,
    #                                         tag_type=tag_type,
    #                                         use_crf=True)

    selected_embeddings_text = [key for key in selected_embeddings if selected_embeddings[key]]
    selected_embeddings_text = '_'.join(selected_embeddings_text)
    model_dir1 = 'resources/taggers/to_resume_CoNLL-03_' + model1 + selected_embeddings_text

    # checkpoint = tagger.load_checkpoint(Path(model_dir1 + '/checkpoint.pt'))
    # trainer = ModelTrainer.load_from_checkpoint(checkpoint, corpus)
    best_model = SequenceTagger.load(Path(model_dir1 + '/best-model.pt'))
    trainer: ModelTrainer = ModelTrainer(best_model, corpus)

    # resources/taggers/to_resume_CADECglove_char_flair/
    model_dir2 = 'resources/taggers/train_with_dev_from_' + model1 + '_to_' + model2 + selected_embeddings_text + '_fine-tuned7s'
    trainer.train(model_dir2,
                  EvaluationMetric.MICRO_F1_SCORE,
                  train_with_dev=True,
                  learning_rate=0.1,
                  mini_batch_size=8,
                  max_epochs=150,
                  checkpoint=True)