def test_train_load_use_tagger_multicorpus(results_base_path, tasks_base_path):
    """Train a SequenceTagger on a MultiCorpus, reload the saved model, and predict.

    Covers prediction on a single sentence, a mixed batch containing an
    empty sentence, and a batch of only an empty sentence.
    """
    fashion_corpus = flair.datasets.ColumnCorpus(
        data_folder=tasks_base_path / "fashion",
        column_format={0: "text", 3: "ner"},
    )
    germeval_corpus = flair.datasets.NER_GERMAN_GERMEVAL(base_path=tasks_base_path)
    multi_corpus = MultiCorpus([fashion_corpus, germeval_corpus])

    tag_dictionary = multi_corpus.make_label_dictionary("ner")

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=turian_embeddings,
        tag_dictionary=tag_dictionary,
        tag_type="ner",
        use_crf=False,
    )

    # initialize trainer and run a short training job
    trainer: ModelTrainer = ModelTrainer(tagger, multi_corpus)
    trainer.train(
        results_base_path,
        learning_rate=0.1,
        mini_batch_size=2,
        max_epochs=2,
        shuffle=False,
    )
    del trainer, tagger, multi_corpus

    # reload the final model from disk
    loaded_model: SequenceTagger = SequenceTagger.load(
        results_base_path / "final-model.pt"
    )

    sentence = Sentence("I love Berlin")
    sentence_empty = Sentence(" ")

    # single sentence, mixed batch, and empty-only batch must all succeed
    loaded_model.predict(sentence)
    loaded_model.predict([sentence, sentence_empty])
    loaded_model.predict([sentence_empty])

    # clean up results directory
    shutil.rmtree(results_base_path)
    del loaded_model
def test_train_resume_tagger(results_base_path, tasks_base_path):
    """Train a SequenceTagger with checkpointing, then resume from the checkpoint.

    Trains for 2 epochs with ``checkpoint=True``, loads ``checkpoint.pt`` and
    resumes training until epoch 4.
    """
    corpus_1 = flair.datasets.ColumnCorpus(
        data_folder=tasks_base_path / "fashion",
        column_format={0: "text", 3: "ner"},
    )
    corpus_2 = flair.datasets.NER_GERMAN_GERMEVAL(
        base_path=tasks_base_path
    ).downsample(0.1)
    corpus = MultiCorpus([corpus_1, corpus_2])
    tag_dictionary = corpus.make_label_dictionary("ner")

    model: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=turian_embeddings,
        tag_dictionary=tag_dictionary,
        tag_type="ner",
        use_crf=False,
    )

    # train model for 2 epochs, writing a checkpoint so training can resume
    trainer = ModelTrainer(model, corpus)
    trainer.train(results_base_path, max_epochs=2, shuffle=False, checkpoint=True)

    del model

    # load the checkpoint model and train until epoch 4
    checkpoint_model = SequenceTagger.load(results_base_path / "checkpoint.pt")
    trainer.resume(model=checkpoint_model, max_epochs=4)

    # clean up results directory (previously the comment promised cleanup but
    # no rmtree was performed, unlike the sibling multicorpus test above)
    shutil.rmtree(results_base_path)
    del trainer