def test_train_resume_sequence_tagging_training(results_base_path, tasks_base_path):
    """Train a tagger with checkpointing, then resume training from the checkpoint.

    Args:
        results_base_path: Directory the trainer writes into. It is removed
            when the test ends, whether the test passed or failed.
        tasks_base_path: Base path handed to ``load_corpora`` for the
            FASHION and GERMEVAL tasks.
    """
    corpus = NLPTaskDataFetcher.load_corpora(
        [NLPTask.FASHION, NLPTask.GERMEVAL], base_path=tasks_base_path
    )
    tag_dictionary = corpus.make_tag_dictionary("ner")

    embeddings = WordEmbeddings("turian")

    model: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type="ner",
        use_crf=False,
    )

    try:
        trainer = ModelTrainer(model, corpus)
        trainer.train(results_base_path, max_epochs=2, test_mode=True, checkpoint=True)

        # Rebuild the trainer from "checkpoint.pt" (presumably written by the
        # run above because checkpoint=True) and train a second time.
        trainer = ModelTrainer.load_from_checkpoint(
            results_base_path / "checkpoint.pt", "SequenceTagger", corpus
        )
        trainer.train(results_base_path, max_epochs=2, test_mode=True, checkpoint=True)
    finally:
        # clean up results directory even when training or resuming raised,
        # so stale artifacts cannot leak into later tests. ignore_errors
        # keeps a missing directory from masking the original failure.
        shutil.rmtree(results_base_path, ignore_errors=True)
def test_train_load_use_tagger_multicorpus(results_base_path, tasks_base_path):
    """Train a tagger on two tasks loaded together, reload it and run predictions.

    The reloaded model is asked to predict on a normal sentence, on a batch
    that mixes a normal and a whitespace-only sentence, and on a batch holding
    only the whitespace-only sentence.

    Args:
        results_base_path: Directory the trainer writes into. It is removed
            when the test ends, whether the test passed or failed.
        tasks_base_path: Base path handed to ``load_corpora`` for the
            FASHION and GERMEVAL tasks.
    """
    corpus = NLPTaskDataFetcher.load_corpora(
        [NLPTask.FASHION, NLPTask.GERMEVAL], base_path=tasks_base_path
    )
    tag_dictionary = corpus.make_tag_dictionary('ner')

    embeddings = WordEmbeddings('glove')

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type='ner',
        use_crf=False,
    )

    try:
        # initialize trainer
        trainer: ModelTrainer = ModelTrainer(tagger, corpus)

        trainer.train(
            results_base_path,
            learning_rate=0.1,
            mini_batch_size=2,
            max_epochs=2,
            test_mode=True,
        )

        loaded_model: SequenceTagger = SequenceTagger.load_from_file(
            results_base_path / 'final-model.pt'
        )

        sentence = Sentence('I love Berlin')
        sentence_empty = Sentence(' ')

        # Single sentence, mixed batch, and a batch with only the
        # whitespace-only sentence must all be accepted by predict().
        loaded_model.predict(sentence)
        loaded_model.predict([sentence, sentence_empty])
        loaded_model.predict([sentence_empty])
    finally:
        # clean up results directory even when train/load/predict raised,
        # so stale artifacts cannot leak into later tests. ignore_errors
        # keeps a missing directory from masking the original failure.
        shutil.rmtree(results_base_path, ignore_errors=True)
def test_multi_corpus(tasks_base_path):
    """Check the split sizes obtained when two tasks are loaded in one call.

    Args:
        tasks_base_path: Base path passed to ``load_corpora``.
    """
    # get two corpora as one
    task_list = [NLPTask.FASHION, NLPTask.GERMEVAL]
    combined = NLPTaskDataFetcher.load_corpora(task_list, tasks_base_path)

    # Expected size of each split of the combined corpus.
    assert len(combined.train) == 8
    assert len(combined.dev) == 2
    assert len(combined.test) == 2
def test_train_resume_sequence_tagging_training(results_base_path, tasks_base_path):
    """Train a tagger with checkpointing, then resume training from the checkpoint.

    Args:
        results_base_path: Directory the trainer writes into. It is removed
            when the test ends, whether the test passed or failed.
        tasks_base_path: Base path handed to ``load_corpora`` for the
            FASHION and GERMEVAL tasks.
    """
    corpus = NLPTaskDataFetcher.load_corpora(
        [NLPTask.FASHION, NLPTask.GERMEVAL], base_path=tasks_base_path)
    tag_dictionary = corpus.make_tag_dictionary(u'ner')

    embeddings = WordEmbeddings(u'glove')

    model = SequenceTagger(
        hidden_size=64,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=u'ner',
        use_crf=False)

    try:
        trainer = ModelTrainer(model, corpus)
        trainer.train(results_base_path, max_epochs=2,
                      test_mode=True, checkpoint=True)

        # Rebuild the trainer from u'checkpoint.pt' (presumably written by the
        # run above because checkpoint=True) and train a second time.
        trainer = ModelTrainer.load_from_checkpoint(
            (results_base_path / u'checkpoint.pt'), u'SequenceTagger', corpus)
        trainer.train(results_base_path, max_epochs=2,
                      test_mode=True, checkpoint=True)
    finally:
        # Remove the results directory even when training or resuming raised,
        # so stale artifacts cannot leak into later tests. ignore_errors
        # keeps a missing directory from masking the original failure.
        shutil.rmtree(results_base_path, ignore_errors=True)
def test_multi_corpus(tasks_base_path):
    """Load the FASHION and GERMEVAL tasks together and verify the split sizes.

    Args:
        tasks_base_path: Base path passed to ``load_corpora``.
    """
    multi = NLPTaskDataFetcher.load_corpora(
        [NLPTask.FASHION, NLPTask.GERMEVAL],
        tasks_base_path,
    )

    # One assertion per split so a failure names the split that is off.
    assert len(multi.train) == 8
    assert len(multi.dev) == 2
    assert len(multi.test) == 2