Esempio n. 1
0
def test_train_resume_sequence_tagging_training(results_base_path, tasks_base_path):
    """Train a sequence tagger with checkpointing, then resume from the checkpoint.

    A small tagger (hidden size 64, no CRF, "turian" word embeddings) is
    trained on the combined FASHION and GERMEVAL corpora with
    ``checkpoint=True``.  A second trainer is then built from
    ``checkpoint.pt`` in the results directory and trained again with the
    same settings.  The test makes no explicit assertions; it passes when
    none of these steps raises.

    Args:
        results_base_path: Directory the trainer writes to.  It is joined
            with ``"checkpoint.pt"`` via ``/``, so it is presumably a
            ``pathlib.Path`` -- confirm against the fixture.
        tasks_base_path: Base directory handed to the corpus loader.
    """
    corpus = NLPTaskDataFetcher.load_corpora(
        [NLPTask.FASHION, NLPTask.GERMEVAL], base_path=tasks_base_path
    )
    tag_dictionary = corpus.make_tag_dictionary("ner")

    embeddings = WordEmbeddings("turian")

    model: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type="ner",
        use_crf=False,
    )

    try:
        # first run: train from scratch and write a checkpoint
        trainer = ModelTrainer(model, corpus)
        trainer.train(
            results_base_path, max_epochs=2, test_mode=True, checkpoint=True
        )

        # second run: rebuild the trainer from the checkpoint and train again
        trainer = ModelTrainer.load_from_checkpoint(
            results_base_path / "checkpoint.pt", "SequenceTagger", corpus
        )
        trainer.train(
            results_base_path, max_epochs=2, test_mode=True, checkpoint=True
        )
    finally:
        # Clean up the results directory even when training or resuming
        # fails, so a stale checkpoint is not left behind.  The existence
        # check covers a failure that occurs before anything was written.
        if results_base_path.exists():
            shutil.rmtree(results_base_path)
Esempio n. 2
0
def test_train_load_use_tagger_multicorpus(results_base_path, tasks_base_path):
    """Train a tagger on two corpora, reload the saved model and run predictions.

    A small tagger (hidden size 64, no CRF, "glove" word embeddings) is
    trained on the combined FASHION and GERMEVAL corpora.  The model is then
    loaded from ``final-model.pt`` in the results directory and asked to
    predict on a normal sentence, on a list mixing that sentence with one
    built from a whitespace-only string, and on the whitespace-only sentence
    alone.  The test makes no explicit assertions; it passes when none of
    these steps raises.

    Args:
        results_base_path: Directory the trainer writes to.  It is joined
            with ``'final-model.pt'`` via ``/``, so it is presumably a
            ``pathlib.Path`` -- confirm against the fixture.
        tasks_base_path: Base directory handed to the corpus loader.
    """
    corpus = NLPTaskDataFetcher.load_corpora(
        [NLPTask.FASHION, NLPTask.GERMEVAL], base_path=tasks_base_path)
    tag_dictionary = corpus.make_tag_dictionary('ner')

    embeddings = WordEmbeddings('glove')

    tagger: SequenceTagger = SequenceTagger(hidden_size=64,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type='ner',
                                            use_crf=False)

    # initialize trainer
    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    try:
        trainer.train(results_base_path,
                      learning_rate=0.1,
                      mini_batch_size=2,
                      max_epochs=2,
                      test_mode=True)

        loaded_model: SequenceTagger = SequenceTagger.load_from_file(
            results_base_path / 'final-model.pt')

        sentence = Sentence('I love Berlin')
        sentence_empty = Sentence('       ')

        # single sentence, mixed list, and a list holding only the
        # whitespace-only sentence
        loaded_model.predict(sentence)
        loaded_model.predict([sentence, sentence_empty])
        loaded_model.predict([sentence_empty])
    finally:
        # Clean up the results directory even when training, loading or
        # prediction fails, so stale model files are not left behind.  The
        # existence check covers a failure before anything was written.
        if results_base_path.exists():
            shutil.rmtree(results_base_path)
Esempio n. 3
0
def test_multi_corpus(tasks_base_path):
    """Check the split lengths of the FASHION and GERMEVAL corpora loaded as one.

    Args:
        tasks_base_path: Base directory, passed positionally to the corpus
            loader.
    """
    # load both tasks through a single call so they come back as one corpus
    load_corpora = NLPTaskDataFetcher.load_corpora
    combined = load_corpora([NLPTask.FASHION, NLPTask.GERMEVAL], tasks_base_path)

    # splits are checked one after another, in train / dev / test order
    for split_name, expected_len in (("train", 8), ("dev", 2), ("test", 2)):
        assert len(getattr(combined, split_name)) == expected_len
Esempio n. 4
0
def test_train_resume_sequence_tagging_training(results_base_path,
                                                tasks_base_path):
    """Train a sequence tagger with checkpointing, then resume from the checkpoint.

    A small tagger (hidden size 64, no CRF, "glove" word embeddings) is
    trained on the combined FASHION and GERMEVAL corpora with
    ``checkpoint=True``.  A second trainer is then built from
    ``checkpoint.pt`` in the results directory and trained again with the
    same settings.  The test makes no explicit assertions; it passes when
    none of these steps raises.

    Args:
        results_base_path: Directory the trainer writes to.  It is joined
            with ``u'checkpoint.pt'`` via ``/``, so it is presumably a
            path object -- confirm against the fixture.
        tasks_base_path: Base directory handed to the corpus loader.
    """
    corpus = NLPTaskDataFetcher.load_corpora(
        [NLPTask.FASHION, NLPTask.GERMEVAL], base_path=tasks_base_path)
    tag_dictionary = corpus.make_tag_dictionary(u'ner')
    embeddings = WordEmbeddings(u'glove')
    model = SequenceTagger(hidden_size=64,
                           embeddings=embeddings,
                           tag_dictionary=tag_dictionary,
                           tag_type=u'ner',
                           use_crf=False)
    try:
        # first run: train from scratch and write a checkpoint
        trainer = ModelTrainer(model, corpus)
        trainer.train(results_base_path,
                      max_epochs=2,
                      test_mode=True,
                      checkpoint=True)
        # second run: rebuild the trainer from the checkpoint and train again
        trainer = ModelTrainer.load_from_checkpoint(
            (results_base_path / u'checkpoint.pt'), u'SequenceTagger', corpus)
        trainer.train(results_base_path,
                      max_epochs=2,
                      test_mode=True,
                      checkpoint=True)
    finally:
        # Clean up the results directory even when training or resuming
        # fails, so a stale checkpoint is not left behind.  The existence
        # check covers a failure that occurs before anything was written.
        if results_base_path.exists():
            shutil.rmtree(results_base_path)
Esempio n. 5
0
def test_multi_corpus(tasks_base_path):
    """Load FASHION and GERMEVAL as one corpus and verify each split's length.

    Args:
        tasks_base_path: Base directory, passed positionally to the corpus
            loader.
    """
    fetch = NLPTaskDataFetcher.load_corpora
    both = fetch([NLPTask.FASHION, NLPTask.GERMEVAL], tasks_base_path)

    # Each split is measured immediately before its check, so a failing
    # split stops the test before the later ones are touched.
    train_len = len(both.train)
    assert train_len == 8

    dev_len = len(both.dev)
    assert dev_len == 2

    test_len = len(both.test)
    assert test_len == 2