Example #1
0
def test_train_load_use_tagger_multicorpus(results_base_path, tasks_base_path):
    """Train a tagger on a MultiCorpus, reload the saved model and predict.

    The corpus combines a column-formatted "fashion" corpus with the GermEval
    NER corpus. After a short training run the final model is loaded back from
    ``results_base_path`` and asked to predict on a regular sentence, on a
    whitespace-only sentence, and on a batch mixing both.

    Args:
        results_base_path: Directory the trainer writes to; it is removed at
            the end of the test.
        tasks_base_path: Directory that holds the task data sets.
    """
    # Column 0 of the fashion files carries the token text, column 3 the
    # NER tag.
    fashion_columns = {0: "text", 3: "ner"}
    fashion_corpus = flair.datasets.ColumnCorpus(
        data_folder=tasks_base_path / "fashion",
        column_format=fashion_columns,
    )
    germeval_corpus = flair.datasets.NER_GERMAN_GERMEVAL(
        base_path=tasks_base_path)

    multi_corpus = MultiCorpus([fashion_corpus, germeval_corpus])
    ner_dictionary = multi_corpus.make_label_dictionary("ner")

    sequence_tagger: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=turian_embeddings,
        tag_dictionary=ner_dictionary,
        tag_type="ner",
        use_crf=False,
    )

    # Run a short training on the combined corpus.
    training_options = {
        "learning_rate": 0.1,
        "mini_batch_size": 2,
        "max_epochs": 2,
        "shuffle": False,
    }
    model_trainer: ModelTrainer = ModelTrainer(sequence_tagger, multi_corpus)
    model_trainer.train(results_base_path, **training_options)

    # Drop the training objects before the stored model is loaded again.
    del model_trainer, sequence_tagger, multi_corpus

    final_model_file = results_base_path / "final-model.pt"
    loaded_model: SequenceTagger = SequenceTagger.load(final_model_file)

    regular_sentence = Sentence("I love Berlin")
    blank_sentence = Sentence("       ")

    # Predict on a single sentence, a mixed batch, and a batch that only
    # contains the whitespace-only sentence (in that order).
    prediction_inputs = (
        regular_sentence,
        [regular_sentence, blank_sentence],
        [blank_sentence],
    )
    for prediction_input in prediction_inputs:
        loaded_model.predict(prediction_input)

    # clean up results directory
    shutil.rmtree(results_base_path)
    del loaded_model
def test_train_resume_tagger(results_base_path, tasks_base_path):
    """Train a tagger with checkpointing, then resume from the checkpoint.

    A tagger is trained for 2 epochs on a MultiCorpus (the "fashion" column
    corpus plus GermEval downsampled with factor 0.1) with ``checkpoint=True``.
    The checkpoint written to ``results_base_path`` is then loaded and
    training is resumed with ``max_epochs=4``.

    Args:
        results_base_path: Directory the trainer writes to; it is removed at
            the end of the test.
        tasks_base_path: Directory that holds the task data sets.
    """
    # Column 0 of the fashion files carries the token text, column 3 the
    # NER tag.
    corpus_1 = flair.datasets.ColumnCorpus(data_folder=tasks_base_path /
                                           "fashion",
                                           column_format={
                                               0: "text",
                                               3: "ner"
                                           })
    corpus_2 = flair.datasets.NER_GERMAN_GERMEVAL(
        base_path=tasks_base_path).downsample(0.1)

    corpus = MultiCorpus([corpus_1, corpus_2])
    tag_dictionary = corpus.make_label_dictionary("ner")

    model: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=turian_embeddings,
        tag_dictionary=tag_dictionary,
        tag_type="ner",
        use_crf=False,
    )

    # train model for 2 epochs
    trainer = ModelTrainer(model, corpus)
    trainer.train(results_base_path,
                  max_epochs=2,
                  shuffle=False,
                  checkpoint=True)

    del model

    # load the checkpoint model and train until epoch 4
    checkpoint_model = SequenceTagger.load(results_base_path / "checkpoint.pt")
    trainer.resume(model=checkpoint_model, max_epochs=4)

    # clean up results directory (the same cleanup the multicorpus test in
    # this file performs, so no training artifacts are left on disk)
    shutil.rmtree(results_base_path)
    del trainer