Example #1
0
def test_plotting_training_curves_and_weights(resources_path):
    """Smoke-test Plotter: render the loss curve and weight plots from the
    checked-in fixtures, then delete the generated images."""
    viz = Plotter()
    viz.plot_training_curves(resources_path / "visual/loss.tsv")
    viz.plot_weights(resources_path / "visual/weights.txt")

    # remove the images the two plot calls wrote, keeping the fixture dir clean
    for artifact in ("visual/weights.png", "visual/training.png"):
        (resources_path / artifact).unlink()
Example #2
0
def test_plotting_training_curves_and_weights():
    """Smoke-test Plotter against the relative-path visual fixtures and
    clean up the images it produces."""
    viz = Plotter()
    viz.plot_training_curves('./resources/visual/loss.tsv')
    viz.plot_weights('./resources/visual/weights.txt')

    # delete the generated plot files so repeated runs start clean
    os.remove('./resources/visual/weights.png')
    os.remove('./resources/visual/training.png')
Example #3
0
    def plot_curve(self,
                   traing_curve_path=os.path.normpath(
                       r'./resources/taggers/slow_bert/loss.tsv'),
                   weights_path=os.path.normpath(
                       r'./resources/taggers/slow_bert/weights.txt')):
        """Render the training-loss curve and the weight traces with Flair's
        Plotter.

        BUG FIX: `weights_path` previously defaulted to `loss.tsv` (a
        copy-paste of the curve default), so plot_weights was fed the wrong
        file; it now defaults to `weights.txt` in the same model directory.

        :param traing_curve_path: path to the trainer's loss.tsv log
        :param weights_path: path to the trainer's weights.txt log
        """
        from flair.visual.training_curves import Plotter
        plotter = Plotter()

        plotter.plot_training_curves(traing_curve_path)
        plotter.plot_weights(weights_path)
Example #4
0
def main(args):
    """End-to-end text classification: load corpus, build embeddings,
    train a classifier, and plot training diagnostics."""
    # NOTE: the passed-in value is overwritten by a fresh parse (original behavior)
    args = parser.parse_args()

    # load the train/dev/test splits from the first data directory argument
    corpus: TaggedCorpus = NLPTaskDataFetcher.load_classification_corpus(
        args.data_dir[0],
        train_file='train.txt',
        dev_file='dev.txt',
        test_file='test.txt')

    # derive the label inventory from the corpus
    label_dict = corpus.make_label_dictionary()

    # token-level embeddings that feed the document encoder
    token_embeddings = [
        WordEmbeddings('glove'),

        # comment in flair embeddings for state-of-the-art results
        # FlairEmbeddings('news-forward'),
        # FlairEmbeddings('news-backward'),
        # ELMoEmbeddings()
    ]

    # document encoder: LSTM over reprojected word embeddings
    document_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings(
        token_embeddings,
        hidden_size=128,
        reproject_words=True,
        reproject_words_dimension=64,
    )

    # single-label classifier on top of the document embedding
    classifier = TextClassifier(document_embeddings,
                                label_dictionary=label_dict,
                                multi_label=False)

    trainer = ModelTrainer(classifier, corpus)

    # run training; model, logs and plots land under model_out
    model_out = 'resources/classifiers/sentence-classification/glove'
    trainer.train(model_out,
                  learning_rate=0.1,
                  mini_batch_size=32,
                  anneal_factor=0.5,
                  patience=5,
                  max_epochs=100)

    # render loss curve and weight traces next to the model (optional)
    from flair.visual.training_curves import Plotter
    plotter = Plotter()
    plotter.plot_training_curves(join(model_out, 'loss.tsv'))
    plotter.plot_weights(join(model_out, 'weights.txt'))
Example #5
0
    def __init__(self, corpus_name: str):
        """Train a Flair NER tagger on the Dirkson column corpus `corpus_name`.

        Loads the train/test splits, stacks BERT + Flair embeddings, trains
        a CRF sequence tagger, and writes the model plus loss/weight plots
        under the corpus-specific output directory.
        """
        corpus = NLPTaskDataFetcher.load_column_corpus(
            loc.abs_path([loc.ASSETS, loc.MODELS, loc.DIRKSON]), {
                0: 'text',
                1: 'ner'
            },
            train_file=corpus_name + loc.DIRKSON_VALIDATION_TXT,
            test_file=corpus_name + loc.DIRKSON_TEST_TXT)

        embedding_types = [
            BertEmbeddings('bert-base-uncased'),
            FlairEmbeddings('mix-forward'),
            FlairEmbeddings('mix-backward')
        ]

        tag_type = 'ner'
        embeddings = StackedEmbeddings(embeddings=embedding_types)
        tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

        tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                                embeddings=embeddings,
                                                tag_dictionary=tag_dictionary,
                                                tag_type=tag_type,
                                                use_crf=True)

        trainer: ModelTrainer = ModelTrainer(tagger, corpus)

        # BUG FIX: the original tested `if not path.exists:` — a reference to
        # the function object, which is always truthy — so the output
        # directory was never created. Check the actual target path instead.
        output_dir = loc.abs_path(
            [loc.ASSETS, loc.MODELS, loc.DIRKSON, corpus_name])
        if not path.exists(output_dir):
            os.mkdir(output_dir)
        trainer.train(output_dir,
                      learning_rate=0.1,
                      mini_batch_size=32,
                      max_epochs=150)

        # plot loss curve and weight traces into the same directory
        plotter = Plotter()
        plotter.plot_training_curves(
            loc.abs_path([
                loc.ASSETS, loc.MODELS, loc.DIRKSON, corpus_name, loc.LOSS_TSV
            ]))
        plotter.plot_weights(
            loc.abs_path([
                loc.ASSETS, loc.MODELS, loc.DIRKSON, corpus_name,
                loc.WEIGHTS_TXT
            ]))
Example #6
0
    def train(self,
              trainfile,
              devfile,
              testfile,
              resfolder,
              embtype="bert",
              chunk_len=100,
              batch_len=8):
        """Train a new model with the settings used in project Redewiedergabe.

        Not accessible from rwtagger_script and not documented in detail.
        Use at your own risk. ;-)

        :param trainfile: path to the training split
        :param devfile: path to the development split
        :param testfile: path to the test split
        :param resfolder: output folder for model, logs and plots
        :param embtype: embedding preset name (default "bert")
        :param chunk_len: chunk length used when building the corpus
        :param batch_len: mini-batch size
        :return: None
        """
        emb_name, embeddings = self._get_embeddings(embtype)

        corpus: Corpus = self.create_corpus(trainfile, devfile, testfile,
                                            chunk_len)
        vocab = corpus.make_tag_dictionary(tag_type="cat")

        # make sure the output folder exists before the trainer writes into it
        if not os.path.exists(resfolder):
            os.makedirs(resfolder)

        seq_model: SequenceTagger = SequenceTagger(hidden_size=256,
                                                   embeddings=embeddings,
                                                   tag_dictionary=vocab,
                                                   tag_type="cat",
                                                   use_crf=True,
                                                   rnn_layers=2)
        runner: ModelTrainer = ModelTrainer(seq_model, corpus)

        runner.train(resfolder,
                     learning_rate=0.1,
                     mini_batch_size=batch_len,
                     max_epochs=150,
                     checkpoint=True)

        # render loss curve and weight traces into resfolder
        viz = Plotter()
        viz.plot_training_curves(os.path.join(resfolder, 'loss.tsv'))
        viz.plot_weights(os.path.join(resfolder, 'weights.txt'))
Example #7
0
def train(data_folder, model_output_folder):
    """Train a multilingual NER tagger on CoNLL-03 and plot its training
    diagnostics into the output folder."""
    corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(
        NLPTask.CONLL_03, base_path=data_folder)

    # target annotation layer
    tag_type = 'ner'

    # label inventory derived from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
    print(tag_dictionary.idx2item)

    # contextual string embeddings (forward and backward language models)
    fwd_lm = FlairEmbeddings('multi-forward')
    bwd_lm = FlairEmbeddings('multi-backward')

    # multilingual BERT
    bert = BertEmbeddings('bert-base-multilingual-cased')

    # stack everything into a single token representation
    embedding_types: List[TokenEmbeddings] = [fwd_lm, bwd_lm, bert]

    embeddings: StackedEmbeddings = StackedEmbeddings(
        embeddings=embedding_types)

    # sequence tagger over the stacked embeddings
    from flair.models import SequenceTagger
    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type=tag_type)

    from flair.trainers import ModelTrainer
    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    trainer.train(model_output_folder, mini_batch_size=256, max_epochs=150)

    # render loss curve and weight traces (optional)
    from flair.visual.training_curves import Plotter
    plotter = Plotter()
    plotter.plot_training_curves(model_output_folder + '/loss.tsv')
    plotter.plot_weights(model_output_folder + '/weights.txt')
Example #8
0
def main():
    """Train the Flair text classifier on the pre-exported CSV splits and
    plot the resulting loss/weight diagnostics."""
    # corpus from pre-exported train/dev/test CSV files
    corpus = NLPTaskDataFetcher.load_classification_corpus(
        Path(DATA_PATH),
        train_file='flair_train.csv',
        test_file='flair_test.csv',
        dev_file='flair_dev.csv')

    labels = corpus.make_label_dictionary()

    # word-level + contextual string embeddings
    token_embeddings = [
        WordEmbeddings('crawl'),
        FlairEmbeddings('news-forward-fast', chars_per_chunk=128),
        FlairEmbeddings('news-backward-fast', chars_per_chunk=128)
    ]

    # RNN document encoder over reprojected word embeddings
    doc_embeddings = DocumentRNNEmbeddings(token_embeddings,
                                           rnn_type='LSTM',
                                           hidden_size=128,
                                           reproject_words=True,
                                           reproject_words_dimension=64)

    model = TextClassifier(doc_embeddings,
                           label_dictionary=labels,
                           multi_label=False)

    trainer = ModelTrainer(model, corpus)
    trainer.train(PRETRAINED_FLAIR,
                  max_epochs=40,
                  learning_rate=0.2,
                  mini_batch_size=32,
                  embeddings_in_memory=False,
                  checkpoint=True)

    # render loss curve and weight traces
    viz = Plotter()
    viz.plot_training_curves(FLAIR_LOSS)
    viz.plot_weights(FLAIR_WEIGHTS)
Example #9
0
    # comment in these lines to use contextual string embeddings
    # CharLMEmbeddings('news-forward'),
    # CharLMEmbeddings('news-backward'),
]

embeddings = WordEmbeddings("tmp/glove.bin")

# 5. initialize sequence tagger
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=1024,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=True)

# 6. initialize trainer
from flair.trainers import SequenceTaggerTrainer

trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus)

# 7. start training — single output directory, shared with the plotting step
model_dir = 'resources/taggers/example-ner'
trainer.train(model_dir,
              learning_rate=0.1,
              mini_batch_size=8,
              max_epochs=150)

# 8. plot training curves (optional)
# BUG FIX: the plots previously read from 'resources/taggers1/example-ner',
# which is not the directory the trainer wrote loss.tsv/weights.txt into.
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves(model_dir + '/loss.tsv')
plotter.plot_weights(model_dir + '/weights.txt')
Example #10
0
from flair.embeddings import WordEmbeddings, FlairEmbeddings, DocumentRNNEmbeddings
from flair.models import TextClassifier
from flair.trainers import ModelTrainer

import shelve
from os import path

#%% Restore the classifier and corpus pickled during preparation
with shelve.open(path.join('data', 'prepared_data', 'bbc')) as db:
    classifier = db['classifier']
    corpus = db['corpus']

#%% Set up the trainer and the output location
trainer = ModelTrainer(classifier, corpus)
model_path = path.join('models', 'bbc')

# run the optimisation loop; artefacts land under model_path
trainer.train(model_path,
              max_epochs=150,
              mini_batch_size=32,
              learning_rate=0.1,
              anneal_factor=0.5,
              patience=5)

# render weight traces and the loss curve (optional)
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves(path.join(model_path, 'loss.tsv'))
plotter.plot_weights(path.join(model_path, 'weights.txt'))
#%%
Example #11
0
                                                                     )

# 5. create the text classifier
classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

# 6. initialize the text classifier trainer
trainer = ModelTrainer(classifier, corpus)

# 7. start the training
trainer.train('resources/taggers/ag_news',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=150)

# 8. plot training curves (optional)
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves('resources/taggers/ag_news/loss.tsv')
plotter.plot_weights('resources/taggers/ag_news/weights.txt')

classifier = TextClassifier.load('resources/taggers/ag_news/final-model.pt')

# create example sentence
sentence = Sentence('France is the current world cup winner.')

# predict tags and print
classifier.predict(sentence)

print(sentence.labels)
Example #12
0
def run_experiments(input_dir: Path, output_dir: Path):
    """Train aspect-extraction taggers for several embedding stacks and
    plot training diagnostics for each run."""
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # column-format corpus: token, dependency tag, aspect tag
    corpus = ColumnCorpus(
        input_dir,
        {0: 'text', 1: 'dep', 2: 'aspect'},
        train_file='Laptops_poria-train.conll',
        # train_file='Restaurants_poria-train.conll',
        test_file='Laptops_poria-test.conll',
        # test_file='Restaurants_poria-test.conll',
        dev_file='Laptops_poria-train.conll'
        # dev_file='Restaurants_poria-train.conll'
    )

    # the annotation layer we predict
    tag_type = 'aspect'

    # label inventory derived from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
    print(tag_dictionary.idx2item)

    # embedding stacks to compare; commented entries are earlier experiments
    embedding_suites = {
        # 'glove+aspects': [
        #     WordEmbeddings('glove'),
        #     WordEmbeddings(
        #         (output_dir / 'aspect_2_aspect_graph-en_core_web_lg.en_core_web_lg.word2vec_format.bin').as_posix()
        #     ),
        # ],
        # 'glove': [
        #     WordEmbeddings('glove'),
        # ],
        # 'charlmembedding': [
        #     FlairEmbeddings('news-forward'),
        #     FlairEmbeddings('news-backward'),
        # ],
        # 'glove-simple-char': [
        #     WordEmbeddings('glove'),
        #     CharacterEmbeddings(),
        # ],
        'bert+aspects': [
            BertEmbeddings('bert-large-cased'),
            WordEmbeddings(
                (output_dir / 'aspect_2_aspect_graph-en_core_web_lg.en_core_web_lg.word2vec_format.bin').as_posix()
            )
        ],
        'bert': [
            BertEmbeddings('bert-large-cased'),
        ],
        # 'elmo': [
        #     ELMoEmbeddings('original')
        # ]
    }

    for name, suite in tqdm(embedding_suites.items(),
                            desc='Different embeddings stacked',
                            total=len(embedding_suites)):
        results_folder = Path(DEFAULT_OUTPUT_PATH / f'sequence-tagging/aspects/laptops-{name}')
        stacked: StackedEmbeddings = StackedEmbeddings(embeddings=suite)

        # sequence tagger with a CRF decoder over the stacked embeddings
        tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                                embeddings=stacked,
                                                tag_dictionary=tag_dictionary,
                                                tag_type=tag_type,
                                                use_crf=True)

        trainer: ModelTrainer = ModelTrainer(tagger, corpus)

        # run training for this embedding stack
        trainer.train(results_folder.as_posix(),
                      learning_rate=0.1,
                      mini_batch_size=32,
                      max_epochs=150)

        # render loss curve and weight traces (optional)
        plotter = Plotter()
        plotter.plot_training_curves(results_folder / 'loss.tsv')
        plotter.plot_weights(results_folder / 'weights.txt')
Example #13
0
# character features plus pre-trained GloVe vectors
embedding_types: List[TokenEmbeddings] = [
    CharacterEmbeddings(),
    WordEmbeddings("tmp/glove.1.8G.bin")
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# sequence tagger with a CRF decoder over the stacked embeddings
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=1024,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=True)

# trainer
from flair.trainers import SequenceTaggerTrainer

trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus)

# run training; artefacts (model, loss.tsv, weights.txt) land under model_path
model_path = "tmp/model2"
trainer.train(model_path,
              learning_rate=0.1,
              mini_batch_size=8,
              max_epochs=150)

# render loss curve and weight traces (optional)
from flair.visual.training_curves import Plotter

plotter = Plotter()
plotter.plot_training_curves(f'{model_path}/loss.tsv')
plotter.plot_weights(f'{model_path}/weights.txt')
Example #14
0
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# sequence tagger with a CRF decoding layer
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=True)

# trainer
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(tagger, corpus)

# single deterministic epoch (shuffling disabled)
trainer.train("resources/taggers/example-ner",
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=1,
              shuffle=False)

# render loss curve and weight traces
plotter = Plotter()
plotter.plot_training_curves("resources/taggers/example-ner/loss.tsv")
plotter.plot_weights("resources/taggers/example-ner/weights.txt")
Example #15
0
    def train_all(self):
        """Train NER tagger(s) driven entirely by the YAML/JSON config at self.config.

        Reads the config, builds a column corpus (or a list of train-only
        corpora), assembles an embedding stack from config flags, then trains
        either one tagger or one tagger per corpus, optionally resuming from
        a checkpoint and plotting loss/weight diagnostics.
        """
        # NOTE(review): the file handle is never closed — consider `with open(...)`.
        config_file = open(self.config, "r")
        if self.config.split('.')[-1] == "yml":
            # NOTE(review): yaml.load without an explicit Loader is deprecated
            # and unsafe on untrusted input — confirm the config source is trusted.
            datastore = yaml.load(config_file)
        elif self.config.split('.')[-1] == "json":
            datastore = json.loads(config_file.read())
        else:
            print("Need a json or yaml file as config")
            sys.exit(0)

        # column layout of the corpus files: which column is the token text,
        # which carries the NER tag
        columns = {
            int(datastore["dataset_reader"]["position_text"]): "text",
            int(datastore["dataset_reader"]["position_ner"]): "ner",
        }

        # focus_on = datastore["dataset_reader"]["focus_on"]

        # two loading modes: train-only corpora vs full train/dev/test corpus
        if bool(datastore["dataset_reader"]["only_train"]):

            all_corpus = []
            log.info("Reading data from {}".format(datastore["dataset_reader"]["data_folder"]))

            # presumably returns an indexable collection of corpora — the
            # loop below iterates over it; verify against ColumnCorpusTrain
            all_corpus = ColumnCorpusTrain(
                datastore["dataset_reader"]["data_folder"],
                columns,
                train_file=datastore["dataset_reader"]["train_name"],
            )

            tag_type = "ner"
            # tag dictionary is built from the first corpus only
            tag_dictionary = all_corpus[0].make_tag_dictionary(tag_type=tag_type)

        else:

            iobes_corpus = ColumnCorpus(
                datastore["dataset_reader"]["data_folder"],
                columns,
                train_file=datastore["dataset_reader"]["train_name"],
                dev_file=datastore["dataset_reader"]["dev_name"],
                test_file=datastore["dataset_reader"]["test_name"],
            )

            tag_type = "ner"
            tag_dictionary = iobes_corpus.make_tag_dictionary(tag_type=tag_type)

            # optionally truncate the training split to a configured ratio;
            # a missing/invalid "train_ratio" key silently keeps the full set
            try:
                train_ratio = float(datastore["dataset_reader"]["train_ratio"])
                iobes_corpus = Corpus(iobes_corpus.train[0:int(len(iobes_corpus.train) * train_ratio)],
                                      iobes_corpus.dev, iobes_corpus.test)
                # NOTE(review): log_ratio is a tuple, not a formatted string
                log_ratio = "Using only ", str(train_ratio * 100), "% of the train dataset"
                log.info(log_ratio)
            except:
                pass

        # build the embedding stack from the config's embeddings_list;
        # NOTE(review): word_char/char_word are never appended to anywhere in
        # this method, so the WordEmbeddingsVecWordChar branch below is dead.
        embed_list = []
        word_char = []
        char_word = []
        for embed in datastore["embeddings"]["embeddings_list"]:

            if embed == "bpe":
                embed_list.append(BytePairEmbeddings(datastore["embeddings"]["lang"]))
            elif embed == "fasttext":
                embed_list.append(WordEmbeddings(datastore["embeddings"]["lang"]))
            elif embed == "flair" and datastore["embeddings"]["lang"] == "en":
                embed_list.append(FlairEmbeddings("news-forward"))
                embed_list.append(FlairEmbeddings("news-backward"))
            elif embed == "bert-base-uncased":
                if datastore["embeddings"]["lang"] == "en":
                    embed_list.append(BertEmbeddings("bert-base-uncased"))
            elif embed == "bert-base-cased":
                if datastore["embeddings"]["lang"] == "en":
                    embed_list.append(BertEmbeddings("bert-base-cased"))
            elif embed == "bert-large-uncased":
                if datastore["embeddings"]["lang"] == "en":
                    embed_list.append(BertEmbeddings("bert-large-uncased"))
            elif embed == "bert-large-cased":
                if datastore["embeddings"]["lang"] == "en":
                    embed_list.append(BertEmbeddings("bert-large-cased"))
            elif embed == "elmo-small":
                if datastore["embeddings"]["lang"] == "en":
                    embed_list.append(ELMoEmbeddings("small"))
            elif embed == "elmo-medium":
                if datastore["embeddings"]["lang"] == "en":
                    embed_list.append(ELMoEmbeddings("medium"))
            elif embed == "elmo-original":
                if datastore["embeddings"]["lang"] == "en":
                    embed_list.append(ELMoEmbeddings("original"))
            elif embed == "bert-base-chinese":
                if datastore["embeddings"]["lang"] == "zh":
                    embed_list.append(emb.BertEmbeddingsChinese("bert-base-chinese"))
            else:
                # unrecognized names are treated as embedding file paths of
                # the form *.<kind>.<ext> and dispatched on kind/extension
                split_name = embed.split(".")
                ext = split_name[-1]
                kind = split_name[-2]

                if ext == "pt":  # Flair type

                    extra_index = 0
                    try:
                        extra_index = int(datastore["embeddings"]["extra_index"])
                    except:
                        pass

                    if kind == "char":
                        embed_list.append(emb.FlairEmbeddingsChar(embed, extra_index=extra_index))
                    elif kind == "char-seg":
                        embed_list.append(emb.FlairEmbeddingsWordLevelCharSeg(embed, extra_index=extra_index))

                if ext == "vec":  # Char type
                    if kind == "char-seg":
                        embed_list.append(emb.WordEmbeddingsVecCharSeg(embed))
                    elif kind == "char":
                        embed_list.append(emb.WordEmbeddingsVecFirst(embed))
                    elif kind == "word":
                        embed_list.append(emb.WordEmbeddingsVecWord(embed))
                    elif kind == "bichar":
                        embed_list.append(emb.WordEmbeddingsVecBichar(embed))
                if ext == "bin":
                    if kind == "word":
                        embed_list.append(emb.WordEmbeddingsBinWord(embed))
                    elif kind == "bichar":
                        embed_list.append(emb.WordEmbeddingsBinBichar(embed))

        # optional extra embeddings; each try/except deliberately treats a
        # missing config key as "feature disabled".
        # NOTE(review): in only_train mode iobes_corpus is undefined here, so
        # ner_embed/one_hot would raise NameError — silently swallowed by the
        # bare except; confirm this is intended.
        try:
            if bool(datastore["embeddings"]["ner_embed"]) == True:
                print("Generate NER embeddings..")
                embed_list.append(
                    emb.nerEmbedding(
                        generateNerEmbFromTrain(
                            iobes_corpus.train, tag_dictionary.get_items()
                        )
                    )
                )
        except:
            pass
        try:
            if bool(datastore["embeddings"]["one_hot"]) == True:
                print("Generate one hot embeddings..")
                embed_list.append(emb.OneHotEmbeddings(iobes_corpus))
        except:
            pass
        try:
            if datastore["embeddings"]["embeddings_ngram_list"] != None:
                embed_list.append(
                    emb.WordEmbeddingsVecNGramList(
                        datastore["embeddings"]["embeddings_ngram_list"]
                    )
                )
        except:
            pass

        # dead branch: word_char/char_word are always empty (see note above)
        if len(word_char) == 1 and len(char_word) == 1:
            embed_list.append(emb.WordEmbeddingsVecWordChar(word_char[0], char_word[0]))

        embedding_types: List[TokenEmbeddings] = embed_list

        embeddings: emb.StackedEmbeddingsNew = emb.StackedEmbeddingsNew(
            embeddings=embedding_types
        )

        if bool(datastore["dataset_reader"]["only_train"]):
            # train one tagger per corpus, collect test scores, report the mean
            score = []
            for i in range(len(all_corpus)):

                tagger: SequenceTagger = SequenceTagger(
                    hidden_size=int(datastore["model"]["hidden_size"]),
                    embeddings=embeddings,
                    tag_dictionary=tag_dictionary,
                    tag_type=tag_type,
                    use_crf=bool(datastore["model"]["use_crf"]),
                    dropout=float(datastore["model"]["dropout"]),
                    word_dropout=float(datastore["model"]["word_dropout"]),
                    locked_dropout=float(datastore["model"]["locked_dropout"]),
                    rnn_layers=int(datastore["model"]["rnn_layers"]),
                )

                # per-corpus output subfolder; prefer resuming from
                # checkpoint.pt, else best-model.pt, else train from scratch
                folder = datastore["train_config"]["folder"] + "/" + str(i)
                best = Path(folder + "/checkpoint.pt")
                iobes_corpus = all_corpus[i]
                if not best.exists():
                    best = Path(folder + "/best-model.pt")

                if best.exists():
                    trainer = ModelTrainer.load_checkpoint(
                        tagger.load_checkpoint(best), iobes_corpus
                    )
                else:
                    trainer: ModelTrainer = ModelTrainer(tagger, iobes_corpus)

                # 7. start training

                result = trainer.train(
                    folder,
                    learning_rate=float(datastore["train_config"]["learning_rate"]),
                    anneal_factor=float(datastore["train_config"]["anneal_factor"]),
                    min_learning_rate=float(datastore["train_config"]["min_learning_rate"]),
                    mini_batch_size=int(datastore["train_config"]["batch_size"]),
                    max_epochs=int(datastore["train_config"]["epoch"]),
                    save_final_model=bool(datastore["train_config"]["save_final_model"]),
                    checkpoint=bool(datastore["train_config"]["checkpoint"]),
                    param_selection_mode=bool(
                        datastore["train_config"]["param_selection_mode"]
                    ),
                    patience=int(datastore["train_config"]["patience"]),
                    monitor_test=bool(datastore["train_config"]["monitor_test"]),
                    embeddings_storage_mode=str(datastore["train_config"]["embeddings_storage_mode"]),
                    shuffle=bool(datastore["train_config"]["shuffle"]),
                )

                # optional diagnostics plots, controlled by config flags
                plotter = Plotter()
                if bool(datastore["train_config"]["save_plot_training_curve"]):
                    curve = folder + "/loss.tsv"
                    plotter.plot_training_curves(curve)
                if bool(datastore["train_config"]["save_plot_weights"]):
                    weight = folder + "/weights.txt"
                    plotter.plot_weights(weight)

                score.append(result["test_score"])

            print(score, "  \n Moyenne : ", round(sum(score) / len(score), 2))


        else:

            # single-corpus training with the same resume-from-checkpoint logic
            tagger: SequenceTagger = SequenceTagger(
                hidden_size=int(datastore["model"]["hidden_size"]),
                embeddings=embeddings,
                tag_dictionary=tag_dictionary,
                tag_type=tag_type,
                use_crf=bool(datastore["model"]["use_crf"]),
                dropout=float(datastore["model"]["dropout"]),
                word_dropout=float(datastore["model"]["word_dropout"]),
                locked_dropout=float(datastore["model"]["locked_dropout"]),
                rnn_layers=int(datastore["model"]["rnn_layers"]),
            )

            folder = datastore["train_config"]["folder"]
            best = Path(folder + "/checkpoint.pt")
            if not best.exists():
                best = Path(folder + "/best-model.pt")

            if best.exists():
                trainer = ModelTrainer.load_checkpoint(
                    tagger.load_checkpoint(best), iobes_corpus
                )
            else:
                trainer: ModelTrainer = ModelTrainer(tagger, iobes_corpus)

            # 7. start training

            trainer.train(
                folder,
                learning_rate=float(datastore["train_config"]["learning_rate"]),
                anneal_factor=float(datastore["train_config"]["anneal_factor"]),
                min_learning_rate=float(datastore["train_config"]["min_learning_rate"]),
                mini_batch_size=int(datastore["train_config"]["batch_size"]),
                max_epochs=int(datastore["train_config"]["epoch"]),
                save_final_model=bool(datastore["train_config"]["save_final_model"]),
                checkpoint=bool(datastore["train_config"]["checkpoint"]),
                param_selection_mode=bool(
                    datastore["train_config"]["param_selection_mode"]
                ),
                patience=int(datastore["train_config"]["patience"]),
                monitor_test=bool(datastore["train_config"]["monitor_test"]),
                embeddings_storage_mode=str(datastore["train_config"]["embeddings_storage_mode"]),
                shuffle=bool(datastore["train_config"]["shuffle"]),
            )

            # optional diagnostics plots, controlled by config flags
            plotter = Plotter()
            if bool(datastore["train_config"]["save_plot_training_curve"]):
                curve = folder + "/loss.tsv"
                plotter.plot_training_curves(curve)
            if bool(datastore["train_config"]["save_plot_weights"]):
                weight = folder + "/weights.txt"
                plotter.plot_weights(weight)
Example #16
0
def train(model, selected_embeddings):
  """Train a Flair NER tagger for one ADR corpus.

  :param model: corpus selector — one of 'AMT', 'CADEC', 'TwitterADR',
      'Micromed'; any other value raises ValueError
  :param selected_embeddings: dict of embedding-name -> bool flags; missing
      keys are treated as disabled (previously they raised KeyError)
  """
  # 1. get the corpus
  if model == 'AMT':
    corpus = read_in_AMT()
  elif model == 'CADEC':
    corpus = read_in_CADEC()
  elif model == 'TwitterADR':
    corpus = read_in_TwitterADR()
  elif model == 'Micromed':
    corpus = read_in_Micromed()
  else:
    # BUG FIX: an unknown corpus name previously fell through and crashed
    # later with a NameError on `corpus`; fail fast with a clear message.
    raise ValueError('unknown corpus: ' + repr(model))
  print(corpus)

  # 2. what tag do we want to predict?
  tag_type = 'ner'

  # 3. make the tag dictionary from the corpus
  tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
  print(tag_dictionary.idx2item)

  # 4. collect the embeddings enabled by the flags
  embedding_types: List[TokenEmbeddings] = []

  if selected_embeddings.get('glove'):
    embedding_types.append(WordEmbeddings('glove'))

  if selected_embeddings.get('twitter'):
    embedding_types.append(WordEmbeddings('twitter'))

  if selected_embeddings.get('char'):
    embedding_types.append(CharacterEmbeddings())

  # contextual string embeddings, forward and backward
  if selected_embeddings.get('flair'):
    embedding_types.append(FlairEmbeddings('news-forward'))
    embedding_types.append(FlairEmbeddings('news-backward'))

  # pooled variants (mean pooling)
  if selected_embeddings.get('pooled-flair'):
    embedding_types.append(PooledFlairEmbeddings('news-forward', pooling='mean'))
    embedding_types.append(PooledFlairEmbeddings('news-backward', pooling='mean'))

  # transformer embeddings
  if selected_embeddings.get('bert'):
    embedding_types.append(BertEmbeddings())

  if selected_embeddings.get('roberta'):
    embedding_types.append(RoBERTaEmbeddings())

  if selected_embeddings.get('biobert'):
    embedding_types.append(BertEmbeddings("data/embeddings/biobert-pubmed-pmc-cased"))

  if selected_embeddings.get('clinicalbiobert'):
    embedding_types.append(BertEmbeddings("data/embeddings/pretrained_bert_tf/biobert-base-clinical-cased"))

  if selected_embeddings.get('elmo'):
    embedding_types.append(ELMoEmbeddings())

  embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

  # CRF sequence tagger over the stacked embeddings
  tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                          embeddings=embeddings,
                                          tag_dictionary=tag_dictionary,
                                          tag_type=tag_type,
                                          use_crf=True
                                          )

  trainer: ModelTrainer = ModelTrainer(tagger, corpus)

  # encode the enabled embedding flags into the output directory name
  selected_embeddings_text = [key for key in selected_embeddings if selected_embeddings[key]]
  selected_embeddings_text = '_'.join(selected_embeddings_text)

  model_dir = 'resources/taggers/FA_' + model + selected_embeddings_text

  # 7. start training
  trainer.train(model_dir,
                train_with_dev=True,
                learning_rate=0.1,
                mini_batch_size=4,
                max_epochs=200,
                checkpoint=True)

  # 8. plot training curves (optional)
  from flair.visual.training_curves import Plotter
  plotter = Plotter()
  plotter.plot_training_curves(model_dir + '/loss.tsv')
  plotter.plot_weights(model_dir + '/weights.txt')
                                            rnn_layers=1,
                                            rnn_type='RNN_RELU')
# single-label classifier on top of the prepared document embeddings
classifier = TextClassifier(document_embeddings,
                            label_dictionary=corpus.make_label_dictionary(),
                            multi_label=False)
trainer = ModelTrainer(classifier, corpus)
# train into the current directory; evaluation uses macro F1
trainer.train('./',
              learning_rate=0.05,
              mini_batch_size=32,
              max_epochs=10,
              evaluation_metric=EvaluationMetric.MACRO_F1_SCORE)

# plot training curves (loss.tsv/weights.txt written by the trainer above)
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves('loss.tsv')
plotter.plot_weights('weights.txt')

# build a table with predictions
# (test.tsv is presumably the trainer's test output — columns assumed below; verify)
test_pred = pd.read_csv('test.tsv',
                        sep='\t',
                        encoding="utf-8",
                        names=['text', 'label', 'prediction', 'confidence'])
# column-wise join with the filtered test data prepared earlier
test_full = pd.concat([data_test_filtered, test_pred], axis=1, sort=False)
# keep only the rows where the prediction disagrees with the gold label
df = test_full.loc[test_full['prediction'] != test_full['label']]
wrong_prediction = df[[
    'attribute::id', 'text', 'label', 'prediction', 'confidence'
]]
wrong_prediction.to_csv("cat_without_lemma_flair_wrong_pred.csv",
                        encoding='utf-8',
Example #18
0
def trainer(file_path: Path, filenames: Tuple[str, str, str], checkpoint: str,
            stack: str, n_epochs: int) -> None:
    """Train sentiment model using Flair NLP library:
    https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md

    To help provide added context, we can stack Glove, Bert or ELMo embeddings along with Flair embeddings.

    Args:
        file_path: Directory containing the corpus files; also used as the
            training output directory (checkpoints, loss.tsv, weights.txt).
        filenames: (train, dev, test) file names inside ``file_path``.
        checkpoint: Path of a trainer checkpoint to resume from; falsy to
            start a fresh training run.
        stack: Extra embedding to stack with the Flair string embeddings:
            "glove", "fasttext", "elmo", "bert", "bert-multi" or "bpe";
            any other value stacks nothing.
        n_epochs: Maximum number of training epochs.
    """
    # pip install flair allennlp
    from flair.datasets import ClassificationCorpus
    from flair.embeddings import FlairEmbeddings, DocumentRNNEmbeddings, DocumentPoolEmbeddings
    from flair.models import TextClassifier
    from flair.trainers import ModelTrainer
    from flair.training_utils import EvaluationMetric
    from flair.visual.training_curves import Plotter

    # Lazily import only the embedding class that was requested.
    if stack == "glove":
        from flair.embeddings import WordEmbeddings
        stacked_embedding = WordEmbeddings('glove')
    elif stack == "fasttext":
        from flair.embeddings import WordEmbeddings
        stacked_embedding = WordEmbeddings('it')
    elif stack == "elmo":
        from flair.embeddings import ELMoEmbeddings
        stacked_embedding = ELMoEmbeddings('original')
    elif stack == "bert":
        from flair.embeddings import BertEmbeddings
        stacked_embedding = BertEmbeddings('bert-base-uncased')
    elif stack == "bert-multi":
        from flair.embeddings import BertEmbeddings
        stacked_embedding = BertEmbeddings('bert-base-multilingual-uncased')
    elif stack == 'bpe':
        from flair.embeddings import BytePairEmbeddings
        stacked_embedding = BytePairEmbeddings('it')
    else:
        stacked_embedding = None

    # Define and Load corpus from the provided dataset
    train, dev, test = filenames
    corpus = ClassificationCorpus(
        file_path,
        train_file=train,
        dev_file=dev,
        test_file=test,
    )
    # Create label dictionary from provided labels in data
    label_dict = corpus.make_label_dictionary()

    # Stack Flair string-embeddings with optional embeddings; filter(None, ...)
    # silently drops the stacked embedding when none was requested.
    word_embeddings = list(
        filter(None, [
            stacked_embedding,
            FlairEmbeddings('it-forward'),
            FlairEmbeddings('it-backward'),
        ]))
    # Initialize document embedding by passing list of word embeddings
    document_embeddings = DocumentRNNEmbeddings(
        word_embeddings,
        hidden_size=256,
        reproject_words=True,
        dropout=0.5,
        reproject_words_dimension=256,
    )

    #document_embeddings = DocumentPoolEmbeddings([
    #    stacked_embedding,
    #    FlairEmbeddings('it-forward'),
    #    FlairEmbeddings('it-backward')],pooling='mean')

    # Define classifier
    classifier = TextClassifier(document_embeddings,
                                label_dictionary=label_dict,
                                multi_label=True)

    if not checkpoint:
        trainer = ModelTrainer(classifier, corpus)
    else:
        # If checkpoint file is defined, resume training
        #checkpoint = classifier.load_checkpoint(Path(checkpoint))
        trainer = ModelTrainer.load_checkpoint(checkpoint, corpus)

    # Begin training (enable checkpointing to continue training at a later time, if desired)
    trainer.train(
        file_path,
        max_epochs=n_epochs,
        checkpoint=True,
    )

    # Plot curves and store weights and losses.
    # BUG FIX: file_path is a pathlib.Path, so use the / operator —
    # ``file_path + '/loss.tsv'`` raised TypeError (Path does not support +).
    plotter = Plotter()
    plotter.plot_training_curves(file_path / 'loss.tsv')
    plotter.plot_weights(file_path / 'weights.txt')
Example #19
0
# 5. create the text classifier
classifier = TextClassifier(document_embeddings, label_dictionary=label_dict, multi_label=False)

# 6. initialize the text classifier trainer
trainer = ModelTrainer(classifier, corpus)

# 7. start the training
# Training aborted due to excessive size of documents. With each document
# limited to 5 sentences, training successfully performed. But the main reason
# I tried this tool was to overcome the maximum length imposed in BERT, so a
# workaround will not be helpful.
trainer.train(base_path=DATA_FOLDER,
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=2,
              # Was True in the first example; switching to False did not
              # help solve the problem either. (translated from Turkish)
              embeddings_in_memory=False,
              evaluation_metric=EvaluationMetric.MACRO_F1_SCORE)

# 8. plot training curves (optional)
# BUG FIX: the original referenced an undefined lowercase ``data_folder``
# (NameError); the training output directory above is ``DATA_FOLDER``.
# str() keeps this working whether DATA_FOLDER is a str or a pathlib.Path.
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves(str(DATA_FOLDER) + '/loss.tsv')
plotter.plot_weights(str(DATA_FOLDER) + '/weights.txt')

# Test model
test_data_folder = Path('/Users/buyukozb/git/berfu/thesis/data/all_data/india/flair_formatted/test')
test_sentences = NLPTaskDataFetcher.load_sentences_from_data(test_data_folder, max_seq_len=128)


]

# 4. init document embedding by passing list of word embeddings
# (word_embeddings is assembled earlier in this example, outside this view)
document_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings(
    word_embeddings,
    hidden_size=512,
    reproject_words=True,
    reproject_words_dimension=256,
)

# 5. create the text classifier (multi_label: a review may carry several labels)
classifier = TextClassifier(document_embeddings,
                            label_dictionary=label_dict,
                            multi_label=True)

# 6. initialize the text classifier trainer
trainer = ModelTrainer(classifier, corpus)

# 7. start the training; the model, loss.tsv and weights.txt are all written
# into this output directory
trainer.train('C:/Users/jeanc/Documents/reviews/model',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=150)

# 8. plot training curves (optional) — reads the files written by train() above
plotter = Plotter()
plotter.plot_training_curves('C:/Users/jeanc/Documents/reviews/model/loss.tsv')
plotter.plot_weights('C:/Users/jeanc/Documents/reviews/model/weights.txt')
Example #21
0
def main(train_file,
         data_folder='./eng_data_mini_onefile/',
         output_dir='resources/taggers/example-ner'):
    """Train a CoNLL-style NER tagger with Flair and plot its training curves.

    Args:
        train_file: Name of the training file inside ``data_folder``.
        data_folder: Folder in which train, test and dev files reside
            (generalized from the previously hard-coded path; the default
            preserves the old behavior).
        output_dir: Directory where the trained model, loss.tsv and
            weights.txt are written (same default as before).
    """
    # 1. get the corpus
    # define columns; columns 1 and 2 exist in the data but are unused
    columns = {0: 'text', 1: '', 2: '', 3: 'ner'}

    # retrieve corpus using column format, data folder and the names of the
    # train, dev and test files
    corpus: TaggedCorpus = NLPTaskDataFetcher.load_column_corpus(
        data_folder,
        columns,
        train_file=train_file,
        test_file='eng.testb',
        dev_file='eng.testa')

    print(corpus)

    # 2. what tag do we want to predict?
    tag_type = 'ner'

    # 3. make the tag dictionary from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
    print(tag_dictionary.idx2item)

    # 4. initialize embeddings
    embedding_types: List[TokenEmbeddings] = [
        WordEmbeddings('glove'),

        # comment in this line to use character embeddings
        # CharacterEmbeddings(),

        # comment in these lines to use flair embeddings
        # FlairEmbeddings('news-forward'),
        # FlairEmbeddings('news-backward'),
    ]

    embeddings: StackedEmbeddings = StackedEmbeddings(
        embeddings=embedding_types)

    # 5. initialize sequence tagger
    from flair.models import SequenceTagger

    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type=tag_type,
                                            use_crf=True)

    # 6. initialize trainer
    from flair.trainers import ModelTrainer

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    # 7. start training
    trainer.train(output_dir,
                  learning_rate=0.1,
                  mini_batch_size=32,
                  max_epochs=150)

    # 8. plot training curves (optional); both files are written by train()
    from flair.visual.training_curves import Plotter
    plotter = Plotter()
    plotter.plot_training_curves(output_dir + '/loss.tsv')
    plotter.plot_weights(output_dir + '/weights.txt')
Example #22
0
class SequenceTaggerEvaluation():
    """Load a trained flair SequenceTagger and its cross-validation results,
    and provide result tables, tag-statistic plots, training-curve plots and
    tagging of new text.
    """

    def __init__(self, path: Union[Path, str], model: str = 'final-model.pt'):
        """Load the model from ``path / model`` and any pickled CV results.

        Missing summary.pkl / details.pkl files are tolerated; their
        cv_results entries are set to None.
        """
        if type(path) == str:
            path = Path(path)
        assert path.exists()

        self.path = path
        self.model = SequenceTagger.load(path / model)
        self.cv_results = {}
        for file in ['summary', 'details']:
            try:
                self.cv_results[file] = pickle.load(
                    (path / (file + '.pkl')).open(mode='rb'))
            except FileNotFoundError:
                # BUG FIX: the message promised None but the key was never
                # actually set, causing a KeyError later instead of a
                # readable None.
                self.cv_results[file] = None
                print(
                    f"{file+'.pkl'} not found. Setting cv_results['{file}'] to None"
                )

        self.plotter = Plotter()

    def result_tables(self, save_as_html: bool = True):
        """Display (and optionally save) summary/details CV results as HTML."""
        html_0 = self.cv_results['summary'].to_frame('value').to_html()
        html_1 = self.cv_results['details'].to_html()
        display(HTML(html_0))
        print('\n')
        display(HTML(html_1))

        if save_as_html:
            (self.path / 'summary.html').write_text(html_0)
            (self.path / 'details.html').write_text(html_1)

    def plot_tag_stats(self, mode: str, savefig: bool = False, **kwargs):
        """Bar-plot per-tag statistics from the CV details table.

        mode
        tp_fn: stacked barplot - true-positives and false-negatives
        tp_fp: bar plot - true-positives and false-positives
        anything else: '_'-separated column names, plotted unstacked
        """
        details = self.cv_results['details']

        if mode == 'tp_fn':
            details[['true-positive', 'false-negative']].plot.bar(stacked=True,
                                                                  **kwargs)
        elif mode == 'tp_fp':
            details[['true-positive',
                     'false-positive']].plot.bar(stacked=False, **kwargs)
        else:
            details[mode.split('_')].plot.bar(stacked=False, **kwargs)

        plt.gca().yaxis.grid(True, linestyle='--')
        plt.tight_layout()
        if savefig:
            plt.savefig(self.path / (mode + '.png'))

    def confusion_matrix(self, ):
        # confusion matrix tags -- not implemented yet
        pass

    def predict(self,
                sentences: Union[str, Sentence, List[Sentence], List[str]],
                display_html: bool = True,
                html_file: str = None,
                display_str: bool = False,
                **kwargs):
        """Tag sentences with the loaded model and render/save/print results.

        A raw string is split into sentences first; plain strings are
        tokenized into flair Sentences.
        """
        if type(sentences) == Sentence:
            sentences = [sentences]
        elif type(sentences) == str:
            sentences = split_single(sentences)

        if type(sentences[0]) == str:
            sentences = [Sentence(s, use_tokenizer=True) for s in sentences]

        self.model.predict(sentences)

        if display_html or html_file:
            html = render_ner_html(sentences, **kwargs)
            if display_html:
                display(HTML(html))
            if html_file:
                (self.path / html_file).write_text(html)
        if display_str:
            for sentence in sentences:
                print(sentence.to_tagged_string())

    def plot_training_curves(self, plot_values: List[str] = ["loss", "F1"]):
        """Plot the requested columns of loss.tsv from the model directory."""
        self.plotter.plot_training_curves(self.path / 'loss.tsv', plot_values)

    def plot_weights(self):
        """Plot layer-weight statistics recorded in weights.txt."""
        self.plotter.plot_weights(self.path / 'weights.txt')

    def plot_learning_rate(self, skip_first: int = 10, skip_last: int = 5):
        """Plot the learning-rate curve, trimming noisy head/tail entries."""
        self.plotter.plot_learning_rate(self.path / 'loss.tsv', skip_first,
                                        skip_last)

    @staticmethod
    def _preprocess(text, mode=None):
        '''helper function to preprocess text. returns List of Sentences.

        mode=None tokenizes only; 'lemmatize' or 'stem' additionally
        normalizes tokens with spaCy (German pipeline) / NLTK GermanStemmer.
        '''
        sentences = split_single(text)
        if mode:
            nlp = spacy.load('de_core_news_sm')
            if mode == 'lemmatize':
                sentences = [
                    Sentence((' ').join([token.lemma_ for token in nlp(s)]))
                    for s in sentences
                ]
            elif mode == 'stem':
                stemmer = GermanStemmer()
                sentences = [
                    Sentence((' ').join(
                        [stemmer.stem(token.text) for token in nlp(s)]))
                    for s in sentences
                ]
        else:
            sentences = [Sentence(s, use_tokenizer=True) for s in sentences]

        return sentences
Example #23
0
# Visualize a trained tagger: point model_dir at a different model directory
# to print that run's training details and weight statistics instead.

from flair.visual.training_curves import Plotter

model_dir = 'FLAIR/resources/taggers/flairpos1'
plotter = Plotter()
plotter.plot_training_curves(model_dir + '/loss.tsv')
plotter.plot_weights(model_dir + '/weights.txt')
Example #24
0
# Stack the previously assembled embedding types into one embedding.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# 5. initialize sequence tagger (2 RNN layers)
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        rnn_layers=2,
                                        tag_type=tag_type,
                                        use_crf=True)

# 6. initialize trainer
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(tagger, corpus)

# 7. start training; model, loss.tsv and weights.txt are written to model_dir
model_dir = 'resources/taggers/usDL2'
trainer.train(model_dir,
              learning_rate=0.01,
              embeddings_in_memory=False,
              mini_batch_size=32,
              max_epochs=150,
              checkpoint=True)

# 8. plot training curves (optional)
# BUG FIX: the original plotted 'resources/taggers/nerpan/...' — a different
# model's output — instead of the directory this run just trained into.
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves(model_dir + '/loss.tsv')
plotter.plot_weights(model_dir + '/weights.txt')
Example #25
0
# Combine all prepared embedding types into a single stacked embedding.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(
    hidden_size=nb_cells,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
    use_crf=True,
)

# initialize trainer
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(tagger, corpus)

# train with embeddings kept on CPU and deterministic (unshuffled) batches;
# all outputs land in the experiment directory
out_dir = "resources/taggers/" + exp_name
trainer.train(
    out_dir,
    learning_rate=0.1,
    embeddings_storage_mode="cpu",
    mini_batch_size=32,
    max_epochs=150,
    shuffle=False,
)

# plot the loss curve and weight statistics produced during training
plotter = Plotter()
plotter.plot_training_curves(out_dir + "/loss.tsv")
plotter.plot_weights(out_dir + "/weights.txt")
Example #26
0
def main():
    """Train a column-tag SequenceTagger on the DESFOSSE_ARRAY dataset,
    plot its training curves, then run prediction.
    """
    # List available datasets (informational only).
    datasets = os.listdir("./datasets")
    print(datasets)
    language = "fr"

    nb_cells = 32
    dataset = "DESFOSSE_ARRAY"

    # 1. get the corpus
    columns = {0: 'text', 1: 'position', 2: "array", 3: "line", 4: "col"}

    # this is the folder in which train, test and dev files reside
    data_folder = './datasets/' + dataset

    # init a corpus using column format, data folder and the names of the train, dev and test files
    corpus: Corpus = ColumnCorpus(data_folder,
                                  columns,
                                  train_file="train_" + dataset + '.txt',
                                  test_file="test_" + dataset + '.txt',
                                  dev_file="valid_" + dataset + '.txt')

    print(corpus)

    # 2. what tag do we want to predict?
    tag_type = "col"

    # experiment name: dataset + target tag. (The earlier assignment
    # ``dataset + "_" + str(nb_cells)`` was a dead store — it was always
    # overwritten here before use — and has been removed.)
    exp_name = dataset + "_" + tag_type

    # 3. make the tag dictionary from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
    print(tag_dictionary.idx2item)

    # initialize embeddings: forward/backward Flair LMs for the language plus
    # the numeric 'position' column as a 4-float feature embedding
    embedding_types: List[TokenEmbeddings] = []
    embedding_types.append(FlairEmbeddings(language + '-forward'))
    embedding_types.append(FlairEmbeddings(language + '-backward'))
    embedding_types.append(FloatsEmbeddings(field='position', length=4))

    embeddings: StackedEmbeddings = StackedEmbeddings(
        embeddings=embedding_types)

    # initialize sequence tagger
    from flair.models import SequenceTagger

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=nb_cells,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=True,
    )

    # initialize trainer
    from flair.trainers import ModelTrainer

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    trainer.train(
        "resources/taggers/" + exp_name,
        learning_rate=0.1,
        embeddings_storage_mode="cpu",
        mini_batch_size=32,
        max_epochs=150,
        shuffle=False,
    )

    plotter = Plotter()
    plotter.plot_training_curves("resources/taggers/" + exp_name + "/loss.tsv")
    plotter.plot_weights("resources/taggers/" + exp_name + "/weights.txt")

    # NOTE(review): ``setId`` and ``rubric`` are not defined anywhere in this
    # function and will raise NameError at runtime — confirm the intended
    # arguments (possibly ``dataset`` and ``tag_type``).
    predict_tagger(setId, nb_cells, rubric, rubric)
Example #27
0
from flair.visual.training_curves import Plotter

# Inspect the fastText run; switch output_dir to 'flair_outputs_glove'
# to look at the GloVe run instead.
output_dir = "flair_outputs_fastText"

plotter = Plotter()
plotter.plot_weights(output_dir + "/weights.txt")
plotter.plot_training_curves(output_dir + "/loss.tsv")
plotter.plot_learning_rate(output_dir + "/loss.tsv")
Example #28
0
# Stack the previously assembled embedding types into one embedding.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
# NOTE(review): ``tag_dictionaries`` / ``tag_types`` (plural) are not part of
# the standard flair SequenceTagger API — this appears to target a modified
# multi-task fork; confirm which flair version this example expects.
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionaries=tag_dictionaries,
                                        tag_types=tag_types,
                                        use_crf=True)

# initialize trainer (default optimizer; Adam variant kept for reference)
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(tagger, corpus)
# trainer: ModelTrainer = ModelTrainer(tagger, corpus, optimizer=Adam)

# train into the famulus output directory; test_mode=True presumably enables
# a debugging/evaluation mode of the fork — verify against its ModelTrainer.
trainer.train('resources/taggers/famulus_eda_test_n_bert_long2',
              EvaluationMetric.MICRO_F1_SCORE,
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=1000,
              test_mode=True)
# trainer.train('resources/taggers/famulus_test', EvaluationMetric.MICRO_F1_SCORE, learning_rate=0.001, mini_batch_size=32,
#               max_epochs=100, test_mode=True)

# plot curves/weights written by the training run above
plotter = Plotter()
plotter.plot_training_curves(
    'resources/taggers/famulus_eda_test_n_bert_long2/loss.tsv')
plotter.plot_weights(
    'resources/taggers/famulus_eda_test_n_bert_long2/weights.txt')
from flair.visual.training_curves import Plotter

# Plot training artifacts of the binary classifier run.
# NOTE(review): the loss curve is read from './resources/loss.tsv' while the
# weights come from clf_dir — confirm this mismatch is intentional (the loss
# file would normally live in clf_dir as well).
clf_dir = 'resources/binary_unbiased_031219/'
plotter = Plotter()
plotter.plot_training_curves('./resources/loss.tsv')
plotter.plot_weights(clf_dir + 'weights.txt')
Example #30
0
def test_plotting_training_curves_and_weights(resources_path):
    """Smoke-test Plotter: render both plots, then delete the PNGs it wrote."""
    visual_dir = resources_path / 'visual'
    plotter = Plotter()
    plotter.plot_training_curves(visual_dir / 'loss.tsv')
    plotter.plot_weights(visual_dir / 'weights.txt')

    # clean up the generated images
    (visual_dir / 'weights.png').unlink()
    (visual_dir / 'training.png').unlink()