"/mnt/clef-hipe-parser-master/transformers/examples/token-classification/german-large-2",
        layers="all",
        use_scalar_mix=True)
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
    use_crf=True,
)

# initialize trainer
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(model=tagger,
                                     corpus=corpus,
                                     use_tensorboard=True)

trainer.train("resources/taggers/baseline-de-stacked-we-bert-with-dev-3",
              mini_batch_size=16,
              patience=5,
              max_epochs=200,
              train_with_dev=True)
Example no. 2
    FlairEmbeddings('news-forward'),
    FlairEmbeddings('news-backward'),
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# 5. initialize sequence tagger
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=True)

# 6. initialize trainer
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(tagger, corpus)

# 7. start training
trainer.train('resources/taggers/example-ner',
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=150)

# 8. plot training curves (optional)
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves('resources/taggers/example-ner/loss.tsv')
plotter.plot_weights('resources/taggers/example-ner/weights.txt')
from flair.data_fetcher import NLPTaskDataFetcher
from flair.embeddings import WordEmbeddings, FlairEmbeddings, DocumentLSTMEmbeddings
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from pathlib import Path
from torch.optim.adam import Adam
from flair.embeddings import ELMoEmbeddings

corpus = NLPTaskDataFetcher.load_classification_corpus(Path('./'),
                                                       test_file='test.csv',
                                                       dev_file='dev.csv',
                                                       train_file='train.csv')
word_embeddings = [
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward-fast'),
    FlairEmbeddings('news-backward-fast')
]
#word_embeddings = [BertEmbeddings('bert-base-uncased')]
document_embeddings = DocumentLSTMEmbeddings(word_embeddings,
                                             hidden_size=512,
                                             bidirectional=True,
                                             reproject_words=True,
                                             reproject_words_dimension=256)
classifier = TextClassifier(document_embeddings,
                            label_dictionary=corpus.make_label_dictionary(),
                            multi_label=False)
trainer = ModelTrainer(classifier, corpus, optimizer=Adam)
trainer.train('./',
              learning_rate=0.001,
              mini_batch_size=6,
              embeddings_in_memory=False,
              max_epochs=150)
        if tag_no_pref_encoded in tag_dictionary_no_prefix.idx2item and tag_countdown[
                tag_dictionary_no_prefix.item2idx[tag_no_pref_encoded]] > 0:
            corpus_sents.append(sent)
            tag_countdown[
                tag_dictionary_no_prefix.item2idx[tag_no_pref_encoded]] -= 1
            sent_picked = True

print("sents for training: " + str(len(corpus_sents)))
print("amount of items in dict: " + str(len(tag_dictionary.item2idx)))

training_dataset = SentenceDataset(corpus_sents)
training_corpus = Corpus(train=training_dataset,
                         dev=corpus_small.dev,
                         test=corpus_small.test,
                         sample_missing_splits=False)
trainer = ModelTrainer(tagger, training_corpus, optimizer=torch.optim.AdamW)
tag_dictionary = training_corpus.make_label_dictionary(tag_type)
tagger.add_and_switch_to_new_task("fewshot-conll3-simple-to-moviecomplex",
                                  tag_dictionary=tag_dictionary,
                                  tag_type=tag_type)
trainer.train(
    base_path='resources/v3/fewshot-conll_3-simple-to-moviecomplex-k' + str(k),
    learning_rate=5.0e-5,
    mini_batch_size=32,
    mini_batch_chunk_size=None,
    max_epochs=10,
    weight_decay=0.,
    embeddings_storage_mode="none",
    scheduler=OneCycleLR,
)
Example no. 5
from flair.embeddings import DocumentRNNEmbeddings

document_embeddings = DocumentRNNEmbeddings(word_embeddings,
                                            hidden_size=512,
                                            reproject_words=True,
                                            reproject_words_dimension=256)

# Create model
from flair.models import TextClassifier

classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

# Create model trainer
from flair.trainers import ModelTrainer

trainer = ModelTrainer(classifier, corpus)

# Train the model
trainer.train('model-saves',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=8,
              max_epochs=200)

# Load the model and make predictions
from flair.data import Sentence

classifier = TextClassifier.load('model-saves/final-model.pt')

pos_sentence = Sentence(preprocess('I love Python!'))
Example no. 6
    def _objective(self, params: dict):
        log_line(log)
        log.info(f"Evaluation run: {self.run}")
        log.info(f"Evaluating parameter combination:")
        for k, v in params.items():
            if isinstance(v, Tuple):
                v = ",".join([str(x) for x in v])
            log.info(f"\t{k}: {str(v)}")
        log_line(log)

        for sent in self.corpus.get_all_sentences():
            sent.clear_embeddings()

        scores = []
        vars = []

        for i in range(0, self.training_runs):
            log_line(log)
            log.info(f"Training run: {i + 1}")

            model = self._set_up_model(params)

            training_params = {
                key: params[key] for key in params if key in TRAINING_PARAMETERS
            }
            model_trainer_parameters = {
                key: params[key] for key in params if key in MODEL_TRAINER_PARAMETERS
            }

            trainer: ModelTrainer = ModelTrainer(
                model, self.corpus, **model_trainer_parameters
            )

            result = trainer.train(
                self.base_path,
                max_epochs=self.max_epochs,
                param_selection_mode=True,
                **training_params,
            )

            # take the average over the last three scores of training
            if self.optimization_value == OptimizationValue.DEV_LOSS:
                curr_scores = result["dev_loss_history"][-3:]
            else:
                curr_scores = list(
                    map(lambda s: 1 - s, result["dev_score_history"][-3:])
                )

            score = sum(curr_scores) / float(len(curr_scores))
            var = np.var(curr_scores)
            scores.append(score)
            vars.append(var)

        # take average over the scores from the different training runs
        final_score = sum(scores) / float(len(scores))
        final_var = sum(vars) / float(len(vars))

        test_score = result["test_score"]
        log_line(log)
        log.info(f"Done evaluating parameter combination:")
        for k, v in params.items():
            if isinstance(v, Tuple):
                v = ",".join([str(x) for x in v])
            log.info(f"\t{k}: {v}")
        log.info(f"{self.optimization_value.value}: {final_score}")
        log.info(f"variance: {final_var}")
        log.info(f"test_score: {test_score}\n")
        log_line(log)

        with open(self.param_selection_file, "a") as f:
            f.write(f"evaluation run {self.run}\n")
            for k, v in params.items():
                if isinstance(v, Tuple):
                    v = ",".join([str(x) for x in v])
                f.write(f"\t{k}: {str(v)}\n")
            f.write(f"{self.optimization_value.value}: {final_score}\n")
            f.write(f"variance: {final_var}\n")
            f.write(f"test_score: {test_score}\n")
            f.write("-" * 100 + "\n")

        self.run += 1

        return {"status": "ok", "loss": final_score, "loss_variance": final_var}
Example no. 7
            # elif GLOVE_CHARS is True:
            #     embeddings = [WordEmbeddings('../../../../Data/Models/Chars/lemma_lowercased_estenten11_freeling_v4_virt.gensim.vec'),
            #                   WordEmbeddings('../../../../Data/Models/Glove/glove-sbwc_spanish.i25.gensim.vec')]
            #     document_embeddings = DocumentPoolEmbeddings(embeddings, pooling='max')
            #     prefix_model_output_dir = "glove_chars_maxpool"
            #
            # elif GLOVE_BPE is True:
            #     embeddings = [BytePairEmbeddings(language='es'),
            #                   WordEmbeddings('../../../../Data/Models/Glove/glove-sbwc_spanish.i25.gensim.vec')]
            #     document_embeddings = DocumentPoolEmbeddings(embeddings, pooling='max')
            #     prefix_model_output_dir = "glove_chars_maxpool"

            if bTestPhase is False:
                classifier = TextClassifier(document_embeddings, label_dictionary=corpus.make_label_dictionary(),
                                            multi_label=False)
                trainer = ModelTrainer(classifier, corpus)
                trainer.train('./' + prefix_model_output_dir + '_' + sLang + prefix + '/',
                              learning_rate=cmd_args.lr,
                              mini_batch_size=16,
                              anneal_factor=0.5,
                              patience=1,
                              evaluation_metric=EvaluationMetric.MICRO_F1_SCORE,
                              max_epochs=cmd_args.iters)

                plotter = Plotter()
                plotter.plot_training_curves('./' + prefix_model_output_dir + '_' + sLang + prefix + '/loss.tsv')
                plotter.plot_weights('./' + prefix_model_output_dir + '_' + sLang + prefix + '/weights.txt')

                # 7. find learning rate
                learning_rate_tsv = trainer.find_learning_rate('./' + prefix_model_output_dir + '_' + sLang + prefix + '/learning_rate.tsv')
Example no. 8
    # initialize sequence tagger

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=args.hidden_size,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=args.crf,
        rnn_layers=args.rnn,
        train_initial_hidden_state=args.train_initial_hidden_state,
        loss_weights={'0': 10.})

# initialize trainer
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(tagger, corpus, use_tensorboard=False)

# 7. start training
trainer.train(args.output_folder,
              learning_rate=args.learning_rate,
              mini_batch_size=args.mini_batch_size,
              mini_batch_chunk_size=args.mini_batch_chunk_size,
              max_epochs=args.max_epochs,
              min_learning_rate=1e-6,
              shuffle=True,
              anneal_factor=0.5,
              patience=args.patience,
              num_workers=args.num_workers,
              embeddings_storage_mode=args.embeddings_storage_mode,
              monitor_test=True,
              monitor_train=args.monitor_train,
class SequenceClassifierTrainer:
    """Sequence Classifier Trainer

    Usage:

    ```python
    >>> sc_trainer = SequenceClassifierTrainer(corpus="/Path/to/data/dir")
    ```

    **Parameters:**

    * **corpus** - A flair corpus data model or `Path`/string to a directory with train.csv/test.csv/dev.csv
    * **encoder** - An `EasyDocumentEmbeddings` object if training with a flair prediction head, or a `Path`/string if training with Transformers prediction models
    * **column_name_map** - Required if corpus is not a `Corpus` object; a dictionary specifying the indices of the text and label columns of the CSV, e.g. {1: "text", 2: "label"}
    * **corpus_in_memory** - Boolean for whether to store corpus embeddings in memory
    * **predictive_head** - For now either "flair" or "transformers" for the prediction head
    * ****kwargs** - Keyword arguments for Flair's `TextClassifier` model class
    """
    def __init__(
        self,
        corpus: Union[Corpus, Path, str],
        encoder: Union[EasyDocumentEmbeddings, Path, str],
        column_name_map: dict = None,
        corpus_in_memory: bool = True,
        predictive_head: str = "flair",
        **kwargs,
    ):
        if isinstance(corpus, Corpus):
            self.corpus = corpus
        else:
            if isinstance(corpus, str):
                corpus = Path(corpus)
            if not column_name_map:
                raise ValueError(
                    "If not instantiating with `Corpus` object, must pass in `column_name_map` argument to specify text/label indices"
                )
            self.corpus = CSVClassificationCorpus(
                corpus,
                column_name_map,
                skip_header=True,
                delimiter=",",
                in_memory=corpus_in_memory,
            )

        # Verify predictive head is within available heads
        self.available_predictive_head = ["flair", "transformers"]
        if predictive_head not in self.available_predictive_head:
            raise ValueError(
                f"predictive_head param must be one of the following: {self.available_predictive_head}"
            )
        self.predictive_head = predictive_head

        # Verify correct corresponding encoder is used with predictive head (This can be structured with better design in the future)
        if isinstance(encoder, EasyDocumentEmbeddings):
            if predictive_head == "transformers":
                raise ValueError(
                    "If using `transformers` predictive head, pass in the path to the transformer's model"
                )
            else:
                self.encoder = encoder
        else:
            if isinstance(encoder, str):
                encoder = Path(encoder)
            self.encoder = encoder

        # Create the label dictionary on init and store it to avoid regenerating label_dict repeatedly.
        # (Should we use the dev/test set instead, assuming all labels are provided?)
        self.label_dict = self.corpus.make_label_dictionary()

        # Save trainer kwargs dict for reinitializations
        self.trainer_kwargs = kwargs

        # Load trainer with initial setup
        self._initial_setup(self.label_dict, **kwargs)

    def _initial_setup(self, label_dict: Dict, **kwargs):
        if self.predictive_head == "flair":

            # Get Document embeddings from `embeddings`
            document_embeddings: DocumentRNNEmbeddings = self.encoder.rnn_embeddings

            # Create the text classifier
            classifier = TextClassifier(
                document_embeddings,
                label_dictionary=label_dict,
                **kwargs,
            )

            # Initialize the text classifier trainer
            self.trainer = ModelTrainer(classifier, self.corpus)

        # TODO: In internal transformers package, create ****ForSequenceClassification adaptations
        elif self.predictive_head == "transformers":
            with open(self.encoder / "config.json") as config_f:
                configs = json.load(config_f)
                model_name = configs["architectures"][-1]
            if model_name == "BertForMaskedLM":
                pass

    def train(
        self,
        output_dir: Union[Path, str],
        learning_rate: float = 0.07,
        mini_batch_size: int = 32,
        anneal_factor: float = 0.5,
        patience: int = 5,
        max_epochs: int = 150,
        plot_weights: bool = False,
        **kwargs,
    ) -> None:
        """
        Train the Sequence Classifier

        * **output_dir** - The output directory where the model predictions and checkpoints will be written.
        * **learning_rate** - The initial learning rate
        * **mini_batch_size** - Batch size for the dataloader
        * **anneal_factor** - The factor by which the learning rate is annealed
        * **patience** - Patience is the number of epochs with no improvement the Trainer waits until annealing the learning rate
        * **max_epochs** - Maximum number of epochs to train. Terminates training if this number is surpassed.
        * **plot_weights** - Bool to plot weights or not
        * **kwargs** - Keyword arguments for the rest of Flair's `Trainer.train()` hyperparameters
        """
        if isinstance(output_dir, str):
            output_dir = Path(output_dir)

        # Start the training
        self.trainer.train(
            output_dir,
            learning_rate=learning_rate,
            mini_batch_size=mini_batch_size,
            anneal_factor=anneal_factor,
            patience=patience,
            max_epochs=max_epochs,
            **kwargs,
        )

        # Plot weight traces
        if plot_weights:
            plotter = Plotter()
            plotter.plot_weights(output_dir / "weights.txt")
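
    # A minimal usage sketch for `train` under the defaults documented above; the
    # output directory and the overridden values are assumptions:
    #
    #   sc_trainer.train(
    #       output_dir="Path/to/model/output",
    #       learning_rate=0.07,
    #       mini_batch_size=32,
    #       max_epochs=150,
    #   )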

    def find_learning_rate(
        self,
        output_dir: Union[Path, str],
        file_name: str = "learning_rate.tsv",
        start_learning_rate: float = 1e-8,
        end_learning_rate: float = 10,
        iterations: int = 100,
        mini_batch_size: int = 32,
        stop_early: bool = True,
        smoothing_factor: float = 0.7,
        plot_learning_rate: bool = True,
        **kwargs,
    ) -> float:
        """
        Uses Leslie's cyclical learning rate finding method to generate and save the loss x learning rate plot

        This method returns a suggested learning rate using the static method `suggested_learning_rate()`,
        which is implicitly run in this method.

        * **output_dir** - Path to dir for learning rate file to be saved
        * **file_name** - Name of learning rate .tsv file
        * **start_learning_rate** - Initial learning rate to start cyclical learning rate finder method
        * **end_learning_rate** - End learning rate to stop exponential increase of the learning rate
        * **iterations** - Number of optimizer iterations for the ExpAnnealLR scheduler
        * **mini_batch_size** - Batch size for dataloader
        * **stop_early** - Bool for stopping early once loss diverges
        * **smoothing_factor** - Smoothing factor on moving average of losses
        * **kwargs** - Additional keyword arguments passed on to `suggested_learning_rate()`
        **return** - Learning rate as a float
        """
        # 7. find learning rate
        learning_rate_tsv = self.trainer.find_learning_rate(
            base_path=output_dir,
            file_name=file_name,
            start_learning_rate=start_learning_rate,
            end_learning_rate=end_learning_rate,
            iterations=iterations,
            mini_batch_size=mini_batch_size,
            stop_early=stop_early,
            smoothing_factor=smoothing_factor,
        )

        # Reinitialize optimizer and parameters by reinitializing trainer
        self._initial_setup(self.label_dict, **self.trainer_kwargs)

        if plot_learning_rate:
            plotter = Plotter()
            plotter.plot_learning_rate(learning_rate_tsv)

        # Use the automated learning rate finder
        with open(learning_rate_tsv) as lr_f:
            lr_tsv = list(csv.reader(lr_f, delimiter="\t"))
        losses = np.array([float(row[-1]) for row in lr_tsv[1:]])
        lrs = np.array([float(row[-2]) for row in lr_tsv[1:]])
        lr_to_use = self.suggested_learning_rate(losses, lrs, **kwargs)
        print(f"Recommended Learning Rate {lr_to_use}")
        return lr_to_use
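
    # A sketch of the intended workflow: run the finder, then retrain with the
    # suggested rate (the output directory name is an assumption):
    #
    #   lr = sc_trainer.find_learning_rate(output_dir="Path/to/model/output")
    #   sc_trainer.train(output_dir="Path/to/model/output", learning_rate=lr)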

    @staticmethod
    def suggested_learning_rate(
        losses: np.array,
        lrs: np.array,
        lr_diff: int = 15,
        loss_threshold: float = 0.2,
        adjust_value: float = 1,
    ) -> float:
        # This seems redundant unless we can make this configured for each trainer/finetuner
        """
        Attempts to find the optimal learning rate using an interval slide-rule approach with the cyclical learning rate method

        * **losses** - Numpy array of losses
        * **lrs** - Numpy array of exponentially increasing learning rates (must match dim of `losses`)
        * **lr_diff** - Learning rate Interval of slide ruler
        * **loss_threshold** - Threshold of loss difference on interval where the sliding stops
        * **adjust_value** - Coefficient for adjustment
        **return** - the optimal learning rate as a float
        """
        # Get loss values and their corresponding gradients, and get lr values
        assert lr_diff < len(losses)
        loss_grad = np.gradient(losses)

        # Search for index in gradients where loss is lowest before the loss spike
        # Initialize right and left idx using the lr_diff as a spacing unit
        # Set the local min lr as -1 to signify if threshold is too low
        r_idx = -1
        l_idx = r_idx - lr_diff
        local_min_lr = lrs[l_idx]
        while (l_idx >= -len(losses)) and (
                abs(loss_grad[r_idx] - loss_grad[l_idx]) > loss_threshold):
            local_min_lr = lrs[l_idx]
            r_idx -= 1
            l_idx -= 1

        lr_to_use = local_min_lr * adjust_value

        return lr_to_use
Example no. 10
        skip_header=True,
        delimiter='\t',  # tab-separated files
    )
    print(corpus)

    # 2. create the label dictionary
    label_dict = corpus.make_label_dictionary()
    class_weights = utils.get_inverted_class_balance(corpus.train.dataset)

    # 3. initialize transformer document embeddings (many models are available)
    document_embeddings = TransformerDocumentEmbeddings(
        'allenai/scibert_scivocab_uncased', fine_tune=True)

    # 4. create the text classifier
    classifier = TextClassifier(document_embeddings,
                                label_dictionary=label_dict,
                                loss_weights=class_weights)

    # 5. initialize the text classifier trainer with Adam optimizer
    trainer = ModelTrainer(classifier, corpus, optimizer=Adam)

    # 6. start the training
    trainer.train(
        sys.argv[2],
        learning_rate=3e-5,  # use very small learning rate
        mini_batch_size=16,
        mini_batch_chunk_size=4,  # optionally set this if transformer is too much for your machine
        max_epochs=5,  # terminate after 5 epochs
    )
Example no. 11
def train_tagger(options):
    # Define columns
    columns = {1: 'text', 2: 'pos', 3: 'ner'}

    # What tag should be predicted?
    tag_type = 'ner'

    # Folder in which train, test and dev files reside
    data_folder = options.iob_dir + '/' + options.correction_mode

    # Folder in which to save tagging model and additional information
    tagger_folder = '/'.join([
        options.tagger_dir, options.ner_cycle, options.lm_domain,
        options.correction_mode
    ]) + '-stringemb'

    # Retrieve corpus using column format, data folder and the names of the train, dev and test files
    corpus: TaggedCorpus = NLPTaskDataFetcher.load_column_corpus(
        data_folder,
        columns,
        train_file='train.txt',
        test_file='test.txt',
        dev_file='dev.txt')

    # Make the tag dictionary from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    # Initialize embeddings
    char_embeddings = [
        FlairEmbeddings(options.lm_dir + options.lm_domain + '-fw/best-lm.pt',
                        use_cache=False),
        FlairEmbeddings(options.lm_dir + options.lm_domain + '-bw/best-lm.pt',
                        use_cache=False)
    ]

    if not options.use_wiki_wordemb:
        if not options.use_press_wordemb:
            embedding_types: List[TokenEmbeddings] = char_embeddings
        else:
            embedding_types: List[TokenEmbeddings] = [
                WordEmbeddings(
                    'resources.d/embeddings/fasttext/pressfr-wikifr')
            ] + char_embeddings
            tagger_folder = tagger_folder + '-wordemb-pr'
    else:
        embedding_types: List[TokenEmbeddings] = [WordEmbeddings('fr')
                                                  ] + char_embeddings
        tagger_folder = tagger_folder + '-wordemb'

    if options.use_crf:
        tagger_folder = tagger_folder + '-crf'

    # Print information
    print(tagger_folder)
    print(corpus)
    print(tag_dictionary.idx2item)

    embeddings: StackedEmbeddings = StackedEmbeddings(
        embeddings=embedding_types)

    # Initialize sequence tagger
    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type=tag_type,
                                            use_crf=options.use_crf)

    # Initialize trainer
    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    # Start training
    trainer.train(
        tagger_folder,
        learning_rate=0.1,
        mini_batch_size=32,
        max_epochs=50,
        patience=options.train_patience,
        #train_with_dev=True,
        anneal_against_train_loss=False,
        embeddings_in_memory=False)

    # Plot training curves (optional)
    plotter = Plotter()
    plotter.plot_training_curves(tagger_folder + '/loss.tsv')
    plotter.plot_weights(tagger_folder + '/weights.txt')
Example no. 12
def create_trainer(tagger, corpus, optimizer=SGD):
    trainer: ModelTrainer = ModelTrainer(tagger, corpus, optimizer=optimizer)
    return trainer
Example no. 13
def main(args):
    logger.info('Args = {}'.format(args))
    corpus = CorpusLoader().load_corpus(CORPUS_PATH[args.corpus])
    tokenizer = TokenizerFactory().tokenizer(args.corpus)

    logger.info('Loaded corpus: {}'.format(corpus))
    model_dir = train_utils.model_dir(corpus.name, args.run_id)
    os.makedirs(model_dir, exist_ok=True)

    logger.info('Get sentences...')
    train_sents, train_docs = flair_utils.standoff_to_flair_sents(corpus.train,
                                                                  tokenizer,
                                                                  verbose=True)
    dev_sents, dev_docs = flair_utils.standoff_to_flair_sents(corpus.dev,
                                                              tokenizer,
                                                              verbose=True)
    test_sents, test_docs = flair_utils.standoff_to_flair_sents(corpus.test,
                                                                tokenizer,
                                                                verbose=True)

    flair_corpus = flair_utils.FilteredCorpus(train=train_sents,
                                              dev=dev_sents,
                                              test=test_sents,
                                              ignore_sentence=_ignore_sentence)
    logger.info(flair_corpus)

    if not args.model_file:
        logger.info('Train model...')
        tagger = get_model(
            flair_corpus,
            corpus_name=args.corpus,
            pooled_contextual_embeddings=args.pooled_contextual_embeddings,
            contextual_forward_path=args.contextual_forward_path,
            contextual_backward_path=args.contextual_backward_path)

        trainer = ModelTrainer(tagger, flair_corpus)
        trainer.train(join(model_dir, 'flair'),
                      max_epochs=150,
                      monitor_train=False,
                      train_with_dev=args.train_with_dev)

        if not args.train_with_dev:
            # Model performance is judged by dev data, so we also pick the best performing model
            # according to the dev score to make our final predictions.
            tagger = SequenceTagger.load(
                join(model_dir, 'flair', 'best-model.pt'))
        else:
            # Training is stopped if train loss converges - here, we do not have a "best model" and
            # use the final model to make predictions.
            pass
    else:
        logger.info('Load existing model from {}'.format(args.model_file))
        tagger = SequenceTagger.load(args.model_file)

    logger.info('Make predictions...')
    make_predictions(tagger, flair_corpus)

    train_utils.save_predictions(
        corpus_name=corpus.name,
        run_id=args.run_id,
        train=flair_utils.flair_sents_to_standoff(train_sents, train_docs),
        dev=flair_utils.flair_sents_to_standoff(dev_sents, dev_docs),
        test=flair_utils.flair_sents_to_standoff(test_sents, test_docs))