Example no. 1
    def train(
        self,
        base_path: str,
        learning_rate: float = 0.1,
        mini_batch_size: int = 32,
        max_epochs: int = 100,
        anneal_factor: float = 0.5,
        patience: int = 4,
        train_with_dev: bool = False,
        embeddings_in_memory: bool = True,
        checkpoint: bool = False,
        save_final_model: bool = True,
        anneal_with_restarts: bool = False,
    ):

        evaluation_method = 'F1'
        if self.model.tag_type in ['pos', 'upos']:
            evaluation_method = 'accuracy'
        log.info('Evaluation method: {}'.format(evaluation_method))

        loss_txt = init_output_file(base_path, 'loss.tsv')
        with open(loss_txt, 'a') as f:
            f.write(
                'EPOCH\tTIMESTAMP\tTRAIN_LOSS\t{}\tDEV_LOSS\t{}\tTEST_LOSS\t{}\n'
                .format(Metric.tsv_header('TRAIN'), Metric.tsv_header('DEV'),
                        Metric.tsv_header('TEST')))

        weight_extractor = WeightExtractor(base_path)

        optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate)

        # annealing scheduler
        anneal_mode = 'min' if train_with_dev else 'max'
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=anneal_factor,
                                      patience=patience,
                                      mode=anneal_mode,
                                      verbose=True)

        train_data = self.corpus.train

        # if training also uses dev data, include in training set
        if train_with_dev:
            train_data.extend(self.corpus.dev)

        # At any point you can hit Ctrl + C to break out of training early.
        try:

            previous_learning_rate = learning_rate

            for epoch in range(max_epochs):
                log.info('-' * 100)

                bad_epochs = scheduler.num_bad_epochs
                for group in optimizer.param_groups:
                    learning_rate = group['lr']

                # reload last best model if annealing with restarts is enabled
                if learning_rate != previous_learning_rate and anneal_with_restarts and \
                        os.path.exists(base_path + "/best-model.pt"):
                    log.info('resetting to best model')
                    self.model.load_from_file(base_path + "/best-model.pt")

                previous_learning_rate = learning_rate

                # stop training if learning rate becomes too small
                if learning_rate < 0.001:
                    log.info('learning rate too small - quitting training!')
                    break

                if not self.test_mode:
                    random.shuffle(train_data)

                batches = [
                    train_data[x:x + mini_batch_size]
                    for x in range(0, len(train_data), mini_batch_size)
                ]

                self.model.train()

                current_loss: float = 0
                seen_sentences = 0
                modulo = max(1, int(len(batches) / 10))

                for batch_no, batch in enumerate(batches):
                    batch: List[Sentence] = batch

                    optimizer.zero_grad()

                    # compute the loss; backpropagation and the optimizer step follow below
                    loss = self.model.neg_log_likelihood(batch)

                    current_loss += loss.item()
                    seen_sentences += len(batch)

                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   5.0)
                    optimizer.step()

                    if not embeddings_in_memory:
                        self.clear_embeddings_in_batch(batch)

                    if batch_no % modulo == 0:
                        log.info(
                            "epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
                                epoch + 1, batch_no, len(batches),
                                current_loss / seen_sentences))
                        iteration = epoch * len(batches) + batch_no
                        weight_extractor.extract_weights(
                            self.model.state_dict(), iteration)

                current_loss /= len(train_data)

                # switch to eval mode
                self.model.eval()

                # if checkpointing is enabled, save model at each epoch
                if checkpoint:
                    self.model.save(base_path + "/checkpoint.pt")

                log.info('-' * 100)

                dev_score = dev_metric = None
                if not train_with_dev:
                    dev_score, dev_metric = self.evaluate(
                        self.corpus.dev,
                        base_path,
                        evaluation_method=evaluation_method,
                        embeddings_in_memory=embeddings_in_memory)

                test_score, test_metric = self.evaluate(
                    self.corpus.test,
                    base_path,
                    evaluation_method=evaluation_method,
                    embeddings_in_memory=embeddings_in_memory)

                # anneal against train loss if training with dev, otherwise anneal against dev score
                if train_with_dev:
                    scheduler.step(current_loss)
                else:
                    scheduler.step(dev_score)

                # logging info
                log.info("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(
                    epoch + 1, learning_rate, bad_epochs))
                if not train_with_dev:
                    log.info(
                        "{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}"
                        .format('DEV', dev_metric.f_score(),
                                dev_metric.accuracy(), dev_metric._tp,
                                dev_metric._fp, dev_metric._fn,
                                dev_metric._tn))
                log.info(
                    "{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}"
                    .format('TEST', test_metric.f_score(),
                            test_metric.accuracy(), test_metric._tp,
                            test_metric._fp, test_metric._fn, test_metric._tn))

                with open(loss_txt, 'a') as f:
                    dev_metric_str = dev_metric.to_tsv(
                    ) if dev_metric is not None else Metric.to_empty_tsv()
                    f.write('{}\t{:%H:%M:%S}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
                        epoch, datetime.datetime.now(), '_',
                        Metric.to_empty_tsv(), '_', dev_metric_str, '_',
                        test_metric.to_tsv()))

                # if we use dev data, remember best model based on dev evaluation score
                if not train_with_dev and dev_score == scheduler.best:
                    self.model.save(base_path + "/best-model.pt")

            # save the final model if requested
            if save_final_model:
                self.model.save(base_path + "/final-model.pt")

        except KeyboardInterrupt:
            log.info('-' * 100)
            log.info('Exiting from training early.')
            log.info('Saving model ...')
            self.model.save(base_path + "/final-model.pt")
            log.info('Done.')
Example no. 2
    def train(self,
              base_path: str,
              learning_rate: float = 0.1,
              mini_batch_size: int = 32,
              max_epochs: int = 50,
              anneal_factor: float = 0.5,
              patience: int = 5,
              train_with_dev: bool = False,
              embeddings_in_memory: bool = False,
              checkpoint: bool = False,
              save_final_model: bool = True,
              anneal_with_restarts: bool = False,
              eval_on_train: bool = True):
        """
        Trains a text classification model using the training data of the corpus.
        :param base_path: the directory to which any results should be written
        :param learning_rate: the learning rate
        :param mini_batch_size: the mini batch size
        :param max_epochs: the maximum number of epochs to train
        :param anneal_factor: the factor by which the learning rate is decreased
        :param patience: number of 'bad' epochs before the learning rate is decreased
        :param train_with_dev: whether the dev data set should be used for training
        :param embeddings_in_memory: whether embeddings should be kept in memory
        :param checkpoint: whether the model should be saved after every epoch
        :param save_final_model: whether the final model should be saved
        :param anneal_with_restarts: whether the best model should be reloaded once the learning rate changes
        :param eval_on_train: whether evaluation metrics should be calculated on the training data set
        """

        loss_txt = init_output_file(base_path, 'loss.tsv')
        with open(loss_txt, 'a') as f:
            f.write(
                'EPOCH\tTIMESTAMP\tTRAIN_LOSS\t{}\tDEV_LOSS\t{}\tTEST_LOSS\t{}\n'
                .format(Metric.tsv_header('TRAIN'), Metric.tsv_header('DEV'),
                        Metric.tsv_header('TEST')))

        weight_extractor = WeightExtractor(base_path)

        optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate)

        anneal_mode = 'min' if train_with_dev else 'max'
        scheduler: ReduceLROnPlateau = ReduceLROnPlateau(optimizer,
                                                         factor=anneal_factor,
                                                         patience=patience,
                                                         mode=anneal_mode)

        train_data = self.corpus.train

        # if training also uses dev data, include in training set
        if train_with_dev:
            train_data.extend(self.corpus.dev)

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            previous_learning_rate = learning_rate

            for epoch in range(max_epochs):
                log.info('-' * 100)

                bad_epochs = scheduler.num_bad_epochs
                for group in optimizer.param_groups:
                    learning_rate = group['lr']

                # reload last best model if annealing with restarts is enabled
                if learning_rate != previous_learning_rate and anneal_with_restarts and \
                        os.path.exists(base_path + "/best-model.pt"):
                    log.info('Resetting to best model ...')
                    self.model.load_from_file(base_path + "/best-model.pt")

                previous_learning_rate = learning_rate

                # stop training if learning rate becomes too small
                if learning_rate < 0.001:
                    log.info('Learning rate too small - quitting training!')
                    break

                if not self.test_mode:
                    random.shuffle(train_data)

                self.model.train()

                batches = [
                    train_data[x:x + mini_batch_size]
                    for x in range(0, len(train_data), mini_batch_size)
                ]

                current_loss: float = 0
                seen_sentences = 0
                modulo = max(1, int(len(batches) / 10))

                for batch_no, batch in enumerate(batches):
                    scores = self.model.forward(batch)
                    loss = self.model.calculate_loss(scores, batch)

                    optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   5.0)
                    optimizer.step()

                    seen_sentences += len(batch)
                    current_loss += loss.item()

                    clear_embeddings(
                        batch,
                        also_clear_word_embeddings=not embeddings_in_memory)

                    if batch_no % modulo == 0:
                        log.info(
                            "epoch {0} - iter {1}/{2} - loss {3:.8f}".format(
                                epoch + 1, batch_no, len(batches),
                                current_loss / seen_sentences))
                        iteration = epoch * len(batches) + batch_no
                        weight_extractor.extract_weights(
                            self.model.state_dict(), iteration)

                current_loss /= len(train_data)

                self.model.eval()

                # if checkpointing is enabled, save model at each epoch
                if checkpoint:
                    self.model.save(base_path + "/checkpoint.pt")

                log.info('-' * 100)
                log.info("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format(
                    epoch + 1, learning_rate, bad_epochs))

                dev_metric = train_metric = None
                dev_loss = '_'
                train_loss = current_loss

                if eval_on_train:
                    train_metric, train_loss = self._calculate_evaluation_results_for(
                        'TRAIN', self.corpus.train, embeddings_in_memory,
                        mini_batch_size)

                if not train_with_dev:
                    dev_metric, dev_loss = self._calculate_evaluation_results_for(
                        'DEV', self.corpus.dev, embeddings_in_memory,
                        mini_batch_size)

                with open(loss_txt, 'a') as f:
                    train_metric_str = train_metric.to_tsv(
                    ) if train_metric is not None else Metric.to_empty_tsv()
                    dev_metric_str = dev_metric.to_tsv(
                    ) if dev_metric is not None else Metric.to_empty_tsv()
                    f.write('{}\t{:%H:%M:%S}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
                        epoch, datetime.datetime.now(), train_loss,
                        train_metric_str, dev_loss, dev_metric_str, '_',
                        Metric.to_empty_tsv()))

                # anneal against train loss if training with dev, otherwise anneal against dev score
                if train_with_dev:
                    scheduler.step(current_loss)
                    current_score = train_metric.f_score()
                else:
                    scheduler.step(dev_metric.f_score())
                    current_score = dev_metric.f_score()

                # if we use dev data, remember best model based on dev evaluation score
                if not train_with_dev and current_score == scheduler.best:
                    self.model.save(base_path + "/best-model.pt")

            if save_final_model:
                self.model.save(base_path + "/final-model.pt")

            log.info('-' * 100)
            log.info('Testing using best model ...')

            self.model.eval()

            if os.path.exists(base_path + "/best-model.pt"):
                self.model = TextClassifier.load_from_file(base_path +
                                                           "/best-model.pt")

            test_metric, test_loss = self.evaluate(
                self.corpus.test,
                mini_batch_size=mini_batch_size,
                eval_class_metrics=True,
                embeddings_in_memory=embeddings_in_memory,
                metric_name='TEST')

            test_metric.print()
            self.model.train()

            log.info('-' * 100)

        except KeyboardInterrupt:
            log.info('-' * 100)
            log.info('Exiting from training early.')
            log.info('Saving model ...')
            # the with statement closes the file, so no explicit close() is needed
            with open(base_path + "/final-model.pt", 'wb') as model_save_file:
                torch.save(self.model, model_save_file, pickle_protocol=4)
            log.info('Done.')
Example no. 3
    def train(self,
              base_path: Union[Path, str],
              evaluation_metric: EvaluationMetric = EvaluationMetric.MICRO_F1_SCORE,
              learning_rate: float = 0.1,
              mini_batch_size: int = 32,
              eval_mini_batch_size: int = None,
              max_epochs: int = 100,
              anneal_factor: float = 0.5,
              patience: int = 3,
              anneal_against_train_loss: bool = True,
              train_with_dev: bool = False,
              monitor_train: bool = False,
              embeddings_in_memory: bool = True,
              checkpoint: bool = False,
              save_final_model: bool = True,
              anneal_with_restarts: bool = False,
              test_mode: bool = False,
              param_selection_mode: bool = False,
              **kwargs) -> dict:

        if eval_mini_batch_size is None:
            eval_mini_batch_size = mini_batch_size

        # cast string to Path
        if type(base_path) is str:
            base_path = Path(base_path)

        add_file_handler(log, base_path / 'training.log')

        log_line(log)
        log.info(f'Evaluation method: {evaluation_metric.name}')

        if not param_selection_mode:
            loss_txt = init_output_file(base_path, 'loss.tsv')
            with open(loss_txt, 'a') as f:
                f.write(
                    f'EPOCH\tTIMESTAMP\tBAD_EPOCHS\tLEARNING_RATE\tTRAIN_LOSS\t{Metric.tsv_header("TRAIN")}\tDEV_LOSS\t{Metric.tsv_header("DEV")}'
                    f'\tTEST_LOSS\t{Metric.tsv_header("TEST")}\n')

            weight_extractor = WeightExtractor(base_path)

        optimizer = self.optimizer(self.model.parameters(),
                                   lr=learning_rate,
                                   **kwargs)
        if self.optimizer_state is not None:
            optimizer.load_state_dict(self.optimizer_state)

        # annealing scheduler
        anneal_mode = 'min' if anneal_against_train_loss else 'max'
        if isinstance(optimizer, (AdamW, SGDW)):
            scheduler = ReduceLRWDOnPlateau(optimizer,
                                            factor=anneal_factor,
                                            patience=patience,
                                            mode=anneal_mode,
                                            verbose=True)
        else:
            scheduler = ReduceLROnPlateau(optimizer,
                                          factor=anneal_factor,
                                          patience=patience,
                                          mode=anneal_mode,
                                          verbose=True)
        if self.scheduler_state is not None:
            scheduler.load_state_dict(self.scheduler_state)

        train_data = self.corpus.train

        # if training also uses dev data, include in training set
        if train_with_dev:
            train_data.extend(self.corpus.dev)

        dev_score_history = []
        dev_loss_history = []
        train_loss_history = []

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            previous_learning_rate = learning_rate

            for epoch in range(self.epoch, max_epochs + self.epoch):
                log_line(log)

                try:
                    bad_epochs = scheduler.num_bad_epochs
                except AttributeError:
                    # some scheduler implementations do not track bad epochs
                    bad_epochs = 0
                for group in optimizer.param_groups:
                    learning_rate = group['lr']

                # reload last best model if annealing with restarts is enabled
                if learning_rate != previous_learning_rate and anneal_with_restarts and \
                        (base_path / 'best-model.pt').exists():
                    log.info('resetting to best model')
                    self.model.load_from_file(base_path / 'best-model.pt')

                previous_learning_rate = learning_rate

                # stop training if learning rate becomes too small
                if learning_rate < 0.0001:
                    log_line(log)
                    log.info('learning rate too small - quitting training!')
                    log_line(log)
                    break

                if not test_mode:
                    random.shuffle(train_data)

                batches = [
                    train_data[x:x + mini_batch_size]
                    for x in range(0, len(train_data), mini_batch_size)
                ]

                self.model.train()

                train_loss: float = 0
                seen_sentences = 0
                modulo = max(1, int(len(batches) / 10))

                for batch_no, batch in enumerate(batches):
                    loss = self.model.forward_loss(batch)

                    optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   5.0)
                    optimizer.step()

                    seen_sentences += len(batch)
                    train_loss += loss.item()

                    clear_embeddings(
                        batch,
                        also_clear_word_embeddings=not embeddings_in_memory)

                    if batch_no % modulo == 0:
                        log.info(
                            f'epoch {epoch + 1} - iter {batch_no}/{len(batches)} - loss '
                            f'{train_loss / seen_sentences:.8f}')
                        iteration = epoch * len(batches) + batch_no
                        if not param_selection_mode:
                            weight_extractor.extract_weights(
                                self.model.state_dict(), iteration)

                train_loss /= len(train_data)

                self.model.eval()

                log_line(log)
                log.info(
                    f'EPOCH {epoch + 1} done: loss {train_loss:.4f} - lr {learning_rate:.4f} - bad epochs {bad_epochs}'
                )

                dev_metric = None
                dev_loss = '_'

                train_metric = None
                if monitor_train:
                    train_metric, train_loss = self._calculate_evaluation_results_for(
                        'TRAIN', self.corpus.train, evaluation_metric,
                        embeddings_in_memory, eval_mini_batch_size)

                if not train_with_dev:
                    dev_metric, dev_loss = self._calculate_evaluation_results_for(
                        'DEV', self.corpus.dev, evaluation_metric,
                        embeddings_in_memory, eval_mini_batch_size)

                if not param_selection_mode:
                    test_metric, test_loss = self._calculate_evaluation_results_for(
                        'TEST', self.corpus.test, evaluation_metric,
                        embeddings_in_memory, eval_mini_batch_size,
                        base_path / 'test.tsv')

                    with open(loss_txt, 'a') as f:
                        train_metric_str = train_metric.to_tsv(
                        ) if train_metric is not None else Metric.to_empty_tsv(
                        )
                        dev_metric_str = dev_metric.to_tsv(
                        ) if dev_metric is not None else Metric.to_empty_tsv()
                        test_metric_str = test_metric.to_tsv(
                        ) if test_metric is not None else Metric.to_empty_tsv(
                        )
                        f.write(
                            f'{epoch}\t{datetime.datetime.now():%H:%M:%S}\t{bad_epochs}\t{learning_rate:.4f}\t'
                            f'{train_loss}\t{train_metric_str}\t{dev_loss}\t{dev_metric_str}\t_\t{test_metric_str}\n'
                        )

                # calculate scores using dev data if available
                dev_score = 0.
                if not train_with_dev:
                    if evaluation_metric == EvaluationMetric.MACRO_ACCURACY:
                        dev_score = dev_metric.macro_avg_accuracy()
                    elif evaluation_metric == EvaluationMetric.MICRO_ACCURACY:
                        dev_score = dev_metric.micro_avg_accuracy()
                    elif evaluation_metric == EvaluationMetric.MACRO_F1_SCORE:
                        dev_score = dev_metric.macro_avg_f_score()
                    else:
                        dev_score = dev_metric.micro_avg_f_score()

                    # append dev score to score history
                    dev_score_history.append(dev_score)
                    dev_loss_history.append(dev_loss.item())

                # anneal against train loss if so configured, otherwise anneal against dev score
                current_score = train_loss if anneal_against_train_loss else dev_score

                scheduler.step(current_score)

                train_loss_history.append(train_loss)

                # if checkpoint is enable, save model at each epoch
                if checkpoint and not param_selection_mode:
                    self.model.save_checkpoint(base_path / 'checkpoint.pt',
                                               optimizer.state_dict(),
                                               scheduler.state_dict(),
                                               epoch + 1, train_loss)

                # if we use dev data, remember best model based on dev evaluation score
                if not train_with_dev and not param_selection_mode and current_score == scheduler.best:
                    self.model.save(base_path / 'best-model.pt')

            # save the final model if requested (skipped in param selection mode)
            if save_final_model and not param_selection_mode:
                self.model.save(base_path / 'final-model.pt')

        except KeyboardInterrupt:
            log_line(log)
            log.info('Exiting from training early.')
            if not param_selection_mode:
                log.info('Saving model ...')
                self.model.save(base_path / 'final-model.pt')
                log.info('Done.')

        # test best model on test data
        final_score = self.final_test(base_path, embeddings_in_memory,
                                      evaluation_metric, eval_mini_batch_size)

        return {
            'test_score': final_score,
            'dev_score_history': dev_score_history,
            'train_loss_history': train_loss_history,
            'dev_loss_history': dev_loss_history
        }
Example no. 4
from __future__ import absolute_import
Example no. 5
from pathlib import Path