def __init__(self,
                 expt_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100,
                 optimizer=Optimizer(optim.Adam, max_grad_norm=5)):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = optimizer
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size
        self.input_vocab_file = os.path.join(self.expt_dir, 'input_vocab')
        self.output_vocab_file = os.path.join(self.expt_dir, 'output_vocab')
    def __init__(self,
                 expt_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=1000,
                 print_every=100,
                 tensorboard=True,
                 batch_adv_loss=NLLLoss()):
        self._trainer = "Adversarial Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)

        self.writer = SummaryWriter(log_dir=expt_dir) if tensorboard else None

        self.batch_adv_loss = batch_adv_loss
    def __init__(self,
                 expt_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)
Example #4
def _evaluate(checkpoint_path,
              test_paths,
              metric_names=[
                  "word accuracy", "sequence accuracy", "final target accuracy"
              ],
              loss_names=["nll"],
              max_len=50,
              batch_size=32,
              is_predict_eos=True,
              content_method=None):
    """Evaluates the models saved in a checkpoint."""
    results = []

    print("loading checkpoint from {}".format(os.path.join(checkpoint_path)))
    checkpoint = Checkpoint.load(checkpoint_path)
    seq2seq = checkpoint.model

    tabular_data_fields = get_tabular_data_fields(
        content_method=content_method, is_predict_eos=is_predict_eos)

    dic_data_fields = dict(tabular_data_fields)
    src = dic_data_fields["src"]
    tgt = dic_data_fields["tgt"]

    src.vocab = checkpoint.input_vocab
    tgt.vocab = checkpoint.output_vocab
    tgt.eos_id = tgt.vocab.stoi[tgt.SYM_EOS]
    tgt.sos_id = tgt.vocab.stoi[tgt.SYM_SOS]

    for test_path in test_paths:
        test = get_data(test_path, max_len, tabular_data_fields)

        metrics = get_metrics(metric_names, src, tgt, is_predict_eos)
        losses, loss_weights = get_losses(loss_names, tgt, is_predict_eos)

        evaluator = Evaluator(loss=losses,
                              batch_size=batch_size,
                              metrics=metrics)
        data_func = SupervisedTrainer.get_batch_data
        losses, metrics = evaluator.evaluate(model=seq2seq,
                                             data=test,
                                             get_batch_data=data_func)

        total_loss, log_msg, _ = SupervisedTrainer.get_losses(
            losses, metrics, 0)

        dataset = test_path.split('/')[-1].split('.')[0]
        results.append([dataset, total_loss] +
                       [metric.get_val() for metric in metrics])

    results_df = pd.DataFrame(results,
                              columns=["Dataset", "Loss"] +
                              [metric.name for metric in metrics])

    results_df = results_df.melt(id_vars=['Dataset'],
                                 var_name="Metric",
                                 value_name='Value')

    return results_df
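A minimal usage sketch (not part of the original example); the checkpoint directory and test file paths below are hypothetical placeholders, and the call assumes the helpers used by _evaluate are importable:

# Hypothetical paths; _evaluate returns a long-format pandas DataFrame
# with Dataset / Metric / Value columns, as built above.
results_df = _evaluate(
    checkpoint_path="experiment/checkpoints/2020_01_01_00_00_00",  # placeholder
    test_paths=["data/test_easy.tsv", "data/test_hard.tsv"],       # placeholders
    metric_names=["word accuracy", "sequence accuracy"],
    loss_names=["nll"],
    batch_size=32)
print(results_df.pivot_table(index="Dataset", columns="Metric", values="Value"))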
Example #5
    def test_set_eval_mode(self, mock_eval, mock_call):
        """ Make sure that evaluation is done in evaluation mode. """
        mock_mgr = MagicMock()
        mock_mgr.attach_mock(mock_eval, 'eval')
        mock_mgr.attach_mock(mock_call, 'call')

        evaluator = Evaluator()
        evaluator.evaluate(self.seq2seq, self.dataset)

        expected_calls = [call.eval()] + \
            self.dataset.num_batches(evaluator.batch_size) * [call.call(ANY, ANY, volatile=ANY)]
        self.assertEquals(expected_calls, mock_mgr.mock_calls)
    def test_set_eval_mode(self, mock_eval, mock_call):
        """ Make sure that evaluation.txt is done in evaluation.txt mode. """
        mock_mgr = MagicMock()
        mock_mgr.attach_mock(mock_eval, 'eval')
        mock_mgr.attach_mock(mock_call, 'call')

        evaluator = Evaluator(batch_size=64)
        with patch('seq2seq.evaluator.evaluator.torch.stack', return_value=None), \
                patch('seq2seq.loss.NLLLoss.eval_batch', return_value=None):
            evaluator.evaluate(self.seq2seq, self.dataset)

        num_batches = int(math.ceil(len(self.dataset) / evaluator.batch_size))
        expected_calls = [call.eval()] + num_batches * [call.call(ANY, ANY, ANY)]
        self.assertEquals(expected_calls, mock_mgr.mock_calls)
    def __init__(self,
                 model_dir='experiment',
                 best_model_dir='experiment/best',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100,
                 max_epochs=5,
                 max_steps=10000,
                 max_checkpoints_num=5,
                 best_ppl=100000.0,
                 device=None):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every
        self.max_steps = max_steps
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.best_ppl = best_ppl
        self.max_checkpoints_num = max_checkpoints_num
        self.device = device
        self.evaluator = Evaluator(loss=self.loss,
                                   batch_size=batch_size,
                                   device=device)

        if not os.path.isabs(model_dir):
            model_dir = os.path.join(os.getcwd(), model_dir)
        self.model_dir = model_dir
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        if not os.path.isabs(best_model_dir):
            best_model_dir = os.path.join(os.getcwd(), best_model_dir)
        self.best_model_dir = best_model_dir
        if not os.path.exists(self.best_model_dir):
            os.makedirs(self.best_model_dir)

        self.model_checkpoints = []
        self.best_model_checkpoints = []

        self.logger = logging.getLogger(__name__)
Example #8
    def test_set_eval_mode(self, mock_eval, mock_call):
        """ Make sure that evaluation is done in evaluation mode. """
        mock_mgr = MagicMock()
        mock_mgr.attach_mock(mock_eval, 'eval')
        mock_mgr.attach_mock(mock_call, 'call')

        evaluator = Evaluator(batch_size=64)
Example #9
    def __init__(self, experiment_directory='./experiment', loss=None, batch_size=64,
                random_seed=None, checkpoint_every=100, print_every=100):
        if loss is None:
            loss = NLLLoss()
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)

        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every
        self.batch_size = batch_size
        self.experiment_directory = experiment_directory

        if not os.path.exists(self.experiment_directory):
            os.makedirs(self.experiment_directory)
Example #10
    def __init__(self,
                 expt_dir='experiment',
                 loss=[NLLLoss()],
                 loss_weights=None,
                 metrics=[],
                 batch_size=64,
                 eval_batch_size=128,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100,
                 early_stopper=None,
                 anneal_middropout=0,
                 min_middropout=0.01):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        k = NLLLoss()
        self.loss = loss
        self.metrics = metrics
        self.loss_weights = loss_weights or len(loss) * [1.]
        self.evaluator = Evaluator(loss=self.loss,
                                   metrics=self.metrics,
                                   batch_size=eval_batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every
        self.anneal_middropout = anneal_middropout
        self.min_middropout = 0 if self.anneal_middropout == 0 else min_middropout

        self.early_stopper = early_stopper
        if early_stopper is not None:
            assert self.early_stopper.mode == "min", "Can currently only be used with the loss, please use mode='min'"

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)
Example #11
def load_model_data_evaluator(expt_dir, model_name, data_path, batch_size=128):
    checkpoint_path = os.path.join(expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, model_name)
    checkpoint = Checkpoint.load(checkpoint_path)
    model = checkpoint.model
    input_vocab = checkpoint.input_vocab
    output_vocab = checkpoint.output_vocab

    data_all, data_sml, data_med, data_lrg, fields_inp, src, tgt, src_adv, idx_field = load_data(data_path)

    src.vocab = input_vocab
    tgt.vocab = output_vocab
    src_adv.vocab = input_vocab

    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()
    evaluator = Evaluator(loss=loss, batch_size=batch_size)

    return model, data_all, data_sml, data_med, data_lrg, evaluator, fields_inp
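A hedged usage sketch (not from the original source); the experiment directory, model name, and data path are placeholders, and the evaluate call assumes the two-value (loss, accuracy) return shown in the later evaluation example:

# Placeholder arguments; adjust to your experiment layout.
model, data_all, data_sml, data_med, data_lrg, evaluator, fields_inp = \
    load_model_data_evaluator(expt_dir="experiment",
                              model_name="2020_01_01_00_00_00",
                              data_path="data/valid.tsv",
                              batch_size=128)
loss, accuracy = evaluator.evaluate(model, data_all)
print("Loss: %f, accuracy: %f" % (loss, accuracy))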
    def __init__(self, expt_dir='experiment', loss=NLLLoss(), batch_size=64,
                 random_seed=None,
                 checkpoint_every=100, print_every=100):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)
class SupervisedTrainer(object):
    """ The SupervisedTrainer class helps in setting up a training framework in a
    supervised setting.

    Args:
        expt_dir (optional, str): experiment Directory to store details of the experiment,
            by default it makes a folder in the current directory to store the details (default: `experiment`).
        loss (seq2seq.loss.loss.Loss, optional): loss for training, (default: seq2seq.loss.NLLLoss)
        batch_size (int, optional): batch size for experiment, (default: 64)
        checkpoint_every (int, optional): number of batches to checkpoint after, (default: 100)
    """
    def __init__(self,
                 expt_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every
        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)

    def _train_batch(self,
                     model,
                     batch,
                     vocab,
                     num_antecedents=400,
                     teacher_forcing_ratio=0):
        loss = self.loss
        # Forward propagation
        result, target, _ = model(batch, vocab, num_antecedents)

        # Get loss
        loss.reset()
        result = [torch.log(sample) for sample in result]
        for i in range(len(result)):
            for j in range(len(result[i])):
                if target[i][j] > 0 and target[i][j] < 400:
                    arg1 = result[i][j].unsqueeze(0)
                    arg2 = torch.tensor([target[i][j]])
                    if torch.cuda.is_available():
                        arg2 = arg2.cuda()
                    loss.acc_loss += loss.criterion(arg1, arg2)
                    loss.norm_term += 1

        # Backward propagation
        model.zero_grad()

        lvalue = loss.get_loss()
        if lvalue >= 0:
            loss.backward()
            self.optimizer.step()
        else:
            raise AssertionError("NAN Triggered!")
        return lvalue

    def _train_epoches(self,
                       data_train,
                       data_valid,
                       model,
                       n_epochs,
                       start_epoch,
                       start_step,
                       save_file=False,
                       dev_data=None,
                       teacher_forcing_ratio=0,
                       log_dir=None,
                       embed_file=None,
                       num_antecedents=400):
        log = self.logger
        # embed = Embed(embed_file)
        embed = []

        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch
        plan_loss_total = 0
        construct_loss_total = 0

        device = torch.device('cuda', 0) if torch.cuda.is_available() else None
        batch_generator = data_train.__iter__()

        steps_per_epoch = len(batch_generator)
        print("Steps per epoch: ", steps_per_epoch)
        total_steps = steps_per_epoch * n_epochs
        """
        dev_loss, recall, precision, F = self.evaluator.evaluate(model, data_train,
                                                                 log_dir=log_dir,
                                                                 cur_step=0)
        exit(0)
        """

        step = start_step
        step_elapsed = 0
        for epoch in range(start_epoch, n_epochs + 1):
            log_file = open(log_dir + "/log.txt", "a+")
            print("Epoch: %d, Step: %d" % (epoch, step))

            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1

                all_loss = self._train_batch(model,
                                             batch,
                                             data_train.vocab,
                                             num_antecedents=num_antecedents)
                loss = all_loss
                # FOR NAN DEBUG
                if not loss >= 0:
                    Checkpoint(model=model,
                               optimizer=self.optimizer,
                               epoch=epoch,
                               step=step,
                               input_vocab=data_train.vocab).save(
                                   self.expt_dir)
                    print("Nan Triggered! Model has been saved.")
                    exit(0)

                # Record average loss
                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Step %d, Progress: %d%%, Train %s: %.2f, ' % (
                        step, step / total_steps * 100, self.loss.name,
                        print_loss_avg)
                    log.info(log_msg)

                if step % 200 == 0:
                    if log_file:
                        log_file.write("Step " + str(step) + '\n')
                    dev_loss, recall, precision, F = self.evaluator.evaluate(
                        model,
                        data_valid,
                        cur_step=step,
                        log_dir=log_dir,
                        log_file=log_file)
                    # self.optimizer.update(dev_loss, epoch)
                    # log_msg = "Step %d, Dev %s: %.4f, Accuracy: %.4f" % (step, self.loss.name, dev_loss, accuracy)
                    log_msg = "Step %d, Dev %s: %.4f, Recall: %.4f, Precision: %.4f, F: %.4f" % (
                        step, self.loss.name, dev_loss, recall, precision, F)
                    log.info(log_msg)
                    model.train(mode=True)

                # Checkpoint
                """
                if step % self.checkpoint_every == 0 or step == total_steps:
                    Checkpoint(model=model,
                               optimizer=self.optimizer,
                               epoch=epoch, step=step,
                               input_vocab=data.fields[seq2seq.src_field_name].vocab,
                               output_vocab=data.fields[seq2seq.tgt_field_name].vocab).save(self.expt_dir)
                """
            if epoch % 1 == 0 and save_file:
                # Note: `data` is undefined in this scope; use the training
                # dataset's vocab, matching the other Checkpoint saves here.
                Checkpoint(model=model,
                           optimizer=self.optimizer,
                           epoch=epoch,
                           step=step,
                           input_vocab=data_train.vocab).save(self.expt_dir)

            if step_elapsed == 0: continue

            epoch_loss_avg = epoch_loss_total / min(steps_per_epoch,
                                                    step - start_step)
            plan_loss_avg = plan_loss_total / min(steps_per_epoch,
                                                  step - start_step)
            construct_loss_avg = construct_loss_total / min(
                steps_per_epoch, step - start_step)
            epoch_loss_total = 0
            plan_loss_total = 0
            construct_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f  Plan Loss: %.4f" % (
                epoch, self.loss.name, epoch_loss_avg, plan_loss_avg)
            if dev_data is not None:
                if log_file:
                    log_file.write("Step " + str(step) + '\n')
                    log_file.write("Train Average Loss: " +
                                   str(epoch_loss_avg) + " Plan Loss: " +
                                   str(plan_loss_avg) + " Construct Loss: " +
                                   str(construct_loss_avg) + '\n')
                dev_loss, accuracy = self.evaluator.evaluate(
                    model,
                    dev_data,
                    log_dir=log_dir,
                    cur_step=step,
                    log_file=log_file)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (
                    self.loss.name, dev_loss, accuracy)
                model.train(mode=True)
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            log.info(log_msg)
            log_file.close()

        Checkpoint(model=model,
                   optimizer=self.optimizer,
                   epoch=n_epochs,
                   step=step,
                   input_vocab=data_train.vocab).save(self.expt_dir)

    def train(self,
              model,
              data_train,
              data_valid,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0,
              src_vocab=None,
              cpt_vocab=None,
              tgt_vocab=None,
              use_concept=False,
              vocabs=None,
              save_file=False,
              log_dir=None,
              embed_file=None,
              num_antecedents=400):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on, if `resume=True`, it would be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume(bool, optional): resume training with the latest checkpoint, (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
            log_dir (str, optional): directory to write training logs to (default None)
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set optimizing parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters(),
                                                 weight_decay=0),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data_train,
                            data_valid,
                            model,
                            num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio,
                            log_dir=log_dir,
                            embed_file=embed_file,
                            save_file=save_file,
                            num_antecedents=num_antecedents)
        return model
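A hedged usage sketch for this trainer (not in the original code); the model and dataset objects are placeholders assumed to match the interfaces used in _train_epoches above:

# Placeholder model / datasets; log_dir must exist because the trainer opens
# log_dir + "/log.txt" at the start of every epoch.
trainer = SupervisedTrainer(expt_dir="experiment",
                            loss=NLLLoss(),
                            batch_size=64,
                            checkpoint_every=100,
                            print_every=100)
trained_model = trainer.train(model,          # seq2seq-style model (placeholder)
                              data_train,     # training dataset (placeholder)
                              data_valid,     # validation dataset (placeholder)
                              num_epochs=5,
                              save_file=True,
                              log_dir="experiment/logs",
                              num_antecedents=400)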
Example #14
class SupervisedTrainer(object):
    """ The SupervisedTrainer class helps in setting up a training framework in a
    supervised setting.

    Args:
        expt_dir (optional, str): experiment Directory to store details of the experiment,
            by default it makes a folder in the current directory to store the details (default: `experiment`).
        loss (list, optional): list of seq2seq.loss.Loss objects for training (default: [seq2seq.loss.NLLLoss])
        metrics (list, optional): list of seq2seq.metric.metric objects to be computed during evaluation
        batch_size (int, optional): batch size for experiment, (default: 64)
        checkpoint_every (int, optional): number of epochs to checkpoint after, (default: 100)
        print_every (int, optional): number of iterations to print after, (default: 100)
    """
    def __init__(self,
                 expt_dir='experiment',
                 loss=[NLLLoss()],
                 loss_weights=None,
                 metrics=[],
                 batch_size=64,
                 eval_batch_size=128,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        k = NLLLoss()
        self.loss = loss
        self.metrics = metrics
        self.loss_weights = loss_weights or len(loss) * [1.]
        self.evaluator = Evaluator(loss=self.loss,
                                   metrics=self.metrics,
                                   batch_size=eval_batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)

    def _train_batch(self, input_variable, input_lengths, target_variable,
                     model, teacher_forcing_ratio):
        loss = self.loss

        # Forward propagation
        decoder_outputs, decoder_hidden, other = model(
            input_variable,
            input_lengths,
            target_variable['decoder_output'],
            teacher_forcing_ratio=teacher_forcing_ratio)

        losses = self.evaluator.compute_batch_loss(decoder_outputs,
                                                   decoder_hidden, other,
                                                   target_variable)

        # Backward propagation
        for i, loss in enumerate(losses[:-1], 0):
            loss.scale_loss(self.loss_weights[i])
            loss.backward(retain_graph=True)

        losses[-1].scale_loss(self.loss_weights[-1])
        losses[-1].backward()

        self.optimizer.step()
        model.zero_grad()

        return losses

    def _train_epoches(self,
                       data,
                       model,
                       n_epochs,
                       start_epoch,
                       start_step,
                       dev_data=None,
                       monitor_data=[],
                       teacher_forcing_ratio=0,
                       top_k=5):
        log = self.logger

        print_loss_total = defaultdict(float)  # Reset every print_every
        epoch_loss_total = defaultdict(float)  # Reset every epoch
        epoch_loss_avg = defaultdict(float)
        print_loss_avg = defaultdict(float)

        device = None if torch.cuda.is_available() else -1
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data,
            batch_size=self.batch_size,
            sort=False,
            sort_within_batch=True,
            sort_key=lambda x: len(x.src),
            device=device,
            repeat=False)

        steps_per_epoch = len(batch_iterator)
        total_steps = steps_per_epoch * n_epochs

        step = start_step
        step_elapsed = 0

        # store initial model to be sure at least one model is stored
        val_data = dev_data or data
        losses, metrics = self.evaluator.evaluate(model, val_data,
                                                  self.get_batch_data)

        total_loss, log_msg, model_name = self.get_losses(
            losses, metrics, step)

        loss_best = top_k * [total_loss]
        best_checkpoints = top_k * [None]
        best_checkpoints[0] = model_name

        Checkpoint(
            model=model,
            optimizer=self.optimizer,
            epoch=start_epoch,
            step=start_step,
            input_vocab=data.fields[seq2seq.src_field_name].vocab,
            output_vocab=data.fields[seq2seq.tgt_field_name].vocab).save(
                self.expt_dir, name=model_name)

        for epoch in range(start_epoch, n_epochs + 1):
            log.debug("Epoch: %d, Step: %d" % (epoch, step))

            batch_generator = batch_iterator.__iter__()

            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1

                input_variables, input_lengths, target_variables = self.get_batch_data(
                    batch)

                # compute batch loss
                losses = self._train_batch(input_variables,
                                           input_lengths.tolist(),
                                           target_variables, model,
                                           teacher_forcing_ratio)

                # Record average loss
                for loss in losses:
                    name = loss.log_name
                    print_loss_total[name] += loss.get_loss()
                    epoch_loss_total[name] += loss.get_loss()

                # print log info according to print_every parm
                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    for loss in losses:
                        name = loss.log_name
                        print_loss_avg[
                            name] = print_loss_total[name] / self.print_every
                        print_loss_total[name] = 0

                    train_log_msg = ' '.join([
                        '%s: %.4f' % (loss.log_name, loss.get_loss())
                        for loss in losses
                    ])

                    m_logs = {}
                    # compute vals for all monitored sets
                    for m_data in monitor_data:
                        losses, metrics = self.evaluator.evaluate(
                            model, monitor_data[m_data], self.get_batch_data)
                        total_loss, log_msg, model_name = self.get_losses(
                            losses, metrics, step)
                        m_logs[m_data] = ' '.join([
                            '%s: %.4f' % (loss.log_name, loss.get_loss())
                            for loss in losses
                        ])

                    all_losses = ' '.join(
                        ['%s %s' % (name, m_logs[name]) for name in m_logs])

                    log_msg = 'Progress %d%%, Train %s, %s' % (
                        step / total_steps * 100, train_log_msg, all_losses)

                    log.info(log_msg)

                # check if new model should be saved
                if step % self.checkpoint_every == 0 or step == total_steps:
                    # compute dev loss
                    losses, metrics = self.evaluator.evaluate(
                        model, val_data, self.get_batch_data)
                    total_loss, log_msg, model_name = self.get_losses(
                        losses, metrics, step)

                    max_eval_loss = max(loss_best)
                    if total_loss < max_eval_loss:
                        index_max = loss_best.index(max_eval_loss)
                        # rm prev model
                        if best_checkpoints[index_max] is not None:
                            shutil.rmtree(
                                os.path.join(self.expt_dir,
                                             best_checkpoints[index_max]))
                        best_checkpoints[index_max] = model_name
                        loss_best[index_max] = total_loss

                        # save model
                        Checkpoint(model=model,
                                   optimizer=self.optimizer,
                                   epoch=epoch,
                                   step=step,
                                   input_vocab=data.fields[
                                       seq2seq.src_field_name].vocab,
                                   output_vocab=data.fields[
                                       seq2seq.tgt_field_name].vocab).save(
                                           self.expt_dir, name=model_name)

            if step_elapsed == 0: continue

            for loss in losses:
                epoch_loss_avg[
                    loss.log_name] = epoch_loss_total[loss.log_name] / min(
                        steps_per_epoch, step - start_step)
                epoch_loss_total[loss.log_name] = 0

            loss_msg = ' '.join([
                '%s: %.4f' % (loss.log_name, loss.get_loss())
                for loss in losses
            ])
            log_msg = "Finished epoch %d: Train %s" % (epoch, loss_msg)

            if dev_data is not None:
                losses, metrics = self.evaluator.evaluate(
                    model, dev_data, self.get_batch_data)
                loss_total, log_, model_name = self.get_losses(
                    losses, metrics, step)

                self.optimizer.update(loss_total, epoch)
                log_msg += ", Dev " + log_
                model.train(mode=True)
            else:
                self.optimizer.update(sum(epoch_loss_avg.values()),
                                      epoch)  # TODO check if this makes sense!

            log.info(log_msg)

    def train(self,
              model,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              monitor_data={},
              optimizer=None,
              teacher_forcing_ratio=0,
              learning_rate=0.001,
              checkpoint_path=None,
              top_k=5):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on, if `resume=True`, it would be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume(bool, optional): resume training with the latest checkpoint, (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
            learning_rate (float, optional): learning rate used by the optimizer (default 0.001)
            checkpoint_path (str, optional): path to load checkpoint from in case training should be resumed
            top_k (int): how many models should be stored during training
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            resume_checkpoint = Checkpoint.load(checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set optimizing parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0

            def get_optim(optim_name):
                optims = {
                    'adam': optim.Adam,
                    'adagrad': optim.Adagrad,
                    'adadelta': optim.Adadelta,
                    'adamax': optim.Adamax,
                    'rmsprop': optim.RMSprop,
                    'sgd': optim.SGD,
                    None: optim.Adam
                }
                return optims[optim_name]

            self.optimizer = Optimizer(get_optim(optimizer)(model.parameters(),
                                                            lr=learning_rate),
                                       max_grad_norm=5)

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            model,
                            num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            monitor_data=monitor_data,
                            teacher_forcing_ratio=teacher_forcing_ratio,
                            top_k=top_k)
        return model

    @staticmethod
    def get_batch_data(batch):
        input_variables, input_lengths = getattr(batch, seq2seq.src_field_name)
        target_variables = {
            'decoder_output': getattr(batch, seq2seq.tgt_field_name)
        }
        return input_variables, input_lengths, target_variables

    @staticmethod
    def get_losses(losses, metrics, step):
        total_loss = 0
        model_name = ''
        log_msg = ''

        for metric in metrics:
            val = metric.get_val()
            log_msg += '%s %.4f ' % (metric.name, val)
            model_name += '%s_%.2f_' % (metric.log_name, val)

        for loss in losses:
            val = loss.get_loss()
            log_msg += '%s %.4f ' % (loss.name, val)
            model_name += '%s_%.2f_' % (loss.log_name, val)
            total_loss += val

        model_name += 's%d' % step

        return total_loss, log_msg, model_name
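A small illustration (not in the original) of the checkpoint-name convention produced by get_losses; _StubLoss is a hypothetical stand-in exposing only the attributes the method reads:

class _StubLoss:
    # Mimics the minimal loss interface used by get_losses.
    name = "Avg NLLLoss"
    log_name = "nll"
    def get_loss(self):
        return 1.234

total, log_msg, model_name = SupervisedTrainer.get_losses([_StubLoss()], [], step=100)
# total == 1.234, log_msg == 'Avg NLLLoss 1.2340 ', model_name == 'nll_1.23_s100'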
Example #15
tgt = TargetField()
src.vocab = input_vocab
tgt.vocab = output_vocab
max_len = opt.max_len

def len_filter(example):
    return len(example.src) <= max_len and len(example.tgt) <= max_len

# generate test set
test = torchtext.data.TabularDataset(
    path=opt.test_data, format='tsv',
    fields=[('src', src), ('tgt', tgt)],
    filter_pred=len_filter
)

# Prepare loss
weight = torch.ones(len(output_vocab))
pad = output_vocab.stoi[tgt.pad_token]
loss = Perplexity(weight, pad)
if torch.cuda.is_available():
    loss.cuda()

#################################################################################
# Evaluate model on test set

evaluator = Evaluator(loss=loss, batch_size=opt.batch_size)
loss, accuracy = evaluator.evaluate(seq2seq, test)

print("Loss: %f, accuracy: %f" % (loss, accuracy))

Example #16
class SupervisedTrainer(object):
    """ The SupervisedTrainer class helps in setting up a training framework in a
    supervised setting.

    Args:
        expt_dir (optional, str): experiment Directory to store details of the experiment,
            by default it makes a folder in the current directory to store the details (default: `experiment`).
        loss (seq2seq.loss.loss.Loss, optional): loss for training, (default: seq2seq.loss.NLLLoss)
        batch_size (int, optional): batch size for experiment, (default: 64)
        checkpoint_every (int, optional): number of batches to checkpoint after, (default: 100)
    """
    def __init__(self,
                 expt_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100,
                 input_vocab=None,
                 output_vocab=None):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)

        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss,
                                   batch_size=batch_size,
                                   input_vocab=input_vocab)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        self.input_vocab = input_vocab
        self.output_vocab = output_vocab

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)

    def _train_batch(self, input_variable, input_lengths, target_variable,
                     model, teacher_forcing_ratio):
        loss = self.loss
        # Forward propagation
        decoder_outputs, decoder_hidden, other = model(
            input_variable,
            input_lengths,
            target_variable,
            teacher_forcing_ratio=teacher_forcing_ratio)
        # Get loss
        loss.reset()
        for step, step_output in enumerate(decoder_outputs):
            batch_size = target_variable.size(0)
            loss.eval_batch(step_output.contiguous().view(batch_size, -1),
                            target_variable[:, step + 1])
        # Backward propagation
        model.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.get_loss()

    def _train_epoches(self,
                       data,
                       model,
                       n_epochs,
                       start_epoch,
                       start_step,
                       dev_data=None,
                       teacher_forcing_ratio=0):
        log = self.logger

        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch

        device = torch.device('cuda:0') if torch.cuda.is_available() else -1

        batch_iterator = torchtext.data.BucketIterator(
            dataset=data,
            batch_size=self.batch_size,
            sort=False,
            sort_within_batch=False,
            device=device,
            repeat=False,
            shuffle=True)

        steps_per_epoch = len(batch_iterator)
        total_steps = steps_per_epoch * n_epochs

        step = start_step
        step_elapsed = 0
        best_acc = 0

        # to track the training loss as the model trains
        train_losses = []
        # to track the average training loss per epoch as the model trains
        avg_train_losses = []
        # to track the average validtation loss per epoch as the model trains
        avg_valid_losses = []
        early_stopping = EarlyStopping(patience=7, verbose=True)

        for epoch in range(start_epoch, n_epochs + 1):
            log.debug("Epoch: %d, Step: %d" % (epoch, step))

            batch_generator = batch_iterator.__iter__()
            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1
                target_variables = getattr(batch, 'tgt')

                pos_input_variables = [[] for i in range(batch.batch_size)]
                pos_input_lengths = [[] for i in range(batch.batch_size)]

                neg_input_variables = [[] for i in range(batch.batch_size)]
                neg_input_lengths = [[] for i in range(batch.batch_size)]

                set_size = len(batch.fields) - 1
                max_len_within_batch = -1

                for idx in range(batch.batch_size):
                    for src_idx in range(1, int(set_size / 2) + 1):
                        src, src_len = getattr(batch, 'pos{}'.format(src_idx))
                        pos_input_variables[idx].append(src[idx])
                        pos_input_lengths[idx].append(src_len[idx])

                    for src_idx in range(1, int(set_size / 2) + 1):
                        src, src_len = getattr(batch, 'neg{}'.format(src_idx))
                        neg_input_variables[idx].append(src[idx])
                        neg_input_lengths[idx].append(src_len[idx])

                    pos_input_lengths[idx] = torch.stack(
                        pos_input_lengths[idx], dim=0)
                    neg_input_lengths[idx] = torch.stack(
                        neg_input_lengths[idx], dim=0)

                    if max_len_within_batch < torch.max(
                            pos_input_lengths[idx].view(-1)).item():
                        max_len_within_batch = torch.max(
                            pos_input_lengths[idx].view(-1)).item()

                    if max_len_within_batch < torch.max(
                            neg_input_lengths[idx].view(-1)).item():
                        max_len_within_batch = torch.max(
                            neg_input_lengths[idx].view(-1)).item()

                for batch_idx in range(len(pos_input_variables)):
                    for set_idx in range(int(set_size / 2)):
                        pos_input_variables[batch_idx][set_idx] = pad_tensor(
                            pos_input_variables[batch_idx][set_idx],
                            max_len_within_batch, self.input_vocab)

                        neg_input_variables[batch_idx][set_idx] = pad_tensor(
                            neg_input_variables[batch_idx][set_idx],
                            max_len_within_batch, self.input_vocab)

                    pos_input_variables[batch_idx] = torch.stack(
                        pos_input_variables[batch_idx], dim=0)
                    neg_input_variables[batch_idx] = torch.stack(
                        neg_input_variables[batch_idx], dim=0)

                pos_input_variables = torch.stack(pos_input_variables, dim=0)
                pos_input_lengths = torch.stack(pos_input_lengths, dim=0)

                neg_input_variables = torch.stack(neg_input_variables, dim=0)
                neg_input_lengths = torch.stack(neg_input_lengths, dim=0)

                input_variables = (pos_input_variables, neg_input_variables)
                input_lengths = (pos_input_lengths, neg_input_lengths)
                loss = self._train_batch(input_variables, input_lengths,
                                         target_variables, model,
                                         teacher_forcing_ratio)

                train_losses.append(loss)

                # Record average loss
                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Progress: %d%%, Train %s: %.4f' % (
                        step / total_steps * 100, self.loss.name,
                        print_loss_avg)
                    log.info(log_msg)

            train_loss = np.average(train_losses)
            avg_train_losses.append(train_loss)

            # clear lists to track next epoch
            train_losses = []
            if step_elapsed == 0: continue

            epoch_loss_avg = epoch_loss_total / min(steps_per_epoch,
                                                    step - start_step)
            epoch_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f,  %.4f" % (
                epoch, self.loss.name, epoch_loss_avg, train_loss)

            if dev_data is not None:
                dev_loss, accuracy = self.evaluator.evaluate(model, dev_data)
                avg_valid_losses.append(dev_loss)
                log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (
                    self.loss.name, dev_loss, accuracy)
                early_stopping(dev_loss, model, self.optimizer, epoch, step,
                               self.input_vocab, self.output_vocab,
                               self.expt_dir)
                self.optimizer.update(dev_loss, epoch)
                if accuracy > best_acc:
                    log.info(
                        'accuracy increased >> previous best: {}, current: {}'
                        .format(best_acc, accuracy))
                    best_acc = accuracy
                model.train(mode=True)
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            if early_stopping.early_stop:
                print("Early Stopping")
                break
            log.info(log_msg)
        return avg_train_losses, avg_valid_losses

    def train(self,
              model,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on, if `resume=True`, it would be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume(bool, optional): resume training with the latest checkpoint, (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set optimizing parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        train_loss, valid_loss = self._train_epoches(
            data,
            model,
            num_epochs,
            start_epoch,
            step,
            dev_data=dev_data,
            teacher_forcing_ratio=teacher_forcing_ratio)
        visualize_loss(train_loss, valid_loss, self.expt_dir)
        return model
class SupervisedTrainer(object):
    """ The SupervisedTrainer class helps in setting up a training framework in a
    supervised setting.

    Args:
        expt_dir (optional, str): experiment Directory to store details of the experiment,
            by default it makes a folder in the current directory to store the details (default: `experiment`).
        loss (seq2seq.loss.loss.Loss, optional): loss for training, (default: seq2seq.loss.NLLLoss)
        batch_size (int, optional): batch size for experiment, (default: 64)
        checkpoint_every (int, optional): number of epochs to checkpoint after, (default: 100)
    """
    def __init__(self, expt_dir='experiment', loss=NLLLoss(), batch_size=64,
                 random_seed=None,
                 checkpoint_every=100, print_every=100):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)

    def _train_batch(self, input_variable, input_lengths, target_variable, model, teacher_forcing_ratio):
        loss = self.loss
        # Forward propagation
        decoder_outputs, decoder_hidden, other = model(input_variable, input_lengths, target_variable,
                                                       teacher_forcing_ratio=teacher_forcing_ratio)
        # Get loss
        loss.reset()
        for step, step_output in enumerate(decoder_outputs):
            batch_size = target_variable.size(0)
            loss.eval_batch(step_output.contiguous().view(batch_size, -1), target_variable[:, step + 1])
        # Backward propagation
        model.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.get_loss()

    def _train_epoches(self, data, model, n_epochs, start_epoch, start_step,
                       dev_data=None, teacher_forcing_ratio=0):
        log = self.logger

        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch

        device = None if torch.cuda.is_available() else -1
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data, batch_size=self.batch_size,
            sort=True, sort_key=lambda x: len(x.src),
            device=device, repeat=False)

        steps_per_epoch = len(batch_iterator)
        total_steps = steps_per_epoch * n_epochs

        step = start_step
        step_elapsed = 0
        for epoch in range(start_epoch, n_epochs + 1):
            log.debug("Epoch: %d, Step: %d" % (epoch, step))

            batch_generator = batch_iterator.__iter__()
            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1

                input_variables, input_lengths = getattr(batch, seq2seq.src_field_name)
                target_variables = getattr(batch, seq2seq.tgt_field_name)

                loss = self._train_batch(input_variables, input_lengths.tolist(), target_variables, model, teacher_forcing_ratio)

                # Record average loss
                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Progress: %d%%, Train %s: %.4f' % (
                        step / total_steps * 100,
                        self.loss.name,
                        print_loss_avg)
                    log.info(log_msg)

                # Checkpoint
                if step % self.checkpoint_every == 0 or step == total_steps:
                    Checkpoint(model=model,
                               optimizer=self.optimizer,
                               epoch=epoch, step=step,
                               input_vocab=data.fields[seq2seq.src_field_name].vocab,
                               output_vocab=data.fields[seq2seq.tgt_field_name].vocab).save(self.expt_dir)

            if step_elapsed == 0: continue

            epoch_loss_avg = epoch_loss_total / min(steps_per_epoch, step - start_step)
            epoch_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f" % (epoch, self.loss.name, epoch_loss_avg)
            if dev_data is not None:
                dev_loss, accuracy = self.evaluator.evaluate(model, dev_data)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (self.loss.name, dev_loss, accuracy)
                model.train(mode=True)
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            log.info(log_msg)

    def train(self, model, data, num_epochs=5,
              resume=False, dev_data=None,
              optimizer=None, teacher_forcing_ratio=0):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on, if `resume=True`, it would be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume(bool, optional): resume training with the latest checkpoint, (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set optimizer parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            self.optimizer.optimizer = resume_optim.__class__(model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" % (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data, model, num_epochs,
                            start_epoch, step, dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
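A minimal usage sketch for the trainer defined above (the model, train_dataset and dev_dataset names are placeholders; NLLLoss, Optimizer and optim are the same objects the trainer code already imports):

# Hypothetical end-to-end usage of the SupervisedTrainer defined above.
# `model`, `train_dataset` and `dev_dataset` are placeholders built elsewhere,
# e.g. with SourceField/TargetField as in the run_training example below.
loss = NLLLoss()
if torch.cuda.is_available():
    loss.cuda()

trainer = SupervisedTrainer(loss=loss, batch_size=64,
                            checkpoint_every=500, print_every=100,
                            expt_dir='./experiment')
optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)

model = trainer.train(model, train_dataset,
                      num_epochs=10, dev_data=dev_dataset,
                      optimizer=optimizer, teacher_forcing_ratio=0.5)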
Example #18
class SupervisedTrainer(object):
    """ The SupervisedTrainer class helps in setting up a training framework in a
    supervised setting.

    Args:
        expt_dir (str, optional): experiment directory to store details of the experiment,
            by default it makes a folder in the current directory to store the details (default: `experiment`).
        loss (list, optional): list of seq2seq.loss.Loss objects for training (default: [seq2seq.loss.NLLLoss])
        metrics (list, optional): list of seq2seq.metric.metric objects to be computed during evaluation
        batch_size (int, optional): batch size for experiment, (default: 64)
        checkpoint_every (int, optional): number of batches to wait before checkpointing, (default: 100)
        print_every (int, optional): number of iterations to print after, (default: 100)
    """
    def __init__(self, expt_dir='experiment', loss=[NLLLoss()], loss_weights=None, metrics=[], batch_size=64, eval_batch_size=128,
                 random_seed=None,
                 checkpoint_every=100, print_every=100):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.metrics = metrics
        self.loss_weights = loss_weights or len(loss)*[1.]
        self.evaluator = Evaluator(loss=self.loss, metrics=self.metrics, batch_size=eval_batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)

    def _train_batch(self, input_variable, input_lengths, target_variable, model, teacher_forcing_ratio):
        loss = self.loss

        # Forward propagation
        decoder_outputs, decoder_hidden, other = model(input_variable, input_lengths, target_variable, teacher_forcing_ratio=teacher_forcing_ratio)

        losses = self.evaluator.compute_batch_loss(decoder_outputs, decoder_hidden, other, target_variable)
        
        # Backward propagation
        for i, loss in enumerate(losses, 0):
            loss.scale_loss(self.loss_weights[i])
            loss.backward(retain_graph=True)
        self.optimizer.step()
        model.zero_grad()

        return losses

    def _train_epoches(self, data, model, n_epochs, start_epoch, start_step,
                       dev_data=None, callbacks=[], monitor_data=[], 
                       teacher_forcing_ratio=0,
                       top_k=5):

        self.set_callbacks(callbacks, top_k=top_k, data=data, dev_data=dev_data)

        iterator_device = torch.cuda.current_device() if torch.cuda.is_available() else -1
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data, batch_size=self.batch_size,
            sort=False, sort_within_batch=True,
            sort_key=lambda x: len(x.src),
            device=iterator_device, repeat=False)

        val_data = dev_data or data

        steps_per_epoch = len(batch_iterator)
        total_steps = steps_per_epoch * n_epochs

        info = dict(steps_per_epoch=steps_per_epoch, total_steps=total_steps)

        info['step'] = start_step
        info['start_epoch'] = start_epoch
        info['epoch'] = start_epoch
        info['start_step'] = start_step
        info['step_elapsed'] = 0

        info['model'] = model       # TODO I find this also a bit hacky

        # store initial model to be sure at least one model is stored

        losses, metrics = self.evaluator.evaluate(model, val_data, self.get_batch_data)

        info['losses'] = losses
        info['metrics'] = metrics

        self.callbacks.on_train_begin(info)

        # TODO this should also be in a callback
        logs = Log()

        for epoch in range(start_epoch, n_epochs + 1):

            self.callbacks.on_epoch_begin(epoch, info)

            batch_generator = batch_iterator.__iter__()

            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, info['step']):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:

                self.callbacks.on_batch_begin(info, batch)
                info['step'] += 1
                info['step_elapsed'] += 1

                input_variables, input_lengths, target_variables = self.get_batch_data(batch)

                losses = self._train_batch(input_variables, input_lengths.tolist(), target_variables, model, teacher_forcing_ratio)

                info['losses'] = losses

                # print log info according to the print_every parameter
                if info['step'] % self.print_every == 0 and info['step_elapsed'] >= self.print_every:

                    m_logs = {}
                    train_losses, train_metrics = self.evaluator.evaluate(model, data, self.get_batch_data)
                    train_loss, train_log_msg, model_name = self.get_losses(train_losses, train_metrics, info['step'])
                    logs.write_to_log('Train', train_losses, train_metrics, info['step'])
                    logs.update_step(info['step'])

                    m_logs['Train'] = train_log_msg

                    # compute vals for all monitored sets
                    for m_data in monitor_data:
                        m_losses, m_metrics = self.evaluator.evaluate(model, monitor_data[m_data], self.get_batch_data)
                        total_loss, log_msg, model_name = self.get_losses(m_losses, m_metrics, info['step'])
                        m_logs[m_data] = log_msg
                        logs.write_to_log(m_data, m_losses, m_metrics, info['step'])

                    all_losses = ' '.join(['%s:\t %s\n' % (os.path.basename(name), m_logs[name]) for name in m_logs])

                    log_msg = 'Progress %d%%, %s' % (
                            info['step'] / total_steps * 100,
                            all_losses)

                    info['log_msg'] = log_msg

                # check if new model should be saved
                if info['step'] % self.checkpoint_every == 0 or info['step'] == total_steps:
                    # compute dev loss
                    losses, metrics = self.evaluator.evaluate(model, val_data, self.get_batch_data)

                    info['val_losses'] = losses

                self.callbacks.on_batch_end(batch, info)

            if info['step_elapsed'] == 0: continue

            log_msg = ''

            if dev_data is not None:
                losses, metrics = self.evaluator.evaluate(model, dev_data, self.get_batch_data)
                loss_total, log_, model_name = self.get_losses(losses, metrics, info['step'])

                self.optimizer.update(loss_total, epoch)    # TODO check if this makes sense!
                log_msg += ", Dev set: " + log_
                model.train(mode=True)
            else:
                # TODO THIS IS SUPER HACKY, UPDATE IT!!!
                self.optimizer.update(self.callbacks.callbacks[0].epoch_loss_avg, epoch)

            self.callbacks.on_epoch_end(epoch, info)
            info['epoch'] += 1

        self.callbacks.on_train_end(info)

        return logs

    def train(self, model, data, num_epochs=5,
              resume=False, dev_data=None, 
              monitor_data={}, optimizer=None,
              teacher_forcing_ratio=0,
              learning_rate=0.001, checkpoint_path=None, top_k=5):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on, if `resume=True`, it would be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume(bool, optional): resume training with the latest checkpoint, (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
            learning_rate (float, optional): learning rate used by the optimizer (default 0.001)
            checkpoint_path (str, optional): path to load checkpoint from in case training should be resumed
            top_k (int): how many models should be stored during training
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            resume_checkpoint = Checkpoint.load(checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set optimizer parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0

            def get_optim(optim_name):
                optims = {'adam': optim.Adam, 'adagrad': optim.Adagrad,
                          'adadelta': optim.Adadelta, 'adamax': optim.Adamax,
                          'rmsprop': optim.RMSprop, 'sgd': optim.SGD,
                           None:optim.Adam}
                return optims[optim_name]

            self.optimizer = Optimizer(get_optim(optimizer)(model.parameters(), lr=learning_rate),
                                       max_grad_norm=5)

        self.logger.info("Optimizer: %s, Scheduler: %s" % (self.optimizer.optimizer, self.optimizer.scheduler))

        logs = self._train_epoches(data, model, num_epochs,
                            start_epoch, step, dev_data=dev_data,
                            monitor_data=monitor_data,
                            teacher_forcing_ratio=teacher_forcing_ratio,
                            top_k=top_k)
        return model, logs

    def set_callbacks(self, callbacks, data, dev_data, top_k):
        """
        Create a callback collection and associate it
        with the current trainer.
        """
        # by default, training progress is logged and the top_k best models are checkpointed
        callbacks = [Logger(), ModelCheckpoint(data, dev_data, top_k=top_k)] + callbacks
        self.callbacks = CallbackContainer(callbacks)
        self.callbacks.set_trainer(self)

    @staticmethod
    def get_batch_data(batch):
        input_variables, input_lengths = getattr(batch, seq2seq.src_field_name)
        target_variables = {'decoder_output': getattr(batch, seq2seq.tgt_field_name),
                            'encoder_input': input_variables}  # The k-grammar metric needs to have access to the inputs

        # If available, also get provided attentive guidance data
        if hasattr(batch, seq2seq.attn_field_name):
            attention_target = getattr(batch, seq2seq.attn_field_name)

            # When we ignore the output EOS, the target sequence will not contain the EOS, but if it is
            # present in the data, the attention indices might still include it. We should remove this.
            target_length = target_variables['decoder_output'].size(1)
            attn_length = attention_target.size(1)

            # If the attention sequence is exactly 1 longer than the output sequence, the EOS attention
            # index is present.
            if attn_length == target_length + 1:
                # First we replace each of these indices with a -1. This makes sure that the hard
                # attention method will not attend to an input that might not be present (the EOS)
                # We need this if there are attentions of multiple lengths in a batch
                attn_eos_indices = input_lengths.unsqueeze(1) + 1
                attention_target = attention_target.scatter_(dim=1, index=attn_eos_indices, value=-1)
                
                # Next we also make sure that the longest attention sequence in the batch is truncated
                attention_target = attention_target[:, :-1]

            target_variables['attention_target'] = attention_target

        return input_variables, input_lengths, target_variables
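    # Toy illustration of the EOS-index masking above (values assumed for the sketch):
    #   input_lengths    = [1, 2]            -> attn_eos_indices = [[2], [3]]
    #   attention_target = [[0, 1, 2, 3],
    #                       [0, 1, 2, 3]]     (batch x attn_len, attn_len = target_length + 1)
    #   after scatter_(value=-1 at the EOS index) and dropping the last column:
    #                      [[0, 1, -1],
    #                       [0, 1,  2]]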

    @staticmethod
    def get_losses(losses, metrics, step):
        total_loss = 0
        model_name = ''
        log_msg= ''

        for metric in metrics:
            val = metric.get_val()
            log_msg += '%s %.4f ' % (metric.name, val)
            model_name += '%s_%.2f_' % (metric.log_name, val)

        for loss in losses:
            val = loss.get_loss()
            log_msg += '%s %.4f ' % (loss.name, val)
            model_name += '%s_%.2f_' % (loss.log_name, val)
            total_loss += val

        model_name += 's%d' % step

        return total_loss, log_msg, model_name
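A hedged usage sketch for the callback-based trainer above (model, train_data, dev_data and monitor_sets are placeholders; concrete metric objects are omitted because their exact classes depend on the library version):

trainer = SupervisedTrainer(loss=[NLLLoss()], metrics=[],
                            batch_size=64, eval_batch_size=128,
                            checkpoint_every=200, print_every=50,
                            expt_dir='./experiment')

# `optimizer` is a string name resolved by get_optim(); `monitor_sets` maps a
# name to each extra dataset whose losses/metrics should be logged during training.
model, logs = trainer.train(model, train_data,
                            num_epochs=20,
                            dev_data=dev_data,
                            monitor_data=monitor_sets,
                            optimizer='adam',
                            learning_rate=0.001,
                            top_k=5)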
Example #19
def run_training(opt, default_data_dir, num_epochs=100):
    if opt.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)))
        checkpoint_path = os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        seq2seq = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:

        # Prepare dataset
        src = SourceField()
        tgt = TargetField()
        max_len = 50

        data_file = os.path.join(default_data_dir, opt.train_path, 'data.txt')

        logging.info("Starting new Training session on %s", data_file)

        def len_filter(example):
            return (len(example.src) <= max_len) and (len(example.tgt) <= max_len) \
                   and (len(example.src) > 0) and (len(example.tgt) > 0)

        train = torchtext.data.TabularDataset(
            path=data_file, format='json',
            fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
            filter_pred=len_filter
        )

        dev = None
        if opt.no_dev is False:
            dev_data_file = os.path.join(default_data_dir, opt.train_path, 'dev-data.txt')
            dev = torchtext.data.TabularDataset(
                path=dev_data_file, format='json',
                fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
                filter_pred=len_filter
            )

        src.build_vocab(train, max_size=50000)
        tgt.build_vocab(train, max_size=50000)
        input_vocab = src.vocab
        output_vocab = tgt.vocab

        # NOTE: If the source field name and the target field name
        # are different from 'src' and 'tgt' respectively, they have
        # to be set explicitly before any training or inference
        # seq2seq.src_field_name = 'src'
        # seq2seq.tgt_field_name = 'tgt'

        # Prepare loss
        weight = torch.ones(len(tgt.vocab))
        pad = tgt.vocab.stoi[tgt.pad_token]
        loss = Perplexity(weight, pad)
        if torch.cuda.is_available():
            logging.info("Yayyy We got CUDA!!!")
            loss.cuda()
        else:
            logging.info("No cuda available device found running on cpu")

        seq2seq = None
        optimizer = None
        if not opt.resume:
            hidden_size = 128
            decoder_hidden_size = hidden_size * 2
            logging.info("EncoderRNN Hidden Size: %s", hidden_size)
            logging.info("DecoderRNN Hidden Size: %s", decoder_hidden_size)
            bidirectional = True
            encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 variable_lengths=True)
            decoder = DecoderRNN(len(tgt.vocab), max_len, decoder_hidden_size,
                                 dropout_p=0, use_attention=True,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 eos_id=tgt.eos_id, sos_id=tgt.sos_id)

            seq2seq = Seq2seq(encoder, decoder)
            if torch.cuda.is_available():
                seq2seq.cuda()

            for param in seq2seq.parameters():
                param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.

        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

        # train

        batch_size = 32
        checkpoint_every = num_epochs // 10
        print_every = num_epochs // 100

        properties = dict(batch_size=batch_size,
                          checkpoint_every=checkpoint_every,
                          print_every=print_every, expt_dir=opt.expt_dir,
                          num_epochs=num_epochs,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        logging.info("Starting training with the following Properties %s", json.dumps(properties, indent=2))
        t = SupervisedTrainer(loss=loss, batch_size=batch_size,
                              checkpoint_every=checkpoint_every,
                              print_every=print_every, expt_dir=opt.expt_dir)

        seq2seq = t.train(seq2seq, train,
                          num_epochs=num_epochs, dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        evaluator = Evaluator(loss=loss, batch_size=batch_size)

        if opt.no_dev is False:
            dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
            logging.info("Dev Loss: %s", dev_loss)
            logging.info("Accuracy: %s", dev_loss)

    beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 4))

    predictor = Predictor(beam_search, input_vocab, output_vocab)
    while True:
        try:
            seq_str = input("Type in a source sequence:")
            seq = seq_str.strip().split()
            results = predictor.predict_n(seq, n=3)
            for i, res in enumerate(results):
                print('option %s: %s\n' % (i + 1, res))
        except KeyboardInterrupt:
            logging.info("Bye Bye")
            exit(0)
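A sketch of how run_training() above might be invoked. The argument names mirror the opt attributes the function actually reads (expt_dir, train_path, load_checkpoint, no_dev, resume); the original script's command-line interface may differ:

# Hypothetical driver for run_training(); flag names are assumptions based on
# the attributes accessed inside the function.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--expt_dir', default='./experiment')
parser.add_argument('--train_path', default='train')
parser.add_argument('--load_checkpoint', default=None)
parser.add_argument('--no_dev', action='store_true')
parser.add_argument('--resume', action='store_true')
opt = parser.parse_args()

run_training(opt, default_data_dir='./data', num_epochs=100)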
Example #20
optimizer_new = Optimizer(
    torch.optim.Adadelta(seq2seq_model.parameters(), lr=0.05))

# if you want to train by oracle, set mode to None
sc_t = SelfCriticalTrainer(loss=PositiveLoss(mode='prob',
                                             prob_model=compare_regex_model,
                                             loss_vocab=sc_loss_vocab),
                           batch_size=32,
                           checkpoint_every=100,
                           print_every=100,
                           expt_dir='./lstm_model/' + data_tuple[0] +
                           '/SoftRegex',
                           output_vocab=output_vocab)

seq2seq_model = sc_t.train(seq2seq_model,
                           train,
                           num_epochs=30,
                           dev_data=dev,
                           optimizer=optimizer_new,
                           teacher_forcing_ratio=0.5,
                           resume=False)

# In[26]:

evaluator = Evaluator()

# In[27]:

evaluator.evaluate(seq2seq_model,
                   dev)  # (5.799417234628771, 0.6468332123976366)
class SupervisedTrainer(object):
    """ The SupervisedTrainer class helps in setting up a training framework in a
    supervised setting.

    Args:
        expt_dir (str, optional): experiment directory to store details of the experiment,
            by default it makes a folder in the current directory to store the details (default: `experiment`).
        loss (seq2seq.loss.loss.Loss, optional): loss for training, (default: seq2seq.loss.NLLLoss)
        batch_size (int, optional): batch size for experiment, (default: 64)
        checkpoint_every (int, optional): number of batches to checkpoint after, (default: 100)
    """
    def __init__(self,
                 expt_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 state_loss=NLLLoss(),
                 checkpoint_every=100,
                 print_every=100):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.state_loss = state_loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)

    def _train_batch(self,
                     input_variable,
                     input_lengths,
                     target_variable,
                     model,
                     teacher_forcing_ratio,
                     concept=None,
                     vocabs=None,
                     use_concept=False):
        loss = self.loss
        if use_concept:
            state_loss = self.state_loss
            state_loss.reset()
        # Forward propagation
        if use_concept:
            (decoder_outputs, decoder_hidden,
             other), (state, response_concept) = model(
                 input_variable,
                 input_lengths,
                 target_variable,
                 teacher_forcing_ratio=teacher_forcing_ratio,
                 concept=concept,
                 vocabs=vocabs,
                 use_concept=use_concept)
        else:
            decoder_outputs, decoder_hidden, other = model(
                input_variable,
                input_lengths,
                target_variable,
                teacher_forcing_ratio=teacher_forcing_ratio,
                concept=concept,
                vocabs=vocabs,
                use_concept=use_concept)
        # Get loss
        loss.reset()
        for step, step_output in enumerate(decoder_outputs):
            batch_size = target_variable.size(0)
            loss.eval_batch(step_output.contiguous().view(batch_size, -1),
                            target_variable[:, step + 1])
        """ 
        if use_concept:
            for i in range(len(response_concept)):
                for j in range(len(response_concept[i])):
                    arg1 = state[i].unsqueeze(0)
                    arg2 = torch.tensor([response_concept[i][j]])
                    if torch.cuda.is_available():
                        arg2 = arg2.cuda()
                    state_loss.acc_loss += state_loss.criterion(arg1, arg2)
                    state_loss.norm_term += 1
        """
        """
        for i in range(response_concept.shape[1]):
            state_loss.eval_batch(state.contiguous().view(batch_size, -1), response_concept[:, i])
        """
        # Backward propagation
        model.zero_grad()

        lvalue = loss.get_loss()
        if use_concept:
            #state_value = state_loss.get_loss()
            state_value = 0
        if lvalue >= 0:
            if use_concept:
                # loss.backward(retain_graph=True)
                loss.backward()
                # state_loss.backward()
            else:
                loss.backward()
            self.optimizer.step()
        else:
            raise AssertionError("NAN Triggered!")
        if use_concept:
            return lvalue, state_value
        else:
            return lvalue

    def _train_epoches(self,
                       data,
                       model,
                       n_epochs,
                       start_epoch,
                       start_step,
                       save_file=False,
                       dev_data=None,
                       teacher_forcing_ratio=0,
                       vocabs=None,
                       use_concept=False,
                       log_dir=None,
                       embed_file=None):
        log = self.logger
        # embed = Embed(embed_file)
        embed = []

        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch

        device = torch.device('cuda', 0) if torch.cuda.is_available() else None
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data,
            batch_size=self.batch_size,
            sort=False,
            sort_within_batch=True,
            sort_key=lambda x: len(x.src),
            device=device,
            repeat=False)

        steps_per_epoch = len(batch_iterator)
        print("Steps per epoch: ", steps_per_epoch)
        total_steps = steps_per_epoch * n_epochs
        """
        dev_loss, accuracy = self.evaluator.evaluate(model, dev_data, vocabs=vocabs,
                                                     use_concept=use_concept,
                                                     log_dir=log_dir,
                                                     cur_step=0)
        """
        step = start_step
        step_elapsed = 0
        for epoch in range(start_epoch, n_epochs + 1):
            print("Epoch: %d, Step: %d" % (epoch, step))
            #if epoch > 20:
            #    teacher_forcing_ratio = 0

            batch_generator = batch_iterator.__iter__()
            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)
            """
            # for debugging
            dev_loss, accuracy = self.evaluator.evaluate(model, dev_data, vocabs=vocabs,
                                                         use_concept=use_concept, cur_step=step,
                                                         log_dir=log_dir)
            """

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1

                input_variables, input_lengths = getattr(
                    batch, seq2seq.src_field_name)
                if use_concept:
                    concepts, _ = getattr(batch, seq2seq.cpt_field_name)
                else:
                    concepts = []
                target_variables = getattr(batch, seq2seq.tgt_field_name)

                if use_concept:
                    loss, state_loss = self._train_batch(
                        input_variables,
                        input_lengths.tolist(),
                        target_variables,
                        model,
                        teacher_forcing_ratio,
                        concept=concepts,
                        vocabs=vocabs,
                        use_concept=use_concept)
                else:
                    loss = self._train_batch(input_variables,
                                             input_lengths.tolist(),
                                             target_variables,
                                             model,
                                             teacher_forcing_ratio,
                                             concept=concepts,
                                             vocabs=vocabs,
                                             use_concept=use_concept)
                    state_loss = 0

                # FOR NAN DEBUG
                if not loss >= 0:
                    Checkpoint(
                        model=model,
                        optimizer=self.optimizer,
                        epoch=epoch,
                        step=step,
                        input_vocab=data.fields[seq2seq.src_field_name].vocab,
                        output_vocab=data.fields[
                            seq2seq.tgt_field_name].vocab).save(self.expt_dir)
                    print("Nan Triggered! Model has been saved.")
                    exit(0)

                # Record average loss
                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Step %d, Progress: %d%%, Train %s: %.4f, State loss: %.4f' % (
                        step, step / total_steps * 100, self.loss.name,
                        print_loss_avg, state_loss)
                    log.info(log_msg)

                if step % 200 == 0:
                    dev_loss, accuracy = self.evaluator.evaluate(
                        model,
                        dev_data,
                        vocabs=vocabs,
                        use_concept=use_concept,
                        cur_step=step,
                        log_dir=log_dir)
                    # self.optimizer.update(dev_loss, epoch)
                    log_msg = "Step %d, Dev %s: %.4f, Accuracy: %.4f" % (
                        step, self.loss.name, dev_loss, accuracy)
                    log.info(log_msg)
                    model.train(mode=True)

                # Checkpoint
                """
                if step % self.checkpoint_every == 0 or step == total_steps:
                    Checkpoint(model=model,
                               optimizer=self.optimizer,
                               epoch=epoch, step=step,
                               input_vocab=data.fields[seq2seq.src_field_name].vocab,
                               output_vocab=data.fields[seq2seq.tgt_field_name].vocab).save(self.expt_dir)
                """
            if epoch % 5 == 0 and save_file:
                Checkpoint(
                    model=model,
                    optimizer=self.optimizer,
                    epoch=n_epochs,
                    step=step,
                    input_vocab=data.fields[seq2seq.src_field_name].vocab,
                    output_vocab=data.fields[
                        seq2seq.tgt_field_name].vocab).save(self.expt_dir)

            if step_elapsed == 0: continue

            epoch_loss_avg = epoch_loss_total / min(steps_per_epoch,
                                                    step - start_step)
            epoch_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f" % (
                epoch, self.loss.name, epoch_loss_avg)
            with open(log_dir + '/log.txt', 'a+', encoding='utf-8') as file:
                file.write("Step {}, avg loss: {}\n".format(
                    step, epoch_loss_avg))
            if dev_data is not None:
                dev_loss, accuracy = self.evaluator.evaluate(
                    model,
                    dev_data,
                    vocabs=vocabs,
                    use_concept=use_concept,
                    log_dir=log_dir,
                    cur_step=step)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (
                    self.loss.name, dev_loss, accuracy)
                model.train(mode=True)
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            log.info(log_msg)

        Checkpoint(
            model=model,
            optimizer=self.optimizer,
            epoch=n_epochs,
            step=step,
            input_vocab=data.fields[seq2seq.src_field_name].vocab,
            output_vocab=data.fields[seq2seq.tgt_field_name].vocab).save(
                self.expt_dir)

    def train(self,
              model,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0,
              src_vocab=None,
              cpt_vocab=None,
              tgt_vocab=None,
              use_concept=False,
              vocabs=None,
              save_file=False,
              log_dir=None,
              embed_file=None,
              full_matrix=None):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on, if `resume=True`, it would be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume(bool, optional): resume training with the latest checkpoint, (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
            log_dir (str, optional): directory where epoch-level training logs are written
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            model.full_matrix = full_matrix
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set optimizer parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters(),
                                                 weight_decay=0),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            model,
                            num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio,
                            log_dir=log_dir,
                            embed_file=embed_file,
                            vocabs=vocabs,
                            use_concept=use_concept,
                            save_file=save_file)
        return model
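A minimal sketch of driving the concept-aware trainer above (model, train_data, dev_data and vocabs are placeholders; vocabs is assumed to be whatever vocabulary bundle the model's forward pass expects when use_concept=True):

trainer = SupervisedTrainer(loss=NLLLoss(), batch_size=64,
                            checkpoint_every=100, print_every=100,
                            expt_dir='./experiment')

model = trainer.train(model, train_data,
                      num_epochs=30,
                      dev_data=dev_data,
                      teacher_forcing_ratio=0.5,
                      use_concept=True,
                      vocabs=vocabs,
                      save_file=True,
                      log_dir='./experiment/logs')  # log.txt is appended here after each epoch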
Example #22
src.vocab = input_vocab
tgt.vocab = output_vocab
max_len = opt.max_len

def len_filter(example):
    return len(example.src) <= max_len and len(example.tgt) <= max_len

# generate test set
test = torchtext.data.TabularDataset(
    path=opt.test_data, format='tsv',
    fields=[('src', src), ('tgt', tgt)],
    filter_pred=len_filter
)

# Prepare loss
weight = torch.ones(len(output_vocab))
pad = output_vocab.stoi[tgt.pad_token]
loss = Perplexity(weight, pad)
if torch.cuda.is_available():
    loss.cuda()

#################################################################################
# Evaluate model on test set

evaluator = Evaluator(loss=loss, batch_size=opt.batch_size)
losses, metrics = evaluator.evaluate(seq2seq, test)

total_loss, log_msg, _ = SupervisedTrainer.print_eval(losses, metrics, 0)

print(log_msg)
class SupervisedTrainer(object):
    """ The SupervisedTrainer class helps in setting up a training framework in a
    supervised setting.

    Args:
        model_dir (str, optional): experiment directory to store model checkpoints,
            by default it makes a folder in the current directory to store the details (default: `experiment`).
        best_model_dir (str, optional): directory where copies of the best checkpoints are kept (default: `experiment/best`).
        loss (seq2seq.loss.loss.Loss, optional): loss for training, (default: seq2seq.loss.NLLLoss)
        batch_size (int, optional): batch size for experiment, (default: 64)
        checkpoint_every (int, optional): number of batches to checkpoint after, (default: 100)
    """
    def __init__(self,
                 model_dir='experiment',
                 best_model_dir='experiment/best',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100,
                 max_epochs=5,
                 max_steps=10000,
                 max_checkpoints_num=5,
                 best_ppl=100000.0,
                 device=None):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every
        self.max_steps = max_steps
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.best_ppl = best_ppl
        self.max_checkpoints_num = max_checkpoints_num
        self.device = device
        self.evaluator = Evaluator(loss=self.loss,
                                   batch_size=batch_size,
                                   device=device)

        if not os.path.isabs(model_dir):
            model_dir = os.path.join(os.getcwd(), model_dir)
        self.model_dir = model_dir
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        if not os.path.isabs(best_model_dir):
            best_model_dir = os.path.join(os.getcwd(), best_model_dir)
        self.best_model_dir = best_model_dir
        if not os.path.exists(self.best_model_dir):
            os.makedirs(self.best_model_dir)

        self.model_checkpoints = []
        self.best_model_checkpoints = []

        self.logger = logging.getLogger(__name__)

    def save_model(self, model, steps, dev_ppl=None):
        model_fn = f"{steps}.pt"
        model_fp = os.path.join(self.model_dir, model_fn)

        # save model checkpoints
        while len(self.model_checkpoints) >= self.max_checkpoints_num:
            os.system(f"rm {self.model_checkpoints[0]}")
            self.model_checkpoints = self.model_checkpoints[1:]
        torch.save(model.state_dict(), model_fp)
        self.model_checkpoints.append(model_fp)

        # update checkpoints file
        with open(os.path.join(self.model_dir, "checkpoints"), 'w') as f:
            f.write('\n'.join(self.model_checkpoints[::-1]))

        # save best model checkpoints
        if dev_ppl and dev_ppl < self.best_ppl:
            self.logger.info(f"Best model dev ppl {dev_ppl}.")
            self.best_ppl = dev_ppl
            while len(self.best_model_checkpoints) >= self.max_checkpoints_num:
                os.system(f"rm {self.best_model_checkpoints[0]}")
                self.best_model_checkpoints = self.best_model_checkpoints[1:]

            best_model_fp = os.path.join(self.best_model_dir, model_fn)
            os.system(f"cp {model_fp} {best_model_fp}")
            self.best_model_checkpoints.append(best_model_fp)

    def _train_batch(self, input_variable, input_lengths, target_variable,
                     model, teacher_forcing_ratio):
        loss = self.loss
        # Forward propagation
        decoder_outputs, decoder_hidden, other = model(
            input_variable,
            input_lengths,
            target_variable,
            teacher_forcing_ratio=teacher_forcing_ratio)
        # Get loss
        loss.reset()
        for step, step_output in enumerate(decoder_outputs):
            batch_size = target_variable.size(0)
            loss.eval_batch(step_output.contiguous().view(batch_size, -1),
                            target_variable[:, step + 1])
        # Backward propagation
        model.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.get_loss()

    def _train_epoches(self,
                       data,
                       model,
                       start_step,
                       dev_data=None,
                       teacher_forcing_ratio=0):
        device = self.device
        log = self.logger
        max_epochs = self.max_epochs
        max_steps = self.max_steps

        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch

        step = 0
        steps_per_epoch = len(data)
        start_epoch = (start_step - step) // steps_per_epoch
        step = start_epoch * steps_per_epoch
        for batch in data:
            if step >= start_step: break
            step += 1
        if start_epoch or start_step:
            logging.info(f"Resume from Epoch {start_epoch}, Step {start_step}")

        for epoch in range(start_epoch, max_epochs):
            model.train(True)
            for batch in data:
                step += 1
                src_variables = batch['src'].to(device)
                tgt_variables = batch['tgt'].to(device)
                src_lens = batch['src_len'].view(-1).to(device)
                tgt_lens = batch['tgt_len'].view(-1).to(device)

                # print(src_variables, src_lens, tgt_variables)
                # exit(0)

                loss = self._train_batch(src_variables, src_lens.tolist(),
                                         tgt_variables, model,
                                         teacher_forcing_ratio)

                # Record average loss
                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = f"Process {100.0*(step%steps_per_epoch)/steps_per_epoch:.2f}% of Epoch {epoch}, Total step {step}, Train {self.loss.name} {print_loss_avg:.4f}"
                    if hvd.rank() == 0:
                        log.info(log_msg)

                # Checkpoint
                if step % self.checkpoint_every == 0:
                    dev_loss = None
                    if dev_data is not None:
                        dev_loss, accuracy = self.evaluator.evaluate(
                            model, dev_data)
                        log_msg = f"Dev {self.loss.name}: {dev_loss:.4f}, Accuracy: {accuracy:.4f}"
                        if hvd.rank() == 0:
                            log.info(log_msg)
                        model.train(mode=True)
                    if hvd.rank() == 0:
                        self.save_model(model, step, dev_ppl=dev_loss)

                if step >= max_steps:
                    break

            if step >= max_steps:
                if hvd.rank() == 0:
                    log.info(f"Finish max steps {max_steps} at Epoch {epoch}.")
                break

            epoch_loss_avg = epoch_loss_total / min(steps_per_epoch,
                                                    step - start_step)
            epoch_loss_total = 0
            log_msg = f"Finished Epoch {epoch}, Train {self.loss.name} {epoch_loss_avg:.4f}"
            if dev_data is not None:
                dev_loss, accuracy = self.evaluator.evaluate(model, dev_data)
                self.optimizer.update(dev_loss, epoch)
                log_msg += f", Dev {self.loss.name}: {dev_loss:.4f}, Accuracy: {accuracy:.4f}"
                model.train(mode=True)
            else:
                dev_loss = None
                self.optimizer.update(epoch_loss_avg, epoch)
            if hvd.rank() == 0:
                self.save_model(model, step, dev_ppl=dev_loss)
                log.info(log_msg)
                log.info(f"Finish Epoch {epoch}, Total steps {step}.")

    def train(self,
              model,
              data,
              start_step=0,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on
            data: iterable of training batches; each batch is a dict with
               'src', 'tgt', 'src_len' and 'tgt_len' tensors
            start_step (int, optional): global step to resume training from (default 0)
            dev_data (optional): dev dataset used for evaluation and checkpoint selection (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
        Returns:
            model (seq2seq.models): trained model.
        """

        if optimizer is None:
            optimizer = Optimizer(optim.Adam(model.parameters()),
                                  max_grad_norm=5)
        self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            model,
                            start_step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
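A hedged sketch of driving the Horovod-style trainer above. train_loader and dev_loader are assumed to be DataLoader-like iterables yielding dict batches with 'src', 'tgt', 'src_len' and 'tgt_len' tensors, which is what the training loop expects; hvd is Horovod's torch module, assumed to be initialised once per worker:

import horovod.torch as hvd

hvd.init()
device = (torch.device('cuda', hvd.local_rank())
          if torch.cuda.is_available() else torch.device('cpu'))

trainer = SupervisedTrainer(model_dir='experiment',
                            best_model_dir='experiment/best',
                            loss=NLLLoss(), batch_size=64,
                            checkpoint_every=1000, print_every=100,
                            max_epochs=10, max_steps=100000,
                            device=device)

# model, train_loader and dev_loader are placeholders defined elsewhere.
model = trainer.train(model, train_loader,
                      start_step=0, dev_data=dev_loader,
                      teacher_forcing_ratio=0.5)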
class SupervisedAdversarialTrainer(object):
    """ 
	Args:
		expt_dir (optional, str): experiment Directory to store details of the experiment,
			by default it makes a folder in the current directory to store the details (default: `experiment`).
		loss (seq2seq.loss.loss.Loss, optional): loss for training, (default: seq2seq.loss.NLLLoss)
		batch_size (int, optional): batch size for experiment, (default: 64)
		checkpoint_every (int, optional): number of batches to checkpoint after, (default: 100)
	"""
    def __init__(self,
                 expt_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=1000,
                 print_every=100,
                 tensorboard=True,
                 batch_adv_loss=NLLLoss()):
        self._trainer = "Adversarial Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size

        self.logger = logging.getLogger(__name__)

        self.writer = SummaryWriter(log_dir=expt_dir) if tensorboard else None

        self.batch_adv_loss = batch_adv_loss

    def _train_batch(self, input_variable, input_lengths, target_variable,
                     model, teacher_forcing_ratio):
        loss = self.loss
        # Forward propagation
        decoder_outputs, decoder_hidden, other = model(
            input_variable,
            input_lengths,
            target_variable,
            teacher_forcing_ratio=teacher_forcing_ratio)
        # Get loss
        loss.reset()
        for step, step_output in enumerate(decoder_outputs):
            batch_size = target_variable.size(0)
            loss.eval_batch(step_output.contiguous().view(batch_size, -1),
                            target_variable[:, step + 1])
        # Backward propagation
        model.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.get_loss()

    def _get_best_attack(self, batch, model, attacks):
        if attacks is None or len(attacks) == 0:
            return seq2seq.src_field_name, -1, {}
        else:
            model.eval()
            loss = self.batch_adv_loss
            d = {}
            with torch.no_grad():
                for attack in attacks:
                    input_variables, input_lengths = getattr(batch, attack)
                    target_variables = getattr(batch, seq2seq.tgt_field_name)

                    decoder_outputs, decoder_hidden, other = model(
                        input_variables, input_lengths.tolist(),
                        target_variables)

                    loss.reset()
                    for step, step_output in enumerate(decoder_outputs):
                        batch_size = target_variables.size(0)
                        loss.eval_batch(
                            step_output.contiguous().view(batch_size, -1),
                            target_variables[:, step + 1])

                    d[attack] = loss.get_loss()

            model.train()
            best_loss = max(d.values())
            best_attack = max(d, key=d.get)

            return best_attack, best_loss, d
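    # Note on `attacks` (an assumption inferred from how _get_best_attack reads the
    # batch): it is expected to be a list of batch field names, each holding a
    # perturbed copy of the source sequence, e.g.
    #     attacks = ['src_adv_rename', 'src_adv_deadcode']   # hypothetical field names
    # For every batch, the attack with the highest loss under the current model is
    # selected and used for the adversarial term of the training objective.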

    def _train_epoches(self,
                       data,
                       model,
                       n_epochs,
                       start_epoch,
                       start_step,
                       dev_data=None,
                       teacher_forcing_ratio=0,
                       attacks=None,
                       lamb=0.0):
        # Train adversarially with lamb*normal loss + (1-lamb)*adv_loss
        # lamb should either be a float or a list of floats of length (n_epochs+1-start_epoch)

        log = self.logger

        if isinstance(lamb, float):
            lamb = [lamb] * (n_epochs + 1 - start_epoch)

        print_loss_total = 0  # Reset every print_every
        epoch_loss_total = 0  # Reset every epoch

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # device = None if torch.cuda.is_available() else -1
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data,
            batch_size=self.batch_size,
            sort=False,
            sort_within_batch=True,
            sort_key=lambda x: len(x.src),
            device=device,
            repeat=False)

        steps_per_epoch = len(batch_iterator)
        total_steps = steps_per_epoch * n_epochs

        self.print_every = max(1, steps_per_epoch // 25)

        log.info('Steps per epoch: %d' % steps_per_epoch)
        log.info('Total steps: %d' % total_steps)

        step = start_step
        step_elapsed = 0

        # num_checkpoints = 25
        # self.checkpoint_every = (total_steps+1)//num_checkpoints

        if attacks is not None:
            chosen_attack_counts = {x: 0 for x in attacks}

        if start_step > 0 and dev_data is not None:
            d = self.evaluator.evaluate(model, dev_data)
            dev_loss = d['metrics']['Loss']
            accuracy = d['metrics']['accuracy (torch)']
            other_metrics = d['metrics']
            best_f1 = other_metrics['f1']
            best_acc = accuracy
        else:
            best_f1 = 0.0
            best_acc = 0.0

        lidx = 0
        for epoch in range(start_epoch, n_epochs + 1):
            lamb_epoch = lamb[lidx] if lidx < len(lamb) else lamb[-1]
            lidx += 1
            log.info("Epoch: %d, Step: %d, Lambda: %.2f" %
                     (epoch, step, lamb_epoch))

            batch_generator = batch_iterator.__iter__()
            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1
                step_elapsed += 1

                chosen_src_field_name, max_loss, d = self._get_best_attack(
                    batch, model, attacks)

                if attacks is not None and len(attacks) > 0:
                    chosen_attack_counts[chosen_src_field_name] += 1

                # print(chosen_src_field_name, max_loss, d)
                # exit()

                self.loss.reset()

                if lamb_epoch > 0:
                    # normal training term
                    input_variables, input_lengths = getattr(
                        batch, seq2seq.src_field_name)
                    target_variables = getattr(batch, seq2seq.tgt_field_name)
                    decoder_outputs, decoder_hidden, other = model(
                        input_variables,
                        input_lengths,
                        target_variables,
                        teacher_forcing_ratio=teacher_forcing_ratio)
                    # Get loss

                    for step1, step_output in enumerate(decoder_outputs):
                        batch_size = target_variables.size(0)
                        self.loss.eval_batch(step_output.contiguous().view(
                            batch_size, -1),
                                             target_variables[:, step1 + 1],
                                             weight=lamb_epoch)

                # adversarial training term
                input_variables, input_lengths = getattr(
                    batch, chosen_src_field_name)
                target_variables = getattr(batch, seq2seq.tgt_field_name)
                decoder_outputs, decoder_hidden, other = model(
                    input_variables,
                    input_lengths,
                    target_variables,
                    teacher_forcing_ratio=teacher_forcing_ratio)
                # Get loss

                for step2, step_output in enumerate(decoder_outputs):
                    batch_size = target_variables.size(0)
                    self.loss.eval_batch(step_output.contiguous().view(
                        batch_size, -1),
                                         target_variables[:, step2 + 1],
                                         weight=(1 - lamb_epoch))

                model.zero_grad()
                self.loss.backward()
                self.optimizer.step()

                loss_adv = self.loss.get_loss()

                # loss = self._train_batch(input_variables, input_lengths.tolist(), target_variables, model, teacher_forcing_ratio)

                # Record average loss
                print_loss_total += loss_adv
                epoch_loss_total += loss_adv

                if step % self.print_every == 0 and step_elapsed >= self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    log_msg = 'Epoch: %d, Step: %d, Progress: %d%%, Train %s: %.4f' % (
                        epoch, step, step / total_steps * 100, self.loss.name,
                        print_loss_avg)
                    log.info(log_msg)

                    if self.writer:
                        self.writer.add_scalar('Train/loss_step',
                                               print_loss_avg, step)

            if step_elapsed == 0:
                continue

            epoch_loss_avg = epoch_loss_total / min(steps_per_epoch,
                                                    step - start_step)
            epoch_loss_total = 0
            log_msg = "Finished epoch %d: Train %s: %.4f" % (
                epoch, self.loss.name, epoch_loss_avg)
            if self.writer:
                self.writer.add_scalar('Train/loss_epoch', epoch_loss_avg,
                                       epoch)

            other_metrics = {}
            if dev_data is not None:
                d = self.evaluator.evaluate(model, dev_data)
                dev_loss = d['metrics']['Loss']
                accuracy = d['metrics']['accuracy (torch)']
                other_metrics = d['metrics']
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (
                    self.loss.name, dev_loss, accuracy)
                if self.writer:
                    self.writer.add_scalar('Val/loss', dev_loss, epoch)
                    self.writer.add_scalar('Val/acc', accuracy, epoch)

                for metric in other_metrics:
                    try:
                        metric_tag = metric.replace(' ', '_').replace('-', '_')
                        log_msg += ", %s: %.4f" % (metric_tag,
                                                   other_metrics[metric])
                        if self.writer:
                            self.writer.add_scalar('Val/%s' % metric_tag,
                                                   other_metrics[metric],
                                                   epoch)
                    except (TypeError, ValueError):
                        # skip metrics that cannot be formatted as floats
                        continue

                log.info(log_msg)

                if other_metrics['f1'] > best_f1:
                    Checkpoint(
                        model=model,
                        optimizer=self.optimizer,
                        epoch=epoch,
                        step=step,
                        input_vocab=data.fields[seq2seq.src_field_name].vocab,
                        output_vocab=data.fields[
                            seq2seq.tgt_field_name].vocab).save(self.expt_dir,
                                                                name='Best_F1')
                    log_msg = 'Checkpoint saved, Epoch %d, Prev Val F1: %.4f, New Val F1: %.4f' % (
                        epoch, best_f1, other_metrics['f1'])
                    log.info(log_msg)
                    best_f1 = other_metrics['f1']

                # if accuracy > best_acc:
                #     Checkpoint(model=model,
                #                    optimizer=self.optimizer,
                #                    epoch=epoch, step=step,
                #                    input_vocab=data.fields[seq2seq.src_field_name].vocab,
                #                    output_vocab=data.fields[seq2seq.tgt_field_name].vocab).save(self.expt_dir, name='Best_Acc')
                #     log_msg = 'Checkpoint saved, Epoch %d, Prev Val Acc: %.4f, New Val Acc: %.4f' % (epoch, best_acc, accuracy)
                #     log.info(log_msg)
                #     best_acc = accuracy

                model.train(mode=True)

            else:
                self.optimizer.update(epoch_loss_avg, epoch)
                log.info(log_msg)

            Checkpoint(
                model=model,
                optimizer=self.optimizer,
                epoch=epoch,
                step=step,
                input_vocab=data.fields[seq2seq.src_field_name].vocab,
                output_vocab=data.fields[seq2seq.tgt_field_name].vocab).save(
                    self.expt_dir, name='Latest')

            log_msg = 'Latest Checkpoint saved, Epoch %d, %s' % (
                epoch, str(other_metrics))
            log.info(log_msg)
            if attacks is not None:
                log.info(str(chosen_attack_counts))

    def train(self,
              model,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0,
              load_checkpoint=None,
              attacks=None,
              lamb=0.5):
        """ Run training for a given model.

		Args:
			model (seq2seq.models): model to run training on, if `resume=True`, it would be
			   overwritten by the model loaded from the latest checkpoint.
			data (seq2seq.dataset.dataset.Dataset): dataset object to train on
			num_epochs (int, optional): number of epochs to run (default 5)
			resume(bool, optional): resume training with the latest checkpoint, (default False)
			dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
			optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
			   (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
			teacher_forcing_ratio (float, optional): teaching forcing ratio (default 0)
		Returns:
			model (seq2seq.models): trained model.
		"""
        # If training is set to resume
        if resume:
            if load_checkpoint is None:
                load_checkpoint = Checkpoint.get_latest_checkpoint(
                    self.expt_dir)
            resume_checkpoint = Checkpoint.load(load_checkpoint)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A work-around to set the optimizer's parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step

            self.logger.info("Resuming training from %d epoch, %d step" %
                             (start_epoch, step))
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            model,
                            start_epoch + num_epochs if resume else num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio,
                            attacks=attacks,
                            lamb=lamb)

        return model
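
The adversarial objective above mixes two NLL terms: the regular term on the original source field, weighted by lamb, and the worst-case attack term, weighted by (1 - lamb). The snippet below is a minimal, self-contained sketch of that weighted combination using random stand-in tensors; it illustrates the arithmetic only and does not call into the trainer above.

import torch
import torch.nn.functional as F

torch.manual_seed(0)
batch_size, vocab_size = 4, 10

# stand-ins for one decoding step's log-probabilities on the clean input
# and on the highest-loss ("best") attacked input
log_probs_clean = F.log_softmax(torch.randn(batch_size, vocab_size), dim=-1)
log_probs_attacked = F.log_softmax(torch.randn(batch_size, vocab_size), dim=-1)
targets = torch.randint(0, vocab_size, (batch_size,))

lamb = 0.5
normal_loss = F.nll_loss(log_probs_clean, targets)
adv_loss = F.nll_loss(log_probs_attacked, targets)
total_loss = lamb * normal_loss + (1 - lamb) * adv_loss
print(total_loss.item())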
Example #25
    label_word2index = checkpoint.label_word2index
    label_index2word = checkpoint.label_index2word

    if not os.path.isfile(opt.gaussian_dict_path):
        print('calculating means and stds of box positions and sizes...')
        get_class_sta(opt.train_path, opt.gaussian_dict_path)

    gaussian_dict = np.load(opt.gaussian_dict_path).item()

    hidden_size = opt.embedding_dim
    encoder = PreEncoderRNN(len(cap_word2index), nhidden=opt.embedding_dim)
    state_dict = torch.load(opt.encoder_path,
                            map_location=lambda storage, loc: storage)
    encoder.load_state_dict(state_dict)
    encoder.eval()

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # prepare dataset
    dev_cap_lang, dev_label_lang, dev_tuples, x_mean_std, y_mean_std, w_mean_std, r_mean_std, \
    keys = prepare_test_data(opt.dev_path, opt.mean_std_path, opt.max_len, opt.min_len,
        cap_word2index, cap_index2word, label_word2index, label_index2word, opt.dev_filename_path)

    evaluator = Evaluator(opt.batch_size, opt.early_stop_len, opt.expt_dir,
                          dev_cap_lang, dev_label_lang, x_mean_std, y_mean_std,
                          w_mean_std, r_mean_std, gaussian_dict,
                          opt.box_saving_folder, opt.output_opt)
    evaluator.evaluate(encoder, decoder, dev_tuples, keys)
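
The gaussian_dict above is a plain Python dict persisted with numpy and recovered via .item(). Below is a small round-trip sketch; the file name and contents are made up, and recent numpy versions additionally require allow_pickle=True when loading object arrays.

import numpy as np

stats = {'person': (0.52, 0.11), 'car': (0.35, 0.20)}   # hypothetical per-class statistics
np.save('gaussian_dict.npy', stats)                      # the dict is stored as a 0-d object array

restored = np.load('gaussian_dict.npy', allow_pickle=True).item()
print(restored['person'])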
Example #26
    # train
    t = SupervisedTrainer(
        loss=loss,
        batch_size=32,
        checkpoint_every=50,
        print_every=10,
        expt_dir=opt.expt_dir,
    )

    seq2seq = t.train(
        seq2seq,
        train,
        num_epochs=6,
        dev_data=dev,
        optimizer=optimizer,
        teacher_forcing_ratio=0.5,
        resume=opt.resume,
    )

evaluator = Evaluator(loss=loss, batch_size=32)
dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
assert dev_loss < 1.5

beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 3))

predictor = Predictor(beam_search, input_vocab, output_vocab)
inp_seq = "1 3 5 7 9"
seq = predictor.predict(inp_seq.split())
assert " ".join(seq[:-1]) == inp_seq[::-1]
    )

    seq2seq = t.train(
        seq2seq,
        train,
        num_epochs=6,
        optimizer=optimizer,
        teacher_forcing_ratio=0.6,
        teacher_forcing_half_life=5000,
        resume=opt.resume,
    )

predictor = Predictor(seq2seq, input_vocab, output_vocab)
loss, acc = Evaluator(loss=loss).evaluate(
    seq2seq,
    torchtext.data.TabularDataset(path=opt.test_path,
                                  format="tsv",
                                  fields=[("src", src), ("tgt", tgt)]),
)
logging.info("Loss: {}, Acc: {}".format(loss, acc))

import nltk
nltk.download('perluniprops')

from nltk.tokenize.nist import NISTTokenizer
nist = NISTTokenizer()

while True:
    seq_str = input("Type in a source sequence:")
    seq = nist.tokenize(seq_str.strip(), lowercase=False)
    print(predictor.predict(seq))

def predict(predictor):
    while True:
        tree_or_sentence = input("Type in a tree or a sentence?")
        if tree_or_sentence != 'tree':
            seq_str = input("Type in a source sequence:")
            if seq_str == ':end':
                break
            seq = seq_str.strip().split()
            print(predictor.predict(seq))
        else:
            tree_str = input("Type in a source tree:")
            try:
                tree = Tree.fromstring(tree_str)
                seq = tree.leaves()
                print(predictor.predict(seq, tree))
            except ValueError:
                print('The input is not a valid tree.')


if __name__ == '__main__':
    opt = type_in()
    predictor, dev, train = train(opt)
    evaluator = Evaluator(loss=NLLLoss(), batch_size=1)
    # dev_loss, accuracy, tree_acc = evaluator.evaluate(predictor.model, dev)
    # print(accuracy)
    # dev_loss, accuracy, tree_acc = evaluator.evaluate(predictor.model, train)
    # print(accuracy)
    predict(predictor)
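
In the tree branch of predict() above, nltk's Tree.fromstring parses a bracketed constituency string and .leaves() recovers the token sequence that is fed to the predictor. A quick illustration with a made-up parse:

from nltk import Tree

tree = Tree.fromstring("(S (NP I) (VP (V saw) (NP it)))")
print(tree.leaves())   # ['I', 'saw', 'it']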
class SupervisedTrainer(object):
    """ The SupervisedTrainer class helps in setting up a training framework in a
    supervised setting.

    Args:
        expt_dir (optional, str): experiment Directory to store details of the experiment,
            by default it makes a folder in the current directory to store the details (default: `experiment`).
        loss (seq2seq.loss.loss.Loss, optional): loss for training, (default: seq2seq.loss.NLLLoss)
        batch_size (int, optional): batch size for experiment, (default: 64)
        checkpoint_every (int, optional): number of batches (steps) to checkpoint after, (default: 100)
        optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
            (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
    """
    def __init__(self,
                 expt_dir='experiment',
                 loss=NLLLoss(),
                 batch_size=64,
                 random_seed=None,
                 checkpoint_every=100,
                 print_every=100,
                 optimizer=Optimizer(optim.Adam, max_grad_norm=5)):
        self._trainer = "Simple Trainer"
        self.random_seed = random_seed
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)
        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = optimizer
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every

        if not os.path.isabs(expt_dir):
            expt_dir = os.path.join(os.getcwd(), expt_dir)
        self.expt_dir = expt_dir
        if not os.path.exists(self.expt_dir):
            os.makedirs(self.expt_dir)
        self.batch_size = batch_size
        self.input_vocab_file = os.path.join(self.expt_dir, 'input_vocab')
        self.output_vocab_file = os.path.join(self.expt_dir, 'output_vocab')

        self.logger = logging.getLogger(__name__)

    def _train_batch(self, input_variable, target_variable, model,
                     teacher_forcing_ratio):
        loss = self.loss
        # Forward propagation
        decoder_outputs, decoder_hidden, other = model(
            input_variable,
            target_variable,
            teacher_forcing_ratio=teacher_forcing_ratio)
        # Get loss
        loss.reset()
        targets = other['inputs']
        lengths = other['length']
        for batch in range(len(targets)):
            # Batch wise loss
            batch_target = targets[batch]
            batch_len = lengths[batch]
            # Crop output and target to batch length
            batch_output = torch.stack(
                [output[batch] for output in decoder_outputs[:batch_len]])
            batch_target = batch_target[:batch_len]
            # Evaluate loss
            loss.eval_batch(batch_output, batch_target)
        # Backward propagation
        model.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.get_loss()

    def _train_epoches(self,
                       data,
                       model,
                       n_epochs,
                       batch_size,
                       resume,
                       dev_data=None,
                       teacher_forcing_ratio=0):
        start = time.time()
        print_loss_total = 0  # Reset every print_every
        steps_per_epoch = data.num_batches(batch_size)
        total_steps = steps_per_epoch * n_epochs

        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer.set_parameters(model.parameters())
            self.optimizer.load_state_dict(
                resume_checkpoint.optimizer_state_dict)
            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            self.optimizer.set_parameters(model.parameters())

        for epoch in range(start_epoch, n_epochs + 1):
            data.shuffle(self.random_seed)

            batch_generator = data.make_batches(batch_size)

            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1

                input_variables = batch[0]
                target_variables = batch[1]

                loss = self._train_batch(input_variables, target_variables,
                                         model, teacher_forcing_ratio)

                # Record average loss
                print_loss_total += loss

                if step % self.print_every == 0:
                    print_loss_avg = print_loss_total / (self.print_every)
                    print_loss_total = 0
                    log_msg = 'Time elapsed: %s, Progress: %d%%, Train %s: %.4f' % (
                        pretty_interval(start), float(step) / total_steps *
                        100, self.loss.name, print_loss_avg)
                    self.logger.info(log_msg)

                # Checkpoint
                if step % self.checkpoint_every == 0 or step == total_steps:
                    Checkpoint(
                        model=model,
                        optimizer_state_dict=self.optimizer.state_dict(),
                        epoch=epoch,
                        step=step,
                        input_vocab=data.input_vocab,
                        output_vocab=data.output_vocab).save(self.expt_dir)

            log_msg = "Finished epoch {0}".format(epoch)
            if dev_data is not None:
                dev_loss = self.evaluator.evaluate(model, dev_data)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f" % (self.loss.name, dev_loss)
                model.train(mode=True)
            self.logger.info(log_msg)

    def train(self,
              model,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              teacher_forcing_ratio=0):
        """ Run training for a given model.

         Args:
             model (seq2seq.models): model to run training on, if `resume=True`, it would be
                overwritten by the model loaded from the latest checkpoint.
             data (seq2seq.dataset.dataset.Dataset): dataset object to train on
             num_epochs (int, optional): number of epochs to run (default 5)
             resume(bool, optional): resume training with the latest checkpoint, (default False)
             dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
             teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)

        """
        # Make Checkpoint Directories
        data.input_vocab.save(self.input_vocab_file)
        data.output_vocab.save(self.output_vocab_file)

        self._train_epoches(data,
                            model,
                            num_epochs,
                            self.batch_size,
                            resume=resume,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
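
When training resumes mid-epoch, _train_epoches above skips the batches that were already consumed before the checkpoint ("consuming seen batches from previous training"). A toy sketch of that bookkeeping, with plain integers standing in for batches:

steps_per_epoch = 5
resume_step = 7      # the checkpoint was written after global step 7
epoch = 2            # resuming inside epoch 2

batch_generator = iter(range(steps_per_epoch))     # stands in for the batch iterator
for _ in range((epoch - 1) * steps_per_epoch, resume_step):
    next(batch_generator)                          # discard already-seen batches

print(list(batch_generator))                       # [2, 3, 4] -> batches left in epoch 2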
Example #30
class SupervisedTrainer(object):
    """The SupervisedTrainer class helps in setting up a training framework
    in a supervised setting.

    Args:
        experiment_directory (optional, str): directory to store experiments in
        loss (seq2seq.loss.loss.Loss, optional): loss for training
        batch_size (int, optional): batch size for experiment
        checkpoint_every (int, optional): number of batches to checkpoint after
    """
    def __init__(self, experiment_directory='./experiment', loss=None, batch_size=64,
                random_seed=None, checkpoint_every=100, print_every=100):
        if loss is None:
            loss = NLLLoss()
        if random_seed is not None:
            random.seed(random_seed)
            torch.manual_seed(random_seed)

        self.loss = loss
        self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
        self.optimizer = None
        self.checkpoint_every = checkpoint_every
        self.print_every = print_every
        self.batch_size = batch_size
        self.experiment_directory = experiment_directory

        if not os.path.exists(self.experiment_directory):
            os.makedirs(self.experiment_directory)

    def train(self, model, data, n_epochs=5, resume=False,
            dev_data=None, optimizer=None, teacher_forcing_ratio=0):
        """Train a given model.

        Args:
            model (seq2seq.models): model to run training on. If resume=True,
                it will be overwritten by the model loaded from the latest
                checkpoint
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            n_epochs (int): number of epochs to run
            resume(bool): resume training with the latest checkpoint
            dev_data (seq2seq.dataset.dataset.Dataset): dev Dataset
            optimizer (seq2seq.optim.Optimizer): optimizer for training
            teacher_forcing_ratio (float): teacher forcing ratio
        Returns:
            model (seq2seq.models): trained model.
        """
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.experiment_directory)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A work-around to set optimizing parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(
                    optim.Adam(model.parameters()), max_grad_norm=5)
            self.optimizer = optimizer

        logger.info('Optimizer: %s, Scheduler: %s',
                    self.optimizer.optimizer, self.optimizer.scheduler)

        self._train_epochs(data, model, n_epochs, 
                            start_epoch, step, dev_data=dev_data, 
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model

    def _train_epochs(self, data, model, n_epochs, start_epoch, 
                    start_step, dev_data=None, teacher_forcing_ratio=0):
        print_loss_total = epoch_loss_total = 0
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

        batch_iterator = torchtext.data.BucketIterator(
            dataset=data,
            batch_size=self.batch_size,
            sort=False,
            sort_within_batch=True,
            sort_key=lambda x: len(x.src),
            device=device,
            repeat=False,
        )

        steps_per_epoch = len(batch_iterator)
        total_steps = steps_per_epoch * n_epochs

        step = start_step
        step_elapsed = 0
        for epoch in range(start_epoch, n_epochs + 1):
            logger.debug('Epoch: %d, Step: %d', epoch, step)

            batch_generator = iter(batch_iterator)
            # Consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train()
            progress_bar = tqdm(
                batch_generator,
                total=steps_per_epoch,
                desc='Train {}: '.format(self.loss.name),
            )
            for batch in progress_bar:
                step += 1
                step_elapsed += 1

                loss = self._train_batch(
                    batch,
                    model,
                    teacher_forcing_ratio,
                    data,
                )
                print_loss_total += loss
                epoch_loss_total += loss

                if step % self.print_every == 0 \
                   and step_elapsed > self.print_every:
                    print_loss_avg = print_loss_total / self.print_every
                    print_loss_total = 0
                    progress_bar.set_description('Train {}: {:.4f}'.format(
                        self.loss.name,
                        print_loss_avg,
                    ))

                # Checkpoint
                if step % self.checkpoint_every == 0 or step == total_steps:
                    Checkpoint(
                        model=model,
                        optimizer=self.optimizer,
                        epoch=epoch, step=step,
                        input_vocab=data.fields[seq2seq.src_field_name].vocab,
                        output_vocab=data.fields[seq2seq.tgt_field_name].vocab,
                    ).save(self.experiment_directory)

            if step_elapsed == 0:
                continue

            epoch_loss_avg = epoch_loss_total / min(
                steps_per_epoch, step - start_step)
            epoch_loss_total = 0
            log_msg = 'Finished epoch {:d}: Train {}: {:.4f}'.format(
                epoch, self.loss.name, epoch_loss_avg)
            if dev_data is not None:
                dev_loss, accuracy = self.evaluator.evaluate(model, dev_data)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ', Dev {}: {:.4f}, Accuracy: {:.4f}'.format(
                    self.loss.name, dev_loss, accuracy)
                model.train()
            else:
                self.optimizer.update(epoch_loss_avg, epoch)

            logger.info(log_msg)

    def _train_batch(self, batch, model, teacher_forcing_ratio, dataset):
        # Forward propagation
        output, _, _ = model(
            batch,
            dataset=dataset,
            teacher_forcing_ratio=teacher_forcing_ratio,
        )
        # Get loss
        self.loss.reset()
        self.loss.eval_batch(output, batch)

        # Backward propagation
        model.zero_grad()
        self.loss.backward()
        self.optimizer.step()

        return self.loss.get_loss()
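
The resume path above rebuilds the optimizer from the checkpointed one: the hyperparameters are read from the first param group ('params' and 'initial_lr' are dropped) and a fresh optimizer of the same class is created over the reloaded model's parameters. A self-contained sketch of that work-around, using a throwaway linear model:

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)                                 # throwaway model
old_optim = optim.Adam(model.parameters(), lr=3e-4)     # pretend this came from a checkpoint

defaults = dict(old_optim.param_groups[0])
defaults.pop('params', None)        # drop references to the old parameters
defaults.pop('initial_lr', None)    # only present when a scheduler was attached
new_optim = old_optim.__class__(model.parameters(), **defaults)
print(new_optim.param_groups[0]['lr'])                  # 0.0003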
Example #31
class SupervisedTrainer(object):
	""" The SupervisedTrainer class helps in setting up a training framework in a
	supervised setting.

	Args:
		expt_dir (optional, str): experiment Directory to store details of the experiment,
			by default it makes a folder in the current directory to store the details (default: `experiment`).
		loss (seq2seq.loss.loss.Loss, optional): loss for training, (default: seq2seq.loss.NLLLoss)
		batch_size (int, optional): batch size for experiment, (default: 64)
		checkpoint_every (int, optional): number of batches to checkpoint after, (default: 100)
	"""
	def __init__(self, expt_dir='experiment', loss=NLLLoss(), batch_size=64,
				 random_seed=None,
				 checkpoint_every=100, print_every=100):
		self._trainer = "Simple Trainer"
		self.random_seed = random_seed
		if random_seed is not None:
			random.seed(random_seed)
			torch.manual_seed(random_seed)
		self.loss = loss
		self.evaluator = Evaluator(loss=self.loss, batch_size=batch_size)
		self.optimizer = None
		self.checkpoint_every = checkpoint_every
		self.print_every = print_every

		if not os.path.isabs(expt_dir):
			expt_dir = os.path.join(os.getcwd(), expt_dir)
		self.expt_dir = expt_dir
		if not os.path.exists(self.expt_dir):
			os.makedirs(self.expt_dir)
		self.batch_size = batch_size

		self.logger = logging.getLogger(__name__)

	def _train_batch(self, input_variable, input_lengths, target_variable, model, teacher_forcing_ratio):
		loss = self.loss
		# Forward propagation
		decoder_outputs, decoder_hidden, other = model(input_variable, input_lengths, target_variable,
													   teacher_forcing_ratio=teacher_forcing_ratio)
		# Get loss
		loss.reset()
		for step, step_output in enumerate(decoder_outputs):
			batch_size = target_variable.size(0)
			loss.eval_batch(step_output.contiguous().view(batch_size, -1), target_variable[:, step + 1])
		# Backward propagation
		model.zero_grad()
		loss.backward()
		self.optimizer.step()

		return loss.get_loss()

	def _train_epoches(self, data, model, n_epochs, start_epoch, start_step,
					   dev_data=None, teacher_forcing_ratio=0):
		log = self.logger

		print_loss_total = 0  # Reset every print_every
		epoch_loss_total = 0  # Reset every epoch

		device = None if torch.cuda.is_available() else -1
		batch_iterator = torchtext.data.BucketIterator(
			dataset=data, batch_size=self.batch_size,
			sort=False, sort_within_batch=True,
			sort_key=lambda x: len(x.src),
			device=device, repeat=False)

		steps_per_epoch = len(batch_iterator)
		total_steps = steps_per_epoch * n_epochs

		step = start_step
		step_elapsed = 0
		for epoch in range(start_epoch, n_epochs + 1):
			log.debug("Epoch: %d, Step: %d" % (epoch, step))

			batch_generator = batch_iterator.__iter__()
			# consuming seen batches from previous training
			for _ in range((epoch - 1) * steps_per_epoch, step):
				next(batch_generator)

			model.train(True)
			for batch in batch_generator:
				step += 1
				step_elapsed += 1

				input_variables, input_lengths = getattr(batch, seq2seq.src_field_name)
				target_variables = getattr(batch, seq2seq.tgt_field_name)

				loss = self._train_batch(input_variables, input_lengths.tolist(), target_variables, model, teacher_forcing_ratio)

				# Record average loss
				print_loss_total += loss
				epoch_loss_total += loss

				if step % self.print_every == 0 and step_elapsed > self.print_every:
					print_loss_avg = print_loss_total / self.print_every
					print_loss_total = 0
					log_msg = 'Progress: %d%%, Train %s: %.4f' % (
						step / total_steps * 100,
						self.loss.name,
						print_loss_avg)
					log.info(log_msg)

				# Checkpoint
				if step % self.checkpoint_every == 0 or step == total_steps:
					Checkpoint(model=model,
							   optimizer=self.optimizer,
							   epoch=epoch, step=step,
							   input_vocab=data.fields[seq2seq.src_field_name].vocab,
							   output_vocab=data.fields[seq2seq.tgt_field_name].vocab).save(self.expt_dir)

			if step_elapsed == 0: continue

			epoch_loss_avg = epoch_loss_total / min(steps_per_epoch, step - start_step)
			epoch_loss_total = 0
			log_msg = "Finished epoch %d: Train %s: %.4f" % (epoch, self.loss.name, epoch_loss_avg)
			if dev_data is not None:
				dev_loss, accuracy = self.evaluator.evaluate(model, dev_data)
				self.optimizer.update(dev_loss, epoch)
				log_msg += ", Dev %s: %.4f, Accuracy: %.4f" % (self.loss.name, dev_loss, accuracy)
				model.train(mode=True)
			else:
				self.optimizer.update(epoch_loss_avg, epoch)

			log.info(log_msg)

	def train(self, model, data, num_epochs=5,
			  resume=False, dev_data=None,
			  optimizer=None, teacher_forcing_ratio=0):
		""" Run training for a given model.

		Args:
			model (seq2seq.models): model to run training on, if `resume=True`, it would be
			   overwritten by the model loaded from the latest checkpoint.
			data (seq2seq.dataset.dataset.Dataset): dataset object to train on
			num_epochs (int, optional): number of epochs to run (default 5)
			resume(bool, optional): resume training with the latest checkpoint, (default False)
			dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
			optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
			   (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
			teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
		Returns:
			model (seq2seq.models): trained model.
		"""
		# If training is set to resume
		if resume:
			latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.expt_dir)
			resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
			model = resume_checkpoint.model
			self.optimizer = resume_checkpoint.optimizer

			# A work-around to set the optimizer's parameters properly
			resume_optim = self.optimizer.optimizer
			defaults = resume_optim.param_groups[0]
			defaults.pop('params', None)
			defaults.pop('initial_lr', None)
			self.optimizer.optimizer = resume_optim.__class__(model.parameters(), **defaults)

			start_epoch = resume_checkpoint.epoch
			step = resume_checkpoint.step
		else:
			start_epoch = 1
			step = 0
			if optimizer is None:
				optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)
			self.optimizer = optimizer

		self.logger.info("Optimizer: %s, Scheduler: %s" % (self.optimizer.optimizer, self.optimizer.scheduler))

		self._train_epoches(data, model, num_epochs,
							start_epoch, step, dev_data=dev_data,
							teacher_forcing_ratio=teacher_forcing_ratio)
		return model
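
All of these trainers pass teacher_forcing_ratio through to the model. The ratio is typically interpreted as a coin flip made once per forward pass: with probability equal to the ratio the gold target tokens are fed to the next decoder step, otherwise the decoder's own predictions are fed back. A tiny sketch of that decision (an illustration of the usual convention; the exact placement inside this library's decoder may differ):

import random

random.seed(0)
teacher_forcing_ratio = 0.5
use_teacher_forcing = random.random() < teacher_forcing_ratio
print("feed gold target tokens" if use_teacher_forcing else "feed back model predictions")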
Example #32
pretrain_generator(gen, pre_train, pre_dev)

# init discriminator
dis = ClassifierCNN(len(tgt.vocab),
                    embed_dim=hidden_size,
                    num_class=1,
                    num_kernel=100,
                    kernel_sizes=[3, 4, 5],
                    dropout_p=0.25)
if torch.cuda.is_available():
    dis.cuda()

# init trainers, optimizers, evaluators
g_trainer = gan.PolicyGradientTrainer(max_len=max_len, logger=logger)
g_optimizer = torch.optim.Adam(gen.parameters(), lr=0.01)
g_evaluator = Evaluator()

d_trainer = trainer.BinaryClassifierTrainer()
d_optimizer = torch.optim.Adam(dis.parameters(), lr=0.01)

# pre-train discriminator
samples = [
    sample for sample, _, _, _ in g_trainer.gen_sample(
        gen, adv_train_iter, num_src=256, src2sample=1)
]
batch = next(iter(real_iter))
reals = batch.tgt.data[:, 1:]  # strip the <sos> token
for epoch in range(1, 20 + 1):
    logging.info('Epoch[%d]' % epoch)
    _train_iter = helper.batch_gen(samples,
                                   reals,