    def train(self,
              model,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              teacher_forcing_ratio=0):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on; if `resume=True`, it will be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume (bool, optional): resume training from the latest checkpoint (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(torch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set the optimizer parameters properly:
            # rebuild an optimizer of the same class with the resumed
            # hyperparameters, bound to the resumed model's parameters.
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            model,
                            num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
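A minimal usage sketch for the trainer above, assuming it is pytorch-seq2seq's SupervisedTrainer (or a close fork) and that `model`, `train_data` and `dev_data` are already built; those three names are placeholders, not taken from the snippet:

import torch.optim as optim
from seq2seq.optim import Optimizer
from seq2seq.trainer import SupervisedTrainer

# model, train_data and dev_data are assumed to exist already (hypothetical names).
trainer = SupervisedTrainer(expt_dir='./experiment', batch_size=32)
optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)
model = trainer.train(model, train_data,
                      num_epochs=10,
                      dev_data=dev_data,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=False)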
Example 2
    def test(self, args, model, dataloader, scheduler, num_epochs=5,
              resume=False, dev_data=None,
              optimizer=None, teacher_forcing_ratio=1.0, save_dir='runs/exp'):

        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.model_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            # model = resume_checkpoint.model
            model.load_state_dict(resume_checkpoint.model)
            self.optimizer = None
            self.args = args
            model.args = args
            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
            print('Resume from ', latest_checkpoint_path)
            print('start_epoch : ', start_epoch)
            print('step : ', step)
            # Testing restarts the epoch/step counters regardless of the checkpoint.
            start_epoch = 1
            step = 0
        else:
            raise RuntimeError('Please resume from a checkpoint: testing requires resume=True.')
        self._test_epoches(dataloader, model, num_epochs, start_epoch, step,
                           dev_data=dev_data,
                           teacher_forcing_ratio=teacher_forcing_ratio,
                           clip=args.clip, save_dir=save_dir, args=args)
        return model
Example 3
    def test(self, args, model, dataloader, resume=False, save_dir='runs/exp'):

        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.model_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model.load_state_dict(resume_checkpoint.model)
            self.optimizer = None
            self.args = args
            model.args = args
            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
            print('Resume from ', latest_checkpoint_path)
            print('start_epoch : ', start_epoch)
            print('step : ', step)
            start_epoch = 1
            step = 0
        else:
            raise RuntimeError('Please resume from a checkpoint: testing requires resume=True.')
        if args.mode == 'bleu_t1t2':
            print('Eval on bleu_t1&t2 !')
            self._test_epoches_t1t2(dataloader,
                                    model,
                                    step,
                                    save_dir=save_dir,
                                    args=args)
        else:
            raise ValueError("Please choose the t1|t2 mode ('bleu_t1t2').")
        return model
Example 4
    def train(self,
              D,
              G,
              optimizer_D,
              optimizer_G,
              train,
              val=None,
              num_epoch=200,
              resume=False,
              opt=None):
        start_epoch = 0
        if resume:
            cp = Checkpoint.load(
                Checkpoint.get_latest_checkpoint('./experiment/gan'))
            self.model = cp.model
            start_epoch = cp.epoch + 1

        for epoch in range(start_epoch, num_epoch):
            logging.info('Epoch[%d] CycleGAN train' % epoch)

            train_iter, val_iter = torchtext.data.BucketIterator.splits(
                (train, val),
                batch_sizes=(1, 64),
                device=opt.device,
                sort_key=lambda x: len(x.real_a),
                repeat=False)

            self.train_epoch(D, G, optimizer_D, optimizer_G, train_iter)
Example 5
    def train(self, train, val=None, num_epoch=200, resume=False):
        start_epoch = 0
        if resume:
            cp = Checkpoint.load(
                Checkpoint.get_latest_checkpoint('./experiment/gan'))
            self.model = cp.model
            start_epoch = cp.epoch + 1

        for epoch in range(start_epoch, num_epoch):
            logging.info('Epoch[%d] CycleGAN train' % epoch)

            train_iter, val_iter = torchtext.data.BucketIterator.splits(
                (train, val),
                batch_sizes=(1, 64),
                device=self.device,
                sort_key=lambda x: len(x.real_a),
                repeat=False)

            self._train_epoch(train_iter)
            self.evaluate(val_iter)

            Checkpoint(model=self.model,
                       optimizer=None,
                       epoch=epoch,
                       step=0,
                       input_vocab=None,
                       output_vocab=None).save('./experiment/gan')
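The Checkpoint round trip used above only needs save() to persist the model together with the bookkeeping fields, and load() to give them back as attributes. A minimal sketch of such a class, assuming plain torch.save/torch.load pickling of the whole object (the real library serializes the model and trainer state separately, so this is illustrative only):

import os
import time
import torch

class Checkpoint(object):
    CHECKPOINT_DIR_NAME = 'checkpoints'

    def __init__(self, model, optimizer, epoch, step, input_vocab, output_vocab):
        self.model = model
        self.optimizer = optimizer
        self.epoch = epoch
        self.step = step
        self.input_vocab = input_vocab
        self.output_vocab = output_vocab

    def save(self, experiment_dir):
        # One sub-directory per checkpoint, named with a sortable timestamp.
        name = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime())
        path = os.path.join(experiment_dir, self.CHECKPOINT_DIR_NAME, name)
        os.makedirs(path, exist_ok=True)
        torch.save(self, os.path.join(path, 'checkpoint.pt'))
        return path

    @classmethod
    def load(cls, path):
        # Counterpart of save(): restores the whole Checkpoint object.
        return torch.load(os.path.join(path, 'checkpoint.pt'))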
Example 6
    def test_get_latest_checkpoint(self, mock_listdir):
        mock_listdir.return_value = ['2017_05_22_09_47_26',
                                     '2017_05_22_09_47_31',
                                     '2017_05_23_10_47_29']
        latest_checkpoint = Checkpoint.get_latest_checkpoint(self.EXP_DIR)
        self.assertEqual(latest_checkpoint,
                         os.path.join(self.EXP_DIR,
                                      'checkpoints/2017_05_23_10_47_29'))
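The test relies on checkpoint sub-directories being named with lexicographically sortable timestamps, so the newest entry is simply the largest name. A sketch of get_latest_checkpoint consistent with this test (not necessarily the library's exact code):

import os

def get_latest_checkpoint(experiment_path):
    # Checkpoints live under <experiment_path>/checkpoints and are named
    # 'YYYY_mm_dd_HH_MM_SS', so reverse-sorting the names puts the newest first.
    checkpoints_path = os.path.join(experiment_path, 'checkpoints')
    all_times = sorted(os.listdir(checkpoints_path), reverse=True)
    return os.path.join(checkpoints_path, all_times[0])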
def load_checkpoint(checkpoint_name, expt_dir):
    if checkpoint_name is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                         checkpoint_name)))
        checkpoint_path = os.path.join(expt_dir,
                                       Checkpoint.CHECKPOINT_DIR_NAME,
                                       checkpoint_name)
    else:
        checkpoint_path = Checkpoint.get_latest_checkpoint(expt_dir)
    return Checkpoint.load(checkpoint_path)
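A short usage sketch for this helper: passing None as the checkpoint name falls back to the latest checkpoint, and the returned Checkpoint exposes the model and vocabularies (the same attributes used further below):

# Hypothetical call site: restore a trained model and its vocabularies for evaluation.
checkpoint = load_checkpoint(None, './experiment')
model = checkpoint.model
input_vocab = checkpoint.input_vocab
output_vocab = checkpoint.output_vocab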
    def train(self,
              encoder,
              decoder,
              data,
              num_epochs=5,
              resume=False,
              dev_data=None,
              optimizer=None,
              is_training=0):
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            decoder = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set the optimizer parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                decoder.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(decoder.parameters()),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data,
                            encoder,
                            decoder,
                            num_epochs,
                            start_epoch,
                            step,
                            dev_data=dev_data,
                            is_training=is_training)
        return decoder
    def train(self, model, data, num_epochs=5,
              resume=False, dev_data=None,
              optimizer=None, teacher_forcing_ratio=0):
        """ Run training for a given model.

        Args:
            model (seq2seq.models): model to run training on; if `resume=True`, it will be
               overwritten by the model loaded from the latest checkpoint.
            data (seq2seq.dataset.dataset.Dataset): dataset object to train on
            num_epochs (int, optional): number of epochs to run (default 5)
            resume (bool, optional): resume training from the latest checkpoint (default False)
            dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(torch.optim.Adam, max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set the optimizer parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            self.optimizer.optimizer = resume_optim.__class__(model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" % (self.optimizer.optimizer, self.optimizer.scheduler))

        self._train_epoches(data, model, num_epochs,
                            start_epoch, step, dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio)
        return model
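The logger call above prints both the wrapped optimizer and its scheduler. A hedged sketch of configuring them explicitly before calling train, assuming the Optimizer wrapper is pytorch-seq2seq's (which exposes set_scheduler) and that `model`, `trainer`, `train_data` and `dev_data` already exist:

import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from seq2seq.optim import Optimizer

optimizer = Optimizer(optim.Adam(model.parameters(), lr=1e-3), max_grad_norm=5)
# Decay the learning rate every epoch; the trainer reports it via the logger line above.
optimizer.set_scheduler(StepLR(optimizer.optimizer, step_size=1, gamma=0.5))
model = trainer.train(model, train_data, num_epochs=10,
                      dev_data=dev_data, optimizer=optimizer)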
Example 10
    def train(self, args, model, dataloader, scheduler, num_epochs=5,
              resume=False, dev_data=None,
              optimizer=None, teacher_forcing_ratio=1.0, save_dir='runs/exp'):

        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.model_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            # model = resume_checkpoint.model
            model.load_state_dict(resume_checkpoint.model)
            self.optimizer = optimizer
            self.args = args
            model.args = args
            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
            print('Resume from ', latest_checkpoint_path)
            print('start_epoch : ', start_epoch)
            print('step : ', step)

            # Rebuild the optimizer and load the checkpointed state into the new instance.
            if args.adam:
                self.optimizer = torch.optim.Adam(model.parameters())
                self.optimizer.load_state_dict(resume_checkpoint.optimizer)
            elif args.sgd:
                self.optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
                self.optimizer.load_state_dict(resume_checkpoint.optimizer)
            self.scheduler = lr_scheduler.StepLR(self.optimizer, args.decay_steps, gamma=args.decay_factor)
            # for i in range(step):
            #     self.scheduler.step()
            self.scheduler._step_count = step
            for param_group in self.optimizer.param_groups:
                print('learning rate', param_group['lr'], step)
        else:
            start_epoch = 1
            step = 0
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)
            self.optimizer = optimizer
            self.scheduler = scheduler

        # self.logger.info("Optimizer: %s, Scheduler: %s" % (self.optimizer.optimizer, self.optimizer.scheduler))
        if args.only_sql:
            self._train_sql_epoches(dataloader, model, num_epochs, start_epoch, step,
                                    dev_data=dev_data,
                                    teacher_forcing_ratio=teacher_forcing_ratio,
                                    clip=args.clip, save_dir=save_dir, args=args)
        else:
            self._train_epoches(dataloader, model, num_epochs, start_epoch, step,
                                dev_data=dev_data,
                                teacher_forcing_ratio=teacher_forcing_ratio,
                                clip=args.clip, save_dir=save_dir, args=args)
        return model
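Rebuilding the optimizer from args and fast-forwarding the scheduler through the private `_step_count` attribute works, but the standard PyTorch route is to checkpoint and restore the optimizer's and scheduler's state_dicts. A hedged sketch of that alternative (the checkpoint keys and the surrounding `model`/`args`/`epoch`/`step` variables are assumptions, not this snippet's code):

import torch
from torch.optim import lr_scheduler

# At save time (hypothetical checkpoint layout):
torch.save({'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'epoch': epoch,
            'step': step}, 'checkpoint.pt')

# At resume time:
state = torch.load('checkpoint.pt')
model.load_state_dict(state['model'])
optimizer = torch.optim.Adam(model.parameters())
optimizer.load_state_dict(state['optimizer'])
scheduler = lr_scheduler.StepLR(optimizer, args.decay_steps, gamma=args.decay_factor)
scheduler.load_state_dict(state['scheduler'])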
Example 11
    def _train_epoches(self,
                       data,
                       model,
                       n_epochs,
                       batch_size,
                       resume,
                       dev_data=None,
                       teacher_forcing_ratio=0):
        start = time.time()
        print_loss_total = 0  # Reset every print_every
        steps_per_epoch = data.num_batches(batch_size)
        total_steps = steps_per_epoch * n_epochs

        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer.set_parameters(model.parameters())
            self.optimizer.load_state_dict(
                resume_checkpoint.optimizer_state_dict)
            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            self.optimizer.set_parameters(model.parameters())

        for epoch in range(start_epoch, n_epochs + 1):
            data.shuffle(self.random_seed)

            batch_generator = data.make_batches(batch_size)

            # consuming seen batches from previous training
            for _ in range((epoch - 1) * steps_per_epoch, step):
                next(batch_generator)

            model.train(True)
            for batch in batch_generator:
                step += 1

                input_variables = batch[0]
                target_variables = batch[1]

                loss = self._train_batch(input_variables, target_variables,
                                         model, teacher_forcing_ratio)

                # Record average loss
                print_loss_total += loss

                if step % self.print_every == 0:
                    print_loss_avg = print_loss_total / (self.print_every)
                    print_loss_total = 0
                    log_msg = 'Time elapsed: %s, Progress: %d%%, Train %s: %.4f' % (
                        pretty_interval(start), float(step) / total_steps *
                        100, self.loss.name, print_loss_avg)
                    self.logger.info(log_msg)

                # Checkpoint
                if step % self.checkpoint_every == 0 or step == total_steps:
                    Checkpoint(
                        model=model,
                        optimizer_state_dict=self.optimizer.state_dict(),
                        epoch=epoch,
                        step=step,
                        input_vocab=data.input_vocab,
                        output_vocab=data.output_vocab).save(self.expt_dir)

            log_msg = "Finished epoch {0}".format(epoch)
            if dev_data is not None:
                dev_loss = self.evaluator.evaluate(model, dev_data)
                self.optimizer.update(dev_loss, epoch)
                log_msg += ", Dev %s: %.4f" % (self.loss.name, dev_loss)
                model.train(mode=True)
            self.logger.info(log_msg)
Example 12
def load_model(exp_path):
    cp = Checkpoint.load(Checkpoint.get_latest_checkpoint(exp_path))
    model = cp.model
    return model
Example 13
                      src_vocab=input_vocab,
                      tgt_vocab=output_vocab)

    # Prepare model
    hidden_size = 128
    encoder = EncoderRNN(input_vocab, dataset.src_max_len, hidden_size)
    decoder = DecoderRNN(output_vocab,
                         dataset.tgt_max_len,
                         hidden_size,
                         dropout_p=0.2,
                         use_attention=True)
    seq2seq = Seq2seq(encoder, decoder)

    if opt.resume:
        print("resuming training")
        latest_checkpoint = Checkpoint.get_latest_checkpoint(opt.expt_dir)
        seq2seq.load(latest_checkpoint)
    else:
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # Prepare loss
    weight = torch.ones(output_vocab.get_vocab_size())
    mask = output_vocab.MASK_token_id
    loss = Perplexity(weight, mask)

    if torch.cuda.is_available():
        seq2seq.cuda()
        loss.cuda()

    # train
Example 14
parser = argparse.ArgumentParser()
parser.add_argument('--train_path',
                    action='store',
                    dest='train_path',
                    help='path to train data')
parser.add_argument('--test_path',
                    action='store',
                    dest='test_path',
                    help='path to test data')
parser.add_argument('--checkpoint',
                    action='store',
                    dest='checkpoint',
                    help='path to checkpoint')
opt = parser.parse_args()

latest_check_point = Checkpoint.get_latest_checkpoint(opt.checkpoint)
checkpoint = Checkpoint.load(latest_check_point)
input_vocab = checkpoint.input_vocab
output_vocab = checkpoint.output_vocab

model = checkpoint.model
optimizer = checkpoint.optimizer
weight = torch.ones(len(output_vocab))
pad = output_vocab.stoi['<pad>']
loss = NLLLoss(weight, pad)
batch_size = 1
print(model)

train_file = opt.train_path
test_file = opt.test_path
set_num = get_set_num(train_file)
Example 15
    print('Initializing dataset')
    train = torchtext.data.TabularDataset(path=opt.train_path,
                                          format='tsv',
                                          fields=[('src', src), ('tgt', tgt),
                                                  ('beh', beh)],
                                          filter_pred=len_filter)
    dev = torchtext.data.TabularDataset(path=opt.dev_path,
                                        format='tsv',
                                        fields=[('src', src), ('tgt', tgt)],
                                        filter_pred=len_filter)

    if not os.path.exists(opt.ckpt_dir):
        os.makedirs(opt.ckpt_dir)

    if opt.resume:
        latest_checkpoint_path = Checkpoint.get_latest_checkpoint(opt.ckpt_dir)
        resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
        src.vocab = resume_checkpoint.input_vocab
        tgt.vocab = resume_checkpoint.output_vocab
    else:
        print('Building vocab')
        #src.build_vocab(train, max_size=50000)
        #tgt.build_vocab(train, max_size=opt.vocab_size, vectors='glove.840B.300d')
        if hidden_size == 300:
            vectors = 'glove.42B.300d'
        elif hidden_size == 100:
            vectors = 'glove.6B.100d'
        else:
            vectors = None

        tgt.build_vocab(train, max_size=vocab_size, vectors=vectors)
Example 16
    def train(self,
              model,
              data,
              round1=2,
              round2=10,
              norm_epochs=3,
              class_epochs=3,
              resume=False,
              dev_data=None,
              test_data=None,
              optimizer=None,
              teacher_forcing_ratio=0,
              lr=0.003):
        """ Run training for a given model.
        Args:
            model (seq2seq.models): model to run training on; if `resume=True`, it will be
               overwritten by the model loaded from the latest checkpoint.
            data (pair of seq2seq.dataset.dataset.Dataset): datasets to train on, indexed as
               data[0] for the pre-training phase and data[1] for the classification phase
            norm_epochs (int, optional): number of pre-training epochs per round (default 3)
            class_epochs (int, optional): number of classification epochs per round (default 3)
            resume (bool, optional): resume training from the latest checkpoint (default False)
            dev_data (pair of seq2seq.dataset.dataset.Dataset, optional): dev Datasets, indexed
               like `data` (default None)
            optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
               (default: Optimizer(torch.optim.Adam(lr=lr, weight_decay=1e-4), max_grad_norm=5))
            teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
        Returns:
            model (seq2seq.models): trained model.
        """
        # If training is set to resume
        if resume:
            latest_checkpoint_path = Checkpoint.get_latest_checkpoint(
                self.expt_dir)
            resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # A workaround to set the optimizer parameters properly
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            defaults.pop('initial_lr', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            step = resume_checkpoint.step
        else:
            start_epoch = 1
            step = 0
            # norm_parameters = list(map(id, model.decoder.parameters()))
            # class_parameters = list(map(id, model.classification.parameters()))
            # base_params = filter(lambda p: id(p) not in class_parameters, model.parameters())
            # self.logger.info(norm_parameters)
            # self.logger.info(class_parameters)
            # self.logger.info(base_params)
            # ignored_params = list(map(id, model.encoder.elmo._scalar_mixes[0].parameters()))
            # base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
            if optimizer is None:
                # optimizer = Optimizer(optim.Adam([{'params': base_params},
                #                                   {'params': model.encoder.elmo._scalar_mixes[0].parameters(), 'lr':1e-2}],
                #                                  lr=lr, weight_decay=1e-4), max_grad_norm=5)
                optimizer = Optimizer(optim.Adam(model.parameters(),
                                                 lr=lr,
                                                 weight_decay=1e-4),
                                      max_grad_norm=5)
            self.optimizer = optimizer

        self.logger.info("Optimizer: %s, Scheduler: %s" %
                         (self.optimizer.optimizer, self.optimizer.scheduler))

        self.logger.info(
            "{} rounds over the training data, {} epochs per round. Starting..."
            .format(round1, norm_epochs))
        for i in range(round1):
            self.logger.info("Round: {}".format(i))
            self._pre_train_epochs(data[0],
                                   model,
                                   norm_epochs,
                                   1,
                                   0,
                                   dev_data=dev_data[0],
                                   teacher_forcing_ratio=teacher_forcing_ratio)

            self._train_epoches(data[1],
                                model,
                                class_epochs,
                                1,
                                0,
                                dev_data=dev_data[1],
                                test_data=test_data,
                                teacher_forcing_ratio=teacher_forcing_ratio)
        self.logger.info(
            "{} rounds over the training data, 1 epoch per round. Starting..."
            .format(round2))
        for i in range(round2):
            self.logger.info("Round : {}".format(i))
            self._pre_train_epochs(data[0],
                                   model,
                                   1,
                                   1,
                                   0,
                                   dev_data=dev_data[0],
                                   teacher_forcing_ratio=teacher_forcing_ratio)

            self._train_epoches(data[1],
                                model,
                                1,
                                1,
                                0,
                                dev_data=dev_data[1],
                                test_data=test_data,
                                teacher_forcing_ratio=teacher_forcing_ratio)

        self.logger.info('best_fb_f1: {}, best_tw_f1: {}'.format(
            self.best_fb_f1, self.best_tw_f1))

        return model
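Because this variant indexes data[0]/data[1] and dev_data[0]/dev_data[1], it expects pairs of datasets; a hypothetical call site (all names are placeholders for pre-built Dataset objects and a trainer instance):

# norm_train, class_train, norm_dev, class_dev and test_data are assumed to exist.
model = trainer.train(model,
                      (norm_train, class_train),
                      round1=2,
                      round2=10,
                      norm_epochs=3,
                      class_epochs=3,
                      dev_data=(norm_dev, class_dev),
                      test_data=test_data,
                      teacher_forcing_ratio=0.5,
                      lr=0.003)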