Example #1
def train_model(
    train_source,
    train_target,
    dev_source,
    dev_target,
    experiment_directory,
    resume=False,
):
    # Prepare dataset
    train = Seq2SeqDataset.from_file(train_source, train_target)
    train.build_vocab(300, 6000)
    dev = Seq2SeqDataset.from_file(
        dev_source,
        dev_target,
        share_fields_from=train,
    )
    input_vocab = train.src_field.vocab
    output_vocab = train.tgt_field.vocab

    # Prepare loss
    weight = torch.ones(len(output_vocab))
    pad = output_vocab.stoi[train.tgt_field.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not resume:
        seq2seq, optimizer, scheduler = initialize_model(
            train, input_vocab, output_vocab)

    # Train
    trainer = SupervisedTrainer(
        loss=loss,
        batch_size=32,
        checkpoint_every=50,
        print_every=10,
        experiment_directory=experiment_directory,
    )
    start = time.time()
    try:
        seq2seq = trainer.train(
            seq2seq,
            train,
            n_epochs=10,
            dev_data=dev,
            optimizer=optimizer,
            teacher_forcing_ratio=0.5,
            resume=resume,
        )
    # Capture ^C
    except KeyboardInterrupt:
        pass
    end = time.time() - start
    logging.info('Training time: %.2fs', end)

    return seq2seq, input_vocab, output_vocab
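
A minimal usage sketch for the helper above (not part of the original example); the file paths are placeholders, and the Predictor import assumes the pytorch-seq2seq package layout used in the later examples.

# Hypothetical usage of train_model(); paths and the Predictor import are assumptions.
from seq2seq.evaluator import Predictor

model, input_vocab, output_vocab = train_model(
    'data/train.src', 'data/train.tgt',   # hypothetical training files
    'data/dev.src', 'data/dev.tgt',       # hypothetical dev files
    experiment_directory='./experiment',
)
predictor = Predictor(model, input_vocab, output_vocab)
print(predictor.predict('a source sentence'.split()))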
Example #2
    def test_resume_from_multiple_of_epoches(self, mock_evaluator, mock_checkpoint, mock_func):
        mock_model = mock.Mock()
        mock_optim = mock.Mock()

        trainer = SupervisedTrainer(batch_size=16)
        trainer.optimizer = mock_optim
        n_epoches = 1
        start_epoch = 1
        step = 7
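        # Per the test name, step (7) is assumed to equal a whole number of
        # batches per epoch, so resuming should start at an epoch boundary.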
        trainer._train_epoches(self.dataset, mock_model, n_epoches, start_epoch, step, dev_data=self.dataset)
Example #3
    def test_batch_num_when_resuming(self, mock_evaluator, mock_checkpoint, mock_func):
        mock_model = mock.Mock()
        mock_optim = mock.Mock()

        trainer = SupervisedTrainer(batch_size=16)
        trainer.optimizer = mock_optim
        n_epoches = 1
        start_epoch = 1
        steps_per_epoch = 7
        step = 3
        trainer._train_epoches(self.dataset, mock_model, n_epoches, start_epoch, step)
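        # Resuming from the middle of an epoch: only the remaining
        # (steps_per_epoch - step) batches should be trained.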
        self.assertEqual(steps_per_epoch - step, mock_func.call_count)
Example #4
    def train(self, epoch=20, resume=False):
        t = SupervisedTrainer(loss=self.loss,
                              batch_size=96,
                              checkpoint_every=1000,
                              print_every=1000,
                              expt_dir=self.model_save_path)
        self.seq2seq = t.train(self.seq2seq,
                               self.trainset,
                               num_epochs=epoch,
                               dev_data=self.devset,
                               optimizer=self.optimizer,
                               teacher_forcing_ratio=0.5,
                               resume=resume)
Example #5
def _evaluate(checkpoint_path,
              test_paths,
              metric_names=[
                  "word accuracy", "sequence accuracy", "final target accuracy"
              ],
              loss_names=["nll"],
              max_len=50,
              batch_size=32,
              is_predict_eos=True,
              content_method=None):
    """Evaluates the models saved in a checkpoint."""
    results = []

    print("loading checkpoint from {}".format(os.path.join(checkpoint_path)))
    checkpoint = Checkpoint.load(checkpoint_path)
    seq2seq = checkpoint.model

    tabular_data_fields = get_tabular_data_fields(
        content_method=content_method, is_predict_eos=is_predict_eos)

    dic_data_fields = dict(tabular_data_fields)
    src = dic_data_fields["src"]
    tgt = dic_data_fields["tgt"]

    src.vocab = checkpoint.input_vocab
    tgt.vocab = checkpoint.output_vocab
    tgt.eos_id = tgt.vocab.stoi[tgt.SYM_EOS]
    tgt.sos_id = tgt.vocab.stoi[tgt.SYM_SOS]

    for test_path in test_paths:
        test = get_data(test_path, max_len, tabular_data_fields)

        metrics = get_metrics(metric_names, src, tgt, is_predict_eos)
        losses, loss_weights = get_losses(loss_names, tgt, is_predict_eos)

        evaluator = Evaluator(loss=losses,
                              batch_size=batch_size,
                              metrics=metrics)
        data_func = SupervisedTrainer.get_batch_data
        losses, metrics = evaluator.evaluate(model=seq2seq,
                                             data=test,
                                             get_batch_data=data_func)

        total_loss, log_msg, _ = SupervisedTrainer.get_losses(
            losses, metrics, 0)

        dataset = test_path.split('/')[-1].split('.')[0]
        results.append([dataset, total_loss] +
                       [metric.get_val() for metric in metrics])

    results_df = pd.DataFrame(results,
                              columns=["Dataset", "Loss"] +
                              [metric.name for metric in metrics])

    results_df = results_df.melt(id_vars=['Dataset'],
                                 var_name="Metric",
                                 value_name='Value')

    return results_df
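
A hypothetical call sketch for `_evaluate`; the checkpoint directory and test file paths below are placeholders.

# Hypothetical usage of _evaluate(); paths are placeholders.
results = _evaluate(
    checkpoint_path='checkpoints/run_1',
    test_paths=['data/test_easy.tsv', 'data/test_hard.tsv'],
)
# After the melt there is one row per (Dataset, Metric) pair; pivot back for display.
print(results.pivot(index='Dataset', columns='Metric', values='Value'))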
Example #6
def pretrain_generator(model, train, dev):
    # pre-train generator
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    optimizer = Optimizer(torch.optim.Adam(gen.parameters()), max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)

    supervised = SupervisedTrainer(loss=loss,
                                   batch_size=32,
                                   random_seed=random_seed,
                                   expt_dir=expt_gen_dir)
    supervised.train(model,
                     train,
                     num_epochs=20,
                     dev_data=dev,
                     optimizer=optimizer,
                     teacher_forcing_ratio=0,
                     resume=resume)
Example #7
def build_model(src, tgt, hidden_size, mini_batch_size, bidirectional, dropout,
                attention, init_value):
    EXPERIMENT.param("Hidden", hidden_size)
    EXPERIMENT.param("Bidirectional", bidirectional)
    EXPERIMENT.param("Dropout", dropout)
    EXPERIMENT.param("Attention", attention)
    EXPERIMENT.param("Mini-batch", mini_batch_size)
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    encoder = EncoderRNN(len(src.vocab),
                         MAX_LEN,
                         hidden_size,
                         rnn_cell="lstm",
                         bidirectional=bidirectional,
                         dropout_p=dropout,
                         variable_lengths=False)
    decoder = DecoderRNN(
        len(tgt.vocab),
        MAX_LEN,
        hidden_size,  # * 2 if bidirectional else hidden_size,
        rnn_cell="lstm",
        use_attention=attention,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    using_cuda = False
    if torch.cuda.is_available():
        using_cuda = True
        encoder.cuda()
        decoder.cuda()
        seq2seq.cuda()
        loss.cuda()
    EXPERIMENT.param("CUDA", using_cuda)
    for param in seq2seq.parameters():
        param.data.uniform_(-init_value, init_value)

    trainer = SupervisedTrainer(loss=loss,
                                batch_size=mini_batch_size,
                                checkpoint_every=5000,
                                random_seed=42,
                                print_every=1000)
    return seq2seq, trainer
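
A sketch of how the pair returned by build_model might be consumed; the src/tgt fields, the train/dev datasets, and the hyper-parameter values are placeholders taken to exist in the surrounding script.

# Hypothetical usage of build_model(); all arguments and datasets are assumptions.
model, trainer = build_model(src, tgt,
                             hidden_size=256,
                             mini_batch_size=64,
                             bidirectional=True,
                             dropout=0.2,
                             attention=True,
                             init_value=0.08)
model = trainer.train(model, train,
                      num_epochs=10,
                      dev_data=dev,
                      teacher_forcing_ratio=0.5)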
Example #8
    def test_loading_optimizer(self, train_func, sgd, optimizer, load_function, checkpoint):

        load_function.return_value = checkpoint
        mock_model = mock.Mock()
        mock_model.params.return_value = True
        n_epoches = 2

        trainer = SupervisedTrainer(batch_size=16)

        trainer.train(mock_model, self.dataset, n_epoches, resume=True, checkpoint_path='dummy', optimizer='sgd')

        self.assertFalse(sgd.called, "Failed to not call Optimizer() when optimizer should be loaded from checkpoint")

        trainer.train(mock_model, self.dataset, n_epoches, resume=False, checkpoint_path='dummy', optimizer='sgd')

        sgd.assert_called()
        
        return
Example #9
elif args.self_play_eval:
    print('Self play eval ... ')
    t = SupervisedSelfPlayEval(model_dir=args.model_dir,
                               args=args,
                               corpus=corpus)
    seq2seq = t.test(args,
                     seq2seq,
                     dataloader,
                     resume=args.resume,
                     save_dir=args.save_dir)
else:
    print('SupervisedTrainer ... ')
    # train
    t = SupervisedTrainer(batch_size=args.batch_size,
                          checkpoint_every=args.checkpoint_every,
                          print_every=args.print_every,
                          expt_dir=args.expt_dir,
                          model_dir=args.model_dir,
                          args=args)
    if args.eval:
        seq2seq = t.test(args,
                         seq2seq,
                         dataloader,
                         scheduler,
                         num_epochs=5,
                         dev_data=None,
                         optimizer=optimizer,
                         teacher_forcing_ratio=1.0,
                         resume=args.resume,
                         save_dir=args.save_dir)
    else:
        seq2seq = t.train(args,
Example #10
def eval_D():
    pool = helper.DiscriminatorDataPool(opt.max_len, D.min_len, Constants.PAD)
    val_iter = data.BucketIterator(
        dataset=val, batch_size=opt.batch_size, device=opt.device,
        sort_key=lambda x: len(x.src), repeat=False)
    pool.fill(val_iter)
    trainer_D.evaluate(D, val_iter=pool.batch_gen(), crit=crit_D)

    # eval_D()


# pre-train G
if not hasattr(opt, 'load_G_from'):
    print('Pre-train G')
    trainer_G = SupervisedTrainer()
    trainer_G.optimizer = optim_G

    G.train()

    for epoch in range(5):
        train_iter = data.BucketIterator(
            dataset=train, batch_size=64, device=opt.device,
            sort_within_batch=True, sort_key=lambda x: len(x.src), repeat=False)
        for step, batch in enumerate(train_iter):
            src_seq = batch.src[0]
            src_length = batch.src[1]
            tgt_seq = src_seq.clone()
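            # The generator is pre-trained as an autoencoder here: the target
            # sequence is simply a copy of the source batch.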

            # print(src_seq)
Example #11
        if opt.decay_factor:
            optimizer.set_scheduler(
                torch.optim.lr_scheduler.ReduceLROnPlateau(
                    optimizer.optimizer,
                    'min',
                    factor=opt.decay_factor,
                    patience=1))

        # Prepare trainer and train
        t = SupervisedTrainer(loss=loss,
                              model_dir=opt.model_dir,
                              best_model_dir=opt.best_model_dir,
                              batch_size=opt.batch_size,
                              checkpoint_every=opt.checkpoint_every,
                              print_every=opt.print_every,
                              max_epochs=opt.max_epochs,
                              max_steps=opt.max_steps,
                              max_checkpoints_num=opt.max_checkpoints_num,
                              best_ppl=opt.best_ppl,
                              device=device,
                              multi_gpu=multi_gpu,
                              logger=logger)

        seq2seq = t.train(seq2seq,
                          data=train,
                          start_step=opt.skip_steps,
                          dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=opt.teacher_forcing_ratio)

    elif opt.phase == "infer":
Example #12
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # Prepare loss
    weight = torch.ones(output_vocab.get_vocab_size())
    mask = output_vocab.MASK_token_id
    loss = Perplexity(weight, mask)

    if torch.cuda.is_available():
        seq2seq.cuda()
        loss.cuda()

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=32,
                          checkpoint_every=50,
                          print_every=10,
                          expt_dir=opt.expt_dir)
    t.train(seq2seq,
            dataset,
            num_epochs=4,
            dev_data=dev_set,
            resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)

while True:
    seq_str = input("Type in a source sequence:")
    seq = seq_str.split()
    print(predictor.predict(seq))
Example #13
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters(), lr=1e-3), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(
        loss=loss,
        batch_size=64,
        checkpoint_every=50,
        print_every=10,
        expt_dir=EXPERIMENT_PATH,
    )

    seq2seq = t.train(
        seq2seq,
        train,
        num_epochs=6,
        optimizer=optimizer,
        teacher_forcing_ratio=0.6,
        teacher_forcing_half_life=5000,
        resume=opt.resume,
    )

predictor = Predictor(seq2seq, input_vocab, output_vocab)
Example #14
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=32,
                          checkpoint_every=50,
                          print_every=1,
                          expt_dir=opt.expt_dir)

    if opt.concept:
        VOCAB = namedtuple('vocabs', ("src_vocab", "tgt_vocab", "cpt_vocab"))
        vocabs = VOCAB(src.vocab, tgt.vocab, cpt.vocab)
    else:
        vocabs = []

    seq2seq = t.train(seq2seq,
                      train,
                      num_epochs=30,
                      dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
Example #15
def train():
    src = SourceField(sequential=True,
                      tokenize=lambda x: [i for i in jieba.lcut(x)])
    tgt = TargetField(sequential=True,
                      tokenize=lambda x: [i for i in jieba.lcut(x)])
    max_len = 50

    def len_filter(example):
        return len(example.src) <= max_len and len(example.tgt) <= max_len

    train = torchtext.data.TabularDataset(path=opt.train_path,
                                          format='csv',
                                          fields=[('src', src), ('tgt', tgt)],
                                          filter_pred=len_filter)
    dev = torchtext.data.TabularDataset(path=opt.dev_path,
                                        format='csv',
                                        fields=[('src', src), ('tgt', tgt)],
                                        filter_pred=len_filter)

    src.build_vocab(train, max_size=50000)
    tgt.build_vocab(train, max_size=50000)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size = 128
        bidirectional = True
        encoder = EncoderRNN(len(src.vocab),
                             max_len,
                             hidden_size,
                             bidirectional=bidirectional,
                             variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab),
                             max_len,
                             hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=32,
                          checkpoint_every=50,
                          print_every=10,
                          expt_dir=opt.expt_dir)

    seq2seq = t.train(seq2seq,
                      train,
                      num_epochs=6,
                      dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)
    predictor = Predictor(seq2seq, input_vocab, output_vocab)
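    # Hypothetical inference step (not in the original snippet): queries are
    # segmented with jieba, matching the tokenizer used by the fields above.
    example_query = jieba.lcut(u'你好，世界')
    print(predictor.predict(example_query))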
Example #16
#     G.get_trainable_parameters(),
#     betas=(0.9, 0.98), eps=1e-09),
#     opt.d_model, opt.n_warmup_steps)

optim_G_a = optim.Adam(filter(lambda p: p.requires_grad, G_a.parameters()), lr=1e-4,
                       betas=(0.9, 0.98), eps=1e-09)
optim_G_b = optim.Adam(filter(lambda p: p.requires_grad, G_b.parameters()), lr=1e-4,
                       betas=(0.9, 0.98), eps=1e-09)
crit_G = NLLLoss(size_average=False)

if opt.cuda:
    G_a.cuda()
    G_b.cuda()
    crit_G.cuda()

trainer_G_a = SupervisedTrainer()
trainer_G_b = SupervisedTrainer()
trainer_G_a.optimizer = optim_G_a
trainer_G_b.optimizer = optim_G_b


# ---------- train ----------

def pretrain(model, trainer, dataset):
    model.train()
    for epoch in range(20):
        print('\n[Epoch %d]' % epoch)
        train_iter = data.BucketIterator(
            dataset=dataset, batch_size=64, device=opt.device,
            sort_within_batch=True, sort_key=lambda x: len(x.src), repeat=False)
        for step, batch in enumerate(train_iter):
Example #17
        param.data.uniform_(-0.08, 0.08)

##############################################################################
# train model

# Prepare loss
weight = torch.ones(len(output_vocab))
pad = output_vocab.stoi[tgt.pad_token]
loss = Perplexity(weight, pad)
if torch.cuda.is_available():
    loss.cuda()

# create trainer
t = SupervisedTrainer(loss=loss,
                      batch_size=opt.batch_size,
                      checkpoint_every=opt.save_every,
                      print_every=opt.print_every,
                      expt_dir=opt.output_dir)

checkpoint_path = os.path.join(opt.output_dir,
                               opt.load_checkpoint) if opt.resume else None

seq2seq = t.train(seq2seq,
                  train,
                  num_epochs=opt.epochs,
                  dev_data=dev,
                  optimizer=opt.optim,
                  teacher_forcing_ratio=opt.teacher_forcing_ratio,
                  learning_rate=opt.lr,
                  resume=opt.resume,
                  checkpoint_path=checkpoint_path)
Example #18
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.
        #
        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                              max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=config['batch size'],
                          checkpoint_every=config['checkpoint_every'],
                          print_every=config['print every'],
                          expt_dir=config['expt_dir'])
    # expt_dir=opt.expt_dir)

    # TODO add dev eval here for early stopping
    if config['train model']:
        seq2seq = t.train(input_vocab,
                          feats_vocab,
                          seq2seq,
                          train,
                          num_epochs=config['epochs'],
                          vectors=vectors,
                          dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=0.5,
Example #19
def run_training(opt, default_data_dir, num_epochs=100):
    if opt.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)))
        checkpoint_path = os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        seq2seq = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:

        # Prepare dataset
        src = SourceField()
        tgt = TargetField()
        max_len = 50

        data_file = os.path.join(default_data_dir, opt.train_path, 'data.txt')

        logging.info("Starting new Training session on %s", data_file)

        def len_filter(example):
            return (len(example.src) <= max_len) and (len(example.tgt) <= max_len) \
                   and (len(example.src) > 0) and (len(example.tgt) > 0)

        train = torchtext.data.TabularDataset(
            path=data_file, format='json',
            fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
            filter_pred=len_filter
        )

        dev = None
        if opt.no_dev is False:
            dev_data_file = os.path.join(default_data_dir, opt.train_path, 'dev-data.txt')
            dev = torchtext.data.TabularDataset(
                path=dev_data_file, format='json',
                fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
                filter_pred=len_filter
            )

        src.build_vocab(train, max_size=50000)
        tgt.build_vocab(train, max_size=50000)
        input_vocab = src.vocab
        output_vocab = tgt.vocab

        # NOTE: If the source field name and the target field name
        # are different from 'src' and 'tgt' respectively, they have
        # to be set explicitly before any training or inference
        # seq2seq.src_field_name = 'src'
        # seq2seq.tgt_field_name = 'tgt'

        # Prepare loss
        weight = torch.ones(len(tgt.vocab))
        pad = tgt.vocab.stoi[tgt.pad_token]
        loss = Perplexity(weight, pad)
        if torch.cuda.is_available():
            logging.info("Yayyy We got CUDA!!!")
            loss.cuda()
        else:
            logging.info("No cuda available device found running on cpu")

        seq2seq = None
        optimizer = None
        if not opt.resume:
            hidden_size = 128
            decoder_hidden_size = hidden_size * 2
            logging.info("EncoderRNN Hidden Size: %s", hidden_size)
            logging.info("DecoderRNN Hidden Size: %s", decoder_hidden_size)
            bidirectional = True
            encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 variable_lengths=True)
            decoder = DecoderRNN(len(tgt.vocab), max_len, decoder_hidden_size,
                                 dropout_p=0, use_attention=True,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 eos_id=tgt.eos_id, sos_id=tgt.sos_id)

            seq2seq = Seq2seq(encoder, decoder)
            if torch.cuda.is_available():
                seq2seq.cuda()

            for param in seq2seq.parameters():
                param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.

        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

        # train

        batch_size = 32
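        # Note: checkpoint_every / print_every are derived from num_epochs here,
        # although SupervisedTrainer interprets them as step (batch) counts.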
        checkpoint_every = num_epochs / 10
        print_every = num_epochs / 100

        properties = dict(batch_size=batch_size,
                          checkpoint_every=checkpoint_every,
                          print_every=print_every, expt_dir=opt.expt_dir,
                          num_epochs=num_epochs,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        logging.info("Starting training with the following Properties %s", json.dumps(properties, indent=2))
        t = SupervisedTrainer(loss=loss, batch_size=batch_size,
                              checkpoint_every=checkpoint_every,
                              print_every=print_every, expt_dir=opt.expt_dir)

        seq2seq = t.train(seq2seq, train,
                          num_epochs=num_epochs, dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        evaluator = Evaluator(loss=loss, batch_size=batch_size)

        if opt.no_dev is False:
            dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
            logging.info("Dev Loss: %s", dev_loss)
            logging.info("Accuracy: %s", dev_loss)

    beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 4))

    predictor = Predictor(beam_search, input_vocab, output_vocab)
    while True:
        try:
            seq_str = input("Type in a source sequence:")
            seq = seq_str.strip().split()
            results = predictor.predict_n(seq, n=3)
            for i, res in enumerate(results):
                print('option %s: %s\n' % (i + 1, res))
        except KeyboardInterrupt:
            logging.info("Bye Bye")
            exit(0)
Example #20
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=32,
                          checkpoint_every=500,
                          print_every=10,
                          expt_dir=opt.expt_dir)

    seq2seq = t.train(seq2seq,
                      train,
                      num_epochs=1,
                      dev_data=dev,
                      optimizer=optimizer,
                      resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)

while True:
    seq_str = input("Type in a source sequence:")
    seq = seq_str.strip().split()
Example #21
    param.data.uniform_(-0.1, 0.1)

optimizer = Optimizer(torch.optim.Adam(seq2seq_model.parameters()),
                      max_grad_norm=5)


seq2seq_model = torch.nn.DataParallel(seq2seq_model)


# train

t = SupervisedTrainer(loss=loss,
                      batch_size=8,
                      checkpoint_every=200,
                      print_every=10000,
                      expt_dir='./lstm_model/' + data_tuple[0] + '/Deepregex')


seq2seq_model = t.train(seq2seq_model,
                        train,
                        num_epochs=data_tuple[1],
                        dev_data=dev,
                        optimizer=optimizer,
                        teacher_forcing_ratio=0.5,
                        resume=False)

# ### Self Critical Training
Example #22
def create_trainer(opt, losses, loss_weights, metrics):
    return SupervisedTrainer(loss=losses, metrics=metrics, loss_weights=loss_weights, batch_size=opt.batch_size,
                             eval_batch_size=opt.eval_batch_size, checkpoint_every=opt.save_every,
                             print_every=opt.print_every, expt_dir=opt.output_dir)
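
A hypothetical call sketch; `opt` and the loss/metric objects are assumed to come from the surrounding training script of this fork.

# Hypothetical usage of create_trainer(); opt, losses, loss_weights and metrics
# are assumptions taken from the surrounding script.
trainer = create_trainer(opt, losses, loss_weights, metrics)
# The returned trainer is then driven like the SupervisedTrainer instances in
# the other examples on this page (trainer.train(model, data, ...)).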
Example #23
def offline_training(opt, target_file_path):

    # Prepare dataset with torchtext
    src = SourceField(tokenize=treebank_tokenizer)
    tgt = TargetField(tokenize=treebank_tokenizer)

    def sample_filter(sample):
        """ sample example for future purpose"""
        return True

    train = torchtext.data.TabularDataset(path=opt.train_path,
                                          format='tsv',
                                          fields=[('src', src), ('tgt', tgt)],
                                          filter_pred=sample_filter)
    dev = torchtext.data.TabularDataset(path=opt.dev_path,
                                        format='tsv',
                                        fields=[('src', src), ('tgt', tgt)],
                                        filter_pred=sample_filter)
    test = torchtext.data.TabularDataset(path=opt.dev_path,
                                         format='tsv',
                                         fields=[('src', src), ('tgt', tgt)],
                                         filter_pred=sample_filter)
    src.build_vocab(train, max_size=opt.src_vocab_size)
    tgt.build_vocab(train, max_size=opt.tgt_vocab_size)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    if opt.loss == 'perplexity':
        loss = Perplexity(weight, pad)
    else:
        raise TypeError

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        encoder = EncoderRNN(vocab_size=len(src.vocab),
                             max_len=opt.max_length,
                             hidden_size=opt.hidden_size,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             n_layers=opt.n_layers,
                             bidirectional=opt.bidirectional,
                             rnn_cell=opt.rnn_cell,
                             variable_lengths=True,
                             embedding=input_vocab.vectors
                             if opt.use_pre_trained_embedding else None,
                             update_embedding=opt.update_embedding)
        decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                             max_len=opt.max_length,
                             hidden_size=opt.hidden_size *
                             2 if opt.bidirectional else opt.hidden_size,
                             sos_id=tgt.sos_id,
                             eos_id=tgt.eos_id,
                             n_layers=opt.n_layers,
                             rnn_cell=opt.rnn_cell,
                             bidirectional=opt.bidirectional,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             use_attention=opt.use_attention)
        seq2seq = Seq2seq(encoder=encoder, decoder=decoder)
        if opt.gpu >= 0 and torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
    # train
    trainer = SupervisedTrainer(loss=loss,
                                batch_size=opt.batch_size,
                                checkpoint_every=opt.checkpoint_every,
                                print_every=opt.print_every,
                                expt_dir=opt.expt_dir)
    seq2seq = trainer.train(model=seq2seq,
                            data=train,
                            num_epochs=opt.epochs,
                            resume=opt.resume,
                            dev_data=dev,
                            optimizer=optimizer,
                            teacher_forcing_ratio=opt.teacher_forcing_rate)
Example #24
    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

    # Optimizer and learning rate scheduler can be customized by
    # explicitly constructing the objects and pass to the trainer.
    #
    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                          max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)

logging.info(seq2seq)

# train
t = SupervisedTrainer(loss=loss,
                      batch_size=params['batch_size'],
                      checkpoint_every=50,
                      print_every=100,
                      expt_dir=opt.expt_dir,
                      tensorboard=True)

seq2seq = t.train(seq2seq,
                  train,
                  num_epochs=params['num_epochs'],
                  dev_data=dev,
                  optimizer=optimizer,
                  teacher_forcing_ratio=0.5,
                  resume=opt.resume,
                  load_checkpoint=opt.load_checkpoint)
Example #25
                             variable_lengths=True)

        decoder = DecoderRNN(len(output_vocab), max_len, hidden_size * 2,
                             dropout_p=0.2, use_attention=True,
                             bidirectional=bidirectional,rnn_cell='lstm',
                             eos_id=train.tgt_field.eos_id, sos_id=train.tgt_field.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq = seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # train
    t = SupervisedTrainer(loss=loss, batch_size=32,
                          checkpoint_every=50,
                          print_every=10, experiment_directory=opt.expt_dir)
    start = time.time()
    seq2seq = t.train(seq2seq, train,
                      n_epochs=6, dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)
    end = time.time() - start
    print('Training time: {:.2f}s'.format(end))

evaluator = Evaluator(loss=loss, batch_size=32)
dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
assert dev_loss < 1.5

beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 3))
Example #26
        seq2seq = Seq2seq(encoder, decoder)
        
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.1, 0.1)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.
        
        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters(), lr=0.001), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        scheduler = ReduceLROnPlateau(optimizer.optimizer, 'min', factor=0.1, verbose=True, patience=9)
        optimizer.set_scheduler(scheduler)
    expt_dir = opt.expt_dir + '_hidden_{}'.format(hidden_size)
    # train
    t = SupervisedTrainer(loss=loss, batch_size=64,
                          checkpoint_every=1800,
                          print_every=100, expt_dir=expt_dir, input_vocab=input_vocab, output_vocab=output_vocab)
    
    start_time = time.time()
    seq2seq = t.train(seq2seq, train,
                      num_epochs=100, dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)
    end_time = time.time()
    print('total time > ', end_time-start_time)
    
predictor = Predictor(seq2seq, input_vocab, output_vocab)
Example #27
                             use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id,
                             rnn_cell="lstm")
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=10000,
                          checkpoint_every=50,
                          print_every=10,
                          expt_dir=opt.expt_dir)

    seq2seq = t.train(seq2seq,
                      train,
                      num_epochs=10,
                      dev_data=validation,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)

while True:
    sentence = input("Type in a source sequence:")
Example #28
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and pass to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=128,
                          checkpoint_every_epoch=5,
                          print_every_batch=50,
                          expt_dir=opt.expt_dir)

    seq2seq = t.train(seq2seq,
                      train,
                      num_epochs=int(opt.epochs),
                      dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)

while True:
    seq_str = input("Type in a source sequence:")
Example #29
            optimizer=optimizer.optimizer,
            mode='min',
            factor=0.5,
            patience=5,
            verbose=True,
            threshold=0.0001,
            threshold_mode='rel',
            cooldown=0,
            min_lr=0,
            eps=1e-08)
        optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=opt.batch_size,
                          random_seed=opt.random_seed,
                          checkpoint_every=1000000,
                          print_every=50,
                          expt_dir=opt.expt_dir)
    seq2seq = t.train(seq2seq,
                      train,
                      num_epochs=100,
                      dev_data=dev,
                      test_data=test,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)
    # teacher_forcing_ratio=0.5,

# predictor = Predictor(seq2seq, input_vocab, output_vocab)

# while True:
Example #30
                             use_attention=False,
                             bidirectional=bidirectional)

        for param in decoder.parameters():
            param.data.uniform_(-0.08, 0.08)

        if torch.cuda.is_available():
            decoder.cuda()

    # train
    t = SupervisedTrainer(lloss=lloss,
                          bloss=bloss,
                          batch_size=opt.batch_size,
                          checkpoint_every=100,
                          print_every=50,
                          expt_dir=opt.expt_dir,
                          train_cap_lang=train_cap_lang,
                          train_label_lang=train_label_lang,
                          x_mean_std=x_mean_std,
                          y_mean_std=y_mean_std,
                          w_mean_std=w_mean_std,
                          r_mean_std=r_mean_std)

    seq2seq = t.train(encoder,
                      decoder,
                      train_tuples,
                      num_epochs=10,
                      dev_data=dev_tuples,
                      optimizer=optimizer,
                      resume=opt.resume,
                      is_training=opt.is_training)