def train_model(
    train_source,
    train_target,
    dev_source,
    dev_target,
    experiment_directory,
    resume=False,
):
    # Prepare dataset
    train = Seq2SeqDataset.from_file(train_source, train_target)
    train.build_vocab(300, 6000)
    dev = Seq2SeqDataset.from_file(
        dev_source,
        dev_target,
        share_fields_from=train,
    )
    input_vocab = train.src_field.vocab
    output_vocab = train.tgt_field.vocab

    # Prepare loss
    weight = torch.ones(len(output_vocab))
    pad = output_vocab.stoi[train.tgt_field.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not resume:
        seq2seq, optimizer, scheduler = initialize_model(
            train, input_vocab, output_vocab)

    # Train
    trainer = SupervisedTrainer(
        loss=loss,
        batch_size=32,
        checkpoint_every=50,
        print_every=10,
        experiment_directory=experiment_directory,
    )
    # time.clock() was removed in Python 3.8; use time.time() for wall time
    start = time.time()
    try:
        seq2seq = trainer.train(
            seq2seq,
            train,
            n_epochs=10,
            dev_data=dev,
            optimizer=optimizer,
            teacher_forcing_ratio=0.5,
            resume=resume,
        )
    # Capture ^C
    except KeyboardInterrupt:
        pass
    end = time.time() - start
    logging.info('Training time: %.2fs', end)

    return seq2seq, input_vocab, output_vocab
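# A minimal usage sketch for train_model above. The data paths are
# hypothetical placeholders, and initialize_model is assumed to be
# defined elsewhere in the same module (it is referenced but not shown).
if __name__ == '__main__':
    model, in_vocab, out_vocab = train_model(
        'data/train.src', 'data/train.tgt',
        'data/dev.src', 'data/dev.tgt',
        experiment_directory='./experiment',
    )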
def test_resume_from_multiple_of_epoches(self, mock_evaluator,
                                         mock_checkpoint, mock_func):
    mock_model = mock.Mock()
    mock_optim = mock.Mock()
    trainer = SupervisedTrainer(batch_size=16)
    trainer.optimizer = mock_optim
    n_epoches = 1
    start_epoch = 1
    step = 7
    trainer._train_epoches(self.dataset, mock_model, n_epoches,
                           start_epoch, step, dev_data=self.dataset)
def test_batch_num_when_resuming(self, mock_evaluator, mock_checkpoint,
                                 mock_func):
    mock_model = mock.Mock()
    mock_optim = mock.Mock()
    trainer = SupervisedTrainer(batch_size=16)
    trainer.optimizer = mock_optim
    n_epoches = 1
    start_epoch = 1
    steps_per_epoch = 7
    step = 3
    trainer._train_epoches(self.dataset, mock_model, n_epoches,
                           start_epoch, step)
    self.assertEqual(steps_per_epoch - step, mock_func.call_count)
def train(self, epoch=20, resume=False):
    t = SupervisedTrainer(loss=self.loss,
                          batch_size=96,
                          checkpoint_every=1000,
                          print_every=1000,
                          expt_dir=self.model_save_path)
    self.seq2seq = t.train(self.seq2seq,
                           self.trainset,
                           num_epochs=epoch,
                           dev_data=self.devset,
                           optimizer=self.optimizer,
                           teacher_forcing_ratio=0.5,
                           resume=resume)
def _evaluate(checkpoint_path,
              test_paths,
              metric_names=["word accuracy", "sequence accuracy",
                            "final target accuracy"],
              loss_names=["nll"],
              max_len=50,
              batch_size=32,
              is_predict_eos=True,
              content_method=None):
    """Evaluates the models saved in a checkpoint."""
    results = []

    print("loading checkpoint from {}".format(checkpoint_path))
    checkpoint = Checkpoint.load(checkpoint_path)
    seq2seq = checkpoint.model

    tabular_data_fields = get_tabular_data_fields(
        content_method=content_method, is_predict_eos=is_predict_eos)
    dic_data_fields = dict(tabular_data_fields)
    src = dic_data_fields["src"]
    tgt = dic_data_fields["tgt"]

    src.vocab = checkpoint.input_vocab
    tgt.vocab = checkpoint.output_vocab
    tgt.eos_id = tgt.vocab.stoi[tgt.SYM_EOS]
    tgt.sos_id = tgt.vocab.stoi[tgt.SYM_SOS]

    for test_path in test_paths:
        test = get_data(test_path, max_len, tabular_data_fields)

        metrics = get_metrics(metric_names, src, tgt, is_predict_eos)
        losses, loss_weights = get_losses(loss_names, tgt, is_predict_eos)

        evaluator = Evaluator(loss=losses, batch_size=batch_size,
                              metrics=metrics)
        data_func = SupervisedTrainer.get_batch_data
        losses, metrics = evaluator.evaluate(model=seq2seq, data=test,
                                             get_batch_data=data_func)

        total_loss, log_msg, _ = SupervisedTrainer.get_losses(
            losses, metrics, 0)

        dataset = test_path.split('/')[-1].split('.')[0]
        results.append([dataset, total_loss] +
                       [metric.get_val() for metric in metrics])

    results_df = pd.DataFrame(results,
                              columns=["Dataset", "Loss"] +
                                      [metric.name for metric in metrics])
    results_df = results_df.melt(id_vars=['Dataset'],
                                 var_name="Metric",
                                 value_name='Value')
    return results_df
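# A usage sketch for _evaluate above: the checkpoint directory and test
# paths are hypothetical placeholders. The function returns a long-form
# DataFrame (Dataset, Metric, Value), which can be pivoted back to one
# column per metric for display.
results = _evaluate('checkpoints/best',
                    ['data/test1.tsv', 'data/test2.tsv'],
                    batch_size=64)
print(results.pivot(index='Dataset', columns='Metric', values='Value'))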
def pretrain_generator(model, train, dev):
    # pre-train generator (tgt, gen, random_seed, expt_gen_dir and resume
    # are globals in the original script)
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    optimizer = Optimizer(torch.optim.Adam(gen.parameters()),
                          max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)

    supervised = SupervisedTrainer(loss=loss,
                                   batch_size=32,
                                   random_seed=random_seed,
                                   expt_dir=expt_gen_dir)
    supervised.train(model, train,
                     num_epochs=20,
                     dev_data=dev,
                     optimizer=optimizer,
                     teacher_forcing_ratio=0,
                     resume=resume)
def build_model(src, tgt, hidden_size, mini_batch_size, bidirectional,
                dropout, attention, init_value):
    EXPERIMENT.param("Hidden", hidden_size)
    EXPERIMENT.param("Bidirectional", bidirectional)
    EXPERIMENT.param("Dropout", dropout)
    EXPERIMENT.param("Attention", attention)
    EXPERIMENT.param("Mini-batch", mini_batch_size)

    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)

    encoder = EncoderRNN(len(src.vocab), MAX_LEN, hidden_size,
                         rnn_cell="lstm",
                         bidirectional=bidirectional,
                         dropout_p=dropout,
                         variable_lengths=False)
    decoder = DecoderRNN(len(tgt.vocab), MAX_LEN,
                         hidden_size,  # * 2 if bidirectional else hidden_size,
                         rnn_cell="lstm",
                         use_attention=attention,
                         eos_id=tgt.eos_id,
                         sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)

    using_cuda = False
    if torch.cuda.is_available():
        using_cuda = True
        encoder.cuda()
        decoder.cuda()
        seq2seq.cuda()
        loss.cuda()
    EXPERIMENT.param("CUDA", using_cuda)

    for param in seq2seq.parameters():
        param.data.uniform_(-init_value, init_value)

    trainer = SupervisedTrainer(loss=loss,
                                batch_size=mini_batch_size,
                                checkpoint_every=5000,
                                random_seed=42,
                                print_every=1000)
    return seq2seq, trainer
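# A usage sketch for build_model above, under the assumption that src/tgt
# are already-built torchtext fields and that the MAX_LEN and EXPERIMENT
# globals exist as in the original script; the hyperparameter values here
# are illustrative, not recommendations.
seq2seq, trainer = build_model(src, tgt,
                               hidden_size=256,
                               mini_batch_size=64,
                               bidirectional=False,
                               dropout=0.2,
                               attention=True,
                               init_value=0.08)
seq2seq = trainer.train(seq2seq, train, num_epochs=10, dev_data=dev)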
def test_loading_optimizer(self, train_func, sgd, optimizer, load_function,
                           checkpoint):
    # mock.Mock uses `return_value`, not `returnvalue`; the original
    # assignments silently created unused attributes
    load_function.return_value = checkpoint
    mock_model = mock.Mock()
    mock_model.params.return_value = True
    n_epoches = 2
    trainer = SupervisedTrainer(batch_size=16)
    trainer.train(mock_model, self.dataset, n_epoches,
                  resume=True, checkpoint_path='dummy', optimizer='sgd')
    self.assertFalse(
        sgd.called,
        "Optimizer() must not be called when the optimizer is loaded "
        "from a checkpoint")
    trainer.train(mock_model, self.dataset, n_epoches,
                  resume=False, checkpoint_path='dummy', optimizer='sgd')
    sgd.assert_called()
elif args.self_play_eval:
    print('Self play eval ... ')
    t = SupervisedSelfPlayEval(model_dir=args.model_dir, args=args,
                               corpus=corpus)
    seq2seq = t.test(args, seq2seq, dataloader,
                     resume=args.resume,
                     save_dir=args.save_dir)
else:
    print('SupervisedTrainer ... ')
    # train
    t = SupervisedTrainer(batch_size=args.batch_size,
                          checkpoint_every=args.checkpoint_every,
                          print_every=args.print_every,
                          expt_dir=args.expt_dir,
                          model_dir=args.model_dir,
                          args=args)
    if args.eval:
        seq2seq = t.test(args, seq2seq, dataloader, scheduler,
                         num_epochs=5,
                         dev_data=None,
                         optimizer=optimizer,
                         teacher_forcing_ratio=1.0,
                         resume=args.resume,
                         save_dir=args.save_dir)
    else:
        seq2seq = t.train(args,
def eval_D():
    pool = helper.DiscriminatorDataPool(opt.max_len, D.min_len,
                                        Constants.PAD)
    val_iter = data.BucketIterator(dataset=val,
                                   batch_size=opt.batch_size,
                                   device=opt.device,
                                   sort_key=lambda x: len(x.src),
                                   repeat=False)
    pool.fill(val_iter)
    trainer_D.evaluate(D, val_iter=pool.batch_gen(), crit=crit_D)
# eval_D()

# pre-train G
if not hasattr(opt, 'load_G_from'):
    print('Pre-train G')
    trainer_G = SupervisedTrainer()
    trainer_G.optimizer = optim_G
    G.train()
    for epoch in range(5):
        train_iter = data.BucketIterator(dataset=train,
                                         batch_size=64,
                                         device=opt.device,
                                         sort_within_batch=True,
                                         sort_key=lambda x: len(x.src),
                                         repeat=False)
        for step, batch in enumerate(train_iter):
            src_seq = batch.src[0]
            src_length = batch.src[1]
            tgt_seq = src_seq.clone()
            # print(src_seq)
    if opt.decay_factor:
        optimizer.set_scheduler(
            torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer.optimizer, 'min',
                factor=opt.decay_factor,
                patience=1))

    # Prepare trainer and train
    t = SupervisedTrainer(loss=loss,
                          model_dir=opt.model_dir,
                          best_model_dir=opt.best_model_dir,
                          batch_size=opt.batch_size,
                          checkpoint_every=opt.checkpoint_every,
                          print_every=opt.print_every,
                          max_epochs=opt.max_epochs,
                          max_steps=opt.max_steps,
                          max_checkpoints_num=opt.max_checkpoints_num,
                          best_ppl=opt.best_ppl,
                          device=device,
                          multi_gpu=multi_gpu,
                          logger=logger)
    seq2seq = t.train(seq2seq,
                      data=train,
                      start_step=opt.skip_steps,
                      dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=opt.teacher_forcing_ratio)
elif opt.phase == "infer":
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# Prepare loss
weight = torch.ones(output_vocab.get_vocab_size())
mask = output_vocab.MASK_token_id
loss = Perplexity(weight, mask)
if torch.cuda.is_available():
    seq2seq.cuda()
    loss.cuda()

# train
t = SupervisedTrainer(loss=loss, batch_size=32,
                      checkpoint_every=50,
                      print_every=10, expt_dir=opt.expt_dir)
t.train(seq2seq, dataset,
        num_epochs=4, dev_data=dev_set,
        resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)

while True:
    # raw_input is Python 2 only; input() is the Python 3 equivalent
    seq_str = input("Type in a source sequence:")
    seq = seq_str.split()
    print(predictor.predict(seq))
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and passing them to the trainer.
#
# optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters(), lr=1e-3), max_grad_norm=5)
# scheduler = StepLR(optimizer.optimizer, 1)
# optimizer.set_scheduler(scheduler)

# train
t = SupervisedTrainer(
    loss=loss,
    batch_size=64,
    checkpoint_every=50,
    print_every=10,
    expt_dir=EXPERIMENT_PATH,
)
seq2seq = t.train(
    seq2seq,
    train,
    num_epochs=6,
    optimizer=optimizer,
    teacher_forcing_ratio=0.6,
    teacher_forcing_half_life=5000,
    resume=opt.resume,
)
predictor = Predictor(seq2seq, input_vocab, output_vocab)
    seq2seq.cuda()

for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and passing them to the trainer.
#
# optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
# scheduler = StepLR(optimizer.optimizer, 1)
# optimizer.set_scheduler(scheduler)

# train
t = SupervisedTrainer(loss=loss, batch_size=32,
                      checkpoint_every=50,
                      print_every=1, expt_dir=opt.expt_dir)

if opt.concept:
    VOCAB = namedtuple('vocabs', ("src_vocab", "tgt_vocab", "cpt_vocab"))
    vocabs = VOCAB(src.vocab, tgt.vocab, cpt.vocab)
else:
    vocabs = []

seq2seq = t.train(seq2seq, train,
                  num_epochs=30, dev_data=dev,
                  optimizer=optimizer,
                  teacher_forcing_ratio=0.5,
def train():
    # jieba.lcut already returns a list of tokens, so it can be passed
    # directly as the tokenizer
    src = SourceField(sequential=True, tokenize=jieba.lcut)
    tgt = TargetField(sequential=True, tokenize=jieba.lcut)
    max_len = 50

    def len_filter(example):
        return len(example.src) <= max_len and len(example.tgt) <= max_len

    train = torchtext.data.TabularDataset(
        path=opt.train_path, format='csv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=len_filter)
    dev = torchtext.data.TabularDataset(
        path=opt.dev_path, format='csv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=len_filter)

    src.build_vocab(train, max_size=50000)
    tgt.build_vocab(train, max_size=50000)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size = 128
        bidirectional = True
        encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                             bidirectional=bidirectional,
                             variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab), max_len,
                             hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2, use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id, sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # Optimizer and learning rate scheduler can be customized by
    # explicitly constructing the objects and passing them to the trainer.
    #
    # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
    # scheduler = StepLR(optimizer.optimizer, 1)
    # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss, batch_size=32,
                          checkpoint_every=50,
                          print_every=10, expt_dir=opt.expt_dir)
    seq2seq = t.train(seq2seq, train,
                      num_epochs=6, dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)

    predictor = Predictor(seq2seq, input_vocab, output_vocab)
# G.get_trainable_parameters(),
# betas=(0.9, 0.98), eps=1e-09),
# opt.d_model, opt.n_warmup_steps)
optim_G_a = optim.Adam(filter(lambda p: p.requires_grad, G_a.parameters()),
                       lr=1e-4, betas=(0.9, 0.98), eps=1e-09)
optim_G_b = optim.Adam(filter(lambda p: p.requires_grad, G_b.parameters()),
                       lr=1e-4, betas=(0.9, 0.98), eps=1e-09)
crit_G = NLLLoss(size_average=False)

if opt.cuda:
    G_a.cuda()
    G_b.cuda()
    crit_G.cuda()

trainer_G_a = SupervisedTrainer()
trainer_G_b = SupervisedTrainer()
trainer_G_a.optimizer = optim_G_a
trainer_G_b.optimizer = optim_G_b

# ---------- train ----------

def pretrain(model, trainer, dataset):
    model.train()
    for epoch in range(20):
        print('\n[Epoch %d]' % epoch)
        train_iter = data.BucketIterator(dataset=dataset,
                                         batch_size=64,
                                         device=opt.device,
                                         sort_within_batch=True,
                                         sort_key=lambda x: len(x.src),
                                         repeat=False)
        for step, batch in enumerate(train_iter):
    param.data.uniform_(-0.08, 0.08)

##############################################################################
# train model

# Prepare loss
weight = torch.ones(len(output_vocab))
pad = output_vocab.stoi[tgt.pad_token]
loss = Perplexity(weight, pad)
if torch.cuda.is_available():
    loss.cuda()

# create trainer
t = SupervisedTrainer(loss=loss,
                      batch_size=opt.batch_size,
                      checkpoint_every=opt.save_every,
                      print_every=opt.print_every,
                      expt_dir=opt.output_dir)

checkpoint_path = (os.path.join(opt.output_dir, opt.load_checkpoint)
                   if opt.resume else None)

seq2seq = t.train(seq2seq, train,
                  num_epochs=opt.epochs,
                  dev_data=dev,
                  optimizer=opt.optim,
                  teacher_forcing_ratio=opt.teacher_forcing_ratio,
                  learning_rate=opt.lr,
                  resume=opt.resume,
                  checkpoint_path=checkpoint_path)
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and passing them to the trainer.
optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                      max_grad_norm=5)
scheduler = StepLR(optimizer.optimizer, 1)
optimizer.set_scheduler(scheduler)

# train
t = SupervisedTrainer(loss=loss,
                      batch_size=config['batch size'],
                      checkpoint_every=config['checkpoint_every'],
                      print_every=config['print every'],
                      expt_dir=config['expt_dir'])
                      # expt_dir=opt.expt_dir)

# TODO add dev eval here for early stopping
if config['train model']:
    seq2seq = t.train(input_vocab, feats_vocab, seq2seq, train,
                      num_epochs=config['epochs'],
                      vectors=vectors,
                      dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
def run_training(opt, default_data_dir, num_epochs=100):
    if opt.load_checkpoint is not None:
        checkpoint_path = os.path.join(opt.expt_dir,
                                       Checkpoint.CHECKPOINT_DIR_NAME,
                                       opt.load_checkpoint)
        logging.info("loading checkpoint from {}".format(checkpoint_path))
        checkpoint = Checkpoint.load(checkpoint_path)
        seq2seq = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:
        # Prepare dataset
        src = SourceField()
        tgt = TargetField()
        max_len = 50

        data_file = os.path.join(default_data_dir, opt.train_path,
                                 'data.txt')
        logging.info("Starting new training session on %s", data_file)

        def len_filter(example):
            return (len(example.src) <= max_len
                    and len(example.tgt) <= max_len
                    and len(example.src) > 0
                    and len(example.tgt) > 0)

        train = torchtext.data.TabularDataset(
            path=data_file, format='json',
            fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
            filter_pred=len_filter
        )

        dev = None
        if opt.no_dev is False:
            dev_data_file = os.path.join(default_data_dir, opt.train_path,
                                         'dev-data.txt')
            dev = torchtext.data.TabularDataset(
                path=dev_data_file, format='json',
                fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
                filter_pred=len_filter
            )

        src.build_vocab(train, max_size=50000)
        tgt.build_vocab(train, max_size=50000)
        input_vocab = src.vocab
        output_vocab = tgt.vocab

        # NOTE: If the source field name and the target field name
        # are different from 'src' and 'tgt' respectively, they have
        # to be set explicitly before any training or inference
        # seq2seq.src_field_name = 'src'
        # seq2seq.tgt_field_name = 'tgt'

        # Prepare loss
        weight = torch.ones(len(tgt.vocab))
        pad = tgt.vocab.stoi[tgt.pad_token]
        loss = Perplexity(weight, pad)
        if torch.cuda.is_available():
            logging.info("CUDA device found; running on GPU")
            loss.cuda()
        else:
            logging.info("No CUDA device found; running on CPU")

        seq2seq = None
        optimizer = None
        if not opt.resume:
            hidden_size = 128
            decoder_hidden_size = hidden_size * 2
            logging.info("EncoderRNN hidden size: %s", hidden_size)
            logging.info("DecoderRNN hidden size: %s", decoder_hidden_size)
            bidirectional = True
            encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 variable_lengths=True)
            decoder = DecoderRNN(len(tgt.vocab), max_len,
                                 decoder_hidden_size,
                                 dropout_p=0, use_attention=True,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 eos_id=tgt.eos_id, sos_id=tgt.sos_id)

            seq2seq = Seq2seq(encoder, decoder)
            if torch.cuda.is_available():
                seq2seq.cuda()

            for param in seq2seq.parameters():
                param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                              max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

        # train
        batch_size = 32
        # integer division so the intervals stay whole step counts
        checkpoint_every = num_epochs // 10
        print_every = num_epochs // 100

        properties = dict(batch_size=batch_size,
                          checkpoint_every=checkpoint_every,
                          print_every=print_every,
                          expt_dir=opt.expt_dir,
                          num_epochs=num_epochs,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)
        logging.info("Starting training with the following properties: %s",
                     json.dumps(properties, indent=2))

        # the original passed batch_size=num_epochs here, which was a bug
        t = SupervisedTrainer(loss=loss, batch_size=batch_size,
                              checkpoint_every=checkpoint_every,
                              print_every=print_every,
                              expt_dir=opt.expt_dir)

        seq2seq = t.train(seq2seq, train,
                          num_epochs=num_epochs, dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        evaluator = Evaluator(loss=loss, batch_size=batch_size)

        if opt.no_dev is False:
            dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
            logging.info("Dev Loss: %s", dev_loss)
            logging.info("Accuracy: %s", accuracy)

        beam_search = Seq2seq(seq2seq.encoder,
                              TopKDecoder(seq2seq.decoder, 4))
        predictor = Predictor(beam_search, input_vocab, output_vocab)

        while True:
            try:
                seq_str = input("Type in a source sequence:")
                seq = seq_str.strip().split()
                results = predictor.predict_n(seq, n=3)
                for i, res in enumerate(results):
                    print('option %s: %s\n' % (i + 1, res))
            except KeyboardInterrupt:
                logging.info("Bye Bye")
                exit(0)
    seq2seq.cuda()

for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and passing them to the trainer.
#
# optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
# scheduler = StepLR(optimizer.optimizer, 1)
# optimizer.set_scheduler(scheduler)

# train
t = SupervisedTrainer(loss=loss, batch_size=32,
                      checkpoint_every=500,
                      print_every=10, expt_dir=opt.expt_dir)
seq2seq = t.train(seq2seq, train,
                  num_epochs=1, dev_data=dev,
                  optimizer=optimizer,
                  resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)

while True:
    seq_str = input("Type in a source sequence:")
    seq = seq_str.strip().split()
    param.data.uniform_(-0.1, 0.1)

optimizer = Optimizer(torch.optim.Adam(seq2seq_model.parameters()),
                      max_grad_norm=5)

# In[20]:

seq2seq_model = torch.nn.DataParallel(seq2seq_model)

# In[21]:

# train
t = SupervisedTrainer(loss=loss, batch_size=8,
                      checkpoint_every=200,
                      print_every=10000,
                      expt_dir='./lstm_model/' + data_tuple[0] + '/Deepregex')

# In[22]:

seq2seq_model = t.train(seq2seq_model, train,
                        num_epochs=data_tuple[1], dev_data=dev,
                        optimizer=optimizer,
                        teacher_forcing_ratio=0.5,
                        resume=False)

# ### Self Critical Training
def create_trainer(opt, losses, loss_weights, metrics):
    return SupervisedTrainer(loss=losses,
                             metrics=metrics,
                             loss_weights=loss_weights,
                             batch_size=opt.batch_size,
                             eval_batch_size=opt.eval_batch_size,
                             checkpoint_every=opt.save_every,
                             print_every=opt.print_every,
                             expt_dir=opt.output_dir)
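# A usage sketch for create_trainer above, assuming `opt` is the parsed
# argument namespace used elsewhere in this script and that losses,
# loss_weights and metrics come from companion helpers such as the
# get_losses/get_metrics seen in the _evaluate snippet. The train call
# follows the pattern of the other snippets and depends on this fork's
# SupervisedTrainer.train signature.
trainer = create_trainer(opt, losses, loss_weights, metrics)
seq2seq = trainer.train(seq2seq, train, num_epochs=opt.epochs, dev_data=dev)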
def offline_training(opt, target_file_path):
    # Prepare dataset with torchtext
    src = SourceField(tokenize=treebank_tokenizer)
    tgt = TargetField(tokenize=treebank_tokenizer)

    def sample_filter(sample):
        """Sample filter, kept permissive for future use."""
        return True

    train = torchtext.data.TabularDataset(path=opt.train_path,
                                          format='tsv',
                                          fields=[('src', src), ('tgt', tgt)],
                                          filter_pred=sample_filter)
    dev = torchtext.data.TabularDataset(path=opt.dev_path,
                                        format='tsv',
                                        fields=[('src', src), ('tgt', tgt)],
                                        filter_pred=sample_filter)
    test = torchtext.data.TabularDataset(path=opt.dev_path,
                                         format='tsv',
                                         fields=[('src', src), ('tgt', tgt)],
                                         filter_pred=sample_filter)
    src.build_vocab(train, max_size=opt.src_vocab_size)
    tgt.build_vocab(train, max_size=opt.tgt_vocab_size)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    if opt.loss == 'perplexity':
        loss = Perplexity(weight, pad)
    else:
        raise TypeError

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        encoder = EncoderRNN(vocab_size=len(src.vocab),
                             max_len=opt.max_length,
                             hidden_size=opt.hidden_size,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             n_layers=opt.n_layers,
                             bidirectional=opt.bidirectional,
                             rnn_cell=opt.rnn_cell,
                             variable_lengths=True,
                             embedding=(input_vocab.vectors
                                        if opt.use_pre_trained_embedding
                                        else None),
                             update_embedding=opt.update_embedding)
        decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                             max_len=opt.max_length,
                             hidden_size=(opt.hidden_size * 2
                                          if opt.bidirectional
                                          else opt.hidden_size),
                             sos_id=tgt.sos_id,
                             eos_id=tgt.eos_id,
                             n_layers=opt.n_layers,
                             rnn_cell=opt.rnn_cell,
                             bidirectional=opt.bidirectional,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             use_attention=opt.use_attention)
        seq2seq = Seq2seq(encoder=encoder, decoder=decoder)
        if opt.gpu >= 0 and torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # train
    trainer = SupervisedTrainer(loss=loss,
                                batch_size=opt.batch_size,
                                checkpoint_every=opt.checkpoint_every,
                                print_every=opt.print_every,
                                expt_dir=opt.expt_dir)
    seq2seq = trainer.train(model=seq2seq,
                            data=train,
                            num_epochs=opt.epochs,
                            resume=opt.resume,
                            dev_data=dev,
                            optimizer=optimizer,
                            teacher_forcing_ratio=opt.teacher_forcing_rate)
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and passing them to the trainer.
optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                      max_grad_norm=5)
scheduler = StepLR(optimizer.optimizer, 1)
optimizer.set_scheduler(scheduler)

logging.info(seq2seq)

# train
t = SupervisedTrainer(loss=loss,
                      batch_size=params['batch_size'],
                      checkpoint_every=50,
                      print_every=100,
                      expt_dir=opt.expt_dir,
                      tensorboard=True)
seq2seq = t.train(seq2seq, train,
                  num_epochs=params['num_epochs'], dev_data=dev,
                  optimizer=optimizer,
                  teacher_forcing_ratio=0.5,
                  resume=opt.resume,
                  load_checkpoint=opt.load_checkpoint)
                     variable_lengths=True)
decoder = DecoderRNN(len(output_vocab), max_len, hidden_size * 2,
                     dropout_p=0.2, use_attention=True,
                     bidirectional=bidirectional, rnn_cell='lstm',
                     eos_id=train.tgt_field.eos_id,
                     sos_id=train.tgt_field.sos_id)
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq = seq2seq.cuda()

for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# train
t = SupervisedTrainer(loss=loss, batch_size=32,
                      checkpoint_every=50,
                      print_every=10,
                      experiment_directory=opt.expt_dir)
# time.clock() was removed in Python 3.8; use time.time() for wall time
start = time.time()
seq2seq = t.train(seq2seq, train,
                  n_epochs=6, dev_data=dev,
                  optimizer=optimizer,
                  teacher_forcing_ratio=0.5,
                  resume=opt.resume)
end = time.time() - start

print('Training time: {:.2f}s'.format(end))

evaluator = Evaluator(loss=loss, batch_size=32)
dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
assert dev_loss < 1.5

beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 3))
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq.cuda()

for param in seq2seq.parameters():
    param.data.uniform_(-0.1, 0.1)

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and passing them to the trainer.
optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters(), lr=0.001),
                      max_grad_norm=5)
# scheduler = StepLR(optimizer.optimizer, 1)
scheduler = ReduceLROnPlateau(optimizer.optimizer, 'min',
                              factor=0.1, verbose=True, patience=9)
optimizer.set_scheduler(scheduler)

expt_dir = opt.expt_dir + '_hidden_{}'.format(hidden_size)

# train
t = SupervisedTrainer(loss=loss, batch_size=64,
                      checkpoint_every=1800,
                      print_every=100, expt_dir=expt_dir,
                      input_vocab=input_vocab,
                      output_vocab=output_vocab)

start_time = time.time()
seq2seq = t.train(seq2seq, train,
                  num_epochs=100, dev_data=dev,
                  optimizer=optimizer,
                  teacher_forcing_ratio=0.5,
                  resume=opt.resume)
end_time = time.time()
print('total time > ', end_time - start_time)

predictor = Predictor(seq2seq, input_vocab, output_vocab)
                     use_attention=True, bidirectional=bidirectional,
                     eos_id=tgt.eos_id, sos_id=tgt.sos_id,
                     rnn_cell="lstm")
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq.cuda()

for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# train
t = SupervisedTrainer(loss=loss, batch_size=10000,
                      checkpoint_every=50,
                      print_every=10, expt_dir=opt.expt_dir)
seq2seq = t.train(seq2seq, train,
                  num_epochs=10, dev_data=validation,
                  optimizer=optimizer,
                  teacher_forcing_ratio=0.5,
                  resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)

while True:
    sentence = input("Type in a source sequence:")
    seq2seq.cuda()

for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and passing them to the trainer.
#
# optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
# scheduler = StepLR(optimizer.optimizer, 1)
# optimizer.set_scheduler(scheduler)

# train
t = SupervisedTrainer(loss=loss, batch_size=128,
                      checkpoint_every_epoch=5,
                      print_every_batch=50,
                      expt_dir=opt.expt_dir)
seq2seq = t.train(seq2seq, train,
                  num_epochs=int(opt.epochs), dev_data=dev,
                  optimizer=optimizer,
                  teacher_forcing_ratio=0.5,
                  resume=opt.resume)

predictor = Predictor(seq2seq, input_vocab, output_vocab)

while True:
    seq_str = input("Type in a source sequence:")
    optimizer=optimizer.optimizer, mode='min', factor=0.5, patience=5,
    verbose=True, threshold=0.0001, threshold_mode='rel',
    cooldown=0, min_lr=0, eps=1e-08)
optimizer.set_scheduler(scheduler)

# train
t = SupervisedTrainer(loss=loss,
                      batch_size=opt.batch_size,
                      random_seed=opt.random_seed,
                      checkpoint_every=1000000,
                      print_every=50, expt_dir=opt.expt_dir)
seq2seq = t.train(seq2seq, train,
                  num_epochs=100,
                  dev_data=dev,
                  test_data=test,
                  optimizer=optimizer,
                  teacher_forcing_ratio=0.5,
                  resume=opt.resume)
                  # teacher_forcing_ratio=0.5,

# predictor = Predictor(seq2seq, input_vocab, output_vocab)
# while True:
                     use_attention=False, bidirectional=bidirectional)

for param in decoder.parameters():
    param.data.uniform_(-0.08, 0.08)
if torch.cuda.is_available():
    decoder.cuda()

# train
t = SupervisedTrainer(lloss=lloss, bloss=bloss,
                      batch_size=opt.batch_size,
                      checkpoint_every=100,
                      print_every=50, expt_dir=opt.expt_dir,
                      train_cap_lang=train_cap_lang,
                      train_label_lang=train_label_lang,
                      x_mean_std=x_mean_std, y_mean_std=y_mean_std,
                      w_mean_std=w_mean_std, r_mean_std=r_mean_std)
seq2seq = t.train(encoder, decoder, train_tuples,
                  num_epochs=10, dev_data=dev_tuples,
                  optimizer=optimizer,
                  resume=opt.resume,
                  is_training=opt.is_training)