def __init__(self, expt_dir='experiment', loss=[NLLLoss()], loss_weights=None,
             metrics=[], batch_size=64, eval_batch_size=128, random_seed=None,
             checkpoint_every=100, print_every=100):
    self._trainer = "Simple Trainer"
    self.random_seed = random_seed
    if random_seed is not None:
        random.seed(random_seed)
        torch.manual_seed(random_seed)

    self.loss = loss
    self.metrics = metrics
    # Default to a weight of 1 for every loss if no weights are given
    self.loss_weights = loss_weights or len(loss) * [1.]

    self.evaluator = Evaluator(
        loss=self.loss, metrics=self.metrics, batch_size=eval_batch_size)
    self.optimizer = None
    self.checkpoint_every = checkpoint_every
    self.print_every = print_every

    # Resolve the experiment directory relative to the working directory
    # and create it if it does not exist yet
    if not os.path.isabs(expt_dir):
        expt_dir = os.path.join(os.getcwd(), expt_dir)
    self.expt_dir = expt_dir
    if not os.path.exists(self.expt_dir):
        os.makedirs(self.expt_dir)
    self.batch_size = batch_size

    self.logger = logging.getLogger(__name__)
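A minimal construction sketch may help here; it assumes this `__init__` belongs to the `SupervisedTrainer` used in the training scripts below, and that `NLLLoss` and `WordAccuracy` live under `machine.loss` and `machine.metrics` (the import paths are assumptions, not confirmed by this snippet).

# Hedged usage sketch, not the canonical API: import paths and the class
# name SupervisedTrainer are assumptions based on the scripts below.
from machine.loss import NLLLoss
from machine.metrics import WordAccuracy
from machine.trainer import SupervisedTrainer

trainer = SupervisedTrainer(
    expt_dir='runs/demo',   # made relative to os.getcwd() if not absolute
    loss=[NLLLoss()],
    metrics=[WordAccuracy()],
    batch_size=64,
    eval_batch_size=128,
    random_seed=42,         # seeds both `random` and `torch`
    checkpoint_every=100,
    print_every=100)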
def test_nllloss_WITH_OUT_SIZE_AVERAGE(self):
    # With size_average=False the loss is summed, not averaged, so the
    # accumulated value should match PyTorch's summed criterion directly.
    loss = NLLLoss(size_average=False)
    pytorch_loss = 0
    pytorch_criterion = torch.nn.NLLLoss(size_average=False)
    for output, target in zip(self.outputs, self.targets):
        loss.eval_step(output, target)
        pytorch_loss += pytorch_criterion(output, target)
    loss_val = loss.get_loss()
    self.assertAlmostEqual(loss_val, pytorch_loss.item())
def test_perplexity(self):
    nll = NLLLoss()
    ppl = Perplexity()
    for output, target in zip(self.outputs, self.targets):
        nll.eval_step(output, target)
        ppl.eval_step(output, target)
    nll_loss = nll.get_loss()
    ppl_loss = ppl.get_loss()
    # Perplexity is the exponential of the average negative log-likelihood
    self.assertAlmostEqual(ppl_loss, math.exp(nll_loss))
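The identity this test relies on, perplexity = exp(mean NLL), can be checked with plain tensors, independent of the library classes; this standalone sketch uses only `torch.nn.functional`.

# Standalone demonstration of the relation the test asserts.
import math
import torch
import torch.nn.functional as F

logits = torch.randn(8, 10)               # batch of 8, 10 classes
log_probs = F.log_softmax(logits, dim=1)  # NLLLoss expects log-probabilities
targets = torch.randint(0, 10, (8,))

nll = F.nll_loss(log_probs, targets)      # mean NLL over the batch
perplexity = math.exp(nll.item())         # same quantity Perplexity reports
print(nll.item(), perplexity)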
def test_nllloss(self):
    loss = NLLLoss()
    pytorch_loss = 0
    pytorch_criterion = torch.nn.NLLLoss()
    for output, target in zip(self.outputs, self.targets):
        loss.eval_step(output, target)
        pytorch_loss += pytorch_criterion(output, target)
    loss_val = loss.get_loss()
    # The default loss averages over batches, so average the reference too
    pytorch_loss /= self.num_batch
    self.assertAlmostEqual(loss_val, pytorch_loss.item())
def prepare_losses_and_metrics(pad, eos):
    # Prepare loss and metrics; note that `eos` is accepted but not used
    # in this variant
    losses = [NLLLoss(ignore_index=pad)]
    loss_weights = [1.]
    for loss in losses:
        loss.to(device)
    metrics = [WordAccuracy(ignore_index=pad),
               SequenceAccuracy(ignore_index=pad)]
    return losses, loss_weights, metrics
def train_lookup_model():
    parser = init_argparser()
    opt = parser.parse_args()
    default_settings = opt.default_settings

    # Get the (mini) lookup task from the registered tasks
    T = get_task("lookup", is_mini=True)
    print("Got Task")
    parameters = T.default_params[default_settings]
    train_path = T.train_path
    valid_path = T.valid_path
    test_paths = T.test_paths

    # Prepare logging and data set
    init_logging(parameters)
    src, tgt, train, dev, monitor_data = prepare_iters(
        parameters, train_path, test_paths, valid_path,
        parameters['batch_size'])

    # Prepare model
    seq2seq, output_vocab = initialize_model(parameters, src, tgt, train)
    pad = output_vocab.stoi[tgt.pad_token]

    # Prepare training
    losses = [NLLLoss(ignore_index=pad)]
    for loss in losses:
        loss.to(device)
    loss_weights = [1.]
    metrics = [SequenceAccuracy(ignore_index=pad)]
    trainer = SupervisedTrainer(expt_dir='../models')

    # Train
    print("Training")
    seq2seq, _ = trainer.train(seq2seq, train,
                               num_epochs=20, dev_data=dev,
                               monitor_data=monitor_data, optimizer='adam',
                               checkpoint_path='../models', losses=losses,
                               metrics=metrics, loss_weights=loss_weights,
                               checkpoint_every=10, print_every=10)
def prepare_losses_and_metrics(
        opt, pad, unk, sos, eos, input_vocab, output_vocab):
    use_output_eos = not opt.ignore_output_eos

    # Prepare loss and metrics
    losses = [NLLLoss(ignore_index=pad)]
    loss_weights = [1.]
    for loss in losses:
        loss.to(device)

    # Only build the metrics that were requested on the command line
    metrics = []
    if 'word_acc' in opt.metrics:
        metrics.append(WordAccuracy(ignore_index=pad))
    if 'seq_acc' in opt.metrics:
        metrics.append(SequenceAccuracy(ignore_index=pad))
    if 'target_acc' in opt.metrics:
        metrics.append(FinalTargetAccuracy(ignore_index=pad, eos_id=eos))
    if 'sym_rwr_acc' in opt.metrics:
        metrics.append(SymbolRewritingAccuracy(
            input_vocab=input_vocab,
            output_vocab=output_vocab,
            use_output_eos=use_output_eos,
            output_sos_symbol=sos,
            output_pad_symbol=pad,
            output_eos_symbol=eos,
            output_unk_symbol=unk))
    if 'bleu' in opt.metrics:
        metrics.append(BLEU(
            input_vocab=input_vocab,
            output_vocab=output_vocab,
            use_output_eos=use_output_eos,
            output_sos_symbol=sos,
            output_pad_symbol=pad,
            output_eos_symbol=eos,
            output_unk_symbol=unk))

    return losses, loss_weights, metrics
# example in the data set. We can assume that the other examples are then
# also correct.
if opt.use_attention_loss or opt.attention_method == 'hard':
    if len(test) > 0:
        if 'attn' not in vars(test[0]):
            raise Exception("AttentionField not found in test data")
        tgt_len = len(vars(test[0])['tgt']) - 1    # -1 for SOS
        attn_len = len(vars(test[0])['attn']) - 1  # -1 for prepended ignore_index
        if attn_len != tgt_len:
            raise Exception(
                "Length of output sequence does not equal length of "
                "attention sequence in test data.")

# Prepare loss and metrics
pad = output_vocab.stoi[tgt.pad_token]
losses = [NLLLoss(ignore_index=pad)]
loss_weights = [1.]

if opt.use_attention_loss:
    losses.append(AttentionLoss(ignore_index=IGNORE_INDEX))
    loss_weights.append(opt.scale_attention_loss)

for loss in losses:
    loss.to(device)

metrics = [
    WordAccuracy(ignore_index=pad),
    SequenceAccuracy(ignore_index=pad),
    FinalTargetAccuracy(ignore_index=pad, eos_id=tgt.eos_id)
]
# Since we need the actual tokens to determine k-grammar accuracy,
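How a list of losses with per-loss weights typically enters a single backward pass is a plain weighted sum; the sketch below is an assumption about that combination step (names and the `combine_losses` helper are illustrative, not the library's code).

# Hypothetical combination step for the (losses, loss_weights) pairs built
# above, e.g. NLL plus a scaled attention loss. `loss_tensors` stands in
# for whatever per-criterion tensors the trainer accumulates.
import torch

def combine_losses(loss_tensors, loss_weights):
    total = torch.zeros(())
    for loss, weight in zip(loss_tensors, loss_weights):
        total = total + weight * loss
    return total

total = combine_losses([torch.tensor(2.3), torch.tensor(0.4)], [1., 0.5])
# total.backward() would then propagate through all weighted terms at once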
def test_nllloss_init(self):
    loss = NLLLoss()
    self.assertEqual(loss.name, NLLLoss._NAME)
    self.assertEqual(loss.log_name, NLLLoss._SHORTNAME)
def train(self, model, data, dev_data, num_epochs=5, resume_training=False,
          monitor_data={}, optimizer=None, teacher_forcing_ratio=0,
          custom_callbacks=[], learning_rate=0.001, checkpoint_path=None,
          top_k=5, losses=[NLLLoss()], loss_weights=None, metrics=[],
          random_seed=None, checkpoint_every=100, print_every=100):
    """
    Run training for a given model.

    Args:
        model (machine.models): model to run training on; if
            `resume_training=True`, it is overwritten by the model loaded
            from the latest checkpoint
        data (torchtext.data.Iterator): torchtext iterator object to train on
        num_epochs (int, optional): number of epochs to run (default 5)
        resume_training (bool, optional): resume training with the latest
            checkpoint up until the number of epochs (default False)
        dev_data (torchtext.data.Iterator): dev/validation set iterator
            Note: do not pass the train iterator here, as it gets evaluated
            during training (in between batches). To evaluate on the full
            train set during training, make two iterators and pass the
            second one here.
        monitor_data (dict of torchtext.data.Iterator, optional): iterators
            to test on (default: empty). The same note as for dev_data
            applies.
        optimizer (machine.optim.Optimizer, optional): optimizer for training
            (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
        teacher_forcing_ratio (float, optional): teacher forcing ratio
            (default 0)
        custom_callbacks (list, optional): list of custom callbacks (see
            utils.callbacks.callback for the base class)
        learning_rate (float, optional): learning rate used by the optimizer
            (default 0.001)
        checkpoint_path (str, optional): path to load a checkpoint from in
            case training should be resumed
        top_k (int): how many models should be stored during training
        losses (list, optional): list of machine.loss.Loss objects for
            training (default: [machine.loss.NLLLoss])
        loss_weights (list, optional): weights to apply to the losses
            (defaults to 1 for each loss)
        metrics (list, optional): list of machine.metric.metric objects to
            be computed during evaluation
        random_seed (int, optional): random seed to set before training
        checkpoint_every (int, optional): number of epochs to checkpoint
            after (default: 100)
        print_every (int, optional): number of iterations to print after
            (default: 100)

    Returns:
        model (machine.models): trained model.
    """
    self.set_local_parameters(random_seed, losses, metrics, loss_weights,
                              checkpoint_every, print_every)

    # If training is set to resume
    if resume_training:
        resume_checkpoint = Checkpoint.load(checkpoint_path)
        model = resume_checkpoint.model
        self.model = model
        self.optimizer = resume_checkpoint.optimizer

        # A workaround to set optimizing parameters properly
        resume_optim = self.optimizer.optimizer
        defaults = resume_optim.param_groups[0]
        defaults.pop('params', None)
        defaults.pop('initial_lr', None)
        self.optimizer.optimizer = resume_optim.__class__(
            self.model.parameters(), **defaults)

        start_epoch = resume_checkpoint.epoch
        step = resume_checkpoint.step
    else:
        start_epoch = 1
        step = 0
        self.model = model

        def get_optim(optim_name):
            optims = {'adam': optim.Adam, 'adagrad': optim.Adagrad,
                      'adadelta': optim.Adadelta, 'adamax': optim.Adamax,
                      'rmsprop': optim.RMSprop, 'sgd': optim.SGD,
                      None: optim.Adam}
            return optims[optim_name]

        self.optimizer = Optimizer(
            get_optim(optimizer)(self.model.parameters(), lr=learning_rate),
            max_grad_norm=5)

    self.logger.info("Optimizer: %s, Scheduler: %s" %
                     (self.optimizer.optimizer, self.optimizer.scheduler))

    callbacks = CallbackContainer(
        self,
        [Logger(), ModelCheckpoint(top_k=top_k), History()] + custom_callbacks)

    logs = self._train_epoches(data, num_epochs, start_epoch, step,
                               dev_data=dev_data,
                               monitor_data=monitor_data,
                               callbacks=callbacks,
                               teacher_forcing_ratio=teacher_forcing_ratio)

    return self.model, logs
def test_nllloss_init(self):
    loss = NLLLoss()
    self.assertEqual(loss.name, NLLLoss._NAME)
    self.assertEqual(loss.log_name, NLLLoss._SHORTNAME)
    self.assertTrue(isinstance(loss.criterion, torch.nn.NLLLoss))
NUM_EPOCHS = 10
HIDDEN_SIZE = 128

init_logging()

# Get data
train_iter, valid_iter, test_iters, src, tgt = get_iters()

# Prepare model
baseline_seq2seq = get_baseline_model(src, tgt, HIDDEN_SIZE)
baseline_seq2seq.to(device)

# Prepare training
pad = tgt.vocab.stoi[tgt.pad_token]
losses = [NLLLoss(ignore_index=pad).to(device)]
metrics = [SequenceAccuracy(ignore_index=pad)]
trainer = SupervisedTrainer(expt_dir='runs/models/baseline')

# Train
logging.info("Training")
seq2seq, logs = trainer.train(baseline_seq2seq, train_iter,
                              dev_data=valid_iter,
                              monitor_data=test_iters,
                              num_epochs=NUM_EPOCHS,
                              optimizer='adam',
                              checkpoint_path='runs/models/baseline',
                              losses=losses,
                              metrics=metrics,
                              checkpoint_every=100)
def __init__(self, loss=[NLLLoss()],
             metrics=[WordAccuracy(), SequenceAccuracy()], batch_size=64):
    self.losses = loss
    self.metrics = metrics
    self.batch_size = batch_size
def __init__(self, loss=[NLLLoss()],
             metrics=[WordAccuracy(), SequenceAccuracy()]):
    self.losses = loss
    self.metrics = metrics
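A construction sketch for the evaluator these `__init__` variants belong to; the import paths and the `evaluate(model, iterator)` call at the end are assumptions about parts of the class not shown in this section.

# Hedged construction sketch; import paths assumed, `pad` is a placeholder
# pad-token index as prepared in the scripts above.
from machine.evaluator import Evaluator
from machine.loss import NLLLoss
from machine.metrics import WordAccuracy, SequenceAccuracy

evaluator = Evaluator(loss=[NLLLoss(ignore_index=pad)],
                      metrics=[WordAccuracy(ignore_index=pad),
                               SequenceAccuracy(ignore_index=pad)],
                      batch_size=128)
# losses, metrics = evaluator.evaluate(model, test_iter)  # assumed interface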