def __init__(self, params):
    self.trainer, self.early_stopping = None, None
    # build the language model from the hyperparameter dict
    m = LM(len(d), params['emb_dim'], params['hid_dim'],
           num_layers=params['num_layers'], cell=params['cell'],
           dropout=params['dropout'], train_init=params['train_init'],
           deepout_layers=params['deepout_layers'],
           maxouts=params['maxouts'], target_code=d.get_unk(),
           word_dropout=params['word_dropout'])
    u.initialize_model(m)

    optim = Optimizer(
        m.parameters(), args.optim, lr=args.lr, max_norm=args.max_norm)
    self.early_stopping = EarlyStopping(5, patience=3, reset_patience=False)

    def early_stop_hook(trainer, epoch, batch_num, num_checkpoints):
        # validate and report the summed loss to the early stopper
        valid_loss = trainer.validate_model()
        self.early_stopping.add_checkpoint(sum(valid_loss.pack()))

    trainer = Trainer(
        m, {"train": train, "test": test, "valid": valid}, optim)
    trainer.add_hook(early_stop_hook, hooks_per_epoch=5)
    trainer.add_loggers(StdLogger())
    self.trainer = trainer
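# A minimal sketch of the early-stopping contract that `early_stop_hook`
# relies on (hypothetical: the class name, counters and the StopIteration
# signal below are assumptions, not the actual seqmod EarlyStopping API):
# add_checkpoint records a validation loss and signals a stop once `patience`
# consecutive checkpoints fail to improve on the best value seen so far.
class EarlyStoppingSketch:
    def __init__(self, maxsize, patience=3, reset_patience=False):
        # maxsize kept for signature parity with the call above; unused here
        self.patience, self.reset_patience = patience, reset_patience
        self.best, self.fails = float('inf'), 0

    def add_checkpoint(self, loss):
        if loss < self.best:
            self.best, self.fails = loss, 0
        else:
            self.fails += 1
            if self.fails >= self.patience:
                raise StopIteration(  # stand-in stop signal
                    "no improvement in %d checkpoints" % self.patience)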
def validation_end(self, payload):
    loss = StdLogger.loss_str(payload['loss'], 'valid')
    self.logger.info("Epoch[%d]; %s" % (payload['epoch'], loss))
    if self.json_file:
        # tag the payload and rewrite the full history to disk
        payload['dataset'] = 'valid'
        self.history.append(payload)
        with open(self.json_file, 'w') as jf:
            jf.write(json.dumps(self.history, indent=4))
def checkpoint(self, payload):
    e, b, bs = payload['epoch'], payload['batch'], payload['total_batches']
    speed = payload["examples"] / payload["duration"]
    loss = StdLogger.loss_str(payload['loss'], 'train')
    self.logger.info(
        "Epoch[%d]; batch [%d/%d]; %s; speed %d tokens/sec"
        % (e, b, bs, loss, speed))
    if self.json_file:
        payload['dataset'] = 'train'
        self.history.append(payload)
        with open(self.json_file, 'w') as jf:
            jf.write(json.dumps(self.history, indent=4))
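# Both methods above append the event payload to `self.history` and rewrite
# the whole JSON file, so the file on disk is always a complete, valid JSON
# array even if training is interrupted. Reading it back for analysis (a
# minimal sketch; 'history.json' is an assumed file name):
import json

with open('history.json') as jf:
    history = json.load(jf)
valid_losses = [h['loss'] for h in history if h.get('dataset') == 'valid']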
    start_decay_at=args.start_decay_at, lr_decay=args.lr_decay,
    on_lr_update=on_lr_update)


class VAETrainer(Trainer):
    def on_batch_end(self, epoch, batch, loss):
        # anneal the kl weight according to the global step
        total_batches = len(self.datasets['train'])
        self.model.kl_weight = self.model.kl_schedule(
            batch + total_batches * epoch)


losses = [{'loss': 'log-loss'},
          {'loss': 'kl', 'format': lambda loss: loss}]

trainer = VAETrainer(
    model, {'train': train, 'valid': valid, 'test': test}, optimizer,
    losses=losses)
trainer.add_loggers(
    StdLogger(), VisdomLogger(env='vae', losses=('rec', 'kl'), max_y=600))
trainer.train(args.epochs, args.checkpoints, shuffle=True)
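# `kl_schedule` is referenced above but not defined in this snippet. A common
# choice is sigmoid annealing of the KL weight from ~0 towards 1 over global
# steps, which also matches the `inflection_point` argument passed to the
# other VAETrainer below. This is a hedged sketch, not necessarily the
# schedule the model actually uses:
import math

def make_kl_sigmoid_schedule(inflection_point, steepness=3000.0):
    def schedule(step):
        # rises smoothly towards 1.0 around `inflection_point`
        return 1.0 / (1.0 + math.exp((inflection_point - step) / steepness))
    return schedule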
# Decrease lr by a factor every `lr_schedule_epochs` epochs
scheduler = optim.lr_scheduler.StepLR(
    optimizer, args.lr_schedule_epochs, args.lr_schedule_factor)

model_name = 'AE.GRL{}.C{}'.format(str(args.grl), str(conditional))
trainer = Trainer(
    m, {'train': train, 'valid': valid}, optimizer, losses=losses,
    max_norm=args.max_norm, scheduler=scheduler, weights=weights)
trainer.add_loggers(StdLogger())

# Hooks
# - early stopping & checkpoint
early_stopping = None
if args.patience:
    early_stopping = EarlyStopping(args.patience)
checkpoint = None
if not args.test:
    checkpoint = Checkpoint(model_name, keep=3).setup(args, d=conds_d)
trainer.add_hook(make_check_hook(early_stopping, checkpoint),
                 hooks_per_epoch=args.hook)
# - print hook
trainer.add_hook(make_report_hook(valid, 5, conditional),
                 hooks_per_epoch=args.hook)
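# `make_check_hook` is not shown in this snippet. A plausible sketch, modelled
# on `early_stop_hook` above (the hook signature is taken from there, but
# `checkpoint.save` is a hypothetical method name, not a confirmed API):
# validate, feed the summed loss to early stopping, and let the checkpoint
# object persist the model.
def make_check_hook_sketch(early_stopping, checkpoint):
    def hook(trainer, epoch, batch_num, num_checkpoints):
        valid_loss = trainer.validate_model()
        loss = sum(valid_loss.pack())
        if early_stopping is not None:
            early_stopping.add_checkpoint(loss)
        if checkpoint is not None:
            checkpoint.save(trainer.model, loss)  # hypothetical API
    return hook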
    model.parameters(), args.optim, args.learning_rate, args.max_grad_norm,
    lr_decay=args.learning_rate_decay, start_decay_at=args.start_decay_at)
criterion = make_criterion(len(src_dict), src_dict.get_pad())

model.apply(u.make_initializer(
    rnn={'type': 'orthogonal', 'args': {'gain': 1.0}}))

print('* number of parameters: %d' % model.n_params())
print(model)

if args.gpu:
    model.cuda()
    criterion.cuda()

trainer = EncoderDecoderTrainer(
    model, {'train': train, 'valid': valid}, criterion, optimizer)

# target = args.target.split() if args.target else None
# hook = make_encdec_hook(args.target, args.gpu)

# Logging
if args.csv != 'empty':
    trainer.add_loggers(
        notesLogger(args=args, model=model, save_path=args.csv))
if args.logging:
    trainer.add_loggers(StdLogger(args=args, model=model))
if args.visdom:
    trainer.add_loggers(StdLogger(), VisdomLogger(env='encdec'))

num_checkpoints = len(train) // (args.checkpoint * args.hooks_per_epoch)
# trainer.add_hook(hook, num_checkpoints=num_checkpoints)

trainer.train(args.epochs, args.checkpoint, shuffle=True, gpu=args.gpu)
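# `make_criterion` is defined elsewhere; a minimal sketch of a padding-masked
# criterion of this shape (hypothetical; the real helper may differ): zero the
# class weight of the padding token so padded positions never contribute loss.
import torch
import torch.nn as nn

def make_criterion_sketch(vocab_size, pad_idx):
    weight = torch.ones(vocab_size)
    weight[pad_idx] = 0  # ignore padding positions in the loss
    return nn.NLLLoss(weight=weight)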
# hooks
early_stopping = None
if args.early_stopping > 0:
    early_stopping = EarlyStopping(args.early_stopping)
model_check_hook = make_lm_check_hook(
    d, method=args.decoding_method, temperature=args.temperature,
    max_seq_len=args.max_seq_len, seed_text=args.seed, gpu=args.gpu,
    early_stopping=early_stopping)
num_checkpoints = len(train) // (args.checkpoint * args.hooks_per_epoch)
trainer.add_hook(model_check_hook, num_checkpoints=num_checkpoints)

# loggers
visdom_logger = VisdomLogger(
    log_checkpoints=args.log_checkpoints, title=args.prefix, env='lm',
    server='http://' + args.visdom_server)
trainer.add_loggers(StdLogger(), visdom_logger)

trainer.train(args.epochs, args.checkpoint, gpu=args.gpu)

if args.save:
    test_ppl = trainer.validate_model(test=True)
    print("Test perplexity: %g" % test_ppl)
    f = '{prefix}.{cell}.{layers}l.{hid_dim}h.{emb_dim}e.{bptt}b.{ppl}'
    fname = f.format(ppl="%.2f" % test_ppl, **vars(args))
    if os.path.isfile(fname):
        answer = input("File [%s] exists. Overwrite? (y/n): " % fname)
        if answer.lower() not in ("y", "yes"):
            print("Goodbye!")
            sys.exit(0)
    print("Saving model to [%s]..." % fname)
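# Worked example of the `num_checkpoints` arithmetic above, with illustrative
# numbers (not taken from any actual run): 1000 training batches, a checkpoint
# every 50 batches, and 4 hook firings wanted per epoch.
len_train, checkpoint_every, hooks_per_epoch = 1000, 50, 4
num_checkpoints_example = len_train // (checkpoint_every * hooks_per_epoch)
assert num_checkpoints_example == 5  # the hook runs after every 5th checkpoint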
    args.max_grad_norm, lr_decay=args.learning_rate_decay,
    start_decay_at=args.start_decay_at)
criterion = make_criterion(len(src_dict), src_dict.get_pad())

model.apply(u.make_initializer(
    rnn={'type': 'orthogonal', 'args': {'gain': 1.0}}))

print('* number of parameters: %d' % model.n_params())
print(model)

if args.gpu:
    model.cuda()
    criterion.cuda()

trainer = EncoderDecoderTrainer(
    model, {'train': train, 'valid': valid}, criterion, optimizer)
trainer.add_loggers(StdLogger(), VisdomLogger(env='encdec'))

target = args.target.split() if args.target else None
hook = make_encdec_hook(args.target, args.gpu)
num_checkpoints = len(train) // (args.checkpoint * args.hooks_per_epoch)
trainer.add_hook(hook, num_checkpoints=num_checkpoints)

trainer.train(args.epochs, args.checkpoint, shuffle=True, gpu=args.gpu)
# model.encoder.register_backward_hook(u.log_grad)
if args.load_embeddings:
    weight = load_embeddings(
        train.d['src'].vocab, args.flavor, args.suffix,
        '~/data/word_embeddings')
    model.init_embeddings(weight)

criterion = vae_criterion(vocab, train.d['src'].get_pad())

if args.gpu:
    model.cuda()
    criterion.cuda()

def on_lr_update(old_lr, new_lr):
    trainer.log("info", "Resetting lr [%g -> %g]" % (old_lr, new_lr))

optimizer = Optimizer(
    model.parameters(), args.optim, lr=args.learning_rate,
    max_norm=args.max_norm,
    weight_decay=args.weight_decay,  # SGD-only
    start_decay_at=args.start_decay_at, lr_decay=args.lr_decay,
    on_lr_update=on_lr_update)

trainer = VAETrainer(
    model, datasets, criterion, optimizer,
    inflection_point=args.inflection_point)
trainer.add_loggers(
    StdLogger(),
    VisdomLogger(env='vae_gender', losses=('rec', 'kl'), max_y=600))
# trainer.add_hook(kl_weight_hook)

trainer.train(args.epochs, args.checkpoints, shuffle=True, gpu=args.gpu)
    batch_size=args.batch_size, gpu=args.gpu, fitted=True)
valid = PairedDataset(
    [s for t in datasets for s in datasets[t]['valid'].data['src']],
    [s for t in datasets for s in datasets[t]['valid'].data['trg']],
    {'src': src_dict, 'trg': src_dict},
    batch_size=args.batch_size, gpu=args.gpu, fitted=True)
test = PairedDataset(
    [s for t in datasets for s in datasets[t]['test'].data['src']],
    [s for t in datasets for s in datasets[t]['test'].data['trg']],
    {'src': src_dict, 'trg': src_dict},
    batch_size=args.batch_size, gpu=args.gpu, fitted=True)

stdlogger = StdLogger(outputfile='.multitarget.log')
trainer = EncoderDecoderTrainer(
    model, {'train': train, 'valid': valid, 'test': test},
    criterion, optimizer)
trainer.add_loggers(
    stdlogger, VisdomLogger(env='multitarget', title='general'))

num_checkpoints = max(
    len(train) // (args.checkpoint * args.hooks_per_epoch), 1)
trainer.add_hook(hook, num_checkpoints=num_checkpoints)

trainer.log('info', ' * vocabulary size. %d' % len(src_dict))
trainer.log('info', ' * maximum batch size. %d' % args.batch_size)
trainer.log('info', ' * number of train batches. %d' % len(train))
trainer.log('info', ' * number of parameters. %d' % model.n_params())
trainer.log('info', str(model))
trainer.log('info', "**********************")
    lr=args.lr, max_norm=args.max_norm, lr_decay=args.lr_decay,
    start_decay_at=args.start_decay_at, decay_every=args.decay_every)
early_stopping = EarlyStopping(max(args.patience, 10), args.patience)

# hook
check_hook = u.make_clm_hook(
    d, max_seq_len=args.max_seq_len, gpu=args.gpu, sampled_conds=5,
    method=args.decoding_method, temperature=args.temperature)

# logger
std_logger = StdLogger()

# trainer
trainer = Trainer(
    m, {'train': train, 'valid': valid, 'test': test}, optim,
    early_stopping=early_stopping)
trainer.add_loggers(std_logger)
trainer.add_hook(check_hook, hooks_per_epoch=args.hooks_per_epoch)

(best_model, val_ppl), test_ppl = trainer.train(args.epochs, args.checkpoint)

if args.save: