def __init__(self, params):
    """Build an LM plus a configured Trainer for one hyper-parameter run.

    NOTE(review): relies on module-level names (`args`, `d`, `train`,
    `test`, `valid`, `LM`, `u`, `optim`, `EarlyStopping`, `Trainer`,
    `StdLogger`) defined elsewhere in this file.
    """
    self.trainer, self.early_stopping = None, None

    lm = LM(params['emb_dim'], params['hid_dim'], d,
            num_layers=params['num_layers'], cell=params['cell'],
            dropout=params['dropout'], train_init=params['train_init'],
            deepout_layers=params['deepout_layers'],
            maxouts=params['maxouts'],
            word_dropout=params['word_dropout'])
    u.initialize_model(lm)

    opt = getattr(optim, args.optim)(lm.parameters(), lr=args.lr)
    self.early_stopping = EarlyStopping(5, patience=3)

    def early_stop_hook(trainer, epoch, batch_num, num_checkpoints):
        # feed the summed validation loss into the early-stopping tracker
        valid_loss = trainer.validate_model()
        self.early_stopping.add_checkpoint(sum(valid_loss.pack()))

    run_trainer = Trainer(
        lm, {"train": train, "test": test, "valid": valid},
        opt, max_norm=args.max_norm)
    run_trainer.add_hook(early_stop_hook, hooks_per_epoch=5)
    run_trainer.add_loggers(StdLogger())
    self.trainer = run_trainer
class create_runner(object):
    """Callable runner for hyper-parameter search.

    Construction builds a language model and a Trainer; calling the
    instance trains for a number of epoch-units and reports the loss
    plus whether early stopping fired.

    NOTE(review): relies on module-level names (`args`, `d`, `train`,
    `test`, `valid`, `LM`, `u`, `optim`, `EarlyStopping`, `Trainer`,
    `StdLogger`) defined elsewhere in this file.
    """

    def __init__(self, params):
        self.trainer, self.early_stopping = None, None

        lm = LM(params['emb_dim'], params['hid_dim'], d,
                num_layers=params['num_layers'], cell=params['cell'],
                dropout=params['dropout'], train_init=params['train_init'],
                deepout_layers=params['deepout_layers'],
                maxouts=params['maxouts'],
                word_dropout=params['word_dropout'])
        u.initialize_model(lm)

        opt = getattr(optim, args.optim)(lm.parameters(), lr=args.lr)
        self.early_stopping = EarlyStopping(5, patience=3)

        def early_stop_hook(trainer, epoch, batch_num, num_checkpoints):
            # feed the summed validation loss into the early-stopping tracker
            valid_loss = trainer.validate_model()
            self.early_stopping.add_checkpoint(sum(valid_loss.pack()))

        run_trainer = Trainer(
            lm, {"train": train, "test": test, "valid": valid},
            opt, max_norm=args.max_norm)
        run_trainer.add_hook(early_stop_hook, hooks_per_epoch=5)
        run_trainer.add_loggers(StdLogger())
        self.trainer = run_trainer

    def __call__(self, n_iters):
        # max run will be 5 epochs
        unit = len(self.trainer.datasets['train'])
        unit = int((unit / args.max_iter) * args.max_epochs)
        if args.gpu:
            self.trainer.model.cuda()
        (_, loss), _ = self.trainer.train_batches(unit * n_iters, 10)
        self.trainer.model.cpu()
        return {'loss': loss, 'early_stop': self.early_stopping.stopped}
# NOTE(review): this chunk begins mid-statement — the `rnn=` kwarg below
# closes an `initialize_model(model, ...)` call that starts above this view.
rnn={ 'type': 'orthogonal', 'args': { 'gain': 1.0 } })
# optimizer class is looked up by name on the `optim` module (e.g. 'Adam')
optimizer = getattr(optim, args.optim)(model.parameters(), lr=args.lr)
print(model)
print()
print('* number of parameters: {}'.format(model.n_params()))
model.to(device=args.device)
early_stopping = EarlyStopping(args.patience)
# Trainer tracks the 'ppl' loss, early-stops on `patience`, and keeps the
# 3 most recent checkpoints under the 'EncoderDecoder' tag
# (mode='nlast' — presumably "n last"; confirm against Checkpoint's API).
trainer = Trainer(model, { 'train': train, 'valid': valid }, optimizer,
                  losses=('ppl', ), early_stopping=early_stopping,
                  max_norm=args.max_norm,
                  checkpoint=Checkpoint('EncoderDecoder',
                                        mode='nlast', keep=3).setup(args))
trainer.add_loggers(StdLogger())
# trainer.add_loggers(VisdomLogger(env='encdec'))
trainer.add_loggers(TensorboardLogger(comment='encdec'))
print(m)
print('* number of parameters: {}'.format(m.n_params()))
# NOTE(review): `betas`/`eps` are Adam-family kwargs — this call would fail
# for optimizers that don't accept them (e.g. SGD); confirm args.optim usage.
optimizer = getattr(optim, args.optim)(
    m.parameters(), lr=args.lr, betas=(0., 0.99), eps=1e-5)
# create trainer
trainer = Trainer(
    m, {"train": train, "test": test, "valid": valid},
    optimizer, max_norm=args.max_norm)
# hooks
# - general hook
early_stopping = None
if args.patience > 0:
    early_stopping = EarlyStopping(10, patience=args.patience)
model_hook = u.make_lm_hook(
    d, temperature=args.temperature, max_seq_len=args.max_seq_len,
    gpu=args.gpu, level=args.level, early_stopping=early_stopping)
trainer.add_hook(model_hook, hooks_per_epoch=args.hooks_per_epoch)
# - scheduled sampling hook
if args.use_schedule:
    schedule = inflection_sigmoid(
        len(train) * args.schedule_inflection, args.schedule_steepness,
        a=args.schedule_init, inverse=True)
    # hooks_per_epoch=10e4 effectively runs the schedule hook on every
    # checkpoint — presumably intentional; confirm against Trainer.add_hook.
    trainer.add_hook(
        u.make_schedule_hook(schedule, verbose=True), hooks_per_epoch=10e4)
# - lr schedule hook
hook = make_lr_hook(
    optimizer, args.lr_schedule_factor, args.lr_schedule_checkpoints)
# run a hook args.checkpoint * 4 batches
# kl annealing kl_schedule = kl_sigmoid_annealing_schedule(inflection=args.inflection) class VAETrainer(Trainer): def on_batch_end(self, epoch, batch, loss): # reset kl weight total_batches = len(self.datasets['train']) self.model.encoder.kl_weight = kl_schedule( batch + total_batches * epoch) losses = [{'loss': 'rec'}, {'loss': 'kl', 'format': lambda loss: loss}] trainer = VAETrainer( model, {'train': train, 'valid': valid, 'test': test}, optimizer, losses=losses, early_stopping=EarlyStopping(5, patience=args.patience), max_norm=args.max_norm, scheduler=scheduler) # hooks trainer.add_hook(make_generate_hook(args.level), hooks_per_epoch=args.hooks_per_epoch) trainer.add_hook(kl_weight_hook, hooks_per_epoch=100) if args.use_schedule: hook = u.make_schedule_hook( inflection_sigmoid(len(train) * 2, 1.75, inverse=True)) trainer.add_hook(hook, hooks_per_epoch=1000) trainer.add_loggers( StdLogger(), # VisdomLogger(env='vae', losses=('rec', 'kl'), max_y=600) )
# NOTE(review): this chunk begins mid-statement — the kwargs below close a
# model-constructor call that starts above this view.
deepout_layers=args.deepout_layers,
           deepout_act=args.deepout_act,
           word_dropout=args.word_dropout,
           target_code=lang_d.get_unk(),
           conds=conds)
u.initialize_model(m)
print(m)
print(' * n parameters. {}'.format(m.n_params()))
if args.gpu:
    m.cuda()
optimizer = getattr(optim, args.optim)(m.parameters(), lr=args.lr)
# halve the learning rate every epoch
scheduler = optim.lr_scheduler.StepLR(optimizer, 1, 0.5)
# queue size is at least 10 — presumably so the best-model buffer never
# shrinks below 10 entries; confirm against EarlyStopping's signature.
early_stopping = EarlyStopping(max(args.patience, 10),
                               patience=args.patience)
# hook
check_hook = u.make_clm_hook(
    d, max_seq_len=args.max_seq_len, gpu=args.gpu, sampled_conds=5,
    method=args.decoding_method, temperature=args.temperature)
# logger
std_logger = StdLogger()
# trainer
# NOTE(review): statement continues past this view (dict left open).
trainer = Trainer(m, { 'train': train, 'valid': valid, 'test': test
def main():
    """CLI entry point: build a shingled dataset, train a skip-thoughts
    model, and save the best checkpoint.

    NOTE(review): relies on module-level names (`uz`, `u`, `optim`,
    `word_tokenize`, `make_skipthoughts_model`, `SkipthoughtsTrainer`,
    `EarlyStopping`, `JsonLogger`, `StdLogger`, `make_translation_hook`)
    defined elsewhere in this file.
    """
    parser = argparse.ArgumentParser()
    # dataset
    parser.add_argument('--input', type=str, default='data')
    parser.add_argument('--min_item_freq', type=int, default=50)
    parser.add_argument('--max_vocab_size', type=int, default=20000)
    parser.add_argument('--min_len', default=1, type=int)
    parser.add_argument('--max_len', default=15, type=int)
    parser.add_argument('--dev', default=0.1, type=float)
    parser.add_argument('--rnd_seed', default=12345, type=int)
    parser.add_argument('--max_items', default=None, type=int)
    parser.add_argument('--task', default='sentences', type=str)
    parser.add_argument('--level', default='word', type=str)
    parser.add_argument('--focus_size', default=15, type=int)
    parser.add_argument('--left_size', default=15, type=int)
    parser.add_argument('--right_size', default=15, type=int)
    parser.add_argument('--shingle_stride', default=None, type=int)
    parser.add_argument('--allow_overlap', action='store_true', default=False)
    parser.add_argument('--shuffle', action='store_true')
    parser.add_argument('--tokenize', action='store_true', default=False)
    parser.add_argument('--grow', action='store_true')
    parser.add_argument('--grow_n_epochs', default=1, type=int)
    # training
    parser.add_argument('--epochs', default=5, type=int)
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--optim', default='Adam', type=str)
    parser.add_argument('--lr', default=0.0003, type=float)
    parser.add_argument('--max_norm', default=10., type=float)
    parser.add_argument('--dropout', default=0.25, type=float)
    parser.add_argument('--word_dropout', default=0.0, type=float)
    parser.add_argument('--use_schedule', action='store_true')
    parser.add_argument('--patience', default=10, type=int)
    parser.add_argument('--reverse', action='store_true')
    parser.add_argument('--batches_for_checkpoint', default=50, type=int)
    parser.add_argument('--checkpoints_for_hooks', default=1, type=int)
    parser.add_argument('--target', default='Ze was', type=str)
    parser.add_argument('--bidi', action='store_true')
    parser.add_argument('--beam', action='store_true')
    parser.add_argument('--plot', action='store_true')
    parser.add_argument('--json', type=str, default='history.json')
    # model
    parser.add_argument('--model_path', default='./model_storage', type=str)
    parser.add_argument('--num_layers', default=1, type=int)
    parser.add_argument('--emb_dim', default=64, type=int)
    parser.add_argument('--hid_dim', default=150, type=int)
    parser.add_argument('--cell', default='GRU')
    parser.add_argument('--train_init', action='store_true')
    parser.add_argument('--add_init_jitter', action='store_true')
    parser.add_argument('--encoder-summary', default='inner-attention')
    parser.add_argument('--deepout_layers', type=int, default=0)
    parser.add_argument('--att_type', type=str, default='none')
    args = parser.parse_args()

    # normalize the generation target to the dataset's tokenization level
    if args.task == 'sentences' and args.level == 'word':
        args.target = [t.lower() for t in word_tokenize(args.target)]
    elif args.task == 'sentences' and args.level == 'char':
        args.target = tuple(args.target.lower())

    train, valid, vocab_dict = uz.shingle_dataset(args, vocab_dict=None)
    print(f' * vocabulary size {len(vocab_dict)}')
    print(f' * number of train batches {len(train)}')
    print(f' * number of dev batches {len(valid)}')
    print(f' * maximum batch size {args.batch_size}')

    model = make_skipthoughts_model(num_layers=args.num_layers,
                                    emb_dim=args.emb_dim,
                                    hid_dim=args.hid_dim,
                                    src_dict=vocab_dict,
                                    cell=args.cell,
                                    bidi=args.bidi,
                                    encoder_summary=args.encoder_summary,
                                    att_type=args.att_type,
                                    task=args.task,
                                    tie_weights=False)
    # orthogonal init for the recurrent weights
    u.initialize_model(model,
                       rnn={ 'type': 'orthogonal', 'args': { 'gain': 1.0 } })
    optimizer = getattr(optim, args.optim)(model.parameters(),
                                           lr=args.lr)  #, amsgrad=True)
    print(model)
    print('* number of parameters: {}'.format(model.n_params()))
    if args.gpu:
        model.cuda()
    early_stopping = EarlyStopping(patience=args.patience, maxsize=1)
    trainer = SkipthoughtsTrainer(model,
                                  { 'train': train, 'valid': valid },
                                  optimizer,
                                  early_stopping=early_stopping,
                                  max_norm=args.max_norm)
    if args.json:
        logger = JsonLogger(json_file=args.json)
    else:
        logger = StdLogger()
    trainer.add_loggers(logger)
    trainer.set_additional_params(args, vocab_dict)
    # periodically translate a fixed target string to monitor progress
    hook = make_translation_hook(args.target, args.gpu,
                                 beam=args.beam, max_len=args.right_size)
    trainer.add_hook(hook, num_checkpoints=args.checkpoints_for_hooks)
    #hook = u.make_schedule_hook(
    #    inflection_sigmoid(len(train) * 2, 1.75, inverse=True))
    #trainer.add_hook(hook, num_checkpoints=args.checkpoints_for_hooks)
    (best_model, valid_loss), test_loss = trainer.train(
        args.epochs, args.batches_for_checkpoint,
        shuffle=True, use_schedule=args.use_schedule)
    u.save_checkpoint(args.model_path, best_model, vars(args),
                      d=vocab_dict, ppl=valid_loss, suffix='final')