def train(args): """Train with the given args :param Namespace args: The program arguments """ # TODO(karita): support this if args.model_module != "default": raise NotImplementedError( "chainer backend does not support --model-module") # display chainer version logging.info('chainer version = ' + chainer.__version__) set_deterministic_chainer(args) # check cuda and cudnn availability if not chainer.cuda.available: logging.warning('cuda is not available') if not chainer.cuda.cudnn_enabled: logging.warning('cudnn is not available') # get special label ids unk = args.char_list_dict['<unk>'] eos = args.char_list_dict['<eos>'] # read tokens as a sequence of sentences train = read_tokens(args.train_label, args.char_list_dict) val = read_tokens(args.valid_label, args.char_list_dict) # count tokens n_train_tokens, n_train_oovs = count_tokens(train, unk) n_val_tokens, n_val_oovs = count_tokens(val, unk) logging.info('#vocab = ' + str(args.n_vocab)) logging.info('#sentences in the training data = ' + str(len(train))) logging.info('#tokens in the training data = ' + str(n_train_tokens)) logging.info('oov rate in the training data = %.2f %%' % (n_train_oovs / n_train_tokens * 100)) logging.info('#sentences in the validation data = ' + str(len(val))) logging.info('#tokens in the validation data = ' + str(n_val_tokens)) logging.info('oov rate in the validation data = %.2f %%' % (n_val_oovs / n_val_tokens * 100)) use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0 # Create the dataset iterators train_iter = ParallelSentenceIterator(train, args.batchsize, max_length=args.maxlen, sos=eos, eos=eos, shuffle=not use_sortagrad) val_iter = ParallelSentenceIterator(val, args.batchsize, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) epoch_iters = int(len(train_iter.batch_indices) / args.accum_grad) logging.info('#iterations per epoch = %d' % epoch_iters) logging.info('#total iterations = ' + str(args.epoch * epoch_iters)) # Prepare an RNNLM model rnn = RNNLM(args.n_vocab, args.layer, args.unit, args.type) model = ClassifierWithState(rnn) if args.ngpu > 1: logging.warning( "currently, multi-gpu is not supported. 
use single gpu.") if args.ngpu > 0: # Make the specified GPU current gpu_id = 0 chainer.cuda.get_device_from_id(gpu_id).use() model.to_gpu() else: gpu_id = -1 # Save model conf to json model_conf = args.outdir + '/model.json' with open(model_conf, 'wb') as f: logging.info('writing a model config file to ' + model_conf) f.write( json.dumps(vars(args), indent=4, ensure_ascii=False, sort_keys=True).encode('utf_8')) # Set up an optimizer opt_class = dynamic_import_optimizer(args.opt, args.backend) optimizer = opt_class.from_args(model, args) if args.schedulers is None: schedulers = [] else: schedulers = [ dynamic_import_scheduler(v)(k, args) for k, v in args.schedulers ] optimizer.setup(model) optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip)) updater = BPTTUpdater(train_iter, optimizer, schedulers, gpu_id, args.accum_grad) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir) trainer.extend(LMEvaluator(val_iter, model, device=gpu_id)) trainer.extend( extensions.LogReport(postprocess=compute_perplexity, trigger=(args.report_interval_iters, 'iteration'))) trainer.extend(extensions.PrintReport( ['epoch', 'iteration', 'perplexity', 'val_perplexity', 'elapsed_time']), trigger=(args.report_interval_iters, 'iteration')) trainer.extend( extensions.ProgressBar(update_interval=args.report_interval_iters)) trainer.extend( extensions.snapshot(filename='snapshot.ep.{.updater.epoch}')) trainer.extend( extensions.snapshot_object(model, 'rnnlm.model.{.updater.epoch}')) # MEMO(Hori): wants to use MinValueTrigger, but it seems to fail in resuming trainer.extend( MakeSymlinkToBestModel('validation/main/loss', 'rnnlm.model')) if use_sortagrad: trainer.extend( ShufflingEnabler([train_iter]), trigger=(args.sortagrad if args.sortagrad != -1 else args.epoch, 'epoch')) if args.resume: logging.info('resumed from %s' % args.resume) chainer.serializers.load_npz(args.resume, trainer) set_early_stop(trainer, args, is_lm=True) if args.tensorboard_dir is not None and args.tensorboard_dir != "": writer = SummaryWriter(args.tensorboard_dir) trainer.extend(TensorboardLogger(writer), trigger=(args.report_interval_iters, 'iteration')) trainer.run() check_early_stop(trainer, args.epoch) # compute perplexity for test set if args.test_label: logging.info('test the best model') chainer.serializers.load_npz(args.outdir + '/rnnlm.model.best', model) test = read_tokens(args.test_label, args.char_list_dict) n_test_tokens, n_test_oovs = count_tokens(test, unk) logging.info('#sentences in the test data = ' + str(len(test))) logging.info('#tokens in the test data = ' + str(n_test_tokens)) logging.info('oov rate in the test data = %.2f %%' % (n_test_oovs / n_test_tokens * 100)) test_iter = ParallelSentenceIterator(test, args.batchsize, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) evaluator = LMEvaluator(test_iter, model, device=gpu_id) with chainer.using_config('train', False): result = evaluator() logging.info('test perplexity: ' + str(np.exp(float(result['main/loss']))))
def train(args): """Train with the given args :param Namespace args: The program arguments """ # display torch version logging.info('torch version = ' + torch.__version__) set_deterministic_pytorch(args) # check cuda and cudnn availability if not torch.cuda.is_available(): logging.warning('cuda is not available') # get special label ids unk = args.char_list_dict['<unk>'] eos = args.char_list_dict['<eos>'] # read tokens as a sequence of sentences train = read_tokens(args.train_label, args.char_list_dict) val = read_tokens(args.valid_label, args.char_list_dict) # count tokens n_train_tokens, n_train_oovs = count_tokens(train, unk) n_val_tokens, n_val_oovs = count_tokens(val, unk) logging.info('#vocab = ' + str(args.n_vocab)) logging.info('#sentences in the training data = ' + str(len(train))) logging.info('#tokens in the training data = ' + str(n_train_tokens)) logging.info('oov rate in the training data = %.2f %%' % (n_train_oovs / n_train_tokens * 100)) logging.info('#sentences in the validation data = ' + str(len(val))) logging.info('#tokens in the validation data = ' + str(n_val_tokens)) logging.info('oov rate in the validation data = %.2f %%' % (n_val_oovs / n_val_tokens * 100)) use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0 # Create the dataset iterators train_iter = ParallelSentenceIterator(train, args.batchsize, max_length=args.maxlen, sos=eos, eos=eos, shuffle=not use_sortagrad) val_iter = ParallelSentenceIterator(val, args.batchsize, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) logging.info('#iterations per epoch = ' + str(len(train_iter.batch_indices))) logging.info('#total iterations = ' + str(args.epoch * len(train_iter.batch_indices))) # Prepare an RNNLM model rnn = RNNLM(args.n_vocab, args.layer, args.unit, args.type) model = ClassifierWithState(rnn) if args.ngpu > 1: logging.warning( "currently, multi-gpu is not supported. 
use single gpu.") if args.ngpu > 0: # Make the specified GPU current gpu_id = 0 model.cuda(gpu_id) else: gpu_id = -1 # Save model conf to json model_conf = args.outdir + '/model.json' with open(model_conf, 'wb') as f: logging.info('writing a model config file to ' + model_conf) f.write( json.dumps(vars(args), indent=4, sort_keys=True).encode('utf_8')) # Set up an optimizer if args.opt == 'sgd': optimizer = torch.optim.SGD(model.parameters(), lr=1.0) elif args.opt == 'adam': optimizer = torch.optim.Adam(model.parameters()) # FIXME: TOO DIRTY HACK reporter = model.reporter setattr(optimizer, "target", reporter) setattr(optimizer, "serialize", lambda s: reporter.serialize(s)) updater = BPTTUpdater(train_iter, model, optimizer, gpu_id, gradclip=args.gradclip) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir) trainer.extend(LMEvaluator(val_iter, model, reporter, device=gpu_id)) trainer.extend( extensions.LogReport(postprocess=compute_perplexity, trigger=(REPORT_INTERVAL, 'iteration'))) trainer.extend(extensions.PrintReport( ['epoch', 'iteration', 'perplexity', 'val_perplexity', 'elapsed_time']), trigger=(REPORT_INTERVAL, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL)) # Save best models trainer.extend(torch_snapshot(filename='snapshot.ep.{.updater.epoch}')) trainer.extend( extensions.snapshot_object(model, 'rnnlm.model.{.updater.epoch}', savefun=torch_save)) # T.Hori: MinValueTrigger should be used, but it fails when resuming trainer.extend( MakeSymlinkToBestModel('validation/main/loss', 'rnnlm.model')) if use_sortagrad: trainer.extend( ShufflingEnabler([train_iter]), trigger=(args.sortagrad if args.sortagrad != -1 else args.epoch, 'epoch')) if args.resume: logging.info('resumed from %s' % args.resume) torch_resume(args.resume, trainer) set_early_stop(trainer, args, is_lm=True) if args.tensorboard_dir is not None and args.tensorboard_dir != "": writer = SummaryWriter(args.tensorboard_dir) trainer.extend(TensorboardLogger(writer)) trainer.run() check_early_stop(trainer, args.epoch) # compute perplexity for test set if args.test_label: logging.info('test the best model') torch_load(args.outdir + '/rnnlm.model.best', model) test = read_tokens(args.test_label, args.char_list_dict) n_test_tokens, n_test_oovs = count_tokens(test, unk) logging.info('#sentences in the test data = ' + str(len(test))) logging.info('#tokens in the test data = ' + str(n_test_tokens)) logging.info('oov rate in the test data = %.2f %%' % (n_test_oovs / n_test_tokens * 100)) test_iter = ParallelSentenceIterator(test, args.batchsize, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) evaluator = LMEvaluator(test_iter, model, reporter, device=gpu_id) result = evaluator() logging.info('test perplexity: ' + str(np.exp(float(result['main/loss']))))
def train(args):
    """Train with the given args

    :param Namespace args: The program arguments
    """
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    nseed = args.seed
    os.environ['CHAINER_SEED'] = str(nseed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('chainer type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        chainer.config.cudnn_deterministic = False
        logging.info('chainer cudnn deterministic is disabled')
    else:
        chainer.config.cudnn_deterministic = True

    # check cuda and cudnn availability
    if not chainer.cuda.available:
        logging.warning('cuda is not available')
    if not chainer.cuda.cudnn_enabled:
        logging.warning('cudnn is not available')

    # get special label ids
    unk = args.char_list_dict['<unk>']
    eos = args.char_list_dict['<eos>']
    # read tokens as a sequence of sentences
    train = read_tokens(args.train_label, args.char_list_dict)
    val = read_tokens(args.valid_label, args.char_list_dict)
    # count tokens
    n_train_tokens, n_train_oovs = count_tokens(train, unk)
    n_val_tokens, n_val_oovs = count_tokens(val, unk)
    logging.info('#vocab = ' + str(args.n_vocab))
    logging.info('#sentences in the training data = ' + str(len(train)))
    logging.info('#tokens in the training data = ' + str(n_train_tokens))
    logging.info('oov rate in the training data = %.2f %%' %
                 (n_train_oovs / n_train_tokens * 100))
    logging.info('#sentences in the validation data = ' + str(len(val)))
    logging.info('#tokens in the validation data = ' + str(n_val_tokens))
    logging.info('oov rate in the validation data = %.2f %%' %
                 (n_val_oovs / n_val_tokens * 100))

    # Create the dataset iterators
    train_iter = ParallelSentenceIterator(train, args.batchsize,
                                          max_length=args.maxlen,
                                          sos=eos, eos=eos)
    val_iter = ParallelSentenceIterator(val, args.batchsize,
                                        max_length=args.maxlen,
                                        sos=eos, eos=eos, repeat=False)
    logging.info('#iterations per epoch = ' +
                 str(len(train_iter.batch_indices)))
    logging.info('#total iterations = ' +
                 str(args.epoch * len(train_iter.batch_indices)))

    # Prepare an RNNLM model
    rnn = RNNLM(args.n_vocab, args.layer, args.unit)
    model = ClassifierWithState(rnn)
    if args.ngpu > 1:
        logging.warning(
            "currently, multi-gpu is not supported. use single gpu.")
    if args.ngpu > 0:
        # Make the specified GPU current
        gpu_id = 0
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    else:
        gpu_id = -1

    # Save model conf to json
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps(vars(args), indent=4, sort_keys=True).encode('utf_8'))

    # Set up an optimizer
    if args.opt == 'sgd':
        optimizer = chainer.optimizers.SGD(lr=1.0)
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam()

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    updater = BPTTUpdater(train_iter, optimizer, gpu_id)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)
    trainer.extend(LMEvaluator(val_iter, model, device=gpu_id))
    trainer.extend(
        extensions.LogReport(postprocess=compute_perplexity,
                             trigger=(REPORT_INTERVAL, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity', 'elapsed_time']),
        trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))
    trainer.extend(
        extensions.snapshot(filename='snapshot.ep.{.updater.epoch}'))
    trainer.extend(
        extensions.snapshot_object(model, 'rnnlm.model.{.updater.epoch}'))
    # MEMO(Hori): wants to use MinValueTrigger, but it seems to fail in resuming
    trainer.extend(
        MakeSymlinkToBestModel('validation/main/loss', 'rnnlm.model'))

    if args.resume:
        logging.info('resumed from %s' % args.resume)
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # compute perplexity for test set
    if args.test_label:
        logging.info('test the best model')
        chainer.serializers.load_npz(args.outdir + '/rnnlm.model.best', model)
        test = read_tokens(args.test_label, args.char_list_dict)
        n_test_tokens, n_test_oovs = count_tokens(test, unk)
        logging.info('#sentences in the test data = ' + str(len(test)))
        logging.info('#tokens in the test data = ' + str(n_test_tokens))
        logging.info('oov rate in the test data = %.2f %%' %
                     (n_test_oovs / n_test_tokens * 100))
        test_iter = ParallelSentenceIterator(test, args.batchsize,
                                             max_length=args.maxlen,
                                             sos=eos, eos=eos, repeat=False)
        evaluator = LMEvaluator(test_iter, model, device=gpu_id)
        with chainer.using_config('train', False):
            result = evaluator()
        logging.info('test perplexity: ' +
                     str(np.exp(float(result['main/loss']))))

def train(args): """Train with the given args. :param Namespace args: The program arguments :param type model_class: LMInterface class for training """ model_class = dynamic_import_lm(args.model_module, args.backend) assert issubclass(model_class, LMInterface), "model should implement LMInterface" # display torch version logging.info('torch version = ' + torch.__version__) set_deterministic_pytorch(args) # check cuda and cudnn availability if not torch.cuda.is_available(): logging.warning('cuda is not available') # get special label ids unk = args.char_list_dict['<unk>'] eos = args.char_list_dict['<eos>'] # read tokens as a sequence of sentences val, n_val_tokens, n_val_oovs = load_dataset(args.valid_label, args.char_list_dict, args.dump_hdf5_path) train, n_train_tokens, n_train_oovs = load_dataset(args.train_label, args.char_list_dict, args.dump_hdf5_path) logging.info('#vocab = ' + str(args.n_vocab)) logging.info('#sentences in the training data = ' + str(len(train))) logging.info('#tokens in the training data = ' + str(n_train_tokens)) logging.info('oov rate in the training data = %.2f %%' % (n_train_oovs / n_train_tokens * 100)) logging.info('#sentences in the validation data = ' + str(len(val))) logging.info('#tokens in the validation data = ' + str(n_val_tokens)) logging.info('oov rate in the validation data = %.2f %%' % (n_val_oovs / n_val_tokens * 100)) use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0 # Create the dataset iterators batch_size = args.batchsize * max(args.ngpu, 1) if batch_size * args.accum_grad > args.batchsize: logging.info( f'batch size is automatically increased ({args.batchsize} -> {batch_size * args.accum_grad})' ) train_iter = ParallelSentenceIterator(train, batch_size, max_length=args.maxlen, sos=eos, eos=eos, shuffle=not use_sortagrad) val_iter = ParallelSentenceIterator(val, batch_size, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) epoch_iters = int(len(train_iter.batch_indices) / args.accum_grad) logging.info('#iterations per epoch = %d' % epoch_iters) logging.info('#total iterations = ' + str(args.epoch * epoch_iters)) # Prepare an RNNLM model if args.train_dtype in ("float16", "float32", "float64"): dtype = getattr(torch, args.train_dtype) else: dtype = torch.float32 model = model_class(args.n_vocab, args).to(dtype=dtype) if args.ngpu > 0: model.to("cuda") gpu_id = list(range(args.ngpu)) else: gpu_id = [-1] # Save model conf to json model_conf = args.outdir + '/model.json' with open(model_conf, 'wb') as f: logging.info('writing a model config file to ' + model_conf) f.write( json.dumps(vars(args), indent=4, ensure_ascii=False, sort_keys=True).encode('utf_8')) # Set up an optimizer opt_class = dynamic_import_optimizer(args.opt, args.backend) optimizer = opt_class.from_args(model.parameters(), args) if args.schedulers is None: schedulers = [] else: schedulers = [ dynamic_import_scheduler(v)(k, args) for k, v in args.schedulers ] # setup apex.amp if args.train_dtype in ("O0", "O1", "O2", "O3"): try: from apex import amp except ImportError as e: logging.error( f"You need to install apex for --train-dtype {args.train_dtype}. 
" "See https://github.com/NVIDIA/apex#linux") raise e model, optimizer = amp.initialize(model, optimizer, opt_level=args.train_dtype) use_apex = True else: use_apex = False # FIXME: TOO DIRTY HACK reporter = Reporter() setattr(model, "reporter", reporter) setattr(optimizer, "target", reporter) setattr(optimizer, "serialize", lambda s: reporter.serialize(s)) print('----------------------', gpu_id[0]) updater = BPTTUpdater(train_iter, model, optimizer, schedulers, gpu_id, gradclip=args.gradclip, use_apex=use_apex, accum_grad=args.accum_grad) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir) trainer.extend(LMEvaluator(val_iter, model, reporter, device=gpu_id)) trainer.extend( extensions.LogReport(postprocess=compute_perplexity, trigger=(args.report_interval_iters, 'iteration'))) trainer.extend(extensions.PrintReport([ 'epoch', 'iteration', 'main/loss', 'perplexity', 'val_perplexity', 'elapsed_time' ]), trigger=(args.report_interval_iters, 'iteration')) trainer.extend( extensions.ProgressBar(update_interval=args.report_interval_iters)) # Save best models trainer.extend(torch_snapshot(filename='snapshot.ep.{.updater.epoch}')) trainer.extend(snapshot_object(model, 'rnnlm.model.{.updater.epoch}')) # T.Hori: MinValueTrigger should be used, but it fails when resuming trainer.extend( MakeSymlinkToBestModel('validation/main/loss', 'rnnlm.model')) if use_sortagrad: trainer.extend( ShufflingEnabler([train_iter]), trigger=(args.sortagrad if args.sortagrad != -1 else args.epoch, 'epoch')) if args.resume: logging.info('resumed from %s' % args.resume) torch_resume(args.resume, trainer) set_early_stop(trainer, args, is_lm=True) if args.tensorboard_dir is not None and args.tensorboard_dir != "": writer = SummaryWriter(args.tensorboard_dir) trainer.extend(TensorboardLogger(writer), trigger=(args.report_interval_iters, 'iteration')) trainer.run() check_early_stop(trainer, args.epoch) # compute perplexity for test set if args.test_label: logging.info('test the best model') torch_load(args.outdir + '/rnnlm.model.best', model) test = read_tokens(args.test_label, args.char_list_dict) n_test_tokens, n_test_oovs = count_tokens(test, unk) logging.info('#sentences in the test data = ' + str(len(test))) logging.info('#tokens in the test data = ' + str(n_test_tokens)) logging.info('oov rate in the test data = %.2f %%' % (n_test_oovs / n_test_tokens * 100)) test_iter = ParallelSentenceIterator(test, batch_size, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) evaluator = LMEvaluator(test_iter, model, reporter, device=gpu_id) result = evaluator() compute_perplexity(result) logging.info(f"test perplexity: {result['perplexity']}")
def train(args): """Train with the given args. :param Namespace args: The program arguments :param type model_class: LMInterface class for training """ model_class = dynamic_import_lm(args.model_module, args.backend) assert issubclass(model_class, LMInterface), "model should implement LMInterface" # display torch version logging.info('torch version = ' + torch.__version__) set_deterministic_pytorch(args) # check cuda and cudnn availability if not torch.cuda.is_available(): logging.warning('cuda is not available') # get special label ids unk = args.char_list_dict['<unk>'] eos = args.char_list_dict['<eos>'] # read tokens as a sequence of sentences val, n_val_tokens, n_val_oovs = load_dataset(args.valid_label, args.char_list_dict, args.dump_hdf5_path) train, n_train_tokens, n_train_oovs = load_dataset(args.train_label, args.char_list_dict, args.dump_hdf5_path) logging.info('#vocab = ' + str(args.n_vocab)) logging.info('#sentences in the training data = ' + str(len(train))) logging.info('#tokens in the training data = ' + str(n_train_tokens)) logging.info('oov rate in the training data = %.2f %%' % (n_train_oovs / n_train_tokens * 100)) logging.info('#sentences in the validation data = ' + str(len(val))) logging.info('#tokens in the validation data = ' + str(n_val_tokens)) logging.info('oov rate in the validation data = %.2f %%' % (n_val_oovs / n_val_tokens * 100)) use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0 # Create the dataset iterators batch_size = args.batchsize * max(args.ngpu, 1) if batch_size > args.batchsize: logging.info(f'batch size is automatically increased ({args.batchsize} -> {batch_size})') train_iter = ParallelSentenceIterator(train, batch_size, max_length=args.maxlen, sos=eos, eos=eos, shuffle=not use_sortagrad) val_iter = ParallelSentenceIterator(val, batch_size, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) logging.info('#iterations per epoch = ' + str(len(train_iter.batch_indices))) logging.info('#total iterations = ' + str(args.epoch * len(train_iter.batch_indices))) # Prepare an RNNLM model model = model_class(args.n_vocab, args) reporter = Reporter() if args.ngpu > 0: model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu))).cuda() gpu_id = 0 else: gpu_id = -1 setattr(model, "reporter", reporter) # Save model conf to json model_conf = args.outdir + '/model.json' with open(model_conf, 'wb') as f: logging.info('writing a model config file to ' + model_conf) f.write(json.dumps(vars(args), indent=4, ensure_ascii=False, sort_keys=True).encode('utf_8')) # Set up an optimizer if args.opt == 'sgd': optimizer = torch.optim.SGD(model.parameters(), lr=1.0) elif args.opt == 'adam': optimizer = torch.optim.Adam(model.parameters()) # FIXME: TOO DIRTY HACK setattr(optimizer, "target", reporter) setattr(optimizer, "serialize", lambda s: reporter.serialize(s)) updater = BPTTUpdater(train_iter, model, optimizer, gpu_id, gradclip=args.gradclip) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir) trainer.extend(LMEvaluator(val_iter, model, reporter, device=gpu_id)) trainer.extend(extensions.LogReport(postprocess=compute_perplexity, trigger=(args.report_interval_iters, 'iteration'))) trainer.extend(extensions.PrintReport( ['epoch', 'iteration', 'main/loss', 'perplexity', 'val_perplexity', 'elapsed_time'] ), trigger=(args.report_interval_iters, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=args.report_interval_iters)) # Save best models trainer.extend(torch_snapshot(filename='snapshot.ep.{.updater.epoch}')) 
trainer.extend(snapshot_object(model, 'rnnlm.model.{.updater.epoch}')) # T.Hori: MinValueTrigger should be used, but it fails when resuming trainer.extend(MakeSymlinkToBestModel('validation/main/loss', 'rnnlm.model')) if use_sortagrad: trainer.extend(ShufflingEnabler([train_iter]), trigger=(args.sortagrad if args.sortagrad != -1 else args.epoch, 'epoch')) if args.resume: logging.info('resumed from %s' % args.resume) torch_resume(args.resume, trainer) set_early_stop(trainer, args, is_lm=True) if args.tensorboard_dir is not None and args.tensorboard_dir != "": writer = SummaryWriter(args.tensorboard_dir) trainer.extend(TensorboardLogger(writer), trigger=(args.report_interval_iters, 'iteration')) trainer.run() check_early_stop(trainer, args.epoch) # compute perplexity for test set if args.test_label: logging.info('test the best model') torch_load(args.outdir + '/rnnlm.model.best', model) test = read_tokens(args.test_label, args.char_list_dict) n_test_tokens, n_test_oovs = count_tokens(test, unk) logging.info('#sentences in the test data = ' + str(len(test))) logging.info('#tokens in the test data = ' + str(n_test_tokens)) logging.info('oov rate in the test data = %.2f %%' % (n_test_oovs / n_test_tokens * 100)) test_iter = ParallelSentenceIterator(test, batch_size, max_length=args.maxlen, sos=eos, eos=eos, repeat=False) evaluator = LMEvaluator(test_iter, model, reporter, device=gpu_id) result = evaluator() compute_perplexity(result) logging.info(f"test perplexity: {result['perplexity']}")
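
# --- Sketch (assumption) of the LogReport postprocess used above. The code
# only shows that compute_perplexity fills the 'perplexity' and
# 'val_perplexity' keys consumed by PrintReport and by the final test log;
# this is a minimal guess at that behaviour (perplexity = exp of the reported
# cross-entropy loss), not the exact implementation from the source.
import numpy as np


def compute_perplexity_sketch(result):
    # convert reported losses into perplexities in place
    if "main/loss" in result:
        result["perplexity"] = np.exp(float(result["main/loss"]))
    if "validation/main/loss" in result:
        result["val_perplexity"] = np.exp(float(result["validation/main/loss"]))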