def train(self, model, data, num_epochs=5, resume=False, dev_data=None,
          optimizer=None, teacher_forcing_ratio=0):
    """ Run training for a given model.

    Args:
        model (seq2seq.models): model to run training on; if `resume=True`,
            it is overwritten by the model loaded from the latest checkpoint.
        data (seq2seq.dataset.dataset.Dataset): dataset object to train on
        num_epochs (int, optional): number of epochs to run (default 5)
        resume (bool, optional): resume training from the latest checkpoint (default False)
        dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
        optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
            (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
        teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
    Returns:
        model (seq2seq.models): trained model.
    """
    # If training is set to resume
    if resume:
        latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.expt_dir)
        resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
        model = resume_checkpoint.model
        self.optimizer = resume_checkpoint.optimizer

        # A workaround to set the optimizer parameters properly
        resume_optim = self.optimizer.optimizer
        defaults = resume_optim.param_groups[0]
        defaults.pop('params', None)
        defaults.pop('initial_lr', None)
        self.optimizer.optimizer = resume_optim.__class__(model.parameters(), **defaults)

        start_epoch = resume_checkpoint.epoch
        step = resume_checkpoint.step
    else:
        start_epoch = 1
        step = 0
        if optimizer is None:
            optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)
        self.optimizer = optimizer

    self.logger.info("Optimizer: %s, Scheduler: %s" %
                     (self.optimizer.optimizer, self.optimizer.scheduler))

    self._train_epoches(data, model, num_epochs, start_epoch, step,
                        dev_data=dev_data,
                        teacher_forcing_ratio=teacher_forcing_ratio)
    return model
def train(self, train, val=None, num_epoch=200, resume=False):
    start_epoch = 0
    if resume:
        cp = Checkpoint.load(Checkpoint.get_latest_checkpoint('./experiment/gan'))
        self.model = cp.model
        start_epoch = cp.epoch + 1
    for epoch in range(start_epoch, num_epoch):
        logging.info('Epoch[%d] CycleGAN train' % epoch)
        train_iter, val_iter = torchtext.data.BucketIterator.splits(
            (train, val), batch_sizes=(1, 64), device=self.device,
            sort_key=lambda x: len(x.real_a), repeat=False)
        self._train_epoch(train_iter)
        self.evaluate(val_iter)
        Checkpoint(model=self.model, optimizer=None, epoch=epoch, step=0,
                   input_vocab=None, output_vocab=None).save('./experiment/gan')
def train(self, D, G, optimizer_D, optimizer_G, train, val=None,
          num_epoch=200, resume=False, opt=None):
    start_epoch = 0
    if resume:
        cp = Checkpoint.load(Checkpoint.get_latest_checkpoint('./experiment/gan'))
        self.model = cp.model
        start_epoch = cp.epoch + 1
    for epoch in range(start_epoch, num_epoch):
        logging.info('Epoch[%d] CycleGAN train' % epoch)
        train_iter, val_iter = torchtext.data.BucketIterator.splits(
            (train, val), batch_sizes=(1, 64), device=opt.device,
            sort_key=lambda x: len(x.real_a), repeat=False)
        self.train_epoch(D, G, optimizer_D, optimizer_G, train_iter)
def test_save_checkpoint_calls_torch_save(self, mock_open, mock_dill, mock_torch):
    epoch = 5
    step = 10
    optim = mock.Mock()
    state_dict = {'epoch': epoch, 'step': step, 'optimizer': optim}

    mock_model = mock.Mock()
    mock_vocab = mock.Mock()
    mock_open.return_value = mock.MagicMock()

    chk_point = Checkpoint(model=mock_model, optimizer=optim, epoch=epoch, step=step,
                           input_vocab=mock_vocab, output_vocab=mock_vocab)
    path = chk_point.save(self._get_experiment_dir())

    self.assertEquals(2, mock_torch.save.call_count)
    mock_torch.save.assert_any_call(
        state_dict, os.path.join(chk_point.path, Checkpoint.TRAINER_STATE_NAME))
    mock_torch.save.assert_any_call(
        mock_model, os.path.join(chk_point.path, Checkpoint.MODEL_NAME))

    self.assertEquals(2, mock_open.call_count)
    mock_open.assert_any_call(os.path.join(path, Checkpoint.INPUT_VOCAB_FILE), ANY)
    mock_open.assert_any_call(os.path.join(path, Checkpoint.OUTPUT_VOCAB_FILE), ANY)

    self.assertEquals(2, mock_dill.dump.call_count)
    mock_dill.dump.assert_any_call(mock_vocab,
                                   mock_open.return_value.__enter__.return_value)
def test(self, args, model, dataloader, scheduler, num_epochs=5, resume=False,
         dev_data=None, optimizer=None, teacher_forcing_ratio=1.0, save_dir='runs/exp'):
    # Testing always resumes from the latest checkpoint
    if resume:
        latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.model_dir)
        resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
        # model = resume_checkpoint.model
        model.load_state_dict(resume_checkpoint.model)
        self.optimizer = None
        self.args = args
        model.args = args
        start_epoch = resume_checkpoint.epoch
        step = resume_checkpoint.step
        print('Resume from ', latest_checkpoint_path)
        print('start_epoch : ', start_epoch)
        print('step : ', step)
        # evaluation itself restarts the epoch/step counters
        start_epoch = 1
        step = 0
    else:
        print('Please Resume !')
        raise RuntimeError('a checkpoint is required for testing (resume=True)')

    self._test_epoches(dataloader, model, num_epochs, start_epoch, step,
                       dev_data=dev_data,
                       teacher_forcing_ratio=teacher_forcing_ratio,
                       clip=args.clip, save_dir=save_dir, args=args)
    return model
def test(self, args, model, dataloader, resume=False, save_dir='runs/exp'):
    # Testing always resumes from the latest checkpoint
    if resume:
        latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.model_dir)
        resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
        model.load_state_dict(resume_checkpoint.model)
        self.optimizer = None
        self.args = args
        model.args = args
        start_epoch = resume_checkpoint.epoch
        step = resume_checkpoint.step
        print('Resume from ', latest_checkpoint_path)
        print('start_epoch : ', start_epoch)
        print('step : ', step)
        # evaluation itself restarts the epoch/step counters
        start_epoch = 1
        step = 0
    else:
        print('Please Resume !')
        raise RuntimeError('a checkpoint is required for testing (resume=True)')

    if args.mode == 'bleu_t1t2':
        print('Eval on bleu_t1&t2 !')
        self._test_epoches_t1t2(dataloader, model, step, save_dir=save_dir, args=args)
    else:
        print('Please choose t1 | t2 mode !')
        raise ValueError('unsupported mode: {}'.format(args.mode))
    return model
def load_checkpoint(checkpoint_name, expt_dir):
    if checkpoint_name is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, checkpoint_name)))
        checkpoint_path = os.path.join(expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                                       checkpoint_name)
    else:
        checkpoint_path = Checkpoint.get_latest_checkpoint(expt_dir)
    return Checkpoint.load(checkpoint_path)
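A minimal usage sketch for the loader above; the experiment directory and input sentence are hypothetical, and Predictor is assumed to come from the same seq2seq package used throughout these snippets:

# Hypothetical usage: checkpoint_name=None falls back to the latest checkpoint in expt_dir.
checkpoint = load_checkpoint(checkpoint_name=None, expt_dir='./experiment')
predictor = Predictor(checkpoint.model, checkpoint.input_vocab, checkpoint.output_vocab)
# predict() returns a list of output tokens, joined here for display
print(' '.join(predictor.predict('how are you'.split())))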
def __init__(self, ckpt_path='./Res/PretrainModel/2019_12_27_08_48_21/'):
    checkpoint = Checkpoint.load(ckpt_path)
    self.seq2seq = checkpoint.model
    self.input_vocab = checkpoint.input_vocab
    self.output_vocab = checkpoint.output_vocab
    self.predictor = Predictor(self.seq2seq, self.input_vocab, self.output_vocab)
def train(self, model, data, num_epochs=5, resume=False, dev_data=None,
          monitor_data={}, optimizer=None, teacher_forcing_ratio=0,
          learning_rate=0.001, checkpoint_path=None, top_k=5):
    """ Run training for a given model.

    Args:
        model (seq2seq.models): model to run training on; if `resume=True`,
            it is overwritten by the model loaded from the latest checkpoint.
        data (seq2seq.dataset.dataset.Dataset): dataset object to train on
        num_epochs (int, optional): number of epochs to run (default 5)
        resume (bool, optional): resume training from the latest checkpoint (default False)
        dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
        optimizer (str, optional): name of the optimizer to use
            ('adam', 'adagrad', 'adadelta', 'adamax', 'rmsprop', 'sgd';
            default: Adam with max_grad_norm=5)
        teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
        learning_rate (float, optional): learning rate used by the optimizer (default 0.001)
        checkpoint_path (str, optional): path to load the checkpoint from when
            training should be resumed
        top_k (int): how many models should be stored during training
    Returns:
        model (seq2seq.models): trained model.
    """
    # If training is set to resume
    if resume:
        resume_checkpoint = Checkpoint.load(checkpoint_path)
        model = resume_checkpoint.model
        self.optimizer = resume_checkpoint.optimizer

        # A workaround to set the optimizer parameters properly
        resume_optim = self.optimizer.optimizer
        defaults = resume_optim.param_groups[0]
        defaults.pop('params', None)
        defaults.pop('initial_lr', None)
        self.optimizer.optimizer = resume_optim.__class__(model.parameters(), **defaults)

        start_epoch = resume_checkpoint.epoch
        step = resume_checkpoint.step
    else:
        start_epoch = 1
        step = 0

    def get_optim(optim_name):
        optims = {'adam': optim.Adam, 'adagrad': optim.Adagrad,
                  'adadelta': optim.Adadelta, 'adamax': optim.Adamax,
                  'rmsprop': optim.RMSprop, 'sgd': optim.SGD,
                  None: optim.Adam}
        return optims[optim_name]

    self.optimizer = Optimizer(get_optim(optimizer)(model.parameters(), lr=learning_rate),
                               max_grad_norm=5)
    self.logger.info("Optimizer: %s, Scheduler: %s" %
                     (self.optimizer.optimizer, self.optimizer.scheduler))

    logs = self._train_epoches(data, model, num_epochs, start_epoch, step,
                               dev_data=dev_data, monitor_data=monitor_data,
                               teacher_forcing_ratio=teacher_forcing_ratio,
                               top_k=top_k)
    return model, logs
def predict_with_checkpoint(checkpoint_path, sequence, hierarchial=False,
                            remote=None, word_vectors=None):
    checkpoint = Checkpoint.load(checkpoint_path)
    seq2seq = checkpoint.model
    input_vocab = checkpoint.input_vocab
    output_vocab = checkpoint.output_vocab

    seq2seq.encoder.word_vectors, seq2seq.decoder.word_vectors = None, None
    if word_vectors is not None:
        input_vects = Word2Vectors(input_vocab, word_vectors, word_vectors.dim_size)
        output_vects = Word2Vectors(output_vocab, word_vectors, word_vectors.dim_size)
        seq2seq.encoder.word_vectors, seq2seq.decoder.word_vectors = input_vects, output_vects

    seq2seq.decoder = TopKDecoder(seq2seq.decoder, 5)

    if not hierarchial:
        predictor = Predictor(seq2seq, input_vocab, output_vocab)
        seq = sequence.strip().split()
    else:
        predictor = HierarchialPredictor(seq2seq, input_vocab, output_vocab)
        seq = ['|'.join(x.split()) for x in sequence]

    return ' '.join(predictor.predict(seq))
def __init__(self, checkpoint, is_predict_eos=True, is_symbol_rewriting=False):
    if isinstance(checkpoint, str):
        checkpoint = Checkpoint.load(checkpoint)

    self.is_predict_eos = is_predict_eos

    self.tabular_data_fields = get_tabular_data_fields(is_predict_eos=self.is_predict_eos)
    dic_data_fields = dict(self.tabular_data_fields)
    src = dic_data_fields["src"]
    tgt = dic_data_fields["tgt"]

    src.vocab = checkpoint.input_vocab
    tgt.vocab = checkpoint.output_vocab
    tgt.eos_id = tgt.vocab.stoi[tgt.SYM_EOS]
    tgt.sos_id = tgt.vocab.stoi[tgt.SYM_SOS]

    if is_symbol_rewriting:
        metric_names = ["symbol rewriting accuracy"]
    elif self.is_predict_eos:
        metric_names = ["word accuracy", "sequence accuracy"]
    else:
        metric_names = ["word accuracy", "final target accuracy"]
    self.metrics = get_metrics(metric_names, src, tgt, self.is_predict_eos)

    self.tgt = tgt
    self.src = src
def on_batch_end(self, batch, info=None):
    # this check is also hacky, occurs also in
    # supervised trainer to not compute the dev
    # loss too often
    if info['step'] % self.checkpoint_every == 0 or \
            info['step'] == info['total_steps']:
        total_loss, log_msg, model_name = \
            self.get_losses(info['losses'], info['metrics'], info['step'])

        max_eval_loss = max(self.loss_best)

        if total_loss < max_eval_loss:
            index_max = self.loss_best.index(max_eval_loss)
            # rm prev model
            if self.best_checkpoints[index_max] is not None:
                shutil.rmtree(os.path.join(self.expt_dir,
                                           self.best_checkpoints[index_max]))
            self.best_checkpoints[index_max] = model_name
            self.loss_best[index_max] = total_loss

            # save model
            Checkpoint(model=info['model'],
                       optimizer=self.trainer.optimizer,
                       epoch=info['epoch'],
                       step=info['step'],
                       input_vocab=self.data.fields[seq2seq.src_field_name].vocab,
                       output_vocab=self.data.fields[seq2seq.tgt_field_name].vocab
                       ).save(self.expt_dir, name=model_name)
def dev_predict(task_path, src_str, is_plot=True):
    """Helper used to visualize and understand why and what the model predicts.

    Args:
        task_path (str): path to the saved task directory containing, amongst
            others, the model.
        src_str (str): source sentence that will be used to predict.
        is_plot (bool, optional): whether to plot the attention pattern.

    Returns:
        out_words (list): decoder predictions.
        other (dictionary): additional information used for predictions.
        test (dictionary): additional information that is only stored in dev mode.
            These can include temporary variables that do not have to be stored
            in `other` but that can still be interesting to inspect.
    """
    check = Checkpoint.load(task_path)
    check.model.set_dev_mode()
    predictor = Predictor(check.model, check.input_vocab, check.output_vocab)
    out_words, other = predictor.predict(src_str.split())

    test = dict()
    for k, v in other["test"].items():
        tensor = v if isinstance(v, torch.Tensor) else torch.cat(v)
        test[k] = tensor.detach().cpu().numpy().squeeze()[:other["length"][0]]
        # except:  # for using "step"
        #     test[k] = v

    if is_plot:
        visualizer = AttentionVisualizer(task_path)
        visualizer(src_str)

    return out_words, other, test
def _evaluate(checkpoint_path,
              test_paths,
              metric_names=["word accuracy", "sequence accuracy", "final target accuracy"],
              loss_names=["nll"],
              max_len=50,
              batch_size=32,
              is_predict_eos=True,
              content_method=None):
    """Evaluates the models saved in a checkpoint."""
    results = []

    print("loading checkpoint from {}".format(os.path.join(checkpoint_path)))
    checkpoint = Checkpoint.load(checkpoint_path)
    seq2seq = checkpoint.model

    tabular_data_fields = get_tabular_data_fields(content_method=content_method,
                                                  is_predict_eos=is_predict_eos)
    dic_data_fields = dict(tabular_data_fields)
    src = dic_data_fields["src"]
    tgt = dic_data_fields["tgt"]

    src.vocab = checkpoint.input_vocab
    tgt.vocab = checkpoint.output_vocab
    tgt.eos_id = tgt.vocab.stoi[tgt.SYM_EOS]
    tgt.sos_id = tgt.vocab.stoi[tgt.SYM_SOS]

    for test_path in test_paths:
        test = get_data(test_path, max_len, tabular_data_fields)

        metrics = get_metrics(metric_names, src, tgt, is_predict_eos)
        losses, loss_weights = get_losses(loss_names, tgt, is_predict_eos)

        evaluator = Evaluator(loss=losses, batch_size=batch_size, metrics=metrics)
        data_func = SupervisedTrainer.get_batch_data
        losses, metrics = evaluator.evaluate(model=seq2seq, data=test,
                                             get_batch_data=data_func)

        total_loss, log_msg, _ = SupervisedTrainer.get_losses(losses, metrics, 0)

        dataset = test_path.split('/')[-1].split('.')[0]
        results.append([dataset, total_loss] + [metric.get_val() for metric in metrics])

    results_df = pd.DataFrame(results,
                              columns=["Dataset", "Loss"] +
                                      [metric.name for metric in metrics])
    results_df = results_df.melt(id_vars=['Dataset'], var_name="Metric", value_name='Value')

    return results_df
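A sketch of how _evaluate might be invoked; the checkpoint path and test file names are placeholders, not taken from the original code:

# Hypothetical call: evaluate one saved checkpoint on two test splits and print the
# long-format (Dataset, Metric, Value) results.
results_df = _evaluate(checkpoint_path='./experiment/checkpoints/best_model',
                       test_paths=['data/test_standard.tsv', 'data/test_long.tsv'],
                       batch_size=64)
print(results_df.to_string(index=False))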
def test():
    checkpoint_path = 'model'
    checkpoint = Checkpoint.load(checkpoint_path)
    model = checkpoint.model
    input_vocab = checkpoint.input_vocab
    output_vocab = checkpoint.output_vocab
    predictor = KinasePredictor(model, input_vocab, output_vocab)
    prediction(predictor, output_vocab)
def test_get_latest_checkpoint(self, mock_listdir):
    mock_listdir.return_value = ['2017_05_22_09_47_26',
                                 '2017_05_22_09_47_31',
                                 '2017_05_23_10_47_29']
    latest_checkpoint = Checkpoint.get_latest_checkpoint(self.EXP_DIR)
    self.assertEquals(latest_checkpoint,
                      os.path.join(self.EXP_DIR, 'checkpoints/2017_05_23_10_47_29'))
def load_model(expt_dir, model_name):
    checkpoint_path = os.path.join(expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, model_name)
    checkpoint = Checkpoint.load(checkpoint_path)
    model = checkpoint.model
    input_vocab = checkpoint.input_vocab
    output_vocab = checkpoint.output_vocab
    return model, input_vocab, output_vocab
def train(self, encoder, decoder, data, num_epochs=5, resume=False,
          dev_data=None, optimizer=None, is_training=0):
    if resume:
        latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.expt_dir)
        resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
        decoder = resume_checkpoint.model
        self.optimizer = resume_checkpoint.optimizer

        # A workaround to set the optimizer parameters properly
        resume_optim = self.optimizer.optimizer
        defaults = resume_optim.param_groups[0]
        defaults.pop('params', None)
        defaults.pop('initial_lr', None)
        self.optimizer.optimizer = resume_optim.__class__(decoder.parameters(), **defaults)

        start_epoch = resume_checkpoint.epoch
        step = resume_checkpoint.step
    else:
        start_epoch = 1
        step = 0
        if optimizer is None:
            optimizer = Optimizer(optim.Adam(decoder.parameters()), max_grad_norm=5)
        self.optimizer = optimizer

    self.logger.info("Optimizer: %s, Scheduler: %s" %
                     (self.optimizer.optimizer, self.optimizer.scheduler))

    self._train_epoches(data, encoder, decoder, num_epochs, start_epoch, step,
                        dev_data=dev_data, is_training=is_training)
    return decoder
def train(self, model, data, num_epochs=5, resume=False, dev_data=None,
          optimizer=None, teacher_forcing_ratio=0):
    """ Run training for a given model.

    Args:
        model (seq2seq.models): model to run training on; if `resume=True`,
            it is overwritten by the model loaded from the latest checkpoint.
        data (seq2seq.dataset.dataset.Dataset): dataset object to train on
        num_epochs (int, optional): number of epochs to run (default 5)
        resume (bool, optional): resume training from the latest checkpoint (default False)
        dev_data (seq2seq.dataset.dataset.Dataset, optional): dev Dataset (default None)
        optimizer (seq2seq.optim.Optimizer, optional): optimizer for training
            (default: Optimizer(pytorch.optim.Adam, max_grad_norm=5))
        teacher_forcing_ratio (float, optional): teacher forcing ratio (default 0)
    Returns:
        model (seq2seq.models): trained model.
    """
    # If training is set to resume
    if resume:
        latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.expt_dir)
        resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
        model = resume_checkpoint.model
        self.optimizer = resume_checkpoint.optimizer

        # A workaround to set the optimizer parameters properly
        resume_optim = self.optimizer.optimizer
        defaults = resume_optim.param_groups[0]
        defaults.pop('params', None)
        self.optimizer.optimizer = resume_optim.__class__(model.parameters(), **defaults)

        start_epoch = resume_checkpoint.epoch
        step = resume_checkpoint.step
    else:
        start_epoch = 1
        step = 0
        if optimizer is None:
            optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)
        self.optimizer = optimizer

    self.logger.info("Optimizer: %s, Scheduler: %s" %
                     (self.optimizer.optimizer, self.optimizer.scheduler))

    self._train_epoches(data, model, num_epochs, start_epoch, step,
                        dev_data=dev_data,
                        teacher_forcing_ratio=teacher_forcing_ratio)
    return model
def train(self, args, model, dataloader, scheduler, num_epochs=5, resume=False,
          dev_data=None, optimizer=None, teacher_forcing_ratio=1.0, save_dir='runs/exp'):
    # If training is set to resume
    if resume:
        latest_checkpoint_path = Checkpoint.get_latest_checkpoint(self.model_dir)
        resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
        # model = resume_checkpoint.model
        model.load_state_dict(resume_checkpoint.model)
        self.optimizer = optimizer
        self.args = args
        model.args = args
        start_epoch = resume_checkpoint.epoch
        step = resume_checkpoint.step
        print('Resume from ', latest_checkpoint_path)
        print('start_epoch : ', start_epoch)
        print('step : ', step)

        if args.adam:
            self.optimizer = torch.optim.Adam(model.parameters())
            optimizer.load_state_dict(resume_checkpoint.optimizer)
        elif args.sgd:
            self.optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
            optimizer.load_state_dict(resume_checkpoint.optimizer)

        self.scheduler = lr_scheduler.StepLR(self.optimizer, args.decay_steps,
                                             gamma=args.decay_factor)
        # for i in range(step):
        #     self.scheduler.step()
        self.scheduler._step_count = step
        for param_group in self.optimizer.param_groups:
            print('learning rate', param_group['lr'], step)
    else:
        start_epoch = 1
        step = 0
        if optimizer is None:
            optimizer = Optimizer(optim.Adam(model.parameters()), max_grad_norm=5)
        self.optimizer = optimizer
        self.scheduler = scheduler

    # self.logger.info("Optimizer: %s, Scheduler: %s" %
    #                  (self.optimizer.optimizer, self.optimizer.scheduler))

    if args.only_sql:
        self._train_sql_epoches(dataloader, model, num_epochs, start_epoch, step,
                                dev_data=dev_data,
                                teacher_forcing_ratio=teacher_forcing_ratio,
                                clip=args.clip, save_dir=save_dir, args=args)
    else:
        self._train_epoches(dataloader, model, num_epochs, start_epoch, step,
                            dev_data=dev_data,
                            teacher_forcing_ratio=teacher_forcing_ratio,
                            clip=args.clip, save_dir=save_dir, args=args)
    return model
def test_save_checkpoint_saves_vocab_if_not_exist(self, mock_torch, mock_os_path_isfile):
    epoch = 5
    step = 10
    model_dict = {"key1": "val1"}
    opt_dict = {"key2": "val2"}

    mock_model = mock.Mock()
    mock_model.state_dict.return_value = model_dict
    input_vocab = mock.Mock()
    output_vocab = mock.Mock()

    chk_point = Checkpoint(model=mock_model, optimizer_state_dict=opt_dict,
                           epoch=epoch, step=step,
                           input_vocab=input_vocab, output_vocab=output_vocab)
    chk_point.save(self._get_experiment_dir())

    input_vocab.save.assert_called_once_with(os.path.join(chk_point.path, "input_vocab.pt"))
    output_vocab.save.assert_called_once_with(os.path.join(chk_point.path, "output_vocab.pt"))
def test_save_checkpoint_calls_torch_save(self, mock_torch):
    epoch = 5
    step = 10
    opt_state_dict = {"key2": "val2"}
    state_dict = {'epoch': epoch, 'step': step, 'optimizer': opt_state_dict}

    mock_model = mock.Mock()
    chk_point = Checkpoint(model=mock_model, optimizer_state_dict=opt_state_dict,
                           epoch=epoch, step=step,
                           input_vocab=mock.Mock(), output_vocab=mock.Mock())
    chk_point.save(self._get_experiment_dir())

    self.assertEquals(2, mock_torch.save.call_count)
    mock_torch.save.assert_any_call(
        state_dict, os.path.join(chk_point.path, Checkpoint.TRAINER_STATE_NAME))
    mock_torch.save.assert_any_call(
        mock_model, os.path.join(chk_point.path, Checkpoint.MODEL_NAME))
def load_model(expt_dir, model_name):
    checkpoint_path = os.path.join(expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, model_name)
    checkpoint = Checkpoint.load(checkpoint_path)
    seq2seq = checkpoint.model
    input_vocab = checkpoint.input_vocab
    output_vocab = checkpoint.output_vocab
    seq2seq.eval()
    return seq2seq, input_vocab, output_vocab
def init_model():
    if args.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(args.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                         args.load_checkpoint)))
        checkpoint_path = os.path.join(args.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                                       args.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        model = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:
        # build the vocabulary index and embedding
        spk.build_vocab(train, vectors="glove.6B.100d")
        src.build_vocab(train, max_size=args.vocab_size, vectors="glove.6B.100d")
        tgt.build_vocab(train, max_size=args.vocab_size, vectors="glove.6B.100d")
        input_vocab, output_vocab = src.vocab, tgt.vocab

        # Initialize model
        encoder = EncoderRNN(
            vocab_size=len(input_vocab),
            max_len=args.max_len,
            vectors=input_vocab.vectors if args.embedding else None,
            input_dropout_p=args.input_dropout_p,
            dropout_p=args.dropout_p,
            hidden_size=args.hidden_size,
            bidirectional=args.bidirectional,
            variable_lengths=True)
        decoder = SpkDecoderRNN(
            num_spk=args.num_spk,
            spk_embed_size=args.spk_embed_size,
            vocab_size=len(output_vocab),
            max_len=args.max_len,
            hidden_size=args.hidden_size * 2 if args.bidirectional else args.hidden_size,
            dropout_p=args.dropout_p,
            input_dropout_p=args.input_dropout_p,
            vectors=input_vocab.vectors if args.embedding else None,
            use_attention=True,
            bidirectional=args.bidirectional,
            eos_id=tgt.eos_id,
            sos_id=tgt.sos_id)
        model = SpkSeq2seq(encoder, decoder)
        if torch.cuda.is_available():
            model.cuda()

        for param in model.parameters():
            param.data.uniform_(-0.08, 0.08)

    return model, input_vocab, output_vocab
def _train(self, model, training_data, validation_data, crit, optimizer, opt, TGT_FIELD):
    ''' Start training '''
    log_train_file = None
    log_valid_file = None

    if opt.log:
        log_train_file = opt.log + '.train.log'
        log_valid_file = opt.log + '.valid.log'
        print('[Info] Training performance will be written to file: {} and {}'.format(
            log_train_file, log_valid_file))
        with open(log_train_file, 'w') as log_tf, open(log_valid_file, 'w') as log_vf:
            log_tf.write('epoch,loss,ppl,accuracy\n')
            log_vf.write('epoch,loss,ppl,accuracy\n')

    valid_accus = []
    for epoch_i in range(opt.epoch):
        print('[ Epoch', epoch_i, ']')

        train_iter, val_iter = torchtext.data.BucketIterator.splits(
            (training_data, validation_data),
            batch_sizes=(opt.batch_size, opt.batch_size),
            device=opt.device,
            sort_key=lambda x: len(x.src),
            repeat=False)

        start = time.time()
        train_loss, train_accu = self.train_epoch(model, train_iter, crit, optimizer)
        print(' - (Training)   ppl: {ppl: 8.5f}, accuracy: {accu:3.3f} %, '
              'elapse: {elapse:3.3f} min'.format(
                  ppl=math.exp(min(train_loss, 100)),
                  accu=100 * train_accu,
                  elapse=(time.time() - start) / 60))

        start = time.time()
        valid_loss, valid_accu, bleu = self.evaluate(model, val_iter, crit, TGT_FIELD)
        print(' - (Validation) ppl: %8.5f, accuracy: %3.3f%%, BLEU %2.2f, elapse: %3.3f min' % (
            math.exp(min(valid_loss, 100)), 100 * valid_accu, bleu,
            (time.time() - start) / 60))

        valid_accus += [valid_accu]

        # save model
        Checkpoint(model=model, optimizer=None, epoch=epoch_i, step=0,
                   input_vocab=None, output_vocab=None).save('./experiment/transformer')

        if log_train_file and log_valid_file:
            with open(log_train_file, 'a') as log_tf, open(log_valid_file, 'a') as log_vf:
                log_tf.write('{epoch},{loss: 8.5f},{ppl: 8.5f},{accu:3.3f}\n'.format(
                    epoch=epoch_i, loss=train_loss,
                    ppl=math.exp(min(train_loss, 100)), accu=100 * train_accu))
                log_vf.write('{epoch},{loss: 8.5f},{ppl: 8.5f},{accu:3.3f}\n'.format(
                    epoch=epoch_i, loss=valid_loss,
                    ppl=math.exp(min(valid_loss, 100)), accu=100 * valid_accu))
def load_checkpoint(experiment_directory, checkpoint):
    checkpoint_path = os.path.join(
        experiment_directory,
        Checkpoint.CHECKPOINT_DIR_NAME,
        checkpoint,
    )
    logging.info('Loading checkpoint from {}'.format(checkpoint_path))
    checkpoint = Checkpoint.load(checkpoint_path)
    seq2seq = checkpoint.model
    input_vocab = checkpoint.input_vocab
    output_vocab = checkpoint.output_vocab
    return seq2seq, input_vocab, output_vocab
def load_model(opt):
    checkpoint_path = os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                                   opt.load_checkpoint)
    checkpoint = Checkpoint.load(checkpoint_path)
    model = checkpoint.model
    src_vocab = checkpoint.input_vocab
    tgt_vocab = checkpoint.output_vocab
    if opt.verbose:
        print('Loaded model')
    # model.eval()
    return model, src_vocab, tgt_vocab
def __init__(self, device='cuda', model_path='./PretrainModel/Seq2Rank/',
             model_file='seq2rank.ckpt'):
    try:
        checkpoint = Checkpoint.load(os.path.join(model_path, model_file))
    except Exception:
        logging.error(
            '[ERROR] [Seq2Rank] Pretrain Encode model load failed on {0}'.format(
                os.path.join(model_path, model_file)))
        # re-raise so a failed load does not fall through to an undefined checkpoint
        raise
    self.seq2seq = checkpoint.model
    self.input_vocab = checkpoint.input_vocab
    self.output_vocab = checkpoint.output_vocab
    self.device = device
def __call__(self, val_loss, model, optimizer, epoch, step,
             input_vocab, output_vocab, expt_dir):
    score = -val_loss

    if self.best_score is None:
        self.best_score = score
        Checkpoint(model=model, optimizer=optimizer, epoch=epoch, step=step,
                   input_vocab=input_vocab,
                   output_vocab=output_vocab).save(expt_dir + '/best_model')
    elif score < self.best_score + self.delta:
        self.counter += 1
        print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True
    else:
        self.best_score = score
        if self.verbose:
            # report the previous best loss before overwriting it
            print(f'Validation loss decreased '
                  f'({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        self.val_loss_min = val_loss
        Checkpoint(model=model, optimizer=optimizer, epoch=epoch, step=step,
                   input_vocab=input_vocab,
                   output_vocab=output_vocab).save(expt_dir + '/lowest_loss')
        self.counter = 0
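A sketch of how this early-stopping callback could be driven from a validation loop; the constructor arguments, the evaluate helper, and the surrounding variables are assumptions rather than part of the original snippet:

# Hypothetical wiring: after each validation pass the callback checkpoints improvements
# and raises the early_stop flag once patience is exhausted.
early_stopping = EarlyStopping(patience=5, delta=0.0, verbose=True)
for epoch in range(1, num_epochs + 1):
    dev_loss = evaluate(model, dev_data)  # assumed helper returning a scalar dev loss
    early_stopping(dev_loss, model, optimizer, epoch, step,
                   input_vocab, output_vocab, expt_dir)
    if early_stopping.early_stop:
        print('Stopping early at epoch', epoch)
        break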
def load_model_from_checkpoint(opt, src, tgt):
    logging.info("loading checkpoint from {}".format(
        os.path.join(opt.output_dir, opt.load_checkpoint)))
    checkpoint_path = os.path.join(opt.output_dir, opt.load_checkpoint)
    checkpoint = Checkpoint.load(checkpoint_path)
    seq2seq = checkpoint.model
    input_vocab = checkpoint.input_vocab
    src.vocab = input_vocab
    output_vocab = checkpoint.output_vocab
    tgt.vocab = output_vocab
    tgt.eos_id = tgt.vocab.stoi[tgt.SYM_EOS]
    tgt.sos_id = tgt.vocab.stoi[tgt.SYM_SOS]
    return seq2seq, input_vocab, output_vocab
def test_load(self, mock_vocabulary, mock_torch):
    dummy_vocabulary = mock.Mock()
    mock_optimizer_state_dict = mock.Mock()
    torch_dict = {"optimizer": mock_optimizer_state_dict, "epoch": 5, "step": 10}
    mock_torch.load.return_value = torch_dict
    mock_vocabulary.load.return_value = dummy_vocabulary

    loaded_chk_point = Checkpoint.load("mock_checkpoint_path")

    mock_torch.load.assert_any_call(
        os.path.join('mock_checkpoint_path', Checkpoint.TRAINER_STATE_NAME))
    mock_torch.load.assert_any_call(
        os.path.join("mock_checkpoint_path", Checkpoint.MODEL_NAME))

    self.assertEquals(loaded_chk_point.epoch, torch_dict['epoch'])
    self.assertEquals(loaded_chk_point.optimizer_state_dict, torch_dict['optimizer'])
    self.assertEquals(loaded_chk_point.step, torch_dict['step'])