def build_trainer(opt, device_id, model, fields, optim, model_saver=None):
    """
    Simplify `Trainer` creation based on user `opt`s.

    Args:
        opt (:obj:`Namespace`): user options (usually from argument parsing)
        model (:obj:`onmt.models.NMTModel`): the model to train
        fields (dict): dict of fields
        optim (:obj:`onmt.utils.Optimizer`): optimizer used during training
        model_saver (:obj:`onmt.models.ModelSaverBase`): the utility object
            used to save the model
    """
    # The target vocab is passed so the loss can size the generator's output
    # and locate the padding index to ignore (see the sketch after this
    # function).
    train_loss = build_loss_compute(model, fields["tgt"].vocab, opt)
    valid_loss = build_loss_compute(model, fields["tgt"].vocab, opt,
                                    train=False)

    # Truncation length for truncated BPTT in the decoder; 0 (the default)
    # disables truncation.
    trunc_size = opt.truncated_decoder  # Badly named...
    # Shard size for running the generator in parallel; default is 2.
    shard_size = opt.max_generator_batches
    # Gradient normalization method, e.g. "tokens".
    norm_method = opt.normalization
    # Accumulate gradients over accum_count batches before each update, for
    # an effective batch size of batch_size * accum_count (default 1).
    grad_accum_count = opt.accum_count
    # The number of GPUs is taken to be the number of distributed processes.
    n_gpu = opt.world_size
    if device_id >= 0:
        gpu_rank = opt.gpu_ranks[device_id]
    else:
        gpu_rank = 0
        n_gpu = 0
    # Verbosity level for per-GPU logging in multi-GPU runs.
    gpu_verbose_level = opt.gpu_verbose_level

    report_manager = build_report_manager(opt)
    trainer = Trainer(model, train_loss, valid_loss, optim, trunc_size,
                      shard_size, norm_method, grad_accum_count, n_gpu,
                      gpu_rank, gpu_verbose_level, report_manager,
                      model_saver=model_saver)
    return trainer
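
# A minimal sketch (an assumption about onmt.utils.loss.build_loss_compute,
# not its exact code) of why the target vocab is passed in: it supplies the
# padding index that the criterion must ignore, and its size fixes the
# generator's output dimension. The helper name below is hypothetical.
def _sketch_build_criterion(tgt_vocab):
    import torch.nn as nn
    padding_idx = tgt_vocab.stoi["<blank>"]  # "<blank>" is onmt's pad token
    # Sum (rather than average) per-token losses; normalization is applied
    # later according to norm_method.
    return nn.NLLLoss(ignore_index=padding_idx, reduction='sum')
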
def build_trainer(opt, device_id, model, fields, optim, model_saver=None):
    """
    Simplify `Trainer` creation based on user `opt`s.

    Args:
        opt (:obj:`Namespace`): user options (usually from argument parsing)
        model (:obj:`onmt.models.NMTModel`): the model to train
        fields (dict): dict of fields
        optim (:obj:`onmt.utils.Optimizer`): optimizer used during training
        model_saver (:obj:`onmt.models.ModelSaverBase`): the utility object
            used to save the model
    """
    # Modified variant: train_loss pairs the shard-based loss with a summed
    # NLLLoss (requires `import torch.nn as nn` at module level).
    train_loss = [build_loss_compute(model, fields["tgt"].vocab, opt),
                  nn.NLLLoss(reduction='sum')]
    valid_loss = build_loss_compute(model, fields["tgt"].vocab, opt,
                                    train=False)

    trunc_size = opt.truncated_decoder  # Badly named...
    shard_size = opt.max_generator_batches
    norm_method = opt.normalization
    grad_accum_count = opt.accum_count
    n_gpu = opt.world_size
    if device_id >= 0:
        gpu_rank = opt.gpu_ranks[device_id]
    else:
        gpu_rank = 0
        n_gpu = 0
    gpu_verbose_level = opt.gpu_verbose_level

    report_manager = build_report_manager(opt)
    trainer = Trainer(model, train_loss, valid_loss, optim, trunc_size,
                      shard_size, norm_method, grad_accum_count, n_gpu,
                      gpu_rank, gpu_verbose_level, report_manager,
                      model_saver=model_saver)
    return trainer
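
# Typical call site, for context. This is a sketch modeled on OpenNMT-py's
# train_single.py of the same era; `build_model`, `build_optim`,
# `build_model_saver`, the iterator factories, and the checkpoint are assumed
# to come from the surrounding codebase rather than defined here.
#
#     model = build_model(model_opt, opt, fields, checkpoint)
#     optim = build_optim(model, opt, checkpoint)
#     model_saver = build_model_saver(model_opt, opt, model, fields, optim)
#     trainer = build_trainer(opt, device_id, model, fields, optim,
#                             model_saver=model_saver)
#     trainer.train(train_iter_fct, valid_iter_fct,
#                   opt.train_steps, opt.valid_steps)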