def build_criterion(args, dataset):
    """Pick the training criterion from the command-line options.

    Uses the target dictionary's padding index so that pad tokens are
    ignored by the loss. A positive ``--label-smoothing`` value selects
    the label-smoothed variant; otherwise plain cross-entropy is used.

    Args:
        args: parsed options; only ``label_smoothing`` is read here.
        dataset: provides ``dst_dict`` (target-side dictionary).

    Returns:
        A fairseq criterion instance.
    """
    pad_idx = dataset.dst_dict.pad()
    smoothing = args.label_smoothing
    if smoothing > 0:
        return criterions.LabelSmoothedCrossEntropyCriterion(smoothing, pad_idx)
    return criterions.CrossEntropyCriterion(pad_idx)
def __init__(self, opt, shared=None):
    """Set up the fairseq agent: dictionary, fconv model, criterion, trainer.

    Args:
        opt: ParlAI-style option dict. Keys read here: ``model_file``,
            ``truncate``, plus everything consumed by ``OptWrapper`` /
            ``DictionaryAgent``.
        shared: if set, this is a shared copy and heavy initialization
            (model/trainer construction, checkpoint loading) is skipped.
    """
    # initialize defaults first
    super().__init__(opt, shared)
    if not shared:
        # this is not a shared instance of this class, so do full
        # initialization. if shared is set, only set up shared members.
        saved_state = None
        if opt.get('model_file') and os.path.isfile(opt['model_file']):
            # load model parameters if available
            print('Loading existing model params from ' + opt['model_file'])
            new_opt, saved_state = self.load(opt['model_file'])
            # override options with stored ones — must happen before
            # OptWrapper(opt) below so the restored settings take effect
            opt = self._override_opt(new_opt)

        self.args = OptWrapper(opt)
        self.fairseq_dict = _make_fairseq_dict(DictionaryAgent(opt))
        self.id = 'Fairseq'
        # truncate <= 0 means "no truncation"
        self.truncate = opt['truncate'] if opt['truncate'] > 0 else None
        self.EOS = self.fairseq_dict[self.fairseq_dict.eos()]
        # 1x1 tensor holding the EOS index, used to seed decoding
        self.EOS_TENSOR = (torch.LongTensor(1, 1)
                           .fill_(self.fairseq_dict.eos()))
        self.NULL_IDX = self.fairseq_dict.pad()

        # NOTE(review): eval() on the layer/attention option strings is a
        # code-injection risk if options ever come from untrusted input —
        # consider ast.literal_eval. Kept as-is here (doc-only change).
        encoder = fconv.Encoder(
            self.fairseq_dict,
            embed_dim=self.args.encoder_embed_dim,
            convolutions=eval(self.args.encoder_layers),
            dropout=self.args.dropout,
            max_positions=self.args.max_positions)
        decoder = fconv.Decoder(
            self.fairseq_dict,
            embed_dim=self.args.decoder_embed_dim,
            convolutions=eval(self.args.decoder_layers),
            out_embed_dim=self.args.decoder_out_embed_dim,
            attention=eval(self.args.decoder_attention),
            dropout=self.args.dropout,
            max_positions=self.args.max_positions)
        self.model = fconv.FConvModel(encoder, decoder)

        # from fairseq's build_criterion(): label smoothing selects the
        # smoothed loss, otherwise plain cross-entropy (pad ignored)
        if self.args.label_smoothing > 0:
            self.criterion = criterions.LabelSmoothedCrossEntropyCriterion(
                self.args.label_smoothing, self.NULL_IDX)
        else:
            self.criterion = criterions.CrossEntropyCriterion(
                self.NULL_IDX)

        self.trainer = MultiprocessingTrainer(self.args, self.model,
                                              self.criterion)
        # restore checkpoint weights only after the trainer wraps the model
        if saved_state is not None:
            self.set_states(saved_state)
    self.reset()
def build_criterion(args, src_dict, dst_dict):
    """Construct the loss criterion for training.

    ``src_dict`` is part of the fairseq builder signature but is not
    needed by either criterion; only the target dictionary is passed on.

    Args:
        args: parsed options; ``label_smoothing`` chooses the criterion.
        src_dict: source-side dictionary (unused here).
        dst_dict: target-side dictionary, forwarded to the criterion.

    Returns:
        A fairseq criterion instance built from ``args`` and ``dst_dict``.
    """
    use_smoothing = args.label_smoothing > 0
    criterion_cls = (criterions.LabelSmoothedCrossEntropyCriterion
                     if use_smoothing
                     else criterions.CrossEntropyCriterion)
    return criterion_cls(args, dst_dict)
def build_token_criterion(args, padding_idx):
    """Build the token-level training criterion.

    Args:
        args: parsed options; ``label_smoothing`` > 0 selects the
            label-smoothed cross-entropy loss.
        padding_idx: index of the pad token, excluded from the loss.

    Returns:
        A fairseq criterion instance.
    """
    # token-level training
    smoothing = args.label_smoothing
    if smoothing > 0:
        return criterions.LabelSmoothedCrossEntropyCriterion(
            smoothing, padding_idx)
    return criterions.CrossEntropyCriterion(padding_idx)