Example #1
0
def build_criterion(args, dataset):
    """Build the training criterion for *dataset*.

    Returns a label-smoothed cross-entropy criterion when
    ``args.label_smoothing`` is positive, otherwise plain cross-entropy.
    Both ignore the target dictionary's padding index.
    """
    pad = dataset.dst_dict.pad()
    if args.label_smoothing > 0:
        return criterions.LabelSmoothedCrossEntropyCriterion(
            args.label_smoothing, pad)
    return criterions.CrossEntropyCriterion(pad)
Example #2
0
    def __init__(self, opt, shared=None):
        """Set up the Fairseq agent: dictionary, fconv model, criterion, trainer.

        :param opt: option dict; keys read here include 'model_file',
            'truncate', and (via OptWrapper) the fconv architecture options.
        :param shared: if set, this is a shared copy and full initialization
            is skipped; only the base-class setup and reset() run.
        """
        # initialize defaults first
        super().__init__(opt, shared)
        if not shared:
            # this is not a shared instance of this class, so do full
            # initialization. if shared is set, only set up shared members.
            saved_state = None
            if opt.get('model_file') and os.path.isfile(opt['model_file']):
                # load model parameters if available
                print('Loading existing model params from ' +
                      opt['model_file'])
                new_opt, saved_state = self.load(opt['model_file'])
                # override options with stored ones
                opt = self._override_opt(new_opt)

            # attribute-style access to opt for fairseq's args-based APIs
            self.args = OptWrapper(opt)
            self.fairseq_dict = _make_fairseq_dict(DictionaryAgent(opt))
            self.id = 'Fairseq'
            # non-positive 'truncate' means "no truncation"
            self.truncate = opt['truncate'] if opt['truncate'] > 0 else None

            self.EOS = self.fairseq_dict[self.fairseq_dict.eos()]
            # 1x1 tensor holding the EOS index, reused as a decoder seed
            self.EOS_TENSOR = (torch.LongTensor(1, 1).fill_(
                self.fairseq_dict.eos()))
            self.NULL_IDX = self.fairseq_dict.pad()

            # NOTE(review): eval() parses option strings like
            # "[(512, 3)] * 20" into Python structures; this executes
            # arbitrary code if the options come from an untrusted source.
            encoder = fconv.Encoder(self.fairseq_dict,
                                    embed_dim=self.args.encoder_embed_dim,
                                    convolutions=eval(
                                        self.args.encoder_layers),
                                    dropout=self.args.dropout,
                                    max_positions=self.args.max_positions)
            decoder = fconv.Decoder(
                self.fairseq_dict,
                embed_dim=self.args.decoder_embed_dim,
                convolutions=eval(self.args.decoder_layers),
                out_embed_dim=self.args.decoder_out_embed_dim,
                attention=eval(self.args.decoder_attention),
                dropout=self.args.dropout,
                max_positions=self.args.max_positions)
            self.model = fconv.FConvModel(encoder, decoder)

            # from fairseq's build_criterion()
            if self.args.label_smoothing > 0:
                self.criterion = criterions.LabelSmoothedCrossEntropyCriterion(
                    self.args.label_smoothing, self.NULL_IDX)
            else:
                self.criterion = criterions.CrossEntropyCriterion(
                    self.NULL_IDX)

            self.trainer = MultiprocessingTrainer(self.args, self.model,
                                                  self.criterion)
            # restore weights loaded from model_file, if any
            if saved_state is not None:
                self.set_states(saved_state)
        self.reset()
def build_criterion(args, src_dict, dst_dict):
    """Return the training criterion for the given dictionaries.

    Picks label-smoothed cross-entropy when ``args.label_smoothing`` is
    positive; otherwise plain cross-entropy. Both criteria receive the
    full ``args`` and the target dictionary.
    """
    use_smoothing = args.label_smoothing > 0
    criterion_cls = (criterions.LabelSmoothedCrossEntropyCriterion
                     if use_smoothing
                     else criterions.CrossEntropyCriterion)
    return criterion_cls(args, dst_dict)
Example #4
0
def build_token_criterion(args, padding_idx):
    """Build the token-level training criterion.

    Uses label-smoothed cross-entropy when ``args.label_smoothing`` is
    positive, otherwise plain cross-entropy; *padding_idx* is excluded
    from the loss in both cases.
    """
    # token-level training
    if args.label_smoothing <= 0:
        return criterions.CrossEntropyCriterion(padding_idx)
    return criterions.LabelSmoothedCrossEntropyCriterion(
        args.label_smoothing, padding_idx)