Code example #1
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure all arguments are present in older models
        base_bi_lm_architecture(args)

        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = args.tokens_per_sample
        if not getattr(args, "max_target_positions", None):
            args.max_target_positions = args.tokens_per_sample

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
                max_char_len=args.max_char_len,
                char_inputs=args.char_inputs,
            )
        else:
            embed_tokens = Embedding(len(task.dictionary),
                                     args.decoder_embed_dim,
                                     task.dictionary.pad())

        logger.info(args)

        decoder = BiTransformerDecoder(args, task.output_dictionary,
                                       embed_tokens)
        return BiTransformerLanguageModel(decoder)
Code example #2
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = args.tokens_per_sample
        if not hasattr(args, 'max_target_positions'):
            args.max_target_positions = args.tokens_per_sample

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        else:
            embed_tokens = Embedding(len(task.dictionary),
                                     args.decoder_input_dim,
                                     task.dictionary.pad())

        decoder = TransformerDecoder(args,
                                     task.dictionary,
                                     embed_tokens,
                                     no_encoder_attn=True,
                                     final_norm=False)
        return TransformerLanguageModel(decoder)
Code example #3
    def test_character_token_embedder(self):
        vocab = Dictionary()
        vocab.add_symbol('hello')
        vocab.add_symbol('there')

        embedder = CharacterTokenEmbedder(vocab, [(2, 16), (4, 32), (8, 64),
                                                  (16, 2)], 64, 5, 2)

        test_sents = [['hello', 'unk', 'there'], ['there'], ['hello', 'there']]
        max_len = max(len(s) for s in test_sents)
        input = torch.LongTensor(len(test_sents),
                                 max_len + 2).fill_(vocab.pad())
        for i in range(len(test_sents)):
            input[i][0] = vocab.eos()
            for j in range(len(test_sents[i])):
                input[i][j + 1] = vocab.index(test_sents[i][j])
            input[i][j + 2] = vocab.eos()
        embs = embedder(input)

        assert embs.size() == (len(test_sents), max_len + 2, 5)
        self.assertAlmostEqual(embs[0][0], embs[1][0])
        self.assertAlmostEqual(embs[0][0], embs[0][-1])
        self.assertAlmostEqual(embs[0][1], embs[2][1])
        self.assertAlmostEqual(embs[0][3], embs[1][1])

        embs.sum().backward()
        assert embedder.char_embeddings.weight.grad is not None
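For quick reference, here is a minimal standalone sketch distilled from the test above. The import paths (fairseq.data.Dictionary, fairseq.modules.CharacterTokenEmbedder) are assumed from fairseq's module layout, and the filter list and dimensions are illustrative values taken from the test, not requirements.

    import torch
    from fairseq.data import Dictionary  # assumed import path
    from fairseq.modules import CharacterTokenEmbedder  # assumed import path

    vocab = Dictionary()
    vocab.add_symbol('hello')

    # filters are (kernel_width, out_channels) pairs for the character CNN;
    # 64 = char embedding dim, 5 = output word embedding dim, 2 = highway layers
    embedder = CharacterTokenEmbedder(vocab, [(2, 16), (4, 32)], 64, 5, 2)

    # a batch of token indices, shape (batch, seq_len); special indices come from the vocab
    tokens = torch.LongTensor([[vocab.eos(), vocab.index('hello'), vocab.eos()]])
    embs = embedder(tokens)   # shape: (1, 3, 5)
    embs.sum().backward()     # gradients reach embedder.char_embeddings.weight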
Code example #4
File: bitransformer.py  Project: mbevila/qbert
    def build_model_decoder(cls, args, dictionary, output_dictionary=None):

        if output_dictionary is None:
            output_dictionary = dictionary

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(dictionary), dictionary.pad(), args.decoder_input_dim,
                args.adaptive_input_factor, args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int))
        else:
            embed_tokens = Embedding(len(dictionary), args.decoder_input_dim,
                                     dictionary.pad())

        return BiTransformerDecoder(args,
                                    output_dictionary,
                                    embed_tokens,
                                    no_encoder_attn=True,
                                    final_norm=False)
Code example #5
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if getattr(args, 'max_target_positions', None) is None:
            args.max_target_positions = getattr(args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.source_dictionary, eval(args.character_filters),
                args.character_embedding_dim, args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(task.source_dictionary), task.source_dictionary.pad(), args.decoder_input_dim,
                args.adaptive_input_factor, args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int),
            )
        else:
            embed_tokens = Embedding(len(task.source_dictionary), args.decoder_input_dim, task.source_dictionary.pad())

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = TransformerDecoder(
            args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
        )
        return TransformerLanguageModel(decoder)
Code example #6
File: lightconv_lm.py  Project: skeshaw/LoReNMT
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = args.tokens_per_sample
        if not hasattr(args, 'max_target_positions'):
            args.max_target_positions = args.tokens_per_sample

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(task.dictionary, eval(args.character_filters),
                                                  args.character_embedding_dim,
                                                  args.decoder_embed_dim,
                                                  args.char_embedder_highway_layers,
                                                  )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(len(task.dictionary), task.dictionary.pad(), args.decoder_input_dim,
                                         args.adaptive_input_factor, args.decoder_embed_dim,
                                         options.eval_str_list(args.adaptive_input_cutoff, type=int))
        else:
            embed_tokens = Embedding(len(task.dictionary), args.decoder_input_dim, task.dictionary.pad())

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = LightConvDecoder(args, task.output_dictionary, embed_tokens, no_encoder_attn=True, final_norm=False)
        return LightConvLanguageModel(decoder)
Code example #7
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if hasattr(args, "decoder_layers_to_keep"):
            args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

        if getattr(args, 'max_target_positions', None) is None:
            args.max_target_positions = getattr(args, 'tokens_per_sample',
                                                DEFAULT_MAX_TARGET_POSITIONS)

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.source_dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            print("Adaptive Input " + str(args.adaptive_input))
            print("Adaptive Cutoff: " + str(args.adaptive_input_cutoff))
            print("Vocab Size: " + str(len(task.source_dictionary.symbols)))
            embed_tokens = AdaptiveInput(
                len(task.source_dictionary),
                task.source_dictionary.pad(),
                args.decoder_input_dim,
                args.adaptive_input_factor,
                args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            embed_tokens = cls.build_embedding(args, task.source_dictionary,
                                               args.decoder_input_dim)

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = TransformerDecoder(
            args,
            task.target_dictionary,
            embed_tokens,
            no_encoder_attn=True,
        )
        return cls(decoder)
Code example #8
File: sequence_tagging.py  Project: sakib1486/ewiser
    def build_model_input(cls, args, dictionary):
        # make sure all arguments are present in older fairseq_ext

        args.context_embeddings = getattr(args, 'context_embeddings', False)
        args.context_embeddings_layers = getattr(args,
                                                 'context_embeddings_layers',
                                                 [-1])

        args.max_source_positions = args.tokens_per_sample
        args.max_target_positions = args.tokens_per_sample

        if args.context_embeddings:
            if args.context_embeddings_type == 'bert':
                embed_tokens = BERTEmbedder(
                    args.context_embeddings_bert_model,
                    layers=args.context_embeddings_layers)

            elif args.context_embeddings_type == 'transformers':
                embed_tokens = TransformerEmbedder(
                    args.context_embeddings_bert_model,
                    layers=args.context_embeddings_layers)

            else:
                raise NotImplementedError
        elif args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(dictionary), dictionary.pad(), args.decoder_input_dim,
                args.adaptive_input_factor, args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int))
        else:
            args.decoder_embed_pretrained = getattr(
                args, 'decoder_embed_pretrained', '')
            if args.decoder_embed_pretrained:
                embed_tokens = load_pretrained_embedding_from_file(
                    args.decoder_embed_pretrained, dictionary,
                    args.decoder_input_dim)
            else:
                embed_tokens = Embedding(len(dictionary),
                                         args.decoder_input_dim,
                                         dictionary.pad())

        return embed_tokens
Code example #9
File: transformer_lm.py  Project: sdadas/fairseq
    def build_model(cls, args, task):
        """Build a new model instance."""

        if args.decoder_layers_to_keep:
            args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

        if safe_getattr(args, "max_target_positions", None) is None:
            args.max_target_positions = safe_getattr(
                args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS
            )

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.source_dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(task.source_dictionary),
                task.source_dictionary.pad(),
                args.decoder_input_dim,
                args.adaptive_input_factor,
                args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            embed_tokens = cls.build_embedding(
                args, task.source_dictionary, args.decoder_input_dim
            )

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert (
                args.adaptive_softmax_cutoff == args.adaptive_input_cutoff
            ), "{} != {}".format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff
            )
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = TransformerDecoder(
            args, task.target_dictionary, embed_tokens, no_encoder_attn=True
        )
        return cls(decoder)
Code example #10
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if hasattr(args, 'no_tie_adaptive_proj') and args.no_tie_adaptive_proj is False:
            # backward compatibility
            args.tie_adaptive_proj = True

        if not hasattr(args, 'max_positions'):
            args.max_positions = args.tokens_per_sample

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(task.dictionary), task.dictionary.pad(), args.input_dim,
                args.adaptive_input_factor, args.embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int))
        else:
            embed_tokens = nn.Embedding(len(task.dictionary), args.embed_dim,
                                        task.dictionary.pad())

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
            assert args.input_dim == args.output_dim

        decoder = SpanTransformerDecoder(args,
                                         task.dictionary,
                                         embed_tokens,
                                         final_norm=False)
        return SpanTransformerAutoregressive(decoder)
Code example #11
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        set_default_args(args)

        if args.character_embeddings:
            char_embed = CharacterTokenEmbedder(
                task.source_dictionary, eval(args.character_filters),
                args.character_embedding_dim, 160,
                args.char_embedder_highway_layers)
            args.char_embed = char_embed

        if args.encoder_layers != args.decoder_layers:
            raise ValueError("--encoder-layers must match --decoder-layers")

        def load_pretrained_embedding_from_file(embed_path, dictionary,
                                                embed_dim):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
            embed_dict = utils.parse_embedding(embed_path)
            utils.print_embed_overlap(embed_dict, dictionary)
            return utils.load_embedding(embed_dict, dictionary, embed_tokens)

        args.pretrained_word_embed = load_pretrained_embedding_from_file(
            args.word_embed_path, task.source_dictionary,
            args.encoder_embed_dim)
        args.pretrained_word_embed.requires_grad = False

        if args.encoder_embed_path:
            args.pretrained_encoder_embed = load_pretrained_embedding_from_file(
                args.encoder_embed_path, task.source_dictionary,
                args.encoder_embed_dim)
        else:
            num_embeddings = len(task.source_dictionary)
            args.pretrained_encoder_embed = Embedding(
                num_embeddings, args.encoder_embed_dim,
                task.source_dictionary.pad())

        if args.share_all_embeddings:
            # double check all parameters combinations are valid
            if task.source_dictionary != task.target_dictionary:
                raise ValueError(
                    "--share-all-embeddings requires a joint dictionary")
            if args.decoder_embed_path and (args.decoder_embed_path !=
                                            args.encoder_embed_path):
                raise ValueError(
                    "--share-all-embed not compatible with --decoder-embed-path"
                )
            if args.encoder_embed_dim != args.decoder_embed_dim:
                raise ValueError(
                    "--share-all-embeddings requires --encoder-embed-dim to "
                    "match --decoder-embed-dim")
            args.pretrained_decoder_embed = args.pretrained_encoder_embed
            args.share_decoder_input_output_embed = True
        else:
            # separate decoder input embeddings
            args.pretrained_decoder_embed = None
            if args.decoder_embed_path:
                args.pretrained_decoder_embed = load_pretrained_embedding_from_file(
                    args.decoder_embed_path,
                    task.target_dictionary,
                    args.decoder_embed_dim,
                )
        # one last double check of parameter combinations
        if args.share_decoder_input_output_embed and (
                args.decoder_embed_dim != args.decoder_out_embed_dim):
            raise ValueError(
                "--share-decoder-input-output-embeddings requires "
                "--decoder-embed-dim to match --decoder-out-embed-dim")

        if args.encoder_freeze_embed:
            args.pretrained_encoder_embed.weight.requires_grad = False
        if args.decoder_freeze_embed:
            args.pretrained_decoder_embed.weight.requires_grad = False

        args.source_dictionary = task.source_dictionary
        args.target_dictionary = task.target_dictionary

        encoder = cls.build_encoder(task, args, task.source_dictionary)
        decoder = cls.build_decoder(
            task,
            args,
            task.target_dictionary,
            encoder,
        )
        return cls(args, encoder, decoder)
Code example #12
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if args.decoder_layers_to_keep:
            args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

        if getattr(args, "max_target_positions", None) is None:
            args.max_target_positions = getattr(args, "tokens_per_sample",
                                                DEFAULT_MAX_TARGET_POSITIONS)

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.source_dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(task.source_dictionary),
                task.source_dictionary.pad(),
                args.decoder_input_dim,
                args.adaptive_input_factor,
                args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            embed_tokens = cls.build_embedding(args, task.source_dictionary,
                                               args.decoder_input_dim)

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert (args.adaptive_softmax_cutoff == args.adaptive_input_cutoff
                    ), "{} != {}".format(args.adaptive_softmax_cutoff,
                                         args.adaptive_input_cutoff)
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = TransformerDecoder(args,
                                     task.target_dictionary,
                                     embed_tokens,
                                     no_encoder_attn=True)

        if getattr(args, "lm_path", None):
            print('load Transformer_LM from {}'.format(args.lm_path))
            state = checkpoint_utils.load_checkpoint_to_cpu(args.lm_path)
            lm_args = state["args"]
            lm_args.data = args.data
            assert getattr(lm_args, "lm_path", None) is None

            task = tasks.setup_task(lm_args)
            decoder = task.build_model(lm_args)
            print('restore Transformer_LM from {}'.format(args.lm_path))
            decoder.load_state_dict(state["model"], strict=True)
        decoder.dim_output = len(task.dictionary)

        return cls(decoder)
Code example #13
    def __init__(
        self,
        dictionary,
        targets,
        character_embeddings,
        embed_dim=512,
        hidden_size=512,
        out_embed_dim=512,
        num_layers=2,
        dropout_in=0.1,
        dropout_out=0.1,
        encoder_output_units=512,
        adaptive_softmax_cutoff=None,
        bidirectional=True,
        memory_dim=4096,
        memory_clip_value=3.0,
        state_clip_value=3.0,
        residual=True,
        character_filters=None,
        character_embedding_dim=16,
        char_embedder_highway_layers=0,
    ):
        super().__init__(dictionary)
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.hidden_size = hidden_size
        self.directions = 2 if bidirectional else 1
        self.memory_dim = memory_dim
        self.memory_clip_value = memory_clip_value
        self.state_clip_value = state_clip_value
        self.residual = residual
        self.targets = targets
        self.num_layers = num_layers

        if character_embeddings:
            self.embed_tokens = CharacterTokenEmbedder(
                dictionary, eval(character_filters), character_embedding_dim,
                embed_dim, char_embedder_highway_layers)
        else:
            self.embed_tokens = Embedding(len(dictionary), embed_dim,
                                          dictionary.pad())

        assert num_layers > 0

        for layer_index in range(self.num_layers):
            forward_layer = LstmCellWithProjection(
                input_size=embed_dim if layer_index == 0 else hidden_size,
                hidden_size=hidden_size,
                cell_size=memory_dim,
                go_forward=True,
                recurrent_dropout_probability=dropout_out,
                memory_cell_clip_value=memory_clip_value,
                state_projection_clip_value=state_clip_value,
                is_training=self.training)
            self.add_module('forward_layer_{}'.format(layer_index),
                            forward_layer)

            if bidirectional:

                backward_layer = LstmCellWithProjection(
                    input_size=embed_dim if layer_index == 0 else hidden_size,
                    hidden_size=hidden_size,
                    cell_size=memory_dim,
                    go_forward=False,
                    recurrent_dropout_probability=dropout_out,
                    memory_cell_clip_value=memory_clip_value,
                    state_projection_clip_value=state_clip_value,
                    is_training=self.training)
                self.add_module('backward_layer_{}'.format(layer_index),
                                backward_layer)

        self.adaptive_softmax = self.additional_fc = self.fc_out = None

        if adaptive_softmax_cutoff is not None:
            self.adaptive_softmax = AdaptiveSoftmax(len(dictionary),
                                                    out_embed_dim,
                                                    adaptive_softmax_cutoff,
                                                    dropout=dropout_out)
        else:
            if hidden_size != out_embed_dim:
                self.additional_fc = Linear(hidden_size,
                                            out_embed_dim,
                                            dropout=dropout_out)
            self.fc_out = Linear(out_embed_dim,
                                 len(dictionary),
                                 dropout=dropout_out)
            self.adaptive_softmax = None
Code example #14
File: sequence_tagging.py  Project: mbevila/qbert
    def build_model_input(cls, args, dictionary):
        # make sure all arguments are present in older fairseq_ext

        args.context_embeddings = getattr(args, 'context_embeddings', False)

        args.max_source_positions = args.tokens_per_sample
        args.max_target_positions = args.tokens_per_sample

        if args.context_embeddings:
            if args.context_embeddings_type == 'qbert':
                embed_tokens = QBERTEmbedder.from_args(
                    args, {"dictionary": dictionary})
            elif args.context_embeddings_type == 'bert':
                assert not args.context_embeddings_use_embeddings
                embed_tokens = BERTEmbedder(args.context_embeddings_bert_model,
                                            False)
            elif args.context_embeddings_type == 'elmo':
                embed_tokens = ELMOEmbedder(
                    args.context_embeddings_elmo_options,
                    args.context_embeddings_elmo_weights, False)
            elif args.context_embeddings_type == 'flair':
                embed_tokens = FlairEmbedder(
                    args.context_embeddings_flair_forward,
                    args.context_embeddings_flair_backward,
                    args.context_embeddings_flair_embeddings, False)
            else:
                raise NotImplementedError

        elif args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(dictionary), dictionary.pad(), args.decoder_input_dim,
                args.adaptive_input_factor, args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int))
        else:

            def load_pretrained_embedding_from_file(embed_path, dictionary,
                                                    embed_dim):
                from fairseq import utils

                num_embeddings = len(dictionary)
                padding_idx = dictionary.pad()
                embed_tokens = Embedding(num_embeddings, embed_dim,
                                         padding_idx)
                embed_dict = utils.parse_embedding(embed_path)
                utils.print_embed_overlap(embed_dict, dictionary)
                return utils.load_embedding(embed_dict, dictionary,
                                            embed_tokens)

            if args.decoder_embed_pretrained:
                embed_tokens = load_pretrained_embedding_from_file(
                    args.decoder_embed_pretrained, dictionary,
                    args.decoder_input_dim)
                if getattr(args, 'decoder_embed_pretrained', False):
                    for par in embed_tokens.parameters():
                        par.requires_grad = False
            else:
                embed_tokens = Embedding(len(dictionary),
                                         args.decoder_input_dim,
                                         dictionary.pad())

        return embed_tokens
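Taken together, the build_model and build_model_input snippets above repeat one selection pattern for the input embedding: a character CNN embedder, an adaptive input, or a plain lookup table, chosen from args. Below is a condensed, non-authoritative sketch of that pattern; the helper name build_input_embedding and the import paths are assumptions, while the args fields are exactly the ones the examples read.

    from fairseq import options
    from fairseq.modules import AdaptiveInput, CharacterTokenEmbedder  # assumed import path
    from fairseq.models.transformer import Embedding  # thin nn.Embedding helper, assumed path


    def build_input_embedding(args, dictionary):
        """Choose the token embedder the way the examples above do."""
        if args.character_embeddings:
            return CharacterTokenEmbedder(
                dictionary,
                eval(args.character_filters),  # the filter spec arrives as a string, e.g. "[(1, 64), (2, 128)]"
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        if getattr(args, 'adaptive_input', False):
            return AdaptiveInput(
                len(dictionary), dictionary.pad(), args.decoder_input_dim,
                args.adaptive_input_factor, args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int),
            )
        return Embedding(len(dictionary), args.decoder_input_dim, dictionary.pad())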