Example #1
    def __init__(
        self,
        cfg: Wav2BartChrConfig,
        dictionary=None,
        embed_tokens=None,
        no_encoder_attn=False,
    ):
        super().__init__(dictionary)
        self.cfg = cfg
        # bart = torch.hub.load('pytorch/fairseq', 'bart.base')
        from fairseq.models.bart import BARTModel
        if os.path.isfile(os.path.join(cfg.bart_path, 'model.pt')):
            print('loading bart from cfg path')
            bart = BARTModel.from_pretrained(cfg.bart_path, checkpoint_file='model.pt')
        else:
            print('loading bart from relative path')
            bart = BARTModel.from_pretrained('models/bart.base', checkpoint_file='model.pt')
        
        bart_decoder = bart.model.decoder
        bart_dictionary_size = len(bart_decoder.dictionary)
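        # Rebuild a TransformerDecoder from the pretrained BART decoder's args, dictionary and embeddings, then copy over its weights.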
        self.decoder = TransformerDecoder(bart_decoder.args, bart_decoder.dictionary, bart_decoder.embed_tokens)
        self.decoder.load_state_dict(bart_decoder.state_dict())

        # self.output_embed_dim = cfg.decoder_embed_dim

        # Dirty hack to alter the output embedding layer of the decoder
        self.decoder.share_input_output_embed = False

        self.output_projection = nn.Linear(
            bart_dictionary_size, len(dictionary), bias=False
        )
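        # The new projection maps from the BART vocabulary size to the task dictionary size; its weights are re-initialized below.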
        nn.init.normal_(
            self.output_projection.weight, mean=0, std=bart_dictionary_size ** -0.5
        )
Example #2
    def __init__(
        self,
        cfg: Wav2BartPoolConfig,
        dictionary=None,
        embed_tokens=None,
        no_encoder_attn=False,
    ):
        super().__init__(dictionary)
        self.cfg = cfg
        # bart = torch.hub.load('pytorch/fairseq', 'bart.base')
        from fairseq.models.bart import BARTModel
        if os.path.isfile(os.path.join(cfg.bart_path, 'model.pt')):
            print('loading bart from cfg path')
            bart = BARTModel.from_pretrained(cfg.bart_path,
                                             checkpoint_file='model.pt')
        else:
            print('loading bart from relative path')
            bart = BARTModel.from_pretrained('models/bart.base',
                                             checkpoint_file='model.pt')

        bart_decoder = bart.model.decoder
        self.decoder = TransformerDecoder(bart_decoder.args,
                                          bart_decoder.dictionary,
                                          bart_decoder.embed_tokens)
        self.decoder.load_state_dict(bart_decoder.state_dict())
Example #3
    def from_roberta(roberta_enc: roberta.RobertaModel, args, dictionary):
        encoder = roberta_enc.encoder.sentence_encoder
        vocab_size, embed_dim = encoder.embed_tokens.weight.shape

        if args.share_all_embeddings:
            lm_head = roberta_enc.encoder.lm_head
            assert encoder.embed_tokens.weight is lm_head.weight, (
                "Can't use --share-all-embeddings with a model "
                "that was pretraiend with --untie-weights-roberta_enc")
        else:
            lm_head = roberta.RobertaLMHead(embed_dim, vocab_size,
                                            roberta_enc.args.activation_fn)

        dec_embs = nn.Embedding(vocab_size, embed_dim, dictionary.pad())
        if args.share_all_embeddings or args.share_decoder_input_output_embed:
            # Note: I wasn't able to use the Embedding _weight parameter to achieve this sharing.
            dec_embs.weight = lm_head.weight

        decoder = TransformerDecoder(
            RobertaEncDecModel.read_args_from_roberta(roberta_enc.args),
            dictionary,
            dec_embs,
            no_encoder_attn=False,
            output_projection=lm_head,
        )
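        # The decoder above reuses RoBERTa's config (via read_args_from_roberta), the decoder embeddings, and the LM head as its output projection.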
        if getattr(args, "pretrained_decoder", False):
            decoder_dict = encoder.state_dict()

            # TODO: hide setting "encoder_attn" layers behind a flag.
            for k, w in list(decoder_dict.items()):
                if ".self_attn" in k:
                    k_enc_attn = k.replace(".self_attn", ".encoder_attn")
                    decoder_dict[k_enc_attn] = w.detach().clone()

            for k, w in lm_head.state_dict().items():
                decoder_dict["output_projection." + k] = w

            missing_keys, unexpected_keys = decoder.load_state_dict(
                decoder_dict, strict=False)
            # missing_keys = [m for m in missing_keys if ".encoder_attn" not in m]
            assert not missing_keys and not unexpected_keys, (
                "Failed to load state dict. "
                f"Missing keys: {missing_keys}. "
                f"Unexpected keys: {unexpected_keys}.")

        if args.share_all_embeddings:
            assert decoder.output_projection.weight is decoder.embed_tokens.weight
            assert encoder.embed_tokens.weight is decoder.embed_tokens.weight
        elif args.share_decoder_input_output_embed:
            assert decoder.output_projection.weight is decoder.embed_tokens.weight
            assert encoder.embed_tokens.weight is not decoder.embed_tokens.weight
        else:
            assert decoder.output_projection.weight is not decoder.embed_tokens.weight
            assert encoder.embed_tokens.weight is not decoder.embed_tokens.weight

        return RobertaEncDecModel(encoder, decoder)
Example #4
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_architecture(args)

        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = 1024
        if not hasattr(args, 'max_target_positions'):
            args.max_target_positions = 1024

        src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

        def build_embedding(dictionary, embed_dim, path=None):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            emb = Embedding(num_embeddings, embed_dim, padding_idx)
            # if provided, load from preloaded dictionaries
            if path:
                embed_dict = utils.parse_embedding(path)
                utils.load_embedding(embed_dict, dictionary, emb)
            return emb

        if args.share_all_embeddings:
            if src_dict != tgt_dict:
                raise RuntimeError(
                    '--share-all-embeddings requires a joined dictionary')
            if args.encoder_embed_dim != args.decoder_embed_dim:
                raise RuntimeError(
                    '--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim'
                )
            if args.decoder_embed_path and (args.decoder_embed_path !=
                                            args.encoder_embed_path):
                raise RuntimeError(
                    '--share-all-embeddings not compatible with --decoder-embed-path'
                )
            encoder_embed_tokens = build_embedding(src_dict,
                                                   args.encoder_embed_dim,
                                                   args.encoder_embed_path)
            decoder_embed_tokens = encoder_embed_tokens
            args.share_decoder_input_output_embed = True
        else:
            encoder_embed_tokens = build_embedding(src_dict,
                                                   args.encoder_embed_dim,
                                                   args.encoder_embed_path)
            decoder_embed_tokens = build_embedding(tgt_dict,
                                                   args.decoder_embed_dim,
                                                   args.decoder_embed_path)

        encoder = TransformerEncoder(args, src_dict, encoder_embed_tokens)
        decoder = TransformerDecoder(args, tgt_dict, decoder_embed_tokens)
        encoder2 = TransformerEncoder(args, tgt_dict, decoder_embed_tokens)
        decoder2 = TransformerDecoder(args, src_dict, encoder_embed_tokens)
        return TransformerDualModel(encoder, decoder, encoder2, decoder2)
Example #5
 def __init__(self,
              cfg: WavBart2BartConfig,
              dictionary=None,
              embed_tokens=None,
              no_encoder_attn=False,
              bart=None):
     super().__init__(dictionary)
     self.cfg = cfg
     # bart = torch.hub.load('pytorch/fairseq', 'bart.base')
     bart_decoder = bart.model.decoder
     self.decoder = TransformerDecoder(bart_decoder.args,
                                       bart_decoder.dictionary,
                                       bart_decoder.embed_tokens)
     self.decoder.load_state_dict(bart_decoder.state_dict())
Example #6
 def build_decoder(cls, args, tgt_dict, embed_tokens):
     if (safe_hasattr(args, "decoder_self_attn_head_select") and args.decoder_self_attn_head_select) or (safe_hasattr(args, "dec_enc_attn_head_select") and args.dec_enc_attn_head_select):
         return HeadSelectionTransformerDecoder(
             args, tgt_dict, embed_tokens
         )
     else:
         return TransformerDecoder(args, tgt_dict, embed_tokens)
Example #7
 def build_decoder(cls, args, tgt_dict, embed_tokens):
     return TransformerDecoder(
         args,
         tgt_dict,
         embed_tokens,
         no_encoder_attn=getattr(args, 'no_cross_attention', False),
     )
Example #8
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if getattr(args, 'max_target_positions', None) is None:
            args.max_target_positions = getattr(args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.source_dictionary, eval(args.character_filters),
                args.character_embedding_dim, args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(task.source_dictionary), task.source_dictionary.pad(), args.decoder_input_dim,
                args.adaptive_input_factor, args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int),
            )
        else:
            embed_tokens = Embedding(len(task.source_dictionary), args.decoder_input_dim, task.source_dictionary.pad())

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = TransformerDecoder(
            args, task.target_dictionary, embed_tokens, no_encoder_attn=True,
        )
        return TransformerLanguageModel(decoder)
Example #9
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_architecture(args)

        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = 100000
        if not hasattr(args, 'max_target_positions'):
            args.max_target_positions = 100000

        src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

        def build_embedding(dictionary, embed_dim, path=None):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            emb = Embedding(num_embeddings, embed_dim, padding_idx)
            # if provided, load from preloaded dictionaries
            if path:
                embed_dict = utils.parse_embedding(path)
                utils.load_embedding(embed_dict, dictionary, emb)
            return emb

        decoder_embed_tokens = build_embedding(
            tgt_dict, args.decoder_embed_dim, args.decoder_embed_path)
        encoder = ConvolutionalTransformerEncoder(
            args, src_dict if src_dict is not None else tgt_dict, audio_features=args.input_feat_per_channel)
        decoder = TransformerDecoder(args, tgt_dict, decoder_embed_tokens)
        return ConvolutionalTransformerModel(encoder, decoder)
Example #10
    def build_decoder(cls, args, task, embed_tokens):
        _args = copy.deepcopy(args)
        if args.adaptor_proj or args.encoder_proj:  # not V0 arch
            _args.encoder_embed_dim = _args.decoder_embed_dim
        _args.dropout = args.decoder_dropout
        _args.attention_dropout = args.decoder_attention_dropout
        _args.activation_dropout = args.decoder_activation_dropout

        decoder = TransformerDecoder(_args, task.target_dictionary,
                                     embed_tokens)
        decoder = cls.maybe_load_pretrained(
            decoder, getattr(args, "load_pretrained_decoder_from", None))
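        # Keep only the parameters matched by finetune_decoder_params trainable; everything else is frozen.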

        for k, p in decoder.named_parameters():
            p.requires_grad = need_finetuning(args.finetune_decoder_params, k)
        return decoder
Example #11
    def build_multitask_decoder(cls, args, tgt_dict, in_dim):
        decoder_args = args.decoder_args
        decoder_args.encoder_embed_dim = in_dim
        if args.decoder_type == "transformer":
            base_multitask_text_transformer_decoder_arch(decoder_args)
            task_decoder = TransformerDecoder(
                decoder_args,
                tgt_dict,
                embed_tokens=TransformerModelBase.build_embedding(
                    decoder_args,
                    tgt_dict,
                    decoder_args.decoder_embed_dim,
                ),
            )
        elif args.decoder_type == "ctc":
            task_decoder = CTCDecoder(
                dictionary=tgt_dict,
                in_dim=in_dim,
            )
        else:
            raise NotImplementedError(
                "currently only support multitask decoder_type 'transformer', 'ctc'"
            )

        return task_decoder
Example #12
    def __init__(self, args, dictionary):
        super().__init__()
        embedding_dim = 768
        self.padding_idx = 1

        self.dense = nn.Linear(embedding_dim, embedding_dim)
        self.layer_norm = LayerNorm(embedding_dim)
        init_bert_params(self.dense)
        self.encoder = TransformerSentenceEncoder(
                padding_idx=1,
                vocab_size=50265,
                num_encoder_layers=12,
                embedding_dim=768,
                ffn_embedding_dim=3072,
                num_attention_heads=12,
                dropout=0.1,
                attention_dropout=0.1,
                activation_dropout=0.0,
                layerdrop=0.0,
                max_seq_len=512,
                num_segments=0,
                encoder_normalize_before=True,
                apply_bert_init=True,
                activation_fn="gelu",
                q_noise=0.0,
                qn_block_size=8,
        )
        embed_tokens = self.encoder.embed_tokens
        self.lm_head = RobertaLMHead(
            embed_dim=embedding_dim,
            output_dim=50265,
            activation_fn="gelu",
            weight=embed_tokens.weight,
        )

        #args=base_architecture(args)

        if args.encoder_layers_to_keep:
            args.encoder_layers = len(args.encoder_layers_to_keep.split(","))
        if args.decoder_layers_to_keep:
            args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

        if getattr(args, "max_source_positions", None) is None:
            args.max_source_positions = 512
        if getattr(args, "max_target_positions", None) is None:
            args.max_target_positions = 512
        print('???', embed_tokens.embedding_dim)

        self.decoder = TransformerDecoder(
            args, dictionary, embed_tokens,
            no_encoder_attn=getattr(args, "no_cross_attention", False))

        self.class_num = args.num_classes
        self.classification_heads = RobertaClassificationHead(
            768,
            768,
            self.class_num,
            'tanh',
            0.0,
            0.0,
            8,
        )
Example #13
    def build_decoder(cls, args, task):
        _args = copy.deepcopy(args)
        _args.dropout = args.mbart_dropout
        _args.attention_dropout = args.mbart_attention_dropout
        _args.activation_dropout = args.mbart_activation_dropout
        _args.max_target_positions = 1024
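        # Decoder embeddings are sized to the target dictionary; a pretrained mBART checkpoint can be loaded into the decoder below.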
        dec_emb = nn.Embedding(
            len(task.tgt_dict), _args.encoder_embed_dim, task.tgt_dict.pad()
        )
        decoder = TransformerDecoder(_args, task.tgt_dict, dec_emb)
        if getattr(args, "load_pretrained_mbart_from", None):
            decoder = checkpoint_utils.load_pretrained_component_from_model(
                component=decoder, checkpoint=args.load_pretrained_mbart_from
            )
        if getattr(args, "no_final_norm_decoder", False):
            decoder.layer_norm = None
        for k, p in decoder.named_parameters():
            # Freeze pretrained models by default
            if safe_hasattr(
                args, "finetune_mbart_decoder_params"
            ) and need_finetuning(
                args.finetune_mbart_decoder_params, k
            ):
                p.requires_grad = True
            else:
                p.requires_grad = False

        compute_cross_attentive_loss = getattr(args, "attentive_cost_regularization", 0.0) > 0.0
        cross_attentive_loss_without_norm = getattr(
            args, "attentive_cost_without_normalize", False
        )
        cross_attentive_loss_reverse = (
            False  # getattr(args, "attentive_cost_reverse", False)
        )
        decoder = TransformerMultiInputDecoder(
            dictionary=task.target_dictionary,
            spch_decoder=decoder,
            text_decoder=decoder,
            compute_cross_attentive_loss=compute_cross_attentive_loss,
            cross_attentive_loss_with_norm=not cross_attentive_loss_without_norm,
            cross_attentive_loss_reverse=cross_attentive_loss_reverse,
        )
        return decoder
Example #14
    def build_model(cls, args, task):
        mode = {
            e.split('=')[0]: e.split('=')[1] if len(e.split('=')) > 1 else None
            for e in args.user_mode.split(',')
        }
        if 'gated' in mode:
            tmodel = GatedTransformerModel.build_model(args, task)
        elif any([m in mode for m in ['decomposable', 'sep_lm', 'sep_lm1']]):
            tmodel = DecomposableTransformerModel.build_model(args, task)
        elif any([m in mode for m in ['attn_endorse', 'dbg_log_endorsement']]):
            tmodel = SimpleTransformerModel.build_model(
                args, task, DecoderModelLayer=UserTransformerDecoderLayer)
        else:
            tmodel = SimpleTransformerModel.build_model(args, task)

        model = DistantTransformerModel(tmodel)
        model.args = args
        model.user_mode = mode
        model.sampler_grad = SequenceGeneratorGrad(
            model.model.decoder.dictionary, beam_size=1, max_len_b=60)
        model.sampler = SequenceGenerator(model.model.decoder.dictionary,
                                          beam_size=1,
                                          max_len_b=60)
        model.decoder = ProxyDecoder(tmodel, model.user_mode, args, task,
                                     model.sampler_grad, model.sampler)
        model.encoder = ProxyEncoder(tmodel, model.user_mode, args, task,
                                     model.sampler_grad, model.sampler)
        tmodel.encoder.user_mode = mode
        tmodel.decoder.user_mode = mode
        if any([
                m in mode for m in [
                    'diff_lm', 'pretrain_lm', 'sep_lm', 'max_lm_margin',
                    'sep_lm2', 'sep_lm3'
                ]
        ]):
            model.lm = TransformerDecoder(args,
                                          tmodel.decoder.dictionary,
                                          tmodel.decoder.embed_tokens,
                                          no_encoder_attn=True)
            model.decoder.lm = model.lm
        if 'sep_lm3' in mode:
            tmodel.decoder.gate_fc1 = Linear(
                len(tmodel.decoder.dictionary) * 2,
                len(tmodel.decoder.dictionary))
            tmodel.decoder.gate_fc2 = Linear(len(tmodel.decoder.dictionary), 1)
        if any([m in mode for m in ['endorsement', 'rl_edm', 'beam_endorse']]):
            model.edm = EndorsementDetectorModel.build_model(args, task)
            model.decoder.edm = model.encoder.edm = model.edm
            model.encoder.edm.decoder.user_mode = model.encoder.edm.encoder.user_mode = mode
            if any([m in mode for m in ['self_align']]):
                model.self_edm = EndorsementDetectorModel.build_model(
                    args, task)
                model.decoder.self_edm = model.encoder.self_edm = model.self_edm
                model.encoder.self_edm.decoder.user_mode = model.encoder.self_edm.encoder.user_mode = mode

        return model
Example #15
 def get_decoder(lang):
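     # Lazily build and cache one decoder per language, reusing shared decoder embeddings when available.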
     if lang not in lang_decoders:
         if shared_decoder_embed_tokens is not None:
             decoder_embed_tokens = shared_decoder_embed_tokens
         else:
             decoder_embed_tokens = build_embedding(
                 task.dicts[lang], args.decoder_embed_dim, args.decoder_embed_path
             )
         lang_decoders[lang] = TransformerDecoder(args, task.dicts[lang], decoder_embed_tokens)
     return lang_decoders[lang]
Example #16
    def build_model(cls, args, task):
        """Build a new model instance."""

        embed_tokens = cls.build_embedding(
            task.source_dictionary, args.decoder_input_dim
        )

        decoder = TransformerDecoder(
            args, task.target_dictionary, embed_tokens, no_encoder_attn=True
        )
        return cls(decoder)
Example #17
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if hasattr(
                args,
                'no_tie_adaptive_proj') and args.no_tie_adaptive_proj is False:
            # backward compatibility
            args.tie_adaptive_proj = True

        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = args.tokens_per_sample
        if not hasattr(args, 'max_target_positions'):
            args.max_target_positions = args.tokens_per_sample

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(task.dictionary),
                task.dictionary.pad(),
                args.decoder_input_dim,
                args.adaptive_input_factor,
                args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int),
            )
        else:
            embed_tokens = Embedding(len(task.dictionary),
                                     args.decoder_input_dim,
                                     task.dictionary.pad())

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = TransformerDecoder(
            args,
            task.output_dictionary,
            embed_tokens,
            no_encoder_attn=True,
            final_norm=False,
        )
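        # This decoder runs as a pure language model: no encoder attention and no final layer norm.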
        return TransformerLanguageModel(decoder)
Example #18
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)

        if hasattr(args, "decoder_layers_to_keep"):
            args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

        if getattr(args, 'max_target_positions', None) is None:
            args.max_target_positions = getattr(args, 'tokens_per_sample',
                                                DEFAULT_MAX_TARGET_POSITIONS)

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.source_dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            print("Adaptive Input " + str(args.adaptive_input))
            print("Adaptive Cutoff: " + str(args.adaptive_input_cutoff))
            print("Vocab Size: " + str(len(task.source_dictionary.symbols)))
            embed_tokens = AdaptiveInput(
                len(task.source_dictionary),
                task.source_dictionary.pad(),
                args.decoder_input_dim,
                args.adaptive_input_factor,
                args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            embed_tokens = cls.build_embedding(args, task.source_dictionary,
                                               args.decoder_input_dim)

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert args.adaptive_softmax_cutoff == args.adaptive_input_cutoff, '{} != {}'.format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff)
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = TransformerDecoder(
            args,
            task.target_dictionary,
            embed_tokens,
            no_encoder_attn=True,
        )
        return cls(decoder)
Example #19
 def get_decoder(lang, lang_pair=None):
     if lang not in lang_decoders:
         if shared_decoder_embed_tokens is not None:
             decoder_embed_tokens = shared_decoder_embed_tokens
         elif args.share_all_langpair_embeddings:
             decoder_embed_tokens = lang_pair_embed[lang_pair]
         else:
             decoder_embed_tokens = build_embedding(
                 task.dicts[lang], args.decoder_embed_dim,
                 args.decoder_embed_path)
         lang_decoders[lang] = TransformerDecoder(
             args, task.dicts[lang], decoder_embed_tokens)
     return lang_decoders[lang]
Example #20
    def build_decoder(cls, args, task, embed_tokens):
        _args = copy.deepcopy(args)
        _args.dropout = args.decoder_dropout
        _args.attention_dropout = args.decoder_attention_dropout
        _args.activation_dropout = args.decoder_activation_dropout
        _args.max_target_positions = 1024

        decoder = TransformerDecoder(_args, task.target_dictionary,
                                     embed_tokens)
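        # Optionally initialize the decoder from a pretrained checkpoint, then freeze all parameters except those matched by finetune_decoder_params.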
        if getattr(args, "load_pretrained_decoder_from", None):
            decoder = checkpoint_utils.load_pretrained_component_from_model(
                component=decoder,
                checkpoint=args.load_pretrained_decoder_from)
        for k, p in decoder.named_parameters():
            # Freeze pretrained models by default
            if safe_hasattr(args, 'finetune_decoder_params'
                            ) and XMTransformerModel.finetune_params(
                                args.finetune_decoder_params, k):
                p.requires_grad = True
            else:
                p.requires_grad = False
        return decoder
Example #21
    def build_model(cls, args, vocab_size):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_lm_architecture(args)
        embed_tokens = Embedding(vocab_size, args.decoder_input_dim)
        decoder = TransformerDecoder(
            args,
            None,
            embed_tokens,
            no_encoder_attn=True,
        )
        return TransformerLanguageModel(decoder)
Example #22
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_architecture(args)

        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = 100000
        if not hasattr(args, 'max_target_positions'):
            args.max_target_positions = 100000

        # This model requires a task that provides source dictionary and transcripts
        assert isinstance(task, SpeechTranslationCTCTask)

        src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

        def build_embedding(dictionary, embed_dim, path=None):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            emb = Embedding(num_embeddings, embed_dim, padding_idx)
            # if provided, load from preloaded dictionaries
            if path:
                embed_dict = utils.parse_embedding(path)
                utils.load_embedding(embed_dict, dictionary, emb)
            return emb

        target_embed_tokens = build_embedding(tgt_dict, args.decoder_embed_dim,
                                              args.decoder_embed_path)
        src_embed_tokens = build_embedding(src_dict, args.decoder_embed_dim,
                                           args.auxiliary_decoder_embed_path)
        encoder = ConvolutionalTransformerEncoder(
            args, tgt_dict, audio_features=args.input_feat_per_channel)
        decoder = TransformerDecoder(args, tgt_dict, target_embed_tokens)
        auxiliary_decoder = TransformerDecoder(args, src_dict,
                                               src_embed_tokens)
        return ConvolutionalTransformerDualDecoder(encoder, decoder,
                                                   auxiliary_decoder)
Example #23
    def build_model(cls, args, task):
        """Build a new model instance."""

        if args.decoder_layers_to_keep:
            args.decoder_layers = len(args.decoder_layers_to_keep.split(","))

        if safe_getattr(args, "max_target_positions", None) is None:
            args.max_target_positions = safe_getattr(
                args, "tokens_per_sample", DEFAULT_MAX_TARGET_POSITIONS
            )

        if args.character_embeddings:
            embed_tokens = CharacterTokenEmbedder(
                task.source_dictionary,
                eval(args.character_filters),
                args.character_embedding_dim,
                args.decoder_embed_dim,
                args.char_embedder_highway_layers,
            )
        elif args.adaptive_input:
            embed_tokens = AdaptiveInput(
                len(task.source_dictionary),
                task.source_dictionary.pad(),
                args.decoder_input_dim,
                args.adaptive_input_factor,
                args.decoder_embed_dim,
                options.eval_str_list(args.adaptive_input_cutoff, type=int),
                args.quant_noise_pq,
                args.quant_noise_pq_block_size,
            )
        else:
            embed_tokens = cls.build_embedding(
                args, task.source_dictionary, args.decoder_input_dim
            )

        if args.tie_adaptive_weights:
            assert args.adaptive_input
            assert args.adaptive_input_factor == args.adaptive_softmax_factor
            assert (
                args.adaptive_softmax_cutoff == args.adaptive_input_cutoff
            ), "{} != {}".format(
                args.adaptive_softmax_cutoff, args.adaptive_input_cutoff
            )
            assert args.decoder_input_dim == args.decoder_output_dim

        decoder = TransformerDecoder(
            args, task.target_dictionary, embed_tokens, no_encoder_attn=True
        )
        return cls(decoder)
Example #24
 def _get_module_class(cls, is_encoder, args, lang_dict, embed_tokens, langs):
     if is_encoder:
         if hasattr(args, "encoder_latent_layer") and args.encoder_latent_layer:
             return LatentTransformerEncoder(
                 args, lang_dict, embed_tokens, num_logits=len(langs)
             )
         else:
             return TransformerEncoder(args, lang_dict, embed_tokens)
     else:
         if hasattr(args, "decoder_latent_layer") and args.decoder_latent_layer:
             return LatentTransformerDecoder(
                 args, lang_dict, embed_tokens, num_logits=len(langs)
             )
         else:
             return TransformerDecoder(args, lang_dict, embed_tokens)
Example #25
 def build_decoder(cls, args, tgt_dict, embed_tokens):
     if args.add_topic_decoder:
         return TopicTransformerDecoder(
             args,
             tgt_dict,
             embed_tokens,
             no_encoder_attn=getattr(args, "no_cross_attention", False),
         )
     else:
         return TransformerDecoder(
             args,
             tgt_dict,
             embed_tokens,
             no_encoder_attn=getattr(args, "no_cross_attention", False),
         )
Example #26
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = 4000
        if not hasattr(args, 'max_target_positions'):
            args.max_target_positions = 1024

        tgt_dict = task.target_dictionary

        def build_embedding(dictionary, embed_dim, path=None):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            emb = Embedding(num_embeddings, embed_dim, padding_idx)
            # if provided, load from preloaded dictionaries
            if path:
                embed_dict = utils.parse_embedding(path)
                utils.load_embedding(embed_dict, dictionary, emb)
            return emb

        if args.decoder_embed_path:
            raise NotImplementedError(
                "Pretrained embedding not available with Ber2Transf yet")
        else:
            decoder_embed_tokens = build_embedding(tgt_dict,
                                                   args.decoder_embed_dim,
                                                   args.decoder_embed_path)

        encoder = ProxyEncoder(
            linear_dim=args.encoder_embed_dim,
            convolutions=eval(args.encoder_convolutions),
            layers=args.encoder_layers,
            dropout=args.encoder_dropout,
            max_positions=args.max_source_positions,
            normalization_constant=args.normalization_constant,
            weight_norm=args.weight_norm,
            audio_features=task.audio_features,
        )
        args.dropout = args.decoder_dropout
        decoder = TransformerDecoder(
            args,
            tgt_dict,
            decoder_embed_tokens,
        )

        return Ber2Transf(encoder, decoder)
Example #27
    def __init__(
        self,
        args,
        dictionary,
        embed_tokens,
        no_encoder_attn_phase1=None,
        no_encoder_decoder_attn_phase2=None,
    ):
        super().__init__(dictionary)
        self.register_buffer("version", torch.Tensor([3]))

        self.decoder_phase1 = TransformerDecoder(args, dictionary,
                                                 embed_tokens,
                                                 no_encoder_attn_phase1)
        self.decoder_phase2 = TransformerDecoderPhase2(
            args, dictionary, embed_tokens, no_encoder_decoder_attn_phase2)
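        # Both phases share the dictionary and token embeddings; phase 2 uses TransformerDecoderPhase2 instead of the standard decoder.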
Example #28
    def build_model(cls, args, task: SemparseClassificationTask):
        # Fairseq initializes models by calling the ``build_model()``
        # function. This provides more flexibility, since the returned model
        # instance can be of a different type than the one that was called.
        # In this case we'll just return a SimpleLSTMModel instance.

        # Initialize our Encoder and Decoder.
        xlmr = XlmrEncoder(task.xlmr, task.max_positions()[0])
        # encoder = LSTMEncoder(
        #     dictionary=task.source_dictionary,
        #     pretrained_embed=xlmr,
        #     embed_dim=args.xlmr_out_dim,
        #     hidden_size=args.decoder_hidden_dim,
        #     dropout_in=args.decoder_dropout,
        #     dropout_out=args.decoder_dropout
        # )

        from fairseq.models.transformer_from_pretrained_xlm import TransformerDecoderFromPretrainedXLM
        dictionary = task.output_vocab
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        embed_tokens = Embedding(num_embeddings, args.decoder_embed_dim,
                                 padding_idx)
        decoder = TransformerDecoder(args, dictionary, embed_tokens)

        # decoder = LSTMDecoder(
        #     dictionary=task.target_dictionary,
        #     encoder_output_units=args.encoder_hidden_dim,
        #     embed_dim=args.decoder_embed_dim,
        #     hidden_size=args.decoder_hidden_dim,
        #     dropout_in=args.decoder_dropout,
        #     dropout_out=args.decoder_dropout
        # )
        model = XlmrTransformerEncoderDecoder(xlmr, decoder)

        # Print the model architecture.
        print(model)

        return model
Example #29
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present
        base_architecture(args)

        if not hasattr(args, 'max_positions'):
            args.max_positions = args.tokens_per_sample

        if getattr(args, "max_source_positions", None) is None:
            args.max_source_positions = 512
        if getattr(args, "max_target_positions", None) is None:
            args.max_target_positions = 512

        encoder = RobertaEncoder(args, task.source_dictionary)
        decoder = TransformerDecoder(args,
                                     task.source_dictionary,
                                     encoder.sentence_encoder.embed_tokens,
                                     no_encoder_attn=getattr(
                                         args, "no_cross_attention", False))

        return cls(args, encoder, decoder)
Example #30
def build_decoder(vocab,
                  tokens_embeddings,
                  ffn_dim=32,
                  layers=2,
                  attention_heads=2):

    args = Namespace(**FAIRSEQ_DEFAULT_ARGS)

    d = tokens_embeddings.embedding_dim
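    # All encoder/decoder dimensions below are set to the token embedding size so the shared embeddings are consistent.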
    args.share_all_embeddings = True

    args.encoder_embed_dim = d

    args.decoder_embed_dim = d
    args.decoder_ffn_embed_dim = ffn_dim
    args.decoder_attention_heads = attention_heads
    args.decoder_layers = layers

    args.tie_adaptive_weights = False
    args.decoder_output_dim = d
    args.decoder_input_dim = d

    return TransformerDecoder(args, vocab, tokens_embeddings)