def __build_transformer_decoder(cls, args: argparse.Namespace, tgt_dict: Dictionary):
    decoder_embed_tokens = cls.__build_embedding(tgt_dict, args.decoder_embed_dim)
    decoder = transformer.TransformerDecoder(args, tgt_dict, decoder_embed_tokens)
    return decoder
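# For context: a minimal sketch of the `__build_embedding` helper that the
# factory above assumes. Its body is not shown in the original; this follows
# the usual fairseq convention (transformer.Embedding builds a normal-
# initialized table with a zeroed padding row), so treat it as an assumption,
# not the actual implementation.
@classmethod
def __build_embedding(cls, dictionary: Dictionary, embed_dim: int):
    return transformer.Embedding(
        num_embeddings=len(dictionary),
        embedding_dim=embed_dim,
        padding_idx=dictionary.pad(),
    )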
def do_build_decoder(cls, args, captions_dict):
    decoder_embedding = transformer.Embedding(
        num_embeddings=len(captions_dict),
        embedding_dim=args.decoder_embed_dim,
        padding_idx=captions_dict.pad(),
    )
    return transformer.TransformerDecoder(args, captions_dict, decoder_embedding)
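# Hedged usage sketch: the legacy TransformerDecoder(args, dict, embedding)
# constructor above reads many hyperparameters off `args`. One way to satisfy
# it is to let fairseq fill in the transformer defaults first via
# `base_architecture` (its import path varies across fairseq versions).
# `captions_dict` is assumed to be a fairseq Dictionary built elsewhere, and
# `MyCaptioningModel` is a hypothetical class holding the method above.
import argparse

from fairseq.models import transformer

args = argparse.Namespace(decoder_embed_dim=512)
transformer.base_architecture(args)  # populate the remaining transformer defaults
decoder = MyCaptioningModel.do_build_decoder(args, captions_dict)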
def __init__(
    self,
    embed_dim,
    attention_heads,
    ffn_embed_dim,
    num_layers,
    embedding_layer,  # torch.nn.Embedding. Must have a padding_idx field
    dropout=0,
    normalize_before=False,
    torch_encoder=None,  # torch encoder that you can map weights from
    activation="relu",
):
    super().__init__()

    cfg = fairseq_transformer.TransformerConfig()
    cfg.decoder.embed_dim = embed_dim
    cfg.decoder.output_dim = embed_dim
    cfg.decoder.attention_heads = attention_heads
    cfg.decoder.ffn_embed_dim = ffn_embed_dim
    cfg.dropout = dropout
    cfg.decoder.normalize_before = normalize_before
    cfg.decoder.layers = num_layers
    # make embedding behavior same as other encoders
    cfg.no_token_positional_embeddings = True
    cfg.no_scale_embedding = True
    cfg.activation_fn = activation

    dictionary = {}  # TODO: verify what this is

    self.decoder = fairseq_transformer.TransformerDecoder(
        cfg,
        dictionary,
        embedding_layer,
        no_encoder_attn=True,
        output_projection=None,
    )

    if torch_encoder is not None:
        self.decoder = torch_to_fairseq(torch_encoder, self.decoder)
    self.decoder = self.decoder.eval().cuda().half()
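# Hedged usage sketch for the config-based wrapper above. The enclosing class
# name is not shown in the excerpt, so `DecoderWrapper` is a placeholder; a
# CUDA device is assumed because the constructor ends with `.cuda().half()`.
import torch

embedding = torch.nn.Embedding(num_embeddings=32000, embedding_dim=512, padding_idx=0)
decoder = DecoderWrapper(
    embed_dim=512,
    attention_heads=8,
    ffn_embed_dim=2048,
    num_layers=6,
    embedding_layer=embedding,  # padding_idx is required, as noted above
)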