Example 1
    def __init__(self, config: BartConfig, embed_tokens: nn.Embedding):
        super().__init__(config, embed_tokens)

        self.dropout = config.dropout
        self.layerdrop = config.encoder_layerdrop  # probability of skipping a whole encoder layer during training (LayerDrop)
        self.visual = None  # placeholder (presumably for visual features), only initialized to None here

        embed_dim = embed_tokens.embedding_dim
        self.embed_scale = math.sqrt(
            embed_dim) if config.scale_embedding else 1.0
        self.padding_idx = embed_tokens.padding_idx
        self.max_source_positions = config.max_position_embeddings

        self.embed_tokens = embed_tokens
        # fixed sinusoidal vs. learned (trainable) positional embeddings, selected by the config flag
        if config.static_position_embeddings:
            self.embed_positions = SinusoidalPositionalEmbedding(
                config.max_position_embeddings, embed_dim, self.padding_idx)
        else:
            self.embed_positions = LearnedPositionalEmbedding(
                config.max_position_embeddings,
                embed_dim,
                self.padding_idx,
                config.extra_pos_embeddings,
            )
        self.layers = nn.ModuleList(
            [EncoderLayer(config) for _ in range(config.encoder_layers)])
        self.layernorm_embedding = LayerNorm(
            embed_dim) if config.normalize_embedding else nn.Identity()
        # mbart has one extra layer_norm
        self.layer_norm = LayerNorm(
            config.d_model) if config.normalize_before else None
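For context, a minimal sketch (plain PyTorch with illustrative sizes; a stand-in nn.Embedding replaces the Sinusoidal/LearnedPositionalEmbedding classes) of how the pieces built in this constructor are typically combined in a BART-style encoder forward pass:

import math
import torch
import torch.nn as nn

d_model, vocab_size, pad_idx, max_positions = 16, 100, 1, 32
embed_tokens = nn.Embedding(vocab_size, d_model, padding_idx=pad_idx)
embed_positions = nn.Embedding(max_positions, d_model)  # stand-in for the positional embedding module
embed_scale = math.sqrt(d_model)                        # used when config.scale_embedding is True, else 1.0
layernorm_embedding = nn.LayerNorm(d_model)             # used when config.normalize_embedding is True

input_ids = torch.tensor([[5, 6, 7, pad_idx]])
positions = torch.arange(input_ids.size(1)).unsqueeze(0)
x = embed_tokens(input_ids) * embed_scale + embed_positions(positions)
x = layernorm_embedding(x)  # dropout and the stack of EncoderLayers would follow
print(x.shape)  # torch.Size([1, 4, 16])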
Example 2
    def __init__(self, config: BartConfig, embed_tokens: nn.Embedding):
        super().__init__()
        self.output_attentions = config.output_attentions
        self.output_hidden_states = config.output_hidden_states
        self.dropout = config.dropout
        self.layerdrop = config.decoder_layerdrop
        self.padding_idx = embed_tokens.padding_idx
        self.max_target_positions = config.max_position_embeddings
        self.embed_scale = math.sqrt(
            config.d_model) if config.scale_embedding else 1.0
        self.embed_tokens = embed_tokens
        if config.static_position_embeddings:
            self.embed_positions = SinusoidalPositionalEmbedding(
                config.max_position_embeddings, config.d_model,
                config.pad_token_id)
        else:
            self.embed_positions = LearnedPositionalEmbedding(
                config.max_position_embeddings,
                config.d_model,
                self.padding_idx,
            )
        self.layers = nn.ModuleList([
            DecoderLayer(config) for _ in range(config.decoder_layers)
        ])  # type: List[DecoderLayer]
        self.layernorm_embedding = LayerNorm(
            config.d_model) if config.normalize_embedding else nn.Identity()
        self.layer_norm = LayerNorm(
            config.d_model) if config.add_final_layer_norm else None
Example 3
    def __init__(self, config: BartConfig):
        super().__init__()
        self.embed_dim = config.d_model
        self.output_attentions = config.output_attentions
        # decoder self-attention
        self.self_attn = SelfAttention(
            embed_dim=self.embed_dim,
            num_heads=config.decoder_attention_heads,
            dropout=config.attention_dropout,
        )
        self.dropout = config.dropout
        self.activation_fn = ACT2FN[config.activation_function]
        self.activation_dropout = config.activation_dropout
        self.normalize_before = config.normalize_before

        self.self_attn_layer_norm = LayerNorm(self.embed_dim)
        # cross-attention over the encoder's hidden states (note encoder_decoder_attention=True)
        self.encoder_attn = SelfAttention(
            self.embed_dim,
            config.decoder_attention_heads,
            dropout=config.attention_dropout,
            encoder_decoder_attention=True,
        )
        self.encoder_attn_layer_norm = LayerNorm(self.embed_dim)
        # position-wise feed-forward network
        self.fc1 = nn.Linear(self.embed_dim, config.decoder_ffn_dim)
        self.fc2 = nn.Linear(config.decoder_ffn_dim, self.embed_dim)
        self.final_layer_norm = LayerNorm(self.embed_dim)
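As a reading aid, a small sketch (not the repository's actual forward code) of the pre-norm vs. post-norm residual pattern that normalize_before selects for each of the three sub-blocks above (self-attention, cross-attention, feed-forward):

def sublayer(x, layer_norm, fn, normalize_before):
    # Pre-norm: normalize the input, run the sub-block, then add the residual.
    # Post-norm: run the sub-block, add the residual, then normalize the sum.
    residual = x
    if normalize_before:
        x = layer_norm(x)
    x = fn(x)
    x = residual + x
    if not normalize_before:
        x = layer_norm(x)
    return x

# e.g. for the feed-forward sub-block (dropout omitted):
# x = sublayer(x, self.final_layer_norm,
#              lambda h: self.fc2(self.activation_fn(self.fc1(h))),
#              self.normalize_before)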
Example 4
    def __init__(self, config, embed_tokens):
        super().__init__()
        self.config = config

        self.dropout = config.dropout
        self.embed_tokens = embed_tokens

        # embedding table for syntactic tags (77 entries), initialized from N(0, init_std)
        # with the padding row zeroed out
        self.embed_synt = nn.Embedding(77, config.d_model, config.pad_token_id)
        self.embed_synt.weight.data.normal_(mean=0.0, std=config.init_std)
        self.embed_synt.weight.data[config.pad_token_id].zero_()

        self.embed_positions = LearnedPositionalEmbedding(
            config.max_position_embeddings, config.d_model,
            config.pad_token_id, config.extra_pos_embeddings)

        self.layers = nn.ModuleList(
            [EncoderLayer(config) for _ in range(config.encoder_layers)])
        # a single additional encoder layer dedicated to the syntactic stream
        self.synt_layers = nn.ModuleList(
            [EncoderLayer(config) for _ in range(1)])

        self.layernorm_embedding = LayerNorm(config.d_model)

        self.synt_layernorm_embedding = LayerNorm(config.d_model)

        self.pooling = MeanPooling(config)
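A usage sketch for the syntactic branch (illustrative values; MeanPooling's implementation is not shown above, so a plain masked mean stands in for it here):

import torch
import torch.nn as nn

d_model, pad_token_id, init_std = 16, 1, 0.02
embed_synt = nn.Embedding(77, d_model, padding_idx=pad_token_id)
embed_synt.weight.data.normal_(mean=0.0, std=init_std)
embed_synt.weight.data[pad_token_id].zero_()          # keep the padding row at zero, as in the constructor

synt_ids = torch.tensor([[3, 10, 42, pad_token_id]])  # one sequence of syntactic tag ids (< 77)
synt_embeds = embed_synt(synt_ids)                    # shape (1, 4, 16)
mask = (synt_ids != pad_token_id).unsqueeze(-1).float()
pooled = (synt_embeds * mask).sum(1) / mask.sum(1)    # masked mean over the sequence
print(pooled.shape)  # torch.Size([1, 16])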
Example 5
    def __init__(self, config, embed_tokens):
        super().__init__()

        self.dropout = config.dropout

        self.embed_tokens = embed_tokens

        self.embed_positions = LearnedPositionalEmbedding(
            config.max_position_embeddings, config.d_model,
            config.pad_token_id, config.extra_pos_embeddings)

        self.layers = nn.ModuleList([DecoderLayer(config) for _ in range(1)])  # a single decoder layer
        self.layernorm_embedding = LayerNorm(config.d_model)
Example 6
    def __init__(self, config):
        super().__init__()
        # sentence-level LayerNorm without learnable affine parameters
        self.sent_layernorm_embedding = LayerNorm(config.d_model,
                                                  elementwise_affine=False)
        # linear head projecting the d_model-sized representation to 74 outputs
        self.adv = nn.Linear(config.d_model, 74)
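For illustration, a sketch of how this head maps a d_model-sized sentence representation to 74 logits (batch size and d_model are illustrative; only the two modules come from the snippet):

import torch
import torch.nn as nn

d_model = 16
sent_layernorm_embedding = nn.LayerNorm(d_model, elementwise_affine=False)  # no learnable scale/shift
adv = nn.Linear(d_model, 74)

sentence_repr = torch.randn(2, d_model)  # e.g. a batch of pooled encoder outputs
logits = adv(sent_layernorm_embedding(sentence_repr))
print(logits.shape)  # torch.Size([2, 74])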