Example no. 1
    def __init__(self,
                 embed_dim,
                 num_heads,
                 layers,
                 src_attn_dropout=0.0,
                 relu_dropout=0.0,
                 res_dropout=0.0,
                 tgt_attn_dropout=0.0,
                 crossmodal=None):
        super().__init__()
        self.dropout = 0  # Embedding dropout
        # self.attn_dropout = attn_dropout
        self.embed_dim = embed_dim
        self.embed_scale = 1
        # self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = SinusoidalPositionalEmbedding(embed_dim)

        # self.crossmodal = crossmodal

        # self.attn_mask = attn_mask

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerDecoderLayer(embed_dim=embed_dim,
                                    num_heads=num_heads,
                                    src_attn_dropout=src_attn_dropout,
                                    relu_dropout=relu_dropout,
                                    res_dropout=res_dropout,
                                    tgt_attn_dropout=tgt_attn_dropout)
            for _ in range(layers)
        ])
        self.register_buffer('version', torch.Tensor([2]))

Example no. 2
    def __init__(self,
                 embed_dim,
                 num_heads,
                 layers,
                 attn_dropout=0.0,
                 relu_dropout=0.0,
                 res_dropout=0.0,
                 embed_dropout=0.0,
                 attn_mask=False):
        super().__init__()
        self.dropout = embed_dropout  # Embedding dropout
        self.attn_dropout = attn_dropout
        self.embed_dim = embed_dim
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = SinusoidalPositionalEmbedding(embed_dim)

        self.attn_mask = attn_mask

        self.layers = nn.ModuleList([])
        for layer in range(layers):
            new_layer = TransformerEncoderLayer(embed_dim,
                                                num_heads=num_heads,
                                                attn_dropout=attn_dropout,
                                                relu_dropout=relu_dropout,
                                                res_dropout=res_dropout,
                                                attn_mask=attn_mask)
            self.layers.append(new_layer)

        self.register_buffer('version', torch.Tensor([2]))
        self.normalize = True
        if self.normalize:
            self.layer_norm = LayerNorm(embed_dim)
Example no. 3
    def __init__(self,
                 embed_dim,
                 num_heads,
                 layers,
                 attn_dropout,
                 relu_dropout,
                 res_dropout,
                 attn_mask=False):
        super().__init__()
        self.dropout = 0.3  # Embedding dropout
        self.attn_dropout = attn_dropout
        self.embed_dim = embed_dim
        self.embed_scale = 1
        self.embed_positions = SinusoidalPositionalEmbedding(embed_dim)
        self.attn_mask = attn_mask
        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(embed_dim=embed_dim,
                                    num_heads=num_heads,
                                    attn_dropout=attn_dropout,
                                    relu_dropout=relu_dropout,
                                    res_dropout=res_dropout,
                                    attn_mask=attn_mask) for _ in range(layers)
        ])
        self.register_buffer('version', torch.Tensor([2]))
Example no. 4
    def __init__(self,
                 embed_dim,
                 num_heads,
                 layers,
                 src_attn_dropout=0.0,
                 relu_dropout=0.0,
                 res_dropout=0.0,
                 tgt_attn_dropout=0.0):
        super().__init__()
        self.dropout = 0.3  # Embedding dropout
        self.embed_dim = embed_dim
        self.embed_scale = 1
        self.embed_positions = SinusoidalPositionalEmbedding(embed_dim)
        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerConcatDecoderLayer(embed_dim=embed_dim,
                                          num_heads=num_heads,
                                          src_attn_dropout=src_attn_dropout,
                                          relu_dropout=relu_dropout,
                                          res_dropout=res_dropout,
                                          tgt_attn_dropout=tgt_attn_dropout)
            for _ in range(layers)
        ])
        self.register_buffer('version', torch.Tensor([2]))
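
The constructors in Examples no. 1 and no. 4 build the same kind of stack but over different layer classes (TransformerDecoderLayer vs. TransformerConcatDecoderLayer) and with a different fixed embedding dropout (0 vs. 0.3). Below is a minimal instantiation sketch; the enclosing class name TransformerConcatDecoder and all hyperparameter values are illustrative assumptions, since the snippets show only __init__.

# Hypothetical instantiation of the stack from Example no. 4.
# `TransformerConcatDecoder` is an assumed name for the enclosing class
# (the snippet above shows only its __init__); hyperparameters are illustrative.
decoder = TransformerConcatDecoder(embed_dim=40,
                                   num_heads=8,
                                   layers=4,
                                   src_attn_dropout=0.1,
                                   relu_dropout=0.1,
                                   res_dropout=0.1,
                                   tgt_attn_dropout=0.1)
print(len(decoder.layers))  # 4 TransformerConcatDecoderLayer modules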
Example no. 5
class TransformerEncoder(nn.Module):
    """
    Transformer encoder consisting of *layers* layers, each of which is a
    :class:`TransformerEncoderLayer`.
    Args:
        embed_dim (int): embedding dimension
        num_heads (int): number of attention heads
        layers (int): number of layers
        attn_dropout (float): dropout applied on the attention weights
        relu_dropout (float): dropout applied on the first layer of the residual block
        res_dropout (float): dropout applied on the residual block
        attn_mask (bool): whether to apply a mask on the attention weights
        crossmodal (bool): whether this is a cross-modal transformer
    """
    def __init__(self,
                 embed_dim,
                 num_heads,
                 layers,
                 attn_dropout=0.0,
                 relu_dropout=0.0,
                 res_dropout=0.0,
                 attn_mask=False,
                 crossmodal=None):
        super().__init__()
        self.dropout = 0  # Embedding dropout
        self.attn_dropout = attn_dropout
        self.embed_dim = embed_dim
        self.embed_scale = 1
        # self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = SinusoidalPositionalEmbedding(embed_dim)

        self.crossmodal = crossmodal

        self.attn_mask = attn_mask

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(embed_dim=embed_dim,
                                    num_heads=num_heads,
                                    attn_dropout=attn_dropout,
                                    relu_dropout=relu_dropout,
                                    res_dropout=res_dropout,
                                    attn_mask=attn_mask) for _ in range(layers)
        ])
        self.register_buffer('version', torch.Tensor([2]))

    def forward(self, input_A, input_B):
        """
        Args:
            input_A (FloatTensor): embedded input for the first modality, of shape
                `(src_len, batch, embed_dim)`
            input_B (FloatTensor): embedded input for the second modality, of shape
                `(src_len, batch, embed_dim)`
        Returns:
            tuple:
                - **input_A** (Tensor): last encoder layer's output for the first
                  modality, of shape `(src_len, batch, embed_dim)`
                - **input_B** (Tensor): last encoder layer's output for the second
                  modality, of shape `(src_len, batch, embed_dim)`
        """
        input_A = self.scale_embed_position_dropout(input_A)
        input_B = self.scale_embed_position_dropout(input_B)
        # For each transformer encoder layer:
        for layer in self.layers:
            input_A, input_B = layer(input_A, input_B)
        return input_A, input_B

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        # NOTE: `self.max_source_positions` is expected to be set by the caller
        # or a subclass; it is not defined in __init__ above.
        if self.embed_positions is None:
            return self.max_source_positions
        return min(self.max_source_positions,
                   self.embed_positions.max_positions())

    def scale_embed_position_dropout(self, x_in):
        # Scale the embeddings, add the sinusoidal positional embedding,
        # then apply embedding dropout.
        x = self.embed_scale * x_in
        if self.embed_positions is not None:
            x += self.embed_positions(x_in.transpose(0, 1)[:, :, 0]).transpose(0, 1)
        x = F.dropout(x, p=self.dropout, training=self.training)
        return x
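
A short usage sketch for the cross-modal encoder above: both modality streams are scaled, given sinusoidal positional embeddings and embedding dropout, and then passed jointly through every layer. The tensor shapes follow the docstring; the concrete dimensions and hyperparameters below are illustrative assumptions.

# Usage sketch for the two-stream encoder above (dimensions are assumptions).
import torch

encoder = TransformerEncoder(embed_dim=40,
                             num_heads=8,
                             layers=4,
                             attn_dropout=0.1,
                             crossmodal=True)

src_len, batch = 20, 16
input_A = torch.randn(src_len, batch, 40)  # modality A, (src_len, batch, embed_dim)
input_B = torch.randn(src_len, batch, 40)  # modality B, same shape
out_A, out_B = encoder(input_A, input_B)   # outputs keep the input shape

The variant below takes a single primary stream with optional key/value streams for cross-modal attention, scales embeddings by math.sqrt(embed_dim), applies a configurable embedding dropout, and finishes with a LayerNorm.
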
class TransformerEncoder(nn.Module):
    """
    Transformer encoder consisting of *layers* layers, each of which is a
    :class:`TransformerEncoderLayer`.
    Args:
        embed_dim (int): embedding dimension
        num_heads (int): number of attention heads
        layers (int): number of layers
        attn_dropout (float): dropout applied on the attention weights
        relu_dropout (float): dropout applied on the first layer of the residual block
        res_dropout (float): dropout applied on the residual block
        embed_dropout (float): dropout applied on the embeddings
        attn_mask (bool): whether to apply a mask on the attention weights
    """
    def __init__(self,
                 embed_dim,
                 num_heads,
                 layers,
                 attn_dropout=0.0,
                 relu_dropout=0.0,
                 res_dropout=0.0,
                 embed_dropout=0.0,
                 attn_mask=False):
        super().__init__()
        self.dropout = embed_dropout  # Embedding dropout
        self.attn_dropout = attn_dropout
        self.embed_dim = embed_dim
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = SinusoidalPositionalEmbedding(embed_dim)

        self.attn_mask = attn_mask

        self.layers = nn.ModuleList([])
        for layer in range(layers):
            new_layer = TransformerEncoderLayer(embed_dim,
                                                num_heads=num_heads,
                                                attn_dropout=attn_dropout,
                                                relu_dropout=relu_dropout,
                                                res_dropout=res_dropout,
                                                attn_mask=attn_mask)
            self.layers.append(new_layer)

        self.register_buffer('version', torch.Tensor([2]))
        self.normalize = True
        if self.normalize:
            self.layer_norm = LayerNorm(embed_dim)

    def forward(self, x_in, x_in_k=None, x_in_v=None):
        """
        Args:
            x_in (FloatTensor): embedded input of shape `(src_len, batch, embed_dim)`
            x_in_k (FloatTensor, optional): embedded key input of shape
                `(src_len, batch, embed_dim)`; used for cross-modal attention
            x_in_v (FloatTensor, optional): embedded value input of shape
                `(src_len, batch, embed_dim)`; used for cross-modal attention
        Returns:
            Tensor: the last encoder layer's output (after the final layer norm)
            of shape `(src_len, batch, embed_dim)`
        """
        # embed tokens and positions
        x = self.embed_scale * x_in
        if self.embed_positions is not None:
            x += self.embed_positions(x_in.transpose(0, 1)[:, :, 0]).transpose(
                0, 1)  # Add positional embedding
        x = F.dropout(x, p=self.dropout, training=self.training)

        if x_in_k is not None and x_in_v is not None:
            # embed tokens and positions
            x_k = self.embed_scale * x_in_k
            x_v = self.embed_scale * x_in_v
            if self.embed_positions is not None:
                x_k += self.embed_positions(x_in_k.transpose(
                    0, 1)[:, :, 0]).transpose(0, 1)  # Add positional embedding
                x_v += self.embed_positions(x_in_v.transpose(
                    0, 1)[:, :, 0]).transpose(0, 1)  # Add positional embedding
            x_k = F.dropout(x_k, p=self.dropout, training=self.training)
            x_v = F.dropout(x_v, p=self.dropout, training=self.training)

        # encoder layers
        intermediates = [x]
        for layer in self.layers:
            if x_in_k is not None and x_in_v is not None:
                x = layer(x, x_k, x_v)
            else:
                x = layer(x)
            intermediates.append(x)

        if self.normalize:
            x = self.layer_norm(x)

        return x

    def max_positions(self):
        """Maximum input length supported by the encoder."""
        # NOTE: as in the previous class, `self.max_source_positions` must be
        # provided externally; it is not set in __init__.
        if self.embed_positions is None:
            return self.max_source_positions
        return min(self.max_source_positions,
                   self.embed_positions.max_positions())
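
A corresponding usage sketch for this second variant: called with only x_in it behaves as a standard self-attention encoder, while passing x_in_k and x_in_v routes every layer through cross-modal attention. Shapes follow the docstring; the hyperparameter values below are illustrative assumptions.

# Usage sketch for the encoder variant above (hyperparameters are assumptions).
import torch

encoder = TransformerEncoder(embed_dim=30,
                             num_heads=5,
                             layers=4,
                             attn_dropout=0.1,
                             relu_dropout=0.1,
                             res_dropout=0.1,
                             embed_dropout=0.25,
                             attn_mask=True)

x = torch.randn(50, 24, 30)  # (src_len, batch, embed_dim)
h_self = encoder(x)          # self-attention only
h_cross = encoder(x,
                  x_in_k=torch.randn(50, 24, 30),  # key stream from another modality
                  x_in_v=torch.randn(50, 24, 30))  # value stream from another modality
# h_self and h_cross both have shape (src_len, batch, embed_dim).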