    def __init__(self, d_model, heads, d_ff, dropout):
        super(TransformerEncoderLayer, self).__init__()

        # Multi-head self-attention sublayer.
        self.self_attn = onmt.sublayer.MultiHeadedAttention(heads,
                                                            d_model,
                                                            dropout=dropout)

        # Position-wise feed-forward sublayer (d_model -> d_ff -> d_model).
        self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.dropout = nn.Dropout(dropout)
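For context, a minimal sketch of the forward pass that typically pairs with this constructor, assuming OpenNMT-py-style pre-norm wiring and that self_attn(q, k, v, mask=...) returns an (output, attention) pair; neither detail appears in this listing:

# Sketch only: argument order and return value of MultiHeadedAttention are
# assumptions, not taken from the snippet above.
def forward(self, inputs, mask):
    input_norm = self.layer_norm(inputs)                  # pre-norm
    context, _ = self.self_attn(input_norm, input_norm,
                                input_norm, mask=mask)    # self-attention
    out = self.dropout(context) + inputs                  # residual connection
    return self.feed_forward(out)                         # feed-forward sublayer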
Example 2
  def __init__(self, d_model, heads, d_ff, dropout):
    super(TransformerEncoderLayer, self).__init__()
    self.self_attn = onmt.sublayer.MultiHeadedAttention(heads, d_model, dropout=dropout)

    # Conv1d over a separate 64-channel structural feature stream
    # (the channel size is hard-coded in this variant).
    self.cnn = nn.Conv1d(64, 64, 4)

    # d_ff (int): hidden size of the inner layer of the feed-forward network.
    self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
    self.att_layer_norm = nn.LayerNorm(d_model, eps=1e-6)    # LayerNorm for the attention sublayer
    self.ffn_layer_norm = nn.LayerNorm(d_model, eps=1e-6)    # LayerNorm for the feed-forward sublayer
    self.structure_layer_norm = nn.LayerNorm(64, eps=1e-6)   # LayerNorm for the structural features
    self.dropout = nn.Dropout(dropout)
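This variant routes a 64-channel structural feature stream through a Conv1d. As a quick reminder of the shape contract (a standalone check, not taken from the listing), nn.Conv1d(64, 64, 4) expects (batch, 64, length) input and, with no padding, shortens the length dimension by kernel_size - 1:

import torch
import torch.nn as nn

# Standalone shape check for the hard-coded Conv1d above (illustration only).
conv = nn.Conv1d(64, 64, 4)
structure = torch.rand(2, 64, 10)    # hypothetical (batch, channels, length)
print(conv(structure).shape)         # torch.Size([2, 64, 7]): length shrinks by 3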
Example 3
    def __init__(self, d_model, heads, d_ff, dropout):
        super(TransformerDecoderLayer, self).__init__()

        # Masked self-attention over the decoder's own outputs.
        self.self_attn = onmt.sublayer.MultiHeadedAttention(
            heads, d_model, dropout=dropout)

        # Attention over the encoder output (the context / memory bank).
        self.context_attn = onmt.sublayer.MultiHeadedAttention(
            heads, d_model, dropout=dropout)

        self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)

        self.layer_norm_1 = nn.LayerNorm(d_model, eps=1e-6)
        self.layer_norm_2 = nn.LayerNorm(d_model, eps=1e-6)

        # Keep the raw dropout rate and a Dropout module separately.
        self.dropout = dropout
        self.drop = nn.Dropout(dropout)

        # MAX_SIZE is a module-level constant in the original source.
        mask = self._get_attn_subsequent_mask(MAX_SIZE)
        # Register self.mask as a buffer in TransformerDecoderLayer, so
        # it gets TransformerDecoderLayer's cuda behavior automatically.
        self.register_buffer('mask', mask)
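The helper that builds the cached mask is not shown in this listing. A minimal sketch of what _get_attn_subsequent_mask(size) conventionally returns, assuming the usual causal-mask construction (an upper-triangular matrix marking the future positions each step may not attend to); the actual helper in the source may differ in dtype or construction:

import torch

def _get_attn_subsequent_mask(size):
    # 1s strictly above the diagonal mark positions that must NOT be attended to.
    return torch.triu(torch.ones(1, size, size, dtype=torch.uint8), diagonal=1)

print(_get_attn_subsequent_mask(4)[0])
# tensor([[0, 1, 1, 1],
#         [0, 0, 1, 1],
#         [0, 0, 0, 1],
#         [0, 0, 0, 0]], dtype=torch.uint8)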