def __init__(self, h, d_model, d_per, d_ff, dropout):
    super(BlockLayer, self).__init__()
    self.self_attn = MultiHeadedAttention(h, d_model, d_per, dropout)  # multi-head attention layer of the block
    self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)  # FFN of the block
    self.sublayer = clones(SublayerConnection(d_model, dropout), 2)  # Add & LayerNorm around each sublayer
def __init__(self, d_model, d_inner_hid, n_head, d_k, d_v, dropout=0.1):
    super(EncoderLayer, self).__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)  # multi-head self-attention
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner_hid, dropout=dropout)  # position-wise feed-forward network
def __init__(self, d_model, d_inner, n_heads, d_k, d_v, dropout):
    super().__init__()
    self.slf_attention = Mutlti_Head_Attention(n_head=n_heads, d_model=d_model, d_k=d_k, d_v=d_v, dropout=dropout)  # multi-head self-attention
    self.fw = PositionwiseFeedForward(d_in=d_model, d_hid=d_inner, dropout=dropout)  # position-wise feed-forward network
def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.2):
    super().__init__()
    self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)  # multi-head self-attention
    self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)  # position-wise feed-forward network
    self.layer_norm = nn.LayerNorm(d_model)  # layer normalization applied in this layer's forward pass
    self.dropout = nn.Dropout(dropout)
def __init__(self, emb_dim, fnn_dim, dropout=0.1):
    super(SelfAttentionBlock, self).__init__()
    self.self_attention = SelfAttention(emb_dim, dropout=dropout)  # self-attention over the input embeddings
    self.ffn = PositionwiseFeedForward(emb_dim, fnn_dim, dropout=dropout)  # position-wise feed-forward network
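# All of the variants above pair a multi-head self-attention module with a
# position-wise feed-forward network. As a hedged illustration only, the sketch
# below shows how such a layer is typically wired together in its forward pass
# (residual connection + LayerNorm around each sub-layer). The class name
# EncoderLayerSketch is hypothetical, and nn.MultiheadAttention stands in for
# the custom MultiHeadAttention classes referenced in the snippets.

import torch
import torch.nn as nn


class EncoderLayerSketch(nn.Module):
    """Minimal Transformer encoder layer: self-attention + position-wise FFN."""

    def __init__(self, d_model, d_inner, n_head, dropout=0.1):
        super().__init__()
        self.slf_attn = nn.MultiheadAttention(d_model, n_head, dropout=dropout, batch_first=True)
        self.pos_ffn = nn.Sequential(
            nn.Linear(d_model, d_inner),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_inner, d_model),
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, attn_mask=None):
        # Sub-layer 1: multi-head self-attention with residual + LayerNorm.
        attn_out, _ = self.slf_attn(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout(attn_out))
        # Sub-layer 2: position-wise feed-forward with residual + LayerNorm.
        x = self.norm2(x + self.dropout(self.pos_ffn(x)))
        return x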