Example #1
    # Relies on torch.nn (imported as nn) and on MultiHeadAttention / PositionWiseFF
    # defined elsewhere in the same codebase.
    def __init__(
        self,
        hidden_size,
        inner_size,
        num_attention_heads=1,
        attn_score_dropout=0,
        attn_layer_dropout=0,
        ffn_dropout=0,
        hidden_act="relu",
        pre_ln=False,
    ):
        super().__init__()
        # pre_ln toggles pre-layer-normalization (norm applied before each
        # sub-layer) instead of the default post-layer-normalization order.
        self.pre_ln = pre_ln
        # Sub-layer 1: multi-head self-attention.
        self.layer_norm_1 = nn.LayerNorm(hidden_size, eps=1e-5)
        self.first_sub_layer = MultiHeadAttention(hidden_size,
                                                  num_attention_heads,
                                                  attn_score_dropout,
                                                  attn_layer_dropout)
        # Sub-layer 2: a second multi-head attention module
        # (typically cross-attention over encoder states in a decoder block).
        self.layer_norm_2 = nn.LayerNorm(hidden_size, eps=1e-5)
        self.second_sub_layer = MultiHeadAttention(hidden_size,
                                                   num_attention_heads,
                                                   attn_score_dropout,
                                                   attn_layer_dropout)
        # Sub-layer 3: position-wise feed-forward network.
        self.layer_norm_3 = nn.LayerNorm(hidden_size, eps=1e-5)
        self.third_sub_layer = PositionWiseFF(hidden_size, inner_size,
                                              ffn_dropout, hidden_act)
Example #2
    def __init__(
        self,
        hidden_size,
        inner_size,
        num_attention_heads=1,
        attn_score_dropout=0,
        attn_layer_dropout=0,
        ffn_dropout=0,
        hidden_act="relu",
    ):
        super().__init__()

        # Sub-layer 1: multi-head self-attention.
        self.first_sub_layer = MultiHeadAttention(
            hidden_size, num_attention_heads, attn_score_dropout, attn_layer_dropout
        )
        # Sub-layer 2: position-wise feed-forward network.
        self.second_sub_layer = PositionWiseFF(hidden_size, inner_size, ffn_dropout, hidden_act)
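
For context, a block with this constructor could be instantiated roughly as follows; the enclosing class name EncoderBlock and the hyperparameter values are purely illustrative and are not taken from the example.

    # Hypothetical usage; "EncoderBlock" stands in for whatever class owns the
    # __init__ shown in Example #2.
    block = EncoderBlock(
        hidden_size=512,          # width of each token representation
        inner_size=2048,          # hidden width of the feed-forward sub-layer
        num_attention_heads=8,
        attn_score_dropout=0.1,   # dropout on the attention probabilities
        attn_layer_dropout=0.1,   # dropout on the attention output
        ffn_dropout=0.1,
        hidden_act="relu",
    )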