def __init__(self, d_model, d_ff, cov_kernel_size, n_heads, slf_attn_dropout=0.0, ffn_dropout=0.0, residual_dropout=0.1, conv_dropout=0.0, macaron_style=True, conv_first=False, ffn_scale=0.5, conv_bias=True, relative_positional=True, activation='glu'):
    """Build one Conformer encoder block.

    Args:
        d_model: Model (embedding) dimension shared by all sub-modules.
        d_ff: Inner dimension of the position-wise feed-forward modules.
        cov_kernel_size: Kernel size of the convolution module.
        n_heads: Number of self-attention heads.
        slf_attn_dropout: Dropout rate inside self-attention.
        ffn_dropout: Dropout rate inside the feed-forward modules.
        residual_dropout: Dropout rate applied on residual branches
            (stored as a float; presumably applied functionally in
            ``forward`` — confirm against the rest of the class).
        conv_dropout: Dropout rate inside the convolution module.
        macaron_style: If True, add a half-step feed-forward module
            before attention (macaron structure).
        conv_first: If True, run convolution before attention.
        ffn_scale: Scaling factor for feed-forward residual branches.
        conv_bias: Whether the convolution module uses bias terms.
        relative_positional: If True, use relative-position self-attention.
        activation: Activation name for the feed-forward modules.
    """
    super().__init__()
    # Plain configuration flags consumed by forward().
    self.conv_first = conv_first
    self.macaron_style = macaron_style
    self.ffn_scale = ffn_scale
    self.relative_positional = relative_positional
    self.residual_dropout = residual_dropout
    # NOTE: sub-modules are created in the same order as the original
    # implementation so parameter initialization (RNG draws) is unchanged.
    if self.macaron_style:
        # Optional half-step FFN placed before the attention module.
        self.pre_ffn = PositionwiseFeedForward(d_model, d_ff, ffn_dropout, activation=activation)
        self.macaron_ffn_norm = nn.LayerNorm(d_model)
    if self.relative_positional:
        self.mha = MultiHeadedSelfAttentionWithRelPos(n_heads, d_model, slf_attn_dropout)
    else:
        self.mha = MultiHeadedSelfAttention(n_heads, d_model, slf_attn_dropout)
    self.mha_norm = nn.LayerNorm(d_model)
    self.conv = ConformerConvolutionModule(d_model, cov_kernel_size, conv_bias, conv_dropout)
    self.conv_norm = nn.LayerNorm(d_model)
    self.post_ffn = PositionwiseFeedForward(d_model, d_ff, ffn_dropout, activation=activation)
    self.post_ffn_norm = nn.LayerNorm(d_model)
    self.final_norm = nn.LayerNorm(d_model)
def __init__(self, n_heads, d_model, d_ff, memory_dim, slf_attn_dropout=0.0, src_attn_dropout=0.0, ffn_dropout=0.0, residual_dropout=0.1, normalize_before=False, concat_after=False, relative_positional=False, activation='relu'):
    """Build one Transformer decoder layer.

    Args:
        n_heads: Number of attention heads.
        d_model: Model (embedding) dimension.
        d_ff: Inner dimension of the position-wise feed-forward module.
        memory_dim: Dimension of the encoder memory attended by
            cross-attention.
        slf_attn_dropout: Dropout rate inside decoder self-attention.
        src_attn_dropout: Dropout rate inside encoder-decoder attention.
        ffn_dropout: Dropout rate inside the feed-forward module.
        residual_dropout: Dropout rate applied on each residual branch.
        normalize_before: If True, use pre-norm layer ordering.
        concat_after: If True, concatenate attention input and output
            and project back to ``d_model`` instead of a plain residual.
        relative_positional: If True, use relative-position self-attention.
        activation: Activation name for the feed-forward module.
    """
    super().__init__()
    self.relative_positional = relative_positional
    # NOTE: sub-modules are created in the same order as the original
    # implementation so parameter initialization (RNG draws) is unchanged.
    if self.relative_positional:
        self.slf_attn = MultiHeadedSelfAttentionWithRelPos(n_heads, d_model, slf_attn_dropout)
    else:
        self.slf_attn = MultiHeadedSelfAttention(n_heads, d_model, slf_attn_dropout)
    self.src_attn = MultiHeadedCrossAttention(n_heads, d_model, memory_dim, src_attn_dropout)
    self.feed_forward = PositionwiseFeedForward(d_model, d_ff, ffn_dropout, activation)
    # One LayerNorm / Dropout pair per sub-layer (self-attn, cross-attn, FFN).
    self.norm1 = nn.LayerNorm(d_model)
    self.norm2 = nn.LayerNorm(d_model)
    self.norm3 = nn.LayerNorm(d_model)
    self.dropout1 = nn.Dropout(residual_dropout)
    self.dropout2 = nn.Dropout(residual_dropout)
    self.dropout3 = nn.Dropout(residual_dropout)
    self.normalize_before = normalize_before
    self.concat_after = concat_after
    if self.concat_after:
        # Projections used when concatenating attention input with output.
        self.concat_linear1 = nn.Linear(d_model * 2, d_model)
        self.concat_linear2 = nn.Linear(d_model * 2, d_model)