Example 1
    def __init__(self, d_model, d_ff, n_heads, dropout, dropout_att,
                 dropout_layer, layer_norm_eps, ffn_activation, param_init):
        super(SyncBidirTransformerDecoderBlock, self).__init__()

        self.n_heads = n_heads

        # synchronous bidirectional attention
        self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps)
        from neural_sp.models.modules.sync_bidir_multihead_attention import (
            SyncBidirMultiheadAttentionMechanism as SyncBidirMHA)
        self.self_attn = SyncBidirMHA(kdim=d_model,
                                      qdim=d_model,
                                      adim=d_model,
                                      n_heads=n_heads,
                                      dropout=dropout_att,
                                      param_init=param_init)

        # attention over encoder stacks
        self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps)
        self.src_attn = MHA(kdim=d_model,
                            qdim=d_model,
                            adim=d_model,
                            n_heads=n_heads,
                            dropout=dropout_att,
                            param_init=param_init)

        # feed-forward
        self.norm3 = nn.LayerNorm(d_model, eps=layer_norm_eps)
        self.feed_forward = FFN(d_model, d_ff, dropout, ffn_activation,
                                param_init)

        self.dropout = nn.Dropout(p=dropout)
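
A minimal instantiation sketch for the block defined above. Only the argument names come from the constructor signature; the numeric values and the strings passed for ffn_activation and param_init are illustrative assumptions, not values taken from the source.

# Hypothetical usage sketch; all values below are assumptions for illustration.
block = SyncBidirTransformerDecoderBlock(d_model=256,
                                         d_ff=2048,
                                         n_heads=4,
                                         dropout=0.1,
                                         dropout_att=0.1,
                                         dropout_layer=0.0,
                                         layer_norm_eps=1e-12,
                                         ffn_activation='relu',
                                         param_init='xavier_uniform')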
Example 2
    def __init__(self, d_model, d_ff, n_heads, dropout, dropout_att,
                 dropout_residual, layer_norm_eps, ffn_activation, param_init):
        super(SyncBidirTransformerDecoderBlock, self).__init__()

        self.n_heads = n_heads

        # synchronous bidirectional attention
        self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps)
        self.self_attn = SyncBidirMHA(kdim=d_model,
                                      qdim=d_model,
                                      adim=d_model,
                                      n_heads=n_heads,
                                      dropout=dropout_att,
                                      param_init=param_init)

        # attention over encoder stacks
        self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps)
        self.src_attn = MHA(kdim=d_model,
                            qdim=d_model,
                            adim=d_model,
                            n_heads=n_heads,
                            dropout=dropout_att,
                            param_init=param_init)

        # feed-forward
        self.norm3 = nn.LayerNorm(d_model, eps=layer_norm_eps)
        self.feed_forward = FFN(d_model, d_ff, dropout, ffn_activation,
                                param_init)

        self.dropout = nn.Dropout(p=dropout)
        # rate used for stochastic residual/layer dropping during training
        self.death_rate = dropout_residual
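
The corresponding sketch for this variant, which replaces dropout_layer with dropout_residual (stored as death_rate). Values are again illustrative assumptions, not taken from the source.

# Hypothetical usage sketch for the variant with residual (layer) dropping.
block = SyncBidirTransformerDecoderBlock(d_model=256,
                                         d_ff=2048,
                                         n_heads=4,
                                         dropout=0.1,
                                         dropout_att=0.1,
                                         dropout_residual=0.1,  # assumed stochastic-depth rate
                                         layer_norm_eps=1e-12,
                                         ffn_activation='relu',
                                         param_init='xavier_uniform')

Presumably the forward pass (not shown here) consults death_rate to skip the block's residual branches with that probability during training, in the style of stochastic depth.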