Example #1
    def __init__(self,
                 attention_cell='multi_head',
                 units=128,
                 hidden_size=512,
                 num_heads=4,
                 scaled=True,
                 dropout=0.0,
                 use_residual=True,
                 output_attention=False,
                 weight_initializer=None,
                 bias_initializer='zeros',
                 prefix=None,
                 params=None):
        super(TransformerDecoderCell, self).__init__(prefix=prefix,
                                                     params=params)
        self._units = units
        self._num_heads = num_heads
        self._dropout = dropout
        self._use_residual = use_residual
        self._output_attention = output_attention
        self._scaled = scaled
        with self.name_scope():
            self.dropout_layer = nn.Dropout(dropout)
            # Self-attention over the decoder inputs.
            self.attention_cell_in = _get_attention_cell(attention_cell,
                                                         units=units,
                                                         num_heads=num_heads,
                                                         scaled=scaled,
                                                         dropout=dropout)
            # Attention over the encoder memory (encoder-decoder attention).
            self.attention_cell_inter = _get_attention_cell(
                attention_cell,
                units=units,
                num_heads=num_heads,
                scaled=scaled,
                dropout=dropout)
            self.proj_in = nn.Dense(units=units,
                                    flatten=False,
                                    use_bias=False,
                                    weight_initializer=weight_initializer,
                                    bias_initializer=bias_initializer,
                                    prefix='proj_in_')
            self.proj_inter = nn.Dense(units=units,
                                       flatten=False,
                                       use_bias=False,
                                       weight_initializer=weight_initializer,
                                       bias_initializer=bias_initializer,
                                       prefix='proj_inter_')
            self.ffn = PositionwiseFFN(hidden_size=hidden_size,
                                       units=units,
                                       use_residual=use_residual,
                                       dropout=dropout,
                                       weight_initializer=weight_initializer,
                                       bias_initializer=bias_initializer)

            self.layer_norm_in = nn.LayerNorm()
            self.layer_norm_inter = nn.LayerNorm()
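The _get_attention_cell helper used throughout these examples resolves a string name into a GluonNLP attention cell; for 'multi_head' it wraps a dot-product cell in a MultiHeadAttentionCell. Below is a minimal standalone sketch of that cell on its own (the import path assumes a GluonNLP 0.x layout, and the batch size, sequence lengths, and shapes are illustrative assumptions, not taken from the example above):

import mxnet as mx
from gluonnlp.model.attention_cell import _get_attention_cell

# Build the same multi-head self-attention cell the decoder cell above creates.
attention_cell = _get_attention_cell('multi_head', units=128, num_heads=4,
                                     scaled=True, dropout=0.0)
attention_cell.initialize()

query = mx.nd.random.normal(shape=(2, 10, 128))  # (batch, query_length, units)
key = mx.nd.random.normal(shape=(2, 10, 128))    # (batch, memory_length, units)

# An attention cell returns the attended context and the attention weights;
# when no explicit value is given, the key also serves as the value.
context_vec, att_weights = attention_cell(query, key)
print(context_vec.shape)  # (2, 10, 128) with these settings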
Example #2
    def __init__(self, cell_type='lstm', attention_cell='scaled_luong',
                 num_layers=2, hidden_size=128,
                 dropout=0.0, use_residual=True, output_attention=False,
                 i2h_weight_initializer=None, h2h_weight_initializer=None,
                 i2h_bias_initializer='zeros', h2h_bias_initializer='zeros',
                 prefix=None, params=None):
        super(GNMTDecoder, self).__init__(prefix=prefix, params=params)
        self._cell_type = _get_cell_type(cell_type)
        self._num_layers = num_layers
        self._hidden_size = hidden_size
        self._dropout = dropout
        self._use_residual = use_residual
        self._output_attention = output_attention
        with self.name_scope():
            self.attention_cell = _get_attention_cell(attention_cell, units=hidden_size)
            self.dropout_layer = nn.Dropout(dropout)
            # One recurrent cell per decoder layer.
            self.rnn_cells = nn.HybridSequential()
            for i in range(num_layers):
                self.rnn_cells.add(
                    self._cell_type(hidden_size=self._hidden_size,
                                    i2h_weight_initializer=i2h_weight_initializer,
                                    h2h_weight_initializer=h2h_weight_initializer,
                                    i2h_bias_initializer=i2h_bias_initializer,
                                    h2h_bias_initializer=h2h_bias_initializer,
                                    prefix='rnn%d_' % i))
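In the GNMT decoder above, the 'scaled_luong' cell is queried once per decoding step, with the current hidden state attending over the encoder outputs. A minimal sketch of that single attention step in isolation (the batch size, source length, and single-step query shape are assumptions made for illustration):

import mxnet as mx
from gluonnlp.model.attention_cell import _get_attention_cell

# Scaled Luong-style dot-product attention, as used by the decoder above.
attention_cell = _get_attention_cell('scaled_luong', units=128)
attention_cell.initialize()

decoder_state = mx.nd.random.normal(shape=(4, 1, 128))     # query for one step
encoder_outputs = mx.nd.random.normal(shape=(4, 20, 128))  # memory to attend over

context_vec, att_weights = attention_cell(decoder_state, encoder_outputs)
print(context_vec.shape)  # (4, 1, 128)
print(att_weights.shape)  # (4, 1, 20)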
Example #3
    def __init__(self,
                 conv_channels,
                 embed_dim,
                 normalization_constant=0.5,
                 attention_cell='dot',
                 weight_initializer=None,
                 bias_initializer='zeros',
                 prefix=None,
                 params=None):
        super(FConvAttentionLayer, self).__init__(prefix=prefix, params=params)
        self._normalization_constant = normalization_constant
        # projects from output of convolution to embedding dimension
        self.in_projection = nn.Dense(embed_dim,
                                      flatten=False,
                                      in_units=conv_channels,
                                      weight_initializer=weight_initializer,
                                      bias_initializer=bias_initializer,
                                      prefix=prefix + 'in_proj_')
        self.attention_layer = _get_attention_cell(attention_cell)
        # projects from embedding dimension to convolution size
        self.out_projection = nn.Dense(conv_channels,
                                       flatten=False,
                                       in_units=embed_dim,
                                       weight_initializer=weight_initializer,
                                       bias_initializer=bias_initializer,
                                       prefix=prefix + 'out_proj_')
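The three sub-blocks above imply the following data flow: project the decoder's convolutional features down to the embedding dimension, attend over the encoder states with a plain dot-product cell, then project the context back to the convolution width. The sketch below wires those pieces up outside the class (the dimensions, batch size, and sequence lengths are assumptions; the real forward pass additionally mixes in the target embedding and applies the normalization constant, which this sketch omits):

import mxnet as mx
from mxnet.gluon import nn
from gluonnlp.model.attention_cell import _get_attention_cell

# Stand-ins for the layer's three sub-blocks, with hypothetical sizes:
# conv_channels=256 and embed_dim=128.
in_projection = nn.Dense(128, flatten=False, in_units=256, prefix='in_proj_')
attention_layer = _get_attention_cell('dot')
out_projection = nn.Dense(256, flatten=False, in_units=128, prefix='out_proj_')
for block in (in_projection, attention_layer, out_projection):
    block.initialize()

conv_out = mx.nd.random.normal(shape=(4, 30, 256))        # decoder conv features
encoder_states = mx.nd.random.normal(shape=(4, 25, 128))  # encoder representations

query = in_projection(conv_out)                            # (4, 30, 128)
context_vec, att_weights = attention_layer(query, encoder_states)
output = out_projection(context_vec)                       # back to (4, 30, 256)
print(output.shape)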