def __init__(self,
             num_units,
             num_heads,
             ffn_inner_dim,
             dropout=0.1,
             attention_dropout=0.1,
             relu_dropout=0.1,
             **kwargs):
  """Initializes the layer.

  Args:
    num_units: The number of hidden units.
    num_heads: The number of heads in the multi-head attention.
    ffn_inner_dim: The number of units of the inner linear transformation
      in the feed forward layer.
    dropout: The probability to drop units from the outputs.
    attention_dropout: The probability to drop units from the attention.
    relu_dropout: The probability to drop units from the ReLU activation in
      the feed forward layer.
    **kwargs: Additional layer arguments.
  """
  super(_SelfAttentionEncoderLayer, self).__init__(**kwargs)
  self.self_attention = transformer.MultiHeadAttention(
      num_heads, num_units, dropout=attention_dropout)
  self.self_attention = common.LayerWrapper(
      self.self_attention,
      normalize_input=True,
      output_dropout=dropout,
      residual_connection=True)
  self.ffn = transformer.FeedForwardNetwork(
      ffn_inner_dim, num_units, dropout=relu_dropout)
  self.ffn = common.LayerWrapper(
      self.ffn,
      normalize_input=True,
      output_dropout=dropout,
      residual_connection=True)
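# Hedged illustration (not part of the library): a minimal standalone sketch of
# the pre-norm residual pattern that common.LayerWrapper is configured with
# above (normalize_input=True, output_dropout=dropout, residual_connection=True).
# The class and attribute names below are hypothetical, and the wrapped layer is
# assumed to preserve the input depth so the residual addition is valid.
import tensorflow as tf

class _PreNormResidualSketch(tf.keras.layers.Layer):

  def __init__(self, layer, output_dropout=0.1, **kwargs):
    super().__init__(**kwargs)
    self.layer = layer
    self.norm = tf.keras.layers.LayerNormalization()
    self.dropout = tf.keras.layers.Dropout(output_dropout)

  def call(self, inputs, training=None):
    # Normalize the input, run the wrapped layer, drop units from its output,
    # then add the original input back as a residual connection.
    outputs = self.layer(self.norm(inputs))
    outputs = self.dropout(outputs, training=training)
    return inputs + outputs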
def __init__(self,
             num_layers,
             num_units,
             bidirectional=False,
             reducer=reducer_lib.ConcatReducer(),
             dropout=0,
             residual_connections=False,
             **kwargs):
  """Initializes the layer.

  Args:
    num_layers: Number of stacked GRU layers.
    num_units: Dimension of the output space of each GRU.
    bidirectional: Make each layer bidirectional.
    reducer: A :class:`opennmt.layers.Reducer` instance to merge the
      bidirectional states and outputs of each layer.
    dropout: The probability to drop units in each layer output.
    residual_connections: If ``True``, each layer input will be added to its
      output.
    **kwargs: Additional layer arguments.
  """
  super(GRU, self).__init__(**kwargs)
  rnn_layers = [
      _RNNWrapper(
          tf.keras.layers.GRU(num_units, return_sequences=True, return_state=True),
          bidirectional=bidirectional,
          reducer=reducer)
      for _ in range(num_layers)
  ]
  self.layers = [
      common.LayerWrapper(
          layer,
          output_dropout=dropout,
          residual_connection=residual_connections)
      for layer in rnn_layers
  ]
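# Hedged illustration (not part of the library): a Keras-only approximation of a
# single layer built above, i.e. a bidirectional GRU whose forward and backward
# outputs are concatenated, followed by output dropout. The residual connection
# is omitted here because the concatenation doubles the output depth.
import tensorflow as tf

_inputs = tf.random.uniform([2, 7, 32])  # batch x time x depth
_bi_gru = tf.keras.layers.Bidirectional(
    tf.keras.layers.GRU(32, return_sequences=True), merge_mode="concat")
_outputs = tf.keras.layers.Dropout(0.3)(_bi_gru(_inputs), training=True)
# _outputs has shape [2, 7, 64]: forward and backward outputs are concatenated.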
def __init__(self, num_layers=6, num_units=1024, cell_class=None, dropout=0.3):
  """Initializes the parameters of the encoder.

  Args:
    num_layers: The number of layers.
    num_units: The number of units in each RNN layer and the final output.
    cell_class: The inner cell class or a callable taking :obj:`num_units` as
      argument and returning a cell. Defaults to a layer normalized LSTM cell.
    dropout: The probability to drop units in each layer output.
  """
  if cell_class is None:
    cell_class = tfa.rnn.LayerNormLSTMCell
  layers = [
      RNNEncoder(1, num_units, bidirectional=True, dropout=0.0, cell_class=cell_class)
      for _ in range(num_layers)
  ]
  layers = [
      common.LayerWrapper(layer, output_dropout=dropout, residual_connection=True)
      for layer in layers
  ]
  super().__init__(layers)
  self.dropout = dropout
  self.projection = tf.keras.layers.Dense(num_units)
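# Hedged illustration (not part of the library): cell_class may be any callable
# taking num_units and returning a cell, so a plain LSTM cell can be swapped in
# for the layer-normalized default. The function name below is hypothetical, and
# <ThisEncoder> stands in for the encoder class defined above.
import tensorflow as tf

def _plain_lstm_cell(num_units):
  return tf.keras.layers.LSTMCell(num_units)

# encoder = <ThisEncoder>(num_layers=6, num_units=1024, cell_class=_plain_lstm_cell)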
def testLayerWrapperInputOutputDepthMismatch(self):
  layer = common.LayerWrapper(tf.keras.layers.Dense(10))
  x = tf.random.uniform([4, 5, 5])
  y = layer(x)
  self.assertListEqual(y.shape.as_list(), [4, 5, 10])
def testLayerWrapper(self):
  layer = common.LayerWrapper(tf.keras.layers.Dense(10))
  x = tf.random.uniform([4, 5, 10])
  y = layer(x)
  self.assertEqual(y.shape, x.shape)
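# Hedged illustration (not part of the test suite): the same wrapper with the
# options used by the encoder layers above. With matching input and output
# depths, input normalization, output dropout, and the residual connection all
# preserve the shape. The method name below is hypothetical.
def testLayerWrapperNormDropoutResidualSketch(self):
  layer = common.LayerWrapper(
      tf.keras.layers.Dense(10),
      normalize_input=True,
      output_dropout=0.1,
      residual_connection=True)
  x = tf.random.uniform([4, 5, 10])
  y = layer(x)
  self.assertEqual(y.shape, x.shape)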