예제 #1
0
 def test_get_config(self):
   """Verify a block rebuilt via from_config reports an identical config."""
   uniform_init = tf.keras.initializers.RandomUniform(minval=0., maxval=1.)
   block_kwargs = dict(
       num_attention_heads=2,
       inner_dim=32,
       inner_activation='relu',
       output_dropout=0.1,
       attention_dropout=0.1,
       inner_dropout=0.1,
       use_bias=False,
       norm_first=True,
       norm_epsilon=1e-6,
       attention_initializer=uniform_init,
   )
   original_block = TransformerEncoderBlock(**block_kwargs)
   original_config = original_block.get_config()

   # Round-trip: construct a second block purely from the serialized config
   # and make sure it serializes back to the same thing.
   restored_block = TransformerEncoderBlock.from_config(original_config)
   self.assertEqual(original_config, restored_block.get_config())
예제 #2
0
 def test_several_attention_axes(self, attention_axes):
   """Verify the block keeps the input shape for the given attention axes.

   Args:
     attention_axes: Value forwarded unchanged to the block's
       `attention_axes` constructor argument.
   """
   layer_kwargs = dict(
       num_attention_heads=10,
       attention_axes=attention_axes,
       inner_dim=32,
       inner_activation='relu',
       output_dropout=0.1,
       attention_dropout=0.1,
       inner_dropout=0.1,
       use_bias=False,
       norm_first=True,
       norm_epsilon=1e-6,
   )
   layer_under_test = TransformerEncoderBlock(**layer_kwargs)

   # Three explicit dimensions (rows, cols, width); the leading dimension of
   # the symbolic input is implicit.
   input_shape = (21, 13, 80)
   symbolic_input = tf.keras.Input(shape=input_shape)
   symbolic_output = layer_under_test(symbolic_input)

   # With default settings the output shape is expected to equal the input
   # shape.
   expected_shape = symbolic_input.shape.as_list()
   actual_shape = symbolic_output.shape.as_list()
   self.assertEqual(expected_shape, actual_shape)
예제 #3
0
 def test_use_bias_norm_first(self):
   """Run a forward pass with use_bias=False, norm_first=True; check shape."""
   hidden_size = 16
   # Shapes are kept in one place so the input and the expected output
   # cannot drift apart.
   tensor_shape = [2, 4, hidden_size]
   mask_shape = [2, 4, 4]

   uniform_init = tf.keras.initializers.RandomUniform(minval=0., maxval=1.)
   block = TransformerEncoderBlock(
       num_attention_heads=2,
       inner_dim=32,
       inner_activation='relu',
       use_bias=False,
       norm_first=True,
       norm_epsilon=1e-6,
       output_dropout=0.1,
       attention_dropout=0.1,
       inner_dropout=0.1,
       attention_initializer=uniform_init)

   # Forward path: the block is called with a [tensor, mask] pair of
   # all-zero float32 tensors.
   zero_tensor = tf.zeros(tensor_shape, dtype=tf.float32)
   zero_mask = tf.zeros(mask_shape, dtype=tf.float32)
   result = block([zero_tensor, zero_mask])
   self.assertEqual(result.shape, tuple(tensor_shape))