Example #1
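These snippets are test methods lifted from a Keras-based test case and omit their surrounding scaffolding. A minimal sketch of the imports and host class they assume (the transformer module path and the class name here are assumptions, not part of the original snippets):

import numpy as np
import tensorflow as tf

# Assumed import path; in the TensorFlow Model Garden the layer lives under
# official.nlp.modeling.layers, but adjust to wherever Transformer is defined.
from official.nlp.modeling.layers import transformer


class TransformerLayerTest(tf.test.TestCase):
  # Hypothetical host class for the methods below; tf.test.TestCase supplies
  # assertAllClose and assertAllEqual. Methods that take an extra `_` argument
  # are presumably wrapped by a parameterized decorator in the original suite.
  ...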
    def test_layer_output_range(self, _):
        # XLA has an obvious numeric issue in this test case.
        test_layer = transformer.Transformer(num_attention_heads=10,
                                             intermediate_size=2048,
                                             intermediate_activation='relu')
        sequence_length = 21
        width = 80

        batch_size = 6
        input_data = 10 * np.random.random_sample(
            (batch_size, sequence_length, width))
        mask_data = np.random.randint(2,
                                      size=(batch_size, sequence_length,
                                            sequence_length))
        output_tensor = test_layer([input_data, mask_data])

        # With output_range=1, the layer computes attention only from the first
        # token and outputs that single token's embedding.
        new_layer = transformer.Transformer(num_attention_heads=10,
                                            intermediate_size=2048,
                                            intermediate_activation='relu',
                                            output_range=1)
        _ = new_layer([input_data, mask_data])
        new_layer.set_weights(test_layer.get_weights())
        new_output_tensor = new_layer([input_data, mask_data])
        self.assertAllClose(new_output_tensor,
                            output_tensor[:, 0:1, :],
                            atol=5e-5,
                            rtol=0.003)
Example #2
    def test_layer_invocation_with_mask(self):
        test_layer = transformer.Transformer(num_attention_heads=10,
                                             intermediate_size=2048,
                                             intermediate_activation='relu')
        sequence_length = 21
        width = 80
        # Create a 3-dimensional input (the first dimension is implicit).
        data_tensor = tf.keras.Input(shape=(sequence_length, width))
        # Create a 2-dimensional input (the first dimension is implicit).
        mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length))
        output_tensor = test_layer([data_tensor, mask_tensor])

        # Create a model from the test layer.
        model = tf.keras.Model([data_tensor, mask_tensor], output_tensor)

        # Invoke the model on test data. We can't validate the output data itself
        # (the NN is too complex), but this will rule out structural runtime errors.
        batch_size = 6
        input_data = 10 * np.random.random_sample(
            (batch_size, sequence_length, width))
        # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
        # which here is (batch, sequence_length, sequence_length).
        mask_data = np.random.randint(2,
                                      size=(batch_size, sequence_length,
                                            sequence_length))
        _ = model.predict([input_data, mask_data])
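The mask in this test is random. In practice, a (batch, from_seq_len, to_seq_len) attention mask is usually derived from a 2-D padding mask; a minimal, self-contained sketch (the padding_mask construction is illustrative and not part of the original test):

import numpy as np

batch_size, sequence_length = 6, 21
# 1 marks a real token, 0 marks padding.
padding_mask = np.random.randint(2, size=(batch_size, sequence_length))
# Broadcast along the query axis so every query position can attend only to
# the non-padded key positions.
attention_mask = np.tile(padding_mask[:, None, :], (1, sequence_length, 1))
assert attention_mask.shape == (batch_size, sequence_length, sequence_length)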
Example #3
 def test_layer_creation(self):
     test_layer = transformer.Transformer(num_attention_heads=10,
                                          intermediate_size=2048,
                                          intermediate_activation='relu')
     sequence_length = 21
     width = 80
     # Create a 3-dimensional input (the first dimension is implicit).
     data_tensor = tf.keras.Input(shape=(sequence_length, width))
     output_tensor = test_layer(data_tensor)
     # The default output of a transformer layer should be the same as the input.
     self.assertEqual(data_tensor.shape.as_list(),
                      output_tensor.shape.as_list())
Example #4
 def test_layer_creation_with_incorrect_mask_fails(self):
     test_layer = transformer.Transformer(num_attention_heads=10,
                                          intermediate_size=2048,
                                          intermediate_activation='relu')
     sequence_length = 21
     width = 80
     # Create a 3-dimensional input (the first dimension is implicit).
     data_tensor = tf.keras.Input(shape=(sequence_length, width))
     # Create a 2-dimensional input (the first dimension is implicit).
     mask_tensor = tf.keras.Input(shape=(sequence_length,
                                         sequence_length - 3))
     with self.assertRaisesRegex(ValueError,
                                 'When passing a mask tensor.*'):
         _ = test_layer([data_tensor, mask_tensor])
Example #5
 def test_get_config(self):
   num_attention_heads = 2
   encoder_block = transformer.Transformer(
       num_attention_heads=num_attention_heads,
       intermediate_size=32,
       intermediate_activation='relu',
       dropout_rate=0.1,
       attention_dropout_rate=0.1,
       use_bias=False,
       norm_first=True,
       norm_epsilon=1e-6)
   encoder_block_config = encoder_block.get_config()
   new_encoder_block = transformer.Transformer.from_config(
       encoder_block_config)
   self.assertEqual(encoder_block_config, new_encoder_block.get_config())
Example #6
 def test_use_bias_norm_first(self):
   num_attention_heads = 2
   hidden_size = 16
   encoder_block = transformer.Transformer(
       num_attention_heads=num_attention_heads,
       intermediate_size=32,
       intermediate_activation='relu',
       dropout_rate=0.1,
       attention_dropout_rate=0.1,
       use_bias=False,
       norm_first=True,
       norm_epsilon=1e-6)
   # Forward path.
   dummy_tensor = tf.zeros([2, 4, 16], dtype=tf.float32)
   dummy_mask = tf.zeros([2, 4, 4], dtype=tf.float32)
   inputs = [dummy_tensor, dummy_mask]
   output = encoder_block(inputs)
   self.assertEqual(output.shape, (2, 4, hidden_size))
Example #7
 def test_get_config(self):
     num_attention_heads = 2
     encoder_block = transformer.Transformer(
         num_attention_heads=num_attention_heads,
         intermediate_size=32,
         intermediate_activation='relu',
         dropout_rate=0.1,
         attention_dropout_rate=0.1,
         use_bias=False,
         norm_first=True,
         norm_epsilon=1e-6,
         intermediate_dropout=0.1,
         attention_initializer=tf.keras.initializers.RandomUniform(
             minval=0., maxval=1.))
     encoder_block_config = encoder_block.get_config()
     new_encoder_block = transformer.Transformer.from_config(
         encoder_block_config)
     self.assertEqual(encoder_block_config, new_encoder_block.get_config())
Example #8
    def test_dynamic_layer_sequence(self):
        test_layer = transformer.Transformer(
            num_attention_heads=10,
            intermediate_size=2048,
            intermediate_activation='relu',
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=0.02))
        # Create a 3-dimensional input (the first dimension is implicit).
        width = 30
        input_tensor = tf.keras.Input(shape=(None, width))
        output_tensor = test_layer(input_tensor)
        model = tf.keras.Model(input_tensor, output_tensor)

        input_length = 17
        input_data = np.ones((1, input_length, width))
        output_data = model.predict(input_data)

        self.assertAllEqual([1, input_length, width], output_data.shape)
Example #9
    def test_layer_invocation(self):
        test_layer = transformer.Transformer(num_attention_heads=10,
                                             intermediate_size=2048,
                                             intermediate_activation='relu')
        sequence_length = 21
        width = 80
        # Create a 3-dimensional input (the first dimension is implicit).
        data_tensor = tf.keras.Input(shape=(sequence_length, width))
        output_tensor = test_layer(data_tensor)

        # Create a model from the test layer.
        model = tf.keras.Model(data_tensor, output_tensor)

        # Invoke the model on test data. We can't validate the output data itself
        # (the NN is too complex), but this will rule out structural runtime errors.
        batch_size = 6
        input_data = 10 * np.random.random_sample(
            (batch_size, sequence_length, width))
        _ = model.predict(input_data)