Exemple #1
0
    def test_high_dim_attention(self, q_dims, v_dims, mask_dims,
                                attention_axes):
        """Check that a mask changes the layer output for multi-axis inputs.

        The layer is called directly on random NumPy query/value arrays twice:
        once with a random boolean mask and once with an all-ones mask. The two
        outputs are asserted to differ.

        Args:
            q_dims: List of query dimensions between batch and hidden axes.
            v_dims: List of value dimensions between batch and hidden axes.
            mask_dims: List of mask dimensions following the batch axis.
            attention_axes: Forwarded to the layer's `attention_axes` argument.
        """
        layer = talking_heads_attention.TalkingHeadsAttention(
            num_heads=12, key_dim=2, attention_axes=attention_axes)
        num_examples = 3
        feature_size = 8

        # Random (non-mask) inputs, shaped [batch] + dims + [hidden].
        query = 10 * np.random.random_sample(
            [num_examples] + q_dims + [feature_size])
        value = 10 * np.random.random_sample(
            [num_examples] + v_dims + [feature_size])

        # A random boolean mask, expected to mask out at least one element.
        mask_shape = [num_examples] + mask_dims
        random_mask = np.random.randint(2, size=mask_shape).astype("bool")
        masked_result = test_result = layer(
            query=query, value=value, attention_mask=random_mask)

        # An all-ones mask, i.e. nothing is masked.
        open_mask = np.ones(mask_shape)
        unmasked_result = layer(
            query=query, value=value, attention_mask=open_mask)

        # One call masks elements and the other does not, so the outputs are
        # expected to differ.
        self.assertNotAllClose(masked_result, unmasked_result)
    def test_masked_attention(self):
        """Check that a random mask changes the model's prediction.

        A Keras model wrapping the layer is run on the same random inputs with
        a random 0/1 mask and with an all-ones mask; the two predictions are
        asserted to differ.
        """
        layer = talking_heads_attention.TalkingHeadsAttention(
            num_heads=2, head_size=2)

        # Symbolic 3-D inputs; the batch dimension is implicit.
        from_input = tf.keras.Input(shape=(4, 8))
        to_input = tf.keras.Input(shape=(2, 8))
        mask_input = tf.keras.Input(shape=(4, 2))
        layer_output = layer([from_input, to_input, mask_input])

        # Wrap the layer in a model so it can be run on concrete data.
        model = tf.keras.Model(
            [from_input, to_input, mask_input], layer_output)

        # Random (non-mask) input data with a batch of 3.
        from_values = 10 * np.random.random_sample((3, 4, 8))
        to_values = 10 * np.random.random_sample((3, 2, 8))

        # Prediction under a random mask, expected to mask at least one
        # element.
        random_mask = np.random.randint(2, size=(3, 4, 2))
        masked_prediction = model.predict(
            [from_values, to_values, random_mask])

        # Prediction under an all-ones mask (nothing is masked).
        open_mask = np.ones((3, 4, 2))
        unmasked_prediction = model.predict(
            [from_values, to_values, open_mask])

        # One run masks elements and the other does not, so the predictions
        # are expected to differ.
        self.assertNotAllClose(masked_prediction, unmasked_prediction)
Exemple #3
0
 def test_non_masked_self_attention(self):
     """Check the output shape for self-attention without a mask."""
     layer = talking_heads_attention.TalkingHeadsAttention(
         num_heads=12, key_dim=64)
     # Symbolic 3-D input; the batch dimension is implicit.
     inputs = tf.keras.Input(shape=(40, 80))
     # The same tensor is used as both query and value.
     result = layer(query=inputs, value=inputs)
     expected_shape = [None, 40, 80]
     self.assertEqual(result.shape.as_list(), expected_shape)
 def test_non_masked_self_attention(self):
     """Check the output shape for self-attention without a mask."""
     layer = talking_heads_attention.TalkingHeadsAttention(
         num_heads=12, head_size=64)
     # Symbolic 3-D input; the batch dimension is implicit.
     inputs = tf.keras.Input(shape=(40, 80))
     # The same tensor is passed twice, as a two-element input list.
     result = layer([inputs, inputs])
     expected_shape = [None, 40, 12, 64]
     self.assertEqual(result.shape.as_list(), expected_shape)
Exemple #5
0
 def test_attention_scores(self):
     """Check the shapes of the output and the returned attention scores."""
     layer = talking_heads_attention.TalkingHeadsAttention(
         num_heads=12, key_size=64, return_attention_scores=True)
     # Symbolic 3-D input; the batch dimension is implicit.
     inputs = tf.keras.Input(shape=(40, 80))
     # With return_attention_scores=True the call yields a pair.
     attended, scores = layer(query=inputs, value=inputs)
     expected_output_shape = [None, 40, 80]
     expected_scores_shape = [None, 12, 40, 40]
     self.assertEqual(attended.shape.as_list(), expected_output_shape)
     self.assertEqual(scores.shape.as_list(), expected_scores_shape)
 def test_non_masked_attention(self):
     """Check the output shape when the layer is called without a mask."""
     layer = talking_heads_attention.TalkingHeadsAttention(
         num_heads=12, key_size=64)
     # Symbolic 3-D inputs; the batch dimension is implicit.
     from_input = tf.keras.Input(shape=(40, 80))
     to_input = tf.keras.Input(shape=(20, 80))
     result = layer([from_input, to_input])
     expected_shape = [None, 40, 80]
     self.assertEqual(result.shape.as_list(), expected_shape)
Exemple #7
0
    def test_masked_attention(self, use_bias):
        """Check that a mask changes the output, with and without a key.

        Two Keras models are built around the same layer: one fed (query,
        value, mask) and one fed (query, value, key, mask). For each, the
        prediction under a random 0/1 mask is asserted to differ from the
        prediction under an all-ones mask. Finally, the number of trainable
        variables on `_query_dense` and `_output_dense` is checked: 2 when
        `use_bias` is truthy, 1 otherwise.

        Args:
            use_bias: Forwarded to the layer's `use_bias` argument.
        """
        layer = talking_heads_attention.TalkingHeadsAttention(
            num_heads=12, key_dim=2, use_bias=use_bias)
        num_examples = 3

        # Symbolic 3-D inputs; the batch dimension is implicit.
        query_in = tf.keras.Input(shape=(4, 8))
        value_in = tf.keras.Input(shape=(2, 8))
        mask_in = tf.keras.Input(shape=(4, 2))
        qv_output = layer(
            query=query_in, value=value_in, attention_mask=mask_in)

        # Wrap the layer in a model so it can be run on concrete data.
        qv_model = tf.keras.Model([query_in, value_in, mask_in], qv_output)

        # Random (non-mask) input data.
        query_values = 10 * np.random.random_sample((num_examples, 4, 8))
        value_values = 10 * np.random.random_sample((num_examples, 2, 8))

        # Prediction under a random mask, expected to mask at least one
        # element.
        random_mask = np.random.randint(2, size=(num_examples, 4, 2))
        masked_prediction = qv_model.predict(
            [query_values, value_values, random_mask])

        # Prediction under an all-ones mask (nothing is masked).
        open_mask = np.ones((num_examples, 4, 2))
        unmasked_prediction = qv_model.predict(
            [query_values, value_values, open_mask])

        # One run masks elements and the other does not, so the predictions
        # are expected to differ.
        self.assertNotAllClose(masked_prediction, unmasked_prediction)

        # Repeat with three inputs (Q, K, V); the value data doubles as the
        # key data.
        key_in = tf.keras.Input(shape=(2, 8))
        qkv_output = layer(
            query=query_in,
            value=value_in,
            key=key_in,
            attention_mask=mask_in)
        qkv_model = tf.keras.Model(
            [query_in, value_in, key_in, mask_in], qkv_output)

        masked_prediction = qkv_model.predict(
            [query_values, value_values, value_values, random_mask])
        unmasked_prediction = qkv_model.predict(
            [query_values, value_values, value_values, open_mask])
        # Again, masked and unmasked predictions are expected to differ.
        self.assertNotAllClose(masked_prediction, unmasked_prediction)

        # Two trainable variables per projection with bias, one without.
        expected_variable_count = 2 if use_bias else 1
        self.assertLen(layer._query_dense.trainable_variables,
                       expected_variable_count)
        self.assertLen(layer._output_dense.trainable_variables,
                       expected_variable_count)
 def test_initializer(self):
   """Check the output shape when a kernel initializer is supplied."""
   initializer = tf.keras.initializers.TruncatedNormal(stddev=0.02)
   layer = talking_heads_attention.TalkingHeadsAttention(
       num_heads=12, key_dim=64, kernel_initializer=initializer)
   # Symbolic 3-D input; the batch dimension is implicit.
   inputs = tf.keras.Input(shape=(40, 80))
   # The same tensor is used as both query and value.
   result = layer(query=inputs, value=inputs)
   expected_shape = [None, 40, 80]
   self.assertEqual(result.shape.as_list(), expected_shape)
Exemple #9
0
 def test_non_masked_attention(self, value_dim, output_shape, output_dims):
     """Check the output shape when the layer is called without a mask.

     Args:
         value_dim: Forwarded to the layer's `value_dim` argument.
         output_shape: Forwarded to the layer's `output_shape` argument.
         output_dims: List of expected output dimensions after the batch axis.
     """
     layer_kwargs = dict(
         num_heads=12,
         key_dim=64,
         value_dim=value_dim,
         output_shape=output_shape)
     layer = talking_heads_attention.TalkingHeadsAttention(**layer_kwargs)
     # Symbolic 3-D inputs; the batch dimension is implicit.
     query_in = tf.keras.Input(shape=(40, 80))
     value_in = tf.keras.Input(shape=(20, 80))
     result = layer(query=query_in, value=value_in)
     expected_shape = [None] + output_dims
     self.assertEqual(result.shape.as_list(), expected_shape)