Example 1
    def _build_attention(self, qkv_rank):
        """Builds multi-head dot-product attention computations.
    
        This function builds attributes necessary for `_compute_attention` to
        customize attention computation to replace the default dot-product
        attention.
    
        Args:
          qkv_rank: the rank of query, key, value tensors.
        """
        # qkv_rank = 4
        # Axes over which attention is applied, e.g. (1,).
        if self._attention_axes is None:
            self._attention_axes = tuple(range(1, qkv_rank - 2))
        else:
            self._attention_axes = tuple(self._attention_axes)

        # Build the einsum equations used to compute the attention weights.
        self._dot_product_equation, self._combine_equation, attn_scores_rank = (
            _build_attention_equation(qkv_rank,
                                      attn_axes=self._attention_axes))

        norm_axes = tuple(
            range(attn_scores_rank - len(self._attention_axes),
                  attn_scores_rank))

        # Apply the mask to the attention weights and normalize into an attention distribution.
        self._masked_softmax = masked_softmax.MaskedSoftmax(
            mask_expansion_axes=[1], normalization_axes=norm_axes)
        self._dropout_layer = tf.keras.layers.Dropout(rate=self._dropout)
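
For a concrete sense of what `_build_attention` sets up, the sketch below applies the kind of einsum equations `_build_attention_equation` typically returns for rank-4 query/key/value tensors shaped [batch, seq, heads, head_dim] with attention axes (1,). The equation strings follow the convention used by Keras' MultiHeadAttention and are an assumption here, not the verbatim output of this module; a plain softmax stands in for MaskedSoftmax since no mask is applied.

import tensorflow as tf

# Assumed layout: [batch, seq_len, num_heads, head_dim]; attention over axis 1.
batch, seq, heads, dim = 2, 5, 4, 8
query = tf.random.normal([batch, seq, heads, dim])
key = tf.random.normal([batch, seq, heads, dim])
value = tf.random.normal([batch, seq, heads, dim])

# Assumed equations for qkv_rank=4, attn_axes=(1,):
dot_product_equation = "aecd,abcd->acbe"  # (key, query) -> scores [batch, heads, q_seq, k_seq]
combine_equation = "acbe,aecd->abcd"      # (scores, value) -> output [batch, seq, heads, dim]

scores = tf.einsum(dot_product_equation, key, query) / tf.sqrt(float(dim))
probs = tf.nn.softmax(scores, axis=-1)    # norm_axes would be (3,) when attn_scores_rank is 4
output = tf.einsum(combine_equation, probs, value)
print(scores.shape, output.shape)         # (2, 4, 5, 5) (2, 5, 4, 8)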
Example 2
    def __init__(self,
                 num_heads,
                 key_size,
                 value_size=None,
                 dropout_rate=0.0,
                 use_bias=True,
                 output_shape=None,
                 kernel_initializer="glorot_uniform",
                 bias_initializer="zeros",
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(MultiHeadAttention, self).__init__(**kwargs)
        self._num_heads = num_heads
        self._key_size = key_size
        self._value_size = value_size if value_size else key_size
        self._dropout_rate = dropout_rate
        self._use_bias = use_bias
        self._output_shape = output_shape
        self._kernel_initializer = tf.keras.initializers.get(
            kernel_initializer)
        self._bias_initializer = tf.keras.initializers.get(bias_initializer)
        self._kernel_regularizer = tf.keras.regularizers.get(
            kernel_regularizer)
        self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
        self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
        self._bias_constraint = tf.keras.constraints.get(bias_constraint)

        self._masked_softmax = masked_softmax.MaskedSoftmax(
            mask_expansion_axes=[1])
        self._dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
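
As a usage note, this constructor only fixes hyperparameters and builds the masking and dropout sub-layers; the projections and attention math live elsewhere in the layer. A hypothetical instantiation (argument values are illustrative) could look like:

# Hypothetical instantiation: 8 heads, 64-dimensional keys, 10% attention dropout.
attention_layer = MultiHeadAttention(num_heads=8, key_size=64, dropout_rate=0.1)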
Example 3
  def __init__(self,
               num_heads,
               head_size,
               dropout_rate=0.0,
               kernel_initializer="glorot_uniform",
               bias_initializer="zeros",
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               **kwargs):
    super(MultiHeadAttention, self).__init__(**kwargs)
    self._num_heads = num_heads
    self._head_size = head_size
    self._dropout_rate = dropout_rate
    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf.keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
    self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
    self._bias_constraint = tf.keras.constraints.get(bias_constraint)
    # Store the activity regularizer: the projection layers below reference it.
    self._activity_regularizer = tf.keras.regularizers.get(activity_regularizer)

    self._query_dense = dense_einsum.DenseEinsum(
        output_shape=(self._num_heads, self._head_size),
        kernel_initializer=self._kernel_initializer,
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint,
        name="query")

    self._key_dense = dense_einsum.DenseEinsum(
        output_shape=(self._num_heads, self._head_size),
        kernel_initializer=self._kernel_initializer,
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint,
        name="key")

    self._value_dense = dense_einsum.DenseEinsum(
        output_shape=(self._num_heads, self._head_size),
        kernel_initializer=self._kernel_initializer,
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint,
        name="value")

    self._masked_softmax = masked_softmax.MaskedSoftmax(mask_expansion_axes=[1])

    self._dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
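
To show how these pieces fit together, here is a minimal sketch of the call-time logic such a layer typically performs: project query/key/value with the DenseEinsum layers, score them with a dot product scaled by 1/sqrt(head_size), mask and normalize with MaskedSoftmax, apply dropout, and combine with the values. The einsum strings and the method body are illustrative assumptions, not the module's verbatim implementation.

import math
import tensorflow as tf

def call_sketch(self, query, key, value, attention_mask=None, training=None):
    # Hypothetical call() body; assumes inputs shaped [batch, seq_len, hidden].
    query_tensor = self._query_dense(query)  # [batch, from_seq, num_heads, head_size]
    key_tensor = self._key_dense(key)        # [batch, to_seq, num_heads, head_size]
    value_tensor = self._value_dense(value)  # [batch, to_seq, num_heads, head_size]

    # Scaled dot-product scores: [batch, num_heads, from_seq, to_seq].
    scores = tf.einsum("BTNH,BFNH->BNFT", key_tensor, query_tensor)
    scores *= 1.0 / math.sqrt(float(self._head_size))

    # MaskedSoftmax expands the mask on axis 1 and normalizes over the last axis.
    probs = self._masked_softmax(scores, attention_mask)
    probs = self._dropout(probs, training=training)

    # Weighted sum of the values: back to [batch, from_seq, num_heads, head_size].
    return tf.einsum("BNFT,BTNH->BFNH", probs, value_tensor)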
Example 4
    def test_serialize_deserialize(self):
        test_layer = masked_softmax.MaskedSoftmax(mask_expansion_axes=[1],
                                                  normalization_axes=[6, 7])
        new_layer = masked_softmax.MaskedSoftmax.from_config(
            test_layer.get_config())

        # If the serialization was successful, the new config should match the old.
        self.assertAllEqual(test_layer.get_config(), new_layer.get_config())
Example 5
    def test_masked_softmax_with_none_mask(self):
        test_layer = masked_softmax.MaskedSoftmax()
        input_tensor = tf.keras.Input(shape=(4, 8))
        output = test_layer(input_tensor, None)
        model = tf.keras.Model(input_tensor, output)

        input_data = 10 * np.random.random_sample((3, 4, 8))
        output_data = model.predict(input_data)
        expected_data = tf.nn.softmax(input_data)
        self.assertAllClose(expected_data, output_data)
Example 6
    def test_masked_softmax(self):
        test_layer = masked_softmax.MaskedSoftmax()
        input_tensor = tf.keras.Input(shape=(4, 8))
        mask_tensor = tf.keras.Input(shape=(4, 8))
        output = test_layer(input_tensor, mask_tensor)
        model = tf.keras.Model([input_tensor, mask_tensor], output)

        input_data = 10 * np.random.random_sample((3, 4, 8))
        mask_data = np.random.randint(2, size=(3, 4, 8))

        output_data = model.predict([input_data, mask_data])
        expected_zeros = np.greater(mask_data, 0)
        is_zeros = np.greater(output_data, 0)
        self.assertAllEqual(expected_zeros, is_zeros)
Example 7
  def test_softmax_with_axes_expansion(self):
    test_layer = masked_softmax.MaskedSoftmax(mask_expansion_axes=[1])
    input_tensor = tf.keras.Input(shape=(4, 8))
    mask_tensor = tf.keras.Input(shape=(8,))
    output = test_layer(input_tensor, mask_tensor)
    model = tf.keras.Model([input_tensor, mask_tensor], output)

    input_data = 10 * np.random.random_sample((3, 4, 8))
    mask_data = np.random.randint(2, size=(3, 8))

    output_data = model.predict([input_data, mask_data])
    expanded_mask = np.expand_dims(mask_data, axis=1) * np.ones_like(input_data)
    expected_zeros = np.greater(expanded_mask, 0)
    is_zeros = np.greater(output_data, 0)
    self.assertAllEqual(expected_zeros, is_zeros)
Example 8
    def test_masked_softmax_high_dims(self):
        test_layer = masked_softmax.MaskedSoftmax(mask_expansion_axes=[1],
                                                  normalization_axes=[6, 7])
        input_shape = [2, 3, 4, 5, 6, 7, 8]
        mask_shape = [5, 6, 7, 8]
        input_tensor = tf.keras.Input(shape=input_shape)
        mask_tensor = tf.keras.Input(shape=mask_shape)
        output = test_layer(input_tensor, mask_tensor)
        model = tf.keras.Model([input_tensor, mask_tensor], output)

        input_data = 10 * np.random.random_sample([3] + input_shape)
        mask_data = np.random.randint(2, size=[3] + mask_shape)

        output_data = model.predict([input_data, mask_data])
        expanded_mask = np.expand_dims(mask_data, axis=1)
        expanded_mask = np.expand_dims(expanded_mask, axis=1)
        expanded_mask = np.expand_dims(expanded_mask,
                                       axis=1) * np.ones_like(input_data)
        expected_zeros = np.greater(expanded_mask, 0)
        is_zeros = np.greater(output_data, 0)
        self.assertAllEqual(expected_zeros, is_zeros)
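
Taken together, the tests above pin down the behavior MaskedSoftmax is expected to have: expand the mask along mask_expansion_axes, drive masked (zero) positions to a large negative logit, and apply softmax over normalization_axes (the last axis by default). A minimal stand-alone sketch of that behavior, not the library's actual implementation, might look like this:

import tensorflow as tf

class MaskedSoftmaxSketch(tf.keras.layers.Layer):
    """Hypothetical re-implementation of the behavior the tests exercise."""

    def __init__(self, mask_expansion_axes=None, normalization_axes=None, **kwargs):
        super().__init__(**kwargs)
        self._mask_expansion_axes = mask_expansion_axes
        self._normalization_axes = normalization_axes or [-1]

    def call(self, scores, mask=None):
        if mask is not None:
            # Insert singleton dims so the mask broadcasts against the scores.
            if self._mask_expansion_axes is not None:
                for axis in self._mask_expansion_axes:
                    mask = tf.expand_dims(mask, axis=axis)
            # Push masked (mask == 0) positions toward a very negative logit.
            scores += (1.0 - tf.cast(mask, scores.dtype)) * -10000.0
        if len(self._normalization_axes) == 1:
            return tf.nn.softmax(scores, axis=self._normalization_axes[0])
        # Joint softmax over several trailing axes (as in the high-dims test above).
        scores -= tf.reduce_max(scores, axis=self._normalization_axes, keepdims=True)
        exp_scores = tf.exp(scores)
        return exp_scores / tf.reduce_sum(
            exp_scores, axis=self._normalization_axes, keepdims=True)

Example 4's serialization round-trip additionally implies that the real layer exposes mask_expansion_axes and normalization_axes through get_config, which the sketch above omits.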
Example 9
 def _build_attention(self, rank):
     super()._build_attention(rank)  # pytype: disable=attribute-error  # typed-keras
     self._masked_softmax = masked_softmax.MaskedSoftmax(
         mask_expansion_axes=[2])
Example 10
 def _build_attention(self, rank):
     self._masked_softmax = masked_softmax.MaskedSoftmax(
         mask_expansion_axes=[1], normalization_axes=[2])
     self._dropout_layer = tf.keras.layers.Dropout(rate=self._dropout)
Example 11
 def build_attention(self, rank):
     super(MultiChannelAttention, self).build_attention(rank)
     self._masked_softmax = masked_softmax.MaskedSoftmax(
         mask_expansion_axes=[2])
Example 12
 def build(self, input_shape):
     super(MultiChannelAttention, self).build(input_shape)
     self._masked_softmax = masked_softmax.MaskedSoftmax(
         mask_expansion_axes=[2])