Example #1
0
    def __init__(self,
                 num_heads,
                 key_size,
                 value_size=None,
                 dropout_rate=0.0,
                 use_bias=True,
                 output_shape=None,
                 kernel_initializer="glorot_uniform",
                 bias_initializer="zeros",
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        """Stores the layer configuration and creates the helper layers.

        Args:
          num_heads: Stored as `self._num_heads`.
          key_size: Stored as `self._key_size`.
          value_size: Stored as `self._value_size`; when falsy (`None` or
            0), `key_size` is used instead.
          dropout_rate: Rate handed to the internal
            `tf.keras.layers.Dropout` layer.
          use_bias: Stored as `self._use_bias`.
          output_shape: Stored as `self._output_shape`.
          kernel_initializer: Resolved with `tf.keras.initializers.get`.
          bias_initializer: Resolved with `tf.keras.initializers.get`.
          kernel_regularizer: Resolved with `tf.keras.regularizers.get`.
          bias_regularizer: Resolved with `tf.keras.regularizers.get`.
          activity_regularizer: Forwarded to the base class constructor.
          kernel_constraint: Resolved with `tf.keras.constraints.get`.
          bias_constraint: Resolved with `tf.keras.constraints.get`.
          **kwargs: Remaining keyword arguments for the base class.
        """
        # `activity_regularizer` is a named parameter, so it never reaches
        # the base class through `**kwargs`; without forwarding it here the
        # caller's regularizer would be silently dropped.
        # NOTE(review): assumes the base class is a Keras `Layer`, whose
        # constructor accepts `activity_regularizer` - confirm.
        super(MultiHeadAttention, self).__init__(
            activity_regularizer=activity_regularizer, **kwargs)

        self._num_heads = num_heads
        self._key_size = key_size
        # A falsy `value_size` means "same size as the keys".
        self._value_size = value_size if value_size else key_size
        self._dropout_rate = dropout_rate
        self._use_bias = use_bias
        self._output_shape = output_shape

        # Accept string identifiers, config dicts or instances alike.
        self._kernel_initializer = tf.keras.initializers.get(
            kernel_initializer)
        self._bias_initializer = tf.keras.initializers.get(bias_initializer)
        self._kernel_regularizer = tf.keras.regularizers.get(
            kernel_regularizer)
        self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
        self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
        self._bias_constraint = tf.keras.constraints.get(bias_constraint)

        self._masked_softmax = masked_softmax.MaskedSoftmax(
            mask_expansion_axes=[1])
        self._dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
Example #2
0
    def test_masked_softmax_with_none_mask(self):
        """Checks that a `None` mask gives the same result as `tf.nn.softmax`."""
        layer = masked_softmax.MaskedSoftmax()
        scores_in = tf.keras.Input(shape=(4, 8))
        # The second list element is the mask; here it is deliberately absent.
        model = tf.keras.Model(scores_in, layer([scores_in, None]))

        scores = 10 * np.random.random_sample((3, 4, 8))
        actual = model.predict(scores)

        self.assertAllClose(tf.nn.softmax(scores), actual)
Example #3
0
    def test_masked_softmax(self):
        """Checks that outputs are positive exactly where the mask is set."""
        layer = masked_softmax.MaskedSoftmax()
        scores_in = tf.keras.Input(shape=(4, 8))
        mask_in = tf.keras.Input(shape=(4, 8))
        model = tf.keras.Model([scores_in, mask_in],
                               layer([scores_in, mask_in]))

        scores = 10 * np.random.random_sample((3, 4, 8))
        # Random binary mask with the same shape as the scores.
        mask = np.random.randint(2, size=(3, 4, 8))
        probs = model.predict([scores, mask])

        # NOTE(review): if a random mask row is all zeros, this comparison
        # may fail depending on how MaskedSoftmax treats fully masked rows
        # - confirm whether the test can be flaky.
        should_be_positive = mask > 0
        is_positive = probs > 0
        self.assertAllEqual(should_be_positive, is_positive)
Example #4
0
    def test_softmax_with_axes_expansion(self):
        """Checks masking when the mask lacks axis 1 of the scores.

        The layer is built with `mask_expansion_axes=[1]` and receives a
        per-example mask of shape (batch, 8) alongside scores of shape
        (batch, 4, 8). The output must be positive exactly where the mask,
        repeated along axis 1, is set.
        """
        test_layer = masked_softmax.MaskedSoftmax(mask_expansion_axes=[1])
        input_tensor = tf.keras.Input(shape=(4, 8))
        # `(8)` is just the int 8; a shape must be a tuple, hence `(8,)`.
        mask_tensor = tf.keras.Input(shape=(8,))
        output = test_layer([input_tensor, mask_tensor])
        model = tf.keras.Model([input_tensor, mask_tensor], output)

        input_data = 10 * np.random.random_sample((3, 4, 8))
        mask_data = np.random.randint(2, size=(3, 8))

        output_data = model.predict([input_data, mask_data])

        # Repeat the (3, 8) mask along a new axis 1 to match the scores.
        # NOTE(review): if a random mask row is all zeros, this comparison
        # may fail depending on how MaskedSoftmax treats fully masked rows
        # - confirm whether the test can be flaky.
        expanded_mask = np.broadcast_to(
            np.expand_dims(mask_data, axis=1), input_data.shape)
        expected_positive = np.greater(expanded_mask, 0)
        is_positive = np.greater(output_data, 0)
        self.assertAllEqual(expected_positive, is_positive)
Example #5
0
    def __init__(self,
                 num_heads,
                 key_size,
                 dropout_rate=0.0,
                 output_shape=None,
                 kernel_initializer="glorot_uniform",
                 bias_initializer="zeros",
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        """Stores the layer configuration and creates the sub-layers.

        Creates three `dense_einsum.DenseEinsum` projections named "query",
        "key" and "value", each with output shape `(num_heads, key_size)`,
        plus a masked softmax layer and a dropout layer.

        Args:
          num_heads: First dimension of each projection's output shape.
          key_size: Second dimension of each projection's output shape.
          dropout_rate: Rate handed to the internal
            `tf.keras.layers.Dropout` layer.
          output_shape: Stored as `self._output_shape`.
          kernel_initializer: Resolved with `tf.keras.initializers.get`
            and shared by the three projections.
          bias_initializer: Resolved with `tf.keras.initializers.get` and
            shared by the three projections.
          kernel_regularizer: Resolved with `tf.keras.regularizers.get`
            and shared by the three projections.
          bias_regularizer: Resolved with `tf.keras.regularizers.get` and
            shared by the three projections.
          activity_regularizer: Forwarded to the base class constructor;
            the projections receive `self._activity_regularizer`.
          kernel_constraint: Resolved with `tf.keras.constraints.get` and
            shared by the three projections.
          bias_constraint: Resolved with `tf.keras.constraints.get` and
            shared by the three projections.
          **kwargs: Remaining keyword arguments for the base class.
        """
        # `activity_regularizer` is a named parameter, so it never reaches
        # the base class through `**kwargs`. Without forwarding it here,
        # `self._activity_regularizer` (read below) would never reflect the
        # caller's argument.
        # NOTE(review): assumes the base class is a Keras `Layer`, which
        # stores this argument as `self._activity_regularizer` - confirm.
        super(TalkingHeadsAttention, self).__init__(
            activity_regularizer=activity_regularizer, **kwargs)

        self._num_heads = num_heads
        self._key_size = key_size
        self._dropout_rate = dropout_rate
        self._output_shape = output_shape

        # Accept string identifiers, config dicts or instances alike.
        self._kernel_initializer = tf.keras.initializers.get(
            kernel_initializer)
        self._bias_initializer = tf.keras.initializers.get(bias_initializer)
        self._kernel_regularizer = tf.keras.regularizers.get(
            kernel_regularizer)
        self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
        self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
        self._bias_constraint = tf.keras.constraints.get(bias_constraint)

        # The three projections differ only in their name.
        projection_kwargs = dict(
            output_shape=(self._num_heads, self._key_size),
            kernel_initializer=self._kernel_initializer,
            bias_initializer=self._bias_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activity_regularizer=self._activity_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint)

        # Creation order (query, key, value) is kept as in the original.
        self._query_dense = dense_einsum.DenseEinsum(
            name="query", **projection_kwargs)
        self._key_dense = dense_einsum.DenseEinsum(
            name="key", **projection_kwargs)
        self._value_dense = dense_einsum.DenseEinsum(
            name="value", **projection_kwargs)

        self._masked_softmax = masked_softmax.MaskedSoftmax(
            mask_expansion_axes=[1])

        self._dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)