def test_multi_dim_no_mask(self):
    """Check `_apply_scores` on three scores when no mask is supplied.

    The returned weights are compared with softmax(scores), and the returned
    output with those weights applied to the value tensor.
    """
    # Inputs: scores has shape [1, 1, 3]; value has shape [1, 3, 1].
    raw_scores = np.array([[[1., 0., 1.]]], dtype=np.float32)
    value = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32)

    layer = dense_attention.BaseDenseAttention()
    output, weights = layer._apply_scores(scores=raw_scores, value=value)

    # Softmax across the three scores, denom = exp(1) + exp(0) + exp(1):
    #   weights[0, 0, 0] = exp(1) / denom = 0.42231879825
    #   weights[0, 0, 1] = exp(0) / denom = 0.15536240349
    #   weights[0, 0, 2] = exp(1) / denom = 0.42231879825
    want_weights = np.array(
        [[[0.42231879825, 0.15536240349, 0.42231879825]]],
        dtype=np.float32)
    self.assertAllClose(want_weights, weights)

    # Output has shape [1, 1, 1]:
    #   0.42231879825 * 1.6 + 0.15536240349 * 0.7 - 0.42231879825 * 0.8
    #   = 0.44660872104
    want_output = np.array([[[0.44660872104]]], dtype=np.float32)
    self.assertAllClose(want_output, output)
def test_serialization(self):
    """Check that `causal=True` survives both serialization round trips."""
    original = dense_attention.BaseDenseAttention(causal=True)

    # Round trip 1: keras.layers.serialize / keras.layers.deserialize.
    serialized = keras.layers.serialize(original)
    restored = keras.layers.deserialize(serialized)
    self.assertEqual(restored.causal, True)

    # Round trip 2: get_config / from_config on the layer class itself.
    layer_config = original.get_config()
    restored = dense_attention.BaseDenseAttention.from_config(layer_config)
    self.assertEqual(restored.causal, True)
def test_one_dim_no_mask(self):
    """Check `_apply_scores` on a single score/value pair without a mask."""
    # Both inputs have shape [1, 1, 1].
    raw_scores = np.array([[[1.1]]], dtype=np.float32)
    value = np.array([[[1.6]]], dtype=np.float32)

    layer = dense_attention.BaseDenseAttention()
    output, weights = layer._apply_scores(scores=raw_scores, value=value)

    # Softmax of a lone score is 1.
    want_weights = np.array([[[1.]]], dtype=np.float32)
    self.assertAllClose(want_weights, weights)

    # Output has shape [1, 1, 1]: weights[0, 0, 0] * 1.6 = 1.6
    want_output = np.array([[[1.6]]], dtype=np.float32)
    self.assertAllClose(want_output, output)
def test_one_dim_batch_size_two(self):
    """Check `_apply_scores` on a batch of two single-element examples.

    An all-True scores mask is passed alongside the scores and values.
    """
    # Scores, value and scores mask all have shape [2, 1, 1].
    raw_scores = np.array([[[1.1]], [[2.1]]], dtype=np.float32)
    value = np.array([[[1.6]], [[2.6]]], dtype=np.float32)
    mask = np.array([[[True]], [[True]]], dtype=np.bool_)

    layer = dense_attention.BaseDenseAttention()
    output, weights = layer._apply_scores(
        scores=raw_scores, value=value, scores_mask=mask)

    # Each example holds a single score, so each softmax weight is 1.
    want_weights = np.array([[[1.]], [[1.]]], dtype=np.float32)
    self.assertAllClose(want_weights, weights)

    # Output has shape [2, 1, 1]:
    #   output[0, 0, 0] = weights[0, 0, 0] * 1.6 = 1.6
    #   output[1, 0, 0] = weights[1, 0, 0] * 2.6 = 2.6
    want_output = np.array([[[1.6]], [[2.6]]], dtype=np.float32)
    self.assertAllClose(want_output, output)
def test_shape_with_dropout(self):
    """Check the output shapes of `_apply_scores` on a layer with dropout.

    The layer is built with dropout=0.1 and called with training=False; only
    the shapes of the two returned tensors are asserted.
    """
    # scores is `[batch_size, tq, tv]`; value is `[batch_size, tv, dim]`.
    batch_size, tq, tv, dim = 4, 5, 6, 7
    all_ones_scores = np.ones((batch_size, tq, tv))
    all_ones_value = np.ones((batch_size, tv, dim))

    layer = dense_attention.BaseDenseAttention(dropout=0.1)
    output, weights = layer._apply_scores(
        scores=all_ones_scores, value=all_ones_value, training=False)

    # Weights keep the scores' shape `[batch_size, tq, tv]`.
    self.assertAllEqual([batch_size, tq, tv], tf.shape(weights))
    # Output has shape `[batch_size, tq, dim]`.
    self.assertAllEqual([batch_size, tq, dim], tf.shape(output))
def test_multi_dim_with_mask(self):
    """Check `_apply_scores` on three scores with the last one masked out.

    The expected weights are a softmax over the unmasked scores, with zero at
    the position where `scores_mask` is False.
    """
    # Inputs: scores and mask have shape [1, 1, 3]; value has shape [1, 3, 1].
    raw_scores = np.array([[[1., 0., 1.]]], dtype=np.float32)
    value = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32)
    mask = np.array([[[True, True, False]]], dtype=np.bool_)

    layer = dense_attention.BaseDenseAttention()
    output, weights = layer._apply_scores(
        scores=raw_scores, value=value, scores_mask=mask)

    # Only the first two positions take part in the softmax:
    #   weights[0, 0, 0] = exp(1) / (exp(1) + exp(0)) = 0.73105857863
    #   weights[0, 0, 1] = exp(0) / (exp(1) + exp(0)) = 0.26894142137
    #   weights[0, 0, 2] = 0
    want_weights = np.array(
        [[[0.73105857863, 0.26894142137, 0.]]], dtype=np.float32)
    self.assertAllClose(want_weights, weights)

    # Output has shape [1, 1, 1]:
    #   0.73105857863 * 1.6 + 0.26894142137 * 0.7 - 0 * 0.8 = 1.35795272077
    want_output = np.array([[[1.35795272077]]], dtype=np.float32)
    self.assertAllClose(want_output, output)