def test_split_works_correctly_on_word_embeddings_with_masking(self):
    """Check that ``VectorMatrixSplit`` slices both the tensor and its mask.

    The embedded input is split on axis 2 while its mask is split on axis 1.
    In each case the first output must equal index 0 along the split axis and
    the second output must equal everything from index 1 onwards.
    """
    vocabulary_size = embedding_dim = 10
    sentence_length, word_length = 10, 5
    num_sentences = 7

    sentence_input = Input(shape=(sentence_length, word_length), dtype='int32')
    embedding = Embedding(input_dim=vocabulary_size,
                          output_dim=embedding_dim,
                          mask_zero=True)
    # (batch_size, sentence_length, word_length, embedding_dim)
    embedded_sentence = embedding(sentence_input)
    sentence_mask = OutputMask()(embedded_sentence)

    # Note that this mask_split_axis doesn't make practical sense; the point
    # is only to exercise the code with different axes for mask and input.
    splitter = VectorMatrixSplit(split_axis=2, mask_split_axis=1)
    words, characters = splitter(embedded_sentence)
    word_mask = OutputMask()(words)
    character_mask = OutputMask()(characters)

    model = Model(
        inputs=[sentence_input],
        outputs=[
            embedded_sentence,
            words,
            characters,
            sentence_mask,
            word_mask,
            character_mask,
        ],
    )

    input_ids = numpy.random.randint(
        0, vocabulary_size, (num_sentences, sentence_length, word_length))
    (embedded_out, words_out, characters_out,
     sentence_mask_out, word_mask_out, character_mask_out) = model.predict([input_ids])

    # Tensor outputs are slices along axis 2 of the embedded input.
    assert numpy.array_equal(words_out, embedded_out[:, :, 0, :])
    assert numpy.array_equal(characters_out, embedded_out[:, :, 1:, :])
    # Mask outputs are slices along axis 1 of the input mask.
    assert numpy.array_equal(word_mask_out, sentence_mask_out[:, 0, :])
    assert numpy.array_equal(character_mask_out, sentence_mask_out[:, 1:, :])
def test_a_smaller_than_b(self):
    """Check ``BatchDot`` values and output mask when ``a`` has fewer dims than ``b``.

    ``a`` has shape (batch, 5) and ``b`` has shape (batch, 2, 5); both go
    through ``Masking(mask_value=0)`` before the dot product.
    """
    batch_size = 3
    tensor_a = numpy.random.randint(7, size=(batch_size, 5))
    tensor_b = numpy.random.randint(7, size=(batch_size, 2, 5))

    # Zero out the first row of `a` and one row of `b` in the first batch
    # element; these are the entries checked against the output mask below.
    tensor_a[0] = 0
    tensor_b[0][1] = 0

    input_tensor_a = Input(shape=(5, ))
    masked_tensor_a = Masking(mask_value=0)(input_tensor_a)
    input_tensor_b = Input(shape=(2, 5))
    masked_tensor_b = Masking(mask_value=0)(input_tensor_b)

    dot_output = BatchDot()([masked_tensor_a, masked_tensor_b])
    dot_mask = OutputMask()(dot_output)
    model = Model(inputs=[input_tensor_a, input_tensor_b],
                  outputs=[dot_output, dot_mask])

    # Both predictions are of shape (3, 2).
    a_dot_b_tensor, mask_tensor = model.predict([tensor_a, tensor_b])

    # Each batch element's result, of shape (2,), must match a plain einsum.
    for actual, vector, matrix in zip(a_dot_b_tensor, tensor_a, tensor_b):
        assert_almost_equal(actual, numpy.einsum("i,mi->m", vector, matrix))

    # The first batch element, whose inputs were zeroed above, must be
    # masked in both output positions.
    assert mask_tensor[0][0] == 0
    assert mask_tensor[0][1] == 0
def test_call_handles_masking_properly(self):
    """Check ``MatrixAttention`` values and mask for two masked sentences.

    Both sentences share one ``mask_zero=True`` embedding with fixed weights,
    so the expected attention matrix and mask can be written out by hand.
    """
    sentence_length = 4
    vocab_size = 4
    embedding_dim = 3
    fixed_weights = numpy.asarray([
        [0, 0, 0],
        [1, 1, 1],
        [-1, 0, 1],
        [-1, -1, 0],
    ])
    embedding = Embedding(vocab_size,
                          embedding_dim,
                          weights=[fixed_weights],
                          mask_zero=True)

    sentence_1_input = Input(shape=(sentence_length, ), dtype='int32')
    sentence_2_input = Input(shape=(sentence_length, ), dtype='int32')
    sentence_1_embedding = embedding(sentence_1_input)
    sentence_2_embedding = embedding(sentence_2_input)

    attention = MatrixAttention()([sentence_1_embedding, sentence_2_embedding])
    attention_mask = OutputMask()(attention)
    model = Model(inputs=[sentence_1_input, sentence_2_input],
                  outputs=[attention, attention_mask])

    sentence_1_tensor = numpy.asarray([[0, 0, 1, 3]])
    sentence_2_tensor = numpy.asarray([[0, 1, 0, 2]])
    actual_attention, actual_mask = model.predict(
        [sentence_1_tensor, sentence_2_tensor])

    expected_attention = numpy.asarray([[
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 3, 0, 0],
        [0, -2, 0, 1],
    ]])
    # A cell is unmasked only where both word indices are non-zero.
    expected_mask = numpy.asarray([[
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 1, 0, 1],
        [0, 1, 0, 1],
    ]])
    assert_allclose(actual_attention, expected_attention)
    assert_allclose(actual_mask, expected_mask)
def test_mask_is_computed_correctly(self):
    """Check the mask that ``AddEncoderMask`` attaches to an encoded background.

    The expected mask is 0 exactly for the background rows whose word
    indices are all zero, and 1 for every other row.
    """
    background_input = Input(shape=(None, 3), dtype='int32')
    embedded_background = TimeDistributedEmbedding(
        input_dim=3, output_dim=2, mask_zero=True)(background_input)
    encoded_background = BOWEncoder(units=2)(embedded_background)
    remasked_background = AddEncoderMask()(
        [encoded_background, embedded_background])
    model = DeepQaModel(inputs=[background_input],
                        outputs=OutputMask()(remasked_background))

    background_rows = [
        [0, 0, 0],
        [2, 2, 2],
        [0, 0, 0],
        [0, 1, 2],
        [1, 0, 0],
        [0, 0, 0],
        [0, 1, 0],
        [1, 1, 1],
    ]
    test_background = numpy.asarray([background_rows])
    expected_mask = numpy.asarray([[0, 1, 0, 1, 1, 0, 1, 1]])

    actual_mask = model.predict([test_background])
    numpy.testing.assert_array_equal(expected_mask, actual_mask)
def test_on_masked_input(self):
    """Check ``DotProductKnowledgeSelector`` attention on a masked, merged input.

    The encoded sentence is stacked twice on top of the encoded background,
    with a merged mask that is zero for the two sentence rows followed by
    the background's own mask.
    """
    # TODO(matt): I don't really like having to build the whole model up to the attention
    # component here, but I'm not sure how to just test the selector with the right mask
    # without going through this.
    sentence_input = Input(shape=(3, ), dtype='int32')
    background_input = Input(shape=(3, 3), dtype='int32')
    embedding = TimeDistributedEmbedding(input_dim=3, output_dim=2, mask_zero=True)
    embedded_sentence = embedding(sentence_input)
    embedded_background = embedding(background_input)

    encoder = BOWEncoder(output_dim=2)
    encoded_sentence = encoder(embedded_sentence)
    encoded_background = EncoderWrapper(encoder)(embedded_background)

    def merge_mode(layer_outs):
        # Two copies of the sentence encoding, then the background encodings.
        sentence_rows = [K.expand_dims(layer_outs[0], dim=1) for _ in range(2)]
        return K.concatenate(sentence_rows + [layer_outs[1]], axis=1)

    def merge_masks(mask_outs):
        # Two zero mask columns for the sentence rows, then the background mask.
        zero_columns = [
            K.expand_dims(K.zeros_like(mask_outs[1][:, 0]), dim=1)
            for _ in range(2)
        ]
        return K.concatenate(zero_columns + [mask_outs[1]], axis=1)

    merged = merge([encoded_sentence, encoded_background],
                   mode=merge_mode,
                   output_shape=(5, 2),
                   output_mask=merge_masks)
    merged_mask = OutputMask()(merged)
    attention_weights = DotProductKnowledgeSelector()(merged)

    model = DeepQaModel(input=[sentence_input, background_input],
                        output=[merged_mask, attention_weights])
    model.summary(show_masks=True)

    test_input = numpy.asarray([[2, 2, 2]])
    test_background = numpy.asarray([[
        [2, 2, 2],
        [2, 2, 2],
        [0, 0, 0],
    ]])
    expected_mask = numpy.asarray([[0, 0, 1, 1, 0]])
    expected_attention = numpy.asarray([[0.5, 0.5, 0.0]])

    actual_mask, actual_attention = model.predict([test_input, test_background])
    numpy.testing.assert_array_almost_equal(expected_mask, actual_mask)
    numpy.testing.assert_array_almost_equal(expected_attention, actual_attention)
def test_handles_multiple_masks(self):
    """Check the output mask of ``TimeDistributedWithMask`` with two masked inputs.

    ``SlotSimilarityTupleMatcher`` is the wrapped layer because it takes two
    masked inputs. Inputs have shape
    (batch_size, num_options, num_tuples, num_slots, num_words).
    """
    tuple_input = Input(shape=(2, 3, 4, 5), dtype='int32')
    tuple_input_2 = Input(shape=(2, 3, 4, 5), dtype='int32')
    embedding = TimeDistributedEmbedding(input_dim=3, output_dim=6, mask_zero=True)
    # shape is now (batch_size, num_options, num_tuples, num_slots, num_words, embedding_dim)
    embedded_tuple = embedding(tuple_input)
    embedded_tuple_2 = embedding(tuple_input_2)

    encoder = EncoderWrapper(EncoderWrapper(EncoderWrapper(BOWEncoder())))
    # shape is now (batch_size, num_options, num_tuples, num_slots, embedding_dim)
    encoded_tuple = encoder(embedded_tuple)
    encoded_tuple_2 = encoder(embedded_tuple_2)

    # Shape of input to the tuple matcher is [(batch size, 2, 3, 4, 6), (batch size, 2, 3, 4, 6)]
    # Shape of input_mask to the tuple matcher is [(batch size, 2, 3, 4), (batch size, 2, 3, 4)]
    # Expected output mask shape (batch_size, 2, 3)
    matcher = SlotSimilarityTupleMatcher({"type": "cosine_similarity"})
    time_distributed = TimeDistributedWithMask(TimeDistributedWithMask(matcher))
    time_distributed_output = time_distributed([encoded_tuple, encoded_tuple_2])
    mask_output = OutputMask()(time_distributed_output)
    model = DeepQaModel(input=[tuple_input, tuple_input_2], output=mask_output)

    zeros = [0, 0, 0, 0, 0]
    non_zeros = [1, 1, 1, 1, 1]
    # Each tuple is four slots of five words; name the three patterns used.
    no_slots = [zeros, zeros, zeros, zeros]
    one_slot = [non_zeros, zeros, zeros, zeros]
    two_slots = [non_zeros, non_zeros, zeros, zeros]

    # shape: (batch size, num_options, num_tuples, num_slots, num_words), or (1, 2, 3, 4, 5)
    tuples1 = numpy.asarray([[
        [no_slots, one_slot, two_slots],
        [two_slots, one_slot, no_slots],
    ]])
    tuples2 = numpy.asarray([[
        [one_slot, one_slot, no_slots],
        [two_slots, one_slot, one_slot],
    ]])

    actual_mask = model.predict([tuples1, tuples2])
    # shape: (batch size, num_options, num_tuples)
    expected_mask = numpy.asarray([[[0, 1, 0], [1, 1, 0]]])
    assert actual_mask.shape == (1, 2, 3)
    numpy.testing.assert_array_almost_equal(expected_mask, actual_mask)
def test_non_normalized_attention_works(self):
    """Check ``Attention(normalize=False)`` values and the mask it propagates.

    The expected attention values are the raw dot products between the query
    vector and each word's fixed embedding; the expected mask is zero where
    the word index is zero.
    """
    sentence_length = 4
    vocab_size = 4
    embedding_dim = 3
    fixed_weights = numpy.asarray([
        [-1, 0, 4],
        [1, 1, 1],
        [-1, 0, -1],
        [-1, -1, 0],
    ])
    embedding = Embedding(vocab_size,
                          embedding_dim,
                          weights=[fixed_weights],
                          mask_zero=True)

    sentence_input = Input(shape=(sentence_length,), dtype='int32')
    sentence_embedding = embedding(sentence_input)
    query_input = Input(shape=(embedding_dim,), dtype='float32')

    attention = Attention(normalize=False)([query_input, sentence_embedding])
    attention_mask = OutputMask()(attention)
    model = Model(inputs=[query_input, sentence_input],
                  outputs=[attention, attention_mask])

    query_tensor = numpy.asarray([[.1, .8, .5]])
    sentence_tensor = numpy.asarray([[0, 1, 0, 2]])
    attention_tensor, mask_tensor = model.predict([query_tensor, sentence_tensor])

    assert_almost_equal(attention_tensor, [[1.9, 1.4, 1.9, -.6]])
    assert_almost_equal(mask_tensor, [[0, 1, 0, 1]])
def test_mask_is_computed_correctly(self):
    """Check the mask produced by ``EncoderWrapper`` around a ``BOWEncoder``.

    The expected mask is 0 for the background rows whose word indices are
    all zero and 1 for the remaining row.
    """
    background_input = Input(shape=(3, 3), dtype='int32')
    embedded_background = Embedding(
        input_dim=3, output_dim=2, mask_zero=True)(background_input)
    wrapped_encoder = EncoderWrapper(BOWEncoder(units=2))
    encoded_background = wrapped_encoder(embedded_background)
    model = DeepQaModel(inputs=[background_input],
                        outputs=OutputMask()(encoded_background))

    background_rows = [
        [0, 0, 0],
        [2, 2, 2],
        [0, 0, 0],
    ]
    test_background = numpy.asarray([background_rows])
    expected_mask = numpy.asarray([[0, 1, 0]])

    actual_mask = model.predict([test_background])
    numpy.testing.assert_array_almost_equal(expected_mask, actual_mask)
def test_call_works_with_uneven_dims(self):
    """Check ``Multiply`` on inputs of different rank, including its mask.

    A (batch, 2, 5) tensor is multiplied by a (batch, 2) tensor, both passed
    through the same ``AddMask`` layer. The expected mask is zero exactly
    where the first input is zero.
    """
    batch_size = 1
    input_length = 2
    input_length_2 = 5

    input_1_layer = Input(shape=(input_length, input_length_2), dtype='float32')
    input_2_layer = Input(shape=(input_length,), dtype='float32')
    add_mask = AddMask()
    masked_input_1 = add_mask(input_1_layer)
    masked_input_2 = add_mask(input_2_layer)

    product = Multiply()([masked_input_1, masked_input_2])
    product_mask = OutputMask()(product)
    model = Model(inputs=[input_1_layer, input_2_layer],
                  outputs=[product, product_mask])

    input_1_tensor = numpy.asarray([[
        [2, 5, 0, 1, -4],
        [-1, 0, -2, -10, -4],
    ]])
    input_2_tensor = numpy.asarray([[2, 1]])
    multiply_tensor, mask_tensor = model.predict([input_1_tensor, input_2_tensor])

    expected_product = [[
        [4, 10, 0, 2, -8],
        [-1, 0, -2, -10, -4],
    ]]
    expected_mask = [[
        [1, 1, 0, 1, 1],
        [1, 0, 1, 1, 1],
    ]]
    assert multiply_tensor.shape == (batch_size, input_length, input_length_2)
    numpy.testing.assert_almost_equal(multiply_tensor, expected_product)
    numpy.testing.assert_almost_equal(mask_tensor, expected_mask)
def test_returns_masks_if_no_input_mask(self):
    """Check that ``TimeDistributedWithMask`` returns a mask without input masks.

    ``WordOverlapTupleMatcher`` is the wrapped layer because it takes no input
    mask but returns an output mask. Inputs have shape
    (batch_size, num_options, num_tuples, num_slots, num_words).
    """
    tuple_input = Input(shape=(2, 3, 4, 5), dtype='int32')
    tuple_input_2 = Input(shape=(2, 3, 4, 5), dtype='int32')

    # Shape of input to the tuple matcher is [(batch size, 2, 3, 4, 5), (batch size, 2, 3, 4, 5)]
    # Shape of input_mask to the tuple matcher is [None, None]
    # Expected output mask shape (batch_size, 2, 3)
    matcher = WordOverlapTupleMatcher()
    time_distributed = TimeDistributedWithMask(TimeDistributedWithMask(matcher))
    time_distributed_output = time_distributed([tuple_input, tuple_input_2])
    mask_output = OutputMask()(time_distributed_output)
    model = DeepQaModel(inputs=[tuple_input, tuple_input_2], outputs=mask_output)

    zeros = [0, 0, 0, 0, 0]
    non_zeros = [1, 1, 1, 1, 1]
    # Each tuple is four slots of five words; name the three patterns used.
    no_slots = [zeros, zeros, zeros, zeros]
    one_slot = [non_zeros, zeros, zeros, zeros]
    two_slots = [non_zeros, non_zeros, zeros, zeros]

    # shape: (batch size, num_options, num_tuples, num_slots, num_words), or (1, 2, 3, 4, 5)
    tuples1 = numpy.asarray([[
        [no_slots, one_slot, two_slots],
        [two_slots, one_slot, no_slots],
    ]])
    tuples2 = numpy.asarray([[
        [one_slot, one_slot, no_slots],
        [two_slots, one_slot, one_slot],
    ]])

    actual_mask = model.predict([tuples1, tuples2])
    # shape: (batch size, num_options, num_tuples)
    expected_mask = numpy.asarray([[[0, 1, 0], [1, 1, 0]]])
    assert actual_mask.shape == (1, 2, 3)
    numpy.testing.assert_array_almost_equal(expected_mask, actual_mask)
def test_mask_is_computed_correctly(self):
    """Check the mask produced by ``EncoderWrapper`` around a ``BOWEncoder``.

    The expected mask is 0 for the background rows whose word indices are
    all zero and 1 for the remaining row.
    """
    # TODO(matt): I don't really like having to build a model to test this, but I'm not sure of
    # how else to do it.
    background_input = Input(shape=(3, 3), dtype='int32')
    embedded_background = TimeDistributedEmbedding(
        input_dim=3, output_dim=2, mask_zero=True)(background_input)
    wrapped_encoder = EncoderWrapper(BOWEncoder(output_dim=2))
    encoded_background = wrapped_encoder(embedded_background)
    mask_output = OutputMask()(encoded_background)
    model = DeepQaModel(input=[background_input], output=mask_output)

    background_rows = [
        [0, 0, 0],
        [2, 2, 2],
        [0, 0, 0],
    ]
    test_background = numpy.asarray([background_rows])
    expected_mask = numpy.asarray([[0, 1, 0]])

    actual_mask = model.predict([test_background])
    numpy.testing.assert_array_almost_equal(expected_mask, actual_mask)
def test_merge_adds_words_to_sentence_correctly(self):
    """Check that ``VectorMatrixMerge`` on axis 1 also adds a row to the mask.

    Adding an extra word to the sentence must add the word's mask as a new
    first row of the merged mask.
    """
    vocab_size = embedding_dim = 10
    sentence_length = word_length = 3

    sentence_input = Input(shape=(sentence_length, word_length), dtype='int32')
    extra_word_input = Input(shape=(word_length,), dtype='int32')
    embedding = Embedding(input_dim=vocab_size,
                          output_dim=embedding_dim,
                          mask_zero=True)
    # (batch_size, sentence_length, word_length, embedding_dim)
    embedded_sentence = embedding(sentence_input)
    # (batch_size, word_length, embedding_dim)
    embedded_extra_word = embedding(extra_word_input)

    merged_sentence = VectorMatrixMerge(concat_axis=1)(
        [embedded_extra_word, embedded_sentence])
    result_mask = OutputMask()(merged_sentence)
    model = Model(inputs=[sentence_input, extra_word_input],
                  outputs=[merged_sentence, result_mask])

    sentence_tensor = numpy.asarray([[
        [1, 3, 0],
        [2, 8, 7],
        [0, 0, 0],
    ]])
    extra_word_tensor = numpy.asarray([[9, 0, 0]])
    merged_tensor, result_mask_tensor = model.predict(
        [sentence_tensor, extra_word_tensor])

    # First row is the extra word's mask, followed by the sentence's rows.
    expected_mask = numpy.asarray([[
        [1, 0, 0],
        [1, 1, 0],
        [1, 1, 1],
        [0, 0, 0],
    ]])
    expected_shape = (1, sentence_length + 1, word_length, embedding_dim)
    assert merged_tensor.shape == expected_shape
    assert_array_equal(result_mask_tensor, expected_mask)
def test_merge_adds_dims_to_word_embedding_correctly(self):
    """Check that ``VectorMatrixMerge`` on the embedding axis keeps the mask.

    When dimensions are added to an embedding, the concat axis is greater
    than the dimensionality of the mask, so the mask must come out unchanged.
    The check is run for ``concat_axis`` 2 and -1.
    """
    vocab_size = 10
    sentence_length = 6
    embedding_dim = 10

    # Fixtures shared by both iterations of the loop.
    sentence_tensor = numpy.asarray([[1, 3, 6, 2, 0, 0]])
    extra_word_tensor = numpy.asarray([[1, 2, 3, 4, 5, 6]])
    expected_mask = numpy.asarray([[1, 1, 1, 1, 0, 0]])
    expected_shape = (1, sentence_length, embedding_dim + 1)

    for concat_axis in (2, -1):
        sentence_input = Input(shape=(sentence_length,), dtype='int32')
        extra_embedding_input = Input(shape=(sentence_length,), dtype='float32')
        embedding = Embedding(input_dim=vocab_size,
                              output_dim=embedding_dim,
                              mask_zero=True)
        # (batch_size, sentence_length, embedding_dim)
        embedded_sentence = embedding(sentence_input)

        merged_sentence = VectorMatrixMerge(concat_axis=concat_axis)(
            [extra_embedding_input, embedded_sentence])
        result_mask = OutputMask()(merged_sentence)
        model = Model(inputs=[sentence_input, extra_embedding_input],
                      outputs=[merged_sentence, result_mask])

        merged_tensor, result_mask_tensor = model.predict(
            [sentence_tensor, extra_word_tensor])

        assert merged_tensor.shape == expected_shape
        # The extra values occupy index 0 of the widened embedding axis.
        assert_array_equal(merged_tensor[0, :, 0], [1, 2, 3, 4, 5, 6])
        assert_array_equal(result_mask_tensor, expected_mask)