def test_squeeze_case_mask(self):
    input_length = 4
    mask_value = 3
    input_layer = Input(shape=(input_length, 1), dtype='float32', name="input")
    mask_layer = Masking(mask_value=mask_value)
    masked_input = mask_layer(input_layer)
    l1_normalize_layer = L1Normalize()
    normalized_input = l1_normalize_layer(masked_input)
    model = Model([input_layer], normalized_input)

    unnormalized_vector = np.array([[[1.0], [2.0], [3.0], [4.0]]])
    result = model.predict([unnormalized_vector])
    assert_array_almost_equal(
        result,
        np.array([[0.14285715, 0.2857143, 0, 0.5714286]]))
    assert_array_almost_equal(np.sum(result, axis=1), np.ones(1))

    # Testing the general masked batched case.
    unnormalized_matrix = np.array([[[1.0], [2.0], [3.0], [4.0]],
                                    [[3.0], [2.0], [3.0], [4.0]]])
    result = model.predict([unnormalized_matrix])
    assert_array_almost_equal(
        result,
        np.array([[0.14285715, 0.2857143, 0, 0.5714286],
                  [0, 2.0 / 6.0, 0, 4.0 / 6.0]]))
    assert_array_almost_equal(np.sum(result, axis=1), np.ones(2))
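
# A minimal NumPy sketch (hypothetical helper, not part of the original tests) showing
# where the masked expectations above come from: entries equal to mask_value are zeroed
# out before dividing each row by its remaining L1 norm.
def _reference_masked_l1_normalize(matrix, mask_value):
    squeezed = matrix.squeeze(-1)                       # (batch_size, length)
    keep = (squeezed != mask_value).astype('float32')   # 0.0 where masked
    kept_values = squeezed * keep
    return kept_values / np.sum(kept_values, axis=1, keepdims=True)

# e.g. [[1, 2, 3, 4]] with mask_value=3 -> [[1/7, 2/7, 0, 4/7]]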
def test_squeeze_case(self):
    input_length = 6
    input_layer = Input(shape=(input_length, 1), dtype='float32', name="input")
    l1_normalize_layer = L1Normalize()
    normalized_input = l1_normalize_layer(input_layer)
    model = Model([input_layer], normalized_input)

    unnormalized_vector = np.array([[[.1], [.2], [.3], [.4], [0.01], [0.03]]])
    result = model.predict([unnormalized_vector])
    assert_array_almost_equal(
        result,
        np.array([[0.09615385, 0.1923077, 0.28846157, 0.38461539,
                   0.00961538, 0.02884615]]))
    assert_array_almost_equal(np.sum(result, axis=1), np.ones(1))

    # Testing the general unmasked batched case.
    unnormalized_matrix = np.array([[[.1], [.2], [.3], [.4], [0.01], [0.03]],
                                    [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]])
    result = model.predict([unnormalized_matrix])
    assert_array_almost_equal(
        result,
        np.array([[0.09615385, 0.1923077, 0.28846157, 0.38461539,
                   0.00961538, 0.02884615],
                  [1.0 / 21.0, 2.0 / 21.0, 3.0 / 21.0,
                   4.0 / 21.0, 5.0 / 21.0, 6.0 / 21.0]]))
    assert_array_almost_equal(np.sum(result, axis=1), np.ones(2))
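
# Hypothetical NumPy check (an assumption about the layer's behaviour: plain L1
# normalization over the squeezed last axis) reproducing the unmasked expectations above.
vector = np.array([[0.1, 0.2, 0.3, 0.4, 0.01, 0.03]])
expected = vector / np.sum(vector, axis=1, keepdims=True)
# ~[[0.0962, 0.1923, 0.2885, 0.3846, 0.0096, 0.0288]]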
def run_biDAF():
    # Create word-level embeddings for both the question and the passage.
    question_input = Input(shape=(max_len_Q,), dtype='int32', name="question_input")
    passage_input = Input(shape=(max_len_P,), dtype='int32', name="passage_input")
    # Options input, used to map the span-begin distribution onto the answer options only.
    options_input = Input(shape=(max_num_options,), dtype='int32', name="options_input")

    embedding_layer_P = Embedding(em_len, emb_dim,
                                  weights=[embeddings],
                                  input_length=max_len_P,
                                  batch_input_shape=(batch_size, max_len_P),
                                  trainable=False)
    embedding_layer_Q = Embedding(em_len, emb_dim,
                                  weights=[embeddings],
                                  input_length=max_len_Q,
                                  batch_input_shape=(batch_size, max_len_Q),
                                  trainable=False)
    passage_embedding = embedding_layer_P(passage_input)
    question_embedding = embedding_layer_Q(question_input)

    bi_lstm_Q = Bidirectional(LSTM(256, return_sequences=True),
                              batch_input_shape=(batch_size, max_len_Q, emb_dim))(question_embedding)
    bi_lstm_Q1 = Bidirectional(LSTM(256),
                               batch_input_shape=(batch_size, max_len_Q, emb_dim))(question_embedding)
    bi_lstm_P = Bidirectional(LSTM(256, return_sequences=True),
                              batch_input_shape=(batch_size, max_len_P, emb_dim))(passage_embedding)

    ##### Create Attention Layer #####
    similarity_function_params = {'type': 'linear', 'combination': 'x,y,x*y'}
    matrix_attention_layer = MatrixAttention(similarity_function=similarity_function_params,
                                             name='matrix_attention_layer')
    # Shape: (batch_size, num_passage_words, num_question_words)
    passage_question_similarity = matrix_attention_layer([bi_lstm_P, bi_lstm_Q])

    # Shape: (batch_size, num_passage_words, num_question_words),
    # normalized over question words for each passage word.
    passage_question_attention = MaskedSoftmax()(passage_question_similarity)
    weighted_sum_layer = WeightedSum(name="passage_question_vectors", use_masking=False)
    # Shape: (batch_size, num_passage_words, embedding_dim * 2); sum_t a_t(U~:t) = 1
    passage_question_vectors = weighted_sum_layer([bi_lstm_Q, passage_question_attention])

    ## Query-to-passage attention (2d * max_len_Q).
    # Find the most important context words by taking the max of
    # passage_question_similarity over question words.
    question_passage_similarity = Max(axis=-1)(passage_question_similarity)
    # Shape: (batch_size, num_passage_words)
    # Softmax over the column-wise maxima of the similarity matrix gives b.
    question_passage_attention = MaskedSoftmax()(question_passage_similarity)
    # Shape: (batch_size, num_passage_words)
    weighted_sum_layer = WeightedSum(name="question_passage_vector", use_masking=False)
    # h~ = sum_t(b_t * H:t), dimension 2 * embed_dim; sum_t b_t(H~:t) = 1
    question_passage_vector = weighted_sum_layer([bi_lstm_P, question_passage_attention])

    repeat_layer = RepeatLike(axis=1, copy_from_axis=1)
    # Shape: (batch_size, num_passage_words, embedding_dim * 2)
    tiled_question_passage_vector = repeat_layer([question_passage_vector, bi_lstm_P])

    # Shape: (batch_size, num_passage_words, embedding_dim * 8); denote G.
    complex_concat_layer = ComplexConcat(combination='1,2,1*2,1*3', name='final_merged_passage')
    final_merged_passage = complex_concat_layer([bi_lstm_P,
                                                 passage_question_vectors,
                                                 tiled_question_passage_vector])
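    # Hypothetical NumPy illustration (an assumption, not part of the original model code)
    # of the '1,2,1*2,1*3' combination performed by ComplexConcat above, i.e.
    # G_t = [H_t; U~_t; H_t * U~_t; H_t * h~], growing the last axis from 2d to 8d:
    #
    #     g = np.concatenate([h, u_tilde, h * u_tilde, h * h_tilde_tiled], axis=-1)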
    # Modelling layer: takes input of shape (?, ?, emb_dim * 8) and applies a
    # bidirectional LSTM with d units in each direction, giving 2d * max_len_P.
    bi_model_passage = Bidirectional(LSTM(256, return_sequences=True),
                                     batch_input_shape=(batch_size, max_len_P, emb_dim))(final_merged_passage)  # denote M

    # Span-begin output is calculated from the attention weights and the LSTM output:
    # softmax(Wp1 * [G; M]).
    span_begin_input = Concatenate()([final_merged_passage, bi_model_passage])
    span_begin_weights = TimeDistributed(Dense(units=1))(span_begin_input)  # Wp1
    # Shape: (batch_size, num_passage_words)
    span_begin_probabilities = MaskedSoftmax(name="span_begin_softmax")(span_begin_weights)  # (700,)

    # As in Minjoon Seo's BiDAF: after obtaining p1 (the span-start probabilities),
    # sum the probability values over all instances of each entity, masking out
    # all non-entity values; the loss is then applied without p2.
    multiword_option_mode = 'mean'
    options_sum_layer_minj = OptionAttentionSum(multiword_option_mode,
                                                name="options_probability_sum_minj")
    options_probabilities_minj = options_sum_layer_minj([passage_input,
                                                         span_begin_probabilities,
                                                         options_input])
    l1_norm_layer = L1Normalize()
    option_normalized_probabilities_cnn = l1_norm_layer(options_probabilities_minj)
    # dense = Dense(377, activation='sigmoid')(option_normalized_probabilities_cnn)

    biDAF = Model(inputs=[question_input, passage_input, options_input],
                  outputs=option_normalized_probabilities_cnn)
    biDAF.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return biDAF
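
# Hypothetical usage sketch (an assumption, not part of the original file): it relies on
# the same module-level globals used by run_biDAF() above (max_len_Q, max_len_P,
# max_num_options, em_len, emb_dim, embeddings, batch_size) already being defined.
if __name__ == '__main__':
    biDAF_model = run_biDAF()
    biDAF_model.summary()
    # One dummy batch of token ids, just to check that the graph wires up end to end.
    dummy_question = np.random.randint(1, em_len, size=(batch_size, max_len_Q))
    dummy_passage = np.random.randint(1, em_len, size=(batch_size, max_len_P))
    # Draw the options from the passage so the option ids actually occur in it.
    dummy_options = dummy_passage[:, :max_num_options]
    predictions = biDAF_model.predict([dummy_question, dummy_passage, dummy_options])
    print(predictions.shape)  # expected: (batch_size, max_num_options)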