Example 1
    def test_squeeze_case_mask(self):
        input_length = 4
        mask_value = 3

        input_layer = Input(shape=(input_length, 1),
                            dtype='float32',
                            name="input")
        mask_layer = Masking(mask_value=mask_value)
        masked_input = mask_layer(input_layer)
        l1_normalize_layer = L1Normalize()
        normalized_input = l1_normalize_layer(masked_input)

        model = Model([input_layer], normalized_input)
        unnormalized_vector = np.array([[[1.0], [2.0], [3.0], [4.0]]])
        result = model.predict([unnormalized_vector])
        assert_array_almost_equal(
            result, np.array([[0.14285715, 0.2857143, 0, 0.5714286]]))
        assert_array_almost_equal(np.sum(result, axis=1), np.ones(1))

        # Testing general masked batched case
        unnormalized_matrix = np.array([[[1.0], [2.0], [3.0], [4.0]],
                                        [[3.0], [2.0], [3.0], [4.0]]])

        result = model.predict([unnormalized_matrix])
        assert_array_almost_equal(
            result,
            np.array([[0.14285715, 0.2857143, 0, 0.5714286],
                      [0, 2.0 / 6.0, 0, 4.0 / 6.0]]))
        assert_array_almost_equal(np.sum(result, axis=1), np.ones(2))
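For reference, the expected numbers in this test can be reproduced with plain NumPy. The sketch below applies the mask by hand (zeroing entries equal to mask_value before dividing by the L1 norm); it is only an illustration of the arithmetic, not the library's masking implementation.

import numpy as np

def masked_l1_normalize(vector, mask_value):
    # Zero out masked positions, then divide by the L1 norm of the remaining entries.
    masked = np.where(vector == mask_value, 0.0, vector)
    return masked / np.sum(np.abs(masked), axis=-1, keepdims=True)

print(masked_l1_normalize(np.array([[1.0, 2.0, 3.0, 4.0]]), mask_value=3))
# [[0.14285714 0.28571429 0.         0.57142857]]
print(masked_l1_normalize(np.array([[3.0, 2.0, 3.0, 4.0]]), mask_value=3))
# [[0.         0.33333333 0.         0.66666667]]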
Example 2
    def test_squeeze_case(self):
        input_length = 6

        input_layer = Input(shape=(input_length, 1),
                            dtype='float32',
                            name="input")
        l1_normalize_layer = L1Normalize()
        normalized_input = l1_normalize_layer(input_layer)

        model = Model([input_layer], normalized_input)
        unnormalized_vector = np.array([[[.1], [.2], [.3], [.4], [0.01],
                                         [0.03]]])
        result = model.predict([unnormalized_vector])
        assert_array_almost_equal(
            result,
            np.array([[
                0.09615385, 0.1923077, 0.28846157, 0.38461539, 0.00961538,
                0.02884615
            ]]))
        assert_array_almost_equal(np.sum(result, axis=1), np.ones(1))

        # Testing general unmasked batched case.
        unnormalized_matrix = np.array([[[.1], [.2], [.3], [.4], [0.01],
                                         [0.03]],
                                        [[1.0], [2.0], [3.0], [4.0], [5.0],
                                         [6.0]]])
        result = model.predict([unnormalized_matrix])
        assert_array_almost_equal(
            result,
            np.array([[
                0.09615385, 0.1923077, 0.28846157, 0.38461539, 0.00961538,
                0.02884615
            ],
                      [
                          1.0 / 21.0, 2.0 / 21.0, 3.0 / 21.0, 4.0 / 21.0,
                          5.0 / 21.0, 6.0 / 21.0
                      ]]))
        assert_array_almost_equal(np.sum(result, axis=1), np.ones(2))
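run_biDAF below relies on several module-level names that are not part of this snippet (max_len_Q, max_len_P, max_num_options, em_len, emb_dim, batch_size, embeddings), on the Keras layers it calls, and on the project's custom layers (MatrixAttention, MaskedSoftmax, WeightedSum, Max, RepeatLike, ComplexConcat, OptionAttentionSum, L1Normalize). The block below is a minimal sketch with placeholder values and standard Keras imports; the actual hyperparameters and the custom layers' import paths come from the original project and are not shown here.

import numpy as np
from keras.layers import Input, Embedding, LSTM, Bidirectional, Dense, TimeDistributed, Concatenate
from keras.models import Model

# Placeholder hyperparameters (illustrative values only, not the original configuration).
max_len_Q = 30            # maximum question length in tokens
max_len_P = 700           # maximum passage length in tokens
max_num_options = 10      # maximum number of answer options
emb_dim = 100             # word-embedding dimensionality
em_len = 50000            # vocabulary size of the pretrained embedding matrix
batch_size = 32
embeddings = np.random.rand(em_len, emb_dim).astype('float32')  # stand-in for pretrained vectors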
def run_biDAF():
    # Create embeddings for the question and the passage (news text) at the word level
    # (char-level embeddings are not used in this snippet)
    question_input = Input(shape=(max_len_Q,),
                           dtype='int32', name="question_input")
    passage_input = Input(shape=(max_len_P,),
                          dtype='int32', name="passage_input")
    # Input holding the answer options, so that only the options' probabilities are mapped to the output
    options_input = Input(shape=(max_num_options,),
                          dtype='int32', name="options_input")
    embedding_layer_P = Embedding(em_len,
                                  emb_dim,
                                  weights=[embeddings],
                                  input_length=max_len_P,
                                  batch_input_shape=(batch_size, max_len_P),
                                  trainable=False)
    embedding_layer_Q = Embedding(em_len,
                                  emb_dim,
                                  weights=[embeddings],
                                  input_length=max_len_Q,
                                  batch_input_shape=(batch_size, max_len_Q),
                                  trainable=False)

    passage_embedding = embedding_layer_P(passage_input)
    question_embedding = embedding_layer_Q(question_input)

    # Bidirectional LSTM encoders over the question (sequence and final-state variants) and the passage.
    bi_lstm_Q = Bidirectional(LSTM(256, return_sequences=True),
                              batch_input_shape=(batch_size, max_len_Q, emb_dim))(question_embedding)
    bi_lstm_Q1 = Bidirectional(LSTM(256),
                               batch_input_shape=(batch_size, max_len_Q, emb_dim))(question_embedding)  # not used below
    bi_lstm_P = Bidirectional(LSTM(256, return_sequences=True),
                              batch_input_shape=(batch_size, max_len_P, emb_dim))(passage_embedding)
    ##### Create Attention Layer

    similarity_function_params = {'type': 'linear', 'combination': 'x,y,x*y'}
    matrix_attention_layer = MatrixAttention(similarity_function=similarity_function_params,
                                             name='matrix_attention_layer')
    # Shape: (batch_size, num_passage_words, num_question_words)
    passage_question_similarity = matrix_attention_layer([bi_lstm_P, bi_lstm_Q])

    # Shape: (batch_size, num_passage_words, num_question_words), normalized over question words for each passage word.
    passage_question_attention = MaskedSoftmax()(passage_question_similarity)

    weighted_sum_layer = WeightedSum(name="passage_question_vectors",
                                     use_masking=False)  # Shape: (batch_size, num_passage_words, embedding_dim * 2)
    passage_question_vectors = weighted_sum_layer([bi_lstm_Q, passage_question_attention])  # U~_{:t} = sum_j a_tj * U_{:j}
    ## Query-to-passage attention (2d * max_len_Q):
    # find the most important context words by taking max() over passage_question_similarity

    question_passage_similarity = Max(axis=-1)(passage_question_similarity)  # Shape: (batch_size, num_passage_words)
    # apply softmax to obtain b (softmax over the column-wise max of the similarity matrix)
    question_passage_attention = MaskedSoftmax()(question_passage_similarity)  # Shape: (batch_size, num_passage_words)

    weighted_sum_layer = WeightedSum(name="question_passage_vector",
                                     use_masking=False)  # h~ = sum_t b_t * H_{:t}, a 2*embedding_dim vector
    question_passage_vector = weighted_sum_layer([bi_lstm_P, question_passage_attention])

    repeat_layer = RepeatLike(axis=1, copy_from_axis=1)
    # Shape: (batch_size, num_passage_words, embedding_dim * 2)
    tiled_question_passage_vector = repeat_layer([question_passage_vector, bi_lstm_P])

    # Shape: (batch_size, num_passage_words, embedding_dim * 8)
    complex_concat_layer = ComplexConcat(combination='1,2,1*2,1*3', name='final_merged_passage')
    final_merged_passage = complex_concat_layer([bi_lstm_P,
                                                 passage_question_vectors,
                                                 tiled_question_passage_vector])  # Denote G
    # Modelling layer: takes the (?, ?, emb_dim * 8) merged passage and applies a bidirectional LSTM
    # with d units per direction, giving a 2d representation per passage word, denoted M.
    bi_model_passage = Bidirectional(LSTM(256, return_sequences=True),
                                     batch_input_shape=(batch_size, max_len_P, emb_dim))(final_merged_passage)

    # The span-begin output is computed from the attention-weighted and LSTM representations: softmax(Wp1 * [G; M])
    span_begin_input = Concatenate()([final_merged_passage, bi_model_passage])
    span_begin_weights = TimeDistributed(Dense(units=1))(span_begin_input)  # Wp1
    # Shape: (batch_size, num_passage_words)
    span_begin_probabilities = MaskedSoftmax(name="span_begin_softmax")(span_begin_weights)  # (700,)

    # As Minjoon Seo's BiDAF indicates, after obtaining p1 (the span-start probabilities), the probability values
    # of the entity instances are summed while masking out all non-entity values, and the loss is applied without p2.
    multiword_option_mode = 'mean'
    options_sum_layer_minj = OptionAttentionSum(multiword_option_mode, name="options_probability_sum_minj")
    options_probabilities_minj = options_sum_layer_minj([passage_input, span_begin_probabilities, options_input])
    l1_norm_layer = L1Normalize()
    option_normalized_probabilities_cnn = l1_norm_layer(options_probabilities_minj)
    # dense = Dense(377, activation='sigmoid')(option_normalized_probabilities_cnn)

    biDAF = Model(inputs=[question_input, passage_input, options_input],
                  outputs=option_normalized_probabilities_cnn)
    biDAF.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return biDAF
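A hedged usage sketch, assuming the placeholder hyperparameters above: the inputs are integer token indices with the shapes declared in run_biDAF, and the options are drawn from the passage so that OptionAttentionSum has matching passage positions to sum over.

biDAF_model = run_biDAF()

dummy_questions = np.random.randint(1, em_len, size=(batch_size, max_len_Q))
dummy_passages = np.random.randint(1, em_len, size=(batch_size, max_len_P))
# Take the options from the passage itself so every option word actually occurs in the passage.
dummy_options = dummy_passages[:, :max_num_options]

predictions = biDAF_model.predict([dummy_questions, dummy_passages, dummy_options])
print(predictions.shape)  # expected: (batch_size, max_num_options), with each row summing to 1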