def match_passage_with_question(self, passage_reps, question_reps, passage_mask, question_mask, passage_lengths, question_lengths,
                                context_lstm_dim, scope=None, with_full_match=True, with_maxpool_match=True,
                                with_attentive_match=True, with_max_attentive_match=True, dropout_rate=0, forward=True):
    passage_reps = tf.multiply(passage_reps, tf.expand_dims(passage_mask, -1))
    question_reps = tf.multiply(question_reps, tf.expand_dims(question_mask, -1))
    all_question_aware_representatins = []
    dim = 0
    with tf.variable_scope(scope or "match_passage_with_question"):
        # relevancy_matrix: [batch_size, p_len, q_len]
        relevancy_matrix = self.cal_relevancy_matrix(question_reps, passage_reps)
        relevancy_matrix = self.mask_relevancy_matrix(relevancy_matrix, question_mask, passage_mask)

        all_question_aware_representatins.append(tf.reduce_max(relevancy_matrix, axis=2, keep_dims=True))
        all_question_aware_representatins.append(tf.reduce_mean(relevancy_matrix, axis=2, keep_dims=True))
        dim += 2

        if with_full_match:
            if forward:
                question_full_rep = layer_utils.collect_final_step_of_lstm(question_reps, question_lengths - 1)
            else:
                question_full_rep = question_reps[:, 0, :]
            passage_len = tf.shape(passage_reps)[1]
            question_full_rep = tf.expand_dims(question_full_rep, axis=1)
            question_full_rep = tf.tile(question_full_rep, [1, passage_len, 1])  # [batch_size, passage_len, feature_dim]
            # attentive_rep: [batch_size, passage_len, match_dim]
            (attentive_rep, match_dim) = self.multi_perspective_match(context_lstm_dim, passage_reps, question_full_rep,
                                                                      dropout_rate=self.dropout_rate,
                                                                      scope_name='mp-match-full-match')
            all_question_aware_representatins.append(attentive_rep)
            dim += match_dim

        if with_maxpool_match:
            maxpooling_decomp_params = tf.get_variable("maxpooling_matching_decomp",
                                                       shape=[self.config.cosine_MP_dim, context_lstm_dim],
                                                       dtype=tf.float32)
            # maxpooling_rep: [batch_size, passage_len, 2 * cosine_MP_dim]
            maxpooling_rep = self.cal_maxpooling_matching(passage_reps, question_reps, maxpooling_decomp_params)
            all_question_aware_representatins.append(maxpooling_rep)
            dim += 2 * self.config.cosine_MP_dim

        if with_attentive_match:
            # atten_scores: [batch_size, p_len, q_len]
            atten_scores = layer_utils.calcuate_attention(passage_reps, question_reps, context_lstm_dim, context_lstm_dim,
                                                          scope_name="attention", att_type=self.config.att_type,
                                                          att_dim=self.config.att_dim, remove_diagnoal=False,
                                                          mask1=passage_mask, mask2=question_mask,
                                                          dropout_rate=self.dropout_rate)
            att_question_contexts = tf.matmul(atten_scores, question_reps)
            (attentive_rep, match_dim) = self.multi_perspective_match(context_lstm_dim, passage_reps, att_question_contexts,
                                                                      dropout_rate=self.dropout_rate,
                                                                      scope_name='mp-match-att_question')
            all_question_aware_representatins.append(attentive_rep)
            dim += match_dim

        if with_max_attentive_match:
            # relevancy_matrix: [batch_size, p_len, q_len]
            # question_reps: [batch_size, q_len, dim]
            # max_att: [batch_size, p_len, dim]
            max_att = self.cal_max_question_representation(question_reps, relevancy_matrix)
            # max_attentive_rep: [batch_size, passage_len, match_dim]
            (max_attentive_rep, match_dim) = self.multi_perspective_match(context_lstm_dim, passage_reps, max_att,
                                                                          dropout_rate=self.dropout_rate,
                                                                          scope_name='mp-match-max-att')
            all_question_aware_representatins.append(max_attentive_rep)
            dim += match_dim

    all_question_aware_representatins = tf.concat(axis=2, values=all_question_aware_representatins)
    return (all_question_aware_representatins, dim)
def match_passage_with_question(passage_reps, question_reps, passage_mask, question_mask, passage_lengths, question_lengths,
                                context_lstm_dim, scope=None, with_full_match=True, with_maxpool_match=True,
                                with_attentive_match=True, with_max_attentive_match=True,
                                is_training=True, options=None, dropout_rate=0, forward=True):
    passage_mask = tf.cast(passage_mask, tf.float32)
    question_mask = tf.cast(question_mask, tf.float32)
    passage_reps = tf.multiply(passage_reps, tf.expand_dims(passage_mask, -1))
    question_reps = tf.multiply(question_reps, tf.expand_dims(question_mask, -1))
    all_question_aware_representatins = []
    dim = 0
    with tf.variable_scope(scope or "match_passage_with_question"):
        relevancy_matrix = cal_relevancy_matrix(question_reps, passage_reps)
        relevancy_matrix = mask_relevancy_matrix(relevancy_matrix, question_mask, passage_mask)
        # relevancy_matrix = layer_utils.calcuate_attention(passage_reps, question_reps, context_lstm_dim, context_lstm_dim,
        #     scope_name="fw_attention", att_type=options.att_type, att_dim=options.att_dim,
        #     remove_diagnoal=False, mask1=passage_mask, mask2=question_mask, is_training=is_training, dropout_rate=dropout_rate)

        all_question_aware_representatins.append(tf.reduce_max(relevancy_matrix, axis=2, keep_dims=True))
        all_question_aware_representatins.append(tf.reduce_mean(relevancy_matrix, axis=2, keep_dims=True))
        dim += 2

        if with_full_match:
            print("-------------using full match-----------")
            if forward:
                question_full_rep = layer_utils.collect_final_step_of_lstm(question_reps, question_lengths - 1)
            else:
                question_full_rep = question_reps[:, 0, :]
            passage_len = tf.shape(passage_reps)[1]
            question_full_rep = tf.expand_dims(question_full_rep, axis=1)
            question_full_rep = tf.tile(question_full_rep, [1, passage_len, 1])  # [batch_size, passage_len, feature_dim]
            (attentive_rep, match_dim) = multi_perspective_match(context_lstm_dim, passage_reps, question_full_rep,
                                                                 is_training=is_training, dropout_rate=dropout_rate,
                                                                 options=options, scope_name='mp-match-full-match')
            all_question_aware_representatins.append(attentive_rep)
            dim += match_dim

        if with_maxpool_match:
            print("-------------using maxpool match-----------")
            maxpooling_decomp_params = tf.get_variable("maxpooling_matching_decomp",
                                                       shape=[options["cosine_MP_dim"], context_lstm_dim],
                                                       dtype=tf.float32)
            maxpooling_rep = cal_maxpooling_matching(passage_reps, question_reps, maxpooling_decomp_params)
            all_question_aware_representatins.append(maxpooling_rep)
            dim += 2 * options["cosine_MP_dim"]

        if with_attentive_match:
            print("-------------using attentive match-----------")
            atten_scores = layer_utils.calcuate_attention(passage_reps, question_reps, context_lstm_dim, context_lstm_dim,
                                                          scope_name="attention", att_type=options["att_type"],
                                                          att_dim=options["att_dim"], remove_diagnoal=False,
                                                          mask1=passage_mask, mask2=question_mask,
                                                          is_training=is_training, dropout_rate=dropout_rate)
            att_question_contexts = tf.matmul(atten_scores, question_reps)
            (attentive_rep, match_dim) = multi_perspective_match(context_lstm_dim, passage_reps, att_question_contexts,
                                                                 is_training=is_training, dropout_rate=dropout_rate,
                                                                 options=options, scope_name='mp-match-att_question')
            all_question_aware_representatins.append(attentive_rep)
            dim += match_dim

        if with_max_attentive_match:
            print("-------------using max attentive match-----------")
            max_att = cal_max_question_representation(question_reps, relevancy_matrix)
            (max_attentive_rep, match_dim) = multi_perspective_match(context_lstm_dim, passage_reps, max_att,
                                                                     is_training=is_training, dropout_rate=dropout_rate,
                                                                     options=options, scope_name='mp-match-max-att')
            all_question_aware_representatins.append(max_attentive_rep)
            dim += match_dim

    all_question_aware_representatins = tf.concat(axis=2, values=all_question_aware_representatins)
    return (all_question_aware_representatins, dim)
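# --------------------------------------------------------------------------
# Illustrative usage sketch (added for documentation, not part of the model).
# It wires match_passage_with_question on placeholder tensors with all
# multi-perspective branches turned off, so only the max/mean relevancy
# features are produced (dim == 2). It assumes cal_relevancy_matrix and
# mask_relevancy_matrix are defined in this module, as the function above
# does. Enabling the other branches additionally requires an `options` dict
# with keys such as "cosine_MP_dim", "att_type" and "att_dim", plus whatever
# multi_perspective_match expects. All shapes and names below are hypothetical.
def _demo_match_passage_with_question(context_dim=100):
    passage_reps = tf.placeholder(tf.float32, [None, None, context_dim], name="demo_p_reps")
    question_reps = tf.placeholder(tf.float32, [None, None, context_dim], name="demo_q_reps")
    passage_lengths = tf.placeholder(tf.int32, [None], name="demo_p_len")
    question_lengths = tf.placeholder(tf.int32, [None], name="demo_q_len")
    # 0/1 float masks derived from the true sequence lengths
    passage_mask = tf.sequence_mask(passage_lengths, dtype=tf.float32)
    question_mask = tf.sequence_mask(question_lengths, dtype=tf.float32)
    reps, dim = match_passage_with_question(
        passage_reps, question_reps, passage_mask, question_mask,
        passage_lengths, question_lengths, context_dim,
        scope="demo_match", with_full_match=False, with_maxpool_match=False,
        with_attentive_match=False, with_max_attentive_match=False,
        is_training=False, options=None, dropout_rate=0.0, forward=True)
    return reps, dim  # reps: [batch, passage_len, 2], dim == 2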
def multihead_attention(inputs1, inputs2, hidden_size, head_size, mask1=None, mask2=None, remove_diagnoal=False,
                        attn_type='dot', scope="Multi_Head_Attention", reuse=None, is_training=True,
                        dropout_rate=0.2, activation=tf.nn.relu):
    in_shapes = tf.shape(inputs1)
    batch_size = in_shapes[0]
    seq_len1 = in_shapes[1]
    seq_len2 = tf.shape(inputs2)[1]
    with tf.variable_scope(scope, reuse=reuse):
        Q = tf.layers.dense(inputs1, head_size * hidden_size, activation=activation, name='inputs1_' + scope)
        Q = tf.reshape(Q, [batch_size, seq_len1, head_size, hidden_size])  # [batch_size, seq_len1, head_size, hidden_size]
        Q = tf.transpose(Q, perm=[0, 2, 1, 3])  # [batch_size, head_size, seq_len1, hidden_size]
        Q = tf.reshape(Q, [batch_size * head_size, seq_len1, hidden_size])

        K = tf.layers.dense(inputs2, head_size * hidden_size, activation=activation, name='inputs2_' + scope)
        K = tf.reshape(K, [batch_size, seq_len2, head_size, hidden_size])  # [batch_size, seq_len2, head_size, hidden_size]
        K = tf.transpose(K, perm=[0, 2, 1, 3])  # [batch_size, head_size, seq_len2, hidden_size]
        K = tf.reshape(K, [batch_size * head_size, seq_len2, hidden_size])

        Q *= hidden_size ** -0.5  # scale queries by 1/sqrt(hidden_size)

        if mask1 is not None:
            mask1 = tf.reshape(mask1, shape=[batch_size, 1, seq_len1])  # [batch_size, 1, seq_len1]
            mask1 = tf.tile(mask1, multiples=[1, head_size, 1])  # [batch_size, head_size, seq_len1]
            mask1 = tf.reshape(mask1, shape=[batch_size * head_size, seq_len1])  # [batch_size*head_size, seq_len1]
        if mask2 is not None:
            mask2 = tf.reshape(mask2, shape=[batch_size, 1, seq_len2])  # [batch_size, 1, seq_len2]
            mask2 = tf.tile(mask2, multiples=[1, head_size, 1])  # [batch_size, head_size, seq_len2]
            mask2 = tf.reshape(mask2, shape=[batch_size * head_size, seq_len2])  # [batch_size*head_size, seq_len2]

        atten_scores = layer_utils.calcuate_attention(Q, K, hidden_size, hidden_size, scope_name='attention',
                                                      att_type=attn_type, remove_diagnoal=remove_diagnoal,
                                                      mask1=mask1, mask2=mask2, is_training=is_training,
                                                      dropout_rate=dropout_rate)  # [batch_size*head_size, seq_len1, seq_len2]
        atten_scores = tf.reshape(atten_scores, [batch_size, head_size, seq_len2])  # [batch_size, head_size, seq_len2]
        atten_scores = tf.transpose(atten_scores, [0, 2, 1])  # [batch_size, seq_len2, head_size]
        atten_scores = tf.layers.dense(atten_scores, 1, activation=activation, name='final_projection')  # [batch_size, seq_len2, 1]
        atten_scores = tf.reshape(atten_scores, [batch_size, seq_len2])  # [batch_size, seq_len2]
    return atten_scores
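# Illustrative call pattern for multihead_attention (added comment, not
# original code). The inputs are assumed to be [batch, seq_len, input_dim]
# tensors with 0/1 float masks of shape [batch, seq_len]; the returned value
# is a per-position score over inputs2 of shape [batch_size, seq_len2].
# If, as the in-code comment states, calcuate_attention returns a tensor of
# shape [batch*head, seq_len1, seq_len2], the reshape to
# [batch, head, seq_len2] only holds when seq_len1 == 1, i.e. when inputs1
# carries a single (e.g. pooled) query vector per example:
#
#   scores = multihead_attention(pooled_query, passage_reps,
#                                hidden_size=64, head_size=4,
#                                mask1=query_mask, mask2=passage_mask,
#                                attn_type='dot', is_training=is_training,
#                                dropout_rate=0.1)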
def FusionNet_match_Amit(feature_dim, feature_each_dim, passage, question, passage_length, question_length,
                         passage_mask, question_mask, onehot_binary=None, options=None,
                         scope_name='FusionNet_Amit_match_layer', is_training=True, dropout_rate=0.2, reuse=False):
    batch_size = tf.shape(passage)[0]
    passage_len = tf.shape(passage)[1]
    question_len = tf.shape(question)[1]
    word_dim, char_dim, POS_dim, NER_dim, cove_dim, lm_dim = feature_each_dim
    with tf.variable_scope(scope_name, reuse=reuse):
        # Fully Aware MultiLevel Fusion (FAMF) Word Layer
        with tf.variable_scope('famf_word_layer'):
            famf_word_level_dim = word_dim  # assuming famf_word_level_dim = dim-of-glove = 300
            # only use word embedding for word layer
            p_wordlevel_input = tf.slice(passage, [0, 0, 0], [batch_size, passage_len, word_dim])
            q_wordlevel_input = tf.slice(question, [0, 0, 0], [batch_size, question_len, word_dim])
            alphas = layer_utils.calcuate_attention(p_wordlevel_input, q_wordlevel_input,
                                                    famf_word_level_dim, famf_word_level_dim,
                                                    scope_name="famf_word_layer_attention", att_type=options.att_type,
                                                    mask1=passage_mask, mask2=question_mask, att_dim=250,
                                                    is_training=is_training, dropout_rate=dropout_rate)
            # calcuate_attention signature for reference:
            # (in_value_1, in_value_2, feature_dim1, feature_dim2, scope_name='att',
            #  att_type='symmetric', att_dim=20, remove_diagnoal=False, mask1=None, mask2=None,
            #  is_training=False, dropout_rate=0.2, cosine_attention_scale=200)
            weighted_by_question_words = tf.matmul(alphas,
                                                   layer_utils.dropout_layer(q_wordlevel_input, dropout_rate,
                                                                             is_training=is_training))

        # Reading layer
        with tf.variable_scope('reading'):
            q_rep_reading_input = question  # [glove, cove, NER, POS]
            # use all embeddings for reading and understanding:
            # [glove, cove, NER, POS, binary, famf_word_attention]
            p_rep_reading_input = tf.concat(axis=2, values=[passage, onehot_binary, weighted_by_question_words])
            with tf.variable_scope('reading_layer_1'):
                reading_layer_lstm_dim = 125
                q_rep_reading_1_output = layer_utils.my_lstm_layer(q_rep_reading_input, reading_layer_lstm_dim,
                                                                   scope_name='bilstm_reading_1_q', reuse=False,
                                                                   is_training=is_training,
                                                                   dropout_rate=options.dropout_rate)[2]  # [B, Q, 250]
                p_rep_reading_1_output = layer_utils.my_lstm_layer(p_rep_reading_input, reading_layer_lstm_dim,
                                                                   scope_name='bilstm_reading_1_p', reuse=False,
                                                                   is_training=is_training,
                                                                   dropout_rate=options.dropout_rate)[2]  # [B, P, 250]
            with tf.variable_scope('reading_layer_2'):
                q_rep_reading_2_output = layer_utils.my_lstm_layer(q_rep_reading_1_output, reading_layer_lstm_dim,
                                                                   scope_name='bilstm_reading_1_q', reuse=False,
                                                                   is_training=is_training,
                                                                   dropout_rate=options.dropout_rate)[2]  # [B, Q, 250]
                p_rep_reading_2_output = layer_utils.my_lstm_layer(p_rep_reading_1_output, reading_layer_lstm_dim,
                                                                   scope_name='bilstm_reading_1_p', reuse=False,
                                                                   is_training=is_training,
                                                                   dropout_rate=options.dropout_rate)[2]  # [B, P, 250]

        # Understanding Layer
        with tf.variable_scope('question_understanding_layer'):
            q_rep_understanding_input = tf.concat(axis=2, values=(q_rep_reading_1_output, q_rep_reading_2_output))
            U_q = layer_utils.my_lstm_layer(q_rep_understanding_input, reading_layer_lstm_dim,
                                            scope_name='bilstm_understanding_q', reuse=False,
                                            is_training=is_training, dropout_rate=options.dropout_rate)[2]  # [B, Q, 250]
            U_q_dim = reading_layer_lstm_dim * 2

        # FAMF: higher level
        with tf.variable_scope('famf_higher_layer'):
            famf_higher_layer_w_dim1 = 500
            famf_higher_layer_w_dim2 = 250
            famf_q_input = []
            famf_p_input = []
            # famf_p_input.append(in_passage_word_repres)
            famf_p_input.append(p_wordlevel_input)
            famf_higher_layer_w_dim1 += word_dim
            famf_p_input.append(p_rep_reading_1_output)
            famf_p_input.append(p_rep_reading_2_output)
            # famf_q_input.append(in_question_word_repres)
            famf_q_input.append(q_wordlevel_input)
            famf_q_input.append(q_rep_reading_1_output)
            famf_q_input.append(q_rep_reading_2_output)
            cove_dim_begin = word_dim + char_dim + POS_dim + NER_dim
            if cove_dim != 0:
                p_cove_repres = tf.slice(passage, [0, 0, cove_dim_begin], [batch_size, passage_len, cove_dim])
                q_cove_repres = tf.slice(question, [0, 0, cove_dim_begin], [batch_size, question_len, cove_dim])
                famf_p_input.append(p_cove_repres)
                famf_q_input.append(q_cove_repres)
                famf_higher_layer_w_dim1 += cove_dim
            if lm_dim != 0:
                lm_dim_begin = cove_dim_begin + cove_dim
                p_lm_repres = tf.slice(passage, [0, 0, lm_dim_begin], [batch_size, passage_len, lm_dim])
                q_lm_repres = tf.slice(question, [0, 0, lm_dim_begin], [batch_size, question_len, lm_dim])
                famf_p_input.append(p_lm_repres)
                famf_q_input.append(q_lm_repres)
                famf_higher_layer_w_dim1 += lm_dim
            famf_p_input = tf.concat(axis=2, values=famf_p_input)  # (B, P, D)
            famf_q_input = tf.concat(axis=2, values=famf_q_input)  # (B, Q, D)

            alphas = layer_utils.calcuate_attention(famf_p_input, famf_q_input,
                                                    famf_higher_layer_w_dim1, famf_higher_layer_w_dim1,
                                                    scope_name="famf_high_lowlevel", att_type=options.att_type,
                                                    mask1=passage_mask, mask2=question_mask,
                                                    att_dim=famf_higher_layer_w_dim2,
                                                    is_training=is_training, dropout_rate=dropout_rate)
            h_Cl = tf.matmul(alphas, layer_utils.dropout_layer(q_rep_reading_1_output, dropout_rate,
                                                               is_training=is_training))
            alphas = layer_utils.calcuate_attention(famf_p_input, famf_q_input,
                                                    famf_higher_layer_w_dim1, famf_higher_layer_w_dim1,
                                                    scope_name="famf_high_highlevel", att_type=options.att_type,
                                                    mask1=passage_mask, mask2=question_mask,
                                                    att_dim=famf_higher_layer_w_dim2,
                                                    is_training=is_training, dropout_rate=dropout_rate)
            h_Ch = tf.matmul(alphas, layer_utils.dropout_layer(q_rep_reading_2_output, dropout_rate,
                                                               is_training=is_training))
            alphas = layer_utils.calcuate_attention(famf_p_input, famf_q_input,
                                                    famf_higher_layer_w_dim1, famf_higher_layer_w_dim1,
                                                    scope_name="famf_high_understandinglevel", att_type=options.att_type,
                                                    mask1=passage_mask, mask2=question_mask,
                                                    att_dim=famf_higher_layer_w_dim2,
                                                    is_training=is_training, dropout_rate=dropout_rate)
            u_C = tf.matmul(alphas, layer_utils.dropout_layer(U_q, dropout_rate, is_training=is_training))

        with tf.variable_scope('famf_higher_layer_passage_lstm'):
            p_rep_highlayer_input = []
            p_rep_highlayer_input.append(p_rep_reading_1_output)
            p_rep_highlayer_input.append(p_rep_reading_2_output)
            p_rep_highlayer_input.append(h_Cl)
            p_rep_highlayer_input.append(h_Ch)
            p_rep_highlayer_input.append(u_C)
            p_rep_highlayer_input = tf.concat(axis=2, values=p_rep_highlayer_input)  # (B, P, D)  D = 250 * 5
            famf_higher_layer_passage_lstm_dim = 125
            V_c = layer_utils.my_lstm_layer(p_rep_highlayer_input, famf_higher_layer_passage_lstm_dim,
                                            scope_name='bilstm_higher_layer_p', reuse=False,
                                            is_training=is_training, dropout_rate=options.dropout_rate)[2]  # [B, P, 250]

        # FAMF: Self-boosted
        with tf.variable_scope('famf_selfboosted_layer'):
            famf_self_boosted_input = []
            famf_self_boosted_w_dim1 = 250 * 6
            # famf_self_boosted_input.append(in_passage_word_repres)
            famf_self_boosted_input.append(p_wordlevel_input)
            famf_self_boosted_w_dim1 += word_dim
            famf_self_boosted_input.append(p_rep_reading_1_output)
            famf_self_boosted_input.append(p_rep_reading_2_output)
            famf_self_boosted_input.append(h_Cl)
            famf_self_boosted_input.append(h_Ch)
            famf_self_boosted_input.append(u_C)
            famf_self_boosted_input.append(V_c)
            if cove_dim != 0:
                famf_self_boosted_input.append(tf.slice(passage, [0, 0, cove_dim_begin],
                                                        [batch_size, passage_len, cove_dim]))
                famf_self_boosted_w_dim1 += cove_dim
            # total width: 300 + (250 * 6) + 600 (if cove) + 300 (if lm)
            # if lm_dim != 0: not used in old codebase
            famf_self_boosted_w_dim2 = 50  # 250 does not fit in memory
            famf_self_boosted_input = tf.concat(axis=2, values=famf_self_boosted_input)  # (B, P, D)

            useProjectionLayer = True
            if useProjectionLayer:
                projection_dim = 50
                famf_self_boosted_input_dropout = famf_self_boosted_input
                famf_self_boosted_projection = layer_utils.projection_layer(famf_self_boosted_input_dropout,
                                                                            famf_self_boosted_w_dim1, projection_dim,
                                                                            scope="self-match-projection")
                famf_self_boosted_w_dim1 = projection_dim
                vv_C_input = famf_self_boosted_projection
            else:
                vv_C_input = famf_self_boosted_input

            alphas = layer_utils.calcuate_attention(vv_C_input, vv_C_input,
                                                    famf_self_boosted_w_dim1, famf_self_boosted_w_dim1,
                                                    scope_name="famf_selfboosted_layer_attention", att_type=options.att_type,
                                                    mask1=passage_mask, mask2=passage_mask,
                                                    att_dim=famf_self_boosted_w_dim2,
                                                    is_training=is_training, dropout_rate=dropout_rate)
            vv_C = tf.matmul(alphas, layer_utils.dropout_layer(V_c, dropout_rate, is_training=is_training))
            p_rep_selfboosted_layer_input = tf.concat(axis=2, values=(famf_self_boosted_input, vv_C))

    return (p_rep_selfboosted_layer_input, 0)
def BiMPM_match(feature_dim, passage, question, passage_length, question_length, passage_mask, question_mask,
                onehot_binary=None, options=None, scope_name='BiMPM_match_layer', is_training=True,
                dropout_rate=0.2, reuse=False):
    match_results = []
    match_dim = 0
    with tf.variable_scope(scope_name, reuse=reuse):
        # word-level matching
        (word_match_reps, word_match_dim, word_PoQ_reps, word_QoP_reps) = onelayer_BiMPM_match(
            feature_dim, passage, question, passage_mask, question_mask, options=options,
            scope_name='word_level_BiMPM', is_training=is_training, dropout_rate=dropout_rate, reuse=False)
        match_results.append(word_match_reps)
        match_dim += word_match_dim

        # contextual level matching
        passage_reps = [passage, word_PoQ_reps]
        passage_dim = 2 * feature_dim
        # if onehot_binary is not None:
        #     passage_reps.append(onehot_binary)
        #     passage_dim += 11
        question_reps = [question]
        if options.with_QoP:
            question_reps.append(word_QoP_reps)
        passage_context = passage
        if onehot_binary is not None:
            passage_context = tf.concat(axis=2, values=[passage_context, onehot_binary])
        question_context = question
        for i in range(options.context_layer_num):  # range (was xrange) for Python 3 compatibility
            cur_passage_reps = tf.concat(axis=2, values=passage_reps)
            cur_question_reps = tf.concat(axis=2, values=question_reps)
            # lstm over passage and question individually
            passage_context = layer_utils.my_lstm_layer(passage_context, options.context_lstm_dim,
                                                        scope_name="passage_context_lstm_{}".format(i), reuse=False,
                                                        is_training=is_training, dropout_rate=dropout_rate)[2]
            passage_context = tf.multiply(passage_context, tf.expand_dims(passage_mask, axis=-1))
            question_context = layer_utils.my_lstm_layer(question_context, options.context_lstm_dim,
                                                         scope_name="question_context_lstm_{}".format(i), reuse=False,
                                                         is_training=is_training, dropout_rate=dropout_rate)[2]
            question_context = tf.multiply(question_context, tf.expand_dims(question_mask, axis=-1))
            # matching
            (cur_match_reps, cur_match_dim, cur_PoQ_reps, cur_QoP_reps) = onelayer_BiMPM_match(
                2 * options.context_lstm_dim, passage_context, question_context, passage_mask, question_mask,
                accum_dim=passage_dim, passage_accum=cur_passage_reps, question_accum=cur_question_reps,
                options=options, scope_name='context_BiMPM_{}'.format(i), is_training=is_training,
                dropout_rate=dropout_rate, reuse=False)
            match_results.append(cur_match_reps)
            match_dim += cur_match_dim
            if options.accumulate_match_input:
                passage_reps.append(passage_context)
                passage_reps.append(cur_PoQ_reps)
                # passage_reps.append(cur_match_reps)
                passage_dim += 4 * options.context_lstm_dim
                question_reps.append(question_context)
                if options.with_QoP:
                    question_reps.append(cur_QoP_reps)
            else:
                # passage_reps = [passage_context, cur_PoQ_reps, cur_match_reps]
                passage_reps = [passage_context, cur_PoQ_reps]
                passage_dim = 4 * options.context_lstm_dim
                question_reps = [question_context]
                if options.with_QoP:
                    question_reps.append(cur_QoP_reps)

        match_results = tf.concat(axis=2, values=match_results)
        if options.with_self_match:
            cur_passage_reps = tf.concat(axis=2, values=passage_reps)
            cur_passage_reps_projection = layer_utils.projection_layer(cur_passage_reps, passage_dim,
                                                                       options.self_compress_dim,
                                                                       scope="self-match-projection")
            self_atten_scores = layer_utils.calcuate_attention(cur_passage_reps_projection, cur_passage_reps_projection,
                                                               options.self_compress_dim, options.self_compress_dim,
                                                               scope_name="self_boost_att", att_type=options.att_type,
                                                               att_dim=options.att_dim, remove_diagnoal=True,
                                                               mask1=passage_mask, mask2=passage_mask,
                                                               is_training=is_training, dropout_rate=dropout_rate)
            self_match_reps = tf.matmul(self_atten_scores,
                                        layer_utils.dropout_layer(match_results, dropout_rate, is_training=is_training))
            match_results = tf.concat(axis=2, values=[match_results, self_match_reps])
            match_dim = 2 * match_dim
    return (match_results, match_dim)
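# Illustrative call pattern for BiMPM_match (added comment, not original
# code). `options` is assumed to be a config object exposing at least
# context_layer_num, context_lstm_dim, att_type, att_dim, with_QoP,
# accumulate_match_input, with_self_match and self_compress_dim, plus the
# fields consumed by multi_perspective_match; tensor shapes are hypothetical:
#
#   passage  : [batch, p_len, feature_dim]   question : [batch, q_len, feature_dim]
#   masks    : float 0/1, [batch, p_len] / [batch, q_len]
#
#   (match_reps, match_dim) = BiMPM_match(
#       feature_dim, passage, question, passage_length, question_length,
#       passage_mask, question_mask, onehot_binary=None, options=options,
#       scope_name='BiMPM_match_layer', is_training=is_training,
#       dropout_rate=options.dropout_rate)
#   # match_reps: [batch, p_len, match_dim]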
def onelayer_BiMPM_match(in_dim, passage, question, passage_mask, question_mask, accum_dim=0,
                         passage_accum=None, question_accum=None, options=None,
                         scope_name='onelayer_BiMPM_match', is_training=True, dropout_rate=0.2, reuse=False):
    if passage_accum is None:
        passage_accum = passage
        question_accum = question
        accum_dim = in_dim
    match_results = []
    match_dim = 0
    QoP_reps = None
    with tf.variable_scope(scope_name, reuse=reuse):
        # attention passage over question
        PoQ_atten = layer_utils.calcuate_attention(passage_accum, question_accum, accum_dim, accum_dim,
                                                   scope_name="PoQ_atten", att_type=options.att_type,
                                                   att_dim=options.att_dim, remove_diagnoal=False,
                                                   mask1=passage_mask, mask2=question_mask,
                                                   is_training=is_training, dropout_rate=dropout_rate)
        PoQ_reps = tf.matmul(PoQ_atten, layer_utils.dropout_layer(question, dropout_rate, is_training=is_training))
        if options.with_QoP:
            # attention question over passage
            QoP_atten = layer_utils.calcuate_attention(question_accum, passage_accum, accum_dim, accum_dim,
                                                       scope_name="QoP_atten", att_type=options.att_type,
                                                       att_dim=options.att_dim, remove_diagnoal=False,
                                                       mask1=question_mask, mask2=passage_mask,
                                                       is_training=is_training, dropout_rate=dropout_rate)
            QoP_reps = tf.matmul(QoP_atten, layer_utils.dropout_layer(passage, dropout_rate, is_training=is_training))

        # attentive matching
        (att_match_rep, att_match_dim) = multi_perspective_match(in_dim, passage, PoQ_reps,
                                                                 is_training=is_training, dropout_rate=dropout_rate,
                                                                 options=options, scope_name='att_match')
        match_results.append(att_match_rep)
        match_dim += att_match_dim

        # max attentive matching
        PoQ_max_reps = layer_utils.collect_representation(question, tf.argmax(PoQ_atten, axis=2, output_type=tf.int32))
        (max_att_match_rep, max_att_match_dim) = multi_perspective_match(in_dim, passage, PoQ_max_reps,
                                                                         is_training=is_training, dropout_rate=dropout_rate,
                                                                         options=options, scope_name='max_att_match')
        match_results.append(max_att_match_rep)
        match_dim += max_att_match_dim

    match_results = tf.concat(axis=2, values=match_results)
    return (match_results, match_dim, PoQ_reps, QoP_reps)
def FusionNet_match(feature_dim, passage, question, passage_length, question_length, passage_mask, question_mask,
                    onehot_binary=None, options=None, scope_name='FusionNet_match_layer', is_training=True,
                    dropout_rate=0.2, reuse=False):
    # passage_mask = None
    # question_mask = None
    with tf.variable_scope(scope_name, reuse=reuse):
        # ======= Fully Aware MultiLevel Fusion (FAMF) Word Layer
        word_atten_scores = layer_utils.calcuate_attention(passage, question, feature_dim, feature_dim,
                                                           scope_name="FAMF_word", att_type=options.att_type,
                                                           att_dim=options.att_dim, remove_diagnoal=False,
                                                           mask1=passage_mask, mask2=question_mask,
                                                           is_training=is_training, dropout_rate=dropout_rate)
        weighted_by_question_words = tf.matmul(word_atten_scores,
                                               layer_utils.dropout_layer(question, dropout_rate, is_training=is_training))

        # ====== Reading layer
        passage_tmp = [passage, weighted_by_question_words]
        passage_tmp_dim = 2 * feature_dim
        if onehot_binary is not None:
            passage_tmp.append(onehot_binary)
            passage_tmp_dim += 11
        passage_tmp = tf.concat(axis=2, values=passage_tmp)
        passage_context1 = layer_utils.my_lstm_layer(passage_tmp, options.context_lstm_dim,
                                                     scope_name="passage_context1_lstm", reuse=False,
                                                     is_training=is_training, dropout_rate=dropout_rate)[2]
        passage_context2 = layer_utils.my_lstm_layer(passage_context1, options.context_lstm_dim,
                                                     scope_name="passage_context2_lstm", reuse=False,
                                                     is_training=is_training, dropout_rate=dropout_rate)[2]
        question_context1 = layer_utils.my_lstm_layer(question, options.context_lstm_dim,
                                                      scope_name="question_context1_lstm", reuse=False,
                                                      is_training=is_training, dropout_rate=dropout_rate)[2]
        question_context2 = layer_utils.my_lstm_layer(question_context1, options.context_lstm_dim,
                                                      scope_name="question_context2_lstm", reuse=False,
                                                      is_training=is_training, dropout_rate=dropout_rate)[2]

        # ==== Understanding Layer
        quesiton_understand_input = tf.concat(axis=2, values=(question_context1, question_context2))
        quesiton_understand_output = layer_utils.my_lstm_layer(quesiton_understand_input, options.context_lstm_dim,
                                                               scope_name="question_under_lstm", reuse=False,
                                                               is_training=is_training, dropout_rate=dropout_rate)[2]

        # ==== FAMF: higher level
        famf_passage_input = tf.concat(axis=2, values=(passage, passage_context1, passage_context2))
        famf_question_input = tf.concat(axis=2, values=(question, question_context1, question_context2))
        passage_in_dim = feature_dim + 4 * options.context_lstm_dim
        lower_level_atten_scores = layer_utils.calcuate_attention(famf_passage_input, famf_question_input,
                                                                  passage_in_dim, passage_in_dim,
                                                                  scope_name="lower_level_att", att_type=options.att_type,
                                                                  att_dim=options.att_dim, remove_diagnoal=False,
                                                                  mask1=passage_mask, mask2=question_mask,
                                                                  is_training=is_training, dropout_rate=dropout_rate)
        high_level_atten_scores = layer_utils.calcuate_attention(famf_passage_input, famf_question_input,
                                                                 passage_in_dim, passage_in_dim,
                                                                 scope_name="high_level_att", att_type=options.att_type,
                                                                 att_dim=options.att_dim, remove_diagnoal=False,
                                                                 mask1=passage_mask, mask2=question_mask,
                                                                 is_training=is_training, dropout_rate=dropout_rate)
        understand_atten_scores = layer_utils.calcuate_attention(famf_passage_input, famf_question_input,
                                                                 passage_in_dim, passage_in_dim,
                                                                 scope_name="understand_att", att_type=options.att_type,
                                                                 att_dim=options.att_dim, remove_diagnoal=False,
                                                                 mask1=passage_mask, mask2=question_mask,
                                                                 is_training=is_training, dropout_rate=dropout_rate)
        h_Cl = tf.matmul(lower_level_atten_scores,
                         layer_utils.dropout_layer(question_context1, dropout_rate, is_training=is_training))
        h_Ch = tf.matmul(high_level_atten_scores,
                         layer_utils.dropout_layer(question_context2, dropout_rate, is_training=is_training))
        u_C = tf.matmul(understand_atten_scores,
                        layer_utils.dropout_layer(quesiton_understand_output, dropout_rate, is_training=is_training))

        # ==== famf_higher_layer_passage_lstm
        V_c_input = tf.concat(axis=2, values=[passage_context1, passage_context2, h_Cl, h_Ch, u_C])
        V_c = layer_utils.my_lstm_layer(V_c_input, options.context_lstm_dim,
                                        scope_name="famf_higher_layer_passage_lstm", reuse=False,
                                        is_training=is_training, dropout_rate=dropout_rate)[2]
        # VV_c_input = tf.concat(axis=2, values=[passage_tmp, V_c_input, V_c])
        # input_dim = 12 * options.context_lstm_dim + passage_tmp_dim
        VV_c_input = tf.concat(axis=2, values=[passage, V_c_input, V_c])
        input_dim = 12 * options.context_lstm_dim + feature_dim

        # ==== FAMF: Self-boosted
        if options.with_self_match:
            VV_c_input_projection = layer_utils.projection_layer(VV_c_input, input_dim, options.self_compress_dim,
                                                                 scope="self-boost-projection")
            self_atten_scores = layer_utils.calcuate_attention(VV_c_input_projection, VV_c_input_projection,
                                                               options.self_compress_dim, options.self_compress_dim,
                                                               scope_name="self_boost_att", att_type=options.att_type,
                                                               att_dim=options.att_dim,
                                                               remove_diagnoal=options.remove_diagonal,
                                                               mask1=passage_mask, mask2=passage_mask,
                                                               is_training=is_training, dropout_rate=dropout_rate)
            VV_c = tf.matmul(self_atten_scores,
                             layer_utils.dropout_layer(V_c, dropout_rate, is_training=is_training))
            VV_c_input = tf.concat(axis=2, values=[VV_c_input, VV_c])
            input_dim += 2 * options.context_lstm_dim

        # match_results = layer_utils.my_lstm_layer(VV_c_input, options.context_lstm_dim, scope_name="match_result", reuse=False,
        #                                           is_training=is_training, dropout_rate=dropout_rate)[2]
        # match_dim = 2 * options.context_lstm_dim
        # return (match_results, match_dim)
    return (VV_c_input, input_dim)
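# Illustrative call pattern for FusionNet_match (added comment, not original
# code). The passage/question tensors are the concatenated word-level
# features of width `feature_dim`; `options` is assumed to expose
# context_lstm_dim, att_type, att_dim, with_self_match, self_compress_dim
# and remove_diagonal. The returned pair is the fused passage representation
# and its feature width:
#
#   (fused_passage, fused_dim) = FusionNet_match(
#       feature_dim, passage, question, passage_length, question_length,
#       passage_mask, question_mask, onehot_binary=onehot_binary,
#       options=options, is_training=is_training,
#       dropout_rate=options.dropout_rate)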
def multi_granularity_match(feature_dim, passage, question, passage_length, question_length,
                            passage_mask=None, question_mask=None, is_training=True, dropout_rate=0.2,
                            options=None, with_full_matching=False, with_attentive_matching=True,
                            with_max_attentive_matching=True, scope_name='mgm', reuse=False):
    '''
    passage: [batch_size, passage_length, feature_dim]
    question: [batch_size, question_length, feature_dim]
    passage_length: [batch_size]
    question_length: [batch_size]
    '''
    input_shape = tf.shape(passage)
    batch_size = input_shape[0]
    passage_len = input_shape[1]
    match_reps = []
    with tf.variable_scope(scope_name, reuse=reuse):
        match_dim = 0
        if with_full_matching:
            # integer division so the slice indices stay ints under Python 3
            passage_fw = passage[:, :, 0:feature_dim // 2]
            passage_bw = passage[:, :, feature_dim // 2:feature_dim]
            question_fw = question[:, :, 0:feature_dim // 2]
            question_bw = question[:, :, feature_dim // 2:feature_dim]
            question_fw = layer_utils.collect_final_step_of_lstm(question_fw, question_length - 1)  # [batch_size, feature_dim/2]
            question_bw = question_bw[:, 0, :]
            question_fw = tf.expand_dims(question_fw, axis=1)
            question_fw = tf.tile(question_fw, [1, passage_len, 1])  # [batch_size, passage_len, feature_dim/2]
            question_bw = tf.expand_dims(question_bw, axis=1)
            question_bw = tf.tile(question_bw, [1, passage_len, 1])  # [batch_size, passage_len, feature_dim/2]
            (fw_full_match_reps, fw_full_match_dim) = multi_perspective_match(feature_dim // 2, passage_fw, question_fw,
                                                                              is_training=is_training,
                                                                              dropout_rate=dropout_rate,
                                                                              options=options, scope_name='fw_full_match')
            (bw_full_match_reps, bw_full_match_dim) = multi_perspective_match(feature_dim // 2, passage_bw, question_bw,
                                                                              is_training=is_training,
                                                                              dropout_rate=dropout_rate,
                                                                              options=options, scope_name='bw_full_match')
            match_reps.append(fw_full_match_reps)
            match_reps.append(bw_full_match_reps)
            match_dim += fw_full_match_dim
            match_dim += bw_full_match_dim

        if with_attentive_matching or with_max_attentive_matching:
            atten_scores = layer_utils.calcuate_attention(passage, question, feature_dim, feature_dim,
                                                          scope_name="attention", att_type=options.attn_type,
                                                          att_dim=options.attn_depth, remove_diagnoal=False,
                                                          mask1=passage_mask, mask2=question_mask,
                                                          is_training=is_training, dropout_rate=dropout_rate)
            # match_reps.append(tf.reduce_max(atten_scores, axis=2, keep_dims=True))
            # match_reps.append(tf.reduce_mean(atten_scores, axis=2, keep_dims=True))
            # match_dim += 2

        if with_max_attentive_matching:
            atten_positions = tf.argmax(atten_scores, axis=2, output_type=tf.int32)  # [batch_size, passage_len]
            max_question_reps = layer_utils.collect_representation(question, atten_positions)
            (max_att_match_rep, max_att_match_dim) = multi_perspective_match(feature_dim, passage, max_question_reps,
                                                                             is_training=is_training,
                                                                             dropout_rate=dropout_rate,
                                                                             options=options, scope_name='max_att_match')
            match_reps.append(max_att_match_rep)
            match_dim += max_att_match_dim

        if with_attentive_matching:
            att_rep = tf.matmul(atten_scores, question)
            (attentive_match_rep, attentive_match_dim) = multi_perspective_match(feature_dim, passage, att_rep,
                                                                                 is_training=is_training,
                                                                                 dropout_rate=dropout_rate,
                                                                                 options=options, scope_name='att_match')
            match_reps.append(attentive_match_rep)
            match_dim += attentive_match_dim

    match_reps = tf.concat(axis=2, values=match_reps)
    return (match_reps, match_dim)
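# Illustrative call pattern for multi_granularity_match (added comment, not
# original code). It is typically applied to BiLSTM outputs whose feature
# dimension is 2 * lstm_dim (the fw/bw halves are what with_full_matching
# splits on). `options` is assumed to expose attn_type and attn_depth in
# addition to the multi_perspective_match settings; names are hypothetical:
#
#   (mgm_reps, mgm_dim) = multi_granularity_match(
#       2 * options.context_lstm_dim, passage_context, question_context,
#       passage_length, question_length,
#       passage_mask=passage_mask, question_mask=question_mask,
#       is_training=is_training, dropout_rate=options.dropout_rate,
#       options=options, with_full_matching=False,
#       with_attentive_matching=True, with_max_attentive_matching=True,
#       scope_name='mgm')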