import tensorflow as tf

# `attention` and `misc` are sibling modules of this package, and
# `compute_question_state`, `segment_softmax` and `compute_spans` are helpers
# defined elsewhere in it (TensorFlow 1.x-style code throughout).


def attention_matching_layer(seq1, seq1_length, seq2, seq2_length,
                             attn_type='diagonal_bilinear', scaled=True,
                             with_sentinel=False):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention.

    Every attention variant returns a triple; only the attended states (one
    weighted summary of seq2 per seq1 position) are used here.
    """
    if attn_type == 'bilinear':
        _, _, attn_states = attention.bilinear_attention(
            seq1, seq2, seq2_length, scaled, with_sentinel)
    elif attn_type == 'dot':
        _, _, attn_states = attention.dot_attention(
            seq1, seq2, seq2_length, scaled, with_sentinel)
    elif attn_type == 'diagonal_bilinear':
        _, _, attn_states = attention.diagonal_bilinear_attention(
            seq1, seq2, seq2_length, scaled, with_sentinel)
    elif attn_type == 'mlp':
        _, _, attn_states = attention.mlp_attention(
            seq1.get_shape()[-1].value, tf.nn.relu, seq1, seq2, seq2_length,
            with_sentinel)
    else:
        raise ValueError("Unknown attention type: %s" % attn_type)
    return attn_states
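
# A minimal usage sketch (illustrative, not part of the original module):
# matching a question encoding against a support encoding with the default
# diagonal bilinear attention. The batch size, lengths and dimensionality are
# assumptions; the layer only needs [batch, time, dim] inputs plus lengths.
def _attention_matching_example():
    question = tf.random_normal([4, 12, 64])   # [batch, question_len, dim]
    support = tf.random_normal([4, 50, 64])    # [batch, support_len, dim]
    question_length = tf.fill([4], 12)
    support_length = tf.constant([50, 42, 50, 17])
    # Result: [4, 12, 64] -- one attended summary of the support per
    # question token.
    return attention_matching_layer(question, question_length,
                                    support, support_length)
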
def self_attention(inputs, lengths, attn_type='bilinear', scaled=True,
                   repr_dim=None, activation=None, with_sentinel=False,
                   name='self_attention', reuse=False):
    """Attends `inputs` over itself; `repr_dim`/`activation` are used by 'mlp' only."""
    # `reuse` must be passed by keyword: the second positional argument of
    # tf.variable_scope is default_name, not reuse.
    with tf.variable_scope(name, reuse=reuse):
        if attn_type == 'bilinear':
            attn_states = attention.bilinear_attention(
                inputs, inputs, lengths, scaled, with_sentinel)[2]
        elif attn_type == 'dot':
            attn_states = attention.dot_attention(
                inputs, inputs, lengths, scaled, with_sentinel)[2]
        elif attn_type == 'diagonal_bilinear':
            attn_states = attention.diagonal_bilinear_attention(
                inputs, inputs, lengths, scaled, with_sentinel)[2]
        elif attn_type == 'mlp':
            attn_states = attention.mlp_attention(
                repr_dim, activation, inputs, inputs, lengths, with_sentinel)[2]
        else:
            raise ValueError("Unknown attention type: %s" % attn_type)
    return attn_states
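
# A minimal self-attention usage sketch (illustrative shapes). For the 'mlp'
# variant, `repr_dim` and `activation` must be provided; the other variants
# ignore them. A second call with reuse=True and the same `name` shares weights.
def _self_attention_example():
    states = tf.random_normal([8, 30, 128])   # [batch, time, dim]
    lengths = tf.fill([8], 30)
    # Scaled dot-product attention of the sequence over itself: [8, 30, 128].
    return self_attention(states, lengths, attn_type='dot')
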
def san_answer_layer(size, encoded_question, question_length, encoded_support,
                     support_length, support2question, answer2support, is_eval,
                     topk=1, max_span_size=10000, num_steps=5, dropout=0.4,
                     **kwargs):
    """Multi-step answer layer in the style of the Stochastic Answer Network (SAN)."""
    # Summarize the question into a single state, project it to the support
    # dimension and replicate it once per associated support.
    question_state = compute_question_state(encoded_question, question_length)
    question_state = tf.layers.dense(
        question_state, encoded_support.get_shape()[-1].value, tf.tanh)
    question_state = tf.gather(question_state, support2question)

    cell = tf.contrib.rnn.GRUBlockCell(size)
    all_start_scores = []
    all_end_scores = []
    # Additive mask with large negative values for positions beyond each
    # support's length.
    support_mask = misc.mask_for_lengths(support_length)
    for i in range(num_steps):
        with tf.variable_scope('SAN', reuse=i > 0):
            # Attend over the support with the current question state ...
            question_state = tf.expand_dims(question_state, 1)
            support_attn = attention.bilinear_attention(
                question_state, encoded_support, support_length, False, False)[2]
            question_state = tf.squeeze(question_state, 1)
            support_attn = tf.squeeze(support_attn, 1)
            # ... and refine the state with one GRU step on the attended summary.
            question_state = cell(support_attn, question_state)[0]

            hidden_start = tf.layers.dense(question_state, size, name="hidden_start")
            start_scores = tf.einsum('ik,ijk->ij', hidden_start, encoded_support)
            start_scores = start_scores + support_mask

            # Expected start representation, normalized jointly over all
            # supports of the same question.
            start_probs = segment_softmax(start_scores, support2question)
            start_states = tf.einsum('ij,ijk->ik', start_probs, encoded_support)
            start_states = tf.unsorted_segment_sum(
                start_states, support2question, tf.shape(question_length)[0])
            start_states = tf.gather(start_states, support2question)

            # End scores are conditioned on both the state and the start summary.
            hidden_end = tf.layers.dense(
                tf.concat([question_state, start_states], 1), size, name="hidden_end")
            end_scores = tf.einsum('ik,ijk->ij', hidden_end, encoded_support)
            end_scores = end_scores + support_mask

            all_start_scores.append(start_scores)
            all_end_scores.append(end_scores)

    all_start_scores = tf.stack(all_start_scores)
    all_end_scores = tf.stack(all_end_scores)

    # Stochastic prediction dropout: during training, whole reasoning steps are
    # dropped (tf.nn.dropout rescales the kept steps by 1/keep_prob, keeping
    # the mean unbiased); at evaluation time, all steps are averaged.
    dropout_mask = tf.nn.dropout(tf.ones([num_steps, 1, 1]), 1.0 - dropout)
    all_start_scores = tf.cond(is_eval,
                               lambda: all_start_scores,
                               lambda: all_start_scores * dropout_mask)
    all_end_scores = tf.cond(is_eval,
                             lambda: all_end_scores,
                             lambda: all_end_scores * dropout_mask)
    start_scores = tf.reduce_mean(all_start_scores, axis=0)
    end_scores = tf.reduce_mean(all_end_scores, axis=0)

    return compute_spans(start_scores, end_scores, answer2support, is_eval,
                         support2question, topk=topk, max_span_size=max_span_size)
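
# A usage sketch for the SAN answer layer; every tensor below is an
# illustrative stand-in for what the encoder and input pipeline would supply.
# `support2question` maps each support to its question and `answer2support`
# maps each gold answer to its support, so a question may own several
# supports, as the first question does here.
def _san_answer_layer_example():
    encoded_question = tf.random_normal([2, 12, 64])  # [num_questions, q_len, dim]
    encoded_support = tf.random_normal([3, 50, 64])   # [num_supports, s_len, dim]
    question_length = tf.constant([12, 10])
    support_length = tf.constant([50, 33, 50])
    support2question = tf.constant([0, 0, 1])
    answer2support = tf.constant([0, 2])
    is_eval = tf.constant(False)  # training mode: step dropout is active
    return san_answer_layer(64, encoded_question, question_length,
                            encoded_support, support_length, support2question,
                            answer2support, is_eval)
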