def _model_m00(features, mode, params, scope=None):
  """LSTM-based model.

  Two stacked bidirectional LSTMs encode the context; the question is
  collapsed to fixed start/end vectors via self-attention. This model
  reaches 57~58% F1.

  NOTE(review): a later function in this file is also named `_model_m00`
  (the simplified-BiDAF model); that later definition shadows this one at
  import time — confirm which name is intended.

  Args:
    features: A dict of feature tensors.
    mode: https://www.tensorflow.org/api_docs/python/tf/contrib/learn/ModeKeys
    params: `params` passed during initialization of `Estimator` object.
    scope: Variable scope, default is `feature_model`.
  Returns:
    `(logits_start, logits_end, tensors)` pair. `tensors` is a dictionary of
    tensors that can be useful outside of this function, e.g. visualization.
  """
  with tf.variable_scope(scope or 'feature_model'):
    is_training = mode == learn.ModeKeys.TRAIN
    x, q = embedding_layer(features, mode, params)

    def _bi_lstm(inputs, num_words_key, name):
      # Thin wrapper so the four stacked-BiLSTM calls share one call site.
      return tf_utils.bi_rnn(
          params.hidden_size,
          inputs,
          sequence_length_list=features[num_words_key],
          scope=name,
          training=is_training,
          dropout_rate=params.dropout_rate)

    context_l1 = _bi_lstm(x, 'context_num_words', 'x_bi_rnn_1')
    context_l2 = _bi_lstm(context_l1, 'context_num_words', 'x_bi_rnn_2')
    question_l1 = _bi_lstm(q, 'question_num_words', 'q_bi_rnn_1')
    question_l2 = _bi_lstm(question_l1, 'question_num_words', 'q_bi_rnn_2')

    # Self-attention to obtain single vector representation per question,
    # one for the start head and one for the end head.
    q_start = tf_utils.self_att(
        question_l1, mask=features['question_num_words'], scope='q_start')
    q_end = tf_utils.self_att(
        question_l2, mask=features['question_num_words'], scope='q_end')

    logits_start, logits_end = _get_logits_from_multihead_x_and_q(
        context_l1, context_l2, q_start, q_end,
        features['context_num_words'], params.dist)
    return logits_start, logits_end, dict()
def _model_m03(features, mode, params, scope=None):
  """Independent self-attention with LSTM, reaching 60~61%.

  Builds two fully independent (context, question) encoding branches —
  one for the answer-start head and one for the answer-end head — each
  combining a context BiLSTM with dot-product self-attention.

  Args:
    features: A dict of feature tensors.
    mode: https://www.tensorflow.org/api_docs/python/tf/contrib/learn/ModeKeys
    params: `params` passed during initialization of `Estimator` object.
    scope: Variable scope, default is `feature_model`.
  Returns:
    `(logits_start, logits_end, tensors)` pair. `tensors` is a dictionary of
    tensors that can be useful outside of this function, e.g. visualization.
  """
  with tf.variable_scope(scope or 'feature_model'):
    is_training = mode == learn.ModeKeys.TRAIN
    tensors = {}
    x, q = embedding_layer(features, mode, params)

    def _encode_branch(scope=None):
      # One independent encoding branch; returns (context_out, question_out).
      with tf.variable_scope(scope or 'get_x_and_q'):
        ctx_len = features['context_num_words']
        qst_len = features['question_num_words']

        base = tf_utils.bi_rnn(
            params.hidden_size, x, sequence_length_list=ctx_len,
            scope='bi_rnn_x1', training=is_training,
            dropout_rate=params.dropout_rate)
        attendee = tf_utils.bi_rnn(
            params.hidden_size, base, sequence_length_list=ctx_len,
            scope='bi_rnn_attendee', training=is_training,
            dropout_rate=params.dropout_rate)
        attender = tf_utils.bi_rnn(
            params.hidden_size, base, sequence_length_list=ctx_len,
            scope='bi_rnn_attender', training=is_training,
            dropout_rate=params.dropout_rate)
        # Context self-attention: each position gathers a "clue" summary of
        # the rest of the context, concatenated onto the base encoding.
        clue = tf_utils.att2d(
            attendee, attender, a_val=base, mask=ctx_len,
            logit_fn='dot', tensors=tensors)
        ctx_out = tf.concat([base, clue], 2)

        q_enc = tf_utils.bi_rnn(
            params.hidden_size, q, sequence_length_list=qst_len,
            scope='bi_rnn_q1', training=is_training,
            dropout_rate=params.dropout_rate)
        # Two self-attention heads over the question, concatenated.
        q_type = tf_utils.self_att(
            q_enc, mask=qst_len, tensors=tensors, scope='self_att_q1_type')
        q_clue = tf_utils.self_att(
            q_enc, mask=qst_len, tensors=tensors, scope='self_att_q1_clue')
        return ctx_out, tf.concat([q_type, q_clue], 1)

    x_start, q_start = _encode_branch('start')
    x_end, q_end = _encode_branch('end')
    logits_start, logits_end = _get_logits_from_multihead_x_and_q(
        x_start, x_end, q_start, q_end,
        features['context_num_words'], params.dist)
    return logits_start, logits_end, tensors
def _model_m00(features, mode, params, scope=None):
  """Simplified BiDAF, reaching 74~75% F1.

  NOTE(review): this redefines `_model_m00`, which an earlier function in
  this file also declares; at import time this definition wins and the
  earlier LSTM model becomes unreachable — confirm the intended name.

  Args:
    features: A dict of feature tensors.
    mode: https://www.tensorflow.org/api_docs/python/tf/contrib/learn/ModeKeys
    params: `params` passed during initialization of `Estimator` object.
    scope: Variable name scope.
  Returns:
    `(logits_start, logits_end, tensors)` pair. `tensors` is a dictionary of
    tensors that can be useful outside of this function, e.g. visualization.
  """
  with tf.variable_scope(scope or 'kernel_model'):
    is_training = mode == learn.ModeKeys.TRAIN
    tensors = {}
    x, q = embedding_layer(features, mode, params)
    ctx_len = features['context_num_words']
    qst_len = features['question_num_words']

    x0 = tf_utils.bi_rnn(
        params.hidden_size, x, sequence_length_list=ctx_len,
        scope='x_bi_rnn_0', training=is_training,
        dropout_rate=params.dropout_rate)
    q0 = tf_utils.bi_rnn(
        params.hidden_size, q, sequence_length_list=qst_len,
        scope='q_bi_rnn_0', training=is_training,
        dropout_rate=params.dropout_rate)

    # Question-to-context attention followed by the BiDAF-style fusion
    # of the raw, attended, and elementwise-product features.
    attended = tf_utils.att2d(
        q0, x0, mask=qst_len, tensors=tensors, scope='xq')
    fused = tf.concat([x0, attended, x0 * attended], 2)

    def _modeling_layer(inputs, name):
      # One modeling-layer BiLSTM over the fused context.
      return tf_utils.bi_rnn(
          params.hidden_size, inputs, sequence_length_list=ctx_len,
          training=is_training, scope=name,
          dropout_rate=params.dropout_rate)

    m1 = _modeling_layer(fused, 'x1_bi_rnn')
    m2 = _modeling_layer(m1, 'x2_bi_rnn')
    m3 = _modeling_layer(m2, 'x3_bi_rnn')

    # Start head reads layers 1+2; end head reads layers 1+3. Positions
    # beyond the context length are masked out with a large negative value.
    logits_start = tf_utils.exp_mask(
        tf.squeeze(
            tf.layers.dense(tf.concat([m1, m2], 2), 1, name='logits1'), 2),
        ctx_len)
    logits_end = tf_utils.exp_mask(
        tf.squeeze(
            tf.layers.dense(tf.concat([m1, m3], 2), 1, name='logits2'), 2),
        ctx_len)
    return logits_start, logits_end, tensors