Example #1
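These snippets appear to be excerpts from a larger module: they assume TensorFlow 1.x (`import tensorflow as tf`, `from tensorflow.contrib import learn`), and the helpers `tf_utils`, `embedding_layer`, and `_get_logits_from_multihead_x_and_q` are defined elsewhere in the same codebase.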
def _model_m00(features, mode, params, scope=None):
  """LSTM-based model.

  This model uses two stacked LSTMs to produce vectors for the context, and
  self-attention to produce vectors for the question. It reaches 57~58% F1.

  Args:
    features: A dict of feature tensors.
    mode: https://www.tensorflow.org/api_docs/python/tf/contrib/learn/ModeKeys
    params: `params` passed during initialization of `Estimator` object.
    scope: Variable scope, default is `feature_model`.
  Returns:
    `(logits_start, logits_end, tensors)` tuple. `tensors` is a dictionary of
    tensors that can be useful outside of this function, e.g. for visualization.
  """

  with tf.variable_scope(scope or 'feature_model'):
    training = mode == learn.ModeKeys.TRAIN

    x, q = embedding_layer(features, mode, params)

    # Two stacked bi-directional RNNs encode the context; the question gets
    # the same treatment below.
    x1 = tf_utils.bi_rnn(
        params.hidden_size,
        x,
        sequence_length_list=features['context_num_words'],
        scope='x_bi_rnn_1',
        training=training,
        dropout_rate=params.dropout_rate)

    x2 = tf_utils.bi_rnn(
        params.hidden_size,
        x1,
        sequence_length_list=features['context_num_words'],
        scope='x_bi_rnn_2',
        training=training,
        dropout_rate=params.dropout_rate)

    q1 = tf_utils.bi_rnn(
        params.hidden_size,
        q,
        sequence_length_list=features['question_num_words'],
        scope='q_bi_rnn_1',
        training=training,
        dropout_rate=params.dropout_rate)

    q2 = tf_utils.bi_rnn(
        params.hidden_size,
        q1,
        sequence_length_list=features['question_num_words'],
        scope='q_bi_rnn_2',
        training=training,
        dropout_rate=params.dropout_rate)

    # Self-attention to obtain single vector representation.
    q_start = tf_utils.self_att(
        q1, mask=features['question_num_words'], scope='q_start')
    q_end = tf_utils.self_att(
        q2, mask=features['question_num_words'], scope='q_end')

    logits_start, logits_end = _get_logits_from_multihead_x_and_q(
        x1, x2, q_start, q_end, features['context_num_words'], params.dist)
    return logits_start, logits_end, dict()
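The single-vector self-attention used above, `tf_utils.self_att`, is not shown in this listing. Below is a minimal sketch of what such a helper might look like under TF 1.x, with the signature inferred from the call sites (`seq`, `mask` holding sequence lengths, an optional `tensors` dict, `scope`); the implementation details are assumptions, not the library's actual code.

import tensorflow as tf

def self_att(seq, mask, tensors=None, scope=None):
  """Masked self-attention pooling: [batch, length, dim] -> [batch, dim]."""
  with tf.variable_scope(scope or 'self_att'):
    # One unnormalized attention score per position.
    logits = tf.squeeze(tf.layers.dense(seq, 1, name='logits'), 2)
    # `mask` holds the valid sequence lengths; padded positions get a large
    # negative score so that softmax effectively ignores them.
    bool_mask = tf.sequence_mask(mask, maxlen=tf.shape(seq)[1])
    logits += (1.0 - tf.cast(bool_mask, tf.float32)) * -1e30
    weights = tf.nn.softmax(logits)  # [batch, length]
    if tensors is not None:
      tensors[scope or 'self_att'] = weights  # exposed e.g. for visualization
    # Weighted sum over the time axis yields one vector per example.
    return tf.reduce_sum(seq * tf.expand_dims(weights, 2), 1)

Under this reading, `q_start` and `q_end` above are two independently parameterized summaries of the same question encoding.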
Example #2
def _model_m03(features, mode, params, scope=None):
  """Independent self-attention with LSTM, reaching 60~61%.

  Args:
    features: A dict of feature tensors.
    mode: https://www.tensorflow.org/api_docs/python/tf/contrib/learn/ModeKeys
    params: `params` passed during initialization of `Estimator` object.
    scope: Variable scope, default is `feature_model`.
  Returns:
    `(logits_start, logits_end, tensors)` tuple. `tensors` is a dictionary of
    tensors that can be useful outside of this function, e.g. for visualization.
  """

  with tf.variable_scope(scope or 'feature_model'):
    training = mode == learn.ModeKeys.TRAIN
    tensors = {}

    x, q = embedding_layer(features, mode, params)

    # Builds the context and question representations; called twice below so
    # that the start and end predictions get independent parameters.
    def get_x_and_q(scope=None):
      with tf.variable_scope(scope or 'get_x_and_q'):
        x1 = tf_utils.bi_rnn(
            params.hidden_size,
            x,
            sequence_length_list=features['context_num_words'],
            scope='bi_rnn_x1',
            training=training,
            dropout_rate=params.dropout_rate)

        # Context self-attention: two separate encodings of x1 act as the
        # attended-over ('attendee') and attending ('attender') sequences.
        attendee = tf_utils.bi_rnn(
            params.hidden_size,
            x1,
            sequence_length_list=features['context_num_words'],
            scope='bi_rnn_attendee',
            training=training,
            dropout_rate=params.dropout_rate)
        attender = tf_utils.bi_rnn(
            params.hidden_size,
            x1,
            sequence_length_list=features['context_num_words'],
            scope='bi_rnn_attender',
            training=training,
            dropout_rate=params.dropout_rate)
        clue = tf_utils.att2d(
            attendee,
            attender,
            a_val=x1,
            mask=features['context_num_words'],
            logit_fn='dot',
            tensors=tensors)

        x_out = tf.concat([x1, clue], 2)

        q1 = tf_utils.bi_rnn(
            params.hidden_size,
            q,
            sequence_length_list=features['question_num_words'],
            scope='bi_rnn_q1',
            training=training,
            dropout_rate=params.dropout_rate)

        # Two self-attention heads summarize the question into single vectors.
        q1_type = tf_utils.self_att(
            q1,
            mask=features['question_num_words'],
            tensors=tensors,
            scope='self_att_q1_type')
        q1_clue = tf_utils.self_att(
            q1,
            mask=features['question_num_words'],
            tensors=tensors,
            scope='self_att_q1_clue')
        q_out = tf.concat([q1_type, q1_clue], 1)

        return x_out, q_out

    x_start, q_start = get_x_and_q('start')
    x_end, q_end = get_x_and_q('end')

    logits_start, logits_end = _get_logits_from_multihead_x_and_q(
        x_start, x_end, q_start, q_end, features['context_num_words'],
        params.dist)
    return logits_start, logits_end, tensors
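The 2D attention helper `tf_utils.att2d` is likewise external to this listing. Here is a rough sketch of its probable shape, assuming the signature inferred from the call sites and covering only the `logit_fn='dot'` case (the real helper presumably supports other logit functions as well):

import tensorflow as tf

def att2d(a, b, a_val=None, mask=None, logit_fn='dot', tensors=None,
          scope=None):
  """For each position of `b`, attend over `a`: returns [batch, len_b, dim]."""
  with tf.variable_scope(scope or 'att2d'):
    a_val = a if a_val is None else a_val
    if logit_fn != 'dot':
      raise NotImplementedError('only the dot-product case is sketched here')
    # [batch, len_b, len_a] similarity matrix.
    logits = tf.matmul(b, a, transpose_b=True)
    if mask is not None:
      # `mask` holds the valid lengths of `a`; padded keys are masked out.
      bool_mask = tf.sequence_mask(mask, maxlen=tf.shape(a)[1])
      logits += tf.expand_dims(1.0 - tf.cast(bool_mask, tf.float32), 1) * -1e30
    weights = tf.nn.softmax(logits)
    if tensors is not None:
      tensors[scope or 'att2d'] = weights
    # Weighted sum of the value sequence for every position of `b`.
    return tf.matmul(weights, a_val)

Under this reading, `clue` in Example #2 is context-to-context attention with `x1` as the value sequence, and `xq` in Example #3 attends from each context position over the question encoding.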
Example #3
def _model_m00(features, mode, params, scope=None):
    """Simplified BiDAF, reaching 74~75% F1.

  Args:
    features: A dict of feature tensors.
    mode: https://www.tensorflow.org/api_docs/python/tf/contrib/learn/ModeKeys
    params: `params` passed during initialization of `Estimator` object.
    scope: Variable name scope.
  Returns:
    `(logits_start, logits_end, tensors)` pair. Tensors is a dictionary of
    tensors that can be useful outside of this function, e.g. visualization.
  """
    with tf.variable_scope(scope or 'kernel_model'):
        training = mode == learn.ModeKeys.TRAIN
        tensors = {}

        x, q = embedding_layer(features, mode, params)

        x0 = tf_utils.bi_rnn(
            params.hidden_size,
            x,
            sequence_length_list=features['context_num_words'],
            scope='x_bi_rnn_0',
            training=training,
            dropout_rate=params.dropout_rate)

        q0 = tf_utils.bi_rnn(
            params.hidden_size,
            q,
            sequence_length_list=features['question_num_words'],
            scope='q_bi_rnn_0',
            training=training,
            dropout_rate=params.dropout_rate)

        # BiDAF-style question-to-context attention: every context position
        # attends over the question encoding.
        xq = tf_utils.att2d(q0,
                            x0,
                            mask=features['question_num_words'],
                            tensors=tensors,
                            scope='xq')
        xq = tf.concat([x0, xq, x0 * xq], 2)
        x1 = tf_utils.bi_rnn(
            params.hidden_size,
            xq,
            sequence_length_list=features['context_num_words'],
            training=training,
            scope='x1_bi_rnn',
            dropout_rate=params.dropout_rate)
        x2 = tf_utils.bi_rnn(
            params.hidden_size,
            x1,
            sequence_length_list=features['context_num_words'],
            training=training,
            scope='x2_bi_rnn',
            dropout_rate=params.dropout_rate)
        x3 = tf_utils.bi_rnn(
            params.hidden_size,
            x2,
            sequence_length_list=features['context_num_words'],
            training=training,
            scope='x3_bi_rnn',
            dropout_rate=params.dropout_rate)

        # Start and end logits are read from different LSTM layers; `exp_mask`
        # suppresses scores past the context length before softmax.
        logits_start = tf_utils.exp_mask(
            tf.squeeze(
                tf.layers.dense(tf.concat([x1, x2], 2), 1, name='logits1'), 2),
            features['context_num_words'])
        logits_end = tf_utils.exp_mask(
            tf.squeeze(
                tf.layers.dense(tf.concat([x1, x3], 2), 1, name='logits2'), 2),
            features['context_num_words'])

        return logits_start, logits_end, tensors
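`tf_utils.exp_mask` is also defined outside this listing. Given how it is called above, a plausible minimal version (an assumption, not the actual library code) is:

import tensorflow as tf

def exp_mask(logits, lengths):
  """Masks `logits` past each sequence length with a large negative value,
  so that a subsequent softmax assigns near-zero probability to padding."""
  bool_mask = tf.sequence_mask(lengths, maxlen=tf.shape(logits)[1])
  return logits + (1.0 - tf.cast(bool_mask, tf.float32)) * -1e30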