Example 1
def attention_matching_layer(seq1,
                             seq1_length,
                             seq2,
                             seq2_length,
                             attn_type='diagonal_bilinear',
                             scaled=True,
                             with_sentinel=False):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    if attn_type == 'bilinear':
        _, _, attn_states = attention.bilinear_attention(
            seq1, seq2, seq2_length, scaled, with_sentinel)
    elif attn_type == 'dot':
        _, _, attn_states = attention.dot_attention(seq1, seq2, seq2_length,
                                                    scaled, with_sentinel)
    elif attn_type == 'diagonal_bilinear':
        _, _, attn_states = attention.diagonal_bilinear_attention(
            seq1, seq2, seq2_length, scaled, with_sentinel)
    elif attn_type == 'mlp':
        _, _, attn_states = attention.mlp_attention(seq1.get_shape()[-1].value,
                                                    tf.nn.relu, seq1, seq2,
                                                    seq2_length, with_sentinel)
    else:
        raise ValueError("Unknown attention type: %s" % attn_type)

    return attn_states
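
A minimal usage sketch for this layer, assuming TensorFlow 1.x placeholders; the variable names, the 128-dimensional encoding size, and the choice of attn_type='dot' are illustrative assumptions, not part of the original example:

import tensorflow as tf

# Hypothetical padded sequence encodings plus their true lengths.
seq1 = tf.placeholder(tf.float32, [None, None, 128])   # [batch, len1, dim]
seq1_length = tf.placeholder(tf.int32, [None])          # [batch]
seq2 = tf.placeholder(tf.float32, [None, None, 128])   # [batch, len2, dim]
seq2_length = tf.placeholder(tf.int32, [None])          # [batch]

# Each seq1 position is re-encoded as an attention-weighted summary of seq2.
attn_states = attention_matching_layer(seq1, seq1_length, seq2, seq2_length,
                                       attn_type='dot', scaled=True)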
Example 2
def bidaf_layer(seq1,
                seq1_length,
                seq2,
                seq2_length,
                seq1_to_seq2=None,
                seq2_to_seq1=None):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    attn_scores, attn_probs, seq2_weighted = attention.diagonal_bilinear_attention(
        seq1, seq2, seq2_length, False, seq2_to_seq1=seq2_to_seq1)

    # Mask padded seq1 positions with large negative scores so they cannot
    # dominate the max/softmax below.
    attn_scores += tf.expand_dims(
        mask_for_lengths(seq1_length,
                         tf.shape(attn_scores)[1]), 2)

    # For every seq1 position, take its strongest score against any seq2 position.
    max_seq1 = tf.reduce_max(attn_scores, 2)
    if seq1_to_seq2 is None:
        seq1_attention = tf.nn.softmax(max_seq1, 1)
    else:
        # Pool the scores over the segments given by seq1_to_seq2 before the
        # softmax, then map the resulting weights back to the original positions.
        segm_max_seq1 = tf.unsorted_segment_max(
            max_seq1, seq1_to_seq2,
            tf.reduce_max(seq1_to_seq2) + 1)
        seq1_attention = tf.nn.softmax(segm_max_seq1, 1)
        seq1_attention = tf.gather(seq1_attention, seq1_to_seq2)
        seq1_attention.set_shape(max_seq1.get_shape())
    # Single attention-weighted summary of seq1, tiled to every seq1 position.
    seq1_weighted = tf.einsum('ij,ijk->ik', seq1_attention, seq1)
    seq1_weighted = tf.expand_dims(seq1_weighted, 1)
    seq1_weighted = tf.tile(seq1_weighted, [1, tf.shape(seq1)[1], 1])

    return tf.concat(
        [seq2_weighted, seq1 * seq2_weighted, seq1 * seq1_weighted], 2)
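
A hedged call sketch for this BiDAF-style layer, assuming TF 1.x and that seq1 and seq2 share the same hidden size (the element-wise products in the returned concatenation require it); the placeholder names and the 100-dimensional size are assumptions, and seq1_to_seq2/seq2_to_seq1 are left at their defaults:

import tensorflow as tf

context = tf.placeholder(tf.float32, [None, None, 100])    # seq1: [batch, len1, dim]
context_len = tf.placeholder(tf.int32, [None])
question = tf.placeholder(tf.float32, [None, None, 100])   # seq2: [batch, len2, dim]
question_len = tf.placeholder(tf.int32, [None])

# Concatenation of three [batch, len1, dim] tensors -> [batch, len1, 3 * dim].
fused = bidaf_layer(context, context_len, question, question_len)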
Example 3
def self_attention(inputs,
                   lengths,
                   attn_type='bilinear',
                   scaled=True,
                   repr_dim=None,
                   activation=None,
                   with_sentinel=False,
                   name='self_attention',
                   reuse=False):
    """Self-attends `inputs` against itself using the selected attention type."""
    with tf.variable_scope(name, reuse=reuse):
        if attn_type == 'bilinear':
            attn_states = attention.bilinear_attention(inputs, inputs, lengths,
                                                       scaled,
                                                       with_sentinel)[2]
        elif attn_type == 'dot':
            attn_states = attention.dot_attention(inputs, inputs, lengths,
                                                  scaled, with_sentinel)[2]
        elif attn_type == 'diagonal_bilinear':
            attn_states = attention.diagonal_bilinear_attention(
                inputs, inputs, lengths, scaled, with_sentinel)[2]
        elif attn_type == 'mlp':
            attn_states = attention.mlp_attention(repr_dim, activation, inputs,
                                                  inputs, lengths,
                                                  with_sentinel)[2]
        else:
            raise ValueError("Unknown attention type: %s" % attn_type)

    return attn_states
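
A short usage sketch for the self-attention wrapper, again assuming TF 1.x; the placeholder shapes are assumptions. repr_dim and activation only matter for attn_type='mlp', and a second call with reuse=True shares the variables created in the same scope:

import tensorflow as tf

states = tf.placeholder(tf.float32, [None, None, 64])   # [batch, time, dim]
lengths = tf.placeholder(tf.int32, [None])                # [batch]

attended = self_attention(states, lengths, attn_type='bilinear', scaled=True)
attended_again = self_attention(states, lengths, attn_type='bilinear', reuse=True)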
Example 4
def bidaf_layer(seq1, seq1_length, seq2, seq2_length):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    attn_scores, attn_probs, seq2_weighted = attention.diagonal_bilinear_attention(
        seq1, seq2, seq2_length, False)

    attn_scores += tf.expand_dims(
        mask_for_lengths(seq1_length,
                         tf.shape(attn_scores)[1]), 2)

    max_seq1 = tf.reduce_max(attn_scores, 2)
    seq1_attention = tf.nn.softmax(max_seq1, 1)
    seq1_weighted = tf.einsum('ij,ijk->ik', seq1_attention, seq1)
    seq1_weighted = tf.expand_dims(seq1_weighted, 1)
    seq1_weighted = tf.tile(seq1_weighted, [1, tf.shape(seq1)[1], 1])

    return tf.concat(
        [seq2_weighted, seq1 * seq2_weighted, seq1 * seq1_weighted], 2)
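
Both BiDAF examples depend on a mask_for_lengths helper that is not shown here. A minimal sketch of what such a helper plausibly looks like, on the assumption that it returns 0.0 at valid positions and a large negative constant at padded positions so that padding loses the max/softmax above; the exact signature and default mask value in the original project may differ:

import tensorflow as tf

def mask_for_lengths(lengths, max_length, mask_value=-1e6):
    """Returns a [batch, max_length] tensor: 0.0 where position < length, mask_value elsewhere."""
    valid = tf.sequence_mask(lengths, max_length, dtype=tf.float32)  # 1.0 for real tokens
    return (1.0 - valid) * mask_value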