Example #1
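A conditional-encoding NLI classifier as a `forward_pass` method: hypothesis (question) and premise (support) embeddings share a tanh projection, receive a dropout mask held constant over time, and are encoded by bidirectional fused LSTMs, with the support encoder initialized from the question's final states; masked max-pooling plus a linear layer produces the class logits. TensorFlow 1.x (`import tensorflow as tf`) and a `fused_birnn` helper are assumed to be in scope; a sketch of that helper follows the code.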
    def forward_pass(self, shared_resources, embedded_question,
                     embedded_support, num_classes, tensors):
        # NLI mapping: question = hypothesis, support = premise
        repr_dim = shared_resources.config['repr_dim']
        dropout = shared_resources.config.get("dropout", 0.0)

        with tf.variable_scope('embedding_projection') as vs:
            embedded_question = tf.layers.dense(embedded_question,
                                                repr_dim,
                                                tf.tanh,
                                                name='projection')
            vs.reuse_variables()
            embedded_support = tf.layers.dense(embedded_support,
                                               repr_dim,
                                               tf.tanh,
                                               name='projection')
            # keep dropout mask constant over time
            dropout_shape = [
                tf.shape(embedded_question)[0], 1,
                tf.shape(embedded_question)[2]
            ]
            embedded_question = tf.nn.dropout(embedded_question,
                                              keep_prob=1.0 - dropout,
                                              noise_shape=dropout_shape)
            embedded_support = tf.nn.dropout(embedded_support,
                                             keep_prob=1.0 - dropout,
                                             noise_shape=dropout_shape)

        fused_rnn = tf.contrib.rnn.LSTMBlockFusedCell(repr_dim)
        # encode the question; keep only its final forward/backward states
        _, q_states = fused_birnn(fused_rnn,
                                  embedded_question,
                                  sequence_length=tensors.question_length,
                                  dtype=tf.float32,
                                  time_major=False,
                                  scope="question_rnn")

        outputs, _ = fused_birnn(fused_rnn,
                                 embedded_support,
                                 sequence_length=tensors.support_length,
                                 dtype=tf.float32,
                                 initial_state=q_states,
                                 time_major=False,
                                 scope="support_rnn")

        # concat forward/backward outputs: 2 x [batch, T, dim] -> [batch, T, 2*dim]
        outputs = tf.concat([outputs[0], outputs[1]], axis=2)
        hidden = tf.layers.dense(
            outputs, repr_dim, tf.nn.relu, name="hidden") * tf.expand_dims(
                tf.sequence_mask(tensors.support_length,
                                 maxlen=tf.shape(outputs)[1],
                                 dtype=tf.float32), 2)
        # masked max-pooling over time: [batch, T, dim] -> [batch, dim]
        hidden = tf.reduce_max(hidden, axis=1)
        # [batch, dim] -> [batch, num_classes]
        outputs = tf.layers.dense(hidden, num_classes, name="classification")
        return outputs
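All three examples call a `fused_birnn` helper that the listing does not define. Below is a minimal sketch reconstructed from the call sites above, not the original implementation: it assumes the helper wraps a fused cell such as `tf.contrib.rnn.LSTMBlockFusedCell`, runs it forward and (via `tf.reverse_sequence`) backward, and returns a `(fw, bw)` pair of outputs and final states, so that a state pair like `q_states` can be fed back in as `initial_state`.

import tensorflow as tf  # TensorFlow 1.x

def fused_birnn(fused_rnn, inputs, sequence_length, dtype=None,
                initial_state=None, time_major=False, scope=None):
    """Run a fused RNN cell in both directions over `inputs`.

    Returns ((fw_outputs, bw_outputs), (fw_state, bw_state)); with
    time_major=False, inputs and outputs are [batch, time, dim].
    """
    with tf.variable_scope(scope or 'birnn'):
        sequence_length = tf.cast(sequence_length, tf.int32)
        if not time_major:
            # fused cells consume time-major tensors: [time, batch, dim]
            inputs = tf.transpose(inputs, [1, 0, 2])
        # accept a (fw, bw) state pair, matching what this function returns,
        # so the question states can seed the support encoder (Examples #1/#2)
        fw_init, bw_init = initial_state if initial_state is not None else (None, None)
        fw_out, fw_state = fused_rnn(inputs, sequence_length=sequence_length,
                                     initial_state=fw_init, dtype=dtype, scope='fw')
        # backward pass: reverse each sequence, encode, reverse the result back
        rev = tf.reverse_sequence(inputs, sequence_length, seq_axis=0, batch_axis=1)
        bw_out, bw_state = fused_rnn(rev, sequence_length=sequence_length,
                                     initial_state=bw_init, dtype=dtype, scope='bw')
        bw_out = tf.reverse_sequence(bw_out, sequence_length, seq_axis=0, batch_axis=1)
        if not time_major:
            fw_out = tf.transpose(fw_out, [1, 0, 2])
            bw_out = tf.transpose(bw_out, [1, 0, 2])
    return (fw_out, bw_out), (fw_state, bw_state)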
Example #2
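The same conditional-encoding model as Example #1, stripped down to a standalone function without the shared projection and dropout preamble. It masks with a `misc.mask_for_lengths` helper instead of `tf.sequence_mask`; a plausible reconstruction of that helper follows the code.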
def nli_model(size, num_classes, emb_question, question_length, emb_support, support_length):
    fused_rnn = tf.contrib.rnn.LSTMBlockFusedCell(size)
    # encode the question; keep only its final forward/backward states
    _, q_states = fused_birnn(fused_rnn, emb_question, sequence_length=question_length,
                              dtype=tf.float32, time_major=False, scope="question_rnn")

    outputs, _ = fused_birnn(fused_rnn, emb_support, sequence_length=support_length,
                             dtype=tf.float32, initial_state=q_states, time_major=False, scope="support_rnn")

    # concat forward/backward outputs: 2 x [batch, T, dim] -> [batch, T, 2*dim]
    outputs = tf.concat([outputs[0], outputs[1]], axis=2)
    hidden = tf.layers.dense(outputs, size, tf.nn.relu, name="hidden") * tf.expand_dims(
        misc.mask_for_lengths(support_length, max_length=tf.shape(outputs)[1], mask_right=False, value=1.0), 2)
    # masked max-pooling over time: [batch, T, dim] -> [batch, dim]
    hidden = tf.reduce_max(hidden, axis=1)
    # [batch, dim] -> [batch, num_classes]
    outputs = tf.layers.dense(hidden, num_classes, name="classification")
    return outputs
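`misc.mask_for_lengths` is also external to the listing. A hypothetical implementation consistent with the call above, where `mask_right=False, value=1.0` must yield a [batch, T] float mask that is 1.0 inside each sequence and 0.0 on the padding (exactly the `tf.sequence_mask` call in Example #1):

import tensorflow as tf  # TensorFlow 1.x

def mask_for_lengths(lengths, max_length=None, mask_right=True, value=-1000.0):
    """Hypothetical reconstruction of misc.mask_for_lengths.

    mask_right=True marks the *padding* positions with `value` (useful as
    an additive mask before a softmax); mask_right=False scales the *valid*
    positions by `value`, so value=1.0 gives a plain 0/1 validity mask.
    """
    mask = tf.sequence_mask(lengths, max_length, dtype=tf.float32)
    if mask_right:
        mask = 1.0 - mask
    return mask * value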
Example #3
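A small wrapper that runs a fused bidirectional RNN and optionally projects the concatenated forward/backward outputs back down to `size` dimensions. It assumes TensorFlow 1.x, `numpy as np`, and an `rnn` module exposing `fused_birnn`; a usage sketch follows the code.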
def _bi_rnn(size, fused_rnn, sequence, seq_length, with_projection=False):
    output = rnn.fused_birnn(fused_rnn,
                             sequence,
                             seq_length,
                             dtype=tf.float32,
                             scope='rnn')[0]
    output = tf.concat(output, 2)
    if with_projection:
        # stacking two identity matrices initializes the projection kernel
        # to compute fw + bw from the concatenated [fw; bw] outputs
        projection_initializer = tf.constant_initializer(
            np.concatenate([np.eye(size), np.eye(size)]))
        output = tf.layers.dense(output,
                                 size,
                                 kernel_initializer=projection_initializer,
                                 name='projection')
    return output
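A usage sketch for `_bi_rnn`; the placeholder names (`emb_tokens`, `lengths`) and the embedding size are invented for illustration:

import numpy as np
import tensorflow as tf  # TensorFlow 1.x

size = 128
emb_tokens = tf.placeholder(tf.float32, [None, None, 300])  # [batch, time, emb]
lengths = tf.placeholder(tf.int32, [None])                  # true sequence lengths

cell = tf.contrib.rnn.LSTMBlockFusedCell(size)
# with_projection=True maps concat(fw, bw) [batch, T, 2*size] back to
# [batch, T, size]; the stacked-identity initializer makes the projection
# start out as fw + bw rather than a random mix of the two directions.
encoded = _bi_rnn(size, cell, emb_tokens, lengths, with_projection=True)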