Example #1
import tensorflow as tf
# `ex` is a helper module used throughout these examples; it is assumed to
# provide static_rank(), static_shape(), variable() and minimize().


def sequence_labeling_loss(logits, labels, sequence_length=None):
    """Mean cross entropy for [batch, time, classes] logits and [batch, time] labels."""
    assert ex.static_rank(logits) == 3
    assert ex.static_rank(labels) == 2

    # Flatten to [batch * time] examples, then restore the [batch, time]
    # layout of the per-step losses.
    losses = tf.reshape(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.reshape(logits, [-1, ex.static_shape(logits)[-1]]),
            labels=tf.reshape(labels, [-1])),
        [-1, *ex.static_shape(labels)[1:]])

    if sequence_length is None:
        return tf.reduce_mean(losses)

    # Average only over the unpadded steps of each sequence.
    mask = tf.sequence_mask(sequence_length,
                            maxlen=tf.shape(losses)[1],
                            dtype=losses.dtype)

    return tf.reduce_sum(losses * mask) / tf.reduce_sum(mask)
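A hypothetical usage sketch (the sizes and tensors below are made up for illustration): two sequences padded to four steps, where only the unpadded steps contribute to the loss.

import tensorflow as tf

logits = tf.random_normal([2, 4, 3])            # [batch, time, classes]
labels = tf.constant([[0, 1, 2, 0],
                      [1, 0, 0, 0]], tf.int32)  # padded label ids
length = tf.constant([4, 2])                    # true length per sequence

loss = sequence_labeling_loss(logits, labels, length)

with tf.Session() as sess:
    print(sess.run(loss))  # mean over the 6 unpadded steps only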
Example #2
def batch_linear(h, output_size):
    """Apply one shared linear layer to every step of a [batch, time, in] tensor."""
    assert ex.static_rank(h) == 3

    shape = ex.static_shape(h)
    # Tile the shared weight matrix across the batch so tf.matmul can be
    # applied batch-wise (tf.batch_matmul was removed in TensorFlow 1.0).
    return (tf.matmul(
        h,
        tf.tile(tf.expand_dims(ex.variable([shape[2], output_size]), 0),
                [shape[0], 1, 1])) + ex.variable([output_size]))
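A minimal shape check with hypothetical sizes; note that the batch dimension must be statically known, since tf.tile above reads it from the static shape.

import tensorflow as tf

h = tf.random_normal([2, 7, 16])  # [batch, time, hidden], all dims static
y = batch_linear(h, 5)            # shared projection applied at every step
print(y.get_shape())              # (2, 7, 5)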
Example #3
    def ar_lm(key, sentence, labels, *, char_embeddings):
        """Autoregressive character language model trained on unrolled segments."""
        cell = tf.contrib.rnn.LayerNormBasicLSTMCell(qnd.FLAGS.cell_size)

        # Split each sequence into segments of num_unroll steps and carry the
        # LSTM state across the segments of the same sequence.
        batch = tf.contrib.training.batch_sequences_with_states(
            key,
            input_sequences={
                'sentence': tf.gather(char_embeddings, sentence),
                'labels': labels,
            },
            input_context={},
            input_length=None,
            initial_states={
                'c': tf.zeros([cell.state_size.c], tf.float32),
                'h': tf.zeros([cell.state_size.h], tf.float32),
            },
            num_unroll=qnd.FLAGS.num_unroll,
            batch_size=qnd.FLAGS.batch_size,
            num_threads=qnd.FLAGS.num_batch_threads,
            capacity=qnd.FLAGS.batch_queue_capacity)

        # tf.nn.state_saving_rnn was renamed in TensorFlow 1.0; the contrib
        # alias takes the same arguments.
        outputs, _ = tf.contrib.rnn.static_state_saving_rnn(
            cell,
            tf.unstack(batch.sequences['sentence'], axis=1),
            sequence_length=batch.length,
            state_saver=batch,
            state_name=('c', 'h'))

        # The RNN returns a list of per-step [batch, cell_size] tensors; stack
        # them back into [batch, time, cell_size] before the projection, which
        # maps cell outputs to logits over the character vocabulary.
        logits = batch_linear(tf.stack(outputs, axis=1),
                              ex.static_shape(char_embeddings)[0])
        labels = batch.sequences['labels']

        loss = sequence_labeling_loss(logits, labels, batch.length)

        return (
            {
                'key': key,
                # Zero out predictions on the padded steps of each segment.
                'labels': (tf.argmax(logits, axis=2) *
                           tf.sequence_mask(batch.length,
                                            maxlen=qnd.FLAGS.num_unroll,
                                            dtype=tf.int64)),
            },
            loss,
            ex.minimize(loss),
        )
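The masked-prediction pattern in the return value can be checked in isolation. A minimal sketch with made-up numbers (plain TensorFlow, no qnd):

import tensorflow as tf

logits = tf.constant([[[0.1, 2.0], [3.0, 0.2], [0.5, 0.4]],
                      [[1.0, 0.0], [0.0, 1.0], [2.0, 0.1]]])  # [batch, time, classes]
length = tf.constant([2, 3])

# Predictions past each sequence's true length are forced to zero.
masked = tf.argmax(logits, axis=2) * tf.sequence_mask(length, dtype=tf.int64)

with tf.Session() as sess:
    print(sess.run(masked))  # [[1 0 0], [0 1 0]]

One caveat of this scheme: a genuine prediction of class 0 is indistinguishable from padding in the masked output.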
Example #7
def _restore_sentence_shape(words, sentences):
    # Inverse of flattening sentences into words:
    # [sentences * words, ...] -> [sentences, words, ...].
    return tf.reshape(words, [-1, ex.static_shape(sentences)[1]] +
                      ex.static_shape(words)[1:])
Example #8
def _restore_document_shape(sentences, document):
    # Inverse of _flatten_document_into_sentences below:
    # [batch * sentences, ...] -> [batch, sentences, ...].
    return tf.reshape(sentences, [-1, ex.static_shape(document)[1]] +
                      ex.static_shape(sentences)[1:])
Example #9
def _flatten_document_into_sentences(document):
    # Merge the batch and sentence dimensions so each sentence can be
    # processed independently: [batch, sentences, ...] -> [batch * sentences, ...].
    return tf.reshape(document, [-1] + ex.static_shape(document)[2:])
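A round-trip sketch of these helpers with hypothetical shapes, assuming ex.static_shape returns a Python list of static dimensions; _restore_sentence_shape plays the same role one level down, for words within a sentence.

import tensorflow as tf

document = tf.zeros([2, 3, 5])  # [batch, sentences, words]

sentences = _flatten_document_into_sentences(document)   # -> [6, 5]
restored = _restore_document_shape(sentences, document)  # -> [2, 3, 5]

print(sentences.get_shape(), restored.get_shape())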