Example #1
import collections

import numpy as np
import theano

import layers  # project-local module assumed to provide recurrent_layer and linear_layer


def two_layer_lstm(prefix, x, layer_size, hidden_size, axis, is_forward):
    tparams = collections.OrderedDict()
    # First LSTM layer: the 4*hidden_size rows pack the four gate blocks into one matrix.
    tparams['%s_Wx1' % prefix] = theano.shared(0.01*np.random.randn(hidden_size*4, layer_size).astype(np.float32))
    tparams['%s_Wh1' % prefix] = theano.shared(0.01*np.random.randn(hidden_size*4, hidden_size).astype(np.float32))
    tparams['%s_b1' % prefix] = theano.shared(np.zeros(hidden_size*4).astype(np.float32))
    # Second LSTM layer.
    tparams['%s_Wx2' % prefix] = theano.shared(0.01*np.random.randn(hidden_size*4, hidden_size).astype(np.float32))
    tparams['%s_Wh2' % prefix] = theano.shared(0.01*np.random.randn(hidden_size*4, hidden_size).astype(np.float32))
    tparams['%s_b2' % prefix] = theano.shared(np.zeros(hidden_size*4).astype(np.float32))
    # Final linear projection back to layer_size.
    tparams['%s_Wx3' % prefix] = theano.shared(0.01*np.random.randn(layer_size, hidden_size).astype(np.float32))
    tparams['%s_b3' % prefix] = theano.shared(np.zeros(layer_size).astype(np.float32))

    l = x
    l = layers.recurrent_layer(
        l,
        tparams['%s_Wx1' % prefix],
        tparams['%s_Wh1' % prefix],
        tparams['%s_b1' % prefix],
        axis=axis,
        is_forward=is_forward,
    )
    l = layers.recurrent_layer(
        l,
        tparams['%s_Wx2' % prefix],
        tparams['%s_Wh2' % prefix],
        tparams['%s_b2' % prefix],
        axis=axis,
        is_forward=is_forward,
    )
    l = layers.linear_layer(
        l,
        tparams['%s_Wx3' % prefix],
        tparams['%s_b3' % prefix],
    )
    return l, tparams
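The factor of 4 in the weight shapes above comes from packing the four LSTM gate blocks (input, forget, output, cell candidate) into a single matrix product; layers.recurrent_layer splits them internally. The following is only a minimal NumPy sketch of one such packed step, with hypothetical names and an illustrative gate order, not the project's actual implementation.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, h_prev, c_prev, Wx, Wh, b):
    # One packed pre-activation of shape (4*hidden,): all gates in one product.
    preact = Wx.dot(x_t) + Wh.dot(h_prev) + b
    # Gate order here is illustrative only; the real layers module may differ.
    i, f, o, g = np.split(preact, 4)
    i, f, o = sigmoid(i), sigmoid(f), sigmoid(o)
    g = np.tanh(g)
    c_t = f * c_prev + i * g
    h_t = o * np.tanh(c_t)
    return h_t, c_t

# Toy shapes matching the parameter layout above.
layer_size, hidden_size = 3, 5
Wx = 0.01 * np.random.randn(hidden_size * 4, layer_size).astype(np.float32)
Wh = 0.01 * np.random.randn(hidden_size * 4, hidden_size).astype(np.float32)
b = np.zeros(hidden_size * 4, dtype=np.float32)
h, c = lstm_step(np.ones(layer_size, dtype=np.float32),
                 np.zeros(hidden_size, dtype=np.float32),
                 np.zeros(hidden_size, dtype=np.float32),
                 Wx, Wh, b)
print(h.shape, c.shape)  # (5,) (5,)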
Example #2
def generator(source,
              target,
              sequence_length,
              vocab_size,
              decoder_fn=None,
              **opts):
    """
    Args:
        source: TensorFlow queue or placeholder tensor for word ids for source 
        target: TensorFlow queue or placeholder tensor for word ids for target
        sequence_length: TensorFlow queue or placeholder tensor for number of word ids for each sentence
        vocab_size: max vocab size determined from data
        decoder_fn: if using custom decoder_fn else use the default dynamic_rnn
    """
    tf.logging.info(" Setting up generator")

    embedding_layer = lay.embedding_layer(vocab_size,
                                          opts["embedding_dim"],
                                          name="embedding_matrix")

    # TODO: add batch norm?
    rnn_outputs = (
        source >>
        embedding_layer >>
        lay.word_dropout_layer(keep_prob=opts["word_dropout_keep_prob"]) >>
        lay.recurrent_layer(hidden_dims=opts["rnn_hidden_dim"],
                            keep_prob=opts["recurrent_dropout_keep_prob"],
                            sequence_length=sequence_length,
                            decoder_fn=decoder_fn,
                            name="rnn_cell")
    )

    output_projection_layer = lay.dense_layer(hidden_dims=vocab_size,
                                              name="output_projections")

    flat_logits = (
        rnn_outputs >>
        lay.reshape_layer(shape=(-1, opts["rnn_hidden_dim"])) >>
        output_projection_layer
    )

    probs = flat_logits >> lay.softmax_layer()

    embedding_matrix = embedding_layer.get_variables_in_scope()
    output_projections = output_projection_layer.get_variables_in_scope()

    if decoder_fn is not None:
        return GeneratorTuple(rnn_outputs=rnn_outputs,
                              flat_logits=flat_logits,
                              probs=probs,
                              loss=None,
                              embedding_matrix=embedding_matrix[0],
                              output_projections=output_projections)

    loss = (flat_logits >> lay.cross_entropy_layer(target=target) >>
            lay.reshape_layer(shape=tf.shape(target)) >>
            lay.mean_loss_by_example_layer(sequence_length=sequence_length))

    # TODO: add dropout penalty
    return GeneratorTuple(rnn_outputs=rnn_outputs,
                          flat_logits=flat_logits,
                          probs=probs,
                          loss=loss,
                          embedding_matrix=embedding_matrix[0],
                          output_projections=output_projections)
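The >> chaining used throughout the generator comes from the project's lay module. The snippet below is only a toy sketch of how such a pipe operator can be built with __rrshift__; the Layer class and names are hypothetical and not the lay API.

class Layer:
    """Toy layer wrapping a function so that `value >> layer` applies it."""

    def __init__(self, fn, name=None):
        self.fn = fn
        self.name = name

    def __rrshift__(self, value):
        # Called for `value >> layer`; returns the transformed value so that
        # further `>> layer` steps keep chaining left to right.
        return self.fn(value)


double = Layer(lambda x: x * 2, name="double")
plus_one = Layer(lambda x: x + 1, name="plus_one")

print(3 >> double >> plus_one)  # 3 -> 6 -> 7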
Example #3
def two_layer_lstm(prefix, x, layer_size, hidden_size, axis, is_forward):
    tparams = collections.OrderedDict()
    tparams['%s_Wx1' % prefix] = theano.shared(
        0.01 * np.random.randn(hidden_size * 4, layer_size).astype(np.float32))
    tparams['%s_Wh1' % prefix] = theano.shared(
        0.01 * np.random.randn(hidden_size * 4, hidden_size).astype(np.float32))
    tparams['%s_b1' % prefix] = theano.shared(
        np.zeros(hidden_size * 4).astype(np.float32))
    tparams['%s_Wx2' % prefix] = theano.shared(
        0.01 * np.random.randn(hidden_size * 4, hidden_size).astype(np.float32))
    tparams['%s_Wh2' % prefix] = theano.shared(
        0.01 * np.random.randn(hidden_size * 4, hidden_size).astype(np.float32))
    tparams['%s_b2' % prefix] = theano.shared(
        np.zeros(hidden_size * 4).astype(np.float32))
    tparams['%s_Wx3' % prefix] = theano.shared(
        0.01 * np.random.randn(layer_size, hidden_size).astype(np.float32))
    tparams['%s_b3' % prefix] = theano.shared(
        np.zeros(layer_size).astype(np.float32))

    l = x
    l = layers.recurrent_layer(
        l,
        tparams['%s_Wx1' % prefix],
        tparams['%s_Wh1' % prefix],
        tparams['%s_b1' % prefix],
        axis=axis,
        is_forward=is_forward,
    )
    l = layers.recurrent_layer(
        l,
        tparams['%s_Wx2' % prefix],
        tparams['%s_Wh2' % prefix],
        tparams['%s_b2' % prefix],
        axis=axis,
        is_forward=is_forward,
    )
    l = layers.linear_layer(
        l,
        tparams['%s_Wx3' % prefix],
        tparams['%s_b3' % prefix],
    )
    return l, tparams
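A hedged usage sketch for two_layer_lstm follows. The symbolic input layout (time on the scanned axis, axis=0) and the sizes are assumptions for illustration; only the theano.function compilation step is standard Theano API.

import theano
import theano.tensor as T

x = T.tensor3('x')  # assumed layout: (time, batch, layer_size)
output, tparams = two_layer_lstm('enc', x, layer_size=128, hidden_size=256,
                                 axis=0, is_forward=True)
encode = theano.function([x], output)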
Example #4
def generator(source, target, sequence_length, vocab_size, decoder_fn=None, **opts):
    """
    Args:
        source: TensorFlow queue or placeholder tensor for word ids for source 
        target: TensorFlow queue or placeholder tensor for word ids for target
        sequence_length: TensorFlow queue or placeholder tensor for number of word ids for each sentence
        vocab_size: max vocab size determined from data
        decoder_fn: if using custom decoder_fn else use the default dynamic_rnn
    """
    tf.logging.info(" Setting up generator")

    embedding_layer = lay.embedding_layer(vocab_size, opts["embedding_dim"], name="embedding_matrix")

    # TODO: add batch norm?
    rnn_outputs = (
        source >>
        embedding_layer >>
        lay.word_dropout_layer(keep_prob=opts["word_dropout_keep_prob"]) >>
        lay.recurrent_layer(hidden_dims=opts["rnn_hidden_dim"], keep_prob=opts["recurrent_dropout_keep_prob"],
                            sequence_length=sequence_length, decoder_fn=decoder_fn, name="rnn_cell")
    )

    output_projection_layer = lay.dense_layer(hidden_dims=vocab_size, name="output_projections")

    flat_logits = (
        rnn_outputs >>
        lay.reshape_layer(shape=(-1, opts["rnn_hidden_dim"])) >>
        output_projection_layer
    )

    probs = flat_logits >> lay.softmax_layer()

    embedding_matrix = embedding_layer.get_variables_in_scope()
    output_projections = output_projection_layer.get_variables_in_scope()

    if decoder_fn is not None:
        return GeneratorTuple(rnn_outputs=rnn_outputs, flat_logits=flat_logits, probs=probs, loss=None,
                              embedding_matrix=embedding_matrix[0], output_projections=output_projections)

    loss = (
        flat_logits >>
        lay.cross_entropy_layer(target=target) >>
        lay.reshape_layer(shape=tf.shape(target)) >>
        lay.mean_loss_by_example_layer(sequence_length=sequence_length)
    )

    # TODO: add dropout penalty
    return GeneratorTuple(rnn_outputs=rnn_outputs, flat_logits=flat_logits, probs=probs, loss=loss,
                          embedding_matrix=embedding_matrix[0], output_projections=output_projections)
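The loss pipeline above reshapes the flat per-token cross entropy back to the target's [batch, time] shape before averaging per example. The sketch below shows one plausible implementation of that averaging with length masking; the function name and exact behaviour are assumptions, not the project's mean_loss_by_example_layer.

import tensorflow as tf

def mean_loss_by_example(token_losses, sequence_length):
    # token_losses: [batch, time] per-token cross entropy (after the reshape above).
    mask = tf.sequence_mask(sequence_length,
                            maxlen=tf.shape(token_losses)[1],
                            dtype=tf.float32)
    # Zero out padding positions and average the surviving losses per sentence.
    per_example = tf.reduce_sum(token_losses * mask, axis=1)
    per_example /= tf.cast(sequence_length, tf.float32)
    return tf.reduce_mean(per_example)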
Example #5
def discriminator(input_vectors, sequence_length, is_real=True, **opts):
    """
    Args:
        input_vectors: output of the RNN either from real or generated data
        sequence_length: TensorFlow queue or placeholder tensor for number of word ids for each sentence
        is_real: if True, RNN outputs when feeding in actual data, if False feeds in generated data
    """
    tf.logging.info(" Setting up discriminator")

    rnn_final_state = (
        input_vectors >>
        lay.dense_layer(hidden_dims=opts["embedding_dim"]) >>
        lay.recurrent_layer(sequence_length=sequence_length, hidden_dims=opts["rnn_hidden_dim"],
                            return_final_state=True)
    )

    prediction_logits = (
        rnn_final_state >>
        lay.dense_layer(hidden_dims=opts["output_hidden_dim"]) >>
        lay.relu_layer() >>
        lay.dropout_layer(opts["output_dropout_keep_prob"]) >>
        lay.dense_layer(hidden_dims=opts["output_hidden_dim"]) >>
        lay.relu_layer() >>
        lay.dropout_layer(opts["output_dropout_keep_prob"]) >>
        lay.dense_layer(hidden_dims=1)
    )

    if is_real:
        target = tf.ones_like(prediction_logits)
    else:
        target = tf.zeros_like(prediction_logits)

    # TODO: add accuracy
    loss = (
        prediction_logits >>
        lay.sigmoid_cross_entropy_layer(target=target)
    )

    # TODO: return logits in case for WGAN and l2 GANs
    return DiscriminatorTuple(rnn_final_state=rnn_final_state, prediction_logits=prediction_logits, loss=loss)
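The generator and discriminator tuples each expose a loss; a common way to turn them into adversarial training ops in TF1 is sketched below. The variable-scope names "generator" and "discriminator" and the learning rate are assumptions, since the snippets above do not show which scopes the networks are built under.

import tensorflow as tf

def build_train_ops(g_loss, d_loss_real, d_loss_fake, learning_rate=1e-3):
    d_loss = d_loss_real + d_loss_fake

    # Assumed scope names; adjust to wherever the networks were actually built.
    g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="generator")
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="discriminator")

    # Each optimizer only updates the parameters of its own network.
    g_train_op = tf.train.AdamOptimizer(learning_rate).minimize(g_loss, var_list=g_vars)
    d_train_op = tf.train.AdamOptimizer(learning_rate).minimize(d_loss, var_list=d_vars)
    return g_train_op, d_train_op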