def two_layer_lstm(prefix, x, layer_size, hidden_size, axis, is_forward):
    """Builds a two-layer LSTM over `x` followed by a linear projection back to
    `layer_size`. All parameters are created here, keyed by `prefix`, and
    returned alongside the output so the caller can collect them for training.
    """
    tparams = collections.OrderedDict()
    # Each recurrent layer has input-to-hidden (Wx), hidden-to-hidden (Wh) and
    # bias parameters; the leading hidden_size * 4 dimension packs the four
    # LSTM gate blocks together.
    tparams['%s_Wx1' % prefix] = theano.shared(
        0.01 * np.random.randn(hidden_size * 4, layer_size).astype(np.float32))
    tparams['%s_Wh1' % prefix] = theano.shared(
        0.01 * np.random.randn(hidden_size * 4, hidden_size).astype(np.float32))
    tparams['%s_b1' % prefix] = theano.shared(
        np.zeros(hidden_size * 4).astype(np.float32))
    tparams['%s_Wx2' % prefix] = theano.shared(
        0.01 * np.random.randn(hidden_size * 4, hidden_size).astype(np.float32))
    tparams['%s_Wh2' % prefix] = theano.shared(
        0.01 * np.random.randn(hidden_size * 4, hidden_size).astype(np.float32))
    tparams['%s_b2' % prefix] = theano.shared(
        np.zeros(hidden_size * 4).astype(np.float32))
    # Final linear projection from hidden_size back down to layer_size.
    tparams['%s_Wx3' % prefix] = theano.shared(
        0.01 * np.random.randn(layer_size, hidden_size).astype(np.float32))
    tparams['%s_b3' % prefix] = theano.shared(
        np.zeros(layer_size).astype(np.float32))

    l = x
    l = layers.recurrent_layer(
        l,
        tparams['%s_Wx1' % prefix],
        tparams['%s_Wh1' % prefix],
        tparams['%s_b1' % prefix],
        axis=axis,
        is_forward=is_forward,
    )
    l = layers.recurrent_layer(
        l,
        tparams['%s_Wx2' % prefix],
        tparams['%s_Wh2' % prefix],
        tparams['%s_b2' % prefix],
        axis=axis,
        is_forward=is_forward,
    )
    l = layers.linear_layer(
        l,
        tparams['%s_Wx3' % prefix],
        tparams['%s_b3' % prefix],
    )
    return l, tparams
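# A minimal usage sketch for two_layer_lstm, assuming the project's `layers`
# module (recurrent_layer / linear_layer) is importable and that its layers
# return ordinary Theano expressions. The input layout, sizes, and scan axis
# below are illustrative choices, not values taken from the original code.
import collections

import numpy as np
import theano
import theano.tensor as T

x = T.tensor3('x')  # assumed layout: (time, batch, layer_size)
output, params = two_layer_lstm('enc', x, layer_size=128, hidden_size=256,
                                axis=0, is_forward=True)
encode = theano.function([x], output)
encoded = encode(np.zeros((10, 4, 128), dtype=np.float32))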
def generator(source, target, sequence_length, vocab_size, decoder_fn=None, **opts):
    """
    Args:
        source: TensorFlow queue or placeholder tensor of word ids for the source
        target: TensorFlow queue or placeholder tensor of word ids for the target
        sequence_length: TensorFlow queue or placeholder tensor holding the number of word ids in each sentence
        vocab_size: maximum vocabulary size determined from the data
        decoder_fn: optional custom decoder function; if None, the default dynamic_rnn is used
    """
    tf.logging.info(" Setting up generator")

    embedding_layer = lay.embedding_layer(vocab_size, opts["embedding_dim"],
                                          name="embedding_matrix")

    # TODO: add batch norm?
    rnn_outputs = (
        source >>
        embedding_layer >>
        lay.word_dropout_layer(keep_prob=opts["word_dropout_keep_prob"]) >>
        lay.recurrent_layer(hidden_dims=opts["rnn_hidden_dim"],
                            keep_prob=opts["recurrent_dropout_keep_prob"],
                            sequence_length=sequence_length,
                            decoder_fn=decoder_fn,
                            name="rnn_cell")
    )

    output_projection_layer = lay.dense_layer(hidden_dims=vocab_size,
                                              name="output_projections")

    flat_logits = (
        rnn_outputs >>
        lay.reshape_layer(shape=(-1, opts["rnn_hidden_dim"])) >>
        output_projection_layer
    )

    probs = flat_logits >> lay.softmax_layer()

    embedding_matrix = embedding_layer.get_variables_in_scope()
    output_projections = output_projection_layer.get_variables_in_scope()

    # When a custom decoder_fn is supplied, no training loss is built.
    if decoder_fn is not None:
        return GeneratorTuple(rnn_outputs=rnn_outputs, flat_logits=flat_logits,
                              probs=probs, loss=None,
                              embedding_matrix=embedding_matrix[0],
                              output_projections=output_projections)

    loss = (
        flat_logits >>
        lay.cross_entropy_layer(target=target) >>
        lay.reshape_layer(shape=tf.shape(target)) >>
        lay.mean_loss_by_example_layer(sequence_length=sequence_length)
    )

    # TODO: add dropout penalty
    return GeneratorTuple(rnn_outputs=rnn_outputs, flat_logits=flat_logits,
                          probs=probs, loss=loss,
                          embedding_matrix=embedding_matrix[0],
                          output_projections=output_projections)
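# A hedged usage sketch for generator(). The placeholder shapes, vocabulary
# size, and hyperparameter values are illustrative assumptions; `lay` and
# GeneratorTuple come from the surrounding project, not from TensorFlow.
source = tf.placeholder(tf.int32, shape=[None, None], name="source_ids")
target = tf.placeholder(tf.int32, shape=[None, None], name="target_ids")
seq_len = tf.placeholder(tf.int32, shape=[None], name="sequence_length")

g = generator(source, target, seq_len, vocab_size=20000,
              embedding_dim=128,
              rnn_hidden_dim=256,
              word_dropout_keep_prob=0.9,
              recurrent_dropout_keep_prob=0.9)

# g.loss is the per-example sequence loss used for maximum-likelihood
# pre-training; g.probs gives the flattened next-word distribution.
train_op = tf.train.AdamOptimizer(1e-3).minimize(tf.reduce_mean(g.loss))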
def discriminator(input_vectors, sequence_length, is_real=True, **opts):
    """
    Args:
        input_vectors: RNN outputs from either real or generated data
        sequence_length: TensorFlow queue or placeholder tensor holding the number of word ids in each sentence
        is_real: True if input_vectors come from real data, False if they come from generated data
    """
    tf.logging.info(" Setting up discriminator")

    rnn_final_state = (
        input_vectors >>
        lay.dense_layer(hidden_dims=opts["embedding_dim"]) >>
        lay.recurrent_layer(sequence_length=sequence_length,
                            hidden_dims=opts["rnn_hidden_dim"],
                            return_final_state=True)
    )

    prediction_logits = (
        rnn_final_state >>
        lay.dense_layer(hidden_dims=opts["output_hidden_dim"]) >>
        lay.relu_layer() >>
        lay.dropout_layer(opts["output_dropout_keep_prob"]) >>
        lay.dense_layer(hidden_dims=opts["output_hidden_dim"]) >>
        lay.relu_layer() >>
        lay.dropout_layer(opts["output_dropout_keep_prob"]) >>
        lay.dense_layer(hidden_dims=1)
    )

    # Real sequences are labelled 1, generated sequences 0.
    if is_real:
        target = tf.ones_like(prediction_logits)
    else:
        target = tf.zeros_like(prediction_logits)

    # TODO: add accuracy
    loss = (
        prediction_logits >>
        lay.sigmoid_cross_entropy_layer(target=target)
    )

    # TODO: return logits in case for WGAN and l2 GANs
    return DiscriminatorTuple(rnn_final_state=rnn_final_state,
                              prediction_logits=prediction_logits,
                              loss=loss)
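# A hedged sketch of wiring the discriminator against real and generated RNN
# outputs with shared weights. The variable_scope reuse pattern, the tensors
# real_rnn_outputs / fake_rnn_outputs (e.g. the generator's rnn_outputs), and
# the option values are assumptions about the surrounding training script.
with tf.variable_scope("discriminator") as scope:
    d_real = discriminator(real_rnn_outputs, seq_len, is_real=True,
                           embedding_dim=128, rnn_hidden_dim=256,
                           output_hidden_dim=128,
                           output_dropout_keep_prob=0.75)
    scope.reuse_variables()
    d_fake = discriminator(fake_rnn_outputs, seq_len, is_real=False,
                           embedding_dim=128, rnn_hidden_dim=256,
                           output_hidden_dim=128,
                           output_dropout_keep_prob=0.75)

# Standard GAN discriminator objective: real logits are pushed toward 1 and
# fake logits toward 0 via the sigmoid cross-entropy losses built above.
d_loss = tf.reduce_mean(d_real.loss) + tf.reduce_mean(d_fake.loss)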