def testSharingWeightsWithDifferentNamescope(self):
    """Weights are shared through the variable scope, not the name scope.

    The same LSTM cell is unrolled twice under two different
    ``tf.name_scope`` blocks but the same ``tf.variable_scope``
    ("share_scope", the second time with ``reuse=True``).  Both unrolled
    networks are fed the same input and must produce identical outputs.
    """
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_steps = 10

    with self.test_session(graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
        # A single placeholder is repeated for every time step, so feeding
        # it once supplies the input of the whole sequence.
        step_input = tf.placeholder(tf.float32, shape=(None, input_size))
        inputs = num_steps * [step_input]
        cell = rnn_cell.LSTMCell(
            num_units,
            input_size,
            use_peepholes=True,
            num_proj=num_proj,
            initializer=initializer)

        with tf.name_scope("scope0"):
            with tf.variable_scope("share_scope"):
                outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

        with tf.name_scope("scope1"):
            with tf.variable_scope("share_scope", reuse=True):
                outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)

        # Fetch both output lists in one run; the first num_steps results
        # belong to outputs0, the remainder to outputs1.
        fetched = sess.run(outputs0 + outputs1,
                           feed_dict={step_input: input_value})
        first_values = fetched[:num_steps]
        second_values = fetched[num_steps:]

        self.assertEqual(len(first_values), len(second_values))
        for first, second in zip(first_values, second_values):
            self.assertAllEqual(first, second)
def testDropout(self):
    """Compare a plain cell with the same cell wrapped in near-total dropout.

    The wrapped cell uses ``input_keep_prob=1e-12``, so its inputs are
    expected to be dropped to zeros before reaching the cell.
    """
    cell = Plus1RNNCell()
    full_dropout_cell = rnn_cell.DropoutWrapper(
        cell, input_keep_prob=1e-12, seed=0)
    batch_size = 2
    # The same placeholder object is reused for all 10 time steps.
    inputs = [tf.placeholder(tf.float32, shape=(batch_size, 5))] * 10

    with tf.variable_scope("share_scope"):
        outputs, states = rnn.rnn(cell, inputs, dtype=tf.float32)
    with tf.variable_scope("drop_scope"):
        dropped_outputs, _ = rnn.rnn(
            full_dropout_cell, inputs, dtype=tf.float32)

    # Static checks: one output per input, same shape and dtype.
    self.assertEqual(len(outputs), len(inputs))
    for output, step_input in zip(outputs, inputs):
        self.assertEqual(output.get_shape().as_list(),
                         step_input.get_shape().as_list())
        self.assertEqual(output.dtype, step_input.dtype)

    with self.test_session(use_gpu=False) as sess:
        input_value = np.random.randn(batch_size, 5)
        feed = {inputs[0]: input_value}
        values = sess.run(outputs + [states[-1]], feed_dict=feed)
        full_dropout_values = sess.run(dropped_outputs, feed_dict=feed)

        # The last fetched value is the final state; only outputs are checked.
        for value in values[:-1]:
            self.assertAllClose(value, input_value + 1.0)
        for dropped_value in full_dropout_values[:-1]:
            # Add 1.0 to dropped_out (all zeros)
            self.assertAllClose(dropped_value, np.ones_like(input_value))
def testDropout(self):
    """Check RNN outputs with and without a (practically) full input dropout.

    A ``Plus1RNNCell`` is unrolled once directly and once wrapped in a
    ``DropoutWrapper`` whose ``input_keep_prob`` is 1e-12.
    """
    num_steps = 10
    feature_size = 5
    batch_size = 2

    cell = Plus1RNNCell()
    full_dropout_cell = rnn_cell.DropoutWrapper(
        cell, input_keep_prob=1e-12, seed=0)
    # One placeholder shared by every time step.
    shared_input = tf.placeholder(
        tf.float32, shape=(batch_size, feature_size))
    inputs = [shared_input] * num_steps

    with tf.variable_scope("share_scope"):
        outputs, states = rnn.rnn(cell, inputs, dtype=tf.float32)
    with tf.variable_scope("drop_scope"):
        dropped_outputs, _ = rnn.rnn(
            full_dropout_cell, inputs, dtype=tf.float32)

    self.assertEqual(len(outputs), len(inputs))
    for out, inp in zip(outputs, inputs):
        self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list())
        self.assertEqual(out.dtype, inp.dtype)

    with self.test_session(use_gpu=False) as sess:
        input_value = np.random.randn(batch_size, feature_size)
        values = sess.run(outputs + [states[-1]],
                          feed_dict={shared_input: input_value})
        full_dropout_values = sess.run(dropped_outputs,
                                       feed_dict={shared_input: input_value})

        # values[-1] is the final state, so it is excluded from the check.
        expected_plain = input_value + 1.0
        for v in values[:-1]:
            self.assertAllClose(v, expected_plain)

        # Add 1.0 to dropped_out (all zeros)
        expected_dropped = np.ones_like(input_value)
        for d_v in full_dropout_values[:-1]:
            self.assertAllClose(d_v, expected_dropped)
def testSharingWeightsWithDifferentNamescope(self):
    """Two RNNs in distinct name scopes share weights via one variable scope.

    Both RNNs are built from the same LSTM cell inside the variable scope
    "share_scope" (the second with ``reuse=True``) while living in the name
    scopes "scope0" and "scope1".  Given identical input, their outputs must
    be equal element for element.
    """
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    sequence_len = 10

    with self.test_session(graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
        # The list holds the same placeholder sequence_len times.
        inputs = sequence_len * [
            tf.placeholder(tf.float32, shape=(None, input_size))]
        cell = rnn_cell.LSTMCell(
            num_units, input_size, use_peepholes=True,
            num_proj=num_proj, initializer=initializer)

        with tf.name_scope("scope0"):
            with tf.variable_scope("share_scope"):
                outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        with tf.name_scope("scope1"):
            with tf.variable_scope("share_scope", reuse=True):
                outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

        tf.initialize_all_variables().run()

        input_value = np.random.randn(batch_size, input_size)
        all_values = sess.run(
            outputs0 + outputs1, feed_dict={inputs[0]: input_value})
        # Split the flat fetch list back into the two runs.
        values_scope0 = all_values[:sequence_len]
        values_scope1 = all_values[sequence_len:]

        self.assertEqual(len(values_scope0), len(values_scope1))
        for from_scope0, from_scope1 in zip(values_scope0, values_scope1):
            self.assertAllEqual(from_scope0, from_scope1)
def testSharingWeightsWithReuse(self):
    """Reusing a variable scope shares weights; a new scope does not.

    The same LSTM cell is unrolled three times: twice under "share_scope"
    (the second time with ``reuse=True``) and once under "diff_scope".
    The first two must agree exactly, the third must differ.
    """
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_steps = 10

    with self.test_session(graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
        # Every time step reads from the same placeholder.
        step_input = tf.placeholder(tf.float32, shape=(None, input_size))
        inputs = num_steps * [step_input]
        cell = rnn_cell.LSTMCell(
            num_units,
            input_size,
            use_peepholes=True,
            num_proj=num_proj,
            initializer=initializer)

        with tf.variable_scope("share_scope"):
            outputs0, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        with tf.variable_scope("share_scope", reuse=True):
            outputs1, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        with tf.variable_scope("diff_scope"):
            outputs2, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        fetched = sess.run(outputs0 + outputs1 + outputs2,
                           feed_dict={step_input: input_value})

        # Undo the concatenation of the three fetch lists.
        shared_first = fetched[:num_steps]
        shared_second = fetched[num_steps:2 * num_steps]
        separate = fetched[2 * num_steps:]

        self.assertEqual(len(shared_first), len(shared_second))
        self.assertEqual(len(shared_first), len(separate))
        for base, reused, other in zip(shared_first, shared_second, separate):
            # Same weights used by both RNNs so outputs should be the same.
            self.assertAllEqual(base, reused)
            # Different weights used so outputs should be different.
            self.assertTrue(np.linalg.norm(base - other) > 1e-6)
def _testShardNoShardEquivalentOutput(self, use_gpu):
    """A sharded LSTM cell and an unsharded one give (nearly) equal results.

    Both cells use the same constant initializer, so outputs and states are
    expected to match within ``atol=1e-3``.

    Note: the variable scope names are kept as in the original test even
    though "noshard_scope" hosts the cell that IS sharded and "shard_scope"
    hosts the one that is not.

    Args:
      use_gpu: forwarded to ``self.test_session``.
    """
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_proj_shards = 4
    num_unit_shards = 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        # The same unshaped placeholder serves all 10 time steps.
        inputs = 10 * [tf.placeholder(tf.float32)]
        initializer = tf.constant_initializer(0.001)

        sharded_cell = rnn_cell.LSTMCell(
            num_units, input_size,
            use_peepholes=True,
            num_proj=num_proj,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards,
            initializer=initializer)
        plain_cell = rnn_cell.LSTMCell(
            num_units, input_size,
            use_peepholes=True,
            num_proj=num_proj,
            initializer=initializer)

        with tf.variable_scope("noshard_scope"):
            sharded_outputs, sharded_states = rnn.rnn(
                sharded_cell, inputs, dtype=tf.float32)
        with tf.variable_scope("shard_scope"):
            plain_outputs, plain_states = rnn.rnn(
                plain_cell, inputs, dtype=tf.float32)

        self.assertEqual(len(sharded_outputs), len(inputs))
        self.assertEqual(len(sharded_outputs), len(plain_outputs))

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        feeds = {placeholder: input_value for placeholder in inputs}

        sharded_out_values = sess.run(sharded_outputs, feed_dict=feeds)
        plain_out_values = sess.run(plain_outputs, feed_dict=feeds)
        sharded_state_values = sess.run(sharded_states, feed_dict=feeds)
        plain_state_values = sess.run(plain_states, feed_dict=feeds)

        self.assertEqual(len(sharded_out_values), len(plain_out_values))
        self.assertEqual(len(sharded_state_values), len(plain_state_values))

        output_pairs = zip(sharded_out_values, plain_out_values)
        for sharded_value, plain_value in output_pairs:
            self.assertAllClose(sharded_value, plain_value, atol=1e-3)

        state_pairs = zip(sharded_state_values, plain_state_values)
        for sharded_state, plain_state in state_pairs:
            self.assertAllClose(sharded_state, plain_state, atol=1e-3)
def RNN(X, num_words_in_X, hidden_size, input_vector_size, max_input_size):
    """Pass the input data through a GRU RNN and return its outputs and state.

    Args:
      X: Input, a MAX_INPUT_LENGTH x BATCH_SIZE x WORD_VECTOR_LENGTH tensor.
      num_words_in_X: Number of words in X; needed because X is zero padded.
        Passed to ``rnn.rnn`` as ``sequence_length``.
      hidden_size: The dimensionality of the hidden layer of the RNN.
      input_vector_size: The dimensionality of each input vector, in this
        case WORD_VECTOR_LENGTH.
      max_input_size: The max number of input vectors that can be passed in
        to the RNN (the size of X along axis 0).

    Returns:
      The ``(output, state)`` pair returned by ``rnn.rnn``.
    """
    # Split X along the time axis into max_input_size slices; every slice
    # still carries a leading dimension of size 1.
    time_slices = tf.split(0, max_input_size, X)

    # Squeeze ONLY that leading axis.  An axis-less tf.squeeze would also
    # remove the batch dimension whenever the batch size happens to be 1,
    # handing rnn.rnn a rank-1 tensor in place of BATCH_SIZE x input_vector_size.
    squeezed = [tf.squeeze(time_slice, [0]) for time_slice in time_slices]

    gru_cell = rnn_cell.GRUCell(num_units=hidden_size,
                                input_size=input_vector_size)
    output, state = rnn.rnn(gru_cell, squeezed,
                            sequence_length=num_words_in_X,
                            dtype=tf.float32)
    return output, state
def myRNN(_x, _istate, _weights, _biases):
    """Project the input, run it through an LSTM and classify the last output.

    Reads the module-level names ``n_input``, ``n_hidden`` and ``n_steps``.

    Args:
      _x: input of shape (batch_size, n_steps, n_input).
      _istate: initial state handed to ``rnn.rnn``.
      _weights: dict providing the 'hidden' and 'out' weight tensors.
      _biases: dict providing the 'hidden' and 'out' bias tensors.

    Returns:
      ``outputs[-1] * _weights['out'] + _biases['out']``.
    """
    # Permute n_steps and batch_size -> (n_steps, batch_size, n_input).
    #
    # After the transpose, the same row of every sample in the batch is
    # grouped together:
    #   [[all first rows  (2-D matrix)],
    #    [all second rows (2-D matrix)],
    #    ...
    #    [all 28-th rows  (2-D matrix)]]
    # and e.g. the first of those matrices is
    #   [[first row of image no.1 (vector)],
    #    [first row of image no.2 (vector)],
    #    ...
    #    [first row of image no.batch_size (vector)]]
    steps_first = tf.transpose(_x, [1, 0, 2])

    # Reshape to prepare input to hidden activation:
    # (n_steps*batch_size, n_input).
    flat = tf.reshape(steps_first, [-1, n_input])

    # Linear activation: (n_steps*batch_size, n_hidden).
    projected = tf.matmul(flat, _weights['hidden']) + _biases['hidden']

    # Define a lstm cell with tensorflow.
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Split data because rnn cell needs a list of inputs for the RNN inner
    # loop: n_steps * (batch_size, n_hidden).
    step_inputs = tf.split(0, n_steps, projected)

    # Get lstm cell output.
    outputs, states = rnn.rnn(lstm_cell, step_inputs, initial_state=_istate)

    # Linear activation on the inner loop's last output.
    last_output = outputs[-1]
    return tf.matmul(last_output, _weights['out']) + _biases['out']
def __init__(self, config):
    """Build the graph: stacked LSTM, linear read-out, squared-error cost.

    Fixes relative to the previous revision:
      * ``rnn.rnn`` was called with an extra positional argument, so
        ``initial_state`` was supplied both positionally and by keyword
        (a TypeError at graph-construction time).
      * The read-out bias reused the variable name "weights_out" that the
        input projection had already created with a different shape, which
        ``tf.get_variable`` rejects.  It is now named "bias_out".

    Args:
      config: object providing ``n_hidden``, ``num_layers``, ``batch_size``,
        ``n_features``, ``is_training`` and ``learning_rate``.
    """
    lstm_cell = rnn_cell.BasicLSTMCell(config.n_hidden, forget_bias=0.0)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    # Default to a no-op so evaluation-only models still expose a train op.
    self._train_op = tf.no_op()
    self._input_data = tf.placeholder(tf.int32, [config.batch_size])

    # NOTE(review): the input placeholder is a rank-1 int32 tensor while
    # tf.matmul needs two rank-2 operands of one floating dtype; the
    # placeholder's dtype/shape and the weight shape below probably need to
    # be revisited together -- confirm against the data feeding code.
    input_weights = tf.get_variable("weights_out", [config.n_hidden, 1])
    input_bias = tf.get_variable("bias_hidden", [config.n_hidden])
    _X = tf.matmul(self._input_data, input_weights) + input_bias

    self._targets = tf.placeholder(tf.int32, [config.batch_size])
    self._initial_state = cell.zero_state(config.batch_size, tf.float32)
    state = self._initial_state

    # rnn.rnn takes (cell, inputs, initial_state=...): pass the list of
    # per-step inputs as the only positional input argument.
    outputs, states = rnn.rnn(cell, tf.split(0, 1, _X), initial_state=state)

    # NOTE(review): outputs[-1] has n_hidden columns, so this matmul only
    # works when n_features == n_hidden -- confirm the intended shape.
    output_weights = tf.get_variable(
        "weights_hidden", [config.n_features, config.n_hidden])
    output_bias = tf.get_variable("bias_out", [1])
    pred = tf.matmul(outputs[-1], output_weights) + output_bias

    self._final_state = states[-1]
    # self.targets is presumably a property over self._targets defined on
    # the class -- it is not visible from here.
    self._cost = cost = tf.reduce_mean(tf.square(pred - self.targets))

    if not config.is_training:
        return

    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=config.learning_rate).minimize(cost)
    self._train_op = optimizer
def RNN(_X, _istate, _weights, _biases):
    """Hidden projection -> single LSTM -> linear read-out of the last step.

    Reads the module-level names ``n_input``, ``n_hidden`` and ``n_steps``.

    Args:
      _X: input of shape (batch_size, n_steps, n_input).
      _istate: initial state handed to ``rnn.rnn``.
      _weights: dict with the 'hidden' and 'out' weight tensors.
      _biases: dict with the 'hidden' and 'out' bias tensors.

    Returns:
      The linear activation of the last LSTM output.
    """
    # Permute n_steps and batch_size => (n_steps, batch_size, n_input).
    time_major = tf.transpose(_X, [1, 0, 2])

    # Reshape to prepare input to hidden activation:
    # (n_steps*batch_size, n_input).
    flattened = tf.reshape(time_major, [-1, n_input])

    # Linear activation => (n_steps*batch_size, n_hidden).
    hidden = tf.matmul(flattened, _weights['hidden']) + _biases['hidden']

    # Define a lstm cell with tensorflow.
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Split data because rnn cell needs a list of inputs for the RNN inner
    # loop: the RNN receives one (batch_size, n_hidden) element per step.
    per_step = tf.split(0, n_steps, hidden)

    # Get lstm cell output (a list with one element per time step).
    outputs, states = rnn.rnn(lstm_cell, per_step, initial_state=_istate)

    # Linear activation of the inner loop's last output.
    final_output = outputs[-1]
    return tf.matmul(final_output, _weights['out']) + _biases['out']
def RNN(x, weights, biases, init_state):
    """Run an LSTM over ``x`` and classify its last output.

    Reads the module-level names ``n_input``, ``n_hidden`` and ``n_steps``.

    Args:
      x: input of shape (batch_size, n_steps, n_input).
      weights: dict with the 'out' weight tensor.
      biases: dict with the 'out' bias tensor.
      init_state: initial state handed to ``rnn.rnn``.

    Returns:
      A pair ``(logits, state_size)``: the linear activation of the last
      LSTM output, and ``lstm_cell.state_size``.
    """
    # Prepare the data shape to match the `rnn` function requirements.
    # Permute batch_size and n_steps -> (n_steps, batch_size, n_input).
    steps_first = tf.transpose(x, [1, 0, 2])
    # Flatten to (n_steps*batch_size, n_input).
    flattened = tf.reshape(steps_first, [-1, n_input])
    # Split into a list of n_steps tensors of shape (batch_size, n_input);
    # this input layout is required by the `rnn` function.
    step_inputs = tf.split(0, n_steps, flattened)

    # Author's note (translated): the three lines above are the hardest part
    # to understand; for a concrete reshape demo see basic_op in
    # 1_Introduction.  In the end the first row of every image is pulled out
    # to form one matrix, which is exactly the [batch_size, cell.input_size]
    # format that is required; the actual processing logic lives inside
    # rnn.rnn.

    # Define a lstm cell with tensorflow.
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output.
    outputs, states = rnn.rnn(lstm_cell, step_inputs,
                              initial_state=init_state, dtype=tf.float32)

    # Linear activation, using the rnn inner loop's last output.
    logits = tf.matmul(outputs[-1], weights['out']) + biases['out']
    return logits, lstm_cell.state_size
def build_model(self):
    """Build the representation generator and the action/object scorers.

    Word ids are embedded, run through an LSTM, mean-pooled over time and
    fed to two bias-free linear layers ("action" and "object").

    Fixes relative to the previous revision:
      * Each time slice is squeezed on axis 1 only; an axis-less
        ``tf.squeeze`` also dropped the batch axis when batch_size == 1.
      * The mean pool reduces over axis 0 (time) of the packed outputs.
        Reducing over axis 1 averaged across the batch and produced one row
        per time step, which cannot line up with ``self.true_action``
        of shape [batch_size, num_action].
    """
    # Representation Generator
    self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.seq_length])

    embed = tf.get_variable("embed", [self.vocab_size, self.embed_dim])
    word_embeds = tf.nn.embedding_lookup(embed, self.inputs)

    self.cell = rnn_cell.BasicLSTMCell(self.rnn_size)
    self.stacked_cell = rnn_cell.MultiRNNCell([self.cell] * self.layer_depth)

    # One (batch_size, embed_dim) tensor per time step.
    step_inputs = [
        tf.squeeze(embed_t, [1])
        for embed_t in tf.split(1, self.seq_length, word_embeds)
    ]

    # NOTE(review): the RNN is unrolled with the single-layer self.cell;
    # self.stacked_cell is built above but not used here -- confirm whether
    # the stacked cell was intended.
    outputs, _ = rnn.rnn(self.cell, step_inputs, dtype=tf.float32)

    # tf.pack stacks the per-step outputs on a new leading (time) axis.
    output_embed = tf.pack(outputs)
    mean_pool = tf.nn.relu(tf.reduce_mean(output_embed, 0))

    self.num_action = 4
    self.object_size = 4

    # Action scorer. no bias in paper
    self.pred_action = rnn_cell.linear(mean_pool, self.num_action, 0, "action")
    self.object_ = rnn_cell.linear(mean_pool, self.object_size, 0, "object")

    self.true_action = tf.placeholder(
        tf.int32, [self.batch_size, self.num_action])
def __init__(self, vocabularySize, config_param):
    """Build a multi-layer basic-RNN language model graph.

    Args:
      vocabularySize: number of distinct tokens; used for the embedding
        table, the softmax layer and the loss.
      config_param: configuration object; this method reads ``batch_size``,
        ``sequence_size``, ``embeddingSize``, ``hidden_size`` and
        ``num_layers`` from it.
    """
    self.vocabularySize = vocabularySize
    self.config = config_param
    cfg = self.config

    io_shape = [cfg.batch_size, cfg.sequence_size]
    self._inputX = tf.placeholder(tf.int32, io_shape, "InputsX")
    self._inputTargetsY = tf.placeholder(tf.int32, io_shape, "InputTargetsY")

    # Convert the input into its embedded form.  The embedding table and the
    # lookup are pinned to the CPU.
    with tf.device("/cpu:0"):
        embedding = tf.get_variable(
            "embedding", [self.vocabularySize, cfg.embeddingSize])
        embeddingLookedUp = tf.nn.embedding_lookup(embedding, self._inputX)

    # One tensor per time step, with the split axis removed.
    inputTensorsAsList = []
    for step_slice in tf.split(1, cfg.sequence_size, embeddingLookedUp):
        inputTensorsAsList.append(tf.squeeze(step_slice, [1]))

    # Define the RNN: the same basic cell repeated num_layers times.
    singleRNNCell = rnn_cell.BasicRNNCell(cfg.hidden_size)
    self.multilayerRNN = rnn_cell.MultiRNNCell(
        [singleRNNCell] * cfg.num_layers)
    self._initial_state = self.multilayerRNN.zero_state(
        cfg.batch_size, tf.float32)

    # Define the logits.
    step_outputs, states = rnn.rnn(
        self.multilayerRNN, inputTensorsAsList,
        initial_state=self._initial_state)
    flat_outputs = tf.reshape(
        tf.concat(1, step_outputs), [-1, cfg.hidden_size])
    softmax_w = tf.get_variable(
        "softmax_w", [cfg.hidden_size, self.vocabularySize])
    softmax_b = tf.get_variable("softmax_b", [self.vocabularySize])
    self._logits = tf.nn.xw_plus_b(flat_outputs, softmax_w, softmax_b)
    self._predictionSoftmax = tf.nn.softmax(self._logits)

    # Define the loss: every target position gets weight 1.
    flat_targets = tf.reshape(self._inputTargetsY, [-1])
    unit_weights = tf.ones([cfg.batch_size * cfg.sequence_size])
    loss = seq2seq.sequence_loss_by_example(
        [self._logits], [flat_targets], [unit_weights], self.vocabularySize)
    self._cost = tf.div(tf.reduce_sum(loss), cfg.batch_size)
    self._final_state = states[-1]
def unidirectional_lstm(inputs,keep_prob,INPUT_SIZE,HIDDEN_SIZE,SEQ_LENGTH):
    """Apply dropout to every input step and run the result through an LSTM.

    Args:
      inputs: iterable of per-step input tensors.
      keep_prob: keep probability passed to ``tf.nn.dropout``.
      INPUT_SIZE: input size passed to ``LSTMCell``.
      HIDDEN_SIZE: number of units of the ``LSTMCell``.
      SEQ_LENGTH: accepted for interface compatibility; not used here.

    Returns:
      The list of outputs returned by ``rnn``.
    """
    weight_init = tf.random_uniform_initializer(-0.01, 0.01)
    lstm = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=weight_init)

    dropped_inputs = []
    for step_input in inputs:
        dropped_inputs.append(tf.nn.dropout(step_input, keep_prob))

    # No explicit initial state or sequence lengths; dtype is required so
    # that rnn can create the zero state itself.
    outputs, _ = rnn(lstm, dropped_inputs,
                     initial_state=None,
                     sequence_length=None,
                     dtype=tf.float32)
    return outputs
def build_lstm_model(self):
    """Unroll a BasicLSTMCell over ``self.inputs`` and return the last output.

    ``self.inputs`` is split along axis 0 into ``self.batch_size`` pieces,
    each of which is fed to the LSTM as one step; the cell starts from a
    zero state created for a batch of size 1.

    Returns:
      The last element of the output list produced by ``rnn.rnn``.
    """
    lstm = rnn_cell.BasicLSTMCell(self.input_size)
    zero_state = lstm.zero_state(1, dtype=tf.float32)
    steps = tf.split(0, self.batch_size, self.inputs)
    outputs, _ = rnn.rnn(lstm, steps, zero_state)
    return outputs[-1]
def rnn_estimator(X, y):
    """RNN estimator with target predictor function on top.

    Relies on names from the enclosing scope: ``input_op_fn``, ``cell_type``,
    ``bidirection``, ``rnn_size``, ``num_layers``, ``sequence_length``,
    ``initial_state`` and ``target_predictor_fn``.

    Fix relative to the previous revision: the bidirectional branch called
    ``rnn.bidirectional_rnn`` without the ``inputs`` argument and with an
    ``initial_state`` keyword that function does not accept.  It now
    receives ``X``, a ``dtype`` and per-direction initial states.

    Args:
      X: raw input, transformed by ``input_op_fn`` before use.
      y: targets, forwarded to ``target_predictor_fn``.

    Returns:
      Whatever ``target_predictor_fn`` returns for the last encoding.

    Raises:
      ValueError: if ``cell_type`` is not 'rnn', 'gru' or 'lstm'.
    """
    X = input_op_fn(X)
    if cell_type == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif cell_type == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif cell_type == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise ValueError(
            "cell_type {} is not supported. ".format(cell_type))
    if bidirection:
        # forward direction cell
        rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        # backward direction cell
        rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        # NOTE(review): assumes this TensorFlow release's bidirectional_rnn
        # returns the list of per-step outputs (later releases return a
        # tuple) -- confirm against the pinned version.
        encoding = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell, X,
                                         dtype=tf.float32,
                                         sequence_length=sequence_length,
                                         initial_state_fw=initial_state,
                                         initial_state_bw=initial_state)
    else:
        cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        _, encoding = rnn.rnn(cell, X, dtype=tf.float32,
                              sequence_length=sequence_length,
                              initial_state=initial_state)
    return target_predictor_fn(encoding[-1], y)
def _testDoubleInput(self, use_gpu):
    """An LSTM fed float64 inputs and state yields float64 outputs.

    Args:
      use_gpu: forwarded to ``self.test_session``.
    """
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_proj_shards = 4
    num_unit_shards = 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
        # The same float64 placeholder is used for all 10 time steps.
        step_input = tf.placeholder(tf.float64)
        inputs = 10 * [step_input]

        cell = rnn_cell.LSTMCell(
            num_units,
            input_size=input_size,
            use_peepholes=True,
            num_proj=num_proj,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards,
            initializer=initializer)

        # The dtype comes from the explicit float64 zero state, not from a
        # dtype argument.
        zero_state = cell.zero_state(batch_size, tf.float64)
        outputs, _ = rnn.rnn(cell, inputs, initial_state=zero_state)
        self.assertEqual(len(outputs), len(inputs))

        tf.initialize_all_variables().run()

        random_input = np.random.randn(batch_size, input_size)
        input_value = np.asarray(random_input, dtype=np.float64)
        output_values = sess.run(outputs, feed_dict={step_input: input_value})
        self.assertEqual(output_values[0].dtype, input_value.dtype)
def tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell, loop_function=None, dtype=tf.float32, scope=None):
    """RNN sequence-to-sequence model with tied encoder and decoder parameters.

    The encoder RNN turns ``encoder_inputs`` into a state vector; the decoder
    is then started from the last encoder state and run on
    ``decoder_inputs``.  Encoder and decoder use the same RNN cell and, via
    variable reuse, the same parameters.

    Args:
      encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      loop_function: if not None, this function will be applied to the i-th
        output in order to generate the i+1-th input, and decoder_inputs will
        be ignored, except for the first element ("GO" symbol); see
        rnn_decoder for details.
      dtype: The dtype of the initial state of the rnn cell
        (default: tf.float32).
      scope: VariableScope for the created subgraph;
        default: "tied_rnn_seq2seq".

    Returns:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x cell.output_size] containing the generated
        outputs.
      states: The state of each decoder cell in each time-step. This is a
        list with length len(decoder_inputs) -- one item for each time-step.
        Each item is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    with tf.variable_scope("combined_tied_rnn_seq2seq"):
        shared_scope = scope or "tied_rnn_seq2seq"

        _, enc_states = rnn.rnn(
            cell, encoder_inputs, dtype=dtype, scope=shared_scope)
        final_encoder_state = enc_states[-1]

        # From here on, variables created by the encoder are reused, which is
        # what ties the decoder's parameters to the encoder's.
        tf.get_variable_scope().reuse_variables()

        return rnn_decoder(
            decoder_inputs,
            final_encoder_state,
            cell,
            loop_function=loop_function,
            scope=shared_scope)
def RNN(x, input_size, num_hidden):
    """Build a single-output LSTM regressor, creating its own variables.

    Reads the module-level name ``seq_len`` for the number of time steps.

    Args:
      x: input tensor; axes 0 and 1 are swapped before use (batch and steps).
      input_size: size of the last axis of ``x``.
      num_hidden: width of the hidden projection and of the LSTM.

    Returns:
      The linear read-out (one column) of the last LSTM output.
    """
    # Variables are created in the same order as before:
    # hidden weights, output weights, hidden bias, output bias.
    hidden_w = tf.Variable(tf.random_normal([input_size, num_hidden]))
    out_w = tf.Variable(tf.random_normal([num_hidden, 1]))
    hidden_b = tf.Variable(tf.random_normal([num_hidden]))
    out_b = tf.Variable(tf.random_normal([1]))
    weights = {'hidden': hidden_w, 'out': out_w}
    biases = {'hidden': hidden_b, 'out': out_b}

    # Permute n_steps and batch_size.
    time_major = tf.transpose(x, [1, 0, 2])
    # Reshape to prepare input to hidden activation:
    # (n_steps*batch_size, n_input).
    flat = tf.reshape(time_major, [-1, input_size])
    projected = tf.matmul(flat, weights['hidden']) + biases['hidden']
    # n_steps * (batch_size, n_hidden)
    step_inputs = tf.split(0, seq_len, projected)

    lstm_cell = rnn_cell.BasicLSTMCell(num_hidden, forget_bias=1.0)
    # outputs is a list of seq_len elements, each a tensor of size
    # [batch_size, num_units].
    outputs, states = rnn.rnn(lstm_cell, step_inputs, dtype=tf.float32)

    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def rnn_estimator(X, y):
    """RNN estimator with target predictor function on top.

    Relies on names from the enclosing scope: ``input_op_fn``, ``cell_type``,
    ``bidirection``, ``rnn_size``, ``num_layers``, ``sequence_length``,
    ``initial_state`` and ``target_predictor_fn``.

    Fix relative to the previous revision: ``rnn.bidirectional_rnn`` was
    invoked without its ``inputs`` argument and with an unsupported
    ``initial_state`` keyword.  The call now passes ``X``, a ``dtype`` and
    the forward/backward initial states.

    Args:
      X: raw input, transformed by ``input_op_fn`` before use.
      y: targets, forwarded to ``target_predictor_fn``.

    Returns:
      Whatever ``target_predictor_fn`` returns for the last encoding.

    Raises:
      ValueError: if ``cell_type`` is not 'rnn', 'gru' or 'lstm'.
    """
    X = input_op_fn(X)

    if cell_type == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif cell_type == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif cell_type == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise ValueError("cell_type {} is not supported. ".format(cell_type))

    if bidirection:
        # forward direction cell
        rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        # backward direction cell
        rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        # NOTE(review): assumes bidirectional_rnn returns the list of
        # per-step outputs in the TensorFlow release in use (later releases
        # return a tuple) -- confirm against the pinned version.
        encoding = rnn.bidirectional_rnn(
            rnn_fw_cell,
            rnn_bw_cell,
            X,
            dtype=tf.float32,
            sequence_length=sequence_length,
            initial_state_fw=initial_state,
            initial_state_bw=initial_state)
    else:
        cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        _, encoding = rnn.rnn(
            cell,
            X,
            dtype=tf.float32,
            sequence_length=sequence_length,
            initial_state=initial_state)

    return target_predictor_fn(encoding[-1], y)
def _shared_layer(input_data, config):
    """Build the shared stacked-LSTM encoder.

    Reads ``is_training`` from the enclosing scope.

    Args:
      input_data: tensor of size batch_size x num_steps x embedding size.
      config: configuration object; this function reads ``encoder_size``,
        ``num_steps``, ``keep_prob``, ``num_shared_layers`` and
        ``batch_size`` from it.

    Returns:
      A pair ``(encoder_outputs, initial_state)``.
      NOTE(review): the second element is the zero initial state, not the
      encoder states computed by the RNN -- confirm this is what callers
      expect.
    """
    base_cell = rnn_cell.BasicLSTMCell(config.encoder_size)

    # One batch_size x embedding tensor per time step.
    inputs = []
    for step_slice in tf.split(1, config.num_steps, input_data):
        inputs.append(tf.squeeze(step_slice, [1]))

    # Output dropout is only applied while training.
    use_dropout = is_training and config.keep_prob < 1
    layer_cell = base_cell
    if use_dropout:
        layer_cell = rnn_cell.DropoutWrapper(
            base_cell, output_keep_prob=config.keep_prob)

    stacked = rnn_cell.MultiRNNCell([layer_cell] * config.num_shared_layers)
    initial_state = stacked.zero_state(config.batch_size, tf.float32)

    encoder_outputs, encoder_states = rnn.rnn(
        stacked, inputs, initial_state=initial_state, scope="encoder_rnn")

    return encoder_outputs, initial_state
def LSTM(x, y):
    """Run an LSTM over ``x`` and score every time step against ``y``.

    Args:
      x: inputs; transformed together with ``y`` by ``reshape(x, y, 1)``.
      y: targets; after the reshape, ``y[i]`` is the target of step ``i``.

    Returns:
      A pair ``(predictions, cost_all)`` of lists with one entry per LSTM
      output: the logits of that step and the mean softmax cross-entropy
      of that step.
    """
    x, y = reshape(x, y, 1)

    W_out = weight_variable([FLAGS.hidden_size, FLAGS.out_size])
    b_out = bias_variable([FLAGS.out_size])

    with tf.variable_scope('lstm1'):
        lstm_cell = rnn_cell.BasicLSTMCell(FLAGS.hidden_size, forget_bias=1.0)
        outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)

    predictions = []
    cost_all = []
    for step, step_output in enumerate(outputs):
        logits = tf.matmul(step_output, W_out) + b_out
        # Logits and labels must have the same size, otherwise TensorFlow
        # fails with "logits and labels must be same size".
        step_loss = tf.nn.softmax_cross_entropy_with_logits(logits, y[step])
        cost_all.append(tf.reduce_mean(step_loss))
        predictions.append(logits)

    return predictions, cost_all
def __init__(self, vocab_size, batch_size, sequece_length, embedding_size, num_classes):
    """Build an LSTM sequence tagger with a weighted loss and accuracy.

    Args:
      vocab_size: number of rows of the embedding table.
      batch_size: accepted for interface compatibility; the graph uses
        placeholders with an unspecified batch dimension.
      sequece_length: number of time steps per example.
      embedding_size: width of the embedding vectors.
      num_classes: number of output classes per time step.
    """
    self.hyperParam = {
        "hidden_num": 20,
        "l2_lamda": 3,
        "dropout_keep_prob": 0.5,
    }
    hidden_num = self.hyperParam["hidden_num"]
    keep_prob = self.hyperParam["dropout_keep_prob"]

    l2_loss = tf.constant(0.0)
    self.dropout_keep_prob = 0.5

    rnnCell = rnn_cell.BasicLSTMCell(hidden_num, forget_bias=1.0)

    self.input_data = tf.placeholder(
        tf.int32, shape=[None, sequece_length], name="input_data")
    self.weights = tf.placeholder(
        tf.int32, shape=[None, sequece_length], name="weights")
    self.output_data = tf.placeholder(
        tf.int32, [None, sequece_length], name="output_data")
    # Dynamic batch size, used to normalise the loss.
    dynamic_batch = tf.shape(self.output_data)[0]

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, embedding_size])
        embedded = tf.nn.embedding_lookup(embedding, self.input_data)

    # One tensor per time step, with the split axis removed.
    self.inputs = [
        tf.squeeze(step_slice, [1])
        for step_slice in tf.split(1, sequece_length, embedded)
    ]
    self.output, self.states = rnn.rnn(rnnCell, self.inputs, dtype=tf.float32)

    # Add dropout
    with tf.name_scope("dropout"):
        self.h_drop = [
            tf.nn.dropout(step_output, keep_prob)
            for step_output in self.output
        ]

    with tf.name_scope("result"):
        W = tf.Variable(
            tf.truncated_normal([hidden_num, num_classes], stddev=0.1),
            name="W")
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss = l2_loss + tf.nn.l2_loss(W)
        l2_loss = l2_loss + tf.nn.l2_loss(b)
        flat_output = tf.reshape(tf.concat(1, self.h_drop), [-1, hidden_num])
        logits = tf.matmul(flat_output, W) + b
        self.scores = logits

    with tf.name_scope("loss"):
        flat_targets = tf.reshape(self.output_data, [-1])
        flat_weights = tf.reshape(tf.cast(self.weights, "float32"), [-1])
        loss = seq2seq.sequence_loss_by_example(
            [self.scores], [flat_targets], [flat_weights], num_classes)
        # Per-example loss plus the L2 penalty on the output layer.
        data_term = tf.reduce_sum(loss) / tf.cast(dynamic_batch, "float32")
        self.loss = data_term + self.hyperParam["l2_lamda"] * l2_loss

    with tf.name_scope("accurcy"):
        self.predictions = tf.argmax(
            tf.reshape(self.scores, [-1, sequece_length, num_classes]), 2)

        # self.kk: accuracy weighted by self.weights, computed as the dot
        # product of the hit vector and the weight vector over their sum.
        hits = tf.cast(
            tf.equal(self.predictions, tf.cast(self.output_data, "int64")),
            "float32")
        hits_row = tf.expand_dims(tf.reshape(hits, [-1]), 0)
        weights_row = tf.expand_dims(
            tf.cast(tf.reshape(self.weights, [-1]), "float32"), 0)
        weighted_hits = tf.squeeze(
            tf.matmul(hits_row, weights_row, transpose_b=True))
        total_weight = tf.reduce_sum(tf.cast(self.weights, "float32"), [0, 1])
        self.kk = weighted_hits / total_weight

        # self.accuracy: plain (unweighted) mean over all positions.
        matches = tf.equal(
            self.predictions, tf.cast(self.output_data, "int64"))
        self.accuracy = tf.reduce_mean(
            tf.cast(matches, "float32"), name="accrucy")
def rnn_model(X, y):
    """Embed word ids, encode them with a GRU and classify the last encoding.

    Reads the module-level names ``n_words``, ``EMBEDDING_SIZE`` and
    ``MAX_DOCUMENT_LENGTH``.

    Args:
      X: word ids, embedded through ``skflow.ops.categorical_variable``.
      y: targets, forwarded to ``skflow.models.logistic_regression``.

    Returns:
      The result of ``skflow.models.logistic_regression``.
    """
    word_vectors = skflow.ops.categorical_variable(
        X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')

    # One tensor per document position, with the split axis removed.
    word_list = []
    for position_slice in tf.split(1, MAX_DOCUMENT_LENGTH, word_vectors):
        word_list.append(tf.squeeze(position_slice, [1]))

    gru = rnn_cell.GRUCell(EMBEDDING_SIZE)
    _, encoding = rnn.rnn(gru, word_list, dtype=tf.float32)

    last_encoding = encoding[-1]
    return skflow.models.logistic_regression(last_encoding, y)
def __init__(self, vocab_size, size=256, depth=2, learning_rate=1e-4, batch_size=32, keep_prob=0.1, num_steps=100, checkpoint_dir="checkpoint", forward_only=False):
    """Initialize the parameters for an Deep Bidirectional LSTM model.

    Fixes relative to the previous revision:
      * ``self.num_steps`` was computed from the undefined name
        ``seq_length``; it now uses the ``num_steps`` argument.
      * A leftover ``ipdb.set_trace()`` breakpoint was removed.
      * The dropout branch wrapped an undefined ``lstm_cell`` after the
        stacked cell had already been built; dropout is now applied to the
        layer cell before stacking.
      * ``self.output`` (never assigned) was read where ``self.outputs``
        was meant.

    Args:
      vocab_size: int, The dimensionality of the input vocab
      size: int, The dimensionality of the inputs into the Deep LSTM cell
        [32, 64, 256]
      depth: int, number of stacked LSTM layers
      learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      num_steps: int, The max time unit [100]
      checkpoint_dir: not used in the portion of the constructor shown here
      forward_only: when True, no dropout wrapper is added
    """
    super(DeepBiLSTM, self).__init__()

    self.vocab_size = int(vocab_size)
    self.size = int(size)
    self.depth = int(depth)
    self.learning_rate = float(learning_rate)
    self.batch_size = int(batch_size)
    self.keep_prob = float(keep_prob)
    self.num_steps = int(num_steps)

    self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.num_steps])
    self.input_lengths = tf.placeholder(tf.int64, [self.batch_size])

    with tf.device("/cpu:0"):
        # NOTE(review): the positional arguments -0.1 and 0.1 are taken by
        # tf.truncated_normal as mean and stddev; if a uniform range of
        # [-0.1, 0.1] was intended this should be tf.random_uniform.
        self.emb = tf.Variable(
            tf.truncated_normal([self.vocab_size, self.size], -0.1, 0.1),
            name='emb')
        # Transposed so the leading axis is time, ready for tf.unpack below.
        self.embed_inputs = tf.nn.embedding_lookup(
            self.emb, tf.transpose(self.inputs))

    self.cell = rnn_cell.BasicLSTMCell(self.size, forget_bias=0.0)

    # Dropout is applied to each layer's output, and only when training.
    layer_cell = self.cell
    if not forward_only and self.keep_prob < 1:
        layer_cell = rnn_cell.DropoutWrapper(
            layer_cell, output_keep_prob=self.keep_prob)

    self.stacked_cell = rnn_cell.MultiRNNCell([layer_cell] * self.depth)
    self.initial_state = self.stacked_cell.zero_state(
        self.batch_size, tf.float32)

    self.outputs, self.states = rnn.rnn(
        self.stacked_cell,
        tf.unpack(self.embed_inputs),
        dtype=tf.float32,
        sequence_length=self.input_lengths,
        initial_state=self.initial_state)

    # Sum of the per-step outputs over the time axis.
    output = tf.reduce_sum(tf.pack(self.outputs), 0)
def RNN(X, num_words_in_X, hidden_size, max_input_size):
    """Zero-pad ``X`` to a fixed length and run it through a GRU RNN.

    Reads the module-level name ``WORD_VECTOR_LENGTH``.

    Change relative to the previous revision: the Python 2-only ``print``
    statements were replaced by single-argument ``print(...)`` calls, which
    produce the same output on Python 2 and also parse on Python 3.

    Args:
      X: input word vectors; flattened and padded with zeros up to
        ``max_input_size * WORD_VECTOR_LENGTH`` elements.
      num_words_in_X: passed to ``rnn.rnn`` as ``sequence_length``.
      hidden_size: number of units of the GRU cell.
      max_input_size: number of time steps after padding.

    Returns:
      A triple ``(output, state, X_padded)``: the two values returned by
      ``rnn.rnn`` and the padded input of shape
      [max_input_size, WORD_VECTOR_LENGTH].
    """
    # Reshape `X` as a vector. -1 means "set this dimension automatically".
    X_as_vector = tf.reshape(X, [-1])

    # Create another vector containing zeroes to pad `X` to
    # (MAX_INPUT_LENGTH * WORD_VECTOR_LENGTH) elements.
    padding_size = ([max_input_size * WORD_VECTOR_LENGTH]
                    - tf.shape(X_as_vector))
    zero_padding = tf.zeros(padding_size, dtype=X.dtype)

    # Concatenate `X_as_vector` with the padding.
    X_padded_as_vector = tf.concat(0, [X_as_vector, zero_padding])

    # Reshape the padded vector to the desired shape.
    X_padded = tf.reshape(X_padded_as_vector,
                          [max_input_size, WORD_VECTOR_LENGTH])

    # Split X into a list of tensors of length MAX_INPUT_LENGTH where each
    # tensor is a 1xWORD_VECTOR_LENGTH vector of the word vectors.
    # TODO change input to be a list of tensors of length MAX_INPUT_LENGTH
    # where each tensor is a BATCH_SIZExWORD_VECTOR_LENGTH vector
    X = tf.split(0, max_input_size, X_padded)
    print("Length X: {}".format(len(X)))

    gru_cell = rnn_cell.GRUCell(num_units=hidden_size,
                                input_size=WORD_VECTOR_LENGTH)
    output, state = rnn.rnn(gru_cell, X,
                            sequence_length=num_words_in_X,
                            dtype=tf.float32)
    print("State: {}".format(state))

    return output, state, X_padded
def _testCellClipping(self, use_gpu):
    """With ``cell_clip=0.0`` every LSTM output must be exactly zero.

    Args:
      use_gpu: forwarded to ``self.test_session``.
    """
    num_units = 3
    input_size = 5
    batch_size = 2
    num_steps = 10

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(
            -0.01, 0.01, seed=self._seed)
        cell = rnn_cell.LSTMCell(
            num_units,
            input_size,
            use_peepholes=True,
            cell_clip=0.0,
            initializer=initializer)
        # One placeholder reused for every time step.
        step_input = tf.placeholder(
            tf.float32, shape=(batch_size, input_size))
        inputs = num_steps * [step_input]

        outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)

        self.assertEqual(len(outputs), len(inputs))
        expected_shape = [batch_size, num_units]
        for output in outputs:
            self.assertEqual(output.get_shape().as_list(), expected_shape)

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        output_values = sess.run(outputs, feed_dict={step_input: input_value})

        for output_value in output_values:
            # if cell c is clipped to 0, tanh(c) = 0 => m==0
            self.assertAllEqual(
                output_value, np.zeros((batch_size, num_units)))
def create_model(self):
    """Build the stacked-LSTM language model graph and its saver.

    Change relative to the previous revision: the Python 2-only ``print``
    statements were replaced by portable equivalents.  The combined console
    output is still "Setting up model done" followed by a newline.
    """
    # Progress message without a newline; "done" below completes the line.
    sys.stdout.write("Setting up model ")
    sys.stdout.flush()

    # placeholders for data + targets
    self._input_data = tf.placeholder(
        tf.int32, shape=(self.batch_size, self.num_steps))
    self._targets = tf.placeholder(
        tf.int32, [self.batch_size, self.num_steps])

    # set up lookup function; the embedding is a constant built from
    # self.saved_embedding, not a trainable variable
    self.embedding = tf.constant(self.saved_embedding, name="embedding")
    self.inputs = tf.nn.embedding_lookup(self.embedding, self._input_data)

    # lstm model
    self.lstm_cell = rnn_cell.BasicLSTMCell(self.lstm_size)
    self.cell = rnn_cell.MultiRNNCell([self.lstm_cell] * self.num_layers)
    self._initial_state = self.cell.zero_state(self.batch_size, tf.float32)

    from tensorflow.models.rnn import rnn

    # One tensor per time step, with the split axis removed.
    self.inputs = [
        tf.squeeze(input_, [1])
        for input_ in tf.split(1, self.num_steps, self.inputs)
    ]
    self.outputs, self.states = rnn.rnn(
        self.cell, self.inputs, initial_state=self._initial_state)

    self.output = tf.reshape(tf.concat(1, self.outputs), [-1, self.lstm_size])
    self.softmax_w = tf.get_variable(
        "softmax_w", [self.lstm_size, self.vocab_size])
    self.softmax_b = tf.get_variable("softmax_b", [self.vocab_size])
    self.logits = tf.matmul(self.output, self.softmax_w) + self.softmax_b

    self._final_state = self.states
    self.saver = tf.train.Saver()

    # delete data to save memory if network is used for sampling only
    if self.only_for_sampling:
        del self.data

    print("done")
def __init__(self, session, input_pipeline):
    """Build the graph: word RNN started from a transformed image feature.

    Reads the module-level ``config`` and the helpers ``weight_init``,
    ``bias_init`` and ``WordCell``.

    Args:
      session: stored on the instance as ``self.session``.
      input_pipeline: provides ``text_input``, ``image_input``,
        ``lens_input`` and ``result_input``; stored as
        ``self.input_pipeline``.
    """
    self.session = session
    self.input_pipeline = input_pipeline

    # Text branch: embed the tokens and make one tensor per position.
    text_embeddings = weight_init(config.words_count + 2, config.hidden_count)
    looked_up = tf.nn.embedding_lookup(
        text_embeddings, input_pipeline.text_input)
    inputs = []
    for position_slice in tf.split(1, config.max_len, looked_up):
        inputs.append(tf.squeeze(position_slice, [1]))

    # Image branch: a linear transform of the image features.
    w_image = weight_init(config.image_features_count, config.hidden_count)
    b_image = bias_init([config.hidden_count])
    image_transform = tf.matmul(input_pipeline.image_input, w_image) + b_image

    # The initial state is a block of zeros followed by the transformed
    # image features.
    zeros_part = tf.zeros_like(image_transform)
    hidden_start = tf.concat(1, [zeros_part, image_transform])

    cell = WordCell(config.hidden_count, config.output_words_count + 1)
    probs_list, self.hidden = rnn.rnn(
        cell=cell,
        inputs=inputs,
        initial_state=hidden_start,
        sequence_length=input_pipeline.lens_input)

    # Stack the per-step results along a new axis 1.
    expanded = [tf.expand_dims(prob, 1) for prob in probs_list]
    self.probs = tf.concat(1, expanded)

    # Per-sample score normalised by the sample's length; the loss is the
    # negated mean over samples.
    float_lens = tf.cast(input_pipeline.lens_input, 'float')
    summed = tf.reduce_sum(self.probs * input_pipeline.result_input, [1, 2])
    sample_losses = summed / float_lens
    self.loss = -tf.reduce_mean(sample_losses)

    self.train_task = tf.train.AdamOptimizer(1e-4).minimize(self.loss)
    self.loss_summary = tf.scalar_summary('loss', self.loss)
    self.saver = tf.train.Saver()
def build_generator(self):
    """Build the inference graph, reusing already-created variables.

    Returns:
        A tuple ``(video, video_mask, generated_HL, lstmRNN_variables)``:
        the two input placeholders, the masked per-step score tensor
        (softmax column 1 for every sample), and the variables whose name
        starts with the "RNN" variable-scope name.
    """
    tf.get_variable_scope().reuse_variables()

    frame_shape = [self.batch_size, self.n_lstm_steps, self.dim_image]
    video = tf.placeholder(tf.float32, frame_shape)
    video_mask = tf.placeholder(tf.float32,
                                [self.batch_size, self.n_lstm_steps])

    # Embed every frame, then go time-major: n x b x h.
    flat_frames = tf.reshape(video, [-1, self.dim_image])
    embedded = tf.nn.xw_plus_b(
        flat_frames, self.encode_image_W, self.encode_image_b)
    embedded = tf.reshape(
        embedded, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
    embedded = tf.transpose(embedded, [1, 0, 2])

    # Not consumed below; retained so the constructed graph is unchanged.
    _zero_state = tf.zeros([self.batch_size, self.lstm2.state_size])

    step_inputs = tf.reshape(embedded, [-1, self.dim_hidden])  # (n x b) x h
    step_inputs = tf.split(0, self.n_lstm_steps, step_inputs)  # n x (b x h)
    step_outputs, _final_state = rnn.rnn(
        self.lstm_HL_net, step_inputs, dtype=tf.float32)  # n x (b x h)
    batch_major = tf.transpose(tf.pack(step_outputs), [1, 0, 2])  # b x n x h

    def _score_for(sample_idx):
        """Return softmax column 1 for every step of one sample."""
        step_logits = tf.nn.xw_plus_b(
            batch_major[sample_idx, :, :],
            self.embed_HL_W, self.embed_HL_b)  # n x 2
        step_probs = tf.nn.softmax(step_logits)  # n x 2
        return step_probs[:, 1]

    per_sample = [_score_for(idx) for idx in range(self.batch_size)]
    generated_HL = tf.pack(per_sample)  # b x n
    generated_HL = tf.mul(generated_HL, video_mask)  # b x n

    with tf.variable_scope("RNN") as vs:
        lstmRNN_variables = [
            var for var in tf.all_variables()
            if var.name.startswith(vs.name)
        ]
    return video, video_mask, generated_HL, lstmRNN_variables
def RNN(_X, _istate, _weights, _biases):
    """Run a CNN on three time steps, feed them to an LSTM, and classify.

    Args:
        _X: 5-D input; axes 0 and 1 are swapped before use, so the first
            axis indexed below is the step axis.
        _istate: initial state handed to ``rnn.rnn``.
        _weights: dict with entries ``'out1'`` and ``'out2'``.
        _biases: dict with entries ``'out1'`` and ``'out2'``.

    Returns:
        The result of the two-layer head applied to the last LSTM output.
    """
    # Permute the first two axes so that steps come first.
    steps_first = tf.transpose(_X, [1, 0, 2, 3, 4])

    # Exactly three steps are consumed, each through the shared CNN helper.
    cnn_features = [CNN(steps_first[t, :, :, :, :]) for t in range(3)]

    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, states = rnn.rnn(lstm_cell, cnn_features, initial_state=_istate)

    # Two-layer head on the last step's output: ReLU layer, then linear.
    last_output = outputs[-1]
    hidden = tf.nn.relu(
        tf.matmul(last_output, _weights['out1']) + _biases['out1'])
    return tf.matmul(hidden, _weights['out2']) + _biases['out2']
def _testProjSharding(self, use_gpu):
    """Smoke-test an LSTMCell with sharded unit and projection weights."""
    num_units, input_size, batch_size = 3, 5, 2
    num_proj, num_proj_shards, num_unit_shards = 4, 4, 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(
            -0.01, 0.01, seed=self._seed)
        # The same placeholder object is repeated for all ten steps.
        shared_input = tf.placeholder(tf.float32, shape=(None, input_size))
        inputs = 10 * [shared_input]

        cell_options = dict(
            input_size=input_size,
            use_peepholes=True,
            num_proj=num_proj,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards,
            initializer=initializer)
        cell = rnn_cell.LSTMCell(num_units, **cell_options)

        outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        self.assertEqual(len(outputs), len(inputs))

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        # Only checks that the graph runs; the values are not inspected.
        sess.run(outputs, feed_dict={inputs[0]: input_value})
def testDynamicCalculation(self):
    """Check outputs and states of ``rnn.rnn`` when ``sequence_length`` is fed."""
    cell = Plus1RNNCell()
    sequence_length = tf.placeholder(tf.int64)
    batch_size = 2
    step_input = tf.placeholder(tf.float32, shape=(batch_size, 5))
    inputs = [step_input] * 10

    with tf.variable_scope("drop_scope"):
        dynamic_outputs, dynamic_states = rnn.rnn(
            cell, inputs, sequence_length=sequence_length, dtype=tf.float32)
    self.assertEqual(len(dynamic_outputs), len(inputs))
    self.assertEqual(len(dynamic_states), len(inputs))

    with self.test_session(use_gpu=False) as sess:
        input_value = np.random.randn(batch_size, 5)
        dynamic_values = sess.run(
            dynamic_outputs,
            feed_dict={inputs[0]: input_value, sequence_length: [2, 3]})
        dynamic_state_values = sess.run(
            dynamic_states,
            feed_dict={inputs[0]: input_value, sequence_length: [2, 3]})

        computed_outputs = dynamic_values[:3]
        padded_outputs = dynamic_values[3:]
        computed_states = dynamic_state_values[:3]
        padded_states = dynamic_state_values[3:]

        # Fully calculated for t = 0, 1, 2.
        for value in computed_outputs:
            self.assertAllClose(value, input_value + 1.0)
        for step, value in enumerate(computed_states):
            expected = 1.0 * (step + 1) * np.ones((batch_size, 5))
            self.assertAllEqual(value, expected)

        # Zeros for t = 3+.
        for value in padded_outputs:
            self.assertAllEqual(value, np.zeros_like(input_value))
        for value in padded_states:
            self.assertAllEqual(value, np.zeros_like(input_value))
def basic_rnn_seq2seq(
        encoder_inputs, decoder_inputs, cell, dtype=tf.float32, scope=None):
    """Basic RNN sequence-to-sequence model.

    An RNN first encodes ``encoder_inputs``; the decoder is then run on
    ``decoder_inputs`` starting from the last encoder state.  Encoder and
    decoder use the same RNN cell type, but don't share parameters.

    Args:
        encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
        decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        dtype: dtype of the initial RNN state (default: tf.float32).
        scope: VariableScope for the created subgraph; defaults to
            "basic_rnn_seq2seq".

    Returns:
        Whatever ``rnn_decoder`` returns; per the original documentation:
        outputs: a list, as long as decoder_inputs, of 2D Tensors shaped
            [batch_size x cell.output_size] holding the generated outputs.
        states: one 2D Tensor [batch_size x cell.state_size] per decoder
            time-step.
    """
    scope_name = scope or "basic_rnn_seq2seq"
    with tf.variable_scope(scope_name):
        _encoder_outputs, encoder_states = rnn.rnn(
            cell, encoder_inputs, dtype=dtype)
        last_encoder_state = encoder_states[-1]
        return rnn_decoder(decoder_inputs, last_encoder_state, cell)
def _testDoubleInput(self, use_gpu):
    """Check that float64 inputs and state give float64 outputs."""
    num_units, input_size, batch_size = 3, 5, 2
    num_proj, num_proj_shards, num_unit_shards = 4, 4, 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(-1, 1, seed=self._seed)
        # One shape-less float64 placeholder shared by all ten steps.
        shared_input = tf.placeholder(tf.float64)
        inputs = 10 * [shared_input]

        cell = rnn_cell.LSTMCell(
            num_units,
            input_size=input_size,
            use_peepholes=True,
            num_proj=num_proj,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards,
            initializer=initializer)

        start_state = cell.zero_state(batch_size, tf.float64)
        outputs, _ = rnn.rnn(cell, inputs, initial_state=start_state)
        self.assertEqual(len(outputs), len(inputs))

        tf.initialize_all_variables().run()
        input_value = np.asarray(
            np.random.randn(batch_size, input_size), dtype=np.float64)
        values = sess.run(outputs, feed_dict={inputs[0]: input_value})
        self.assertEqual(values[0].dtype, input_value.dtype)
def _testProjSharding(self, use_gpu):
    """Run a projected, sharded LSTMCell once to check the graph executes."""
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_proj_shards = 4
    num_unit_shards = 2

    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        initializer = tf.random_uniform_initializer(
            -0.01, 0.01, seed=self._seed)

        # Ten references to a single placeholder.
        step_placeholder = tf.placeholder(
            tf.float32, shape=(None, input_size))
        inputs = [step_placeholder] * 10

        cell = rnn_cell.LSTMCell(
            num_units,
            input_size=input_size,
            use_peepholes=True,
            num_proj=num_proj,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards,
            initializer=initializer)

        rnn_result = rnn.rnn(cell, inputs, dtype=tf.float32)
        outputs, _ = rnn_result
        self.assertEqual(len(outputs), len(inputs))

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        feed = {inputs[0]: input_value}
        sess.run(outputs, feed_dict=feed)
def testEmbeddingAttentionDecoder(self):
    """Check the result counts and shapes of embedding_attention_decoder."""
    with self.test_session() as sess:
        root_init = tf.constant_initializer(0.5)
        with tf.variable_scope("root", initializer=root_init):
            # Encoder: two constant 2x2 inputs through a GRU cell.
            inp = [tf.constant(0.5, shape=[2, 2]) for _ in range(2)]
            cell = rnn_cell.GRUCell(2)
            enc_outputs, enc_states = rnn.rnn(cell, inp, dtype=tf.float32)

            # Stack the encoder outputs along axis 1 as attention states.
            reshaped = [
                tf.reshape(enc_out, [-1, 1, cell.output_size])
                for enc_out in enc_outputs
            ]
            attn_states = tf.concat(1, reshaped)

            # Decoder: three int32 id vectors of length 2.
            dec_inp = [
                tf.constant(symbol, tf.int32, shape=[2])
                for symbol in range(3)
            ]
            dec, mem = seq2seq.embedding_attention_decoder(
                dec_inp, enc_states[-1], attn_states, cell, 4,
                output_size=3)

            sess.run([tf.initialize_all_variables()])

            dec_values = sess.run(dec)
            self.assertEqual(len(dec_values), 3)
            self.assertEqual(dec_values[0].shape, (2, 3))

            mem_values = sess.run(mem)
            self.assertEqual(len(mem_values), 4)
            self.assertEqual(mem_values[0].shape, (2, 2))
def build_model(self):
    """Build the training graph and return the loss with its placeholders.

    Every frame is embedded, the sequence is run through
    ``self.lstm_HL_net``, and a two-class softmax cross-entropy is computed
    per step for each sample in the batch.  The per-step losses are weighted
    by ``HLness_mask`` and normalised by the sum of that mask.

    Returns:
        A tuple ``(loss, video, video_mask, HLness, HLness_mask)``: the
        scalar loss followed by the four input placeholders.
    """
    video = tf.placeholder(
        tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])
    video_mask = tf.placeholder(
        tf.float32, [self.batch_size, self.n_lstm_steps])
    HLness = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
    HLness_mask = tf.placeholder(
        tf.float32, [self.batch_size, self.n_lstm_steps])

    video_flat = tf.reshape(video, [-1, self.dim_image])
    # (batch_size * n_lstm_steps, dim_hidden)
    image_emb = tf.nn.xw_plus_b(
        video_flat, self.encode_image_W, self.encode_image_b)
    image_emb = tf.reshape(
        image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
    image_emb = tf.transpose(image_emb, [1, 0, 2])  # n x b x h

    _X = tf.reshape(image_emb, [-1, self.dim_hidden])  # (n x b) x h
    _X = tf.split(0, self.n_lstm_steps, _X)  # n x (b x h)
    output2, _final_state = rnn.rnn(
        self.lstm_HL_net, _X, dtype=tf.float32)  # n x (b x h)
    output2 = tf.transpose(tf.pack(output2), [1, 0, 2])  # b x n x h

    loss_HL = 0.0
    indices = tf.expand_dims(tf.range(0, self.n_lstm_steps, 1), 1)  # n x 1
    # Iterate over every sample of the batch; the placeholders above are
    # sized by self.batch_size, so the loop bound must match it.
    for ii in range(self.batch_size):
        labels = tf.expand_dims(HLness[ii, :], 1)  # n x 1
        concated = tf.concat(1, [indices, labels])  # n x 2
        # One-hot rows built from (step index, label) coordinates.
        onehot_labels = tf.sparse_to_dense(
            concated, tf.pack([self.n_lstm_steps, 2]), 1.0, 0.0)  # n x 2
        logit_words = tf.nn.xw_plus_b(
            output2[ii, :, :], self.embed_HL_W, self.embed_HL_b)  # n x 2
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logit_words, onehot_labels)  # n
        cross_entropy = tf.mul(cross_entropy, HLness_mask[ii, :])  # n
        loss_HL += tf.reduce_sum(cross_entropy)  # scalar

    loss = loss_HL / tf.reduce_sum(HLness_mask)
    return loss, video, video_mask, HLness, HLness_mask
def seq2seq_f(cell, encoder_inputs, decoder_inputs, loop_output):
    """Build the seq2seq network.

    Args:
        cell: the RNNCell object.
        encoder_inputs: a list of Tensors to feed the encoder.
        decoder_inputs: a list of Tensors to feed the decoder.
        loop_output: when true, the decoder is given a loop function that
            passes the previous output on as the next decoder input;
            otherwise the given decoder inputs are used as they are.

    Returns:
        outputs, states: the two values produced by the decoder call
        (``seq2seq.rnn_decoder`` or ``seq2seq.basic_rnn_seq2seq``).
    """
    if not loop_output:
        # Use the given decoder inputs.
        outputs, states = seq2seq.basic_rnn_seq2seq(
            encoder_inputs, decoder_inputs, cell)
        return outputs, states

    def _feed_previous(prev, i):
        # Simplest construction: the previous output is the next input.
        return prev

    # Call rnn() directly so the decoder can take the loop function.
    _, enc_states = rnn.rnn(cell, encoder_inputs, dtype=tf.float32)
    # enc_states holds every hidden state; only the last one seeds the
    # decoder.
    last_enc_state = enc_states[-1]
    outputs, states = seq2seq.rnn_decoder(
        decoder_inputs, last_enc_state, cell, _feed_previous)
    return outputs, states
def char_rnn_model(X, y):
    """Encode ``X`` byte by byte with a GRU and classify the encoding.

    ``X`` is one-hot encoded with depth 256, split into
    ``MAX_DOCUMENT_LENGTH`` steps and run through a GRU of ``HIDDEN_SIZE``
    units; the second value returned by ``rnn.rnn`` is passed to
    ``skflow.models.logistic_regression`` together with ``y``.
    """
    one_hot_bytes = skflow.ops.one_hot_matrix(X, 256)
    byte_steps = skflow.ops.split_squeeze(
        1, MAX_DOCUMENT_LENGTH, one_hot_bytes)
    gru = rnn_cell.GRUCell(HIDDEN_SIZE)
    _outputs, encoding = rnn.rnn(gru, byte_steps, dtype=tf.float32)
    return skflow.models.logistic_regression(encoding, y)
def build(graph, input, num_steps, hidden_size, num_layers, num_classes,
          is_training):
    """Build a stacked-GRU classifier and return its logits.

    Args:
        graph: graph object used for name scopes and collections.
        input: input tensor; its first static dimension is the batch size.
        num_steps: the number of unrolled steps of the RNN.
        hidden_size: the number of GRU units.
        num_layers: how many times the GRU cell is repeated.
        num_classes: width of the final linear layer.
        is_training: when true the cell is wrapped in a DropoutWrapper
            (with ``output_keep_prob=1.0``).

    Returns:
        The ``xw_plus_b`` op named "logits".
    """
    static_shape = input.get_shape().as_list()
    batch_size = static_shape[0]

    # Add the GRU cell.
    with graph.name_scope("rnn") as scope:
        gru_cell = tf.nn.rnn_cell.GRUCell(
            num_units=hidden_size, input_size=hidden_size)
        if is_training:
            gru_cell = tf.nn.rnn_cell.DropoutWrapper(
                gru_cell, output_keep_prob=1.0)
        stacked_layers = [gru_cell] * num_layers
        cell = tf.nn.rnn_cell.MultiRNNCell(stacked_layers)
        initial_state = cell.zero_state(batch_size, tf.float32)

        # A length-T list of inputs, one tensor per step.
        # NOTE(review): squeeze is called without an axis, so every size-1
        # dimension is dropped, not only the step axis -- confirm intended.
        step_slices = tf.split(1, num_steps, input)
        inputs = [tf.squeeze(step) for step in step_slices]
        print("Inputs to RNN Cell shape:")
        print("[%d, %s]" % (len(inputs), inputs[0].get_shape().as_list()))

        outputs, state = rnn.rnn(cell, inputs, initial_state=initial_state)

        # The returned state (not the per-step outputs) feeds the softmax.
        # NOTE(review): presumably only matches the [hidden_size,
        # num_classes] weights below when num_layers == 1 -- TODO confirm.
        features = state
        print("Outputs from RNN Cell shape:")
        print(features.get_shape().as_list())

    # Add the softmax layer.
    with graph.name_scope("softmax") as scope:
        xavier = tf.contrib.layers.xavier_initializer(
            uniform=True, seed=None, dtype=tf.float32)
        weights = _variable_with_weight_decay(
            name='weights',
            shape=[hidden_size, num_classes],
            initializer=xavier,
            wd=4e-5)
        graph.add_to_collection('softmax_params', weights)

        biases = _variable_on_cpu(
            name='biases',
            shape=[num_classes],
            initializer=tf.constant_initializer(0.0))
        graph.add_to_collection('softmax_params', biases)

        softmax_linear = tf.nn.xw_plus_b(
            features, weights, biases, name="logits")
    return softmax_linear
def final_state_of_rnn_over_embedded_sequence(idx, embedded_seq):
    """Run a GRU over ``embedded_seq`` and return the last step's output.

    The variables live in the scope ``"rnn_<idx>"``; the GRU has
    ``opts.hidden_dim`` units and starts from a zero state sized for
    ``opts.batch_size``.
    """
    scope_name = "rnn_%s" % idx
    with tf.variable_scope(scope_name):
        gru = rnn_cell.GRUCell(opts.hidden_dim)
        zero_state = gru.zero_state(opts.batch_size, tf.float32)
        step_outputs, _states = rnn.rnn(
            gru, embedded_seq, initial_state=zero_state)
        last_output = step_outputs[-1]
        return last_output
def _testShardNoShardEquivalentOutput(self, use_gpu):
    """Check that sharded and unsharded LSTMCells give matching results.

    Two cells with the same constant initializer are built, one with unit
    and projection sharding and one without.  Their outputs and states on
    the same random input must agree to within ``atol=1e-3``.
    """
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_proj_shards = 4
    num_unit_shards = 2
    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        # One shape-less placeholder shared by all ten steps.
        inputs = 10 * [tf.placeholder(tf.float32)]
        initializer = tf.constant_initializer(0.001)

        # Each cell is named after its actual configuration: only
        # cell_shard receives the shard arguments.
        cell_noshard = rnn_cell.LSTMCell(
            num_units, input_size,
            use_peepholes=True,
            initializer=initializer,
            num_proj=num_proj)
        cell_shard = rnn_cell.LSTMCell(
            num_units, input_size,
            num_proj=num_proj,
            use_peepholes=True,
            initializer=initializer,
            num_unit_shards=num_unit_shards,
            num_proj_shards=num_proj_shards)

        with tf.variable_scope("noshard_scope"):
            outputs_noshard, states_noshard = rnn.rnn(
                cell_noshard, inputs, dtype=tf.float32)
        with tf.variable_scope("shard_scope"):
            outputs_shard, states_shard = rnn.rnn(
                cell_shard, inputs, dtype=tf.float32)

        self.assertEqual(len(outputs_noshard), len(inputs))
        self.assertEqual(len(outputs_noshard), len(outputs_shard))

        tf.initialize_all_variables().run()
        input_value = np.random.randn(batch_size, input_size)
        feeds = {x: input_value for x in inputs}
        values_noshard = sess.run(outputs_noshard, feed_dict=feeds)
        values_shard = sess.run(outputs_shard, feed_dict=feeds)
        state_values_noshard = sess.run(states_noshard, feed_dict=feeds)
        state_values_shard = sess.run(states_shard, feed_dict=feeds)

        self.assertEqual(len(values_noshard), len(values_shard))
        self.assertEqual(len(state_values_noshard), len(state_values_shard))
        for (v_noshard, v_shard) in zip(values_noshard, values_shard):
            self.assertAllClose(v_noshard, v_shard, atol=1e-3)
        for (s_noshard, s_shard) in zip(
                state_values_noshard, state_values_shard):
            self.assertAllClose(s_noshard, s_shard, atol=1e-3)
def __init__ (self):
    """Build a small LSTM graph over embedded integer inputs.

    Stores the cell, initial state, input placeholder, embedding variable,
    per-step inputs and the RNN outputs/states on ``self``, and prints the
    first output, the first state and the flattened output tensor.
    """
    # ******* PARAMS *********
    # total vocabulary size (number of rows in the embedding table)
    vocab_size = 50
    # number of LSTM units; also the embedding width
    lstm_size = 2
    # number of time steps fed sequentially
    num_steps = 40
    # number of sequences per batch
    batch_size = 3
    # whether the graph is built for training
    is_training = True

    # ********* SET UP *********
    # make the lstm cell, with size lstm_size
    self.lstm_cell = rnn_cell.BasicLSTMCell(lstm_size, forget_bias=0.0)
    # if in training mode, add a dropout layer on the cell output
    if is_training:
        self.lstm_cell = rnn_cell.DropoutWrapper(
            self.lstm_cell, output_keep_prob=0.5)
    # set initial state to zeroes
    self.initial_state = self.lstm_cell.zero_state(batch_size, tf.float32)

    # define inputs; has size batch_size x num_steps
    self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])

    # The embedding table needs one row per vocabulary entry, because
    # embedding_lookup indexes its rows by the ids in input_data.
    initial = tf.truncated_normal([vocab_size, lstm_size], stddev=0.1)
    self.embedding = tf.Variable(initial)

    # get the inputs from embedded data: one squeezed tensor per step
    embedded = tf.nn.embedding_lookup(self.embedding, self.input_data)
    self.inputs = tf.split(1, num_steps, embedded)
    self.inputs = [tf.squeeze(input_, [1]) for input_ in self.inputs]

    # define outputs
    self.outputs, self.states = rnn.rnn(
        self.lstm_cell, self.inputs, initial_state=self.initial_state)
    print(self.outputs[0])
    print(self.states[0])

    # reshape output into [batch_size * num_steps, lstm_size]
    output = tf.reshape(tf.concat(1, self.outputs), [-1, lstm_size])
    print(output)

    # ********* TRAINING **********
    # quit if not training
    if not is_training:
        return
def __init__(self, is_training, config):
    """Build a stacked-LSTM regressor over a float input sequence.

    Each input column is projected with ``weights_hidden`` and fed through
    ``config.num_layers`` stacked BasicLSTM cells; the last output goes
    through a sigmoid output layer.  The cost is the squared error against
    ``self.targets``.  When ``config.is_training`` is true an Adam training
    op is stored in ``self._train_op``; otherwise it stays a no-op.

    Args:
        is_training: not read by this method; the training branch is
            selected by ``config.is_training``.
            NOTE(review): confirm this is intentional.
        config: object providing ``batch_size``, ``n_hidden``,
            ``num_steps``, ``num_layers``, ``num_features`` and
            ``is_training``.
    """
    self.batch_size = batch_size = config.batch_size
    size = config.n_hidden
    num_steps = config.num_steps

    self._input_data = tf.placeholder(
        tf.float32, (batch_size, config.num_steps))
    self._targets = tf.placeholder(tf.float32, [batch_size, 1])

    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=2.8)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # Defaults used when no training op is built below.
    self._train_op = tf.no_op()
    self._result = -1

    weights_hidden = tf.get_variable(
        "weights_hidden", [config.num_features, config.n_hidden])

    # Project every time-step column of the input into the hidden size.
    inputs = []
    for k in range(num_steps):
        step_column = tf.reshape(
            self._input_data[:, k],
            [config.batch_size, config.num_features])
        inputs.append(tf.matmul(step_column, weights_hidden))

    outputs, states = rnn.rnn(
        cell, inputs, initial_state=self._initial_state)

    # Sigmoid output layer on the last step's output only.
    pred = tf.sigmoid(
        tf.matmul(outputs[-1],
                  tf.get_variable("weights_out", [config.n_hidden, 1])) +
        tf.get_variable("bias_out", [1]))
    self._pred = pred
    self._final_state = states[-1]

    # self.targets is not assigned in this method.
    # NOTE(review): presumably a property exposing self._targets -- confirm.
    self._cost = cost = tf.square((pred[:, 0] - self.targets[:, 0]))
    self._result = tf.abs(pred[0, 0] - self.targets[0, 0])

    if not config.is_training:
        return

    optimizer = tf.train.AdamOptimizer().minimize(cost)
    self._train_op = optimizer
    print("top ", self._train_op)
def get_pred(self, n_input, n_steps, input_val):
    """Perform the forward pass and return the softmax output.

    ``input_val`` is first reshaped by ``Model.reshape_data``, run through
    ``self.lstm_cell`` starting from ``self.istate``, and the last step's
    output is passed through the ``'out'`` weights and biases.
    """
    step_inputs = Model.reshape_data(n_input, n_steps, input_val)
    step_outputs, _ = rnn.rnn(
        self.lstm_cell, step_inputs, initial_state=self.istate)
    last_output = step_outputs[-1]
    logits = tf.matmul(last_output, self.weights['out']) + self.biases['out']
    return tf.nn.softmax(logits)
def _init_seq2seq(self, encoder_inputs, decoder_inputs, cell, feed_previous):
    """Encode ``encoder_inputs`` and decode from the resulting state.

    Args:
        encoder_inputs: inputs passed to ``rnn.rnn``.
        decoder_inputs: inputs passed to ``seq2seq.rnn_decoder``.
        cell: RNN cell used by both encoder and decoder.
        feed_previous: when truthy, the decoder gets a loop function that
            turns its previous output into the next input; otherwise no
            loop function is supplied.

    Returns:
        The value returned by ``seq2seq.rnn_decoder``.
    """
    def inference_loop_function(prev, _):
        # Project the previous output, then mark the per-row maximum with
        # 1.0 and every other entry with 0.0.
        projected = tf.nn.xw_plus_b(prev, self.w_softmax, self.b_softmax)
        row_max = tf.reduce_max(
            projected, reduction_indices=[1], keep_dims=True)
        return tf.to_float(tf.equal(projected, row_max))

    if feed_previous:
        loop_function = inference_loop_function
    else:
        loop_function = None

    with variable_scope.variable_scope('seq2seq'):
        _, final_enc_state = rnn.rnn(
            cell, encoder_inputs, dtype=dtypes.float32)
        return seq2seq.rnn_decoder(
            decoder_inputs, final_enc_state, cell,
            loop_function=loop_function)
def __init__(self, is_training, config):
    """Build the LSTM language-model graph.

    Args:
        is_training: when falsy, construction stops after the cost and
            final state are defined; no training op is created.
        config: object providing ``batch_size``, ``num_steps``,
            ``hidden_size``, ``vocab_size``, ``keep_prob``, ``num_layers``
            and ``max_grad_norm``.
    """
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # Dropout is applied only while training and only if keep_prob < 1.
    use_dropout = is_training and config.keep_prob < 1

    # Slightly better results can be obtained with forget gate biases
    # initialized to 1 but the hyperparameters of the model would need to be
    # different than reported in the paper.
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if use_dropout:
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=config.keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.device('/cpu:0'):
        embedding = tf.get_variable('embedding', [vocab_size, size])
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    if use_dropout:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # One tensor per time step, with the step axis squeezed out.
    step_slices = tf.split(1, num_steps, inputs)
    inputs = [tf.squeeze(step, [1]) for step in step_slices]
    outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state)

    # Flatten all step outputs to rows of width `size` and project them.
    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    softmax_w = tf.get_variable('softmax_w', [size, vocab_size])
    softmax_b = tf.get_variable('softmax_b', [vocab_size])
    self._logits = logits = tf.matmul(output, softmax_w) + softmax_b

    flat_targets = tf.reshape(self._targets, [-1])
    unit_weights = tf.ones([batch_size * num_steps])
    loss = tf.nn.seq2seq.sequence_loss_by_example(
        [logits], [flat_targets], [unit_weights])
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state

    if not is_training:
        return

    # Training: SGD with gradients clipped by global norm.
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    raw_grads = tf.gradients(cost, tvars)
    grads, _ = tf.clip_by_global_norm(raw_grads, config.max_grad_norm)
    # self.lr is not assigned in this method.
    # NOTE(review): presumably a property exposing self._lr -- confirm.
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
def get_encoded_segment(self, segment, reuse):
    """Embed ``segment``, run the encoder over it, return the last output.

    Args:
        segment: sequence of id tensors; it is packed, looked up in
            ``self._embedding_matrix`` and split back into
            ``len(segment)`` steps.
        reuse: forwarded to the "encoder" variable scope.

    Returns:
        The last element of the encoder outputs.
    """
    packed_ids = tf.pack(segment)
    embedded = tf.nn.embedding_lookup(self._embedding_matrix, packed_ids)
    step_slices = tf.split(0, len(segment), embedded)
    # Drop the leading length-1 axis that split leaves on every piece.
    step_inputs = [tf.squeeze(piece, [0]) for piece in step_slices]

    with tf.variable_scope("encoder", reuse=reuse):
        encoder_outputs, encoder_states = rnn.rnn(
            self.encoder, step_inputs,
            initial_state=self._initial_encoder_state)
    return encoder_outputs[-1]
def RNN(x, weight, biases):
    """Run a BasicLSTM over ``x`` and apply the output layer to the last step.

    Relies on the module-level names ``dim_input``, ``num_steps`` and
    ``dim_hidden``.

    Args:
        x: input of shape [batch_size, num_steps, dim_input].
        weight: dict whose ``'out'`` entry is the output weight matrix.
        biases: dict whose ``'out'`` entry is the output bias.

    Returns:
        ``outputs[-1] * weight['out'] + biases['out']`` (matrix product
        followed by the bias add).
    """
    # Time-major, flattened, then split into one tensor per step.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, dim_input])
    x = tf.split(0, num_steps, x)

    lstm_cell = rnn_cell.BasicLSTMCell(dim_hidden, forget_bias=1.0)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)

    # The bias is added to the matmul result, not to the weight matrix.
    return tf.matmul(outputs[-1], weight['out']) + biases['out']
def _make_graph(self):
    """Build the encoder, pointer-network decoder and critic graphs.

    Populates ``self.encoder_states``, ``self.a_pred``,
    ``self.decoder_inputs``, ``self.decoder_states``, ``self.a_explore``
    and the ``self.critic_*`` attributes, then calls
    ``self._make_q_targets()``.
    """
    # Encode sequence.
    # TODO: MultilayerRNN?
    encoder_cell = util.GRUCell(self.input_dim, self.spec.policy_dims[0])
    _, self.encoder_states = rnn.rnn(encoder_cell, self.inputs,
                                     dtype=tf.float32, scope="encoder")
    assert len(self.encoder_states) == self.seq_length # DEV

    # Stack along a new axis 1 to form the "attention states" tensor.
    # NOTE(review): this is built from `self.inputs`, not from
    # `self.encoder_states`, although the original comment described it as
    # reshaping the encoder states to `batch_size * seq_length *
    # policy_dim` -- confirm which is intended.
    attn_states = tf.concat(
        1, [tf.expand_dims(state_t, 1) for state_t in self.inputs])

    # Build a simple GRU-powered recurrent decoder cell.
    decoder_cell = util.GRUCell(self.input_dim, self.spec.policy_dims[0])

    # Prepare a dummy decoder input. This will only be used on the first
    # timestep; in subsequent timesteps, the `loop_function` we provide
    # will be used to dynamically calculate new input values.
    batch_size = tf.shape(self.inputs[0])[0]
    dec_inp_shape = tf.pack([batch_size, decoder_cell.input_size])
    dec_inp_dummy = tf.zeros(dec_inp_shape, dtype=tf.float32)
    # The batch dimension is only known at run time; pin the static width.
    dec_inp_dummy.set_shape((None, decoder_cell.input_size))
    dec_inp = [dec_inp_dummy] * self.seq_length

    # Build pointer-network decoder, seeded with the last encoder state.
    self.a_pred, dec_states, dec_inputs = ptr_net_decoder(
        dec_inp, self.encoder_states[-1], attn_states, decoder_cell,
        loop_function=self._loop_function(), scope="decoder")
    # Store dynamically calculated inputs -- critic may want to use these
    self.decoder_inputs = dec_inputs
    # Strip the first entry (the initial state).
    self.decoder_states = dec_states[1:]

    # Use noiser to build exploratory rollouts.
    self.a_explore = self.noiser(self.inputs, self.a_pred)

    # Now "dereference" the soft pointers produced by the policy network.
    a_pred_deref = self._deref_rollout(self.a_pred)
    a_explore_deref = self._deref_rollout(self.a_explore)

    # Build main model: recurrently apply a critic over the entire rollout.
    # The second call passes reuse=True.
    _, self.critic_on, self.critic_on_track = self._critic(a_pred_deref)
    self.critic_off_pre, self.critic_off, self.critic_off_track = \
        self._critic(a_explore_deref, reuse=True)

    self._make_q_targets()
def unidirectional_lstm(inputs, keep_prob, INPUT_SIZE, HIDDEN_SIZE,
                        SEQ_LENGTH):
    """Apply dropout to every input and run a single LSTM over them.

    Args:
        inputs: iterable of per-step input tensors.
        keep_prob: keep probability passed to ``tf.nn.dropout``.
        INPUT_SIZE: input size given to ``LSTMCell``.
        HIDDEN_SIZE: number of LSTM units.
        SEQ_LENGTH: accepted but not read by this function.

    Returns:
        The list of outputs returned by ``rnn``.
    """
    weight_init = tf.random_uniform_initializer(-0.01, 0.01)
    lstm = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=weight_init)
    dropped_inputs = [
        tf.nn.dropout(step_input, keep_prob) for step_input in inputs
    ]
    step_outputs, _ = rnn(
        lstm,
        dropped_inputs,
        initial_state=None,
        sequence_length=None,
        dtype=tf.float32)
    return step_outputs
def __init__(self, args, deterministic=False):
    """Build a sequence classifier with the cell type named by ``args.model``.

    Args:
        args: options object; reads ``model``, ``rnn_size``, ``bn_level``
            (for 'bn-lstm'), ``num_layers``, ``seq_length``, ``batch_size``,
            ``vocab_size`` and ``label_size``.
        deterministic: initial value of the 'deterministic' variable
            (when training, set to False; when testing, set to True).

    Raises:
        Exception: if ``args.model`` is not one of 'rnn', 'gru', 'lstm'
            or 'bn-lstm'.
    """
    self.args = args

    model_name = args.model
    if model_name == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif model_name == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif model_name == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    elif model_name == 'bn-lstm':
        cell_fn = BNLSTMCell
    else:
        raise Exception('model type not supported: {}'.format(args.model))

    deterministic = tf.Variable(deterministic, name='deterministic')

    # Only the batch-normalised cell takes the extra arguments.
    if args.model == 'bn-lstm':
        layer_cell = cell_fn(
            args.rnn_size, bn=args.bn_level, deterministic=deterministic)
    else:
        layer_cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn_cell.MultiRNNCell([layer_cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int64, [None, args.seq_length])
    # The target is a class label, one per example.
    self.targets = tf.placeholder(tf.int64, [None, ])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('embeddingLayer'):
        with tf.device('/cpu:0'):
            W = tf.get_variable('W', [args.vocab_size, args.rnn_size])
            embedded = tf.nn.embedding_lookup(W, self.input_data)

            # Split along axis 1 into seq_length pieces and squeeze that
            # axis out of every piece.
            step_slices = tf.split(1, args.seq_length, embedded)
            inputs = [tf.squeeze(piece, [1]) for piece in step_slices]

    outputs, last_state = rnn.rnn(
        cell, inputs, self.initial_state, scope='rnnLayer')

    with tf.variable_scope('softmaxLayer'):
        softmax_w = tf.get_variable('w', [args.rnn_size, args.label_size])
        softmax_b = tf.get_variable('b', [args.label_size])
        # Only the last step's output is classified.
        logits = tf.matmul(outputs[-1], softmax_w) + softmax_b
        self.probs = tf.nn.softmax(logits)

    # Softmax loss against the sparse class labels.
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, self.targets)
    self.cost = tf.reduce_mean(per_example_loss)
    self.final_state = last_state

    self.lr = tf.Variable(0.0, trainable=False)
    adam = tf.train.AdamOptimizer(learning_rate=self.lr)
    self.optimizer = adam.minimize(self.cost)

    predicted_label = tf.argmax(self.probs, 1)
    self.correct_pred = tf.equal(predicted_label, self.targets)
    self.correct_num = tf.reduce_sum(tf.cast(self.correct_pred, tf.float32))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
def rnn_model(X, init_state, lstm_size, slicing_tensors):
    """Unroll a BasicLSTMCell over ``X`` and slice one output per example.

    Relies on the module-level names ``n_lstm_steps``, ``batch_size`` and
    ``dim_hidden``.

    Args:
        X: 3-D input tensor; its first two axes are swapped before use.
        init_state: initial state handed to ``rnn.rnn``.
        lstm_size: number of LSTM units; also the row width the input is
            reshaped to.
        slicing_tensors: tensor split into ``batch_size`` pieces along
            axis 0; each squeezed piece is used as a ``begin`` argument
            for ``tf.slice``.

    Returns:
        A tuple ``(sliced_outputs, lstm.state_size)`` where
        ``sliced_outputs`` is a list of ``[1, 1, dim_hidden]`` slices.
    """
    # Swap the first two axes, flatten to rows of width lstm_size, and cut
    # the rows into one tensor per step.
    swapped = tf.transpose(X, [1, 0, 2])
    flat_rows = tf.reshape(swapped, [-1, lstm_size])
    step_inputs = tf.split(0, n_lstm_steps, flat_rows)

    lstm = rnn_cell.BasicLSTMCell(lstm_size, forget_bias=1.0)
    step_outputs, _states = rnn.rnn(
        lstm, step_inputs, initial_state=init_state)

    # Re-assemble the outputs as [n_lstm_steps, batch_size, dim_hidden].
    stacked = tf.reshape(
        tf.concat(0, step_outputs), [n_lstm_steps, batch_size, dim_hidden])

    # One begin-index tensor per example selects the output to keep.
    begin_pieces = tf.split(0, batch_size, slicing_tensors)
    begins = [tf.squeeze(piece) for piece in begin_pieces]
    sliced_outputs = [
        tf.slice(stacked, begin=begin, size=[1, 1, dim_hidden])
        for begin in begins
    ]

    # The state size is returned so the caller can initialise the state.
    return sliced_outputs, lstm.state_size
def __init__(self, config):
    """Build a stacked-LSTM sequence model over embedded integer ids.

    Args:
        config: object providing ``batch_size``, ``num_steps``,
            ``hidden_size``, ``num_layers``, ``num_songs`` and
            ``embedding_size``.
    """
    self._config = config
    batch_size = config.batch_size
    num_steps = config.num_steps

    # Placeholders for the inputs, the targets and the true lengths.
    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._actual_seq_lengths = tf.placeholder(tf.int32, [batch_size])
    # NOTE(review): this placeholder is replaced by the logits further
    # down and nothing in this method reads it -- confirm it is needed.
    self._prediction = tf.placeholder(tf.int32, [batch_size, num_steps])

    # A basic LSTM cell, repeated to form the layers of a deep network.
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(config.hidden_size)
    cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    # Make the initial state operator available.
    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # Map the inputs to their embedding vectors; the lookup is pinned to
    # the CPU.
    with tf.device("/cpu:0"):
        embedding = tf.get_variable(
            "embedding", [config.num_songs, config.embedding_size])
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    # One tensor per time step, with the step axis squeezed out.
    step_slices = tf.split(1, num_steps, inputs)
    inputs = [tf.squeeze(step, [1]) for step in step_slices]
    outputs, state = rnn.rnn(
        cell, inputs,
        initial_state=self._initial_state,
        sequence_length=self._actual_seq_lengths)

    # Flatten the step outputs and project them to one logit per song.
    output = tf.reshape(tf.concat(1, outputs), [-1, config.hidden_size])
    softmax_w = tf.get_variable(
        "softmax_w", [config.hidden_size, config.num_songs])
    softmax_b = tf.get_variable("softmax_b", [config.num_songs])
    logits = tf.matmul(output, softmax_w) + softmax_b

    # Cross-entropy of every prediction against its target; each position
    # gets a weight of one.
    flat_targets = tf.reshape(self._targets, [-1])
    unit_weights = tf.ones([batch_size * num_steps])
    loss = tf.nn.seq2seq.sequence_loss_by_example(
        [logits], [flat_targets], [unit_weights])

    # Expose the prediction, the cost and the final state.
    self._prediction = logits
    self._cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state