def model(input, vocab_size):
    """Build an embedding + LSTM classifier graph and return its predictions.

    Reads the module-level names ``embedding_size``, ``max_document_length``
    and ``num_classes``.

    Args:
        input: Token ids looked up in the embedding matrix (presumably
            shaped [batch, max_document_length] -- confirm with the caller).
        vocab_size: Number of rows of the embedding matrix.

    Returns:
        A tuple ``(predicted_labels, [embeddings, embedded, lstm_cell, logits])``
        where ``predicted_labels`` is the arg-max of the logits along axis 1.
    """
    # Randomly initialised word-embedding matrix.
    # tf.get_variable(name, shape, initializer): variable name, variable
    # shape and the way the variable is initialised.
    embeddings = tf.get_variable(
        "embeddings",
        [vocab_size, embedding_size],
        initializer=tf.truncated_normal_initializer,
    )
    embedded = tf.nn.embedding_lookup(embeddings, input)

    # static_rnn consumes one tensor per time step, so unstack along axis 1.
    time_steps = tf.unstack(
        embedded, max_document_length, axis=1, name="rnn-input")

    # The LSTM itself.
    cell = BasicLSTMCell(20, forget_bias=1.0)
    step_outputs, _ = static_rnn(cell, time_steps, dtype=tf.float32)

    # Classify from the output of the last time step.
    logits = tf.layers.dense(step_outputs[-1], num_classes)
    return tf.argmax(logits, axis=1), [embeddings, embedded, cell, logits]
def biLSTM(x, hidden_size):
    """Run an LSTM over ``x`` and return every time step's output.

    NOTE(review): despite the name, only one forward ``BasicLSTMCell`` is
    built here (the backward cell was commented out), so the last output
    dimension follows ``hidden_size`` rather than the ``2*hidden_size`` the
    previous header comment advertised -- confirm which one callers expect.

    Args:
        x: Input tensor, [batch, height, width] / [batch, step, embedding_size].
        hidden_size: Number of hidden units of the LSTM cell.

    Returns:
        The stacked per-step outputs transposed back to batch-major order.
    """
    # static_rnn wants a list with one [batch, features] tensor per step.
    time_major = tf.transpose(x, [1, 0, 2])
    steps = tf.unstack(time_major)

    cell = rnn.BasicLSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    # Re-assemble the list into a single batch-major tensor.
    stacked = tf.stack(step_outputs)
    return tf.transpose(stacked, [1, 0, 2])
def recurrent_neural_network(x):
    """Build an LSTM classifier and return the un-activated class scores.

    Uses the module-level ``rnn_size``, ``n_classes``, ``chunk_size`` and
    ``n_chunks``.

    Args:
        x: Input tensor (presumably [batch, n_chunks, chunk_size] -- confirm
            with the caller).

    Returns:
        ``last_lstm_output @ weights + biases``.
    """
    # Output projection applied to the final LSTM output.
    projection = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes])),
    }

    # static_rnn needs a list of per-step tensors: swap the first two axes,
    # flatten to rows of ``chunk_size`` and cut into ``n_chunks`` pieces.
    time_major = tf.transpose(x, [1, 0, 2])
    rows = tf.reshape(time_major, [-1, chunk_size])
    chunks = tf.split(rows, n_chunks, 0)

    cell = rnn_cell.BasicLSTMCell(rnn_size)
    step_outputs, _ = rnn.static_rnn(cell, chunks, dtype=tf.float32)

    # Last step output times the layer weights, plus the biases.
    return tf.matmul(step_outputs[-1], projection['weights']) + projection['biases']
def recurrent_neural_network(x):
    """Assemble a single-cell LSTM network with a linear output layer.

    Depends on the module-level ``rnn_size``, ``n_classes``, ``chunk_size``
    and ``n_chunks``.

    Args:
        x: Input tensor; its first two axes are swapped before being cut
            into ``n_chunks`` per-step tensors.

    Returns:
        The output layer result (no activation applied).
    """
    # weights: multiplied with the last LSTM output.
    # biases: added afterwards, i.e. (input * weight) + bias, so a unit can
    # still produce a non-zero value when all of its inputs are 0.
    weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    biases = tf.Variable(tf.random_normal([n_classes]))

    # Same idea as numpy's transpose: a (1, 2, 3) shape becomes (2, 1, 3).
    swapped = tf.transpose(x, [1, 0, 2])
    flattened = tf.reshape(swapped, [-1, chunk_size])
    per_step = tf.split(flattened, n_chunks, 0)

    # rnn_cell is provided by TensorFlow.
    cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    step_outputs, _ = rnn.static_rnn(cell, per_step, dtype=tf.float32)

    # The output layer has no activation.
    return tf.matmul(step_outputs[-1], weights) + biases
def recurrent_neural_network(x):
    """Build an LSTM classifier graph and return its linear output.

    Uses the module-level ``rnn_size``, ``n_classes``, ``chunk_size`` and
    ``n_chunks``. Prints the last LSTM output tensor while building.

    Args:
        x: Input tensor, e.g. (None, n_chunks, chunk_size).

    Returns:
        ``outputs[-1] @ weights + biases``.
    """
    # Weights and biases of the output layer.
    out_layer = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes])),
    }

    # Reformatting the data, illustrated with one 5x5 image of ones:
    #   start      -> shape (1, 5, 5) == (None, n_chunks, chunk_size)
    #   transpose  -> swaps axes 0 and 1: shape (5, 1, 5)
    #                 == (n_chunks, None, chunk_size)
    #   reshape    -> drops one dimension: shape (5, 5)
    #                 == (n_chunks, chunk_size)
    #   split      -> a list of 5 arrays, each [[1, 1, 1, 1, 1]]
    transposed = tf.transpose(x, [1, 0, 2])
    reshaped = tf.reshape(transposed, [-1, chunk_size])
    pieces = tf.split(reshaped, n_chunks, 0)

    # Long short-term memory cell.
    cell = rnn_cell.BasicLSTMCell(rnn_size)
    outputs, _ = rnn.static_rnn(cell, pieces, dtype=tf.float32)

    last_output = outputs[-1]
    print("OUTPUTS:", last_output)
    return tf.matmul(last_output, out_layer['weights']) + out_layer['biases']
def lstm_model(self, x, is_training):
    """Build one LSTM per slice of ``x`` and return the last slice's result.

    ``x`` is transposed with ``[1, 0, 2, 3]`` and unstacked along the new
    first axis; each slice gets its own LSTM inside a dedicated variable
    scope (``'lstm_' + self.name + index``). All slices share a single
    projection layer of width 4.

    NOTE(review): the results of all but the last slice are discarded, as
    in the original code -- confirm this is intended.

    Args:
        x: 4-D input tensor; only index 2 of the last axis is used.
        is_training: Forwarded to ``tf.layers.dropout`` as ``training``.

    Returns:
        ``tanh(dropout(last_lstm_output @ weights + biases))`` for the final
        slice.
    """
    layer = {
        'weights': tf.Variable(tf.random_normal([self.rnn_size, 4])),
        'biases': tf.Variable(tf.random_normal([4])),
    }

    slices = tf.unstack(tf.transpose(x, [1, 0, 2, 3]))
    for index, x_entry in enumerate(slices):
        x_hold = tf.reshape(x_entry[:, :, 2], [-1, self.chunk_size])
        x_hold = tf.split(x_hold, self.n_chunks, 0)

        # A separate scope per slice keeps the LSTM variables from clashing.
        with tf.variable_scope('lstm_' + self.name + str(index)):
            lstm_cell = rnn_cell.BasicLSTMCell(self.rnn_size, state_is_tuple=True)
            output, _ = rnn.static_rnn(lstm_cell, x_hold, dtype=tf.float32)

            rnn_result = tf.matmul(output[-1], layer['weights']) + layer['biases']
            # Fix: the dropout op used to be created and thrown away, so it
            # never affected the result. Keep its output.
            rnn_result = tf.layers.dropout(
                rnn_result, self.drop_rate, seed=232, training=is_training)
            rnn_result = tf.nn.tanh(rnn_result)
    return rnn_result
def recurrent_neural_network(vector, n_classes, chunk_size, n_chunks):
    """Create the LSTM neural network model.

    Args:
        vector: Input tensor; its first two axes are swapped before use.
        n_classes: Number of classes (width of the output).
        chunk_size: Width of each row after flattening.
        n_chunks: Number of per-step pieces handed to the RNN.

    Returns:
        output: Last LSTM output multiplied by the weights plus the biases.

    """
    rnn_size = 128
    weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    biases = tf.Variable(tf.random_normal([n_classes]))

    # Turn the tensor into the list of per-step tensors static_rnn expects.
    steps = tf.split(
        tf.reshape(tf.transpose(vector, [1, 0, 2]), [-1, chunk_size]),
        n_chunks,
        0)

    cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    return tf.matmul(step_outputs[-1], weights) + biases
def reccurent_neural_network(x):
    """Build an LSTM classifier, printing the input after each reshaping step.

    Uses the module-level ``rnn_size``, ``n_classes``, ``chunk_size`` and
    ``n_chunks``.

    Args:
        x: Input tensor whose first two axes are swapped before splitting.

    Returns:
        ``outputs[-1] @ weights + biases``.
    """
    layer = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes])),
    }

    print(x)
    x = tf.transpose(x, [1, 0, 2])
    print(x)
    flat = tf.reshape(x, [-1, chunk_size])
    x = tf.split(flat, n_chunks, 0)
    print(x)

    cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    step_outputs, _ = rnn.static_rnn(cell, x, dtype=tf.float32)
    result = tf.matmul(step_outputs[-1], layer['weights']) + layer['biases']

    # Debug output: the list of per-step inputs once more.
    print(x)
    return result
def testDynamicAttentionDecoderStateIsTuple(self):
    """attention_decoder over a 2-layer LSTM returns per-layer (c, h) states."""

    def make_cell():
        return rnn_cell.MultiRNNCell(
            cells=[rnn_cell.BasicLSTMCell(2) for _ in range(2)])

    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            encoder_cell = make_cell()
            encoder_inputs = [constant_op.constant(0.5, shape=[2, 2])] * 2
            enc_outputs, enc_state = rnn.static_rnn(
                encoder_cell, encoder_inputs, dtype=dtypes.float32)
            attn_states = array_ops.concat(
                [array_ops.reshape(e, [-1, 1, encoder_cell.output_size])
                 for e in enc_outputs],
                1)
            decoder_inputs = [constant_op.constant(0.4, shape=[2, 2])] * 3

            # Use a new cell instance since the attention decoder uses a
            # different variable scope.
            dec, mem = seq2seq_lib.attention_decoder(
                decoder_inputs, enc_state, attn_states, make_cell(),
                output_size=4)
            sess.run([variables.global_variables_initializer()])

            decoded = sess.run(dec)
            self.assertEqual(3, len(decoded))
            self.assertEqual((2, 4), decoded[0].shape)

            states = sess.run([mem])
            self.assertEqual(2, len(states[0]))
            self.assertEqual((2, 2), states[0][0].c.shape)
            self.assertEqual((2, 2), states[0][0].h.shape)
            self.assertEqual((2, 2), states[0][1].c.shape)
            self.assertEqual((2, 2), states[0][1].h.shape)
def testAttentionDecoder2(self):
    """attention_decoder with two heads: 3 outputs of (2, 4), state (2, 2)."""

    def new_cell():
        return rnn_cell.GRUCell(2)

    with self.cached_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            encoder_cell = new_cell()
            encoder_inputs = [constant_op.constant(0.5, shape=[2, 2])] * 2
            enc_outputs, enc_state = rnn.static_rnn(
                encoder_cell, encoder_inputs, dtype=dtypes.float32)
            reshaped = [
                array_ops.reshape(e, [-1, 1, encoder_cell.output_size])
                for e in enc_outputs
            ]
            attn_states = array_ops.concat(reshaped, 1)
            decoder_inputs = [constant_op.constant(0.4, shape=[2, 2])] * 3

            # Use a new cell instance since the attention decoder uses a
            # different variable scope.
            dec, mem = seq2seq_lib.attention_decoder(
                decoder_inputs,
                enc_state,
                attn_states,
                new_cell(),
                output_size=4,
                num_heads=2)
            sess.run([variables.global_variables_initializer()])

            decoded = sess.run(dec)
            self.assertEqual(3, len(decoded))
            self.assertEqual((2, 4), decoded[0].shape)

            final_state = sess.run([mem])
            self.assertEqual((2, 2), final_state[0].shape)
def recurrent_neural_network(self, x):
    """Build a stacked RNN classifier and return its linear output.

    Each of the ``self.num_layers`` layers is a ``BasicRNNCell``, optionally
    wrapped in an ``AttentionCellWrapper`` when ``self.attention`` is set,
    and then in ``DropoutWrapper``s whose keep probabilities are all 1.

    Args:
        x: Input tensor; its first two axes are swapped, it is flattened to
            rows of ``self.chunk_size`` and cut into ``self.seq_len`` pieces.

    Returns:
        ``outputs[-1] @ weights + biases``.
    """
    layer = {
        'weights': tf.Variable(
            tf.random_normal([self.rnn_size, self.n_classes])),
        'biases': tf.Variable(tf.random_normal([self.n_classes])),
    }

    time_major = tf.transpose(x, [1, 0, 2])
    rows = tf.reshape(time_major, [-1, self.chunk_size])
    steps = tf.split(rows, self.seq_len, 0)

    stacked_cells = []
    for _ in range(self.num_layers):
        layer_cell = tf.contrib.rnn.BasicRNNCell(self.rnn_size)
        if self.attention:
            layer_cell = tf.contrib.rnn.AttentionCellWrapper(
                layer_cell, self.seq_len)
        layer_cell = tf.contrib.rnn.DropoutWrapper(
            layer_cell, input_keep_prob=1, output_keep_prob=1)
        layer_cell = tf.contrib.rnn.DropoutWrapper(
            layer_cell, input_keep_prob=1.0, output_keep_prob=1.0)
        stacked_cells.append(layer_cell)

    multi_cell = tf.contrib.rnn.MultiRNNCell(stacked_cells)
    step_outputs, _ = rnn.static_rnn(multi_cell, steps, dtype=tf.float32)

    # Projection of the last step; feeds the softmax layer.
    return tf.matmul(step_outputs[-1], layer['weights']) + layer['biases']
def benchmarkTfRNNLSTMTraining(self):
    """Benchmark one training step of a multi-layer ``LSTMCell`` static RNN.

    For every configuration returned by ``self._GetTestConfig()`` a fresh
    graph is built on the CPU, gradients of the outputs and final state with
    respect to all trainable variables are grouped into one op, and that op
    is handed to ``self._BenchmarkOp``.
    """
    test_configs = self._GetTestConfig()
    for config_name, config in test_configs.items():
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with ops.Graph().as_default(), ops.device("/cpu"):
            inputs = seq_length * [
                array_ops.zeros([batch_size, num_units], dtypes.float32)
            ]
            initializer = init_ops.random_uniform_initializer(
                -0.01, 0.01, seed=127)

            # Fix: ``cell`` must be a factory. It used to be a single
            # LSTMCell instance, and calling that instance with no arguments
            # for each layer fails; each layer needs its own cell object.
            cell = lambda: rnn_cell.LSTMCell(  # pylint: disable=cell-var-from-loop
                num_units=num_units,
                initializer=initializer,
                state_is_tuple=True)
            multi_cell = rnn_cell.MultiRNNCell(
                [cell() for _ in range(num_layers)])
            outputs, final_state = rnn.static_rnn(
                multi_cell, inputs, dtype=dtypes.float32)

            trainable_variables = ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            gradients = gradients_impl.gradients([outputs, final_state],
                                                 trainable_variables)
            training_op = control_flow_ops.group(*gradients)
            self._BenchmarkOp(training_op, "tf_rnn_lstm %s %s" %
                              (config_name, self._GetConfigDesc(config)))
def benchmarkTfRNNLSTMBlockCellTraining(self):
    """Benchmark one training step of a multi-layer ``LSTMBlockCell`` RNN.

    Builds, per configuration from ``self._GetTestConfig()``, a CPU graph
    with a statically unrolled RNN and benchmarks the grouped gradient op.
    """
    for config_name, config in self._GetTestConfig().items():
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with ops.Graph().as_default(), ops.device("/cpu"):
            zero_step = array_ops.zeros(
                [batch_size, num_units], dtypes.float32)
            inputs = seq_length * [zero_step]

            # One independent block cell per layer.
            layers = [
                lstm_ops.LSTMBlockCell(num_units=num_units)
                for _ in range(num_layers)
            ]
            multi_cell = rnn_cell.MultiRNNCell(layers)
            outputs, final_state = rnn.static_rnn(
                multi_cell, inputs, dtype=dtypes.float32)

            trainable_variables = ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            gradients = gradients_impl.gradients(
                [outputs, final_state], trainable_variables)
            training_op = control_flow_ops.group(*gradients)

            label = "tf_rnn_lstm_block_cell %s %s" % (
                config_name, self._GetConfigDesc(config))
            self._BenchmarkOp(training_op, label)
def testEmbeddingAttentionDecoder(self):
    """embedding_attention_decoder: 3 outputs of (2, 3) and a (2, 2) state."""

    def new_cell():
        return rnn_cell.GRUCell(2)

    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            encoder_inputs = [constant_op.constant(0.5, shape=[2, 2])] * 2
            encoder_cell = new_cell()
            enc_outputs, enc_state = rnn.static_rnn(
                encoder_cell, encoder_inputs, dtype=dtypes.float32)
            attn_states = array_ops.concat(
                [array_ops.reshape(e, [-1, 1, encoder_cell.output_size])
                 for e in enc_outputs],
                1)
            decoder_inputs = [
                constant_op.constant(symbol, dtypes.int32, shape=[2])
                for symbol in range(3)
            ]

            # Use a new cell instance since the attention decoder uses a
            # different variable scope.
            dec, mem = seq2seq_lib.embedding_attention_decoder(
                decoder_inputs,
                enc_state,
                attn_states,
                new_cell(),
                num_symbols=4,
                embedding_size=2,
                output_size=3)
            sess.run([variables.global_variables_initializer()])

            decoded = sess.run(dec)
            self.assertEqual(3, len(decoded))
            self.assertEqual((2, 3), decoded[0].shape)

            final_state = sess.run([mem])
            self.assertEqual((2, 2), final_state[0].shape)
def testEmbeddingRNNDecoder(self):
    """embedding_rnn_decoder: 3 outputs of (2, 2) and an LSTM (c, h) state."""

    def new_cell():
        return rnn_cell.BasicLSTMCell(2)

    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            encoder_inputs = [constant_op.constant(0.5, shape=[2, 2])] * 2
            encoder_cell = new_cell()
            _, enc_state = rnn.static_rnn(
                encoder_cell, encoder_inputs, dtype=dtypes.float32)
            decoder_inputs = [
                constant_op.constant(symbol, dtypes.int32, shape=[2])
                for symbol in range(3)
            ]

            # Use a new cell instance since the attention decoder uses a
            # different variable scope.
            dec, mem = seq2seq_lib.embedding_rnn_decoder(
                decoder_inputs, enc_state, new_cell(),
                num_symbols=4, embedding_size=2)
            sess.run([variables.global_variables_initializer()])

            decoded = sess.run(dec)
            self.assertEqual(3, len(decoded))
            self.assertEqual((2, 2), decoded[0].shape)

            states = sess.run([mem])
            self.assertEqual(1, len(states))
            self.assertEqual((2, 2), states[0].c.shape)
            self.assertEqual((2, 2), states[0].h.shape)
def testDynamicAttentionDecoderStateIsTuple(self):
    """Check output and state shapes of attention_decoder on stacked LSTMs."""

    def build_cell():
        layers = [rnn_cell.BasicLSTMCell(2) for _ in range(2)]
        return rnn_cell.MultiRNNCell(cells=layers)

    root_scope = variable_scope.variable_scope(
        "root", initializer=init_ops.constant_initializer(0.5))
    with self.test_session() as sess, root_scope:
        enc_cell = build_cell()
        enc_inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        enc_outputs, enc_state = rnn.static_rnn(
            enc_cell, enc_inp, dtype=dtypes.float32)

        per_step = [
            array_ops.reshape(e, [-1, 1, enc_cell.output_size])
            for e in enc_outputs
        ]
        attn_states = array_ops.concat(per_step, 1)
        dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3

        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.attention_decoder(
            dec_inp, enc_state, attn_states, build_cell(), output_size=4)
        sess.run([variables.global_variables_initializer()])

        dec_values = sess.run(dec)
        self.assertEqual(3, len(dec_values))
        self.assertEqual((2, 4), dec_values[0].shape)

        mem_values = sess.run([mem])
        self.assertEqual(2, len(mem_values[0]))
        self.assertEqual((2, 2), mem_values[0][0].c.shape)
        self.assertEqual((2, 2), mem_values[0][0].h.shape)
        self.assertEqual((2, 2), mem_values[0][1].c.shape)
        self.assertEqual((2, 2), mem_values[0][1].h.shape)
def __call__(self, inputs, initial_state=None, dtype=None,
             sequence_length=None, scope=None):
    """Run the wrapped cell over ``inputs`` with a static or dynamic RNN.

    The result has the same container kind as ``inputs``: a list of per-step
    tensors in gives a list out, a single tensor in gives a tensor out.

    Args:
        inputs: Either a list of per-step tensors or one stacked tensor
            (time-major, as ``dynamic_rnn`` is called with
            ``time_major=True``).
        initial_state: Forwarded to the RNN function.
        dtype: Forwarded to the RNN function.
        sequence_length: Forwarded to the RNN function.
        scope: Forwarded to the RNN function.

    Returns:
        A tuple ``(outputs, state)``.
    """
    got_list = isinstance(inputs, list)

    if not self._use_dynamic_rnn:
        # static_rnn works on a list of per-step tensors.
        steps = inputs if got_list else array_ops.unstack(inputs)
        outputs, state = rnn.static_rnn(
            self._cell,
            steps,
            initial_state=initial_state,
            dtype=dtype,
            sequence_length=sequence_length,
            scope=scope)
        if not got_list:
            # Convert outputs back to tensor
            outputs = array_ops.stack(outputs)
        return outputs, state

    # dynamic_rnn works on one stacked tensor.
    stacked = array_ops.stack(inputs) if got_list else inputs
    outputs, state = rnn.dynamic_rnn(
        self._cell,
        stacked,
        sequence_length=sequence_length,
        initial_state=initial_state,
        dtype=dtype,
        time_major=True,
        scope=scope)
    if got_list:
        # Convert outputs back to list
        outputs = array_ops.unstack(outputs)
    return outputs, state
def recurrent_neural_network(vector, n_classes, chunk_size, n_chunks):
    """Create the neural network model.

    Args:
        vector: Input tensor; its first two axes are swapped before use.
        n_classes: Number of classes
        chunk_size: Row width used when flattening the input.
        n_chunks: Number of pieces the flattened input is split into.

    Returns:
        output: Linear projection of the last LSTM output.

    """
    # Initialize key variables
    rnn_size = 128
    projection = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes])),
    }

    # Build the list of per-step tensors that static_rnn consumes.
    swapped = tf.transpose(vector, [1, 0, 2])
    flattened = tf.reshape(swapped, [-1, chunk_size])
    pieces = tf.split(flattened, n_chunks, 0)

    cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    all_outputs, _ = rnn.static_rnn(cell, pieces, dtype=tf.float32)

    final_output = all_outputs[-1]
    return tf.matmul(final_output, projection['weights']) + projection['biases']
def build_graph(parameters):
    """Build a simple graph with BasicLSTMCell.

    Args:
        parameters: Mapping providing ``num_batchs``, ``time_step_size``,
            ``input_vec_size``, ``num_cells`` and ``dtype``.

    Returns:
        A tuple ``(inputs_after_split, [out])``: the per-step placeholders
        and a one-element list holding the last LSTM output.
    """
    num_batchs = parameters["num_batchs"]
    time_step_size = parameters["time_step_size"]
    input_vec_size = parameters["input_vec_size"]
    num_cells = parameters["num_cells"]

    # One placeholder per time step. ``range`` replaces ``xrange``, which is
    # not a builtin on Python 3.
    inputs_after_split = []
    for i in range(time_step_size):
        one_timestamp_input = tf.compat.v1.placeholder(
            dtype=parameters["dtype"],
            name="split_{}".format(i),
            shape=[num_batchs, input_vec_size])
        inputs_after_split.append(one_timestamp_input)

    # Currently lstm identifier has a few limitations: only supports
    # forget_bias == 0, inner state activation == tanh.
    # TODO(zhixianyan): Add another test with forget_bias == 1.
    # TODO(zhixianyan): Add another test with relu as activation.
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
        num_cells, forget_bias=0.0, state_is_tuple=True)
    cell_outputs, _ = rnn.static_rnn(
        lstm_cell, inputs_after_split, dtype=tf.float32)
    out = cell_outputs[-1]
    return inputs_after_split, [out]
def testEmbeddingRNNDecoder(self):
    """Check output and state shapes returned by embedding_rnn_decoder."""

    def build_cell():
        return rnn_cell.BasicLSTMCell(2)

    root_scope = variable_scope.variable_scope(
        "root", initializer=init_ops.constant_initializer(0.5))
    with self.test_session() as sess, root_scope:
        enc_inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        enc_cell = build_cell()
        _, enc_state = rnn.static_rnn(enc_cell, enc_inp, dtype=dtypes.float32)

        dec_inp = []
        for symbol in range(3):
            dec_inp.append(
                constant_op.constant(symbol, dtypes.int32, shape=[2]))

        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.embedding_rnn_decoder(
            dec_inp, enc_state, build_cell(), num_symbols=4, embedding_size=2)
        sess.run([variables.global_variables_initializer()])

        dec_values = sess.run(dec)
        self.assertEqual(3, len(dec_values))
        self.assertEqual((2, 2), dec_values[0].shape)

        mem_values = sess.run([mem])
        self.assertEqual(1, len(mem_values))
        self.assertEqual((2, 2), mem_values[0].c.shape)
        self.assertEqual((2, 2), mem_values[0].h.shape)
def GRURNN(x, weights, biases):
    """Run a GRU over ``x`` and project its last output.

    Uses the module-level ``nInput``, ``nSteps`` and ``nHidden``.

    Args:
        x: Input tensor; its first two axes are swapped before splitting.
        weights: Mapping whose ``'out'`` entry multiplies the last output.
        biases: Mapping whose ``'out'`` entry is added afterwards.

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']``.
    """
    time_major = tf.transpose(x, [1, 0, 2])
    rows = tf.reshape(time_major, [-1, nInput])
    steps = tf.split(rows, nSteps, 0)

    gru_cell = rnn_cell.GRUCell(nHidden)
    step_outputs, _ = rnn.static_rnn(gru_cell, steps, dtype=tf.float32)

    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def testEmbeddingAttentionDecoder(self):
    """Check output and state shapes of embedding_attention_decoder."""

    def build_cell():
        return rnn_cell.GRUCell(2)

    root_scope = variable_scope.variable_scope(
        "root", initializer=init_ops.constant_initializer(0.5))
    with self.test_session() as sess, root_scope:
        enc_inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        enc_cell = build_cell()
        enc_outputs, enc_state = rnn.static_rnn(
            enc_cell, enc_inp, dtype=dtypes.float32)

        per_step = [
            array_ops.reshape(e, [-1, 1, enc_cell.output_size])
            for e in enc_outputs
        ]
        attn_states = array_ops.concat(per_step, 1)

        dec_inp = []
        for symbol in range(3):
            dec_inp.append(
                constant_op.constant(symbol, dtypes.int32, shape=[2]))

        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.embedding_attention_decoder(
            dec_inp, enc_state, attn_states, build_cell(),
            num_symbols=4, embedding_size=2, output_size=3)
        sess.run([variables.global_variables_initializer()])

        dec_values = sess.run(dec)
        self.assertEqual(3, len(dec_values))
        self.assertEqual((2, 3), dec_values[0].shape)

        mem_values = sess.run([mem])
        self.assertEqual((2, 2), mem_values[0].shape)
def recurrent_neural_network_model(self, data, mode):
    """Build the LSTM -> dense -> dropout -> dense model.

    Progress messages with the intermediate tensors are printed while the
    input is being reshaped.

    Args:
        data: Mapping from which ``data[self.input_feature_tag]`` is read.
        mode: Compared against ``tf.estimator.ModeKeys.TRAIN`` to decide
            whether dropout is active.

    Returns:
        A tuple ``(output, predictions)``: the logits and a dict holding the
        arg-max class and the softmax probabilities.
    """
    cell = rnn_cell.BasicLSTMCell(self.rnn_chunk_size, state_is_tuple=True)

    print("Transposing input data to split data")
    features = data[self.input_feature_tag]
    print("Input data shape: ", str(features))

    x_rnn = tf.transpose(features, [1, 0, 2])
    print("Transposed data shape: ", str(x_rnn))

    x_rnn = tf.reshape(x_rnn, [-1, self.rnn_chunk_size])
    print("Reshaped data shape: ", str(x_rnn))

    x_rnn = tf.split(x_rnn, self.rnn_chunks, 0)
    print("Split data shape: ", str(x_rnn))

    step_outputs, _ = rnn.static_rnn(cell, x_rnn, dtype=tf.float32)

    hidden = tf.layers.dense(
        inputs=step_outputs[-1],
        units=self.dense_units,
        activation=self.dense_activation)
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    hidden_dropout = tf.layers.dropout(
        inputs=hidden, rate=self.dense_dropout, training=is_training)
    output = tf.layers.dense(
        inputs=hidden_dropout, units=self.output_classes)

    # Generate predictions (for PREDICT and EVAL mode)
    predicted_class = tf.argmax(input=output, axis=1)
    # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
    # `logging_hook`.
    probabilities = tf.nn.softmax(
        output, name=self.prediction_probability_name)

    predictions = {
        self.prediction_class_key: predicted_class,
        self.prediction_probability_key: probabilities,
    }
    return output, predictions
def build_graph(parameters):
    """Build a simple graph with BasicLSTMCell.

    Args:
        parameters: Mapping providing ``num_batches``, ``time_step_size``,
            ``input_vec_size``, ``num_cells``, ``dtype`` and
            ``use_sequence_length``.

    Returns:
        A tuple ``(inputs_after_split, [out])``: the per-step placeholders
        and a one-element list with the last cell output.
    """
    num_batches = parameters["num_batches"]
    time_step_size = parameters["time_step_size"]
    input_vec_size = parameters["input_vec_size"]
    num_cells = parameters["num_cells"]

    # One placeholder per time step.
    inputs_after_split = [
        tf.placeholder(
            dtype=parameters["dtype"],
            name="split_{}".format(step),
            shape=[num_batches, input_vec_size])
        for step in range(time_step_size)
    ]

    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
        num_cells, activation=tf.nn.relu, state_is_tuple=True)

    if parameters["use_sequence_length"]:
        # Using different sequence length in each batch, like [1, 2, 3, 3...].
        sequence_length = [
            min(batch + 1, time_step_size) for batch in range(num_batches)
        ]
    else:
        sequence_length = None

    cell_outputs, _ = rnn.static_rnn(
        lstm_cell,
        inputs_after_split,
        dtype=tf.float32,
        sequence_length=sequence_length)
    return inputs_after_split, [cell_outputs[-1]]
def _build(self, incoming, *args, **kwargs):
    """Run the wrapped cell over ``incoming`` and track the created tensors.

    Args:
        incoming: `Tensor`. 3-D Tensor [samples, timesteps, input dim].

    Returns:
        The layer output, or ``(output, state)`` when ``self.return_state``
        is set. With ``self.return_seq`` the whole output sequence is
        returned, otherwise a single output.
    """
    self._declare_dependencies()

    if self.dynamic:
        as_tensor = (incoming if isinstance(incoming, tf.Tensor)
                     else tf.stack(incoming))
        sequence_length = retrieve_seq_length_op(as_tensor)
    else:
        sequence_length = None

    input_shape = get_shape(incoming)
    inference = incoming

    # If a tensor given, convert it to a per timestep list
    # NOTE(review): ``np.array`` is a function, not a type, so this check
    # only ever matches ``list`` -- confirm whether ``np.ndarray`` was meant.
    if type(inference) not in [list, np.array]:
        ndim = len(input_shape)
        assert ndim >= 3, 'Input dim should be at least 3.'
        # Swap the first two axes, keep the rest in place.
        axes = [1, 0] + list(xrange(2, ndim))
        inference = tf.unstack(value=tf.transpose(inference, (axes)))

    run_rnn = tf.nn.dynamic_rnn if self.dynamic else rnn.static_rnn
    outputs, state = run_rnn(
        cell=self._cell,
        inputs=inference,
        dtype=tf.float32,
        initial_state=self.initial_state,
        sequence_length=sequence_length,
        scope=self.module_name)

    # Track the cell weights and biases; each may be one variable or several.
    for params in [self._cell.w, self._cell.b]:
        if hasattr(params, '__len__'):
            for var in params:
                track(var, tf.GraphKeys.LAYER_VARIABLES, self.module_name)
        else:
            track(params, tf.GraphKeys.LAYER_VARIABLES, self.module_name)

    track(outputs[-1], tf.GraphKeys.ACTIVATIONS, self.module_name)

    if not self.dynamic:
        result = outputs if self.return_seq else outputs[-1]
    elif self.return_seq:
        result = outputs
    else:
        outputs = tf.transpose(tf.stack(outputs), [1, 0, 2])
        result = advanced_indexing_op(outputs, sequence_length)

    track(result, tf.GraphKeys.LAYER_TENSOR, self.module_name)
    return (result, state) if self.return_state else result
def recurrent_neural_network(x, weights, biases):
    """Run an LSTM over ``x`` and linearly project its last output.

    Uses the module-level ``n_steps`` and ``n_hidden``.

    Args:
        x: Input tensor, unstacked along axis 1 into ``n_steps`` tensors.
        weights: Mapping whose ``'w'`` entry multiplies the last output.
        biases: Mapping whose ``'b'`` entry is added afterwards.

    Returns:
        ``outputs[-1] @ weights['w'] + biases['b']``.
    """
    # Unstack to get a list of 'n_steps' tensors of shape
    # (batch_size, chunk_size)
    steps = tf.unstack(x, n_steps, 1)

    cell = tf.contrib.rnn.core_rnn_cell.BasicLSTMCell(
        n_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['w']) + biases['b']
def rnn_model(x, weights, biases):
    """RNN (LSTM) model for image input.

    Uses the module-level ``n_input``, ``n_steps`` and ``n_hidden``.

    Args:
        x: Input tensor; its first two axes are swapped before splitting.
        weights: Matrix multiplied with the last LSTM output.
        biases: Tensor added to the product.

    Returns:
        ``outputs[-1] @ weights + biases``.
    """
    # Build the list of per-step tensors static_rnn consumes.
    steps = tf.split(
        tf.reshape(tf.transpose(x, [1, 0, 2]), [-1, n_input]),
        n_steps,
        0)

    cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    projected = tf.matmul(step_outputs[-1], weights)
    return projected + biases
def model(self, data):
    """Run ``self.lstm_cell`` over ``data`` and project the last output.

    Args:
        data: Input tensor; its first two axes are swapped, it is flattened
            to rows of ``self.chunk_size`` and split into ``self.n_chunks``
            pieces.

    Returns:
        ``outputs[-1] @ self.layer['weights'] + self.layer['biases']``.
    """
    time_major = tf.transpose(data, (1, 0, 2))
    rows = tf.reshape(time_major, (-1, self.chunk_size))
    steps = tf.split(rows, self.n_chunks, 0)

    step_outputs, _ = rnn.static_rnn(self.lstm_cell, steps, dtype=tf.float32)

    projected = tf.matmul(step_outputs[-1], self.layer['weights'])
    return tf.add(projected, self.layer['biases'])
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                embedding_size, num_heads=1,
                                output_projection=None, feed_previous=False,
                                dtype=None, scope=None,
                                initial_state_attention=False,
                                beam_search=True, beam_size=10):
    """Embedding sequence-to-sequence model with attention.

    The encoder is a deep copy of ``cell`` wrapped in an
    ``EmbeddingWrapper``; its per-step outputs become the attention states
    for ``embedding_attention_decoder``. When no ``output_projection`` is
    given, the decoder cell is wrapped in an ``OutputProjectionWrapper`` of
    width ``num_decoder_symbols``.

    Args:
        encoder_inputs: Inputs fed to the embedding encoder.
        decoder_inputs: Inputs fed to the decoder.
        cell: RNN cell used (as a copy) for the encoder and for the decoder.
        num_encoder_symbols: Number of embedding classes on the encoder side.
        num_decoder_symbols: Number of symbols on the decoder side.
        embedding_size: Size of the embeddings.
        num_heads: Forwarded to the decoder.
        output_projection: Forwarded to the decoder; ``None`` enables the
            projection wrapper described above.
        feed_previous: Forwarded to the decoder.
        dtype: Variable-scope dtype; also used for the encoder RNN.
        scope: Variable scope, defaults to "embedding_attention_seq2seq".
        initial_state_attention: Forwarded to the decoder.
        beam_search: Forwarded to the decoder.
        beam_size: Forwarded to the decoder.

    Returns:
        Whatever ``embedding_attention_decoder`` returns.
    """
    with variable_scope.variable_scope(
            scope or "embedding_attention_seq2seq", dtype=dtype) as vs:
        dtype = vs.dtype

        # Encoder.
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            copy.deepcopy(cell),
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)

        # First calculate a concatenation of encoder outputs to put
        # attention on.
        attention_states = array_ops.concat(
            [array_ops.reshape(out, [-1, 1, cell.output_size])
             for out in encoder_outputs],
            1)

        # Decoder.
        if output_projection is None:
            cell = core_rnn_cell.OutputProjectionWrapper(
                cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        else:
            output_size = None

        return embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous,
            initial_state_attention=initial_state_attention,
            beam_search=beam_search,
            beam_size=beam_size)
def lstm(x):
    """Build a tiny LSTM classifier (4 units, 10 classes) over ``x``.

    Args:
        x: Input tensor; a trailing axis is added so every time step carries
            a single feature (presumably [batch, steps] -- confirm with the
            caller).

    Returns:
        ``outputs[-1] @ weights + biases``.
    """
    n_classes = 10
    rnn_size = 4
    weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    biases = tf.Variable(tf.random_normal([n_classes]))

    expanded = tf.expand_dims(x, 2)
    cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)

    # Swap the first two axes and cut into per-step tensors.
    steps = tf.unstack(tf.transpose(expanded, perm=[1, 0, 2]))
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    return tf.matmul(step_outputs[-1], weights) + biases
def recurrent_neural_network(x):
    """Build an LSTM network and return the projection of its last output.

    Uses the module-level ``chunk_size``, ``n_chunks``, ``rnn_size``,
    ``hidden_1_layer``, ``hidden_2_layer`` and ``output_layer``.

    Args:
        x: Input tensor; its first two axes are swapped, it is flattened to
            rows of ``chunk_size`` and split into ``n_chunks`` pieces.

    Returns:
        ``outputs[-1] @ output_layer['weight'] + output_layer['bias']``.
    """
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, chunk_size])
    x = tf.split(x, n_chunks, 0)

    # NOTE(review): these two hidden layers are built but their result is
    # never fed to the LSTM or the output -- confirm whether they should be
    # wired in or removed.
    l1 = tf.add(tf.matmul(x, hidden_1_layer['weight']), hidden_1_layer['bias'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weight']), hidden_2_layer['bias'])
    l2 = tf.nn.tanh(l2)

    lstm_cell = rnn.BasicLSTMCell(rnn_size)
    outputs, _ = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    output = tf.matmul(outputs[-1], output_layer['weight']) + output_layer['bias']
    # Fix: the result used to be computed and then dropped, so callers
    # received None.
    return output
def rnn_encoder(self, encoder_inputs, cell, dtype=dtypes.float32, scope=None):
    """Encode ``encoder_inputs`` with a deep copy of ``cell``.

    Args:
        encoder_inputs: Inputs handed to ``rnn.static_rnn``.
        cell: RNN cell; it is copied so the caller's instance is not used.
        dtype: Dtype forwarded to ``rnn.static_rnn``.
        scope: Variable scope, defaults to "basic_rnn_seq2seq".

    Returns:
        A tuple ``(enc_output, enc_state)``.
    """
    scope_name = scope or "basic_rnn_seq2seq"
    with tf.variable_scope(scope_name):
        encoder_cell = copy.deepcopy(cell)
        encoded = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)
        enc_output, enc_state = encoded
        return enc_output, enc_state
def build_lstm_graph(self):
    """
    Build the lstm graph without the input data.

    Resets the default graph, creates the ``features`` / ``labels``
    placeholders on ``self.xx`` / ``self.yy``, replaces ``self.bins`` by a
    constant tensor, and adds an LSTM with a linear output layer, an Adam
    training op and summaries. ``logits`` and ``loss`` are added to the
    ``'ops_to_restore'`` collection.

    :return: the graph
    """
    tf.reset_default_graph()
    graph = tf.Graph()
    with graph.as_default():
        self.xx = tf.placeholder(
            'float32', [None, 1, self.n_features], name='features')
        self.yy = tf.placeholder('float32', name='labels')
        self.bins = tf.constant(self.bins, name='bins')

        with tf.name_scope("output_layer"):
            weight = tf.Variable(
                tf.random_normal([self._lstm_size, self.n_labels]),
                name='weights')
            biases = tf.Variable(
                tf.random_normal([self.n_labels]), name='biases')

            # The placeholder carries a single time step; turn it into the
            # one-element list static_rnn expects.
            time_major = tf.transpose(self.xx, [1, 0, 2])
            rows = tf.reshape(time_major, [-1, self.n_features])
            steps = tf.split(rows, 1)

            cell = rnn_cell.LSTMCell(self._lstm_size, name='basic_lstm_cell')
            step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)
            last_output = step_outputs[-1]
            logits = tf.add(
                tf.matmul(last_output, weight), biases, name='rnn_model')

            tf.summary.histogram("last_lstm_output", step_outputs[-1])
            tf.summary.histogram("weights", weight)
            tf.summary.histogram("biases", biases)

        with tf.name_scope("train"):
            hits = tf.equal(tf.argmax(logits, 1), tf.argmax(self.yy, 1))
            accuracy = tf.reduce_mean(tf.cast(hits, 'float'), name='accuracy')
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits, labels=self.yy)
            loss = tf.reduce_mean(cross_entropy, name='loss')
            tf.train.AdamOptimizer().minimize(
                loss, name="loss_mse_adam_minimize")

            tf.summary.scalar("loss", loss)
            tf.summary.scalar("accuracy", accuracy)

        # Operators to use after restoring the model
        for restorable in [logits, loss]:
            tf.add_to_collection('ops_to_restore', restorable)
    return graph
def recurrent_neural_network(data):
    """Build an LSTM classifier and return its linear output.

    Uses the module-level ``rnn_size``, ``n_classes``, ``chunk_size`` and
    ``n_chunks``.

    Args:
        data: Input tensor; its first two axes are swapped before it is
            flattened and split into per-step pieces.

    Returns:
        ``outputs[-1] @ weights + biases``.
    """
    weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    biases = tf.Variable(tf.random_normal([n_classes]))

    steps = tf.transpose(data, [1, 0, 2])
    steps = tf.reshape(steps, [-1, chunk_size])
    steps = tf.split(steps, n_chunks, 0)

    cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    # (input_data * weights) + biases
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights) + biases
def _build(self, incoming, *args, **kwargs):
    """Run the wrapped cell over ``incoming`` and track the created tensors.

    Args:
        incoming: `Tensor`. 3-D Tensor [samples, timesteps, input dim].
        **kwargs: ``sequence_length`` may be supplied; in dynamic mode it is
            derived from ``incoming`` when missing.

    Returns:
        The layer output, or ``(output, state)`` when ``self.return_state``
        is set. With ``self.return_seq`` the whole output sequence is
        returned, otherwise a single output.
    """
    self._declare_dependencies()

    sequence_length = kwargs.get('sequence_length')
    if self.dynamic and sequence_length is None:
        as_tensor = (incoming if isinstance(incoming, tf.Tensor)
                     else tf.stack(incoming))
        sequence_length = retrieve_seq_length_op(as_tensor)

    input_shape = get_shape(incoming)
    inference = incoming

    # If a static rnn and tensor given, convert it to a per timestep list
    # NOTE(review): ``np.array`` is a function, not a type, so this check
    # only ever matches ``list`` -- confirm whether ``np.ndarray`` was meant.
    needs_unstack = type(inference) not in [list, np.array] and not self.dynamic
    if needs_unstack:
        ndim = len(input_shape)
        assert ndim >= 3, 'Input dim should be at least 3.'
        # Swap the first two axes, keep the rest in place.
        axes = [1, 0] + list(xrange(2, ndim))
        inference = tf.unstack(value=tf.transpose(inference, axes))

    run_rnn = tf.nn.dynamic_rnn if self.dynamic else rnn.static_rnn
    outputs, state = run_rnn(
        cell=self._cell,
        inputs=inference,
        dtype=tf.float32,
        initial_state=self.initial_state,
        sequence_length=sequence_length,
        scope=self.module_name)

    # Track the cell weights and biases; each may be one variable or several.
    for params in [self._cell.w, self._cell.b]:
        if hasattr(params, '__len__'):
            for var in params:
                track(var, tf.GraphKeys.LAYER_VARIABLES, self.module_name)
        else:
            track(params, tf.GraphKeys.LAYER_VARIABLES, self.module_name)

    track(outputs[-1], tf.GraphKeys.ACTIVATIONS, self.module_name)

    if self.return_seq:
        # Both modes return the full sequence unchanged.
        result = outputs
    elif self.dynamic:
        result = get_sequence_relevant_output(outputs, sequence_length)
    else:
        result = outputs[-1]

    track(result, tf.GraphKeys.LAYER_TENSOR, self.module_name)
    return (result, state) if self.return_state else result
def neural_network(x):
    """Build an LSTM classifier whose result op is named ``'output'``.

    Uses the module-level ``rnn_size``, ``n_classes``, ``chunk_size`` and
    ``n_chunks``.

    Args:
        x: Input tensor; its first two axes are swapped before it is
            flattened and split into ``n_chunks`` pieces.

    Returns:
        ``outputs[-1] @ weights + biases`` as the op named ``'output'``.
    """
    weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    biases = tf.Variable(tf.random_normal([n_classes]))

    swapped = tf.transpose(x, [1, 0, 2])
    rows = tf.reshape(swapped, [-1, chunk_size])
    pieces = tf.split(rows, n_chunks)

    cell = rnn_cell.BasicLSTMCell(rnn_size)
    step_outputs, _ = rnn.static_rnn(cell, pieces, dtype=tf.float32)

    projected = tf.matmul(step_outputs[-1], weights)
    return tf.add(projected, biases, name='output')
def testRNNDecoder(self):
    """rnn_decoder with a projected GRU: 3 outputs of (2, 4), state (2, 2)."""
    root_scope = variable_scope.variable_scope(
        "root", initializer=init_ops.constant_initializer(0.5))
    with self.test_session() as sess, root_scope:
        enc_inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        _, enc_state = rnn.static_rnn(
            rnn_cell.GRUCell(2), enc_inp, dtype=dtypes.float32)

        dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
        decoder_cell = core_rnn_cell.OutputProjectionWrapper(
            rnn_cell.GRUCell(2), 4)
        dec, mem = seq2seq_lib.rnn_decoder(dec_inp, enc_state, decoder_cell)
        sess.run([variables.global_variables_initializer()])

        dec_values = sess.run(dec)
        self.assertEqual(3, len(dec_values))
        self.assertEqual((2, 4), dec_values[0].shape)

        mem_values = sess.run([mem])
        self.assertEqual((2, 2), mem_values[0].shape)
def _build_lstm_model(self, number_of_layers):
    """Build a single-step stacked MaskedBasicLSTMCell model.

    Args:
        number_of_layers: how many MaskedBasicLSTMCell layers to stack.

    Returns:
        Whatever `rnn.static_rnn` returns for the stacked cell applied to one
        input step.
    """
    batch_size, dim = 8, 10
    step_input = variables.Variable(
        random_ops.random_normal([batch_size, dim]))

    layers = []
    for _ in range(number_of_layers):
        layers.append(
            rnn_cells.MaskedBasicLSTMCell(
                dim, forget_bias=0.0, state_is_tuple=True, reuse=False))

    stacked_cell = tf_rnn_cells.MultiRNNCell(layers, state_is_tuple=True)
    zero_state = stacked_cell.zero_state(batch_size, dtypes.float32)
    return rnn.static_rnn(
        stacked_cell, [step_input], initial_state=zero_state)
def _half_seq_len_vs_unroll_half_rnn_benchmark(inputs_list_t, sequence_length):
    """Build a grouped op running a peephole LSTM forward and backward.

    Args:
        inputs_list_t: list of per-step input tensors; the first one must have
            a static 2-D shape, whose second entry is used as `num_units`.
        sequence_length: forwarded to `rnn.static_rnn`.

    Returns:
        A `control_flow_ops.group` of the final state, the gradients with
        respect to the trainable variables, and the step outputs.
    """
    # Two-element unpacking is kept so a non-2-D shape still raises.
    _, width = inputs_list_t[0].get_shape().as_list()

    lstm = rnn_cell_impl.LSTMCell(
        num_units=width,
        use_peepholes=True,
        initializer=init_ops.random_uniform_initializer(
            -0.01, 0.01, seed=127),
        state_is_tuple=False)
    step_outputs, final_state = rnn.static_rnn(
        lstm,
        inputs_list_t,
        sequence_length=sequence_length,
        dtype=dtypes.float32)

    train_vars = ops_lib.get_collection(ops_lib.GraphKeys.TRAINABLE_VARIABLES)
    targets = step_outputs + [final_state]
    grads = gradients_impl.gradients(targets, train_vars)

    dependencies = grads + step_outputs
    return control_flow_ops.group(final_state, *dependencies)
def testGrid3LSTMCellReLUWithRNN(self):
    """Unroll a ReLU Grid3LSTMCell via static_rnn; check shapes and values."""
    batch_size = 3
    input_size = 5
    max_length = 6  # unrolled up to this length
    num_units = 2

    with variable_scope.variable_scope(
            'root', initializer=init_ops.constant_initializer(0.5)):
        cell = grid_rnn_cell.Grid3LSTMCell(
            num_units=num_units, non_recurrent_fn=nn_ops.relu)

        # One placeholder object is repeated for every step, so feeding
        # `inputs[0]` below feeds all of them.
        step_input = array_ops.placeholder(
            dtypes.float32, shape=(batch_size, input_size))
        inputs = max_length * [step_input]

        outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)

    self.assertEqual(len(outputs), len(inputs))
    for idx in (0, 1):
        self.assertEqual(state[idx].c.get_shape(), (batch_size, 2))
        self.assertEqual(state[idx].h.get_shape(), (batch_size, 2))

    for out, inp in zip(outputs, inputs):
        self.assertEqual(len(out), 1)
        self.assertEqual(out[0].get_shape()[0], inp.get_shape()[0])
        self.assertEqual(out[0].get_shape()[1], num_units)
        self.assertEqual(out[0].dtype, inp.dtype)

    with self.cached_session() as sess:
        sess.run(variables.global_variables_initializer())

        ones = np.ones((batch_size, input_size))
        values = sess.run(outputs + [state], feed_dict={inputs[0]: ones})

        # Everything but the last fetch is a step output; the last is state.
        fetched_outputs, fetched_state = values[:-1], values[-1]
        for step in fetched_outputs:
            for v in step:
                self.assertTrue(np.all(np.isfinite(v)))
        for layer in fetched_state:
            for part in layer:
                for v in part:
                    self.assertTrue(np.all(np.isfinite(v)))
def testGrid1LSTMCellWithRNN(self):
    """Unroll a Grid1LSTMCell via static_rnn; check shapes and finite values.

    Uses `cached_session()` like the other grid-cell RNN tests instead of the
    deprecated `test_session()`.
    """
    batch_size = 3
    input_size = 5
    max_length = 6  # unrolled up to this length
    num_units = 2

    with variable_scope.variable_scope(
            'root', initializer=init_ops.constant_initializer(0.5)):
        cell = grid_rnn_cell.Grid1LSTMCell(num_units=num_units)

        # for 1-LSTM, we only feed the first step
        inputs = ([
            array_ops.placeholder(
                dtypes.float32, shape=(batch_size, input_size))
        ] + (max_length - 1) * [array_ops.zeros([batch_size, input_size])])

        outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)

    self.assertEqual(len(outputs), len(inputs))
    self.assertEqual(state[0].c.get_shape(), (batch_size, 2))
    self.assertEqual(state[0].h.get_shape(), (batch_size, 2))

    for out, inp in zip(outputs, inputs):
        self.assertEqual(len(out), 1)
        self.assertEqual(out[0].get_shape(), (3, num_units))
        self.assertEqual(out[0].dtype, inp.dtype)

    with self.cached_session() as sess:
        sess.run(variables.global_variables_initializer())

        input_value = np.ones((batch_size, input_size))
        values = sess.run(outputs + [state],
                          feed_dict={inputs[0]: input_value})

        # All fetches except the last are step outputs; the last is the state.
        for tp in values[:-1]:
            for v in tp:
                self.assertTrue(np.all(np.isfinite(v)))
        for tp in values[-1]:
            for st in tp:
                for v in st:
                    self.assertTrue(np.all(np.isfinite(v)))
def testCompatibleNames(self):
    """Check LSTMCell, LSTMBlockCell and LSTMBlockFusedCell variable names.

    Each cell family is built in its own fresh graph, with and without
    peepholes; the resulting name-to-shape maps must be equal.
    """

    def trainable_shapes():
        # Map every trainable variable name to its shape.
        return {
            v.name: v.get_shape() for v in variables.trainable_variables()
        }

    with self.test_session(use_gpu=True, graph=ops.Graph()):
        plain = rnn_cell.LSTMCell(10)
        peephole = rnn_cell.LSTMCell(10, use_peepholes=True)
        steps = [array_ops.zeros([4, 5])] * 6
        rnn.static_rnn(plain, steps, dtype=dtypes.float32, scope="basic")
        rnn.static_rnn(
            peephole, steps, dtype=dtypes.float32, scope="peephole")
        basic_names = trainable_shapes()

    with self.test_session(use_gpu=True, graph=ops.Graph()):
        plain = lstm_ops.LSTMBlockCell(10)
        peephole = lstm_ops.LSTMBlockCell(10, use_peephole=True)
        steps = [array_ops.zeros([4, 5])] * 6
        rnn.static_rnn(plain, steps, dtype=dtypes.float32, scope="basic")
        rnn.static_rnn(
            peephole, steps, dtype=dtypes.float32, scope="peephole")
        block_names = trainable_shapes()

    with self.test_session(use_gpu=True, graph=ops.Graph()):
        plain = lstm_ops.LSTMBlockFusedCell(10)
        peephole = lstm_ops.LSTMBlockFusedCell(10, use_peephole=True)
        # The fused cell is called directly on one stacked tensor.
        stacked = array_ops.stack([array_ops.zeros([4, 5])] * 6)
        plain(stacked, dtype=dtypes.float32, scope="basic/lstm_cell")
        peephole(stacked, dtype=dtypes.float32, scope="peephole/lstm_cell")
        fused_names = trainable_shapes()

    self.assertEqual(basic_names, block_names)
    self.assertEqual(basic_names, fused_names)
def testStaticRNNWithKerasSimpleRNNCell(self):
    """Train one step of a Keras SimpleRNNCell unrolled with static_rnn.

    Checks the static shapes of the outputs and state, then runs a single
    gradient-descent step and checks the lengths of the fetched values.
    """
    with self.cached_session() as sess:
        input_shape = 10
        output_shape = 5
        timestep = 4
        batch = 100
        (x_train, y_train), _ = testing_utils.get_test_data(
            train_samples=batch,
            test_samples=0,
            input_shape=(timestep, input_shape),
            num_classes=output_shape)
        # Move the timestep axis first so iterating x_train yields one
        # array per step.
        x_train = np.transpose(x_train, (1, 0, 2))
        y_train = keras.utils.to_categorical(y_train)
        cell = keras.layers.SimpleRNNCell(output_shape)

        # One distinct placeholder per timestep. Repeating a single
        # placeholder with `[...] * timestep` would make every feed_dict key
        # identical, so only the last timestep's data would be fed.
        inputs = [
            array_ops.placeholder(dtypes.float32, shape=(None, input_shape))
            for _ in range(timestep)
        ]
        predict = array_ops.placeholder(
            dtypes.float32, shape=(None, output_shape))

        outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)
        self.assertEqual(len(outputs), timestep)
        self.assertEqual(outputs[0].shape.as_list(), [None, output_shape])
        self.assertEqual(state.shape.as_list(), [None, output_shape])
        loss = losses.softmax_cross_entropy(predict, state)
        train_op = training.GradientDescentOptimizer(0.001).minimize(loss)

        sess.run([variables_lib.global_variables_initializer()])
        feed_dict = {i: d for i, d in zip(inputs, x_train)}
        feed_dict[predict] = y_train
        _, outputs, state = sess.run([train_op, outputs, state], feed_dict)

        self.assertEqual(len(outputs), timestep)
        self.assertEqual(len(outputs[0]), batch)
        self.assertEqual(len(state), batch)
def testGrid2LSTMCellWithRNNAndDynamicBatchSize(self):
    """Test for #4296."""
    input_size = 5
    max_length = 6  # unrolled up to this length
    num_units = 2

    with variable_scope.variable_scope(
            'root', initializer=init_ops.constant_initializer(0.5)):
        cell = grid_rnn_cell.Grid2LSTMCell(num_units=num_units)

        # One placeholder with an unknown batch dimension, repeated for
        # every step; feeding `inputs[0]` below feeds all of them.
        step_input = array_ops.placeholder(
            dtypes.float32, shape=(None, input_size))
        inputs = max_length * [step_input]

        outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)

    self.assertEqual(len(outputs), len(inputs))

    for out, inp in zip(outputs, inputs):
        self.assertEqual(len(out), 1)
        self.assertTrue(out[0].get_shape()[0].value is None)
        self.assertEqual(out[0].get_shape()[1], num_units)
        self.assertEqual(out[0].dtype, inp.dtype)

    with self.cached_session() as sess:
        sess.run(variables.global_variables_initializer())

        ones = np.ones((3, input_size))
        values = sess.run(outputs + [state], feed_dict={inputs[0]: ones})

        # Everything but the last fetch is a step output; the last is state.
        fetched_outputs, fetched_state = values[:-1], values[-1]
        for step in fetched_outputs:
            for v in step:
                self.assertTrue(np.all(np.isfinite(v)))
        for layer in fetched_state:
            for part in layer:
                for v in part:
                    self.assertTrue(np.all(np.isfinite(v)))
def __call__(self,
             inputs,
             initial_state=None,
             dtype=None,
             sequence_length=None,
             scope=None):
    """Run the wrapped cell over `inputs` with dynamic_rnn or static_rnn.

    The outputs come back in the same container kind as `inputs`: a list in
    gives a list out, anything else gives a stacked tensor out.

    Args:
        inputs: a list of per-step tensors, or a single tensor that can be
            unstacked into steps.
        initial_state: forwarded to the underlying rnn function.
        dtype: forwarded to the underlying rnn function.
        sequence_length: forwarded to the underlying rnn function.
        scope: forwarded to the underlying rnn function.

    Returns:
        An `(outputs, state)` pair.
    """
    given_as_list = isinstance(inputs, list)

    if not self._use_dynamic_rnn:
        # static_rnn is given a list of steps.
        steps = inputs if given_as_list else array_ops.unstack(inputs)
        outputs, state = rnn.static_rnn(
            self._cell,
            steps,
            initial_state=initial_state,
            dtype=dtype,
            sequence_length=sequence_length,
            scope=scope)
        if not given_as_list:
            # Convert outputs back to tensor
            outputs = array_ops.stack(outputs)
        return outputs, state

    # dynamic_rnn is given one stacked tensor, with time_major=True.
    stacked = array_ops.stack(inputs) if given_as_list else inputs
    outputs, state = rnn.dynamic_rnn(
        self._cell,
        stacked,
        sequence_length=sequence_length,
        initial_state=initial_state,
        dtype=dtype,
        time_major=True,
        scope=scope)
    if given_as_list:
        # Convert outputs back to list
        outputs = array_ops.unstack(outputs)
    return outputs, state
def testLSTMBasicToBlockPeeping(self):
    """Compare a peephole LSTMCell with block_lstm and LSTMBlockFusedCell.

    All three implementations are built over the same random inputs with the
    same seeded initializer. Outputs and input gradients must match closely;
    weight gradients are compared with rtol/atol of 1e-2.
    """
    with self.test_session(use_gpu=True) as sess:
        batch_size = 2
        input_size = 3
        cell_size = 4
        sequence_length = 5

        # One random [batch_size, input_size] tensor per time step.
        inputs = []
        for _ in range(sequence_length):
            inp = ops.convert_to_tensor(
                np.random.randn(batch_size, input_size), dtype=dtypes.float32)
            inputs.append(inp)

        initializer = init_ops.random_uniform_initializer(
            -0.01, 0.01, seed=19890212)

        # Reference implementation: LSTMCell unrolled with static_rnn.
        with variable_scope.variable_scope("basic", initializer=initializer):
            cell = rnn_cell.LSTMCell(
                cell_size, use_peepholes=True, state_is_tuple=True)
            outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)

            sess.run([variables.global_variables_initializer()])
            basic_outputs, basic_state = sess.run([outputs, state[0]])
            basic_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            # Taken before the "block" and "fused" variables are created.
            basic_wgrads = sess.run(
                gradients_impl.gradients(outputs, variables.trainable_variables()))

        # Raw block_lstm op with explicitly created weights.
        with variable_scope.variable_scope("block", initializer=initializer):
            w = variable_scope.get_variable(
                "w",
                shape=[input_size + cell_size, cell_size * 4],
                dtype=dtypes.float32)
            b = variable_scope.get_variable(
                "b",
                shape=[cell_size * 4],
                dtype=dtypes.float32,
                initializer=init_ops.zeros_initializer())
            wci = variable_scope.get_variable(
                "wci", shape=[cell_size], dtype=dtypes.float32)
            wcf = variable_scope.get_variable(
                "wcf", shape=[cell_size], dtype=dtypes.float32)
            wco = variable_scope.get_variable(
                "wco", shape=[cell_size], dtype=dtypes.float32)

            # Only the last of block_lstm's seven results is used.
            _, _, _, _, _, _, outputs = block_lstm(
                ops.convert_to_tensor(
                    sequence_length, dtype=dtypes.int64),
                inputs,
                w,
                b,
                wci=wci,
                wcf=wcf,
                wco=wco,
                cell_clip=0,
                use_peephole=True)

            sess.run([variables.global_variables_initializer()])
            block_outputs = sess.run(outputs)
            block_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            block_wgrads = sess.run(
                gradients_impl.gradients(outputs, [w, b, wci, wcf, wco]))

        self.assertAllClose(basic_outputs, block_outputs)
        self.assertAllClose(basic_grads, block_grads)
        for basic, block in zip(basic_wgrads, block_wgrads):
            self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)

        # Fused cell applied to the whole input list at once.
        with variable_scope.variable_scope("fused", initializer=initializer):
            cell = lstm_ops.LSTMBlockFusedCell(
                cell_size, cell_clip=0, use_peephole=True)
            outputs, state = cell(inputs, dtype=dtypes.float32)

            sess.run([variables.global_variables_initializer()])
            fused_outputs, fused_state = sess.run([outputs, state[0]])
            fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            # Restrict to this scope's variables; earlier scopes also
            # contributed trainable variables by now.
            fused_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused/")
            ]
            fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

        self.assertAllClose(basic_outputs, fused_outputs)
        self.assertAllClose(basic_state, fused_state)
        self.assertAllClose(basic_grads, fused_grads)
        for basic, fused in zip(basic_wgrads, fused_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
def blocks_match(sess, use_peephole):
    """Run LSTMCell, block_lstm and LSTMBlockFusedCell on shared weights.

    Args:
        sess: session used to initialize variables and evaluate every fetch.
        use_peephole: whether all three implementations use peepholes.

    Returns:
        An 11-tuple: (basic_state, fused_state, basic_outputs, block_outputs,
        fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads,
        block_wgrads, fused_wgrads).
    """
    batch_size = 2
    input_size = 3
    cell_size = 4
    sequence_length = 4

    # One random [batch_size, input_size] tensor per time step, plus a
    # stacked copy for the fused cell.
    inputs = []
    for _ in range(sequence_length):
        inp = ops.convert_to_tensor(
            np.random.randn(batch_size, input_size), dtype=dtypes.float32)
        inputs.append(inp)
    stacked_inputs = array_ops.stack(inputs)

    initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=19890212)

    with variable_scope.variable_scope("test", initializer=initializer):
        # magic naming so that the cells pick up these variables and reuse them
        if use_peephole:
            wci = variable_scope.get_variable(
                "rnn/lstm_cell/w_i_diag", shape=[cell_size], dtype=dtypes.float32)
            wcf = variable_scope.get_variable(
                "rnn/lstm_cell/w_f_diag", shape=[cell_size], dtype=dtypes.float32)
            wco = variable_scope.get_variable(
                "rnn/lstm_cell/w_o_diag", shape=[cell_size], dtype=dtypes.float32)

        w = variable_scope.get_variable(
            "rnn/lstm_cell/kernel",
            shape=[input_size + cell_size, cell_size * 4],
            dtype=dtypes.float32)
        b = variable_scope.get_variable(
            "rnn/lstm_cell/bias",
            shape=[cell_size * 4],
            dtype=dtypes.float32,
            initializer=init_ops.zeros_initializer())

        # reuse=True: the cell is expected to find the variables created above.
        basic_cell = rnn_cell.LSTMCell(
            cell_size, use_peepholes=use_peephole, state_is_tuple=True, reuse=True)
        basic_outputs_op, basic_state_op = rnn.static_rnn(
            basic_cell, inputs, dtype=dtypes.float32)

        # Only the last of block_lstm's seven results is used.
        if use_peephole:
            _, _, _, _, _, _, block_outputs_op = block_lstm(
                ops.convert_to_tensor(sequence_length, dtype=dtypes.int64),
                inputs,
                w,
                b,
                wci=wci,
                wcf=wcf,
                wco=wco,
                cell_clip=0,
                use_peephole=True)
        else:
            _, _, _, _, _, _, block_outputs_op = block_lstm(
                ops.convert_to_tensor(sequence_length, dtype=dtypes.int64),
                inputs,
                w,
                b,
                cell_clip=0)

        fused_cell = lstm_ops.LSTMBlockFusedCell(
            cell_size,
            cell_clip=0,
            use_peephole=use_peephole,
            reuse=True,
            name="rnn/lstm_cell")
        fused_outputs_op, fused_state_op = fused_cell(
            stacked_inputs, dtype=dtypes.float32)

        sess.run([variables.global_variables_initializer()])
        basic_outputs, basic_state = sess.run([basic_outputs_op, basic_state_op[0]])
        basic_grads = sess.run(gradients_impl.gradients(basic_outputs_op, inputs))
        # Weights to differentiate against; peephole weights only exist when
        # use_peephole is set.
        xs = [w, b]
        if use_peephole:
            xs += [wci, wcf, wco]
        basic_wgrads = sess.run(gradients_impl.gradients(basic_outputs_op, xs))

        block_outputs = sess.run(block_outputs_op)
        block_grads = sess.run(gradients_impl.gradients(block_outputs_op, inputs))
        block_wgrads = sess.run(gradients_impl.gradients(block_outputs_op, xs))

        # NOTE(review): `xs` is rebuilt here from the same variables as above;
        # this looks redundant unless `gradients` mutates its argument.
        xs = [w, b]
        if use_peephole:
            xs += [wci, wcf, wco]
        fused_outputs, fused_state = sess.run([fused_outputs_op, fused_state_op[0]])
        fused_grads = sess.run(gradients_impl.gradients(fused_outputs_op, inputs))
        fused_wgrads = sess.run(gradients_impl.gradients(fused_outputs_op, xs))

        return (basic_state, fused_state, basic_outputs, block_outputs,
                fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads,
                block_wgrads, fused_wgrads)
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                enc_cell,
                                dec_cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
    """Embedding sequence-to-sequence model with attention.

    This model first embeds encoder_inputs by a newly created embedding (of shape
    [num_encoder_symbols x input_size]). Then it runs an RNN to encode
    embedded encoder_inputs into a state vector. It keeps the outputs of this
    RNN at every step to use for attention later. Next, it embeds decoder_inputs
    by another newly created embedding (of shape [num_decoder_symbols x
    input_size]). Then it runs attention decoder, initialized with the last
    encoder state, on embedded decoder_inputs and attending to encoder outputs.

    Warning: when output_projection is None, the size of the attention vectors
    and variables will be made proportional to num_decoder_symbols, can be large.

    Args:
      encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      enc_cell: RNN cell used for the encoder; it is wrapped in an
        EmbeddingWrapper and unrolled with static_rnn.
      dec_cell: RNN cell used for the decoder; it is wrapped in an
        OutputProjectionWrapper when output_projection is None.
      num_encoder_symbols: Integer; number of symbols on the encoder side.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      embedding_size: Integer, the length of the embedding vector for each symbol.
      num_heads: Number of attention heads that read from attention_states.
      output_projection: None or a pair (W, B) of output projection weights and
        biases; W has shape [output_size x num_decoder_symbols] and B has
        shape [num_decoder_symbols]; if provided and feed_previous=True, each
        fed previous output will first be multiplied by W and added B.
      feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
        of decoder_inputs will be used (the "GO" symbol), and all other decoder
        inputs will be taken from previous outputs (as in embedding_rnn_decoder).
        If False, decoder_inputs are used as given (the standard decoder case).
      dtype: The dtype of the initial RNN state (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_attention_seq2seq".
      initial_state_attention: If False (default), initial attentions are zero.
        If True, initialize the attentions from the initial state and attention
        states.

    Returns:
      A tuple of the form (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors with
          shape [batch_size x num_decoder_symbols] containing the generated
          outputs.
        state: The state of each decoder cell at the final time-step.
          It is a 2D Tensor of shape [batch_size x dec_cell.state_size].
    """
    with variable_scope.variable_scope(
            scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = enc_cell
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            encoder_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, encoder_cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(top_states, 1)

        # Decoder.
        output_size = None
        if output_projection is None:
            dec_cell = core_rnn_cell.OutputProjectionWrapper(dec_cell,
                                                             num_decoder_symbols)
            output_size = num_decoder_symbols

        # A plain Python bool selects a single decoder graph.
        if isinstance(feed_previous, bool):
            return seq2seq.embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                dec_cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            # The feed_previous=False branch is built with reuse=True; the
            # True branch leaves reuse unset.
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = seq2seq.embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    dec_cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                # Return one flat list (outputs followed by state pieces) so
                # both cond branches have the same structure.
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        # Re-nest the flattened state using the encoder state's structure.
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(
                structure=encoder_state, flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
def testBasicRNNFusedWrapper(self):
    """This test checks that using a wrapper for BasicRNN works as expected."""

    with self.cached_session() as sess:
        initializer = init_ops.random_uniform_initializer(
            -0.01, 0.01, seed=19890212)
        cell = rnn_cell.BasicRNNCell(10)
        batch_size = 5
        input_size = 20
        timelen = 15
        # A single [timelen, batch_size, input_size] constant shared by all
        # three variants.
        inputs = constant_op.constant(
            np.random.randn(timelen, batch_size, input_size))

        # Reference: BasicRNNCell unrolled with static_rnn over unstacked steps.
        with variable_scope.variable_scope("basic", initializer=initializer):
            unpacked_inputs = array_ops.unstack(inputs)
            outputs, state = rnn.static_rnn(
                cell, unpacked_inputs, dtype=dtypes.float64)
            packed_outputs = array_ops.stack(outputs)
            basic_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("basic/")
            ]
            sess.run([variables.global_variables_initializer()])
            basic_outputs, basic_state = sess.run([packed_outputs, state])
            basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
            basic_wgrads = sess.run(
                gradients_impl.gradients(packed_outputs, basic_vars))

        # Adaptor with its default (non-dynamic) mode.
        with variable_scope.variable_scope(
                "fused_static", initializer=initializer):
            fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
                rnn_cell.BasicRNNCell(10))
            outputs, state = fused_cell(inputs, dtype=dtypes.float64)
            fused_static_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused_static/")
            ]
            sess.run([variables.global_variables_initializer()])
            fused_static_outputs, fused_static_state = sess.run([outputs, state])
            fused_static_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            fused_static_wgrads = sess.run(
                gradients_impl.gradients(outputs, fused_static_vars))

        self.assertAllClose(basic_outputs, fused_static_outputs)
        self.assertAllClose(basic_state, fused_static_state)
        self.assertAllClose(basic_grads, fused_static_grads)
        for basic, fused in zip(basic_wgrads, fused_static_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)

        # Adaptor with use_dynamic_rnn=True.
        with variable_scope.variable_scope(
                "fused_dynamic", initializer=initializer):
            fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
                rnn_cell.BasicRNNCell(10), use_dynamic_rnn=True)
            outputs, state = fused_cell(inputs, dtype=dtypes.float64)
            fused_dynamic_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused_dynamic/")
            ]
            sess.run([variables.global_variables_initializer()])
            fused_dynamic_outputs, fused_dynamic_state = sess.run([outputs, state])
            fused_dynamic_grads = sess.run(
                gradients_impl.gradients(outputs, inputs))
            fused_dynamic_wgrads = sess.run(
                gradients_impl.gradients(outputs, fused_dynamic_vars))

        self.assertAllClose(basic_outputs, fused_dynamic_outputs)
        self.assertAllClose(basic_state, fused_dynamic_state)
        self.assertAllClose(basic_grads, fused_dynamic_grads)
        for basic, fused in zip(basic_wgrads, fused_dynamic_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
def testLSTMFusedSequenceLengths(self):
    """Verify proper support for sequence lengths in LSTMBlockFusedCell."""
    with self.test_session(use_gpu=True) as sess:
        batch_size = 3
        input_size = 4
        cell_size = 5
        max_sequence_length = 6

        # One random [batch_size, input_size] tensor per time step.
        inputs = []
        for _ in range(max_sequence_length):
            inp = ops.convert_to_tensor(
                np.random.randn(batch_size, input_size), dtype=dtypes.float32)
            inputs.append(inp)
        seq_lengths = constant_op.constant([3, 4, 5])

        initializer = init_ops.random_uniform_initializer(
            -0.01, 0.01, seed=19890213)

        # Reference: BasicLSTMCell unrolled with static_rnn.
        with variable_scope.variable_scope("basic", initializer=initializer):
            cell = rnn_cell.BasicLSTMCell(cell_size, state_is_tuple=True)
            outputs, state = rnn.static_rnn(
                cell, inputs, dtype=dtypes.float32, sequence_length=seq_lengths)
            sess.run([variables.global_variables_initializer()])
            basic_outputs, basic_state = sess.run([outputs, state[0]])
            basic_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            # Taken before the "fused" and "unfused" variables are created.
            basic_wgrads = sess.run(
                gradients_impl.gradients(
                    outputs, variables.trainable_variables()))

        # Fused cell over the whole sequence with the same lengths.
        with variable_scope.variable_scope("fused", initializer=initializer):
            cell = lstm_ops.LSTMBlockFusedCell(
                cell_size, cell_clip=0, use_peephole=False)
            outputs, state = cell(
                inputs, dtype=dtypes.float32, sequence_length=seq_lengths)

            sess.run([variables.global_variables_initializer()])
            fused_outputs, fused_state = sess.run([outputs, state[0]])
            fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            fused_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("fused/")
            ]
            fused_wgrads = sess.run(
                gradients_impl.gradients(outputs, fused_vars))

        self.assertAllClose(basic_outputs, fused_outputs)
        self.assertAllClose(basic_state, fused_state)
        self.assertAllClose(basic_grads, fused_grads)
        for basic, fused in zip(basic_wgrads, fused_wgrads):
            self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)

        # Verify that state propagation works if we turn our sequence into
        # tiny (single-time) subsequences, i.e. unfuse the cell
        with variable_scope.variable_scope(
                "unfused", initializer=initializer) as vs:
            cell = lstm_ops.LSTMBlockFusedCell(
                cell_size, cell_clip=0, use_peephole=False)
            outputs = []
            state = None
            # `seq_lengths` is a constant, so evaluate it once instead of
            # once per time step.
            seq_length_values = seq_lengths.eval()
            for i, inp in enumerate(inputs):
                # 1 while step i is still inside a sequence, 0 afterwards.
                lengths = [int(i < l) for l in seq_length_values]
                output, state = cell(
                    [inp],
                    initial_state=state,
                    dtype=dtypes.float32,
                    sequence_length=lengths)
                # Later calls must share the variables created by the first.
                vs.reuse_variables()
                outputs.append(output[0])
            outputs = array_ops.stack(outputs)

            sess.run([variables.global_variables_initializer()])
            unfused_outputs, unfused_state = sess.run([outputs, state[0]])
            unfused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
            unfused_vars = [
                v for v in variables.trainable_variables()
                if v.name.startswith("unfused/")
            ]
            unfused_wgrads = sess.run(
                gradients_impl.gradients(outputs, unfused_vars))

        self.assertAllClose(basic_outputs, unfused_outputs)
        self.assertAllClose(basic_state, unfused_state)
        self.assertAllClose(basic_grads, unfused_grads)
        for basic, unfused in zip(basic_wgrads, unfused_wgrads):
            self.assertAllClose(basic, unfused, rtol=1e-2, atol=1e-2)