def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):
    """Build a bidirectional basic-RNN graph and project each step's output.

    Relies on the module-level names ``n_input``, ``n_steps`` and
    ``n_hidden`` for the layer sizes.

    Args:
        _X: Input tensor; per the original author's note it is laid out as
            (batch_size, n_steps, n_input).
        _istate_fw: Initial state passed to the forward-direction cell.
        _istate_bw: Initial state passed to the backward-direction cell.
        _weights: Mapping holding the 'hidden' and 'out' weight tensors.
        _biases: Mapping holding the 'hidden' and 'out' bias tensors.

    Returns:
        A tuple ``(projected, final_fw, final_bw)``: the list of per-step
        outputs after the 'out' linear layer, followed by the two final
        values returned by ``rnn.bidirectional_rnn``.
    """
    # Swap the first two axes so that the step axis comes first.
    step_major = tf.transpose(_X, [1, 0, 2])

    # Collapse to 2-D, (n_steps * batch_size, n_input), for the input layer.
    flat_inputs = tf.reshape(step_major, [-1, n_input])

    # Input-to-hidden linear layer.
    hidden_inputs = (
        tf.matmul(flat_inputs, _weights['hidden']) + _biases['hidden']
    )

    # One basic RNN cell per direction (these are not LSTM cells).
    fw_cell = rnn_cell.BasicRNNCell(n_hidden)
    bw_cell = rnn_cell.BasicRNNCell(n_hidden)

    # The RNN helper consumes a list with one tensor per step.
    step_inputs = tf.split(0, n_steps, hidden_inputs)

    outputs, final_fw, final_bw = rnn.bidirectional_rnn(
        fw_cell,
        bw_cell,
        step_inputs,
        initial_state_fw=_istate_fw,
        initial_state_bw=_istate_bw,
    )

    # Hidden-to-output linear layer, applied to every step's output.
    projected = [
        tf.matmul(step_output, _weights['out']) + _biases['out']
        for step_output in outputs
    ]
    return projected, final_fw, final_bw
def testBasicRNNCell(self):
    """Run a single BasicRNNCell step and check the output shape is (1, 2)."""
    with self.test_session() as sess:
        half_init = tf.constant_initializer(0.5)
        with tf.variable_scope("root", initializer=half_init):
            x = tf.zeros([1, 2])
            m = tf.zeros([1, 2])

            # Two-unit cell applied once to input x with previous state m.
            cell = rnn_cell.BasicRNNCell(2)
            g, _ = cell(x, m)

            sess.run([tf.variables.initialize_all_variables()])

            # Override the zero tensors by name with concrete values.
            feed = {
                x.name: np.array([[1., 1.]]),
                m.name: np.array([[0.1, 0.1]]),
            }
            res = sess.run([g], feed)
            self.assertEqual(res[0].shape, (1, 2))
def __init__(self, vocabularySize, config_param):
    """Build the graph: embedding lookup, stacked RNN, logits and loss.

    Args:
        vocabularySize: Number of rows of the embedding table and number of
            columns of the softmax projection.
        config_param: Configuration object; this method reads its
            ``batch_size``, ``sequence_size``, ``embeddingSize``,
            ``hidden_size`` and ``num_layers`` attributes.
    """
    self.vocabularySize = vocabularySize
    self.config = config_param
    cfg = self.config

    # Integer placeholders for the input ids and the target ids.
    self._inputX = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputsX")
    self._inputTargetsY = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputTargetsY")

    # Convert the input ids to their embedded form. The device string pins
    # these ops to the CPU (it does not select a GPU).
    # NOTE(review): the extent of this device block was inferred from the
    # statement grouping -- confirm against the intended placement.
    with tf.device("/cpu:0"):
        embedding = tf.get_variable(
            "embedding", [self.vocabularySize, cfg.embeddingSize])
        embedded_ids = tf.nn.embedding_lookup(embedding, self._inputX)
        per_step = tf.split(1, cfg.sequence_size, embedded_ids)
        step_inputs = [tf.squeeze(step, [1]) for step in per_step]

    # Stacked RNN: the same BasicRNNCell object repeated num_layers times.
    base_cell = rnn_cell.BasicRNNCell(cfg.hidden_size)
    self.multilayerRNN = rnn_cell.MultiRNNCell([base_cell] * cfg.num_layers)
    self._initial_state = self.multilayerRNN.zero_state(
        cfg.batch_size, tf.float32)

    # Unroll over the per-step inputs, then flatten the outputs to 2-D.
    step_outputs, states = rnn.rnn(
        self.multilayerRNN, step_inputs, initial_state=self._initial_state)
    flat_outputs = tf.reshape(
        tf.concat(1, step_outputs), [-1, cfg.hidden_size])

    # Logits and their softmax.
    softmax_w = tf.get_variable(
        "softmax_w", [cfg.hidden_size, self.vocabularySize])
    softmax_b = tf.get_variable("softmax_b", [self.vocabularySize])
    self._logits = tf.nn.xw_plus_b(flat_outputs, softmax_w, softmax_b)
    self._predictionSoftmax = tf.nn.softmax(self._logits)

    # Loss: every position weighted 1.0; the summed loss is divided by the
    # batch size.
    flat_targets = tf.reshape(self._inputTargetsY, [-1])
    unit_weights = tf.ones([cfg.batch_size * cfg.sequence_size])
    loss = seq2seq.sequence_loss_by_example(
        [self._logits], [flat_targets], [unit_weights], self.vocabularySize)
    self._cost = tf.div(tf.reduce_sum(loss), cfg.batch_size)
    self._final_state = states[-1]
# One-hot input rows, one row per time step.
x_data = np.array(
    [
        [1, 0, 0, 0],  # h
        [0, 1, 0, 0],  # e
        [0, 0, 1, 0],  # l
        [0, 0, 1, 0],  # l
    ],
    dtype='f',
)

# Map each character of 'hello' to its index via char_dic.
sample = [char_dic[c] for c in 'hello']

# Configuration
char_vocab_size = len(char_dic)
rnn_size = char_vocab_size  # one-hot coding (one of 4)
time_step_size = 4  # 'hell' -> predict 'ello'
batch_size = 1

# RNN model
# NOTE: this rebinds the name `rnn_cell` to the cell instance, so any later
# `rnn_cell.<Class>` lookup in this module would resolve against the cell.
rnn_cell = rnn_cell.BasicRNNCell(rnn_size)
state = tf.zeros([batch_size, rnn_cell.state_size])
X_split = tf.split(0, time_step_size, x_data)
outputs, state = rnn.rnn(rnn_cell, X_split, state)

# Arguments expected by sequence_loss_by_example:
#   logits:  list of 2D Tensors of shape [batch_size x num_decoder_symbols]
#   targets: list of 1D batch-sized int32 Tensors, same length as logits
#   weights: list of 1D batch-sized float Tensors, same length as logits
logits = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
targets = tf.reshape(sample[1:], [-1])
weights = tf.ones([time_step_size * batch_size])

loss = tf.nn.seq2seq.sequence_loss_by_example(
    [logits], [targets], [weights])
cost = tf.reduce_sum(loss) / batch_size

optimizer = tf.train.RMSPropOptimizer(0.01, 0.9)
train_op = optimizer.minimize(cost)
#!/usr/bin/python3
# coding: utf-8

# TensorFlow's RNN APIs currently live mainly in two modules under
# tensorflow.models.rnn: `rnn` and `rnn_cell`. The latter defines commonly
# used RNN cells (plain RNN plus the optimized LSTM, GRU, and so on); the
# former provides helper functions.

# Creating a basic RNN cell is simple. The constructor takes the number of
# units; inputs and state are supplied later, when the cell is called.
from tensorflow.models.rnn import rnn_cell
cell = rnn_cell.BasicRNNCell(num_units)

# An LSTM or GRU cell instead?
cell = rnn_cell.BasicLSTMCell(num_units)  # the most basic one, no peepholes
cell = rnn_cell.LSTMCell(num_units, input_size)  # peepholes etc. can be set
cell = rnn_cell.GRUCell(num_units)

# How is it called?
output, state = cell(input, state)

# Stepping through time by hand like this requires setting the
# variable_scope's reuse attribute to True; TF offers a shortcut:
state = cell.zero_state(batch_size, dtype=tf.float32)
outputs, states = rnn.rnn(cell, inputs, initial_state=state)

# Shorter still:
outputs, states = rnn.rnn(cell, inputs, dtype=tf.float32)

# Worried about overfitting? Add dropout:
cell = rnn_cell.DropoutWrapper(cell, input_keep_prob=0.5, output_keep_prob=0.5)

# A three-layer network with dropout?
# NOTE(review): only the dropout wrapper is visible here; the step that
# stacks the cell into three layers is not shown in this excerpt.
cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=0.5)