def testTimeReversedFusedRNN(self):
  with self.cached_session() as sess:
    initializer = init_ops.random_uniform_initializer(
        -0.01, 0.01, seed=19890213)
    fw_cell = rnn_cell.BasicRNNCell(10)
    bw_cell = rnn_cell.BasicRNNCell(10)
    batch_size = 5
    input_size = 20
    timelen = 15
    inputs = constant_op.constant(
        np.random.randn(timelen, batch_size, input_size))

    # test bi-directional rnn
    with variable_scope.variable_scope("basic", initializer=initializer):
      unpacked_inputs = array_ops.unstack(inputs)
      outputs, fw_state, bw_state = rnn.static_bidirectional_rnn(
          fw_cell, bw_cell, unpacked_inputs, dtype=dtypes.float64)
      packed_outputs = array_ops.stack(outputs)
      basic_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("basic/")
      ]
      sess.run([variables.global_variables_initializer()])
      basic_outputs, basic_fw_state, basic_bw_state = sess.run(
          [packed_outputs, fw_state, bw_state])
      basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
      basic_wgrads = sess.run(
          gradients_impl.gradients(packed_outputs, basic_vars))

    with variable_scope.variable_scope("fused", initializer=initializer):
      fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
          rnn_cell.BasicRNNCell(10))
      fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(
          fused_rnn_cell.FusedRNNCellAdaptor(rnn_cell.BasicRNNCell(10)))
      fw_outputs, fw_state = fused_cell(
          inputs, dtype=dtypes.float64, scope="fw")
      bw_outputs, bw_state = fused_bw_cell(
          inputs, dtype=dtypes.float64, scope="bw")
      outputs = array_ops.concat([fw_outputs, bw_outputs], 2)
      fused_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("fused/")
      ]
      sess.run([variables.global_variables_initializer()])
      fused_outputs, fused_fw_state, fused_bw_state = sess.run(
          [outputs, fw_state, bw_state])
      fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
      fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

    self.assertAllClose(basic_outputs, fused_outputs)
    self.assertAllClose(basic_fw_state, fused_fw_state)
    self.assertAllClose(basic_bw_state, fused_bw_state)
    self.assertAllClose(basic_grads, fused_grads)
    for basic, fused in zip(basic_wgrads, fused_wgrads):
      self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
def model(self):
    cells = []
    for i in range(1, len(self.layers_size) - 1):
        if self.cell_type == 0:
            cell = rnn_cell.BasicLSTMCell(self.layers_size[i])
        elif self.cell_type == 1:
            cell = rnn_cell.BasicRNNCell(self.layers_size[i])
        elif self.cell_type == 2:
            cell = rnn_cell.GRUCell(self.layers_size[i])
        cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
        cells.append(cell)

    multilayer_cell = rnn_cell.MultiRNNCell(cells)
    multilayer_cell = rnn_cell.DropoutWrapper(
        multilayer_cell, output_keep_prob=self.keep_prob)
    output, state = tf.nn.dynamic_rnn(
        multilayer_cell, self.input_tensor, dtype=tf.float32)

    output = tf.transpose(output, [1, 0, 2])
    # This may be a bottleneck (memory)
    last = tf.gather(output, int(output.get_shape()[0]) - 1)

    last_weights = tf.Variable(
        tf.random_normal([self.layers_size[-2], self.layers_size[-1]]))
    if self.enable_bias:
        bias = tf.Variable(tf.random_normal([self.layers_size[-1]]))
        return tf.nn.softmax(tf.matmul(last, last_weights) + bias)
    return tf.nn.softmax(tf.matmul(last, last_weights))
def __init__(self, num_units, tied=False, non_recurrent_fn=None):
  super(Grid2BasicRNNCell, self).__init__(
      num_units=num_units,
      num_dims=2,
      input_dims=0,
      output_dims=0,
      priority_dims=0,
      tied=tied,
      non_recurrent_dims=None if non_recurrent_fn is None else 0,
      cell_fn=lambda n, i: rnn_cell.BasicRNNCell(num_units=n, input_size=i),
      non_recurrent_fn=non_recurrent_fn)
def __init__(self, vocabularySize, config_param):
    self.vocabularySize = vocabularySize
    self.config = config_param

    self._inputX = tf.placeholder(
        tf.int32, [self.config.batch_size, self.config.sequence_size], "InputsX")
    self._inputTargetsY = tf.placeholder(
        tf.int32, [self.config.batch_size, self.config.sequence_size], "InputTargetsY")

    # Convert the input into its embedded form
    with tf.device("/cpu:0"):  # Pin the embedding lookup to the CPU
        embedding = tf.get_variable(
            "embedding", [self.vocabularySize, self.config.embeddingSize])
        embeddingLookedUp = tf.nn.embedding_lookup(embedding, self._inputX)
        inputs = tf.split(1, self.config.sequence_size, embeddingLookedUp)
        inputTensorsAsList = [tf.squeeze(input_, [1]) for input_ in inputs]

    # Define the RNN
    singleRNNCell = rnn_cell.BasicRNNCell(self.config.hidden_size)
    self.multilayerRNN = rnn_cell.MultiRNNCell(
        [singleRNNCell] * self.config.num_layers)
    self._initial_state = self.multilayerRNN.zero_state(
        self.config.batch_size, tf.float32)

    # Define the logits
    hidden_layer_output, last_state = rnn.rnn(
        self.multilayerRNN, inputTensorsAsList, initial_state=self._initial_state)
    hidden_layer_output = tf.reshape(
        tf.concat(1, hidden_layer_output), [-1, self.config.hidden_size])
    self._logits = tf.nn.xw_plus_b(
        hidden_layer_output,
        tf.get_variable("softmax_w", [self.config.hidden_size, self.vocabularySize]),
        tf.get_variable("softmax_b", [self.vocabularySize]))
    self._predictionSoftmax = tf.nn.softmax(self._logits)

    # Define the loss
    loss = seq2seq.sequence_loss_by_example(
        [self._logits],
        [tf.reshape(self._inputTargetsY, [-1])],
        [tf.ones([self.config.batch_size * self.config.sequence_size])],
        self.vocabularySize)
    self._cost = tf.div(tf.reduce_sum(loss), self.config.batch_size)

    self._final_state = last_state
def setUp(self):
  self.rnn_cell = rnn_cell.BasicRNNCell(self.NUM_RNN_CELL_UNITS)
  self.mock_target_column = MockTargetColumn(
      num_label_columns=self.NUM_LABEL_COLUMNS)

  location = tf.contrib.layers.sparse_column_with_keys(
      'location', keys=['west_side', 'east_side', 'nyc'])
  location_onehot = tf.contrib.layers.one_hot_column(location)
  self.context_feature_columns = [location_onehot]

  wire_cast = tf.contrib.layers.sparse_column_with_keys(
      'wire_cast', ['marlo', 'omar', 'stringer'])
  wire_cast_embedded = tf.contrib.layers.embedding_column(
      wire_cast, dimension=8)
  measurements = tf.contrib.layers.real_valued_column(
      'measurements', dimension=2)
  self.sequence_feature_columns = [measurements, wire_cast_embedded]

  self.columns_to_tensors = {
      'location': tf.SparseTensor(
          indices=[[0, 0], [1, 0], [2, 0]],
          values=['west_side', 'west_side', 'nyc'],
          shape=[3, 1]),
      'wire_cast': tf.SparseTensor(
          indices=[[0, 0, 0], [0, 1, 0],
                   [1, 0, 0], [1, 1, 0], [1, 1, 1],
                   [2, 0, 0]],
          values=[b'marlo', b'stringer', b'omar', b'stringer', b'marlo',
                  b'marlo'],
          shape=[3, 2, 2]),
      'measurements': tf.random_uniform([3, 2, 2])}
def RNN(x, weights, biases, type, layer_norm):
    # Prepare data shape to match the `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Select the recurrent cell type
    cell_class_map = {
        "LSTM": rnn_cell.BasicLSTMCell(n_hidden),
        "GRU": rnn_cell.GRUCell(n_hidden),
        "BasicRNN": rnn_cell.BasicRNNCell(n_hidden),
        "LNGRU": LNGRUCell(n_hidden),
        "LNLSTM": LNBasicLSTMCell(n_hidden),
        "HyperLnLSTMCell": HyperLnLSTMCell(n_hidden, is_layer_norm=layer_norm)}

    lstm_cell = cell_class_map.get(type)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * FLAGS.layers)
    print("Using %s model" % type)

    # Get the cell output
    outputs, states = rnn.rnn(cell, x, dtype=tf.float32)

    # Linear activation, using the rnn inner loop's last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def build(self):
    if self.lstm:
        self.cell = rnn_cell.BasicLSTMCell(self.n_unit, state_is_tuple=True)
    else:
        self.cell = rnn_cell.BasicRNNCell(self.n_unit)

    self.X = tf.placeholder(tf.float32, [None, self.n_step, self.n_input])
    self.Y = tf.placeholder(tf.float32, [None, self.n_class])
    self.W = tf.Variable(
        tf.truncated_normal([self.n_unit, self.n_class], stddev=0.1))
    self.b = tf.Variable(tf.constant(0.1, shape=[self.n_class]))

    rnn_input = tf.split(
        0, self.n_step,
        tf.reshape(tf.transpose(self.X, [1, 0, 2]), [-1, self.n_input]))
    output, state = rnn.rnn(self.cell, rnn_input, dtype=tf.float32)
    prediction = tf.matmul(output[-1], self.W) + self.b

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        prediction, self.Y)
    correct_prediction = tf.equal(tf.argmax(prediction, 1),
                                  tf.argmax(self.Y, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    self.learning_rate = tf.placeholder(tf.float32, shape=[])
    self.step = tf.train.GradientDescentOptimizer(
        self.learning_rate).minimize(cross_entropy)
def RNN(x, weights, biases):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_steps, x)
    rnn_cell_2 = rnn_cell.BasicRNNCell(n_hidden)
    outputs, states = rnn.rnn(rnn_cell_2, x, dtype=tf.float32)
    prediction = tf.matmul(outputs[-1], weights['out']) + biases['out']
    return prediction, outputs
def __init__(self, num_units):
  super(Grid1BasicRNNCell, self).__init__(
      num_units=num_units,
      num_dims=1,
      input_dims=0,
      output_dims=0,
      priority_dims=0,
      tied=False,
      cell_fn=lambda n, i: rnn_cell.BasicRNNCell(num_units=n, input_size=i))
def RNN(x, weights, biases):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, nInput])
    x = tf.split(x, nSteps, 0)
    lstmCell = rnn_cell.BasicRNNCell(nHidden)
    outputs, states = rnn.static_rnn(lstmCell, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def RNN(x, weights, biases):
    # Reshape the input so each of the 28 pixel rows becomes one time step
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, nInput])
    x = tf.split(x, nSteps, 0)

    # See the documentation for other cell types (LSTM, GRU, ...)
    lstmCell = rnn_cell.BasicRNNCell(nHidden)

    # static_rnn returns the per-step outputs and the final hidden state
    outputs, states = tf.contrib.rnn.static_rnn(lstmCell, x, dtype=tf.float32)

    return tf.matmul(outputs[-1], weights['out']) + biases['out']
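# --- A minimal usage sketch for the RNN() helper above (not part of the original
# snippet). The hyperparameter values are illustrative assumptions (MNIST-style
# 28x28 rows, 128 hidden units, 10 classes); it assumes `import tensorflow as tf`
# and the same module-level nInput/nSteps/nHidden globals the helper reads.
nInput, nSteps, nHidden, nClasses = 28, 28, 128, 10

x = tf.placeholder(tf.float32, [None, nSteps, nInput])   # one image = 28 rows of 28 pixels
y = tf.placeholder(tf.float32, [None, nClasses])         # one-hot labels
weights = {'out': tf.Variable(tf.random_normal([nHidden, nClasses]))}
biases = {'out': tf.Variable(tf.random_normal([nClasses]))}

logits = RNN(x, weights, biases)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)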
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):
    _X = tf.transpose(_X, [1, 0, 2])
    _X = tf.reshape(_X, [-1, n_input])

    fw_cell_1 = rnn_cell.BasicRNNCell(n_hidden)
    bw_cell_1 = rnn_cell.BasicRNNCell(n_hidden)
    fw_cell = rnn_cell.MultiRNNCell([fw_cell_1] * num_layers)
    bw_cell = rnn_cell.MultiRNNCell([bw_cell_1] * num_layers)

    _X = tf.split(0, n_steps, _X)
    seq = np.int32(np.ones(batch_size) * Truncated)

    outputs, statefw, statebw = rnn.bidirectional_rnn(
        fw_cell, bw_cell, _X,
        initial_state_fw=_istate_fw,
        initial_state_bw=_istate_bw,
        sequence_length=seq)

    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
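# --- Hedged sketch of how the initial states passed to BiRNN() are typically built
# (not from the original code; all sizes are illustrative assumptions). The stacks
# below mirror the cells created inside BiRNN so the fed states have matching shapes;
# the output projection is 2*n_hidden wide because forward and backward outputs are
# concatenated.
n_input, n_steps, n_hidden, num_layers, n_classes = 28, 28, 64, 2, 10
batch_size, Truncated = 32, 28

x = tf.placeholder(tf.float32, [batch_size, n_steps, n_input])
weights = {'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}

fw_stack = rnn_cell.MultiRNNCell([rnn_cell.BasicRNNCell(n_hidden)] * num_layers)
bw_stack = rnn_cell.MultiRNNCell([rnn_cell.BasicRNNCell(n_hidden)] * num_layers)
istate_fw = fw_stack.zero_state(batch_size, tf.float32)
istate_bw = bw_stack.zero_state(batch_size, tf.float32)

pred = BiRNN(x, istate_fw, istate_bw, weights, biases)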
def __classOptoRNN__(self, _Z1):
    ''' Recurrent neural network with a classifier (logistic) output layer
        that tries to predict whether there was an optogenetic stimulation
        of a neuron j.

        The input is the time series of neuron(s) i starting at time t, and
        the output is a binary value: the label is whether x was stimulated
        at t-z.
    '''

    # Defining weights
    self.weights = {
        'classi_HO_W': varInit([self.nhidclassi, 1], 'classi_HO_W', std=0.01)
    }
    self.biases = {'classi_HO_B': varInit([1], 'classi_HO_B', std=1)}
    self.masks = {}

    #classiCell = rnn_cell.BasicLSTMCell(self.nhidclassi)
    classiCell = rnn_cell.BasicRNNCell(self.nhidclassi, activation=self.actfct)
    #classiCell = rnn_cell.GRUCell(self.nhidclassi, activation=self.actfct)

    #INITIAL STATE DOES NOT WORK
    #initClassi = tf.zeros([self.batchSize, classiCell.state_size], dtype='float32')

    if self.multiLayer:
        # Stacking classifier cells
        stackCell = rnn_cell.MultiRNNCell([classiCell] * self.multiLayer)
        S = stackCell.zero_state(self._batchSize, tf.float32)
        with tf.variable_scope("") as scope:
            for i in range(self.seqLen):
                if i == 1:
                    scope.reuse_variables()
                O, S = stackCell(_Z1, S)
        predCell = tf.matmul(O, self.weights['classi_HO_W']) + \
                   self.biases['classi_HO_B']
    else:
        # classi
        O, S = rnn.rnn(classiCell, _Z1, dtype=tf.float32)  # Output and state

        # classi to output layer
        predCell = tf.matmul(O[-1], self.weights['classi_HO_W']) + \
                   self.biases['classi_HO_B']

    return predCell
def testBasicRNNCell(self):
  with self.test_session() as sess:
    with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
      x = tf.zeros([1, 2])
      m = tf.zeros([1, 2])
      g, _ = rnn_cell.BasicRNNCell(2)(x, m)
      sess.run([tf.initialize_all_variables()])
      res = sess.run([g], {
          x.name: np.array([[1., 1.]]),
          m.name: np.array([[0.1, 0.1]])
      })
      self.assertEqual(res[0].shape, (1, 2))
def seq_predict_model(X, w, b, time_step_size, vector_size):
    # input X shape: [batch_size, time_step_size, vector_size]
    # transpose X to [time_step_size, batch_size, vector_size]
    X = tf.transpose(X, [1, 0, 2])
    # reshape X to [time_step_size * batch_size, vector_size]
    X = tf.reshape(X, [-1, vector_size])
    # split X into a list of `time_step_size` tensors of shape [batch_size, vector_size]
    X = tf.split(X, time_step_size, 0)

    cell = rnn_cell.BasicRNNCell(num_units=10)
    initial_state = tf.zeros([batch_size, cell.state_size])
    outputs, _states = rnn.static_rnn(cell, X, initial_state=initial_state)

    # Linear activation
    return tf.matmul(outputs[-1], w) + b, cell.state_size
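# --- Minimal usage sketch for seq_predict_model() (not from the original code; the
# values are illustrative assumptions). Note that batch_size must exist at module
# level because the helper uses it for its zero initial state, and the 10-unit
# BasicRNNCell fixes the first dimension of w.
batch_size, time_step_size, vector_size, output_size = 16, 20, 8, 1

X = tf.placeholder(tf.float32, [batch_size, time_step_size, vector_size])
Y = tf.placeholder(tf.float32, [batch_size, output_size])
w = tf.Variable(tf.random_normal([10, output_size]))  # 10 = num_units of the cell
b = tf.Variable(tf.zeros([output_size]))

pred, state_size = seq_predict_model(X, w, b, time_step_size, vector_size)
loss = tf.reduce_mean(tf.square(pred - Y))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)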
def BuildFullModel():
  """Build the full model with conv, rnn and opt."""
  seq = []
  for i in range(4):
    with variable_scope.variable_scope('inp_%d' % i):
      seq.append(array_ops.reshape(BuildSmallModel(), [2, 1, -1]))

  cell = rnn_cell.BasicRNNCell(16)
  out = rnn.dynamic_rnn(
      cell, array_ops.concat(seq, axis=1), dtype=dtypes.float32)[0]

  target = array_ops.ones_like(out)
  loss = nn_ops.l2_loss(math_ops.reduce_mean(target - out))
  sgd_op = gradient_descent.GradientDescentOptimizer(1e-2)
  return sgd_op.minimize(loss)
def setUp(self):
  super(DynamicRnnEstimatorTest, self).setUp()
  self.rnn_cell = rnn_cell.BasicRNNCell(self.NUM_RNN_CELL_UNITS)
  self.mock_target_column = MockTargetColumn(
      num_label_columns=self.NUM_LABEL_COLUMNS)

  location = feature_column.sparse_column_with_keys(
      'location', keys=['west_side', 'east_side', 'nyc'])
  location_onehot = feature_column.one_hot_column(location)
  self.context_feature_columns = [location_onehot]

  wire_cast = feature_column.sparse_column_with_keys(
      'wire_cast', ['marlo', 'omar', 'stringer'])
  wire_cast_embedded = feature_column.embedding_column(wire_cast, dimension=8)
  measurements = feature_column.real_valued_column(
      'measurements', dimension=2)
  self.sequence_feature_columns = [measurements, wire_cast_embedded]
def RNN(x, W, B):
    # Reshape so each of the 28 pixel rows becomes one time step
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, nInput])
    x = tf.split(0, nSteps, x)

    if typeCell == 'basic':
        lstmCell = rnn_cell.BasicRNNCell(nHidden)
    elif typeCell == 'lstm':
        lstmCell = rnn_cell.LSTMCell(nHidden)
    elif typeCell == 'gru':
        lstmCell = rnn_cell.GRUCell(nHidden)
    else:
        raise Exception("Bad typeCell value!")

    # Static unrolled RNN: per-step outputs and the final hidden state
    outputs, states = rnn.rnn(lstmCell, x, dtype=tf.float32)

    return tf.matmul(outputs[-1], W['out']) + B['out']
def __init__(self, sequence_length, num_classes, vocab_size, embedding_size,
             filter_sizes, num_filters, l2_reg_lambda=0.0):
    self.sequence_length = sequence_length

    # Placeholders for input, output and dropout
    self.input_x = tf.placeholder(
        tf.int32, [None, sequence_length],
        name="input_x")  # sequence_length columns, typically sequence_length = 60
    self.input_y = tf.placeholder(
        tf.float32, [None, num_classes],
        name="input_y")  # num_classes is probably the width of each label row (uncertain)
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    # self.rnn_sequence_length = tf.cast(self.sequence_length, tf.int32)
    lstm_cell = rnn_cell.BasicLSTMCell(num_filters, forget_bias=1.0)
    r_cell = rnn_cell.BasicRNNCell(num_filters)  # num_filters = number of RNN units
    # Stack the LSTM and RNN cells
    self.cell = rnn_cell.MultiRNNCell([lstm_cell, r_cell], state_is_tuple=True)

    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        W_E = tf.Variable(
            tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
            name="W_E")
        self.embedded_chars = tf.nn.embedding_lookup(W_E, self.input_x)
        self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
        cnn_input = self.embedded_chars_expanded

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            h = self.CNN(cnn_input, filter_size, embedding_size, num_filters)
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(pooled_outputs, 3)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    # Add dropout
    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable(
            "W_output",
            shape=[num_filters_total, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b_output")
        l2_loss += tf.nn.l2_loss(W)
        l2_loss += tf.nn.l2_loss(b)
        self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.scores, labels=self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")
def rnn_cell_fn(mode):
  del mode  # unused
  cells = [rnn_cell.BasicRNNCell(num_units=n) for n in cell_units]
  return rnn_cell.MultiRNNCell(cells)
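# --- Hedged sketch of how a cell_fn like this can be exercised on its own (not from
# the original test, which presumably hands the function to an estimator). The
# cell_units value and the input shape are illustrative assumptions; the array_ops /
# dtypes / rnn aliases are the same TF-internal modules used in the other snippets.
cell_units = [32, 16]
multi_cell = rnn_cell_fn(mode=None)  # mode is discarded inside rnn_cell_fn
inputs = array_ops.placeholder(dtypes.float32, [None, 20, 8])  # [batch, time, features]
outputs, final_state = rnn.dynamic_rnn(multi_cell, inputs, dtype=dtypes.float32)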
def __NGCmodel__(self, _Z1):
    ''' RNN (Network + Global cells) & calcium dynamic

        Define weights & masks

          ng_H0_W  : Network & Global hidden -> output (HO)
          alpha    : Decay of data input at t-1
          0alpha_M : Constrains values between 0 and 1
          ng_IH_HH : Network & Global cell Input -> Hidden (IH) &
                     Hidden -> Hidden (HH)
          1ng_IH_HH: Mask applied so that the network cell receives input
                     from the global cell and the data, but the global cell
                     only receives the data. Furthermore, network-cell
                     self-connectivity is prevented by setting the identity
                     to 0.
          2ng_IH_HH: Noise is added to this weight matrix for Bayesian
                     learning.
    '''

    # Total number of hidden units
    nhid = self.nhidNetw + self.nhidGlob

    # Defining weights
    self.weights = {
        'ng_H0_W': varInit([nhid, self.nOut], 'ng_HO_W'),
        'alpha_W': varInit([self.nInput, 1], 'alpha_W'),
    }

    # Defining masks
    self.masks = {
        '1ng_IH_HH': np.vstack([
            np.ones([self.nInput, nhid], dtype='float32'),
            np.hstack([
                np.ones([self.nhidNetw] * 2, dtype='float32') -
                np.identity(self.nhidNetw, dtype='float32'),
                np.zeros([self.nhidNetw, self.nhidGlob], dtype='float32')
            ]),
            np.ones([self.nhidGlob, nhid], dtype='float32')
        ]),
        '2ng_IH_HH': tf.random_normal([self.nInput + nhid, nhid], 0.001) *
                     self.learnRate / 2,
        '0alpha_M': tf.clip_by_value(self.weights['alpha_W'], 0, 1)
    }

    # Defining biases
    self.biases = {'ng_H0_B': varInit(self.nOut, 'ng_H0_B')}

    # Noise distribution parameters
    ng_Gmean = varInit([1], 'ng_Gmean')
    ng_Gstd = varInit([1], 'ng_Gstd')

    # Network + Global dynamic cell (concatenated)
    ngCell = rnn_cell.BasicRNNCell(nhid, activation=self.actfct)
    ngCellS = rnn_cell.MultiRNNCell([ngCell])

    # Initialization
    ngO = ngCellS.zero_state(self.batchSize, tf.float32)  # Netw+Glob state initialization
    Z2 = tf.zeros(1)  # Model prediction

    # RNN looping through sequence time points
    with tf.variable_scope("ng_IH_HH") as scope:
        for i in range(self.seqLen):
            # Reusing variables for RNN
            if i == 1:
                scope.reuse_variables()

            # Prediction error for time t
            ZD = _Z1[i] - Z2

            # Network + global cell
            ngO, ngS = ngCellS(ZD, ngO)

            # NG to output cells
            #ng_Z2 = tf.tanh(tf.matmul(ngO, self.weights['ng_H0_W'] + self.biases['ng_H0_B']))
            ng_Z2 = ngO[:, :self.nhidNetw]

            # Gaussian noise
            gNoise = tf.random_normal([self.batchSize, self.nOut],
                                      mean=ng_Gmean, stddev=ng_Gstd,
                                      dtype='float32')
            gNoise = 0

            # Prediction with calcium dynamic
            #Z2 = tf.tanh(tf.matmul(_Z1[i], self.weights['alpha_W']) + ng_Z2 + gNoise)
            Z2 = tf.tanh(ng_Z2 + gNoise)

    return Z2
def testBasicRNNFusedWrapper(self):
  """This test checks that using a wrapper for BasicRNN works as expected."""
  with self.cached_session() as sess:
    initializer = init_ops.random_uniform_initializer(
        -0.01, 0.01, seed=19890212)
    cell = rnn_cell.BasicRNNCell(10)
    batch_size = 5
    input_size = 20
    timelen = 15
    inputs = constant_op.constant(
        np.random.randn(timelen, batch_size, input_size))

    with variable_scope.variable_scope("basic", initializer=initializer):
      unpacked_inputs = array_ops.unstack(inputs)
      outputs, state = rnn.static_rnn(
          cell, unpacked_inputs, dtype=dtypes.float64)
      packed_outputs = array_ops.stack(outputs)
      basic_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("basic/")
      ]
      sess.run([variables.global_variables_initializer()])
      basic_outputs, basic_state = sess.run([packed_outputs, state])
      basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
      basic_wgrads = sess.run(
          gradients_impl.gradients(packed_outputs, basic_vars))

    with variable_scope.variable_scope(
        "fused_static", initializer=initializer):
      fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
          rnn_cell.BasicRNNCell(10))
      outputs, state = fused_cell(inputs, dtype=dtypes.float64)
      fused_static_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("fused_static/")
      ]
      sess.run([variables.global_variables_initializer()])
      fused_static_outputs, fused_static_state = sess.run([outputs, state])
      fused_static_grads = sess.run(gradients_impl.gradients(outputs, inputs))
      fused_static_wgrads = sess.run(
          gradients_impl.gradients(outputs, fused_static_vars))

    self.assertAllClose(basic_outputs, fused_static_outputs)
    self.assertAllClose(basic_state, fused_static_state)
    self.assertAllClose(basic_grads, fused_static_grads)
    for basic, fused in zip(basic_wgrads, fused_static_wgrads):
      self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)

    with variable_scope.variable_scope(
        "fused_dynamic", initializer=initializer):
      fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
          rnn_cell.BasicRNNCell(10), use_dynamic_rnn=True)
      outputs, state = fused_cell(inputs, dtype=dtypes.float64)
      fused_dynamic_vars = [
          v for v in variables.trainable_variables()
          if v.name.startswith("fused_dynamic/")
      ]
      sess.run([variables.global_variables_initializer()])
      fused_dynamic_outputs, fused_dynamic_state = sess.run([outputs, state])
      fused_dynamic_grads = sess.run(
          gradients_impl.gradients(outputs, inputs))
      fused_dynamic_wgrads = sess.run(
          gradients_impl.gradients(outputs, fused_dynamic_vars))

    self.assertAllClose(basic_outputs, fused_dynamic_outputs)
    self.assertAllClose(basic_state, fused_dynamic_state)
    self.assertAllClose(basic_grads, fused_dynamic_grads)
    for basic, fused in zip(basic_wgrads, fused_dynamic_wgrads):
      self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
def __init__(self, args, data, isGen, model="lstm"):
    """
    Args:
        args:
        data:
        isGen: controls whether the model is used for training or generation
               0: training
               1: generation
    """
    if isGen == 1:
        args.batch_size = 1
        args.seq_length = 1

    with tf.name_scope('input'):
        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.target_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])

    with tf.name_scope('model'):
        self.cell = None
        if model == "lstm":
            self.cell = rnn_cell.BasicLSTMCell(args.cell_size)
        elif model == "gru":
            self.cell = rnn_cell.GRUCell(args.cell_size)
        elif model == 'rnn':
            self.cell = rnn_cell.BasicRNNCell(args.cell_size)
        self.cell = rnn_cell.MultiRNNCell([self.cell] * args.num_layers)
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        w = tf.get_variable('softmax_w', [args.cell_size, data.vocab_size])
        b = tf.get_variable('softmax_b', [data.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                'embedding', [data.vocab_size, args.cell_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        outputs, last_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state)

    with tf.name_scope('loss'):
        output = tf.reshape(outputs, [-1, args.cell_size])
        self.logits = tf.matmul(output, w) + b
        self.probs = tf.nn.softmax(self.logits)
        self.last_state = last_state

        targets = tf.reshape(self.target_data, [-1])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [targets],
            [tf.ones_like(targets, dtype=tf.float32)])
        self.cost = tf.reduce_sum(loss) / args.batch_size
        tf.scalar_summary('loss', self.cost)

    with tf.name_scope('optimize'):
        self.lr = tf.placeholder(tf.float32, [])
        tf.scalar_summary('learning_rate', self.lr)

        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        for g in grads:
            tf.histogram_summary(g.name, g)
        grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)

        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.merged_op = tf.merge_all_summaries()
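# --- Hedged generation sketch for the language model above (not from the original
# code). The class name CharModel, the data.char_to_id / data.id_to_char helpers and
# the seed token are hypothetical placeholders; checkpoint restoring is omitted.
model = CharModel(args, data, isGen=1)  # batch_size = seq_length = 1
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    state = sess.run(model.initial_state)
    x = np.array([[data.char_to_id['a']]])  # hypothetical seed token
    sampled = []
    for _ in range(200):
        probs, state = sess.run(
            [model.probs, model.last_state],
            feed_dict={model.input_data: x, model.initial_state: state})
        p = probs[-1] / np.sum(probs[-1])  # renormalize against float32 drift
        next_id = int(np.random.choice(data.vocab_size, p=p))
        sampled.append(data.id_to_char[next_id])
        x = np.array([[next_id]])
print(''.join(sampled))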
                    [1,2,3]])  # batch_size = 2
seq5 = tf.constant([[1,2,3,4,5,6],
                    [1,2,3,4,5,6]])  # batch_size = 2

unpacked_seqs_embeds = [None,]
seqs_embeds = [None,]
for seq in [seq1, seq2, seq3, seq4, seq5]:
    seqs_embed = tf.nn.embedding_lookup(embed, seq)
    seqs_embeds.append(seqs_embed)
    unpacked_seqs_embeds.append(
        tf.unstack(tf.transpose(seqs_embed, perm=[1, 0, 2])))

with tf.variable_scope("naive",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        #n_output1,_ = rnn.rnn(rnn_cell.BasicRNNCell(2), unpacked_seqs_embeds[1], dtype=tf.float32, sequence_length=None)
        n_output1, _ = rnn.static_rnn(
            rnn_cell.BasicRNNCell(2), unpacked_seqs_embeds[1],
            dtype=tf.float32, sequence_length=None)
    elif FLAGS.rnn_type == "bi":
        #n_output1,_,_ = rnn.bidirectional_rnn(rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1), unpacked_seqs_embeds[1], dtype=tf.float32, sequence_length=None)
        n_output1, _, _ = rnn.static_bidirectional_rnn(
            rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1),
            unpacked_seqs_embeds[1], dtype=tf.float32, sequence_length=None)

with tf.variable_scope("naive2",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output2, _ = rnn.static_rnn(
            rnn_cell.BasicRNNCell(2), unpacked_seqs_embeds[2],
            dtype=tf.float32, sequence_length=None)
    elif FLAGS.rnn_type == "bi":
        n_output2, _, _ = rnn.static_bidirectional_rnn(
            rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1),
            unpacked_seqs_embeds[2], dtype=tf.float32, sequence_length=None)

with tf.variable_scope("batch1",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":
        n_output3, _ = rnn.static_rnn(
            rnn_cell.BasicRNNCell(2), unpacked_seqs_embeds[3],
            dtype=tf.float32, sequence_length=None)
    elif FLAGS.rnn_type == "bi":
        n_output3, _, _ = rnn.static_bidirectional_rnn(
            rnn_cell.BasicRNNCell(1), rnn_cell.BasicRNNCell(1),
            unpacked_seqs_embeds[3], dtype=tf.float32, sequence_length=None)

with tf.variable_scope("batch2",
                       initializer=tf.truncated_normal_initializer(seed=1)) as scope:
    if FLAGS.rnn_type == "fw":