def setup_encoder(self):
    """Build the bidirectional multi-layer GRU encoder.

    Dropout is applied to ``self.encoder_inputs`` (to reduce overfitting),
    the result is run through a stacked bidirectional GRU (time-major,
    sequence lengths taken from ``self.src_len``, all-zero initial states)
    and the forward and backward outputs are concatenated along axis 2.

    Side effects:
        Sets ``self.encoder_fw_cell``, ``self.encoder_bw_cell`` and
        ``self.encoder_output``.
    """

    def build_stack():
        # Create a *fresh* GRU (with output dropout) for every layer.
        # Repeating a single cell object (``[cell] * n``) makes all layers
        # refer to the same cell, which newer TensorFlow 1.x releases either
        # reject or turn into silently shared weights.
        layers = [
            rnn_cell.DropoutWrapper(
                rnn_cell.GRUCell(self.size),
                output_keep_prob=self.keep_prob)
            for _ in range(self.num_layers)
        ]
        return rnn_cell.MultiRNNCell(layers, state_is_tuple=True)

    with vs.variable_scope("Encoder"):
        # Input dropout; keep probability is self.keep_prob.
        inp = tf.nn.dropout(self.encoder_inputs, self.keep_prob)

        self.encoder_fw_cell = build_stack()  # forward direction
        self.encoder_bw_cell = build_stack()  # backward direction

        out, _ = rnn.bidirectional_dynamic_rnn(
            self.encoder_fw_cell,
            self.encoder_bw_cell,
            inp,
            self.src_len,
            dtype=tf.float32,
            time_major=True,
            # Both directions start from an all-zero state.
            initial_state_fw=self.encoder_fw_cell.zero_state(
                self.batch_size, dtype=tf.float32),
            initial_state_bw=self.encoder_bw_cell.zero_state(
                self.batch_size, dtype=tf.float32))

        # Join forward and backward outputs on the feature axis.
        self.encoder_output = tf.concat([out[0], out[1]], axis=2)
def recurrent_neural_network(x, keep_prob):
    """Classify sequences with a stacked bidirectional LSTM.

    The forward output at the last relevant step and the backward output at
    the first step are concatenated and fed through a linear layer.

    Relies on ``rnn_size``, ``n_classes``, ``num_layers``, ``length`` and
    ``last_relevant`` being defined outside this function.

    Args:
        x: input tensor handed to ``tf.nn.bidirectional_dynamic_rnn``.
        keep_prob: output keep probability for the dropout wrappers.

    Returns:
        The logits ``concat([first, last]) * weights + biases``.
    """
    layer = {
        'weights': tf.Variable(tf.random_normal([2 * rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes])),
    }

    def build_stack():
        # A separate LSTM (+ output dropout) per layer, so no cell object is
        # shared between layers.
        cells = [
            rnn_cell.DropoutWrapper(
                rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True,
                                       forget_bias=1.0),
                output_keep_prob=keep_prob)
            for _ in range(num_layers)
        ]
        return rnn_cell.MultiRNNCell(cells)

    lstm_fw_cell = build_stack()  # forward direction
    lstm_bw_cell = build_stack()  # backward direction

    seq_len = length(x)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        lstm_fw_cell, lstm_bw_cell, x,
        dtype=tf.float32, sequence_length=seq_len)
    output_fw, output_bw = outputs

    # Forward summary: output at the last valid time step.
    last = last_relevant(output_fw, seq_len)
    # Backward summary: the backward pass finishes at time step 0.  The
    # original read ``output_fw`` here as well, which merely duplicated
    # ``last`` and never used the backward direction.
    first = output_bw[:, 0, :]

    return tf.matmul(tf.concat(1, [first, last]),
                     layer['weights']) + layer['biases']
def __init__(self, hidden_size, keep_prob, num_layers):
    """Create the forward and backward multi-layer GRU stacks.

    Inputs:
      hidden_size: int. Hidden size of the RNN
      keep_prob: Tensor containing a single scalar that is the keep
        probability (for dropout)
      num_layers: number of GRU layers stacked in each direction
    """
    self.hidden_size = hidden_size
    self.keep_prob = keep_prob
    self.num_layers = num_layers

    # Forward direction: plain GRUs first, then wrap each with input dropout.
    forward_grus = [
        rnn_cell.GRUCell(self.hidden_size) for _ in range(self.num_layers)
    ]
    self.rnn_cell_fw = [
        DropoutWrapper(gru, input_keep_prob=self.keep_prob)
        for gru in forward_grus
    ]
    self.multi_rnn_cell_fw = rnn_cell.MultiRNNCell(
        self.rnn_cell_fw, state_is_tuple=False)

    # Backward direction: same construction with its own cells.
    backward_grus = [
        rnn_cell.GRUCell(self.hidden_size) for _ in range(self.num_layers)
    ]
    self.rnn_cell_bw = [
        DropoutWrapper(gru, input_keep_prob=self.keep_prob)
        for gru in backward_grus
    ]
    self.multi_rnn_cell_bw = rnn_cell.MultiRNNCell(
        self.rnn_cell_bw, state_is_tuple=False)
def build_nmt_multicell_rnn(num_layers_encoder,
                            num_layers_decoder,
                            encoder_size,
                            decoder_size,
                            source_proj_size,
                            use_lstm=True,
                            input_feeding=True,
                            dropout=0.0):
    """Build the encoder and decoder multi-layer RNN cells.

    Args:
        num_layers_encoder: number of encoder layers.
        num_layers_decoder: number of decoder layers.
        encoder_size: number of units of the encoder cell.
        decoder_size: number of units of every decoder cell.
        source_proj_size: ``input_size`` passed to the encoder cell.
        use_lstm: use ``rnn_cell.LSTMCell`` when true, else ``GRUCell``.
        input_feeding: when true the first decoder cell is given
            ``input_size=decoder_size * 2`` instead of ``decoder_size``.
        dropout: dropout rate; cells are wrapped with
            ``output_keep_prob=1.0 - dropout`` (0.0 keeps everything).

    Returns:
        Tuple ``(encoder_rnncell, decoder_rnncell)`` of ``MultiRNNCell``s.
    """
    if use_lstm:
        print("I'm building the model with LSTM cells")
        cell_class = rnn_cell.LSTMCell
    else:
        print("I'm building the model with GRU cells")
        cell_class = GRUCell

    initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1,
                                                seed=1234)

    def new_cell(num_units, input_size):
        return cell_class(num_units=num_units, input_size=input_size,
                          initializer=initializer)

    def with_dropout(cell):
        # The wrapper is always applied; a rate of 0.0 gives keep_prob 1.0.
        return rnn_cell.DropoutWrapper(cell, output_keep_prob=1.0 - dropout)

    encoder_cell = new_cell(encoder_size, source_proj_size)
    decoder_cell0 = new_cell(
        decoder_size, decoder_size * 2 if input_feeding else decoder_size)

    # The same wrapped encoder cell object is repeated for every layer.
    encoder_rnncell = rnn_cell.MultiRNNCell(
        [with_dropout(encoder_cell)] * num_layers_encoder)

    decoder_layers = [with_dropout(decoder_cell0)]
    if num_layers_decoder > 1:
        # All layers above the first repeat one wrapped cell object.
        decoder_cell1 = with_dropout(new_cell(decoder_size, decoder_size))
        decoder_layers = decoder_layers + [decoder_cell1] * (
            num_layers_decoder - 1)
    decoder_rnncell = rnn_cell.MultiRNNCell(decoder_layers)

    return encoder_rnncell, decoder_rnncell
def encoder(self):
    """Project the inputs and encode them with a stacked bidirectional LSTM.

    Every time step of ``self.encoder_inputs`` goes through a ReLU-activated
    affine projection to ``self.hidden_size`` before entering the RNN.
    No ``sequence_length`` is passed to the RNN.

    Returns:
        Forward and backward RNN outputs concatenated along axis 2.
    """
    with tf.variable_scope("encoder"):
        encoder_w_in = self._weight_variable(
            [self.input_dim, self.hidden_size], name='encoder_w_in')
        encoder_b_in = self._bias_variable(
            [self.hidden_size, ], name='encoder_b_in')

        # Flatten to 2-D so a single matmul projects every time step.
        flat_inputs = tf.reshape(
            self.encoder_inputs,
            [self.batch_size * self.max_time, self.input_dim])
        projected = tf.nn.relu(
            tf.add(tf.matmul(flat_inputs, encoder_w_in), encoder_b_in))
        rnn_inputs = tf.reshape(
            projected, [self.batch_size, self.max_time, self.hidden_size])

        def dropout_lstm():
            # LSTM layer with dropout on its outputs only.
            return rnn_cell.DropoutWrapper(
                cell=rnn_cell.BasicLSTMCell(self.hidden_size,
                                            forget_bias=1.0,
                                            state_is_tuple=True),
                input_keep_prob=1.0,
                output_keep_prob=self.output_keep_prob)

        fw_layers = []
        bw_layers = []
        for layer_idx in range(self.num_layers):
            with tf.variable_scope('encoder_lstm_{}'.format(layer_idx)):
                fw_layers.append(dropout_lstm())
                bw_layers.append(dropout_lstm())

        fw_stack = rnn_cell.MultiRNNCell(fw_layers)
        bw_stack = rnn_cell.MultiRNNCell(bw_layers)

        # The final states are returned by the RNN but not used here.
        (fw_outputs, bw_outputs), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=fw_stack,
            cell_bw=bw_stack,
            inputs=rnn_inputs,
            dtype=tf.float32,
            time_major=False)

        return tf.concat((fw_outputs, bw_outputs), 2)
def __init__(self, dim_image, dim_embed, dim_hidden, batch_size,
             n_lstm_steps, n_words, enc_timesteps, bias_init_vector=None):
    """Store the model dimensions and create cells and parameters.

    Args:
        dim_image: image feature dimension.
        dim_embed: word-embedding dimension.
        dim_hidden: LSTM hidden size.
        batch_size: batch size.
        n_lstm_steps: number of LSTM steps.
        n_words: vocabulary size.
        enc_timesteps: number of encoder time steps.
        bias_init_vector: optional array used to initialise
            ``embed_word_b``; when ``None`` the bias comes from
            ``self.init_bias``.
    """
    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so use the builtin directly.
    self.dim_image = int(dim_image)
    self.dim_embed = int(dim_embed)
    self.dim_hidden = int(dim_hidden)
    self.batch_size = int(batch_size)
    self.n_lstm_steps = int(n_lstm_steps)
    self.n_words = int(n_words)
    self.enc_timesteps = int(enc_timesteps)

    # NOTE(review): the extent of this device block is not recoverable from
    # the collapsed source; only the embedding matrix is assumed to be
    # pinned to the CPU - confirm against the original layout.
    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(
            tf.random_uniform([n_words, dim_embed], -0.1, 0.1), name='Wemb')
    self.bemb = self.init_bias(dim_embed, name='bemb')

    # Forward LSTM: one layer, dropout wrapper that keeps every input.
    self.lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
    self.lstm = rnn_cell.DropoutWrapper(self.lstm, input_keep_prob=1)
    self.lstm = rnn_cell.MultiRNNCell([self.lstm])

    # Backward LSTM, built the same way with its own cell.
    self.back_lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
    self.back_lstm = rnn_cell.DropoutWrapper(self.back_lstm,
                                             input_keep_prob=1)
    self.back_lstm = rnn_cell.MultiRNNCell([self.back_lstm])

    # Image feature -> hidden projection.
    self.encode_img_W = tf.Variable(
        tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1),
        name='encode_img_W')
    self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')

    # Hidden -> vocabulary projection.
    self.embed_word_W = tf.Variable(
        tf.random_uniform([dim_hidden, n_words], -0.1, 0.1),
        name='embed_word_W')
    if bias_init_vector is not None:
        self.embed_word_b = tf.Variable(
            bias_init_vector.astype(np.float32), name='embed_word_b')
    else:
        self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
def testLSTMBasicToBlockCell(self):
    """A 2-layer BasicLSTMCell stack and LSTMBlockCell stack must agree.

    Both stacks are built with the same seeded initializer and fed the same
    input and state values; every fetched tensor is compared.
    """
    with self.session(use_gpu=True) as sess:
        x = array_ops.zeros([1, 2])
        x_values = np.random.randn(1, 2)
        m0_val = 0.1 * np.ones([1, 2])
        m1_val = -0.1 * np.ones([1, 2])
        m2_val = -0.2 * np.ones([1, 2])
        m3_val = 0.2 * np.ones([1, 2])
        initializer = init_ops.random_uniform_initializer(-0.01, 0.01,
                                                          seed=19890212)

        def run_stack(scope_name, make_cell):
            # Build a two-layer stack in its own scope, initialise the
            # variables and evaluate the output plus the four new states.
            with variable_scope.variable_scope(scope_name,
                                               initializer=initializer):
                m0 = array_ops.zeros([1, 2])
                m1 = array_ops.zeros([1, 2])
                m2 = array_ops.zeros([1, 2])
                m3 = array_ops.zeros([1, 2])
                stack = rnn_cell.MultiRNNCell(
                    [make_cell() for _ in range(2)], state_is_tuple=True)
                g, ((out_m0, out_m1), (out_m2, out_m3)) = stack(
                    x, ((m0, m1), (m2, m3)))
                sess.run([variables.global_variables_initializer()])
                feed = {
                    x.name: x_values,
                    m0.name: m0_val,
                    m1.name: m1_val,
                    m2.name: m2_val,
                    m3.name: m3_val,
                }
                return sess.run([g, out_m0, out_m1, out_m2, out_m3], feed)

        basic_res = run_stack(
            "basic", lambda: rnn_cell.BasicLSTMCell(2, state_is_tuple=True))
        block_res = run_stack("block", lambda: lstm_ops.LSTMBlockCell(2))

        self.assertEqual(len(basic_res), len(block_res))
        for basic, block in zip(basic_res, block_res):
            self.assertAllClose(basic, block)
def getCell(self, is_training, dp, config):
    """Build a one- or two-layer RNN cell described by ``config``.

    Args:
        is_training: when true (and ``config.keep_prob < 1``) every layer is
            wrapped with input and output dropout.
        dp: unused; kept so existing callers keep working.
        config: object providing ``cell_type`` ("LSTM", "RNN" or "GRU"),
            ``num_layer`` (1 or 2), ``hidden_size``, ``keep_prob`` and, for
            two layers, ``hidden_size_2``.

    Returns:
        A ``MultiRNNCell`` (``state_is_tuple=True``) holding the layers.

    Raises:
        ValueError: for an unsupported ``cell_type`` or ``num_layer``.
    """
    if is_training:
        print("==> Construct ", config.cell_type, " graph for training")
    else:
        print("==> Construct ", config.cell_type, " graph for testing")

    if config.cell_type == "LSTM":
        def make_cell(size):
            return LSTMCell(size, forget_bias=0.0, state_is_tuple=True)
        layer_error = "config.num_layer should be 1:2 "
    elif config.cell_type == "RNN":
        def make_cell(size):
            return BasicRNNCell(size)
        # The old message advertised "[1-3]" although only 1 or 2 layers
        # have ever been supported.
        layer_error = "config.num_layer should be 1:2 "
    elif config.cell_type == "GRU":
        def make_cell(size):
            # TensorFlow's GRUCell has no ``forget_bias``/``state_is_tuple``
            # arguments; passing them raised a TypeError.
            return GRUCell(size)
        layer_error = "only support 1-2 layers "
    else:
        raise ValueError("cell type should be GRU,LSTM,RNN")

    # hidden_size_2 is only read when a second layer is requested.
    if config.num_layer == 1:
        sizes = [config.hidden_size]
    elif config.num_layer == 2:
        sizes = [config.hidden_size, config.hidden_size_2]
    else:
        raise ValueError(layer_error)

    cells = [make_cell(size) for size in sizes]

    # Dropout around each hidden layer, training only.
    if is_training and config.keep_prob < 1:
        cells = [
            DropoutWrapper(layer,
                           input_keep_prob=config.keep_prob,
                           output_keep_prob=config.keep_prob)
            for layer in cells
        ]

    return rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
def __init__(self, seq_length, vocab_size, stack_dimension, batch_size):
    """Build the encoder/decoder graph, its loss and optimizer.

    Creates a session, one input and one label placeholder per time step,
    stacked LSTM encoder and decoder cells, the output projection, the loss
    and an Adam training op, then initialises all variables.

    Args:
        seq_length: number of time steps (placeholders) per sequence.
        vocab_size: first dimension of each placeholder; also used as the
            LSTM size (``self.memory_dim``).
        stack_dimension: number of stacked LSTM layers.
        batch_size: second dimension of each placeholder.
    """
    config = tf.ConfigProto(allow_soft_placement=True)
    self.sess = tf.Session(config=config)
    self.seq_length = seq_length
    self.vocab_size = vocab_size
    self.memory_dim = vocab_size

    self.enc_inp = []
    for t in range(seq_length):
        self.enc_inp.append(
            tf.placeholder(tf.float32, shape=(vocab_size, batch_size),
                           name="enc_inp%i" % t))

    # Decoder inputs: all encoder inputs but the last, followed by a zero
    # "GO" frame.
    # NOTE(review): the GO frame is appended, not prepended - confirm intent.
    go_frame = tf.zeros_like(self.enc_inp[0], dtype=np.float32, name="GO")
    self.dec_inp = self.enc_inp[:-1] + [go_frame]

    encoder_layer = rnn_cell.LSTMCell(self.memory_dim, state_is_tuple=False)
    self.enc_cell = rnn_cell.MultiRNNCell(
        [encoder_layer] * stack_dimension, state_is_tuple=True)
    _, encoder_state = rnn.rnn(self.enc_cell, self.enc_inp,
                               dtype=tf.float32)

    decoder_layer = rnn_cell.LSTMCell(self.memory_dim, state_is_tuple=False)
    self.dec_cell = rnn_cell.MultiRNNCell(
        [decoder_layer] * stack_dimension, state_is_tuple=True)

    # Output projection parameters.
    self.Ws = tf.Variable(
        tf.random_uniform([self.memory_dim, self.vocab_size], 0, 0.1))
    self.bs = tf.Variable(tf.random_uniform([self.vocab_size], -0.1, 0.1))

    self.dec_outputs, self.dec_state = rnn_decoder(
        self.dec_inp, encoder_state, self.dec_cell, self.Ws, self.bs,
        vocab_size, batch_size, self.memory_dim)

    self.labels = []
    for t in range(seq_length):
        self.labels.append(
            tf.placeholder(tf.float32, [vocab_size, batch_size],
                           name='LABEL%i' % t))
    # All-ones weights, one tensor per label placeholder.
    self.weights = [
        tf.ones_like(step_labels, dtype=tf.float32)
        for step_labels in self.labels
    ]

    self.loss = loss(self.labels, self.dec_outputs)
    self.train_op = tf.train.AdamOptimizer(1e-3).minimize(self.loss)
    self.sess.run(tf.initialize_all_variables())
def encoder(self):
    """Project the inputs and encode them with a stacked bidirectional GLSTM.

    Every time step of ``self.encoder_inputs`` goes through a ReLU dense
    layer of ``self.hidden_size`` units before entering the RNN.

    Side effects:
        Sets ``self.encoder_final_state`` to a list with one
        ``LSTMStateTuple`` per layer whose ``c``/``h`` are the forward and
        backward final states concatenated along axis 1.

    Returns:
        Tuple ``(encoder_outputs, encoder_bw_final_state)``: the forward and
        backward outputs concatenated along axis 2, and the backward final
        state as returned by the RNN.
    """
    with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
        # Flatten to 2-D so one dense layer projects every time step.
        flat_inputs = tf.reshape(
            self.encoder_inputs,
            [self.batch_size * self.max_time, self.input_dim])
        projected = tf.layers.dense(inputs=flat_inputs,
                                    units=self.hidden_size,
                                    activation=tf.nn.relu)
        rnn_inputs = tf.reshape(
            projected, [self.batch_size, self.max_time, self.hidden_size])

        encoder_fw_cells = []
        encoder_bw_cells = []
        for i in range(self.num_layers):
            with tf.variable_scope('encoder_lstm_{}'.format(i)):
                # Each direction gets its own cell.  Appending one cell
                # object to both lists made the forward and backward passes
                # use the very same cell.
                encoder_fw_cells.append(
                    tf.contrib.rnn.GLSTMCell(self.hidden_size))
                encoder_bw_cells.append(
                    tf.contrib.rnn.GLSTMCell(self.hidden_size))

        encoder_muti_fw_cell = rnn_cell.MultiRNNCell(encoder_fw_cells)
        encoder_muti_bw_cell = rnn_cell.MultiRNNCell(encoder_bw_cells)

        ((encoder_fw_outputs, encoder_bw_outputs),
         (encoder_fw_final_state, encoder_bw_final_state)) = \
            tf.nn.bidirectional_dynamic_rnn(
                cell_fw=encoder_muti_fw_cell,
                cell_bw=encoder_muti_bw_cell,
                inputs=rnn_inputs,
                sequence_length=self.sequence_length,
                dtype=tf.float32,
                time_major=False)

        encoder_outputs = tf.concat(
            (encoder_fw_outputs, encoder_bw_outputs), 2)

        # Merge the per-layer final states of both directions.
        self.encoder_final_state = []
        for i in range(self.num_layers):
            merged_c = tf.concat(
                (encoder_fw_final_state[i].c, encoder_bw_final_state[i].c),
                1)
            merged_h = tf.concat(
                (encoder_fw_final_state[i].h, encoder_bw_final_state[i].h),
                1)
            self.encoder_final_state.append(
                LSTMStateTuple(c=merged_c, h=merged_h))

        # NOTE(review): the backward final state is returned, not the merged
        # ``self.encoder_final_state`` - kept for existing callers.
        return encoder_outputs, encoder_bw_final_state
def BRNN(x, weight, bias):
    """Two-layer bidirectional LSTM followed by a softmax layer.

    ``n_hidden`` is read from the enclosing scope.

    NOTE(review): only ``output[-1]`` - the backward-direction outputs - is
    read, at its last time index; the forward outputs are unused.  Confirm
    this is intended.

    Args:
        x: input tensor handed to ``tf.nn.bidirectional_dynamic_rnn``.
        weight: matrix multiplied with the selected RNN output.
        bias: bias added before the softmax.

    Returns:
        Softmax probabilities, op name ``"pred"``.
    """

    def two_layer_stack():
        lower = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
        upper = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
        return rnn_cell.MultiRNNCell([lower, upper])

    cell_fw = two_layer_stack()
    cell_bw = two_layer_stack()

    output, out_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, x, dtype=tf.float32)

    # Swap the first two axes so tf.gather can pick a single time step.
    swapped = tf.transpose(output[-1], [1, 0, 2])
    final_index = int(swapped.get_shape()[0]) - 1
    last = tf.gather(swapped, final_index)

    logits = tf.matmul(last, weight) + bias
    return tf.nn.softmax(logits, name="pred")
def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size,
             dim_image, dim_hidden, max_words_q, vocabulary_size,
             drop_out_rate):
    """Store hyper-parameters and create the encoders and embeddings.

    Args:
        rnn_size: number of units of every LSTM layer.
        rnn_layer: layer count used to size the question/answer embeddings.
        batch_size: batch size.
        input_embedding_size: word-embedding dimension.
        dim_image: image feature dimension.
        dim_hidden: size of the joint hidden space.
        max_words_q: maximum number of words in a question.
        vocabulary_size: vocabulary size.
        drop_out_rate: dropout rate; LSTM outputs keep
            ``1 - drop_out_rate``.

    NOTE(review): ``num_output`` is not a parameter of this method; it is
    assumed to be defined at module level - confirm.
    """
    self.rnn_size = rnn_size
    self.rnn_layer = rnn_layer
    self.batch_size = batch_size
    self.input_embedding_size = input_embedding_size
    self.dim_image = dim_image
    self.dim_hidden = dim_hidden
    self.max_words_q = max_words_q
    self.vocabulary_size = vocabulary_size
    self.drop_out_rate = drop_out_rate

    def uniform_variable(shape, name):
        # Every parameter is drawn uniformly from [-0.08, 0.08).
        return tf.Variable(tf.random_uniform(shape, -0.08, 0.08), name=name)

    def peephole_lstm(input_size):
        return rnn_cell.LSTMCell(rnn_size, input_size, use_peepholes=True,
                                 state_is_tuple=False)

    def output_dropout(cell):
        return rnn_cell.DropoutWrapper(
            cell, output_keep_prob=1 - self.drop_out_rate)

    # Before-LSTM word embeddings for questions and answers.
    self.embed_BLSTM_Q_W = uniform_variable(
        [self.vocabulary_size, self.input_embedding_size],
        'embed_BLSTM_Q_W')
    self.embed_BLSTM_A_W = uniform_variable(
        [self.vocabulary_size, self.input_embedding_size],
        'embed_BLSTM_A_W')

    # Question encoder: two peephole LSTM layers with output dropout.
    self.lstm_1_q = peephole_lstm(input_embedding_size)
    self.lstm_dropout_1_q = output_dropout(self.lstm_1_q)
    self.lstm_2_q = peephole_lstm(rnn_size)
    self.lstm_dropout_2_q = output_dropout(self.lstm_2_q)
    self.stacked_lstm_q = rnn_cell.MultiRNNCell(
        [self.lstm_dropout_1_q, self.lstm_dropout_2_q],
        state_is_tuple=False)

    # Answer encoder: same structure with its own cells.
    self.lstm_1_a = peephole_lstm(input_embedding_size)
    self.lstm_dropout_1_a = output_dropout(self.lstm_1_a)
    self.lstm_2_a = peephole_lstm(rnn_size)
    self.lstm_dropout_2_a = output_dropout(self.lstm_2_a)
    self.stacked_lstm_a = rnn_cell.MultiRNNCell(
        [self.lstm_dropout_1_a, self.lstm_dropout_2_a],
        state_is_tuple=False)

    # Question embedding (W1).
    self.embed_Q_W = uniform_variable(
        [2 * rnn_size * rnn_layer, self.dim_hidden], 'embed_Q_W')
    self.embed_Q_b = uniform_variable([self.dim_hidden], 'embed_Q_b')

    # Answer embedding (W3).
    self.embed_A_W = uniform_variable(
        [2 * rnn_size * rnn_layer, self.dim_hidden], 'embed_A_W')
    self.embed_A_b = uniform_variable([self.dim_hidden], 'embed_A_b')

    # Image embedding (W2).
    self.embed_image_W = uniform_variable(
        [dim_image, self.dim_hidden], 'embed_image_W')
    self.embed_image_b = uniform_variable([dim_hidden], 'embed_image_b')

    # Score embedding (W4).
    self.embed_scor_W = uniform_variable(
        [dim_hidden, num_output], 'embed_scor_W')
    self.embed_scor_b = uniform_variable([num_output], 'embed_scor_b')

    # Question-image embedding.
    self.embed_QI_W = uniform_variable([dim_hidden, dim_hidden],
                                       'embed_QI_W')
    self.embed_QI_b = uniform_variable([dim_hidden], 'embed_QI_b')
def h_rnn(input):
    """Reduce a sequence level by level with stacked LSTMs.

    On every level the current sequence is cut into segments of at most
    ``self.seg_len`` items, ``_rnn`` is run on each segment and the returned
    states form the next level's sequence.  Levels are added until one fits
    into a single segment.  ``self`` and ``_rnn`` come from the enclosing
    scope.

    Args:
        input: the bottom-level sequence (must support ``len`` and slicing).

    Returns:
        The first state of the top level.
    """
    levels = [input]
    depth = 0
    while True:
        print(depth)
        levels.append([])
        current = levels[depth]
        length = len(current)
        with tf.variable_scope("RNN_" + str(depth)) as scope:
            cell = rnn_cell.BasicLSTMCell(self.dim)
            cell = rnn_cell.DropoutWrapper(
                cell, output_keep_prob=self.keep_prob)
            stacked_cell = rnn_cell.MultiRNNCell(
                [cell] * self.number_of_layers)
            start = 0
            while start < length:
                stop = min(start + self.seg_len, length)
                state = _rnn(stacked_cell, current[start:stop])
                levels[depth + 1].append(state)
                # Later segments of this level reuse the variables that the
                # first segment created.
                scope.reuse_variables()
                start += self.seg_len
        depth += 1
        if length <= self.seg_len:
            break
    return levels[depth][0]
def model():
    """Run a two-layer LSTM over ``covariates`` and project every output.

    Uses ``covariates``, ``N``, ``datalen``, ``rnn_size``, ``weights`` and
    ``biases`` from the enclosing scope.

    Returns:
        The projected outputs with their first two axes swapped.
    """
    x = tf.transpose(covariates, [1, 0, 2])
    # Reshape the *transposed* tensor.  The original reshaped ``covariates``
    # again, which silently discarded the transpose above.
    x = tf.reshape(x, [-1, N])
    x = tf.split(0, datalen, x)

    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    multi_cell = rnn_cell.MultiRNNCell([lstm_cell] * 2, state_is_tuple=True)

    # NOTE(review): ``x`` is a list of per-step tensors here, the format of
    # the static ``rnn.rnn`` API, yet it is passed to ``dynamic_rnn`` -
    # confirm this combination is what is intended.
    outputs, _ = rnn.dynamic_rnn(multi_cell, x, dtype=tf.float32)

    output = tf.batch_matmul(outputs, weights) + biases
    return tf.transpose(output, [1, 0, 2])
def benchmarkTfRNNLSTMBlockCellTraining(self):
    """Benchmark gradients of a static multi-layer LSTMBlockCell RNN.

    For every configuration from ``self._GetTestConfig()`` a fresh graph is
    built on ``/gpu:0`` and the grouped gradients of the outputs and final
    state w.r.t. all trainable variables are timed.
    """
    for config_name, config in self._GetTestConfig().items():
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with ops.Graph().as_default(), ops.device("/gpu:0"):
            # One zero tensor reused as the input of every time step.
            step_input = array_ops.zeros([batch_size, num_units],
                                         dtypes.float32)
            inputs = seq_length * [step_input]

            layers = [
                lstm_ops.LSTMBlockCell(num_units=num_units)
                for _ in range(num_layers)
            ]
            multi_cell = rnn_cell.MultiRNNCell(layers)
            outputs, final_state = core_rnn.static_rnn(
                multi_cell, inputs, dtype=dtypes.float32)

            trainable_variables = ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            gradients = gradients_impl.gradients([outputs, final_state],
                                                 trainable_variables)
            training_op = control_flow_ops.group(*gradients)

            benchmark_name = "tf_rnn_lstm_block_cell %s %s" % (
                config_name, self._GetConfigDesc(config))
            self._BenchmarkOp(training_op, benchmark_name)
def benchmarkTfRNNLSTMTraining(self):
    """Benchmark gradients of a static multi-layer LSTMCell RNN.

    For every configuration from ``self._GetTestConfig()`` a fresh graph is
    built on ``/gpu:0`` and the grouped gradients of the outputs and final
    state w.r.t. all trainable variables are timed.
    """
    test_configs = self._GetTestConfig()
    for config_name, config in test_configs.items():
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with ops.Graph().as_default(), ops.device("/gpu:0"):
            inputs = seq_length * [
                array_ops.zeros([batch_size, num_units], dtypes.float32)
            ]
            initializer = init_ops.random_uniform_initializer(-0.01, 0.01,
                                                              seed=127)

            # ``cell`` must be a factory: the original bound an LSTMCell
            # *instance* to it and then invoked ``cell()`` with no
            # arguments, which fails.  Defaults bind the loop values.
            def cell(num_units=num_units, initializer=initializer):
                return rnn_cell.LSTMCell(num_units=num_units,
                                         initializer=initializer,
                                         state_is_tuple=True)

            multi_cell = rnn_cell.MultiRNNCell(
                [cell() for _ in range(num_layers)])
            outputs, final_state = core_rnn.static_rnn(
                multi_cell, inputs, dtype=dtypes.float32)

            trainable_variables = ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            gradients = gradients_impl.gradients([outputs, final_state],
                                                 trainable_variables)
            training_op = control_flow_ops.group(*gradients)

            self._BenchmarkOp(
                training_op,
                "tf_rnn_lstm %s %s" % (config_name,
                                       self._GetConfigDesc(config)))
def RNN(x):
    """Two-layer basic RNN with a sigmoid output layer on the last step.

    ``n_input``, ``n_steps``, ``n_hidden`` and ``n_classes`` are read from
    the enclosing scope.

    Args:
        x: input of shape (batch_size, n_steps, n_input).

    Returns:
        ``sigmoid(outputs[-1] * weights_out + biases_out)``.
    """
    # (batch_size, n_steps, n_input) -> (n_steps, batch_size, n_input)
    time_major = tf.transpose(x, [1, 0, 2])
    # -> (n_steps * batch_size, n_input)
    flattened = tf.reshape(time_major, [-1, n_input])
    # -> list of n_steps tensors of shape (batch_size, n_input)
    step_inputs = tf.split(0, n_steps, flattened)

    # A plain (non-LSTM) recurrent cell, repeated for two layers.
    base_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    stacked = rnn_cell.MultiRNNCell([base_cell] * 2)

    outputs, states = rnn.rnn(stacked, step_inputs, dtype=tf.float32)

    weights_out = tf.get_variable(
        name="weights_out",
        shape=[n_hidden, n_classes],
        initializer=tf.truncated_normal_initializer())
    biases_out = tf.get_variable(
        name="biases_out",
        shape=[n_classes],
        initializer=tf.truncated_normal_initializer())

    # Only the output of the final time step feeds the output layer.
    logits = tf.matmul(outputs[-1], weights_out) + biases_out
    return tf.sigmoid(logits)
def _create_encoder(self, args):
    """Build the LSTM encoder producing the latent distribution parameters.

    Args:
        args: object providing ``encoder_size``, ``num_encoder_layers``,
            ``batch_size`` and ``z_dim``.

    Side effects:
        Sets ``self.full_lstm``, ``self.lstm_state``, ``self.final_state``,
        ``self.z_mean`` and ``self.z_logstd``.
    """
    # LSTM portion of the network.
    base_cell = rnn_cell.LSTMCell(
        args.encoder_size,
        state_is_tuple=True,
        initializer=initializers.xavier_initializer())
    self.full_lstm = rnn_cell.MultiRNNCell(
        [base_cell] * args.num_encoder_layers, state_is_tuple=True)
    self.lstm_state = self.full_lstm.zero_state(args.batch_size, tf.float32)

    # Forward pass over a single step: states and actions joined on axis 1.
    encoder_input = tf.concat(1, [self.states_encode, self.actions_encode])
    step_outputs, self.final_state = seq2seq.rnn_decoder(
        [encoder_input], self.lstm_state, self.full_lstm)
    flat_output = tf.reshape(tf.concat(1, step_outputs),
                             [-1, args.encoder_size])

    # Fully connected layer to the latent distribution parameters.
    latent_w = tf.get_variable(
        "latent_w", [args.encoder_size, 2 * args.z_dim],
        initializer=initializers.xavier_initializer())
    latent_b = tf.get_variable("latent_b", [2 * args.z_dim])
    latent_params = tf.nn.xw_plus_b(flat_output, latent_w, latent_b)

    # First half is the mean, second half the log standard deviation.
    self.z_mean, self.z_logstd = tf.split(1, 2, latent_params)
def RNN(x, weights, biases):
    """Stacked LSTM with a linear read-out of the final time step.

    ``RNN_IN_DIMENS``, ``SEQUENCE_LENGTH``, ``RNN_NEURONS`` and
    ``RNN_LAYERS`` are read from the enclosing scope.

    Args:
        x: input tensor; reshaped to rows of ``RNN_IN_DIMENS`` values.
        weights: dict whose ``'out'`` entry is the output weight matrix.
        biases: dict whose ``'out'`` entry is the output bias.

    Returns:
        Tuple ``(outputs, states, logits)`` where ``logits`` is computed
        from ``outputs[-1]``.
    """
    flattened = tf.reshape(x, [-1, RNN_IN_DIMENS])
    step_inputs = tf.split(0, SEQUENCE_LENGTH, flattened)

    base_cell = rnn_cell.BasicLSTMCell(RNN_NEURONS, forget_bias=1.0,
                                       state_is_tuple=False)
    stacked_lstm = rnn_cell.MultiRNNCell([base_cell] * RNN_LAYERS,
                                         state_is_tuple=False)

    outputs, states = rnn.rnn(stacked_lstm, step_inputs, dtype=tf.float32)
    logits = tf.matmul(outputs[-1], weights['out']) + biases['out']
    return (outputs, states, logits)
def model(self): """ Builds the Tensorflow graph :return: """ print('Building model\n') # We don't want to modify to original tensor x = self.x # Reshape input into a list of tensors of the correct size x = tf.transpose(x, [1, 0, 2]) x = tf.reshape(x, [-1, INPUT_SIZE]) # Since we're using one pixel at a time, transform list of vector of # 784x1 x = tf.split(0, STEPS, x) # Define LSTM cells and get outputs list and states gru = rnn_cell.GRUCell(self.num_hid_units) gru = rnn_cell.DropoutWrapper(gru, output_keep_prob=1) gru = rnn_cell.MultiRNNCell([gru] * self.num_hid_layers) outputs, state = rnn.rnn(gru, x, dtype=tf.float32) # First affine-transformation - output from last input y1 = tf.matmul(outputs[-1], self.weights_H2L) + self.bias_H2L y2 = tf.nn.relu(y1) y_pred = tf.matmul(y2, self.weights_L2O) + self.bias_L2O return y_pred
def RNN(x, is_training, weights, biases):
    """Two-layer LSTM with a linear read-out of the final time step.

    ``n_input``, ``n_time_step``, ``n_hidden_1``, ``n_hidden_2`` and
    ``keep_prob`` are read from the enclosing scope.

    Args:
        x: input tensor; transposed with ``[1, 0, 2]`` and split into
            ``n_time_step`` pieces.
        is_training: enables output dropout when ``keep_prob < 1``.
        weights: dict whose ``'out'`` entry is the output weight matrix.
        biases: dict whose ``'out'`` entry is the output bias.

    Returns:
        ``output[-1] * weights['out'] + biases['out']``.
    """
    time_major = tf.transpose(x, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, n_input])
    step_inputs = tf.split(0, n_time_step, flattened)

    layers = [
        rnn_cell.LSTMCell(n_hidden_1, forget_bias=0.8),
        rnn_cell.LSTMCell(n_hidden_2, forget_bias=0.8),
    ]
    if is_training and keep_prob < 1:
        # Output dropout on both layers, training only.
        layers = [
            rnn_cell.DropoutWrapper(layer, output_keep_prob=keep_prob)
            for layer in layers
        ]
    cell = rnn_cell.MultiRNNCell(layers)

    output, states = rnn.rnn(cell, step_inputs, dtype=tf.float32)

    return tf.matmul(output[-1], weights['out']) + biases['out']
def _create_lstm_policy(self, args):
    """Build the LSTM policy head producing the action mean and log-std.

    Args:
        args: object providing ``policy_size``, ``num_policy_layers``,
            ``batch_size`` and ``action_dim``.

    Side effects:
        Sets ``self.full_lstm``, ``self.lstm_state``, ``self.final_state``,
        ``self.a_mean`` and ``self.a_logstd``.
    """
    # LSTM portion of the network.
    base_cell = rnn_cell.LSTMCell(
        args.policy_size,
        state_is_tuple=True,
        initializer=initializers.xavier_initializer())
    self.full_lstm = rnn_cell.MultiRNNCell(
        [base_cell] * args.num_policy_layers, state_is_tuple=True)
    self.lstm_state = self.full_lstm.zero_state(args.batch_size, tf.float32)

    # Forward pass over a single step fed with self.states.
    policy_input = self.states
    step_outputs, self.final_state = seq2seq.rnn_decoder(
        [policy_input], self.lstm_state, self.full_lstm)
    flat_output = tf.reshape(tf.concat(1, step_outputs),
                             [-1, args.policy_size])

    # Fully connected layer to the action mean.
    lstm_w = tf.get_variable(
        "lstm_w", [args.policy_size, args.action_dim],
        initializer=initializers.xavier_initializer())
    lstm_b = tf.get_variable("lstm_b", [args.action_dim])
    self.a_mean = tf.nn.xw_plus_b(flat_output, lstm_w, lstm_b)

    # Log standard deviation starts at zero.
    self.a_logstd = tf.Variable(np.zeros(args.action_dim), name="a_logstd",
                                dtype=tf.float32)
def RNN(x, weights, biases, type, layer_norm):
    """Stacked RNN of a selectable cell type with a linear read-out.

    ``n_input``, ``n_steps``, ``n_hidden`` and ``FLAGS.layers`` are read
    from the enclosing scope.

    Args:
        x: input of shape (batch_size, n_steps, n_input).
        weights: dict whose ``'out'`` entry is the output weight matrix.
        biases: dict whose ``'out'`` entry is the output bias.
        type: one of "LSTM", "GRU", "BasicRNN", "LNGRU", "LNLSTM" or
            "HyperLnLSTMCell".
        layer_norm: passed as ``is_layer_norm`` to ``HyperLnLSTMCell``.

    Returns:
        ``outputs[-1] * weights['out'] + biases['out']``.

    Raises:
        ValueError: if ``type`` is not a supported cell name.
    """
    # Permute batch_size and n_steps.
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps * batch_size, n_input).
    x = tf.reshape(x, [-1, n_input])
    # Split into a list of n_steps tensors of shape (batch_size, n_input).
    x = tf.split(0, n_steps, x)

    # Factories, so that only the requested cell is ever constructed.
    cell_factories = {
        "LSTM": lambda: rnn_cell.BasicLSTMCell(n_hidden),
        "GRU": lambda: rnn_cell.GRUCell(n_hidden),
        "BasicRNN": lambda: rnn_cell.BasicRNNCell(n_hidden),
        "LNGRU": lambda: LNGRUCell(n_hidden),
        "LNLSTM": lambda: LNBasicLSTMCell(n_hidden),
        'HyperLnLSTMCell': lambda: HyperLnLSTMCell(
            n_hidden, is_layer_norm=layer_norm),
    }
    if type not in cell_factories:
        # Previously an unknown name produced ``None`` cells and failed
        # later with an unrelated error.
        raise ValueError("Unknown cell type %r; expected one of %s" %
                         (type, sorted(cell_factories)))
    lstm_cell = cell_factories[type]()
    cell = rnn_cell.MultiRNNCell([lstm_cell] * FLAGS.layers)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print("Using %s model" % type)

    outputs, states = rnn.rnn(cell, x, dtype=tf.float32)

    # Linear activation on the last output of the RNN inner loop.
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def RNN(x):
    """Three-layer LSTM followed by two sigmoid layers (2 units, then 1).

    ``n_input``, ``n_steps`` and ``n_hidden`` are read from the enclosing
    scope.

    Args:
        x: input of shape (batch_size, n_steps, n_input).

    Returns:
        Tuple ``(prediction, drawing_layer)``: the final sigmoid output and
        the 2-unit sigmoid layer it is computed from.
    """
    # (batch_size, n_steps, n_input) -> (n_steps, batch_size, n_input)
    time_major = tf.transpose(x, [1, 0, 2])
    # -> (n_steps * batch_size, n_input)
    flattened = tf.reshape(time_major, [-1, n_input])
    # -> list of n_steps tensors of shape (batch_size, n_input)
    step_inputs = tf.split(0, n_steps, flattened)

    base_cell = tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0,
                                        state_is_tuple=True)
    stacked = rnn_cell.MultiRNNCell([base_cell] * 3, state_is_tuple=True)

    outputs, states = rnn.rnn(stacked, step_inputs, dtype=tf.float32)

    def normal_variable(name, shape):
        return tf.get_variable(
            name=name, shape=shape,
            initializer=tf.truncated_normal_initializer())

    weights_2 = normal_variable("weights_2", [n_hidden, 2])
    biases_2 = normal_variable("biases_2", [2])
    weights_1 = normal_variable("weights_1", [2, 1])
    biases_1 = normal_variable("biases_1", [1])

    # 2-unit layer computed from the output of the last time step.
    drawing_layer = tf.sigmoid(
        tf.matmul(outputs[-1], weights_2) + biases_2)
    prediction = tf.sigmoid(tf.matmul(drawing_layer, weights_1) + biases_1)
    return prediction, drawing_layer
def testLSTMBlockCell(self):
    """Check a 2-layer LSTMBlockCell stack against known output values."""
    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            x = array_ops.zeros([1, 2])
            m0 = array_ops.zeros([1, 2])
            m1 = array_ops.zeros([1, 2])
            m2 = array_ops.zeros([1, 2])
            m3 = array_ops.zeros([1, 2])

            stack = rnn_cell.MultiRNNCell(
                [lstm_ops.LSTMBlockCell(2) for _ in range(2)],
                state_is_tuple=True)
            g, ((out_m0, out_m1), (out_m2, out_m3)) = stack(
                x, ((m0, m1), (m2, m3)))
            sess.run([variables.global_variables_initializer()])

            # Input of ones, every state tensor filled with 0.1.
            feed = {x.name: np.array([[1., 1.]])}
            for state in (m0, m1, m2, m3):
                feed[state.name] = 0.1 * np.ones([1, 2])
            res = sess.run([g, out_m0, out_m1, out_m2, out_m3], feed)

            self.assertEqual(len(res), 5)
            # Expected values after the first one are from
            # testBasicLSTMCell and only test c/h.
            expected = [
                [[0.24024698, 0.24024698]],
                [[0.68967271, 0.68967271]],
                [[0.44848421, 0.44848421]],
                [[0.39897051, 0.39897051]],
                [[0.24024698, 0.24024698]],
            ]
            for actual, wanted in zip(res, expected):
                self.assertAllClose(actual, wanted)
def RNN(x, weight, bias):
    """Two-layer LSTM with a softmax layer on the last time step.

    ``n_hidden`` is read from the enclosing scope.

    Args:
        x: input tensor handed to ``tf.nn.dynamic_rnn``.
        weight: matrix multiplied with the last RNN output.
        bias: bias added before the softmax.

    Returns:
        Softmax probabilities.
    """
    # One LSTMCell object per layer.  ``[cell] * 2`` put the *same* object
    # in both layers, which newer TensorFlow 1.x releases either reject or
    # turn into weights shared between the layers.
    layers = [
        rnn_cell.LSTMCell(n_hidden, state_is_tuple=True) for _ in range(2)
    ]
    cell = rnn_cell.MultiRNNCell(layers)

    output, state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

    # Swap the first two axes so tf.gather can pick a single time step.
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)

    return tf.nn.softmax(tf.matmul(last, weight) + bias)
def _build_graph(self, input_vars):
    """Build the stacked-LSTM language-model graph.

    Args:
        input_vars: pair ``(input, nextinput)`` - the token ids and the
            ids to predict.

    Side effects:
        Sets ``self.initial``, ``self.last_state``, ``self.prob`` and
        ``self.cost`` and registers a histogram summary for all ``W``.
    """
    tokens, next_tokens = input_vars

    base_cell = rnn_cell.BasicLSTMCell(num_units=param.rnn_size)
    cell = rnn_cell.MultiRNNCell([base_cell] * param.num_rnn_layer)

    # Zero state sized by the dynamic first dimension of the input.
    self.initial = initial = cell.zero_state(
        tf.shape(tokens)[0], tf.float32)

    embeddingW = tf.get_variable('embedding',
                                 [param.vocab_size, param.rnn_size])
    # B x seqlen x rnnsize
    input_feature = tf.nn.embedding_lookup(embeddingW, tokens)

    # seqlen x (B x 1 x rnnsize), then drop the singleton axis.
    step_features = tf.split(1, param.seq_len, input_feature)
    input_list = [tf.squeeze(step, [1]) for step in step_features]

    # seqlen is 1 in inference, so no loop_function is needed.
    outputs, last_state = rnn.rnn(cell, input_list, initial, scope='rnnlm')
    self.last_state = tf.identity(last_state, 'last_state')

    # seqlen x (B x rnnsize) -> (B x seqlen) x rnnsize
    output = tf.reshape(tf.concat(1, outputs), [-1, param.rnn_size])

    logits = FullyConnected('fc', output, param.vocab_size, nl=tf.identity)
    self.prob = tf.nn.softmax(logits / param.softmax_temprature)

    xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, symbolic_functions.flatten(next_tokens))
    self.cost = tf.reduce_mean(xent_loss, name='cost')

    # Monitor a histogram of every W.
    summary.add_param_summary([('.*/W', ['histogram'])])
def model(self):
    """Build the GRU graph over ``self.x`` and return its predictions.

    Returns:
        A ``(y_ns, y_pred)`` pair: the affine outputs before the sigmoid and
        the same outputs after the sigmoid, each cut to the first 783 columns.
    """
    print('Building model\n')

    # Work on a separate name so self.x itself is never rebound.
    time_major = tf.transpose(self.x, [1, 0, 2])
    flat_inputs = tf.reshape(time_major, [-1, INPUT_SIZE])
    # One tensor per step, as rnn.rnn expects a list of inputs.
    step_inputs = tf.split(0, STEPS, flat_inputs)

    # GRU cell wrapped in dropout; keep probability 1 means nothing is dropped.
    cell = rnn_cell.DropoutWrapper(
        rnn_cell.GRUCell(self.num_hid_units), output_keep_prob=1)
    if self.num_hid_layers > 1:
        cell = rnn_cell.MultiRNNCell([cell] * self.num_hid_layers)
    step_outputs, state = rnn.rnn(cell, step_inputs, dtype=tf.float32)

    # Turn result back into [batch_size, steps, hidden_units] format, then
    # flatten to [batch_size x steps, hidden_units] for the matrix multiply.
    batch_major = tf.transpose(step_outputs, [1, 0, 2])
    hidden = tf.reshape(batch_major, [-1, self.num_hid_units])

    # Affine projection, then one row per example with STEPS columns.
    projected = tf.matmul(hidden, self.weights_H2O) + self.bias_H2O
    per_step = tf.reshape(projected, [-1, STEPS])

    # Keep both the raw values (sigmoid applied later in the cost function)
    # and the sigmoid predictions.
    # NOTE(review): 783 is hard-coded; presumably STEPS - 1 -- confirm.
    y_ns = per_step[:, :783]
    y_pred = tf.sigmoid(per_step)[:, :783]
    return y_ns, y_pred
def _get_rnn_cell(cell_type, num_units, num_layers):
    """Constructs and return an `RNNCell`.

    Args:
      cell_type: either a string identifying the `RNNCell` type, or a subclass
        of `RNNCell`.
      num_units: the number of units in the `RNNCell`.
      num_layers: the number of layers in the RNN.

    Returns:
      An initialized `RNNCell`. When `num_layers` is greater than one this is a
      `MultiRNNCell` wrapping `num_layers` separately constructed cells.

    Raises:
      ValueError: `cell_type` is an invalid `RNNCell` name.
      TypeError: `cell_type` is not a string or a subclass of `RNNCell`.
    """
    if isinstance(cell_type, str):
        # Keep the requested name: the lookup result replaces `cell_type`, and
        # the error message must report what the caller asked for, not `None`.
        requested_name = cell_type
        cell_type = _CELL_TYPES.get(requested_name)
        if cell_type is None:
            raise ValueError('The supported cell types are {}; got {}'.format(
                list(_CELL_TYPES.keys()), requested_name))

    # `issubclass` itself raises a generic TypeError for non-class arguments,
    # so check for a class first to always produce the descriptive message.
    is_rnn_cell_class = (isinstance(cell_type, type) and
                         issubclass(cell_type, rnn_cell.RNNCell))
    if not is_rnn_cell_class:
        raise TypeError(
            'cell_type must be a subclass of RNNCell or one of {}.'.format(
                list(_CELL_TYPES.keys())))

    if num_layers > 1:
        # One cell object per layer: repeating a single instance
        # (`[cell] * num_layers`) is rejected by TensorFlow 1.1 and makes the
        # layers share parameters from TensorFlow 1.2 on.
        layers = [cell_type(num_units=num_units) for _ in range(num_layers)]
        return rnn_cell.MultiRNNCell(layers, state_is_tuple=True)
    return cell_type(num_units=num_units)
def __init__(self, rnn_size, num_layers, batch_size, seq_length, vocab_size,
             grad_clip, infer=False):
    """
    Build the graph for a stacked-GRU sequence model and its training op.

    @param rnn_size: The size of the RNN
    @param num_layers: The number of layers for the RNN to have
    @param batch_size: The batch size to train with
    @param seq_length: The length of the sequences to use in training
    @param vocab_size: The size of the vocab
    @param grad_clip: The point at which to clip the gradient in the gradient descent
    @param infer: When true, batch_size and seq_length are both forced to 1 and
                  the decoder feeds the embedding of its own argmax prediction
                  back in as the next input
    """
    #TODO: During training, (and when sampling), the input to the RNN should be
    # the list of ingredients that goes with that recipe text.
    if infer:
        batch_size, seq_length = 1, 1

    # GRU is used here; BasicLSTMCell was the alternative noted by the author.
    single_layer = rnn_cell.GRUCell(rnn_size)
    stacked = rnn_cell.MultiRNNCell([single_layer] * num_layers)
    self.cell = stacked

    data_shape = [batch_size, seq_length]
    self.input_data = tf.placeholder(tf.int32, data_shape)
    self.targets = tf.placeholder(tf.int32, data_shape)
    self.initial_state = stacked.zero_state(batch_size, tf.float32)

    with tf.variable_scope("rnnlm"):
        softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)
            # One tensor per time step, each with the time axis squeezed out.
            inputs = [tf.squeeze(piece, [1])
                      for piece in tf.split(1, seq_length, embedded)]

    def feed_previous(prev, _):
        # Project the previous output to vocabulary scores, pick the best
        # symbol without back-propagating through the choice, and embed it.
        scores = tf.matmul(prev, softmax_w) + softmax_b
        best_symbol = tf.stop_gradient(tf.argmax(scores, 1))
        return tf.nn.embedding_lookup(embedding, best_symbol)

    if infer:
        loop_func = feed_previous
    else:
        loop_func = None

    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, stacked,
        loop_function=loop_func, scope="rnnlm")

    flat_output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
    self.logits = tf.matmul(flat_output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)

    flat_targets = tf.reshape(self.targets, [-1])
    uniform_weights = tf.ones([batch_size * seq_length])
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [flat_targets], [uniform_weights], vocab_size)

    # Summed loss divided by batch size, then by sequence length.
    self.cost = tf.reduce_sum(loss) / batch_size / seq_length
    self.final_state = last_state

    # Learning rate is a non-trainable variable starting at 0.0.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    raw_grads = tf.gradients(self.cost, tvars)
    grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))