def __init__(self, is_training, glove_word_vectors, vocabulary, config):
    self.size = config.hidden_size
    self.config = config
    self.is_training = is_training
    self.word_vec_size = config.word_vec_size
    vocab_size = config.vocab_size
    self.glove_word_vectors = glove_word_vectors
    self.vocabulary = vocabulary

    # Slightly better results can be obtained with forget gate biases
    # initialized to 1, but the hyperparameters of the model would need to
    # differ from those reported in the paper.
    # TODO: results might improve with LSTMCell, which has extra features
    # (peepholes, cell clipping), but that requires passing sentence_length.
    with tf.variable_scope("LeftLSTM"):
        self.left_lstm_cell = rnn_cell.BasicLSTMCell(self.size, forget_bias=1.0)
    with tf.variable_scope("RightLSTM"):
        self.right_lstm_cell = rnn_cell.BasicLSTMCell(self.size, forget_bias=1.0)

    if is_training and config.keep_prob < 1:
        with tf.variable_scope("LeftLSTM"):
            self.left_lstm_cell = rnn_cell.DropoutWrapper(
                self.left_lstm_cell, output_keep_prob=config.keep_prob)
        with tf.variable_scope("RightLSTM"):
            self.right_lstm_cell = rnn_cell.DropoutWrapper(
                self.right_lstm_cell, output_keep_prob=config.keep_prob)

    with tf.variable_scope("LeftLSTM"):
        self.left_lstm_cell = rnn_cell.MultiRNNCell(
            [self.left_lstm_cell] * config.num_layers)
    with tf.variable_scope("RightLSTM"):
        self.right_lstm_cell = rnn_cell.MultiRNNCell(
            [self.right_lstm_cell] * config.num_layers)
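# A sketch of the TODO above: the fuller LSTMCell adds peepholes and cell
# clipping, at the cost of threading sentence_length through to rnn.rnn at
# call time. The argument values are illustrative assumptions, reusing the
# `config` object from the constructor above.
with tf.variable_scope("LeftLSTM"):
    left_cell = rnn_cell.LSTMCell(config.hidden_size, config.word_vec_size,
                                  use_peepholes=True, cell_clip=10.0)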
def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size,
             dim_image, dim_hidden, max_words_q, vocabulary_size, drop_out_rate):
    self.rnn_size = rnn_size
    self.rnn_layer = rnn_layer
    self.batch_size = batch_size
    self.input_embedding_size = input_embedding_size
    self.dim_image = dim_image
    self.dim_hidden = dim_hidden
    self.max_words_q = max_words_q
    self.vocabulary_size = vocabulary_size
    self.drop_out_rate = drop_out_rate

    # Question embedding
    self.embed_ques_W = tf.Variable(
        tf.random_uniform([self.vocabulary_size, self.input_embedding_size],
                          -0.08, 0.08),
        name='embed_ques_W')

    # RNN encoder
    self.lstm_1 = rnn_cell.LSTMCell(rnn_size, input_embedding_size,
                                    use_peepholes=True)
    self.lstm_dropout_1 = rnn_cell.DropoutWrapper(
        self.lstm_1, output_keep_prob=1 - self.drop_out_rate)
    self.lstm_2 = rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True)
    self.lstm_dropout_2 = rnn_cell.DropoutWrapper(
        self.lstm_2, output_keep_prob=1 - self.drop_out_rate)
    self.stacked_lstm = rnn_cell.MultiRNNCell(
        [self.lstm_dropout_1, self.lstm_dropout_2])

    # State embedding: the LSTM state concatenates c and h, hence the factor of 2
    self.embed_state_W = tf.Variable(
        tf.random_uniform([2 * rnn_size * rnn_layer, self.dim_hidden], -0.08, 0.08),
        name='embed_state_W')
    self.embed_state_b = tf.Variable(
        tf.random_uniform([self.dim_hidden], -0.08, 0.08), name='embed_state_b')

    # Image embedding
    self.embed_image_W = tf.Variable(
        tf.random_uniform([dim_image, self.dim_hidden], -0.08, 0.08),
        name='embed_image_W')
    self.embed_image_b = tf.Variable(
        tf.random_uniform([dim_hidden], -0.08, 0.08), name='embed_image_b')

    # Score embedding (num_output is assumed to be a module-level constant,
    # e.g. the number of candidate answers)
    self.embed_scor_W = tf.Variable(
        tf.random_uniform([dim_hidden, num_output], -0.08, 0.08),
        name='embed_scor_W')
    self.embed_scor_b = tf.Variable(
        tf.random_uniform([num_output], -0.08, 0.08), name='embed_scor_b')
def build_lm_multicell_rnn(num_layers, hidden_size, word_proj_size,
                           use_lstm=True, hidden_projection=None,
                           input_feeding=False, dropout=0.0):
    if use_lstm:
        print("I'm building the model with LSTM cells")
        cell_class = rnn_cell.LSTMCell
    else:
        print("I'm building the model with GRU cells")
        if hidden_projection is not None:
            print("I'm ignoring the projection size for GRUs.")
            hidden_projection = None
        cell_class = GRUCell

    initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1, seed=1234)

    if input_feeding:
        lm_cell0 = cell_class(num_units=hidden_size,
                              input_size=word_proj_size + hidden_size,
                              initializer=initializer, num_proj=hidden_projection)
    else:
        lm_cell0 = cell_class(num_units=hidden_size, input_size=hidden_size,
                              initializer=initializer, num_proj=hidden_projection)
    lm_cell0 = rnn_cell.DropoutWrapper(lm_cell0, output_keep_prob=1.0 - dropout)

    if num_layers > 1:
        hidden_input = hidden_size
        if hidden_projection is not None:
            hidden_input = hidden_projection
        lm_cell1 = cell_class(num_units=hidden_size, input_size=hidden_input,
                              initializer=initializer, num_proj=hidden_projection)
        lm_cell1 = rnn_cell.DropoutWrapper(lm_cell1, output_keep_prob=1.0 - dropout)
        lm_rnncell = rnn_cell.MultiRNNCell([lm_cell0] + [lm_cell1] * (num_layers - 1))
    else:
        lm_rnncell = rnn_cell.MultiRNNCell([lm_cell0])
    return lm_rnncell
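# Example call (a sketch; the sizes below are illustrative assumptions, not
# values taken from any particular configuration):
lm_cell = build_lm_multicell_rnn(num_layers=2, hidden_size=512,
                                 word_proj_size=128, use_lstm=True,
                                 hidden_projection=256, dropout=0.2)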
def build_nmt_multicell_rnn(num_layers_encoder, num_layers_decoder, encoder_size,
                            decoder_size, source_proj_size, use_lstm=True,
                            input_feeding=True, dropout=0.0):
    if use_lstm:
        print("I'm building the model with LSTM cells")
        cell_class = rnn_cell.LSTMCell
    else:
        print("I'm building the model with GRU cells")
        cell_class = GRUCell

    initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1, seed=1234)

    encoder_cell = cell_class(num_units=encoder_size, input_size=source_proj_size,
                              initializer=initializer)
    if input_feeding:
        decoder_cell0 = cell_class(num_units=decoder_size,
                                   input_size=decoder_size * 2,
                                   initializer=initializer)
    else:
        decoder_cell0 = cell_class(num_units=decoder_size, input_size=decoder_size,
                                   initializer=initializer)

    # dropout of 0.0 makes the wrapper a no-op, so it is applied unconditionally
    encoder_cell = rnn_cell.DropoutWrapper(encoder_cell,
                                           output_keep_prob=1.0 - dropout)
    encoder_rnncell = rnn_cell.MultiRNNCell([encoder_cell] * num_layers_encoder)

    decoder_cell0 = rnn_cell.DropoutWrapper(decoder_cell0,
                                            output_keep_prob=1.0 - dropout)
    if num_layers_decoder > 1:
        decoder_cell1 = cell_class(num_units=decoder_size, input_size=decoder_size,
                                   initializer=initializer)
        decoder_cell1 = rnn_cell.DropoutWrapper(decoder_cell1,
                                                output_keep_prob=1.0 - dropout)
        decoder_rnncell = rnn_cell.MultiRNNCell(
            [decoder_cell0] + [decoder_cell1] * (num_layers_decoder - 1))
    else:
        decoder_rnncell = rnn_cell.MultiRNNCell([decoder_cell0])
    return encoder_rnncell, decoder_rnncell
def __init__(self, dim_image, n_words, dim_hidden, batch_size, n_lstm_steps,
             drop_out_rate, bias_init_vector=None):
    self.dim_image = dim_image
    self.n_words = n_words
    self.dim_hidden = dim_hidden
    self.batch_size = batch_size
    self.n_lstm_steps = n_lstm_steps
    self.drop_out_rate = drop_out_rate

    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_hidden], -0.1, 0.1),
                                name='Wemb')

    self.lstm3 = rnn_cell.LSTMCell(self.dim_hidden, 2 * self.dim_hidden,
                                   use_peepholes=True)
    self.lstm3_dropout = rnn_cell.DropoutWrapper(
        self.lstm3, output_keep_prob=1 - self.drop_out_rate)

    self.encode_image_W = tf.Variable(
        tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1),
        name='encode_image_W')
    self.encode_image_b = tf.Variable(tf.zeros([dim_hidden]),
                                      name='encode_image_b')

    self.embed_att_w = tf.Variable(tf.random_uniform([dim_hidden, 1], -0.1, 0.1),
                                   name='embed_att_w')
    self.embed_att_Wa = tf.Variable(
        tf.random_uniform([dim_hidden, dim_hidden], -0.1, 0.1), name='embed_att_Wa')
    self.embed_att_Ua = tf.Variable(
        tf.random_uniform([dim_hidden, dim_hidden], -0.1, 0.1), name='embed_att_Ua')
    self.embed_att_ba = tf.Variable(tf.zeros([dim_hidden]), name='embed_att_ba')

    self.embed_word_W = tf.Variable(
        tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')
    if bias_init_vector is not None:
        self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32),
                                        name='embed_word_b')
    else:
        self.embed_word_b = tf.Variable(tf.zeros([n_words]), name='embed_word_b')

    self.embed_nn_Wp = tf.Variable(
        tf.random_uniform([3 * dim_hidden, dim_hidden], -0.1, 0.1),
        name='embed_nn_Wp')
    self.embed_nn_bp = tf.Variable(tf.zeros([dim_hidden]), name='embed_nn_bp')
def testDropout(self):
    cell = Plus1RNNCell()
    full_dropout_cell = rnn_cell.DropoutWrapper(cell, input_keep_prob=1e-12, seed=0)
    batch_size = 2
    inputs = [tf.placeholder(tf.float32, shape=(batch_size, 5))] * 10
    with tf.variable_scope("share_scope"):
        outputs, states = rnn.rnn(cell, inputs, dtype=tf.float32)
    with tf.variable_scope("drop_scope"):
        dropped_outputs, _ = rnn.rnn(full_dropout_cell, inputs, dtype=tf.float32)
    self.assertEqual(len(outputs), len(inputs))
    for out, inp in zip(outputs, inputs):
        self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list())
        self.assertEqual(out.dtype, inp.dtype)

    with self.test_session(use_gpu=False) as sess:
        input_value = np.random.randn(batch_size, 5)
        values = sess.run(outputs + [states[-1]],
                          feed_dict={inputs[0]: input_value})
        full_dropout_values = sess.run(dropped_outputs,
                                       feed_dict={inputs[0]: input_value})
        for v in values[:-1]:
            self.assertAllClose(v, input_value + 1.0)
        for d_v in full_dropout_values[:-1]:  # Add 1.0 to dropped_out (all zeros)
            self.assertAllClose(d_v, np.ones_like(input_value))
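# Why the test expects all ones: with input_keep_prob=1e-12 essentially every
# input element is dropped to zero, and Plus1RNNCell adds 1.0 to those zeros.
# A minimal sketch of the wrapper's two knobs (era-appropriate import shown;
# the sizes and probabilities are illustrative assumptions):
from tensorflow.models.rnn import rnn_cell

base = rnn_cell.BasicLSTMCell(8)
wrapped = rnn_cell.DropoutWrapper(base,
                                  input_keep_prob=0.9,   # dropout on cell inputs
                                  output_keep_prob=0.5)  # dropout on cell outputs
# With both keep probabilities at 1.0 (e.g. at inference) the wrapper passes
# tensors through unchanged.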
def __init__(self, vocab_size, size=256, depth=2, learning_rate=1e-4,
             batch_size=32, keep_prob=0.1, num_steps=100,
             checkpoint_dir="checkpoint", forward_only=False):
    """Initialize the parameters for a Deep Bidirectional LSTM model.

    Args:
      vocab_size: int, The dimensionality of the input vocab
      size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
      learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      num_steps: int, The max time unit [100]
    """
    super(DeepBiLSTM, self).__init__()

    self.vocab_size = int(vocab_size)
    self.size = int(size)
    self.depth = int(depth)
    self.learning_rate = float(learning_rate)
    self.batch_size = int(batch_size)
    self.keep_prob = float(keep_prob)
    self.num_steps = int(num_steps)

    self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.num_steps])
    self.input_lengths = tf.placeholder(tf.int64, [self.batch_size])

    with tf.device("/cpu:0"):
        self.emb = tf.Variable(tf.truncated_normal(
            [self.vocab_size, self.size], -0.1, 0.1), name='emb')
        self.embed_inputs = tf.nn.embedding_lookup(
            self.emb, tf.transpose(self.inputs))

    self.cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    # wrap with dropout before stacking
    if not forward_only and self.keep_prob < 1:
        self.cell = rnn_cell.DropoutWrapper(self.cell,
                                            output_keep_prob=keep_prob)
    self.stacked_cell = rnn_cell.MultiRNNCell([self.cell] * depth)
    self.initial_state = self.stacked_cell.zero_state(batch_size, tf.float32)

    self.outputs, self.states = rnn.rnn(self.stacked_cell,
                                        tf.unpack(self.embed_inputs),
                                        dtype=tf.float32,
                                        sequence_length=self.input_lengths,
                                        initial_state=self.initial_state)
    output = tf.reduce_sum(tf.pack(self.outputs), 0)
def _shared_layer(input_data, config, is_training):
    """Build the shared encoder layer.

    Args:
      input_data: size batch_size X num_steps X embedding size
      config: model configuration
      is_training: whether dropout should be applied

    Returns:
      output units
    """
    cell = rnn_cell.BasicLSTMCell(config.encoder_size)

    inputs = [
        tf.squeeze(input_, [1])
        for input_ in tf.split(1, config.num_steps, input_data)
    ]

    if is_training and config.keep_prob < 1:
        cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=config.keep_prob)

    cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers)

    initial_state = cell.zero_state(config.batch_size, tf.float32)
    encoder_outputs, encoder_states = rnn.rnn(cell, inputs,
                                              initial_state=initial_state,
                                              scope="encoder_rnn")
    return encoder_outputs, initial_state
def prediction(self):
    fw_cell = rnn_cell.LSTMCell(self._num_hidden)
    fw_cell = rnn_cell.DropoutWrapper(fw_cell, output_keep_prob=self.dropout)
    bw_cell = rnn_cell.LSTMCell(self._num_hidden)
    bw_cell = rnn_cell.DropoutWrapper(bw_cell, output_keep_prob=self.dropout)
    if self._num_layers > 1:
        fw_cell = rnn_cell.MultiRNNCell([fw_cell] * self._num_layers)
        bw_cell = rnn_cell.MultiRNNCell([bw_cell] * self._num_layers)

    output, _, _ = rnn.bidirectional_rnn(
        fw_cell, bw_cell,
        tf.unpack(tf.transpose(self.data, perm=[1, 0, 2])),
        dtype=tf.float32, sequence_length=self.length)

    max_length = int(self.target.get_shape()[1])
    num_classes = int(self.target.get_shape()[2])
    weight, bias = self._weight_and_bias(2 * self._num_hidden, num_classes)
    output = tf.reshape(tf.transpose(tf.pack(output), perm=[1, 0, 2]),
                        [-1, 2 * self._num_hidden])
    prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
    prediction = tf.reshape(prediction, [-1, max_length, num_classes])
    return prediction
def BiRNN(self, _X, _istate_fw, _istate_bw, _weights, _biases):
    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation:
    # (n_steps*batch_size, n_input)
    _X = tf.reshape(_X, [-1, self.config.num_input])
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Forward direction cell
    single_fw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)
    single_fw_cell = rnn_cell.DropoutWrapper(single_fw_cell,
                                             self.config.input_keep_prob,
                                             self.config.output_keep_prob)
    rnn_fw_cell = rnn_cell.MultiRNNCell([single_fw_cell] * self.config.model_depth)

    # Backward direction cell
    single_bw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)
    single_bw_cell = rnn_cell.DropoutWrapper(single_bw_cell,
                                             self.config.input_keep_prob,
                                             self.config.output_keep_prob)
    rnn_bw_cell = rnn_cell.MultiRNNCell([single_bw_cell] * self.config.model_depth)

    # Split data because the rnn cell needs a list of inputs for the RNN
    # inner loop: n_steps * (batch_size, n_hidden)
    _X = tf.split(0, self.config.num_steps, _X)

    # Get lstm cell output
    outputs, final_fw, final_bw = rnn.bidirectional_rnn(
        rnn_fw_cell, rnn_bw_cell, _X,
        initial_state_fw=_istate_fw, initial_state_bw=_istate_bw)

    # Linear activation
    return [
        tf.matmul(output, _weights['out']) + _biases['out']
        for output in outputs
    ], final_fw, final_bw
def __init__(self, config, is_training):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size

    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
    if is_training and config.keep_prob < 1:
        cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=config.keep_prob)
    self.cell = cell

    self.input_data = tf.placeholder(dtype=tf.float32, shape=[None, num_steps, 1])
    self.target_data = tf.placeholder(dtype=tf.float32, shape=[None, num_steps, 1])
    self.initial_state = cell.zero_state(batch_size=config.batch_size,
                                         dtype=tf.float32)

    inputs = tf.split(1, num_steps, self.input_data)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    with tf.variable_scope('rnnvm'):
        output_w = tf.get_variable("output_w", [size, 1])
        output_b = tf.get_variable("output_b", [1])

    outputs, states = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                          scope='rnnvm')
    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    output = tf.nn.xw_plus_b(output, output_w, output_b)

    entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        output, tf.reshape(self.target_data, shape=[num_steps * batch_size, 1]))
    self.cost = cost = tf.reduce_mean(entropy)
    self.final_state = states[-1]

    if not is_training:
        return

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def _chunk_private(encoder_units, pos_prediction, config, is_training):
    """Decode model for chunks.

    Args:
      encoder_units: the encoder units, [batch_size X encoder_size], to which
        the pos prediction is concatenated
      pos_prediction: must be the same size as the encoder_size
      config: model configuration
      is_training: whether dropout should be applied

    Returns:
      logits
    """
    # concatenate the encoder_units and the pos_prediction
    pos_prediction = tf.reshape(
        pos_prediction,
        [config.batch_size, config.num_steps, config.pos_embedding_size])
    chunk_inputs = tf.concat(2, [pos_prediction, encoder_units])

    with tf.variable_scope("chunk_decoder"):
        cell = rnn_cell.BasicLSTMCell(config.chunk_decoder_size, forget_bias=1.0)

        if is_training and config.keep_prob < 1:
            cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=config.keep_prob)

        initial_state = cell.zero_state(config.batch_size, tf.float32)

        # this puts the 3d tensor into a list of 2d tensors: batch_size x input size
        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, config.num_steps, chunk_inputs)
        ]

        decoder_outputs, decoder_states = rnn.rnn(cell, inputs,
                                                  initial_state=initial_state,
                                                  scope="chunk_rnn")

        output = tf.reshape(tf.concat(1, decoder_outputs),
                            [-1, config.chunk_decoder_size])

        softmax_w = tf.get_variable(
            "softmax_w", [config.chunk_decoder_size, config.num_chunk_tags])
        softmax_b = tf.get_variable("softmax_b", [config.num_chunk_tags])
        logits = tf.matmul(output, softmax_w) + softmax_b

    return logits, decoder_states
def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size

    self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])

    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if is_training and config.keep_prob < 1:
        lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                            output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    iw = tf.get_variable("input_w", [1, size])
    ib = tf.get_variable("input_b", [size])
    inputs = [
        tf.nn.xw_plus_b(i_, iw, ib)
        for i_ in tf.split(1, num_steps, self._input_data)
    ]
    if is_training and config.keep_prob < 1:
        inputs = [tf.nn.dropout(input_, config.keep_prob) for input_ in inputs]

    outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
    rnn_output = tf.reshape(tf.concat(1, outputs), [-1, size])

    self._output = output = tf.nn.xw_plus_b(rnn_output,
                                            tf.get_variable("out_w", [size, 1]),
                                            tf.get_variable("out_b", [1]))
    self._cost = cost = tf.reduce_mean(
        tf.square(output - tf.reshape(self._targets, [-1])))
    self._final_state = states[-1]

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    # optimizer = tf.train.GradientDescentOptimizer(self.lr)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
def _testDoubleInputWithDropoutAndDynamicCalculation(self, use_gpu):
    """Smoke test for using LSTM with doubles, dropout, dynamic calculation."""
    num_units = 3
    input_size = 5
    batch_size = 2
    num_proj = 4
    num_proj_shards = 4
    num_unit_shards = 2
    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        sequence_length = tf.placeholder(tf.int64)
        initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)
        inputs = 10 * [tf.placeholder(tf.float64)]

        cell = rnn_cell.LSTMCell(num_units,
                                 input_size=input_size,
                                 use_peepholes=True,
                                 num_proj=num_proj,
                                 num_unit_shards=num_unit_shards,
                                 num_proj_shards=num_proj_shards,
                                 initializer=initializer)
        dropout_cell = rnn_cell.DropoutWrapper(cell, 0.5, seed=0)

        outputs, states = rnn.rnn(dropout_cell, inputs,
                                  sequence_length=sequence_length,
                                  initial_state=cell.zero_state(batch_size,
                                                                tf.float64))
        self.assertEqual(len(outputs), len(inputs))
        self.assertEqual(len(outputs), len(states))

        tf.initialize_all_variables().run(feed_dict={sequence_length: [2, 3]})
        input_value = np.asarray(np.random.randn(batch_size, input_size),
                                 dtype=np.float64)
        values = sess.run(outputs, feed_dict={inputs[0]: input_value,
                                              sequence_length: [2, 3]})
        state_values = sess.run(states, feed_dict={inputs[0]: input_value,
                                                   sequence_length: [2, 3]})
        self.assertEqual(values[0].dtype, input_value.dtype)
        self.assertEqual(state_values[0].dtype, input_value.dtype)
def create_cell(input_size):
    if cell_type == "vanilla":
        cell_class = rnn_cell.BasicRNNCell
    elif cell_type == "gru":
        cell_class = rnn_cell.GRUCell
    elif cell_type == "lstm":
        cell_class = rnn_cell.BasicLSTMCell
    else:
        raise Exception("Invalid cell type: {}".format(cell_type))
    cell = cell_class(hidden_size, input_size=input_size)
    if training:
        return rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout_prob)
    else:
        return cell
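# Stacking cells from this factory (a sketch; `embedding_size`, `hidden_size`,
# and `num_layers` stand in for whatever the enclosing scope defines — only
# the first layer sees the embedding-sized input):
first_cell = create_cell(input_size=embedding_size)
upper_cells = [create_cell(input_size=hidden_size) for _ in range(num_layers - 1)]
stacked = rnn_cell.MultiRNNCell([first_cell] + upper_cells)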
def prediction(self):
    # Recurrent network.
    network = rnn_cell.GRUCell(self._num_hidden)
    network = rnn_cell.DropoutWrapper(network, output_keep_prob=self.dropout)
    network = rnn_cell.MultiRNNCell([network] * self._num_layers)
    output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
    # Softmax layer.
    max_length = int(self.target.get_shape()[1])
    num_classes = int(self.target.get_shape()[2])
    weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
    # Flatten to apply the same weights to all time steps.
    output = tf.reshape(output, [-1, self._num_hidden])
    prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
    prediction = tf.reshape(prediction, [-1, max_length, num_classes])
    return prediction
def __init__(self, is_training, config):  # roughly the C++ notion of a "constructor"
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size

    # placeholders: fed with real data at training time;
    # batch_size x num_steps values in, the same number out
    self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])

    # lstm_cell = rnn_cell.BasicRNNCell(size)  # prepackaged plain RNN unit
    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)  # prepackaged LSTM unit
    if is_training and config.keep_prob < 1:
        lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                            output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)  # stacked RNN units

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # Core of the RNN/LSTM pipeline: the simplest linear map y = wx + b
    # (maybe too simple for good accuracy?)
    iw = tf.get_variable("input_w", [1, size])
    ib = tf.get_variable("input_b", [size])
    # split slices the input evenly into num_steps tensors along the columns
    inputs = [tf.nn.xw_plus_b(i_, iw, ib)
              for i_ in tf.split(1, num_steps, self._input_data)]
    if is_training and config.keep_prob < 1:
        inputs = [tf.nn.dropout(input_, config.keep_prob) for input_ in inputs]

    outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
    # c_out = tf.concat(1, outputs)  # outputs: p tensors of m×n; c_out: m × n*p
    # c_out = tf.concat(0, outputs)  # outputs: p tensors of m×n; c_out: m*p × n
    # [-1, size] keeps the total element count; rnn_output is n×size, one
    # 1×size vector per output (one output per input)
    rnn_output = tf.reshape(tf.concat(1, outputs), [-1, size])

    # output: the network's final result, one value per input
    self._output = output = tf.nn.xw_plus_b(rnn_output,
                                            tf.get_variable("out_w", [size, 1]),
                                            tf.get_variable("out_b", [1]))
    # RMSE: average deviation between each predicted and actual value
    self._cost = cost = tf.sqrt(tf.reduce_mean(
        (output - tf.reshape(self._targets, [-1])) ** 2))
    self._cost_MAPE = cost_MAPE = tf.reduce_mean(
        tf.abs(output - tf.reshape(self._targets, [-1]))
        / tf.reshape(self._targets, [-1]))
    self._final_state = states

    if not is_training:
        # at validation/test/prediction time, skip the weight updates below
        return

    # train the network: backpropagate and update the weights
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    # optimizer = tf.train.AdamOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
def testDropoutWrapper(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 3])
            m = tf.zeros([1, 3])
            keep = tf.zeros([]) + 1
            g, new_m = rnn_cell.DropoutWrapper(rnn_cell.GRUCell(3),
                                               keep, keep)(x, m)
            sess.run([tf.initialize_all_variables()])
            res = sess.run([g, new_m], {
                x.name: np.array([[1., 1., 1.]]),
                m.name: np.array([[0.1, 0.1, 0.1]])
            })
            self.assertEqual(res[1].shape, (1, 3))
            # The numbers in results were not calculated, this is just a smoke test.
            self.assertAllClose(res[0], [[0.154605, 0.154605, 0.154605]])
def __init__(self, is_training):
    # Need to define self._train_op
    self.batch_size = batch_size = 50
    self.num_steps = num_steps = 1000
    self.hidden_size = 5000
    self.keep_prob = 0.5
    # self.num_layers = 2

    self._input_data = tf.placeholder(tf.int8, [256, batch_size, num_steps])
    self._targets = tf.placeholder(tf.int8, [256, batch_size, num_steps])

    logging = tf.logging

    lstm_cell = rnn_cell.BasicLSTMCell(self.hidden_size, forget_bias=0.0)
    if is_training and self.keep_prob < 1:
        lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                            output_keep_prob=self.keep_prob)
    # self._cell = cell = rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)
    self._cell = cell = lstm_cell
def _pos_private(encoder_units, config, is_training):
    """Decode model for pos.

    Args:
      encoder_units: the encoder units
      config: model configuration (config.num_pos_tags gives the output units)
      is_training: whether dropout should be applied

    Returns:
      logits
    """
    with tf.variable_scope("pos_decoder"):
        cell = rnn_cell.BasicLSTMCell(config.pos_decoder_size, forget_bias=1.0)

        if is_training and config.keep_prob < 1:
            cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=config.keep_prob)

        initial_state = cell.zero_state(config.batch_size, tf.float32)

        # puts it into batch_size X input_size
        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, config.num_steps, encoder_units)
        ]

        decoder_outputs, decoder_states = rnn.rnn(cell, inputs,
                                                  initial_state=initial_state,
                                                  scope="pos_rnn")

        output = tf.reshape(tf.concat(1, decoder_outputs),
                            [-1, config.pos_decoder_size])

        softmax_w = tf.get_variable(
            "softmax_w", [config.pos_decoder_size, config.num_pos_tags])
        softmax_b = tf.get_variable("softmax_b", [config.num_pos_tags])
        logits = tf.matmul(output, softmax_w) + softmax_b

    return logits, decoder_states
def __init__(self, size=256, depth=3, batch_size=32, keep_prob=0.1,
             max_nsteps=1000, checkpoint_dir="checkpoint", forward_only=False):
    """Initialize the parameters for a Deep LSTM model.

    Args:
      size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      max_nsteps: int, The max time unit [1000]
    """
    super(DeepLSTM, self).__init__()

    self.size = int(size)
    self.depth = int(depth)
    self.batch_size = int(batch_size)
    self.output_size = self.depth * self.size
    self.keep_prob = float(keep_prob)
    self.max_nsteps = int(max_nsteps)
    self.checkpoint_dir = checkpoint_dir

    start = time.clock()
    print(" [*] Building Deep LSTM...")
    self.cell = LSTMCell(size, forget_bias=0.0)
    if not forward_only and self.keep_prob < 1:
        self.cell = rnn_cell.DropoutWrapper(self.cell,
                                            output_keep_prob=keep_prob)
    self.stacked_cell = MultiRNNCellWithSkipConn([self.cell] * depth)

    self.initial_state = self.stacked_cell.zero_state(batch_size, tf.float32)
label_input_size = sentence_length + 1
train1_input_size = sentence_length
train2_input_size = train_input_size - train1_input_size

graph = tf.Graph()
with graph.as_default():
    # Dropout
    keep_prob = tf.placeholder(tf.float32)

    # Parameters:
    # Definition of the LSTM cells. keep_prob is a placeholder, so it cannot
    # be compared to 1 at graph-construction time; wrap unconditionally and
    # feed 1.0 at evaluation time to disable dropout.
    lstm = rnn_cell.BasicLSTMCell(num_nodes)
    lstm = rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)
    stacked_lstm = rnn_cell.MultiRNNCell([lstm] * number_of_layers)

    # Variables saving state across unrollings.
    saved_output = tf.Variable(tf.zeros([batch_size, num_nodes]),
                               trainable=False)
    saved_state = tf.Variable(
        tf.zeros([batch_size, num_nodes * (2 * number_of_layers)]),
        trainable=False)

    # Embedding variables
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    x_embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
def __init__(self, is_training, config):
    self._batch_size = batch_size = config.batch_size
    self._min_lr = config.min_lr
    self.num_skills = num_skills = config.num_skills
    self.hidden_size = config.hidden_size
    size = config.hidden_size
    input_size = num_skills * 2

    inputs = self._input_data = tf.placeholder(tf.int32, [batch_size])
    self._target_id = target_id = tf.placeholder(tf.int32, [batch_size])
    self._target_correctness = target_correctness = tf.placeholder(
        tf.float32, [batch_size])

    hidden1 = rnn_cell.LSTMCell(size, input_size)
    # hidden2 = rnn_cell.LSTMCell(size, size)
    # hidden3 = rnn_cell.LSTMCell(size, size)

    # add dropout layer between hidden layers
    if is_training and config.keep_prob < 1:
        hidden1 = rnn_cell.DropoutWrapper(hidden1,
                                          output_keep_prob=config.keep_prob)
        # hidden2 = rnn_cell.DropoutWrapper(hidden2, output_keep_prob=config.keep_prob)
        # hidden3 = rnn_cell.DropoutWrapper(hidden3, output_keep_prob=config.keep_prob)

    cell = rnn_cell.MultiRNNCell([hidden1])

    # initial state
    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # one-hot encoding
    with tf.device("/cpu:0"):
        labels = tf.expand_dims(self._input_data, 1)
        indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
        concated = tf.concat(1, [indices, labels])
        inputs = tf.sparse_to_dense(concated,
                                    tf.pack([batch_size, input_size]), 1.0, 0.0)
        inputs.set_shape([batch_size, input_size])

    outputs = []
    states = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        # outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
        (cell_output, state) = cell(inputs, state)
        # outputs = cell_output

    self._final_state = self._initial_state = state

    # calculate the logits from the last hidden layer to the output layer
    softmax_w = tf.get_variable("softmax_w", [size, num_skills])
    softmax_b = tf.get_variable("softmax_b", [num_skills])
    logits = tf.matmul(cell_output, softmax_w) + softmax_b

    # from the output nodes, pick the one we want
    logits = tf.reshape(logits, [-1])
    logit_values = tf.gather(logits, target_id)

    # make prediction
    self._pred = self._pred_values = pred_values = tf.sigmoid(logit_values)

    # loss function: binary cross-entropy
    loss = -tf.reduce_sum(
        target_correctness * tf.log(pred_values)
        + (1 - target_correctness) * tf.log(1 - pred_values))
    # loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logit_values, target_correctness))
    self._cost = cost = tf.reduce_mean(loss)
    # self._cost = cost = loss

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # apply gradient descent to minimize the loss function
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    # Momentum algorithm
    # optimizer = tf.train.MomentumOptimizer(self.lr, config.momentum)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, is_training, config, batch_size=FLAGS.batch_size, do_train=True):
    self._batch_size = batch_size
    encoder_size = config.encoder_hidden_size
    vocab_size = config.vocab_size
    self.max_phrase_num = config.buckets[-1][0]
    self.max_sequence_length = config.buckets[-1][0]
    self.max_phrase_len = config.buckets[-1][1]
    self.buckets = config.buckets
    self._lr_decay = config.lr_decay
    self.max_grad_norm = config.max_grad_norm
    self.global_step = tf.Variable(0, trainable=False)
    self.init_scale = config.init_scale
    self.do_train = do_train

    self._input_refinement = []
    for i in xrange(self.max_phrase_len):
        self._input_refinement.append(
            tf.placeholder(tf.int32, shape=[self._batch_size],
                           name="refinement_{0}".format(i)))

    self._target = []
    self._input_recipe_segments = []
    for i in xrange(self.max_sequence_length):
        self._target.append(
            tf.placeholder(tf.int32, shape=[self._batch_size],
                           name="target_{0}".format(i)))
        self._input_recipe_segments.append([])
        for j in xrange(self.max_phrase_len):
            self._input_recipe_segments[-1].append(
                tf.placeholder(tf.int32, shape=[self._batch_size],
                               name="recipe_segment{0}/{1}".format(i, j)))

    # ENCODER (1st LSTM Layer)
    encoder_lstm_cell = rnn_cell.BasicLSTMCell(encoder_size, forget_bias=0.0)
    if is_training and config.keep_prob < 1:
        encoder_lstm_cell = rnn_cell.DropoutWrapper(
            encoder_lstm_cell, output_keep_prob=config.keep_prob)
    self.encoder = rnn_cell.MultiRNNCell([encoder_lstm_cell] * config.num_layers)
    self._initial_encoder_state = self.encoder.zero_state(self._batch_size,
                                                          tf.float32)

    self._embedding_size = config.num_layers * int(encoder_lstm_cell.state_size)
    with tf.device('/cpu:0'):
        self._embedding_matrix = tf.get_variable(
            "embedding_matrix", [vocab_size, self._embedding_size])
        tf.histogram_summary('embedding_matrix', self._embedding_matrix)

    # RECIPE PROCESSOR (2nd LSTM Layer)
    recipe_processor_size = config.recipe_processor_hidden_size
    with tf.variable_scope("recipe_processor_cell"):
        recipe_processor_lstm_cell = rnn_cell.BasicLSTMCell(
            recipe_processor_size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            recipe_processor_lstm_cell = rnn_cell.DropoutWrapper(
                recipe_processor_lstm_cell, output_keep_prob=config.keep_prob)
        self.recipe_processor = rnn_cell.MultiRNNCell(
            [recipe_processor_lstm_cell] * config.num_layers)
        self._initial_recipe_processor_state = self.recipe_processor.zero_state(
            self._batch_size, tf.float32)

    # FINAL REDUCTION TO DISTRIBUTION OVER INDICES
    self.index_predictor_W = weight_variable([recipe_processor_size, 2])
    tf.histogram_summary('index_predictor_w', self.index_predictor_W)
    self.index_predictor_b = bias_variable([2])
    tf.histogram_summary('index_predictor_b', self.index_predictor_b)

    self._lr = tf.Variable(float(config.learning_rate), trainable=False)
    tf.scalar_summary('lr', self._lr)
    self.learning_rate_decay_op = self._lr.assign(self._lr * self._lr_decay)

    # BUILD MODEL
    self.outputs, self.losses, self.costs = self.model_with_buckets()

    # CALC GRADIENTS
    if not self.do_train:
        self.calc_gradients()

    self.saver = tf.train.Saver(tf.all_variables())
def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # Slightly better results can be obtained with forget gate biases
    # initialized to 1 but the hyperparameters of the model would need to be
    # different than reported in the paper.
    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if is_training and config.keep_prob < 1:
        lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                            output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, size])
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
    # This builds an unrolled LSTM for tutorial purposes only.
    # In general, use the rnn() or state_saving_rnn() from rnn.py.
    #
    # The alternative version of the code below is:
    #
    # from tensorflow.models.rnn import rnn
    # inputs = [tf.squeeze(input_, [1])
    #           for input_ in tf.split(1, num_steps, inputs)]
    # outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
    outputs = []
    states = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)
            states.append(state)

    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    logits = tf.nn.xw_plus_b(output,
                             tf.get_variable("softmax_w", [size, vocab_size]),
                             tf.get_variable("softmax_b", [vocab_size]))
    loss = seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(self._targets, [-1])],
        [tf.ones([batch_size * num_steps])],
        vocab_size)
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = states[-1]

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
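# The cost above is cross-entropy summed over num_steps and averaged over the
# batch. A sketch of how PTB-style perplexity is usually recovered from it
# (the helper and its names are illustrative, not part of the model):
import numpy as np

def perplexity(total_cost, total_steps):
    # exp of the average per-step cross-entropy, accumulated over an epoch
    return np.exp(total_cost / total_steps)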
def __init__(self, is_training, config): """constructs a graph""" self.batch_size = batch_size = config.batch_size self.num_steps = num_steps = config.num_steps size = config.hidden_size vocab_size = config.vocab_size self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps], name="input_data") self._targets = tf.placeholder(tf.int32, [batch_size, num_steps], name="targets") # here it is lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=1.0) if is_training and config.keep_prob < 1: lstm_cell = rnn_cell.DropoutWrapper( lstm_cell, output_keep_prob=config.keep_prob) cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers) self._initial_state = cell.zero_state(batch_size, tf.float32) # do an embedding (always on cpu) with tf.device("/cpu:0"): embedding = tf.get_variable("embedding", [vocab_size, size]) inputs = tf.split( 1, num_steps, tf.nn.embedding_lookup(embedding, self._input_data)) inputs = [tf.squeeze(input_, [1]) for input_ in inputs] if is_training and config.keep_prob < 1: inputs = [ tf.nn.dropout(input_, config.keep_prob) for input_ in inputs ] from tensorflow.models.rnn import rnn outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state) # reshape outputs = tf.reshape(tf.concat(1, outputs), [-1, size]) logits = tf.nn.xw_plus_b( outputs, tf.get_variable("softmax_W", [size, vocab_size]), tf.get_variable("softmax_b", [vocab_size])) self._softmax_out = tf.nn.softmax( logits) # this is just used for sampling loss = seq2seq.sequence_loss_by_example( [logits], [tf.reshape(self._targets, [-1])], [tf.ones([batch_size * num_steps])], vocab_size) self._cost = cost = tf.div(tf.reduce_sum(loss), tf.constant(batch_size, dtype=tf.float32)) self._final_state = states[-1] if not is_training: return # don't need to optimisation ops self._lr = tf.Variable(0.0, trainable=False) tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm) # actually the simple guy does good # with the grad clipping and the lr schedule and whatnot #ftrl? #optimizer = tf.train.GradientDescentOptimizer(self.lr) optimizer = tf.train.FtrlOptimizer(self.lr) self._train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, embedding_mat, non_static, lstm_type, hidden_unit,
             sequence_length, max_pool_size, num_classes, embedding_size,
             filter_sizes, num_filters, l2_reg_lambda=0.0):
    # Placeholders for input, output and dropout
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    self.batch_size = tf.placeholder(tf.int32)
    self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1], name="pad")
    self.real_len = tf.placeholder(tf.int32, [None], name="real_len")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    # Extend input to a 4D Tensor, because tf.nn.conv2d requires so.
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        if not non_static:
            W = tf.constant(embedding_mat, name="W")
        else:
            W = tf.Variable(embedding_mat, name="W")
        self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
        emb = tf.expand_dims(self.embedded_chars, -1)

    # CNN
    pooled_concat = []
    reduced = np.int32(np.ceil(sequence_length * 1.0 / max_pool_size))
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Zero paddings so that the convolution output has dimension
            # batch x sequence_length x emb_size x channel
            num_prio = (filter_size - 1) // 2
            num_post = (filter_size - 1) - num_prio
            pad_prio = tf.concat(1, [self.pad] * num_prio)
            pad_post = tf.concat(1, [self.pad] * num_post)
            emb_pad = tf.concat(1, [pad_prio, emb, pad_post])

            # Convolution Layer
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(emb_pad, W, strides=[1, 1, 1, 1],
                                padding="VALID", name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(h, ksize=[1, max_pool_size, 1, 1],
                                    strides=[1, max_pool_size, 1, 1],
                                    padding='SAME', name="pool")
            pooled = tf.reshape(pooled, [-1, reduced, num_filters])
            pooled_concat.append(pooled)

    pooled_concat = tf.concat(2, pooled_concat)
    pooled_concat = tf.nn.dropout(pooled_concat, self.dropout_keep_prob)

    # LSTM
    if lstm_type == "gru":
        lstm_cell = rnn_cell.GRUCell(num_units=hidden_unit,
                                     input_size=embedding_size)
    elif lstm_type == "basic":
        lstm_cell = rnn_cell.BasicLSTMCell(num_units=hidden_unit,
                                           input_size=embedding_size)
    else:
        lstm_cell = rnn_cell.LSTMCell(num_units=hidden_unit,
                                      input_size=embedding_size,
                                      use_peepholes=True)
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                        output_keep_prob=self.dropout_keep_prob)

    self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
    inputs = [
        tf.squeeze(input_, [1])
        for input_ in tf.split(1, reduced, pooled_concat)
    ]
    outputs, state = rnn.rnn(lstm_cell, inputs,
                             initial_state=self._initial_state,
                             sequence_length=self.real_len)

    # Collect the appropriate last words into variable output
    # (dimension = batch x embedding_size)
    output = outputs[0]
    with tf.variable_scope("Output"):
        tf.get_variable_scope().reuse_variables()
        one = tf.ones([1, hidden_unit], tf.float32)
        for i in range(1, len(outputs)):
            ind = self.real_len < (i + 1)
            ind = tf.to_float(ind)
            ind = tf.expand_dims(ind, -1)
            mat = tf.matmul(ind, one)
            output = tf.add(tf.mul(output, mat), tf.mul(outputs[i], 1.0 - mat))

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        self.W = tf.Variable(tf.truncated_normal([hidden_unit, num_classes],
                                                 stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss += tf.nn.l2_loss(self.W)
        l2_loss += tf.nn.l2_loss(b)
        self.scores = tf.nn.xw_plus_b(output, self.W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                       name="accuracy")
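# Reference semantics of the masking loop above (a NumPy sketch; `outputs` is
# the T-long list of (batch, hidden) arrays and `real_len` the per-example
# valid lengths). The blend reduces to picking, for each batch row b, the
# output at time real_len[b] - 1:
import numpy as np

def last_relevant(outputs, real_len):
    stacked = np.stack(outputs)                      # (T, batch, hidden)
    batch = stacked.shape[1]
    return stacked[real_len - 1, np.arange(batch)]   # (batch, hidden)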
def __init__(self, CellType, is_training, config):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps],
                                     name="input_data")
    self.targets = tf.placeholder(tf.int32, [batch_size, num_steps],
                                  name="targets")

    lstm_cell = CellType(size)
    if is_training and config.keep_prob < 1:
        lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                            output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self.initial_state = cell.zero_state(batch_size, tf.float32)

    # initializer used for reusable variable initializer (see `get_variable`)
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, size],
                                    initializer=initializer)
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    outputs = []
    states = []
    state = self.initial_state
    with tf.variable_scope("RNN", initializer=initializer):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            inputs_slice = inputs[:, time_step, :]
            (cell_output, state) = cell(inputs_slice, state)
            outputs.append(cell_output)
            states.append(state)

    self.final_state = states[-1]

    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    w = tf.get_variable("softmax_w", [size, vocab_size], initializer=initializer)
    b = tf.get_variable("softmax_b", [vocab_size], initializer=initializer)
    logits = tf.nn.xw_plus_b(output, w, b)  # compute logits for loss

    targets = tf.reshape(self.targets, [-1])  # reshape our target outputs
    weights = tf.ones([batch_size * num_steps])  # used to scale the loss average

    # computes loss and performs softmax on our fully-connected output layer
    loss = sequence_loss_by_example([logits], [targets], [weights], vocab_size)
    self.cost = cost = tf.div(tf.reduce_sum(loss), batch_size, name="cost")

    if is_training:
        # setup learning rate variable to decay
        self.lr = tf.Variable(1.0, trainable=False)

        # define training operation and clip the gradients
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars), name="train")
    else:
        # if this model isn't for training (i.e. testing/validation),
        # we don't do anything here
        self.train_op = tf.no_op()
# Proclaim the epochs
epochs = np.floor(batch_size * max_iterations / N)
print('Train with approximately %d epochs' % (epochs))

# Nodes for the input variables
x = tf.placeholder("float", shape=[batch_size, H, W, C], name='Input_data')
y_ = tf.placeholder(tf.int64, shape=[batch_size], name='Ground_truth')
keep_prob = tf.placeholder("float")

with tf.name_scope("LSTM") as scope:
    cell = rnn_cell.LSTMCell(hidden_size)
    # cell = rnn_cell.MultiRNNCell([cell] * num_layers)
    cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)

    # XW_plus_b
    W_a = tf.Variable(tf.random_normal([hidden_size, hidden_size], stddev=0.01))
    b_a = tf.Variable(tf.constant(0.5, shape=[hidden_size]))

    # Initial state
    initial_state = cell.zero_state(batch_size, tf.float32)

    # initial input vector is a sum over the activation map
    x_in = tf.reduce_sum(x, [1, 2])
    time = sl * tf.ones([batch_size, 1])
    x_in = tf.concat(1, [x_in, time])

    outputs = []
    masks = []
    state = initial_state
    for time_step in range(sl):
        if time_step > 0:
            tf.get_variable_scope().reuse_variables()
        (cell_output, state) = cell(x_in, state)
def __init__(self, dim, args, infer=False):
    self.dim = dim
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)
    cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
    if infer == False and args.keep_prob < 1:  # training mode
        cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=args.keep_prob)
    self.cell = cell

    self.input_data = tf.placeholder(dtype=tf.float32,
                                     shape=[None, args.seq_length, self.dim])
    self.target_data = tf.placeholder(dtype=tf.float32,
                                      shape=[None, args.seq_length, self.dim])
    self.initial_state = cell.zero_state(batch_size=args.batch_size,
                                         dtype=tf.float32)

    self.num_mixture = args.num_mixture
    # prob + mu + sig: [prob 1-20, dim1 mu, dim1 sig, dim2, ...]
    NOUT = self.num_mixture * (1 + 2 * self.dim)

    with tf.variable_scope('rnnlm'):
        output_w = tf.get_variable("output_w", [args.rnn_size, NOUT])
        output_b = tf.get_variable("output_b", [NOUT])

    inputs = tf.split(1, args.seq_length, self.input_data)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    outputs, states = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                          loop_function=None, scope='rnnlm')
    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    output = tf.nn.xw_plus_b(output, output_w, output_b)
    self.final_state = states

    # reshape target data so that it is compatible with prediction shape
    flat_target_data = tf.reshape(self.target_data, [-1, self.dim])
    # [x1_data, x2_data, eos_data] = tf.split(1, 3, flat_target_data)
    x_data = flat_target_data

    def tf_normal(x, mu, sig):
        return tf.exp(-tf.square(x - mu) / (2 * tf.square(sig))) \
            / (sig * tf.sqrt(2 * np.pi))

    # def tf_multi_normal(x, mu, sig, ang):
    #   use n(n+1)/2 numbers to parametrize the covariance matrix
    #   1. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.31.494&rep=rep1&type=pdf
    #   2. https://en.wikipedia.org/wiki/Triangular_matrix
    #   3. https://makarandtapaswi.wordpress.com/2011/07/08/cholesky-decomposition-for-matrix-inversion/
    #   A = LL'                         by 1
    #   det(L) = product of diagonals   by 2
    #   det(A) = det(L)^2               by 3
    #   A^-1 = (L^-1)'(L^-1)            by 3
    #   We're parametrizing using L^-1:
    #   Sigma^-1 = (L^-1)'(L^-1)
    #   |Sigma| = 1 / det(L^-1)^2 = 1 / (diagonal product of L^-1)^2
    #   return tf.exp(-tf.square(x - mu) / (2 * tf.square(sig + 0.01))) / ((sig + 0.01) * tf.sqrt(2 * np.pi))

    # z_mu, z_sig, x_data: [batch_size x mixture]; z_pi: [batch_size x mixture]
    def get_lossfunc(z_pi, z_mu, z_sig, x_data):
        result0 = tf_normal(x_data, z_mu, z_sig)
        result1 = tf.reduce_sum(result0 * z_pi, 1, keep_dims=True)
        result2 = -tf.log(tf.maximum(result1, 1e-20))
        return tf.reduce_sum(result2)

    self.pi = output[:, 0:self.num_mixture]
    max_pi = tf.reduce_max(self.pi, 1, keep_dims=True)
    self.pi = tf.exp(tf.sub(self.pi, max_pi))
    normalize_pi = tf.inv(tf.reduce_sum(self.pi, 1, keep_dims=True))
    self.pi = normalize_pi * self.pi

    output_each_dim = tf.split(1, self.dim, output[:, self.num_mixture:])
    self.mu = []
    self.sig = []
    self.cost = 0
    for i in range(self.dim):
        [o_mu, o_sig] = tf.split(1, 2, output_each_dim[i])
        o_sig = tf.exp(o_sig) + args.sig_epsilon
        self.mu.append(o_mu)
        self.sig.append(o_sig)
        lossfunc = get_lossfunc(self.pi, o_mu, o_sig, x_data[:, i:i + 1])
        self.cost += lossfunc / (args.batch_size * args.seq_length * self.dim)

    self.mu = tf.concat(1, self.mu)
    self.sig = tf.concat(1, self.sig)

    self.loss_summary = tf.scalar_summary("loss", self.cost)
    self.summary = tf.merge_all_summaries()

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
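# A sketch of the L^-1 parametrization described in the tf_multi_normal
# comments above (NumPy for clarity; the model never implemented it, so this
# is an illustrative reference, not the author's code). With
# Sigma^-1 = L_inv' L_inv, where L_inv is lower-triangular with a positive
# diagonal, |Sigma|^(-1/2) is just the product of L_inv's diagonal, so no
# determinant or matrix inversion is ever computed:
import numpy as np

def multi_normal_density(x, mu, L_inv):
    d = x.shape[-1]
    z = L_inv.dot(x - mu)     # whitened residual
    quad = z.dot(z)           # (x - mu)' Sigma^-1 (x - mu)
    return ((2 * np.pi) ** (-d / 2.0)
            * np.prod(np.diag(L_inv))
            * np.exp(-0.5 * quad))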