def RNN(x, weights, biases, dropout):
    """Run a three-layer IndRNN stack over `x` and project its last step.

    `x` is split along axis 1 into `timesteps` tensors (the original notes
    describe `x` as (batch_size, timesteps, n_input)), fed through
    `tf.nn.static_rnn`, and the output of the final step is mapped through
    `weights['out']` and `biases['out']`.

    Args:
        x: input tensor to unroll over.
        weights: mapping holding the output projection under key 'out'.
        biases: mapping holding the output bias under key 'out'.
        dropout: accepted for interface compatibility; not used here.

    Returns:
        The linear activation tensor for the last time step.

    Reads the module-level names `timesteps`, `num_hidden` and
    `recurrent_max`.
    """
    # static_rnn expects a Python list with one tensor per time step.
    step_inputs = tf.unstack(x, timesteps, 1)

    stacked_layers = [
        IndRNNCell(num_hidden, recurrent_max_abs=recurrent_max)
        for _ in range(3)
    ]
    cell = tf.nn.rnn_cell.MultiRNNCell(stacked_layers)

    outputs, _ = tf.nn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear activation on the RNN's last output.
    final_step = outputs[-1]
    act = tf.matmul(final_step, weights['out']) + biases['out']
    tf.summary.histogram("activations", act)
    return act
def indRNN_model(feed_in, hidden_units=128):
    """Build a two-layer IndRNN over `feed_in` and flatten its outputs.

    Args:
        feed_in: rank-3 tensor; its static shape is unpacked as
            (batch, sequence length, features). The sequence length must be
            statically known because the recurrent-weight bounds are derived
            from it. The batch dimension may be unknown.
        hidden_units: number of units in each IndRNN layer.

    Returns:
        The per-step outputs reshaped to (batch * sequence length,
        hidden_units).
    """
    from ind_rnn_cell import IndRNNCell

    with tf.variable_scope('indRNN-layer'):
        # Three-way unpack doubles as a rank check on the input.
        _, seq_length, _ = feed_in.get_shape().as_list()
        TIME_STEPS = seq_length

        input_init = tf.random_uniform_initializer(-0.001, 0.001)

        # `1.0 /` keeps the exponent fractional even under Python 2 division.
        LAST_LAYER_LOWER_BOUND = pow(0.5, 1.0 / TIME_STEPS)

        # Init only the last layer's recurrent weights around 1
        recurrent_init_lower_0 = 0
        recurrent_init_lower_1 = LAST_LAYER_LOWER_BOUND

        # Regulate each neuron's recurrent weight as recommended in the paper
        RECURRENT_MAX = pow(2, 1.0 / TIME_STEPS)

        recurrent_init_0 = tf.random_uniform_initializer(
            recurrent_init_lower_0, RECURRENT_MAX)
        recurrent_init_1 = tf.random_uniform_initializer(
            recurrent_init_lower_1, RECURRENT_MAX)

        indRnnCells = tf.contrib.rnn.MultiRNNCell([
            IndRNNCell(hidden_units,
                       recurrent_max_abs=RECURRENT_MAX,
                       input_kernel_initializer=input_init,
                       recurrent_kernel_initializer=recurrent_init_0),
            IndRNNCell(hidden_units,
                       recurrent_max_abs=RECURRENT_MAX,
                       input_kernel_initializer=input_init,
                       recurrent_kernel_initializer=recurrent_init_1),
        ])

        outputs, _ = tf.nn.dynamic_rnn(indRnnCells, feed_in, dtype=tf.float32)

        # -1 lets TensorFlow infer batch * seq_length, so an unknown (None)
        # batch dimension no longer breaks the reshape.
        outputs = tf.reshape(outputs, (-1, hidden_units))
        return outputs
def __init__(self, features, recurrent_max_abs):
    """Set up the cell's bias, filter shape and inner IndRNNCell.

    Args:
        features: tensor whose third static dimension gives the channel
            count (called DICT_SIZE by the original author).
        recurrent_max_abs: forwarded to the inner IndRNNCell.
    """
    super(AttentionCell, self).__init__()
    self._features = features
    self._in_channels = features.get_shape()[2].value  # DICT_SIZE
    self._bias = self.add_variable("bias",
                                   shape=[1],
                                   initializer=tf.zeros_initializer())
    # The inner cell emits one flat vector per step that is later reshaped
    # into a filter of this shape, hence width * in_channels units.
    filter_width = 5
    self._filt_shape = (-1, filter_width, self._in_channels, 1)
    self._indrnn = IndRNNCell(filter_width * self._in_channels,
                              recurrent_max_abs=recurrent_max_abs)
def simple_indRNN_model(feed_in, hidden_units=128):
    """Build a two-layer IndRNN with default initializers and flatten it.

    Args:
        feed_in: rank-3 tensor; its static shape is unpacked as
            (batch, sequence length, features). The sequence length must be
            statically known; the batch dimension may be unknown.
        hidden_units: number of units in each IndRNN layer.

    Returns:
        The per-step outputs reshaped to (batch * sequence length,
        hidden_units).
    """
    from ind_rnn_cell import IndRNNCell

    with tf.variable_scope('indRNN-layer'):
        # Three-way unpack doubles as a rank check on the input.
        _, seq_length, _ = feed_in.get_shape().as_list()
        TIME_STEPS = seq_length

        # Regulate each neuron's recurrent weight as recommended in the paper.
        # `1.0 /` keeps the exponent fractional even under Python 2 division.
        RECURRENT_MAX = pow(2, 1.0 / TIME_STEPS)

        indRnnCells = tf.contrib.rnn.MultiRNNCell([
            IndRNNCell(hidden_units, recurrent_max_abs=RECURRENT_MAX),
            IndRNNCell(hidden_units, recurrent_max_abs=RECURRENT_MAX),
        ])

        outputs, _ = tf.nn.dynamic_rnn(indRnnCells, feed_in, dtype=tf.float32)

        # -1 lets TensorFlow infer batch * seq_length, so an unknown (None)
        # batch dimension no longer breaks the reshape.
        outputs = tf.reshape(outputs, (-1, hidden_units))
        return outputs
def testIndRNNCellBounds(self):
    """Tests cell with recurrent weights exceeding the bounds."""
    with self.test_session() as sess:
        ones_init = init_ops.constant_initializer(1.)
        with variable_scope.variable_scope("root", initializer=ones_init):
            x = array_ops.zeros([1, 4])
            m = array_ops.zeros([1, 4])

            # Input weights come from the scope default (all ones); the
            # recurrent weights start outside the [1, 3] magnitude bounds.
            out_of_bounds_init = init_ops.constant_initializer(
                [-5., -2., 0.1, 5.])
            cell = IndRNNCell(4,
                              recurrent_min_abs=1.,
                              recurrent_max_abs=3.,
                              recurrent_initializer=out_of_bounds_init,
                              activation=array_ops.identity)
            output, _ = cell(x, m)

        sess.run([variables.global_variables_initializer()])

        feed = {
            x.name: np.array([[1., 0., 0., 0.]]),
            m.name: np.array([[2., 2., 2., 2.]]),
        }
        (result,) = sess.run([output], feed)

        # Recurrent weights should be clipped to -3, -2, 1, 3
        # (Pre)activations (1*1 + 2*rec_weight) should be -5, -3, 3, 7
        self.assertAllEqual(result, [[-5., -3., 3., 7.]])
def testIndRNNCell(self):
    """Tests basic cell functionality"""
    with self.test_session() as sess:
        ones_init = init_ops.constant_initializer(1.)
        with variable_scope.variable_scope("root", initializer=ones_init):
            x = array_ops.zeros([1, 4])
            m = array_ops.zeros([1, 4])

            # Input weights come from the scope default (all ones); the
            # recurrent weights are fixed constants.
            fixed_recurrent_init = init_ops.constant_initializer(
                [-3., -2., 1., 3.])
            cell = IndRNNCell(4,
                              recurrent_initializer=fixed_recurrent_init,
                              activation=array_ops.identity)
            output, _ = cell(x, m)

        sess.run([variables.global_variables_initializer()])

        feed = {
            x.name: np.array([[1., 0., 0., 0.]]),
            m.name: np.array([[2., 2., 2., 2.]]),
        }
        (result,) = sess.run([output], feed)

        # (Pre)activations (1*1 + 2*rec_weight) should be -5, -3, 3, 7
        self.assertAllEqual(result, [[-5., -3., 3., 7.]])
def main():
    """Train a two-layer IndRNN regressor on batches from `get_batch()`.

    Builds the graph (placeholders, two stacked IndRNNCell layers, a linear
    read-out of the last time step, MSE loss, Adam with a staircase
    exponentially decayed learning rate) and then trains forever, printing
    the mean loss after every 100 steps. Never returns.
    """
    # Placeholders for training data
    inputs_ph = tf.placeholder(tf.float32, shape=(BATCH_SIZE, TIME_STEPS, 2))
    targets_ph = tf.placeholder(tf.float32, shape=BATCH_SIZE)

    # Build the graph
    first_input_init = tf.random_uniform_initializer(-RECURRENT_MAX,
                                                     RECURRENT_MAX)
    layers = [
        IndRNNCell(NUM_UNITS,
                   recurrent_max_abs=RECURRENT_MAX,
                   recurrent_kernel_initializer=first_input_init),
        IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX),
    ]
    cell = tf.nn.rnn_cell.MultiRNNCell(layers)
    # cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell, inputs_ph, dtype=tf.float32)
    last = output[:, -1, :]

    weight = tf.get_variable("softmax_weight", shape=[NUM_UNITS, 1])
    bias = tf.get_variable("softmax_bias", shape=[1],
                           initializer=tf.constant_initializer(0.1))
    prediction = tf.squeeze(tf.matmul(last, weight) + bias)
    loss_op = tf.losses.mean_squared_error(tf.squeeze(targets_ph), prediction)

    global_step = tf.get_variable("global_step", shape=[], trainable=False,
                                  initializer=tf.zeros_initializer)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT,
                                               global_step,
                                               LEARNING_RATE_DECAY_STEPS,
                                               0.1,
                                               staircase=True)
    optimize = tf.train.AdamOptimizer(learning_rate).minimize(
        loss_op, global_step=global_step)

    # Train the model
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        hundreds_done = 0
        while True:
            window_losses = []
            for _ in range(100):
                # Generate new input data
                inputs, targets = get_batch()
                feed = {inputs_ph: inputs, targets_ph: targets}
                loss, _ = sess.run([loss_op, optimize], feed)
                window_losses.append(loss)
            # One report per 100 optimisation steps.
            hundreds_done += 1
            print("Step [x100] {} MSE {}".format(hundreds_done,
                                                 np.mean(window_losses)))
class AttentionCell(tf.nn.rnn_cell.RNNCell):
    """RNN cell whose inner IndRNNCell emits a filter applied to `features`.

    At each step the inner cell's output is reshaped into a filter, passed
    with the stored features to `batchwise_conv_2`, squashed, softmax-weighted
    along axis 1, averaged, and concatenated with the step input.
    """

    def __init__(self, features, recurrent_max_abs):
        """Store `features` and create the bias and the inner IndRNNCell.

        Args:
            features: tensor whose third static dimension gives the channel
                count (called DICT_SIZE by the original author).
            recurrent_max_abs: forwarded to the inner IndRNNCell.
        """
        super(AttentionCell, self).__init__()
        self._features = features
        self._in_channels = features.get_shape()[2].value  # DICT_SIZE
        self._bias = self.add_variable("bias",
                                       shape=[1],
                                       initializer=tf.zeros_initializer())
        filter_width = 5
        self._filt_shape = (-1, filter_width, self._in_channels, 1)
        self._indrnn = IndRNNCell(filter_width * self._in_channels,
                                  recurrent_max_abs=recurrent_max_abs)

    @property
    def state_size(self):
        """State size of the wrapped IndRNNCell."""
        return self._indrnn.state_size

    @property
    def output_size(self):
        """Declared output width: the feature channel count plus 2."""
        return self._in_channels + 2

    def build(self, inputs_shape):
        """Delegate variable creation to the wrapped IndRNNCell."""
        self._indrnn.build(inputs_shape)

    def __call__(self, inputs, state, scope=None):
        """Run one step; returns `(output, new_state)`.

        The shape notes below are the original author's.
        NOTE(review): those notes give the weighted product a dict_size-wide
        last axis, yet both operands are annotated (B, width, 1) — confirm
        what `batchwise_conv_2` returns and that the result matches
        `output_size`.
        """
        raw_filter, new_state = self._indrnn(inputs, state, scope)
        # filt has shape (B, width, in_channels, out_channels)
        filt = tf.reshape(raw_filter, self._filt_shape)
        # conv has shape (B, width, out_channels)
        conv = batchwise_conv_2(self._features, filt)
        activated = tf.nn.relu(conv + self._bias)  # (B, width, 1)
        # TODO: try other methods for squashing or normalizing, etc
        attention = tf.nn.softmax(activated, axis=1)  # (B, width, 1)
        weighted = tf.multiply(attention, activated)  # (B, width, dict_size)
        pooled = tf.reduce_mean(weighted, axis=1)  # (B, dict_size)
        output = tf.concat([pooled, inputs], axis=1)  # (B, dict_size + 2)
        return output, new_state
def indrnn_model(first_input_init, inputs_ph):
    """Build a two-layer IndRNN model with NUM_UNITS units per layer.

    The original author's note gives the parameter count of the two layers as
    TIME_STEPS*NUM_UNITS + NUM_UNITS*NUM_UNITS + 2*NUM_UNITS.

    Args:
        first_input_init: initializer passed as the first layer's
            `recurrent_kernel_initializer`.
        inputs_ph: input tensor handed to `tf.nn.dynamic_rnn`.

    Returns:
        The squeezed linear read-out of the last time step.
    """
    layers = [
        IndRNNCell(NUM_UNITS,
                   recurrent_max_abs=RECURRENT_MAX,
                   recurrent_kernel_initializer=first_input_init),
        IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX),
    ]
    cell = tf.nn.rnn_cell.MultiRNNCell(layers)
    # cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell, inputs_ph, dtype=tf.float32)
    last_step = output[:, -1, :]

    weight = tf.get_variable("softmax_weight", shape=[NUM_UNITS, 1])
    bias = tf.get_variable("softmax_bias", shape=[1],
                           initializer=tf.constant_initializer(0.1))
    return tf.squeeze(tf.matmul(last_step, weight) + bias)
def _get_lstm_cell(self, config, is_training):
    """Return a new IndRNNCell with `config.hidden_size` units.

    The BASIC / BLOCK LSTM branches and the `rnn_mode` dispatch were
    commented out upstream, so an IndRNNCell is returned unconditionally.
    The trailing "rnn_mode not supported" raise could never execute after
    that and has been removed.

    Args:
        config: object providing `hidden_size`.
        is_training: unused; kept for interface compatibility.

    Returns:
        An IndRNNCell clipped at the module-level RECURRENT_MAX.
    """
    return IndRNNCell(config.hidden_size, recurrent_max_abs=RECURRENT_MAX)
def main():
    """Train a two-layer IndRNN regressor on batches from `get_batch()`.

    Builds placeholders, two stacked IndRNNCell layers, a linear read-out of
    the last time step and an MSE loss minimised with Adam at LEARNING_RATE,
    then trains forever, printing the mean loss after every 100 steps.
    Never returns.
    """
    # Placeholders for training data
    inputs_ph = tf.placeholder(tf.float32, shape=(BATCH_SIZE, TIME_STEPS, 2))
    targets_ph = tf.placeholder(tf.float32, shape=BATCH_SIZE)

    # Build the graph
    layers = [IndRNNCell(NUM_UNITS, RECURRENT_MAX) for _ in range(2)]
    cell = MultiRNNCell(layers)
    output, _ = tf.nn.dynamic_rnn(cell, inputs_ph, dtype=tf.float32)
    last = output[:, -1, :]

    weight = tf.Variable(tf.truncated_normal([NUM_UNITS, 1], stddev=0.01))
    bias = tf.Variable(tf.constant(0.1, shape=[1]))
    prediction = tf.squeeze(tf.matmul(last, weight) + bias)
    loss_op = tf.losses.mean_squared_error(tf.squeeze(targets_ph), prediction)
    optimize = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss_op)

    # Train the model
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        hundreds_done = 0
        while True:
            window_losses = []
            for _ in range(100):
                # Generate new input data
                inputs, targets = get_batch()
                feed = {inputs_ph: inputs, targets_ph: targets}
                loss, _ = sess.run([loss_op, optimize], feed)
                window_losses.append(loss)
            # One report per 100 optimisation steps.
            hundreds_done += 1
            print("Step [x100] {} MSE {}".format(hundreds_done,
                                                 np.mean(window_losses)))
class IndCatCell(tf.nn.rnn_cell.RNNCell):
    """IndRNNCell wrapper that adds a zero-padded residual connection.

    The step input is right-padded with zeros up to the inner cell's output
    width and added to the inner cell's output.
    """

    def __init__(self, num_units, recurrent_max_abs):
        """Create the wrapped IndRNNCell.

        Args:
            num_units: number of units of the inner cell.
            recurrent_max_abs: forwarded to the inner cell.
        """
        super(IndCatCell, self).__init__()
        self._indrnn = IndRNNCell(
            num_units, recurrent_max_abs=recurrent_max_abs)

    @property
    def state_size(self):
        """State size of the wrapped IndRNNCell."""
        return self._indrnn.state_size

    @property
    def output_size(self):
        """Output size of the wrapped IndRNNCell."""
        return self._indrnn.output_size

    def build(self, inputs_shape):
        """Delegate variable creation to the wrapped IndRNNCell."""
        self._indrnn.build(inputs_shape)

    def __call__(self, inputs, state, scope=None):
        """Run one step and return `(output, new_state)`.

        Previously the padded input replaced the cell output outright, so
        the recurrent computation never reached the returned output. The
        padded input is now added to it, which is what the original
        "residual connection" comment describes.
        """
        out, state = self._indrnn(inputs, state, scope)
        # Zero-pad the feature axis so the input matches the output width.
        pad_size = self._indrnn.output_size - tf.shape(inputs)[1]
        out = out + tf.pad(inputs, [[0, 0], [0, pad_size]])  # residual connection
        return out, state
def build_rnn(inputs, phase):
    """Stack NUM_LAYERS IndRNN layers, each followed by batch normalization.

    Args:
        inputs: input tensor handed to the first layer's `tf.nn.dynamic_rnn`.
        phase: tensor compared against PHASE_TRAIN and PHASE_BN_STATS to
            select the batch-norm mode.

    Returns:
        The last layer's output sliced at the final time step.
    """
    # Build the RNN with sequence-wise batch normalization. We cannot use
    # MultiRNNCell here, because we have to add batch normalization layers after
    # each RNN layer. Thus, we need to unroll each RNN layer separately.
    layer_input = inputs
    layer_output = None
    input_init = tf.random_uniform_initializer(-0.001, 0.001)
    for layer in range(1, NUM_LAYERS + 1):
        # Init only the last layer's recurrent weights around 1
        recurrent_init_lower = 0 if layer < NUM_LAYERS else LAST_LAYER_LOWER_BOUND
        recurrent_init = tf.random_uniform_initializer(recurrent_init_lower,
                                                       RECURRENT_MAX)

        # Build the layer
        cell = IndRNNCell(NUM_UNITS,
                          recurrent_max_abs=RECURRENT_MAX,
                          input_kernel_initializer=input_init,
                          recurrent_kernel_initializer=recurrent_init)

        # Unroll the layer; each layer gets its own variable scope
        layer_output, _ = tf.nn.dynamic_rnn(cell, layer_input,
                                            dtype=tf.float32,
                                            scope="rnn%d" % layer)

        # Batch statistics are used both while training and while collecting
        # population statistics.
        is_training = tf.logical_or(tf.equal(phase, PHASE_TRAIN),
                                    tf.equal(phase, PHASE_BN_STATS))
        layer_output = tf.layers.batch_normalization(layer_output,
                                                     training=is_training,
                                                     momentum=0)

        # Tie the BN population statistics updates to the layer_output op only, when
        # we are in the PHASE_BN_STATS phase
        def update_population_stats():
            # Picks up every update op registered in the graph so far.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                return tf.identity(layer_output)

        # Both branches are invoked by tf.cond right here, so they see this
        # iteration's layer_output.
        layer_output = tf.cond(tf.equal(phase, PHASE_BN_STATS),
                               true_fn=update_population_stats,
                               false_fn=lambda: layer_output)
        layer_input = layer_output

    # Return the output of the last layer in the last time step
    # layer_output has shape [?, TIME_STEPS, NUM_UNITS]
    return layer_output[:, -1, :]
def testIndRNNCell(self):
    """Checks a clipped cell's output with its default activation."""
    with self.test_session() as sess:
        ones_init = init_ops.constant_initializer(1.)
        with variable_scope.variable_scope("root", initializer=ones_init):
            x = array_ops.zeros([1, 4])
            m = array_ops.zeros([1, 4])

            out_of_bounds_init = init_ops.constant_initializer(
                [-5., -2., 0.1, 5.])
            cell = IndRNNCell(4,
                              recurrent_min_abs=1.,
                              recurrent_max_abs=3.,
                              recurrent_initializer=out_of_bounds_init)
            output, _ = cell(x, m)

        sess.run([variables_lib.global_variables_initializer()])

        feed = {
            x.name: np.array([[1., 1., 1., 1.]]),
            m.name: np.array([[2., 2., 2., 2.]]),
        }
        (result,) = sess.run([output], feed)

        # Recurrent Weights u should be -3, -2, 1, 3
        # Pre-activations (4 + 2*u) should be -2, 0, 6, 10
        self.assertAllEqual(result, [[0., 0., 6., 10.]])
def __init__(self, is_training, batch_size):
    """Build the graph: two IndRNN context encoders plus bilinear attention.

    :param is_training: True to add dropout and the training op, False to
        build the inference-only graph
    :param batch_size: the size of one batch
    """
    # Network parameters
    self.learning_rate = tf.Variable(float(LEARNING_RATE),
                                     trainable=False,
                                     dtype=tf.float32)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * LEARNING_RATE_DECAY_FACTOR)
    self.global_step = 0
    self.global_epoch = 0
    self.batch_size = batch_size

    # Input layer: token ids of the preceding / following context and their
    # true lengths
    self.pre_input = tf.placeholder(tf.int32, [batch_size, None])
    self.pre_input_seq_length = tf.placeholder(tf.int32, [batch_size, ])
    self.fol_input = tf.placeholder(tf.int32, [batch_size, None])
    self.fol_input_seq_length = tf.placeholder(tf.int32, [batch_size, ])
    self.candidate_words_input = tf.placeholder(tf.int32, [batch_size, None])
    self.is_candidate = tf.placeholder(tf.float32, [batch_size, None])
    self.one_hot_labels = tf.placeholder(tf.float32, [batch_size, None])

    # Expected output
    self.targets = tf.placeholder(tf.int32, [batch_size, ])

    # Embedding matrix
    embedding = tf.get_variable("embedding", [VOCAB_SIZE, HIDDEN_SIZE])
    self.embedding = embedding

    input_init = tf.random_uniform_initializer(-0.001, 0.001)
    recurrent_init = tf.random_uniform_initializer(0, RECURRENT_MAX)

    def make_cell():
        """Return one fresh IndRNN layer, dropout-wrapped when training."""
        # NOTE(review): both encoders use PRE_CONTEXT_HIDDEN_SIZE, as in the
        # original code — confirm this is intended for the 'Fol' encoder.
        cell = IndRNNCell(num_units=PRE_CONTEXT_HIDDEN_SIZE,
                          recurrent_max_abs=RECURRENT_MAX,
                          input_kernel_initializer=input_init,
                          recurrent_kernel_initializer=recurrent_init)
        if is_training:
            cell = tf.contrib.rnn.DropoutWrapper(cell,
                                                 output_keep_prob=KEEP_PROB)
        return cell

    # pre_context_model
    with tf.variable_scope('Pre'):
        # One cell object per layer: `[cell] * n` would put the SAME object
        # in every layer, tying the layers' weights (or failing on reuse).
        pre_lstm_cell = tf.contrib.rnn.MultiRNNCell(
            [make_cell() for _ in range(PRE_CONTEXT_NUM_LAYERS)],
            state_is_tuple=True)
        # Turn word ids into word vectors.
        pre_input = tf.nn.embedding_lookup(embedding, self.pre_input)
        if is_training:
            pre_input = tf.nn.dropout(pre_input, KEEP_PROB)
        pre_outputs, pre_states = tf.nn.dynamic_rnn(
            pre_lstm_cell,
            pre_input,
            sequence_length=self.pre_input_seq_length,
            dtype=tf.float32)
        # Use the per-layer final states in place of the per-step outputs.
        pre_outputs = pre_states
        self.pre_final_state = pre_states  # final state of the preceding-context RNN

    # fol_context_model
    with tf.variable_scope('Fol'):
        fol_lstm_cell = tf.contrib.rnn.MultiRNNCell(
            [make_cell() for _ in range(FOL_CONTEXT_NUM_LAYERS)],
            state_is_tuple=True)
        # Turn word ids into word vectors.
        fol_input = tf.nn.embedding_lookup(embedding, self.fol_input)
        if is_training:
            fol_input = tf.nn.dropout(fol_input, KEEP_PROB)
        fol_outputs, fol_states = tf.nn.dynamic_rnn(
            fol_lstm_cell,
            fol_input,
            sequence_length=self.fol_input_seq_length,
            dtype=tf.float32)
        fol_outputs = fol_states
        self.fol_final_state = fol_states  # final state of the following-context RNN

    # Simple concatenation of the last layer's final state of both encoders
    concat_output = tf.concat([pre_outputs[-1], fol_outputs[-1]], axis=-1)

    # Bilinear attention
    with tf.variable_scope('bilinear'):  # Bilinear Layer (Attention Step)
        candidate_words_input_vector = tf.nn.embedding_lookup(
            embedding, self.candidate_words_input)
        # NOTE(review): this shape assumes the concatenated context is
        # 2 * HIDDEN_SIZE wide — confirm against PRE_CONTEXT_HIDDEN_SIZE.
        bilinear_weight = tf.get_variable("bilinear_weight",
                                          [2 * HIDDEN_SIZE, HIDDEN_SIZE])
        # Match score between every candidate word and the context
        M = candidate_words_input_vector * tf.expand_dims(
            tf.matmul(concat_output, bilinear_weight),
            axis=1)  # M = [batch_size,candi_num,hidden_size]
        # Attention probability (match degree)
        alpha = tf.nn.softmax(tf.reduce_sum(M, axis=2))  # [batch_size,candi_num]

    # Zero the probability of non-candidate words
    tmp_prob = alpha * self.is_candidate
    # Renormalise over the remaining candidates
    self.logits = tmp_prob / tf.expand_dims(tf.reduce_sum(tmp_prob, axis=1),
                                            axis=1)
    # Clip so the log below stays finite
    self.logits = tf.clip_by_value(self.logits, 1e-7, 1.0 - 1e-7)

    # Cross entropy
    loss = -tf.reduce_sum(self.one_hot_labels * tf.log(self.logits),
                          reduction_indices=1)

    # Record cost
    # NOTE(review): self.global_step is the Python int 0 at graph-build time,
    # so the running averages below are built with that constant — confirm
    # this is intended.
    with tf.variable_scope('cost'):
        self.cost = tf.reduce_mean(loss)
        self.ave_cost = tf.Variable(0.0, trainable=False, dtype=tf.float32)
        self.ave_cost_op = self.ave_cost.assign(
            tf.divide(
                tf.add(tf.multiply(self.ave_cost, self.global_step),
                       self.cost),
                self.global_step + 1))  # global_step starts at 0
        tf.summary.scalar('cost', self.cost)
        tf.summary.scalar('ave_cost', self.ave_cost)

    # Record accuracy
    with tf.variable_scope('accuracy'):
        correct_prediction = tf.equal(
            self.targets, tf.cast(tf.argmax(self.logits, -1), tf.int32))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
        self.ave_accuracy = tf.Variable(0.0,
                                        trainable=False,
                                        dtype=tf.float32)
        self.ave_accuracy_op = self.ave_accuracy.assign(
            tf.divide(
                tf.add(tf.multiply(self.ave_accuracy, self.global_step),
                       self.accuracy),
                self.global_step + 1))  # global_step starts at 0
        tf.summary.scalar('accuracy', self.accuracy)
        tf.summary.scalar('ave_accuracy', self.ave_accuracy)

    # Back-propagation ops are only defined when training the model.
    if not is_training:
        return

    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.cost)
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
    # self.train_op = optimizer.minimize(self.cost)
    self.merged_summary_op = tf.summary.merge_all()  # collect summary nodes
def main():
    """Train a stacked IndRNN language model on `ptb` and log per-epoch cost.

    Builds the graph (embedding, NUM_LAYERS IndRNNCell layers, softmax
    projection, sequence loss, Adam with staircase learning-rate decay), then
    for each epoch trains, evaluates on the validation and test splits, and
    appends one line to 'ptb_ind.txt'.
    """
    inputs_ph = tf.placeholder(tf.int64, shape=(None, None))
    labels_ph = tf.placeholder(tf.int64, shape=(None, None))

    embedding = tf.get_variable("embedding", [vocab_size, NUM_UNITS],
                                dtype=tf.float32)
    inputs = tf.nn.embedding_lookup(embedding, inputs_ph)

    in_training = True
    #if in_training:
    #    inputs = tf.nn.dropout(inputs, 0.75)

    cell = MultiRNNCell([
        IndRNNCell(NUM_UNITS,
                   recurrent_max_abs=RECURRENT_MAX,
                   batch_norm=False,
                   in_training=in_training) for _ in range(NUM_LAYERS)
    ])
    # cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) #uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)

    softmax_w = tf.get_variable("softmax_w", [NUM_UNITS, vocab_size],
                                dtype=tf.float32)
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=tf.float32)
    output = tf.reshape(output, [-1, NUM_UNITS])
    logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    print(logits)

    # Reshape logits to be a 3-D tensor for sequence loss
    logits = tf.reshape(logits, [BATCH_SIZE, -1, vocab_size])

    # Use the contrib sequence loss and average over the batches.
    # NOTE(review): the weight tensor hard-codes 50 time steps — confirm it
    # matches the batches produced by `ptb`.
    loss = tf.contrib.seq2seq.sequence_loss(
        logits,
        labels_ph,
        tf.ones([BATCH_SIZE, 50], dtype=tf.float32),
        average_across_timesteps=False,
        average_across_batch=True)

    # Update the cost
    _cost = tf.reduce_sum(loss)

    if not in_training:
        return

    global_step = tf.get_variable("global_step", shape=[], trainable=False,
                                  initializer=tf.zeros_initializer)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT,
                                               global_step,
                                               LEARNING_RATE_DECAY_STEPS,
                                               0.1,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    optimize = optimizer.minimize(_cost, global_step=global_step)

    # Train the model. The log file is opened in a `with` so it is closed
    # even if training raises.
    with open('ptb_ind.txt', 'w') as fout, tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(NUM_EPOCHS):
            train_per = []
            for iteration in range(ITERATIONS_PER_EPOCH):
                x, y = ptb.train.next_batch(BATCH_SIZE)
                cost, _ = sess.run([_cost, optimize],
                                   feed_dict={inputs_ph: x, labels_ph: y})
                train_per.append(cost)
                # NOTE(review): iteration < ITERATIONS_PER_EPOCH, so this only
                # fires at iteration 20 — `iteration % 20 == 0` may have been
                # intended; left unchanged.
                if iteration % ITERATIONS_PER_EPOCH == 20:
                    print("%d/%d  %f" % (iteration, ITERATIONS_PER_EPOCH,
                                         np.mean(train_per[-20:])))
                    sys.stdout.flush()

            valid_per = []
            for _ in range(VAL_ITERS):
                x, y = ptb.valid.next_batch()
                cost = sess.run(_cost, feed_dict={inputs_ph: x, labels_ph: y})
                valid_per.append(cost)

            # The test pass had been commented out while `test_per` was still
            # reported below, which raised NameError after the first epoch.
            test_per = []
            for _ in range(VAL_ITERS):
                x, y = ptb.test.next_batch()
                cost = sess.run(_cost, feed_dict={inputs_ph: x, labels_ph: y})
                test_per.append(cost)

            print("epoch %d, train=%f, valid=%f, test=%f" %
                  (epoch, np.mean(train_per), np.mean(valid_per),
                   np.mean(test_per)))
            fout.write("%d %.4f %.4f %.4f\n" %
                       (epoch, np.mean(train_per), np.mean(valid_per),
                        np.mean(test_per)))
            sys.stdout.flush()
            fout.flush()
def __init__(self, num_units, recurrent_max_abs):
    """Create the wrapped IndRNNCell.

    Args:
        num_units: number of units of the inner cell.
        recurrent_max_abs: forwarded to the inner cell.
    """
    super(IndCatCell, self).__init__()
    inner_cell = IndRNNCell(num_units, recurrent_max_abs=recurrent_max_abs)
    self._indrnn = inner_cell
def main():
    """Train a stacked IndRNN classifier on permuted `mnist` rows.

    Builds the graph (NUM_LAYERS IndRNNCell layers, linear read-out of the
    last step, sparse softmax cross-entropy, Adam with staircase learning-rate
    decay), then for each epoch trains, evaluates on the validation and test
    splits, and appends one line of accuracies to the log file.
    """
    # Placeholders for training data
    print("here")
    sys.stdout.flush()
    inputs_ph = tf.placeholder(tf.float32, shape=(None, TIME_STEPS))
    targets_ph = tf.placeholder(tf.int64, shape=None)
    inputs_ph1 = tf.expand_dims(inputs_ph, -1)

    in_training = tf.placeholder(tf.bool, shape=[])

    # The custom input/recurrent initializers were disabled upstream (their
    # keyword arguments are commented out), so the unused initializer objects
    # are no longer created here.
    cells = [
        IndRNNCell(NUM_UNITS,
                   recurrent_max_abs=RECURRENT_MAX,
                   batch_norm=False,
                   in_training=in_training,
                   layer_idx=layer - 1)
        for layer in range(1, NUM_LAYERS + 1)
    ]
    print("here1")
    sys.stdout.flush()

    # Build the graph
    cell = MultiRNNCell(cells, BATCH_SIZE)
    # cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) #uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell, inputs_ph1, dtype=tf.float32)
    last = output[:, -1, :]

    weight = tf.get_variable("softmax_weight", shape=[NUM_UNITS, OUTPUT_SIZE])
    bias = tf.get_variable("softmax_bias", shape=[1],
                           initializer=tf.constant_initializer(0.1))
    prediction = tf.squeeze(tf.matmul(last, weight) + bias)
    loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction, labels=targets_ph)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(prediction, 1), targets_ph), tf.float32))
    print("here2")
    sys.stdout.flush()

    global_step = tf.get_variable("global_step", shape=[], trainable=False,
                                  initializer=tf.zeros_initializer)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT,
                                               global_step,
                                               LEARNING_RATE_DECAY_STEPS,
                                               0.1,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    optimize = optimizer.minimize(loss_op, global_step=global_step)

    # Fixed pixel permutation, seeded so runs are comparable
    np.random.seed(1234)
    perm = np.random.permutation(TIME_STEPS)
    print("here3")
    sys.stdout.flush()

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    session_config = tf.ConfigProto(gpu_options=gpu_options,
                                    log_device_placement=False)

    # The log file is opened in a `with` so it is closed even if training
    # raises.
    with open('ind_semi_W_clipcrossnorm_bn.txt', 'w') as fout, \
            tf.Session(config=session_config) as sess:

        def mean_eval_accuracy(split, num_iters):
            """Return the mean accuracy over `num_iters` batches of `split`."""
            accs = []
            for _ in range(num_iters):
                x, y = split.next_batch(BATCH_SIZE)
                accs.append(sess.run(accuracy, {
                    inputs_ph: x[:, perm],
                    targets_ph: y,
                    in_training: False
                }))
            return np.mean(accs)

        sess.run(tf.global_variables_initializer())
        for epoch in range(NUM_EPOCHS):
            print("epoch:", epoch)
            sys.stdout.flush()

            train_acc = []
            for iteration in range(ITERATIONS_PER_EPOCH):
                x, y = mnist.train.next_batch(BATCH_SIZE)
                # Optimisation steps run in training mode; this was fed as
                # False before.
                _, _, acc = sess.run([loss_op, optimize, accuracy], {
                    inputs_ph: x[:, perm],
                    targets_ph: y,
                    in_training: True
                })
                train_acc.append(acc)
                print(iteration, ITERATIONS_PER_EPOCH)
                sys.stdout.flush()

            valid_mean = mean_eval_accuracy(mnist.validation, VAL_ITERS)
            test_mean = mean_eval_accuracy(mnist.test, TEST_ITERS)

            print("epoch %d, train=%f, valid=%f, test=%f" %
                  (epoch, np.mean(train_acc), valid_mean, test_mean))
            fout.write("%d %.4f %.4f %.4f\n" %
                       (epoch, np.mean(train_acc), valid_mean, test_mean))
            sys.stdout.flush()
            fout.flush()
def _INDRNNCells(unit_list, time_steps):
    """Stack one IndRNNCell per entry of `unit_list` into a MultiRNNCell.

    Args:
        unit_list: iterable of layer widths, one IndRNNCell per entry.
        time_steps: sequence length used to derive the recurrent-weight bound
            2 ** (1 / time_steps).

    Returns:
        A MultiRNNCell with tuple state.
    """
    recurrent_max = pow(2, 1 / time_steps)
    layers = []
    for unit in unit_list:
        layers.append(IndRNNCell(unit, recurrent_max_abs=recurrent_max))
    return MultiRNNCell(layers, state_is_tuple=True)
def main():
    """Train a residual IndRNN stack to predict the next quantised sample.

    Builds NUM_LAYERS residual, dropout-wrapped IndRNNCell layers, a
    cross-entropy loss over shifted inputs plus a penalty on the tensors in
    the "recurrent_kernel" collection, then trains forever on `get_batch()`,
    writing summaries and printing the mean loss every 100 steps.
    Never returns.
    """
    # Placeholders for training data
    inputs_ph = tf.placeholder(tf.float32, shape=(BATCH_SIZE, TIME_STEPS))

    def pad_and_add(cell_in, cell_out):
        """Residual function: zero-pad the input to the output width, then add."""
        width_gap = tf.shape(cell_out)[1] - tf.shape(cell_in)[1]
        return cell_out + tf.pad(cell_in, [[0, 0], [0, width_gap]])

    def residual_dropout_layer():
        """Return one IndRNN layer wrapped with residual and dropout."""
        residual = tf.contrib.rnn.ResidualWrapper(
            IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX),
            pad_and_add)
        return tf.contrib.rnn.DropoutWrapper(residual, output_keep_prob=0.75)

    # Build the graph
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [residual_dropout_layer() for _ in range(NUM_LAYERS)])
    #cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell, tf.expand_dims(inputs_ph, 2),
                                  dtype=tf.float32)

    # Step t's output is scored against the quantised input at step t + 1.
    logits = output[:, :-1]
    targets = tf.cast((inputs_ph + 1) / 2 * 127, tf.int32)[:, 1:]
    loss_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets,
                                                       logits=logits))

    kernels = tf.get_collection("recurrent_kernel")
    per_kernel_penalty = [
        tf.reduce_mean(tf.maximum(0.0, (k * (k - RECURRENT_MAX))))
        for k in kernels
    ]
    penalty = sum(per_kernel_penalty) / len(kernels)

    summary = tf.summary.merge([
        tf.summary.scalar('loss', loss_op),
        tf.summary.histogram('distribution', tf.nn.softmax(logits)),
        tf.summary.scalar('penalty', penalty)
    ])

    global_step = tf.get_variable("global_step", shape=[], trainable=False,
                                  initializer=tf.zeros_initializer)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT,
                                               global_step,
                                               LEARNING_RATE_DECAY_STEPS,
                                               0.1,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimize = optimizer.minimize(loss_op + 10 * penalty,
                                      global_step=global_step)

    # Train the model
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter('../train_logs/correlated_noise',
                                             sess.graph)
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            window_losses = []
            for _ in range(100):
                # Generate new input data
                noise = get_batch()
                loss, _, progress = sess.run([loss_op, optimize, summary],
                                             {inputs_ph: noise})
                window_losses.append(loss)
                train_writer.add_summary(progress, step)
                step += 1
            print("Step {} loss {}".format(step, np.mean(window_losses)))
def main():
    """Train a 2-unit, two-layer IndRNN on `get_batch()` as a classifier.

    The last time step goes through a fully connected layer and batch
    normalization to give NUM_UNITS logits, scored against the targets
    scaled by 127 and cast to int. A penalty on the tensors in the
    "recurrent_kernel" collection is added to the loss. Trains forever,
    writing summaries and printing the mean loss every 100 steps.
    Never returns.
    """
    # Placeholders for training data
    inputs_ph = tf.placeholder(tf.float32, shape=(BATCH_SIZE, TIME_STEPS, 2))
    targets_ph = tf.placeholder(tf.float32, shape=BATCH_SIZE)

    # Build the graph
    first_input_init = tf.random_uniform_initializer(0, RECURRENT_MAX)
    layers = [
        IndRNNCell(2,
                   recurrent_max_abs=RECURRENT_MAX,
                   recurrent_kernel_initializer=first_input_init),
        IndRNNCell(2, recurrent_max_abs=RECURRENT_MAX),
    ]
    cell = tf.nn.rnn_cell.MultiRNNCell(layers)
    #cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell, inputs_ph, dtype=tf.float32)
    last = output[:, -1, :]
    # NOTE(review): batch_normalization is called without `training=`, so it
    # uses its default mode — confirm that is intended given the UPDATE_OPS
    # dependency below.
    projected = tf.contrib.layers.fully_connected(last, NUM_UNITS)
    last = tf.layers.batch_normalization(projected)

    targets_int = tf.cast(targets_ph * 127, tf.int32)
    loss_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets_int,
                                                       logits=last))

    kernels = tf.get_collection("recurrent_kernel")
    per_kernel_penalty = [
        tf.reduce_mean(tf.maximum(0.0, (k * (k - RECURRENT_MAX))))
        for k in kernels
    ]
    penalty = sum(per_kernel_penalty) / len(kernels)

    summary = tf.summary.merge([
        tf.summary.scalar('loss', loss_op),
        tf.summary.histogram('distribution', tf.nn.softmax(last)),
        tf.summary.scalar('penalty', penalty)
    ])

    global_step = tf.get_variable("global_step", shape=[], trainable=False,
                                  initializer=tf.zeros_initializer)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT,
                                               global_step,
                                               LEARNING_RATE_DECAY_STEPS,
                                               0.1,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimize = optimizer.minimize(loss_op + 10 * penalty,
                                      global_step=global_step)

    # Train the model
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(
            '../train_logs/addition_with_penalty_decay_100000', sess.graph)
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            window_losses = []
            for _ in range(100):
                # Generate new input data
                inputs, targets = get_batch()
                feed = {inputs_ph: inputs, targets_ph: targets}
                loss, _, progress = sess.run([loss_op, optimize, summary],
                                             feed)
                window_losses.append(loss)
                train_writer.add_summary(progress, step)
                step += 1
            print("Step {} loss {}".format(step, np.mean(window_losses)))