import collections
from time import time

import numpy as np
import tensorflow as tf
from tensorflow.contrib.rnn import BasicLSTMCell
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.ops.rnn import dynamic_rnn

# Project-local helpers; the module paths below are assumptions, adjust them
# to the actual repository layout.
from phased_lstm import PhasedLSTMCell
from helpers import (FileLogger, create_weight_variable, create_bias_variable,
                     create_adam_optimizer, get_placeholders, next_batch,
                     BATCH_SIZE, HIDDEN_STATES, ADD_TIME_INPUTS)


def run_lstm_mnist(lstm_cell=BasicLSTMCell, hidden_size=32, batch_size=256, steps=1000):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    learning_rate = 0.001
    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'training_accuracy'])

    # Each 28x28 image is flattened into a sequence of 784 pixel values. The
    # Phased LSTM additionally consumes a time channel, hence the last dim.
    x = tf.placeholder('float32', [batch_size, 784, 2 if lstm_cell == PhasedLSTMCell else 1])
    y_ = tf.placeholder('float32', [batch_size, 10])

    initial_states = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                      tf.random_normal([batch_size, hidden_size], stddev=0.1))
    outputs, _ = dynamic_rnn(lstm_cell(hidden_size), x, initial_state=initial_states, dtype=tf.float32)
    rnn_out = tf.squeeze(outputs[:, -1, :])  # output of the last time step

    # Single fully connected layer mapping the hidden state to 10 class logits.
    fc0_w = create_weight_variable('fc0_w', [hidden_size, 10])
    fc0_b = create_bias_variable('fc0_b', [10])
    y = tf.matmul(rnn_out, fc0_w) + fc0_b

    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
    grad_update = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(tf.global_variables_initializer())

    def transform_x(_x_):
        if lstm_cell == PhasedLSTMCell:
            # Pair every pixel with its (monotonically increasing) time index.
            t = np.tile(np.arange(784), (batch_size, 1))
            return np.stack([_x_, t], axis=2)
        return np.expand_dims(_x_, axis=2)

    for i in range(steps):
        batch = mnist.train.next_batch(batch_size)
        st = time()
        tr_loss, tr_acc, _ = sess.run([cross_entropy, accuracy, grad_update],
                                      feed_dict={x: transform_x(batch[0]), y_: batch[1]})
        print('Forward-backward pass took {0:.2f}s to complete.'.format(time() - st))
        file_logger.write([i, tr_loss, tr_acc])
    file_logger.close()
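
# A minimal, self-contained sketch of the transform above (pure NumPy, no TF):
# for the Phased LSTM, each of the 784 pixel values is paired with its time
# index, yielding a (batch, 784, 2) array. The function name and the small
# default batch size are illustrative assumptions, not part of the original code.
def demo_time_channel(batch_size=4, seq_len=784):
    pixels = np.random.rand(batch_size, seq_len)      # stand-in for MNIST rows
    t = np.tile(np.arange(seq_len), (batch_size, 1))  # 0, 1, ..., seq_len - 1
    stacked = np.stack([pixels, t], axis=2)           # value + time channel
    assert stacked.shape == (batch_size, seq_len, 2)
    return stacked
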
def run_experiment(init_session=None, placeholder_def_func=get_placeholders):
    batch_size = BATCH_SIZE
    hidden_size = HIDDEN_STATES
    learning_rate = 3e-4
    momentum = 0.9
    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'benchmark_loss'])
    x, y = placeholder_def_func()

    if ADD_TIME_INPUTS:
        lstm = PhasedLSTMCell(hidden_size)
        print('Using PhasedLSTMCell impl.')
    else:
        lstm = BasicLSTMCell(hidden_size)
        print('Using BasicLSTMCell impl.')

    initial_state = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                     tf.random_normal([batch_size, hidden_size], stddev=0.1))
    outputs, state = dynamic_rnn(lstm, x, initial_state=initial_state, dtype=tf.float32)
    # Keep only the output of the last time step, then drop the singleton
    # time dimension.
    rnn_out = tf.squeeze(tf.slice(outputs, begin=[0, tf.shape(outputs)[1] - 1, 0], size=[-1, -1, -1]))

    # Single fully connected layer producing one scalar prediction per sample.
    fc0_w = create_weight_variable('fc0_w', [hidden_size, 1])
    fc0_b = tf.get_variable('fc0_b', [1])
    out = tf.matmul(rnn_out, fc0_w) + fc0_b

    # Mean squared error. tf.sub was removed in TF 1.x; tf.subtract is the
    # current name.
    loss = tf.reduce_mean(tf.square(tf.subtract(out, y)))
    optimizer = create_adam_optimizer(learning_rate, momentum)
    trainable = tf.trainable_variables()
    grad_update = optimizer.minimize(loss, var_list=trainable)

    if init_session is not None:
        sess = init_session
    else:
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(tf.global_variables_initializer())

    # Deques keep the last 10 values so the logged losses are smoothed.
    d = collections.deque(maxlen=10)
    benchmark_d = collections.deque(maxlen=10)
    for step in range(1, int(1e9)):  # runs until interrupted
        x_s, y_s = next_batch(batch_size)
        loss_value, _, pred_value = sess.run([loss, grad_update, out],
                                             feed_dict={x: x_s, y: y_s})
        # The mean of IID U(0,1) variables converges to 0.5, so always
        # predicting 0.5 is a natural benchmark.
        benchmark_d.append(np.mean(np.square(0.5 - y_s)))
        d.append(loss_value)
        mean_loss = np.mean(d)
        benchmark_mean_loss = np.mean(benchmark_d)
        file_logger.write([step, mean_loss, benchmark_mean_loss])
    file_logger.close()  # unreachable in practice; the loop runs indefinitely
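
# Stand-ins for the project-local data helpers, so run_experiment can be tried
# end to end. These are assumptions for illustration only: consistent with the
# 0.5 benchmark above, the task is taken to be regressing the mean of an IID
# U(0,1) sequence. SEQ_LEN and both function names are hypothetical.
SEQ_LEN = 32

def demo_get_placeholders():
    # Shape [batch, time, features]; with ADD_TIME_INPUTS enabled the feature
    # dim would be 2 (value + time stamp) instead of 1.
    x = tf.placeholder(tf.float32, [BATCH_SIZE, SEQ_LEN, 1])
    y = tf.placeholder(tf.float32, [BATCH_SIZE, 1])
    return x, y

def demo_next_batch(batch_size):
    x_s = np.random.rand(batch_size, SEQ_LEN, 1).astype(np.float32)
    y_s = np.mean(x_s, axis=1)  # per-sequence mean; concentrates around 0.5
    return x_s, y_s

if __name__ == '__main__':
    # run_experiment resolves next_batch as a module-level name, so rebind it
    # to the demo before calling (illustrative wiring, not the original entry
    # point).
    next_batch = demo_next_batch
    run_experiment(placeholder_def_func=demo_get_placeholders)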