def evaluate():
    """Plot the rewards predicted by a restored ring_net against the true ones.

    Builds a graph over one batch from ``ring_net.inputs(1, 15)`` (presumably
    batch size 1 and sequence length 15 -- confirm against ring_net):

    * steps 0-8 are run through ``encode_compress_decode``, threading the
      hidden state from one call to the next;
    * step 9 is encoded once and passed through ``lstm_compression``;
    * ``lstm_compression`` is then applied four more times to its own output,
      so the last predictions never see an encoded input frame.

    The five collected reward predictions are plotted against
    ``reward[0, 10:, 0]`` and the figure is written to
    ``compress_reward.png``.

    Raises:
        RuntimeError: if ``FLAGS.checkpoint_dir`` holds no checkpoint. The
            model would otherwise be evaluated with uninitialised variables.
    """
    with tf.Graph().as_default():
        # make inputs
        state, reward, action = ring_net.inputs(1, 15)

        # possible input dropout
        # NOTE(review): state_drop is never consumed below; it is kept only so
        # that the graph is built exactly as before.
        input_keep_prob = tf.placeholder("float")
        state_drop = tf.nn.dropout(state, input_keep_prob)

        # possible dropout inside
        keep_prob_encoding = tf.placeholder("float")
        keep_prob_lstm = tf.placeholder("float")

        predicted_rewards = []

        # first step creates the variables; every later call reuses them
        x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
            state[:, 0, :, :, :], action[:, 1, :], None,
            keep_prob_encoding, keep_prob_lstm)
        tf.get_variable_scope().reuse_variables()

        # unroll 8 more steps (states 1..8), pairing state t with action t+1
        for i in xrange(8):
            x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
                state[:, i + 1, :, :, :], action[:, i + 2, :], hidden_state,
                keep_prob_encoding, keep_prob_lstm)

        # last step that encodes an input frame (state 9)
        y_1 = ring_net.encoding(state[:, 9, :, :, :], keep_prob_encoding)
        y_2, reward_2, hidden_state = ring_net.lstm_compression(
            y_1, action[:, 10, :], hidden_state, keep_prob_lstm)
        x_2 = ring_net.decoding(y_2)
        predicted_rewards.append(reward_2)

        # now collect values: feed the compressed state back into the LSTM
        for i in xrange(4):
            y_2, reward_2, hidden_state = ring_net.lstm_compression(
                y_2, action[:, i + 11, :], hidden_state, keep_prob_lstm)
            x_2 = ring_net.decoding(y_2)
            predicted_rewards.append(reward_2)

        # stack along a new leading axis, then swap the first two axes
        reward_2_o = tf.pack(predicted_rewards)
        reward_2_o = tf.transpose(reward_2_o, perm=[1, 0, 2])

        # restore network
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            # Fail before opening a session: nothing initialises the
            # variables other than the restore below.
            raise RuntimeError(
                "no checkpoint file found in " + FLAGS.checkpoint_dir)

        sess = tf.Session()
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("restored file from " + ckpt.model_checkpoint_path)

        # Start queue runners
        tf.train.start_queue_runners(sess=sess)

        # keep probability 1.0 everywhere disables dropout for evaluation
        no_dropout = {
            keep_prob_encoding: 1.0,
            keep_prob_lstm: 1.0,
            input_keep_prob: 1.0,
        }
        reward_g_o, reward_o = sess.run([reward_2_o, reward],
                                        feed_dict=no_dropout)
        print(reward_g_o.shape)
        print(reward_o.shape)

        plt.figure(0)
        plt.plot(reward_g_o[0, :, 0], label="predicted reward")
        plt.plot(reward_o[0, 10:, 0], label="reward")
        plt.title("reward vs step")
        plt.xlabel("step")
        plt.ylabel("reward")
        plt.legend()
        plt.savefig("compress_reward.png")
def train():
    """Train ring_net on multi-step frame prediction, starting from a checkpoint.

    Builds a graph over batches from ``ring_net.inputs(4, 13)`` (presumably
    batch size 4 and sequence length 13 -- confirm against ring_net):

    * steps 0-8 are run through ``encode_compress_decode``, threading the
      hidden state from one call to the next;
    * step 9 is encoded once and passed through ``lstm_compression``;
    * ``lstm_compression`` is applied two more times to its own output.

    The three decoded frames are compared with ``state[:, 10:13]`` using
    ``tf.nn.l2_loss``, and that loss is handed to ``ring_net.train``. All
    variables are restored from ``RESTORE_DIR`` before training. The loop runs
    for 100000 steps, logging and writing summaries every 100 steps and saving
    a checkpoint to ``SAVE_DIR`` every 1000 steps.

    Raises:
        RuntimeError: if ``RESTORE_DIR`` holds no checkpoint.
        AssertionError: if the loss becomes NaN.
    """
    with tf.Graph().as_default():
        # make inputs
        state, reward, action = ring_net.inputs(4, 13)

        # possible input dropout
        # NOTE(review): state_drop is never consumed below; it is kept only so
        # that the graph is built exactly as before.
        input_keep_prob = tf.placeholder("float")
        state_drop = tf.nn.dropout(state, input_keep_prob)

        # possible dropout inside
        keep_prob_encoding = tf.placeholder("float")
        keep_prob_lstm = tf.placeholder("float")

        generated_frames = []

        # first step creates the variables; every later call reuses them
        x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
            state[:, 0, :, :, :], action[:, 1, :], None,
            keep_prob_encoding, keep_prob_lstm)
        tf.get_variable_scope().reuse_variables()

        # unroll 8 more steps (states 1..8), pairing state t with action t+1
        for i in xrange(8):
            x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
                state[:, i + 1, :, :, :], action[:, i + 2, :], hidden_state,
                keep_prob_encoding, keep_prob_lstm)

        # last step that encodes an input frame (state 9)
        y_1 = ring_net.encoding(state[:, 9, :, :, :], keep_prob_encoding)
        y_2, reward_2, hidden_state = ring_net.lstm_compression(
            y_1, action[:, 10, :], hidden_state, keep_prob_lstm)
        x_2 = ring_net.decoding(y_2)
        generated_frames.append(x_2)

        # now collect values: feed the compressed state back into the LSTM
        for i in xrange(2):
            y_2, reward_2, hidden_state = ring_net.lstm_compression(
                y_2, action[:, i + 11, :], hidden_state, keep_prob_lstm)
            x_2 = ring_net.decoding(y_2)
            generated_frames.append(x_2)
            tf.image_summary('images_gen_' + str(i), x_2)

        # stack along a new leading axis, then swap the first two axes so the
        # result lines up with state[:, 10:13]
        x_2_o = tf.pack(generated_frames)
        x_2_o = tf.transpose(x_2_o, perm=[1, 0, 2, 3, 4])

        # error
        error = tf.nn.l2_loss(state[:, 10:13, :, :, :] - x_2_o)
        tf.scalar_summary('loss', error)

        # train (second argument is presumably the learning rate -- confirm)
        train_op = ring_net.train(error, 1e-5)

        # List of all Variables (taken after train_op is built)
        variables = tf.all_variables()

        # Saver for the checkpoints written during training
        saver = tf.train.Saver(variables, max_to_keep=1)

        # Summary op
        summary_op = tf.merge_all_summaries()

        # init from seq 1 model
        print("init from " + RESTORE_DIR)
        saver_restore = tf.train.Saver(variables)
        ckpt = tf.train.get_checkpoint_state(RESTORE_DIR)
        if not (ckpt and ckpt.model_checkpoint_path):
            # get_checkpoint_state returns None when the directory holds no
            # checkpoint; nothing else initialises the variables.
            raise RuntimeError("no checkpoint file found in " + RESTORE_DIR)

        # Start running operations on the Graph.
        sess = tf.Session()
        saver_restore.restore(sess, ckpt.model_checkpoint_path)

        # Start queue runners
        tf.train.start_queue_runners(sess=sess)

        # Summary writer
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(SAVE_DIR, graph_def=graph_def)

        # every keep probability is fed as 1.0, i.e. dropout is disabled
        no_dropout = {
            keep_prob_encoding: 1.0,
            keep_prob_lstm: 1.0,
            input_keep_prob: 1.0,
        }
        checkpoint_path = os.path.join(SAVE_DIR, 'model.ckpt')

        for step in xrange(100000):
            t = time.time()
            _, loss_value = sess.run([train_op, error], feed_dict=no_dropout)
            elapsed = time.time() - t

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                print("loss value at " + str(loss_value))
                print("time per batch is " + str(elapsed))
                # separate run call, as in the original: this evaluates the
                # summaries without applying another training update
                summary_str = sess.run(summary_op, feed_dict=no_dropout)
                summary_writer.add_summary(summary_str, step)

            if step % 1000 == 0:
                saver.save(sess, checkpoint_path, global_step=step)
                print("saved to " + SAVE_DIR)
def evaluate():
    """Generate a 100-frame video by rolling a restored ring_net forward.

    Builds a graph over one batch from ``ring_net.inputs(1, 15)``:

    * steps 0-8 are run through ``encode_compress_decode`` and step 9 through
      ``encoding`` + ``lstm_compression``, all with keep probability 1.0;
    * one extra ``lstm_compression`` + ``decoding`` step is then built whose
      action comes from a placeholder.

    At run time the compressed state and hidden state from the first part are
    evaluated once. The extra step is then run 100 times: its inputs are
    overridden through ``feed_dict`` with the values from the previous
    iteration plus a fresh ``random_action``. Each decoded frame is scaled to
    0-255 and written to the module-level ``video`` writer, which is released
    at the end.

    Raises:
        RuntimeError: if ``FLAGS.checkpoint_dir`` holds no checkpoint. The
            model would otherwise be run with uninitialised variables.
    """
    with tf.Graph().as_default():
        # make inputs
        state_start, reward_start, action_start = ring_net.inputs(1, 15)
        action_size = int(action_start.get_shape()[2])
        action = tf.placeholder(tf.float32, (1, action_size))

        # first step creates the variables; every later call reuses them
        x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
            state_start[:, 0, :, :, :], action_start[:, 1, :], None, 1.0, 1.0)
        tf.get_variable_scope().reuse_variables()

        # unroll 8 more steps (states 1..8), pairing state t with action t+1
        for i in xrange(8):
            x_2, reward_2, hidden_state = ring_net.encode_compress_decode(
                state_start[:, i + 1, :, :, :], action_start[:, i + 2, :],
                hidden_state, 1.0, 1.0)

        # Last step that encodes an input frame (state 9). It is paired with
        # action 10, matching the state t / action t+1 pairing of every step
        # above; the previous code used action 9 here.
        y_1 = ring_net.encoding(state_start[:, 9, :, :, :], 1.0)
        y_2, reward_2, hidden_state = ring_net.lstm_compression(
            y_1, action_start[:, 10, :], hidden_state, 1.0)
        x_2 = ring_net.decoding(y_2)

        # Step driven from Python: y_1 and hidden_state_1 are ordinary
        # tensors here and are overridden through feed_dict in the loop below.
        y_1 = y_2
        hidden_state_1 = hidden_state
        y_2, reward_2, hidden_state_2 = ring_net.lstm_compression(
            y_1, action, hidden_state_1, 1.0)
        x_2 = ring_net.decoding(y_2)

        # restore network
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            # Fail before opening a session: nothing initialises the
            # variables other than the restore below.
            raise RuntimeError(
                "no checkpoint file found in " + FLAGS.checkpoint_dir)

        sess = tf.Session()
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("restored file from " + ckpt.model_checkpoint_path)

        # get the starting compressed state and hidden state from real frames
        tf.train.start_queue_runners(sess=sess)
        y_2_g, hidden_2_g = sess.run([y_1, hidden_state_1], feed_dict={})

        # Play!!!!
        for step in xrange(100):
            print(step)
            play_action = random_action(action_size)
            # The fetched compressed state is stored back into y_2_g so the
            # next iteration continues from it. Previously it landed in an
            # unused name and the same initial state was fed on every step.
            x_2_g, y_2_g, hidden_2_g = sess.run(
                [x_2, y_2, hidden_state_2],
                feed_dict={
                    y_1: y_2_g,
                    hidden_state_1: hidden_2_g,
                    action: play_action,
                })
            # scale to 0..255, clamp, and drop the batch axis
            frame = np.uint8(np.clip(x_2_g * 255.0, 0, 255))
            frame = frame[0, :, :, :]
            video.write(frame)

        video.release()
        cv2.destroyAllWindows()