def encoding_process(load_file_name, file_list):
    """Encode raw observation episodes with a trained VAE.

    For each raw .npz file, computes the VAE posterior parameters
    (mu, sigma) for every frame — rather than a sampled encoding z —
    and saves them together with the recorded actions under
    ENCODING_DATA_DIR.

    Runs in a worker process, hence the local TensorFlow import.

    Args:
        load_file_name: Checkpoint name of the trained VAE under SAVE_VAE_DIR.
        file_list: List of .npz raw-data file names under RAW_DATA_DIR.
    """
    import tensorflow as tf
    file_name_list = [os.path.splitext(file)[0] for file in file_list]
    data_length = len(file_list)
    # Load models.
    vae = VAE()
    vae.build_model(is_training=False, is_assigning=False)
    with tf.Session(graph=vae.graph) as sess:
        # Load variables.
        saver = tf.train.Saver()
        # BUG FIX: the original restored from `SAVE_VAE_DIR + file_name`,
        # but `file_name` is undefined here (NameError at runtime); the
        # checkpoint name is the `load_file_name` parameter.
        saver.restore(sess, SAVE_VAE_DIR + load_file_name)
        for i in range(data_length):
            # Load raw data.
            data = np.load(RAW_DATA_DIR + file_list[i])
            obs = data["obs"]
            action = data["action"]
            # Compute the mean and standard deviation rather than the encoding z.
            # Pixels are scaled from [0, 255] to [0, 1] as the VAE expects.
            mu, sigma = sess.run([vae.mu, vae.sigma],
                                 feed_dict={vae.Input: obs / 255.0})
            # Save file (same base name as the raw episode).
            np.savez_compressed(ENCODING_DATA_DIR + file_name_list[i],
                                mu=mu, sigma=sigma, action=action)
def vae_visualization(file_name="vae", random_file=None):
    """Render a GIF comparing raw frames with their VAE reconstructions.

    Args:
        file_name: VAE checkpoint name under SAVE_VAE_DIR.
        random_file: Optional raw-data .npz file name; when None, one is
            drawn at random from RAW_DATA_DIR.
    """
    # Create folders.
    if not os.path.isdir(FIGURE_VAE_VISUALIZATION_DIR):
        os.makedirs(FIGURE_VAE_VISUALIZATION_DIR)
    # FIX: compare to None with `is`, not `==` (PEP 8; `==` can trigger
    # element-wise comparison on array-like arguments).
    if random_file is None:
        # Load random data.
        file_list = os.listdir(RAW_DATA_DIR)
        random_file = np.random.choice(file_list)
    random_file_name = os.path.splitext(random_file)[0]
    obs = np.load(RAW_DATA_DIR + random_file)["obs"]
    # Load models.
    vae = VAE()
    vae.build_model(is_training=False, is_assigning=False)
    with tf.Session(graph=vae.graph) as sess:
        # Load variables.
        saver = tf.train.Saver()
        saver.restore(sess, SAVE_VAE_DIR + file_name)
        # Compute the reconstruction (inputs scaled to [0, 1]).
        recons = sess.run(vae.output, feed_dict={vae.Input: obs / 255.0})
    tf.contrib.keras.backend.clear_session()
    imageio.mimsave(
        FIGURE_VAE_VISUALIZATION_DIR + random_file_name + ".gif",
        [plot_obs_recons(obs[i], recons[i]) for i in range(MAX_FRAME)],
        fps=20)
def rnn_visualization(temperature=1.0, vae_file_name="vae", rnn_file_name="rnn",
                      random_file=None):
    """Render a GIF comparing raw frames, VAE reconstructions of the true
    encodings z, and reconstructions of the RNN-MDN one-step predictions.

    Args:
        temperature: Sampling temperature for the mixture-density output;
            scales both the mixture logits and the Gaussian noise.
        vae_file_name: VAE checkpoint name under SAVE_VAE_DIR.
        rnn_file_name: RNN-MDN checkpoint name under SAVE_RNN_DIR.
        random_file: Optional encoding .npz file name; when None, one is
            drawn at random from ENCODING_DATA_DIR.
    """
    # Create folders.
    if not os.path.isdir(FIGURE_RNN_VISUALIZATION_DIR):
        os.makedirs(FIGURE_RNN_VISUALIZATION_DIR)
    # FIX: compare to None with `is`, not `==` (PEP 8).
    if random_file is None:
        # Load random data.
        file_list = os.listdir(ENCODING_DATA_DIR)
        random_file = np.random.choice(file_list)
    random_file_name = os.path.splitext(random_file)[0]
    obs = np.load(RAW_DATA_DIR + random_file)["obs"]
    encoding = np.load(ENCODING_DATA_DIR + random_file)
    mu = encoding["mu"]
    sigma = encoding["sigma"]
    action = encoding["action"]
    # Sample z from mu and sigma (reparameterization: z = mu + sigma * eps).
    z = mu + sigma * np.random.randn(MAX_FRAME, Z_LENGTH)
    # Build the (z, action) input sequence; the last frame has no successor.
    za = np.reshape(np.concatenate((z[:-1, :], action[:-1, :]), -1),
                    (1, MAX_FRAME - 1, Z_LENGTH + A_LENGTH))
    # Load RNN-MDN model.
    rnn = RNN_MDN()
    rnn.build_model(is_training=False, is_assigning=False,
                    is_single_input=False)
    with tf.Session(graph=rnn.graph) as sess:
        # Load variables.
        saver = tf.train.Saver()
        saver.restore(sess, SAVE_RNN_DIR + rnn_file_name)
        # Compute the key parameters of the mixture for every step.
        logits, mu, sigma = sess.run([rnn.logits, rnn.mu, rnn.sigma],
                                     feed_dict={rnn.ZA: za})
        length = len(logits)
        # Sample next_z from logits, mu and sigma.
        # Subtract the max logit for numerical stability before softmax.
        reduced_logits = logits - np.max(logits, -1, keepdims=True)
        pi = np.exp(reduced_logits / temperature) / np.sum(
            np.exp(reduced_logits / temperature), -1, keepdims=True)
        # Pick one mixture component per step according to pi.
        chosen_mode = np.reshape(
            np.array([np.random.choice(MODES, p=x) for x in pi]), [-1, 1])
        chosen_mu = np.reshape(
            np.array([mu[i, chosen_mode[i]] for i in range(length)]), [-1, 1])
        chosen_sigma = np.reshape(
            np.array([sigma[i, chosen_mode[i]] for i in range(length)]),
            [-1, 1])
        # Gaussian noise is additionally scaled by sqrt(temperature).
        next_z = chosen_mu + chosen_sigma * np.random.randn(
            length, 1) * np.sqrt(temperature)
        next_z = np.reshape(next_z, (MAX_FRAME - 1, Z_LENGTH))
        # Add z[0] to next_z so both sequences cover all MAX_FRAME frames.
        next_z = np.concatenate((np.reshape(z[0], (1, -1)), next_z), 0)
    tf.contrib.keras.backend.clear_session()
    # Load VAE model.
    vae = VAE()
    vae.build_model(is_training=False, is_assigning=False)
    with tf.Session(graph=vae.graph) as sess:
        # Load variables.
        saver = tf.train.Saver()
        saver.restore(sess, SAVE_VAE_DIR + vae_file_name)
        # Compute the reconstruction from direct encoding.
        recons_from_z = sess.run(vae.output, feed_dict={vae.z: z})
        # Compute the reconstruction from predicted encoding.
        recons_from_next_z = sess.run(vae.output, feed_dict={vae.z: next_z})
    tf.contrib.keras.backend.clear_session()
    imageio.mimsave(
        FIGURE_RNN_VISUALIZATION_DIR + random_file_name + ".gif", [
            plot_obs_recons(obs[i], recons_from_z[i], recons_from_next_z[i])
            for i in range(MAX_FRAME)
        ],
        fps=20)
def vae_training(file_name="vae"):
    """Train the VAE on all raw-data episodes.

    Saves the trained checkpoint under SAVE_VAE_DIR, per-iteration losses
    as a CSV under CSV_DIR, and a training-loss plot under
    FIGURE_TRAINING_DIR.

    Args:
        file_name: Base name used for the checkpoint, CSV, and figure.
    """
    # Create folders.
    if not os.path.isdir(SAVE_VAE_DIR):
        os.makedirs(SAVE_VAE_DIR)
    if not os.path.isdir(CSV_DIR):
        os.makedirs(CSV_DIR)
    if not os.path.isdir(FIGURE_TRAINING_DIR):
        os.makedirs(FIGURE_TRAINING_DIR)
    file_list = os.listdir(RAW_DATA_DIR)
    data_length = len(file_list)
    num_batch = data_length * MAX_FRAME // BATCH_SIZE
    num_iter = EPOCH * num_batch
    # Load data: flatten every episode into individual 64x64x3 frames.
    list_obs = []
    for i in range(data_length):
        obs = np.load(RAW_DATA_DIR + file_list[i])["obs"]
        list_obs.append(np.reshape(obs, (-1, 64, 64, 3)))
    list_obs = np.concatenate(list_obs, 0)
    # Load models.
    vae = VAE()
    vae.build_model(is_training=True, is_assigning=False)
    with tf.Session(graph=vae.graph) as sess:
        # Initialize the network.
        sess.run(tf.global_variables_initializer())
        list_reconstruction_loss = []
        list_kl_divergence = []
        list_loss = []
        for epoch in range(EPOCH):
            # Shuffle training data once per epoch.
            np.random.shuffle(list_obs)
            for batch in range(num_batch):
                obs = list_obs[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]
                _, reconstruction_loss, kl_divergence, loss = sess.run(
                    [
                        vae.train_op, vae.reconstruction_loss,
                        vae.kl_divergence, vae.loss
                    ],
                    feed_dict={
                        vae.Input: obs / 255.0,
                        vae.LR: LEARNING_RATE
                    })
                list_reconstruction_loss.append(reconstruction_loss)
                list_kl_divergence.append(kl_divergence)
                list_loss.append(loss)
                # Log progress every 100 iterations.
                if (epoch * num_batch + batch) % 100 == 0:
                    print("Iteration ",
                          format(epoch * num_batch + batch, "06d"), ":",
                          sep="")
                    print("    Reconstruction Loss = ",
                          format(reconstruction_loss, ".8f"),
                          ", KL Divergence = ",
                          format(kl_divergence, ".8f"), sep="")
        # Save the parameters.
        saver = tf.train.Saver()
        saver.save(sess, SAVE_VAE_DIR + file_name)
    tf.contrib.keras.backend.clear_session()
    # Store data in the csv file.
    # FIX: open with newline="" as required by the csv module docs so the
    # writer controls line endings (avoids \r\r\n rows on Windows).
    with open(CSV_DIR + file_name + ".csv", "w", newline="") as f:
        fieldnames = [
            "Iteration", "Reconstruction Loss", "KL Divergence", "Loss"
        ]
        writer = csv.DictWriter(f, fieldnames=fieldnames, lineterminator="\n")
        writer.writeheader()
        # FIX: renamed loop variable from `iter` (shadowed the builtin).
        for it in range(num_iter):
            content = {
                "Iteration": it,
                "Reconstruction Loss": list_reconstruction_loss[it],
                "KL Divergence": list_kl_divergence[it],
                "Loss": list_loss[it]
            }
            writer.writerow(content)
    # Plot the training loss.
    list_iter = list(range(num_iter))
    f, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
    ax.plot(list_iter, list_reconstruction_loss, "r-",
            label="Reconstruction Loss")
    ax.plot(list_iter, list_kl_divergence, "b-", label="KL Divergence")
    ax.set_title("Training Loss")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Loss")
    ax.legend(loc="upper right")
    ax.ticklabel_format(style="sci", axis="x", scilimits=(0, 0))
    ax.grid()
    f.savefig(FIGURE_TRAINING_DIR + file_name + ".png")
    plt.close(f)
def random_sampling_process(start_index, max_episode):
    """Collect raw rollouts using randomly initialized VAE/RNN/controller.

    Each episode re-draws all network parameters from a scaled Cauchy
    distribution, plays MAX_FRAME steps in the CarRacing environment, and
    saves the observations and actions as a compressed .npz episode file
    under RAW_DATA_DIR.

    Runs in a worker process, hence the local TensorFlow import and the
    per-process RNG re-seed.

    Args:
        start_index: Index of the first saved episode file (files are named
            with a 4-digit zero-padded counter).
        max_episode: Number of episodes to collect in this process.
    """
    import tensorflow as tf
    if USING_GPU:
        # Cap per-process GPU memory so several workers can share one GPU.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = MEMORY)
        gpu_config = tf.ConfigProto(gpu_options = gpu_options)
    else:
        gpu_config = None
    # Re-seed so forked worker processes do not share the parent's RNG state.
    np.random.seed()
    # Load models.
    vae = VAE()
    vae.build_model(is_training = False, is_assigning = True)
    rnn = RNN_MDN()
    rnn.build_model(is_training = False, is_assigning = True, is_single_input = True)
    con = Controller()
    # Each model lives in its own graph, so each needs its own session.
    sess_vae = tf.Session(graph = vae.graph, config = gpu_config)
    sess_rnn = tf.Session(graph = rnn.graph, config = gpu_config)
    env = MyCarRacing()
    for episode in range(max_episode):
        # Initialize the networks with random parameters.
        # A fresh random scale per episode; heavy-tailed Cauchy noise is
        # further divided by 10000 to keep values small.
        stddev = 0.01 * np.random.rand()
        with vae.graph.as_default():
            for i in range(len(tf.trainable_variables())):
                random = stddev * np.random.standard_cauchy(tf.trainable_variables()[i].get_shape()) / 10000
                sess_vae.run(vae.assign_op[i], feed_dict = {vae.Assigned_Value: random})
        with rnn.graph.as_default():
            for i in range(len(tf.trainable_variables())):
                random = stddev * np.random.standard_cauchy(tf.trainable_variables()[i].get_shape()) / 10000
                sess_rnn.run(rnn.assign_op[i], feed_dict = {rnn.Assigned_Value: random})
        # The controller is plain NumPy: draw one flat vector and split it
        # into the weight matrix and bias.
        random = stddev * np.random.standard_cauchy(np.prod(np.shape(con.weights)) + np.prod(np.shape(con.bias)))
        con.weights = np.reshape(random[:np.prod(np.shape(con.weights))], np.shape(con.weights))
        con.bias = np.reshape(random[-np.prod(np.shape(con.bias)):], np.shape(con.bias))
        # Get initial state of RNN.
        state = sess_rnn.run(rnn.initial_state)
        list_obs = []
        list_action = []
        # Reset the environment.
        obs = env.reset()
        for step in range(MAX_FRAME):
            env.render(mode = RENDER_MODE)
            list_obs.append(obs)
            # Encode the observation (pixels scaled to [0, 1]).
            obs = np.reshape(obs / 255.0, (1, 64, 64, 3))
            z = sess_vae.run(vae.z, feed_dict = {vae.Input: obs})
            # Get action: the controller input depends on CONTROLLER_MODE
            # (z only, z + hidden state h, or z + h + cell state c).
            if CONTROLLER_MODE == "Z":
                controller_input = z
            elif CONTROLLER_MODE == "ZH":
                controller_input = np.concatenate((z, state.h), 1)
            else:
                controller_input = np.concatenate((z, state.h, state.c), 1)
            action = con.get_action(controller_input)
            list_action.append(action[0])
            # Update the hidden state with the (z, action) pair.
            za = np.reshape(np.concatenate((z, action), 1), (1, 1, -1))
            state = sess_rnn.run(rnn.final_state, feed_dict = {rnn.ZA: za, rnn.initial_state: state})
            # Interact with the game engine.
            obs, reward, done, _ = env.step(action[0])
        # Save file (uint8 frames / float16 actions keep files small).
        list_obs = np.array(list_obs, dtype = np.uint8)
        list_action = np.array(list_action, dtype = np.float16)
        np.savez_compressed(RAW_DATA_DIR + format(start_index + episode, "04d"), obs = list_obs, action = list_action)
    env.render(close = True)
    sess_vae.close()
    sess_rnn.close()
    tf.contrib.keras.backend.clear_session()
def run_env_process(list_solution, list_episode, vae_file_name, rnn_file_name):
    """Evaluate candidate controller solutions in the real environment.

    Loads trained VAE and RNN checkpoints, then for each flat solution
    vector installs it into the controller and plays the requested number
    of episodes, recording the total reward of each.

    Runs in a worker process, hence the local TensorFlow import and the
    per-process RNG re-seed.

    Args:
        list_solution: List of flat parameter vectors (weights then bias).
        list_episode: Number of evaluation episodes per solution
            (parallel to list_solution).
        vae_file_name: VAE checkpoint name under SAVE_VAE_DIR.
        rnn_file_name: RNN-MDN checkpoint name under SAVE_RNN_DIR.

    Returns:
        A list of lists: total reward per episode for each solution.
    """
    import tensorflow as tf
    if USING_GPU:
        # Cap per-process GPU memory so several workers can share one GPU.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=MEMORY)
        gpu_config = tf.ConfigProto(gpu_options=gpu_options)
    else:
        gpu_config = None
    # Re-seed so forked worker processes do not share the parent's RNG state.
    np.random.seed()
    # Load models.
    vae = VAE()
    vae.build_model(is_training=False, is_assigning=True)
    rnn = RNN_MDN()
    rnn.build_model(is_training=False, is_assigning=True, is_single_input=True)
    con = Controller()
    # Each model lives in its own graph, so each needs its own session.
    sess_vae = tf.Session(graph=vae.graph, config=gpu_config)
    sess_rnn = tf.Session(graph=rnn.graph, config=gpu_config)
    # Load VAE and RNN variables.
    with vae.graph.as_default():
        saver_vae = tf.train.Saver()
        saver_vae.restore(sess_vae, SAVE_VAE_DIR + vae_file_name)
    with rnn.graph.as_default():
        saver_rnn = tf.train.Saver()
        saver_rnn.restore(sess_rnn, SAVE_RNN_DIR + rnn_file_name)
    env = MyCarRacing()
    list_total_reward = [[] for _ in range(len(list_solution))]
    for i in range(len(list_solution)):
        # Load controller variables: split the flat vector into the weight
        # matrix (front) and bias (tail).
        con.weights = np.reshape(
            list_solution[i][:np.prod(np.shape(con.weights))],
            np.shape(con.weights))
        con.bias = np.reshape(list_solution[i][-np.prod(np.shape(con.bias)):],
                              np.shape(con.bias))
        for j in range(list_episode[i]):
            # Get initial state of RNN.
            state = sess_rnn.run(rnn.initial_state)
            # Reset the environment.
            obs = env.reset()
            total_reward = 0
            for step in range(MAX_FRAME):
                env.render(mode=RENDER_MODE)
                # Encode the observation (pixels scaled to [0, 1]).
                obs = np.reshape(obs / 255.0, (1, 64, 64, 3))
                z = sess_vae.run(vae.z, feed_dict={vae.Input: obs})
                # Get action: the controller input depends on
                # CONTROLLER_MODE (z only, z + h, or z + h + c).
                if CONTROLLER_MODE == "Z":
                    controller_input = z
                elif CONTROLLER_MODE == "ZH":
                    controller_input = np.concatenate((z, state.h), 1)
                else:
                    controller_input = np.concatenate((z, state.h, state.c), 1)
                action = con.get_action(controller_input)
                # Update the hidden state with the (z, action) pair.
                za = np.reshape(np.concatenate((z, action), 1), (1, 1, -1))
                state = sess_rnn.run(rnn.final_state,
                                     feed_dict={
                                         rnn.ZA: za,
                                         rnn.initial_state: state
                                     })
                # Interact with the game engine.
                obs, reward, done, _ = env.step(action[0])
                total_reward += reward
                # Early stop if the game is finished.
                if done:
                    break
            # Record the total reward.
            list_total_reward[i].append(total_reward)
    env.render(close=True)
    sess_vae.close()
    sess_rnn.close()
    tf.contrib.keras.backend.clear_session()
    return list_total_reward
def controller_visualization(vae_file_name = "vae", rnn_file_name = "rnn", con_file_name = "controller"):
    """Play one episode with the trained world model and save it as a GIF.

    Loads the trained VAE, RNN-MDN, and controller, rolls out a single
    episode in the CarRacing environment, prints the total reward, and
    writes an animated GIF (frames annotated with the running reward)
    under FIGURE_CONTROLLER_VISUALIZATION_DIR.

    Args:
        vae_file_name: VAE checkpoint name under SAVE_VAE_DIR.
        rnn_file_name: RNN-MDN checkpoint name under SAVE_RNN_DIR.
        con_file_name: Controller .npz base name under SAVE_CONTROLLER_DIR.
    """
    # Create folders.
    if not os.path.isdir(FIGURE_CONTROLLER_VISUALIZATION_DIR):
        os.makedirs(FIGURE_CONTROLLER_VISUALIZATION_DIR)
    # Load models.
    vae = VAE()
    vae.build_model(is_training = False, is_assigning = True)
    rnn = RNN_MDN()
    rnn.build_model(is_training = False, is_assigning = True, is_single_input = True)
    con = Controller()
    # Each model lives in its own graph, so each needs its own session.
    sess_vae = tf.Session(graph = vae.graph)
    sess_rnn = tf.Session(graph = rnn.graph)
    # Load variables.
    with vae.graph.as_default():
        saver_vae = tf.train.Saver()
        saver_vae.restore(sess_vae, SAVE_VAE_DIR + vae_file_name)
    with rnn.graph.as_default():
        saver_rnn = tf.train.Saver()
        saver_rnn.restore(sess_rnn, SAVE_RNN_DIR + rnn_file_name)
    # The controller is plain NumPy; its parameters come from a .npz file.
    con_vars = np.load(SAVE_CONTROLLER_DIR + con_file_name + ".npz")
    con.weights = con_vars["weights"]
    con.bias = con_vars["bias"]
    list_obs = []
    list_reward = []
    total_reward = 0
    env = MyCarRacing()
    # Get initial state of RNN.
    state = sess_rnn.run(rnn.initial_state)
    # Reset the environment.
    obs = env.reset()
    for step in range(MAX_FRAME):
        env.render(mode = RENDER_MODE)
        # Reward recorded here is the running total BEFORE this step,
        # so each saved frame is annotated with the score at that moment.
        list_obs.append(obs)
        list_reward.append(total_reward)
        # Encode the observation (pixels scaled to [0, 1]).
        obs = np.reshape(obs / 255.0, (1, 64, 64, 3))
        z, recons = sess_vae.run([vae.z, vae.output], feed_dict = {vae.Input: obs})
        # Get action: the controller input depends on CONTROLLER_MODE
        # (z only, z + hidden state h, or z + h + cell state c).
        if CONTROLLER_MODE == "Z":
            controller_input = z
        elif CONTROLLER_MODE == "ZH":
            controller_input = np.concatenate((z, state.h), 1)
        else:
            controller_input = np.concatenate((z, state.h, state.c), 1)
        action = con.get_action(controller_input)
        # Update the hidden state with the (z, action) pair.
        za = np.reshape(np.concatenate((z, action), 1), (1, 1, -1))
        state = sess_rnn.run(rnn.final_state, feed_dict = {rnn.ZA: za, rnn.initial_state: state})
        # Interact with the game engine.
        obs, reward, done, _ = env.step(action[0])
        total_reward += reward
        if done:
            break
    print(total_reward)
    env.render(close = True)
    sess_vae.close()
    sess_rnn.close()
    tf.contrib.keras.backend.clear_session()
    # Number the GIF after the count of existing files in the folder.
    index = len(os.listdir(FIGURE_CONTROLLER_VISUALIZATION_DIR))
    imageio.mimsave(FIGURE_CONTROLLER_VISUALIZATION_DIR + format(index, "04d") + ".gif",
                    [plot_obs(list_obs[i], list_reward[i]) for i in range(len(list_obs))],
                    fps = 20)