Example no. 1
def encoding_process(load_file_name, file_list):
    import tensorflow as tf

    file_name_list = [os.path.splitext(file)[0] for file in file_list]
    data_length = len(file_list)

    # Load models.
    vae = VAE()
    vae.build_model(is_training=False, is_assigning=False)

    with tf.Session(graph=vae.graph) as sess:
        # Load variables.
        saver = tf.train.Saver()
        saver.restore(sess, SAVE_VAE_DIR + load_file_name)

        for i in range(data_length):
            # Load raw data.
            data = np.load(RAW_DATA_DIR + file_list[i])
            obs = data["obs"]
            action = data["action"]

            # Compute the mean and standard deviation instead of a sampled
            # encoding z, so that z can be re-sampled later from mu and sigma.
            mu, sigma = sess.run([vae.mu, vae.sigma],
                                 feed_dict={vae.Input: obs / 255.0})

            # Save file.
            np.savez_compressed(ENCODING_DATA_DIR + file_name_list[i],
                                mu=mu,
                                sigma=sigma,
                                action=action)
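Note that encoding_process imports TensorFlow inside the function body, the usual pattern when the function runs in a separate worker process so that each process owns its own graph and session. A minimal sketch of fanning the raw-data files out over several such workers follows; encode_in_parallel, its num_workers argument, and the chunking scheme are illustrative assumptions, not part of the original code.

import multiprocessing as mp
import os

def encode_in_parallel(load_file_name="vae", num_workers=4):
    # Illustrative helper: split the raw-data files into one chunk per worker
    # and run encoding_process in separate processes, each with its own
    # TensorFlow session.
    file_list = sorted(os.listdir(RAW_DATA_DIR))
    chunks = [file_list[i::num_workers] for i in range(num_workers)]

    processes = [mp.Process(target=encoding_process, args=(load_file_name, chunk))
                 for chunk in chunks if chunk]
    for p in processes:
        p.start()
    for p in processes:
        p.join()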
Example no. 2
def vae_visualization(file_name="vae", random_file=None):
    # Create folders.
    if not os.path.isdir(FIGURE_VAE_VISUALIZATION_DIR):
        os.makedirs(FIGURE_VAE_VISUALIZATION_DIR)

    if random_file is None:
        # Load random data.
        file_list = os.listdir(RAW_DATA_DIR)
        random_file = np.random.choice(file_list)
    random_file_name = os.path.splitext(random_file)[0]
    obs = np.load(RAW_DATA_DIR + random_file)["obs"]

    # Load models.
    vae = VAE()
    vae.build_model(is_training=False, is_assigning=False)

    with tf.Session(graph=vae.graph) as sess:
        # Load variables.
        saver = tf.train.Saver()
        saver.restore(sess, SAVE_VAE_DIR + file_name)

        # Compute the reconstruction.
        recons = sess.run(vae.output, feed_dict={vae.Input: obs / 255.0})
    tf.contrib.keras.backend.clear_session()

    imageio.mimsave(
        FIGURE_VAE_VISUALIZATION_DIR + random_file_name + ".gif",
        [plot_obs_recons(obs[i], recons[i]) for i in range(MAX_FRAME)],
        fps=20)
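The plot_obs_recons helper is not included in these listings; the GIF export only requires it to return one image array per frame. A minimal stand-in, assuming 64x64x3 frames with observations in [0, 255] and decoder outputs in [0, 1], might look like this (the real helper may render a full matplotlib figure instead):

import numpy as np

def plot_obs_recons(*frames):
    # Stand-in sketch: tile the observation and its reconstruction(s) side by
    # side and return a uint8 image that imageio.mimsave can write as one GIF frame.
    tiles = []
    for frame in frames:
        frame = np.asarray(frame, dtype=np.float32)
        if frame.max() <= 1.0:   # decoder outputs are assumed to be in [0, 1]
            frame = frame * 255.0
        tiles.append(frame.astype(np.uint8))
    return np.concatenate(tiles, axis=1)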
Example no. 3
def rnn_visualization(temperature=1.0,
                      vae_file_name="vae",
                      rnn_file_name="rnn",
                      random_file=None):
    # Create folders.
    if not os.path.isdir(FIGURE_RNN_VISUALIZATION_DIR):
        os.makedirs(FIGURE_RNN_VISUALIZATION_DIR)

    if random_file is None:
        # Load random data.
        file_list = os.listdir(ENCODING_DATA_DIR)
        random_file = np.random.choice(file_list)
    random_file_name = os.path.splitext(random_file)[0]
    obs = np.load(RAW_DATA_DIR + random_file)["obs"]
    encoding = np.load(ENCODING_DATA_DIR + random_file)
    mu = encoding["mu"]
    sigma = encoding["sigma"]
    action = encoding["action"]

    # Sample z from mu and sigma.
    z = mu + sigma * np.random.randn(MAX_FRAME, Z_LENGTH)
    za = np.reshape(np.concatenate((z[:-1, :], action[:-1, :]), -1),
                    (1, MAX_FRAME - 1, Z_LENGTH + A_LENGTH))

    # Load RNN-MDN model.
    rnn = RNN_MDN()
    rnn.build_model(is_training=False,
                    is_assigning=False,
                    is_single_input=False)

    with tf.Session(graph=rnn.graph) as sess:
        # Load variables.
        saver = tf.train.Saver()
        saver.restore(sess, SAVE_RNN_DIR + rnn_file_name)

        # Compute the key parameters.
        logits, mu, sigma = sess.run([rnn.logits, rnn.mu, rnn.sigma],
                                     feed_dict={rnn.ZA: za})
        length = len(logits)

        # Sample next_z from logits, mu and sigma.
        reduced_logits = logits - np.max(logits, -1, keepdims=True)
        pi = np.exp(reduced_logits / temperature) / np.sum(
            np.exp(reduced_logits / temperature), -1, keepdims=True)
        chosen_mode = np.reshape(
            np.array([np.random.choice(MODES, p=x) for x in pi]), [-1, 1])
        chosen_mu = np.reshape(
            np.array([mu[i, chosen_mode[i]] for i in range(length)]), [-1, 1])
        chosen_sigma = np.reshape(
            np.array([sigma[i, chosen_mode[i]] for i in range(length)]),
            [-1, 1])
        next_z = chosen_mu + chosen_sigma * np.random.randn(
            length, 1) * np.sqrt(temperature)
        next_z = np.reshape(next_z, (MAX_FRAME - 1, Z_LENGTH))

        # Add z[0] to next_z.
        next_z = np.concatenate((np.reshape(z[0], (1, -1)), next_z), 0)
    tf.contrib.keras.backend.clear_session()

    # Load VAE model.
    vae = VAE()
    vae.build_model(is_training=False, is_assigning=False)

    with tf.Session(graph=vae.graph) as sess:
        # Load variables.
        saver = tf.train.Saver()
        saver.restore(sess, SAVE_VAE_DIR + vae_file_name)

        # Compute the reconstruction from direct encoding.
        recons_from_z = sess.run(vae.output, feed_dict={vae.z: z})
        # Compute the reconstruction from predicted encoding.
        recons_from_next_z = sess.run(vae.output, feed_dict={vae.z: next_z})
    tf.contrib.keras.backend.clear_session()

    imageio.mimsave(FIGURE_RNN_VISUALIZATION_DIR + random_file_name + ".gif", [
        plot_obs_recons(obs[i], recons_from_z[i], recons_from_next_z[i])
        for i in range(MAX_FRAME)
    ],
                    fps=20)
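The sampling block above is the standard MDN-RNN rule: scale the mixture logits by 1/temperature, draw a component from the resulting softmax, then sample from that component's Gaussian with its variance widened by the temperature. The same rule for a single output dimension, written as a self-contained sketch (the function name and shapes are illustrative):

import numpy as np

def sample_mdn(logits, mu, sigma, temperature=1.0):
    # logits, mu, sigma: arrays of shape (MODES,) for one output dimension.
    scaled = (logits - np.max(logits)) / temperature
    pi = np.exp(scaled) / np.sum(np.exp(scaled))      # temperature-scaled softmax
    k = np.random.choice(len(pi), p=pi)               # draw a mixture component
    # Sample from the chosen Gaussian, widened by sqrt(temperature).
    return mu[k] + sigma[k] * np.sqrt(temperature) * np.random.randn()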
Example no. 4
def vae_training(file_name="vae"):
    # Create folders.
    if not os.path.isdir(SAVE_VAE_DIR):
        os.makedirs(SAVE_VAE_DIR)
    if not os.path.isdir(CSV_DIR):
        os.makedirs(CSV_DIR)
    if not os.path.isdir(FIGURE_TRAINING_DIR):
        os.makedirs(FIGURE_TRAINING_DIR)

    file_list = os.listdir(RAW_DATA_DIR)
    data_length = len(file_list)
    num_batch = data_length * MAX_FRAME // BATCH_SIZE
    num_iter = EPOCH * num_batch

    # Load data.
    list_obs = []
    for i in range(data_length):
        obs = np.load(RAW_DATA_DIR + file_list[i])["obs"]
        list_obs.append(np.reshape(obs, (-1, 64, 64, 3)))
    list_obs = np.concatenate(list_obs, 0)

    # Load models.
    vae = VAE()
    vae.build_model(is_training=True, is_assigning=False)

    with tf.Session(graph=vae.graph) as sess:
        # Initialize the network.
        sess.run(tf.global_variables_initializer())

        list_reconstruction_loss = []
        list_kl_divergence = []
        list_loss = []

        for epoch in range(EPOCH):
            # Shuffle training data.
            np.random.shuffle(list_obs)

            for batch in range(num_batch):
                obs = list_obs[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]
                _, reconstruction_loss, kl_divergence, loss = sess.run(
                    [
                        vae.train_op, vae.reconstruction_loss,
                        vae.kl_divergence, vae.loss
                    ],
                    feed_dict={
                        vae.Input: obs / 255.0,
                        vae.LR: LEARNING_RATE
                    })

                list_reconstruction_loss.append(reconstruction_loss)
                list_kl_divergence.append(kl_divergence)
                list_loss.append(loss)

                if (epoch * num_batch + batch) % 100 == 0:
                    print("Iteration ",
                          format(epoch * num_batch + batch, "06d"),
                          ":",
                          sep="")
                    print("  Reconstruction Loss = ",
                          format(reconstruction_loss, ".8f"),
                          ", KL Divergence = ",
                          format(kl_divergence, ".8f"),
                          sep="")

        # Save the parameters.
        saver = tf.train.Saver()
        saver.save(sess, SAVE_VAE_DIR + file_name)
    tf.contrib.keras.backend.clear_session()

    # Store data in the csv file.
    with open(CSV_DIR + file_name + ".csv", "w") as f:
        fieldnames = [
            "Iteration", "Reconstruction Loss", "KL Divergence", "Loss"
        ]
        writer = csv.DictWriter(f, fieldnames=fieldnames, lineterminator="\n")
        writer.writeheader()
        for it in range(num_iter):
            content = {
                "Iteration": it,
                "Reconstruction Loss": list_reconstruction_loss[it],
                "KL Divergence": list_kl_divergence[it],
                "Loss": list_loss[it]
            }
            writer.writerow(content)

    # Plot the training loss.
    list_iter = list(range(num_iter))
    f, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
    ax.plot(list_iter,
            list_reconstruction_loss,
            "r-",
            label="Reconstruction Loss")
    ax.plot(list_iter, list_kl_divergence, "b-", label="KL Divergence")
    ax.set_title("Training Loss")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Loss")
    ax.legend(loc="upper right")
    ax.ticklabel_format(style="sci", axis="x", scilimits=(0, 0))
    ax.grid()

    f.savefig(FIGURE_TRAINING_DIR + file_name + ".png")
    plt.close(f)
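The VAE model class itself is not part of these listings. For a diagonal-Gaussian latent, the two terms logged during training are conventionally computed as sketched below; this is the textbook formulation, not necessarily the exact graph built by vae.build_model:

import numpy as np

def vae_losses(x, x_recon, mu, sigma):
    # L2 reconstruction error summed over pixels, averaged over the batch.
    reconstruction_loss = np.mean(np.sum((x - x_recon) ** 2, axis=(1, 2, 3)))
    # KL divergence between the diagonal Gaussian N(mu, sigma^2) and N(0, I).
    kl_divergence = np.mean(
        -0.5 * np.sum(1.0 + np.log(sigma ** 2) - mu ** 2 - sigma ** 2, axis=1))
    return reconstruction_loss, kl_divergence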
Example no. 5
def random_sampling_process(start_index, max_episode):
  import tensorflow as tf
  
  if USING_GPU:
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = MEMORY)
    gpu_config = tf.ConfigProto(gpu_options = gpu_options)
  else:
    gpu_config = None
  
  np.random.seed()
  
  # Load models.
  vae = VAE()
  vae.build_model(is_training = False, is_assigning = True)
  rnn = RNN_MDN()
  rnn.build_model(is_training = False, is_assigning = True, is_single_input = True)
  con = Controller()
  
  sess_vae = tf.Session(graph = vae.graph, config = gpu_config)
  sess_rnn = tf.Session(graph = rnn.graph, config = gpu_config)
  
  env = MyCarRacing()
  
  for episode in range(max_episode):
    # Initialize the networks with random parameters.
    stddev = 0.01 * np.random.rand()
    
    with vae.graph.as_default():
      for i in range(len(tf.trainable_variables())):
        random = stddev * np.random.standard_cauchy(tf.trainable_variables()[i].get_shape()) / 10000
        sess_vae.run(vae.assign_op[i], feed_dict = {vae.Assigned_Value: random})
    with rnn.graph.as_default():
      for i in range(len(tf.trainable_variables())):
        random = stddev * np.random.standard_cauchy(tf.trainable_variables()[i].get_shape()) / 10000
        sess_rnn.run(rnn.assign_op[i], feed_dict = {rnn.Assigned_Value: random})
    random = stddev * np.random.standard_cauchy(np.prod(np.shape(con.weights)) + np.prod(np.shape(con.bias)))
    con.weights = np.reshape(random[:np.prod(np.shape(con.weights))], np.shape(con.weights))
    con.bias = np.reshape(random[-np.prod(np.shape(con.bias)):], np.shape(con.bias))
    
    # Get initial state of RNN.
    state = sess_rnn.run(rnn.initial_state)
    
    list_obs = []
    list_action = []
    
    # Reset the environment.
    obs = env.reset()
    
    for step in range(MAX_FRAME):
      env.render(mode = RENDER_MODE)
      list_obs.append(obs)
      
      # Encode the observation.
      obs = np.reshape(obs / 255.0, (1, 64, 64, 3))
      z = sess_vae.run(vae.z, feed_dict = {vae.Input: obs})
      
      # Get action.
      if CONTROLLER_MODE == "Z":
        controller_input = z
      elif CONTROLLER_MODE == "ZH":
        controller_input = np.concatenate((z, state.h), 1)
      else:
        controller_input = np.concatenate((z, state.h, state.c), 1)
      action = con.get_action(controller_input)
      list_action.append(action[0])
      
      # Update the hidden state.
      za = np.reshape(np.concatenate((z, action), 1), (1, 1, -1))
      state = sess_rnn.run(rnn.final_state, feed_dict = {rnn.ZA: za, rnn.initial_state: state})
      
      # Interact with the game engine.
      obs, reward, done, _ = env.step(action[0])
    
    # Save file.
    list_obs = np.array(list_obs, dtype = np.uint8)
    list_action = np.array(list_action, dtype = np.float16)
    np.savez_compressed(RAW_DATA_DIR + format(start_index + episode, "04d"), obs = list_obs, action = list_action)
  
  env.render(close = True)
  sess_vae.close()
  sess_rnn.close()
  tf.contrib.keras.backend.clear_session()
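The Controller class is also not shown. The way it is used here (a weights matrix, a bias vector, and get_action applied to a row vector of features) is consistent with a single linear layer followed by a squashing nonlinearity, so a minimal stand-in under that assumption could be the following; the default input size and the use of tanh are guesses rather than the repository's definition:

import numpy as np

class Controller:
    def __init__(self, input_length=Z_LENGTH, action_length=A_LENGTH):
        # The default input size only matches CONTROLLER_MODE == "Z"; the "ZH"
        # and "ZHC" modes would also need the LSTM state sizes (not shown here).
        self.weights = np.zeros((input_length, action_length))
        self.bias = np.zeros(action_length)

    def get_action(self, controller_input):
        # controller_input: shape (1, input_length) -> action of shape (1, action_length).
        return np.tanh(np.matmul(controller_input, self.weights) + self.bias)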
Example no. 6
def run_env_process(list_solution, list_episode, vae_file_name, rnn_file_name):
    import tensorflow as tf

    if USING_GPU:
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=MEMORY)
        gpu_config = tf.ConfigProto(gpu_options=gpu_options)
    else:
        gpu_config = None

    np.random.seed()

    # Load models.
    vae = VAE()
    vae.build_model(is_training=False, is_assigning=True)
    rnn = RNN_MDN()
    rnn.build_model(is_training=False, is_assigning=True, is_single_input=True)
    con = Controller()

    sess_vae = tf.Session(graph=vae.graph, config=gpu_config)
    sess_rnn = tf.Session(graph=rnn.graph, config=gpu_config)

    # Load VAE and RNN variables.
    with vae.graph.as_default():
        saver_vae = tf.train.Saver()
        saver_vae.restore(sess_vae, SAVE_VAE_DIR + vae_file_name)
    with rnn.graph.as_default():
        saver_rnn = tf.train.Saver()
        saver_rnn.restore(sess_rnn, SAVE_RNN_DIR + rnn_file_name)

    env = MyCarRacing()
    list_total_reward = [[] for _ in range(len(list_solution))]

    for i in range(len(list_solution)):
        # Load controller variables.
        con.weights = np.reshape(
            list_solution[i][:np.prod(np.shape(con.weights))],
            np.shape(con.weights))
        con.bias = np.reshape(list_solution[i][-np.prod(np.shape(con.bias)):],
                              np.shape(con.bias))

        for j in range(list_episode[i]):
            # Get initial state of RNN.
            state = sess_rnn.run(rnn.initial_state)

            # Reset the environment.
            obs = env.reset()
            total_reward = 0

            for step in range(MAX_FRAME):
                env.render(mode=RENDER_MODE)

                # Encode the observation.
                obs = np.reshape(obs / 255.0, (1, 64, 64, 3))
                z = sess_vae.run(vae.z, feed_dict={vae.Input: obs})

                # Get action.
                if CONTROLLER_MODE == "Z":
                    controller_input = z
                elif CONTROLLER_MODE == "ZH":
                    controller_input = np.concatenate((z, state.h), 1)
                else:
                    controller_input = np.concatenate((z, state.h, state.c), 1)
                action = con.get_action(controller_input)

                # Update the hidden state.
                za = np.reshape(np.concatenate((z, action), 1), (1, 1, -1))
                state = sess_rnn.run(rnn.final_state,
                                     feed_dict={
                                         rnn.ZA: za,
                                         rnn.initial_state: state
                                     })

                # Interact with the game engine.
                obs, reward, done, _ = env.step(action[0])
                total_reward += reward

                # Early stop if the game is finished.
                if done:
                    break

            # Record the total reward.
            list_total_reward[i].append(total_reward)

    env.render(close=True)
    sess_vae.close()
    sess_rnn.close()
    tf.contrib.keras.backend.clear_session()

    return list_total_reward
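run_env_process is written as a worker: it takes a batch of flattened controller parameter vectors plus an episode count per vector and returns one reward list per solution. A hedged sketch of how an outer evolution-strategy loop might consume it is below; evaluate_population and its fitness aggregation are illustrative, not taken from the repository:

import numpy as np

def evaluate_population(population, episodes_per_solution=4,
                        vae_file_name="vae", rnn_file_name="rnn"):
    # population: list of flattened parameter vectors (controller weights
    # followed by bias), e.g. candidates proposed by an evolution strategy.
    list_episode = [episodes_per_solution] * len(population)
    rewards = run_env_process(population, list_episode,
                              vae_file_name, rnn_file_name)
    # Fitness = mean total reward of each candidate over its episodes.
    return [float(np.mean(r)) for r in rewards]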
Example no. 7
def controller_visualization(vae_file_name = "vae", rnn_file_name = "rnn", con_file_name = "controller"):
  # Create folders.
  if not os.path.isdir(FIGURE_CONTROLLER_VISUALIZATION_DIR):
    os.makedirs(FIGURE_CONTROLLER_VISUALIZATION_DIR)
  
  # Load models.
  vae = VAE()
  vae.build_model(is_training = False, is_assigning = True)
  rnn = RNN_MDN()
  rnn.build_model(is_training = False, is_assigning = True, is_single_input = True)
  con = Controller()
  
  sess_vae = tf.Session(graph = vae.graph)
  sess_rnn = tf.Session(graph = rnn.graph)
  
  # Load variables.
  with vae.graph.as_default():
    saver_vae = tf.train.Saver()
    saver_vae.restore(sess_vae, SAVE_VAE_DIR + vae_file_name)
  with rnn.graph.as_default():
    saver_rnn = tf.train.Saver()
    saver_rnn.restore(sess_rnn, SAVE_RNN_DIR + rnn_file_name)
  con_vars = np.load(SAVE_CONTROLLER_DIR + con_file_name + ".npz")
  con.weights = con_vars["weights"]
  con.bias = con_vars["bias"]
  
  list_obs = []
  list_reward = []
  total_reward = 0
  env = MyCarRacing()
  
  # Get initial state of RNN.
  state = sess_rnn.run(rnn.initial_state)
  
  # Reset the environment.
  obs = env.reset()
  
  for step in range(MAX_FRAME):
    env.render(mode = RENDER_MODE)
    list_obs.append(obs)
    list_reward.append(total_reward)
    
    # Encode the observation.
    obs = np.reshape(obs / 255.0, (1, 64, 64, 3))
    z, recons = sess_vae.run([vae.z, vae.output], feed_dict = {vae.Input: obs})
    
    # Get action.
    if CONTROLLER_MODE == "Z":
      controller_input = z
    elif CONTROLLER_MODE == "ZH":
      controller_input = np.concatenate((z, state.h), 1)
    else:
      controller_input = np.concatenate((z, state.h, state.c), 1)
    action = con.get_action(controller_input)
    
    # Update the hidden state.
    za = np.reshape(np.concatenate((z, action), 1), (1, 1, -1))
    state = sess_rnn.run(rnn.final_state, feed_dict = {rnn.ZA: za, rnn.initial_state: state})
    
    # Interact with the game engine.
    obs, reward, done, _ = env.step(action[0])
    total_reward += reward
    
    if done:
      break
  
  print("Total Reward:", total_reward)
  env.render(close = True)
  sess_vae.close()
  sess_rnn.close()
  tf.contrib.keras.backend.clear_session()
  
  index = len(os.listdir(FIGURE_CONTROLLER_VISUALIZATION_DIR))
  imageio.mimsave(FIGURE_CONTROLLER_VISUALIZATION_DIR + format(index, "04d") + ".gif", [plot_obs(list_obs[i], list_reward[i]) for i in range(len(list_obs))], fps = 20)
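As with plot_obs_recons earlier, the plot_obs helper is not listed; the final call only needs it to return one image array per frame. One way to sketch it, stamping the running reward into the frame title via matplotlib (the names and figure size are illustrative):

import numpy as np
import matplotlib.pyplot as plt

def plot_obs(obs, total_reward):
    # Draw the frame with the running reward as the title and return the
    # rendered canvas as an RGB array for imageio.mimsave.
    fig, ax = plt.subplots(figsize=(3, 3))
    ax.imshow(obs.astype(np.uint8))
    ax.set_title("Reward: {:.1f}".format(total_reward))
    ax.axis("off")
    fig.canvas.draw()
    image = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    image = image.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    plt.close(fig)
    return image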