Beispiel #1
0
def main():
    """Train the convolutional VAE on recorded episode images."""
    args = PARSER.parse_args()

    record_dir = get_path(args, "record")
    save_dir = get_path(args, "tf_vae", create=True)

    ensure_validation_split(record_dir)
    _n_train, _avg_frames, mean, var = analyse_dataset(record_dir)

    # Optional per-image normalization using the mean/var gathered above.
    if args.normalize_images:
        train_data, val_data = create_tf_dataset(record_dir, args.z_size, True, mean, var)
    else:
        train_data, val_data = create_tf_dataset(record_dir, args.z_size)

    # Shuffle window of roughly 20 full episodes; enlarging it costs more RAM.
    window = 5 * 1000
    train_data = (train_data
                  .shuffle(window, reshuffle_each_iteration=True)
                  .batch(args.vae_batch_size)
                  .prefetch(2))
    val_data = val_data.batch(args.vae_batch_size).prefetch(2)

    run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    tb_dir = save_dir / "tensorboard" / run_stamp

    vae = CVAE(args=args)
    vae.compile(optimizer=vae.optimizer, loss=vae.get_loss())
    callbacks = [
        tf.keras.callbacks.TensorBoard(log_dir=str(tb_dir), update_freq=50, histogram_freq=1),
        LogImage(str(tb_dir), val_data),
        tf.keras.callbacks.ModelCheckpoint(str(save_dir / "ckpt-e{epoch:02d}"), verbose=1),
    ]
    vae.fit(train_data, validation_data=val_data, epochs=args.vae_num_epoch,
            callbacks=callbacks)
    vae.save(str(save_dir))
Beispiel #2
0
def main():
    """Train the Generative Query Network on recorded episodes."""
    args = PARSER.parse_args()

    record_dir = get_path(args, "record")
    save_dir = get_path(args, "tf_gqn", create=True)

    ensure_validation_split(record_dir)
    train_data = load_from_tfrecord(record_dir, args.gqn_context_size,
                                    args.gqn_batch_size, mode='train')
    test_data = load_from_tfrecord(record_dir, args.gqn_context_size,
                                   args.gqn_batch_size, mode='test')

    run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    tb_dir = save_dir / "tensorboard" / run_stamp

    # Learning rate and pixel sigma both decay polynomially from their
    # *_i start values to *_f over *_n steps (module-level schedule constants).
    lr_schedule = tf.optimizers.schedules.PolynomialDecay(
        mu_i, mu_n, mu_f, name="lr_schedule")
    sigma_schedule = tf.optimizers.schedules.PolynomialDecay(
        sigma_i, sigma_n, sigma_f, name="sigma_schedule")
    optimizer = tf.optimizers.Adam(learning_rate=lr_schedule)

    model = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim,
                                   args.gqn_h_dim, args.gqn_z_dim,
                                   args.gqn_l, name="gqn")
    model.compile(optimizer, sigma_schedule, const_sigma=sigma_f)

    callbacks = [
        tf.keras.callbacks.TensorBoard(log_dir=str(tb_dir),
                                       update_freq=20,
                                       histogram_freq=1),
        tf.keras.callbacks.ModelCheckpoint(str(save_dir / "ckpt-e{epoch:02d}"),
                                           save_freq=checkpoint_every,
                                           verbose=1),
        LogImages(tb_dir, test_data),
    ]
    model.fit(train_data,
              validation_data=test_data,
              validation_steps=5,
              steps_per_epoch=S_epoch,
              epochs=num_epochs,
              callbacks=callbacks)
Beispiel #3
0
def main():
    """Train the RNN on pre-encoded latent series data."""
    args = PARSER.parse_args()

    series_dir = get_path(args, "series")
    train_data = load_data(series_dir / "series.npz")
    validation_data = load_data(series_dir / "series_validation.npz")

    # Derive and store initial-z data from the training set before training.
    initial_z_dir = get_path(args, "tf_initial_z", create=True)
    create_initial_z(initial_z_dir, train_data)

    train_dataset, validation_dataset = create_dataset(
        train_data, validation_data, args.rnn_batch_size, args.rnn_max_seq_len,
        args.z_size, args.rnn_input_seq_width, args.rnn_predict_done,
        args.rnn_predict_reward)
    train_rnn(args, train_dataset, validation_dataset)
Beispiel #4
0
    def __init__(self, env, silent=False):
        """Wrap *env* and load a pre-trained CVAE for encoding observations.

        Args:
            env: gym environment to wrap.
            silent: stored on the instance; presumably suppresses output
                elsewhere in the wrapper — confirm against sibling methods.
        """
        super().__init__(env)

        from vae.vae import CVAE
        from utils import PARSER
        args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])

        self.vae = CVAE(args)
        # Weights are restored from a raw .npy dump (allow_pickle is required
        # because the file stores an object array of per-layer weights).
        self.vae.set_weights(np.load("vae_weights.npy", allow_pickle=True))

        # Observations become unbounded 41-dimensional feature vectors.
        self.observation_space = Box(low=float("-inf"),
                                     high=float("inf"),
                                     shape=(41, ))
        self.silent = silent
Beispiel #5
0
            s, r, done, info = env.step(a)
            total_reward += r
            if steps % 200 == 0 or done:
                print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
                print("step {} total_reward {:+0.2f}".format(
                    steps, total_reward))
            steps += 1
            env.render()
            if done or restart: break
    env.monitor.close()


# Script entry point: parse project CLI args, then run the interactive
# keyboard-controlled CarRacing loop.
if __name__ == '__main__':
    import configargparse
    from utils import PARSER
    args = PARSER.parse_args()

    #true_env = make_env(args, dream_env=False, with_obs=True)
    # NOTE(review): function name looks misspelled ("caracing"/"hunman") but it
    # must match the definition elsewhere in this file — fix both together.
    run_caracing_by_hunman()

# from ppaquette_gym_doom.doom_take_cover import DoomTakeCoverEnv
# from gym.utils import seeding
# class DoomTakeCoverMDNRNN(DoomTakeCoverEnv):
#   def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
#     super(DoomTakeCoverMDNRNN, self).__init__()

#     self.with_obs = with_obs

#     self.no_render = True
#     if render_mode:
#       self.no_render = False
Beispiel #6
0
import matplotlib.animation
import matplotlib.pyplot as plt
import wrappers
import gym
import car_racing_environment
import numpy as np
import os
from PIL import Image
import json
import tensorflow as tf
import random
from vae.vae import CVAE
# from env import make_env
from utils import PARSER
args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])

import pygame
pygame.init()
screen = pygame.display.set_mode((600, 300))

# Build the evaluation environment: frame-skipped CarRacing, VAE-encoded
# observations, wrapped for repeated scored evaluation runs.
frame_skip = 3
seed = 2
env = wrappers.EvaluationWrapper(wrappers.VaeCarWrapper(
    gym.make("CarRacingSoftFS{}-v0".format(frame_skip))),
                                 seed,
                                 evaluate_for=15,
                                 report_each=1)

DATA_DIR = "export"
# NOTE(review): original code was "models/tf_vae".format(args.exp_name,
# args.env_name) — the format arguments were silently ignored because the
# template has no placeholders. Kept the literal path so behavior is
# unchanged; confirm whether "models/{}_{}/tf_vae" (or similar) was intended.
model_path_name = "models/tf_vae"
vae = CVAE(args)
Beispiel #7
0
def main():
    """Record random-policy rollouts and save them as compressed .npz episodes.

    For each trial: seed env + numpy from a fresh random seed (which also
    names the output file), roll out up to ``args.max_frames`` steps using
    either a randomly-initialized controller or uniform random actions
    (optionally held for a random number of steps), and — if the episode
    reaches ``args.min_frames`` — save image/action/reward/done arrays
    (plus "camera" data when the env provides it) to ``<seed>.npz``.
    """
    print("Setting niceness to 19")
    # Lower scheduling priority where supported (os.nice is POSIX-only).
    if hasattr(os, "nice"):
        os.nice(19)

    args = PARSER.parse_args()

    def make_env_with_args():
        # Keep raw images in observations; the RNN wrapper is not needed here.
        return make_env(args=args, keep_image=True, wrap_rnn=False)

    dir_name = get_path(args, "record", create=True)

    controller = None
    if args.extract_use_controller:
        controller = make_controller(args=args)
    env = make_env_with_args()

    # Some envs expose an extra "camera" entry in a Dict observation space.
    has_camera_data = isinstance(
        env.observation_space,
        gym.spaces.Dict) and "camera" in env.observation_space.spaces

    format_str = "[{success:s}] {done:s} after {frames:4d} frames, reward {reward:6.1f} " \
                 "(Total: {total_frames:7d} frames, {successful_trials:3d}/{total_trials:3d} successful trials)"

    total_frames = 0
    successful_trials = 0
    for trial in range(args.max_trials):
        try:
            # The per-trial seed doubles as the episode's filename.
            seed = random.randint(0, 2**31 - 1)
            filename = dir_name / (str(seed) + ".npz")

            np.random.seed(seed)
            env.seed(seed)

            recording_image = []
            recording_camera = []
            recording_action = []
            recording_reward = []
            recording_done = []

            # random policy
            if args.extract_use_controller:
                controller.init_random_model_params(stddev=np.random.rand() *
                                                    0.01)
            repeat_action = np.random.randint(1, 11)
            action = [0] * args.a_width

            total_reward = 0
            obs = env.reset()

            frame = 0
            ended_early = False
            for frame in range(args.max_frames):
                # Save current observation
                recording_image.append(obs["image"])
                if has_camera_data:
                    recording_camera.append(obs["camera"])

                # Pick a new action every step, or only every `repeat_action`
                # steps when sticky actions are enabled. Random actions are
                # uniform in [-1, 1); the hold length is re-drawn each time.
                if not args.extract_repeat_actions or frame % repeat_action == 0:
                    if args.extract_use_controller:
                        action = controller.get_action(obs["features"])
                    else:
                        action = np.random.rand(args.a_width) * 2.0 - 1.0
                    if args.extract_repeat_actions:
                        repeat_action = np.random.randint(1, 11)

                # Save action
                recording_action.append(action)

                # Perform action
                obs, reward, done, _info = env.step(action)
                total_reward += reward

                # Save reward and done flag
                recording_reward.append(reward)
                recording_done.append(done)

                # Stop when done
                if done:
                    ended_early = True
                    break

            total_frames += (frame + 1)
            enough_frames = len(recording_image) >= args.min_frames

            # Save episode to disk (if it has required minimum length)
            if enough_frames:
                successful_trials += 1

                # float16 keeps archives small; images stay uint8.
                recording_image = np.array(recording_image, dtype=np.uint8)
                recording_camera = np.array(recording_camera, dtype=np.float16)
                recording_action = np.array(recording_action, dtype=np.float16)
                recording_reward = np.array(recording_reward, dtype=np.float16)
                # np.bool was a deprecated alias (removed in NumPy 1.24);
                # the builtin bool is the documented replacement.
                recording_done = np.array(recording_done, dtype=bool)

                data = {
                    "image": recording_image,
                    "action": recording_action,
                    "reward": recording_reward,
                    "done": recording_done
                }
                if has_camera_data:
                    data["camera"] = recording_camera

                np.savez_compressed(str(filename), **data)

            print(
                format_str.format(success="O" if enough_frames else " ",
                                  done="Done" if ended_early else "Stop",
                                  frames=frame + 1,
                                  reward=total_reward,
                                  total_frames=total_frames,
                                  successful_trials=successful_trials,
                                  total_trials=trial + 1))

        except gym.error.Error as e:
            # Recreate the env so one bad trial doesn't abort the whole run.
            print("Gym raised an error: " + str(e))
            env.close()
            env = make_env_with_args()

    env.close()