Example #1
0
    def __init__(self, arglist):
        """Set up the VAE, the MDN-RNN and a randomly initialised controller.

        Args:
            arglist: Options object; its ``game``, ``vae_file`` and
                ``rnn_file`` attributes are read here.
        """
        n_actions = 2  # width of the controller output

        self.env_name = arglist.game

        # Both networks are built with gpu_mode=False and reuse=True and are
        # populated from the JSON files named in ``arglist``.
        vae_options = dict(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae = ConvVAE(**vae_options)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        n_inputs = rnn_output_size(EXP_MODE)
        self.input_size = n_inputs
        self.z_size = 32

        if EXP_MODE != MODE_Z_HIDDEN:
            # Linear controller: one weight matrix plus a bias vector.
            self.weight = np.random.randn(n_inputs, n_actions)
            self.bias = np.random.randn(n_actions)
            self.param_count = n_inputs * n_actions + n_actions
        else:
            # Controller with one hidden layer.
            n_hidden = 40
            self.hidden_size = n_hidden
            self.weight_hidden = np.random.randn(n_inputs, n_hidden)
            self.bias_hidden = np.random.randn(n_hidden)
            self.weight_output = np.random.randn(n_hidden, n_actions)
            self.bias_output = np.random.randn(n_actions)
            self.param_count = ((n_inputs + 1) * n_hidden +
                                (n_hidden * n_actions + n_actions))

        self.render_mode = False
Example #2
0
def train_rnn(args, train_dataset, validation_dataset):
    """Train an MDN-RNN with Keras ``fit`` and save it under the "tf_rnn" path.

    Args:
        args: Configuration passed to ``MDNRNN`` and ``get_path``;
            ``rnn_epoch_steps`` and ``rnn_num_steps`` determine the number of
            steps per epoch and the number of epochs.
        train_dataset: Training data handed to ``rnn.fit``.
        validation_dataset: Validation data handed to ``rnn.fit``.
    """
    model_save_path = get_path(args, "tf_rnn", create=True)

    rnn = MDNRNN(args=args)
    rnn.compile(optimizer=rnn.optimizer,
                loss=rnn.loss_fn,
                metrics=rnn.get_metrics())

    print("Start training")

    # Each run logs into its own timestamped TensorBoard directory.
    run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = model_save_path / "tensorboard" / run_stamp
    checkpoint_pattern = model_save_path / "ckpt-e{epoch:03d}"

    tensorboard_cb = tf.keras.callbacks.TensorBoard(
        log_dir=str(tensorboard_dir),
        update_freq=20,
        histogram_freq=1,
        profile_batch=0)
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        str(checkpoint_pattern), verbose=1)

    # The total step budget is split into whole epochs.
    n_epochs = args.rnn_num_steps // args.rnn_epoch_steps

    rnn.fit(train_dataset,
            validation_data=validation_dataset,
            steps_per_epoch=args.rnn_epoch_steps,
            epochs=n_epochs,
            callbacks=[tensorboard_cb, checkpoint_cb])

    rnn.save(str(model_save_path))
    print(f"Model saved to {model_save_path}")
Example #3
0
    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        """Create the CarRacing environment variant backed by a VAE and MDN-RNN.

        Args:
            args: Configuration passed to ``CVAE`` and ``MDNRNN``; ``exp_name``
                and ``env_name`` select the directory the saved models are
                read from.
            load_model: When true, copy the weights of the saved ``tf_vae`` and
                ``tf_rnn`` Keras models into the freshly built networks.
            full_episode: Forwarded to the parent constructor.
            with_obs: Whether or not to return the frame with the encodings.
        """
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())
            self.rnn.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())

        self.rnn_states = rnn_init_state(self.rnn)

        # NOTE(review): the parent constructor received ``full_episode``, but
        # the attribute is then forced to False here regardless -- kept as in
        # the original; confirm this is intended.
        self.full_episode = False
        # ``shape`` has to be a tuple; ``(32 + 256)`` is just the int 288.
        # ``np.NINF`` / ``np.Inf`` were removed in NumPy 2.0, so the bounds are
        # spelled with ``np.inf``, which exists in every NumPy version.
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(32 + 256, ))
Example #4
0
    def __init__(self, load_model=True):
        """Build the world model and a randomly initialised controller.

        Args:
            load_model: When true, load 'vae/vae.json' and 'rnn/rnn.json'
                into the freshly built VAE and MDN-RNN.
        """
        # Alternative binary: './VisualPushBlock.x86_64'
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'

        vae_options = dict(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae = ConvVAE(**vae_options)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        n_inputs = rnn_output_size(EXP_MODE)
        self.input_size = n_inputs
        self.z_size = z_size

        if EXP_MODE != MODE_Z_HIDDEN:
            # Linear controller: one weight matrix plus a bias vector.
            self.weight = np.random.randn(n_inputs, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = n_inputs * ACTION_SIZE + ACTION_SIZE
        else:
            # Controller with one hidden layer.
            n_hidden = 40
            self.hidden_size = n_hidden
            self.weight_hidden = np.random.randn(n_inputs, n_hidden)
            self.bias_hidden = np.random.randn(n_hidden)
            self.weight_output = np.random.randn(n_hidden, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            self.param_count = ((n_inputs + 1) * n_hidden +
                                (n_hidden * ACTION_SIZE + ACTION_SIZE))

        self.render_mode = False
Example #5
0
    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        """Create the CarRacing environment variant backed by a VAE and MDN-RNN.

        Args:
            args: Configuration passed to ``CVAE`` and ``MDNRNN``; ``env_name``
                selects the directory the saved models are read from, and
                ``z_size``, ``rnn_size`` and ``state_space`` give the
                observation width.
            load_model: When true, copy the variables of the saved ``tf_vae``
                and ``tf_rnn`` models into the freshly built networks.
            full_episode: Forwarded to the parent constructor.
            with_obs: Whether or not to return the frame with the encodings.
        """
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights([
                param_i.numpy() for param_i in tf.saved_model.load(
                    'results/{}/tf_vae'.format(args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in tf.saved_model.load(
                    'results/{}/tf_rnn'.format(args.env_name)).variables
            ])
        self.rnn_states = rnn_init_state(self.rnn)

        # NOTE(review): the parent constructor received ``full_episode``, but
        # the attribute is then forced to False here regardless -- kept as in
        # the original; confirm this is intended.
        self.full_episode = False
        # ``shape`` has to be a tuple; a parenthesised sum is just an int.
        # ``np.NINF`` / ``np.Inf`` were removed in NumPy 2.0, so the bounds are
        # spelled with ``np.inf``, which exists in every NumPy version.
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(args.z_size +
                                            args.rnn_size * args.state_space, ))
Example #6
0
    def __init__(self, args, render_mode=False, load_model=True):
        """Build the environment around a VAE and an MDN-RNN.

        Args:
            args: Configuration passed to ``CVAE`` and ``MDNRNN``; ``exp_name``
                and ``env_name`` select the results directory.
            render_mode: Stored on the instance as ``render_mode``.
            load_model: When true, copy the weights of the saved ``tf_vae`` and
                ``tf_rnn`` Keras models into the freshly built networks.
        """
        self.render_mode = render_mode

        model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
        initial_z_file = os.path.join(model_path_name,
                                      'tf_initial_z/initial_z.json')
        with open(initial_z_file, 'r') as f:
            initial_mu, initial_logvar = json.load(f)

        # Pair every mu entry with the logvar entry at the same index.
        self.initial_mu_logvar = np.array(
            [[mu, logvar] for mu, logvar in zip(initial_mu, initial_logvar)])

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            saved_models = (
                (self.vae, 'results/{}/{}/tf_vae'),
                (self.rnn, 'results/{}/{}/tf_rnn'),
            )
            for network, path_template in saved_models:
                saved = tf.keras.models.load_model(
                    path_template.format(args.exp_name, args.env_name),
                    compile=False)
                network.set_weights(saved.get_weights())

        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        rnn_args = self.rnn.args
        obs_size = rnn_args.z_size + rnn_args.rnn_size * rnn_args.state_space
        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))

        self.rnn_states = None
        self.o = None

        self.seed()
        self.reset()
Example #7
0
  def __init__(self, load_model=True):
    """Build the world model and a randomly initialised controller.

    Args:
      load_model: When true, load 'vae/vae.json' and 'rnn/rnn.json' into the
        freshly built VAE and MDN-RNN.
    """
    n_actions = 3  # width of the controller output

    # Linux binary. Mac alternative:
    # "/Users/intuinno/codegit/pushBlock/app/mac/VisualPushBlockContinuous"
    self.env_name = "/home/intuinno/codegit/pushblock/app/linux/pushblock.x86_64"

    vae_options = dict(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae = ConvVAE(**vae_options)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

    if load_model:
      self.vae.load_json('vae/vae.json')
      self.rnn.load_json('rnn/rnn.json')

    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    n_inputs = rnn_output_size(EXP_MODE)
    self.input_size = n_inputs
    self.z_size = 32

    if EXP_MODE != MODE_Z_HIDDEN:
      # Linear controller: one weight matrix plus a bias vector.
      self.weight = np.random.randn(n_inputs, n_actions)
      self.bias = np.random.randn(n_actions)
      self.param_count = n_inputs * n_actions + n_actions
    else:
      # Controller with one hidden layer.
      n_hidden = 40
      self.hidden_size = n_hidden
      self.weight_hidden = np.random.randn(n_inputs, n_hidden)
      self.bias_hidden = np.random.randn(n_hidden)
      self.weight_output = np.random.randn(n_hidden, n_actions)
      self.bias_output = np.random.randn(n_actions)
      self.param_count = ((n_inputs + 1) * n_hidden
                          + (n_hidden * n_actions + n_actions))

    self.render_mode = False
Example #8
0
  def __init__(self):
    """Load the VAE and MDN-RNN from JSON and randomly initialise the controller."""
    n_actions = 3  # width of the controller output

    self.env_name = "carracing"

    vae_options = dict(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae = ConvVAE(**vae_options)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')

    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    n_inputs = rnn_output_size(EXP_MODE)
    self.input_size = n_inputs
    self.z_size = 32

    if EXP_MODE != MODE_Z_HIDDEN:
      # Linear controller: one weight matrix plus a bias vector.
      self.weight = np.random.randn(n_inputs, n_actions)
      self.bias = np.random.randn(n_actions)
      self.param_count = n_inputs * n_actions + n_actions
    else:
      # Controller with one hidden layer.
      n_hidden = 40
      self.hidden_size = n_hidden
      self.weight_hidden = np.random.randn(n_inputs, n_hidden)
      self.bias_hidden = np.random.randn(n_hidden)
      self.weight_output = np.random.randn(n_hidden, n_actions)
      self.bias_output = np.random.randn(n_actions)
      self.param_count = ((n_inputs + 1) * n_hidden
                          + (n_hidden * n_actions + n_actions))

    self.render_mode = False
Example #9
0
    def __init__(self, arglist, action_space, scope, load_model=True):
        """Build the world model, the opponent model and a random controller.

        Args:
            arglist: Options object; ``action_space``, ``agent_num``,
                ``timestep``, ``inference``, ``vae_model_dir`` and
                ``rnn_model_dir`` are read here.
            action_space: Number of controller outputs (used as an array
                dimension below).
            scope: Suffix used to name the opponent model
                ("oppo_model_<scope>").
            load_model: When true, load the VAE and RNN parameters from the
                JSON files named in ``arglist``.
        """
        self.action_space = action_space
        self.arglist = arglist
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        # RNN input = latent (32) + own action + every other agent's actions
        # over ``timestep`` steps.
        hps_sample = hps_model._replace(
            batch_size=1,
            input_seq_width=32 + arglist.action_space +
            (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
            max_seq_len=1,
            use_recurrent_dropout=0,
            is_training=0)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json(arglist.vae_model_dir)
            self.rnn.load_json(arglist.rnn_model_dir)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        if arglist.inference:
            # In inference mode the controller input is widened by one action
            # vector per other agent.
            self.input_size = rnn_output_size(
                EXP_MODE) + (arglist.agent_num - 1) * arglist.action_space
        else:
            self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        # action trajectories recording
        # One independent deque per other agent. ``[deque(...)] * n`` would put
        # the SAME deque object in the list n times, so appending one agent's
        # action would change every agent's recorded trajectory.
        self.act_traj = [
            collections.deque(np.zeros(
                (arglist.timestep, arglist.action_space)),
                              maxlen=arglist.timestep)
            for _ in range(arglist.agent_num - 1)
        ]
        self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep,
                                     arglist.action_space,
                                     arglist.action_space,
                                     "oppo_model_{}".format(scope))
        self.inference = arglist.inference

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size,
                                                 self.action_space)
            self.bias_output = np.random.randn(self.action_space)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * self.action_space + self.action_space)
        else:
            # Linear controller: one weight matrix plus a bias vector.
            self.weight = np.random.randn(self.input_size, self.action_space)
            self.bias = np.random.randn(self.action_space)
            self.param_count = (
                self.input_size) * self.action_space + self.action_space
Example #10
0
  def __init__(self, load_model=True, full_episode=False):
    """Create the CarRacing environment variant backed by a VAE and MDN-RNN.

    Args:
      load_model: When true, load 'tf_vae/vae.json' and 'tf_rnn/rnn.json' into
        the freshly built networks.
      full_episode: Forwarded to the parent constructor.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)

    if load_model:
      self.vae.load_json('tf_vae/vae.json')
      self.rnn.load_json('tf_rnn/rnn.json')

    self.rnn_states = rnn_init_state(self.rnn)

    # NOTE(review): the parent constructor received ``full_episode``, but the
    # attribute is then forced to False here regardless -- kept as in the
    # original; confirm this is intended.
    self.full_episode = False
    # ``shape`` has to be a tuple; ``(32+256)`` is just the int 288.
    # ``np.NINF`` / ``np.Inf`` were removed in NumPy 2.0, so the bounds are
    # spelled with ``np.inf``, which exists in every NumPy version.
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32+256,))
Example #11
0
    def __init__(self,
                 args,
                 render_mode=False,
                 load_model=True,
                 with_obs=False):
        """Create the Doom TakeCover environment backed by a VAE and MDN-RNN.

        Args:
            args: Configuration passed to ``CVAE`` and ``MDNRNN``; ``exp_name``
                and ``env_name`` select the directory the saved models are
                read from.
            render_mode: When true, ``no_render`` is set to False.
            load_model: When true, copy the variables of the saved ``tf_vae``
                and ``tf_rnn`` models into the freshly built networks.
            with_obs: Stored on the instance as ``with_obs``.
        """
        super(DoomTakeCoverMDNRNN, self).__init__()

        self.with_obs = with_obs

        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])

        self.action_space = Box(low=-1.0, high=1.0, shape=())
        self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space

        self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
        # ``shape`` has to be a tuple; ``(self.obs_size)`` is just an int.
        self.actual_observation_space = Box(low=-50.,
                                            high=50.,
                                            shape=(self.obs_size, ))

        self._seed()

        # Per-episode state; these start as None and ``_reset()`` runs last.
        self.rnn_states = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()
Example #12
0
    def __init__(self,
                 model_name='',
                 load_model=True,
                 load_full_model=False,
                 full_model_path=''):
        """Build the world model and a randomly initialised controller.

        Args:
            model_name: Prefix of the '<model_name>_vae.json' and
                '<model_name>_rnn.json' files used when ``load_model`` is set.
            load_model: Load the prefixed JSON files from ``vae_path`` and
                ``rnn_path``; only consulted when ``load_full_model`` is false.
            load_full_model: Load 'vae.json' and 'rnn.json' from
                ``full_model_path``; takes precedence over ``load_model``.
            full_model_path: Directory used when ``load_full_model`` is set.
        """
        n_actions = 3  # width of the controller output

        self.model_name = model_name
        self.env_name = "carracing"

        vae_options = dict(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae = ConvVAE(**vae_options)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_full_model:
            vae_json = os.path.join(full_model_path, 'vae.json')
            self.vae.load_json(vae_json)
            rnn_json = os.path.join(full_model_path, 'rnn.json')
            self.rnn.load_json(rnn_json)
        elif load_model:
            vae_json = os.path.join(vae_path, self.model_name + '_vae.json')
            self.vae.load_json(vae_json)
            rnn_json = os.path.join(rnn_path, self.model_name + '_rnn.json')
            self.rnn.load_json(rnn_json)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        n_inputs = rnn_output_size(EXP_MODE)
        self.input_size = n_inputs
        self.z_size = 32

        if EXP_MODE != MODE_Z_HIDDEN:
            # Linear controller: one weight matrix plus a bias vector.
            self.weight = np.random.randn(n_inputs, n_actions)
            self.bias = np.random.randn(n_actions)
            self.param_count = n_inputs * n_actions + n_actions
        else:
            # Controller with one hidden layer.
            n_hidden = 40
            self.hidden_size = n_hidden
            self.weight_hidden = np.random.randn(n_inputs, n_hidden)
            self.bias_hidden = np.random.randn(n_hidden)
            self.weight_output = np.random.randn(n_hidden, n_actions)
            self.bias_output = np.random.randn(n_actions)
            self.param_count = ((n_inputs + 1) * n_hidden +
                                (n_hidden * n_actions + n_actions))

        self.render_mode = False
Example #13
0
 def __init__(self, type="CarRacing", history_pick=4, seed=None, detect_edges=False, detect_grass=False, flip=False):
     """Create the CarRacing environment wrapper plus its VAE and MDN-RNN.

     Args:
         type: Prefix of the instance name; the current time is appended.
         history_pick: Number of frames that make up one state.
         seed: Stored on the instance as ``seed``.
         detect_edges: Stored on the instance as ``detect_edges``.
         detect_grass: Stored on the instance as ``detect_grass``.
         flip: Stored on the instance as ``flip``.
     """
     self.name = type + str(time.time())

     # The global RNG is reseeded with a fixed value before the environment
     # seed is drawn from it.
     # NOTE(review): the ``seed`` argument is only stored, not used for this
     # draw -- confirm that is intended.
     random.seed(30)
     env_seed = random.randint(1,10000000)
     self.env = make_env('CarRacing-v0', env_seed, render_mode = False, full_episode = True)

     frame_shape = [64,64]
     self.image_dimension = frame_shape
     self.history_pick = history_pick
     self.state_space_size = history_pick * np.prod(frame_shape)
     self.action_space_size = 5
     self.state_shape = [None, history_pick] + list(frame_shape)
     self.history = []

     # Discrete action index -> three-component action vector.
     action_vectors = [
         [-1, 0, 0],
         [1, 0, 0],
         [0, 1, 0],
         [0, 0, 0.8],
         [0, 0, 0],
     ]
     self.action_dict = dict(enumerate(action_vectors))

     self.seed = seed
     self.detect_edges = detect_edges
     self.detect_grass = detect_grass
     self.flip = flip
     self.flip_episode = False

     vae_options = dict(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
     self.vae = ConvVAE(**vae_options)
     self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
     self.vae.load_json('vae/vae.json')
     self.rnn.load_json('rnn/rnn.json')
    def __init__(self,
                 sess=None,
                 summary_writer=tf.summary.FileWriter("logs/"),
                 rl_training=False,
                 reuse=False,
                 cluster=None,
                 index=0,
                 device='/gpu:0',
                 ppo_load_path=None,
                 ppo_save_path=None,
                 load_worldmodel=True,
                 ntype='worldmodel'):
        """Set up the world model, build the graph and create the saver.

        Args:
            sess: Stored on the instance as ``sess``.
            summary_writer: Stored on the instance as ``summary_writer``.
            rl_training: Stored on the instance as ``rl_training``.
            reuse: Stored on the instance as ``reuse``.
            cluster: Stored on the instance as ``cluster``.
            index: Stored on the instance as ``index``.
            device: Stored on the instance as ``device``.
            ppo_load_path: Prefix of the policy load path; ``ntype`` is
                appended to it.
            ppo_save_path: Prefix of the policy save path; ``ntype`` is
                appended to it.
            load_worldmodel: When true, load 'vae/vae.json' and
                'rnn/rnn.json' into the VAE and MDN-RNN.
            ntype: Suffix appended to both policy paths.
        """
        # NOTE(review): the ``summary_writer`` default is evaluated once, when
        # this function is defined, so the FileWriter for "logs/" is created
        # even if no instance ever uses it.
        # NOTE(review): both path arguments default to None, and None + str
        # raises TypeError, so callers effectively have to pass them.
        self.policy_model_path_load = ppo_load_path + ntype
        self.policy_model_path_save = ppo_save_path + ntype

        self.use_norm = True
        self.rl_training = rl_training

        self.sess = sess
        self.reuse = reuse
        self.device = device
        self.cluster = cluster
        self.index = index

        vae_options = dict(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae = ConvVAE(**vae_options)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_worldmodel:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.input_size = rnn_output_size(EXP_MODE)

        # The saver is created after the graph has been built.
        self._create_graph()
        self.rl_saver = tf.train.Saver()

        self.summary_writer = summary_writer
Example #15
0
    def __init__(self, load_model=True):
        """Build the Pong world model and a randomly initialised linear controller.

        Args:
            load_model: When true, load 'vae/vae.json' and 'rnn/rnn.json'
                into the freshly built VAE and MDN-RNN.

        Raises:
            NotImplementedError: If ``EXP_MODE`` selects the hidden-layer
                controller, which has not been ported for Atari.
        """
        self.env_name = "Pong"
        # presumably this sets ``self.num_actions``, which is read below --
        # TODO confirm against ``_make_env``.
        self._make_env()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            # The hidden-layer setup that used to follow this raise could
            # never run, so it has been dropped. NotImplementedError is an
            # Exception subclass, so existing ``except Exception`` handlers
            # still catch it.
            raise NotImplementedError("not ported for atari")

        # TODO: Not known until env.action_space is queried...
        self.weight = np.random.randn(self.input_size, self.num_actions)
        self.bias = np.random.randn(self.num_actions)
        # One weight per (input, action) pair plus one bias per action.
        self.param_count = (self.input_size + 1) * self.num_actions

        self.render_mode = False
Example #16
0
    def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
        """Build the Atari world model and initialise the controller.

        Args:
            load_model: When true, load 'vae/vae.json' and 'rnn/rnn.json'
                into the freshly built VAE and MDN-RNN.
            env_name: Stored as ``env_name`` before ``make_env()`` is called.
            render_mode: Stored on the instance as ``render_mode``.
        """
        self.env_name = env_name
        # presumably this sets ``self.na``, which is read below -- TODO
        # confirm against ``make_env``.
        self.make_env()
        self.z_size = 32

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        # The RNN input is the latent vector concatenated with ``na`` values.
        hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
        self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.init_controller()

        # Honour the caller's choice; previously the argument was accepted
        # but the attribute was always False. The default is still False.
        self.render_mode = render_mode
Example #17
0
from baselines.ddpg.models import Actor, Critic
from baselines.ddpg.memory import Memory
from baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
from baselines.common import set_global_seeds
import baselines.common.tf_util as U

from baselines import logger
import numpy as np

try:
    from mpi4py import MPI
except ImportError:
    MPI = None

# Module-level world model: both networks are created when this module is
# executed and then populated from the JSON files below.
vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
vae.load_json('vae/vae.json')
rnn.load_json('rnn/rnn.json')


def learn(network, env,
          seed=None,
          total_timesteps=None,
          nb_epochs=None, # with default settings, perform 1M steps total
          nb_epoch_cycles=20,
          nb_rollout_steps=100,
          reward_scale=1.0,
          render=False,
          render_eval=False,
          noise_type='adaptive-param_0.2',
          normalize_returns=False,
N_data = len(data_mu)  # should be 10k
batch_size = hps_model.batch_size

# save 1000 initial mu and logvars:
# Values are scaled by 10000 and truncated to integers before being written.
# ``np.int`` was removed in NumPy 1.24; it was only an alias of the builtin
# ``int``, so ``astype(int)`` produces the same dtype on every version.
initial_mu = np.copy(data_mu[:1000, 0, :] * 10000).astype(int).tolist()
initial_logvar = np.copy(data_logvar[:1000, 0, :] * 10000).astype(
    int).tolist()
with open(os.path.join("tf_initial_z", "initial_z.json"), 'wt') as outfile:
    json.dump([initial_mu, initial_logvar],
              outfile,
              sort_keys=True,
              indent=0,
              separators=(',', ': '))

reset_graph()
rnn = MDNRNN(hps_model)

# train loop:
hps = hps_model
start = time.time()
for local_step in range(hps.num_steps):

    step = rnn.sess.run(rnn.global_step)
    curr_learning_rate = (hps.learning_rate - hps.min_learning_rate) * (
        hps.decay_rate)**step + hps.min_learning_rate

    raw_z, raw_a = random_batch()
    inputs = np.concatenate((raw_z[:, :-1, :], raw_a[:, :-1, :]), axis=2)
    outputs = raw_z[:, 1:, :]  # teacher forcing (shift by one predictions)

    feed = {
Example #19
0
              indent=0,
              separators=(',', ': '))


def random_batch():
    """Draw a random batch of sampled latents, actions and ``data_d`` entries.

    Returns:
        Tuple ``(z, action, d)``: ``z`` comes from ``sample_vae`` applied to
        the selected mu/logvar sequences, ``action`` is the matching slice of
        ``data_action`` and ``d`` the matching slice of ``data_d`` cast to
        float16.
    """
    seq_len = args.rnn_max_seq_len
    picked = np.random.permutation(N_data)[:args.rnn_batch_size]

    # suboptimal b/c we are always only taking first set of steps
    batch_mu = data_mu[picked][:, :seq_len]
    batch_logvar = data_logvar[picked][:, :seq_len]
    batch_action = data_action[picked][:, :seq_len]

    latent = sample_vae(batch_mu, batch_logvar)
    done_flags = tf.cast(data_d[picked], tf.float16)
    return latent, batch_action, done_flags[:, :seq_len]


# Build the MDN-RNN and compile it with its own optimizer and loss.
rnn = MDNRNN(args=args)
rnn.compile(optimizer=rnn.optimizer, loss=rnn.get_loss())

# train loop:
start = time.time()
step = 0
# Fix the model's input signature (batch of 1) and write an initial SavedModel
# before any training step has run.
input_spec = tf.TensorSpec([1, args.max_frames, args.rnn_input_seq_width],
                           tf.float32)
# NOTE(review): ``_set_inputs`` is a private Keras method -- confirm it exists
# in the TensorFlow version in use.
rnn._set_inputs(input_spec)
tf.keras.models.save_model(rnn,
                           model_save_path,
                           include_optimizer=True,
                           save_format='tf')

for step in range(args.rnn_num_steps):
    curr_learning_rate = (
Example #20
0
def make_env(args, dream_env: bool = False, seed: Optional[int] = None,
             keep_image: bool = False, wrap_rnn: bool = True, load_model: bool = True):
    """Prepare an environment that matches the expected format.

    - The environment returns a 64x64 image in observation["image"]
      and camera data (x, y, z, pitch, yaw) in observation["camera"].
    - If wrapped in the RNN, observation["features"] returns the RNN output
      to be used for the controller.
    - A dream environment simulates the actual environment using the RNN. It
      never returns an image (because the actual environment doesn't get run)
      and only returns the features.
    - A wrapped environment always returns the features, and can return the
      original image when keep_image is True.

    Args:
        args: Configuration object; among others ``env_name``,
            ``full_episode``, ``state_space``, ``use_gqn`` and ``z_size`` are
            read here.
        dream_env: Build a ``DreamEnv`` instead of the real environment.
        seed: When not None, passed to ``env.seed``.
        keep_image: Keep the image in the observation of an RNN-wrapped
            environment. Must be False for a dream environment.
        wrap_rnn: Wrap the real environment in ``MDNRNNWrapper``.
        load_model: Load the saved encoder and RNN weights.

    Returns:
        The fully wrapped environment.

    Raises:
        AssertionError: If ``dream_env`` and ``keep_image`` are both set.
    """
    full_episode = args.full_episode

    # Initialize VAE and MDNRNN networks
    if dream_env or wrap_rnn:
        features_mode = FeatureMode.MODE_ZCH if args.state_space == 2 else FeatureMode.MODE_ZH

        if args.use_gqn:
            encoder = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim,
                                             args.gqn_h_dim, args.gqn_z_dim, args.gqn_l, name="gqn")
            encoder_path = get_path(args, "tf_gqn")
        else:
            encoder = CVAE(args)
            encoder_path = get_path(args, "tf_vae")
        rnn = MDNRNN(args)
        rnn_path = get_path(args, "tf_rnn")

        # TODO: Is this still needed? Do we ever NOT load the model?
        if load_model:
            encoder.load_weights(str(encoder_path))
            rnn.load_weights(str(rnn_path))

    if dream_env:
        assert keep_image is False, "Dream environment doesn't support image observations"

        import json
        initial_z_dir = get_path(args, "tf_initial_z")
        if args.use_gqn:
            initial_z_path = initial_z_dir / "initial_z_gqn.json"
            with open(str(initial_z_path), 'r') as f:
                initial_z = json.load(f)
        else:
            initial_z_path = initial_z_dir / "initial_z_vae.json"
            with open(str(initial_z_path), 'r') as f:
                [initial_mu, initial_logvar] = json.load(f)
            # This could probably be done more efficiently
            # ``np.float`` was removed in NumPy 1.24; it was only an alias of
            # the builtin ``float``, so the resulting dtype is unchanged.
            initial_z = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)], dtype=float)

        # Create dream environment
        # noinspection PyUnboundLocalVariable
        env = DreamEnv(initial_z, args.z_size, rnn, features_mode)

    else:
        # Create real environment
        kwargs = {}
        if args.env_name.startswith("VizdoomTakeCover"):
            kwargs["position"] = True  # Include position data as observation for Vizdoom environment

        print("Making environment {}...".format(args.env_name))
        env = gym.make(args.env_name, **kwargs)
        print("Raw environment:", env)

        from gym.envs.box2d import CarRacing
        from vizdoomgym.envs import VizdoomTakeCover
        from gym_minigrid.minigrid import MiniGridEnv
        if isinstance(env.unwrapped, CarRacing):
            # Accept actions in the required format
            env = CarRacingActionWrapper(env)
            # Transform CarRacing observations into expected format and add camera data
            env = CarRacingObservationWrapper(env)
            # Cut off "status bar" at the bottom of CarRacing observation (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(84),))
        elif isinstance(env.unwrapped, VizdoomTakeCover):
            # Accept actions in the required format
            env = VizdoomTakeCoverActionWrapper(env)
            # Transform Vizdoom observations into expected format
            env = VizdoomObservationWrapper(env)
            # Cut off "status bar" at the bottom of the screen (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(400),))
        elif isinstance(env.unwrapped, MiniGridEnv):
            from gym_minigrid.wrappers import RGBImgPartialObsWrapper
            # Accept actions in the required format
            env = MiniGridActionWrapper(env)
            # Get RGB image observations from the agent's viewpoint
            # (7x7 grid of tiles, with tile size 9 this results in a 63x63 image)
            env = RGBImgPartialObsWrapper(env, tile_size=9)
            # Add camera data to the observation
            env = MiniGridObservationWrapper(env)
            # Pad image to 64x64 to match the requirements (in effect just adding one row at the right and bottom edge
            # with repeated values from the edge)
            env = PadPixelObservationWrapper(env, target_size=64)
        else:
            env = PixelObservationWrapper(env, pixel_keys=("image",))

        if env.observation_space["image"].shape[:2] != (64, 64):
            # Resize image to 64x64
            env = ResizePixelObservationWrapper(env, size=(64, 64))

        # Wrap in RNN to add features to observation
        if wrap_rnn:
            # noinspection PyUnboundLocalVariable
            env = MDNRNNWrapper(env, encoder, rnn, keep_image=keep_image, features_mode=features_mode)

    # TODO: Is this needed? It was only ever implemented for CarRacing and didn't work
    # Force done=False if full_episode is True
    if full_episode:
        env = NoEarlyStopWrapper(env)

    # Set seed if given
    if seed is not None:
        env.seed(seed)

    print("Wrapped environment:", env)
    return env
# Start from a fresh graph before the evaluation models are built.
reset_graph()

# Third, build the VAE (batch size 1, not training, gpu_mode off).
vae = ConvVAE(z_size=z_size,
              batch_size=1,
              is_training=False,
              reuse=False,
              gpu_mode=False)

vae.load_json(os.path.join('vae', 'vae.json'))


# Fourth, build the RNN. Its input width is the latent size plus ``na``.
hps_atari_sample = hps_sample._replace(input_seq_width=z_size+na)
OUTWIDTH = hps_atari_sample.output_seq_width
rnn = MDNRNN(hps_atari_sample, gpu_mode=False)
rnn.load_json(os.path.join('rnn', 'rnn.json'))


print("All model loaded.")
# Fifth, run the evaluation. -> We have no predictions about the first frame.

start = time.time()

state = rnn_init_state(rnn) # initialize the RNN state.
pz = None  # starts as None, matching the missing first-frame prediction noted above

for i in range(steps):

  ob = obs[i:i+1] # (1, 64, 64, 1)
  action = oh_actions[i:i+1] # (1, n)