def __init__(self, arglist):
    self.env_name = arglist.game
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json(arglist.vae_file)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json(arglist.rnn_file)
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 2)
        self.bias_output = np.random.randn(2)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 2 + 2)
    else:
        self.weight = np.random.randn(self.input_size, 2)
        self.bias = np.random.randn(2)
        self.param_count = self.input_size * 2 + 2
    self.render_mode = False
def train_rnn(args, train_dataset, validation_dataset):
    model_save_path = get_path(args, "tf_rnn", create=True)

    rnn = MDNRNN(args=args)
    rnn.compile(optimizer=rnn.optimizer, loss=rnn.loss_fn, metrics=rnn.get_metrics())

    print("Start training")
    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = model_save_path / "tensorboard" / current_time
    rnn.fit(
        train_dataset,
        validation_data=validation_dataset,
        steps_per_epoch=args.rnn_epoch_steps,
        epochs=args.rnn_num_steps // args.rnn_epoch_steps,
        callbacks=[
            tf.keras.callbacks.TensorBoard(log_dir=str(tensorboard_dir),
                                           update_freq=20,
                                           histogram_freq=1,
                                           profile_batch=0),
            tf.keras.callbacks.ModelCheckpoint(str(model_save_path / "ckpt-e{epoch:03d}"), verbose=1),
        ])

    rnn.save(str(model_save_path))
    print(f"Model saved to {model_save_path}")
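# Hedged usage sketch for train_rnn above. parse_rnn_args and create_rnn_dataset are
# hypothetical helpers, not part of this codebase; only the args fields read inside
# train_rnn (rnn_epoch_steps, rnn_num_steps, ...) are assumed to exist. Note that
# rnn.fit runs args.rnn_num_steps // args.rnn_epoch_steps epochs of args.rnn_epoch_steps
# steps each, i.e. roughly args.rnn_num_steps optimizer updates in total.
args = parse_rnn_args()                                 # hypothetical CLI/config parser
train_ds = create_rnn_dataset(args, split="train")      # hypothetical tf.data pipeline
val_ds = create_rnn_dataset(args, split="validation")   # hypothetical tf.data pipeline
train_rnn(args, train_ds, val_ds)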
def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings

    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
        self.vae.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_vae'.format(args.exp_name, args.env_name),
                                       compile=False).get_weights())
        self.rnn.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name),
                                       compile=False).get_weights())

    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # Box expects a shape tuple; the flat feature vector is z (32) concatenated with the RNN state (256).
    self.observation_space = Box(low=np.NINF, high=np.Inf, shape=(32 + 256,))
def __init__(self, load_model=True):
    self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'  # './VisualPushBlock.x86_64'
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = z_size
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer (change is made here)
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE)
        self.bias_output = np.random.randn(ACTION_SIZE)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * ACTION_SIZE + ACTION_SIZE)
    else:
        self.weight = np.random.randn(self.input_size, ACTION_SIZE)
        self.bias = np.random.randn(ACTION_SIZE)
        self.param_count = self.input_size * ACTION_SIZE + ACTION_SIZE
    self.render_mode = False
def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings

    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
        self.vae.set_weights([param_i.numpy()
                              for param_i in tf.saved_model.load('results/{}/tf_vae'.format(args.env_name)).variables])
        self.rnn.set_weights([param_i.numpy()
                              for param_i in tf.saved_model.load('results/{}/tf_rnn'.format(args.env_name)).variables])

    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # Box expects a shape tuple; the feature size is z_size plus rnn_size per LSTM state used.
    self.observation_space = Box(low=np.NINF, high=np.Inf,
                                 shape=(args.z_size + args.rnn_size * args.state_space,))
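# Hedged sketch of why the observation size is z_size + rnn_size * state_space: the feature
# vector handed to the controller is typically the VAE latent concatenated with the LSTM
# hidden state, and additionally the cell state when state_space == 2. The function and
# state layout below are assumptions for illustration, not this wrapper's actual API.
import numpy as np

def assemble_features(z, rnn_states, state_space):
    """Concatenate the VAE latent z with the LSTM state(s) to form the controller input."""
    h, c = rnn_states  # hidden and cell state, each assumed shape (1, rnn_size)
    if state_space == 2:
        return np.concatenate([z.flatten(), c.flatten(), h.flatten()])  # z + c + h
    return np.concatenate([z.flatten(), h.flatten()])                   # z + h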
def __init__(self, args, render_mode=False, load_model=True):
    self.render_mode = render_mode
    model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)

    with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'), 'r') as f:
        [initial_mu, initial_logvar] = json.load(f)
    self.initial_mu_logvar = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)])

    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
        self.vae.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_vae'.format(args.exp_name, args.env_name),
                                       compile=False).get_weights())
        self.rnn.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name),
                                       compile=False).get_weights())

    # future versions of OpenAI gym need a dtype=np.float32 in the next line:
    self.action_space = Box(low=-1.0, high=1.0, shape=())
    obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
    # future versions of OpenAI gym need a dtype=np.float32 in the next line:
    self.observation_space = Box(low=-50., high=50., shape=(obs_size,))

    self.rnn_states = None
    self.o = None

    self.seed()
    self.reset()
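# Hedged sketch: how an initial latent z is typically sampled from the stored (mu, logvar)
# pairs when this dream environment resets. The /10000.0 rescaling mirrors the *10000 integer
# quantization used when initial_z.json is written (see the data-prep snippet later in this
# section); the method name and exact indexing here are illustrative, not the class's actual API.
def _sample_init_z(self):
    idx = np.random.randint(0, self.initial_mu_logvar.shape[0])
    init_mu, init_logvar = self.initial_mu_logvar[idx]
    init_mu, init_logvar = init_mu / 10000.0, init_logvar / 10000.0
    # Reparameterized sample z ~ N(mu, exp(logvar))
    return init_mu + np.exp(init_logvar / 2.0) * np.random.randn(*init_mu.shape)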
def __init__(self, load_model=True):
    # For Mac
    # self.env_name = "/Users/intuinno/codegit/pushBlock/app/mac/VisualPushBlockContinuous"
    # For linux
    self.env_name = "/home/intuinno/codegit/pushblock/app/linux/pushblock.x86_64"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 3)
        self.bias_output = np.random.randn(3)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
    else:
        self.weight = np.random.randn(self.input_size, 3)
        self.bias = np.random.randn(3)
        self.param_count = self.input_size * 3 + 3
    self.render_mode = False
def __init__(self):
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 3)
        self.bias_output = np.random.randn(3)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
    else:
        self.weight = np.random.randn(self.input_size, 3)
        self.bias = np.random.randn(3)
        self.param_count = self.input_size * 3 + 3
    self.render_mode = False
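# Hedged sketch of the forward pass that consumes the controller weights initialized above
# (an evolution strategy normally overwrites them later). rnn_output and rnn_next_state are
# assumed helpers analogous to rnn_output_size/rnn_init_state used in __init__; the steering/
# gas/brake rescaling follows the common CarRacing convention and may differ in this codebase.
def get_action(self, z):
    h = rnn_output(self.state, z, EXP_MODE)  # assumed helper: z (+ RNN state) feature vector
    if EXP_MODE == MODE_Z_HIDDEN:
        hidden = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
        action = np.tanh(np.dot(hidden, self.weight_output) + self.bias_output)
    else:
        action = np.tanh(np.dot(h, self.weight) + self.bias)
    # CarRacing expects steer in [-1, 1], gas and brake in [0, 1].
    action[1] = (action[1] + 1.0) / 2.0
    action[2] = np.clip(action[2], 0.0, 1.0)
    # Advance the MDN-RNN with the chosen action so the next step sees an updated state.
    self.state = rnn_next_state(self.rnn, z, action, self.state)  # assumed helper
    return action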
def __init__(self, arglist, action_space, scope, load_model=True):
    self.action_space = action_space
    self.arglist = arglist
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    hps_sample = hps_model._replace(
        batch_size=1,
        input_seq_width=32 + arglist.action_space + (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
        max_seq_len=1,
        use_recurrent_dropout=0,
        is_training=0)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json(arglist.vae_model_dir)
        self.rnn.load_json(arglist.rnn_model_dir)
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    if arglist.inference:
        self.input_size = rnn_output_size(EXP_MODE) + (arglist.agent_num - 1) * arglist.action_space
    else:
        self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32

    # action trajectory recording: one independent deque per opponent
    # (a list comprehension avoids aliasing a single deque across all opponents)
    self.act_traj = [
        collections.deque(np.zeros((arglist.timestep, arglist.action_space)), maxlen=arglist.timestep)
        for _ in range(arglist.agent_num - 1)
    ]
    self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep, arglist.action_space,
                                 arglist.action_space, "oppo_model_{}".format(scope))
    self.inference = arglist.inference

    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, self.action_space)
        self.bias_output = np.random.randn(self.action_space)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * self.action_space + self.action_space)
    else:
        self.weight = np.random.randn(self.input_size, self.action_space)
        self.bias = np.random.randn(self.action_space)
        self.param_count = self.input_size * self.action_space + self.action_space
def __init__(self, load_model=True, full_episode=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)

    if load_model:
        self.vae.load_json('tf_vae/vae.json')
        self.rnn.load_json('tf_rnn/rnn.json')

    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # Box expects a shape tuple; the flat feature vector is z (32) concatenated with the RNN state (256).
    self.observation_space = Box(low=np.NINF, high=np.Inf, shape=(32 + 256,))
def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
    super(DoomTakeCoverMDNRNN, self).__init__()

    self.with_obs = with_obs
    self.no_render = True
    if render_mode:
        self.no_render = False
    self.current_obs = None

    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
        self.vae.set_weights([param_i.numpy()
                              for param_i in tf.saved_model.load('results/{}/{}/tf_vae'.format(args.exp_name, args.env_name)).variables])
        self.rnn.set_weights([param_i.numpy()
                              for param_i in tf.saved_model.load('results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name)).variables])

    self.action_space = Box(low=-1.0, high=1.0, shape=())
    self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
    self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
    # Box expects a shape tuple for the flat feature observation.
    self.actual_observation_space = Box(low=-50., high=50., shape=(self.obs_size,))

    self._seed()

    self.rnn_states = None
    self.z = None
    self.restart = None
    self.frame_count = None
    self.viewer = None
    self._reset()
def __init__(self, model_name='', load_model=True, load_full_model=False, full_model_path=''):
    self.model_name = model_name
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

    if load_full_model:
        self.vae.load_json(os.path.join(full_model_path, 'vae.json'))
        self.rnn.load_json(os.path.join(full_model_path, 'rnn.json'))
    elif load_model:
        self.vae.load_json(os.path.join(vae_path, self.model_name + '_vae.json'))
        self.rnn.load_json(os.path.join(rnn_path, self.model_name + '_rnn.json'))

    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32

    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 3)
        self.bias_output = np.random.randn(3)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
    else:
        self.weight = np.random.randn(self.input_size, 3)
        self.bias = np.random.randn(3)
        self.param_count = self.input_size * 3 + 3

    self.render_mode = False
def __init__(self, type="CarRacing", history_pick=4, seed=None, detect_edges=False, detect_grass=False, flip=False): self.name = type + str(time.time()) random.seed(30) self.env = make_env('CarRacing-v0', random.randint(1,10000000), render_mode = False, full_episode = True) self.image_dimension = [64,64] self.history_pick = history_pick self.state_space_size = history_pick * np.prod(self.image_dimension) self.action_space_size = 5 self.state_shape = [None, self.history_pick] + list(self.image_dimension) self.history = [] self.action_dict = {0: [-1, 0, 0], 1: [1, 0, 0], 2: [0, 1, 0], 3: [0, 0, 0.8], 4: [0, 0, 0]} self.seed = seed self.detect_edges = detect_edges self.detect_grass = detect_grass self.flip = flip self.flip_episode = False self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True) self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True) self.vae.load_json('vae/vae.json') self.rnn.load_json('rnn/rnn.json')
def __init__(self, sess=None, summary_writer=tf.summary.FileWriter("logs/"), rl_training=False,
             reuse=False, cluster=None, index=0, device='/gpu:0',
             ppo_load_path=None, ppo_save_path=None, load_worldmodel=True, ntype='worldmodel'):
    self.policy_model_path_load = ppo_load_path + ntype
    self.policy_model_path_save = ppo_save_path + ntype

    self.rl_training = rl_training
    self.use_norm = True

    self.reuse = reuse
    self.sess = sess
    self.cluster = cluster
    self.index = index
    self.device = device

    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_worldmodel:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.input_size = rnn_output_size(EXP_MODE)

    self._create_graph()

    self.rl_saver = tf.train.Saver()
    self.summary_writer = summary_writer
def __init__(self, load_model=True): self.env_name = "Pong" self._make_env() self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True) hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions) self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True) if load_model: self.vae.load_json('vae/vae.json') self.rnn.load_json('rnn/rnn.json') self.state = rnn_init_state(self.rnn) self.rnn_mode = True self.input_size = rnn_output_size(EXP_MODE) self.z_size = 32 if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer raise Exception("not ported for atari") self.hidden_size = 40 self.weight_hidden = np.random.randn(self.input_size, self.hidden_size) self.bias_hidden = np.random.randn(self.hidden_size) self.weight_output = np.random.randn(self.hidden_size, self.num_actions) self.bias_output = np.random.randn(self.num_actions) self.param_count = ((self.input_size + 1) * self.hidden_size) + ( (self.hidden_size + 1) * self.num_actions) else: # TODO: Not known until env.action_space is queried... self.weight = np.random.randn(self.input_size, self.num_actions) self.bias = np.random.randn(self.num_actions) self.param_count = (self.input_size + 1) * self.num_actions self.render_mode = False
def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False): self.env_name = env_name self.make_env() self.z_size = 32 self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True) hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na) self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True) if load_model: self.vae.load_json('vae/vae.json') self.rnn.load_json('rnn/rnn.json') self.state = rnn_init_state(self.rnn) self.rnn_mode = True self.input_size = rnn_output_size(EXP_MODE) self.init_controller() self.render_mode = False
from baselines.ddpg.models import Actor, Critic
from baselines.ddpg.memory import Memory
from baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
from baselines.common import set_global_seeds
import baselines.common.tf_util as U

from baselines import logger
import numpy as np

try:
    from mpi4py import MPI
except ImportError:
    MPI = None

vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

vae.load_json('vae/vae.json')
rnn.load_json('rnn/rnn.json')


def learn(network, env,
          seed=None,
          total_timesteps=None,
          nb_epochs=None,  # with default settings, perform 1M steps total
          nb_epoch_cycles=20,
          nb_rollout_steps=100,
          reward_scale=1.0,
          render=False,
          render_eval=False,
          noise_type='adaptive-param_0.2',
          normalize_returns=False,
N_data = len(data_mu)  # should be 10k
batch_size = hps_model.batch_size

# save 1000 initial mu and logvars:
initial_mu = np.copy(data_mu[:1000, 0, :] * 10000).astype(int).tolist()
initial_logvar = np.copy(data_logvar[:1000, 0, :] * 10000).astype(int).tolist()
with open(os.path.join("tf_initial_z", "initial_z.json"), 'wt') as outfile:
    json.dump([initial_mu, initial_logvar], outfile, sort_keys=True, indent=0, separators=(',', ': '))

reset_graph()
rnn = MDNRNN(hps_model)

# train loop:
hps = hps_model
start = time.time()
for local_step in range(hps.num_steps):

    step = rnn.sess.run(rnn.global_step)
    curr_learning_rate = (hps.learning_rate - hps.min_learning_rate) * (hps.decay_rate) ** step + hps.min_learning_rate

    raw_z, raw_a = random_batch()
    inputs = np.concatenate((raw_z[:, :-1, :], raw_a[:, :-1, :]), axis=2)
    outputs = raw_z[:, 1:, :]  # teacher forcing (shift by one predictions)

    feed = {
          indent=0, separators=(',', ': '))


def random_batch():
    indices = np.random.permutation(N_data)[0:args.rnn_batch_size]
    # suboptimal b/c we are always only taking first set of steps
    mu = data_mu[indices][:, :args.rnn_max_seq_len]
    logvar = data_logvar[indices][:, :args.rnn_max_seq_len]
    action = data_action[indices][:, :args.rnn_max_seq_len]
    z = sample_vae(mu, logvar)
    d = tf.cast(data_d[indices], tf.float16)[:, :args.rnn_max_seq_len]
    return z, action, d


rnn = MDNRNN(args=args)
rnn.compile(optimizer=rnn.optimizer, loss=rnn.get_loss())

# train loop:
start = time.time()
step = 0

input_spec = tf.TensorSpec([1, args.max_frames, args.rnn_input_seq_width], tf.float32)
rnn._set_inputs(input_spec)
tf.keras.models.save_model(rnn, model_save_path, include_optimizer=True, save_format='tf')

for step in range(args.rnn_num_steps):

    curr_learning_rate = (
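# Hedged sketch of the sample_vae helper used in random_batch above but not shown in this
# excerpt. It presumably performs the standard reparameterization trick; the real helper may
# operate on TensorFlow tensors rather than NumPy arrays.
import numpy as np

def sample_vae(mu, logvar):
    """Draw z ~ N(mu, exp(logvar)) via the reparameterization trick."""
    sigma = np.exp(logvar / 2.0)                   # logvar -> standard deviation
    return mu + sigma * np.random.randn(*mu.shape)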
def make_env(args, dream_env: bool = False, seed: Optional[int] = None,
             keep_image: bool = False, wrap_rnn: bool = True, load_model: bool = True):
    # Prepares an environment that matches the expected format:
    # - The environment returns a 64x64 image in observation["image"]
    #   and camera data (x, y, z, pitch, yaw) in observation["camera"]
    # - If wrapped in the RNN, observation["features"] returns the RNN output to be used for the controller
    # - A dream environment simulates the actual environment using the RNN. It never returns an image
    #   (because the actual environment doesn't get run) and only returns the features
    # - A wrapped environment always returns the features, and can return the original image when keep_image is True
    full_episode = args.full_episode

    # Initialize VAE and MDNRNN networks
    if dream_env or wrap_rnn:
        features_mode = FeatureMode.MODE_ZCH if args.state_space == 2 else FeatureMode.MODE_ZH

        if args.use_gqn:
            encoder = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim, args.gqn_h_dim,
                                             args.gqn_z_dim, args.gqn_l, name="gqn")
            encoder_path = get_path(args, "tf_gqn")
        else:
            encoder = CVAE(args)
            encoder_path = get_path(args, "tf_vae")

        rnn = MDNRNN(args)
        rnn_path = get_path(args, "tf_rnn")

        # TODO: Is this still needed? Do we ever NOT load the model?
        if load_model:
            encoder.load_weights(str(encoder_path))
            rnn.load_weights(str(rnn_path))

    if dream_env:
        assert keep_image is False, "Dream environment doesn't support image observations"

        import json
        initial_z_dir = get_path(args, "tf_initial_z")
        if args.use_gqn:
            initial_z_path = initial_z_dir / "initial_z_gqn.json"
            with open(str(initial_z_path), 'r') as f:
                initial_z = json.load(f)
        else:
            initial_z_path = initial_z_dir / "initial_z_vae.json"
            with open(str(initial_z_path), 'r') as f:
                [initial_mu, initial_logvar] = json.load(f)
            # This could probably be done more efficiently
            initial_z = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)], dtype=float)

        # Create dream environment
        # noinspection PyUnboundLocalVariable
        env = DreamEnv(initial_z, args.z_size, rnn, features_mode)
    else:
        # Create real environment
        kwargs = {}
        if args.env_name.startswith("VizdoomTakeCover"):
            kwargs["position"] = True  # Include position data as observation for Vizdoom environment

        print("Making environment {}...".format(args.env_name))
        env = gym.make(args.env_name, **kwargs)
        print("Raw environment:", env)

        from gym.envs.box2d import CarRacing
        from vizdoomgym.envs import VizdoomTakeCover
        from gym_minigrid.minigrid import MiniGridEnv

        if isinstance(env.unwrapped, CarRacing):
            # Accept actions in the required format
            env = CarRacingActionWrapper(env)
            # Transform CarRacing observations into expected format and add camera data
            env = CarRacingObservationWrapper(env)
            # Cut off "status bar" at the bottom of CarRacing observation (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(84),))
        elif isinstance(env.unwrapped, VizdoomTakeCover):
            # Accept actions in the required format
            env = VizdoomTakeCoverActionWrapper(env)
            # Transform Vizdoom observations into expected format
            env = VizdoomObservationWrapper(env)
            # Cut off "status bar" at the bottom of the screen (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(400),))
        elif isinstance(env.unwrapped, MiniGridEnv):
            from gym_minigrid.wrappers import RGBImgPartialObsWrapper
            # Accept actions in the required format
            env = MiniGridActionWrapper(env)
            # Get RGB image observations from the agent's viewpoint
            # (7x7 grid of tiles, with tile size 9 this results in a 63x63 image)
            env = RGBImgPartialObsWrapper(env, tile_size=9)
            # Add camera data to the observation
            env = MiniGridObservationWrapper(env)
            # Pad image to 64x64 to match the requirements (in effect just adding one row at the right
            # and bottom edge with repeated values from the edge)
            env = PadPixelObservationWrapper(env, target_size=64)
        else:
            env = PixelObservationWrapper(env, pixel_keys=("image",))
            if env.observation_space["image"].shape[:2] != (64, 64):
                # Resize image to 64x64
                env = ResizePixelObservationWrapper(env, size=(64, 64))

    # Wrap in RNN to add features to observation
    if wrap_rnn:
        # noinspection PyUnboundLocalVariable
        env = MDNRNNWrapper(env, encoder, rnn, keep_image=keep_image, features_mode=features_mode)

    # TODO: Is this needed? It was only ever implemented for CarRacing and didn't work
    # Force done=False if full_episode is True
    if full_episode:
        env = NoEarlyStopWrapper(env)

    # Set seed if given
    if seed is not None:
        env.seed(seed)

    print("Wrapped environment:", env)
    return env
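# Hedged usage sketch for make_env above. The args fields are the ones read inside the
# function, and the observation keys follow the format described in its opening comment;
# this is illustrative, not a verbatim call from the codebase.
env = make_env(args, dream_env=False, seed=42, keep_image=True, wrap_rnn=True)
obs = env.reset()
features = obs["features"]   # controller input produced by the MDN-RNN wrapper
image = obs["image"]         # 64x64 frame, returned because keep_image=True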
reset_graph()

# Third, build the VAE
vae = ConvVAE(z_size=z_size,
              batch_size=1,
              is_training=False,
              reuse=False,
              gpu_mode=False)

vae.load_json(os.path.join('vae', 'vae.json'))

# Fourth, build the RNN
hps_atari_sample = hps_sample._replace(input_seq_width=z_size + na)
OUTWIDTH = hps_atari_sample.output_seq_width

rnn = MDNRNN(hps_atari_sample, gpu_mode=False)
rnn.load_json(os.path.join('rnn', 'rnn.json'))

print("All models loaded.")

# Fifth, run the evaluation. -> We have no predictions about the first frame.
start = time.time()

state = rnn_init_state(rnn)  # initialize the state
pz = None

for i in range(steps):
    ob = obs[i:i+1]              # (1, 64, 64, 1)
    action = oh_actions[i:i+1]   # (1, n)