class CarRacingMDNRNN(CarRacingWrapper):
  """CarRacing wrapper that replaces raw frames with World-Models features.

  Each observation is the VAE latent vector z concatenated with the MDN-RNN
  recurrent state (hidden only, or cell+hidden when ``state_space == 2``).
  """

  def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
      # Copy trained weights out of the SavedModel checkpoints on disk.
      self.vae.set_weights([
          param_i.numpy() for param_i in tf.saved_model.load(
              'results/{}/{}/tf_vae'.format(args.exp_name,
                                            args.env_name)).variables
      ])
      self.rnn.set_weights([
          param_i.numpy() for param_i in tf.saved_model.load(
              'results/{}/{}/tf_rnn'.format(args.exp_name,
                                            args.env_name)).variables
      ])

    self.rnn_states = rnn_init_state(self.rnn)
    # NOTE(review): unconditionally overrides the `full_episode` constructor
    # argument (already forwarded to the superclass above); kept as-is since
    # callers may rely on this, but it looks suspicious — confirm intent.
    self.full_episode = False
    # Fix: Box's `shape` must be a sequence — the original passed a bare int
    # (missing trailing comma). Also -np.inf/np.inf instead of np.NINF/np.Inf,
    # which were removed in NumPy 2.0.
    self.observation_space = Box(
        low=-np.inf,
        high=np.inf,
        shape=(args.z_size + args.rnn_size * args.state_space,))

  def encode_obs(self, obs):
    """Encode a raw 64x64x3 uint8 frame into the VAE latent z."""
    # Fix: np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    result = np.copy(obs).astype(np.float64) / 255.0
    result = result.reshape(1, 64, 64, 3)
    z = self.vae.encode(result)[0]
    return z

  def reset(self):
    """Reset the wrapped env and the RNN state; return the encoded state."""
    self.rnn_states = rnn_init_state(self.rnn)
    if self.with_obs:
      [z_state, obs] = super(CarRacingMDNRNN, self).reset()  # calls step
      self.N_tiles = len(self.track)
      return [z_state, obs]
    else:
      z_state = super(CarRacingMDNRNN, self).reset()  # calls step
      self.N_tiles = len(self.track)
      return z_state

  def _step(self, action):
    """Step the wrapped env; return (z_state, reward, done, info)."""
    obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
    z = tf.squeeze(self.encode_obs(obs))
    h = tf.squeeze(self.rnn_states[0])
    c = tf.squeeze(self.rnn_states[1])
    # The returned state uses the *pre-update* RNN state; the RNN is only
    # advanced afterwards, matching the original ordering.
    if self.rnn.args.state_space == 2:
      z_state = tf.concat([z, c, h], axis=-1)
    else:
      z_state = tf.concat([z, h], axis=-1)
    if action is not None:  # don't compute state on reset
      self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
    if self.with_obs:
      return [z_state, obs], reward, done, {}
    else:
      return z_state, reward, done, {}

  def close(self):
    """Close the wrapped env and release TF graph/session memory."""
    super(CarRacingMDNRNN, self).close()
    tf.keras.backend.clear_session()
    gc.collect()
class DreamDoomTakeCoverMDNRNN:
  """DoomTakeCover simulated entirely inside the learned MDN-RNN.

  No real game is stepped: the MDN-RNN predicts the next latent z, the
  reward, and the done flag from the current latent and action ("training
  inside the dream" from the World Models paper).
  """

  def __init__(self, args, render_mode=False, load_model=True):
    self.render_mode = render_mode

    model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
    with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'), 'r') as f:
      [initial_mu, initial_logvar] = json.load(f)
    # One row per recorded initial frame: (mu, logvar) pairs for sampling
    # the starting latent of an episode.
    self.initial_mu_logvar = np.array(
        [list(elem) for elem in zip(initial_mu, initial_logvar)])

    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
      self.vae.set_weights([
          param_i.numpy() for param_i in tf.saved_model.load(
              'results/{}/{}/tf_vae'.format(args.exp_name,
                                            args.env_name)).variables
      ])
      self.rnn.set_weights([
          param_i.numpy() for param_i in tf.saved_model.load(
              'results/{}/{}/tf_rnn'.format(args.exp_name,
                                            args.env_name)).variables
      ])

    # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
    self.action_space = Box(low=-1.0, high=1.0, shape=())
    obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
    # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
    self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))

    self.rnn_states = None
    self.o = None  # current latent z, the "observation" of the dream env
    self._training = True

    self.seed()
    self.reset()

  def _sample_init_z(self):
    """Draw an initial latent z from the recorded (mu, logvar) pool."""
    idx = self.np_random.randint(low=0, high=self.initial_mu_logvar.shape[0])
    init_mu, init_logvar = self.initial_mu_logvar[idx]
    # Values were stored scaled by 10000 in the json; undo that here.
    init_mu = init_mu / 10000.0
    init_logvar = init_logvar / 10000.0
    # Reparameterization: z = mu + sigma * eps.
    init_z = init_mu + np.exp(init_logvar / 2.0) * self.np_random.randn(
        *init_logvar.shape)
    return init_z

  def reset(self):
    """Reset RNN state and sample a fresh starting latent."""
    self.rnn_states = rnn_init_state(self.rnn)
    z = np.expand_dims(self._sample_init_z(), axis=0)
    self.o = z
    # Concatenate z with cell then hidden state, same order as step().
    z_ch = tf.concat([z, self.rnn_states[1], self.rnn_states[0]], axis=-1)
    return tf.squeeze(z_ch)

  def seed(self, seed=None):
    """Seed both TensorFlow and the gym RNG; return the used seed."""
    # Fix: the original tested `if seed:`, which silently skipped seeding
    # TensorFlow when seed == 0.
    if seed is not None:
      tf.random.set_seed(seed)
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def step(self, action):
    """Advance the dream one step via the MDN-RNN's predictions."""
    rnn_states_p1, z_tp1, r_tp1, d_tp1 = rnn_sim(
        self.rnn, self.o, self.rnn_states, action, training=self._training)
    self.rnn_states = rnn_states_p1
    self.o = z_tp1
    z_ch = tf.squeeze(
        tf.concat([z_tp1, self.rnn_states[1], self.rnn_states[0]], axis=-1))
    return z_ch.numpy(), tf.squeeze(r_tp1), d_tp1.numpy(), {}

  def close(self):
    """Release TF graph/session memory."""
    tf.keras.backend.clear_session()
    gc.collect()

  def render(self, mode):
    # Dream env has no pixels to show; rendering is a no-op.
    pass
class DoomTakeCoverMDNRNN(DoomTakeCoverEnv):
  """DoomTakeCover wrapper exposing VAE latent + RNN state as the state.

  The scalar action in [-1, 1] is thresholded into the game's discrete
  left/right buttons; observations returned to the agent are the
  concatenated latent/recurrent features from ``_current_state``.
  """

  def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
    super(DoomTakeCoverMDNRNN, self).__init__()
    self.with_obs = with_obs

    self.no_render = True
    if render_mode:
      self.no_render = False
    self.current_obs = None

    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
      self.vae.set_weights([
          param_i.numpy() for param_i in tf.saved_model.load(
              'results/{}/{}/tf_vae'.format(args.exp_name,
                                            args.env_name)).variables
      ])
      self.rnn.set_weights([
          param_i.numpy() for param_i in tf.saved_model.load(
              'results/{}/{}/tf_rnn'.format(args.exp_name,
                                            args.env_name)).variables
      ])

    self.action_space = Box(low=-1.0, high=1.0, shape=())
    self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
    self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
    # Fix: Box's `shape` must be a sequence — the original passed a bare int
    # (missing trailing comma).
    self.actual_observation_space = Box(low=-50., high=50., shape=(self.obs_size,))
    self._seed()

    self.rnn_states = None
    self.z = None
    self.restart = None
    self.frame_count = None
    self.viewer = None
    self._reset()

  def close(self):
    """Close the underlying Doom env and release TF memory."""
    super(DoomTakeCoverMDNRNN, self).close()
    tf.keras.backend.clear_session()
    gc.collect()

  def _step(self, action):
    """Step the game with a thresholded scalar action."""
    # update states of rnn
    self.frame_count += 1
    self.rnn_states = rnn_next_state(self.rnn, self.z, action, self.rnn_states)

    # actual action in wrapped env: map the scalar onto the 43-button
    # VizDoom action vector (indices 11 = left, 10 = right).
    threshold = 0.3333
    full_action = [0] * 43
    if action < -threshold:
      full_action[11] = 1
    if action > threshold:
      full_action[10] = 1

    obs, reward, done, _ = super(DoomTakeCoverMDNRNN, self)._step(full_action)
    small_obs = self._process_frame(obs)
    self.current_obs = small_obs
    self.z = self._encode(small_obs)

    if done:
      self.restart = 1
    else:
      self.restart = 0

    if self.with_obs:
      return [self._current_state(), self.current_obs], reward, done, {}
    else:
      return self._current_state(), reward, done, {}

  def _encode(self, img):
    """Encode a 64x64x3 uint8 frame into the VAE latent z."""
    # Fix: np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    simple_obs = np.copy(img).astype(np.float64) / 255.0
    simple_obs = simple_obs.reshape(1, 64, 64, 3)
    z = self.vae.encode(simple_obs)[0]
    return z

  def _reset(self):
    """Reset the game, the RNN state, and the frame counter."""
    obs = super(DoomTakeCoverMDNRNN, self)._reset()
    small_obs = self._process_frame(obs)
    self.current_obs = small_obs
    self.rnn_states = rnn_init_state(self.rnn)
    self.z = self._encode(small_obs)
    self.restart = 1
    self.frame_count = 0

    if self.with_obs:
      return [self._current_state(), self.current_obs]
    else:
      return self._current_state()

  def _process_frame(self, frame):
    """Crop the HUD off the raw frame and downscale to 64x64 RGB."""
    obs = frame[0:400, :, :]
    obs = Image.fromarray(obs, mode='RGB').resize((64, 64))
    obs = np.array(obs)
    return obs

  def _current_state(self):
    """Concatenate z with the RNN state into the feature vector."""
    if self.rnn.args.state_space == 2:
      # cell state first, then hidden state (original ordering, kept as-is).
      return np.concatenate([
          self.z,
          tf.keras.backend.flatten(self.rnn_states[1]),
          tf.keras.backend.flatten(self.rnn_states[0])
      ], axis=0)
    return np.concatenate(
        [self.z, tf.keras.backend.flatten(self.rnn_states[0])],
        axis=0)  # only the hidden state

  def _seed(self, seed=None):
    """Seed both TensorFlow and the gym RNG; return the used seed."""
    # Fix: the original tested `if seed:`, which silently skipped seeding
    # TensorFlow when seed == 0.
    if seed is not None:
      tf.random.set_seed(seed)
    self.np_random, seed = seeding.np_random(seed)
    return [seed]
class CarRacingMDNRNN(CarRacingWrapper):
  """CarRacing wrapper returning VAE latent z concatenated with RNN hidden h.

  NOTE(review): this redefines ``CarRacingMDNRNN`` and shadows the earlier
  class of the same name in this file — confirm which variant is intended.
  This variant loads Keras-format models and uses hidden state only.
  """

  def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
      # Load Keras models without compiling (inference only) and copy weights.
      self.vae.set_weights(
          tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
              args.exp_name, args.env_name), compile=False).get_weights())
      self.rnn.set_weights(
          tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
              args.exp_name, args.env_name), compile=False).get_weights())

    self.rnn_states = rnn_init_state(self.rnn)
    # NOTE(review): unconditionally overrides the `full_episode` constructor
    # argument; kept as-is.
    self.full_episode = False
    # Fix: Box's `shape` must be a sequence — the original passed a bare int
    # (missing trailing comma). Also -np.inf/np.inf instead of np.NINF/np.Inf,
    # which were removed in NumPy 2.0. 32 + 256 = z_size + rnn_size, hard-coded.
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))

  def encode_obs(self, obs):
    """Process a raw frame and encode it into the VAE latent z."""
    obs = self._process_frame(obs)
    # Fix: np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    result = np.copy(obs).astype(np.float64) / 255.0
    # NOTE(review): reshapes to 4 channels while the sibling class uses 3 —
    # presumably _process_frame yields a 4-channel frame here; TODO confirm.
    result = result.reshape(1, 64, 64, 4)
    z = self.vae.encode(result)[0]
    return z

  def reset(self):
    """Reset the wrapped env and RNN state; return [z_h, obs] or z_h."""
    self.rnn_states = rnn_init_state(self.rnn)
    obs = super(CarRacingMDNRNN, self).reset()
    obs = self._process_frame(obs)
    z = self.encode_obs(obs)
    h = tf.squeeze(self.rnn_states[0])
    z_h = tf.concat([z, h], axis=-1)
    if self.with_obs:
      return [z_h, obs]
    else:
      # NOTE(review): this resets the wrapped env a *second* time and
      # discards the z_h computed above; kept as-is since the wrapper's
      # reset apparently routes through _step ("calls step") — TODO confirm.
      z_h = super(CarRacingMDNRNN, self).reset()  # calls step
      return z_h

  def _step(self, action):
    """Step the wrapped env; return (z_h, reward, done, info)."""
    obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
    z = self.encode_obs(obs)
    # Features use the *pre-update* hidden state; the RNN advances below.
    h = tf.squeeze(self.rnn_states[0])
    z_h = tf.concat([z, h], axis=-1)
    if action is not None:  # don't compute state on reset
      self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
    if self.with_obs:
      return [z_h, obs], reward, done, {}
    else:
      return z_h, reward, done, {}

  def close(self):
    """Close the wrapped env and release TF graph/session memory."""
    super(CarRacingMDNRNN, self).close()
    tf.keras.backend.clear_session()
    gc.collect()