def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    """Wrap CarRacing with a VAE encoder and MDN-RNN state model.

    Args:
        args: parsed config; exp_name/env_name locate the saved models on disk.
        load_model: when True, restore VAE and RNN weights from Keras SavedModels.
        full_episode: forwarded to the base wrapper.
        with_obs: when True, observations also include the raw frame.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)
    if load_model:
        self.vae.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                args.exp_name, args.env_name), compile=False).get_weights())
        self.rnn.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                args.exp_name, args.env_name), compile=False).get_weights())
    self.rnn_states = rnn_init_state(self.rnn)
    # NOTE(review): deliberately overrides the constructor argument — TODO confirm.
    self.full_episode = False
    # Fix: Box expects a shape tuple; (32 + 256) is just the int 288.
    # Also use -np.inf/np.inf — np.NINF/np.Inf were removed in NumPy 2.0.
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))
def __init__(self, args, render_mode=False, load_model=True):
    """Dream-environment constructor: loads initial-z statistics and VAE/RNN weights.

    NOTE(review): this environment appears to be simulated entirely by the
    MDN-RNN (it calls self.reset() at the end) — confirm against the class body.
    """
    self.render_mode = render_mode
    model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
    # Latent statistics collected from the real environment's reset states.
    with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'), 'r') as f:
        [initial_mu, initial_logvar] = json.load(f)
    # Pair mu/logvar element-wise; presumably one row per stored sample — TODO confirm.
    self.initial_mu_logvar = np.array(
        [list(elem) for elem in zip(initial_mu, initial_logvar)])
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)
    if load_model:
        self.vae.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                args.exp_name, args.env_name), compile=False).get_weights())
        self.rnn.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                args.exp_name, args.env_name), compile=False).get_weights())
    # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
    self.action_space = Box(low=-1.0, high=1.0, shape=())
    # Feature size = latent z plus RNN state (h only, or h and c when state_space == 2).
    obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
    # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
    self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))
    self.rnn_states = None
    self.o = None
    self.seed()
    self.reset()
def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    """Wrap CarRacing with VAE + MDN-RNN, restoring weights from SavedModel variables.

    Args:
        args: parsed config; env_name locates the saved models, z_size/rnn_size/
            state_space determine the feature-vector length.
        load_model: when True, copy weights out of tf.saved_model checkpoints.
        full_episode: forwarded to the base wrapper.
        with_obs: when True, observations also include the raw frame.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)
    if load_model:
        self.vae.set_weights([
            param_i.numpy() for param_i in tf.saved_model.load(
                'results/{}/tf_vae'.format(args.env_name)).variables
        ])
        self.rnn.set_weights([
            param_i.numpy() for param_i in tf.saved_model.load(
                'results/{}/tf_rnn'.format(args.env_name)).variables
        ])
    self.rnn_states = rnn_init_state(self.rnn)
    # NOTE(review): deliberately overrides the constructor argument — TODO confirm.
    self.full_episode = False
    # Fix: Box expects a shape tuple; the bare sum is just an int.
    # Also use -np.inf/np.inf — np.NINF/np.Inf were removed in NumPy 2.0.
    self.observation_space = Box(low=-np.inf, high=np.inf,
                                 shape=(args.z_size + args.rnn_size * args.state_space,))
def main():
    """Train the convolutional VAE on recorded frames and save the final model."""
    args = PARSER.parse_args()
    data_path = get_path(args, "record")
    model_save_path = get_path(args, "tf_vae", create=True)
    ensure_validation_split(data_path)
    # Dataset statistics; mean/var are only used when image normalization is on.
    _n_train, _avg_frames, mean, var = analyse_dataset(data_path)
    if args.normalize_images:
        train_data, val_data = create_tf_dataset(data_path, args.z_size, True, mean, var)
    else:
        train_data, val_data = create_tf_dataset(data_path, args.z_size)
    shuffle_size = 5 * 1000  # Roughly 20 full episodes for shuffle windows, more increases RAM usage
    train_data = train_data.shuffle(shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size).prefetch(2)
    val_data = val_data.batch(args.vae_batch_size).prefetch(2)
    # Timestamped TensorBoard run directory under the model save path.
    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = model_save_path / "tensorboard" / current_time
    vae = CVAE(args=args)
    vae.compile(optimizer=vae.optimizer, loss=vae.get_loss())
    # Checkpoints are written every epoch; LogImage writes reconstructions to TensorBoard.
    vae.fit(train_data,
            validation_data=val_data,
            epochs=args.vae_num_epoch,
            callbacks=[
                tf.keras.callbacks.TensorBoard(log_dir=str(tensorboard_dir),
                                               update_freq=50, histogram_freq=1),
                LogImage(str(tensorboard_dir), val_data),
                tf.keras.callbacks.ModelCheckpoint(str(model_save_path / "ckpt-e{epoch:02d}"),
                                                   verbose=1),
            ])
    vae.save(str(model_save_path))
class CarRacingMDNRNN(CarRacingWrapper):
    """CarRacing wrapper whose observations are VAE latents + MDN-RNN state."""

    def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
        """Build VAE/RNN and optionally restore their weights from SavedModels."""
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            self.vae.set_weights([param_i.numpy() for param_i in tf.saved_model.load(
                'results/{}/{}/tf_vae'.format(args.exp_name, args.env_name)).variables])
            self.rnn.set_weights([param_i.numpy() for param_i in tf.saved_model.load(
                'results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name)).variables])
        self.rnn_states = rnn_init_state(self.rnn)
        # NOTE(review): deliberately overrides the constructor argument — TODO confirm.
        self.full_episode = False
        # Fix: Box expects a shape tuple (the bare sum is an int), and
        # np.NINF/np.Inf were removed in NumPy 2.0 — use -np.inf/np.inf.
        self.observation_space = Box(low=-np.inf, high=np.inf,
                                     shape=(args.z_size + args.rnn_size * args.state_space,))

    def encode_obs(self, obs):
        """Convert a raw 64x64x3 frame into the VAE latent vector z."""
        # Fix: np.float was removed in NumPy 1.24; builtin float gives the same float64 dtype.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        z = self.vae.encode(result)[0]
        return z

    def reset(self):
        """Reset the RNN state and the wrapped environment; return the feature state."""
        self.rnn_states = rnn_init_state(self.rnn)
        if self.with_obs:
            [z_state, obs] = super(CarRacingMDNRNN, self).reset()  # calls step
            self.N_tiles = len(self.track)
            return [z_state, obs]
        else:
            z_state = super(CarRacingMDNRNN, self).reset()  # calls step
            self.N_tiles = len(self.track)
            return z_state

    def _step(self, action):
        """Step the env, encode the frame, and return [z, (c,) h] as the state."""
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z = tf.squeeze(self.encode_obs(obs))
        h = tf.squeeze(self.rnn_states[0])
        c = tf.squeeze(self.rnn_states[1])
        if self.rnn.args.state_space == 2:
            z_state = tf.concat([z, c, h], axis=-1)
        else:
            z_state = tf.concat([z, h], axis=-1)
        if action is not None:  # don't compute state on reset
            self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
        if self.with_obs:
            return [z_state, obs], reward, done, {}
        else:
            return z_state, reward, done, {}

    def close(self):
        """Close the env and free TF graph/session memory."""
        super(CarRacingMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()
def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
    """Build VAE/RNN for Doom TakeCover and restore weights from SavedModels.

    Args:
        args: parsed config; exp_name/env_name locate the saved models.
        render_mode: when True, allow rendering (no_render is cleared).
        load_model: when True, copy weights out of tf.saved_model checkpoints.
        with_obs: when True, step/reset also return the processed frame.
    """
    super(DoomTakeCoverMDNRNN, self).__init__()
    self.with_obs = with_obs
    self.no_render = True
    if render_mode:
        self.no_render = False
    self.current_obs = None
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)
    if load_model:
        self.vae.set_weights([
            param_i.numpy()
            for param_i in tf.saved_model.load('results/{}/{}/tf_vae'.format(
                args.exp_name, args.env_name)).variables
        ])
        self.rnn.set_weights([
            param_i.numpy()
            for param_i in tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                args.exp_name, args.env_name)).variables
        ])
    self.action_space = Box(low=-1.0, high=1.0, shape=())
    self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
    self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
    # Fix: Box expects a shape tuple; (self.obs_size) is just an int.
    self.actual_observation_space = Box(low=-50., high=50., shape=(self.obs_size,))
    self._seed()
    self.rnn_states = None
    self.z = None
    self.restart = None
    self.frame_count = None
    self.viewer = None
    self._reset()
def __init__(self, env, silent=False):
    """Wrap `env` so observations become VAE latents plus handcrafted features.

    Args:
        env: the CarRacing environment to wrap.
        silent: when True, suppress per-frame feature debug output.
    """
    super().__init__(env)
    from vae.vae import CVAE
    from utils import PARSER
    args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])
    self.vae = CVAE(args)
    # Weights come from a pickled NumPy dump rather than a Keras SavedModel.
    # (Removed the dead commented-out SavedModel loader and its unused path variable.)
    self.vae.set_weights(np.load("vae_weights.npy", allow_pickle=True))
    self.observation_space = Box(low=float("-inf"), high=float("inf"), shape=(41, ))
    self.silent = silent
def __init__(self, load_model=True, full_episode=False):
    """Legacy constructor: VAE/RNN weights are restored from JSON dumps.

    Args:
        load_model: when True, load weights from tf_vae/vae.json and tf_rnn/rnn.json.
        full_episode: forwarded to the base wrapper.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)
    if load_model:
        self.vae.load_json('tf_vae/vae.json')
        self.rnn.load_json('tf_rnn/rnn.json')
    self.rnn_states = rnn_init_state(self.rnn)
    # NOTE(review): deliberately overrides the constructor argument — TODO confirm.
    self.full_episode = False
    # Fix: Box expects a shape tuple; (32 + 256) is just the int 288.
    # Also use -np.inf/np.inf — np.NINF/np.Inf were removed in NumPy 2.0.
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))
    # Tail of the frame generator: yield each frame scaled to [0, 1].
    # NOTE(review): the enclosing `ds_gen` definition begins above this excerpt.
    for i, img in enumerate(data['obs']):
        img_i = img / 255.0
        yield img_i


if __name__ == "__main__":
    # Manual VAE training loop with per-batch TensorBoard scalar logging.
    model_save_path = "results/{}/{}/tf_vae".format(args.exp_name, args.env_name)
    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)
    tensorboard_dir = os.path.join(model_save_path, 'tensorboard')
    summary_writer = tf.summary.create_file_writer(tensorboard_dir)
    summary_writer.set_as_default()
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir,
                                                          write_graph=False)
    shuffle_size = 20 * 1000  # only loads ~20 episodes for shuffle windows b/c im poor and don't have much RAM
    ds = tf.data.Dataset.from_generator(ds_gen, output_types=tf.float32,
                                        output_shapes=(64, 64, 3))
    ds = ds.shuffle(shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size)
    ds = ds.prefetch(100)  # prefetch 100 batches in the buffer #tf.data.experimental.AUTOTUNE)
    vae = CVAE(args=args)
    tensorboard_callback.set_model(vae)
    loss_weights = [1.0, 1.0]  # weight both the reconstruction and KL loss the same
    vae.compile(optimizer=vae.optimizer, loss=vae.get_loss(), loss_weights=loss_weights)
    step = 0
    # Dummy target for the KL head; its loss presumably ignores the target — TODO confirm.
    blank_batch = np.zeros([2*args.z_size])
    for i in range(args.vae_num_epoch):
        j = 0
        for x_batch in ds:
            if i == 0 and j == 0:
                # Build the model's input signature from the first concrete batch.
                vae._set_inputs(x_batch)
            j += 1
            step += 1
            loss = vae.train_on_batch(x=x_batch,
                                      y={'reconstruction': x_batch, 'KL': blank_batch},
                                      return_dict=True)
            # Log each returned loss component as a TensorBoard scalar.
            [tf.summary.scalar(loss_key, loss_val, step=step)
             for loss_key, loss_val in loss.items()]
class DreamDoomTakeCoverMDNRNN:
    """Doom TakeCover 'dream' environment simulated entirely by the MDN-RNN.

    The real game is never stepped: `step` advances the learned dynamics model
    (rnn_sim) and observations are latent features [z, c, h].
    """

    def __init__(self, args, render_mode=False, load_model=True):
        self.render_mode = render_mode
        model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
        # Latent statistics collected from the real environment's reset states.
        with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'), 'r') as f:
            [initial_mu, initial_logvar] = json.load(f)
        self.initial_mu_logvar = np.array(
            [list(elem) for elem in zip(initial_mu, initial_logvar)])
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            self.vae.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])
        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))
        self.rnn_states = None
        self.o = None
        self._training = True
        self.seed()
        self.reset()

    def _sample_init_z(self):
        # Draw one stored (mu, logvar) row and sample z via the reparameterization trick.
        idx = self.np_random.randint(low=0, high=self.initial_mu_logvar.shape[0])
        init_mu, init_logvar = self.initial_mu_logvar[idx]
        # Stored values appear scaled by 10000 — undone here; confirm against the writer side.
        init_mu = init_mu / 10000.0
        init_logvar = init_logvar / 10000.0
        init_z = init_mu + np.exp(
            init_logvar / 2.0) * self.np_random.randn(*init_logvar.shape)
        return init_z

    def reset(self):
        """Reset RNN state, sample a fresh initial z, and return [z, c, h] features."""
        self.rnn_states = rnn_init_state(self.rnn)
        z = np.expand_dims(self._sample_init_z(), axis=0)
        self.o = z
        # Concatenate z with cell state then hidden state.
        z_ch = tf.concat([z, self.rnn_states[1], self.rnn_states[0]], axis=-1)
        return tf.squeeze(z_ch)

    def seed(self, seed=None):
        if seed:
            tf.random.set_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Advance the learned dynamics one step; returns (features, reward, done, info)."""
        rnn_states_p1, z_tp1, r_tp1, d_tp1 = rnn_sim(self.rnn, self.o,
                                                     self.rnn_states, action,
                                                     training=self._training)
        self.rnn_states = rnn_states_p1
        self.o = z_tp1
        z_ch = tf.squeeze(
            tf.concat([z_tp1, self.rnn_states[1], self.rnn_states[0]], axis=-1))
        return z_ch.numpy(), tf.squeeze(r_tp1), d_tp1.numpy(), {}

    def close(self):
        # Free TF graph/session memory when the dream env is discarded.
        tf.keras.backend.clear_session()
        gc.collect()

    def render(self, mode):
        # Dream environment has no frames to render.
        pass
class DoomTakeCoverMDNRNN(DoomTakeCoverEnv):
    """Doom TakeCover wrapper whose state is VAE latents + MDN-RNN state."""

    def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
        """Build VAE/RNN and restore weights from tf.saved_model checkpoints."""
        super(DoomTakeCoverMDNRNN, self).__init__()
        self.with_obs = with_obs
        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            self.vae.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
        self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
        # Fix: Box expects a shape tuple; (self.obs_size) is just an int.
        self.actual_observation_space = Box(low=-50., high=50., shape=(self.obs_size,))
        self._seed()
        self.rnn_states = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()

    def close(self):
        """Close the env and free TF graph/session memory."""
        super(DoomTakeCoverMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()

    def _step(self, action):
        """Map the scalar action onto Doom buttons, step, and encode the frame."""
        # update states of rnn
        self.frame_count += 1
        self.rnn_states = rnn_next_state(self.rnn, self.z, action, self.rnn_states)
        # actual action in wrapped env: scalar in [-1, 1] mapped to move-left/right buttons.
        threshold = 0.3333
        full_action = [0] * 43
        if action < -threshold:
            full_action[11] = 1
        if action > threshold:
            full_action[10] = 1
        obs, reward, done, _ = super(DoomTakeCoverMDNRNN, self)._step(full_action)
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.z = self._encode(small_obs)
        if done:
            self.restart = 1
        else:
            self.restart = 0
        if self.with_obs:
            return [self._current_state(), self.current_obs], reward, done, {}
        else:
            return self._current_state(), reward, done, {}

    def _encode(self, img):
        """Encode a 64x64x3 frame into the VAE latent vector z."""
        # Fix: np.float was removed in NumPy 1.24; builtin float gives the same float64 dtype.
        simple_obs = np.copy(img).astype(float) / 255.0
        simple_obs = simple_obs.reshape(1, 64, 64, 3)
        z = self.vae.encode(simple_obs)[0]
        return z

    def _reset(self):
        """Reset the wrapped env and the RNN state; return the initial state."""
        obs = super(DoomTakeCoverMDNRNN, self)._reset()
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.rnn_states = rnn_init_state(self.rnn)
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0
        if self.with_obs:
            return [self._current_state(), self.current_obs]
        else:
            return self._current_state()

    def _process_frame(self, frame):
        """Crop off the status bar and downscale the frame to 64x64 RGB."""
        obs = frame[0:400, :, :]
        obs = Image.fromarray(obs, mode='RGB').resize((64, 64))
        obs = np.array(obs)
        return obs

    def _current_state(self):
        """Concatenate z with the flattened RNN state (c then h, or h only)."""
        if self.rnn.args.state_space == 2:
            return np.concatenate([
                self.z,
                tf.keras.backend.flatten(self.rnn_states[1]),
                tf.keras.backend.flatten(self.rnn_states[0])
            ], axis=0)  # cell then hidden for some reason
        return np.concatenate(
            [self.z, tf.keras.backend.flatten(self.rnn_states[0])],
            axis=0)  # only the hidden state

    def _seed(self, seed=None):
        if seed:
            tf.random.set_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
import pygame

# Interactive evaluation script: runs the VAE-wrapped CarRacing env in pygame.
pygame.init()
screen = pygame.display.set_mode((600, 300))
frame_skip = 3
seed = 2
env = wrappers.EvaluationWrapper(wrappers.VaeCarWrapper(
    gym.make("CarRacingSoftFS{}-v0".format(frame_skip))),
    seed,
    evaluate_for=15,
    report_each=1)
DATA_DIR = "export"
# NOTE(review): the string has no placeholders, so .format(...) is a no-op here;
# also `args` is not defined in this excerpt — confirm it exists at this scope.
model_path_name = "models/tf_vae".format(args.exp_name, args.env_name)
vae = CVAE(args)
vae.set_weights(
    tf.keras.models.load_model(model_path_name, compile=False).get_weights())
filelist = os.listdir(DATA_DIR)
# Load one random recorded episode's frames and scale them to [0, 1].
obs = np.load(os.path.join(DATA_DIR, random.choice(filelist)))["obs"]
obs = obs.astype(np.float32) / 255.0


def resize(img, factor):
    # Upscale a 64x64 RGB frame by an integer factor for display.
    obs = Image.fromarray(img, mode="RGB").resize((64 * factor, 64 * factor))
    return np.array(obs)


# NOTE(review): the loop body continues beyond this excerpt.
while True:
    state, done = env.reset(start_evaluation=True), False
class CarRacingMDNRNN(CarRacingWrapper):
    """CarRacing wrapper returning VAE latents concatenated with the RNN hidden state."""

    def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
        """Build VAE/RNN and optionally restore weights from Keras SavedModels."""
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            self.vae.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name), compile=False).get_weights())
            self.rnn.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name), compile=False).get_weights())
        self.rnn_states = rnn_init_state(self.rnn)
        # NOTE(review): deliberately overrides the constructor argument — TODO confirm.
        self.full_episode = False
        # Fix: Box expects a shape tuple; (32 + 256) is just the int 288.
        # Also use -np.inf/np.inf — np.NINF/np.Inf were removed in NumPy 2.0.
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))

    def encode_obs(self, obs):
        """Convert a processed frame into the VAE latent vector z."""
        obs = self._process_frame(obs)
        # Fix: np.float was removed in NumPy 1.24; builtin float gives the same float64 dtype.
        result = np.copy(obs).astype(float) / 255.0
        # NOTE(review): 4 channels here vs. RGB elsewhere — presumably a stacked/alpha
        # frame format from _process_frame; confirm against that helper.
        result = result.reshape(1, 64, 64, 4)
        z = self.vae.encode(result)[0]
        return z

    def reset(self):
        """Reset RNN state and the wrapped env; return [z, h] (plus frame if requested)."""
        self.rnn_states = rnn_init_state(self.rnn)
        obs = super(CarRacingMDNRNN, self).reset()
        obs = self._process_frame(obs)
        z = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)
        if self.with_obs:
            return [z_h, obs]
        else:
            # NOTE(review): this resets the wrapped env a second time and discards the
            # z_h computed above — looks unintentional, preserved as-is. TODO confirm.
            z_h = super(CarRacingMDNRNN, self).reset()  # calls step
            return z_h

    def _step(self, action):
        """Step the env, encode the frame, and return [z, h] as the state."""
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)
        if action is not None:  # don't compute state on reset
            self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
        if self.with_obs:
            return [z_h, obs], reward, done, {}
        else:
            return z_h, reward, done, {}

    def close(self):
        """Close the env and free TF graph/session memory."""
        super(CarRacingMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()
def save_dataset(dataset, filepath: Path):
    """Encode every batch of `dataset` with the VAE (or GQN) and save arrays as .npz.

    Relies on module-level `use_gqn`, `args`, `model_path`, and the encode_batch_*
    helpers. Saved keys: action/reward/done plus mu/logvar (VAE) or r/v (GQN).
    """
    if use_gqn:
        print("Loading GQN model...")
        gqn = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim, args.gqn_h_dim,
                                     args.gqn_z_dim, args.gqn_l, name="gqn")
        gqn.load_weights(str(model_path))
    else:
        print("Loading VAE model...")
        vae = CVAE(args=args)
        vae.load_weights(str(model_path))
    print(f"Weights loaded from checkpoint {model_path}")
    mu_data = []  # VAE mu (for z distribution)
    logvar_data = []  # VAE logvar (for z distribution)
    r_data = []  # GQN representation
    v_data = []  # GQN viewpoint
    action_data = []
    reward_data = []
    done_data = []
    i = 0
    for i, batch in enumerate(dataset):
        image, camera, action, r, d = batch  # shape = (sequence_len, *data_shape)
        if use_gqn:
            # Convert (x,y,z,pitch,yaw) into (x,y,z,sin(yaw),cos(yaw),sin(pitch),cos(pitch))
            pos = camera[:, 0:3]
            pitch = camera[:, 3:4]
            yaw = camera[:, 4:5]
            camera = tf.concat(
                [pos, tf.sin(yaw), tf.cos(yaw), tf.sin(pitch), tf.cos(pitch)],
                axis=1)
            # Fix: previously this result was assigned to `r`, clobbering the reward
            # unpacked from the batch, so reward_data stored representations instead.
            # noinspection PyUnboundLocalVariable
            representation = encode_batch_gqn(gqn, image, camera)
            r_data.append(representation.numpy().astype(np.float32))
            v_data.append(camera.numpy().astype(np.float32))
        else:
            # noinspection PyUnboundLocalVariable
            mu, logvar = encode_batch_vae(vae, image)
            mu_data.append(mu.numpy().astype(np.float32))
            logvar_data.append(logvar.numpy().astype(np.float32))
        action_data.append(action.numpy())
        reward_data.append(r.numpy().astype(np.float32))
        # Fix: np.bool was removed in NumPy 1.24; use the builtin bool dtype.
        done_data.append(d.numpy().astype(bool))
        print("\r{:5d}".format(i), end="")
    # Fix: the format string had no placeholders, so .format(i) was a no-op.
    print(" Done!")
    data = {
        "action": np.array(action_data),
        "reward": np.array(reward_data),
        "done": np.array(done_data),
    }
    if use_gqn:
        data["r"] = np.array(r_data)
        data["v"] = np.array(v_data)
    else:
        data["mu"] = np.array(mu_data)
        data["logvar"] = np.array(logvar_data)
    np.savez_compressed(str(filepath), **data)
    print(f"Encoded samples saved to {filepath}")
def encode_batch_vae(vae: CVAE, batch_img):
    """Scale pixel values from [0, 255] to [0, 1] and encode the batch into (mu, logvar)."""
    normalized = batch_img / 255.0
    return vae.encode_mu_logvar(normalized)
def decode_batch(batch_z):
    """Decode a batch of latent vectors back into uint8 64x64x3 images.

    Uses module-level `vae`, `batch_size`, and `z_size`.
    """
    # Fix: the function previously ignored its `batch_z` parameter and read a
    # module-level `z` instead.
    batch_img = vae.decode(batch_z.reshape(batch_size, z_size)) * 255.
    batch_img = np.round(batch_img).astype(np.uint8)
    batch_img = batch_img.reshape(batch_size, 64, 64, 3)
    return batch_img


filelist = os.listdir(DATA_DIR)
filelist.sort()
filelist = filelist[0:10000]

dataset = create_tf_dataset()
dataset = dataset.batch(1, drop_remainder=True)

vae = CVAE(args=args)
print(model_path_name)
vae.set_weights(
    tf.keras.models.load_model(model_path_name, compile=False).get_weights())

# Accumulators for the encoded episodes.
mu_dataset = []
logvar_dataset = []
action_dataset = []
r_dataset = []
d_dataset = []
N_dataset = []

i = 0
# NOTE(review): the loop body continues beyond this excerpt.
for batch in dataset:
    i += 1
    obs_batch, action_batch, r, d = batch
    # Drop the leading batch dimension of 1 added by .batch(1).
    obs_batch = tf.squeeze(obs_batch, axis=0)
def make_env(args, dream_env: bool = False, seed: Optional[int] = None,
             keep_image: bool = False, wrap_rnn: bool = True, load_model: bool = True):
    """Prepare an environment that matches the expected format.

    - The environment returns a 64x64 image in observation["image"]
      and camera data (x, y, z, pitch, yaw) in observation["camera"]
    - If wrapped in the RNN, observation["features"] returns the RNN output to be
      used for the controller
    - A dream environment simulates the actual environment using the RNN. It never
      returns an image (because the actual environment doesn't get run) and only
      returns the features
    - A wrapped environment always returns the features, and can return the
      original image when keep_image is True
    """
    full_episode = args.full_episode

    # Initialize VAE and MDNRNN networks
    if dream_env or wrap_rnn:
        features_mode = FeatureMode.MODE_ZCH if args.state_space == 2 else FeatureMode.MODE_ZH
        if args.use_gqn:
            encoder = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim,
                                             args.gqn_h_dim, args.gqn_z_dim,
                                             args.gqn_l, name="gqn")
            encoder_path = get_path(args, "tf_gqn")
        else:
            encoder = CVAE(args)
            encoder_path = get_path(args, "tf_vae")
        rnn = MDNRNN(args)
        rnn_path = get_path(args, "tf_rnn")
        # TODO: Is this still needed? Do we ever NOT load the model?
        if load_model:
            encoder.load_weights(str(encoder_path))
            rnn.load_weights(str(rnn_path))

    if dream_env:
        assert keep_image is False, "Dream environment doesn't support image observations"
        import json
        initial_z_dir = get_path(args, "tf_initial_z")
        if args.use_gqn:
            initial_z_path = initial_z_dir / "initial_z_gqn.json"
            with open(str(initial_z_path), 'r') as f:
                initial_z = json.load(f)
        else:
            initial_z_path = initial_z_dir / "initial_z_vae.json"
            with open(str(initial_z_path), 'r') as f:
                [initial_mu, initial_logvar] = json.load(f)
            # This could probably be done more efficiently
            # Fix: np.float was removed in NumPy 1.24; builtin float is equivalent here.
            initial_z = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)],
                                 dtype=float)
        # Create dream environment
        # noinspection PyUnboundLocalVariable
        env = DreamEnv(initial_z, args.z_size, rnn, features_mode)
    else:
        # Create real environment
        kwargs = {}
        if args.env_name.startswith("VizdoomTakeCover"):
            kwargs["position"] = True  # Include position data as observation for Vizdoom environment
        print("Making environment {}...".format(args.env_name))
        env = gym.make(args.env_name, **kwargs)
        print("Raw environment:", env)

        from gym.envs.box2d import CarRacing
        from vizdoomgym.envs import VizdoomTakeCover
        from gym_minigrid.minigrid import MiniGridEnv
        if isinstance(env.unwrapped, CarRacing):
            # Accept actions in the required format
            env = CarRacingActionWrapper(env)
            # Transform CarRacing observations into expected format and add camera data
            env = CarRacingObservationWrapper(env)
            # Cut off "status bar" at the bottom of CarRacing observation (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(84),))
        elif isinstance(env.unwrapped, VizdoomTakeCover):
            # Accept actions in the required format
            env = VizdoomTakeCoverActionWrapper(env)
            # Transform Vizdoom observations into expected format
            env = VizdoomObservationWrapper(env)
            # Cut off "status bar" at the bottom of the screen (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(400),))
        elif isinstance(env.unwrapped, MiniGridEnv):
            from gym_minigrid.wrappers import RGBImgPartialObsWrapper
            # Accept actions in the required format
            env = MiniGridActionWrapper(env)
            # Get RGB image observations from the agent's viewpoint
            # (7x7 grid of tiles, with tile size 9 this results in a 63x63 image)
            env = RGBImgPartialObsWrapper(env, tile_size=9)
            # Add camera data to the observation
            env = MiniGridObservationWrapper(env)
            # Pad image to 64x64 to match the requirements (in effect just adding one row
            # at the right and bottom edge with repeated values from the edge)
            env = PadPixelObservationWrapper(env, target_size=64)
        else:
            env = PixelObservationWrapper(env, pixel_keys=("image",))

        # NOTE(review): the original formatting is ambiguous about whether this check
        # sat inside the final `else`; clipped CarRacing/Vizdoom frames also need the
        # resize, so it is applied to every branch here — confirm against history.
        if env.observation_space["image"].shape[:2] != (64, 64):
            # Resize image to 64x64
            env = ResizePixelObservationWrapper(env, size=(64, 64))

        # Wrap in RNN to add features to observation
        if wrap_rnn:
            # noinspection PyUnboundLocalVariable
            env = MDNRNNWrapper(env, encoder, rnn, keep_image=keep_image,
                                features_mode=features_mode)

    # TODO: Is this needed? It was only ever implemented for CarRacing and didn't work
    # Force done=False if full_episode is True
    if full_episode:
        env = NoEarlyStopWrapper(env)

    # Set seed if given
    if seed is not None:
        env.seed(seed)

    print("Wrapped environment:", env)
    return env
# Manual VAE training setup: save path, TensorBoard, dataset pipeline, model compile.
model_save_path = "results/{}/{}/tf_vae".format(args.exp_name, args.env_name)
if not os.path.exists(model_save_path):
    os.makedirs(model_save_path)
tensorboard_dir = os.path.join(model_save_path, 'tensorboard')
summary_writer = tf.summary.create_file_writer(tensorboard_dir)
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=tensorboard_dir)
dataset_size = 10000 * 1000  # 10k episodes each 1k steps long
shuffle_size = 20 * 1000  # only loads 20 episodes for shuffle windows b/c im poor and don't have much RAM
dataset = create_tf_dataset()
dataset = dataset.shuffle(
    shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size)
vae = CVAE(args=args)
tensorboard_callback.set_model(vae)
loss_weights = [1.0, 1.0]  # weight both the reconstruction and KL loss the same
vae.compile(optimizer=vae.optimizer, loss=vae.get_loss(), loss_weights=loss_weights)
step = 0
# Expected number of minibatches per epoch (dataset_size is an estimate above).
n_mb = dataset_size / args.vae_batch_size
# NOTE(review): the inner loop body continues beyond this excerpt.
for i in range(args.vae_num_epoch):
    print('epoch: {}'.format(i))
    j = 0
    for x_batch, targ_batch, blank_batch in dataset:
        j += 1
        step += 1
class VaeCarWrapper(gym.ObservationWrapper):
    """Observation wrapper: VAE latents plus handcrafted features scraped from the
    CarRacing frame (speed, ABS sensors, steering, rotation, on-grass heuristic)."""

    def __init__(self, env, silent=False):
        super().__init__(env)
        from vae.vae import CVAE
        from utils import PARSER
        args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])
        model_path_name = "models/tf_vae"
        self.vae = CVAE(args)
        # self.vae.set_weights(tf.keras.models.load_model(
        #     model_path_name, compile=False).get_weights())
        self.vae.set_weights(np.load("vae_weights.npy", allow_pickle=True))
        # NOTE(review): a sibling variant of this wrapper uses shape=(41,) — confirm
        # which feature count is current.
        self.observation_space = Box(low=float("-inf"),
                                     high=float("inf"),
                                     shape=(40,))
        self.silent = silent

    def _process_frame(self, frame):
        # Crop off the status bar, downscale to 64x64, and encode with the VAE.
        obs = (frame[0:84, :, :] * 255).astype(np.uint8)
        obs = Image.fromarray(obs, mode="RGB").resize((64, 64))
        obs = np.array(obs)
        return np.array(self.vae.encode(obs.reshape(1, 64, 64, 3)/255)[0])

    def observation(self, frame):
        """Build the feature vector: VAE latents + 8 pixel-scraped indicators."""
        # far-front spike
        # NOTE(review): this first sum is immediately overwritten (not +=) by the
        # headlights sum below — looks unintentional, preserved as-is. TODO confirm.
        car_body = np.sum((frame[56:59, 47, 1] > 0.5).flatten())
        # main headlights
        car_body = np.sum((frame[59:74, 46:49, 1] > 0.5).flatten())
        # rear wheels
        car_body += np.sum((frame[72:76, 44, 1] > 0.5).flatten())
        car_body += np.sum((frame[72:76, 50, 1] > 0.5).flatten())
        # sides
        car_body += np.sum((frame[67:77, 45, 1] > 0.5).flatten())
        car_body += np.sum((frame[67:77, 49, 1] > 0.5).flatten())
        # Fraction of visible car-body pixels; drops when the car is on grass.
        self.green = car_body / 55.0
        # Indicator-bar readouts from the status strip at the bottom of the frame.
        self.speed = sum(frame[85:, 2, 0]) / 5
        self.abs1 = sum(frame[85:, 9, 2])
        self.abs2 = sum(frame[85:, 14, 2])
        self.abs3 = sum(frame[85:, 19, 2])
        self.abs4 = sum(frame[85:, 24, 2])
        steering_input_left = sum(frame[90, 37:48, 1])
        steering_input_right = sum(frame[90, 47:58, 1])
        self.steering = steering_input_right - steering_input_left
        rotation_left = sum(frame[90, 59:72, 0])
        rotation_right = sum(frame[90, 72:85, 0])
        self.rotation = rotation_right - rotation_left
        if not self.silent:
            print(f"green:{self.green}\tspeed:{self.speed}\tabs:\t{self.abs1}\t{self.abs2}\t{self.abs3}\t{self.abs4}\tsteering:{self.steering}\trotation:{self.rotation}")
        features = self._process_frame(frame)
        return np.concatenate([features, [self.speed, self.green, self.abs1,
                                          self.abs2, self.abs3, self.abs4,
                                          self.steering, self.rotation]])