def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
        # Restore weights from the exported SavedModel directories
        self.vae.set_weights([
            param_i.numpy() for param_i in
            tf.saved_model.load('results/{}/tf_vae'.format(args.env_name)).variables
        ])
        self.rnn.set_weights([
            param_i.numpy() for param_i in
            tf.saved_model.load('results/{}/tf_rnn'.format(args.env_name)).variables
        ])

    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # Observation is the VAE latent vector concatenated with the RNN state
    # (shape must be a tuple, hence the trailing comma)
    self.observation_space = Box(low=np.NINF, high=np.Inf,
                                 shape=(args.z_size + args.rnn_size * args.state_space,))
def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
        # Restore weights from the Keras SavedModel checkpoints
        self.vae.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_vae'.format(args.exp_name, args.env_name),
                                       compile=False).get_weights())
        self.rnn.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name),
                                       compile=False).get_weights())

    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # z_size (32) + rnn_size (256); shape must be a tuple, hence the trailing comma
    self.observation_space = Box(low=np.NINF, high=np.Inf, shape=(32 + 256,))
def main():
    args = PARSER.parse_args()
    data_path = get_path(args, "record")
    model_save_path = get_path(args, "tf_vae", create=True)

    ensure_validation_split(data_path)
    _n_train, _avg_frames, mean, var = analyse_dataset(data_path)
    if args.normalize_images:
        train_data, val_data = create_tf_dataset(data_path, args.z_size, True, mean, var)
    else:
        train_data, val_data = create_tf_dataset(data_path, args.z_size)

    shuffle_size = 5 * 1000  # Roughly 20 full episodes for shuffle windows, more increases RAM usage
    train_data = train_data.shuffle(shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size).prefetch(2)
    val_data = val_data.batch(args.vae_batch_size).prefetch(2)

    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = model_save_path / "tensorboard" / current_time

    vae = CVAE(args=args)
    vae.compile(optimizer=vae.optimizer, loss=vae.get_loss())
    vae.fit(train_data,
            validation_data=val_data,
            epochs=args.vae_num_epoch,
            callbacks=[
                tf.keras.callbacks.TensorBoard(log_dir=str(tensorboard_dir), update_freq=50, histogram_freq=1),
                LogImage(str(tensorboard_dir), val_data),
                tf.keras.callbacks.ModelCheckpoint(str(model_save_path / "ckpt-e{epoch:02d}"), verbose=1),
            ])

    vae.save(str(model_save_path))
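# For reference: once vae.save(...) has written the SavedModel above, the weights can be pulled
# back into a fresh CVAE the same way other scripts in this repo do it. A minimal sketch;
# reload_vae is a hypothetical helper, assuming the same get_path() layout used for saving:
def reload_vae(args):
    model_save_path = get_path(args, "tf_vae")
    vae = CVAE(args=args)
    vae.set_weights(tf.keras.models.load_model(str(model_save_path), compile=False).get_weights())
    return vae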
def __init__(self, args, render_mode=False, load_model=True):
    self.render_mode = render_mode
    model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)

    with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'), 'r') as f:
        [initial_mu, initial_logvar] = json.load(f)
    self.initial_mu_logvar = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)])

    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
        self.vae.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_vae'.format(args.exp_name, args.env_name),
                                       compile=False).get_weights())
        self.rnn.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name),
                                       compile=False).get_weights())

    # future versions of OpenAI gym need a dtype=np.float32 in the next line:
    self.action_space = Box(low=-1.0, high=1.0, shape=())
    obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
    # future versions of OpenAI gym need a dtype=np.float32 in the next line:
    self.observation_space = Box(low=-50., high=50., shape=(obs_size,))

    self.rnn_states = None
    self.o = None

    self.seed()
    self.reset()
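# The initial_z.json consumed above is just two parallel lists (mu and logvar of the initial
# frame encodings). A minimal sketch of how such a file could be written; save_initial_z is a
# hypothetical helper, and initial_mu / initial_logvar are assumed to be plain Python lists:
def save_initial_z(model_path_name, initial_mu, initial_logvar):
    os.makedirs(os.path.join(model_path_name, 'tf_initial_z'), exist_ok=True)
    with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'), 'w') as f:
        # the [mu, logvar] list layout matches the json.load() call in the constructor above
        json.dump([initial_mu, initial_logvar], f)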
def __init__(self, load_model=True, full_episode=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)

    if load_model:
        # Older json-based checkpoints from the original World Models code
        self.vae.load_json('tf_vae/vae.json')
        self.rnn.load_json('tf_rnn/rnn.json')

    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # z_size (32) + rnn_size (256); shape must be a tuple, hence the trailing comma
    self.observation_space = Box(low=np.NINF, high=np.Inf, shape=(32 + 256,))
def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
    super(DoomTakeCoverMDNRNN, self).__init__()

    self.with_obs = with_obs
    self.no_render = True
    if render_mode:
        self.no_render = False
    self.current_obs = None

    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
        # Restore weights from the exported SavedModel directories
        self.vae.set_weights([
            param_i.numpy() for param_i in
            tf.saved_model.load('results/{}/{}/tf_vae'.format(args.exp_name, args.env_name)).variables
        ])
        self.rnn.set_weights([
            param_i.numpy() for param_i in
            tf.saved_model.load('results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name)).variables
        ])

    self.action_space = Box(low=-1.0, high=1.0, shape=())
    self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space

    self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
    # shape must be a tuple, hence the trailing comma
    self.actual_observation_space = Box(low=-50., high=50., shape=(self.obs_size,))

    self._seed()

    self.rnn_states = None
    self.z = None
    self.restart = None
    self.frame_count = None
    self.viewer = None
    self._reset()
def __init__(self, env, silent=False):
    super().__init__(env)

    from vae.vae import CVAE
    from utils import PARSER
    args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])

    model_path_name = "models/tf_vae"
    self.vae = CVAE(args)
    # self.vae.set_weights(tf.keras.models.load_model(
    #     model_path_name, compile=False).get_weights())
    self.vae.set_weights(np.load("vae_weights.npy", allow_pickle=True))

    self.observation_space = Box(low=float("-inf"), high=float("inf"), shape=(41,))
    self.silent = silent
    # (tail of ds_gen(): yield each recorded frame, scaled from [0, 255] to [0, 1])
    for i, img in enumerate(data['obs']):
        img_i = img / 255.0
        yield img_i


if __name__ == "__main__":
    model_save_path = "results/{}/{}/tf_vae".format(args.exp_name, args.env_name)
    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)

    tensorboard_dir = os.path.join(model_save_path, 'tensorboard')
    summary_writer = tf.summary.create_file_writer(tensorboard_dir)
    summary_writer.set_as_default()
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir, write_graph=False)

    shuffle_size = 20 * 1000  # only holds ~20 episodes in the shuffle window to keep RAM usage low
    ds = tf.data.Dataset.from_generator(ds_gen, output_types=tf.float32, output_shapes=(64, 64, 3))
    ds = ds.shuffle(shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size)
    ds = ds.prefetch(100)  # prefetch 100 batches (tf.data.experimental.AUTOTUNE would also work)

    vae = CVAE(args=args)
    tensorboard_callback.set_model(vae)
    loss_weights = [1.0, 1.0]  # weight the reconstruction and KL losses equally
    vae.compile(optimizer=vae.optimizer, loss=vae.get_loss(), loss_weights=loss_weights)

    step = 0
    blank_batch = np.zeros([2 * args.z_size])  # dummy target for the KL "output"
    for i in range(args.vae_num_epoch):
        j = 0
        for x_batch in ds:
            if i == 0 and j == 0:
                vae._set_inputs(x_batch)  # build the model on the first batch so it can be saved later
            j += 1
            step += 1
            loss = vae.train_on_batch(x=x_batch,
                                      y={'reconstruction': x_batch, 'KL': blank_batch},
                                      return_dict=True)
            for loss_key, loss_val in loss.items():
                tf.summary.scalar(loss_key, loss_val, step=step)
def save_dataset(dataset, filepath: Path):
    if use_gqn:
        print("Loading GQN model...")
        gqn = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim, args.gqn_h_dim, args.gqn_z_dim,
                                     args.gqn_l, name="gqn")
        gqn.load_weights(str(model_path))
    else:
        print("Loading VAE model...")
        vae = CVAE(args=args)
        vae.load_weights(str(model_path))
    print(f"Weights loaded from checkpoint {model_path}")

    mu_data = []      # VAE mu (for z distribution)
    logvar_data = []  # VAE logvar (for z distribution)
    r_data = []       # GQN representation
    v_data = []       # GQN viewpoint
    action_data = []
    reward_data = []
    done_data = []

    i = 0
    for i, batch in enumerate(dataset):
        image, camera, action, r, d = batch  # shape = (sequence_len, *data_shape)

        if use_gqn:
            # Convert (x,y,z,pitch,yaw) into (x,y,z,sin(yaw),cos(yaw),sin(pitch),cos(pitch))
            pos = camera[:, 0:3]
            pitch = camera[:, 3:4]
            yaw = camera[:, 4:5]
            camera = tf.concat([pos, tf.sin(yaw), tf.cos(yaw), tf.sin(pitch), tf.cos(pitch)], axis=1)

            # Use a separate name for the representation so the reward in `r` isn't clobbered
            # noinspection PyUnboundLocalVariable
            rep = encode_batch_gqn(gqn, image, camera)
            r_data.append(rep.numpy().astype(np.float32))
            v_data.append(camera.numpy().astype(np.float32))
        else:
            # noinspection PyUnboundLocalVariable
            mu, logvar = encode_batch_vae(vae, image)
            mu_data.append(mu.numpy().astype(np.float32))
            logvar_data.append(logvar.numpy().astype(np.float32))

        action_data.append(action.numpy())
        reward_data.append(r.numpy().astype(np.float32))
        done_data.append(d.numpy().astype(bool))

        print("\r{:5d}".format(i), end="")
    print(" Done!")

    data = {
        "action": np.array(action_data),
        "reward": np.array(reward_data),
        "done": np.array(done_data),
    }
    if use_gqn:
        data["r"] = np.array(r_data)
        data["v"] = np.array(v_data)
    else:
        data["mu"] = np.array(mu_data)
        data["logvar"] = np.array(logvar_data)

    np.savez_compressed(str(filepath), **data)
    print(f"Encoded samples saved to {filepath}")
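# As a quick sanity check, the archive written by save_dataset() can be read back with np.load.
# A minimal sketch for a VAE-encoded file; "encoded_episodes.npz" is a placeholder path, and the
# keys match those written above:
with np.load("encoded_episodes.npz") as data:
    print(data["mu"].shape, data["logvar"].shape)      # per-frame latent distributions
    print(data["action"].shape, data["reward"].shape)  # matching actions and rewards
    print(data["done"].dtype)                          # episode-termination flags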
def make_env(args, dream_env: bool = False, seed: Optional[int] = None, keep_image: bool = False,
             wrap_rnn: bool = True, load_model: bool = True):
    # Prepares an environment that matches the expected format:
    # - The environment returns a 64x64 image in observation["image"]
    #   and camera data (x, y, z, pitch, yaw) in observation["camera"]
    # - If wrapped in the RNN, observation["features"] returns the RNN output to be used for the controller
    # - A dream environment simulates the actual environment using the RNN. It never returns an image
    #   (because the actual environment doesn't get run) and only returns the features
    # - A wrapped environment always returns the features, and can return the original image when keep_image is True
    full_episode = args.full_episode

    # Initialize VAE and MDNRNN networks
    if dream_env or wrap_rnn:
        features_mode = FeatureMode.MODE_ZCH if args.state_space == 2 else FeatureMode.MODE_ZH

        if args.use_gqn:
            encoder = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim, args.gqn_h_dim, args.gqn_z_dim,
                                             args.gqn_l, name="gqn")
            encoder_path = get_path(args, "tf_gqn")
        else:
            encoder = CVAE(args)
            encoder_path = get_path(args, "tf_vae")

        rnn = MDNRNN(args)
        rnn_path = get_path(args, "tf_rnn")

        # TODO: Is this still needed? Do we ever NOT load the model?
        if load_model:
            encoder.load_weights(str(encoder_path))
            rnn.load_weights(str(rnn_path))

    if dream_env:
        assert keep_image is False, "Dream environment doesn't support image observations"

        import json
        initial_z_dir = get_path(args, "tf_initial_z")
        if args.use_gqn:
            initial_z_path = initial_z_dir / "initial_z_gqn.json"
            with open(str(initial_z_path), 'r') as f:
                initial_z = json.load(f)
        else:
            initial_z_path = initial_z_dir / "initial_z_vae.json"
            with open(str(initial_z_path), 'r') as f:
                [initial_mu, initial_logvar] = json.load(f)
            # This could probably be done more efficiently
            initial_z = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)], dtype=float)

        # Create dream environment
        # noinspection PyUnboundLocalVariable
        env = DreamEnv(initial_z, args.z_size, rnn, features_mode)
    else:
        # Create real environment
        kwargs = {}
        if args.env_name.startswith("VizdoomTakeCover"):
            kwargs["position"] = True  # Include position data as observation for Vizdoom environment

        print("Making environment {}...".format(args.env_name))
        env = gym.make(args.env_name, **kwargs)
        print("Raw environment:", env)

        from gym.envs.box2d import CarRacing
        from vizdoomgym.envs import VizdoomTakeCover
        from gym_minigrid.minigrid import MiniGridEnv

        if isinstance(env.unwrapped, CarRacing):
            # Accept actions in the required format
            env = CarRacingActionWrapper(env)
            # Transform CarRacing observations into expected format and add camera data
            env = CarRacingObservationWrapper(env)
            # Cut off "status bar" at the bottom of CarRacing observation (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(84),))
        elif isinstance(env.unwrapped, VizdoomTakeCover):
            # Accept actions in the required format
            env = VizdoomTakeCoverActionWrapper(env)
            # Transform Vizdoom observations into expected format
            env = VizdoomObservationWrapper(env)
            # Cut off "status bar" at the bottom of the screen (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(400),))
        elif isinstance(env.unwrapped, MiniGridEnv):
            from gym_minigrid.wrappers import RGBImgPartialObsWrapper
            # Accept actions in the required format
            env = MiniGridActionWrapper(env)
            # Get RGB image observations from the agent's viewpoint
            # (7x7 grid of tiles, with tile size 9 this results in a 63x63 image)
            env = RGBImgPartialObsWrapper(env, tile_size=9)
            # Add camera data to the observation
            env = MiniGridObservationWrapper(env)
            # Pad image to 64x64 to match the requirements (in effect just adding one row at the right
            # and bottom edge with repeated values from the edge)
            env = PadPixelObservationWrapper(env, target_size=64)
        else:
            env = PixelObservationWrapper(env, pixel_keys=("image",))

        if env.observation_space["image"].shape[:2] != (64, 64):
            # Resize image to 64x64
            env = ResizePixelObservationWrapper(env, size=(64, 64))

    # Wrap in RNN to add features to observation
    if wrap_rnn:
        # noinspection PyUnboundLocalVariable
        env = MDNRNNWrapper(env, encoder, rnn, keep_image=keep_image, features_mode=features_mode)

    # TODO: Is this needed? It was only ever implemented for CarRacing and didn't work
    # Force done=False if full_episode is True
    if full_episode:
        env = NoEarlyStopWrapper(env)

    # Set seed if given
    if seed is not None:
        env.seed(seed)

    print("Wrapped environment:", env)
    return env
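# A minimal usage sketch for make_env(). The observation keys follow the contract described in the
# comment at the top of the function; the '--config_path' argument mirrors the parser usage shown
# earlier in this repo and is illustrative only:
from utils import PARSER

args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])
env = make_env(args, dream_env=False, seed=0, keep_image=True, wrap_rnn=True)

obs = env.reset()
print(obs["features"].shape)  # encoder latent + RNN state, consumed by the controller
print(obs["image"].shape)     # original 64x64 frame (kept because keep_image=True)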