Example #1
    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
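            # Copy weights from the exported checkpoints into the freshly
            # constructed networks; compile=False skips restoring optimizer state.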
            self.vae.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())
            self.rnn.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())

        self.rnn_states = rnn_init_state(self.rnn)

        self.full_episode = False
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(32 + 256, ))
Example #2
    def __init__(self, args, render_mode=False, load_model=True):

        self.render_mode = render_mode
        model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
        with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'),
                  'r') as f:
            [initial_mu, initial_logvar] = json.load(f)

        self.initial_mu_logvar = np.array(
            [list(elem) for elem in zip(initial_mu, initial_logvar)])

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())
            self.rnn.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())

        # future versions of OpenAI gym need a dtype=np.float32 in the next line:
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
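        # The feature vector is the VAE latent z concatenated with the flattened
        # LSTM state (h only, or c and h when state_space == 2), hence this size.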
        # future versions of OpenAI gym need a dtype=np.float32 in the next line:
        self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))

        self.rnn_states = None
        self.o = None

        self.seed()
        self.reset()
Example #3
    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights([
                param_i.numpy() for param_i in tf.saved_model.load(
                    'results/{}/tf_vae'.format(args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in tf.saved_model.load(
                    'results/{}/tf_rnn'.format(args.env_name)).variables
            ])
        self.rnn_states = rnn_init_state(self.rnn)

        self.full_episode = False
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(args.z_size +
                                            args.rnn_size * args.state_space, ))
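
Examples #1 and #3 restore the same checkpoints through two different TensorFlow entry points. A minimal side-by-side sketch of both patterns, with an illustrative path:

# Pattern A: deserialize the whole Keras model, then copy its weight list.
weights = tf.keras.models.load_model('results/exp/env/tf_vae',
                                     compile=False).get_weights()
vae.set_weights(weights)

# Pattern B: read the SavedModel's variables directly, with no Keras
# model object rebuilt in between.
weights = [v.numpy() for v in tf.saved_model.load('results/exp/env/tf_vae').variables]
vae.set_weights(weights)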
Example #4
def main():
    args = PARSER.parse_args()

    data_path = get_path(args, "record")
    model_save_path = get_path(args, "tf_vae", create=True)

    ensure_validation_split(data_path)
    _n_train, _avg_frames, mean, var = analyse_dataset(data_path)
    if args.normalize_images:
        train_data, val_data = create_tf_dataset(data_path, args.z_size, True, mean, var)
    else:
        train_data, val_data = create_tf_dataset(data_path, args.z_size)

    shuffle_size = 5 * 1000  # Roughly 20 full episodes for shuffle windows, more increases RAM usage
    train_data = train_data.shuffle(shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size).prefetch(2)
    val_data = val_data.batch(args.vae_batch_size).prefetch(2)

    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = model_save_path / "tensorboard" / current_time

    vae = CVAE(args=args)
    vae.compile(optimizer=vae.optimizer, loss=vae.get_loss())
    vae.fit(train_data, validation_data=val_data, epochs=args.vae_num_epoch, callbacks=[
        tf.keras.callbacks.TensorBoard(log_dir=str(tensorboard_dir), update_freq=50, histogram_freq=1),
        LogImage(str(tensorboard_dir), val_data),
        tf.keras.callbacks.ModelCheckpoint(str(model_save_path / "ckpt-e{epoch:02d}"), verbose=1),
    ])
    vae.save(str(model_save_path))
Example #5
class CarRacingMDNRNN(CarRacingWrapper):
  def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)
     
    if load_model:
      self.vae.set_weights([param_i.numpy() for param_i in tf.saved_model.load('results/{}/{}/tf_vae'.format(args.exp_name, args.env_name)).variables])
      self.rnn.set_weights([param_i.numpy() for param_i in tf.saved_model.load('results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name)).variables])
    self.rnn_states = rnn_init_state(self.rnn)
    
    self.full_episode = False 
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(args.z_size + args.rnn_size * args.state_space,))
  def encode_obs(self, obs):
    # convert raw obs to z, mu, logvar
    result = np.copy(obs).astype(np.float32) / 255.0
    result = result.reshape(1, 64, 64, 3)
    z = self.vae.encode(result)[0]
    return z
  def reset(self):
    self.rnn_states = rnn_init_state(self.rnn)
    if self.with_obs:
        [z_state, obs] = super(CarRacingMDNRNN, self).reset() # calls step
        self.N_tiles = len(self.track)
        return [z_state, obs]
    else:
        z_state = super(CarRacingMDNRNN, self).reset() # calls step
        self.N_tiles = len(self.track)
        return z_state
  def _step(self, action):
    obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
    z = tf.squeeze(self.encode_obs(obs))
    h = tf.squeeze(self.rnn_states[0])
    c = tf.squeeze(self.rnn_states[1])
    if self.rnn.args.state_space == 2:
        z_state = tf.concat([z, c, h], axis=-1)
    else:
        z_state = tf.concat([z, h], axis=-1)
    if action is not None: # don't compute state on reset
        self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
    if self.with_obs:
        return [z_state, obs], reward, done, {}
    else:
        return z_state, reward, done, {}
  def close(self):
    super(CarRacingMDNRNN, self).close()
    tf.keras.backend.clear_session()
    gc.collect()
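
A minimal driver loop for the wrapper above, assuming the CarRacingWrapper base class exposes the usual gym reset()/step() API (the random policy is just a placeholder):

env = CarRacingMDNRNN(args, load_model=True, full_episode=False, with_obs=False)
z_state = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # placeholder policy
    z_state, reward, done, _ = env.step(action)
env.close()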
Example #6
    def __init__(self,
                 args,
                 render_mode=False,
                 load_model=True,
                 with_obs=False):
        super(DoomTakeCoverMDNRNN, self).__init__()

        self.with_obs = with_obs

        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])

        self.action_space = Box(low=-1.0, high=1.0, shape=())
        self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space

        self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
        self.actual_observation_space = Box(low=-50.,
                                            high=50.,
                                            shape=(self.obs_size, ))

        self._seed()

        self.rnn_states = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()
Example #7
    def __init__(self, env, silent=False):
        super().__init__(env)

        from vae.vae import CVAE
        from utils import PARSER
        args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])
        model_path_name = "models/tf_vae"

        self.vae = CVAE(args)

        # self.vae.set_weights(tf.keras.models.load_model(
        #     model_path_name, compile=False).get_weights())

        self.vae.set_weights(np.load("vae_weights.npy", allow_pickle=True))

        self.observation_space = Box(low=float("-inf"),
                                     high=float("inf"),
                                     shape=(41, ))
        self.silent = silent
Example #8
  def __init__(self, load_model=True, full_episode=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)
     
    if load_model:
      self.vae.load_json('tf_vae/vae.json')
      self.rnn.load_json('tf_rnn/rnn.json')

    self.rnn_states = rnn_init_state(self.rnn)
    
    self.full_episode = False 
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))
Example #9
            for i, img in enumerate(data['obs']):
                img_i = img / 255.0
                yield img_i
if __name__ == "__main__": 
    model_save_path = "results/{}/{}/tf_vae".format(args.exp_name, args.env_name)
    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)
    tensorboard_dir = os.path.join(model_save_path, 'tensorboard')
    summary_writer = tf.summary.create_file_writer(tensorboard_dir)
    summary_writer.set_as_default()
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir, write_graph=False)
    shuffle_size = 20 * 1000  # shuffle window of ~20 episodes; larger windows increase RAM usage
    ds = tf.data.Dataset.from_generator(ds_gen, output_types=tf.float32, output_shapes=(64, 64, 3))
    ds = ds.shuffle(shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size)
    ds = ds.prefetch(100)  # keep up to 100 batches buffered (tf.data.experimental.AUTOTUNE also works here)
    vae = CVAE(args=args)
    tensorboard_callback.set_model(vae)
    loss_weights = [1.0, 1.0] # weight both the reconstruction and KL loss the same
    vae.compile(optimizer=vae.optimizer, loss=vae.get_loss(), loss_weights=loss_weights)
    step = 0
    blank_batch = np.zeros([2*args.z_size])
    for i in range(args.vae_num_epoch):
        j = 0
        for x_batch in ds:
            if i == 0 and j == 0:
                vae._set_inputs(x_batch)
            j += 1
            step += 1

            loss = vae.train_on_batch(x=x_batch, y={'reconstruction': x_batch, 'KL': blank_batch}, return_dict=True)
            for loss_key, loss_val in loss.items():
                tf.summary.scalar(loss_key, loss_val, step=step)
Example #10
class DreamDoomTakeCoverMDNRNN:
    def __init__(self, args, render_mode=False, load_model=True):

        self.render_mode = render_mode
        model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
        with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'),
                  'r') as f:
            [initial_mu, initial_logvar] = json.load(f)

        self.initial_mu_logvar = np.array(
            [list(elem) for elem in zip(initial_mu, initial_logvar)])

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])

        # future versions of OpenAI gym need a dtype=np.float32 in the next line:
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
        # future versions of OpenAI gym need a dtype=np.float32 in the next line:
        self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))

        self.rnn_states = None
        self.o = None

        self._training = True

        self.seed()
        self.reset()

    def _sample_init_z(self):
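        # Draw a stored (mu, logvar) pair at random and sample z with the
        # reparameterization trick: z = mu + exp(logvar / 2) * eps.
        # The JSON values appear to have been scaled by 10000 when serialized.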
        idx = self.np_random.randint(low=0,
                                     high=self.initial_mu_logvar.shape[0])
        init_mu, init_logvar = self.initial_mu_logvar[idx]
        init_mu = init_mu / 10000.0
        init_logvar = init_logvar / 10000.0
        init_z = init_mu + np.exp(
            init_logvar / 2.0) * self.np_random.randn(*init_logvar.shape)
        return init_z

    def reset(self):
        self.rnn_states = rnn_init_state(self.rnn)
        z = np.expand_dims(self._sample_init_z(), axis=0)
        self.o = z
        z_ch = tf.concat([z, self.rnn_states[1], self.rnn_states[0]], axis=-1)
        return tf.squeeze(z_ch)

    def seed(self, seed=None):
        if seed is not None:
            tf.random.set_seed(seed)

        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        rnn_states_p1, z_tp1, r_tp1, d_tp1 = rnn_sim(self.rnn,
                                                     self.o,
                                                     self.rnn_states,
                                                     action,
                                                     training=self._training)
        self.rnn_states = rnn_states_p1
        self.o = z_tp1

        z_ch = tf.squeeze(
            tf.concat([z_tp1, self.rnn_states[1], self.rnn_states[0]],
                      axis=-1))
        return z_ch.numpy(), tf.squeeze(r_tp1), d_tp1.numpy(), {}

    def close(self):
        tf.keras.backend.clear_session()
        gc.collect()

    def render(self, mode):
        pass
Example #11
class DoomTakeCoverMDNRNN(DoomTakeCoverEnv):
    def __init__(self,
                 args,
                 render_mode=False,
                 load_model=True,
                 with_obs=False):
        super(DoomTakeCoverMDNRNN, self).__init__()

        self.with_obs = with_obs

        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])

        self.action_space = Box(low=-1.0, high=1.0, shape=())
        self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space

        self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
        self.actual_observation_space = Box(low=-50.,
                                            high=50.,
                                            shape=(self.obs_size, ))

        self._seed()

        self.rnn_states = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()

    def close(self):
        super(DoomTakeCoverMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()

    def _step(self, action):

        # update states of rnn
        self.frame_count += 1

        self.rnn_states = rnn_next_state(self.rnn, self.z, action,
                                         self.rnn_states)

        # actual action in wrapped env:

        threshold = 0.3333
        full_action = [0] * 43
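        # Discretize the scalar policy output onto the 43-button VizDoom action
        # vector: beyond +/- threshold, one of the two movement buttons
        # (indices 10 and 11) is pressed.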

        if action < -threshold:
            full_action[11] = 1

        if action > threshold:
            full_action[10] = 1

        obs, reward, done, _ = super(DoomTakeCoverMDNRNN,
                                     self)._step(full_action)
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.z = self._encode(small_obs)

        if done:
            self.restart = 1
        else:
            self.restart = 0

        if self.with_obs:
            return [self._current_state(), self.current_obs], reward, done, {}
        else:
            return self._current_state(), reward, done, {}

    def _encode(self, img):
        simple_obs = np.copy(img).astype(np.float32) / 255.0
        simple_obs = simple_obs.reshape(1, 64, 64, 3)
        z = self.vae.encode(simple_obs)[0]
        return z

    def _reset(self):
        obs = super(DoomTakeCoverMDNRNN, self)._reset()
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.rnn_states = rnn_init_state(self.rnn)
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0

        if self.with_obs:
            return [self._current_state(), self.current_obs]
        else:
            return self._current_state()

    def _process_frame(self, frame):
        obs = frame[0:400, :, :]
        obs = Image.fromarray(obs, mode='RGB').resize((64, 64))
        obs = np.array(obs)
        return obs

    def _current_state(self):
        if self.rnn.args.state_space == 2:
            return np.concatenate([
                self.z,
                tf.keras.backend.flatten(self.rnn_states[1]),
                tf.keras.backend.flatten(self.rnn_states[0])
            ],
                                  axis=0)  # cell state first, then hidden state
        return np.concatenate(
            [self.z, tf.keras.backend.flatten(self.rnn_states[0])],
            axis=0)  # only the hidden state

    def _seed(self, seed=None):
        if seed is not None:
            tf.random.set_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
Example #12
import pygame
pygame.init()
screen = pygame.display.set_mode((600, 300))

frame_skip = 3
seed = 2
env = wrappers.EvaluationWrapper(wrappers.VaeCarWrapper(
    gym.make("CarRacingSoftFS{}-v0".format(frame_skip))),
                                 seed,
                                 evaluate_for=15,
                                 report_each=1)

DATA_DIR = "export"
model_path_name = "models/tf_vae".format(args.exp_name, args.env_name)
vae = CVAE(args)
vae.set_weights(
    tf.keras.models.load_model(model_path_name, compile=False).get_weights())

filelist = os.listdir(DATA_DIR)
obs = np.load(os.path.join(DATA_DIR, random.choice(filelist)))["obs"]
obs = obs.astype(np.float32) / 255.0


def resize(img, factor):
    obs = Image.fromarray(img, mode="RGB").resize((64 * factor, 64 * factor))
    return np.array(obs)


while True:
    state, done = env.reset(start_evaluation=True), False
Example #13
class CarRacingMDNRNN(CarRacingWrapper):
    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())
            self.rnn.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())

        self.rnn_states = rnn_init_state(self.rnn)

        self.full_episode = False
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(32 + 256, ))

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        obs = self._process_frame(obs)
        result = np.copy(obs).astype(np.float32) / 255.0
        result = result.reshape(1, 64, 64, 3)
        z = self.vae.encode(result)[0]
        return z

    def reset(self):
        self.rnn_states = rnn_init_state(self.rnn)
        obs = super(CarRacingMDNRNN, self).reset()
        obs = self._process_frame(obs)
        z = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)

        if self.with_obs:
            return [z_h, obs]
        else:
            return z_h

    def _step(self, action):
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)

        if action is not None:  # don't compute state on reset
            self.rnn_states = rnn_next_state(self.rnn, z, action,
                                             self.rnn_states)
        if self.with_obs:
            return [z_h, obs], reward, done, {}
        else:
            return z_h, reward, done, {}

    def close(self):
        super(CarRacingMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()
Example #14
def save_dataset(dataset, filepath: Path):
    if use_gqn:
        print("Loading GQN model...")
        gqn = GenerativeQueryNetwork(args.gqn_x_dim,
                                     args.gqn_r_dim,
                                     args.gqn_h_dim,
                                     args.gqn_z_dim,
                                     args.gqn_l,
                                     name="gqn")
        gqn.load_weights(str(model_path))
    else:
        print("Loading VAE model...")
        vae = CVAE(args=args)
        vae.load_weights(str(model_path))
    print(f"Weights loaded from checkpoint {model_path}")

    mu_data = []  # VAE mu (for z distribution)
    logvar_data = []  # VAE logvar (for z distribution)

    r_data = []  # GQN representation
    v_data = []  # GQN viewpoint

    action_data = []
    reward_data = []
    done_data = []

    i = 0
    for i, batch in enumerate(dataset):
        image, camera, action, reward, done = batch
        # shape = (sequence_len, *data_shape)

        if use_gqn:
            # Convert (x,y,z,pitch,yaw) into (x,y,z,sin(yaw),cos(yaw),sin(pitch),cos(pitch))
            pos = camera[:, 0:3]
            pitch = camera[:, 3:4]
            yaw = camera[:, 4:5]
            camera = tf.concat(
                [pos,
                 tf.sin(yaw),
                 tf.cos(yaw),
                 tf.sin(pitch),
                 tf.cos(pitch)],
                axis=1)

            # noinspection PyUnboundLocalVariable
            r = encode_batch_gqn(gqn, image, camera)
            r_data.append(r.numpy().astype(np.float32))
            v_data.append(camera.numpy().astype(np.float32))
        else:
            # noinspection PyUnboundLocalVariable
            mu, logvar = encode_batch_vae(vae, image)
            mu_data.append(mu.numpy().astype(np.float32))
            logvar_data.append(logvar.numpy().astype(np.float32))

        action_data.append(action.numpy())
        reward_data.append(reward.numpy().astype(np.float32))
        done_data.append(done.numpy().astype(bool))

        print("\r{:5d}".format(i), end="")
    print(" Done!".format(i))

    data = {
        "action": np.array(action_data),
        "reward": np.array(reward_data),
        "done": np.array(done_data),
    }

    if use_gqn:
        data["r"] = np.array(r_data)
        data["v"] = np.array(v_data)
    else:
        data["mu"] = np.array(mu_data)
        data["logvar"] = np.array(logvar_data)

    np.savez_compressed(str(filepath), **data)
    print(f"Encoded samples saved to {filepath}")
Example #15
def encode_batch_vae(vae: CVAE, batch_img):
    batch_img = batch_img / 255.0
    return vae.encode_mu_logvar(batch_img)
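
A sketch of calling the helper on one stored batch of frames; the file name and key are illustrative:

data = np.load("record/episode_0.npz")  # hypothetical recording file
batch_img = data["obs"][:32].astype(np.float32)
mu, logvar = encode_batch_vae(vae, batch_img)  # scaling to [0, 1] happens inside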
Example #16
def decode_batch(batch_z):
    # decode the latent vector
    batch_img = vae.decode(batch_z.reshape(batch_size, z_size)) * 255.
    batch_img = np.round(batch_img).astype(np.uint8)
    batch_img = batch_img.reshape(batch_size, 64, 64, 3)
    return batch_img
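
A hypothetical round trip through decode_batch, reusing the globals the snippet assumes (vae, batch_size, z_size):

batch_z = np.random.randn(batch_size, z_size).astype(np.float32)  # stand-in latents
batch_img = decode_batch(batch_z)  # (batch_size, 64, 64, 3) uint8 frames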


filelist = os.listdir(DATA_DIR)
filelist.sort()
filelist = filelist[0:10000]
dataset = create_tf_dataset()
dataset = dataset.batch(1, drop_remainder=True)

vae = CVAE(args=args)
print(model_path_name)
vae.set_weights(
    tf.keras.models.load_model(model_path_name, compile=False).get_weights())
mu_dataset = []
logvar_dataset = []
action_dataset = []
r_dataset = []
d_dataset = []
N_dataset = []

i = 0
for batch in dataset:
    i += 1
    obs_batch, action_batch, r, d = batch
    obs_batch = tf.squeeze(obs_batch, axis=0)
Example #17
def make_env(args, dream_env: bool = False, seed: Optional[int] = None,
             keep_image: bool = False, wrap_rnn: bool = True, load_model: bool = True):
    # Prepares an environment that matches the expected format:
    # - The environment returns a 64x64 image in observation["image"]
    #   and camera data (x, y, z, pitch, yaw) in observation["camera"]
    # - If wrapped in the RNN, observation["features"] returns the RNN output to be used for the controller
    # - A dream environment simulates the actual environment using the RNN. It never returns an image
    #   (because the actual environment doesn't get run) and only returns the features
    # - A wrapped environment always returns the features, and can return the original image when keep_image is True

    full_episode = args.full_episode

    # Initialize VAE and MDNRNN networks
    if dream_env or wrap_rnn:
        features_mode = FeatureMode.MODE_ZCH if args.state_space == 2 else FeatureMode.MODE_ZH

        if args.use_gqn:
            encoder = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim,
                                             args.gqn_h_dim, args.gqn_z_dim, args.gqn_l, name="gqn")
            encoder_path = get_path(args, "tf_gqn")
        else:
            encoder = CVAE(args)
            encoder_path = get_path(args, "tf_vae")
        rnn = MDNRNN(args)
        rnn_path = get_path(args, "tf_rnn")

        # TODO: Is this still needed? Do we ever NOT load the model?
        if load_model:
            encoder.load_weights(str(encoder_path))
            rnn.load_weights(str(rnn_path))

    if dream_env:
        assert keep_image is False, "Dream environment doesn't support image observations"

        import json
        initial_z_dir = get_path(args, "tf_initial_z")
        if args.use_gqn:
            initial_z_path = initial_z_dir / "initial_z_gqn.json"
            with open(str(initial_z_path), 'r') as f:
                initial_z = json.load(f)
        else:
            initial_z_path = initial_z_dir / "initial_z_vae.json"
            with open(str(initial_z_path), 'r') as f:
                [initial_mu, initial_logvar] = json.load(f)
            # This could probably be done more efficiently
            initial_z = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)], dtype=np.float32)

        # Create dream environment
        # noinspection PyUnboundLocalVariable
        env = DreamEnv(initial_z, args.z_size, rnn, features_mode)

    else:
        # Create real environment
        kwargs = {}
        if args.env_name.startswith("VizdoomTakeCover"):
            kwargs["position"] = True  # Include position data as observation for Vizdoom environment

        print("Making environment {}...".format(args.env_name))
        env = gym.make(args.env_name, **kwargs)
        print("Raw environment:", env)

        from gym.envs.box2d import CarRacing
        from vizdoomgym.envs import VizdoomTakeCover
        from gym_minigrid.minigrid import MiniGridEnv
        if isinstance(env.unwrapped, CarRacing):
            # Accept actions in the required format
            env = CarRacingActionWrapper(env)
            # Transform CarRacing observations into expected format and add camera data
            env = CarRacingObservationWrapper(env)
            # Cut off "status bar" at the bottom of CarRacing observation (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(84),))
        elif isinstance(env.unwrapped, VizdoomTakeCover):
            # Accept actions in the required format
            env = VizdoomTakeCoverActionWrapper(env)
            # Transform Vizdoom observations into expected format
            env = VizdoomObservationWrapper(env)
            # Cut off "status bar" at the bottom of the screen (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(400),))
        elif isinstance(env.unwrapped, MiniGridEnv):
            from gym_minigrid.wrappers import RGBImgPartialObsWrapper
            # Accept actions in the required format
            env = MiniGridActionWrapper(env)
            # Get RGB image observations from the agent's viewpoint
            # (7x7 grid of tiles, with tile size 9 this results in a 63x63 image)
            env = RGBImgPartialObsWrapper(env, tile_size=9)
            # Add camera data to the observation
            env = MiniGridObservationWrapper(env)
            # Pad image to 64x64 to match the requirements (in effect just adding one row at the right and bottom edge
            # with repeated values from the edge)
            env = PadPixelObservationWrapper(env, target_size=64)
        else:
            env = PixelObservationWrapper(env, pixel_keys=("image",))

        if env.observation_space["image"].shape[:2] != (64, 64):
            # Resize image to 64x64
            env = ResizePixelObservationWrapper(env, size=(64, 64))

        # Wrap in RNN to add features to observation
        if wrap_rnn:
            # noinspection PyUnboundLocalVariable
            env = MDNRNNWrapper(env, encoder, rnn, keep_image=keep_image, features_mode=features_mode)

    # TODO: Is this needed? It was only ever implemented for CarRacing and didn't work
    # Force done=False if full_episode is True
    if full_episode:
        env = NoEarlyStopWrapper(env)

    # Set seed if given
    if seed is not None:
        env.seed(seed)

    print("Wrapped environment:", env)
    return env
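
Hypothetical usage of make_env for a real, RNN-wrapped environment; PARSER is the argument parser used elsewhere in these examples:

args = PARSER.parse_args()
env = make_env(args, dream_env=False, seed=42, keep_image=True)
obs = env.reset()
features = obs["features"]  # controller input produced by the MDN-RNN wrapper
image = obs["image"]        # 64x64 frame, available because keep_image=True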
Example #18
    model_save_path = "results/{}/{}/tf_vae".format(args.exp_name,
                                                    args.env_name)
    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)
    tensorboard_dir = os.path.join(model_save_path, 'tensorboard')
    summary_writer = tf.summary.create_file_writer(tensorboard_dir)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=tensorboard_dir)

    dataset_size = 10000 * 1000  # 10k episodes each 1k steps long
    shuffle_size = 20 * 1000  # shuffle window of ~20 episodes; larger windows increase RAM usage
    dataset = create_tf_dataset()
    dataset = dataset.shuffle(
        shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size)

    vae = CVAE(args=args)
    tensorboard_callback.set_model(vae)

    loss_weights = [1.0,
                    1.0]  # weight both the reconstruction and KL loss the same
    vae.compile(optimizer=vae.optimizer,
                loss=vae.get_loss(),
                loss_weights=loss_weights)
    step = 0
    n_mb = dataset_size / args.vae_batch_size
    for i in range(args.vae_num_epoch):
        print('epoch: {}'.format(i))
        j = 0
        for x_batch, targ_batch, blank_batch in dataset:
            j += 1
            step += 1
Example #19
class VaeCarWrapper(gym.ObservationWrapper):
    def __init__(self, env, silent=False):
        super().__init__(env)

        from vae.vae import CVAE
        from utils import PARSER
        args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])
        model_path_name = "models/tf_vae"

        self.vae = CVAE(args)

        # self.vae.set_weights(tf.keras.models.load_model(
        #     model_path_name, compile=False).get_weights())

        self.vae.set_weights(np.load("vae_weights.npy", allow_pickle=True))

        self.observation_space = Box(low=float("-inf"), high=float("inf"), shape=(40,))
        self.silent = silent

    def _process_frame(self, frame):
        obs = (frame[0:84, :, :] * 255).astype(np.uint8)
        obs = Image.fromarray(obs, mode="RGB").resize((64, 64))
        obs = np.array(obs)


        return np.array(self.vae.encode(obs.reshape(1, 64, 64, 3)/255)[0])

    def observation(self, frame):
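        # Hand-crafted scalar features read off the 96x96 CarRacing frame:
        # green-channel pixels over the car body (grass contact) plus the HUD
        # indicators at the bottom (speed bar, four ABS sensors, steering, gyro).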
        # far-front spike
        car_body = np.sum((frame[56:59, 47, 1] > 0.5).flatten())

        # main headlights
        car_body += np.sum((frame[59:74, 46:49, 1] > 0.5).flatten())

        # rear wheels
        car_body += np.sum((frame[72:76, 44, 1] > 0.5).flatten())
        car_body += np.sum((frame[72:76, 50, 1] > 0.5).flatten())

        # sides
        car_body += np.sum((frame[67:77, 45, 1] > 0.5).flatten())
        car_body += np.sum((frame[67:77, 49, 1] > 0.5).flatten())

        self.green = car_body / 55.0

        self.speed = sum(frame[85:, 2, 0]) / 5


        self.abs1 = sum(frame[85:, 9, 2])
        self.abs2 = sum(frame[85:, 14, 2])
        self.abs3 = sum(frame[85:, 19, 2])
        self.abs4 = sum(frame[85:, 24, 2])

        steering_input_left = sum(frame[90, 37:48, 1])
        steering_input_right = sum(frame[90, 47:58, 1])
        self.steering = steering_input_right - steering_input_left

        rotation_left = sum(frame[90, 59:72, 0])
        rotation_right = sum(frame[90, 72:85, 0])
        self.rotation = rotation_right - rotation_left

        if not self.silent:
            print(f"green:{self.green}\tspeed:{self.speed}\tabs:\t{self.abs1}\t{self.abs2}\t{self.abs3}\t{self.abs4}\tsteering:{self.steering}\trotation:{self.rotation}") 

        features = self._process_frame(frame)

        return np.concatenate([features, [self.speed, self.green, self.abs1, self.abs2, self.abs3, self.abs4, self.steering, self.rotation]])
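
A hypothetical wiring of the wrapper, reusing the frame-skip environment ID from Example #12:

env = VaeCarWrapper(gym.make("CarRacingSoftFS3-v0"), silent=True)
obs = env.reset()  # 40-d vector: 32 VAE latents plus the 8 HUD scalars above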