Exemple #1
0
class DoomTakeCoverWrapper(DoomTakeCoverEnv):
    """DoomTakeCover environment that exposes VAE/RNN features as its state.

    Every frame returned by the wrapped environment is passed through
    ``_process_frame`` and encoded by the VAE into a latent vector ``z``.
    The RNN state is advanced on each step from the previous ``z``, the
    action and the restart flag.  The state handed back by ``_reset`` and
    ``_step`` is ``z`` concatenated with the flattened RNN state (see
    ``_current_state``).
    """

    def __init__(self, render_mode=False, load_model=True):
        """Build the VAE and RNN, optionally load their weights, and reset.

        Args:
            render_mode: when truthy, ``self.no_render`` is set to False.
            load_model: when truthy, load ``vae.json`` and ``rnn.json`` from
                ``model_path_name``.
        """
        super(DoomTakeCoverWrapper, self).__init__()

        self.no_render = not render_mode
        self.current_obs = None

        reset_graph()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=tf.AUTO_REUSE)

        self.rnn = Model(hps_sample, gpu_mode=False)

        if load_model:
            self.vae.load_json(os.path.join(model_path_name, 'vae.json'))
            self.rnn.load_json(os.path.join(model_path_name, 'rnn.json'))

        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=())
        self.outwidth = self.rnn.hps.seq_width
        self.obs_size = self.outwidth + model_rnn_size * model_state_space

        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(SCREEN_Y, SCREEN_X, 3))
        # The shape must be a tuple; the trailing comma matters, because
        # ``(n)`` is just the integer ``n``.
        self.actual_observation_space = spaces.Box(low=-50.,
                                                   high=50.,
                                                   shape=(self.obs_size,))

        self.zero_state = self.rnn.sess.run(self.rnn.zero_state)

        # Seeding must happen before the first reset: _encode() samples
        # from self.np_random.
        self._seed()

        self.rnn_state = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()

    def _step(self, action):
        """Advance the RNN state and the wrapped environment by one step.

        Args:
            action: scalar compared against +/-0.3333 to pick which entry
                of the 43-element action list is pressed.

        Returns:
            ``(state, reward, done, info)`` where ``state`` comes from
            ``_current_state()`` and ``info`` is always an empty dict.
        """
        # Update the RNN state using the *previous* z / restart flag and
        # the action that is about to be applied.
        self.frame_count += 1

        prev_z = np.zeros((1, 1, self.outwidth))
        prev_z[0][0] = self.z

        prev_action = np.zeros((1, 1))
        prev_action[0] = action

        prev_restart = np.ones((1, 1))
        prev_restart[0] = self.restart

        s_model = self.rnn

        feed = {
            s_model.input_z: prev_z,
            s_model.input_action: prev_action,
            s_model.input_restart: prev_restart,
            s_model.initial_state: self.rnn_state
        }

        self.rnn_state = s_model.sess.run(s_model.final_state, feed)

        # Translate the scalar action into the wrapped env's action list.
        # Values inside [-threshold, threshold] press nothing.
        threshold = 0.3333
        full_action = [0] * 43

        if action < -threshold:
            full_action[11] = 1

        if action > threshold:
            full_action[10] = 1

        obs, reward, done, _ = super(DoomTakeCoverWrapper,
                                     self)._step(full_action)
        small_obs = _process_frame(obs)
        self.current_obs = small_obs
        self.z = self._encode(small_obs)

        self.restart = 1 if done else 0

        return self._current_state(), reward, done, {}

    def _encode(self, img):
        """Sample a latent vector for ``img`` from the VAE posterior.

        The image is scaled to [0, 1], reshaped to ``(1, 64, 64, 3)`` and
        encoded to ``mu``/``logvar``; the result is
        ``mu + exp(logvar / 2) * noise`` with noise drawn from
        ``self.np_random``.
        """
        # np.float was removed from NumPy; np.float64 is the same dtype.
        simple_obs = np.copy(img).astype(np.float64) / 255.0
        simple_obs = simple_obs.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(simple_obs)
        return (mu +
                np.exp(logvar / 2.0) * self.np_random.randn(*logvar.shape))[0]

    def _decode(self, z):
        """Decode latent vector ``z`` into a ``(64, 64, 3)`` uint8 image."""
        img = self.vae.decode(z.reshape(1, 64)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def _reset(self):
        """Reset the wrapped env and the RNN state; return the new state."""
        obs = super(DoomTakeCoverWrapper, self)._reset()
        small_obs = _process_frame(obs)
        self.current_obs = small_obs
        self.rnn_state = self.zero_state
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0
        return self._current_state()

    def _current_state(self):
        """Return ``z`` concatenated with the flattened RNN state.

        When ``model_state_space == 2`` both the ``c`` and ``h`` parts of
        the RNN state are included, otherwise only ``h``.
        """
        if model_state_space == 2:
            return np.concatenate([
                self.z,
                self.rnn_state.c.flatten(),
                self.rnn_state.h.flatten()
            ],
                                  axis=0)
        return np.concatenate([self.z, self.rnn_state.h.flatten()], axis=0)

    def _seed(self, seed=None):
        """Seed TensorFlow (when a seed is given) and ``self.np_random``.

        Returns:
            A one-element list holding the seed reported by
            ``seeding.np_random``.
        """
        # ``is not None`` rather than truthiness so that seed=0 is honoured.
        if seed is not None:
            tf.set_random_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _render(self, mode='human', close=False):
        """Render game frame, processed frame and VAE reconstruction.

        The three images are concatenated side by side.  In ``'rgb_array'``
        mode the combined image is returned; in ``'human'`` mode it is shown
        in a ``SimpleImageViewer``.  With ``close=True`` the viewer (if any)
        is closed instead.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None  # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            state = self.game.get_state()
            img = state.image_buffer
            small_img = self.current_obs
            # Fall back to black frames when nothing is available yet.
            if img is None:
                img = np.zeros(shape=(480, 640, 3), dtype=np.uint8)
            if small_img is None:
                small_img = np.zeros(shape=(SCREEN_Y, SCREEN_X, 3),
                                     dtype=np.uint8)
            # Both thumbnails are resized to squares whose side equals the
            # game frame's height so they can be concatenated along axis 1.
            small_img = resize(small_img, (img.shape[0], img.shape[0]))
            vae_img = self._decode(self.z)
            vae_img = resize(vae_img, (img.shape[0], img.shape[0]))
            all_img = np.concatenate((img, small_img, vae_img), axis=1)
            img = all_img
            if mode == 'rgb_array':
                return img
            elif mode == 'human':  # equality, not identity, for strings
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
        except doom_py.vizdoom.ViZDoomIsNotRunningException:
            pass  # Doom has been closed
class RNNAnalyzer:
    """Step a trained RNN forward in latent space and decode with the VAE.

    The analyzer keeps the current latent vector ``z``, the RNN state and a
    restart flag, and samples the next ``z`` from the RNN's mixture output
    at a configurable temperature.
    """

    def __init__(self, rnn_load_path, num_mixtures, temperature):
        """Load the VAE and RNN weights and initialise the RNN state.

        Args:
            rnn_load_path: directory containing ``rnn.json``.
            num_mixtures: passed to ``default_prediction_hps``.
            temperature: sampling temperature used by ``predict_one_step``.
        """
        # RNN parameters - modelled after hps_sample in doomrnn.py
        self.vae = VAE(z_size=LATENT_SPACE_DIMENSIONALITY,
                       batch_size=1,
                       is_training=False,
                       reuse=False,
                       gpu_mode=False)

        self.vae.load_json(os.path.join(VAE_PATH, 'vae.json'))
        hps = default_prediction_hps(num_mixtures)
        self.rnn = RNN(hps, gpu_mode=False)

        self.rnn.load_json(os.path.join(rnn_load_path, 'rnn.json'))
        self.frame_count = 0
        self.temperature = temperature
        self.zero_state = self.rnn.sess.run(self.rnn.zero_state)
        self.outwidth = self.rnn.hps.seq_width
        self.restart = 1
        self.rnn_state = self.zero_state
        # No latent vector yet: it is set by _reset() or by the first
        # predict_one_step() call.
        self.z = None

    def _reset(self, initial_z):
        """Reset the RNN state and start again from ``initial_z``."""
        self.rnn_state = self.zero_state
        self.z = initial_z
        self.restart = 1
        self.frame_count = 0

    def decode_with_vae(self, latent_vector_sequence):
        """Decode a sequence of latent vectors with the VAE."""
        reconstructions = self.vae.decode(np.array(latent_vector_sequence))
        return reconstructions

    def predict_one_step(self, action, previous_z=None):
        """Predict one step ahead from the previous state.

        Args:
            action: action fed to the RNN for this step.
            previous_z: latent vector to use as input.  When omitted (or
                empty) the analyzer "dreams" from the ``z`` it generated on
                the previous call, or from the one given to ``_reset``.

        Returns:
            ``(next_z, logmix2)``: the sampled next latent vector and the
            temperature-adjusted, normalised mixture weights.

        Raises:
            ValueError: if no ``previous_z`` is given and there is no stored
                ``z`` to dream from.
        """
        use_given_z = previous_z is not None and len(previous_z) > 0
        if not use_given_z and self.z is None:
            # Assigning None into a float array would silently become NaN.
            raise ValueError(
                "No latent vector available: pass previous_z or call "
                "_reset(initial_z) first.")

        self.frame_count += 1
        prev_z = np.zeros((1, 1, self.outwidth))
        prev_z[0][0] = previous_z if use_given_z else self.z

        prev_action = np.zeros((1, 1))
        prev_action[0] = action

        prev_restart = np.ones((1, 1))
        prev_restart[0] = self.restart

        s_model = self.rnn

        feed = {
            s_model.input_z: prev_z,
            s_model.input_action: prev_action,
            s_model.input_restart: prev_restart,
            s_model.initial_state: self.rnn_state
        }

        # The restart logits are fetched but deliberately ignored below.
        [logmix, mean, logstd, _logrestart, next_state] = s_model.sess.run([
            s_model.out_logmix, s_model.out_mean, s_model.out_logstd,
            s_model.out_restart_logits, s_model.final_state
        ], feed)

        out_width = self.outwidth
        # Adjust temperatures: scale the log-weights, subtract the max
        # before exponentiating, then normalise each row.
        logmix2 = np.copy(logmix) / self.temperature
        logmix2 -= logmix2.max()
        logmix2 = np.exp(logmix2)
        logmix2 /= logmix2.sum(axis=1).reshape(out_width, 1)

        # NOTE(review): np_random and get_pi_idx are not defined in this
        # class; presumably they are module-level names - confirm.
        chosen_mean = np.zeros(out_width)
        chosen_logstd = np.zeros(out_width)
        for j in range(out_width):
            # Pick one mixture component per latent dimension.
            idx = get_pi_idx(np_random.rand(), logmix2[j])
            chosen_mean[j] = mean[j][idx]
            chosen_logstd[j] = logstd[j][idx]

        rand_gaussian = np_random.randn(out_width) * np.sqrt(self.temperature)
        next_z = chosen_mean + np.exp(chosen_logstd) * rand_gaussian

        self.z = next_z
        self.restart = 0  # Never telling the RNN that we got a restart.
        self.rnn_state = next_state

        return next_z, logmix2