Exemple #1
1
class DoomEnvironment:
    def __init__(self, scenario, path_to_config="doom/config"):
        self.game = DoomGame()
        self.game.load_config(path_to_config + "/" + scenario + ".cfg")
        self.game.set_doom_scenario_path(path_to_config + "/" + scenario +
                                         ".wad")
        self.game.set_window_visible(False)
        self.game.init()
        self.num_actions = len(self.game.get_available_buttons())

    def reset(self):
        self.game.new_episode()
        game_state = self.game.get_state()
        obs = game_state.screen_buffer
        self.h, self.w = obs.shape[1:3]
        self.current_obs = self.preprocess_obs(obs)
        if self.game.get_available_game_variables_size() == 2:
            self.ammo, self.health = game_state.game_variables
        return self.get_obs()

    def get_obs(self):
        return self.current_obs[:, :, None]

    def get_obs_rgb(self):
        img = self.game.get_state().screen_buffer
        img = np.rollaxis(img, 0, 3)
        img = np.reshape(img, [self.h, self.w, 3])
        return img.astype(np.uint8)

    def preprocess_obs(self, obs):
        img = np.rollaxis(obs, 0, 3)
        img = np.reshape(img, [self.h, self.w, 3]).astype(np.float32)
        img = img[:, :, 0] * 0.299 + img[:, :, 1] * 0.587 + img[:, :,
                                                                2] * 0.114
        img = Image.fromarray(img)
        img = img.resize((84, 84), Image.BILINEAR)
        img = np.array(img)
        return img.astype(np.uint8)

    def action_to_doom(self, a):
        action = [0 for i in range(self.num_actions)]
        action[int(a)] = 1
        return action

    def step(self, a):
        action = self.action_to_doom(a)
        reward = self.game.make_action(action)

        done = self.game.is_episode_finished()

        if done:
            new_obs = np.zeros_like(self.current_obs, dtype=np.uint8)
        else:
            game_state = self.game.get_state()
            new_obs = game_state.screen_buffer
            new_obs = self.preprocess_obs(new_obs)

        self.current_obs = new_obs

        return self.get_obs(), reward, done

    def watch_random_play(self, max_ep_length=1000, frame_skip=4):
        self.reset()
        for i in range(max_ep_length):
            a = np.random.randint(self.num_actions)
            obs, reward, done = self.step(a)
            if done: break

            img = self.get_obs_rgb()
            if i % frame_skip == 0:
                plt.imshow(img)
                display.clear_output(wait=True)
                display.display(plt.gcf())
Exemple #2
0
class ViZDoom(Environment):
    """
    [ViZDoom](https://github.com/mwydmuch/ViZDoom) environment adapter (specification key:
    `vizdoom`).

    Args:
        level (string): ViZDoom configuration file
            (<span style="color:#C00000"><b>required</b></span>).
        include_variables (bool): Whether to include game variables to state
            (<span style="color:#00C000"><b>default</b></span>: false).
        factored_action (bool): Whether to use factored action representation
            (<span style="color:#00C000"><b>default</b></span>: false).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 12).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """
    def __init__(self,
                 level,
                 visualize=False,
                 include_variables=False,
                 factored_action=False,
                 frame_skip=12,
                 seed=None):
        from vizdoom import DoomGame, Mode, ScreenFormat, ScreenResolution

        self.config_file = level
        self.include_variables = include_variables
        self.factored_action = factored_action
        self.visualize = visualize
        self.frame_skip = frame_skip

        self.environment = DoomGame()
        self.environment.load_config(self.config_file)
        if self.visualize:
            self.environment.set_window_visible(True)
            self.environment.set_mode(Mode.ASYNC_PLAYER)
        else:
            self.environment.set_window_visible(False)
            self.environment.set_mode(Mode.PLAYER)
        # e.g. CRCGCB, RGB24, GRAY8
        self.environment.set_screen_format(ScreenFormat.RGB24)
        # e.g. RES_320X240, RES_640X480, RES_1920X1080
        self.environment.set_screen_resolution(ScreenResolution.RES_640X480)
        self.environment.set_depth_buffer_enabled(False)
        self.environment.set_labels_buffer_enabled(False)
        self.environment.set_automap_buffer_enabled(False)
        if seed is not None:
            self.environment.setSeed(seed)
        self.environment.init()

        self.state_shape = (640, 480, 3)
        self.num_variables = self.environment.get_available_game_variables_size(
        )
        self.num_buttons = self.environment.get_available_buttons_size()
        self.actions = [
            tuple(a)
            for a in itertools.product([0, 1], repeat=self.num_buttons)
        ]

    def __str__(self):
        return super().__str__() + '({})'.format(self.config_file)

    def states(self):
        if self.include_variables:
            return OrderedDict(screen=dict(type='float',
                                           shape=self.state_shape),
                               variables=dict(type='float',
                                              shape=self.num_variables))
        else:
            return dict(type='float', shape=self.state_shape)

    def actions(self):
        if self.factored_action:
            return dict(type='bool', shape=self.num_buttons)
        else:
            return dict(type='int', shape=(), num_values=len(self.actions))

    def close(self):
        self.environment.close()
        self.environment = None

    def get_states(self):
        state = self.environment.get_state()
        screen = state.screen_buffer.astype(dtype=np.float32) / 255.0
        if self.include_variables:
            return OrderedDict(screen=screen, variables=state.game_variables)
        else:
            return screen

    def reset(self):
        self.environment.new_episode()
        return self.get_states()

    def execute(self, actions):
        if self.factored_action:
            action = np.where(actions, 1.0, 0.0)
        else:
            action = self.actions[actions]
        if self.visualize:
            self.environment.set_action(action)
            reward = 0.0
            for _ in range(self.frame_skip):
                self.environment.advance_action()
                reward += self.environment.get_last_reward()
        else:
            reward = self.environment.make_action(action, self.frame_skip)
        terminal = self.environment.is_episode_finished()
        states = self.get_states()
        return states, terminal, reward