Example #1
import random
import time

from vizdoom import DoomGame


def test_environment():
    game = DoomGame()
    # https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/Deep%20Q%20Learning/Doom/basic.cfg
    game.load_config('basic.cfg')
    game.set_doom_scenario_path('basic.wad')
    game.init()
    shoot = [0, 0, 1]
    left = [1, 0, 0]
    right = [0, 1, 0]
    actions = [shoot, left, right]

    episodes = 10
    for i in range(episodes):
        game.new_episode()
        while not game.is_episode_finished():
            state = game.get_state()
            img = state.screen_buffer
            misc = state.game_variables
            action = random.choice(actions)
            print('Action', action)
            reward = game.make_action(action)
            print('Reward', reward)
            time.sleep(0.02)
        print('Result', game.get_total_reward())
        time.sleep(2)
    game.close()
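The loop above advances the engine one tic per make_action call and sleeps to stay watchable. make_action also accepts a frame-repeat count, which is the usual way to speed up random rollouts; a minimal sketch, assuming the same basic.cfg/basic.wad assets as above:

import random

from vizdoom import DoomGame

game = DoomGame()
game.load_config('basic.cfg')
game.set_doom_scenario_path('basic.wad')
game.init()

actions = [[0, 0, 1], [1, 0, 0], [0, 1, 0]]  # shoot, left, right
skiprate = 4  # repeat each action for 4 tics; the returned reward is summed over them

game.new_episode()
while not game.is_episode_finished():
    reward = game.make_action(random.choice(actions), skiprate)
print('Result', game.get_total_reward())
game.close()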
Example #2
class DoomEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level):
        self.previous_level = -1
        self.level = level
        self.game = DoomGame()
        self.loader = Loader()
        self.doom_dir = os.path.dirname(os.path.abspath(__file__))
        self._mode = 'algo'  # 'algo' or 'human'
        self.no_render = False  # To disable double rendering in human mode
        self.viewer = None
        self.is_initialized = False  # Indicates that reset() has been called
        self.curr_seed = 0
        self.lock = (DoomLock()).get_lock()
        # self.action_space = spaces.Discrete(43)   # used to be in the old code
        self.action_space = spaces.MultiBinary(NUM_ACTIONS)
        self.allowed_actions = list(range(NUM_ACTIONS))
        self.screen_height = 120
        self.screen_width = 160
        self.screen_resolution = ScreenResolution.RES_160X120
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_height,
                                                   self.screen_width, 3),
                                            dtype=np.uint8)
        self.seed()
        self._configure()

    def _configure(self, lock=None, **kwargs):
        if 'screen_resolution' in kwargs:
            logger.warn(
                'Deprecated - Screen resolution must now be set using a wrapper. See documentation for details.'
            )
        # Multiprocessing lock
        if lock is not None:
            self.lock = lock

    def _load_level(self):
        # Closing if is_initialized
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Customizing level
        if getattr(self, '_customize_game', None) is not None and callable(
                self._customize_game):
            self.level = -1
            self._customize_game()

        else:
            # Loading Paths
            if not self.is_initialized:
                self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
                self.game.set_doom_game_path(self.loader.get_freedoom_path())

            # Common settings
            self.game.load_config(
                os.path.join(self.doom_dir,
                             'assets/%s' % DOOM_SETTINGS[self.level][CONFIG]))
            self.game.set_doom_scenario_path(
                self.loader.get_scenario_path(
                    DOOM_SETTINGS[self.level][SCENARIO]))
            if DOOM_SETTINGS[self.level][MAP] != '':
                if RANDOMIZE_MAPS > 0 and 'labyrinth' in DOOM_SETTINGS[
                        self.level][CONFIG].lower():
                    if 'fix' in DOOM_SETTINGS[self.level][SCENARIO].lower():
                        # mapId = 'map%02d'%np.random.randint(1, 23)
                        mapId = 'map%02d' % np.random.randint(4, 8)
                    else:
                        mapId = 'map%02d' % np.random.randint(
                            1, RANDOMIZE_MAPS + 1)
                    print(
                        '\t=> Special Config: Randomly Loading Maps. MapID = '
                        + mapId)
                    self.game.set_doom_map(mapId)
                else:
                    print('\t=> Default map loaded. MapID = ' +
                          DOOM_SETTINGS[self.level][MAP])
                    self.game.set_doom_map(DOOM_SETTINGS[self.level][MAP])
            self.game.set_doom_skill(DOOM_SETTINGS[self.level][DIFFICULTY])
            self.allowed_actions = DOOM_SETTINGS[self.level][ACTIONS]
            self.game.set_screen_resolution(self.screen_resolution)

        self.previous_level = self.level
        self._closed = False

        # Algo mode
        if self._mode != 'human':
            if NO_MONSTERS:
                print('\t=> Special Config: Monsters Removed.')
                self.game.add_game_args('-nomonsters 1')
            self.game.set_window_visible(False)
            self.game.set_mode(Mode.PLAYER)
            self.no_render = False
            try:
                with self.lock:
                    self.game.init()
            except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
                raise error.Error(
                    'VizDoom exited unexpectedly. This is likely caused by a missing multiprocessing lock. '
                    'To run VizDoom across multiple processes, you need to pass a lock when you configure the env '
                    '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env '
                    'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a '
                    'singleton lock in memory.')
            self._start_episode()
            self.is_initialized = True
            return self.game.get_state().screen_buffer.copy()

        # Human mode
        else:
            if NO_MONSTERS:
                print('\t=> Special Config: Monsters Removed.')
                self.game.add_game_args('-nomonsters 1')
            self.game.add_game_args('+freelook 1')
            self.game.set_window_visible(True)
            self.game.set_mode(Mode.SPECTATOR)
            self.no_render = True
            with self.lock:
                self.game.init()
            self._start_episode()
            self.is_initialized = True
            self._play_human_mode()
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

    def _start_episode(self):
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        self.game.new_episode()
        return

    def _play_human_mode(self):
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            total_reward = self.game.get_total_reward()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return

    def old_step(self, action):
        """
        action: a number in range 0..42

        Inherited from simontudo and his predecessors; it transforms a
        numeric action from the Discrete(43) space into an indicator
        (one-hot) vector.

        Note that only one button can be pressed at a time.
        """
        # Convert to array
        action_arr = np.zeros(NUM_ACTIONS, dtype=int)
        action_arr[action] = 1
        action = action_arr
        assert self.is_initialized, "Doom env not reset, call .reset()"
        # action is a list of numbers but DoomGame.make_action expects a list of ints
        if len(self.allowed_actions) > 0:
            list_action = [
                int(action[action_idx]) for action_idx in self.allowed_actions
            ]
        else:
            list_action = [int(x) for x in action]
        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()

            if self.game.is_episode_finished():
                info = {"TOTAL_REWARD": round(self.game.get_total_reward(), 4)}
                is_finished = True
                return np.zeros(shape=self.observation_space.shape,
                                dtype=np.uint8), reward, is_finished, info
            else:
                info = self._get_game_variables(state.game_variables)
                info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)
                is_finished = False
                return state.screen_buffer.copy(), reward, is_finished, info

        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape,
                            dtype=np.uint8), 0, True, {}

    def step(self, action):
        """
        action: iterable of length 43 containing indicators of whether
        each button is pressed.

        Written by me.
        """
        list_action = [int(x) for x in action]

        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()

            if self.game.is_episode_finished():
                info = {"TOTAL_REWARD": round(self.game.get_total_reward(), 4)}
                is_finished = True
                return np.zeros(shape=self.observation_space.shape,
                                dtype=np.uint8), reward, is_finished, info
            else:
                info = self._get_game_variables(state.game_variables)
                info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)
                is_finished = False
                return state.screen_buffer.copy(), reward, is_finished, info

        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape,
                            dtype=np.uint8), 0, True, {}

    def reset(self):
        if self.is_initialized and not self._closed:
            self._start_episode()
            screen_buffer = self.game.get_state().screen_buffer
            if screen_buffer is None:
                raise error.Error(
                    'VizDoom incorrectly initiated. This is likely caused by a missing multiprocessing lock. '
                    'To run VizDoom across multiple processes, you need to pass a lock when you configure the env '
                    '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env '
                    'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a '
                    'singleton lock in memory.')
            return screen_buffer.copy()
        else:
            return self._load_level()

    def render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None  # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            if 'human' == mode and self.no_render:
                return
            state = self.game.get_state()
            img = state.screen_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder doesn't stop
            if img is None:
                img = np.zeros(shape=self.observation_space.shape,
                               dtype=np.uint8)
            if mode == 'rgb_array':
                return img
            elif mode == 'human':
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        except AttributeError:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

    def close(self):
        # Lock required for VizDoom to close processes properly
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    def _get_game_variables(self, state_variables):
        info = {"LEVEL": self.level}
        if state_variables is None:
            return info
        info['KILLCOUNT'] = state_variables[0]
        info['ITEMCOUNT'] = state_variables[1]
        info['SECRETCOUNT'] = state_variables[2]
        info['FRAGCOUNT'] = state_variables[3]
        info['HEALTH'] = state_variables[4]
        info['ARMOR'] = state_variables[5]
        info['DEAD'] = state_variables[6]
        info['ON_GROUND'] = state_variables[7]
        info['ATTACK_READY'] = state_variables[8]
        info['ALTATTACK_READY'] = state_variables[9]
        info['SELECTED_WEAPON'] = state_variables[10]
        info['SELECTED_WEAPON_AMMO'] = state_variables[11]
        info['AMMO1'] = state_variables[12]
        info['AMMO2'] = state_variables[13]
        info['AMMO3'] = state_variables[14]
        info['AMMO4'] = state_variables[15]
        info['AMMO5'] = state_variables[16]
        info['AMMO6'] = state_variables[17]
        info['AMMO7'] = state_variables[18]
        info['AMMO8'] = state_variables[19]
        info['AMMO9'] = state_variables[20]
        info['AMMO0'] = state_variables[21]
        return info
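The old_step docstring describes the legacy Discrete(43) interface: the scalar index is expanded into a one-hot indicator vector and then filtered down to the level's allowed buttons. A standalone sketch of that conversion, with NUM_ACTIONS and allowed_actions as stand-ins for the module-level constants the class relies on:

import numpy as np

NUM_ACTIONS = 43               # stand-in for the module-level constant
allowed_actions = [0, 14, 15]  # hypothetical indices of the level's buttons

def discrete_to_button_list(action):
    # Expand a Discrete(43) index into the list of ints make_action expects,
    # keeping only the entries for the allowed buttons.
    indicator = np.zeros(NUM_ACTIONS, dtype=int)
    indicator[action] = 1  # exactly one button pressed at a time
    return [int(indicator[i]) for i in allowed_actions]

print(discrete_to_button_list(14))  # [0, 1, 0]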
Example #3
class DoomEnv(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level='deathmatch', obs_type='ram'):
        # super(DoomEnv, self).__init__()
        EzPickle.__init__(self, level.split('.')[0], obs_type)
        assert obs_type in ('ram', 'image')
        level = level.split('.')[0]
        Config.init(level)

        self.curr_seed = 0
        self.game = DoomGame()
        self.lock = (DoomLock()).get_lock()

        self.level = level
        self.obs_type = obs_type
        self.tick = 4

        self._mode = 'algo'

        self.is_render_in_human_mode = True
        self.is_game_initialized = False
        self.is_level_loaded = False

        self.viewer = None

        self.set_game(self.level, resolution=None, render=True)
        print()

    # todo: add frame skip option by using tick
    def step(self, action):
        reward = 0.0
        # self.tick = 4
        if self._mode == 'algo':
            if self.tick:
                reward = self.game.make_action(action, self.tick)
            else:
                reward = self.game.make_action(action)

            # self.game.set_action(action)
            # self.game.advance_action(4)
            # reward = self.game.get_last_reward()

        return self.get_obs(), reward, self.isDone(), self.get_info()

    def reset(self):
        if not self.is_game_initialized:
            self.__load_level()
            self.__init_game()

        self.__start_episode()
        return self.get_obs()

    def render(self, mode='human', **kwargs):
        if 'close' in kwargs and kwargs['close']:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if mode == 'human' and not self.is_render_in_human_mode:
            return
        img = self.get_image()

        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def close(self):
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    # ================================== GETTERS SETTERS ===============================================================
    def set_game(self, level, resolution, render):
        self.__configure()
        self.__load_level(level)
        self.__set_resolution(resolution)
        self.__set_obs_and_ac_space()
        self.__set_player(render)

    def __configure(self, lock=None, **kwargs):
        self.seed()
        if lock is not None:
            self.lock = lock

    def __load_level(self, level=None):
        if level is not None:
            self.level = level.split('.')[0]
            self.is_level_loaded = False

        if self.is_level_loaded:
            return
        if self.is_game_initialized:
            self.is_game_initialized = False
            self.game.close()
            self.game = DoomGame()

        if not self.is_game_initialized:
            self.game.set_vizdoom_path(Config.VIZDOOM_PATH)
            self.game.set_doom_game_path(Config.FREEDOOM_PATH)

        # Common settings
        self.record_file_path = Config.RECORD_FILE_PATH
        self.game.load_config(Config.VIZDOOM_SCENARIO_PATH +
                              Config.DOOM_SETTINGS[self.level][Config.CONFIG])
        self.game.set_doom_scenario_path(
            Config.VIZDOOM_SCENARIO_PATH +
            Config.DOOM_SETTINGS[self.level][Config.SCENARIO])

        if Config.DOOM_SETTINGS[self.level][Config.MAP] != '':
            self.game.set_doom_map(
                Config.DOOM_SETTINGS[self.level][Config.MAP])
        self.game.set_doom_skill(
            Config.DOOM_SETTINGS[self.level][Config.DIFFICULTY])

        self.allowed_actions = Config.DOOM_SETTINGS[self.level][Config.ACTIONS]
        self.available_game_variables = Config.DOOM_SETTINGS[self.level][
            Config.GAME_VARIABLES]

        self.is_level_loaded = True

    def __set_resolution(self, resolution=None):
        if resolution is None:
            resolution = Config.DEFAULT_SCREEN_RESOLUTION
        resolution_l = resolution.lower()
        if resolution_l not in resolutions:
            raise gym.error.Error(
                'Error - The specified resolution "{}" is not supported by ViZDoom.\n'
                'The list of valid resolutions: {}'.format(resolution, resolutions))
        if '_' in resolution_l:
            resolution_l = resolution_l.split('_')[1]
        self.scr_width = int(resolution_l.split("x")[0])
        self.scr_height = int(resolution_l.split("x")[1])
        self.game.set_screen_resolution(
            getattr(ScreenResolution,
                    'RES_{}X{}'.format(self.scr_width, self.scr_height)))

        self.screen_format = self.game.get_screen_format()
        self.screen_height = self.game.get_screen_height()
        self.screen_width = self.game.get_screen_width()

    def __set_obs_and_ac_space(self):
        if self.obs_type == 'ram':
            self.observation_space = spaces.Box(
                low=0,
                high=255,
                dtype=np.uint8,
                shape=(len(self.available_game_variables), ))
        elif self.obs_type == 'image':
            # self.observation_space = self.screen_resized
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.scr_height,
                                                       self.scr_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self.obs_type))

        if self.screen_format in inverted_screen_formats:
            self.dummy_screen = np.zeros(shape=(3, self.scr_height,
                                                self.scr_width),
                                         dtype=np.uint8)
        else:
            self.dummy_screen = np.zeros(shape=(self.scr_height,
                                                self.scr_width, 3),
                                         dtype=np.uint8)

        self.dummy_ram = [0] * len(self.available_game_variables)

        self.available_action_codes = [
            list(a)
            for a in it.product([0, 1],
                                repeat=self.game.get_available_buttons_size())
        ]
        # self.__delete_conflict_actions()
        self.action_space = spaces.MultiDiscrete(
            [len(self.available_action_codes)])

    def __set_player(self, render=True):
        self.game.set_window_visible(render)
        self.game.set_mode(Mode.PLAYER)

    def __init_game(self):
        try:
            with self.lock:
                self.game.init()
                self.is_game_initialized = True
        except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
            raise error.Error('Could not start the game.')

    def __start_episode(self):
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        if self.record_file_path:
            self.game.new_episode(self.record_file_path)
        else:
            self.game.new_episode()
        return

    def getState(self):
        return self.game.get_state()

    def getLastAction(self):
        return self.game.get_last_action()

    def getButtonsNames(self, action):
        return action_to_buttons(self.allowed_actions, action)

    def get_info(self):
        info = {
            "LEVEL": self.level,
            "TOTAL_REWARD": round(self.game.get_total_reward(), 4)
        }

        state_variables = self.get_ram()
        for i in range(len(self.available_game_variables)):
            info[self.available_game_variables[i]] = state_variables[i]

        return info

    def get_ram(self):
        if not self.is_game_initialized:
            raise NotImplementedError(
                "The game was not initialized. Run env.reset() first!")
        try:
            ram = self.getState().game_variables
        except AttributeError:
            ram = self.dummy_ram
        return ram

    def get_image(self):
        try:
            screen = self.getState().screen_buffer.copy()
        except AttributeError:
            screen = self.dummy_screen
        return self.invert_screen(screen)

    def get_obs(self):
        if self.obs_type == 'ram':
            return self.get_ram()
        elif self.obs_type == 'image':
            return self.get_image()

    def isDone(self):
        return (self.game.is_episode_finished()
                or self.game.is_player_dead()
                or self.getState() is None)

    # ===========================================  ==============================================================

    def invert_screen(self, img):
        if self.screen_format in inverted_screen_formats:
            return np.rollaxis(img, 0, 3)
        else:
            return img

    def __delete_conflict_actions(self):
        if self._mode == 'human':
            return
        action_codes_copy = self.available_action_codes.copy()

        print("Initial actions size: " + str(len(action_codes_copy)))
        for i in tqdm.trange(len(self.available_action_codes)):
            action = self.available_action_codes[i]
            ac_names = action_to_buttons(self.allowed_actions, action)

            conflict_pairs = [('MOVE_LEFT', 'MOVE_RIGHT'),
                              ('MOVE_BACKWARD', 'MOVE_FORWARD'),
                              ('TURN_RIGHT', 'TURN_LEFT'),
                              ('SELECT_NEXT_WEAPON', 'SELECT_PREV_WEAPON')]
            if any(all(b in ac_names for b in pair) for pair in conflict_pairs):
                action_codes_copy.remove(action)

        print("Final actions size: " + str(len(action_codes_copy)))
        self.available_action_codes = action_codes_copy

    def __initHumanPlayer(self):
        self._mode = 'human'
        self.__load_level()

        self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(True)
        self.game.set_mode(Mode.SPECTATOR)
        self.is_render_in_human_mode = False

        self.__init_game()

    def advanceAction(self, tick=0):
        try:
            if tick:
                self.game.advance_action(tick)
            else:
                self.game.advance_action()
            return True
        except ViZDoomUnexpectedExitException:
            return False

    def playHuman(self):
        self.__initHumanPlayer()

        while (not self.game.is_episode_finished()
               and not self.game.is_player_dead()):
            self.advanceAction()

            state = self.getState()
            if state is None:
                if self.record_file_path is None:
                    self.game.new_episode()
                else:
                    self.game.new_episode(self.record_file_path)
                state = self.getState()

            total_reward = self.game.get_total_reward()
            info = self.get_info()
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return
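A short driver for the class above; a sketch assuming its dependencies (Config, DoomLock, the scenario assets) resolve and that 'basic' is a valid level name. available_action_codes enumerates every button combination, so a random policy just samples from it:

import random

env = DoomEnv(level='basic', obs_type='image')
obs = env.reset()
done = False
while not done:
    action = random.choice(env.available_action_codes)  # a button-indicator list
    obs, reward, done, info = env.step(action)
print(info['TOTAL_REWARD'])
env.close()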
Example #4
class DoomEnvironment:
    def __init__(self, config, visible, skiprate):
        self._game = DoomGame()
        self._game.load_config(config)
        self._game.set_window_visible(visible)
        self._game.set_mode(Mode.PLAYER)
        self._game.init()

        n_actions = self._game.get_available_buttons_size()
        self._actions = [list(a) for a in it.product([0, 1], repeat=n_actions)]
        self._skiprate = skiprate

    def make_visible(self):
        self._game.close()
        self._game.set_window_visible(True)
        self._game.set_mode(Mode.ASYNC_PLAYER)
        self._game.init()

    def get_n_buttons(self):
        return self._game.get_available_buttons_size()

    def observe(self):
        observation = self._game.get_state()
        screen = observation.screen_buffer
        game_variables = observation.game_variables
        return screen, game_variables

    def step(self, action_id):
        """Takes the id of a single action and performs it for self._skiprate frames

        :param action_id: index of action to perform
        :return: reward, is_done
        """
        reward = self._game.make_action(self._actions[action_id],
                                        self._skiprate)
        return reward, self._game.is_episode_finished()

    def advance_action_step(self, action_id):
        """Takes the id of a single action and performs it for self._skiprate frames,
        rendering every frame

        :param action_id: index of action to perform
        :return: reward, is_done
        """
        reward = 0.0
        for _ in range(self._skiprate):
            reward += self._game.make_action(self._actions[action_id])
            # it is vital to break if done for correct reward shaping
            if self._game.is_episode_finished():
                break
        return reward, self._game.is_episode_finished()

    def reset(self):
        self._game.new_episode()

    def get_episode_reward(self):
        """Careful! Returns ___non-shaped___ episode reward"""
        return self._game.get_total_reward()
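A usage sketch for the wrapper above, assuming a scenario config such as basic.cfg is on disk. Note that step returns a plain (reward, is_done) pair rather than a gym-style tuple, and observe() exposes the raw screen and game variables separately:

import random

env = DoomEnvironment(config='basic.cfg', visible=False, skiprate=4)
n_action_ids = 2 ** env.get_n_buttons()  # one id per button combination

env.reset()
done = False
while not done:
    reward, done = env.step(random.randrange(n_action_ids))
print('episode reward:', env.get_episode_reward())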
Example #5
        s = game.get_state()
        img = s.image_buffer
        misc = s.game_variables

        # Makes a random action and save the reward.
        r = game.make_action(choice(actions))

        # Makes a "prolonged" action and skip frames:
        # skiprate = 3
        # r = game.make_action(choice(actions), skiprate)

        # The same could be achieved with:
        # game.set_action(choice(actions))
        # skiprate = 3
        # game.advance_action(skiprate)
        # r = game.get_last_reward()

        print("State #" + str(s.number))
        print("Game Variables:", misc)
        print("Performed action:", game.get_last_action())
        print("Last Reward:", r)
        print("=====================")

        # Sleep some time because processing is too fast to watch.
        if sleep_time > 0:
            sleep(sleep_time)

    print("Episode finished!")
    print("total reward:", game.get_total_reward())
    print("************************")
class DoomScenario:
    """
    DoomScenario class runs instances of Vizdoom according to scenario
    configuration (.cfg) files.

    Scenario Configuration files for this project are located in
    the /src/configs/ folder.

    """
    def __init__(self, config_filename):
        '''
        Method initiates Vizdoom with desired configuration file.

        '''
        self.config_filename = config_filename
        self.game = DoomGame()
        self.game.load_config("configs/" + config_filename)
        self.game.set_window_visible(False)
        self.game.init()

        self.res = (self.game.get_screen_height(),
                    self.game.get_screen_width())
        self.actions = [
            list(a)
            for a in it.product([0, 1],
                                repeat=self.game.get_available_buttons_size())
        ]

        self.pbar = None
        self.game.new_episode()

    def play(self, action, tics):
        '''
        Method advances state with desired action for a number of tics.

        '''
        self.game.set_action(action)
        self.game.advance_action(tics, True)
        if self.pbar:
            self.pbar.update(int(tics))

    def get_processed_state(self, depth_radius, depth_contrast):
        '''
        Method processes the Vizdoom RGB and depth buffer into
        a composite one channel image that can be used by the Models.

        depth_radius defines how far the depth buffer sees with 1.0 being
        as far as ViZDoom allows.

        depth_contrast defines how much of the depth buffer is in the final
        processed image as compared to the greyscaled RGB buffer.
        **processed = (1-depth_contrast)* grey_buffer + depth_contrast*depth_buffer

        '''
        state = self.game.get_state()
        if not self.game.is_episode_finished():
            img = state.screen_buffer  # screen pixels
            # print(img)
            screen_buffer = np.array(img).astype('float32') / 255
            # print(screen_buffer.shape)    # (3, 120, 160)
        try:
            # Grey Scaling
            grey_buffer = np.dot(np.transpose(screen_buffer, (1, 2, 0)),
                                 [0.21, 0.72, 0.07])
            # print(grey_buffer.shape)     # (120, 160)

            # Depth Radius
            depth_buffer = np.array(state.depth_buffer).astype('float32') / 255
            depth_buffer[depth_buffer > depth_radius] = depth_radius  # Affects depth radius
            depth_buffer_filtered = (depth_buffer - np.amin(depth_buffer)) / (
                np.amax(depth_buffer) - np.amin(depth_buffer))

            # Depth Contrast
            processed_buffer = (
                (1 - depth_contrast) * grey_buffer) + (depth_contrast *
                                                       (1 - depth_buffer))
            processed_buffer = (processed_buffer - np.amin(processed_buffer)
                                ) / (np.amax(processed_buffer) -
                                     np.amin(processed_buffer))
            processed_buffer = np.round(processed_buffer, 6)
            processed_buffer = processed_buffer.reshape(self.res[-2:])
        except Exception:
            processed_buffer = np.zeros(self.res[-2:])
        return processed_buffer  # balance the depth & RGB data

    def run(self, agent, save_replay='', verbose=False, return_data=False):
        '''
        Method runs an instance of DoomScenario.

        '''
        if return_data:
            data_S = []
            data_a = []
        if verbose:
            print("\nRunning Simulation:", self.config_filename)
            self.pbar = tqdm(total=self.game.get_episode_timeout())

        # Initiate New Instance
        self.game.close()
        self.game.set_window_visible(False)
        self.game.add_game_args("+vid_forcesurface 1 ")
        self.game.init()
        if save_replay != '':
            self.game.new_episode("../data/replay_data/" + save_replay)
        else:
            self.game.new_episode()

        # Run Simulation
        while not self.game.is_episode_finished():
            S = agent.get_state_data(self)
            q = agent.model.online_network.predict(S)
            if np.random.random() < 0.1:
                q = np.random.choice(len(q[0]), 1, p=softmax(q[0], 1))[0]
            else:
                q = int(np.argmax(q[0]))
            a = agent.model.predict(self, q)
            if return_data:
                delta = np.zeros((len(self.actions)))
                a_ = np.cast['int'](a)
                delta[a_] = 1
                data_S.append(S.reshape(S.shape[1], S.shape[2], S.shape[3]))
                data_a.append(delta)
            if not self.game.is_episode_finished():
                self.play(a, agent.frame_skips + 1)
            if agent.model.__class__.__name__ == 'HDQNModel' and not self.game.is_episode_finished(
            ):
                if q >= len(agent.model.actions):
                    for i in range(agent.model.skill_frame_skip):
                        if not self.game.is_episode_finished():
                            a = agent.model.predict(self, q)
                            self.play(a, agent.frame_skips + 1)
                        else:
                            break

        # Reset Agent and Return Score
        agent.frames = None
        if agent.model.__class__.__name__ == 'HDQNModel':
            agent.model.sub_model_frames = None
        score = self.game.get_total_reward()
        if verbose:
            self.pbar.close()
            print("Total Score:", score)
        if return_data:
            data_S = np.array(data_S)
            data_a = np.array(data_a)
            return [data_S, data_a]
        return score

    def replay(self, filename, verbose=False, doom_like=False):
        '''
        Method runs a replay of the simulations at 800 x 600 resolution.

        '''
        print("\nRunning Replay:", filename)

        # Initiate Replay
        self.game.close()
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.add_game_args("+vid_forcesurface 1")
        if doom_like:
            self.game.set_render_hud(True)
            self.game.set_render_minimal_hud(False)
            self.game.set_render_crosshair(False)
            self.game.set_render_weapon(True)
            self.game.set_render_particles(True)
        self.game.init()
        self.game.replay_episode("../data/replay_data/" + filename)

        # Run Replay
        while not self.game.is_episode_finished():
            if verbose: print("Reward:", self.game.get_last_reward())
            self.game.advance_action()

        # Print Score
        score = self.game.get_total_reward()
        print("Total Score:", score)
        self.game.close()

    def apprentice_run(self, test=False):
        '''
        Method runs an apprentice data gathering.

        '''
        # Initiate New Instance
        self.game.close()
        self.game.set_mode(Mode.SPECTATOR)
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.set_ticrate(30)
        self.game.init()
        self.game.new_episode()

        # Run Simulation
        while not self.game.is_episode_finished():
            self.game.advance_action()
        self.game.close()
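get_processed_state's docstring gives the composite as processed = (1 - depth_contrast) * grey_buffer + depth_contrast * depth_buffer, while the body actually blends against the inverted depth, (1 - depth_buffer). A synthetic sketch of that pipeline on random stand-in buffers:

import numpy as np

# Stand-ins for ViZDoom's screen (CRCGCB, 3x120x160) and depth buffers,
# already scaled to [0, 1] as in get_processed_state.
rgb = np.random.rand(3, 120, 160).astype('float32')
depth = np.random.rand(120, 160).astype('float32')

depth_radius, depth_contrast = 0.5, 0.3

grey = np.dot(np.transpose(rgb, (1, 2, 0)), [0.21, 0.72, 0.07])  # (120, 160)
depth = np.minimum(depth, depth_radius)  # clip everything past the radius
processed = (1 - depth_contrast) * grey + depth_contrast * (1 - depth)
processed = (processed - processed.min()) / (processed.max() - processed.min())
print(processed.shape)  # (120, 160)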
Example #7
                    # Every episode, agent learns from sample returns
                    loss = agent.train_model()

                # Save model every 2000 iterations
                if t % 2000 == 0:
                    print("Save model")
                    agent.model.save_weights(model_path, overwrite=True)

                state = ""
                if t <= agent.observe:
                    state = "Observe mode"
                else:
                    state = "Train mode"

                if is_terminated:
                    total_reward = game.get_total_reward()
                    # Print performance statistics at every episode end
                    print("Episode", i, "/ ACTION", action_idx,
                          "/ total reward", total_reward, "/ LOSS", loss)

                    with open(REWARDS_FILE, 'a') as fp:
                        fp.write('{},{}\n'.format(i, total_reward))

                    # Save Agent's Performance Statistics
                    if GAME % agent.stats_window_size == 0 and t > agent.observe:
                        print("Update Rolling Statistics")
                        agent.mavg_score.append(np.mean(np.array(life_buffer)))
                        agent.var_score.append(np.var(np.array(life_buffer)))

                        # Reset rolling stats buffer
                        life_buffer = []
    def play(self):

        # Create DoomGame instance. It will run the game and communicate with you.
        print("Initializing doom...")
        game = DoomGame()

        game.load_config("./examples/config/deepdoomplayer.cfg")
        game.init()
        print("Doom initialized.")

        episodes = 1
        training_steps_per_epoch = 100

        sleep_time = 0.100

        train_episodes_finished = 0
        train_rewards = []

        for epoch in range(episodes):

            train_loss = []

            game.new_episode()

            while train_episodes_finished < 20:

                sleep(sleep_time)

                if game.is_episode_finished():

                    r = game.get_total_reward()
                    train_rewards.append(r)
                    game.new_episode()
                    train_episodes_finished += 1
                    self.last_state = None
                    self.last_action[1] = 1

                # first frame must be handled differently
                if self.last_state is None:
                    # the _last_state will contain the image data from the last self.state_frames frames
                    self.last_state = np.stack(tuple(
                        self.convert_image(game.get_state().image_buffer)
                        for _ in range(self.state_frames)),
                                               axis=2)
                    continue

                reward = game.make_action(
                    DeepDoomPlayer.define_keys_to_action_pressed(
                        self.last_action), 7)

                reward *= 0.01

                imagebuffer = game.get_state().image_buffer

                if imagebuffer is None:
                    terminal = True
                    screen_resized_binary = np.zeros((40, 40))

                imagebufferlast = imagebuffer

                if imagebuffer is not None:
                    terminal = False
                    screen_resized_binary = self.convert_image(imagebuffer)

                # add dimension
                screen_resized_binary = np.expand_dims(screen_resized_binary,
                                                       axis=2)

                current_state = np.append(self.last_state[:, :, 1:],
                                          screen_resized_binary,
                                          axis=2)

                self.last_state = current_state

                self.last_action = self.choose_next_action_only_on_q()

            print(train_episodes_finished, "training episodes played.")
            print("Training results:")

            train_rewards = np.array(train_rewards)

            print("mean:", train_rewards.mean(), "std:", train_rewards.std(),
                  "max:", train_rewards.max(), "min:", train_rewards.min())

        # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
        game.close()
        self.last_state = None
    def start(self):
        """
        Main training loop.
        """
        # Create DoomGame instance. It will run the game and communicate with you.
        print("Initializing doom...")
        game = DoomGame()
        game.load_config("./examples/config/learningtensorflow.cfg")
        game.init()
        print("Doom initialized.")
        train_rewards = []

        for epoch in range(DeepDoom.episodes):
            print("\nEpoch", epoch)
            train_time = 0
            train_episodes_finished = 0
            train_loss = []

            # start saving after 20 epochs
            if epoch > 20:
                if not os.path.exists(DeepDoom.checkpoint_path):
                    os.mkdir(DeepDoom.checkpoint_path)
                self.saver.save(self.session,
                                DeepDoom.checkpoint_path,
                                global_step=epoch)

            train_start = time()

            game.new_episode()

            for learning_step in tqdm(range(DeepDoom.training_steps_per_epoch)):

                if game.is_episode_finished():
                    r = game.get_total_reward()
                    train_rewards.append(r)
                    game.new_episode()
                    train_episodes_finished += 1
                    self.last_state = None

                # first frame must be handled differently
                if self.last_state is None:
                    # last_state will contain the image data from the last self.state_frames frames
                    self.last_state = np.stack(
                        tuple(self.convert_image(game.get_state().image_buffer)
                              for _ in range(self.state_frames)),
                        axis=2)
                    continue

                reward = game.make_action(
                    DeepDoom.define_keys_to_action_pressed(self.last_action), 7)

                reward *= 0.01
         
                imagebuffer = game.get_state().image_buffer

                if imagebuffer is None:
                    terminal = True
                    screen_resized_binary = np.zeros((40, 40))

                imagebufferlast = imagebuffer

                if imagebuffer is not None:
                    terminal = False
                    screen_resized_binary = self.convert_image(imagebuffer)

                # add dimension
                screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)

                current_state = np.append(self.last_state[:, :, 1:], screen_resized_binary, axis=2)

                self.observations.append((self.last_state, self.last_action, reward, current_state, terminal))

                if len(self.observations) > self.memory_size:
                    self.observations.popleft()

                # only train if done observing
                if len(self.observations) > self.observation_steps:
                    self.train()
                    self.time += 1

                self.last_state = current_state

                self.last_action = self.choose_next_action()

                if self.probability_of_random_action > self.final_random_action_prob \
                        and len(self.observations) > self.observation_steps:
                    self.probability_of_random_action -= \
                        (self.initial_random_action_prob - self.final_random_action_prob) / self.explore_steps

            print(train_episodes_finished, "training episodes played.")
            print("Training results:")

            train_rewards = np.array(train_rewards)

            train_end = time()
            train_time = train_end - train_start
            mean_loss = np.mean(train_loss)

            print("mean:", train_rewards.mean(), "std:", train_rewards.std(),
                  "max:", train_rewards.max(), "min:", train_rewards.min(),
                  "epsilon:", self.probability_of_random_action)
            print("t:", str(round(train_time, 2)) + "s")
            train_rewards = []

        # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
        game.close()
        self.last_state = None
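Both loops above keep the network input as a rolling stack of the last state_frames preprocessed frames along the channel axis. The update pattern in isolation, on dummy 40x40 frames:

import numpy as np

state_frames = 4
first_frame = np.zeros((40, 40))

# first frame: replicate it state_frames times to seed the stack
last_state = np.stack(tuple(first_frame for _ in range(state_frames)), axis=2)

# every later step: drop the oldest channel, append the newest frame
new_frame = np.expand_dims(np.ones((40, 40)), axis=2)
current_state = np.append(last_state[:, :, 1:], new_frame, axis=2)
print(current_state.shape)  # (40, 40, 4)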
Example #11
class VizdoomEnv(gym.Env):
    def __init__(self, level):

        # init game
        self.game = DoomGame()
        self.game.set_screen_resolution(ScreenResolution.RES_640X480)
        scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
        self.game.load_config(os.path.join(scenarios_dir, CONFIGS[level][0]))
        self.game.set_window_visible(False)
        self.game.init()
        self.state = None

        self.action_space = spaces.Discrete(CONFIGS[level][1])
        self.observation_space = spaces.Box(
            0,
            255, (self.game.get_screen_height(), self.game.get_screen_width(),
                  self.game.get_screen_channels()),
            dtype=np.uint8)
        self.viewer = None

    def step(self, action):
        # convert action to vizdoom action space (one hot)
        act = np.zeros(self.action_space.n)
        act[action] = 1
        act = np.uint8(act)
        act = act.tolist()

        reward = self.game.make_action(act)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        info = {}
        if not done:
            observation = np.transpose(state.screen_buffer, (1, 2, 0))
        else:
            observation = np.uint8(np.zeros(self.observation_space.shape))
            info = {"episode": {"r": self.game.get_total_reward()}}

        return observation, reward, done, info

    def seed(self, seed):
        self.game.set_seed(seed)

    def close(self):
        self.game.close()

    def reset(self):
        self.game.new_episode()
        self.state = self.game.get_state()
        img = self.state.screen_buffer
        return np.transpose(img, (1, 2, 0))

    def render(self, mode='human'):
        try:
            img = self.game.get_state().screen_buffer
            img = np.transpose(img, [1, 2, 0])

            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
        except AttributeError:
            pass

    @staticmethod
    def get_keys_to_action():
        # you can press only one key at a time!
        keys = {
            (): 2,
            (ord('a'), ): 0,
            (ord('d'), ): 1,
            (ord('w'), ): 3,
            (ord('s'), ): 4,
            (ord('q'), ): 5,
            (ord('e'), ): 6
        }
        return keys
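Unlike Example #2, this env exposes a plain Discrete action space and one-hot encodes inside step, so a random agent can sample from action_space directly; a sketch assuming 'basic' is a valid key in the CONFIGS table the constructor reads:

# Hypothetical rollout against the class above; 'basic' stands in for a
# real CONFIGS entry.
env = VizdoomEnv(level='basic')
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
print('episode return:', info['episode']['r'])  # filled in on the final step
env.close()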