# Imports reconstructed for this snippet; `log`, `Discretized`, and
# `key_to_action_default` are project-specific helpers assumed to be
# importable from the surrounding package.
import copy
import os
import re
import time
from os.path import join
from threading import Thread

import cv2
import gym
import numpy as np
from gym.utils import seeding
from vizdoom import AutomapMode, DoomGame, Mode, ScreenResolution


class VizdoomEnv(gym.Env):
    def __init__(self,
                 action_space,
                 config_file,
                 coord_limits=None,
                 max_histogram_length=200,
                 show_automap=False,
                 skip_frames=1,
                 async_mode=False,
                 record_to=None):
        self.initialized = False

        # essential game data
        self.game = None
        self.state = None
        self.curr_seed = 0
        self.rng = None
        self.skip_frames = skip_frames
        self.async_mode = async_mode

        # optional - for topdown view rendering and visitation heatmaps
        self.show_automap = show_automap
        self.coord_limits = coord_limits

        # can be adjusted after the environment is created (but before any reset() call) via observation space wrapper
        self.screen_w, self.screen_h, self.channels = 640, 480, 3
        self.screen_resolution = ScreenResolution.RES_640X480
        self.calc_observation_space()

        self.black_screen = None

        # provided as a part of environment definition, since these depend on the scenario and
        # can be quite complex multi-discrete spaces
        self.action_space = action_space
        self.composite_action_space = hasattr(self.action_space, 'spaces')

        self.delta_actions_scaling_factor = 7.5

        scenarios_dir = join(os.path.dirname(__file__), 'scenarios')
        self.config_path = join(scenarios_dir, config_file)
        self.variable_indices = self._parse_variable_indices(self.config_path)

        # only created if we call render() method
        self.viewer = None

        # record full episodes using VizDoom recording functionality
        self.record_to = record_to

        self.is_multiplayer = False  # overridden in derived classes

        # (optional) histogram to track positional coverage
        # do not pass coord_limits if you don't need this, to avoid extra calculation
        self.max_histogram_length = max_histogram_length
        self.current_histogram, self.previous_histogram = None, None
        if self.coord_limits:
            x = self.coord_limits[2] - self.coord_limits[0]
            y = self.coord_limits[3] - self.coord_limits[1]
            if x > y:
                len_x = self.max_histogram_length
                len_y = int((y / x) * self.max_histogram_length)
            else:
                len_x = int((x / y) * self.max_histogram_length)
                len_y = self.max_histogram_length
            self.current_histogram = np.zeros((len_x, len_y), dtype=np.int32)
            self.previous_histogram = np.zeros_like(self.current_histogram)

        # helpers for human play with pynput keyboard input
        self._terminate = False
        self._current_actions = []
        self._actions_flattened = None

        self._prev_info = None
        self._last_episode_info = None

        self._num_episodes = 0

        self.mode = 'algo'

        self.seed()

    def seed(self, seed=None):
        self.curr_seed = seeding.hash_seed(seed, max_bytes=4)
        self.rng, _ = seeding.np_random(seed=self.curr_seed)
        return [self.curr_seed, self.rng]

    def calc_observation_space(self):
        self.observation_space = gym.spaces.Box(
            0, 255, (self.screen_h, self.screen_w, self.channels), dtype=np.uint8)

    def _set_game_mode(self, mode):
        if mode == 'replay':
            self.game.set_mode(Mode.PLAYER)
        else:
            if self.async_mode:
                log.info('Starting in async mode! Use this only for testing, otherwise PLAYER mode is much faster')
                self.game.set_mode(Mode.ASYNC_PLAYER)
            else:
                self.game.set_mode(Mode.PLAYER)

    def _create_doom_game(self, mode):
        self.game = DoomGame()
        self.game.load_config(self.config_path)
        self.game.set_screen_resolution(self.screen_resolution)
        self.game.set_seed(self.rng.randint(0, 2**32 - 1))

        if mode == 'algo':
            self.game.set_window_visible(False)
        elif mode == 'human' or mode == 'replay':
            self.game.add_game_args('+freelook 1')
            self.game.set_window_visible(True)
        else:
            raise Exception('Unsupported mode')

        self._set_game_mode(mode)

    def initialize(self):
        self._create_doom_game(self.mode)

        # (optional) top-down view provided by the game engine
        if self.show_automap:
            self.game.set_automap_buffer_enabled(True)
            self.game.set_automap_mode(AutomapMode.OBJECTS)
            self.game.set_automap_rotate(False)
            self.game.set_automap_render_textures(False)

            # self.game.add_game_args("+am_restorecolors")
            # self.game.add_game_args("+am_followplayer 1")
            background_color = 'ffffff'
            self.game.add_game_args('+viz_am_center 1')
            self.game.add_game_args('+am_backcolor ' + background_color)
            self.game.add_game_args('+am_tswallcolor dddddd')
            # self.game.add_game_args("+am_showthingsprites 0")
            self.game.add_game_args('+am_yourcolor ' + background_color)
            self.game.add_game_args('+am_cheat 0')
            self.game.add_game_args('+am_thingcolor 0000ff')  # player color
            self.game.add_game_args('+am_thingcolor_item 00ff00')
            # self.game.add_game_args("+am_thingcolor_citem 00ff00")

        self.game.init()

        self.initialized = True

    def _ensure_initialized(self):
        if not self.initialized:
            self.initialize()

    @staticmethod
    def _parse_variable_indices(config):
        with open(config, 'r') as config_file:
            lines = config_file.readlines()
        lines = [l.strip() for l in lines]

        variable_indices = {}

        for line in lines:
            if line.startswith('#'):
                continue  # comment

            variables_syntax = r'available_game_variables[\s]*=[\s]*\{(.*)\}'
            match = re.match(variables_syntax, line)
            if match is not None:
                variables_str = match.groups()[0]
                variables_str = variables_str.strip()
                variables = variables_str.split(' ')
                for i, variable in enumerate(variables):
                    variable_indices[variable] = i
                break

        return variable_indices

    def _black_screen(self):
        if self.black_screen is None:
            self.black_screen = np.zeros(self.observation_space.shape, dtype=np.uint8)
        return self.black_screen

    def _game_variables_dict(self, state):
        game_variables = state.game_variables
        variables = {}
        for variable, idx in self.variable_indices.items():
            variables[variable] = game_variables[idx]
        return variables

    def demo_path(self, episode_idx):
        demo_name = f'ep_{episode_idx:03d}_rec.lmp'
        demo_path = join(self.record_to, demo_name)
        demo_path = os.path.normpath(demo_path)
        return demo_path

    def reset(self):
        self._ensure_initialized()

        if self.record_to is not None and not self.is_multiplayer:
            # does not work in multiplayer (uses different mechanism)
            if not os.path.exists(self.record_to):
                os.makedirs(self.record_to)
            demo_path = self.demo_path(self._num_episodes)
            log.warning('Recording episode demo to %s', demo_path)
            self.game.new_episode(demo_path)
        else:
            # no demo recording (default)
            self.game.new_episode()

        self.state = self.game.get_state()
        img = self.state.screen_buffer
        if img is None:
            log.error('Game returned None screen buffer! This is not supposed to happen!')
            img = self._black_screen()

        # Swap current and previous histogram
        if self.current_histogram is not None and self.previous_histogram is not None:
            swap = self.current_histogram
            self.current_histogram = self.previous_histogram
            self.previous_histogram = swap
            self.current_histogram.fill(0)

        self._actions_flattened = None
        self._last_episode_info = copy.deepcopy(self._prev_info)
        self._prev_info = None

        self._num_episodes += 1

        return np.transpose(img, (1, 2, 0))

    def _convert_actions(self, actions):
        """Convert actions from gym action space to the action space expected by Doom game."""
        if self.composite_action_space:
            # composite action space with multiple subspaces
            spaces = self.action_space.spaces
        else:
            # simple action space, e.g. Discrete. We still treat it like composite of length 1
            spaces = (self.action_space, )
            actions = (actions, )

        actions_flattened = []
        for i, action in enumerate(actions):
            if isinstance(spaces[i], gym.spaces.Box):
                # continuous action
                actions_flattened.extend(list(action * self.delta_actions_scaling_factor))
            elif isinstance(spaces[i], Discretized):
                # discretized continuous action
                continuous_action = spaces[i].to_continuous(action)
                actions_flattened.append(continuous_action)
            elif isinstance(spaces[i], gym.spaces.Discrete):
                # standard discrete action
                num_non_idle_actions = spaces[i].n - 1
                action_one_hot = np.zeros(num_non_idle_actions, dtype=np.uint8)
                if action > 0:
                    action_one_hot[action - 1] = 1  # 0th action in each subspace is a no-op
                actions_flattened.extend(action_one_hot)
            else:
                raise NotImplementedError(f'Action subspace type {type(spaces[i])} is not supported!')

        return actions_flattened

    def _vizdoom_variables_bug_workaround(self, info, done):
        """Some variables don't get reset to zero on game.new_episode(). This fixes it (also check overflow?)."""
        if done and 'DAMAGECOUNT' in info:
            log.info('DAMAGECOUNT value on done: %r', info.get('DAMAGECOUNT'))

        if self._last_episode_info is not None:
            bugged_vars = ['DEATHCOUNT', 'HITCOUNT', 'DAMAGECOUNT']
            for v in bugged_vars:
                if v in info:
                    info[v] -= self._last_episode_info.get(v, 0)

    def step(self, actions):
        """
        Action is either a single value (discrete, one-hot), or a tuple with an action for each of the
        discrete action subspaces.
        """
        info = {'num_frames': self.skip_frames}

        if self._actions_flattened is not None:
            # provided externally, e.g. via human play
            actions_flattened = self._actions_flattened
            self._actions_flattened = None
        else:
            actions_flattened = self._convert_actions(actions)

        reward = self.game.make_action(actions_flattened, self.skip_frames)
        state = self.game.get_state()
        done = self.game.is_episode_finished()

        if not done:
            observation = np.transpose(state.screen_buffer, (1, 2, 0))
            game_variables = self._game_variables_dict(state)
            info.update(self.get_info(game_variables))
            self._update_histogram(info)
            self._prev_info = copy.deepcopy(info)
        else:
            observation = self._black_screen()
            # when done=True Doom does not allow us to call get_info, so we provide info from the last frame
            info.update(self._prev_info)

        self._vizdoom_variables_bug_workaround(info, done)

        return observation, reward, done, info

    def render(self, mode='human'):
        try:
            img = self.game.get_state().screen_buffer
            img = np.transpose(img, [1, 2, 0])

            if mode == 'rgb_array':
                return img

            h, w = img.shape[:2]
            render_w = 1280

            if w < render_w:
                render_h = int(render_w * h / w)
                img = cv2.resize(img, (render_w, render_h))

            if self.viewer is None:
                from gym.envs.classic_control import rendering
                self.viewer = rendering.SimpleImageViewer(maxwidth=render_w)
            self.viewer.imshow(img)
            return img
        except AttributeError:
            return None

    def close(self):
        if self.viewer is not None:
            self.viewer.close()

    def get_info(self, variables=None):
        if variables is None:
            variables = self._game_variables_dict(self.game.get_state())

        info_dict = {'pos': self.get_positions(variables)}
        info_dict.update(variables)
        return info_dict

    def get_info_all(self, variables=None):
        if variables is None:
            variables = self._game_variables_dict(self.game.get_state())

        info = self.get_info(variables)
        if self.previous_histogram is not None:
            info['previous_histogram'] = self.previous_histogram
        return info

    def get_positions(self, variables):
        return self._get_positions(variables)

    @staticmethod
    def _get_positions(variables):
        have_coord_data = True
        required_vars = ['POSITION_X', 'POSITION_Y', 'ANGLE']
        for required_var in required_vars:
            if required_var not in variables:
                have_coord_data = False
                break

        x = y = a = np.nan
        if have_coord_data:
            x = variables['POSITION_X']
            y = variables['POSITION_Y']
            a = variables['ANGLE']

        return {'agent_x': x, 'agent_y': y, 'agent_a': a}

    def get_automap_buffer(self):
        if self.game.is_episode_finished():
            return None
        state = self.game.get_state()
        map_ = state.automap_buffer
        map_ = np.swapaxes(map_, 0, 2)
        map_ = np.swapaxes(map_, 0, 1)
        return map_

    def _update_histogram(self, info, eps=1e-8):
        if self.current_histogram is None:
            return
        agent_x, agent_y = info['pos']['agent_x'], info['pos']['agent_y']

        # Get agent coordinates normalized to [0, 1]
        dx = (agent_x - self.coord_limits[0]) / (self.coord_limits[2] - self.coord_limits[0])
        dy = (agent_y - self.coord_limits[1]) / (self.coord_limits[3] - self.coord_limits[1])

        # Rescale coordinates to histogram dimensions
        # Subtract eps to exclude upper bound of dx, dy
        dx = int((dx - eps) * self.current_histogram.shape[0])
        dy = int((dy - eps) * self.current_histogram.shape[1])

        self.current_histogram[dx, dy] += 1

    def _key_to_action(self, key):
        if hasattr(self.action_space, 'key_to_action'):
            return self.action_space.key_to_action(key)
        else:
            return key_to_action_default(key)

    def _keyboard_on_press(self, key):
        from pynput.keyboard import Key
        if key == Key.esc:
            self._terminate = True
            return False

        action = self._key_to_action(key)
        if action is not None:
            if action not in self._current_actions:
                self._current_actions.append(action)

    def _keyboard_on_release(self, key):
        action = self._key_to_action(key)
        if action is not None:
            if action in self._current_actions:
                self._current_actions.remove(action)

    # noinspection PyProtectedMember
    @staticmethod
    def play_human_mode(env, skip_frames=1, num_episodes=3, num_actions=None):
        from pynput.keyboard import Listener

        doom = env.unwrapped
        doom.skip_frames = 1  # handled by this script separately

        # noinspection PyProtectedMember
        def start_listener():
            with Listener(on_press=doom._keyboard_on_press, on_release=doom._keyboard_on_release) as listener:
                listener.join()

        listener_thread = Thread(target=start_listener)
        listener_thread.start()

        for episode in range(num_episodes):
            doom.mode = 'human'
            env.reset()
            last_render_time = time.time()
            time_between_frames = 1.0 / 35.0

            total_rew = 0.0

            while not doom.game.is_episode_finished() and not doom._terminate:
                num_actions = 14 if num_actions is None else num_actions
                turn_delta_action_idx = num_actions - 1

                actions = [0] * num_actions
                for action in doom._current_actions:
                    if isinstance(action, int):
                        actions[action] = 1  # 1 for buttons currently pressed, 0 otherwise
                    else:
                        if action == 'turn_left':
                            actions[turn_delta_action_idx] = -doom.delta_actions_scaling_factor
                        elif action == 'turn_right':
                            actions[turn_delta_action_idx] = doom.delta_actions_scaling_factor

                for frame in range(skip_frames):
                    doom._actions_flattened = actions
                    _, rew, _, _ = env.step(actions)

                    new_total_rew = total_rew + rew
                    # if new_total_rew != total_rew:
                    #     log.info('Reward: %.3f, total: %.3f', rew, new_total_rew)
                    total_rew = new_total_rew

                    state = doom.game.get_state()

                    verbose = True
                    if state is not None and verbose:
                        info = doom.get_info()
                        # print(
                        #     'Weapon:', info['SELECTED_WEAPON'],
                        #     'ready:', info['ATTACK_READY'],
                        #     'ammo:', info['SELECTED_WEAPON_AMMO'],
                        #     'pc:', info['PLAYER_COUNT'],
                        #     'dmg:', info['DAMAGECOUNT'],
                        # )

                    time_since_last_render = time.time() - last_render_time
                    time_wait = time_between_frames - time_since_last_render

                    if doom.show_automap and state.automap_buffer is not None:
                        map_ = state.automap_buffer
                        map_ = np.swapaxes(map_, 0, 2)
                        map_ = np.swapaxes(map_, 0, 1)
                        cv2.imshow('ViZDoom Automap Buffer', map_)
                        if time_wait > 0:
                            cv2.waitKey(max(1, int(time_wait * 1000)))  # waitKey expects milliseconds; 0 blocks forever
                    else:
                        if time_wait > 0:
                            time.sleep(time_wait)

                    last_render_time = time.time()

        if doom.show_automap:
            cv2.destroyAllWindows()

        log.debug('Press ESC to exit...')
        listener_thread.join()

    # noinspection PyProtectedMember
    @staticmethod
    def replay(env, rec_path):
        doom = env.unwrapped
        doom.mode = 'replay'
        doom._ensure_initialized()
        doom.game.replay_episode(rec_path)

        episode_reward = 0
        start = time.time()

        while not doom.game.is_episode_finished():
            doom.game.advance_action()
            r = doom.game.get_last_reward()
            episode_reward += r
            log.info('Episode reward: %.3f, time so far: %.1f s', episode_reward, time.time() - start)

        log.info('Finishing replay')
        doom.close()
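
# A minimal usage sketch for VizdoomEnv, assuming a hypothetical 'basic.cfg' in the
# package's scenarios directory with three buttons: per _convert_actions, Discrete(4)
# maps action 0 to a no-op and actions 1-3 to one-hot button presses.
import gym

env = VizdoomEnv(action_space=gym.spaces.Discrete(4), config_file='basic.cfg', skip_frames=4)
obs = env.reset()  # lazily creates and initializes the underlying DoomGame
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())  # info includes 'pos' and game variables
env.close()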
def __init__(self, scenario_path="scenarios/defend_center.cfg"):
    self.reward = 0
    game = DoomGame()
    game.load_config(scenario_path)  # This corresponds to the simple task we will pose our agent
    # game.load_config("../../scenarios/cig.cfg")
    game.set_doom_map("map01")  # Limited deathmatch.
    # game.set_doom_map("map02")  # Full deathmatch.
    game.set_window_visible(False)

    # Start multiplayer game only with your AI (with options that will be used in the competition,
    # details in cig_host example).
    game.add_game_args(
        "-host 1 -deathmatch +timelimit 1.0 "
        "+sv_forcerespawn 1 +sv_noautoaim 1 +sv_respawnprotect 1 +sv_spawnfarthest 1")

    # Name your agent and select color
    # colors: 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray, 5 - light brown,
    # 6 - light red, 7 - light blue
    game.add_game_args("+name AI +colorset 0")
    self.game = game

    # generates the actual action arrays [True, False, False], etc. for each action,
    # in preparation for actually using config files
    self.real_actions = [[i == j for i in range(game.get_available_buttons_size())]
                         for j in range(game.get_available_buttons_size())]
    self.last_variables = None
    game.init()  # the game must be running before the first episode can start
    self.reset()
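
# Worked example of the action-array comprehension above: for a scenario with three
# available buttons, each entry of real_actions presses exactly one button.
n = 3  # stands in for game.get_available_buttons_size()
real_actions = [[i == j for i in range(n)] for j in range(n)]
# -> [[True, False, False], [False, True, False], [False, False, True]]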
class DoomTrainer:
    def __init__(self, params):
        self.game = DoomGame()
        self.game.load_config("../scenarios/" + params.scenario + ".cfg")

        if params.model == 'human':
            self.game.set_mode(Mode.SPECTATOR)
        else:
            self.actions = create_actions(params.scenario)

    def play_human(self):
        episodes = 10
        for i in range(episodes):
            print("Episode #" + str(i + 1))
            self.game.new_episode()

            while not self.game.is_episode_finished():
                s = self.game.get_state()
                self.game.advance_action()
                a = self.game.get_last_action()
                r = self.game.get_last_reward()

    def start_game(self):
        self.game.init()

    def set_seed(self, seed):
        self.game.set_seed(seed)

    def new_episode(self):
        self.game.new_episode()

    def get_screen(self):
        return torch.from_numpy(scale(self.game.get_state().screen_buffer, None, None, True))

    def make_action(self, action):
        reward = self.game.make_action(self.actions[action])
        done = self.game.is_episode_finished()
        return reward, done

    def num_actions(self):
        return len(self.actions)
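
# Sketch of driving DoomTrainer from an agent loop. The params object here is a
# hypothetical stand-in for the project's parsed CLI arguments.
class _Params:
    scenario = 'basic'
    model = 'a3c'

trainer = DoomTrainer(_Params())
trainer.start_game()
trainer.new_episode()
done = False
while not done:
    screen = trainer.get_screen()          # torch tensor of the current frame
    reward, done = trainer.make_action(0)  # index into the scenario's action list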
def create_game(
        environment_config: EnvironmentConfig) -> t.Tuple[DoomGame, ActionList]:
    """Creates an instance of VizDoom.

    Args:
        environment_config: An environment configuration instance; its
            `scenario` attribute names the scenario to play.

    Returns:
        A tuple of the initialized Doom game instance and the list of
        actions available in the loaded scenario.
    """
    game = DoomGame()

    # Game configuration
    game.load_config(f'{paths.SCENARIOS}/{environment_config.scenario}.cfg')
    game.set_doom_scenario_path(f'{paths.SCENARIOS}/{environment_config.scenario}.wad')
    game.set_mode(environment_config.game_mode)
    game.set_screen_format(environment_config.screen_mode)
    game.init()

    possible_actions = controls.get_available_actions(game.get_available_buttons())

    return game, possible_actions
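
# Hypothetical usage; EnvironmentConfig field names follow their use in create_game,
# and Mode/ScreenFormat are the standard vizdoom enums.
from vizdoom import Mode, ScreenFormat

config = EnvironmentConfig(scenario='basic', game_mode=Mode.PLAYER, screen_mode=ScreenFormat.RGB24)
game, possible_actions = create_game(config)
reward = game.make_action(possible_actions[0])  # press the first available action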
class DoomBasic(AbstractGame):
    def _setup_game(self):
        self.game = DoomGame()
        self.file_path = os.path.dirname(__file__)
        self.game.load_config(os.path.join(self.file_path, "basic.cfg"))
        self.game.set_doom_scenario_path(os.path.join(self.file_path, "basic.wad"))

    def _initialize(self):
        self._setup_game()
        self.init()
        left = [1, 0, 0]
        right = [0, 1, 0]
        shoot = [0, 0, 1]
        self.possible_actions = [left, right, shoot]

    def set_window_visibility(self, visibility):
        self._setup_game()
        self.game.set_window_visible(visibility)
        self.init()

    def start_new_game(self):
        self.game.new_episode()
        game_start = True
        state = self.game.get_state().screen_buffer
        return state, game_start

    def take_action(self, action):
        return self.game.make_action(action), self.game.is_episode_finished()

    def get_state(self):
        return self.game.get_state().screen_buffer

    def init(self):
        self.game.init()

    def is_done(self):
        return self.game.is_episode_finished()

    def close(self):
        self.game.close()
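
# Usage sketch; assumes _initialize() may be called directly here, although in the
# real codebase the AbstractGame base class likely drives it.
game = DoomBasic()
game._initialize()  # sets up DoomGame and the three one-hot actions
state, game_start = game.start_new_game()
while not game.is_done():
    reward, done = game.take_action(game.possible_actions[2])  # shoot
game.close()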
class ViZDoomGame(Game, IGame):
    """
    VizDoom game wrapper.
    """

    # protected members
    _action_space_id: str = "action_space"
    _last_ammo_2: int = 0
    _last_kill_count: int = 0
    _last_health: int = 0

    # public member functions
    def __init__(self):
        """
        Default constructor.
        """
        super(ViZDoomGame, self).__init__()
        self._game_instance = DoomGame()

    @overrides
    def init(self) -> None:
        """
        Object initialization.
        """
        super(ViZDoomGame, self).init()
        self._game_instance.init()
        self._initialized = True

    @overrides
    def reset(self) -> IObservation:
        """
        # see : IGame.reset()
        """
        self._game_instance.new_episode()
        initial_state = self._game_instance.get_state()
        self._current_observation = DoomObservation(initial_state)
        return self._current_observation

    @overrides
    def process_ticks(self, num_of_ticks: int) -> None:
        """
        # see : IGame.process_ticks()
        """
        self._game_instance.advance_action(num_of_ticks, True)
        self._update_current_observation()

    @overrides
    def is_episode_finished(self) -> bool:
        """
        # see : IGame.is_episode_finished()
        """
        return self._game_instance.is_episode_finished()

    @overrides
    def get_observation_space_desc(self) -> Collection[ITensorDescriptor]:
        """
        # see : IGame.get_observation_space_desc()

        Currently we are filtering out non array like buffers.
        """
        self._init_check()
        if self._observation_space_desc is None:
            self._observation_space_desc = []
            dummy_state = self._game_instance.get_state()
            observed_attributes = \
                filter(lambda a: not a.startswith('__')
                       and not a.startswith('tic')
                       and not a.startswith('number')
                       and not a.startswith('labels')
                       and not a.startswith('objects')
                       and not a.startswith('sectors')
                       and not callable(getattr(dummy_state, a)),
                       dir(dummy_state))

            for attr in observed_attributes:
                tensor = np.array(getattr(dummy_state, attr))
                self._observation_space_desc.append(
                    TensorDescriptor(display_name=attr,
                                     data_type=tensor.dtype,
                                     tensor_shape=tensor.shape,
                                     input_type=NetworkInputType.IMG))

        return self._observation_space_desc

    @overrides
    def get_total_score(self):
        """
        # see : IGame.get_total_score()
        """
        return self._game_instance.get_total_reward()

    # protected member functions
    @overrides
    def _init_config(self) -> None:
        """
        Initializes the game instance, based on the config_handler attributes.
        """
        for config_property in ViZDoomConfigurationProperty:
            config_value = self._config_handler.get_config_property(
                config_property, config_property.prop_type)
            if config_value is None:
                continue
            try:
                getattr(self._game_instance,
                        SET_ATTRIBUTE_PREFIX + config_property.label)(config_value)
            except (AttributeError, TypeError):
                self._log.warning(CONFIGURATION_ATTRIBUTE_ERROR_MSG,
                                  config_property.label)

    @overrides
    def _make_action(self, action: Action) -> Tuple[IObservation, float]:
        """
        # see : Game._make_action()
        """
        reward = self._game_instance.make_action(action)
        self._update_current_observation()
        self._update_variables()
        return self._current_observation, reward

    @overrides
    def _build_actions_space_desc(self):
        """
        # see : Game._build_actions_space_desc()
        """
        return TensorDescriptor(
            display_name=self._action_space_id,
            data_type=np.int32,
            tensor_shape=(self._game_instance.get_available_buttons_size(), ),
            input_type=NetworkInputType.NONE)

    def _update_current_observation(self) -> None:
        """
        Updates the current observation based on the environment.
        """
        observation = self._game_instance.get_state()
        if observation is not None:
            self._current_observation = DoomObservation(observation)

    def _update_variables(self) -> None:
        """
        Updates game variables.
        """
        self._last_ammo_2 = self._game_instance.get_game_variable(AMMO2)
        self._last_health = self._game_instance.get_game_variable(HEALTH)
        self._last_kill_count = self._game_instance.get_game_variable(KILLCOUNT)