class Experiment(object):
    """
    Run a ViZDoom game on behalf of an agent.

    Main methods:
        - start: build and initialise the underlying DoomGame
        - reset / new_episode / respawn_player: episode handling
        - make_action: apply an action and return reward + next observation
        - update_game_statistics: derive per-step events and running stats
        - close: shut the game down
    """

    def __init__(self, scenario, action_builder, reward_builder, logger,
                 living_reward=0, custom_reward=False,
                 score_variable='FRAGCOUNT', game_features=None,
                 freedoom=True, screen_resolution='RES_400X225',
                 screen_format='CRCGCB', use_screen_buffer=True,
                 use_depth_buffer=False, use_labels_buffer=True,
                 mode='PLAYER', player_rank=0, players_per_game=1,
                 render_hud=False, render_minimal_hud=False,
                 render_crosshair=True, render_weapon=True,
                 render_decals=False, render_particles=False,
                 render_effects_sprites=False, respawn_protect=True,
                 spawn_farthest=True, name='Hubert_Bonnisseur_de_la_Bate',
                 visible=False, n_bots=0, use_scripted_marines=None,
                 doom_skill=2):
        """
        Store the configuration of a new game (the game itself is created
        by `start`).

        scenario: scenario name; resolved to `<PATH>/scenarios/<scenario>.wad`
        action_builder: object providing `set_buttons(game)` and
            `get_action(action)`
        reward_builder: object providing `get_reward(events)`; only used
            when `custom_reward` is true
        logger: logger used for debug messages
        living_reward: per-step reward forwarded to the game
        game_features: forwarded to `parse_game_features`; `None` (the
            default) is treated as an empty list
        render_decals: marks on the walls
        render_particles: particles like for impacts / traces
        render_effects_sprites: gun puffs / blood splats
        doom_skill: difficulty in [0, 4]; `start` passes `doom_skill + 1`
            to the game

        `score_variable`, `players_per_game` and `player_rank` are only
        validated here; `freedoom`, `respawn_protect` and `spawn_farthest`
        are accepted but not used by the code in this class.
        """
        # A fresh list per instance avoids sharing one default list object
        # between every Experiment.
        if game_features is None:
            game_features = []

        # game resources
        game_filename = 'freedoom2.wad'
        self.scenario = scenario
        self.scenario_path = os.path.join(
            PATH, 'scenarios/{}.wad'.format(scenario))
        self.game_path = os.path.join(PATH, game_filename)

        # check parameters
        assert os.path.isfile(self.scenario_path)
        assert os.path.isfile(self.game_path)
        assert hasattr(GameVariable, score_variable)
        assert hasattr(ScreenResolution, screen_resolution)
        assert hasattr(ScreenFormat, screen_format)
        assert use_screen_buffer or use_depth_buffer
        assert hasattr(Mode, mode)
        assert not (render_minimal_hud and not render_hud)
        assert len(name.strip()) > 0
        assert n_bots >= 0
        # Either an explicit bool, or None provided that no bots are used.
        assert (isinstance(use_scripted_marines, bool)
                or (use_scripted_marines is None and n_bots == 0))
        assert 0 <= doom_skill <= 4
        assert 0 < players_per_game
        assert 0 <= player_rank

        # screen buffer / depth buffer / labels buffer / mode
        self.screen_resolution = screen_resolution
        self.screen_format = screen_format
        self.use_screen_buffer = use_screen_buffer
        self.use_depth_buffer = use_depth_buffer
        self.game_features = parse_game_features(game_features, logger)
        self.use_labels_buffer = use_labels_buffer
        self.use_game_features = any(self.game_features)
        self.mode = mode

        # rendering options
        self.render_hud = render_hud
        self.render_minimal_hud = render_minimal_hud
        self.render_crosshair = render_crosshair
        self.render_weapon = render_weapon
        self.render_decals = render_decals
        self.render_particles = render_particles
        self.render_effects_sprites = render_effects_sprites

        # window visibility
        self.visible = visible

        # actor reward, used for reward shaping (LSTM & Curiosity A3C)
        self.reward_builder = reward_builder
        self.living_reward = living_reward
        self.custom_reward = custom_reward

        # number of bots in the game
        self.n_bots = n_bots
        self.use_scripted_marines = use_scripted_marines

        # doom skill (ie difficulty of the game)
        self.doom_skill = doom_skill

        # bot name
        self.name = name

        # action builder
        self.action_builder = action_builder

        # game statistics for each episode, keyed by map id
        # (used for model comparison and reward shaping)
        self.stats = {}

        # use logging for DEBUG purpose
        self.logger = logger

    # =========================================================================
    # Game start
    # =========================================================================

    def start(self, map_id, episode_time=None, log_events=False):
        """
        Create, configure and initialise the game on map `map_id`.

        If `episode_time` is given, it is set as the episode timeout.
        `log_events` is accepted but currently unused.
        """
        self.episode_time = episode_time

        # initialize the game
        self.game = DoomGame()
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_doom_game_path(self.game_path)

        # map
        assert map_id > 0
        self.map_id = map_id
        self.game.set_doom_map('map{:02d}'.format(map_id))

        # time limit
        if episode_time is not None:
            self.game.set_episode_timeout(episode_time)

        # statistics collected on this map
        self.stats[self.map_id] = []
        # Running stats must exist before the first `make_action` / `reset`,
        # both of which read `self.run_stats`.
        self.run_stats = {k: 0 for k in STAT_KEYS}

        # screen buffer / depth buffer / labels buffer / mode
        self.game.set_screen_resolution(
            getattr(ScreenResolution, self.screen_resolution))
        self.game.set_screen_format(getattr(ScreenFormat, self.screen_format))
        self.game.set_depth_buffer_enabled(self.use_depth_buffer)
        self.game.set_labels_buffer_enabled(self.use_labels_buffer)
        self.game.set_mode(getattr(Mode, self.mode))

        # rendering options
        self.game.set_render_hud(self.render_hud)
        self.game.set_render_minimal_hud(self.render_minimal_hud)
        self.game.set_render_crosshair(self.render_crosshair)
        self.game.set_render_weapon(self.render_weapon)
        self.game.set_render_decals(self.render_decals)
        self.game.set_render_particles(self.render_particles)
        self.game.set_render_effects_sprites(self.render_effects_sprites)

        # engine arguments: forced respawn, no autoaim, agent name
        args = [
            '+sv_forcerespawn 1',
            '+sv_noautoaim 1',
            '+name %s' % self.name,
        ]
        self.args = args
        for arg in args:
            self.game.add_game_args(arg)

        # window visibility
        self.game.set_window_visible(self.visible)

        # define available buttons
        self.action_builder.set_buttons(self.game)

        # doom skill (https://zdoom.org/wiki/GameSkill)
        self.game.set_doom_skill(self.doom_skill + 1)

        # define basic rewards
        self.game.set_living_reward(self.living_reward)

        # start the game, then snapshot its variables
        self.game.init()
        self.initialize_game()
        self.logger.info('start_game')

    # =========================================================================
    # Game statistics
    # =========================================================================

    def _read_game_variables(self):
        """
        Read every variable listed in GAME_FEATURES from the game.

        Values with no fractional part are returned as int, others as float.
        """
        values = {}
        for key, variable in GAME_FEATURES.items():
            value = self.game.get_game_variable(variable)
            values[key] = int(value) if value.is_integer() else float(value)
        return values

    def update_game_properties(self):
        """
        Shift the current properties to `prev_properties` and re-read them.
        """
        new_v = self._read_game_variables()
        self.prev_properties = self.properties
        self.properties = new_v

    def update_game_statistics(self):
        """
        Compare current and previous properties, update the running stats
        and return the list of event names that occurred (the list is what
        `make_action` hands to the reward builder).
        """
        stats = self.run_stats
        cur = self.properties
        prev = self.prev_properties
        r = []

        # kill: one event per new kill
        d = cur['kill_count'] - prev['kill_count']
        if d > 0:
            r.extend(d * ['kill_count'])
            stats['kills'] += d

        # death
        if self.game.is_player_dead():
            r.append('dead')
            stats['deaths'] += 1

        # suicide (frag count went down)
        if cur['frag_count'] < prev['frag_count']:
            r.append('suicide')
            stats['suicides'] += 1

        # found health
        if cur['health'] - prev['health'] > 0:
            r.append('medikit')
            stats['medikit'] += 1
        # NOTE(review): the original layout does not make clear whether this
        # assignment was guarded by the "health changed" check; it is done
        # unconditionally here so the stat always mirrors current health.
        stats['health'] = cur['health']

        # health lost
        if cur['damage_count'] - prev['damage_count'] > 0:
            r.append('health_lost')

        # found armor
        if cur['armor'] - prev['armor'] > 0:
            r.append('armor')
            stats['armor'] += 1

        # selected weapon changed; otherwise track ammo gained / used
        if prev['sel_weapon'] != cur['sel_weapon']:
            r.append('weapon')
            stats['found_weapon'] += 1
        else:
            d = cur['sel_ammo'] - prev['sel_ammo']
            if d > 0:
                r.append('ammo')
                stats['ammo'] += 1
            elif d < 0:
                r.append('use_ammo')

        # auxiliary stats not used for rewards
        stats['frag_count'] = cur['frag_count']

        return r

    def calculate_final_stats(self):
        """
        Add the kill/death ratio ('k/d') to the running stats.

        The death count is floored at 1 to avoid a division by zero.
        """
        self.run_stats['k/d'] = self.run_stats['kills'] * 1.0 / max(
            1, self.run_stats['deaths'])

    # =========================================================================
    # Game handling
    # =========================================================================

    def is_player_dead(self):
        """
        Detect whether the player is dead.
        """
        return self.game.is_player_dead()

    def is_episode_finished(self):
        """
        Return whether the episode is finished.
        This should only be the case after the episode timeout.
        """
        return self.game.is_episode_finished()

    def is_final(self):
        """
        Return whether the game is in a final state (dead or finished).
        """
        return self.is_player_dead() or self.is_episode_finished()

    def reset(self):
        """
        Archive the running stats, then reset the game if necessary:
            - the agent is dead and no episode time is set: respawn it
            - the agent is dead and an episode time is set: new episode
            - the episode is finished: new episode
        """
        self.stats[self.map_id].append(self.run_stats)

        if self.is_player_dead():
            if self.episode_time is None:
                # deathmatch mode: respawn
                self.respawn_player()
            else:
                # the episode ends when the agent dies
                self.new_episode()

        # start a new episode if it is finished
        if self.is_episode_finished():
            self.new_episode()

    def respawn_player(self):
        """
        Respawn the (dead) player and re-read the game variables.
        """
        assert self.is_player_dead()
        self.game.respawn_player()
        self.initialize_game()

    def new_episode(self):
        """
        Start a new episode with fresh running stats.
        """
        self.run_stats = {k: 0 for k in STAT_KEYS}
        self.game.new_episode()
        self.initialize_game()

    def initialize_game(self):
        """
        Reset game properties: no previous snapshot, current one re-read.
        """
        self.prev_properties = None
        self.properties = self._read_game_variables()

    def close(self):
        """
        Close the current experiment.
        """
        self.game.close()

    def observe_state(self, variable_names, feature_names):
        """
        Observe the current state of the game.

        Returns the (screen, variables, game_features) triple produced by
        `process_game_info`.
        """
        screen, variables, game_features = process_game_info(
            self.game, variable_names, feature_names)
        return screen, variables, game_features

    def make_action(self, action, variable_names, feature_names,
                    frame_skip=1, sleep=None):
        """
        Apply `action` and return the reward and the next observation.

        Inputs:
            action: action index, converted by the action builder
            frame_skip: number of frames during which the action is repeated
            sleep: pause (seconds) between frames when the window is
                visible, to smooth visualization

        Output:
            reward: game reward, plus the reward builder's reward when
                `custom_reward` is enabled and a reward builder is set
            screen, variables, game_features: next observation, or None for
                each when the state is final
        """
        assert frame_skip >= 1

        # convert selected action to the ViZDoom action format
        action = self.action_builder.get_action(action)

        if self.visible:
            # Step frame by frame so the window can be watched.
            r = 0
            for _ in range(frame_skip):
                r += self.game.make_action(action)
                # death or episode finished
                if self.is_player_dead() or self.is_episode_finished():
                    break
                if sleep is not None:
                    time.sleep(sleep)
        else:
            r = self.game.make_action(action, frame_skip)

        # observe resulting state
        if self.is_final():
            screen = variables = game_features = None
        else:
            screen, variables, game_features = self.observe_state(
                variable_names, feature_names)

        # update game statistics and compute custom rewards
        self.update_game_properties()
        list_r = self.update_game_statistics()
        r_bis = 0
        if self.custom_reward and self.reward_builder:
            r_bis = self.reward_builder.get_reward(list_r)

        return r + r_bis, screen, variables, game_features
def new_episode(game: DoomGame, spawn_point_counter: Dict[int, int], n_spawn_points: int) -> None:
    """
    Workaround for improper random number generation with ACS.

    In certain scenarios the agent is spawned at a random spawn point.
    However, instead of this distribution being uniform, one single id is
    heavily preferred. In order to not have the agent encounter too much of
    the same starting points, this method creates new episodes until one is
    found with a different id than the most prominent one.

    The function returns immediately (after starting one episode) when the
    spawn point is undefined, i.e. the USER1 variable is 0 or NaN.
    Otherwise it keeps starting episodes until at least `n_spawn_points`
    distinct ids have been recorded and the current id is not the most
    frequent one.

    :param game: The instance of VizDoom
    :param spawn_point_counter: The dict holding the counts of the previous
        spawn points; updated in place
    :param n_spawn_points: Number of spawn points in a given scenario
    """
    while True:
        game.new_episode()
        spawn_point = game.get_game_variable(GameVariable.USER1)
        spawn_point %= 21

        # Spawn point undefined. NaN must be tested with math.isnan: an
        # identity comparison against its boolean result never matches.
        if spawn_point == 0 or math.isnan(spawn_point):
            return

        # Count this occurrence (a first sighting counts as 1, not 0).
        spawn_point_counter[spawn_point] = spawn_point_counter.get(spawn_point, 0) + 1

        most_frequent = max(spawn_point_counter, key=spawn_point_counter.get)
        if spawn_point != most_frequent and len(spawn_point_counter) >= n_spawn_points:
            return
class DoomEnv(gym.Env):
    """
    Gym environment wrapping a ViZDoom `DoomGame` for a configured level.

    The level settings are read from the module-level DOOM_SETTINGS table
    unless the instance provides a callable `_customize_game` hook.
    """

    metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 35}

    # Shared tail of the error messages raised when the game fails to start.
    _LOCK_HINT = (
        'This is likely caused by a missing multiprocessing lock. '
        'To run VizDoom across multiple processes, you need to pass a lock when you configure the env '
        '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env '
        'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a '
        'singleton lock in memory.')

    # Info-dict keys, in the order of the entries of `state.game_variables`.
    _STATE_VARIABLE_NAMES = (
        'KILLCOUNT', 'ITEMCOUNT', 'SECRETCOUNT', 'FRAGCOUNT', 'HEALTH',
        'ARMOR', 'DEAD', 'ON_GROUND', 'ATTACK_READY', 'ALTATTACK_READY',
        'SELECTED_WEAPON', 'SELECTED_WEAPON_AMMO',
        'AMMO1', 'AMMO2', 'AMMO3', 'AMMO4', 'AMMO5',
        'AMMO6', 'AMMO7', 'AMMO8', 'AMMO9', 'AMMO0',
    )

    def __init__(self, level):
        """Create the environment for `level` (an index into DOOM_SETTINGS)."""
        self.previous_level = -1
        self.level = level
        self.game = DoomGame()
        self.loader = Loader()
        self.doom_dir = os.path.dirname(os.path.abspath(__file__))
        self._mode = 'algo'  # 'algo' or 'human'
        self.no_render = False  # To disable double rendering in human mode
        self.viewer = None
        self.is_initialized = False  # Indicates that reset() has been called
        self.curr_seed = 0
        self.lock = (DoomLock()).get_lock()
        self.action_space = spaces.MultiDiscrete([[0, 1]] * 38 + [[-10, 10]] * 2 + [[-100, 100]] * 3)
        self.allowed_actions = list(range(NUM_ACTIONS))
        self.screen_height = 480
        self.screen_width = 640
        self.screen_resolution = ScreenResolution.RES_640X480
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
        self._configure()

    def _configure(self, lock=None, **kwargs):
        """Optionally replace the multiprocessing lock used around init/close."""
        if 'screen_resolution' in kwargs:
            logger.warn(
                'Deprecated - Screen resolution must now be set using a wrapper. See documentation for details.')
        # Multiprocessing lock
        if lock is not None:
            self.lock = lock

    def _load_level(self):
        """
        (Re)create the game for the current level, start an episode and
        return the first observation.

        In human mode the episode is played to completion by the spectator
        and an all-zero observation is returned.
        """
        # Closing the level if it is initialized
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Customizing level
        if getattr(self, '_customize_game', None) is not None and callable(self._customize_game):
            self.level = -1
            self._customize_game()
        else:
            settings = DOOM_SETTINGS[self.level]

            # Loading Paths
            if not self.is_initialized:
                self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
                self.game.set_doom_game_path(self.loader.get_freedoom_path())

            # Common Settings
            self.game.load_config(os.path.join(self.doom_dir, 'assets/%s' % settings[CONFIG]))
            self.game.set_doom_scenario_path(self.loader.get_scenario_path(settings[SCENARIO]))

            # Random Map Selection
            if settings[MAP] != '':
                if RANDOMIZE_MAPS > 0 and 'labyrinth' in settings[CONFIG].lower():
                    if 'fix' in settings[SCENARIO].lower():
                        mapId = 'map%02d' % np.random.randint(4, 8)
                    else:
                        mapId = 'map%02d' % np.random.randint(1, RANDOMIZE_MAPS + 1)
                    print('\t=> Special Config: Randomly Loading Maps. MapID = ' + mapId)
                    self.game.set_doom_map(mapId)
                else:
                    print('\t=> Default map loaded. MapID = ' + settings[MAP])
                    self.game.set_doom_map(settings[MAP])

            # Setting Vizdoom map settings
            self.game.set_doom_skill(settings[DIFFICULTY])
            self.allowed_actions = settings[ACTIONS]
            self.game.set_screen_resolution(self.screen_resolution)

        self.previous_level = self.level
        self._closed = False

        if NO_MONSTERS:
            print('\t=> Special Config: Monsters Removed.')
            self.game.add_game_args('-nomonsters 1')

        # Algo mode
        if 'human' != self._mode:
            self.game.set_window_visible(False)
            self.game.set_mode(Mode.PLAYER)
            self.no_render = False
            try:
                with self.lock:
                    self.game.init()
            except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
                raise error.Error('VizDoom exited unexpectedly. ' + self._LOCK_HINT)
            self._start_episode()
            self.is_initialized = True
            return self.game.get_state().image_buffer.copy()

        # Human mode
        self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(True)
        self.game.set_mode(Mode.SPECTATOR)
        self.no_render = True
        with self.lock:
            self.game.init()
        self._start_episode()
        self.is_initialized = True
        self._play_human_mode()
        return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

    def _start_episode(self):
        """Start a new episode, applying a pending seed exactly once."""
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        self.game.new_episode()
        return

    def _play_human_mode(self):
        """Advance the game until the episode ends, printing each state."""
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            total_reward = self.game.get_total_reward()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) + '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return

    def _step(self, action):
        """
        Apply `action` and return (observation, reward, done, info).

        `action` is expected to hold NUM_ACTIONS entries; a shorter list is
        padded with zeros and surplus entries are ignored. Only the entries
        listed in `allowed_actions` are sent to the game (all of them when
        that list is empty). If the game is no longer running, an all-zero
        observation with reward 0 and done=True is returned.
        """
        if NUM_ACTIONS != len(action):
            logger.warn('Doom action list must contain %d items. Padding missing items with 0' % NUM_ACTIONS)
            old_action = action
            action = [0] * NUM_ACTIONS
            # Copy at most NUM_ACTIONS entries so an over-long action cannot
            # index past the end of the padded list.
            for i in range(min(len(old_action), NUM_ACTIONS)):
                action[i] = old_action[i]

        # action is a list of numbers but DoomGame.make_action expects a list of ints
        if len(self.allowed_actions) > 0:
            list_action = [int(action[action_idx]) for action_idx in self.allowed_actions]
        else:
            list_action = [int(x) for x in action]

        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)

            if self.game.is_episode_finished():
                is_finished = True
                return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), reward, is_finished, info
            else:
                is_finished = False
                return state.image_buffer.copy(), reward, is_finished, info

        except vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), 0, True, {}

    def _reset(self):
        """
        Start a new episode on the running game, or load the level if the
        game is not initialised (or was closed), and return the observation.
        """
        if self.is_initialized and not self._closed:
            self._start_episode()
            image_buffer = self.game.get_state().image_buffer
            if image_buffer is None:
                raise error.Error('VizDoom incorrectly initiated. ' + self._LOCK_HINT)
            return image_buffer.copy()
        else:
            return self._load_level()

    def _render(self, mode='human', close=False):
        """
        Render the current frame.

        'rgb_array' returns the image; 'human' shows it in a viewer window
        (skipped when `no_render` is set). `close=True` closes the viewer.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None  # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            if 'human' == mode and self.no_render:
                return
            state = self.game.get_state()
            img = state.image_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder doesn't stop
            if img is None:
                img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
            if mode == 'rgb_array':
                return img
            elif mode == 'human':
                # Compare by value: identity (`is`) against a string literal
                # is not guaranteed to hold for equal strings.
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
        except vizdoom.ViZDoomIsNotRunningException:
            pass  # Doom has been closed

    def _close(self):
        """Close the game process."""
        # Lock required for VizDoom to close processes properly
        with self.lock:
            self.game.close()

    def _seed(self, seed=None):
        """Derive and store the seed applied at the next episode start."""
        self.curr_seed = seeding.hash_seed(seed) % 2 ** 32
        return [self.curr_seed]

    def _get_game_variables(self, state_variables):
        """
        Build the info dict for a step.

        Always contains "LEVEL"; when `state_variables` is not None it also
        maps the first 22 entries to named keys and adds POSITION_X /
        POSITION_Y converted from the USER1 / USER2 game variables.
        """
        info = {
            "LEVEL": self.level
        }
        if state_variables is None:
            return info
        for index, key in enumerate(self._STATE_VARIABLE_NAMES):
            info[key] = state_variables[index]
        info['POSITION_X'] = doom_fixed_to_double(self.game.get_game_variable(GameVariable.USER1))
        info['POSITION_Y'] = doom_fixed_to_double(self.game.get_game_variable(GameVariable.USER2))
        return info
def _create_game(self, params, idx, is_train, get_extra_info=False):
    """
    Build, configure and initialise a DoomGame for `params.scenario`.

    params: configuration object; this method reads `scenario`,
        `scenario_dir`, `test_scenario_dir`, `multimaze`, `screen_size`,
        `show_window` and `disable_head_bob`
    idx: index substituted into multi-scenario file-name templates
    is_train: when false and `params.test_scenario_dir` is set, templated
        scenarios are loaded from the test directory
    get_extra_info: also enable the labels, automap and depth buffers

    Sets `self.previous_health` (if HEALTH is an available game variable)
    and `self.shaping_reward` (if `self.use_shaping`), and returns the
    initialised game. Raises AssertionError for an unknown scenario or
    screen size.
    """
    game = DoomGame()

    VALID_SCENARIOS = [
        'my_way_home.cfg',
        'health_gathering.cfg',
        'health_gathering_supreme.cfg',
        'health_gathering_supreme_no_death_penalty.cfg',
        'deadly_corridor.cfg',
        'defend_the_center.cfg',
        'defend_the_line.cfg',
        'two_color_maze014.cfg',
        'labyrinth_maze000.cfg',
        'labyrinth_maze11_000.cfg',
    ]

    # Every entry needs its own comma: two adjacent string literals are
    # silently concatenated into a single (useless) template.
    VALID_MULTI_SCENARIOS = [
        'maze_{:003}.cfg',
        'custom_scenario{:003}.cfg',
        'mino_maze{:003}.cfg',
        'labyrinth_maze{:003}.cfg',
        'two_item_maze{:003}.cfg',
        'six_item_maze{:003}.cfg',
        'four_item_maze{:003}.cfg',
        'eight_item_maze{:003}.cfg',
        'repeated_laby_maze{:003}.cfg',
        'two_color_maze{:003}.cfg',
    ]

    if params.scenario in VALID_SCENARIOS:
        game.load_config(params.scenario_dir + params.scenario)
    elif params.scenario in VALID_MULTI_SCENARIOS:
        assert params.multimaze
        if not is_train and params.test_scenario_dir:
            scenario_dir = params.test_scenario_dir
        else:
            if not is_train:
                print(
                    'WARNING, LOADING TRAINING DATA FOR TESTING, THIS MAY NOT BE WHAT YOU INTENDED!'
                )
            scenario_dir = params.scenario_dir
        game.load_config(scenario_dir + params.scenario.format(idx))
    else:
        assert 0, 'Invalid environment {}'.format(params.scenario)

    if params.screen_size == '320X180':
        # TODO: Implement options for other resolutions
        game.set_screen_resolution(ScreenResolution.RES_320X180)
    else:
        assert 0, 'Invalid screen_size {}'.format(params.screen_size)

    game.set_sound_enabled(False)
    game.set_window_visible(params.show_window)

    if params.show_window:
        game.set_mode(Mode.SPECTATOR)
        game.add_game_args("+freelook 1")

    # Player variables for prediction of position etc
    for variable in (GameVariable.POSITION_X,
                     GameVariable.POSITION_Y,
                     GameVariable.POSITION_Z,
                     GameVariable.VELOCITY_X,
                     GameVariable.VELOCITY_Y,
                     GameVariable.VELOCITY_Z,
                     GameVariable.ANGLE,
                     GameVariable.PITCH,
                     GameVariable.ROLL):
        game.add_available_game_variable(variable)

    if get_extra_info:
        game.set_labels_buffer_enabled(True)
        game.set_automap_buffer_enabled(True)
        game.set_automap_mode(AutomapMode.OBJECTS)
        game.set_automap_rotate(True)
        game.set_automap_render_textures(False)
        game.set_depth_buffer_enabled(True)

    game.init()

    if GameVariable.HEALTH in game.get_available_game_variables():
        self.previous_health = game.get_game_variable(GameVariable.HEALTH)

    if self.use_shaping:
        self.shaping_reward = doom_fixed_to_double(
            game.get_game_variable(GameVariable.USER1))

    if params.disable_head_bob:
        game.send_game_command('movebob 0.0')

    return game
def _create_game(self, params, idx, is_train, get_extra_info=False):
    """
    Build, configure and initialise a DoomGame for `params.scenario`.

    params: configuration object; this method reads `scenario`,
        `scenario_dir`, `test_scenario_dir`, `multimaze`, `screen_size`,
        `show_window`, `use_depth`, `predict_depth`, `ego_model`,
        `depth_as_obs` and `disable_head_bob`
    idx: index substituted into multi-scenario file-name templates; also
        stored as `self.idx`
    is_train: when false and `params.test_scenario_dir` is set, templated
        scenarios are loaded from the test directory
    get_extra_info: also enable the labels, automap and depth buffers

    The scenario 'curriculum' is accepted without loading any config file.
    Sets `self.previous_health` (if HEALTH is an available game variable)
    and `self.shaping_reward` (if `self.use_shaping`), and returns the
    initialised game. Raises AssertionError for an unknown scenario or
    screen size.
    """
    game = DoomGame()
    self.idx = idx

    VALID_SCENARIOS = [
        'my_way_home.cfg',
        'health_gathering.cfg',
        'health_gathering_supreme.cfg',
        'health_gathering_supreme_no_death_penalty.cfg',
        'deadly_corridor.cfg',
        'defend_the_center.cfg',
        'defend_the_line.cfg',
        'custom_maze_001.cfg',
        'custom_maze_002.cfg',
        'custom_maze_003.cfg',
        'custom_mazes_005/train/maze_000.cfg',
        'custom_mazes_005/train/maze_004.cfg',
        'custom_mazes_005/valid/maze_000.cfg',
        'long_term_base.cfg',
        'scenario_x.cfg',
        'scenario_cw2.cfg',
        'scenario_2_item0.cfg',
        'scenario_2_item1.cfg',
        'scenario_2_item2.cfg',
        'scenario_2_item3.cfg',
        'scenario_3_item0.cfg',
        'two_color_maze040.cfg',
        'four_item_maze034.cfg',
        'labyrinth_maze000.cfg',
        'mino_maze000.cfg',
        'labyrinth_maze11_000.cfg',
        'mino_maze_simple.cfg',
    ]

    VALID_MULTI_SCENARIOS = [
        'maze_{:003}.cfg',
        'mino_maze{:003}.cfg',
        'labyrinth_maze{:003}.cfg',
        'indicator_maze{:003}.cfg',
        'two_item_maze{:003}.cfg',
        'six_item_maze{:003}.cfg',
        'four_item_maze{:003}.cfg',
        'eight_item_maze{:003}.cfg',
        'repeated_laby_maze{:003}.cfg',
        'two_color_maze{:003}.cfg',
    ]

    if params.scenario in VALID_SCENARIOS:
        game.load_config(params.scenario_dir + params.scenario)
    elif params.scenario in VALID_MULTI_SCENARIOS:
        assert params.multimaze
        if not is_train and params.test_scenario_dir:
            scenario_dir = params.test_scenario_dir
        else:
            scenario_dir = params.scenario_dir
        game.load_config(scenario_dir + params.scenario.format(idx))
    elif params.scenario == 'curriculum':
        pass
    else:
        assert 0, 'Invalid environment {}'.format(params.scenario)

    if params.screen_size == '320X180':
        game.set_screen_resolution(ScreenResolution.RES_320X180)
    else:
        assert 0, 'Invalid screen_size {}'.format(params.screen_size)

    if (params.use_depth or params.predict_depth or params.ego_model
            or params.depth_as_obs):
        game.set_depth_buffer_enabled(True)

    # Window and sound are configured once, after the config file has been
    # loaded, so these are the settings in force when the game starts.
    game.set_window_visible(params.show_window)
    game.set_sound_enabled(False)

    if params.show_window:
        game.set_mode(Mode.SPECTATOR)
        game.add_game_args("+freelook 1")

    # Player variables for prediction of position etc
    for variable in (GameVariable.POSITION_X,
                     GameVariable.POSITION_Y,
                     GameVariable.POSITION_Z,
                     GameVariable.VELOCITY_X,
                     GameVariable.VELOCITY_Y,
                     GameVariable.VELOCITY_Z,
                     GameVariable.ANGLE,
                     GameVariable.PITCH,
                     GameVariable.ROLL):
        game.add_available_game_variable(variable)

    if get_extra_info:
        game.set_labels_buffer_enabled(True)
        game.set_automap_buffer_enabled(True)
        game.set_automap_mode(AutomapMode.OBJECTS)
        game.set_automap_rotate(True)
        game.set_automap_render_textures(False)
        game.set_depth_buffer_enabled(True)

    # Added a single time; the engine argument list would otherwise
    # contain the same flag twice.
    game.add_game_args("+vid_forcesurface 1")
    game.init()

    if GameVariable.HEALTH in game.get_available_game_variables():
        self.previous_health = game.get_game_variable(GameVariable.HEALTH)

    if self.use_shaping:
        self.shaping_reward = doom_fixed_to_double(
            game.get_game_variable(GameVariable.USER1))

    if params.disable_head_bob:
        game.send_game_command('movebob 0.0')

    return game
# Makes a "prolonged" action and skip frames: # skiprate = 4 # r = game.make_action(choice(actions), skiprate) # The same could be achieved with: # game.set_action(choice(actions)) # game.advance_action(skiprate) # r = game.get_last_reward() # Prints state's game variables and reward. print("Labels: %s" % list(map(lambda x: x.object_name, labels))) print("Labels: %s" % list(map(lambda x: x.value, labels))) print("State %s" % state.__dict__) print("State #" + str(n)) print("Game variables:", vars) print("Reward:", r) print("KILLCOUNT: %s" % game.get_game_variable(GameVariable.KILLCOUNT)) print("=====================") if sleep_time > 0: sleep(sleep_time) # Check how the episode went. print("Episode finished.") print("Total reward:", game.get_total_reward()) print("************************") # It will be done automatically anyway but sometimes you need to do it in the middle of the program... game.close()
class VizDoom(gym.Env):
    """
    Gym wrapper around a single VizDoom game loaded from a ``.cfg`` file.

    Actions are discrete: exactly one of the buttons declared in the cfg is
    pressed per step. Observations are the game screen buffer resized to
    ``scaled_resolution``.
    """

    def __init__(self,
                 cfg_path,
                 number_maps,
                 scaled_resolution=(42, 42),
                 action_frame_repeat=4,
                 clip=(-1, 1),
                 seed=None,
                 data_augmentation=False):
        """
        Gym environment for training reinforcement learning agents.

        :param cfg_path: path of the mission (.cfg) to run
        :param number_maps: number of maps which are contained within the cfg
            file; when 0 the map is never changed on reset
        :param scaled_resolution: resolution (height, width) of the
            observation to be returned with each step
        :param action_frame_repeat: how many game tics an action stays active
        :param clip: (low, high) bounds the reward returned on each step is
            clipped to; a falsy value disables clipping
        :param seed: seed for the global ``random`` module, used to determine
            the order in which the doom maps are shown. ``None`` leaves the
            generator untouched.
        :param data_augmentation: whether to add randomly colored, randomly
            sized boxes to the observation returned by ``step``
        :raises ValueError: if ``cfg_path`` does not exist or does not end
            with ``.cfg``
        """
        self.cfg_path = str(cfg_path)
        if not os.path.exists(self.cfg_path):
            raise ValueError("Cfg file not found", cfg_path)

        if not self.cfg_path.endswith('.cfg'):
            raise ValueError("cfg_path must end with .cfg")

        self.number_maps = number_maps
        self.scaled_resolution = scaled_resolution
        self.action_frame_repeat = action_frame_repeat
        self.clip = clip
        self.data_augmentation = data_augmentation

        # `is not None` so that an explicit seed of 0 is honoured too.
        if seed is not None:
            random.seed(seed)

        super(VizDoom, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.cfg_path)

        # Create an instance of the VizDoom game and initialise it from the
        # scenario config file.
        self.env = DoomGame()
        self.env.load_config(self.cfg_path)
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        # assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA,
            Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA,
            Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA,
        ]
        assert len(set(available_actions) - set(not_supported_actions)) \
            == len(available_actions), \
            "delta buttons are not supported by this environment"

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size())

        # scaled_resolution is (height, width); the observation is laid out
        # as (rows, columns, channels).
        height = scaled_resolution[0]
        width = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(0.0,
                                                255.0,
                                                shape=(height, width, 3),
                                                dtype=np.float32)
        self._rgb_array = None
        self.reset()

    def _process_image(self, shape=None):
        """
        Convert the latest screen buffer into the desired resolution.

        :param shape: desired shape in the format (rows, columns); defaults
            to the first two dimensions of the observation space
        :return: resized image in the format (rows, columns, channels)
        """
        if shape is None:
            rows, columns, _ = self.observation_space.shape
        else:
            rows, columns = shape

        # The transpose moves axis 0 last - assumes the screen buffer is
        # channel-first (channels, rows, columns); TODO confirm against cfg.
        # The resize backends index the opposite way round to numpy, hence
        # (columns, rows).
        img = VizDoom._resize(self._rgb_array.transpose(1, 2, 0),
                              (columns, rows))
        return img

    @staticmethod
    def _augment_data(img):
        """
        Augment the input image, in place, with N randomly colored boxes.

        N is sampled from 0..5 inclusive. Each box starts at a uniformly
        random position and has side lengths between 0.1 and 0.35 of the
        corresponding image dimension. Each channel of a box is filled with
        a single value sampled from 0..254 inclusive.

        :param img: input image to be augmented - format (rows, columns,
            channels); it is modified in place
        :return img: augmented image - format (rows, columns, channels)
        """
        dimx = img.shape[0]
        dimy = img.shape[1]
        max_rand_dim = .25
        min_rand_dim = .1
        num_blotches = np.random.randint(0, 6)

        for _ in range(num_blotches):
            # locations in [0,1]
            rand = np.random.rand
            rx = rand()
            ry = rand()
            rdx = rand() * max_rand_dim + min_rand_dim
            rdy = rand() * max_rand_dim + min_rand_dim

            # convert the fractional position / size into pixel indices
            rx, rdx = [round(r * dimx) for r in (rx, rdx)]
            ry, rdy = [round(r * dimy) for r in (ry, rdy)]
            for c in range(3):
                img[rx:rx + rdx, ry:ry + rdy, c] = np.random.randint(0, 255)

        return img

    @staticmethod
    def _resize(img, shape):
        """
        Resize the specified image with OpenCV, or Pillow as a fallback.

        :param img: image to resize
        :param shape: desired size in the format (columns, rows), i.e.
            (width, height) - the order expected by ``cv2.resize`` and
            ``PIL.Image.resize``
        :return: resized image as a numpy array
        :raises ValueError: if neither OpenCV nor Pillow is available
        """
        if not (OPENCV_AVAILABLE or PILLOW_AVAILABLE):
            raise ValueError('No image library backend found.'
                             ' Install either '
                             'OpenCV or Pillow to support image processing.')

        if OPENCV_AVAILABLE:
            return cv2.resize(img, shape, interpolation=cv2.INTER_AREA)

        if PILLOW_AVAILABLE:
            return np.array(PIL.Image.fromarray(img).resize(shape))

        raise NotImplementedError

    def reset(self):
        """
        Resets environment to start a new mission.

        If there is more than one maze it will randomly select a new maze.

        :return: initial observation of the environment as an rgb array in
            the format (rows, columns, channels)
        """
        # Compare by value: `is not 0` relies on int caching and triggers a
        # SyntaxWarning on modern Python.
        if self.number_maps != 0:
            self.doom_map = random.choice(
                ["map" + str(i).zfill(2) for i in range(self.number_maps)])
            self.env.set_doom_map(self.doom_map)

        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()
        return observation

    def get_target_idx(self):
        """Return the USER5 game variable converted to an int."""
        return int(self.env.get_game_variable(GameVariable.USER5))

    def _one_hot(self, action):
        """Build the button list with only the button at `action` pressed."""
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1
        return list(one_hot_action)

    def step(self, action):
        """
        Perform the specified action for ``self.action_frame_repeat`` tics.

        :param action: the index of the action to perform. The actions are
            specified when the cfg is created. The defaults are
            "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        reward = self.env.make_action(self._one_hot(action),
                                      self.action_frame_repeat)
        done = self.env.is_episode_finished()

        # state is available only if the episode is still running; when it
        # is done the last stored frame is returned again
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer

        observation = self._process_image()

        if self.data_augmentation:
            observation = VizDoom._augment_data(observation)

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def step_record(self, action, record_path, record_shape=(120, 140)):
        """
        Perform the specified action like ``step`` while saving frames.

        The action is applied in chunks of 2 tics
        (``self.action_frame_repeat // 2`` chunks); after each chunk the
        current frame, if any, is written to ``record_path`` as a PNG named
        after the current timestamp. No data augmentation is applied.

        :param action: the index of the action to perform. The actions are
            specified when the cfg is created. The defaults are
            "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :param record_path: the directory to save the images of the
            environment to
        :param record_shape: the shape (rows, columns) of the images to save
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        buttons = self._one_hot(action)
        reward = 0

        for _ in range(self.action_frame_repeat // 2):
            reward += self.env.make_action(buttons, 2)
            env_state = self.env.get_state()
            if env_state is not None:
                self._rgb_array = env_state.screen_buffer
                imageio.imwrite(
                    os.path.join(record_path,
                                 str(datetime.datetime.now()) + ".png"),
                    self._process_image(record_shape))
            if self.env.is_episode_finished():
                # nothing left to act on or record once the episode is over
                break

        done = self.env.is_episode_finished()

        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer

        observation = self._process_image()

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def render(self, mode='rgb_array'):
        """
        Render frame.

        :param mode: only 'rgb_array' is supported
        :return: the most recent raw (unscaled) screen buffer
        :raises NotImplementedError: for any other mode
        """
        if mode == 'rgb_array':
            return self._rgb_array

        raise NotImplementedError

    def create_env(self):
        """
        Returns a function to create an environment with the generated mazes.

        Used for vectorising the environment. For example as used by Stable
        Baselines. The created environments share this environment's cfg,
        resolution, frame repeat, reward clipping and data augmentation
        settings; the seed is not forwarded.

        :return: a function to create an environment with the generated mazes
        """
        return lambda: VizDoom(self.cfg_path,
                               number_maps=self.number_maps,
                               scaled_resolution=self.scaled_resolution,
                               action_frame_repeat=self.action_frame_repeat,
                               clip=self.clip,
                               data_augmentation=self.data_augmentation)
class VizDoomGym(gym.Env):
    """
    Gym wrapper around a VizDoom deathmatch game with a shaped reward.

    The constructor is deliberately not implemented; the environment is set
    up through ``_init``. The last available button is pressed on every step
    (sprint), so the action space covers all buttons but that one.
    """

    def __init__(self):
        raise NotImplementedError

    def _init(self, mission_file: str, scaled_resolution: list):
        """
        Create and initialise the underlying game and the gym spaces.

        :param mission_file: name of the mission (.cfg) to run,
        :param scaled_resolution: resolution (height, width) of the video
            frames to run training on
        """
        super(VizDoomGym, self).__init__()
        self.mission_file = mission_file
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)",
                          self.mission_file)

        self.deathmatch = True
        # distance we need the agent to travel per time-step, otherwise we
        # penalise
        self.distance_threshold = 15
        self.prev_properties = None
        self.properties = None
        self.cum_kills = np.array([0])

        # Create an instance of the VizDoom game and initialise it from the
        # scenario config file.
        self.env = DoomGame()
        self.env.load_config(self.mission_file)
        self.env.set_window_visible(False)
        self.env.set_screen_format(ScreenFormat.RGB24)
        if self.deathmatch:
            self.env.add_game_args("-deathmatch")

        self.env.set_doom_skill(4)
        self._action_frame_repeat = 4
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        self.available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA,
            Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA,
            Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA,
        ]
        # print(available_actions)
        assert len((set(self.available_actions) - set(not_supported_actions))) \
            == len(self.available_actions)

        # Give this instance its own metadata dict: assigning a key on
        # `self.metadata` directly would mutate the dict shared at class
        # level with every other environment.
        self.metadata = dict(self.metadata, render_modes=['rgb_array'])

        # Allow only one button to be pressed at a given step; the last
        # button is excluded because sub_step always presses it.
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size() - 1)

        self.rows = scaled_resolution[0]
        self.columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(low=0.0,
                                                high=1.0,
                                                shape=(self.rows,
                                                       self.columns, 3),
                                                dtype=np.float32)
        self._rgb_array = None
        self.steps = 0
        self.global_steps = 0
        self.reset()

    def _process_image(self, img):
        """
        Resize a frame to (rows, columns) and rescale it to [0, 1].

        :param img: frame to convert, as accepted by ``Image.fromarray``
        :return: float32 array with values divided by 255
        """
        # PIL resize has indexing opposite to numpy array
        img = np.array(Image.fromarray(img).resize((self.columns, self.rows)))
        img = img.astype(np.float32)
        img = img / 255.0
        return img

    def update_game_variables(self):
        """
        Check and update game variables.

        Reads every entry of the module-level ``game_variables`` mapping
        from the game, stores the result in ``self.properties`` and moves
        the previous values to ``self.prev_properties``.
        """
        # read game variables
        new_v = {
            k: self.env.get_game_variable(v)
            for k, v in game_variables.items()
        }
        # only positional variables (*_x, *_y, *_z) may be fractional
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']

        # check game variables
        assert 0 <= health <= 200 or health < 0 and self.env.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_reward(self):
        """
        Compute the shaped reward from the last change in game variables.

        Requires ``update_game_variables`` to have been called at least
        twice since the last reset. Also accumulates kills in
        ``self.cum_kills``.

        :return: the reward for the latest step
        """
        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        current = self.properties
        previous = self.prev_properties
        reward = 0

        # kill
        d = current['score'] - previous['score']
        if d > 0:
            self.cum_kills += d
            reward += d * default_reward_values['KILL']

        # death
        if self.env.is_player_dead():
            reward += default_reward_values['DEATH']

        # suicide
        if current['frag_count'] < previous['frag_count']:
            reward += default_reward_values['SUICIDE']

        # found / lost health
        d = current['health'] - previous['health']
        if d > 0:
            reward += default_reward_values['MEDIKIT']
        elif d < 0:
            reward += default_reward_values['INJURED']

        # found armor (losing armor is not penalised)
        d = current['armor'] - previous['armor']
        if d > 0:
            reward += default_reward_values['ARMOR']

        # found / used ammo
        d = current['sel_ammo'] - previous['sel_ammo']
        if d > 0:
            reward += default_reward_values['AMMO']
        elif d < 0:
            reward += default_reward_values['USE_AMMO']

        # distance
        # turn_left = (Button.TURN_LEFT == self.available_actions[action])
        # turn_right = (Button.TURN_RIGHT == self.available_actions[action])
        # if not (turn_left or turn_right):
        diff_x = current['position_x'] - previous['position_x']
        diff_y = current['position_y'] - previous['position_y']
        distance = np.sqrt(diff_x**2 + diff_y**2)
        if distance > self.distance_threshold:
            reward += default_reward_values['DISTANCE'] * distance
        else:
            reward += default_reward_values['STANDSTILL']

        # living
        reward += default_reward_values['LIVING']

        return reward

    # def increase_difficulty(self):
    #     self.curr_skill += 1
    #     self.env.close()
    #     self.env.set_doom_skill(self.curr_skill)
    #     self.env.init()
    #     print('changing skill to', self.curr_skill)

    # def update_map(self):
    #     self.map_level += 1
    #     map_str = 'map0' + str(self.map_level)
    #     # go with initial wad file if there's still maps on it
    #     self.env.close()
    #     self.env.set_doom_map(map_str)
    #     self.env.init()

    def sub_reset(self):
        """
        Reset environment.

        Clears the per-episode counters and cached game variables, starts a
        new episode and returns its first processed frame.
        """
        self.steps = 0
        self.cum_kills = np.array([0])
        self.prev_properties = None
        self.properties = None

        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation

    def reset(self):
        """Reset the environment and return the initial observation."""
        observation = self.sub_reset()
        return observation

    def sub_step(self, action):
        """
        Take step.

        :param action: index of the single button to press
        :return: tuple (observation, reward, done)
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1
        # ALWAYS SPRINTING
        one_hot_action = np.append(one_hot_action, [1])
        assert len(one_hot_action) == len(self.env.get_available_buttons())

        _ = self.env.make_action(list(one_hot_action),
                                 self._action_frame_repeat)

        self.update_game_variables()

        # the first two steps of an episode (steps 0 and 1) yield no reward
        if self.steps > 1:
            reward = self.update_reward()
        else:
            reward = 0

        self.steps += 1
        self.global_steps += 1
        done = self.env.is_episode_finished()

        # state is available only if the episode is still running; when it
        # is done the last stored frame is returned again
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer

        observation = self._process_image(self._rgb_array)
        return observation, reward, done

    def step(self, action):
        """Gym-style step: ``sub_step`` plus an empty info dict."""
        observation, reward, done = self.sub_step(action)
        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def seed(self, seed=None):
        """
        Seed the underlying game.

        :param seed: seed passed to the game; ``None`` leaves it unchanged
        """
        # `is not None` so that an explicit seed of 0 is applied too.
        if seed is not None:
            self.env.set_seed(seed)

    def render(self, mode='human'):
        """
        Render frame.

        :param mode: only 'rgb_array' is supported; note that the default
            'human' is not
        :return: the most recent raw (unscaled) screen buffer
        :raises NotImplementedError: for any mode other than 'rgb_array'
        """
        if mode == 'rgb_array':
            return self._rgb_array

        raise NotImplementedError